llama : fix strncpy warning + note token_to_str does not write null
ggerganov committed Aug 16, 2023
commit 5b94b14 (1 parent: a499313)
Showing 6 changed files with 21 additions and 24 deletions.

convert-llama-7b-pth-to-gguf.py (3 additions & 4 deletions)
@@ -132,7 +132,7 @@ def count_model_parts(dir_model: str) -> int:
toktype = 1 # defualt to normal token type
if tokenizer.is_unknown(i): toktype = 2
if tokenizer.is_control(i): toktype = 3

# TODO: How to determinate if a token is user defined?
# ref: https://github.com/google/sentencepiece/blob/master/src/sentencepiece_model.proto
# if tokenizer.is_user_defined(i): toktype = 4
@@ -223,7 +223,7 @@ def count_model_parts(dir_model: str) -> int:
sys.exit()

n_dims = len(data.shape)
- data_dtype = data.dtype
+ data_dtype = data.dtype

# if f32 desired, convert any float16 to float32
if ftype == 0 and data.dtype == np.float16:
@@ -261,7 +261,6 @@ def count_model_parts(dir_model: str) -> int:
for name in model_part.keys():
data = model_part[name]

-
old_dtype = data.dtype

# we don't need these
@@ -284,7 +283,7 @@ def count_model_parts(dir_model: str) -> int:
sys.exit()

n_dims = len(data.shape)
- data_dtype = data.dtype
+ data_dtype = data.dtype

# if f32 desired, convert any float16 to float32
if ftype == 0 and data.dtype == np.float16:

examples/main/main.cpp (1 addition & 5 deletions)
@@ -266,9 +266,6 @@ int main(int argc, char ** argv) {
params.interactive = true;
}

- // determine newline token
- auto llama_token_newline = ::llama_tokenize(ctx, "\n", false);
-
if (params.verbose_prompt) {
fprintf(stderr, "\n");
fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
@@ -778,8 +775,7 @@ int main(int argc, char ** argv) {
if (grammar != NULL) {
llama_grammar_free(grammar);

- std::vector<const llama_grammar_element *> grammar_rules(
-     parsed_grammar.c_rules());
+ std::vector<const llama_grammar_element *> grammar_rules( parsed_grammar.c_rules());
grammar = llama_grammar_init(
grammar_rules.data(), grammar_rules.size(),
parsed_grammar.symbol_ids.at("root"));

examples/quantize/quantize.cpp (4 additions & 4 deletions)
@@ -68,10 +68,10 @@ bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftype, std:
}

// usage:
- // ./quantize [--allow-requantize] [--leave-output-tensor] models/llama/ggml-model.bin [models/llama/ggml-model-quant.bin] type [nthreads]
+ // ./quantize [--allow-requantize] [--leave-output-tensor] models/llama/ggml-model.gguf [models/llama/ggml-model-quant.gguf] type [nthreads]
//
void usage(const char * executable) {
- fprintf(stderr, "usage: %s [--help] [--allow-requantize] [--leave-output-tensor] model-f32.bin [model-quant.bin] type [nthreads]\n\n", executable);
+ fprintf(stderr, "usage: %s [--help] [--allow-requantize] [--leave-output-tensor] model-f32.gguf [model-quant.gguf] type [nthreads]\n\n", executable);
fprintf(stderr, " --allow-requantize: Allows requantizing tensors that have already been quantized. Warning: This can severely reduce quality compared to quantizing from 16bit or 32bit\n");
fprintf(stderr, " --leave-output-tensor: Will leave output.weight un(re)quantized. Increases model size but may also increase quality, especially when requantizing\n");
fprintf(stderr, "\nAllowed quantization types:\n");
@@ -118,8 +118,8 @@ int main(int argc, char ** argv) {
if (pos != std::string::npos) {
fpath = fname_inp.substr(0, pos + 1);
}
- // export as [inp path]/ggml-model-[ftype].bin
- fname_out = fpath + "ggml-model-" + ftype_str + ".bin";
+ // export as [inp path]/ggml-model-[ftype].gguf
+ fname_out = fpath + "ggml-model-" + ftype_str + ".gguf";
arg_idx++;
}
else {
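
The rename above is a one-word change, but the logic it sits in is easy to miss in the truncated hunk: when no output file is given, quantize derives one from the input path. A minimal sketch of that derivation, assuming pos comes from fname_inp.find_last_of('/') as in the lines just above the hunk (the helper name below is hypothetical, not from the repository):

#include <string>

// Sketch of the default-output-name logic shown in the hunk above: keep the input's
// directory (everything up to and including the last '/') and append
// "ggml-model-<ftype>.gguf".
static std::string default_output_name(const std::string & fname_inp, const std::string & ftype_str) {
    std::string fpath;
    const size_t pos = fname_inp.find_last_of('/');
    if (pos != std::string::npos) {
        fpath = fname_inp.substr(0, pos + 1); // keep the trailing '/'
    }
    // e.g. input "models/llama/ggml-model-f16.gguf" with ftype_str "q4_0"
    // yields "models/llama/ggml-model-q4_0.gguf"
    return fpath + "ggml-model-" + ftype_str + ".gguf";
}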

examples/save-load-state/save-load-state.cpp (0 additions & 1 deletion)
@@ -26,7 +26,6 @@ int main(int argc, char ** argv) {
auto lparams = llama_context_default_params();

lparams.n_ctx = params.n_ctx;
- lparams.n_gqa = params.n_gqa;
lparams.seed = params.seed;
lparams.f16_kv = params.memory_f16;
lparams.use_mmap = params.use_mmap;

llama.cpp (9 additions & 7 deletions)
@@ -4774,32 +4774,34 @@ float * llama_get_embeddings(struct llama_context * ctx) {
return ctx->embedding.data();
}

- int llama_token_to_str_with_model(const struct llama_model * model, llama_token token, char * str, int length) {
+ // does not write null-terminator to str
+ int llama_token_to_str_with_model(const struct llama_model * model, llama_token token, char * buf, int length) {
if (0 <= token && token < llama_n_vocab_from_model(model)) {
if (llama_is_normal_token(model->vocab, token)) {
std::string result = model->vocab.id_to_token[token].tok;
- if(llama_vocab_type(model->vocab) == "spm") {
+ if (llama_vocab_type(model->vocab) == "spm") {
result = llama_unescape_whitespace(result);
}
if (length < (int) result.length()) {
return -result.length();
}
- strncpy(str, result.c_str(), result.length());
+ memcpy(buf, result.c_str(), result.length());
return result.length();
} else if (llama_is_unknown_token(model->vocab, token)) { // NOLINT
if (length < 3) {
return -3;
}
- strncpy(str, "\xe2\x96\x85", 4);
+ buf[0] = '\xe2';
+ buf[1] = '\x96';
+ buf[2] = '\x85';
return 3;
} else if (llama_is_control_token(model->vocab, token)) {
;
} else if (llama_is_byte_token(model->vocab, token)) {
if (length < 1) {
return -1;
}
- str[0] = llama_byte_to_char(model->vocab, token);
- str[1] = 0x00;
+ buf[0] = llama_byte_to_char(model->vocab, token);
return 1;
}
}
@@ -4830,7 +4832,7 @@ int llama_token_to_str_bpe(const struct llama_context * ctx, llama_token token,
if (length < (int) result.length()) {
return -result.length();
}
- strncpy(str, result.c_str(), result.length());
+ memcpy(str, result.c_str(), result.length());
return result.length();
}
return 0;
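
For context on the warning named in the commit title: copying exactly result.length() bytes with strncpy tells the compiler the destination may be left without a terminating null, which newer GCC versions flag (most likely as -Wstringop-truncation). Since these token-to-string routines deliberately do not null-terminate, memcpy states that intent without triggering the warning. A minimal sketch of the pattern, not code from the repository (the helper name is hypothetical):

#include <cstring>
#include <string>

// Mirrors the convention used above: return the negated required size if the buffer
// is too small, otherwise the number of bytes written; no '\0' is appended either way.
static int copy_token_text(const std::string & result, char * buf, int length) {
    if (length < (int) result.length()) {
        return -((int) result.length());
    }
    // strncpy(buf, result.c_str(), result.length());  // may warn: output not null-terminated
    memcpy(buf, result.c_str(), result.length());      // same bytes, intent made explicit
    return (int) result.length();
}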

llama.h (4 additions & 3 deletions)
@@ -355,22 +355,23 @@ extern "C" {
LLAMA_API float * llama_get_embeddings(struct llama_context * ctx);

// Token Id -> String. Uses the vocabulary in the provided context
+ // Does not write null terminator to the buffer
LLAMA_API int llama_token_to_str(
const struct llama_context * ctx,
llama_token token,
- char * str,
+ char * buf,
int length);

LLAMA_API int llama_token_to_str_bpe(
const struct llama_context * ctx,
llama_token token,
- char * str,
+ char * buf,
int length);

LLAMA_API int llama_token_to_str_with_model(
const struct llama_model * model,
llama_token token,
- char * str,
+ char * buf,
int length);
// Special tokens
LLAMA_API llama_token llama_token_bos(void); // beginning-of-sentence
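
Because none of these functions write a null terminator, callers must use the returned length rather than treating the buffer as a C string. A minimal caller-side sketch (not part of this commit; the wrapper name is hypothetical) that grows the buffer on a negative return and builds a std::string with an explicit length:

#include <string>
#include <vector>
#include "llama.h"

static std::string token_to_string(struct llama_context * ctx, llama_token token) {
    std::vector<char> buf(8);
    int n = llama_token_to_str(ctx, token, buf.data(), (int) buf.size());
    if (n < 0) {
        // negative return: buffer too small, magnitude is the required size
        buf.resize((size_t) -n);
        n = llama_token_to_str(ctx, token, buf.data(), (int) buf.size());
    }
    // construct with an explicit length; the buffer is not null-terminated
    return std::string(buf.data(), (size_t) n);
}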
