From 1ba3df3de5f9f5c49a1d10083e8c30b3ed817e44 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Fri, 4 Oct 2024 11:54:32 +0300 Subject: [PATCH 1/4] rerank : use [SEP] token instead of [BOS] ggml-ci --- ci/run.sh | 3 ++- examples/server/server.cpp | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/ci/run.sh b/ci/run.sh index 7d241ecc0ea06..7edae4f6ee68a 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -751,7 +751,8 @@ function gg_run_rerank_tiny { model_f16="${path_models}/ggml-model-f16.gguf" - (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?hi\nwhat is panda?it's a bear\nwhat is panda?The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log + # for this model, the SEP token is "" + (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?hi\nwhat is panda?it's a bear\nwhat is panda?The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log # sample output # rerank score 0: 0.029 diff --git a/examples/server/server.cpp b/examples/server/server.cpp index f343cc252f89a..13e54e5019490 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2027,7 +2027,7 @@ struct server_context { continue; } - // prompt: querydoc + // prompt: [BOS]query[EOS][SEP]doc[EOS] prompt_tokens.clear(); prompt_tokens.push_back(llama_token_bos(model)); { @@ -2035,7 +2035,7 @@ struct server_context { prompt_tokens.insert(prompt_tokens.end(), part.begin(), part.end()); } prompt_tokens.push_back(llama_token_eos(model)); - prompt_tokens.push_back(llama_token_bos(model)); + prompt_tokens.push_back(llama_token_sep(model)); { const auto part = tokenize(slot.prompt[1], false); prompt_tokens.insert(prompt_tokens.end(), part.begin(), part.end()); From 9e897d44390f3196c6680e1847c3693354de00ed Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Fri, 4 Oct 2024 12:04:54 +0300 Subject: [PATCH 2/4] common : sanity check for non-NULL tokens ggml-ci --- common/common.cpp | 30 +++++++++++++++++++++++++++++- src/llama-vocab.h | 18 +++++++++--------- src/llama.cpp | 2 +- 3 files changed, 39 insertions(+), 11 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index a0611f3d1734b..29df16c951ad0 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -838,6 +838,31 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) { return iparams; } + if (params.reranking) { + bool ok = true; + + if (llama_token_bos(model) == LLAMA_TOKEN_NULL) { + LOG_WRN("%s: warning: model does not have a BOS token, reranking will not work\n", __func__); + ok = false; + } + + if (llama_token_eos(model) == LLAMA_TOKEN_NULL) { + LOG_WRN("%s: warning: model does not have an EOS token, reranking will not work\n", __func__); + ok = false; + } + + if (llama_token_sep(model) == LLAMA_TOKEN_NULL) { + LOG_WRN("%s: warning: model does not have a SEP token, reranking will not work\n", __func__); + ok = false; + } + + if (!ok) { + llama_free_model(model); + + return iparams; + } + } + auto cparams = llama_context_params_from_gpt_params(params); llama_context * lctx = llama_new_context_with_model(model, cparams); @@ -855,6 +880,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) { if (cvec.n_embd == -1) { llama_free(lctx); llama_free_model(model); + return iparams; } @@ -867,6 +893,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) { if (err) { llama_free(lctx); llama_free_model(model); + return iparams; } } @@ -889,7 +916,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) { llama_lora_adapters_apply(lctx, iparams.lora_adapters); } - if (params.sparams.ignore_eos && llama_token_eos(model) == -1) { + if (params.sparams.ignore_eos && llama_token_eos(model) == LLAMA_TOKEN_NULL) { LOG_WRN("%s: warning: model does not have an EOS token, ignoring --ignore-eos\n", __func__); params.sparams.ignore_eos = false; } @@ -930,6 +957,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) { iparams.model = model; iparams.context = lctx; + return iparams; } diff --git a/src/llama-vocab.h b/src/llama-vocab.h index 069bdc423a60b..28bad9135bbf0 100644 --- a/src/llama-vocab.h +++ b/src/llama-vocab.h @@ -40,17 +40,17 @@ struct llama_vocab { id special_bos_id = 1; id special_eos_id = 2; id special_unk_id = 0; - id special_sep_id = -1; - id special_pad_id = -1; - id special_cls_id = -1; - id special_mask_id = -1; + id special_sep_id = LLAMA_TOKEN_NULL; + id special_pad_id = LLAMA_TOKEN_NULL; + id special_cls_id = LLAMA_TOKEN_NULL; + id special_mask_id = LLAMA_TOKEN_NULL; id linefeed_id = 13; - id special_prefix_id = -1; - id special_suffix_id = -1; - id special_middle_id = -1; - id special_eot_id = -1; // TODO: move above after "eos_id", and here add "file separator" token - id special_eom_id = -1; + id special_prefix_id = LLAMA_TOKEN_NULL; + id special_suffix_id = LLAMA_TOKEN_NULL; + id special_middle_id = LLAMA_TOKEN_NULL; + id special_eot_id = LLAMA_TOKEN_NULL; // TODO: move above after "eos_id", and here add "file separator" token + id special_eom_id = LLAMA_TOKEN_NULL; // set of all tokens that cause "end of generation" std::set special_eog_ids; diff --git a/src/llama.cpp b/src/llama.cpp index 3443b0689bf5e..bf6fd92778685 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -2412,7 +2412,7 @@ struct llama_hparams { // needed by encoder-decoder models (e.g. T5, FLAN-T5) // ref: https://github.com/ggerganov/llama.cpp/pull/8141 - llama_token dec_start_token_id = -1; + llama_token dec_start_token_id = LLAMA_TOKEN_NULL; enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_NONE; enum llama_rope_type rope_type = LLAMA_ROPE_TYPE_NONE; From e51973f21ea52135d4935b2510ab32d25bbe979e Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Fri, 4 Oct 2024 12:06:58 +0300 Subject: [PATCH 3/4] ci : adjust rank score interval ggml-ci --- ci/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/run.sh b/ci/run.sh index 7edae4f6ee68a..810779f6fd4d1 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -775,7 +775,7 @@ function gg_run_rerank_tiny { check_score "rerank score 0" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 0")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log check_score "rerank score 1" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 1")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log - check_score "rerank score 2" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 2")" "0.10" "0.15" | tee -a $OUT/${ci}-rk-f16.log + check_score "rerank score 2" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 2")" "0.10" "0.30" | tee -a $OUT/${ci}-rk-f16.log set +e } From 6ada2e7cfc73a14607f3b46cfb3265c1949baa60 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sat, 5 Oct 2024 15:03:02 +0300 Subject: [PATCH 4/4] ci : add shebang to run.sh ggml-ci --- ci/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/run.sh b/ci/run.sh index 810779f6fd4d1..e067782193b9b 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -1,4 +1,4 @@ -#/bin/bash +#!/bin/bash # # sample usage: #