From c2b26000c3edebd20080eec9f40d2ae5e2e65005 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Sun, 12 Jan 2025 13:54:25 +0100 Subject: [PATCH] remove reference to params.conversation in main --- examples/main/main.cpp | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/examples/main/main.cpp b/examples/main/main.cpp index e3ecac0d0e9f1..1ba4dcb3dfcbc 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -40,7 +40,6 @@ static std::vector * g_input_tokens; static std::ostringstream * g_output_ss; static std::vector * g_output_tokens; static bool is_interacting = false; -static bool need_insert_eot = false; static void print_usage(int argc, char ** argv) { (void) argc; @@ -70,7 +69,6 @@ static void sigint_handler(int signo) { g_chat->interrupt(); } else if (!is_interacting && g_params->interactive) { is_interacting = true; - need_insert_eot = true; } else { console::cleanup(); LOG("\n"); @@ -763,26 +761,16 @@ int main(int argc, char ** argv) { } } - // if current token is not EOG, we add it to current assistant message - if (params.conversation) { - const auto id = common_sampler_last(smpl); - assistant_ss << common_token_to_piece(ctx, id, false); - } - if (n_past > 0 && is_interacting) { LOG_DBG("waiting for user input\n"); - if (params.conversation) { - LOG("\n> "); - } - if (params.input_prefix_bos) { LOG_DBG("adding input prefix BOS token\n"); embd_inp.push_back(llama_vocab_bos(vocab)); } std::string buffer; - if (!params.input_prefix.empty() && !params.conversation) { + if (!params.input_prefix.empty()) { LOG_DBG("appending input prefix: '%s'\n", params.input_prefix.c_str()); LOG("%s", params.input_prefix.c_str()); } @@ -806,7 +794,7 @@ int main(int argc, char ** argv) { // Entering a empty line lets the user pass control back if (buffer.length() > 1) { // append input suffix if any - if (!params.input_suffix.empty() && !params.conversation) { + if (!params.input_suffix.empty()) { LOG_DBG("appending input suffix: '%s'\n", params.input_suffix.c_str()); LOG("%s", params.input_suffix.c_str()); } @@ -819,22 +807,14 @@ int main(int argc, char ** argv) { string_process_escapes(buffer); } - bool format_chat = params.conversation && params.enable_chat_template; std::string user_inp = std::move(buffer); // TODO: one inconvenient of current chat template implementation is that we can't distinguish between user input and special tokens (prefix/postfix) const auto line_pfx = common_tokenize(ctx, params.input_prefix, false, true); - const auto line_inp = common_tokenize(ctx, user_inp, false, format_chat); + const auto line_inp = common_tokenize(ctx, user_inp, false, true); const auto line_sfx = common_tokenize(ctx, params.input_suffix, false, true); LOG_DBG("input tokens: %s\n", string_from(ctx, line_inp).c_str()); - // if user stop generation mid-way, we must add EOT to finish model's last response - if (need_insert_eot && format_chat) { - llama_token eot = llama_vocab_eot(vocab); - embd_inp.push_back(eot == LLAMA_TOKEN_NULL ? llama_vocab_eos(vocab) : eot); - need_insert_eot = false; - } - embd_inp.insert(embd_inp.end(), line_pfx.begin(), line_pfx.end()); embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end()); embd_inp.insert(embd_inp.end(), line_sfx.begin(), line_sfx.end());