main: use jinja chat template system prompt by default #12118

Merged Mar 2, 2025 · 5 commits
Changes from 2 commits
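
Summary (inferred from the title and the diff below): this PR removes the hard-coded DEFAULT_SYSTEM_MESSAGE ("You are a helpful assistant") from examples/main. With a chat template enabled, a non-empty -p prompt is now formatted as a system message in conversation mode or as a user message otherwise, while an empty prompt is left to the model's jinja chat template, which may supply its own default system prompt. A new waiting_for_first_input flag lets conversation mode start from an empty context and wait for the first user turn, so the empty-input BOS fallback and the EOG handling are gated accordingly.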
20 changes: 10 additions & 10 deletions examples/main/main.cpp
@@ -31,8 +31,6 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
-static const char * DEFAULT_SYSTEM_MESSAGE = "You are a helpful assistant";
-
 static llama_context ** g_ctx;
 static llama_model ** g_model;
 static common_sampler ** g_smpl;
@@ -263,6 +261,7 @@ int main(int argc, char ** argv) {
 
     std::vector<llama_token> embd_inp;
 
+    bool waiting_for_first_input = params.conversation_mode && params.enable_chat_template && params.prompt.empty();
     auto chat_add_and_format = [&chat_msgs, &chat_templates](const std::string & role, const std::string & content) {
         common_chat_msg new_msg;
         new_msg.role = role;
@@ -274,9 +273,9 @@ int main(int argc, char ** argv) {
     };
 
     {
-        auto prompt = (params.conversation_mode && params.enable_chat_template)
-            // format the system prompt in conversation mode (fallback to default if empty)
-            ? chat_add_and_format("system", params.prompt.empty() ? DEFAULT_SYSTEM_MESSAGE : params.prompt)
+        auto prompt = (params.enable_chat_template && !params.prompt.empty())
+            // format the user prompt or system prompt if in conversation mode
+            ? chat_add_and_format(params.conversation_mode ? "system" : "user", params.prompt)
             // otherwise use the prompt as is
             : params.prompt;
         if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) {
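
The hunk above carries the core behavioral change: prompt routing now depends on whether a prompt was given at all, not only on conversation mode. A minimal standalone sketch of the new routing, with format_msg as a hypothetical stand-in for chat_add_and_format (which applies the real chat template and records the message in the history):

```cpp
#include <iostream>
#include <string>

// hypothetical stand-in for chat_add_and_format: tags the message with its
// role instead of rendering a real jinja chat template
static std::string format_msg(const std::string & role, const std::string & content) {
    return "<|" + role + "|>" + content + "<|end|>";
}

// mirrors the patched condition: with a chat template enabled, a non-empty
// prompt becomes a system message (conversation mode) or a user message
// (one-shot mode); an empty prompt is passed through untouched, leaving the
// system prompt to the template's own default
static std::string route_prompt(bool conversation_mode, bool enable_chat_template, const std::string & prompt) {
    return (enable_chat_template && !prompt.empty())
        ? format_msg(conversation_mode ? "system" : "user", prompt)
        : prompt;
}

int main() {
    std::cout << route_prompt(true,  true, "Answer briefly.") << "\n"; // system message
    std::cout << route_prompt(false, true, "Hello")           << "\n"; // user message
    std::cout << route_prompt(true,  true, "")                << "\n"; // empty: template default
    return 0;
}
```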
@@ -292,7 +291,7 @@ int main(int argc, char ** argv) {
     }
 
     // Should not run without any tokens
-    if (embd_inp.empty()) {
+    if (!params.conversation_mode && embd_inp.empty()) {
         if (add_bos) {
             embd_inp.push_back(llama_vocab_bos(vocab));
             LOG_WRN("embd_inp was considered empty and bos was added: %s\n", string_from(ctx, embd_inp).c_str());
@@ -773,7 +772,7 @@ int main(int argc, char ** argv) {
        }

        // deal with end of generation tokens in interactive mode
-        if (llama_vocab_is_eog(vocab, common_sampler_last(smpl))) {
+        if (!waiting_for_first_input && llama_vocab_is_eog(vocab, common_sampler_last(smpl))) {
            LOG_DBG("found an EOG token\n");

            if (params.interactive) {
@@ -793,12 +792,12 @@ int main(int argc, char ** argv) {
        }

        // if current token is not EOG, we add it to current assistant message
-        if (params.conversation_mode) {
+        if (params.conversation_mode && !waiting_for_first_input) {
            const auto id = common_sampler_last(smpl);
            assistant_ss << common_token_to_piece(ctx, id, false);
        }

-        if (n_past > 0 && is_interacting) {
+        if ((n_past > 0 || waiting_for_first_input) && is_interacting) {
            LOG_DBG("waiting for user input\n");

            if (params.conversation_mode) {
@@ -888,11 +887,12 @@ int main(int argc, char ** argv) {
            input_echo = false; // do not echo this again
        }

-        if (n_past > 0) {
+        if (n_past > 0 || waiting_for_first_input) {
            if (is_interacting) {
                common_sampler_reset(smpl);
            }
            is_interacting = false;
+            waiting_for_first_input = false;
        }
    }

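Taken together, the waiting_for_first_input hunks let a promptless conversation start from an empty context: EOG handling and assistant-message accumulation are skipped until the first user turn, and the interaction gate fires even though n_past is still zero. A simplified sketch of that flag lifecycle (not the patch's actual loop, just its gating conditions):

```cpp
#include <iostream>

int main() {
    // the flag is set only when all three hold (see the first hunk)
    const bool conversation_mode    = true;
    const bool enable_chat_template = true;
    const bool prompt_empty         = true;
    bool waiting_for_first_input = conversation_mode && enable_chat_template && prompt_empty;

    int  n_past         = 0;    // nothing evaluated yet: there were no prompt tokens
    bool is_interacting = true;

    // mirrors the patched gate: interact either after generating something
    // (n_past > 0) or immediately on the very first, promptless turn
    if ((n_past > 0 || waiting_for_first_input) && is_interacting) {
        std::cout << "waiting for user input\n";
    }

    // mirrors the final hunk: once the first turn begins, the flag is
    // cleared so later iterations behave exactly as before the patch
    if (n_past > 0 || waiting_for_first_input) {
        is_interacting = false;
        waiting_for_first_input = false;
    }
    return 0;
}
```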