Skip to content

Commit

Permalink
Rename argument
Browse files Browse the repository at this point in the history
  • Loading branch information
sasha0552 authored Jun 7, 2024
1 parent 36083dc commit 42fb804
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 10 deletions.
6 changes: 3 additions & 3 deletions common/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1465,7 +1465,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
invalid_param = true;
return true;
}
- params.lcp_similarity = std::stof(argv[i]);
+ params.slot_prompt_similarity = std::stof(argv[i]);
return true;
}
if (arg == "-pps") {
Expand Down Expand Up @@ -1839,8 +1839,8 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
"set custom jinja chat template (default: template taken from model's metadata)\n"
"only commonly used templates are accepted:\n"
"https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template" });
- options.push_back({ "server", " --lcp-similarity SIMILARITY",
- "how much the prompt of a request must match the prompt of a slot in order to use that slot (default: %.2f)\n", params.lcp_similarity });
+ options.push_back({ "server", "-sps, --slot-prompt-similarity SIMILARITY",
+ "how much the prompt of a request must match the prompt of a slot in order to use that slot (default: %.2f, 0.0 = disabled)\n", params.slot_prompt_similarity });

#ifndef LOG_DISABLE_LOGS
options.push_back({ "logging" });
Expand Down
2 changes: 1 addition & 1 deletion common/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ struct gpt_params {

std::string slot_save_path;

- float lcp_similarity = 0.0f;
+ float slot_prompt_similarity = 0.5f;

// batched-bench params
bool is_pp_shared = false;
Expand Down
12 changes: 6 additions & 6 deletions examples/server/server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -647,8 +647,8 @@ struct server_context {

server_metrics metrics;

- // Longest Common Prefix similarity for slot selection
- float lcp_similarity = 0.0f;
+ // Necessary similarity of prompt for slot selection
+ float slot_prompt_similarity = 0.0f;

~server_context() {
if (ctx) {
Expand Down Expand Up @@ -812,7 +812,7 @@ struct server_context {
server_slot * ret = nullptr;

// find the slot that has at least n% prompt similarity
- if (ret == nullptr && lcp_similarity != 0.0f && !prompt.empty()) {
+ if (ret == nullptr && slot_prompt_similarity != 0.0f && !prompt.empty()) {
int max_lcp_len = 0;
float similarity = 0;

Expand Down Expand Up @@ -840,7 +840,7 @@ struct server_context {
similarity = static_cast<float>(lcp_len) / slot_prompt_len;

// select the current slot if the criteria match
- if (lcp_len > max_lcp_len && similarity > lcp_similarity) {
+ if (lcp_len > max_lcp_len && similarity > slot_prompt_similarity) {
max_lcp_len = lcp_len;
ret = &slot;
}
Expand Down Expand Up @@ -2568,8 +2568,8 @@ int main(int argc, char ** argv) {
log_data["api_key"] = "api_key: " + std::to_string(params.api_keys.size()) + " keys loaded";
}

- // Longest Common Prefix similarity for slot selection
- ctx_server.lcp_similarity = params.lcp_similarity;
+ // Necessary similarity of prompt for slot selection
+ ctx_server.slot_prompt_similarity = params.slot_prompt_similarity;

// load the model
if (!ctx_server.load_model(params)) {
Expand Down

0 comments on commit 42fb804

Please sign in to comment.