Skip to content

Commit

Permalink
perplexity : only tokenize selected tasks for Winogrande
Browse files (browse the repository at this point in the history)
  • Loading branch information
ggerganov committed Jan 19, 2024
1 parent bb58b0e commit 9e4ad80
Showing 1 changed file with 23 additions and 21 deletions.
44 changes: 23 additions & 21 deletions examples/perplexity/perplexity.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -838,6 +838,29 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) {
return;
}

fprintf(stderr, "%s : loaded %zu tasks from prompt.\n", __func__, data.size());

if (params.winogrande_tasks > 0 && params.winogrande_tasks < data.size()) {
fprintf(stderr, "%s : selecting %zu random tasks\n", __func__, params.winogrande_tasks);
std::mt19937 rng(1);
std::vector<int> aux(data.size());
for (int i = 0; i < int(data.size()); ++i) {
aux[i] = i;
}
float scale = 1/(1.f + (float)rng.max());
std::vector<winogrande_entry> selected;
selected.resize(params.winogrande_tasks);
for (int i = 0; i < int(params.winogrande_tasks); ++i) {
int j = int(scale*rng()*aux.size());
selected[i] = std::move(data[aux[j]]);
aux[j] = aux.back();
aux.pop_back();
}
data = std::move(selected);
}

fprintf(stderr, "%s : tokenizing selected tasks\n", __func__);

// This is needed as usual for LLaMA models
const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));

Expand All @@ -861,27 +884,6 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) {
task.n_base2 = ::llama_tokenize(ctx, task.first + task.choices[1], add_bos).size();
}

fprintf(stderr, "%s : loaded %zu tasks from prompt.\n", __func__, data.size());

if (params.winogrande_tasks > 0 && params.winogrande_tasks < data.size()) {
fprintf(stderr, "%s : selecting %zu random tasks\n", __func__, params.winogrande_tasks);
std::mt19937 rng(1);
std::vector<int> aux(data.size());
for (int i = 0; i < int(data.size()); ++i) {
aux[i] = i;
}
float scale = 1/(1.f + (float)rng.max());
std::vector<winogrande_entry> selected;
selected.resize(params.winogrande_tasks);
for (int i = 0; i < int(params.winogrande_tasks); ++i) {
int j = int(scale*rng()*aux.size());
selected[i] = std::move(data[aux[j]]);
aux[j] = aux.back();
aux.pop_back();
}
data = std::move(selected);
}

fprintf(stderr, "%s : calculating winogrande score over selected tasks.\n", __func__);

const int n_vocab = llama_n_vocab(llama_get_model(ctx));
Expand Down

0 comments on commit 9e4ad80

Please sign in to comment.