Skip to content

Commit

Permalink
fix
Browse files: browse the repository at this point in the history
  • Loading branch information
goliaro committed Mar 4, 2025
1 parent 7dac710 commit 85a6d99
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 14 deletions.
14 changes: 8 additions & 6 deletions src/runtime/page_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -288,14 +288,16 @@ void PageManager::append_tokens(RequestGuid const &request_guid,
}
// update the number of used pages and the number of tokens in the last used
// page
if (req_info.num_tokens_in_last_used_page == 0 && req_info.num_used_pages == 0) {
req_info.num_used_pages = 1;
}

req_info.num_tokens_in_last_used_page += num_tokens;
int tot_num_tokens = req_info.num_tokens_in_last_used_page;
if (req_info.num_used_pages > 0) {
tot_num_tokens += (req_info.num_used_pages-1) * tokens_per_page;
while (req_info.num_tokens_in_last_used_page > tokens_per_page) {
req_info.num_used_pages += 1;
req_info.num_tokens_in_last_used_page -= tokens_per_page;
}
req_info.num_used_pages = ceilDiv(tot_num_tokens, tokens_per_page);
req_info.num_tokens_in_last_used_page =
req_info.num_tokens_in_last_used_page % tokens_per_page;

printf("appending %d tokens to request %d. It now has %d tokens in the last "
"used page and %d used pages\n",
num_tokens, request_guid, req_info.num_tokens_in_last_used_page,
Expand Down
12 changes: 6 additions & 6 deletions tests/inference/generate_inf_test_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
ff_init_configs = {
# required parameters
"num_gpus": 4,
"memory_per_gpu": 24000,
"memory_per_gpu": 14000,
"zero_copy_memory_per_node": 40000,
# optional parameters
"num_cpus": 8,
Expand Down Expand Up @@ -139,15 +139,15 @@ def gen_spec_configs(prompt_file, output_folder, specinfer_model_pairs, parallel
gen_incr_dec_configs(prompt_file,
output_folder,
incr_dec_models=[llama_models[0], opt_models[0]],
parallelism_settings=[Parallelism(1, 1)],
parallelism_settings=[Parallelism(4, 1)],
full_precision_settings=[False,],
config_output_folder=config_output_folder
)
# small models tp=2, pp=2
gen_incr_dec_configs(prompt_file,
output_folder,
incr_dec_models=[llama_models[1], opt_models[1]],
parallelism_settings=[Parallelism(1, 1)],
parallelism_settings=[Parallelism(2, 2)],
full_precision_settings=[False,],
config_output_folder=config_output_folder
)
Expand All @@ -156,15 +156,15 @@ def gen_spec_configs(prompt_file, output_folder, specinfer_model_pairs, parallel
gen_spec_configs(prompt_file,
output_folder,
specinfer_model_pairs=[SpecModelPair(llama_models[0], llama_models[1]),],
parallelism_settings=[Parallelism(1, 1), Parallelism(1, 1), Parallelism(1, 1)],
parallelism_settings=[Parallelism(4, 1), Parallelism(2, 2), Parallelism(1, 4)],
full_precision_settings=[False,],
config_output_folder=config_output_folder
)
# opt, tp=4 only
gen_spec_configs(prompt_file,
output_folder,
specinfer_model_pairs=[SpecModelPair(opt_models[0], opt_models[1]),],
parallelism_settings=[Parallelism(1, 1)],
parallelism_settings=[Parallelism(4, 1)],
full_precision_settings=[False,],
config_output_folder=config_output_folder
)
)
3 changes: 1 addition & 2 deletions tests/inference_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,7 @@ for file in ./inference/inf_test_configs/*.json; do
script="./inference/python/spec_infer.py"
fi
# Run script
python "$script" -config-file "$file"
exit 0
python "$script" -config-file "$file"
done

############## Run inference in HuggingFace ##############
Expand Down

0 comments on commit 85a6d99

Please sign in to comment.