fix bugs
goliaro committed Mar 8, 2025
1 parent f4a3a98 commit 0a889ae
Showing 13 changed files with 92 additions and 66 deletions.
3 changes: 2 additions & 1 deletion include/flexflow/request_manager.h
@@ -98,7 +98,7 @@ struct Request {
 struct PeftFinetuningInfo {
   FinetuningStatus status = FORWARD_PHASE;
   std::string dataset_filepath;
-  int max_training_steps = 1;
+  int max_training_epochs = 1;
   // overall state
   int completed_training_steps = 0;
   // fwd state
@@ -456,6 +456,7 @@ class RequestManager {
     double start_time, finish_time;
     double registration_time, first_token_time;
     bool first_token_time_set = false;
+    int num_evictions = 0;
   };
   std::unordered_map<RequestGuid, ProfileInfo> profiling_requests;
   double total_request_run_time;
34 changes: 18 additions & 16 deletions inference/flexllm/peft_train.cc
@@ -53,7 +53,7 @@ void parse_input_args(char **argv,
                       int &max_tokens_per_batch,
                       int &max_sequence_length,
                       int &num_kv_cache_slots,
-                      int &max_training_steps,
+                      int &max_training_epochs,
                       int &num_layers_per_finetuning_step,
                       bool &run_warmup) {
   for (int i = 1; i < argc; i++) {
@@ -144,7 +144,7 @@ void parse_input_args(char **argv,
       continue;
     }
     if (!strcmp(argv[i], "--max-training-steps")) {
-      max_training_steps = std::stoi(argv[++i]);
+      max_training_epochs = std::stoi(argv[++i]);
       continue;
     }
     if (!strcmp(argv[i], "--num-layers-per-finetuning-step")) {
@@ -183,7 +183,8 @@ std::vector<Request> make_warmup_requests(int num_inf_request,
   finetuning_req.warmup = true;
   finetuning_req.peft_model_id =
       (peft_model_id != nullptr) ? *peft_model_id : PEFTModelID::NO_ID;
-  finetuning_req.peft_finetuning_info.max_training_steps = num_finetuning_steps;
+  finetuning_req.peft_finetuning_info.max_training_epochs =
+      num_finetuning_steps;
   warmup_requests.push_back(finetuning_req);
   return warmup_requests;
 }
@@ -229,7 +230,12 @@ std::vector<Request> load_trace(nlohmann::ordered_json prompt_json,
 std::vector<Request> load_requests(std::string prompt_file_path,
                                    int max_length_if_needed) {
   std::ifstream file_handle(prompt_file_path);
-  assert(!file_handle.good() && "Error opening prompt file!");
+  if (!file_handle.good()) {
+    std::cerr << "Error opening prompt file " << prompt_file_path << std::endl;
+    std::cerr << "Current working directory: "
+              << std::filesystem::current_path() << std::endl;
+    assert(!file_handle.good() && "Error opening prompt file!");
+  }
   nlohmann::ordered_json prompt_json;
   try {
     prompt_json = nlohmann::ordered_json::parse(file_handle,
@@ -248,9 +254,9 @@ std::vector<Request> load_requests(std::string prompt_file_path,
     std::cerr << "Error: JSON file is null!" << std::endl;
     assert(false);
   } else if (prompt_json.is_array()) {
-    return load_prompt_list(prompt_file_path, max_length_if_needed);
+    return load_prompt_list(prompt_json, max_length_if_needed);
   } else if (prompt_json.is_object()) {
-    return load_trace(prompt_file_path);
+    return load_trace(prompt_json);
   } else {
     std::cerr << "JSON is neither an array nor an object!" << std::endl;
     assert(false);
@@ -277,7 +283,7 @@ void FlexFlow::top_level_task(Task const *task,
   int max_requests_per_batch = 1;
   int max_tokens_per_batch = 128;
   int max_sequence_length = 256;
-  int max_training_steps = 2;
+  int max_training_epochs = 2;
   bool enable_peft_finetuning = true;
   int num_layers_per_finetuning_step = -1;
   bool run_warmup = false;
@@ -301,7 +307,7 @@ void FlexFlow::top_level_task(Task const *task,
                    max_tokens_per_batch,
                    max_sequence_length,
                    num_kv_cache_slots,
-                   max_training_steps,
+                   max_training_epochs,
                    num_layers_per_finetuning_step,
                    run_warmup);
   enable_peft_finetuning = file_paths.dataset_file_path.empty() ? false : true;
@@ -386,8 +392,7 @@ void FlexFlow::top_level_task(Task const *task,
   // load PEFT config
   int rank = 16;
   LoraOptimizerConfig *optim_config = new LoraSGDOptimizerConfig(0.001f);
-  std::vector<std::string> target_modules = {
-      "qkv_proj", "o_proj", "gate_proj", "down_proj", "up_proj"};
+  std::vector<std::string> target_modules = {"down_proj"};
   LoraLinearConfig peft_config_finetuning(file_paths.cache_folder_path,
                                           peft_model_name,
                                           true /*trainable*/,
@@ -480,20 +485,17 @@
       load_requests(file_paths.prompt_file_path, 128);

   // Add fine-tuning request
-
   assert(!file_paths.dataset_file_path.empty() &&
          "Dataset file path is required for fine-tuning.");
   printf("Finetuning request with dataset %s\n",
          file_paths.dataset_file_path.c_str());
   Request fine_tuning_req;
   fine_tuning_req.req_type = RequestType::REQ_FINETUNING;
-  fine_tuning_req.peft_model_id = (peft_model_id_finetuning != nullptr)
-                                      ? *peft_model_id_finetuning
-                                      : PEFTModelID::NO_ID;
+  fine_tuning_req.peft_model_id = *peft_model_id_finetuning;
   fine_tuning_req.peft_finetuning_info.dataset_filepath =
       file_paths.dataset_file_path;
-  fine_tuning_req.peft_finetuning_info.max_training_steps =
-      max_training_steps;
+  fine_tuning_req.peft_finetuning_info.max_training_epochs =
+      max_training_epochs;
   requests.push_back(fine_tuning_req);

   std::cout << "----------inference started--------------" << std::endl;
3 changes: 2 additions & 1 deletion inference/models/llama.cc
@@ -36,7 +36,8 @@ void LLAMA::create_llama_model(FFModel &ff,
     assert(false && "The number of attention heads is smaller, or it is not "
                     "divisible by the tensor parallelism degree");
   }
-
+  std::cout << "Creating llama model with ff.config.enable_peft_finetuning="
+            << ff.config.enable_peft_finetuning << std::endl;
   assert(llama_config.hidden_size % llama_config.num_attention_heads == 0 &&
          "Hidden size not divisible by number of attention heads");
   int head_dim = llama_config.hidden_size / llama_config.num_attention_heads;
22 changes: 11 additions & 11 deletions inference/peft/peft.cc
@@ -53,7 +53,7 @@ void parse_input_args(char **argv,
                       int &max_tokens_per_batch,
                       int &max_sequence_length,
                       int &num_kv_cache_slots,
-                      int &max_training_steps,
+                      int &max_training_epochs,
                       int &num_layers_per_finetuning_step,
                       bool &run_warmup) {
   for (int i = 1; i < argc; i++) {
@@ -87,7 +87,7 @@ void parse_input_args(char **argv,
       continue;
     }
     // dataset for finetuning
-    if (!strcmp(argv[i], "")) {
+    if (!strcmp(argv[i], "-finetuning-dataset")) {
       paths.dataset_file_path = std::string(argv[++i]);
       continue;
     }
@@ -144,7 +144,7 @@ void parse_input_args(char **argv,
       continue;
     }
     if (!strcmp(argv[i], "--max-training-steps")) {
-      max_training_steps = std::stoi(argv[++i]);
+      max_training_epochs = std::stoi(argv[++i]);
       continue;
     }
     if (!strcmp(argv[i], "--num-layers-per-finetuning-step")) {
@@ -183,7 +183,8 @@ std::vector<Request> make_warmup_requests(int num_inf_request,
   finetuning_req.warmup = true;
   finetuning_req.peft_model_id =
       (peft_model_id != nullptr) ? *peft_model_id : PEFTModelID::NO_ID;
-  finetuning_req.peft_finetuning_info.max_training_steps = num_finetuning_steps;
+  finetuning_req.peft_finetuning_info.max_training_epochs =
+      num_finetuning_steps;
   warmup_requests.push_back(finetuning_req);
   return warmup_requests;
 }
@@ -207,7 +208,7 @@ void FlexFlow::top_level_task(Task const *task,
   int max_requests_per_batch = 1;
   int max_tokens_per_batch = 128;
   int max_sequence_length = 256;
-  int max_training_steps = 2;
+  int max_training_epochs = 2;
   bool enable_peft_finetuning = true;
   int num_layers_per_finetuning_step = -1;
   bool run_warmup = false;
@@ -231,7 +232,7 @@ void FlexFlow::top_level_task(Task const *task,
                    max_tokens_per_batch,
                    max_sequence_length,
                    num_kv_cache_slots,
-                   max_training_steps,
+                   max_training_epochs,
                    num_layers_per_finetuning_step,
                    run_warmup);

@@ -357,6 +358,8 @@ void FlexFlow::top_level_task(Task const *task,
   rm->set_enable_peft_finetuning(enable_peft_finetuning);

   FFModel model(ffconfig, ffconfig.cpu_offload);
+  model.set_num_kv_cache_pages(compute_num_kv_cache_pages_needed(
+      max_sequence_length, max_requests_per_batch, false));
   if (model_type == ModelType::LLAMA) {
     LLAMA::create_llama_model(model,
                               config_filepath,
@@ -394,9 +397,6 @@ void FlexFlow::top_level_task(Task const *task,
     assert(false && "unknow model type");
   }

-  model.set_num_kv_cache_pages(compute_num_kv_cache_pages_needed(
-      max_sequence_length, max_requests_per_batch, false));
-
   rm->set_num_transformer_layers(model.current_transformer_layer_id + 1);
   if (num_layers_per_finetuning_step > 0) {
     rm->set_num_layers_per_finetuning_step(num_layers_per_finetuning_step);
@@ -464,8 +464,8 @@ void FlexFlow::top_level_task(Task const *task,
                                         : PEFTModelID::NO_ID;
     fine_tuning_req.peft_finetuning_info.dataset_filepath =
         file_paths.dataset_file_path;
-    fine_tuning_req.peft_finetuning_info.max_training_steps =
-        max_training_steps;
+    fine_tuning_req.peft_finetuning_info.max_training_epochs =
+        max_training_epochs;
     requests.push_back(fine_tuning_req);
   }
   std::vector<GenerationResult> result = model.generate(requests);
2 changes: 1 addition & 1 deletion inference/python/ff_peft.py
@@ -165,7 +165,7 @@ def main():
         ff.RequestType.REQ_FINETUNING,
         peft_model_id=llm.get_ff_peft_id(lora_finetuning_config),
         dataset_filepath=configs.finetuning_dataset,
-        max_training_steps=2,
+        max_training_epochs=2,
     )
     requests.append(finetuning_request)

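For reference, a short sketch of how a script now builds and runs a finetuning request against the renamed field, mirroring the ff_peft.py hunk above; it assumes a built FlexFlow installation and that llm, configs, lora_finetuning_config, and requests are set up earlier in the script as in that file:

import flexflow.serve as ff  # import alias used by the FlexFlow demo scripts

finetuning_request = ff.Request(
    ff.RequestType.REQ_FINETUNING,
    peft_model_id=llm.get_ff_peft_id(lora_finetuning_config),
    dataset_filepath=configs.finetuning_dataset,
    max_training_epochs=2,  # renamed from max_training_steps in this commit
)
requests.append(finetuning_request)
results = llm.generate(requests)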
8 changes: 4 additions & 4 deletions inference/python/peft_demo/demo.ipynb
@@ -97,7 +97,7 @@
     " \"max_requests_per_batch\": 1,\n",
     " \"max_sequence_length\": 128,\n",
     " \"max_tokens_per_batch\": 128,\n",
-    " \"max_training_steps\": 100,\n",
+    " \"max_training_epochs\": 100,\n",
     " \"seed\": 42,\n",
     "}\n",
     "model_configs = {\n",
@@ -1082,7 +1082,7 @@
     " max_sequence_length=configs.max_sequence_length,\n",
     " peft_model_id=llm.get_ff_peft_id(lora_finetuning_config),\n",
     " dataset_filepath=os.path.join(os.getcwd(), configs.finetuning_dataset),\n",
-    " max_training_steps=configs.max_training_steps,\n",
+    " max_training_epochs=configs.max_training_epochs,\n",
     ")\n",
     "ft_res = llm.generate([finetuning_request])"
    ]
@@ -1104,7 +1104,7 @@
    }
   ],
   "source": [
-    "epochs = list(range(configs_dict[\"max_training_steps\"]))\n",
+    "epochs = list(range(configs_dict[\"max_training_epochs\"]))\n",
     "loss_values = ft_res[0].finetuning_losses\n",
     "\n",
     "plt.figure(figsize=(10, 6))\n",
Expand Down Expand Up @@ -1778,7 +1778,7 @@
" \"max_requests_per_batch\": 1,\n",
" \"max_sequence_length\": 128,\n",
" \"max_tokens_per_batch\": 128,\n",
" \"max_training_steps\": 100,\n",
" \"max_training_epochs\": 100,\n",
" \"seed\": 42,\n",
"}\n",
"model_configs = {\n",
Expand Down
6 changes: 3 additions & 3 deletions inference/python/peft_demo/demo.py
@@ -52,7 +52,7 @@ def create_datasets(finetune_dataset_size=2, inference_file_path='inference_data
     "max_requests_per_batch": 1,
     "max_sequence_length": 128,
     "max_tokens_per_batch": 128,
-    "max_training_steps": 100,
+    "max_training_epochs": 100,
     "seed": 42,
 }
 model_configs = {
@@ -185,7 +185,7 @@ def create_datasets(finetune_dataset_size=2, inference_file_path='inference_data
     max_sequence_length=configs.max_sequence_length,
     peft_model_id=llm.get_ff_peft_id(lora_finetuning_config),
     dataset_filepath=os.path.join(os.getcwd(), configs.finetuning_dataset),
-    max_training_steps=configs.max_training_steps,
+    max_training_epochs=configs.max_training_epochs,
 )
 ft_res = llm.generate([finetuning_request])
 for res in ft_res:
@@ -231,7 +231,7 @@ def create_datasets(finetune_dataset_size=2, inference_file_path='inference_data
 print("==Inference result after finetuning: ", inf_req_res_2[0].output_text)


-epochs = list(range(configs_dict["max_training_steps"]))
+epochs = list(range(configs_dict["max_training_epochs"]))
 loss_values = ft_res[0].finetuning_losses

 plt.figure(figsize=(10, 6))
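The plotting body in demo.py is truncated by the diff viewer right after plt.figure; a plausible completion of the loss-per-epoch figure using the epochs and loss_values variables defined above (the styling and output filename are assumptions, not the repository's code):

import matplotlib.pyplot as plt

plt.figure(figsize=(10, 6))
plt.plot(epochs, loss_values, marker="o")  # one loss value per training epoch
plt.xlabel("Epoch")
plt.ylabel("Finetuning loss")
plt.title("Loss per training epoch")
plt.grid(True)
plt.savefig("finetuning_loss.png")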
4 changes: 2 additions & 2 deletions python/flexflow/core/flexflow_cffi.py
@@ -2136,7 +2136,7 @@ class Request:
     add_special_tokens: bool = True
     peft_model_id: Optional[PEFTModelID] = None
     dataset_filepath: Optional[str] = None
-    max_training_steps: int = 1
+    max_training_epochs: int = 1


 # -----------------------------------------------------------------------
@@ -4492,7 +4492,7 @@ def generate(self, requests_list: List[Request]):
         dataset_filepaths = [
             get_c_name(request.dataset_filepath) for request in requests_list
         ]
-        training_steps = [request.max_training_steps for request in requests_list]
+        training_steps = [request.max_training_epochs for request in requests_list]
         num_finetuning_losses = ffi.new("int *")
         # c_finetuning_losses = ffi.new("float**")
         # TODO: set this value automatically
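To make the binding-side flow concrete: a hedged sketch of how the renamed dataclass field travels from a user-constructed Request into generate(), which still gathers the values under the old local name training_steps before handing them to the C API. The import path, field names other than those in the diff, and the example dataset path are assumptions:

from flexflow.core.flexflow_cffi import Request, RequestType  # assumed path

req = Request(
    req_type=RequestType.REQ_FINETUNING,
    dataset_filepath="/path/to/dataset.json",  # placeholder path
    max_training_epochs=2,  # dataclass default is 1, per the hunk above
)

# Inside generate(), one epoch count is collected per request:
training_steps = [r.max_training_epochs for r in [req]]
assert training_steps == [2]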
2 changes: 1 addition & 1 deletion src/c/flexflow_c.cc
@@ -1604,7 +1604,7 @@ void flexflow_model_generate(flexflow_model_t handle_,
     }
     std::string const dataset_fp(dataset_filepaths[i]);
     fine_tuning_req.peft_finetuning_info.dataset_filepath = dataset_fp;
-    fine_tuning_req.peft_finetuning_info.max_training_steps =
+    fine_tuning_req.peft_finetuning_info.max_training_epochs =
         training_steps[i];
     requests.push_back(fine_tuning_req);
     DEBUG_PRINT("[Model] finetune[%d] %p %s %i %i %i %i",
2 changes: 1 addition & 1 deletion src/ops/inc_multihead_self_attention.cu
@@ -1388,7 +1388,7 @@ void peft_bwd_kernel(IncMultiHeadSelfAttentionMeta const *m,
     assert(num_tokens == num_total_tokens);
     assert(num_total_tokens == bc->requestsInfo[i].max_length);
     assert(m->qProjSize == m->kProjSize && m->kProjSize == m->vProjSize);
-    assert(bc->requestsInfo[i].first_token_offset_in_batch == 0);
+    // assert(bc->requestsInfo[i].first_token_offset_in_batch == 0);
     if (m->inference_debugging) {
       // save result to file for checking
2 changes: 1 addition & 1 deletion src/ops/kernels/residual_rms_norm_kernels.cu
@@ -429,7 +429,7 @@ void peft_bwd_kernel(ResidualRMSNormMeta const *m,
       bc->peft_bwd_applies_to_this_layer(m->layer_guid.transformer_layer_id));
   int i = bc->finetuning_request_index();

-  int M = bc->requestsInfo[i].num_tokens_in_batch;
+  int M = bc->num_finetuning_bwd_tokens();
   int N = m->in_dim;

   T const *residual_output_rms_input_ptr =
4 changes: 2 additions & 2 deletions src/ops/kernels/rms_norm_kernels.cu
@@ -448,8 +448,8 @@ void peft_bwd_kernel(RMSNormMeta const *m,
       bc->peft_bwd_applies_to_this_layer(m->layer_guid.transformer_layer_id));
   int i = bc->finetuning_request_index();

-  int M = bc->requestsInfo[i].num_tokens_in_batch;
-  int N = m->num_elements;
+  int M = bc->num_finetuning_bwd_tokens();
+  int N = m->in_dim;
   ComputeInternalGradientsCUDAKernel<T>
       <<<M, std::min(N, CUDA_NUM_THREADS), 0, stream>>>(
           N,