Commit ef098b9: linting
goliaro committed Feb 21, 2025
1 parent 536e450 commit ef098b9
Showing 24 changed files with 252 additions and 244 deletions.
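All of the hunks below are formatting-only changes of the kind a C++ formatter such as clang-format produces: include directives are sorted alphabetically, declarations that exceed the column limit are wrapped with their continuation arguments aligned under the opening parenthesis, and long return types are moved onto their own line. The following sketch illustrates that convention with hypothetical names only (ExampleConfig, ExampleResult, and both functions are illustrative, not FlexFlow code):

struct ExampleConfig;
struct ExampleResult;

// Continuation arguments are aligned under the opening parenthesis.
void example_set_flag(ExampleConfig *handle,
                      bool value);

// When the whole signature does not fit on one line, the return type is
// placed on its own line and the parameters are wrapped as above.
ExampleResult
example_run_inference(ExampleConfig *handle,
                      int request_index,
                      bool verbose);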
include/flexflow/flexflow_c.h (3 changes: 2 additions & 1 deletion)
@@ -94,7 +94,8 @@ int flexflow_config_get_pipeline_parallelism_degree(flexflow_config_t handle_);
 bool flexflow_config_get_enable_peft(flexflow_config_t handle_);
 
 bool flexflow_config_get_enable_peft_finetuning(flexflow_config_t handle_);
-void flexflow_config_set_enable_peft_finetuning(flexflow_config_t handle_, bool value);
+void flexflow_config_set_enable_peft_finetuning(flexflow_config_t handle_,
+    bool value);
 
 void flexflow_config_set_data_parallelism_degree(flexflow_config_t handle_,
     int value);
@@ -27,10 +27,10 @@ void compute_attention_kernel_generation(IncMultiHeadSelfAttentionMeta const *m,
 
 template <typename DT>
 void apply_scaling_and_rotary(IncMultiHeadSelfAttentionMeta const *m,
-    BatchConfig const *bc,
-    int shard_id,
-    DT *output_ptr,
-    ffStream_t stream);
+    BatchConfig const *bc,
+    int shard_id,
+    DT *output_ptr,
+    ffStream_t stream);
 
 template <typename DT>
 __global__ void apply_position_bias_qkprd(DT *input_ptr,
include/flexflow/ops/kernels/softmax_kernels.h (6 changes: 3 additions & 3 deletions)
@@ -1,8 +1,8 @@
 #ifndef _FLEXFLOW_OPS_KERNELS_SOFTMAX_KERNELS_H
 #define _FLEXFLOW_OPS_KERNELS_SOFTMAX_KERNELS_H
 
-#include "flexflow/ffconst_utils.h"
 #include "flexflow/device.h"
+#include "flexflow/ffconst_utils.h"
 #include "flexflow/fftype.h"
 #include "flexflow/op_meta.h"
 #include "flexflow/ops/softmax.h"
@@ -85,8 +85,8 @@ template <typename DT>
 void store_peft_activations(SoftmaxMeta *m,
     BatchConfig const *bc,
     int num_classes,
-    DT *output_ptr,
-    cudaStream_t stream);
+    DT *output_ptr,
+    cudaStream_t stream);
 
 } // namespace Internal
 } // namespace Softmax
include/flexflow/request_manager.h (8 changes: 5 additions & 3 deletions)
@@ -37,8 +37,10 @@ class InferenceManager {
 static InferenceManager *get_inference_manager();
 void compile_model_and_allocate_buffer(FFModel *model);
 void init_operators_inference(FFModel *model);
-InferenceResultFuture inference(FFModel *model, int index, BatchConfig const &bc);
-InferenceResultFuture inference(FFModel *model, int index, BatchConfigFuture const &bc);
+InferenceResultFuture
+    inference(FFModel *model, int index, BatchConfig const &bc);
+InferenceResultFuture
+    inference(FFModel *model, int index, BatchConfigFuture const &bc);
 std::vector<FinetuningBwdFuture>
     peft_bwd(FFModel *model, int index, BatchConfigFuture const &bc);
 void load_input_tokens_from_batch_config(FFModel *model,
@@ -374,7 +376,7 @@ class RequestManager {
     std::vector<Legion::PhysicalRegion> const &regions,
     Legion::Context ctx,
     Legion::Runtime *runtime);
-
+
 int run_idx = 0;
 
 private:
include/flexflow/utils/peft_weight_allocator.h (2 changes: 1 addition & 1 deletion)
@@ -81,7 +81,7 @@ class PEFTMemoryManager {
     max_peft_tokens(max_peft_tokens_),
     lora_layername_substr(lora_layername_substr_), dt(dt_),
     base_ptr(nullptr), finetuning_ptr(nullptr),
-    finetuning_model_id(PEFTModelID::NO_ID), log_instance_creation(false) {
+    finetuning_model_id(PEFTModelID::NO_ID), log_instance_creation(false) {
 max_lora_size =
     data_type_size(dt) * (max_rank * in_dim + max_rank * out_dim);
 assert(max_concurrent_adapters > 0 &&
inference/peft/peft.cc (21 changes: 14 additions & 7 deletions)
@@ -389,12 +389,15 @@ void FlexFlow::top_level_task(Task const *task,
 peft_model_id = model.register_peft_adapter(peft_config);
 }
 if (enable_peft_finetuning) {
-peft_model_id_finetuning = model.register_peft_adapter(peft_config_finetuning);
+peft_model_id_finetuning =
+    model.register_peft_adapter(peft_config_finetuning);
 }
 
 if (run_warmup) {
-std::vector<Request> warmup_requests = make_warmup_requests(10, 1000, peft_model_id_finetuning);
-std::vector<GenerationResult> warmup_result = model.generate(warmup_requests);
+std::vector<Request> warmup_requests =
+    make_warmup_requests(10, 1000, peft_model_id_finetuning);
+std::vector<GenerationResult> warmup_result =
+    model.generate(warmup_requests);
 rm->set_inference_finished(false); // reset inference finished flag
 std::cout << "----------warmup finished--------------" << std::endl;
 }
@@ -437,8 +440,10 @@ void FlexFlow::top_level_task(Task const *task,
 fine_tuning_req.peft_model_id = (peft_model_id_finetuning != nullptr)
     ? *peft_model_id_finetuning
     : PEFTModelID::NO_ID;
-fine_tuning_req.peft_finetuning_info.dataset_filepath = file_paths.dataset_file_path;
-fine_tuning_req.peft_finetuning_info.max_training_steps = max_training_steps;
+fine_tuning_req.peft_finetuning_info.dataset_filepath =
+    file_paths.dataset_file_path;
+fine_tuning_req.peft_finetuning_info.max_training_steps =
+    max_training_steps;
 requests.push_back(fine_tuning_req);
 }
 std::vector<GenerationResult> result = model.generate(requests);
@@ -456,10 +461,12 @@ void FlexFlow::top_level_task(Task const *task,
 if (!file_paths.profiling_folder_path.empty()) {
 std::cout << "Saving profiling info..." << std::endl;
 std::string dataset_name;
-// set dataset name to "wildchat" if the prompt file path contains "wildchat"
+// set dataset name to "wildchat" if the prompt file path contains
+// "wildchat"
 if (file_paths.prompt_file_path.find("wildchat") != std::string::npos) {
 dataset_name = "wildchat";
-} else if (file_paths.prompt_file_path.find("sharegpt") != std::string::npos) {
+} else if (file_paths.prompt_file_path.find("sharegpt") !=
+    std::string::npos) {
 dataset_name = "sharegpt";
 } else {
 dataset_name = "unknown";
src/c/flexflow_c.cc (7 changes: 4 additions & 3 deletions)
@@ -181,7 +181,8 @@ bool flexflow_config_get_enable_peft_finetuning(flexflow_config_t handle_) {
 FFConfig *handle = FFCObjectWrapper::unwrap(handle_);
 return handle->enable_peft_finetuning;
 }
-void flexflow_config_set_enable_peft_finetuning(flexflow_config_t handle_, bool value) {
+void flexflow_config_set_enable_peft_finetuning(flexflow_config_t handle_,
+    bool value) {
 FFConfig *handle = FFCObjectWrapper::unwrap(handle_);
 handle->enable_peft_finetuning = value;
 }
@@ -2805,15 +2806,15 @@ void flexflow_request_manager_set_num_transformers_layers(
 RequestManager *handle = FFCObjectWrapper::unwrap(handle_);
 handle->set_num_transformer_layers(num_transformers_layers_);
 DEBUG_PRINT("[RequestManager] set num_transformers_layers %d",
-    num_transformers_layers_);
+    num_transformers_layers_);
 }
 
 void flexflow_request_manager_set_num_layers_per_finetuning_step(
     flexflow_request_manager_t handle_, int num_layers_per_finetuning_step_) {
 RequestManager *handle = FFCObjectWrapper::unwrap(handle_);
 handle->set_num_layers_per_finetuning_step(num_layers_per_finetuning_step_);
 DEBUG_PRINT("[RequestManager] set num layers per finetuning step %d",
-    num_layers_per_finetuning_step_);
+    num_layers_per_finetuning_step_);
 }
 
 void flexflow_request_manager_register_tokenizer(
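The flexflow_c.cc hunk above is the C-binding side of the setter declared in flexflow_c.h earlier in this commit: the opaque handle is unwrapped to the underlying C++ config object and the field is assigned. A reduced, self-contained sketch of that opaque-handle pattern with hypothetical types (FFCObjectWrapper itself is not reproduced here):

// Hypothetical reduction of the opaque-handle pattern used by the C bindings.
#include <cstdio>

struct ExampleConfig {
  bool enable_peft_finetuning = false;
};

// The C API only sees an opaque pointer wrapped in a small struct.
typedef struct example_config_t_ { void *impl; } example_config_t;

static ExampleConfig *unwrap(example_config_t handle_) {
  return static_cast<ExampleConfig *>(handle_.impl);
}

void example_config_set_enable_peft_finetuning(example_config_t handle_,
                                               bool value) {
  ExampleConfig *handle = unwrap(handle_);
  handle->enable_peft_finetuning = value;
}

int main() {
  ExampleConfig cfg;
  example_config_t handle{&cfg};
  example_config_set_enable_peft_finetuning(handle, true);
  std::printf("enable_peft_finetuning = %d\n", cfg.enable_peft_finetuning);
  return 0;
}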
src/ops/add_bias_residual_layer_norm.cpp (7 changes: 3 additions & 4 deletions)
@@ -41,10 +41,9 @@ AddBiasResidualLayerNormMeta::AddBiasResidualLayerNormMeta(
 DataType data_type = ln->data_type;
 size_t in_dim = ln->inputs[0]->dims[0].size / ln->inputs[0]->dims[0].degree;
 allocated_peft_buffer_size =
-    enable_peft_finetuning
-    ? (data_type_size(data_type) *
-    BatchConfig::max_sequence_length() * in_dim)
-    : 0;
+    enable_peft_finetuning ? (data_type_size(data_type) *
+    BatchConfig::max_sequence_length() * in_dim)
+    : 0;
 size_t totalSize = effective_batch_size * data_type_size(data_type) * 3 +
     allocated_peft_buffer_size;
 gpu_mem_allocator.create_legion_instance(
src/ops/add_bias_residual_layer_norm.cu (7 changes: 3 additions & 4 deletions)
@@ -40,10 +40,9 @@ AddBiasResidualLayerNormMeta::AddBiasResidualLayerNormMeta(
 DataType data_type = ln->data_type;
 size_t in_dim = ln->inputs[0]->dims[0].size / ln->inputs[0]->dims[0].degree;
 allocated_peft_buffer_size =
-    enable_peft_finetuning
-    ? (data_type_size(data_type) *
-    BatchConfig::max_sequence_length() * in_dim)
-    : 0;
+    enable_peft_finetuning ? (data_type_size(data_type) *
+    BatchConfig::max_sequence_length() * in_dim)
+    : 0;
 size_t totalSize = effective_batch_size * data_type_size(data_type) * 3 +
     allocated_peft_buffer_size;
 
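The two layer-norm hunks above only re-wrap a ternary expression, but that expression is the PEFT buffer sizing itself: when finetuning is enabled, an extra data_type_size(data_type) * BatchConfig::max_sequence_length() * in_dim bytes are added on top of the usual effective_batch_size * data_type_size(data_type) * 3 working space. A self-contained numeric sketch of that arithmetic, with made-up values for illustration:

#include <cstddef>
#include <iostream>

int main() {
  // Hypothetical values, for illustration only.
  std::size_t data_type_size = 2; // e.g. half precision
  std::size_t max_sequence_length = 1024;
  std::size_t in_dim = 4096;
  std::size_t effective_batch_size = 8;
  bool enable_peft_finetuning = true;

  std::size_t allocated_peft_buffer_size =
      enable_peft_finetuning ? (data_type_size * max_sequence_length * in_dim)
                             : 0;
  std::size_t totalSize =
      effective_batch_size * data_type_size * 3 + allocated_peft_buffer_size;

  std::cout << "PEFT buffer: " << allocated_peft_buffer_size
            << " bytes, total: " << totalSize << " bytes\n";
  return 0;
}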
src/ops/inc_multihead_self_attention.cpp (8 changes: 4 additions & 4 deletions)
@@ -827,10 +827,10 @@ __global__ void
 
 template <typename DT>
 void apply_scaling_and_rotary(IncMultiHeadSelfAttentionMeta const *m,
-    BatchConfig const *bc,
-    int shard_id,
-    DT *output_ptr,
-    hipStream_t stream) {
+    BatchConfig const *bc,
+    int shard_id,
+    DT *output_ptr,
+    hipStream_t stream) {
 
 checkCUDA(hipblasSetStream(m->handle.blas, stream));
 checkCUDNN(miopenSetStream(m->handle.dnn, stream));
