Commit ef098b9: linting
goliaro committed Feb 21, 2025
1 parent 536e450 commit ef098b9
Showing 24 changed files with 252 additions and 244 deletions.
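All of the hunks below are formatting-only changes of the kind a C++ formatter such as clang-format produces: include directives are sorted alphabetically, declarations that exceed the column limit are wrapped with their continuation arguments aligned under the opening parenthesis, and long return types are moved onto their own line. The following sketch illustrates that convention with hypothetical names only (ExampleConfig, ExampleResult, and both functions are illustrative, not FlexFlow code):

struct ExampleConfig;
struct ExampleResult;

// Continuation arguments are aligned under the opening parenthesis.
void example_set_flag(ExampleConfig *handle,
                      bool value);

// When the whole signature does not fit on one line, the return type is
// placed on its own line and the parameters are wrapped as above.
ExampleResult
example_run_inference(ExampleConfig *handle,
                      int request_index,
                      bool verbose);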
include/flexflow/flexflow_c.h (3 changes: 2 additions & 1 deletion)
@@ -94,7 +94,8 @@ int flexflow_config_get_pipeline_parallelism_degree(flexflow_config_t handle_);
 bool flexflow_config_get_enable_peft(flexflow_config_t handle_);
 
 bool flexflow_config_get_enable_peft_finetuning(flexflow_config_t handle_);
-void flexflow_config_set_enable_peft_finetuning(flexflow_config_t handle_, bool value);
+void flexflow_config_set_enable_peft_finetuning(flexflow_config_t handle_,
+    bool value);
 
 void flexflow_config_set_data_parallelism_degree(flexflow_config_t handle_,
     int value);
@@ -27,10 +27,10 @@ void compute_attention_kernel_generation(IncMultiHeadSelfAttentionMeta const *m,
 
 template <typename DT>
 void apply_scaling_and_rotary(IncMultiHeadSelfAttentionMeta const *m,
-    BatchConfig const *bc,
-    int shard_id,
-    DT *output_ptr,
-    ffStream_t stream);
+    BatchConfig const *bc,
+    int shard_id,
+    DT *output_ptr,
+    ffStream_t stream);
 
 template <typename DT>
 __global__ void apply_position_bias_qkprd(DT *input_ptr,
include/flexflow/ops/kernels/softmax_kernels.h (6 changes: 3 additions & 3 deletions)
@@ -1,8 +1,8 @@
 #ifndef _FLEXFLOW_OPS_KERNELS_SOFTMAX_KERNELS_H
 #define _FLEXFLOW_OPS_KERNELS_SOFTMAX_KERNELS_H
 
-#include "flexflow/ffconst_utils.h"
 #include "flexflow/device.h"
+#include "flexflow/ffconst_utils.h"
 #include "flexflow/fftype.h"
 #include "flexflow/op_meta.h"
 #include "flexflow/ops/softmax.h"
@@ -85,8 +85,8 @@ template <typename DT>
 void store_peft_activations(SoftmaxMeta *m,
     BatchConfig const *bc,
     int num_classes,
-    DT *output_ptr,
-    cudaStream_t stream);
+    DT *output_ptr,
+    cudaStream_t stream);
 
 } // namespace Internal
 } // namespace Softmax
include/flexflow/request_manager.h (8 changes: 5 additions & 3 deletions)
@@ -37,8 +37,10 @@ class InferenceManager {
 static InferenceManager *get_inference_manager();
 void compile_model_and_allocate_buffer(FFModel *model);
 void init_operators_inference(FFModel *model);
-InferenceResultFuture inference(FFModel *model, int index, BatchConfig const &bc);
-InferenceResultFuture inference(FFModel *model, int index, BatchConfigFuture const &bc);
+InferenceResultFuture
+    inference(FFModel *model, int index, BatchConfig const &bc);
+InferenceResultFuture
+    inference(FFModel *model, int index, BatchConfigFuture const &bc);
 std::vector<FinetuningBwdFuture>
     peft_bwd(FFModel *model, int index, BatchConfigFuture const &bc);
 void load_input_tokens_from_batch_config(FFModel *model,
@@ -374,7 +376,7 @@ class RequestManager {
     std::vector<Legion::PhysicalRegion> const &regions,
     Legion::Context ctx,
     Legion::Runtime *runtime);
-
+
 int run_idx = 0;
 
 private:
include/flexflow/utils/peft_weight_allocator.h (2 changes: 1 addition & 1 deletion)
@@ -81,7 +81,7 @@ class PEFTMemoryManager {
     max_peft_tokens(max_peft_tokens_),
     lora_layername_substr(lora_layername_substr_), dt(dt_),
     base_ptr(nullptr), finetuning_ptr(nullptr),
-    finetuning_model_id(PEFTModelID::NO_ID), log_instance_creation(false) {
+    finetuning_model_id(PEFTModelID::NO_ID), log_instance_creation(false) {
 max_lora_size =
     data_type_size(dt) * (max_rank * in_dim + max_rank * out_dim);
 assert(max_concurrent_adapters > 0 &&
inference/peft/peft.cc (21 changes: 14 additions & 7 deletions)
@@ -389,12 +389,15 @@ void FlexFlow::top_level_task(Task const *task,
 peft_model_id = model.register_peft_adapter(peft_config);
 }
 if (enable_peft_finetuning) {
-peft_model_id_finetuning = model.register_peft_adapter(peft_config_finetuning);
+peft_model_id_finetuning =
+    model.register_peft_adapter(peft_config_finetuning);
 }
 
 if (run_warmup) {
-std::vector<Request> warmup_requests = make_warmup_requests(10, 1000, peft_model_id_finetuning);
-std::vector<GenerationResult> warmup_result = model.generate(warmup_requests);
+std::vector<Request> warmup_requests =
+    make_warmup_requests(10, 1000, peft_model_id_finetuning);
+std::vector<GenerationResult> warmup_result =
+    model.generate(warmup_requests);
 rm->set_inference_finished(false); // reset inference finished flag
 std::cout << "----------warmup finished--------------" << std::endl;
 }
@@ -437,8 +440,10 @@ void FlexFlow::top_level_task(Task const *task,
 fine_tuning_req.peft_model_id = (peft_model_id_finetuning != nullptr)
     ? *peft_model_id_finetuning
     : PEFTModelID::NO_ID;
-fine_tuning_req.peft_finetuning_info.dataset_filepath = file_paths.dataset_file_path;
-fine_tuning_req.peft_finetuning_info.max_training_steps = max_training_steps;
+fine_tuning_req.peft_finetuning_info.dataset_filepath =
+    file_paths.dataset_file_path;
+fine_tuning_req.peft_finetuning_info.max_training_steps =
+    max_training_steps;
 requests.push_back(fine_tuning_req);
 }
 std::vector<GenerationResult> result = model.generate(requests);
@@ -456,10 +461,12 @@ void FlexFlow::top_level_task(Task const *task,
 if (!file_paths.profiling_folder_path.empty()) {
 std::cout << "Saving profiling info..." << std::endl;
 std::string dataset_name;
-// set dataset name to "wildchat" if the prompt file path contains "wildchat"
+// set dataset name to "wildchat" if the prompt file path contains
+// "wildchat"
 if (file_paths.prompt_file_path.find("wildchat") != std::string::npos) {
 dataset_name = "wildchat";
-} else if (file_paths.prompt_file_path.find("sharegpt") != std::string::npos) {
+} else if (file_paths.prompt_file_path.find("sharegpt") !=
+    std::string::npos) {
 dataset_name = "sharegpt";
 } else {
 dataset_name = "unknown";
src/c/flexflow_c.cc (7 changes: 4 additions & 3 deletions)
@@ -181,7 +181,8 @@ bool flexflow_config_get_enable_peft_finetuning(flexflow_config_t handle_) {
 FFConfig *handle = FFCObjectWrapper::unwrap(handle_);
 return handle->enable_peft_finetuning;
 }
-void flexflow_config_set_enable_peft_finetuning(flexflow_config_t handle_, bool value) {
+void flexflow_config_set_enable_peft_finetuning(flexflow_config_t handle_,
+    bool value) {
 FFConfig *handle = FFCObjectWrapper::unwrap(handle_);
 handle->enable_peft_finetuning = value;
 }
@@ -2805,15 +2806,15 @@ void flexflow_request_manager_set_num_transformers_layers(
 RequestManager *handle = FFCObjectWrapper::unwrap(handle_);
 handle->set_num_transformer_layers(num_transformers_layers_);
 DEBUG_PRINT("[RequestManager] set num_transformers_layers %d",
-    num_transformers_layers_);
+    num_transformers_layers_);
 }
 
 void flexflow_request_manager_set_num_layers_per_finetuning_step(
     flexflow_request_manager_t handle_, int num_layers_per_finetuning_step_) {
 RequestManager *handle = FFCObjectWrapper::unwrap(handle_);
 handle->set_num_layers_per_finetuning_step(num_layers_per_finetuning_step_);
 DEBUG_PRINT("[RequestManager] set num layers per finetuning step %d",
-    num_layers_per_finetuning_step_);
+    num_layers_per_finetuning_step_);
 }
 
 void flexflow_request_manager_register_tokenizer(
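The flexflow_c.cc hunk above is the C-binding side of the setter declared in flexflow_c.h earlier in this commit: the opaque handle is unwrapped to the underlying C++ config object and the field is assigned. A reduced, self-contained sketch of that opaque-handle pattern with hypothetical types (FFCObjectWrapper itself is not reproduced here):

// Hypothetical reduction of the opaque-handle pattern used by the C bindings.
#include <cstdio>

struct ExampleConfig {
  bool enable_peft_finetuning = false;
};

// The C API only sees an opaque pointer wrapped in a small struct.
typedef struct example_config_t_ { void *impl; } example_config_t;

static ExampleConfig *unwrap(example_config_t handle_) {
  return static_cast<ExampleConfig *>(handle_.impl);
}

void example_config_set_enable_peft_finetuning(example_config_t handle_,
                                               bool value) {
  ExampleConfig *handle = unwrap(handle_);
  handle->enable_peft_finetuning = value;
}

int main() {
  ExampleConfig cfg;
  example_config_t handle{&cfg};
  example_config_set_enable_peft_finetuning(handle, true);
  std::printf("enable_peft_finetuning = %d\n", cfg.enable_peft_finetuning);
  return 0;
}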
src/ops/add_bias_residual_layer_norm.cpp (7 changes: 3 additions & 4 deletions)
@@ -41,10 +41,9 @@ AddBiasResidualLayerNormMeta::AddBiasResidualLayerNormMeta(
 DataType data_type = ln->data_type;
 size_t in_dim = ln->inputs[0]->dims[0].size / ln->inputs[0]->dims[0].degree;
 allocated_peft_buffer_size =
-    enable_peft_finetuning
-    ? (data_type_size(data_type) *
-    BatchConfig::max_sequence_length() * in_dim)
-    : 0;
+    enable_peft_finetuning ? (data_type_size(data_type) *
+    BatchConfig::max_sequence_length() * in_dim)
+    : 0;
 size_t totalSize = effective_batch_size * data_type_size(data_type) * 3 +
     allocated_peft_buffer_size;
 gpu_mem_allocator.create_legion_instance(
src/ops/add_bias_residual_layer_norm.cu (7 changes: 3 additions & 4 deletions)
@@ -40,10 +40,9 @@ AddBiasResidualLayerNormMeta::AddBiasResidualLayerNormMeta(
 DataType data_type = ln->data_type;
 size_t in_dim = ln->inputs[0]->dims[0].size / ln->inputs[0]->dims[0].degree;
 allocated_peft_buffer_size =
-    enable_peft_finetuning
-    ? (data_type_size(data_type) *
-    BatchConfig::max_sequence_length() * in_dim)
-    : 0;
+    enable_peft_finetuning ? (data_type_size(data_type) *
+    BatchConfig::max_sequence_length() * in_dim)
+    : 0;
 size_t totalSize = effective_batch_size * data_type_size(data_type) * 3 +
     allocated_peft_buffer_size;
 
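The two layer-norm hunks above only re-wrap a ternary expression, but that expression is the PEFT buffer sizing itself: when finetuning is enabled, an extra data_type_size(data_type) * BatchConfig::max_sequence_length() * in_dim bytes are added on top of the usual effective_batch_size * data_type_size(data_type) * 3 working space. A self-contained numeric sketch of that arithmetic, with made-up values for illustration:

#include <cstddef>
#include <iostream>

int main() {
  // Hypothetical values, for illustration only.
  std::size_t data_type_size = 2; // e.g. half precision
  std::size_t max_sequence_length = 1024;
  std::size_t in_dim = 4096;
  std::size_t effective_batch_size = 8;
  bool enable_peft_finetuning = true;

  std::size_t allocated_peft_buffer_size =
      enable_peft_finetuning ? (data_type_size * max_sequence_length * in_dim)
                             : 0;
  std::size_t totalSize =
      effective_batch_size * data_type_size * 3 + allocated_peft_buffer_size;

  std::cout << "PEFT buffer: " << allocated_peft_buffer_size
            << " bytes, total: " << totalSize << " bytes\n";
  return 0;
}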
src/ops/inc_multihead_self_attention.cpp (8 changes: 4 additions & 4 deletions)
@@ -827,10 +827,10 @@ __global__ void
 
 template <typename DT>
 void apply_scaling_and_rotary(IncMultiHeadSelfAttentionMeta const *m,
-    BatchConfig const *bc,
-    int shard_id,
-    DT *output_ptr,
-    hipStream_t stream) {
+    BatchConfig const *bc,
+    int shard_id,
+    DT *output_ptr,
+    hipStream_t stream) {
 
 checkCUDA(hipblasSetStream(m->handle.blas, stream));
 checkCUDNN(miopenSetStream(m->handle.dnn, stream));
