LoRA configs serialize / deserialize into a single file
goliaro committed Oct 2, 2024
1 parent 8d4641c commit f3de642
Showing 5 changed files with 114 additions and 63 deletions.
6 changes: 6 additions & 0 deletions include/flexflow/batch_config.h
@@ -44,6 +44,11 @@ struct OptimizerTasks {
bool save_updated_weights = false;
};

struct NewPeftModelPath {
PEFTModelID peft_model_id;
std::string filepath;
};

void set_optimizer_tasks(OptimizerTasks &tasks,
int max_training_steps,
int completed_training_steps,
@@ -135,6 +140,7 @@ class BatchConfig {
PerRequestInfo requestsInfo[MAX_NUM_REQUESTS];
PerTokenInfo tokensInfo[MAX_NUM_TOKENS];
PerTokenInfo labelsInfo[MAX_NUM_TOKENS];
NewPeftModelPath new_peft_model_paths[MAX_NUM_REQUESTS];

bool request_completed[MAX_NUM_REQUESTS];
bool request_running[MAX_NUM_REQUESTS];
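
Purely as an illustration (not part of this commit), the new per-request array could be filled in by the request manager along these lines; the helper name and the slot handling are assumptions:

#include "flexflow/batch_config.h"

#include <string>

using namespace FlexFlow;

// Hypothetical helper: record the on-disk config path of a newly registered
// PEFT adapter in the batch-config entry for the given request slot, so that
// worker shards can later deserialize the config from that single file.
void record_new_peft_model_path(BatchConfig &bc,
                                int request_slot,
                                PEFTModelID const &model_id,
                                std::string const &config_filepath) {
  bc.new_peft_model_paths[request_slot].peft_model_id = model_id;
  bc.new_peft_model_paths[request_slot].filepath = config_filepath;
}
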
122 changes: 96 additions & 26 deletions include/flexflow/ops/lora_linear_params.h
@@ -17,7 +17,10 @@ namespace FlexFlow {
class LoraOptimizerConfig {
public:
LoraOptimizerConfig();
virtual ~LoraOptimizerConfig() {}
virtual std::string getType() const = 0;
virtual nlohmann::json toJson() const = 0;
static std::unique_ptr<LoraOptimizerConfig> fromJson(const nlohmann::json& j);
virtual ~LoraOptimizerConfig() = default;
};

class LoraSGDOptimizerConfig : public LoraOptimizerConfig {
Expand All @@ -29,9 +32,25 @@ class LoraSGDOptimizerConfig : public LoraOptimizerConfig {
bool weight_decay_ = 0.0f);
friend std::ostream &operator<<(std::ostream &os,
LoraSGDOptimizerConfig const &llc);

NLOHMANN_DEFINE_TYPE_INTRUSIVE(
LoraSGDOptimizerConfig, lr, momentum, nesterov, weight_decay)

std::string getType() const override { return "SGD"; }

nlohmann::json toJson() const override {
return {{"type", "SGD"},
{"lr", lr},
{"momentum", momentum},
{"nesterov", nesterov},
{"weight_decay", weight_decay}};
}

static std::unique_ptr<LoraSGDOptimizerConfig> fromJson(const nlohmann::json& j) {
auto sgd = std::make_unique<LoraSGDOptimizerConfig>();
sgd->lr = j["lr"];
sgd->momentum = j["momentum"];
sgd->nesterov = j["nesterov"];
sgd->weight_decay = j["weight_decay"];
return sgd;
}

public:
double lr = 0.001f;
@@ -50,9 +69,27 @@ class LoraAdamOptimizerConfig : public LoraOptimizerConfig {
double epsilon_ = 1e-8);
friend std::ostream &operator<<(std::ostream &os,
LoraAdamOptimizerConfig const &llc);

NLOHMANN_DEFINE_TYPE_INTRUSIVE(
LoraAdamOptimizerConfig, alpha, beta1, beta2, weight_decay, epsilon)

std::string getType() const override { return "Adam"; }

nlohmann::json toJson() const override {
return {{"type", "Adam"},
{"alpha", alpha},
{"beta1", beta1},
{"beta2", beta2},
{"weight_decay", weight_decay},
{"epsilon", epsilon}};
}

static std::unique_ptr<LoraAdamOptimizerConfig> fromJson(const nlohmann::json& j) {
auto adam = std::make_unique<LoraAdamOptimizerConfig>();
adam->alpha = j["alpha"];
adam->beta1 = j["beta1"];
adam->beta2 = j["beta2"];
adam->weight_decay = j["weight_decay"];
adam->epsilon = j["epsilon"];
return adam;
}

public:
// Adam
@@ -63,13 +100,13 @@ class LoraAdamOptimizerConfig : public LoraOptimizerConfig {
double epsilon = 1e-8;
};

// Serialization helpers
template <typename T>
void serialize_to_json_file(T const &obj, fs::path const &filepath);
// Defined inline since this lives in a header included by multiple translation units.
inline std::unique_ptr<LoraOptimizerConfig> LoraOptimizerConfig::fromJson(const nlohmann::json& j) {
std::string type = j["type"];
if (type == "SGD") return LoraSGDOptimizerConfig::fromJson(j);
if (type == "Adam") return LoraAdamOptimizerConfig::fromJson(j);
throw std::runtime_error("Unknown optimizer type");
}

// Function to deserialize JSON from file and create object
template <typename T>
std::unique_ptr<T> deserialize_from_json_file(fs::path const &filepath);
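
For illustration only (code not in this diff), a round trip through the new polymorphic interface could look like the following sketch, assuming nlohmann::json and the classes above are available on the include path:

#include <iostream>
#include <memory>
#include <nlohmann/json.hpp>

#include "flexflow/ops/lora_linear_params.h"

using namespace FlexFlow;

int main() {
  LoraSGDOptimizerConfig sgd;
  sgd.lr = 0.01;
  sgd.momentum = 0.9;

  // Serialize through the virtual interface; the "type" field is what the
  // base-class factory dispatches on.
  nlohmann::json j = sgd.toJson();

  // Restore as a base-class pointer without knowing the concrete type up front.
  std::unique_ptr<LoraOptimizerConfig> restored = LoraOptimizerConfig::fromJson(j);
  std::cout << restored->getType() << std::endl; // prints "SGD"
  return 0;
}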

class LoraLinearConfig {
public:
@@ -87,22 +124,54 @@ class LoraLinearConfig {
std::vector<std::string> const &target_modules_ = {});
// constructor used to support std::unordered_map
LoraLinearConfig();
template<typename T>
void setOptimizer(T&& opt) {
optimizer_config = std::make_unique<T>(std::forward<T>(opt));
}
friend bool operator==(LoraLinearConfig const &lhs,
LoraLinearConfig const &rhs);
friend std::ostream &operator<<(std::ostream &os,
LoraLinearConfig const &llc);

NLOHMANN_DEFINE_TYPE_INTRUSIVE(LoraLinearConfig,
cache_folder,
peft_model_id,
rank,
lora_alpha,
lora_dropout,
target_modules,
trainable,
init_lora_weights,
base_model_name_or_path,
precision)
void serialize_to_json_file(const std::string& filename) const {
json j = {
{"cache_folder", cache_folder},
{"peft_model_id", peft_model_id},
{"rank", rank},
{"lora_alpha", lora_alpha},
{"lora_dropout", lora_dropout},
{"target_modules", target_modules},
{"trainable", trainable},
{"init_lora_weights", init_lora_weights},
{"base_model_name_or_path", base_model_name_or_path},
{"precision", precision},
{"optimizer_config", optimizer_config ? optimizer_config->toJson() : nullptr}
};

std::ofstream file(filename);
file << j.dump(4); // Use 4 spaces for indentation
}
// Deserialization method
static LoraLinearConfig deserialize_from_json_file(const std::string& filename) {
std::ifstream file(filename);
json j;
file >> j;
// Field types below match those written by serialize_to_json_file above.
LoraLinearConfig metadata(
    j["cache_folder"].get<std::string>(),
    j["peft_model_id"].get<std::string>(),
    j["rank"].get<int>(),
    j["lora_alpha"].get<float>(),
    j["lora_dropout"].get<float>(),
    j["target_modules"].get<std::vector<std::string>>(),
    j["trainable"].get<bool>(),
    j["init_lora_weights"].get<bool>(),
    j["base_model_name_or_path"].get<std::string>(),
    j["precision"].get<std::string>()
);
if (!j["optimizer_config"].is_null()) {
metadata.optimizer_config = LoraOptimizerConfig::fromJson(j["optimizer_config"]);
}
return metadata;
}

std::string cache_folder;
// Huggingface model ID (for download and/or upload)
@@ -116,7 +185,8 @@ class LoraLinearConfig {
// whether the weights are trainable (fine-tuning scenario) or not
// (inference-only). If set to true, allocate space for the gradients
bool trainable = false;
LoraOptimizerConfig *optimizer_config;
// LoraOptimizerConfig *optimizer_config;
std::unique_ptr<LoraOptimizerConfig> optimizer_config;
// whether to initialize weights randomly (instead of attempting to load them
// from file)
bool init_lora_weights;
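
A minimal sketch of the single-file round trip the new methods enable (not code from the commit; the two-argument constructor mirrors the EmptyConfig usage elsewhere in this diff, and the paths and adapter name are placeholders):

#include <iostream>

#include "flexflow/ops/lora_linear_params.h"

using namespace FlexFlow;

int main() {
  // Cache folder plus Hugging Face PEFT model id, as in LoraLinearConfig::EmptyConfig.
  LoraLinearConfig config("/tmp/ff_cache", "some-org/some-lora-adapter");
  config.setOptimizer(LoraSGDOptimizerConfig());

  // The adapter config and its optimizer config land in one JSON file.
  config.serialize_to_json_file("/tmp/lora_config.json");

  // A worker shard can later rebuild the full config from that file alone.
  LoraLinearConfig restored =
      LoraLinearConfig::deserialize_from_json_file("/tmp/lora_config.json");
  std::cout << restored.peft_model_id << std::endl;
  return 0;
}
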
7 changes: 5 additions & 2 deletions include/flexflow/utils/peft_weight_allocator.h
@@ -95,7 +95,7 @@ class PEFTMemoryManager {
: max_concurrent_adapters(max_concurrent_adapters_), max_lora_size(max_lora_size_), base_ptr(nullptr) {}

// allocate memory for all the PEFT adapters for a given layer on a given shard
void allocate_memory(Memory gpu_mem) {
void allocate_inference_memory(Memory gpu_mem) {
// allocate chunk of memory for all the PEFT adapters
Realm::Rect<1, coord_t> bounds(
Realm::Point<1, coord_t>(0),
@@ -111,6 +111,9 @@ class PEFTMemoryManager {
.wait();
base_ptr = peftLegionInst.pointer_untyped(0, sizeof(char));
}
void allocate_finetuning_memory(Memory gpu_mem) {

}

// Returns the slot in memory where the peft model weights are/will be stored.
// If the model is not in memory (cache miss), set the cache_miss flag to true.
@@ -160,7 +163,7 @@ class PEFTMemoryManager {
int max_concurrent_adapters;
size_t max_lora_size;
Realm::RegionInstance peftLegionInst;
void *base_ptr;
void *base_ptr;
void *finetuning_ptr;
std::unordered_map<PEFTModelID, int> lru_hashtable;
std::vector<PEFTModelID> lru_list; // head = least recently used, tail=most recently used
std::unordered_map<PEFTModelID, int> peft2mem_slot;
10 changes: 7 additions & 3 deletions src/ops/lora_linear.cc
@@ -519,12 +519,17 @@ OpMeta *LoraLinear::init_task(Task const *task,
std::string lora_layername_substr =
lora_layername.substr(0, found + searchString.length());

// allocate space for lora weights
size_t max_lora_size = data_type_size(dt) * (lora->max_rank * in_dim + lora->max_rank * out_dim);
m->peft_memory_manager = new PEFTMemoryManager(max_lora_size, lora->max_concurrent_adapters);
Memory gpu_mem = get_proc_mem(Machine::get_machine(), task->target_proc);
m->peft_memory_manager->allocate_memory(gpu_mem);
m->peft_memory_manager->allocate_inference_memory(gpu_mem);

for (auto const &kv : lora->peft_configs) {
return m;
}

void load_peft_adapters(BatchConfig const *bc){
for (auto const &kv : bc->peft_configs) {
PEFTModelID const &model_id = kv.first;
LoraLinearConfig const &lora_config = kv.second;

@@ -680,7 +685,6 @@ OpMeta *LoraLinear::init_task(Task const *task,
m->model_state[model_id].cache_folder = lora_config.cache_folder;
m->model_state[model_id].peft_model_id = lora_config.peft_model_id;
}
return m;
}

void LoraLinear::forward(FFModel const &ff) {
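
To show how the pieces could fit together at runtime, here is a hedged sketch in which a worker rebuilds each newly registered adapter's config from the path recorded in BatchConfig::new_peft_model_paths; the loop bound, the empty-path check, and the hand-off step are assumptions rather than code from this commit:

#include <string>

#include "flexflow/batch_config.h"
#include "flexflow/ops/lora_linear_params.h"

using namespace FlexFlow;

// Hypothetical worker-side step: for every request slot that carries a new
// adapter path, rebuild the LoraLinearConfig from its single JSON file.
void load_new_adapters_from_paths(BatchConfig const *bc) {
  for (int slot = 0; slot < BatchConfig::MAX_NUM_REQUESTS; slot++) {
    std::string const &path = bc->new_peft_model_paths[slot].filepath;
    if (path.empty()) {
      continue; // no new adapter registered for this slot
    }
    LoraLinearConfig lora_config =
        LoraLinearConfig::deserialize_from_json_file(path);
    // ... hand lora_config off to the layer's PEFTMemoryManager / model_state
    // (omitted here; depends on code outside this diff).
  }
}
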
32 changes: 0 additions & 32 deletions src/ops/lora_linear_params.cc
@@ -50,38 +50,6 @@ std::ostream &operator<<(std::ostream &os, LoraAdamOptimizerConfig const &llc) {
return os;
}

// Serialization helpers
template <typename T>
void serialize_to_json_file(T const &obj, fs::path const &filepath) {
json j = obj;
std::ofstream file(filepath);
file << j.dump(4);
}

template <typename T>
std::unique_ptr<T> deserialize_from_json_file(fs::path const &filepath) {
std::ifstream file(filepath);
json j;
file >> j;
return std::make_unique<T>(j.get<T>());
}

template void
serialize_to_json_file<LoraLinearConfig>(LoraLinearConfig const &obj,
fs::path const &filepath);
template void serialize_to_json_file<LoraSGDOptimizerConfig>(
LoraSGDOptimizerConfig const &obj, fs::path const &filepath);
template void serialize_to_json_file<LoraAdamOptimizerConfig>(
LoraAdamOptimizerConfig const &obj, fs::path const &filepath);
template std::unique_ptr<LoraLinearConfig>
deserialize_from_json_file<LoraLinearConfig>(fs::path const &filepath);
template std::unique_ptr<LoraSGDOptimizerConfig>
deserialize_from_json_file<LoraSGDOptimizerConfig>(
fs::path const &filepath);
template std::unique_ptr<LoraAdamOptimizerConfig>
deserialize_from_json_file<LoraAdamOptimizerConfig>(
fs::path const &filepath);

// ------------------ LoRA configs -------------------
// ---------------------------------------------------
const LoraLinearConfig LoraLinearConfig::EmptyConfig = LoraLinearConfig("", "");
