LoRA configs serialize / deserialize into a single file
goliaro committed Oct 2, 2024
1 parent 8d4641c commit f3de642
Showing 5 changed files with 114 additions and 63 deletions.
6 changes: 6 additions & 0 deletions include/flexflow/batch_config.h
@@ -44,6 +44,11 @@ struct OptimizerTasks {
bool save_updated_weights = false;
};

struct NewPeftModelPath {
PEFTModelID peft_model_id;
std::string filepath;
};

void set_optimizer_tasks(OptimizerTasks &tasks,
int max_training_steps,
int completed_training_steps,
@@ -135,6 +140,7 @@ class BatchConfig {
PerRequestInfo requestsInfo[MAX_NUM_REQUESTS];
PerTokenInfo tokensInfo[MAX_NUM_TOKENS];
PerTokenInfo labelsInfo[MAX_NUM_TOKENS];
NewPeftModelPath new_peft_model_paths[MAX_NUM_REQUESTS];

bool request_completed[MAX_NUM_REQUESTS];
bool request_running[MAX_NUM_REQUESTS];
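
Purely as an illustration (not part of this commit), the new per-request array could be filled in by the request manager along these lines; the helper name and the slot handling are assumptions:

#include "flexflow/batch_config.h"

#include <string>

using namespace FlexFlow;

// Hypothetical helper: record the on-disk config path of a newly registered
// PEFT adapter in the batch-config entry for the given request slot, so that
// worker shards can later deserialize the config from that single file.
void record_new_peft_model_path(BatchConfig &bc,
                                int request_slot,
                                PEFTModelID const &model_id,
                                std::string const &config_filepath) {
  bc.new_peft_model_paths[request_slot].peft_model_id = model_id;
  bc.new_peft_model_paths[request_slot].filepath = config_filepath;
}
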
122 changes: 96 additions & 26 deletions include/flexflow/ops/lora_linear_params.h
@@ -17,7 +17,10 @@ namespace FlexFlow {
class LoraOptimizerConfig {
public:
LoraOptimizerConfig();
virtual ~LoraOptimizerConfig() {}
virtual std::string getType() const = 0;
virtual nlohmann::json toJson() const = 0;
static std::unique_ptr<LoraOptimizerConfig> fromJson(const nlohmann::json& j);
virtual ~LoraOptimizerConfig() = default;
};

class LoraSGDOptimizerConfig : public LoraOptimizerConfig {
Expand All @@ -29,9 +32,25 @@ class LoraSGDOptimizerConfig : public LoraOptimizerConfig {
bool weight_decay_ = 0.0f);
friend std::ostream &operator<<(std::ostream &os,
LoraSGDOptimizerConfig const &llc);

NLOHMANN_DEFINE_TYPE_INTRUSIVE(
LoraSGDOptimizerConfig, lr, momentum, nesterov, weight_decay)

std::string getType() const override { return "SGD"; }

nlohmann::json toJson() const override {
return {{"type", "SGD"},
{"lr", lr},
{"momentum", momentum},
{"nesterov", nesterov},
{"weight_decay", weight_decay}};
}

static std::unique_ptr<LoraSGDOptimizerConfig> fromJson(const nlohmann::json& j) {
auto sgd = std::make_unique<LoraSGDOptimizerConfig>();
sgd->lr = j["lr"];
sgd->momentum = j["momentum"];
sgd->nesterov = j["nesterov"];
sgd->weight_decay = j["weight_decay"];
return sgd;
}

public:
double lr = 0.001f;
@@ -50,9 +69,27 @@ class LoraAdamOptimizerConfig : public LoraOptimizerConfig {
double epsilon_ = 1e-8);
friend std::ostream &operator<<(std::ostream &os,
LoraAdamOptimizerConfig const &llc);

NLOHMANN_DEFINE_TYPE_INTRUSIVE(
LoraAdamOptimizerConfig, alpha, beta1, beta2, weight_decay, epsilon)

std::string getType() const override { return "Adam"; }

nlohmann::json toJson() const override {
return {{"type", "Adam"},
{"alpha", alpha},
{"beta1", beta1},
{"beta2", beta2},
{"weight_decay", weight_decay},
{"epsilon", epsilon}};
}

static std::unique_ptr<LoraAdamOptimizerConfig> fromJson(const nlohmann::json& j) {
auto adam = std::make_unique<LoraAdamOptimizerConfig>();
adam->alpha = j["alpha"];
adam->beta1 = j["beta1"];
adam->beta2 = j["beta2"];
adam->weight_decay = j["weight_decay"];
adam->epsilon = j["epsilon"];
return adam;
}

public:
// Adam
@@ -63,13 +100,13 @@ class LoraAdamOptimizerConfig : public LoraOptimizerConfig {
double epsilon = 1e-8;
};

// Serialization helpers
template <typename T>
void serialize_to_json_file(T const &obj, fs::path const &filepath);
// Defined inline since this lives in a header included by multiple translation units.
inline std::unique_ptr<LoraOptimizerConfig> LoraOptimizerConfig::fromJson(const nlohmann::json& j) {
std::string type = j["type"];
if (type == "SGD") return LoraSGDOptimizerConfig::fromJson(j);
if (type == "Adam") return LoraAdamOptimizerConfig::fromJson(j);
throw std::runtime_error("Unknown optimizer type");
}

// Function to deserialize JSON from file and create object
template <typename T>
std::unique_ptr<T> deserialize_from_json_file(fs::path const &filepath);
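
For illustration only (code not in this diff), a round trip through the new polymorphic interface could look like the following sketch, assuming nlohmann::json and the classes above are available on the include path:

#include <iostream>
#include <memory>
#include <nlohmann/json.hpp>

#include "flexflow/ops/lora_linear_params.h"

using namespace FlexFlow;

int main() {
  LoraSGDOptimizerConfig sgd;
  sgd.lr = 0.01;
  sgd.momentum = 0.9;

  // Serialize through the virtual interface; the "type" field is what the
  // base-class factory dispatches on.
  nlohmann::json j = sgd.toJson();

  // Restore as a base-class pointer without knowing the concrete type up front.
  std::unique_ptr<LoraOptimizerConfig> restored = LoraOptimizerConfig::fromJson(j);
  std::cout << restored->getType() << std::endl; // prints "SGD"
  return 0;
}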

class LoraLinearConfig {
public:
@@ -87,22 +124,54 @@ class LoraLinearConfig {
std::vector<std::string> const &target_modules_ = {});
// constructor used to support std::unordered_map
LoraLinearConfig();
template<typename T>
void setOptimizer(T&& opt) {
optimizer_config = std::make_unique<T>(std::forward<T>(opt));
}
friend bool operator==(LoraLinearConfig const &lhs,
LoraLinearConfig const &rhs);
friend std::ostream &operator<<(std::ostream &os,
LoraLinearConfig const &llc);

NLOHMANN_DEFINE_TYPE_INTRUSIVE(LoraLinearConfig,
cache_folder,
peft_model_id,
rank,
lora_alpha,
lora_dropout,
target_modules,
trainable,
init_lora_weights,
base_model_name_or_path,
precision)
void serialize_to_json_file(const std::string& filename) const {
json j = {
{"cache_folder", cache_folder},
{"peft_model_id", peft_model_id},
{"rank", rank},
{"lora_alpha", lora_alpha},
{"lora_dropout", lora_dropout},
{"target_modules", target_modules},
{"trainable", trainable},
{"init_lora_weights", init_lora_weights},
{"base_model_name_or_path", base_model_name_or_path},
{"precision", precision},
{"optimizer_config", optimizer_config ? optimizer_config->toJson() : nullptr}
};

std::ofstream file(filename);
file << j.dump(4); // Use 4 spaces for indentation
}
// Deserialization method
static LoraLinearConfig deserialize_from_json_file(const std::string& filename) {
std::ifstream file(filename);
json j;
file >> j;
// Field types below match those written by serialize_to_json_file above.
LoraLinearConfig metadata(
    j["cache_folder"].get<std::string>(),
    j["peft_model_id"].get<std::string>(),
    j["rank"].get<int>(),
    j["lora_alpha"].get<float>(),
    j["lora_dropout"].get<float>(),
    j["target_modules"].get<std::vector<std::string>>(),
    j["trainable"].get<bool>(),
    j["init_lora_weights"].get<bool>(),
    j["base_model_name_or_path"].get<std::string>(),
    j["precision"].get<std::string>()
);
if (!j["optimizer_config"].is_null()) {
metadata.optimizer_config = LoraOptimizerConfig::fromJson(j["optimizer_config"]);
}
return metadata;
}

std::string cache_folder;
// Huggingface model ID (for download and/or upload)
@@ -116,7 +185,8 @@ class LoraLinearConfig {
// whether the weights are trainable (fine-tuning scenario) or not
// (inference-only). If set to true, allocate space for the gradients
bool trainable = false;
LoraOptimizerConfig *optimizer_config;
// LoraOptimizerConfig *optimizer_config;
std::unique_ptr<LoraOptimizerConfig> optimizer_config;
// whether to initialize weights randomly (instead of attempting to load them
// from file)
bool init_lora_weights;
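
A minimal sketch of the single-file round trip the new methods enable (not code from the commit; the two-argument constructor mirrors the EmptyConfig usage elsewhere in this diff, and the paths and adapter name are placeholders):

#include <iostream>

#include "flexflow/ops/lora_linear_params.h"

using namespace FlexFlow;

int main() {
  // Cache folder plus Hugging Face PEFT model id, as in LoraLinearConfig::EmptyConfig.
  LoraLinearConfig config("/tmp/ff_cache", "some-org/some-lora-adapter");
  config.setOptimizer(LoraSGDOptimizerConfig());

  // The adapter config and its optimizer config land in one JSON file.
  config.serialize_to_json_file("/tmp/lora_config.json");

  // A worker shard can later rebuild the full config from that file alone.
  LoraLinearConfig restored =
      LoraLinearConfig::deserialize_from_json_file("/tmp/lora_config.json");
  std::cout << restored.peft_model_id << std::endl;
  return 0;
}
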
7 changes: 5 additions & 2 deletions include/flexflow/utils/peft_weight_allocator.h
@@ -95,7 +95,7 @@ class PEFTMemoryManager {
: max_concurrent_adapters(max_concurrent_adapters_), max_lora_size(max_lora_size_), base_ptr(nullptr) {}

// allocate memory for all the PEFT adapters for a given layer on a given shard
void allocate_memory(Memory gpu_mem) {
void allocate_inference_memory(Memory gpu_mem) {
// allocate chunk of memory for all the PEFT adapters
Realm::Rect<1, coord_t> bounds(
Realm::Point<1, coord_t>(0),
@@ -111,6 +111,9 @@ class PEFTMemoryManager {
.wait();
base_ptr = peftLegionInst.pointer_untyped(0, sizeof(char));
}
void allocate_finetuning_memory(Memory gpu_mem) {

}

// Returns the slot in memory where the peft model weights are/will be stored.
// If the model is not in memory (cache miss), set the cache_miss flag to true.
@@ -160,7 +163,7 @@ class PEFTMemoryManager {
int max_concurrent_adapters;
size_t max_lora_size;
Realm::RegionInstance peftLegionInst;
void *base_ptr;
void *base_ptr;
void *finetuning_ptr;
std::unordered_map<PEFTModelID, int> lru_hashtable;
std::vector<PEFTModelID> lru_list; // head = least recently used, tail=most recently used
std::unordered_map<PEFTModelID, int> peft2mem_slot;
10 changes: 7 additions & 3 deletions src/ops/lora_linear.cc
@@ -519,12 +519,17 @@ OpMeta *LoraLinear::init_task(Task const *task,
std::string lora_layername_substr =
lora_layername.substr(0, found + searchString.length());

// allocate space for lora weights
size_t max_lora_size = data_type_size(dt) * (lora->max_rank * in_dim + lora->max_rank * out_dim);
m->peft_memory_manager = new PEFTMemoryManager(max_lora_size, lora->max_concurrent_adapters);
Memory gpu_mem = get_proc_mem(Machine::get_machine(), task->target_proc);
m->peft_memory_manager->allocate_memory(gpu_mem);
m->peft_memory_manager->allocate_inference_memory(gpu_mem);

for (auto const &kv : lora->peft_configs) {
return m;
}

void load_peft_adapters(BatchConfig const *bc){
for (auto const &kv : bc->peft_configs) {
PEFTModelID const &model_id = kv.first;
LoraLinearConfig const &lora_config = kv.second;

@@ -680,7 +685,6 @@ OpMeta *LoraLinear::init_task(Task const *task,
m->model_state[model_id].cache_folder = lora_config.cache_folder;
m->model_state[model_id].peft_model_id = lora_config.peft_model_id;
}
return m;
}

void LoraLinear::forward(FFModel const &ff) {
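
To show how the pieces could fit together at runtime, here is a hedged sketch in which a worker rebuilds each newly registered adapter's config from the path recorded in BatchConfig::new_peft_model_paths; the loop bound, the empty-path check, and the hand-off step are assumptions rather than code from this commit:

#include <string>

#include "flexflow/batch_config.h"
#include "flexflow/ops/lora_linear_params.h"

using namespace FlexFlow;

// Hypothetical worker-side step: for every request slot that carries a new
// adapter path, rebuild the LoraLinearConfig from its single JSON file.
void load_new_adapters_from_paths(BatchConfig const *bc) {
  for (int slot = 0; slot < BatchConfig::MAX_NUM_REQUESTS; slot++) {
    std::string const &path = bc->new_peft_model_paths[slot].filepath;
    if (path.empty()) {
      continue; // no new adapter registered for this slot
    }
    LoraLinearConfig lora_config =
        LoraLinearConfig::deserialize_from_json_file(path);
    // ... hand lora_config off to the layer's PEFTMemoryManager / model_state
    // (omitted here; depends on code outside this diff).
  }
}
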
32 changes: 0 additions & 32 deletions src/ops/lora_linear_params.cc
@@ -50,38 +50,6 @@ std::ostream &operator<<(std::ostream &os, LoraAdamOptimizerConfig const &llc) {
return os;
}

// Serialization helpers
template <typename T>
void serialize_to_json_file(T const &obj, fs::path const &filepath) {
json j = obj;
std::ofstream file(filepath);
file << j.dump(4);
}

template <typename T>
std::unique_ptr<T> deserialize_from_json_file(fs::path const &filepath) {
std::ifstream file(filepath);
json j;
file >> j;
return std::make_unique<T>(j.get<T>());
}

template void
serialize_to_json_file<LoraLinearConfig>(LoraLinearConfig const &obj,
fs::path const &filepath);
template void serialize_to_json_file<LoraSGDOptimizerConfig>(
LoraSGDOptimizerConfig const &obj, fs::path const &filepath);
template void serialize_to_json_file<LoraAdamOptimizerConfig>(
LoraAdamOptimizerConfig const &obj, fs::path const &filepath);
template std::unique_ptr<LoraLinearConfig>
deserialize_from_json_file<LoraLinearConfig>(fs::path const &filepath);
template std::unique_ptr<LoraSGDOptimizerConfig>
deserialize_from_json_file<LoraSGDOptimizerConfig>(
fs::path const &filepath);
template std::unique_ptr<LoraAdamOptimizerConfig>
deserialize_from_json_file<LoraAdamOptimizerConfig>(
fs::path const &filepath);

// ------------------ LoRA configs -------------------
// ---------------------------------------------------
const LoraLinearConfig LoraLinearConfig::EmptyConfig = LoraLinearConfig("", "");
