Refactor of models and trainers with base class for common methods #306

Open · wants to merge 42 commits into base: main

Changes from 26 commits

Commits (42)
2b8e301
Refactor models and trainers with base_class for common methods
PierpaoloSorbellini Mar 27, 2023
5e0ded8
Revert "Release ChatLLaMA 0.0.4"
PierpaoloSorbellini Mar 27, 2023
3fa5c53
Merge branch 'main' of https://github.com/nebuly-ai/nebullvm into main
PierpaoloSorbellini Mar 27, 2023
ab1f09e
Refactor of models and trainers with base class for common methods
PierpaoloSorbellini Mar 27, 2023
3d54d50
Fix comments and values in the config.yaml
PierpaoloSorbellini Mar 27, 2023
9f5eab4
Add load 8 bit from HF
PierpaoloSorbellini Mar 27, 2023
dc46ee4
Add check on load int 8
PierpaoloSorbellini Mar 27, 2023
c1d03d3
Add Reward and Critic support for LoRA PEFT
PierpaoloSorbellini Mar 28, 2023
36c350d
Add SelfInstruct Dataset from HF
PierpaoloSorbellini Mar 28, 2023
bb92ee7
Fix imports
Mar 28, 2023
6fc94d3
Add logging with proper class
Mar 29, 2023
dc2489f
Fix logs for deepspeed
Mar 30, 2023
0b0795d
Fix early logs with multi-GPUs
Mar 30, 2023
01be6dc
Fix MultiGPU for accelerate
Mar 30, 2023
13b1abd
Fix batch-size for accelerate
Mar 30, 2023
db8b3c2
Add multi gpu training to readme.md
Mar 30, 2023
d771fb2
Fix fp16 training
Mar 31, 2023
e5f959c
Merge branch 'main' into refactor
PierpaoloSorbellini Mar 31, 2023
d5084e5
Fix Distributed training for RLHF
PierpaoloSorbellini Apr 3, 2023
2ec5eaa
Add new models
PierpaoloSorbellini Apr 3, 2023
33e97e2
Add decapoda models
PierpaoloSorbellini Apr 3, 2023
8332a26
Add unsupported model message
PierpaoloSorbellini Apr 3, 2023
32ddfa2
Change sign of KL div according to issue #298
PierpaoloSorbellini Apr 3, 2023
aa9881c
Fix imports order
PierpaoloSorbellini Apr 3, 2023
b10f1dc
Add cases for lora-peft model loading
PierpaoloSorbellini Apr 4, 2023
86a699b
Merge branch 'refactor' of https://github.com/nebuly-ai/nebullvm into…
PierpaoloSorbellini Apr 4, 2023
1f29ba4
Fix Actor 8bit training
PierpaoloSorbellini Apr 4, 2023
1836788
Adjust code comments to match new adjustments
PierpaoloSorbellini Apr 4, 2023
966a19d
Fix device error when using vanilla pytorch training
PierpaoloSorbellini Apr 4, 2023
feacb88
Fix RLHF with fp16
PierpaoloSorbellini Apr 5, 2023
f894494
Move grad scaler into base class
PierpaoloSorbellini Apr 5, 2023
b56185f
Add check on 8bit load and distributed training
PierpaoloSorbellini Apr 5, 2023
5699aaa
Add template to self-instruct dataset
PierpaoloSorbellini Apr 12, 2023
5c83927
Fix checkpoints name in actor training
PierpaoloSorbellini Apr 12, 2023
a205ee6
Fix slow loss computation
PierpaoloSorbellini Apr 12, 2023
bb386c4
Fix checkpoints also in reward models
PierpaoloSorbellini Apr 12, 2023
22a64af
Fix checkpoint for rl
PierpaoloSorbellini Apr 12, 2023
10211c6
Add n_checkpoints for all the training with old checkpoints removal
PierpaoloSorbellini Apr 12, 2023
442b396
Improve datasets quality with reward model negative examples
PierpaoloSorbellini Apr 13, 2023
71a6c02
Merge branch 'main' of https://github.com/nebuly-ai/nebullvm into main
PierpaoloSorbellini Apr 14, 2023
1189787
Merge branch 'main' into refactor
PierpaoloSorbellini Apr 14, 2023
98b96c2
Fix merge issues
PierpaoloSorbellini Apr 14, 2023
Files changed
18 changes: 18 additions & 0 deletions apps/accelerate/chatllama/README.md
@@ -408,6 +408,24 @@ We support 3 different options to prepare the `reward_training_data`:
- **(⚠️WIP)** Few examples provided by the user and dataset synthetically expanded using LLM
</details>

## Single-Node Multi-GPU Training
Currently ChatLLaMA supports [Accelerate](https://github.com/huggingface/accelerate) and [DeepSpeed](https://github.com/microsoft/DeepSpeed) for multi-GPU training.
To run distributed training, enable one of the two backends in your `/artifacts/config/config.yaml` file by setting either
`deepspeed_enable` or `accelerate_enable` to `True`. <br />
Each type of training (i.e. reward model training, actor supervised fine-tuning, RLHF) has its own set of flags that can be tweaked.
DeepSpeed settings can be customised using the `/artifacts/config/ds_config.json` file, while Accelerate can be configured by running
```bash
accelerate config
```
from the command line.
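For example, when training the reward model with DeepSpeed, the relevant part of `config.yaml` looks roughly like the sketch below (the keys are the ones introduced in this PR; the values are only illustrative):
```yaml
reward_config:
  # enable exactly one distributed backend at a time
  deepspeed_enable: True
  deepspeed_config_path: "./artifacts/config/ds_config.json"
  accelerate_enable: False
```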
Once the project is configured, the training can be started with:
```bash
deepspeed artifacts/main.py artifacts/config/config.yaml --type <type_of_training>
```
or
```bash
accelerate launch artifacts/main.py artifacts/config/config.yaml --type <type_of_training>
```
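depending on which of the two backends was enabled in `config.yaml`.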
# License

See the [LICENSE](https://github.com/nebuly-ai/nebullvm/blob/main/apps/accelerate/chatllama/LICENSE) file.
22 changes: 16 additions & 6 deletions apps/accelerate/chatllama/artifacts/config/config.yaml
@@ -28,14 +28,18 @@ trainer_config:
# here specify the name of the actor_rl checkpoint from which resume
# during actor RL training. If null load the last one.
checkpoint_name: null
deepspeed_enable: False
deepspeed_config_path: "artifacts/config/ds_config.json"
accelerate_enable: False

actor_config:
model: "facebook/opt-1.3b"
model: "facebook/opt-125m"
load_8bit: False
model_folder: "./models"
tokenizer_path: "path-to-tokenizer"
train_dataset_path: "./datasets/actor_training_data.json"
validation_dataset_path: null
# froze model embedding during training
# froze model embedding during training (only for llama)
froze_embeddings: True
# use fairscale layers to build the model instead of vanilla pytorch
# only for llama
@@ -51,7 +55,7 @@ actor_config:
additonal_prompt_tokens: 20
# temperature for the actor
temperature: 0.1
batch_size: 2
batch_size: 1
# number iteration after print
iteration_per_print: 1
lr: 0.000009
@@ -78,34 +82,40 @@ reward_config:
# more can be simply added in the reward.py __init__()
model: "facebook/opt-125m"
model_folder: "./models"
load_8bit: False
# hidden size of the additional ffw head to produce the scores
model_head_hidden_size: 2048
max_sequence_length: 2048
train_dataset_path: "./datasets/reward_training_data.json"
validation_dataset_path: null
batch_size: 8
batch_size: 1
epochs: 1
iteration_per_print: 1
# steps after which the checkpoint are saved
checkpoint_steps: 10000
checkpoint_steps: 200
# here specify the name of the reward checkpoint from which resume
# during reward training. If null load the last one.
checkpoint_name: null
lr: 0.000009
# deepspeed settings
deepspeed_enable: False
deepspeed_enable: True
deepspeed_config_path: "./artifacts/config/ds_config.json"
# accelerate settings
accelerate_enable: False
peft_enable: False
peft_config_path: "./artifacts/config/peft_config.yaml"

critic_config:
# model to be chosen are gp2-large, bart-base, longformer-base-4096
# more can be simply added in the reward.py __init__()
model: "facebook/opt-125m"
load_8bit: False
# hidden size of the additional ffw head to produce the scores
model_head_hidden_size: 2048
max_sequence_length: 2048
model_folder: "./models"
# here specify the name of the critic checkpoint from which resume
# during critic training. If null load the last one.
checkpoint_name: null
peft_enable: True
peft_config_path: "./artifacts/config/peft_config.yaml"
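The `peft_config_path` entries above point to `./artifacts/config/peft_config.yaml`, whose contents are not part of this diff. As a purely hypothetical sketch, a LoRA/PEFT configuration of this kind usually maps onto Hugging Face `peft.LoraConfig` hyperparameters, e.g.:
```yaml
# Hypothetical peft_config.yaml sketch; field names follow Hugging Face's
# peft.LoraConfig, and the exact schema expected by chatllama may differ.
r: 8                        # LoRA rank of the update matrices
lora_alpha: 32              # LoRA scaling factor
lora_dropout: 0.05
target_modules: ["q_proj", "v_proj"]
task_type: "CAUSAL_LM"
```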
6 changes: 6 additions & 0 deletions apps/accelerate/chatllama/artifacts/config/ds_config.json
@@ -48,5 +48,11 @@
"stage3_gather_16bit_weights_on_model_save": true,
"ignore_unused_parameters": true,
"round_robin_gradients": true
},
"comms_logger": {
"enabled": false,
"verbose": false,
"prof_all": false,
"debug": false
}
}
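The new `comms_logger` block corresponds to DeepSpeed's built-in communication logger; all of its options are added in the disabled state, so communication profiling can be turned on later without restructuring the file.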
17 changes: 14 additions & 3 deletions apps/accelerate/chatllama/artifacts/download_dataset.py
@@ -1,7 +1,11 @@
import argparse
import os

from chatllama.rlhf.dataset import AnthropicRLHF, StanfordNLPSHPDataset
from chatllama.rlhf.dataset import (
AnthropicRLHF,
SelfInstruct,
StanfordNLPSHP,
)


if __name__ == "__main__":
@@ -15,7 +19,7 @@
parser.add_argument(
"dataset_name",
help="dataset name it can be. SSHP: stanfordnlp/SHP or ",
choices=["SHP", "ARLHF"],
choices=["SHP", "ARLHF", "SI"],
)
parser.add_argument(
"-p",
@@ -40,7 +44,7 @@
raise ValueError("Number of samples should be an integer")

if args.dataset_name == "SHP":
dataset = StanfordNLPSHPDataset()
dataset = StanfordNLPSHP()
dataset.save_dataset(args.path, n_samples)

elif args.dataset_name == "ARLHF":
@@ -49,3 +53,10 @@
args.path,
n_samples,
)
elif args.dataset_name == "SI":
dataset = SelfInstruct()
dataset.save_dataset(
args.path,
n_samples,
)
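With the new `SI` choice in place, the Self-Instruct data can be downloaded with something like `python artifacts/download_dataset.py SI -p ./datasets`; the positional dataset name and the `-p` path option are the ones defined by the parser above, while the number-of-samples argument is not visible in this excerpt.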

15 changes: 10 additions & 5 deletions apps/accelerate/chatllama/artifacts/main.py
@@ -2,7 +2,6 @@

from chatllama.rlhf.actor import ActorTrainer
from chatllama.rlhf.config import Config
from chatllama.rlhf.dataset import BaseDataset
from chatllama.rlhf.reward import RewardTrainer
from chatllama.rlhf.trainer import RLTrainer

@@ -31,7 +30,16 @@
parser.add_argument(
"-r", "--reward", help="Specify reward model by name", default=None
)
parser.add_argument("--local_rank", help="Local rank parameter for deepspeed", default=None)

parser.add_argument(
"--local_rank",
type=int,
default=-1,
help="local rank passed from distributed launcher",
)

# Include DeepSpeed configuration arguments
# parser = deepspeed.add_config_arguments(parser)

# parse arguments
args = parser.parse_args()
@@ -53,15 +61,12 @@
config.critic.max_sequence_length,
)
config.actor.max_sequence_length = max_seq
BaseDataset.clean_dataset(config)
rlhf_trainer = RLTrainer(config)
rlhf_trainer.train()
elif args.type == "ACTOR":
BaseDataset.clean_dataset(config.actor)
actor_trainer = ActorTrainer(config.actor)
actor_trainer.train()
elif args.type == "REWARD":
BaseDataset.clean_dataset(config.reward)
reward_trainer = RewardTrainer(config.reward)
reward_trainer.train()
elif args.type == "ALL":