add sample config

Signed-off-by: Yu Chin Fabian Lim <flim@sg.ibm.com>
foundation-model-stack · May 28, 2024 · 445e14a · 445e14a
1 parent c288268
commit 445e14a
Show file tree

Hide file tree

Showing 2 changed files with 59 additions and 5 deletions.
diff --git a/sample-configurations/accelerated-peft-autogptq-foak-sample-configuration.yaml b/sample-configurations/accelerated-peft-autogptq-foak-sample-configuration.yaml
@@ -0,0 +1,44 @@
+# FMS Acceleration Plugin Configuration. 
+#
+# Each stanza incorporates various configurations for 
+# different fine-tuning / training tasks.
+plugins:
+    # PEFT-related acceleration
+  peft:
+
+      # quantization-related acceleration
+      # e.g., kernels for quantized base weights
+    quantization:
+
+        # AutoGPTQ quantized base weights.
+      auto_gptq:
+
+          # Kernel to be used for the GPTQ linear layer
+          # NOTE: Not all kernels are suitable for PEFT training; need to use 
+          # kernels that support autograd forward / backward. The best 
+          # recommendation at the moment is "triton_v2".
+        kernel: triton_v2
+
+          # If true, an already-quantized checkpoint is expected to be
+          # passed into TrainingArguments.model_name_or_path
+        from_quantized: true
+      fused_ops_and_kernels:
+
+        # load unsloth optimizations for these 4bit base layer weights.
+        # currently only supports "auto_gptq" and "bitsandbytes"
+        base_layer: auto_gptq
+
+        # activate various unsloth optimizations
+        # NOTE: currently supports only all-or-nothing.
+
+        # fused kernels for lora linear layers
+        fused_lora: true
+
+        # fast loss triton kernels
+        fast_loss: true
+
+        # fast rms norm triton kernels
+        fast_rsm_layernorm: true
+
+        # fast RoPE embedding triton kernels
+        fast_rope_embeddings: true
diff --git a/scripts/generate_sample_configurations.py b/scripts/generate_sample_configurations.py
@@ -139,9 +139,10 @@ def read_configuration(path: str) -> Dict:
 #
 # NOTE: an augmentation (path, value) will augment a config at the
 # specified key path, with the value.
-KEY_AUTO_GPTQ = "auto_gptq"
+KEY_AUTO_GPTQ = "auto-gptq"
 KEY_BNB_NF4 = "bnb-nf4"
 KEY_BNB_NF4_BASELINE = "baseline-bnb-nf4"
+KEY_AUTO_GPTQ_FOAK = "auto-gptq-foak"
 
 CONFIGURATIONS = {
     KEY_AUTO_GPTQ: "plugins/accelerated-peft/configs/autogptq.yaml",
@@ -156,6 +157,10 @@ def read_configuration(path: str) -> Dict:
             ("peft.quantization.bitsandbytes.no_peft_model", True), 
         ],
     ),
+    KEY_AUTO_GPTQ_FOAK: (
+        "plugins/fused-ops-and-kernels/configs/fast_quantized_peft.yaml",
+        [("peft.quantization.fused_ops_and_kernels.base_layer", "auto_gptq")],
+    ),
 }
 
 # list of (tag, combi) tuples
@@ -167,19 +172,24 @@ def read_configuration(path: str) -> Dict:
     ("accelerated-peft-autogptq", (KEY_AUTO_GPTQ,)),
     ("accelerated-peft-bnb-nf4", (KEY_BNB_NF4,)),
     ("baseline-peft-bnb-nf4", (KEY_BNB_NF4_BASELINE,)),
+    ("accelerated-peft-autogptq-foak", (KEY_AUTO_GPTQ, KEY_AUTO_GPTQ_FOAK)),
 ]
 
-
 # TODO: throw error if merge conflicts
 def merge_configs(config_contents: List[Dict]):
     "helper function to merge configuration contents."
 
     # merge in place
     def _merge(result: Dict, new_contents: Dict):
-        for k in new_contents:
+        for k, v in new_contents.items():
             if k not in result:
-                result[k] = {}
-            _merge(result[k], new_contents)
+                # if k is not in result, it means v does not 
+                # exist as a subtree under result, so we just do
+                # an assignment
+                result[k] = v 
+            else:
+                # otherwise we call the merge
+                _merge(result[k], v)
 
     if len(config_contents) == 0:
         return {}