Rename Plugin to AttentionAndDistributedPacking #64

Merged: 1 commit merged on Aug 14, 2024
1 change: 1 addition & 0 deletions .github/workflows/build-and-publish.yml
@@ -14,6 +14,7 @@ jobs:
- "framework"
- "accelerated-peft"
- "fused-ops-and-kernels"
- "attention-and-distributed-packing"

permissions:
id-token: write # IMPORTANT: this permission is mandatory for trusted publishing
2 changes: 1 addition & 1 deletion .github/workflows/format.yml
@@ -29,7 +29,7 @@ jobs:
- "framework"
- "accelerated-peft"
- "fused-ops-and-kernels"
- "instruct-lab"
- "attention-and-distributed-packing"

steps:
- uses: actions/checkout@v4
2 changes: 1 addition & 1 deletion README.md
@@ -33,7 +33,7 @@ Plugin | Description | Depends | License | Status
[framework](./plugins/framework/README.md) | This acceleration framework for integration with huggingface trainers | | | Alpha
[accelerated-peft](./plugins/accelerated-peft/README.md) | For PEFT-training, e.g., 4bit QLoRA. | Huggingface<br>AutoGPTQ | Apache 2.0<br>MIT | Alpha
[fused-op-and-kernels](./plugins/fused-ops-and-kernels/README.md) | Fused LoRA and triton kernels (e.g., fast cross-entropy, rms, rope) | -- | Apache 2.0 [(contains extracted code)](./plugins/fused-ops-and-kernels/README.md#code-extracted-from-unsloth)| Beta
[instruct-lab](./plugins/instruct-lab/README.md) | Padding-Free Flash Attention Computation | flash-attn | Apache 2.0 | Beta
[attention-and-distributed-packing](./plugins/attention-and-distributed-packing/README.md) | Padding-Free Flash Attention Computation | flash-attn | Apache 2.0 | Beta
MOE-training-acceleration | [MegaBlocks](https://github.com/databricks/megablocks) inspired triton kernels and accelerations for Mixture-of-Expert models | | Apache 2.0 | Coming Soon

## Usage with FMS HF Tuning
@@ -8,6 +8,3 @@ import_heading_firstparty=First Party
import_heading_localfolder=Local
known_firstparty=
known_localfolder=tuning

# skip code imported from unsloth
skip_glob=**/unsloth*/**
@@ -1,4 +1,4 @@
# FMS Acceleration for Instruct Lab
# FMS Acceleration for Attention And Distributed Packing Plugin

This library contains plugins to accelerate finetuning with the following optimizations:

@@ -3,9 +3,9 @@ requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "fms-acceleration-ilab"
name = "fms-acceleration-aadp"
version = '0.0.1'
description = "FMS Acceleration Plugin for Functionalities Used in Instruct Lab Training"
description = "FMS Acceleration Plugin for Attention and Distributed Packing Optimizations"
authors = [
{name = "Fabian Lim", email = "flim@sg.ibm.com"},
{name = "Aaron Chew", email = "aaron.chew1@ibm.com"},
@@ -24,7 +24,7 @@ classifiers=[
]

[tool.hatch.build.targets.wheel]
only-include = ["src/fms_acceleration_ilab"]
only-include = ["src/fms_acceleration_aadp"]

[tool.hatch.build.targets.wheel.sources]
"src" = ""
@@ -185,7 +185,7 @@ def _patch_dataloader(
except ImportError:
# Otherwise, use the locally implemented DataCollatorWithFlattening
# pylint: disable=import-outside-toplevel
from .ilab_utils import (
from .aadp_utils import (
DataCollatorWithFlattening,
)

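For context on the hunk above, here is a minimal sketch of the import-fallback pattern whose local module is renamed. The transformers-side import is an assumption about newer transformers releases, and the enclosing helper is hypothetical rather than the plugin's actual function body:

```python
def _get_flattening_collator():
    """Hypothetical helper illustrating the fallback this hunk renames."""
    try:
        # Prefer the collator shipped with newer transformers releases (assumed available).
        from transformers import DataCollatorWithFlattening
    except ImportError:
        # Otherwise, use the locally implemented DataCollatorWithFlattening
        # pylint: disable=import-outside-toplevel
        from .aadp_utils import DataCollatorWithFlattening
    return DataCollatorWithFlattening
```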
@@ -17,13 +17,13 @@
instantiate_framework,
read_configuration,
)
from fms_acceleration_ilab import PaddingFreeAccelerationPlugin
from fms_acceleration_aadp import PaddingFreeAccelerationPlugin

# configuration
DIRNAME = os.path.dirname(__file__)
CONFIG_PATH_ILAB = os.path.join(DIRNAME, "../configs/instruct_lab.yaml")
CONFIG_PATH_ILAB = os.path.join(DIRNAME, "../configs/aadp.yaml")

def test_framework_installs_ilab_padding_free_plugin():
def test_framework_installs_aadp_padding_free_plugin():
with instantiate_framework(
read_configuration(CONFIG_PATH_ILAB), require_packages_check=False
) as framework:
2 changes: 1 addition & 1 deletion plugins/framework/src/fms_acceleration/constants.py
@@ -21,4 +21,4 @@
# and activated.
# - hence the plugins that have model loaders should be on top of this list

PLUGINS = ["peft", "foak", "ilab"]
PLUGINS = ["peft", "foak", "aadp"]
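As a hedged illustration of the ordering comment in this hunk (hypothetical names, not the framework's activation code): plugins are assumed to be activated in PLUGINS order, so an entry that can load the model must come before entries, such as the renamed "aadp", that only patch an already-loaded model.

```python
# Hypothetical sketch: "peft" can load a (quantized) model, while "foak" and
# the renamed "aadp" only patch a model that already exists.
PLUGINS = ["peft", "foak", "aadp"]
LOADS_MODEL = {"peft": True, "foak": False, "aadp": False}

model = None
for name in PLUGINS:
    if LOADS_MODEL[name]:
        model = f"model loaded by {name}"  # stand-in for real model loading
    else:
        assert model is not None, f"{name} expects a model to patch"
        model = f"{model}, patched by {name}"
```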
6 changes: 3 additions & 3 deletions sample-configurations/CONTENTS.yaml
@@ -33,7 +33,7 @@ framework_configs:
- fused-ops-and-kernels
filename: accelerated-peft-bnb-nf4-foak-sample-configuration.yaml

- shortname: ilab-padding-free
- shortname: aadp-padding-free
plugins:
- instruct-lab
filename: ilab-padding-free-sample-configuration.yaml
- attention-and-distributed-packing
filename: aadp-padding-free-sample-configuration.yaml
6 changes: 3 additions & 3 deletions scripts/generate_sample_configurations.py
@@ -144,7 +144,7 @@ def read_configuration(path: str) -> Dict:
KEY_BNB_NF4_BASELINE = "baseline-bnb-nf4"
KEY_AUTO_GPTQ_FOAK = "auto-gptq-foak"
KEY_BNB_NF4_FOAK = "bnb-nf4-foak"
KEY_ILAB_PADDING_FREE = "ilab-padding-free"
KEY_AADP_PADDING_FREE = "aadp-padding-free"

CONFIGURATIONS = {
KEY_AUTO_GPTQ: "plugins/accelerated-peft/configs/autogptq.yaml",
@@ -167,7 +167,7 @@ def read_configuration(path: str) -> Dict:
"plugins/fused-ops-and-kernels/configs/fast_quantized_peft.yaml",
[("peft.quantization.fused_ops_and_kernels.base_layer", "bitsandbytes")],
),
KEY_ILAB_PADDING_FREE: "plugins/instruct-lab/configs/instruct_lab.yaml",
KEY_AADP_PADDING_FREE: "plugins/attention-and-distributed-packing/configs/aadp.yaml",
}

# list of (tag, combi) tuples
@@ -181,7 +181,7 @@ def read_configuration(path: str) -> Dict:
("baseline-peft-bnb-nf4", (KEY_BNB_NF4_BASELINE,)),
("accelerated-peft-autogptq-foak", (KEY_AUTO_GPTQ, KEY_AUTO_GPTQ_FOAK)),
("accelerated-peft-bnb-nf4-foak", (KEY_BNB_NF4, KEY_BNB_NF4_FOAK)),
("ilab-padding-free", (KEY_ILAB_PADDING_FREE,)),
("aadp-padding-free", (KEY_AADP_PADDING_FREE,)),
]
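To make the override tuples used in CONFIGURATIONS concrete, here is a minimal sketch of applying a dotted key path such as "peft.quantization.fused_ops_and_kernels.base_layer" to a nested configuration dict. This is an assumed mechanism, not the script's actual implementation:

```python
# Minimal sketch (assumed behaviour, not generate_sample_configurations.py itself):
# walk a dotted key path into a nested dict and set the final key's value.
def apply_override(config: dict, dotted_key: str, value) -> dict:
    node = config
    keys = dotted_key.split(".")
    for key in keys[:-1]:
        node = node.setdefault(key, {})
    node[keys[-1]] = value
    return config

cfg = {"peft": {"quantization": {"fused_ops_and_kernels": {"base_layer": "auto_gptq"}}}}
apply_override(cfg, "peft.quantization.fused_ops_and_kernels.base_layer", "bitsandbytes")
assert cfg["peft"]["quantization"]["fused_ops_and_kernels"]["base_layer"] == "bitsandbytes"
```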

