diff --git a/.github/workflows/build-and-publish.yml b/.github/workflows/build-and-publish.yml
index 8ebb698f..307ade0e 100644
--- a/.github/workflows/build-and-publish.yml
+++ b/.github/workflows/build-and-publish.yml
@@ -14,6 +14,7 @@ jobs:
           - "framework"
           - "accelerated-peft"
           - "fused-ops-and-kernels"
+          - "attention-and-distributed-packing"
     permissions:
       id-token: write # IMPORTANT: this permission is mandatory for trusted publishing
diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml
index d6d6b089..90f7210a 100644
--- a/.github/workflows/format.yml
+++ b/.github/workflows/format.yml
@@ -29,7 +29,7 @@ jobs:
           - "framework"
           - "accelerated-peft"
           - "fused-ops-and-kernels"
-          - "instruct-lab"
+          - "attention-and-distributed-packing"
     steps:
       - uses: actions/checkout@v4
diff --git a/README.md b/README.md
index f79026f4..1158550c 100644
--- a/README.md
+++ b/README.md
@@ -33,7 +33,7 @@ Plugin | Description | Depends | License | Status
 [framework](./plugins/framework/README.md) | This acceleration framework for integration with huggingface trainers | | | Alpha
 [accelerated-peft](./plugins/accelerated-peft/README.md) | For PEFT-training, e.g., 4bit QLoRA. | Huggingface<br>AutoGPTQ | Apache 2.0<br>MIT | Alpha
 [fused-op-and-kernels](./plugins/fused-ops-and-kernels/README.md) | Fused LoRA and triton kernels (e.g., fast cross-entropy, rms, rope) | -- | Apache 2.0 [(contains extracted code)](./plugins/fused-ops-and-kernels/README.md#code-extracted-from-unsloth)| Beta
-[instruct-lab](./plugins/instruct-lab/README.md) | Padding-Free Flash Attention Computation | flash-attn | Apache 2.0 | Beta
+[attention-and-distributed-packing](./plugins/attention-and-distributed-packing/README.md) | Padding-Free Flash Attention Computation | flash-attn | Apache 2.0 | Beta
 MOE-training-acceleration | [MegaBlocks](https://github.com/databricks/megablocks) inspired triton Kernels and acclerations for Mixture-of-Expert models | | Apache 2.0 | Coming Soon
 
 ## Usage with FMS HF Tuning
diff --git a/plugins/instruct-lab/.isort.cfg b/plugins/attention-and-distributed-packing/.isort.cfg
similarity index 80%
rename from plugins/instruct-lab/.isort.cfg
rename to plugins/attention-and-distributed-packing/.isort.cfg
index 4aa62fac..98382601 100644
--- a/plugins/instruct-lab/.isort.cfg
+++ b/plugins/attention-and-distributed-packing/.isort.cfg
@@ -8,6 +8,3 @@ import_heading_firstparty=First Party
 import_heading_localfolder=Local
 known_firstparty=
 known_localfolder=tuning
-
-# skip code imported from unsloth
-skip_glob=**/unsloth*/**
diff --git a/plugins/instruct-lab/.pylintrc b/plugins/attention-and-distributed-packing/.pylintrc
similarity index 100%
rename from plugins/instruct-lab/.pylintrc
rename to plugins/attention-and-distributed-packing/.pylintrc
diff --git a/plugins/instruct-lab/README.md b/plugins/attention-and-distributed-packing/README.md
similarity index 96%
rename from plugins/instruct-lab/README.md
rename to plugins/attention-and-distributed-packing/README.md
index d76f327e..cad6ec63 100644
--- a/plugins/instruct-lab/README.md
+++ b/plugins/attention-and-distributed-packing/README.md
@@ -1,4 +1,4 @@
-# FMS Acceleration for Instruct Lab
+# FMS Acceleration for Attention And Distributed Packing Plugin
 
 This library contains plugins to accelerate finetuning with the following optimizations:
diff --git a/plugins/instruct-lab/configs/instruct_lab.yaml b/plugins/attention-and-distributed-packing/configs/aadp.yaml
similarity index 100%
rename from plugins/instruct-lab/configs/instruct_lab.yaml
rename to plugins/attention-and-distributed-packing/configs/aadp.yaml
diff --git a/plugins/instruct-lab/pyproject.toml b/plugins/attention-and-distributed-packing/pyproject.toml
similarity index 81%
rename from plugins/instruct-lab/pyproject.toml
rename to plugins/attention-and-distributed-packing/pyproject.toml
index e6e4adb1..00f1a155 100644
--- a/plugins/instruct-lab/pyproject.toml
+++ b/plugins/attention-and-distributed-packing/pyproject.toml
@@ -3,9 +3,9 @@ requires = ["hatchling"]
 build-backend = "hatchling.build"
 
 [project]
-name = "fms-acceleration-ilab"
+name = "fms-acceleration-aadp"
 version = '0.0.1'
-description = "FMS Acceleration Plugin for Functionalities Used in Instruct Lab Training"
+description = "FMS Acceleration Plugin for Attention and Distributed Packing Optimizations"
 authors = [
   {name = "Fabian Lim", email = "flim@sg.ibm.com"},
   {name = "Aaron Chew", email = "aaron.chew1@ibm.com"},
@@ -24,7 +24,7 @@ classifiers=[
 ]
 
 [tool.hatch.build.targets.wheel]
-only-include = ["src/fms_acceleration_ilab"]
+only-include = ["src/fms_acceleration_aadp"]
 
 [tool.hatch.build.targets.wheel.sources]
 "src" = ""
diff --git a/plugins/instruct-lab/src/fms_acceleration_ilab/__init__.py b/plugins/attention-and-distributed-packing/src/fms_acceleration_aadp/__init__.py
similarity index 100%
rename from plugins/instruct-lab/src/fms_acceleration_ilab/__init__.py
rename to plugins/attention-and-distributed-packing/src/fms_acceleration_aadp/__init__.py
diff --git a/plugins/instruct-lab/src/fms_acceleration_ilab/ilab_utils.py b/plugins/attention-and-distributed-packing/src/fms_acceleration_aadp/aadp_utils.py
similarity index 100%
rename from plugins/instruct-lab/src/fms_acceleration_ilab/ilab_utils.py
rename to plugins/attention-and-distributed-packing/src/fms_acceleration_aadp/aadp_utils.py
diff --git a/plugins/instruct-lab/src/fms_acceleration_ilab/flash_attn.py b/plugins/attention-and-distributed-packing/src/fms_acceleration_aadp/flash_attn.py
similarity index 100%
rename from plugins/instruct-lab/src/fms_acceleration_ilab/flash_attn.py
rename to plugins/attention-and-distributed-packing/src/fms_acceleration_aadp/flash_attn.py
diff --git a/plugins/instruct-lab/src/fms_acceleration_ilab/framework_plugin_padding_free.py b/plugins/attention-and-distributed-packing/src/fms_acceleration_aadp/framework_plugin_padding_free.py
similarity index 99%
rename from plugins/instruct-lab/src/fms_acceleration_ilab/framework_plugin_padding_free.py
rename to plugins/attention-and-distributed-packing/src/fms_acceleration_aadp/framework_plugin_padding_free.py
index 33a592ee..4680313e 100644
--- a/plugins/instruct-lab/src/fms_acceleration_ilab/framework_plugin_padding_free.py
+++ b/plugins/attention-and-distributed-packing/src/fms_acceleration_aadp/framework_plugin_padding_free.py
@@ -185,7 +185,7 @@ def _patch_dataloader(
         except ImportError:
             # Otherwise, use the locally implemented DataCollatorWithFlattening
             # pylint: disable=import-outside-toplevel
-            from .ilab_utils import (
+            from .aadp_utils import (
                 DataCollatorWithFlattening,
             )
diff --git a/plugins/instruct-lab/tests/__init__.py b/plugins/attention-and-distributed-packing/tests/__init__.py
similarity index 100%
rename from plugins/instruct-lab/tests/__init__.py
rename to plugins/attention-and-distributed-packing/tests/__init__.py
diff --git a/plugins/instruct-lab/tests/test_ilab_plugin.py b/plugins/attention-and-distributed-packing/tests/test_aadp_plugin.py
similarity index 83%
rename from plugins/instruct-lab/tests/test_ilab_plugin.py
rename to plugins/attention-and-distributed-packing/tests/test_aadp_plugin.py
index c3185d83..ea38158b 100644
--- a/plugins/instruct-lab/tests/test_ilab_plugin.py
+++ b/plugins/attention-and-distributed-packing/tests/test_aadp_plugin.py
@@ -17,13 +17,13 @@
     instantiate_framework,
     read_configuration,
 )
-from fms_acceleration_ilab import PaddingFreeAccelerationPlugin
+from fms_acceleration_aadp import PaddingFreeAccelerationPlugin
 
 # configuration
 DIRNAME = os.path.dirname(__file__)
-CONFIG_PATH_ILAB = os.path.join(DIRNAME, "../configs/instruct_lab.yaml")
+CONFIG_PATH_ILAB = os.path.join(DIRNAME, "../configs/aadp.yaml")
 
-def test_framework_installs_ilab_padding_free_plugin():
+def test_framework_installs_aadp_padding_free_plugin():
     with instantiate_framework(
         read_configuration(CONFIG_PATH_ILAB), require_packages_check=False
     ) as framework:
diff --git a/plugins/instruct-lab/tox.ini b/plugins/attention-and-distributed-packing/tox.ini
similarity index 100%
rename from plugins/instruct-lab/tox.ini
rename to plugins/attention-and-distributed-packing/tox.ini
diff --git a/plugins/framework/src/fms_acceleration/constants.py b/plugins/framework/src/fms_acceleration/constants.py
index 3cdef252..6a81d977 100644
--- a/plugins/framework/src/fms_acceleration/constants.py
+++ b/plugins/framework/src/fms_acceleration/constants.py
@@ -21,4 +21,4 @@
 # and activated.
 # - hence the plugins that have model loaders should be on top of this list
 
-PLUGINS = ["peft", "foak", "ilab"]
+PLUGINS = ["peft", "foak", "aadp"]
diff --git a/sample-configurations/CONTENTS.yaml b/sample-configurations/CONTENTS.yaml
index f5dc6819..e2eccbc1 100644
--- a/sample-configurations/CONTENTS.yaml
+++ b/sample-configurations/CONTENTS.yaml
@@ -33,7 +33,7 @@ framework_configs:
       - fused-ops-and-kernels
     filename: accelerated-peft-bnb-nf4-foak-sample-configuration.yaml
 
-  - shortname: ilab-padding-free
+  - shortname: aadp-padding-free
     plugins:
-      - instruct-lab
-    filename: ilab-padding-free-sample-configuration.yaml
\ No newline at end of file
+      - attention-and-distributed-packing
+    filename: aadp-padding-free-sample-configuration.yaml
\ No newline at end of file
diff --git a/sample-configurations/ilab-padding-free-sample-configuration.yaml b/sample-configurations/aadp-padding-free-sample-configuration.yaml
similarity index 100%
rename from sample-configurations/ilab-padding-free-sample-configuration.yaml
rename to sample-configurations/aadp-padding-free-sample-configuration.yaml
diff --git a/scripts/generate_sample_configurations.py b/scripts/generate_sample_configurations.py
index c147df6a..3dd80b92 100644
--- a/scripts/generate_sample_configurations.py
+++ b/scripts/generate_sample_configurations.py
@@ -144,7 +144,7 @@ def read_configuration(path: str) -> Dict:
 KEY_BNB_NF4_BASELINE = "baseline-bnb-nf4"
 KEY_AUTO_GPTQ_FOAK = "auto-gptq-foak"
 KEY_BNB_NF4_FOAK = "bnb-nf4-foak"
-KEY_ILAB_PADDING_FREE = "ilab-padding-free"
+KEY_AADP_PADDING_FREE = "aadp-padding-free"
 
 CONFIGURATIONS = {
     KEY_AUTO_GPTQ: "plugins/accelerated-peft/configs/autogptq.yaml",
@@ -167,7 +167,7 @@ def read_configuration(path: str) -> Dict:
         "plugins/fused-ops-and-kernels/configs/fast_quantized_peft.yaml",
         [("peft.quantization.fused_ops_and_kernels.base_layer", "bitsandbytes")],
     ),
-    KEY_ILAB_PADDING_FREE: "plugins/instruct-lab/configs/instruct_lab.yaml",
+    KEY_AADP_PADDING_FREE: "plugins/attention-and-distributed-packing/configs/aadp.yaml",
 }
 
 # list of (tag, combi) tuples
@@ -181,7 +181,7 @@ def read_configuration(path: str) -> Dict:
     ("baseline-peft-bnb-nf4", (KEY_BNB_NF4_BASELINE,)),
     ("accelerated-peft-autogptq-foak", (KEY_AUTO_GPTQ, KEY_AUTO_GPTQ_FOAK)),
     ("accelerated-peft-bnb-nf4-foak", (KEY_BNB_NF4, KEY_BNB_NF4_FOAK)),
-    ("ilab-padding-free", (KEY_ILAB_PADDING_FREE,)),
+    ("aadp-padding-free", (KEY_AADP_PADDING_FREE,)),
 ]
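A note on the `framework_plugin_padding_free.py` hunk above: the plugin prefers an upstream `DataCollatorWithFlattening` and only falls back to the local copy in `aadp_utils` on `ImportError`. A minimal sketch of that guard follows; the try-branch import is not visible in this diff, so the upstream path shown here is an assumption based on recent `transformers` releases bundling such a collator:

```python
# Sketch of the version-guarded import pattern from _patch_dataloader.
# Assumption: the try-branch imports from transformers (not shown in the diff).
try:
    # recent transformers releases ship a flattening collator
    from transformers import DataCollatorWithFlattening
except ImportError:
    # otherwise, use the locally implemented DataCollatorWithFlattening
    from .aadp_utils import DataCollatorWithFlattening
```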
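For readers unfamiliar with padding-free training, such a collator concatenates a batch into a single row so no padding tokens are computed, with `position_ids` restarting at 0 to mark example boundaries for flash-attention. The sketch below is a simplified illustration of that idea, not the code in `aadp_utils.py` (whose contents are unchanged by this PR and not shown here):

```python
import torch

def flatten_batch(features):
    """Toy flattening collator: one row, no padding, boundaries via position_ids."""
    input_ids, position_ids, labels = [], [], []
    for f in features:
        ids = list(f["input_ids"])
        input_ids += ids
        # positions restart at 0 for every example, which lets a
        # padding-free flash-attention kernel recover the boundaries
        position_ids += list(range(len(ids)))
        # mask the first token of each example so the loss never
        # crosses an example boundary
        labels += [-100] + ids[1:]
    return {
        "input_ids": torch.tensor([input_ids]),
        "position_ids": torch.tensor([position_ids]),
        "labels": torch.tensor([labels]),
    }

# two examples of lengths 3 and 2 flatten into one row of length 5
batch = flatten_batch([{"input_ids": [5, 6, 7]}, {"input_ids": [8, 9]}])
assert batch["position_ids"].tolist() == [[0, 1, 2, 0, 1]]
```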
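Finally, since both the distribution name (`fms-acceleration-ilab` to `fms-acceleration-aadp`) and the Python package (`fms_acceleration_ilab` to `fms_acceleration_aadp`) change, downstream code needs a one-line import update; per the test hunk above, the plugin class name itself is untouched:

```python
# before this PR (old package name, no longer built):
# from fms_acceleration_ilab import PaddingFreeAccelerationPlugin

# after this PR, only the package path changes:
from fms_acceleration_aadp import PaddingFreeAccelerationPlugin
```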