
add configurations and minor updates
fabianlim committed Aug 19, 2024
1 parent 0f48421 commit 736d3c1
Showing 8 changed files with 36 additions and 23 deletions.
2 changes: 1 addition & 1 deletion plugins/accelerated-moe/README.md
@@ -9,5 +9,5 @@ This library contains plugins to accelerate finetuning with the following optimi
Currently databricks megablocks is not published on PyPI and does not have a proper release, so we have to install from the GitHub repository as below. Please note that installing from GitHub will require the CUDA Toolkit to build.

```
-pip install git+https://github.com/databricks/megablocks.git
+pip install git+https://github.com/databricks/megablocks.git@bce5d7b2aaf5038bc93b36f76c2baf51c2939bd2
```
@@ -44,7 +44,7 @@ def requires_custom_loading(self):
def model_loader(self, model_name: str, **kwargs):
# guarded
from .megablocks_utils.config_utils import update_mlp_registry
-from megablocks_utils.shard_moe_utils import shard_moe, get_moe_kwargs
+from .megablocks_utils.shard_moe_utils import shard_moe, get_moe_kwargs

# this one does a forward patching on MLP, but needs to be fixed
# properly as the load balancing loss is currently not properly
@@ -88,6 +88,8 @@ def model_loader(self, model_name: str, **kwargs):
),
)

+return model

def get_callbacks_and_ready_for_train(
self, model: torch.nn.Module = None, accelerator=None
):
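The second hunk adds the `return model` that `model_loader` was previously missing. A small, self-contained sketch of that failure mode, using made-up stand-ins rather than the plugin's real classes and helpers:

```python
# Illustrative only: a loader that shards the model in place but forgets to
# return it hands None back to the caller, which is the bug this hunk fixes.
class DummyModel:
    def __init__(self):
        self.sharded = False


def shard_in_place(model: DummyModel) -> None:
    model.sharded = True  # mutation only, nothing is returned


def model_loader_without_return(model_name: str):
    model = DummyModel()
    shard_in_place(model)
    # missing "return model" -> the caller receives None


def model_loader_with_return(model_name: str) -> DummyModel:
    model = DummyModel()
    shard_in_place(model)
    return model  # the line added in this commit


assert model_loader_without_return("mixtral") is None
assert model_loader_with_return("mixtral").sharded is True
```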
@@ -14,10 +14,8 @@ class SparseMLPv2(torch.nn.Module):
def __init__(self, args : Arguments):
super().__init__()
self.args = args
-self._num_rows_per_rank = (
-(mpu.experts_per_rank(args) * mpu.features_per_rank(args)) //
-mpu.get_weight_parallel_world_size(args)
-)
+self._num_rows_per_rank = mpu.experts_per_rank(args) * mpu.features_per_rank(args)


self.w1 = torch.nn.Parameter(torch.empty(
self._num_rows_per_rank,
@@ -46,8 +44,7 @@ def __init__(self, args : Arguments):
args, args.moe_num_experts, args.ffn_hidden_size,
args.hidden_size, args.output_layer_init_method))

-self._should_set_parallelism_attribute = (
-args.moe_expert_model_parallelism or args.moe_weight_parallelism)
+self._should_set_parallelism_attribute = args.moe_expert_model_parallelism
mpu.set_expert_model_parallel_attributes(
self.w1, self._should_set_parallelism_attribute)
mpu.set_expert_model_parallel_attributes(
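These two hunks remove weight parallelism from `SparseMLPv2`: the local row count is no longer divided by the weight-parallel world size, and the parallelism attribute now depends only on `moe_expert_model_parallelism`. A rough sketch of the shape arithmetic with made-up sizes; the `mpu` helpers are approximated here, not megablocks' actual implementations:

```python
# Hypothetical sizes, for illustration only.
num_experts = 8          # args.moe_num_experts
ffn_hidden_size = 14336  # args.ffn_hidden_size
expert_parallel_world_size = 4
weight_parallel_world_size = 2

# Rough stand-ins for mpu.experts_per_rank(args) / mpu.features_per_rank(args).
experts_per_rank = num_experts // expert_parallel_world_size
features_per_rank = ffn_hidden_size

# Before this commit: rows were further split across weight-parallel ranks.
rows_before = (experts_per_rank * features_per_rank) // weight_parallel_world_size

# After this commit: only expert parallelism shards the expert weights.
rows_after = experts_per_rank * features_per_rank

print(rows_before, rows_after)  # 14336 28672
```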
2 changes: 1 addition & 1 deletion plugins/framework/src/fms_acceleration/constants.py
@@ -21,4 +21,4 @@
# and activated.
# - hence the plugins that have model loaders should be on top of this list

PLUGINS = ["peft", "foak", "aadp"]
PLUGINS = ["peft", "foak", "aadp", "moe"]
7 changes: 6 additions & 1 deletion sample-configurations/CONTENTS.yaml
@@ -36,4 +36,9 @@ framework_configs:
- shortname: aadp-padding-free
plugins:
- attention-and-distributed-packing
-filename: aadp-padding-free-sample-configuration.yaml
+filename: aadp-padding-free-sample-configuration.yaml

+- shortname: moe-megablocks
+plugins:
+- accelerated-moe
+filename: moe-megablocks-sample-configuration.yaml
14 changes: 14 additions & 0 deletions sample-configurations/moe-megablocks-sample-configuration.yaml
@@ -0,0 +1,14 @@
# FMS Acceleration Plugin Configuration.
#
# Each stanza incorporates various configurations for
# different fine-tuning / training tasks.
plugins:
  training:

    # mixture-of-experts configurations
    moe:

      # expert-parallel for MoE
      megablocks:

        dummy: 1
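The new sample file nests the Megablocks settings under `plugins -> training -> moe -> megablocks`, currently holding only a placeholder `dummy: 1`. A minimal sketch for sanity-checking that path with plain PyYAML, assuming the nesting shown above; this is not the framework's own configuration loader:

```python
import yaml  # requires pyyaml

# Illustrative only: confirm the stanza path used by the sample configuration.
path = "sample-configurations/moe-megablocks-sample-configuration.yaml"
with open(path, encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

stanza = cfg["plugins"]["training"]["moe"]["megablocks"]
print(stanza)  # expected: {'dummy': 1}
```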
18 changes: 5 additions & 13 deletions scripts/benchmarks/scenarios.yaml
@@ -67,20 +67,12 @@ scenarios:
- 'mistralai/Mixtral-8x7B-Instruct-v0.1'
- 'NousResearch/Llama-2-70b-hf'

-- name: accelerated-peft-gptq
+- name: accelerated-moe-megablocks
framework_config:
-- accelerated-peft-autogptq
-- accelerated-peft-autogptq-foak
+- moe-megablocks
arguments:
learning_rate: 2e-4
-fp16: True
-torch_dtype: float16
-peft_method: lora
-r: 16
-lora_alpha: 16
-lora_dropout: 0.1
-target_modules: ["q_proj", "k_proj", "v_proj", "o_proj"]
+bf16: True
+torch_dtype: bfloat16
model_name_or_path:
-- 'TheBloke/Mistral-7B-v0.1-GPTQ'
-- 'TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ'
-- 'TheBloke/Llama-2-70B-GPTQ'
+- 'mistralai/Mixtral-8x7B-Instruct-v0.1'
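The rewritten scenario benchmarks the unquantized Mixtral checkpoint with the Megablocks config in bfloat16, replacing the fp16 GPTQ-LoRA settings. As a side note for reproducing it, a quick capability check in plain PyTorch (illustrative only, not part of scenarios.yaml or the benchmark scripts):

```python
import torch

# bf16 training as configured above needs hardware support (e.g. Ampere or newer GPUs).
use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
dtype = torch.bfloat16 if use_bf16 else torch.float16  # fp16 fallback mirrors the old scenario
print("selected dtype:", dtype)
```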
3 changes: 3 additions & 0 deletions scripts/generate_sample_configurations.py
@@ -145,6 +145,7 @@ def read_configuration(path: str) -> Dict:
KEY_AUTO_GPTQ_FOAK = "auto-gptq-foak"
KEY_BNB_NF4_FOAK = "bnb-nf4-foak"
KEY_AADP_PADDING_FREE = "aadp-padding-free"
+KEY_MEGABLOCKS = "moe-megablocks"

CONFIGURATIONS = {
KEY_AUTO_GPTQ: "plugins/accelerated-peft/configs/autogptq.yaml",
@@ -168,6 +169,7 @@ def read_configuration(path: str) -> Dict:
[("peft.quantization.fused_ops_and_kernels.base_layer", "bitsandbytes")],
),
KEY_AADP_PADDING_FREE: "plugins/attention-and-distributed-packing/configs/aadp.yaml",
+KEY_MEGABLOCKS: "plugins/accelerated-moe/configs/megablocks.yaml",
}

# list of (tag, combi) tuples
@@ -182,6 +184,7 @@ def read_configuration(path: str) -> Dict:
("accelerated-peft-autogptq-foak", (KEY_AUTO_GPTQ, KEY_AUTO_GPTQ_FOAK)),
("accelerated-peft-bnb-nf4-foak", (KEY_BNB_NF4, KEY_BNB_NF4_FOAK)),
("aadp-padding-free", (KEY_AADP_PADDING_FREE,)),
("moe-megablocks", (KEY_MEGABLOCKS,)),
]


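The three additions register the new plugin with the generator: a key, the path of its config fragment, and a single-key combination tagged `moe-megablocks`. A rough illustration of how such a (tag, key-combination) entry resolves to configuration paths; the script's actual YAML merging is not reproduced here:

```python
# Stand-alone illustration built only from the entries added in this commit.
KEY_MEGABLOCKS = "moe-megablocks"

CONFIGURATIONS = {
    KEY_MEGABLOCKS: "plugins/accelerated-moe/configs/megablocks.yaml",
}

COMBINATIONS = [
    ("moe-megablocks", (KEY_MEGABLOCKS,)),
]

for tag, keys in COMBINATIONS:
    paths = [CONFIGURATIONS[key] for key in keys]
    # CONTENTS.yaml pairs this tag with moe-megablocks-sample-configuration.yaml,
    # which the generator presumably produces from the fragments listed here.
    print(tag, "->", paths)
```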

