diff --git a/sample-configurations/accelerated-peft-autogptq-foak-sample-configuration.yaml b/sample-configurations/accelerated-peft-autogptq-foak-sample-configuration.yaml
new file mode 100644
index 00000000..1eb38df3
--- /dev/null
+++ b/sample-configurations/accelerated-peft-autogptq-foak-sample-configuration.yaml
@@ -0,0 +1,44 @@
+# FMS Acceleration Plugin Configuration.
+#
+# Each stanza incorporates various configurations for
+# different fine-tuning / training tasks.
+plugins:
+  # PEFT-related acceleration
+  peft:
+
+    # quantization-related acceleration
+    # e.g., kernels for quantized base weights
+    quantization:
+
+      # AutoGPTQ quantized base weights.
+      auto_gptq:
+
+        # Kernel to be used for the GPTQ linear layer
+        # NOTE: Not all kernels are suitable for PEFT training; need to use
+        #       kernels that support autograd forward / backward. The best
+        #       recommendation at the moment is "triton_v2".
+        kernel: triton_v2
+
+        # If true, then will already expect quantized checkpoint
+        # passed into TrainingArguments.model_name_or_path
+        from_quantized: true
+      fused_ops_and_kernels:
+
+        # load unsloth optimizations for these 4bit base layer weights.
+        # currently only supports "auto_gptq" and "bitsandbytes"
+        base_layer: auto_gptq
+
+        # activate various unsloth optimizations
+        # NOTE: currently supports only all-or-nothing.
+
+        # fused kernels for lora linear layers
+        fused_lora: true
+
+        # fast loss triton kernels
+        fast_loss: true
+
+        # fast rms norm triton kernels
+        fast_rsm_layernorm: true
+
+        # fast RoPE embedding triton kernels
+        fast_rope_embeddings: true
diff --git a/scripts/generate_sample_configurations.py b/scripts/generate_sample_configurations.py
index 67ad4058..fd51d965 100644
--- a/scripts/generate_sample_configurations.py
+++ b/scripts/generate_sample_configurations.py
@@ -139,9 +139,10 @@ def read_configuration(path: str) -> Dict:
 #
 # NOTE: an augmentation (path, value) will augment a config at the
 # specified key path, with the value.
-KEY_AUTO_GPTQ = "auto_gptq"
+KEY_AUTO_GPTQ = "auto-gptq"
 KEY_BNB_NF4 = "bnb-nf4"
 KEY_BNB_NF4_BASELINE = "baseline-bnb-nf4"
+KEY_AUTO_GPTQ_FOAK = "auto-gptq-foak"
 
 CONFIGURATIONS = {
     KEY_AUTO_GPTQ: "plugins/accelerated-peft/configs/autogptq.yaml",
@@ -156,6 +157,10 @@ def read_configuration(path: str) -> Dict:
             ("peft.quantization.bitsandbytes.no_peft_model", True),
         ],
     ),
+    KEY_AUTO_GPTQ_FOAK: (
+        "plugins/fused-ops-and-kernels/configs/fast_quantized_peft.yaml",
+        [("peft.quantization.fused_ops_and_kernels.base_layer", "auto_gptq")],
+    ),
 }
 
 # list of (tag, combi) tuples
@@ -167,19 +172,24 @@ def read_configuration(path: str) -> Dict:
     ("accelerated-peft-autogptq", (KEY_AUTO_GPTQ,)),
     ("accelerated-peft-bnb-nf4", (KEY_BNB_NF4,)),
     ("baseline-peft-bnb-nf4", (KEY_BNB_NF4_BASELINE,)),
+    ("accelerated-peft-autogptq-foak", (KEY_AUTO_GPTQ, KEY_AUTO_GPTQ_FOAK)),
 ]
 
 
-# TODO: throw error if merge conflicts
 def merge_configs(config_contents: List[Dict]):
     "helper function to merge configuration contents."
 
     # merge in place
     def _merge(result: Dict, new_contents: Dict):
-        for k in new_contents:
+        for k, v in new_contents.items():
             if k not in result:
-                result[k] = {}
-                _merge(result[k], new_contents)
+                # if k is not in result, then v does not yet
+                # exist as a subtree under result, so we simply
+                # do an assignment
+                result[k] = v
+            else:
+                # otherwise, recursively merge the subtrees
+                _merge(result[k], v)
 
     if len(config_contents) == 0:
         return {}
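
For context (not part of the patch): a minimal sketch of how the revised _merge deep-merges plugin config fragments. The inline dicts below are hypothetical stand-ins for the contents of autogptq.yaml and fast_quantized_peft.yaml, and the outer loop over config_contents is an assumption inferred from the visible "return {}" guard rather than shown in the hunk.

    from typing import Dict, List

    def merge_configs(config_contents: List[Dict]) -> Dict:
        "merge configuration contents; inner _merge mirrors the patched version"

        def _merge(result: Dict, new_contents: Dict):
            for k, v in new_contents.items():
                if k not in result:
                    # k has no subtree under result yet, so assign v directly
                    result[k] = v
                else:
                    # both sides define k: recursively merge the subtrees
                    _merge(result[k], v)

        merged: Dict = {}
        for content in config_contents:
            _merge(merged, content)
        return merged

    # hypothetical stand-ins for the two plugin config fragments
    autogptq = {
        "plugins": {"peft": {"quantization": {"auto_gptq": {"kernel": "triton_v2"}}}}
    }
    foak = {
        "plugins": {"peft": {"quantization": {"fused_ops_and_kernels": {"fused_lora": True}}}}
    }

    merged = merge_configs([autogptq, foak])
    # both fragments land under plugins.peft.quantization, matching the
    # accelerated-peft-autogptq-foak sample configuration generated above
    assert set(merged["plugins"]["peft"]["quantization"]) == {"auto_gptq", "fused_ops_and_kernels"}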