👧🏽 Adding DoRA support to model config (#2974)

huggingface · Feb 27, 2025 · f074dcd
1 parent 0caff61
commit f074dcd
Show file tree

Hide file tree

Showing 2 changed files with 18 additions and 0 deletions.
diff --git a/trl/trainer/model_config.py b/trl/trainer/model_config.py
@@ -62,6 +62,13 @@ class ModelConfig:
         use_rslora (`bool`, *optional*, defaults to `False`):
             Whether to use Rank-Stabilized LoRA, which sets the adapter scaling factor to `lora_alpha/√r`, instead of
             the original default value of `lora_alpha/r`.
+        use_dora (`bool`, *optional*, defaults to `False`):
+            Enable [Weight-Decomposed Low-Rank Adaptation (DoRA)](https://huggingface.co/papers/2402.09353). This
+            technique decomposes the updates of the weights into two parts, magnitude and direction. Direction is
+            handled by normal LoRA, whereas the magnitude is handled by a separate learnable parameter. This can
+            improve the performance of LoRA, especially at low ranks. Right now, DoRA only supports linear and Conv2D
+            layers. DoRA introduces a bigger overhead than pure LoRA, so it is recommended to merge weights for
+            inference.
         load_in_8bit (`bool`, *optional*, defaults to `False`):
             Whether to use 8 bit precision for the base model. Works only with LoRA.
         load_in_4bit (`bool`, *optional*, defaults to `False`):
@@ -137,6 +144,16 @@ class ModelConfig:
             "instead of the original default value of `lora_alpha/r`."
         },
     )
+    use_dora: bool = field(
+        default=False,
+        metadata={
+            "help": "Enable Weight-Decomposed Low-Rank Adaptation (DoRA). This technique decomposes the updates of "
+            "the weights into two parts, magnitude and direction. Direction is handled by normal LoRA, whereas the "
+            "magnitude is handled by a separate learnable parameter. This can improve the performance of LoRA, "
+            "especially at low ranks. Right now, DoRA only supports linear and Conv2D layers. DoRA introduces a "
+            "bigger overhead than pure LoRA, so it is recommended to merge weights for inference."
+        },
+    )
     load_in_8bit: bool = field(
         default=False,
         metadata={"help": "Whether to use 8 bit precision for the base model. Works only with LoRA."},

diff --git a/trl/trainer/utils.py b/trl/trainer/utils.py
@@ -920,6 +920,7 @@ def get_peft_config(model_args: ModelConfig) -> "Optional[PeftConfig]":
         lora_dropout=model_args.lora_dropout,
         bias="none",
         use_rslora=model_args.use_rslora,
+        use_dora=model_args.use_dora,
         modules_to_save=model_args.lora_modules_to_save,
     )