From 6f40f202334e4457b67abb73dc473d6b4b813c64 Mon Sep 17 00:00:00 2001
From: Alvaro Bartolome
Date: Wed, 31 Jan 2024 22:49:41 +0900
Subject: [PATCH] Fix `DPOTrainer` docstrings (#1298)

Some issues were leading the auto-generation of the API reference to fail,
and the args were overlapped in the documentation page

---
 trl/trainer/dpo_trainer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/trl/trainer/dpo_trainer.py b/trl/trainer/dpo_trainer.py
index 25462f9c0f..3a27c50889 100644
--- a/trl/trainer/dpo_trainer.py
+++ b/trl/trainer/dpo_trainer.py
@@ -123,11 +123,11 @@ class DPOTrainer(Trainer):
         precompute_ref_log_probs (`bool`, defaults to `False`):
             Flag to precompute reference model log probabilities and evaluation datasets. This is useful if you want to
             train without the reference model and reduce the total GPU memory needed.
-        dataset_num_proc (`Optional[int]`):
+        dataset_num_proc (`Optional[int]`, *optional*):
             The number of workers to use to tokenize the data. Defaults to None.
-        model_init_kwargs: (`Optional[Dict]`, *optional*):
+        model_init_kwargs (`Optional[Dict]`, *optional*):
             Dict of Optional kwargs to pass when instantiating the model from a string
-        ref_model_init_kwargs: (`Optional[Dict]`, *optional*):
+        ref_model_init_kwargs (`Optional[Dict]`, *optional*):
             Dict of Optional kwargs to pass when instantiating the ref model from a string
         model_adapter_name (`str`, defaults to `None`):
             Name of the train target PEFT adapter, when using LoRA with multiple adapters.
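For context, the hunk above repairs two problems in the docstring arg headers: a stray colon before the parenthesized type (`model_init_kwargs: (...)` instead of `model_init_kwargs (...)`), and a missing `*optional*` marker, both of which can confuse the API-reference auto-generation. A minimal sketch of the expected `name (`type`, *optional*):` shape, using an illustrative regex (an assumption for demonstration, not doc-builder's actual grammar):

```python
import re

# Rough approximation of the arg-header shape the docs tooling expects:
#   name (`type`):
#   name (`type`, *optional*):
#   name (`type`, defaults to `value`):
# This regex is a simplified stand-in, not the real parser.
ARG_HEADER = re.compile(
    r"^\s*(\w+) \(`[^`]+`(?:, \*optional\*|, defaults to `[^`]+`)?\):$"
)

def is_valid_arg_header(line: str) -> bool:
    """Return True if a docstring arg line matches the expected shape."""
    return ARG_HEADER.match(line) is not None

# The fixed line from the patch passes; the old line with the stray colon does not:
print(is_valid_arg_header("    model_init_kwargs (`Optional[Dict]`, *optional*):"))   # True
print(is_valid_arg_header("    model_init_kwargs: (`Optional[Dict]`, *optional*):"))  # False
```

This is why the fix is purely syntactic: dropping the colon and adding `*optional*` lets each arg be parsed as its own entry instead of overlapping with its neighbors on the rendered page.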