✌️ Remove duplicate computation of sum in SFTTrainer (#3001)

Co-authored-by: Quentin Gallouédec <45557362+qgallouedec@users.noreply.github.com>
huggingface · Mar 4, 2025 · ea1d9be
1 parent 402187b
commit ea1d9be
Showing 1 changed file with 2 additions and 1 deletion.
diff --git a/trl/trainer/sft_trainer.py b/trl/trainer/sft_trainer.py
@@ -488,7 +488,8 @@ def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=N
             total_tokens = self.accelerator.gather_for_metrics(total_tokens)
 
             # Compute the mean token accuracy and log it
-            accuracy = (correct_tokens.sum() / total_tokens.sum()).item() if total_tokens.sum() > 0 else 0.0
+            total_sum = total_tokens.sum()
+            accuracy = (correct_tokens.sum() / total_sum).item() if total_sum > 0 else 0.0
             self._metrics[mode]["mean_token_accuracy"].append(accuracy)
 
         return (loss, outputs) if return_outputs else loss