From 4eb17b26e77611d4fbcdcbbc20c7bf275eb015c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= <45557362+qgallouedec@users.noreply.github.com>
Date: Thu, 26 Dec 2024 14:58:53 +0100
Subject: [PATCH] Drop inplace operation for loss computation with gradient
 accumulation (#35416)

Fix inplace loss computation
---
 src/transformers/trainer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index 5957f8025d2..c2327739549 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -3700,7 +3700,7 @@ def training_step(
         else:
             # Finally we need to normalize the loss for reporting
             if num_items_in_batch is None:
-                loss /= self.args.gradient_accumulation_steps
+                loss = loss / self.args.gradient_accumulation_steps
 
             self.accelerator.backward(loss, **kwargs)
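
Note (not part of the patch): a minimal PyTorch sketch of why swapping the in-place division `loss /= n` for the out-of-place `loss = loss / n` can matter. It is not the Trainer code itself, and the variable names (`kept_for_logging`, `leaf`) are illustrative.

    import torch

    # 1) In-place division mutates the tensor object itself, so any other
    #    reference to it (e.g. a value kept around for logging) changes too.
    loss = torch.tensor(4.0, requires_grad=True) * 1.0   # non-leaf, as after a forward pass
    kept_for_logging = loss                               # second reference to the same tensor
    loss = loss / 2                                       # out-of-place: new tensor is created
    print(kept_for_logging.item())                        # 4.0 -- the kept value is untouched

    loss2 = torch.tensor(4.0, requires_grad=True) * 1.0
    kept2 = loss2
    loss2 /= 2                                            # in-place: mutates the shared tensor
    print(kept2.item())                                   # 2.0 -- the kept value was scaled as well

    # 2) Autograd rejects in-place ops on a leaf tensor that requires grad.
    leaf = torch.tensor(4.0, requires_grad=True)
    try:
        leaf /= 2
    except RuntimeError as e:
        print("RuntimeError:", e)

The out-of-place form leaves any previously held reference to the loss untouched and sidesteps autograd's restrictions on in-place updates, at the cost of allocating a new (scalar) tensor.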