Move model to device before wrapping with FSDP
skaulintel committed Mar 7, 2025
1 parent 5bf13ac commit 78c7135
3 changes: 3 additions & 0 deletions optimum/habana/accelerate/accelerator.py
@@ -476,6 +476,9 @@ def prepare_model(self, model: torch.nn.Module, device_placement: bool = None, e
"limit_all_gathers": fsdp_plugin.limit_all_gathers,
"device_id": torch.device("hpu", torch.hpu.current_device()),
}
# There's issue with moving view tensors to device within FSDP class [See: https://github.com/pytorch/pytorch/issues/147321]
# Due to above issue, view tensor's may lead to silent incorrent behavior, while pretending to be view they're really not
model = model.to(kwargs["device_id"])
model = FSDP(model, **kwargs)
if fsdp_plugin.activation_checkpointing:
from torch.distributed.algorithms._checkpoint.checkpoint_wrapper import (
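
A minimal sketch of the pattern this commit adopts, shown outside the Accelerator class: move the model to the target device yourself before handing it to FSDP, rather than relying on FSDP's device_id transfer alone. The wrap_model helper below is hypothetical and assumes the default process group is already initialized; on Gaudi the device would be torch.device("hpu", torch.hpu.current_device()) as in the diff above.

import torch
from torch.distributed.fsdp import FullyShardedDataParallel as FSDP

def wrap_model(model: torch.nn.Module, device: torch.device) -> FSDP:
    # Hypothetical helper: assumes torch.distributed.init_process_group()
    # has already been called for this rank.
    # Moving the model first sidesteps FSDP moving view tensors itself,
    # which can silently misbehave (pytorch/pytorch#147321).
    model = model.to(device)
    # device_id is still passed so FSDP knows where to place its shards.
    return FSDP(model, device_id=device)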
