From 71443473b47e6a2ac8dae1cf3bee76b804bb0ca9 Mon Sep 17 00:00:00 2001 From: ZhengHongming888 Date: Fri, 20 Dec 2024 09:54:54 -0800 Subject: [PATCH 1/2] create branch fix_distributed_for_timm --- examples/pytorch-image-models/train_hpu_graph.py | 6 ------ examples/pytorch-image-models/train_hpu_lazy.py | 5 ----- 2 files changed, 11 deletions(-) diff --git a/examples/pytorch-image-models/train_hpu_graph.py b/examples/pytorch-image-models/train_hpu_graph.py index 767142e469..0bcfbe7295 100755 --- a/examples/pytorch-image-models/train_hpu_graph.py +++ b/examples/pytorch-image-models/train_hpu_graph.py @@ -635,10 +635,6 @@ def _parse_args(): return args, args_text -def setup(): - dist.init_process_group(backend="hccl") - - def cleanup(): dist.destroy_process_group() @@ -663,8 +659,6 @@ def main(): device = torch.device("hpu") if args.distributed: - setup() - _logger.info( "Training in distributed mode with multiple processes, 1 device per process." f"Process {args.rank}, total {args.world_size}, device {args.device}." diff --git a/examples/pytorch-image-models/train_hpu_lazy.py b/examples/pytorch-image-models/train_hpu_lazy.py index 834f9ce043..4cba3a47e0 100755 --- a/examples/pytorch-image-models/train_hpu_lazy.py +++ b/examples/pytorch-image-models/train_hpu_lazy.py @@ -637,9 +637,6 @@ def _parse_args(): return args, args_text -def setup(): - dist.init_process_group(backend="hccl") - def cleanup(): dist.destroy_process_group() @@ -665,8 +662,6 @@ def main(): device = torch.device("hpu") if args.distributed: - setup() - _logger.info( "Training in distributed mode with multiple processes, 1 device per process." f"Process {args.rank}, total {args.world_size}, device {args.device}." From f74da395ff38e48e9d7e5487f195dd64a4610200 Mon Sep 17 00:00:00 2001 From: regisss <15324346+regisss@users.noreply.github.com> Date: Mon, 23 Dec 2024 17:06:18 +0000 Subject: [PATCH 2/2] Make style --- examples/pytorch-image-models/train_hpu_lazy.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/pytorch-image-models/train_hpu_lazy.py b/examples/pytorch-image-models/train_hpu_lazy.py index 4cba3a47e0..bca523c9b4 100755 --- a/examples/pytorch-image-models/train_hpu_lazy.py +++ b/examples/pytorch-image-models/train_hpu_lazy.py @@ -637,7 +637,6 @@ def _parse_args(): return args, args_text - def cleanup(): dist.destroy_process_group()