diff --git a/examples/pytorch/image-classification/run_image_classification_no_trainer.py b/examples/pytorch/image-classification/run_image_classification_no_trainer.py index 8f5c08709446..0c8068d4d45d 100644 --- a/examples/pytorch/image-classification/run_image_classification_no_trainer.py +++ b/examples/pytorch/image-classification/run_image_classification_no_trainer.py @@ -544,7 +544,7 @@ def collate_fn(examples): completed_steps += 1 if isinstance(checkpointing_steps, int): - if completed_steps % checkpointing_steps == 0: + if completed_steps % checkpointing_steps == 0 and accelerator.sync_gradients: output_dir = f"step_{completed_steps}" if args.output_dir is not None: output_dir = os.path.join(args.output_dir, output_dir) diff --git a/examples/pytorch/image-pretraining/run_mim_no_trainer.py b/examples/pytorch/image-pretraining/run_mim_no_trainer.py index f208498e8bfb..e533ddfa8b01 100644 --- a/examples/pytorch/image-pretraining/run_mim_no_trainer.py +++ b/examples/pytorch/image-pretraining/run_mim_no_trainer.py @@ -723,7 +723,7 @@ def preprocess_images(examples): completed_steps += 1 if isinstance(checkpointing_steps, int): - if completed_steps % checkpointing_steps == 0: + if completed_steps % checkpointing_steps == 0 and accelerator.sync_gradients: output_dir = f"step_{completed_steps}" if args.output_dir is not None: output_dir = os.path.join(args.output_dir, output_dir) diff --git a/examples/pytorch/instance-segmentation/run_instance_segmentation_no_trainer.py b/examples/pytorch/instance-segmentation/run_instance_segmentation_no_trainer.py index 73609284da95..1605f607acb0 100644 --- a/examples/pytorch/instance-segmentation/run_instance_segmentation_no_trainer.py +++ b/examples/pytorch/instance-segmentation/run_instance_segmentation_no_trainer.py @@ -639,7 +639,7 @@ def main(): completed_steps += 1 if isinstance(checkpointing_steps, int): - if completed_steps % checkpointing_steps == 0: + if completed_steps % checkpointing_steps == 0 and accelerator.sync_gradients: output_dir = f"step_{completed_steps}" if args.output_dir is not None: output_dir = os.path.join(args.output_dir, output_dir) diff --git a/examples/pytorch/language-modeling/run_clm_no_trainer.py b/examples/pytorch/language-modeling/run_clm_no_trainer.py index 4bba7de4bb60..43ecba5f4d8f 100755 --- a/examples/pytorch/language-modeling/run_clm_no_trainer.py +++ b/examples/pytorch/language-modeling/run_clm_no_trainer.py @@ -638,7 +638,7 @@ def group_texts(examples): completed_steps += 1 if isinstance(checkpointing_steps, int): - if completed_steps % checkpointing_steps == 0: + if completed_steps % checkpointing_steps == 0 and accelerator.sync_gradients: output_dir = f"step_{completed_steps}" if args.output_dir is not None: output_dir = os.path.join(args.output_dir, output_dir) diff --git a/examples/pytorch/language-modeling/run_fim_no_trainer.py b/examples/pytorch/language-modeling/run_fim_no_trainer.py index f70f60b31c6a..2c954a1b6535 100644 --- a/examples/pytorch/language-modeling/run_fim_no_trainer.py +++ b/examples/pytorch/language-modeling/run_fim_no_trainer.py @@ -838,7 +838,7 @@ def apply_fim(examples): completed_steps += 1 if isinstance(checkpointing_steps, int): - if completed_steps % checkpointing_steps == 0: + if completed_steps % checkpointing_steps == 0 and accelerator.sync_gradients: output_dir = f"step_{completed_steps}" if args.output_dir is not None: output_dir = os.path.join(args.output_dir, output_dir) diff --git a/examples/pytorch/language-modeling/run_mlm_no_trainer.py b/examples/pytorch/language-modeling/run_mlm_no_trainer.py index 7d50383cfe72..c98687efadf5 100755 --- a/examples/pytorch/language-modeling/run_mlm_no_trainer.py +++ b/examples/pytorch/language-modeling/run_mlm_no_trainer.py @@ -675,7 +675,7 @@ def group_texts(examples): completed_steps += 1 if isinstance(checkpointing_steps, int): - if completed_steps % checkpointing_steps == 0: + if completed_steps % checkpointing_steps == 0 and accelerator.sync_gradients: output_dir = f"step_{completed_steps}" if args.output_dir is not None: output_dir = os.path.join(args.output_dir, output_dir) diff --git a/examples/pytorch/multiple-choice/run_swag_no_trainer.py b/examples/pytorch/multiple-choice/run_swag_no_trainer.py index a7eb30d47b4c..8f7693ae5b0d 100755 --- a/examples/pytorch/multiple-choice/run_swag_no_trainer.py +++ b/examples/pytorch/multiple-choice/run_swag_no_trainer.py @@ -619,7 +619,7 @@ def preprocess_function(examples): completed_steps += 1 if isinstance(checkpointing_steps, int): - if completed_steps % checkpointing_steps == 0: + if completed_steps % checkpointing_steps == 0 and accelerator.sync_gradients: output_dir = f"step_{completed_steps}" if args.output_dir is not None: output_dir = os.path.join(args.output_dir, output_dir) diff --git a/examples/pytorch/object-detection/run_object_detection_no_trainer.py b/examples/pytorch/object-detection/run_object_detection_no_trainer.py index 7913a3b4c5b2..6de61be63092 100644 --- a/examples/pytorch/object-detection/run_object_detection_no_trainer.py +++ b/examples/pytorch/object-detection/run_object_detection_no_trainer.py @@ -677,7 +677,7 @@ def main(): completed_steps += 1 if isinstance(checkpointing_steps, int): - if completed_steps % checkpointing_steps == 0: + if completed_steps % checkpointing_steps == 0 and accelerator.sync_gradients: output_dir = f"step_{completed_steps}" if args.output_dir is not None: output_dir = os.path.join(args.output_dir, output_dir) diff --git a/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py b/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py index f86d5f400459..ee791c0c8ddf 100644 --- a/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py +++ b/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py @@ -879,7 +879,7 @@ def create_and_fill_np_array(start_or_end_logits, dataset, max_len): completed_steps += 1 if isinstance(checkpointing_steps, int): - if completed_steps % checkpointing_steps == 0: + if completed_steps % checkpointing_steps == 0 and accelerator.sync_gradients: accelerator.save_state(f"step_{completed_steps}") if completed_steps >= args.max_train_steps: diff --git a/examples/pytorch/question-answering/run_qa_no_trainer.py b/examples/pytorch/question-answering/run_qa_no_trainer.py index 2bca56cad08c..7ae0d488bc40 100755 --- a/examples/pytorch/question-answering/run_qa_no_trainer.py +++ b/examples/pytorch/question-answering/run_qa_no_trainer.py @@ -894,7 +894,7 @@ def create_and_fill_np_array(start_or_end_logits, dataset, max_len): completed_steps += 1 if isinstance(checkpointing_steps, int): - if completed_steps % checkpointing_steps == 0: + if completed_steps % checkpointing_steps == 0 and accelerator.sync_gradients: output_dir = f"step_{completed_steps}" if args.output_dir is not None: output_dir = os.path.join(args.output_dir, output_dir) diff --git a/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py b/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py index 11e9d56c1ac3..35c3744ab5f3 100644 --- a/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py +++ b/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py @@ -516,7 +516,7 @@ def preprocess_batch(example_batch, transforms: A.Compose): completed_steps += 1 if isinstance(checkpointing_steps, int): - if completed_steps % checkpointing_steps == 0: + if completed_steps % checkpointing_steps == 0 and accelerator.sync_gradients: output_dir = f"step_{completed_steps}" if args.output_dir is not None: output_dir = os.path.join(args.output_dir, output_dir) diff --git a/examples/pytorch/summarization/run_summarization_no_trainer.py b/examples/pytorch/summarization/run_summarization_no_trainer.py index 8af76fe80796..36cd590ea5c9 100644 --- a/examples/pytorch/summarization/run_summarization_no_trainer.py +++ b/examples/pytorch/summarization/run_summarization_no_trainer.py @@ -688,7 +688,7 @@ def postprocess_text(preds, labels): completed_steps += 1 if isinstance(checkpointing_steps, int): - if completed_steps % checkpointing_steps == 0: + if completed_steps % checkpointing_steps == 0 and accelerator.sync_gradients: output_dir = f"step_{completed_steps}" if args.output_dir is not None: output_dir = os.path.join(args.output_dir, output_dir) diff --git a/examples/pytorch/text-classification/run_glue_no_trainer.py b/examples/pytorch/text-classification/run_glue_no_trainer.py index 8dfe23dfb330..ac62edbe5e9f 100644 --- a/examples/pytorch/text-classification/run_glue_no_trainer.py +++ b/examples/pytorch/text-classification/run_glue_no_trainer.py @@ -564,7 +564,7 @@ def preprocess_function(examples): completed_steps += 1 if isinstance(checkpointing_steps, int): - if completed_steps % checkpointing_steps == 0: + if completed_steps % checkpointing_steps == 0 and accelerator.sync_gradients: output_dir = f"step_{completed_steps}" if args.output_dir is not None: output_dir = os.path.join(args.output_dir, output_dir) diff --git a/examples/pytorch/token-classification/run_ner_no_trainer.py b/examples/pytorch/token-classification/run_ner_no_trainer.py index 2fa100f635bf..2afb38bb44b4 100755 --- a/examples/pytorch/token-classification/run_ner_no_trainer.py +++ b/examples/pytorch/token-classification/run_ner_no_trainer.py @@ -722,7 +722,7 @@ def compute_metrics(): completed_steps += 1 if isinstance(checkpointing_steps, int): - if completed_steps % checkpointing_steps == 0: + if completed_steps % checkpointing_steps == 0 and accelerator.sync_gradients: output_dir = f"step_{completed_steps}" if args.output_dir is not None: output_dir = os.path.join(args.output_dir, output_dir) diff --git a/examples/pytorch/translation/run_translation_no_trainer.py b/examples/pytorch/translation/run_translation_no_trainer.py index 6d8ac35268a9..97da3f9541d9 100644 --- a/examples/pytorch/translation/run_translation_no_trainer.py +++ b/examples/pytorch/translation/run_translation_no_trainer.py @@ -664,7 +664,7 @@ def postprocess_text(preds, labels): completed_steps += 1 if isinstance(checkpointing_steps, int): - if completed_steps % checkpointing_steps == 0: + if completed_steps % checkpointing_steps == 0 and accelerator.sync_gradients: output_dir = f"step_{completed_steps}" if args.output_dir is not None: output_dir = os.path.join(args.output_dir, output_dir)