Skip to content

Commit

Permalink
chore: correct update_step and correct gradient_accumulation_steps (h…
Browse files Browse the repository at this point in the history
  • Loading branch information
pphuc25 authored and parambharat committed Sep 26, 2023
1 parent 6abd213 commit 5bd36a6
Show file tree
Hide file tree
Showing 11 changed files with 13 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -477,8 +477,8 @@ def collate_fn(examples):
# need to multiply `gradient_accumulation_steps` to reflect real steps
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
starting_epoch = resume_step // len(train_dataloader)
+ completed_steps = resume_step // args.gradient_accumulation_steps
  resume_step -= starting_epoch * len(train_dataloader)
- completed_steps = resume_step // args.gradient_accumulation_step

# update the progress_bar if load from checkpoint
progress_bar.update(completed_steps)
Expand Down
2 changes: 1 addition & 1 deletion examples/pytorch/image-pretraining/run_mim_no_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -701,8 +701,8 @@ def preprocess_images(examples):
# need to multiply `gradient_accumulation_steps` to reflect real steps
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
starting_epoch = resume_step // len(train_dataloader)
- resume_step -= starting_epoch * len(train_dataloader)
  completed_steps = resume_step // args.gradient_accumulation_steps
+ resume_step -= starting_epoch * len(train_dataloader)

# update the progress_bar if load from checkpoint
progress_bar.update(completed_steps)
Expand Down
2 changes: 1 addition & 1 deletion examples/pytorch/language-modeling/run_mlm_no_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,8 +636,8 @@ def group_texts(examples):
# need to multiply `gradient_accumulation_steps` to reflect real steps
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
starting_epoch = resume_step // len(train_dataloader)
- resume_step -= starting_epoch * len(train_dataloader)
  completed_steps = resume_step // args.gradient_accumulation_steps
+ resume_step -= starting_epoch * len(train_dataloader)

# update the progress_bar if load from checkpoint
progress_bar.update(completed_steps)
Expand Down
2 changes: 1 addition & 1 deletion examples/pytorch/multiple-choice/run_swag_no_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,8 +583,8 @@ def preprocess_function(examples):
# need to multiply `gradient_accumulation_steps` to reflect real steps
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
starting_epoch = resume_step // len(train_dataloader)
+ completed_steps = resume_step // args.gradient_accumulation_steps
  resume_step -= starting_epoch * len(train_dataloader)
- completed_steps = resume_step // args.gradient_accumulation_stepp

# update the progress_bar if load from checkpoint
progress_bar.update(completed_steps)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -820,8 +820,8 @@ def create_and_fill_np_array(start_or_end_logits, dataset, max_len):
# need to multiply `gradient_accumulation_steps` to reflect real steps
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
starting_epoch = resume_step // len(train_dataloader)
+ completed_steps = resume_step // args.gradient_accumulation_steps
  resume_step -= starting_epoch * len(train_dataloader)
- completed_steps = resume_step // args.gradient_accumulation_stepp

# update the progress_bar if load from checkpoint
progress_bar.update(completed_steps)
Expand Down
5 changes: 3 additions & 2 deletions examples/pytorch/question-answering/run_qa_no_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -848,10 +848,11 @@ def create_and_fill_np_array(start_or_end_logits, dataset, max_len):
resume_step = None
completed_steps = starting_epoch * num_update_steps_per_epoch
else:
- resume_step = int(training_difference.replace("step_", ""))
+ # need to multiply `gradient_accumulation_steps` to reflect real steps
+ resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
  starting_epoch = resume_step // len(train_dataloader)
+ completed_steps = resume_step // args.gradient_accumulation_steps
  resume_step -= starting_epoch * len(train_dataloader)
- completed_steps = resume_step // args.gradient_accumulation_stepp

# update the progress_bar if load from checkpoint
progress_bar.update(completed_steps)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -581,8 +581,8 @@ def preprocess_val(example_batch):
# need to multiply `gradient_accumulation_steps` to reflect real steps
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
starting_epoch = resume_step // len(train_dataloader)
+ completed_steps = resume_step // args.gradient_accumulation_steps
  resume_step -= starting_epoch * len(train_dataloader)
- completed_steps = resume_step // args.gradient_accumulation_stepp

# update the progress_bar if load from checkpoint
progress_bar.update(completed_steps)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -652,8 +652,8 @@ def postprocess_text(preds, labels):
# need to multiply `gradient_accumulation_steps` to reflect real steps
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
starting_epoch = resume_step // len(train_dataloader)
+ completed_steps = resume_step // args.gradient_accumulation_steps
  resume_step -= starting_epoch * len(train_dataloader)
- completed_steps = resume_step // args.gradient_accumulation_stepp

# update the progress_bar if load from checkpoint
progress_bar.update(completed_steps)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -530,8 +530,8 @@ def preprocess_function(examples):
# need to multiply `gradient_accumulation_steps` to reflect real steps
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
starting_epoch = resume_step // len(train_dataloader)
+ completed_steps = resume_step // args.gradient_accumulation_steps
  resume_step -= starting_epoch * len(train_dataloader)
- completed_steps = resume_step // args.gradient_accumulation_step

# update the progress_bar if load from checkpoint
progress_bar.update(completed_steps)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -690,8 +690,8 @@ def compute_metrics():
# need to multiply `gradient_accumulation_steps` to reflect real steps
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
starting_epoch = resume_step // len(train_dataloader)
+ completed_steps = resume_step // args.gradient_accumulation_steps
  resume_step -= starting_epoch * len(train_dataloader)
- completed_steps = resume_step // args.gradient_accumulation_stepp

# update the progress_bar if load from checkpoint
progress_bar.update(completed_steps)
Expand Down
2 changes: 1 addition & 1 deletion examples/pytorch/translation/run_translation_no_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,8 +633,8 @@ def postprocess_text(preds, labels):
# need to multiply `gradient_accumulation_steps` to reflect real steps
resume_step = int(training_difference.replace("step_", "")) * args.gradient_accumulation_steps
starting_epoch = resume_step // len(train_dataloader)
+ completed_steps = resume_step // args.gradient_accumulation_steps
  resume_step -= starting_epoch * len(train_dataloader)
- completed_steps = resume_step // args.gradient_accumulation_stepp

# update the progress_bar if load from checkpoint
progress_bar.update(completed_steps)
Expand Down

0 comments on commit 5bd36a6

Please sign in to comment.