From 4af01fb1c013eefa3d49f09eeb619c339816c8a9 Mon Sep 17 00:00:00 2001
From: pphuc25
Date: Tue, 19 Sep 2023 01:25:04 +0700
Subject: [PATCH 1/6] docs: change assert to raise and some small doc fixes

---
 .../language-modeling/run_clm_no_trainer.py | 15 +++++++++++----
 .../language-modeling/run_mlm_no_trainer.py |  5 +++--
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/examples/pytorch/language-modeling/run_clm_no_trainer.py b/examples/pytorch/language-modeling/run_clm_no_trainer.py
index 3de3c7219c63..160e660c9789 100755
--- a/examples/pytorch/language-modeling/run_clm_no_trainer.py
+++ b/examples/pytorch/language-modeling/run_clm_no_trainer.py
@@ -246,13 +246,16 @@ def parse_args():
     else:
         if args.train_file is not None:
             extension = args.train_file.split(".")[-1]
-            assert extension in ["csv", "json", "txt"], "`train_file` should be a csv, json or txt file."
+            if extension not in ["csv", "json", "txt"]:
+                raise ValueError("`train_file` should be a csv, json or txt file.")
         if args.validation_file is not None:
             extension = args.validation_file.split(".")[-1]
-            assert extension in ["csv", "json", "txt"], "`validation_file` should be a csv, json or txt file."
+            if extension not in ["csv", "json", "txt"]:
+                raise ValueError("`validation_file` should be a csv, json or txt file.")
 
     if args.push_to_hub:
-        assert args.output_dir is not None, "Need an `output_dir` to create a repo when `--push_to_hub` is passed."
+        if args.output_dir is None:
+            raise ValueError("Need an `output_dir` to create a repo when `--push_to_hub` is passed.")
 
     return args
 
@@ -514,6 +517,9 @@ def group_texts(examples):
     ]
     optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=args.learning_rate)
 
+    # Note -> the training dataloader needs to be prepared before we grab its length below (because its length
+    # will be shorter in a multi-process setup)
+
     # Scheduler and math around the number of training steps.
     overrode_max_train_steps = False
     num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
@@ -636,6 +642,7 @@ def group_texts(examples):
                     if args.output_dir is not None:
                         output_dir = os.path.join(args.output_dir, output_dir)
                     accelerator.save_state(output_dir)
+
             if completed_steps >= args.max_train_steps:
                 break
 
@@ -655,7 +662,7 @@ def group_texts(examples):
         except OverflowError:
             perplexity = float("inf")
 
-        logger.info(f"epoch {epoch}: perplexity: {perplexity} eval_loss: {eval_loss}")
+        logger.info(f"epoch {epoch}: perplexity: {perplexity} | eval_loss: {eval_loss}")
 
         if args.with_tracking:
             accelerator.log(
diff --git a/examples/pytorch/language-modeling/run_mlm_no_trainer.py b/examples/pytorch/language-modeling/run_mlm_no_trainer.py
index 3d035fded543..0a9fee6b230c 100755
--- a/examples/pytorch/language-modeling/run_mlm_no_trainer.py
+++ b/examples/pytorch/language-modeling/run_mlm_no_trainer.py
@@ -261,7 +261,8 @@ def parse_args():
             raise ValueError("`validation_file` should be a csv, json or txt file.")
 
     if args.push_to_hub:
-        assert args.output_dir is not None, "Need an `output_dir` to create a repo when `--push_to_hub` is passed."
+        if args.output_dir is None:
+            raise ValueError("Need an `output_dir` to create a repo when `--push_to_hub` is passed.")
 
     return args
 
@@ -694,7 +695,7 @@ def group_texts(examples):
         except OverflowError:
             perplexity = float("inf")
 
-        logger.info(f"epoch {epoch}: perplexity: {perplexity}")
+        logger.info(f"epoch {epoch}: perplexity: {perplexity} | eval_loss: {eval_loss}")
 
         if args.with_tracking:
             accelerator.log(

From d9ca970f81e3b59c584fe928057450759ba909b5 Mon Sep 17 00:00:00 2001
From: pphuc25
Date: Tue, 19 Sep 2023 01:57:57 +0700
Subject: [PATCH 2/6] docs: add a guard for small datasets and some documentation

---
 examples/pytorch/language-modeling/run_clm_no_trainer.py | 8 +++++---
 examples/pytorch/language-modeling/run_mlm_no_trainer.py | 4 +++-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/examples/pytorch/language-modeling/run_clm_no_trainer.py b/examples/pytorch/language-modeling/run_clm_no_trainer.py
index 160e660c9789..bdd598e622c0 100755
--- a/examples/pytorch/language-modeling/run_clm_no_trainer.py
+++ b/examples/pytorch/language-modeling/run_clm_no_trainer.py
@@ -490,9 +490,11 @@ def group_texts(examples):
     train_dataset = lm_datasets["train"]
     eval_dataset = lm_datasets["validation"]
 
-    # Log a few random samples from the training set:
-    for index in random.sample(range(len(train_dataset)), 3):
-        logger.info(f"Sample {index} of the training set: {train_dataset[index]}.")
+    # Conditional for small test subsets
+    if len(train_dataset) > 3:
+        # Log a few random samples from the training set:
+        for index in random.sample(range(len(train_dataset)), 3):
+            logger.info(f"Sample {index} of the training set: {train_dataset[index]}.")
 
     # DataLoaders creation:
     train_dataloader = DataLoader(
diff --git a/examples/pytorch/language-modeling/run_mlm_no_trainer.py b/examples/pytorch/language-modeling/run_mlm_no_trainer.py
index 0a9fee6b230c..0e8c90340980 100755
--- a/examples/pytorch/language-modeling/run_mlm_no_trainer.py
+++ b/examples/pytorch/language-modeling/run_mlm_no_trainer.py
@@ -533,7 +533,9 @@ def group_texts(examples):
     train_dataloader = DataLoader(
         train_dataset, shuffle=True, collate_fn=data_collator, batch_size=args.per_device_train_batch_size
     )
-    eval_dataloader = DataLoader(eval_dataset, collate_fn=data_collator, batch_size=args.per_device_eval_batch_size)
+    eval_dataloader = DataLoader(
+        eval_dataset, collate_fn=data_collator, batch_size=args.per_device_eval_batch_size
+    )
 
     # Optimizer
     # Split weights in two groups, one with weight decay and the other not.
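Background on the guard added in PATCH 2: random.sample(population, k) raises ValueError when k is larger than the population, so unconditionally logging three random samples crashes on training sets with fewer than three examples (for instance, a tiny debugging subset). The sketch below only illustrates that behaviour; it is not part of the diffs, and the variable names are hypothetical.

    import random

    tiny_train_dataset = ["example 0", "example 1"]  # hypothetical two-example debug subset

    # Unguarded: random.sample(range(len(tiny_train_dataset)), 3) would raise
    # ValueError: Sample larger than population or is negative

    # Guarded, as in PATCH 2: skip the sample logging entirely for tiny datasets.
    if len(tiny_train_dataset) > 3:
        for index in random.sample(range(len(tiny_train_dataset)), 3):
            print(f"Sample {index} of the training set: {tiny_train_dataset[index]}.")

    # An alternative that still logs something when the dataset is tiny:
    for index in random.sample(range(len(tiny_train_dataset)), min(3, len(tiny_train_dataset))):
        print(f"Sample {index} of the training set: {tiny_train_dataset[index]}.")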
From 3761cec1d64ac02b13178c2e3e4a92297175a019 Mon Sep 17 00:00:00 2001
From: pphuc25
Date: Thu, 21 Sep 2023 23:53:04 +0700
Subject: [PATCH 3/6] fix: use nn.Module for PersimmonRotaryEmbedding

---
 src/transformers/models/persimmon/modeling_persimmon.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/models/persimmon/modeling_persimmon.py b/src/transformers/models/persimmon/modeling_persimmon.py
index 5c6cde7f8a6d..654660e4fa9f 100644
--- a/src/transformers/models/persimmon/modeling_persimmon.py
+++ b/src/transformers/models/persimmon/modeling_persimmon.py
@@ -72,7 +72,7 @@ def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Optional[int]
 
 
 # Copied from transformers.models.llama.modeling_llama.LlamaRotaryEmbedding with Llama->Persimmon
-class PersimmonRotaryEmbedding(torch.nn.Module):
+class PersimmonRotaryEmbedding(nn.Module):
     def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None):
         super().__init__()
 

From a8c267d23bbbeec9047d140e1d59de2b07e33b3c Mon Sep 17 00:00:00 2001
From: pphuc25
Date: Tue, 26 Sep 2023 12:28:37 +0700
Subject: [PATCH 4/6] fix: revert the eval_dataloader formatting change

---
 examples/pytorch/language-modeling/run_mlm_no_trainer.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/examples/pytorch/language-modeling/run_mlm_no_trainer.py b/examples/pytorch/language-modeling/run_mlm_no_trainer.py
index 0e8c90340980..0a9fee6b230c 100755
--- a/examples/pytorch/language-modeling/run_mlm_no_trainer.py
+++ b/examples/pytorch/language-modeling/run_mlm_no_trainer.py
@@ -533,9 +533,7 @@ def group_texts(examples):
     train_dataloader = DataLoader(
         train_dataset, shuffle=True, collate_fn=data_collator, batch_size=args.per_device_train_batch_size
    )
-    eval_dataloader = DataLoader(
-        eval_dataset, collate_fn=data_collator, batch_size=args.per_device_eval_batch_size
-    )
+    eval_dataloader = DataLoader(eval_dataset, collate_fn=data_collator, batch_size=args.per_device_eval_batch_size)
 
     # Optimizer
     # Split weights in two groups, one with weight decay and the other not.
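Background on PATCH 3: the Persimmon modeling file imports `nn` (`from torch import nn`), and `nn.Module` is the same class object as `torch.nn.Module`, so switching the base class is behaviour-preserving; the point is to keep the class textually identical to the `LlamaRotaryEmbedding` source named in the "# Copied from ..." marker, which the repository enforces with a textual copy-consistency check (utils/check_copies.py). PATCH 4 likewise reverts a purely cosmetic reformatting of the eval_dataloader call. The snippet below is only an illustrative sketch of the `nn.Module` equivalence, not code from the patch; the class name is hypothetical.

    import torch
    from torch import nn

    # Both spellings resolve to the same class object, so subclassing either is equivalent.
    assert nn.Module is torch.nn.Module

    class RotaryEmbeddingSketch(nn.Module):  # hypothetical stand-in for PersimmonRotaryEmbedding
        def __init__(self, dim: int):
            super().__init__()
            self.dim = dim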
From 1f9b28c2b01b84ae448161d6a2eab9ed4296f7b4 Mon Sep 17 00:00:00 2001
From: pphuc25
Date: Wed, 27 Sep 2023 17:22:06 +0700
Subject: [PATCH 5/6] chore: revert logging

---
 examples/pytorch/language-modeling/run_clm_no_trainer.py | 2 +-
 examples/pytorch/language-modeling/run_mlm_no_trainer.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/pytorch/language-modeling/run_clm_no_trainer.py b/examples/pytorch/language-modeling/run_clm_no_trainer.py
index bdd598e622c0..2d0d67e33e8d 100755
--- a/examples/pytorch/language-modeling/run_clm_no_trainer.py
+++ b/examples/pytorch/language-modeling/run_clm_no_trainer.py
@@ -664,7 +664,7 @@ def group_texts(examples):
         except OverflowError:
             perplexity = float("inf")
 
-        logger.info(f"epoch {epoch}: perplexity: {perplexity} | eval_loss: {eval_loss}")
+        logger.info(f"epoch {epoch}: perplexity: {perplexity} eval_loss: {eval_loss}")
 
         if args.with_tracking:
             accelerator.log(
diff --git a/examples/pytorch/language-modeling/run_mlm_no_trainer.py b/examples/pytorch/language-modeling/run_mlm_no_trainer.py
index 0a9fee6b230c..749810cd31df 100755
--- a/examples/pytorch/language-modeling/run_mlm_no_trainer.py
+++ b/examples/pytorch/language-modeling/run_mlm_no_trainer.py
@@ -695,7 +695,7 @@ def group_texts(examples):
         except OverflowError:
             perplexity = float("inf")
 
-        logger.info(f"epoch {epoch}: perplexity: {perplexity} | eval_loss: {eval_loss}")
+        logger.info(f"epoch {epoch}: perplexity: {perplexity} eval_loss: {eval_loss}")
 
         if args.with_tracking:
             accelerator.log(

From 8b6c615365a8ac34df65708b42a420ce86b1e636 Mon Sep 17 00:00:00 2001
From: pphuc25
Date: Wed, 27 Sep 2023 17:44:17 +0700
Subject: [PATCH 6/6] chore: revert

---
 .../pytorch/language-modeling/run_clm_no_trainer.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/examples/pytorch/language-modeling/run_clm_no_trainer.py b/examples/pytorch/language-modeling/run_clm_no_trainer.py
index 2d0d67e33e8d..b02a89e6dfcc 100755
--- a/examples/pytorch/language-modeling/run_clm_no_trainer.py
+++ b/examples/pytorch/language-modeling/run_clm_no_trainer.py
@@ -490,11 +490,9 @@ def group_texts(examples):
     train_dataset = lm_datasets["train"]
     eval_dataset = lm_datasets["validation"]
 
-    # Conditional for small test subsets
-    if len(train_dataset) > 3:
-        # Log a few random samples from the training set:
-        for index in random.sample(range(len(train_dataset)), 3):
-            logger.info(f"Sample {index} of the training set: {train_dataset[index]}.")
+    # Log a few random samples from the training set:
+    for index in random.sample(range(len(train_dataset)), 3):
+        logger.info(f"Sample {index} of the training set: {train_dataset[index]}.")
 
     # DataLoaders creation:
     train_dataloader = DataLoader(
@@ -519,9 +517,6 @@ def group_texts(examples):
     ]
     optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=args.learning_rate)
 
-    # Note -> the training dataloader needs to be prepared before we grab its length below (because its length
-    # will be shorter in a multi-process setup)
-
     # Scheduler and math around the number of training steps.
     overrode_max_train_steps = False
     num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
@@ -644,7 +639,6 @@ def group_texts(examples):
                     if args.output_dir is not None:
                         output_dir = os.path.join(args.output_dir, output_dir)
                     accelerator.save_state(output_dir)
-
             if completed_steps >= args.max_train_steps:
                 break
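The recurring pattern across this series is replacing argument-validation `assert` statements with explicit `raise ValueError(...)`. Assertions are stripped when Python runs with the -O/-OO flags (or PYTHONOPTIMIZE), so they cannot be relied on to validate user input, and an explicit exception also gives a clearer message than a bare AssertionError. A condensed, self-contained sketch of the pattern used in both scripts (the helper name is hypothetical, not part of the example scripts):

    def validate_data_file(path: str, arg_name: str) -> None:
        # Explicit exception instead of `assert`: still enforced under `python -O`
        # and produces a readable error message for the user.
        extension = path.split(".")[-1]
        if extension not in ["csv", "json", "txt"]:
            raise ValueError(f"`{arg_name}` should be a csv, json or txt file.")

    validate_data_file("train.json", "train_file")      # passes silently
    # validate_data_file("train.parquet", "train_file") # would raise ValueError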