docs: change assert to raise and some small docs #26232

Merged · 6 commits · Sep 28, 2023

Changes from 3 commits
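The core change in this PR swaps `assert` statements for explicit `raise ValueError(...)` calls in the example scripts' argument validation. The practical difference is worth spelling out: `assert` statements are stripped when Python runs with optimizations enabled (`python -O`), so the checks silently vanish, while a raised exception always fires. A minimal sketch of the pattern (hypothetical helper names, not from the diff):

```python
def validate_with_assert(train_file: str) -> None:
    extension = train_file.split(".")[-1]
    # Compiled out entirely under `python -O` -- bad input passes through silently.
    assert extension in ["csv", "json", "txt"], "`train_file` should be a csv, json or txt file."


def validate_with_raise(train_file: str) -> None:
    extension = train_file.split(".")[-1]
    # Always runs, regardless of interpreter flags.
    if extension not in ["csv", "json", "txt"]:
        raise ValueError("`train_file` should be a csv, json or txt file.")
```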
23 changes: 16 additions & 7 deletions examples/pytorch/language-modeling/run_clm_no_trainer.py
@@ -246,13 +246,16 @@ def parse_args():
else:
if args.train_file is not None:
extension = args.train_file.split(".")[-1]
-            assert extension in ["csv", "json", "txt"], "`train_file` should be a csv, json or txt file."
+            if extension not in ["csv", "json", "txt"]:
+                raise ValueError("`train_file` should be a csv, json or txt file.")
if args.validation_file is not None:
extension = args.validation_file.split(".")[-1]
-            assert extension in ["csv", "json", "txt"], "`validation_file` should be a csv, json or txt file."
+            if extension not in ["csv", "json", "txt"]:
+                raise ValueError("`validation_file` should be a csv, json or txt file.")

if args.push_to_hub:
-        assert args.output_dir is not None, "Need an `output_dir` to create a repo when `--push_to_hub` is passed."
+        if args.output_dir is None:
+            raise ValueError("Need an `output_dir` to create a repo when `--push_to_hub` is passed.")

return args

@@ -487,9 +490,11 @@ def group_texts(examples):
train_dataset = lm_datasets["train"]
eval_dataset = lm_datasets["validation"]

-    # Log a few random samples from the training set:
-    for index in random.sample(range(len(train_dataset)), 3):
-        logger.info(f"Sample {index} of the training set: {train_dataset[index]}.")
+    # Conditional for small test subsets
+    if len(train_dataset) > 3:
+        # Log a few random samples from the training set:
+        for index in random.sample(range(len(train_dataset)), 3):
+            logger.info(f"Sample {index} of the training set: {train_dataset[index]}.")

# DataLoaders creation:
train_dataloader = DataLoader(
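The new `len(train_dataset) > 3` guard matters because `random.sample(population, k)` raises `ValueError` whenever `k` exceeds the population size, which breaks runs on tiny test subsets. A standalone illustration (toy data, not from the script):

```python
import random

tiny_dataset = ["example_0", "example_1"]  # smaller than the 3 samples we want to log

if len(tiny_dataset) > 3:
    # Safe: only sample when the dataset is large enough.
    for index in random.sample(range(len(tiny_dataset)), 3):
        print(f"Sample {index}: {tiny_dataset[index]}")
else:
    print("Dataset too small to log 3 random samples; skipping.")

# Without the guard, random.sample(range(2), 3) raises:
# ValueError: Sample larger than population or is negative
```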
@@ -514,6 +519,9 @@ def group_texts(examples):
]
optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=args.learning_rate)

+    # Note -> the training dataloader needs to be prepared before we grab its length below
+    # (because its length will be shorter across multiple processes)
+
# Scheduler and math around the number of training steps.
overrode_max_train_steps = False
num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
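The restored comment describes real Accelerate behavior: `accelerator.prepare(...)` shards a `DataLoader` across processes, so `len(train_dataloader)` shrinks after preparation, and step math computed from the unprepared length overestimates the per-process step count. A toy calculation of the effect (simplified, not Accelerate's exact sharding logic):

```python
import math

dataset_size = 1000
per_device_batch_size = 8
gradient_accumulation_steps = 4
num_processes = 4  # e.g. 4 GPUs

# Length of the dataloader before sharding (what you would see pre-prepare).
batches_unsharded = math.ceil(dataset_size / per_device_batch_size)      # 125
# Each process sees roughly 1/num_processes of the batches after prepare().
batches_per_process = math.ceil(batches_unsharded / num_processes)       # 32

steps_if_measured_too_early = math.ceil(batches_unsharded / gradient_accumulation_steps)  # 32
steps_per_process = math.ceil(batches_per_process / gradient_accumulation_steps)          # 8

print(steps_if_measured_too_early, steps_per_process)
```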
@@ -636,6 +644,7 @@ def group_texts(examples):
if args.output_dir is not None:
output_dir = os.path.join(args.output_dir, output_dir)
accelerator.save_state(output_dir)

if completed_steps >= args.max_train_steps:
break

@@ -655,7 +664,7 @@ def group_texts(examples):
except OverflowError:
perplexity = float("inf")

logger.info(f"epoch {epoch}: perplexity: {perplexity} eval_loss: {eval_loss}")
logger.info(f"epoch {epoch}: perplexity: {perplexity} | eval_loss: {eval_loss}")

if args.with_tracking:
accelerator.log(
9 changes: 6 additions & 3 deletions examples/pytorch/language-modeling/run_mlm_no_trainer.py
@@ -261,7 +261,8 @@ def parse_args():
raise ValueError("`validation_file` should be a csv, json or txt file.")

if args.push_to_hub:
-        assert args.output_dir is not None, "Need an `output_dir` to create a repo when `--push_to_hub` is passed."
+        if args.output_dir is None:
+            raise ValueError("Need an `output_dir` to create a repo when `--push_to_hub` is passed.")

return args

@@ -532,7 +533,9 @@ def group_texts(examples):
train_dataloader = DataLoader(
train_dataset, shuffle=True, collate_fn=data_collator, batch_size=args.per_device_train_batch_size
)
-    eval_dataloader = DataLoader(eval_dataset, collate_fn=data_collator, batch_size=args.per_device_eval_batch_size)
+    eval_dataloader = DataLoader(
+        eval_dataset, collate_fn=data_collator, batch_size=args.per_device_eval_batch_size
+    )

# Optimizer
# Split weights in two groups, one with weight decay and the other not.
@@ -694,7 +697,7 @@ def group_texts(examples):
except OverflowError:
perplexity = float("inf")

logger.info(f"epoch {epoch}: perplexity: {perplexity}")
logger.info(f"epoch {epoch}: perplexity: {perplexity} | eval_loss: {eval_loss}")

if args.with_tracking:
accelerator.log(
2 changes: 1 addition & 1 deletion src/transformers/models/persimmon/modeling_persimmon.py
@@ -72,7 +72,7 @@ def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Optional[int]


# Copied from transformers.models.llama.modeling_llama.LlamaRotaryEmbedding with Llama->Persimmon
-class PersimmonRotaryEmbedding(torch.nn.Module):
+class PersimmonRotaryEmbedding(nn.Module):
def __init__(self, dim, max_position_embeddings=2048, base=10000, device=None):
super().__init__()
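For context, `transformers` modeling files import the alias at module level (`from torch import nn`), and the `# Copied from` marker means the repository's copy-consistency check compares this class against `LlamaRotaryEmbedding`, so the alias has to match the Llama source. A minimal sketch of the convention (a toy class, not the actual Persimmon implementation):

```python
import torch
from torch import nn  # module-level alias, the style used throughout transformers


class ToyRotaryEmbedding(nn.Module):  # `nn.Module`, not `torch.nn.Module`
    def __init__(self, dim: int, base: int = 10000):
        super().__init__()
        # Standard rotary-embedding inverse-frequency buffer (simplified).
        inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2).float() / dim))
        self.register_buffer("inv_freq", inv_freq, persistent=False)
```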
