Skip to content

Commit

Permalink
docs: add docs for map, and add num procs to load_dataset (huggingfac…
Browse files Browse the repository at this point in the history
  • Loading branch information
pphuc25 authored Nov 16, 2023
1 parent 85fde09 commit 69c9b89
Showing 1 changed file with 3 additions and 1 deletion.
Original file line number | Diff line number | Diff line change
Expand Up @@ -439,6 +439,7 @@ def main():
data_args.dataset_config_name,
split=data_args.train_split_name,
cache_dir=data_args.dataset_cache_dir,
+ num_proc=data_args.preprocessing_num_workers,
token=True if model_args.use_auth_token else None,
)

Expand All @@ -448,6 +449,7 @@ def main():
data_args.dataset_config_name,
split=data_args.eval_split_name,
cache_dir=data_args.dataset_cache_dir,
+ num_proc=data_args.preprocessing_num_workers,
token=True if model_args.use_auth_token else None,
)

Expand Down Expand Up @@ -551,7 +553,7 @@ def prepare_dataset(batch):
prepare_dataset,
remove_columns=next(iter(raw_datasets.values())).column_names,
num_proc=num_workers,
- desc="preprocess train dataset",
+ desc="preprocess train and eval dataset",
)

# filter training data with inputs longer than max_input_length
Expand Down

0 comments on commit 69c9b89

Please sign in to comment.