From a1bc9b182b8c2788b54380f786fd57d4ca47afae Mon Sep 17 00:00:00 2001 From: Justin Yu Date: Fri, 9 Jun 2023 11:57:37 -0700 Subject: [PATCH 1/5] Run aws cli as anonymous via --no-sign-request (so gce release test can download without credentials) Signed-off-by: Justin Yu --- doc/source/ray-air/examples/gptj_deepspeed_fine_tuning.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/ray-air/examples/gptj_deepspeed_fine_tuning.ipynb b/doc/source/ray-air/examples/gptj_deepspeed_fine_tuning.ipynb index fdb7d94b13b57..1e8a405835bf7 100644 --- a/doc/source/ray-air/examples/gptj_deepspeed_fine_tuning.ipynb +++ b/doc/source/ray-air/examples/gptj_deepspeed_fine_tuning.ipynb @@ -223,7 +223,7 @@ " \"aws\",\n", " \"s3\",\n", " \"sync\",\n", - " \"--quiet\",\n", + " \"--no-sign-request\",\n", " \"s3://large-dl-models-mirror/models--EleutherAI--gpt-j-6B/main/\",\n", " os.path.join(path, \"snapshots\", \"main\"),\n", " ]\n", From 2b9ae59960b4ef3167b0f68767753da36d366d5a Mon Sep 17 00:00:00 2001 From: Justin Yu Date: Wed, 14 Jun 2023 13:58:06 -0700 Subject: [PATCH 2/5] Increase timeout for gptj finetuning from 1 hr -> 1.25 hr Signed-off-by: Justin Yu --- release/release_tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/release/release_tests.yaml b/release/release_tests.yaml index 2d146904daf1a..6226afcd25e38 100644 --- a/release/release_tests.yaml +++ b/release/release_tests.yaml @@ -810,7 +810,7 @@ cluster_compute: gptj_deepspeed_compute_aws.yaml run: - timeout: 3600 + timeout: 4500 script: python test_myst_doc.py --path gptj_deepspeed_fine_tuning.ipynb variations: From 5f6e70053620e2a0c86e77cb6fbe844d9b8cf95a Mon Sep 17 00:00:00 2001 From: Justin Yu Date: Thu, 15 Jun 2023 16:09:05 -0700 Subject: [PATCH 3/5] Bump datasets version, and fix failure caused by datasets==2.13.0 Signed-off-by: Justin Yu --- .../transformers/_transformers_utils.py | 22 ++----------------- python/requirements/ml/requirements_train.txt | 2 +- 2 files 
changed, 3 insertions(+), 21 deletions(-) diff --git a/python/ray/train/huggingface/transformers/_transformers_utils.py b/python/ray/train/huggingface/transformers/_transformers_utils.py index e67bb3342b7ac..520f13974d91c 100644 --- a/python/ray/train/huggingface/transformers/_transformers_utils.py +++ b/python/ray/train/huggingface/transformers/_transformers_utils.py @@ -66,30 +66,12 @@ def get_train_dataloader(self): return trainer -# TODO(ml-team): Replace with a Datasets-HuggingFace integration when available. -class RayDatasetHFIterable(datasets.iterable_dataset.ExamplesIterable): - """HF ExamplesIterable backed by a Dataset.""" - - def __init__(self, dataset: DataIterator) -> None: - self.dataset = dataset - self.generate_examples_fn = self.dataset.iter_rows - - # Required for the superclass - self.kwargs = {} - - def __iter__(self): - for row in self.generate_examples_fn(**self.kwargs): - yield (0, {k: v for k, v in row.items()}) - - def process_dataset_for_hf( dataset: DataIterator, disable_transformers_splitting: bool = False ) -> "IterableDataset": """Converts a Ray Dataset into a HF IterableDataset.""" - hf_iterable = RayDatasetHFIterable(dataset) - - iterable_dataset = datasets.iterable_dataset.IterableDataset( - hf_iterable, format_type="torch" + iterable_dataset = datasets.iterable_dataset.IterableDataset.from_generator( + dataset.iter_rows ).with_format("torch") if isinstance(dataset, StreamSplitDataIterator): diff --git a/python/requirements/ml/requirements_train.txt b/python/requirements/ml/requirements_train.txt index 4a6ac751c6212..e63361de044cd 100644 --- a/python/requirements/ml/requirements_train.txt +++ b/python/requirements/ml/requirements_train.txt @@ -22,5 +22,5 @@ accelerate==0.5.1; python_version <= '3.6' accelerate==0.20.3; python_version > '3.6' # Tracking issue: https://github.com/ray-project/ray/issues/34399 deepspeed==0.8.3; python_version > '3.6' -datasets==2.0.0 +datasets==2.13.0; python_version > '3.6' sentencepiece==0.1.96 From 
98dae60d674e24e3eda5542148ea21b4919b5c3f Mon Sep 17 00:00:00 2001 From: Justin Yu Date: Thu, 15 Jun 2023 23:52:38 -0700 Subject: [PATCH 4/5] Revert "Bump datasets version, and fix failure caused by datasets==2.13.0" This reverts commit 5f6e70053620e2a0c86e77cb6fbe844d9b8cf95a. Signed-off-by: Justin Yu --- .../transformers/_transformers_utils.py | 22 +++++++++++++++++-- python/requirements/ml/requirements_train.txt | 2 +- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/python/ray/train/huggingface/transformers/_transformers_utils.py b/python/ray/train/huggingface/transformers/_transformers_utils.py index 520f13974d91c..e67bb3342b7ac 100644 --- a/python/ray/train/huggingface/transformers/_transformers_utils.py +++ b/python/ray/train/huggingface/transformers/_transformers_utils.py @@ -66,12 +66,30 @@ def get_train_dataloader(self): return trainer +# TODO(ml-team): Replace with a Datasets-HuggingFace integration when available. +class RayDatasetHFIterable(datasets.iterable_dataset.ExamplesIterable): + """HF ExamplesIterable backed by a Dataset.""" + + def __init__(self, dataset: DataIterator) -> None: + self.dataset = dataset + self.generate_examples_fn = self.dataset.iter_rows + + # Required for the superclass + self.kwargs = {} + + def __iter__(self): + for row in self.generate_examples_fn(**self.kwargs): + yield (0, {k: v for k, v in row.items()}) + + def process_dataset_for_hf( dataset: DataIterator, disable_transformers_splitting: bool = False ) -> "IterableDataset": """Converts a Ray Dataset into a HF IterableDataset.""" - iterable_dataset = datasets.iterable_dataset.IterableDataset.from_generator( - dataset.iter_rows + hf_iterable = RayDatasetHFIterable(dataset) + + iterable_dataset = datasets.iterable_dataset.IterableDataset( + hf_iterable, format_type="torch" ).with_format("torch") if isinstance(dataset, StreamSplitDataIterator): diff --git a/python/requirements/ml/requirements_train.txt b/python/requirements/ml/requirements_train.txt index 
e63361de044cd..4a6ac751c6212 100644 --- a/python/requirements/ml/requirements_train.txt +++ b/python/requirements/ml/requirements_train.txt @@ -22,5 +22,5 @@ accelerate==0.5.1; python_version <= '3.6' accelerate==0.20.3; python_version > '3.6' # Tracking issue: https://github.com/ray-project/ray/issues/34399 deepspeed==0.8.3; python_version > '3.6' -datasets==2.13.0; python_version > '3.6' +datasets==2.0.0 sentencepiece==0.1.96 From 5b0f340022fe9419dd95b7a99a66fb3c2d1fc7c2 Mon Sep 17 00:00:00 2001 From: Justin Yu Date: Fri, 16 Jun 2023 00:25:11 -0700 Subject: [PATCH 5/5] Use _BaseExamplesIterable instead and call super init Signed-off-by: Justin Yu --- .../transformers/_transformers_utils.py | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/python/ray/train/huggingface/transformers/_transformers_utils.py b/python/ray/train/huggingface/transformers/_transformers_utils.py index e67bb3342b7ac..32eb13bc0714c 100644 --- a/python/ray/train/huggingface/transformers/_transformers_utils.py +++ b/python/ray/train/huggingface/transformers/_transformers_utils.py @@ -1,6 +1,6 @@ import logging from pathlib import Path -from typing import TYPE_CHECKING, Any, Optional, Tuple, Type +from typing import TYPE_CHECKING, Any, Iterator, Optional, Tuple, Type import datasets.iterable_dataset import transformers.trainer @@ -67,19 +67,22 @@ def get_train_dataloader(self): # TODO(ml-team): Replace with a Datasets-HuggingFace integration when available. -class RayDatasetHFIterable(datasets.iterable_dataset.ExamplesIterable): - """HF ExamplesIterable backed by a Dataset.""" +class RayDatasetHFIterable(datasets.iterable_dataset._BaseExamplesIterable): + """HF ``_BaseExamplesIterable`` backed by a ``ray.data.DataIterator``. + + The other abstract methods of shuffling and sharding the data are not implemented, + since those operations should be done by Ray Data. 
For example, the dataset + is already sharded to each data parallel worker, and transformers splitting is disabled + (see ``wrap_transformers_trainer`` above). + """ def __init__(self, dataset: DataIterator) -> None: + super().__init__() self.dataset = dataset - self.generate_examples_fn = self.dataset.iter_rows - - # Required for the superclass - self.kwargs = {} - def __iter__(self): - for row in self.generate_examples_fn(**self.kwargs): - yield (0, {k: v for k, v in row.items()}) + def __iter__(self) -> Iterator[Tuple[int, dict]]: + for idx, row in enumerate(self.dataset.iter_rows()): + yield (idx, {k: v for k, v in row.items()}) def process_dataset_for_hf(