From 03f99b00a841b1fd1390fa0dc82d3dad8bfe0b36 Mon Sep 17 00:00:00 2001 From: Scott Lee Date: Tue, 23 Jul 2024 12:20:09 -0700 Subject: [PATCH] fix zip docs Signed-off-by: Scott Lee --- python/ray/data/_internal/plan.py | 1 + python/ray/data/dataset.py | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/python/ray/data/_internal/plan.py b/python/ray/data/_internal/plan.py index 6e7dfa7672e3b..0e291e04771f6 100644 --- a/python/ray/data/_internal/plan.py +++ b/python/ray/data/_internal/plan.py @@ -96,6 +96,7 @@ def __repr__(self) -> str: f"ExecutionPlan(" f"dataset_uuid={self._dataset_uuid}, " f"snapshot_operator={self._snapshot_operator}" + f")" ) def get_plan_as_string(self, dataset_cls: Type["Dataset"]) -> str: diff --git a/python/ray/data/dataset.py b/python/ray/data/dataset.py index b2e0eee018292..8e731dd84ac5e 100644 --- a/python/ray/data/dataset.py +++ b/python/ray/data/dataset.py @@ -2256,7 +2256,7 @@ def sort( @PublicAPI(api_group=SMD_API_GROUP) def zip(self, other: "Dataset") -> "Dataset": - """Materialize and zip the columns of this dataset with the columns of another. + """Zip the columns of this dataset with the columns of another. The datasets must have the same number of rows. Their column sets are merged, and any duplicate column names are disambiguated with suffixes like @@ -2277,8 +2277,6 @@ def zip(self, other: "Dataset") -> "Dataset": >>> ds1.zip(ds2).take_batch() {'id': array([0, 1, 2, 3, 4]), 'id_1': array([0, 1, 2, 3, 4])} - Time complexity: O(dataset size / parallelism) - Args: other: The dataset to zip with on the right hand side.