[Data] Update Dataset.zip() docs (#46757)

Update the Dataset.zip() docs to remove the incorrect statement about materializing the dataset. Fix a typo in the ExecutionPlan.__repr__() string (missing closing parenthesis). Signed-off-by: Scott Lee <sjl@anyscale.com>
ray-project · Jul 24, 2024 · dca3fb5 · dca3fb5
1 parent 9cd160d
commit dca3fb5
Show file tree

Hide file tree

Showing 2 changed files with 2 additions and 3 deletions.
diff --git a/python/ray/data/_internal/plan.py b/python/ray/data/_internal/plan.py
@@ -96,6 +96,7 @@ def __repr__(self) -> str:
             f"ExecutionPlan("
             f"dataset_uuid={self._dataset_uuid}, "
             f"snapshot_operator={self._snapshot_operator}"
+            f")"
         )
 
     def get_plan_as_string(self, dataset_cls: Type["Dataset"]) -> str:

diff --git a/python/ray/data/dataset.py b/python/ray/data/dataset.py
@@ -2256,7 +2256,7 @@ def sort(
 
     @PublicAPI(api_group=SMD_API_GROUP)
     def zip(self, other: "Dataset") -> "Dataset":
-        """Materialize and zip the columns of this dataset with the columns of another.
+        """Zip the columns of this dataset with the columns of another.
 
         The datasets must have the same number of rows. Their column sets are
         merged, and any duplicate column names are disambiguated with suffixes like
@@ -2277,8 +2277,6 @@ def zip(self, other: "Dataset") -> "Dataset":
             >>> ds1.zip(ds2).take_batch()
             {'id': array([0, 1, 2, 3, 4]), 'id_1': array([0, 1, 2, 3, 4])}
 
-        Time complexity: O(dataset size / parallelism)
-
         Args:
             other: The dataset to zip with on the right hand side.