ray-project · amogkam · May 15, 2023 · May 10, 2023 · May 10, 2023 · May 11, 2023
@@ -126,6 +126,7 @@ Inspecting Metadata
    :toctree: doc/
 
    Dataset.count
+   Dataset.columns
    Dataset.schema
    Dataset.default_batch_format
    Dataset.num_blocks

@@ -2158,6 +2158,39 @@ def schema(self, fetch_if_missing: bool = True) -> Optional["Schema"]:
         else:
             return base_schema
 
+    @ConsumptionAPI(
+        if_more_than_read=True,
+        datasource_metadata="schema",
+        extra_condition="or if ``fetch_if_missing=True`` (the default)",
+        pattern="Time complexity:",
+    )
+    def columns(self, fetch_if_missing: bool = True) -> Optional[List[str]]:
+        """Return the column names of this Dataset.
+
+        Time complexity: O(1)
+
+        Example:
+            >>> import ray
+            >>> # Create dataset from synthetic data.
+            >>> ds = ray.data.range(1000)
+            >>> ds.columns()
+            ['id']
+
+        Args:
+            fetch_if_missing: Passed through to ``schema()``. When True (the
+                default), the schema is fetched synchronously if it is not
+                already known; when False, an unknown schema yields None.
+
+        Returns:
+            The ``names`` of this Dataset's schema, or None if the schema is
+            not known and ``fetch_if_missing`` is False.
+        """
+        # Column names come straight from the schema, so defer to schema().
+        known_schema = self.schema(fetch_if_missing=fetch_if_missing)
+        if known_schema is None:
+            return None
+        return known_schema.names
+
     def num_blocks(self) -> int:
         """Return the number of blocks of this dataset.
 
@@ -4361,10 +4394,6 @@ def __del__(self):
             self._current_executor.shutdown()
 
 
-# Backwards compatibility alias.
-Dataset = Dataset
-
-
 @PublicAPI
 class MaterializedDataset(Dataset, Generic[T]):
     """A Dataset materialized in Ray memory, e.g., via `.materialize()`.

@@ -249,6 +249,15 @@ def test_schema_lazy(ray_start_regular_shared):
     assert ds._plan.execute()._num_computed() == 0
 
 
+def test_columns(ray_start_regular_shared):
+    dataset = ray.data.range(1)
+    assert dataset.columns() == dataset.schema().names
+    assert dataset.columns() == ["id"]
+    # With fetching disabled, a mapped dataset is expected to report no columns.
+    dataset = dataset.map(lambda row: row)
+    assert dataset.columns(fetch_if_missing=False) is None
+
+
 def test_schema_repr(ray_start_regular_shared):
     ds = ray.data.from_items([{"text": "spam", "number": 0}])
     # fmt: off