diff --git a/doc/source/data/doc_code/quick_start.py b/doc/source/data/doc_code/quick_start.py
deleted file mode 100644
index 5d63ddbaab557..0000000000000
--- a/doc/source/data/doc_code/quick_start.py
+++ /dev/null
@@ -1,91 +0,0 @@
-# flake8: noqa
-
-# fmt: off
-# __create_from_python_begin__
-import ray
-
-# Create a Dataset of Python objects.
-ds = ray.data.range(10000)
-# -> Dataset(num_blocks=200, num_rows=10000, schema=<class 'int'>)
-
-ds.take(5)
-# -> [0, 1, 2, 3, 4]
-
-ds.schema()
-# <class 'int'>
-
-# Create a Dataset from Python objects, which are held as Arrow records.
-ds = ray.data.from_items([
-    {"sepal.length": 5.1, "sepal.width": 3.5,
-     "petal.length": 1.4, "petal.width": 0.2, "variety": "Setosa"},
-    {"sepal.length": 4.9, "sepal.width": 3.0,
-     "petal.length": 1.4, "petal.width": 0.2, "variety": "Setosa"},
-    {"sepal.length": 4.7, "sepal.width": 3.2,
-     "petal.length": 1.3, "petal.width": 0.2, "variety": "Setosa"},
-])
-# Dataset(num_blocks=3, num_rows=3,
-#         schema={sepal.length: float64, sepal.width: float64,
-#                 petal.length: float64, petal.width: float64, variety: object})
-
-ds.show()
-# -> {'sepal.length': 5.1, 'sepal.width': 3.5,
-#     'petal.length': 1.4, 'petal.width': 0.2, 'variety': 'Setosa'}
-# -> {'sepal.length': 4.9, 'sepal.width': 3.0,
-#     'petal.length': 1.4, 'petal.width': 0.2, 'variety': 'Setosa'}
-# -> {'sepal.length': 4.7, 'sepal.width': 3.2,
-#     'petal.length': 1.3, 'petal.width': 0.2, 'variety': 'Setosa'}
-
-ds.schema()
-# -> sepal.length: double
-# -> sepal.width: double
-# -> petal.length: double
-# -> petal.width: double
-# -> variety: string
-# __create_from_python_end__
-# fmt: on
-
-# fmt: off
-# __create_from_files_begin__
-# Create from CSV.
-ds = ray.data.read_csv("s3://anonymous@air-example-data/iris.csv")
-# Dataset(num_blocks=1, num_rows=150,
-#         schema={sepal length (cm): double, sepal width (cm): double,
-#                 petal length (cm): double, petal width (cm): double, target: int64})
-
-# Create from Parquet.
-ds = ray.data.read_parquet("s3://anonymous@air-example-data/iris.parquet")
-# Dataset(num_blocks=1, num_rows=150,
-#         schema={sepal.length: double, sepal.width: double,
-#                 petal.length: double, petal.width: double, variety: string})
-
-# __create_from_files_end__
-# fmt: on
-
-# fmt: off
-# __data_transform_begin__
-import pandas
-
-# Create 10 blocks for parallelism.
-ds = ds.repartition(10)
-# Dataset(num_blocks=10, num_rows=150,
-#         schema={sepal.length: float64, sepal.width: float64,
-#                 petal.length: float64, petal.width: float64, variety: object})
-
-# Find rows with sepal.length < 5.5 and petal.length > 3.5.
-def transform_batch(df: pandas.DataFrame) -> pandas.DataFrame:
-    return df[(df["sepal.length"] < 5.5) & (df["petal.length"] > 3.5)]
-
-transformed_ds = ds.map_batches(transform_batch, batch_format="pandas")
-# Dataset(num_blocks=10, num_rows=3,
-#         schema={sepal.length: float64, sepal.width: float64,
-#                 petal.length: float64, petal.width: float64, variety: object})
-
-transformed_ds.show()
-# -> {'sepal.length': 5.2, 'sepal.width': 2.7,
-#     'petal.length': 3.9, 'petal.width': 1.4, 'variety': 'Versicolor'}
-# -> {'sepal.length': 5.4, 'sepal.width': 3.0,
-#     'petal.length': 4.5, 'petal.width': 1.5, 'variety': 'Versicolor'}
-# -> {'sepal.length': 4.9, 'sepal.width': 2.5,
-#     'petal.length': 4.5, 'petal.width': 1.7, 'variety': 'Virginica'}
-# __data_transform_end__
-# fmt: on
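The file deleted above demonstrated pandas-based filtering with `Dataset.map_batches`. That pattern still works against the Ray Data API used elsewhere in this diff; the following is a minimal, self-contained sketch, not part of the patch itself. It assumes the same public `s3://anonymous@air-example-data/iris.csv` dataset and the column names shown in the deleted file's schema comments; `filter_rows` is a hypothetical helper name.

```python
import pandas as pd
import ray

ds = ray.data.read_csv("s3://anonymous@air-example-data/iris.csv")

def filter_rows(df: pd.DataFrame) -> pd.DataFrame:
    # Keep rows with sepal length < 5.5 and petal length > 3.5,
    # mirroring the filter in the deleted quick start example.
    return df[(df["sepal length (cm)"] < 5.5) & (df["petal length (cm)"] > 3.5)]

# batch_format="pandas" hands each batch to the function as a DataFrame.
filtered_ds = ds.map_batches(filter_rows, batch_format="pandas")
filtered_ds.show()
```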
diff --git a/doc/source/ray-overview/getting-started.md b/doc/source/ray-overview/getting-started.md
index c3d9d61dc61b2..8a0fce79f92a0 100644
--- a/doc/source/ray-overview/getting-started.md
+++ b/doc/source/ray-overview/getting-started.md
@@ -4,7 +4,7 @@
 (gentle-intro)=
 # Getting Started
 
-Use Ray to scale applications on your laptop or the cloud. Choose the right guide for your task. 
+Use Ray to scale applications on your laptop or the cloud. Choose the right guide for your task.
 * Scale end-to-end ML applications: [Ray AI Runtime Quickstart](#ray-ai-runtime-quickstart)
 * Scale single ML workloads: [Ray Libraries Quickstart](#ray-libraries-quickstart)
 * Scale general Python applications: [Ray Core Quickstart](#ray-core-quickstart)
@@ -94,49 +94,54 @@ Learn more about Ray AIR
 
 ## Ray Libraries Quickstart
 
-Use individual libraries for single ML workloads, without having to install the full AI Runtime package. Click on the dropdowns for your workload below. 
+Use individual libraries for single ML workloads, without having to install the full AI Runtime package. Click on the dropdowns for your workload below.
 
 `````{dropdown} ray Data: Scalable Datasets for ML
 :animate: fade-in-slide-down
 
-Ray Data is the standard way to load and exchange data in Ray libraries and applications.
-Ray Data provides basic distributed data transformations such as `map`, `filter`, and `repartition`.
-They are compatible with a variety of file formats, datasources, and distributed frameworks.
+Scale offline inference and training ingest with [Ray Data](data_key_concepts) --
+a data processing library designed for ML.
+
+To learn more, see [Offline batch inference](batch_inference_overview) and
+[Data preprocessing and ingest for ML training](ml_ingest_overview).
 
 ````{note}
-To run this example install Ray Data and Dask:
+To run this example, install Ray Data:
 
 ```bash
-pip install -U "ray[data]" dask
+pip install -U "ray[data]"
 ```
 ````
 
-Get started by creating a Dataset from synthetic data using ``ray.data.range()`` and ``ray.data.from_items()``.
-A Dataset can hold either plain Python objects (schema is a Python type), or Arrow records (schema is Arrow).
+```{testcode}
+from typing import Dict
+import numpy as np
+import ray
 
-```{literalinclude} ../data/doc_code/quick_start.py
-:language: python
-:start-after: __create_from_python_begin__
-:end-before: __create_from_python_end__
-```
+# Create datasets from on-disk files, Python objects, and cloud storage like S3.
+ds = ray.data.read_csv("s3://anonymous@ray-example-data/iris.csv")
+
-Datasets can be created from files on local disk or remote datasources such as S3. Any filesystem
-[supported by pyarrow](http://arrow.apache.org/docs/python/generated/pyarrow.fs.FileSystem.html) can be used to specify file locations.
-You can also create a ``Dataset`` from existing data in the Ray object store or Ray-compatible distributed DataFrames:
+# Apply functions to transform data. Ray Data executes transformations in parallel.
+def compute_area(batch: Dict[str, np.ndarray]) -> Dict[str, np.ndarray]:
+    length = batch["petal length (cm)"]
+    width = batch["petal width (cm)"]
+    batch["petal area (cm^2)"] = length * width
+    return batch
 
-```{literalinclude} ../data/doc_code/quick_start.py
-:language: python
-:start-after: __create_from_files_begin__
-:end-before: __create_from_files_end__
+transformed_ds = ds.map_batches(compute_area)
+
+# Iterate over batches of data.
+for batch in transformed_ds.iter_batches(batch_size=4):
+    print(batch)
+
+# Save dataset contents to on-disk files or cloud storage.
+transformed_ds.write_parquet("local:///tmp/iris/")
 ```
 
-Datasets can be transformed in parallel using ``.map()``.
-Transformations are executed *eagerly* and block until the operation is finished.
-Datasets also supports ``.filter()`` and ``.flat_map()``.
+```{testoutput}
+:hide:
 
-```{literalinclude} ../data/doc_code/quick_start.py
-:language: python
-:start-after: __data_transform_begin__
-:end-before: __data_transform_end__
+...
 ```
 
 ```{button-ref} ../data/data
@@ -152,7 +157,7 @@ Learn more about Ray Data
 :animate: fade-in-slide-down
 
 Ray Train abstracts away the complexity of setting up a distributed training
-system. 
+system.
 
 `````{tab-set}
 
@@ -296,8 +301,8 @@ Learn more about Ray Train
 `````{dropdown} ray Tune: Hyperparameter Tuning at Scale
 :animate: fade-in-slide-down
 
-[Tune](../tune/index.rst) is a library for hyperparameter tuning at any scale. 
-With Tune, you can launch a multi-node distributed hyperparameter sweep in less than 10 lines of code. 
+[Tune](../tune/index.rst) is a library for hyperparameter tuning at any scale.
+With Tune, you can launch a multi-node distributed hyperparameter sweep in less than 10 lines of code.
 Tune supports any deep learning framework, including PyTorch, TensorFlow, and Keras.
 
 ````{note}
@@ -336,7 +341,7 @@ Learn more about Ray Tune
 `````{dropdown} ray Serve: Scalable Model Serving
 :animate: fade-in-slide-down
 
-[Ray Serve](../serve/index) is a scalable model-serving library built on Ray. 
+[Ray Serve](../serve/index) is a scalable model-serving library built on Ray.
 
 ````{note}
 To run this example, install Ray Serve and scikit-learn:
@@ -400,7 +405,7 @@ Learn more about Ray RLlib
 ## Ray Core Quickstart
 
 Turn functions and classes easily into Ray tasks and actors,
-for Python and Java, with simple primitives for building and running distributed applications. 
+for Python and Java, with simple primitives for building and running distributed applications.
 
 ``````{dropdown} ray Core: Parallelizing Functions with Ray Tasks
 
@@ -445,7 +450,7 @@ To run this example, add the [ray-api](https://mvnrepository.com/artifact/io.ray
 ```
 
 Use `Ray.init` to initialize the Ray runtime.
-Then use `Ray.task(...).remote()` to convert any Java static method into a Ray task. 
+Then use `Ray.task(...).remote()` to convert any Java static method into a Ray task.
 The task runs asynchronously in a remote worker process.
 The `remote` method returns an ``ObjectRef``, and you can fetch the actual result with ``get``.
 
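For readers following along in Python rather than Java, the same task pattern uses the `@ray.remote` decorator. This sketch is not part of the diff; the `square` function simply mirrors the Java example below.

```python
import ray

ray.init()

# @ray.remote converts a plain Python function into a Ray task.
@ray.remote
def square(x: int) -> int:
    return x * x

# Each .remote() call runs asynchronously in a remote worker process and
# returns an object ref; ray.get() fetches the actual results.
object_refs = [square.remote(i) for i in range(4)]
print(ray.get(object_refs))  # [0, 1, 4, 9]
```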
@@ -461,7 +466,7 @@ public class RayDemo {
   public static int square(int x) {
     return x * x;
   }
-  
+
   public static void main(String[] args) {
     // Initialize Ray runtime.
     Ray.init();
@@ -556,18 +561,18 @@ import java.util.stream.Collectors;
 
 public class RayDemo {
 
   public static class Counter {
-    
+
     private int value = 0;
-    
+
     public void increment() {
       this.value += 1;
     }
-    
+
     public int read() {
       return this.value;
     }
   }
-  
+
   public static void main(String[] args) {
     // Initialize Ray runtime.
     Ray.init();
@@ -577,7 +582,7 @@ public class RayDemo {
     for (int i = 0; i < 4; i++) {
       counters.add(Ray.actor(Counter::new).remote());
     }
-    
+
     // Invoke the `increment` method on each actor.
     // This will send an actor task to each remote actor.
     for (ActorHandle<Counter> counter : counters) {
@@ -712,12 +717,12 @@ Run the following code.
 def task_running_300_seconds():
     print("Start!")
     time.sleep(300)
-    
+
 @ray.remote
 class Actor:
     def __init__(self):
         print("Actor created")
-    
+
 # Create 2 tasks
 tasks = [task_running_300_seconds.remote() for _ in range(2)]
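The Java `Counter` actor above also has a direct Python counterpart. As a sketch, not taken from the diff, the class below mirrors the Java example's fields and methods; Ray initializes automatically on the first `.remote()` call.

```python
import ray

# @ray.remote on a class creates an actor: a stateful remote worker process.
@ray.remote
class Counter:
    def __init__(self):
        self.value = 0

    def increment(self):
        self.value += 1

    def read(self):
        return self.value

# Create four actors, send an increment task to each, then read their state.
counters = [Counter.remote() for _ in range(4)]
ray.get([counter.increment.remote() for counter in counters])
print(ray.get([counter.read.remote() for counter in counters]))  # [1, 1, 1, 1]
```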