[data] Update the strict mode message to be less confusing (ray-project#35185)
architkulkarni · May 16, 2023 · 6df3629 · 6df3629
1 parent 256c035
commit 6df3629
Show file tree

Hide file tree

Showing 7 changed files with 23 additions and 31 deletions.
diff --git a/doc/source/data/faq.rst b/doc/source/data/faq.rst
@@ -287,17 +287,14 @@ Ray Data doesn't perform query optimization, so some manual performance
 tuning may be necessary depending on your use case and data scale. Please see our
 :ref:`performance tuning guide <data_performance_tips>` for more information.
 
-What is strict mode?
-====================
+Migrating to strict mode
+========================
 
 In Ray 2.5, Ray Data by default always requires data schemas, dropping support for
 standalone Python objects. In addition to unification and simplicity benefits, this
 aligns the Ray Data API closer to industry-standard distributed data APIs like Apache
 Spark and also emerging standards for machine learning datasets like HuggingFace.
 
-Migrating to strict mode
-~~~~~~~~~~~~~~~~~~~~~~~~
-
 You can disable strict mode temporarily by setting the environment variable
 ``RAY_DATA_STRICT_MODE=0`` on all cluster processes. Strict mode will not be
 possible to disable in future releases.

diff --git a/python/ray/data/_internal/compute.py b/python/ray/data/_internal/compute.py
@@ -223,7 +223,7 @@ def __init__(
         if legacy_min_size is not None or legacy_max_size is not None:
             if ctx.strict_mode:
                 raise StrictModeError(
-                    "In strict mode, ActorPoolStrategy requires min_size and "
+                    "In Ray 2.5, ActorPoolStrategy requires min_size and "
                     "max_size to be explicit kwargs."
                 )
             else:
@@ -503,7 +503,7 @@ def get_compute(compute_spec: Union[str, ComputeStrategy]) -> ComputeStrategy:
         compute_spec, (TaskPoolStrategy, ActorPoolStrategy)
     ):
         raise StrictModeError(
-            "In strict mode, the compute spec must be either "
+            "In Ray 2.5, the compute spec must be either "
             f"TaskPoolStrategy or ActorPoolStategy, was: {compute_spec}."
         )
     elif not compute_spec or compute_spec == "tasks":

diff --git a/python/ray/data/_internal/planner/map_rows.py b/python/ray/data/_internal/planner/map_rows.py
@@ -30,7 +30,7 @@ def fn(
                     raise StrictModeError(
                         f"Error validating {_truncated_repr(item)}: "
                         "Standalone Python objects are not "
-                        "allowed in strict mode. To return Python objects from map(), "
+                        "allowed in Ray 2.5. To return Python objects from map(), "
                         "wrap them in a dict, e.g., "
                         "return `{'item': item}` instead of just `item`."
                     )

diff --git a/python/ray/data/block.py b/python/ray/data/block.py
@@ -53,14 +53,13 @@
 
 STRICT_MODE_EXPLANATION = (
     colorama.Fore.YELLOW
-    + "[IMPORTANT]: Ray Data strict mode is on by default in Ray 2.5. When in strict "
-    "mode, data schemas are required, standalone Python "
-    "objects are no longer supported, and the default batch format changes to `numpy` "
-    "from `pandas`. To disable strict mode temporarily, set the environment variable "
-    "RAY_DATA_STRICT_MODE=0 on all cluster processes. Strict mode will not be "
-    "possible to disable in future releases.\n\n"
-    "Learn more here: https://docs.ray.io/en/master/data/faq.html#what-is-strict-mode"
-    + colorama.Style.RESET_ALL
+    + "Important: Ray Data requires schemas for all datasets in Ray 2.5. This means "
+    "that standalone Python objects are no longer supported. In addition, the default "
+    "batch format is fixed to NumPy. To revert to legacy behavior temporarily, "
+    "set the "
+    "environment variable RAY_DATA_STRICT_MODE=0 on all cluster processes.\n\n"
+    "Learn more here: https://docs.ray.io/en/master/data/faq.html#"
+    "migrating-to-strict-mode" + colorama.Style.RESET_ALL
 )
 
 
@@ -92,7 +91,7 @@ def _validate_key_fn(
                 "schema '{}'.".format(key, schema)
             )
     elif ctx.strict_mode:
-        raise StrictModeError(f"In strict mode, the key must be a string, was: {key}")
+        raise StrictModeError(f"In Ray 2.5, the key must be a string, was: {key}")
     elif key is None:
         if not is_simple_format:
             raise ValueError(
@@ -161,7 +160,7 @@ def _apply_strict_mode_batch_format(given_batch_format: Optional[str]) -> str:
         if given_batch_format not in VALID_BATCH_FORMATS_STRICT_MODE:
             raise StrictModeError(
                 f"The given batch format {given_batch_format} is not allowed "
-                f"in strict mode (must be one of {VALID_BATCH_FORMATS_STRICT_MODE})."
+                f"in Ray 2.5 (must be one of {VALID_BATCH_FORMATS_STRICT_MODE})."
             )
     return given_batch_format
 
@@ -424,7 +423,7 @@ def batch_to_block(batch: DataBatch) -> Block:
                 raise StrictModeError(
                     f"Error validating {_truncated_repr(batch)}: "
                     "Standalone numpy arrays are not "
-                    "allowed in strict mode. Return a dict of field -> array, "
+                    "allowed in Ray 2.5. Return a dict of field -> array, "
                     "e.g., `{'data': array}` instead of `array`."
                 )
 
@@ -472,7 +471,7 @@ def for_block(block: Block) -> "BlockAccessor[T]":
                 raise StrictModeError(
                     f"Error validating {_truncated_repr(block)}: "
                     "Standalone Python objects are not "
-                    "allowed in strict mode. To use Python objects in a datastream, "
+                    "allowed in Ray 2.5. To use Python objects in a datastream, "
                     "wrap them in a dict of numpy arrays, e.g., "
                     "return `{'item': np.array(batch)}` instead of just `batch`."
                 )

diff --git a/python/ray/data/datastream.py b/python/ray/data/datastream.py
@@ -2609,7 +2609,7 @@ def write_numpy(
         context = DataContext.get_current()
         if context.strict_mode and not column:
             raise StrictModeError(
-                "In strict mode, the column must be specified "
+                "In Ray 2.5, the column must be specified "
                 "(e.g., `write_numpy(column='data')`)."
             )
         column = column or TENSOR_COLUMN_NAME
@@ -4116,9 +4116,7 @@ def _divide(self, block_idx: int) -> ("Datastream", "Datastream"):
     def default_batch_format(self) -> Type:
         context = DataContext.get_current()
         if context.strict_mode:
-            raise StrictModeError(
-                "default_batch_format() is not allowed in strict mode"
-            )
+            raise StrictModeError("default_batch_format() is not allowed in Ray 2.5")
 
         import pandas as pd
         import pyarrow as pa
@@ -4138,7 +4136,7 @@ def default_batch_format(self) -> Type:
     def dataset_format(self) -> BlockFormat:
         context = DataContext.get_current()
         if context.strict_mode:
-            raise StrictModeError("dataset_format() is not allowed in strict mode")
+            raise StrictModeError("dataset_format() is not allowed in Ray 2.5")
 
         if context.use_streaming_executor:
             raise DeprecationWarning(

diff --git a/python/ray/data/read_api.py b/python/ray/data/read_api.py
@@ -245,9 +245,7 @@ def range(n: int, *, parallelism: int = -1) -> Datastream:
 def range_table(n: int, *, parallelism: int = -1) -> Datastream:
     ctx = ray.data.DataContext.get_current()
     if ctx.strict_mode:
-        raise DeprecationWarning(
-            "In strict mode, use range() instead of range_table()."
-        )
+        raise DeprecationWarning("In Ray 2.5, use range() instead of range_table().")
     return read_datasource(
         RangeDatasource(),
         parallelism=parallelism,

diff --git a/python/ray/data/tests/test_pipeline.py b/python/ray/data/tests/test_pipeline.py
@@ -22,19 +22,19 @@ def __init__(self):
         self.infos = []
 
     def warning(self, msg):
-        if "strict mode" in msg:
+        if "STRICT_MODE" in msg:
             return
         self.warnings.append(msg)
         print("warning:", msg)
 
     def info(self, msg):
-        if "strict mode" in msg:
+        if "STRICT_MODE" in msg:
             return
         self.infos.append(msg)
         print("info:", msg)
 
     def debug(self, msg):
-        if "strict mode" in msg:
+        if "STRICT_MODE" in msg:
             return
         print("debug:", msg)