ray-project · krfricke · May 18, 2023 · May 16, 2023 · May 16, 2023 · May 16, 2023
diff --git a/python/ray/air/constants.py b/python/ray/air/constants.py
@@ -70,3 +70,32 @@
     "RAY_AIR_NEW_OUTPUT",
     "RAY_AIR_RICH_LAYOUT",
 }
+
+# The timestamp of when the result is generated.
+# Defaults to the time at which the result is processed by Tune.
+TIMESTAMP = "timestamp"
+
+# (Auto-filled) Time in seconds this iteration took to run.
+# It may be set explicitly to override the system-computed time difference.
+TIME_THIS_ITER_S = "time_this_iter_s"
+
+# (Auto-filled) The index of this training iteration.
+TRAINING_ITERATION = "training_iteration"
+
+# File that stores parameters of the trial.
+EXPR_PARAM_FILE = "params.json"
+
+# Pickle file that stores parameters of the trial.
+EXPR_PARAM_PICKLE_FILE = "params.pkl"
+
+# File that stores the progress of the trial.
+EXPR_PROGRESS_FILE = "progress.csv"
+
+# File that stores results of the trial.
+EXPR_RESULT_FILE = "result.json"
+
+# File that stores the pickled error (exception object) of the trial.
+EXPR_ERROR_PICKLE_FILE = "error.pkl"
+
+# Text file that stores the error of the trial.
+EXPR_ERROR_FILE = "error.txt"
diff --git a/python/ray/air/integrations/mlflow.py b/python/ray/air/integrations/mlflow.py
@@ -7,8 +7,9 @@
 from ray.air import session
 
 from ray.air._internal.mlflow import _MLflowLoggerUtil
+from ray.air.constants import TRAINING_ITERATION
 from ray.tune.logger import LoggerCallback
-from ray.tune.result import TIMESTEPS_TOTAL, TRAINING_ITERATION
+from ray.tune.result import TIMESTEPS_TOTAL
 from ray.tune.experiment import Trial
 from ray.util.annotations import PublicAPI
 

@@ -13,13 +13,13 @@
 from ray.train._internal.utils import construct_path
 from ray.train.constants import (
     CHECKPOINT_RANK_KEY,
-    TIMESTAMP,
     TRAIN_CHECKPOINT_SUBDIR,
     TUNE_CHECKPOINT_ID,
     TUNE_INSTALLED,
     CHECKPOINT_METADATA_KEY,
     LAZY_CHECKPOINT_MARKER_FILE,
 )
+from ray.air.constants import TIMESTAMP
 
 if TUNE_INSTALLED:
     from ray import tune

@@ -15,7 +15,12 @@
 import ray
 from ray.air._internal.util import StartTraceback, RunnerThread
 from ray.air.checkpoint import Checkpoint
-from ray.air.constants import _RESULT_FETCH_TIMEOUT, _ERROR_FETCH_TIMEOUT
+from ray.air.constants import (
+    _RESULT_FETCH_TIMEOUT,
+    _ERROR_FETCH_TIMEOUT,
+    TIMESTAMP,
+    TIME_THIS_ITER_S,
+)
 from ray.data import Dataset, DatasetPipeline
 from ray.train._internal.accelerator import Accelerator
 from ray.train.constants import (
@@ -26,11 +31,10 @@
     WORKER_HOSTNAME,
     WORKER_NODE_IP,
     WORKER_PID,
-    TIME_THIS_ITER_S,
     TIME_TOTAL_S,
-    TIMESTAMP,
     LAZY_CHECKPOINT_MARKER_FILE,
 )
+
 from ray.train.error import SessionMisuseError
 from ray.train.session import _TrainSessionImpl
 from ray.util.annotations import DeveloperAPI

@@ -18,13 +18,6 @@
 )
 
 # Autofilled session.report() metrics. Keys should be consistent with Tune.
-# The train provided `TIME_THIS_ITER_S` and `TIMESTAMP` will triumph what's
-# auto-filled by Tune session.
-# TODO: Combine the following two with tune's, once there is a centralized
-#  file for both tune/train constants.
-TIMESTAMP = "timestamp"
-TIME_THIS_ITER_S = "time_this_iter_s"
-
 TIME_TOTAL_S = "_time_total_s"
 
 WORKER_HOSTNAME = "_hostname"

@@ -1,7 +1,7 @@
 import pytest
 
 from ray.air.config import ScalingConfig
-from ray.tune.result import TRAINING_ITERATION
+from ray.air.constants import TRAINING_ITERATION
 
 from ray.train.examples.horovod.horovod_example import (
     train_func as horovod_torch_train_func,

@@ -4,6 +4,7 @@
 from ray.air import Checkpoint, session
 
 from ray.air.config import ScalingConfig
+from ray.air.constants import TRAINING_ITERATION
 from ray.train.examples.horovod.horovod_example import (
     train_func as horovod_torch_train_func,
 )
@@ -20,7 +21,6 @@
 )
 from ray.train.tensorflow.tensorflow_trainer import TensorflowTrainer
 from ray.train.torch.torch_trainer import TorchTrainer
-from ray.tune.result import TRAINING_ITERATION
 
 
 def test_tensorflow_mnist_gpu(ray_start_4_cpus_2_gpus):

@@ -8,9 +8,9 @@
 
 from ray.air import session
 from ray.air.config import ScalingConfig
+from ray.air.constants import TRAINING_ITERATION
 import ray.train as train
 from ray.train.trainer import TrainingFailedError
-from ray.tune.result import TRAINING_ITERATION
 
 
 scaling_config = ScalingConfig(num_workers=2, use_gpu=False)

@@ -15,6 +15,12 @@
 )
 from ray.air._internal.uri_utils import _join_path_or_uri, URI
 from ray.air.checkpoint import Checkpoint
+from ray.air.constants import (
+    EXPR_PROGRESS_FILE,
+    EXPR_RESULT_FILE,
+    EXPR_PARAM_FILE,
+    TRAINING_ITERATION,
+)
 from ray.tune.syncer import SyncConfig
 from ray.tune.utils import flatten_dict
 from ray.tune.utils.serialization import TuneFunctionDecoder
@@ -31,11 +37,7 @@
 from ray.tune.error import TuneError
 from ray.tune.result import (
     DEFAULT_METRIC,
-    EXPR_PROGRESS_FILE,
-    EXPR_RESULT_FILE,
-    EXPR_PARAM_FILE,
     CONFIG_PREFIX,
-    TRAINING_ITERATION,
 )
 from ray.tune.experiment import Trial
 from ray.tune.execution.trial_runner import _find_newest_experiment_checkpoint

@@ -4,6 +4,10 @@
 
 from threading import Thread
 
+from ray.air.constants import (
+    EXPR_PARAM_FILE,
+    EXPR_RESULT_FILE,
+)
 from ray.tune.automl.board.common.exception import CollectorError
 from ray.tune.automl.board.common.utils import (
     parse_json,
@@ -14,8 +18,6 @@
 from ray.tune.result import (
     DEFAULT_RESULTS_DIR,
     JOB_META_FILE,
-    EXPR_PARAM_FILE,
-    EXPR_RESULT_FILE,
     EXPR_META_FILE,
 )
 

@@ -10,6 +10,7 @@
 
 import pandas as pd
 from pandas.api.types import is_string_dtype, is_numeric_dtype
+from ray.air.constants import EXPR_RESULT_FILE
 from ray.tune.result import (
     DEFAULT_EXPERIMENT_INFO_KEYS,
     DEFAULT_RESULT_KEYS,
@@ -223,7 +224,7 @@ def list_experiments(
 
     for experiment_dir in experiment_folders:
         num_trials = sum(
-            "result.json" in files
+            EXPR_RESULT_FILE in files
             for _, _, files in os.walk(os.path.join(base, experiment_dir))
         )
 

@@ -12,8 +12,9 @@
 
 import ray
 from ray.air._internal.uri_utils import URI
-from ray.air.config import CheckpointConfig
 from ray.air._internal.checkpoint_manager import CheckpointStorage, _TrackedCheckpoint
+from ray.air.config import CheckpointConfig
+from ray.air.constants import TIME_THIS_ITER_S
 from ray.exceptions import RayTaskError
 from ray.tune.error import _TuneStopTrialError, _TuneRestoreError
 from ray.tune.execution.experiment_state import (
@@ -38,7 +39,6 @@
     DEBUG_METRICS,
     DEFAULT_METRIC,
     DONE,
-    TIME_THIS_ITER_S,
     RESULT_DUPLICATE,
     SHOULD_CHECKPOINT,
     _get_defaults_results_dir,

@@ -20,6 +20,12 @@
 from ray.air import CheckpointConfig
 from ray.air._internal.uri_utils import URI
 from ray.air._internal.checkpoint_manager import _TrackedCheckpoint, CheckpointStorage
+from ray.air.constants import (
+    EXPR_ERROR_PICKLE_FILE,
+    EXPR_ERROR_FILE,
+    TRAINING_ITERATION,
+)
+
 import ray.cloudpickle as cloudpickle
 from ray.exceptions import RayActorError, RayTaskError
 from ray.tune import TuneError
@@ -35,7 +41,6 @@
     DONE,
     NODE_IP,
     PID,
-    TRAINING_ITERATION,
     TRIAL_ID,
     DEBUG_METRICS,
     TRIAL_INFO,
@@ -924,10 +929,10 @@ def handle_error(self, exc: Optional[Union[TuneError, RayTaskError]] = None):
             self.num_failures += 1
 
         if self.local_path:
-            self.error_filename = "error.txt"
+            self.error_filename = EXPR_ERROR_FILE
             if isinstance(exc, RayTaskError):
                 # Piping through the actual error to result grid.
-                self.pickled_error_filename = "error.pkl"
+                self.pickled_error_filename = EXPR_ERROR_PICKLE_FILE
                 with open(self.pickled_error_file, "wb") as f:
                     cloudpickle.dump(exc, f)
             with open(self.error_file, "a+") as f:

@@ -43,6 +43,7 @@
     DataRow,
 )
 from ray.air._internal.checkpoint_manager import _TrackedCheckpoint
+from ray.air.constants import TRAINING_ITERATION
 from ray.tune.callback import Callback
 from ray.tune.result import (
     AUTO_RESULT_KEYS,
@@ -51,7 +52,6 @@
     MEAN_LOSS,
     TIME_TOTAL_S,
     TIMESTEPS_TOTAL,
-    TRAINING_ITERATION,
 )
 from ray.tune.experiment.trial import Trial
 

@@ -3,9 +3,9 @@
 import numpy as np
 from typing import TYPE_CHECKING, Dict, Optional, List, Union
 
+from ray.air.constants import TRAINING_ITERATION
 from ray.tune.logger.logger import LoggerCallback
 from ray.tune.result import (
-    TRAINING_ITERATION,
     TIME_TOTAL_S,
     TIMESTEPS_TOTAL,
 )

@@ -4,8 +4,8 @@
 
 from typing import TYPE_CHECKING, Dict, TextIO
 
+from ray.air.constants import EXPR_PROGRESS_FILE
 from ray.tune.logger.logger import _LOGGER_DEPRECATION_WARNING, Logger, LoggerCallback
-from ray.tune.result import EXPR_PROGRESS_FILE
 from ray.tune.utils import flatten_dict
 from ray.util.annotations import Deprecated, PublicAPI
 

@@ -5,15 +5,14 @@
 
 from typing import TYPE_CHECKING, Dict, TextIO
 
-import ray.cloudpickle as cloudpickle
-
-from ray.tune.logger.logger import _LOGGER_DEPRECATION_WARNING, Logger, LoggerCallback
-from ray.tune.utils.util import SafeFallbackEncoder
-from ray.tune.result import (
+from ray.air.constants import (
     EXPR_PARAM_FILE,
     EXPR_PARAM_PICKLE_FILE,
     EXPR_RESULT_FILE,
 )
+import ray.cloudpickle as cloudpickle
+from ray.tune.logger.logger import _LOGGER_DEPRECATION_WARNING, Logger, LoggerCallback
+from ray.tune.utils.util import SafeFallbackEncoder
 from ray.util.annotations import Deprecated, PublicAPI
 
 if TYPE_CHECKING:

@@ -3,10 +3,10 @@
 
 from typing import TYPE_CHECKING, Dict
 
+from ray.air.constants import TRAINING_ITERATION
 from ray.tune.logger.logger import _LOGGER_DEPRECATION_WARNING, Logger, LoggerCallback
 from ray.util.debug import log_once
 from ray.tune.result import (
-    TRAINING_ITERATION,
     TIME_TOTAL_S,
     TIMESTEPS_TOTAL,
 )

@@ -19,6 +19,7 @@
 from ray._private.thirdparty.tabulate.tabulate import tabulate
 from ray.experimental.tqdm_ray import safe_print
 from ray.air.util.node import _force_on_current_node
+from ray.air.constants import EXPR_ERROR_FILE, TRAINING_ITERATION
 from ray.tune.callback import Callback
 from ray.tune.logger import pretty_print
 from ray.tune.result import (
@@ -33,7 +34,6 @@
     PID,
     TIME_TOTAL_S,
     TIMESTEPS_TOTAL,
-    TRAINING_ITERATION,
     TRIAL_ID,
 )
 from ray.tune.experiment.trial import DEBUG_PRINT_INTERVAL, Trial, _Location
@@ -1373,7 +1373,7 @@ def print_result(self, trial: Trial, result: Dict, error: bool, done: bool):
         elif has_verbosity(Verbosity.V2_TRIAL_NORM):
             metric_name = self._metric or "_metric"
             metric_value = result.get(metric_name, -99.0)
-            error_file = os.path.join(trial.local_path, "error.txt")
+            error_file = os.path.join(trial.local_path, EXPR_ERROR_FILE)
 
             info = ""
             if done:

@@ -1,4 +1,15 @@
 import os
+# Re-exported for backward compatibility: these constants used to be defined
+# in this module and may still be imported from `ray.tune.result`.
+from ray.air.constants import (  # noqa: F401
+    EXPR_PARAM_FILE,
+    EXPR_PARAM_PICKLE_FILE,
+    EXPR_PROGRESS_FILE,
+    EXPR_RESULT_FILE,
+    TIMESTAMP,
+    TIME_THIS_ITER_S,
+    TRAINING_ITERATION,
+)
 
 # fmt: off
 # __sphinx_doc_begin__
@@ -41,25 +46,15 @@
 # (Optional/Auto-filled) Accumulated number of episodes for this trial.
 EPISODES_TOTAL = "episodes_total"
 
-# The timestamp of when the result is generated.
-# Default to when the result is processed by tune.
-TIMESTAMP = "timestamp"
-
 # Number of timesteps in this iteration.
 TIMESTEPS_THIS_ITER = "timesteps_this_iter"
 
 # (Auto-filled) Accumulated number of timesteps for this entire trial.
 TIMESTEPS_TOTAL = "timesteps_total"
 
-# (Auto-filled) Time in seconds this iteration took to run.
-# This may be overridden to override the system-computed time difference.
-TIME_THIS_ITER_S = "time_this_iter_s"
-
 # (Auto-filled) Accumulated time in seconds for this entire trial.
 TIME_TOTAL_S = "time_total_s"
 
-# (Auto-filled) The index of this training iteration.
-TRAINING_ITERATION = "training_iteration"
 # __sphinx_doc_end__
 # fmt: on
 
@@ -144,17 +139,5 @@ def _get_defaults_results_dir() -> str:
 # by automlboard if exists.
 EXPR_META_FILE = "trial_status.json"
 
-# File that stores parameters of the trial.
-EXPR_PARAM_FILE = "params.json"
-
-# Pickle File that stores parameters of the trial.
-EXPR_PARAM_PICKLE_FILE = "params.pkl"
-
-# File that stores the progress of the trial.
-EXPR_PROGRESS_FILE = "progress.csv"
-
-# File that stores results of the trial.
-EXPR_RESULT_FILE = "result.json"
-
 # Config prefix when using ExperimentAnalysis.
 CONFIG_PREFIX = "config"