[AIR] Move Constants from tune/result.py to air/constants.py (ray-project#35404)

Currently, result-related constants are stored in `ray/tune/result.py`, but our `Result` object is defined in `ray/air/result.py`. When we try to import constants from `ray/tune/result.py` inside `ray/air/result.py`, we hit a cyclic import error:
```
import Result -> import ray.tune.result -> ray.tune.init -> import ResultGrid -> import Result
```

This PR only moves the following constants (`TIMESTAMP`, `TIME_THIS_ITER_S`, `TRAINING_ITERATION`, `EXPR_PARAM_FILE`, `EXPR_PARAM_PICKLE_FILE`, `EXPR_PROGRESS_FILE`, `EXPR_RESULT_FILE`, `EXPR_ERROR_PICKLE_FILE`, `EXPR_ERROR_FILE`) and updates the import paths in all affected files accordingly. In the future, we will gradually move the remaining constants along with the module class.
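
By way of illustration, a minimal usage sketch (not part of this commit; `summarize` is a hypothetical helper) of downstream code reading the shared result keys from their new home. The assumption here is that `ray.air.constants` stays a constants-only leaf module, so importing it from `ray/air/result.py` cannot re-enter the cycle above:

```
# Sketch only: assumes a Ray build that includes this change.
# The keys and filenames below are the ones added to python/ray/air/constants.py.
from ray.air.constants import (
    TIMESTAMP,
    TIME_THIS_ITER_S,
    TRAINING_ITERATION,
    EXPR_RESULT_FILE,
)


def summarize(result: dict) -> str:
    """Format one reported result using the shared keys instead of raw strings."""
    iteration = result.get(TRAINING_ITERATION, 0)
    seconds = result.get(TIME_THIS_ITER_S, 0.0)
    return f"iter {iteration} took {seconds:.2f}s (rows land in {EXPR_RESULT_FILE})"


print(summarize({TRAINING_ITERATION: 3, TIME_THIS_ITER_S: 1.5, TIMESTAMP: 1686441600}))
# -> iter 3 took 1.50s (rows land in result.json)
```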

Signed-off-by: woshiyyya <xiaoyunxuan1998@gmail.com>
Signed-off-by: Yunxuan Xiao <xiaoyunxuan1998@gmail.com>
woshiyyya authored and scv119 committed Jun 11, 2023
1 parent 8fc5e4c commit d722000
Showing 41 changed files with 130 additions and 91 deletions.
30 changes: 30 additions & 0 deletions python/ray/air/constants.py
@@ -45,6 +45,36 @@
# checkpointing.
LAZY_CHECKPOINT_MARKER_FILE = ".lazy_checkpoint_marker"


# The timestamp of when the result is generated.
# Defaults to when the result is processed by Tune.
TIMESTAMP = "timestamp"

# (Auto-filled) Time in seconds this iteration took to run.
# This may be set explicitly to override the system-computed time difference.
TIME_THIS_ITER_S = "time_this_iter_s"

# (Auto-filled) The index of this training iteration.
TRAINING_ITERATION = "training_iteration"

# File that stores parameters of the trial.
EXPR_PARAM_FILE = "params.json"

# Pickle file that stores parameters of the trial.
EXPR_PARAM_PICKLE_FILE = "params.pkl"

# File that stores the progress of the trial.
EXPR_PROGRESS_FILE = "progress.csv"

# File that stores results of the trial.
EXPR_RESULT_FILE = "result.json"

# File that stores the pickled error of the trial.
EXPR_ERROR_PICKLE_FILE = "error.pkl"

# File that stores the error text of the trial.
EXPR_ERROR_FILE = "error.txt"

# ==================================================
# Environment Variables
# ==================================================
3 changes: 2 additions & 1 deletion python/ray/air/integrations/mlflow.py
@@ -6,8 +6,9 @@
from ray.air import session
from ray.air._internal.mlflow import _MLflowLoggerUtil
from ray.air._internal import usage as air_usage
from ray.air.constants import TRAINING_ITERATION
from ray.tune.logger import LoggerCallback
from ray.tune.result import TIMESTEPS_TOTAL, TRAINING_ITERATION
from ray.tune.result import TIMESTEPS_TOTAL
from ray.tune.experiment import Trial
from ray.util.annotations import PublicAPI

2 changes: 1 addition & 1 deletion python/ray/train/_internal/checkpoint.py
@@ -13,13 +13,13 @@
from ray.train._internal.utils import construct_path
from ray.train.constants import (
CHECKPOINT_RANK_KEY,
TIMESTAMP,
TRAIN_CHECKPOINT_SUBDIR,
TUNE_CHECKPOINT_ID,
TUNE_INSTALLED,
CHECKPOINT_METADATA_KEY,
LAZY_CHECKPOINT_MARKER_FILE,
)
from ray.air.constants import TIMESTAMP

if TUNE_INSTALLED:
from ray import tune
10 changes: 7 additions & 3 deletions python/ray/train/_internal/session.py
@@ -15,7 +15,12 @@
import ray
from ray.air._internal.util import StartTraceback, RunnerThread
from ray.air.checkpoint import Checkpoint
from ray.air.constants import _RESULT_FETCH_TIMEOUT, _ERROR_FETCH_TIMEOUT
from ray.air.constants import (
_RESULT_FETCH_TIMEOUT,
_ERROR_FETCH_TIMEOUT,
TIMESTAMP,
TIME_THIS_ITER_S,
)
from ray.data import Dataset, DatasetPipeline
from ray.train._internal.accelerator import Accelerator
from ray.train.constants import (
@@ -26,11 +31,10 @@
WORKER_HOSTNAME,
WORKER_NODE_IP,
WORKER_PID,
TIME_THIS_ITER_S,
TIME_TOTAL_S,
TIMESTAMP,
LAZY_CHECKPOINT_MARKER_FILE,
)

from ray.train.error import SessionMisuseError
from ray.train.session import _TrainSessionImpl
from ray.util.annotations import DeveloperAPI
7 changes: 0 additions & 7 deletions python/ray/train/constants.py
@@ -18,13 +18,6 @@
)

# Autofilled session.report() metrics. Keys should be consistent with Tune.
# The train-provided `TIME_THIS_ITER_S` and `TIMESTAMP` take precedence over
# what's auto-filled by the Tune session.
# TODO: Combine the following two with tune's, once there is a centralized
# file for both tune/train constants.
TIMESTAMP = "timestamp"
TIME_THIS_ITER_S = "time_this_iter_s"

TIME_TOTAL_S = "_time_total_s"

WORKER_HOSTNAME = "_hostname"
2 changes: 1 addition & 1 deletion python/ray/train/tests/test_examples.py
@@ -1,7 +1,7 @@
import pytest

from ray.air.config import ScalingConfig
from ray.tune.result import TRAINING_ITERATION
from ray.air.constants import TRAINING_ITERATION

from ray.train.examples.horovod.horovod_example import (
train_func as horovod_torch_train_func,
2 changes: 1 addition & 1 deletion python/ray/train/tests/test_gpu_examples.py
@@ -4,6 +4,7 @@
from ray.air import Checkpoint, session

from ray.air.config import ScalingConfig
from ray.air.constants import TRAINING_ITERATION
from ray.train.examples.horovod.horovod_example import (
train_func as horovod_torch_train_func,
)
@@ -20,7 +21,6 @@
)
from ray.train.tensorflow.tensorflow_trainer import TensorflowTrainer
from ray.train.torch.torch_trainer import TorchTrainer
from ray.tune.result import TRAINING_ITERATION


def test_tensorflow_mnist_gpu(ray_start_4_cpus_2_gpus):
2 changes: 1 addition & 1 deletion python/ray/train/tests/test_mosaic_trainer.py
@@ -8,9 +8,9 @@

from ray.air import session
from ray.air.config import ScalingConfig
from ray.air.constants import TRAINING_ITERATION
import ray.train as train
from ray.train.trainer import TrainingFailedError
from ray.tune.result import TRAINING_ITERATION


scaling_config = ScalingConfig(num_workers=2, use_gpu=False)
10 changes: 6 additions & 4 deletions python/ray/tune/analysis/experiment_analysis.py
@@ -15,6 +15,12 @@
)
from ray.air._internal.uri_utils import _join_path_or_uri, URI
from ray.air.checkpoint import Checkpoint
from ray.air.constants import (
EXPR_PROGRESS_FILE,
EXPR_RESULT_FILE,
EXPR_PARAM_FILE,
TRAINING_ITERATION,
)
from ray.tune.syncer import SyncConfig
from ray.tune.utils import flatten_dict
from ray.tune.utils.serialization import TuneFunctionDecoder
@@ -31,11 +37,7 @@
from ray.tune.error import TuneError
from ray.tune.result import (
DEFAULT_METRIC,
EXPR_PROGRESS_FILE,
EXPR_RESULT_FILE,
EXPR_PARAM_FILE,
CONFIG_PREFIX,
TRAINING_ITERATION,
)
from ray.tune.experiment import Trial
from ray.tune.execution.trial_runner import _find_newest_experiment_checkpoint
6 changes: 4 additions & 2 deletions python/ray/tune/automl/board/backend/collector.py
@@ -4,6 +4,10 @@

from threading import Thread

from ray.air.constants import (
EXPR_PARAM_FILE,
EXPR_RESULT_FILE,
)
from ray.tune.automl.board.common.exception import CollectorError
from ray.tune.automl.board.common.utils import (
parse_json,
@@ -14,8 +18,6 @@
from ray.tune.result import (
DEFAULT_RESULTS_DIR,
JOB_META_FILE,
EXPR_PARAM_FILE,
EXPR_RESULT_FILE,
EXPR_META_FILE,
)

3 changes: 2 additions & 1 deletion python/ray/tune/cli/commands.py
@@ -10,6 +10,7 @@

import pandas as pd
from pandas.api.types import is_string_dtype, is_numeric_dtype
from ray.air.constants import EXPR_RESULT_FILE
from ray.tune.result import (
DEFAULT_EXPERIMENT_INFO_KEYS,
DEFAULT_RESULT_KEYS,
@@ -223,7 +224,7 @@ def list_experiments(

for experiment_dir in experiment_folders:
num_trials = sum(
"result.json" in files
EXPR_RESULT_FILE in files
for _, _, files in os.walk(os.path.join(base, experiment_dir))
)

4 changes: 2 additions & 2 deletions python/ray/tune/execution/trial_runner.py
@@ -12,8 +12,9 @@

import ray
from ray.air._internal.uri_utils import URI
from ray.air.config import CheckpointConfig
from ray.air._internal.checkpoint_manager import CheckpointStorage, _TrackedCheckpoint
from ray.air.config import CheckpointConfig
from ray.air.constants import TIME_THIS_ITER_S
from ray.exceptions import RayTaskError
from ray.tune.error import _TuneStopTrialError, _TuneRestoreError
from ray.tune.execution.experiment_state import (
@@ -38,7 +39,6 @@
DEBUG_METRICS,
DEFAULT_METRIC,
DONE,
TIME_THIS_ITER_S,
RESULT_DUPLICATE,
SHOULD_CHECKPOINT,
_get_defaults_results_dir,
11 changes: 8 additions & 3 deletions python/ray/tune/experiment/trial.py
@@ -20,6 +20,12 @@
from ray.air import CheckpointConfig
from ray.air._internal.uri_utils import URI
from ray.air._internal.checkpoint_manager import _TrackedCheckpoint, CheckpointStorage
from ray.air.constants import (
EXPR_ERROR_PICKLE_FILE,
EXPR_ERROR_FILE,
TRAINING_ITERATION,
)

import ray.cloudpickle as cloudpickle
from ray.exceptions import RayActorError, RayTaskError
from ray.tune import TuneError
@@ -35,7 +41,6 @@
DONE,
NODE_IP,
PID,
TRAINING_ITERATION,
TRIAL_ID,
DEBUG_METRICS,
TRIAL_INFO,
@@ -924,10 +929,10 @@ def handle_error(self, exc: Optional[Union[TuneError, RayTaskError]] = None):
self.num_failures += 1

if self.local_path:
self.error_filename = "error.txt"
self.error_filename = EXPR_ERROR_FILE
if isinstance(exc, RayTaskError):
# Piping through the actual error to result grid.
self.pickled_error_filename = "error.pkl"
self.pickled_error_filename = EXPR_ERROR_PICKLE_FILE
with open(self.pickled_error_file, "wb") as f:
cloudpickle.dump(exc, f)
with open(self.error_file, "a+") as f:
2 changes: 1 addition & 1 deletion python/ray/tune/experimental/output.py
@@ -44,6 +44,7 @@
DataRow,
)
from ray.air._internal.checkpoint_manager import _TrackedCheckpoint
from ray.air.constants import TRAINING_ITERATION
from ray.tune.callback import Callback
from ray.tune.result import (
AUTO_RESULT_KEYS,
@@ -52,7 +53,6 @@
MEAN_LOSS,
TIME_TOTAL_S,
TIMESTEPS_TOTAL,
TRAINING_ITERATION,
)
from ray.tune.experiment.trial import Trial

2 changes: 1 addition & 1 deletion python/ray/tune/logger/aim.py
@@ -3,9 +3,9 @@
import numpy as np
from typing import TYPE_CHECKING, Dict, Optional, List, Union

from ray.air.constants import TRAINING_ITERATION
from ray.tune.logger.logger import LoggerCallback
from ray.tune.result import (
TRAINING_ITERATION,
TIME_TOTAL_S,
TIMESTEPS_TOTAL,
)
2 changes: 1 addition & 1 deletion python/ray/tune/logger/csv.py
@@ -4,8 +4,8 @@

from typing import TYPE_CHECKING, Dict, TextIO

from ray.air.constants import EXPR_PROGRESS_FILE
from ray.tune.logger.logger import _LOGGER_DEPRECATION_WARNING, Logger, LoggerCallback
from ray.tune.result import EXPR_PROGRESS_FILE
from ray.tune.utils import flatten_dict
from ray.util.annotations import Deprecated, PublicAPI

9 changes: 4 additions & 5 deletions python/ray/tune/logger/json.py
@@ -5,15 +5,14 @@

from typing import TYPE_CHECKING, Dict, TextIO

import ray.cloudpickle as cloudpickle

from ray.tune.logger.logger import _LOGGER_DEPRECATION_WARNING, Logger, LoggerCallback
from ray.tune.utils.util import SafeFallbackEncoder
from ray.tune.result import (
from ray.air.constants import (
EXPR_PARAM_FILE,
EXPR_PARAM_PICKLE_FILE,
EXPR_RESULT_FILE,
)
import ray.cloudpickle as cloudpickle
from ray.tune.logger.logger import _LOGGER_DEPRECATION_WARNING, Logger, LoggerCallback
from ray.tune.utils.util import SafeFallbackEncoder
from ray.util.annotations import Deprecated, PublicAPI

if TYPE_CHECKING:
2 changes: 1 addition & 1 deletion python/ray/tune/logger/tensorboardx.py
@@ -3,10 +3,10 @@

from typing import TYPE_CHECKING, Dict

from ray.air.constants import TRAINING_ITERATION
from ray.tune.logger.logger import _LOGGER_DEPRECATION_WARNING, Logger, LoggerCallback
from ray.util.debug import log_once
from ray.tune.result import (
TRAINING_ITERATION,
TIME_TOTAL_S,
TIMESTEPS_TOTAL,
)
4 changes: 2 additions & 2 deletions python/ray/tune/progress_reporter.py
@@ -19,6 +19,7 @@
from ray._private.thirdparty.tabulate.tabulate import tabulate
from ray.experimental.tqdm_ray import safe_print
from ray.air.util.node import _force_on_current_node
from ray.air.constants import EXPR_ERROR_FILE, TRAINING_ITERATION
from ray.tune.callback import Callback
from ray.tune.logger import pretty_print
from ray.tune.result import (
@@ -33,7 +34,6 @@
PID,
TIME_TOTAL_S,
TIMESTEPS_TOTAL,
TRAINING_ITERATION,
TRIAL_ID,
)
from ray.tune.experiment.trial import DEBUG_PRINT_INTERVAL, Trial, _Location
@@ -1373,7 +1373,7 @@ def print_result(self, trial: Trial, result: Dict, error: bool, done: bool):
elif has_verbosity(Verbosity.V2_TRIAL_NORM):
metric_name = self._metric or "_metric"
metric_value = result.get(metric_name, -99.0)
error_file = os.path.join(trial.local_path, "error.txt")
error_file = os.path.join(trial.local_path, EXPR_ERROR_FILE)

info = ""
if done: