Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AIR] Move Constants from tune/results.py to air/constants.py #35404

Merged
merged 10 commits into from
May 18, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions python/ray/air/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,32 @@
"RAY_AIR_NEW_OUTPUT",
"RAY_AIR_RICH_LAYOUT",
}

# The timestamp of when the result is generated.
# Default to when the result is processed by tune.
TIMESTAMP = "timestamp"
woshiyyya marked this conversation as resolved.
Show resolved Hide resolved

# (Auto-filled) Time in seconds this iteration took to run.
# This may be set explicitly to override the system-computed time difference.
TIME_THIS_ITER_S = "time_this_iter_s"

# (Auto-filled) The index of this training iteration.
TRAINING_ITERATION = "training_iteration"

# File that stores parameters of the trial.
EXPR_PARAM_FILE = "params.json"

# Pickle file that stores parameters of the trial.
EXPR_PARAM_PICKLE_FILE = "params.pkl"

# File that stores the progress of the trial.
EXPR_PROGRESS_FILE = "progress.csv"

# File that stores results of the trial.
EXPR_RESULT_FILE = "result.json"

# Pickle file that stores the error (exception object) of the trial.
EXPR_ERROR_PICKLE_FILE = "error.pkl"

# File that stores the error of the trial.
EXPR_ERROR_FILE = "error.txt"
3 changes: 2 additions & 1 deletion python/ray/air/integrations/mlflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
from ray.air import session

from ray.air._internal.mlflow import _MLflowLoggerUtil
from ray.air.constants import TRAINING_ITERATION
from ray.tune.logger import LoggerCallback
from ray.tune.result import TIMESTEPS_TOTAL, TRAINING_ITERATION
from ray.tune.result import TIMESTEPS_TOTAL
from ray.tune.experiment import Trial
from ray.util.annotations import PublicAPI

Expand Down
2 changes: 1 addition & 1 deletion python/ray/train/_internal/checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@
from ray.train._internal.utils import construct_path
from ray.train.constants import (
CHECKPOINT_RANK_KEY,
TIMESTAMP,
TRAIN_CHECKPOINT_SUBDIR,
TUNE_CHECKPOINT_ID,
TUNE_INSTALLED,
CHECKPOINT_METADATA_KEY,
LAZY_CHECKPOINT_MARKER_FILE,
)
from ray.air.constants import TIMESTAMP

if TUNE_INSTALLED:
from ray import tune
Expand Down
10 changes: 7 additions & 3 deletions python/ray/train/_internal/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@
import ray
from ray.air._internal.util import StartTraceback, RunnerThread
from ray.air.checkpoint import Checkpoint
from ray.air.constants import _RESULT_FETCH_TIMEOUT, _ERROR_FETCH_TIMEOUT
from ray.air.constants import (
_RESULT_FETCH_TIMEOUT,
_ERROR_FETCH_TIMEOUT,
TIMESTAMP,
TIME_THIS_ITER_S,
)
from ray.data import Dataset, DatasetPipeline
from ray.train._internal.accelerator import Accelerator
from ray.train.constants import (
Expand All @@ -26,11 +31,10 @@
WORKER_HOSTNAME,
WORKER_NODE_IP,
WORKER_PID,
TIME_THIS_ITER_S,
TIME_TOTAL_S,
TIMESTAMP,
LAZY_CHECKPOINT_MARKER_FILE,
)

from ray.train.error import SessionMisuseError
from ray.train.session import _TrainSessionImpl
from ray.util.annotations import DeveloperAPI
Expand Down
7 changes: 0 additions & 7 deletions python/ray/train/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,6 @@
)

# Autofilled session.report() metrics. Keys should be consistent with Tune.
# The train provided `TIME_THIS_ITER_S` and `TIMESTAMP` will trump what's
# auto-filled by Tune session.
# TODO: Combine the following two with tune's, once there is a centralized
# file for both tune/train constants.
TIMESTAMP = "timestamp"
TIME_THIS_ITER_S = "time_this_iter_s"
woshiyyya marked this conversation as resolved.
Show resolved Hide resolved

TIME_TOTAL_S = "_time_total_s"

WORKER_HOSTNAME = "_hostname"
Expand Down
2 changes: 1 addition & 1 deletion python/ray/train/tests/test_examples.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest

from ray.air.config import ScalingConfig
from ray.tune.result import TRAINING_ITERATION
from ray.air.constants import TRAINING_ITERATION

from ray.train.examples.horovod.horovod_example import (
train_func as horovod_torch_train_func,
Expand Down
2 changes: 1 addition & 1 deletion python/ray/train/tests/test_gpu_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from ray.air import Checkpoint, session

from ray.air.config import ScalingConfig
from ray.air.constants import TRAINING_ITERATION
from ray.train.examples.horovod.horovod_example import (
train_func as horovod_torch_train_func,
)
Expand All @@ -20,7 +21,6 @@
)
from ray.train.tensorflow.tensorflow_trainer import TensorflowTrainer
from ray.train.torch.torch_trainer import TorchTrainer
from ray.tune.result import TRAINING_ITERATION


def test_tensorflow_mnist_gpu(ray_start_4_cpus_2_gpus):
Expand Down
2 changes: 1 addition & 1 deletion python/ray/train/tests/test_mosaic_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@

from ray.air import session
from ray.air.config import ScalingConfig
from ray.air.constants import TRAINING_ITERATION
import ray.train as train
from ray.train.trainer import TrainingFailedError
from ray.tune.result import TRAINING_ITERATION


scaling_config = ScalingConfig(num_workers=2, use_gpu=False)
Expand Down
10 changes: 6 additions & 4 deletions python/ray/tune/analysis/experiment_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,12 @@
)
from ray.air._internal.uri_utils import _join_path_or_uri, URI
from ray.air.checkpoint import Checkpoint
from ray.air.constants import (
EXPR_PROGRESS_FILE,
EXPR_RESULT_FILE,
EXPR_PARAM_FILE,
TRAINING_ITERATION,
)
from ray.tune.syncer import SyncConfig
from ray.tune.utils import flatten_dict
from ray.tune.utils.serialization import TuneFunctionDecoder
Expand All @@ -31,11 +37,7 @@
from ray.tune.error import TuneError
from ray.tune.result import (
DEFAULT_METRIC,
EXPR_PROGRESS_FILE,
EXPR_RESULT_FILE,
EXPR_PARAM_FILE,
CONFIG_PREFIX,
TRAINING_ITERATION,
)
from ray.tune.experiment import Trial
from ray.tune.execution.trial_runner import _find_newest_experiment_checkpoint
Expand Down
6 changes: 4 additions & 2 deletions python/ray/tune/automl/board/backend/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@

from threading import Thread

from ray.air.constants import (
EXPR_PARAM_FILE,
EXPR_RESULT_FILE,
)
from ray.tune.automl.board.common.exception import CollectorError
from ray.tune.automl.board.common.utils import (
parse_json,
Expand All @@ -14,8 +18,6 @@
from ray.tune.result import (
DEFAULT_RESULTS_DIR,
JOB_META_FILE,
EXPR_PARAM_FILE,
EXPR_RESULT_FILE,
EXPR_META_FILE,
)

Expand Down
3 changes: 2 additions & 1 deletion python/ray/tune/cli/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import pandas as pd
from pandas.api.types import is_string_dtype, is_numeric_dtype
from ray.air.constants import EXPR_RESULT_FILE
from ray.tune.result import (
DEFAULT_EXPERIMENT_INFO_KEYS,
DEFAULT_RESULT_KEYS,
Expand Down Expand Up @@ -223,7 +224,7 @@ def list_experiments(

for experiment_dir in experiment_folders:
num_trials = sum(
"result.json" in files
EXPR_RESULT_FILE in files
for _, _, files in os.walk(os.path.join(base, experiment_dir))
)

Expand Down
4 changes: 2 additions & 2 deletions python/ray/tune/execution/trial_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@

import ray
from ray.air._internal.uri_utils import URI
from ray.air.config import CheckpointConfig
from ray.air._internal.checkpoint_manager import CheckpointStorage, _TrackedCheckpoint
from ray.air.config import CheckpointConfig
from ray.air.constants import TIME_THIS_ITER_S
from ray.exceptions import RayTaskError
from ray.tune.error import _TuneStopTrialError, _TuneRestoreError
from ray.tune.execution.experiment_state import (
Expand All @@ -38,7 +39,6 @@
DEBUG_METRICS,
DEFAULT_METRIC,
DONE,
TIME_THIS_ITER_S,
RESULT_DUPLICATE,
SHOULD_CHECKPOINT,
_get_defaults_results_dir,
Expand Down
11 changes: 8 additions & 3 deletions python/ray/tune/experiment/trial.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@
from ray.air import CheckpointConfig
from ray.air._internal.uri_utils import URI
from ray.air._internal.checkpoint_manager import _TrackedCheckpoint, CheckpointStorage
from ray.air.constants import (
EXPR_ERROR_PICKLE_FILE,
EXPR_ERROR_FILE,
TRAINING_ITERATION,
)

import ray.cloudpickle as cloudpickle
from ray.exceptions import RayActorError, RayTaskError
from ray.tune import TuneError
Expand All @@ -35,7 +41,6 @@
DONE,
NODE_IP,
PID,
TRAINING_ITERATION,
TRIAL_ID,
DEBUG_METRICS,
TRIAL_INFO,
Expand Down Expand Up @@ -924,10 +929,10 @@ def handle_error(self, exc: Optional[Union[TuneError, RayTaskError]] = None):
self.num_failures += 1

if self.local_path:
self.error_filename = "error.txt"
self.error_filename = EXPR_ERROR_FILE
if isinstance(exc, RayTaskError):
# Piping through the actual error to result grid.
self.pickled_error_filename = "error.pkl"
self.pickled_error_filename = EXPR_ERROR_PICKLE_FILE
with open(self.pickled_error_file, "wb") as f:
cloudpickle.dump(exc, f)
with open(self.error_file, "a+") as f:
Expand Down
2 changes: 1 addition & 1 deletion python/ray/tune/experimental/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
DataRow,
)
from ray.air._internal.checkpoint_manager import _TrackedCheckpoint
from ray.air.constants import TRAINING_ITERATION
from ray.tune.callback import Callback
from ray.tune.result import (
AUTO_RESULT_KEYS,
Expand All @@ -51,7 +52,6 @@
MEAN_LOSS,
TIME_TOTAL_S,
TIMESTEPS_TOTAL,
TRAINING_ITERATION,
)
from ray.tune.experiment.trial import Trial

Expand Down
2 changes: 1 addition & 1 deletion python/ray/tune/logger/aim.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
import numpy as np
from typing import TYPE_CHECKING, Dict, Optional, List, Union

from ray.air.constants import TRAINING_ITERATION
from ray.tune.logger.logger import LoggerCallback
from ray.tune.result import (
TRAINING_ITERATION,
TIME_TOTAL_S,
TIMESTEPS_TOTAL,
)
Expand Down
2 changes: 1 addition & 1 deletion python/ray/tune/logger/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@

from typing import TYPE_CHECKING, Dict, TextIO

from ray.air.constants import EXPR_PROGRESS_FILE
from ray.tune.logger.logger import _LOGGER_DEPRECATION_WARNING, Logger, LoggerCallback
from ray.tune.result import EXPR_PROGRESS_FILE
from ray.tune.utils import flatten_dict
from ray.util.annotations import Deprecated, PublicAPI

Expand Down
9 changes: 4 additions & 5 deletions python/ray/tune/logger/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,14 @@

from typing import TYPE_CHECKING, Dict, TextIO

import ray.cloudpickle as cloudpickle

from ray.tune.logger.logger import _LOGGER_DEPRECATION_WARNING, Logger, LoggerCallback
from ray.tune.utils.util import SafeFallbackEncoder
from ray.tune.result import (
from ray.air.constants import (
EXPR_PARAM_FILE,
EXPR_PARAM_PICKLE_FILE,
EXPR_RESULT_FILE,
)
import ray.cloudpickle as cloudpickle
from ray.tune.logger.logger import _LOGGER_DEPRECATION_WARNING, Logger, LoggerCallback
from ray.tune.utils.util import SafeFallbackEncoder
from ray.util.annotations import Deprecated, PublicAPI

if TYPE_CHECKING:
Expand Down
2 changes: 1 addition & 1 deletion python/ray/tune/logger/tensorboardx.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@

from typing import TYPE_CHECKING, Dict

from ray.air.constants import TRAINING_ITERATION
from ray.tune.logger.logger import _LOGGER_DEPRECATION_WARNING, Logger, LoggerCallback
from ray.util.debug import log_once
from ray.tune.result import (
TRAINING_ITERATION,
TIME_TOTAL_S,
TIMESTEPS_TOTAL,
)
Expand Down
4 changes: 2 additions & 2 deletions python/ray/tune/progress_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from ray._private.thirdparty.tabulate.tabulate import tabulate
from ray.experimental.tqdm_ray import safe_print
from ray.air.util.node import _force_on_current_node
from ray.air.constants import EXPR_ERROR_FILE, TRAINING_ITERATION
from ray.tune.callback import Callback
from ray.tune.logger import pretty_print
from ray.tune.result import (
Expand All @@ -33,7 +34,6 @@
PID,
TIME_TOTAL_S,
TIMESTEPS_TOTAL,
TRAINING_ITERATION,
TRIAL_ID,
)
from ray.tune.experiment.trial import DEBUG_PRINT_INTERVAL, Trial, _Location
Expand Down Expand Up @@ -1373,7 +1373,7 @@ def print_result(self, trial: Trial, result: Dict, error: bool, done: bool):
elif has_verbosity(Verbosity.V2_TRIAL_NORM):
metric_name = self._metric or "_metric"
metric_value = result.get(metric_name, -99.0)
error_file = os.path.join(trial.local_path, "error.txt")
error_file = os.path.join(trial.local_path, EXPR_ERROR_FILE)

info = ""
if done:
Expand Down
27 changes: 5 additions & 22 deletions python/ray/tune/result.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
import os
from ray.air.constants import (
TIMESTAMP,
TIME_THIS_ITER_S,
TRAINING_ITERATION,
)
woshiyyya marked this conversation as resolved.
Show resolved Hide resolved

# fmt: off
# __sphinx_doc_begin__
Expand Down Expand Up @@ -41,25 +46,15 @@
# (Optional/Auto-filled) Accumulated number of episodes for this trial.
EPISODES_TOTAL = "episodes_total"

# The timestamp of when the result is generated.
# Default to when the result is processed by tune.
TIMESTAMP = "timestamp"

# Number of timesteps in this iteration.
TIMESTEPS_THIS_ITER = "timesteps_this_iter"

# (Auto-filled) Accumulated number of timesteps for this entire trial.
TIMESTEPS_TOTAL = "timesteps_total"

# (Auto-filled) Time in seconds this iteration took to run.
# This may be overridden to override the system-computed time difference.
TIME_THIS_ITER_S = "time_this_iter_s"

# (Auto-filled) Accumulated time in seconds for this entire trial.
TIME_TOTAL_S = "time_total_s"

# (Auto-filled) The index of this training iteration.
TRAINING_ITERATION = "training_iteration"
# __sphinx_doc_end__
# fmt: on

Expand Down Expand Up @@ -144,17 +139,5 @@ def _get_defaults_results_dir() -> str:
# by automlboard if exists.
EXPR_META_FILE = "trial_status.json"

# File that stores parameters of the trial.
EXPR_PARAM_FILE = "params.json"

# Pickle File that stores parameters of the trial.
EXPR_PARAM_PICKLE_FILE = "params.pkl"

# File that stores the progress of the trial.
EXPR_PROGRESS_FILE = "progress.csv"

# File that stores results of the trial.
EXPR_RESULT_FILE = "result.json"

Comment on lines -147 to -158
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we also import these in result.py for backwards compatibility?

Copy link
Member Author

@woshiyyya woshiyyya May 17, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea, it should make migrations safer:)

# Config prefix when using ExperimentAnalysis.
CONFIG_PREFIX = "config"
Loading