From 1246ef9fee1ea4c11881a89f6e492e419be7d69f Mon Sep 17 00:00:00 2001 From: Avnish Narayan <38871737+avnishn@users.noreply.github.com> Date: Thu, 29 Jun 2023 15:49:22 -0700 Subject: [PATCH] [RLlib] RLlib deprecation Notices Part 2 (models/tf, models/torch, base_mode, catalog, modelv2, models/temp_spec_classes, policy/) (#36840) Signed-off-by: Avnish Signed-off-by: e428265 --- rllib/algorithms/callbacks.py | 1 + rllib/models/base_model.py | 9 +++ rllib/models/catalog.py | 3 +- rllib/models/tf/attention_net.py | 19 +++++- rllib/models/tf/complex_input_net.py | 5 ++ rllib/models/tf/fcnet.py | 4 ++ rllib/models/tf/layers/gru_gate.py | 6 ++ .../models/tf/layers/multi_head_attention.py | 6 ++ rllib/models/tf/layers/noisy_layer.py | 6 ++ .../layers/relative_multi_head_attention.py | 6 ++ rllib/models/tf/layers/skip_connection.py | 6 ++ rllib/models/tf/misc.py | 12 ++++ rllib/models/tf/noop.py | 2 + rllib/models/tf/primitives.py | 3 + rllib/models/tf/recurrent_net.py | 13 +++- rllib/models/tf/tf_action_dist.py | 50 ++++++++++++++ rllib/models/tf/tf_distributions.py | 4 +- rllib/models/tf/tf_modelv2.py | 5 ++ rllib/models/tf/visionnet.py | 7 ++ rllib/models/torch/attention_net.py | 8 +++ rllib/models/torch/complex_input_net.py | 6 ++ rllib/models/torch/fcnet.py | 6 ++ rllib/models/torch/mingpt.py | 8 ++- rllib/models/torch/model.py | 4 ++ rllib/models/torch/noop.py | 2 + rllib/models/torch/primitives.py | 6 ++ rllib/models/torch/recurrent_net.py | 13 ++++ rllib/models/torch/torch_action_dist.py | 68 ++++++++++++++++++- rllib/models/torch/torch_modelv2.py | 8 ++- rllib/models/torch/visionnet.py | 4 ++ rllib/models/utils.py | 7 +- rllib/policy/dynamic_tf_policy.py | 10 ++- rllib/policy/eager_tf_policy.py | 11 ++- rllib/policy/policy_template.py | 5 +- rllib/policy/tf_mixins.py | 11 ++- rllib/policy/tf_policy.py | 2 +- rllib/policy/tf_policy_template.py | 10 ++- rllib/policy/torch_mixins.py | 8 ++- rllib/policy/torch_policy.py | 5 +- rllib/utils/exploration/curiosity.py | 4 +- rllib/utils/exploration/epsilon_greedy.py | 4 +- rllib/utils/exploration/exploration.py | 2 +- rllib/utils/exploration/gaussian_noise.py | 4 +- .../exploration/ornstein_uhlenbeck_noise.py | 4 +- rllib/utils/exploration/parameter_noise.py | 4 +- .../exploration/per_worker_epsilon_greedy.py | 4 +- .../exploration/per_worker_gaussian_noise.py | 4 +- .../per_worker_ornstein_uhlenbeck_noise.py | 4 +- rllib/utils/exploration/random.py | 4 +- rllib/utils/exploration/random_encoder.py | 8 +-- .../utils/exploration/slate_epsilon_greedy.py | 4 +- rllib/utils/exploration/slate_soft_q.py | 4 +- rllib/utils/exploration/soft_q.py | 4 +- .../utils/exploration/stochastic_sampling.py | 4 +- rllib/utils/exploration/thompson_sampling.py | 4 +- .../exploration/upper_confidence_bound.py | 4 +- 56 files changed, 371 insertions(+), 68 deletions(-) diff --git a/rllib/algorithms/callbacks.py b/rllib/algorithms/callbacks.py index b74e958b0c238..0659abe4e8b4e 100644 --- a/rllib/algorithms/callbacks.py +++ b/rllib/algorithms/callbacks.py @@ -667,6 +667,7 @@ def on_train_result(self, *, algorithm=None, result: dict, **kwargs) -> None: # This Callback is used by the RE3 exploration strategy. # See rllib/examples/re3_exploration.py for details. +@Deprecated(error=False) class RE3UpdateCallbacks(DefaultCallbacks): """Update input callbacks to mutate batch with states entropy rewards.""" diff --git a/rllib/models/base_model.py b/rllib/models/base_model.py index 1b85c2ad4b866..800f1d05ee69b 100644 --- a/rllib/models/base_model.py +++ b/rllib/models/base_model.py @@ -23,6 +23,8 @@ override, ExperimentalAPI, ) +from ray.rllib.utils.deprecation import deprecation_warning, Deprecated +from ray.util import log_once ForwardOutputType = TensorDict @@ -56,6 +58,10 @@ class RecurrentModel(abc.ABC): """ def __init__(self, name: Optional[str] = None): + if log_once("recurrent_model_deprecation"): + deprecation_warning( + old="ray.rllib.models.base_model.RecurrentModel", + ) self._name = name or self.__class__.__name__ @property @@ -201,6 +207,7 @@ def _update_outputs_and_next_state( return outputs, next_state +@Deprecated(error=False) class Model(RecurrentModel): """A RecurrentModel made non-recurrent by ignoring the input/output states. @@ -299,6 +306,8 @@ class ModelIO(abc.ABC): """ def __init__(self, config: ModelConfig) -> None: + if log_once("rllib_base_model_io_deprecation"): + deprecation_warning(old="ray.rllib.models.base_model.ModelIO") self._config = config @DeveloperAPI diff --git a/rllib/models/catalog.py b/rllib/models/catalog.py index 9d4c9aca522b7..ca6b548a4efd4 100644 --- a/rllib/models/catalog.py +++ b/rllib/models/catalog.py @@ -33,6 +33,7 @@ from ray.rllib.utils.deprecation import ( DEPRECATED_VALUE, deprecation_warning, + Deprecated, ) from ray.rllib.utils.error import UnsupportedSpaceException from ray.rllib.utils.framework import try_import_tf, try_import_torch @@ -200,7 +201,7 @@ # fmt: on -@PublicAPI +@Deprecated(old="rllib.models.catalog.ModelCatalog", error=False) class ModelCatalog: """Registry of models, preprocessors, and action distributions for envs. diff --git a/rllib/models/tf/attention_net.py b/rllib/models/tf/attention_net.py index ebdfded9cb1b6..255541ad632c9 100644 --- a/rllib/models/tf/attention_net.py +++ b/rllib/models/tf/attention_net.py @@ -29,6 +29,8 @@ from ray.rllib.utils.spaces.space_utils import get_base_struct_from_space from ray.rllib.utils.tf_utils import flatten_inputs_to_1d_tensor, one_hot from ray.rllib.utils.typing import ModelConfigDict, TensorType, List +from ray.rllib.utils.deprecation import deprecation_warning +from ray.util import log_once tf1, tf, tfv = try_import_tf() @@ -58,6 +60,10 @@ def __init__( self._output_layer = tf.keras.layers.Dense( out_dim, activation=output_activation ) + if log_once("positionwise_feedforward_tf"): + deprecation_warning( + old="rllib.models.tf.attention_net.PositionwiseFeedforward", + ) def call(self, inputs: TensorType, **kwargs) -> TensorType: del kwargs @@ -98,7 +104,10 @@ def __init__( first of the two layers within the PositionwiseFeedforward. The second layer always has size=`attention_dim`. """ - + if log_once("trxl_net_tf"): + deprecation_warning( + old="rllib.models.tf.attention_net.TrXLNet", + ) super().__init__( observation_space, action_space, num_outputs, model_config, name ) @@ -233,7 +242,8 @@ def __init__( (two GRUs per Transformer unit, one after the MHA, one after the position-wise MLP). """ - + if log_once("gtrxl_net_tf"): + deprecation_warning(old="ray.rllib.models.tf.attention_net.GTrXLNet") super().__init__( observation_space, action_space, num_outputs, model_config, name ) @@ -383,7 +393,10 @@ def __init__( model_config: ModelConfigDict, name: str, ): - + if log_once("attention_wrapper_tf_deprecation"): + deprecation_warning( + old="ray.rllib.models.tf.attention_net.AttentionWrapper" + ) super().__init__(obs_space, action_space, None, model_config, name) self.use_n_prev_actions = model_config["attention_use_n_prev_actions"] diff --git a/rllib/models/tf/complex_input_net.py b/rllib/models/tf/complex_input_net.py index c4ea9ea20e3d0..ccad438e1fb6c 100644 --- a/rllib/models/tf/complex_input_net.py +++ b/rllib/models/tf/complex_input_net.py @@ -12,6 +12,8 @@ from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.spaces.space_utils import flatten_space from ray.rllib.utils.tf_utils import one_hot +from ray.rllib.utils.deprecation import deprecation_warning +from ray.util import log_once tf1, tf, tfv = try_import_tf() @@ -32,6 +34,9 @@ class ComplexInputNetwork(TFModelV2): """ def __init__(self, obs_space, action_space, num_outputs, model_config, name): + if log_once("rllib_tf_complex_input_net_deprecation"): + deprecation_warning(old="rllib.models.tf.ComplexInputNetwork") + self.original_space = ( obs_space.original_space if hasattr(obs_space, "original_space") diff --git a/rllib/models/tf/fcnet.py b/rllib/models/tf/fcnet.py index cde4de3a81e70..5589bd90a9d01 100644 --- a/rllib/models/tf/fcnet.py +++ b/rllib/models/tf/fcnet.py @@ -8,6 +8,8 @@ from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.typing import TensorType, List, ModelConfigDict from ray.rllib.utils.annotations import DeveloperAPI +from ray.rllib.utils.deprecation import deprecation_warning +from ray.util import log_once tf1, tf, tfv = try_import_tf() @@ -25,6 +27,8 @@ def __init__( model_config: ModelConfigDict, name: str, ): + if log_once("rllib_models_fcnet_deprecation"): + deprecation_warning(old="ray.rllib.models.tf.fcnet.FullyConnectedNetwork") super(FullyConnectedNetwork, self).__init__( obs_space, action_space, num_outputs, model_config, name ) diff --git a/rllib/models/tf/layers/gru_gate.py b/rllib/models/tf/layers/gru_gate.py index 7ccc0b07376c2..a41b23bbf534a 100644 --- a/rllib/models/tf/layers/gru_gate.py +++ b/rllib/models/tf/layers/gru_gate.py @@ -1,5 +1,7 @@ from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.typing import TensorType, TensorShape +from ray.rllib.utils.deprecation import deprecation_warning +from ray.util import log_once tf1, tf, tfv = try_import_tf() @@ -8,6 +10,10 @@ class GRUGate(tf.keras.layers.Layer if tf else object): def __init__(self, init_bias: float = 0.0, **kwargs): super().__init__(**kwargs) self._init_bias = init_bias + if log_once("gru_gate"): + deprecation_warning( + old="rllib.models.tf.layers.GRUGate", + ) def build(self, input_shape: TensorShape): h_shape, x_shape = input_shape diff --git a/rllib/models/tf/layers/multi_head_attention.py b/rllib/models/tf/layers/multi_head_attention.py index f9e02f2fd6b51..595608989f0b7 100644 --- a/rllib/models/tf/layers/multi_head_attention.py +++ b/rllib/models/tf/layers/multi_head_attention.py @@ -5,6 +5,8 @@ """ from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.typing import TensorType +from ray.rllib.utils.deprecation import deprecation_warning +from ray.util import log_once tf1, tf, tfv = try_import_tf() @@ -24,6 +26,10 @@ def __init__(self, out_dim: int, num_heads: int, head_dim: int, **kwargs): self._linear_layer = tf.keras.layers.TimeDistributed( tf.keras.layers.Dense(out_dim, use_bias=False) ) + if log_once("multi_head_attention"): + deprecation_warning( + old="rllib.models.tf.layers.MultiHeadAttention", + ) def call(self, inputs: TensorType) -> TensorType: L = tf.shape(inputs)[1] # length of segment diff --git a/rllib/models/tf/layers/noisy_layer.py b/rllib/models/tf/layers/noisy_layer.py index 790cc3f38efc7..5bc149d5de13b 100644 --- a/rllib/models/tf/layers/noisy_layer.py +++ b/rllib/models/tf/layers/noisy_layer.py @@ -7,6 +7,8 @@ TensorType, TensorShape, ) +from ray.rllib.utils.deprecation import deprecation_warning +from ray.util import log_once tf1, tf, tfv = try_import_tf() @@ -47,6 +49,10 @@ def __init__( self.b = None # Biases. self.sigma_w = None # Noise for weight matrix self.sigma_b = None # Noise for biases. + if log_once("noisy_layer"): + deprecation_warning( + old="rllib.models.tf.layers.NoisyLayer", + ) def build(self, input_shape: TensorShape): in_size = int(input_shape[1]) diff --git a/rllib/models/tf/layers/relative_multi_head_attention.py b/rllib/models/tf/layers/relative_multi_head_attention.py index 9a670ea6b1d80..f88486ff20516 100644 --- a/rllib/models/tf/layers/relative_multi_head_attention.py +++ b/rllib/models/tf/layers/relative_multi_head_attention.py @@ -2,6 +2,8 @@ from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.typing import TensorType +from ray.rllib.utils.deprecation import deprecation_warning +from ray.util import log_once tf1, tf, tfv = try_import_tf() @@ -36,6 +38,10 @@ def __init__( activation function. Should be relu for GTrXL. **kwargs: """ + if log_once("relative_multi_head_attention"): + deprecation_warning( + old="rllib.models.tf.layers.RelativeMultiHeadAttention", + ) super().__init__(**kwargs) # No bias or non-linearity. diff --git a/rllib/models/tf/layers/skip_connection.py b/rllib/models/tf/layers/skip_connection.py index dbf3f12945cdc..3ee1751caf36e 100644 --- a/rllib/models/tf/layers/skip_connection.py +++ b/rllib/models/tf/layers/skip_connection.py @@ -2,6 +2,8 @@ from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.typing import TensorType +from ray.rllib.utils.deprecation import deprecation_warning +from ray.util import log_once tf1, tf, tfv = try_import_tf() @@ -22,6 +24,10 @@ def __init__(self, layer: Any, fan_in_layer: Optional[Any] = None, **kwargs): layer taking two inputs: The original input and the output of `layer`. """ + if log_once("skip_connection"): + deprecation_warning( + old="rllib.models.tf.layers.SkipConnection", + ) super().__init__(**kwargs) self._layer = layer self._fan_in_layer = fan_in_layer diff --git a/rllib/models/tf/misc.py b/rllib/models/tf/misc.py index 2e293917b94bb..8d1bfc7c6ab1f 100644 --- a/rllib/models/tf/misc.py +++ b/rllib/models/tf/misc.py @@ -4,12 +4,17 @@ from ray.rllib.utils.annotations import DeveloperAPI from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.typing import TensorType +from ray.rllib.utils.deprecation import deprecation_warning +from ray.util import log_once tf1, tf, tfv = try_import_tf() @DeveloperAPI def normc_initializer(std: float = 1.0) -> Any: + if log_once("rllib_models_normc_initializer_tf_deprecation"): + deprecation_warning(old="ray.rllib.models.tf.misc.normc_initializer") + def _initializer(shape, dtype=None, partition_info=None): out = np.random.randn(*shape).astype( dtype.name if hasattr(dtype, "name") else dtype or np.float32 @@ -31,6 +36,9 @@ def conv2d( dtype: Optional[Any] = None, collections: Optional[Any] = None, ) -> TensorType: + if log_once("rllib_models_conv2d_tf_deprecation"): + deprecation_warning(old="ray.rllib.models.tf.misc.conv2d") + if dtype is None: dtype = tf.float32 @@ -76,6 +84,8 @@ def linear( initializer: Optional[Any] = None, bias_init: float = 0.0, ) -> TensorType: + if log_once("rllib_models_linear_tf_deprecation"): + deprecation_warning(old="ray.rllib.models.tf.misc.linear") w = tf1.get_variable(name + "/w", [x.get_shape()[1], size], initializer=initializer) b = tf1.get_variable( name + "/b", [size], initializer=tf1.constant_initializer(bias_init) @@ -85,4 +95,6 @@ def linear( @DeveloperAPI def flatten(x: TensorType) -> TensorType: + if log_once("rllib_models_flatten_tf_deprecation"): + deprecation_warning(old="ray.rllib.models.tf.misc.flatten") return tf.reshape(x, [-1, np.prod(x.get_shape().as_list()[1:])]) diff --git a/rllib/models/tf/noop.py b/rllib/models/tf/noop.py index 83e6453202ff3..9d2283023f705 100644 --- a/rllib/models/tf/noop.py +++ b/rllib/models/tf/noop.py @@ -2,10 +2,12 @@ from ray.rllib.models.tf.tf_modelv2 import TFModelV2 from ray.rllib.utils.annotations import override from ray.rllib.utils.framework import try_import_tf +from ray.rllib.utils.deprecation import Deprecated _, tf, _ = try_import_tf() +@Deprecated(error=False) class NoopModel(TFModelV2): """Trivial model that just returns the obs flattened. diff --git a/rllib/models/tf/primitives.py b/rllib/models/tf/primitives.py index 395ce98631355..0914f10f49408 100644 --- a/rllib/models/tf/primitives.py +++ b/rllib/models/tf/primitives.py @@ -1,12 +1,14 @@ from typing import List from ray.rllib.utils.annotations import override from ray.rllib.utils.framework import try_import_tf +from ray.rllib.utils.deprecation import Deprecated _, tf, _ = try_import_tf() # TODO (Kourosh): Find a better hierarchy for the primitives after the POC is done. +@Deprecated(error=False) class FCNet(tf.keras.Model): """A simple fully connected network. @@ -47,6 +49,7 @@ def call(self, inputs, training=None, mask=None): return self.network(inputs) +@Deprecated(error=False) class IdentityNetwork(tf.keras.Model): """A network that returns the input as the output.""" diff --git a/rllib/models/tf/recurrent_net.py b/rllib/models/tf/recurrent_net.py index 420128a1f2f72..7cb1e660dcfac 100644 --- a/rllib/models/tf/recurrent_net.py +++ b/rllib/models/tf/recurrent_net.py @@ -15,6 +15,8 @@ from ray.rllib.utils.spaces.space_utils import get_base_struct_from_space from ray.rllib.utils.tf_utils import flatten_inputs_to_1d_tensor, one_hot from ray.rllib.utils.typing import ModelConfigDict, TensorType +from ray.rllib.utils.deprecation import deprecation_warning +from ray.util.debug import log_once tf1, tf, tfv = try_import_tf() logger = logging.getLogger(__name__) @@ -67,6 +69,14 @@ def forward( """Adds time dimension to batch before sending inputs to forward_rnn(). You should implement forward_rnn() in your subclass.""" + # Creating a __init__ function that acts as a passthrough and adding the warning + # there led to errors probably due to the multiple inheritance. We encountered + # the same error if we add the Deprecated decorator. We therefore add the + # deprecation warning here. + if log_once("recurrent_network_tf"): + deprecation_warning( + old="ray.rllib.models.tf.recurrent_net.RecurrentNetwork" + ) assert seq_lens is not None flat_inputs = input_dict["obs_flat"] inputs = add_time_dimension( @@ -131,7 +141,8 @@ def __init__( model_config: ModelConfigDict, name: str, ): - + if log_once("lstm_wrapper_tf"): + deprecation_warning(old="ray.rllib.models.tf.recurrent_net.LSTMWrapper") super(LSTMWrapper, self).__init__( obs_space, action_space, None, model_config, name ) diff --git a/rllib/models/tf/tf_action_dist.py b/rllib/models/tf/tf_action_dist.py index a28ac4cce4225..9ce5ba44435ab 100644 --- a/rllib/models/tf/tf_action_dist.py +++ b/rllib/models/tf/tf_action_dist.py @@ -12,6 +12,8 @@ from ray.rllib.utils.framework import try_import_tf, try_import_tfp from ray.rllib.utils.spaces.space_utils import get_base_struct_from_space from ray.rllib.utils.typing import TensorType, List, Union, Tuple, ModelConfigDict +from ray.rllib.utils.deprecation import deprecation_warning +from ray.util import log_once tf1, tf, tfv = try_import_tf() tfp = try_import_tfp() @@ -23,6 +25,11 @@ class TFActionDistribution(ActionDistribution): @override(ActionDistribution) def __init__(self, inputs: List[TensorType], model: ModelV2): + if log_once("tf_action_dist_deprecation"): + deprecation_warning( + old="ray.rllib.models.tf.tf_action_dist.TFActionDistribution", + new="ray.rllib.models.tf.tf_distributions.TfDistribution", + ) super().__init__(inputs, model) self.sample_op = self._build_sample_op() self.sampled_action_logp_op = self.logp(self.sample_op) @@ -53,6 +60,11 @@ class Categorical(TFActionDistribution): def __init__( self, inputs: List[TensorType], model: ModelV2 = None, temperature: float = 1.0 ): + if log_once("tf_action_dist_categorical_deprecation"): + deprecation_warning( + old="ray.rllib.models.tf.tf_action_dist.Categorical", + new="ray.rllib.models.tf.tf_distributions.Categorical", + ) assert temperature > 0.0, "Categorical `temperature` must be > 0.0!" # Allow softmax formula w/ temperature != 1.0: # Divide inputs by temperature. @@ -100,6 +112,14 @@ def required_model_output_shape(action_space, model_config): @DeveloperAPI def get_categorical_class_with_temperature(t: float): """Categorical distribution class that has customized default temperature.""" + if log_once("tf_action_dist_categorical_w_temp_deprecation"): + deprecation_warning( + old=( + "ray.rllib.models.tf.tf_action_dist.get_categorical_class_with" + "_temperature" + ), + new="ray.rllib.models.tf.tf_distributions.Categorical", + ) class CategoricalWithTemperature(Categorical): def __init__(self, inputs, model=None, temperature=t): @@ -119,6 +139,11 @@ def __init__( input_lens: Union[List[int], np.ndarray, Tuple[int, ...]], action_space=None, ): + if log_once("tf_action_dist_multicat_deprecation"): + deprecation_warning( + old="ray.rllib.models.tf.tf_action_dist.MultiCategorical", + new="ray.rllib.models.tf.tf_distributions.TfMultiCategorical", + ) # skip TFActionDistribution init ActionDistribution.__init__(self, inputs, model) self.cats = [ @@ -222,6 +247,10 @@ def __init__( action_space: Optional[gym.spaces.MultiDiscrete] = None, all_slates=None, ): + if log_once("tf_action_dist_slate_multi_categorical_deprecation"): + deprecation_warning( + old="ray.rllib.models.tf.tf_action_dist.SlateMultiCategorical" + ) assert temperature > 0.0, "Categorical `temperature` must be > 0.0!" # Allow softmax formula w/ temperature != 1.0: # Divide inputs by temperature. @@ -276,6 +305,8 @@ def __init__( For high temperatures, the expected value approaches a uniform distribution. """ + if log_once("tf_action_dist_gumbel_softmax_deprecation"): + deprecation_warning(old="ray.rllib.models.tf.tf_action_dist.GumbelSoftmax") assert temperature >= 0.0 self.dist = tfp.distributions.RelaxedOneHotCategorical( temperature=temperature, logits=inputs @@ -334,6 +365,11 @@ def __init__( *, action_space: Optional[gym.spaces.Space] = None ): + if log_once("tf_action_dist_diag_gaussian_deprecation"): + deprecation_warning( + old="ray.rllib.models.tf.tf_action_dist.DiagGaussian", + new="ray.rllib.models.tf.tf_distributions.TfDiagGaussian", + ) mean, log_std = tf.split(inputs, 2, axis=1) self.mean = mean self.log_std = log_std @@ -414,6 +450,10 @@ def __init__( high: The highest possible sampling value (excluding this value). """ + if log_once("tf_action_dist_squashed_gaussian_deprecation"): + deprecation_warning( + old="ray.rllib.models.tf.tf_action_dist.SquashedGaussian" + ) assert tfp is not None mean, log_std = tf.split(inputs, 2, axis=-1) # Clip `scale` values (coming from NN) to reasonable values. @@ -508,6 +548,8 @@ def __init__( low: float = 0.0, high: float = 1.0, ): + if log_once("tf_action_dist_beta_deprecation"): + deprecation_warning(old="ray.rllib.models.tf.tf_action_dist.Beta") # Stabilize input parameters (possibly coming from a linear layer). inputs = tf.clip_by_value(inputs, log(SMALL_NUMBER), -log(SMALL_NUMBER)) inputs = tf.math.log(tf.math.exp(inputs) + 1.0) + 1.0 @@ -585,6 +627,12 @@ class MultiActionDistribution(TFActionDistribution): def __init__( self, inputs, model, *, child_distributions, input_lens, action_space, **kwargs ): + if log_once("tf_action_dist_multi_action_deprecation"): + deprecation_warning( + old="ray.rllib.models.tf.tf_action_dist.MultiActionDistribution", + new="ray.rllib.models.tf.tf_distributions.TfMultiDistribution", + ) + ActionDistribution.__init__(self, inputs, model) self.action_space_struct = get_base_struct_from_space(action_space) @@ -693,6 +741,8 @@ def __init__(self, inputs: List[TensorType], model: ModelV2): See issue #4440 for more details. """ + if log_once("tf_action_dist_dirichlet_deprecation"): + deprecation_warning(old="ray.rllib.models.tf.tf_action_dist.Dirichlet") self.epsilon = 1e-7 concentration = tf.exp(inputs) + self.epsilon self.dist = tf1.distributions.Dirichlet( diff --git a/rllib/models/tf/tf_distributions.py b/rllib/models/tf/tf_distributions.py index 26db6d642f97c..d700119dd4dd7 100644 --- a/rllib/models/tf/tf_distributions.py +++ b/rllib/models/tf/tf_distributions.py @@ -403,7 +403,7 @@ def __init__( self, child_distribution_struct: Union[Tuple, List, Dict], ): - """Initializes a TorchMultiActionDistribution object. + """Initializes a TfMultiDistribution object. Args: child_distribution_struct: Any struct @@ -528,7 +528,7 @@ def from_logits( **kwargs: Forward compatibility kwargs. Returns: - A TorchMultiActionDistribution object. + A TfMultiDistribution object. """ logit_lens = tree.flatten(input_lens) child_distribution_cls_list = tree.flatten(child_distribution_cls_struct) diff --git a/rllib/models/tf/tf_modelv2.py b/rllib/models/tf/tf_modelv2.py index 580c8bef2373a..de54696c68b7d 100644 --- a/rllib/models/tf/tf_modelv2.py +++ b/rllib/models/tf/tf_modelv2.py @@ -42,6 +42,11 @@ def __init__(self, *args, **kwargs): self.base_model = tf.keras.Model( input_layer, [output_layer, value_layer]) """ + if log_once("deprecated_tfmodelv2"): + deprecation_warning( + old="ray.rllib.models.tf.tf_modelv2.TFModelV2", + new="ray.rllib.core.rl_module.RLModule", + ) super().__init__( obs_space, action_space, num_outputs, model_config, name, framework="tf" ) diff --git a/rllib/models/tf/visionnet.py b/rllib/models/tf/visionnet.py index 44a742832765d..cfac1d9e01f4b 100644 --- a/rllib/models/tf/visionnet.py +++ b/rllib/models/tf/visionnet.py @@ -7,6 +7,8 @@ from ray.rllib.models.utils import get_activation_fn, get_filter_config from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.typing import ModelConfigDict, TensorType +from ray.rllib.utils.deprecation import deprecation_warning +from ray.util import log_once tf1, tf, tfv = try_import_tf() @@ -30,6 +32,11 @@ def __init__( model_config: ModelConfigDict, name: str, ): + if log_once("deprecated_tfmodelv2_visionnet"): + deprecation_warning( + old="ray.rllib.models.tf.visionnet.VisionNetwork", + new="ray.rllib.core.rl_module.RLModule", + ) if not model_config.get("conv_filters"): model_config["conv_filters"] = get_filter_config(obs_space.shape) diff --git a/rllib/models/torch/attention_net.py b/rllib/models/torch/attention_net.py index f72bec839e36c..ca2ee8fc11412 100644 --- a/rllib/models/torch/attention_net.py +++ b/rllib/models/torch/attention_net.py @@ -30,6 +30,8 @@ from ray.rllib.utils.spaces.space_utils import get_base_struct_from_space from ray.rllib.utils.torch_utils import flatten_inputs_to_1d_tensor, one_hot from ray.rllib.utils.typing import ModelConfigDict, TensorType, List +from ray.rllib.utils.deprecation import deprecation_warning +from ray.util import log_once torch, nn = try_import_torch() @@ -102,6 +104,8 @@ def __init__( (two GRUs per Transformer unit, one after the MHA, one after the position-wise MLP). """ + if log_once("deprecate_gtrxlnet_torch"): + deprecation_warning(old="ray.rllib.models.torch.attention_net.GTrXLNet") super().__init__( observation_space, action_space, num_outputs, model_config, name @@ -268,6 +272,10 @@ def __init__( model_config: ModelConfigDict, name: str, ): + if log_once("deprecate_attention_wrapper_torch"): + deprecation_warning( + old="ray.rllib.models.torch.attention_net.AttentionWrapper" + ) nn.Module.__init__(self) super().__init__(obs_space, action_space, None, model_config, name) diff --git a/rllib/models/torch/complex_input_net.py b/rllib/models/torch/complex_input_net.py index f3cb4311521d7..9e381dd8f55e8 100644 --- a/rllib/models/torch/complex_input_net.py +++ b/rllib/models/torch/complex_input_net.py @@ -17,6 +17,8 @@ from ray.rllib.utils.framework import try_import_torch from ray.rllib.utils.spaces.space_utils import flatten_space from ray.rllib.utils.torch_utils import one_hot +from ray.rllib.utils.deprecation import deprecation_warning +from ray.util import log_once torch, nn = try_import_torch() @@ -36,6 +38,10 @@ class ComplexInputNetwork(TorchModelV2, nn.Module): """ def __init__(self, obs_space, action_space, num_outputs, model_config, name): + if log_once("complex_input_net_deprecation_torch"): + deprecation_warning( + old="ray.rllib.models.torch.complex_input_net.ComplexInputNetwork", + ) self.original_space = ( obs_space.original_space if hasattr(obs_space, "original_space") diff --git a/rllib/models/torch/fcnet.py b/rllib/models/torch/fcnet.py index 97bb9096bb645..7bd09cfe58496 100644 --- a/rllib/models/torch/fcnet.py +++ b/rllib/models/torch/fcnet.py @@ -7,6 +7,8 @@ from ray.rllib.utils.annotations import override from ray.rllib.utils.framework import try_import_torch from ray.rllib.utils.typing import Dict, TensorType, List, ModelConfigDict +from ray.rllib.utils.deprecation import deprecation_warning +from ray.util import log_once torch, nn = try_import_torch() @@ -24,6 +26,10 @@ def __init__( model_config: ModelConfigDict, name: str, ): + if log_once("fully_connected_net_deprecation_torch_modelv2"): + deprecation_warning( + old=("ray.rllib.models.torch.fcnet." "FullyConnectedNetwork") + ) TorchModelV2.__init__( self, obs_space, action_space, num_outputs, model_config, name ) diff --git a/rllib/models/torch/mingpt.py b/rllib/models/torch/mingpt.py index 00a192e9ec913..4bf54aa2fe8e5 100644 --- a/rllib/models/torch/mingpt.py +++ b/rllib/models/torch/mingpt.py @@ -20,6 +20,7 @@ from torch.nn import functional as F from ray.rllib.utils.annotations import DeveloperAPI +from ray.rllib.utils.deprecation import Deprecated @DeveloperAPI @@ -39,6 +40,7 @@ class GPTConfig: attn_pdrop: float = 0.1 +@Deprecated(error=False) class NewGELU(nn.Module): """ Implementation of the GELU activation function currently in Google BERT @@ -60,6 +62,7 @@ def forward(self, x): ) +@Deprecated(error=False) class CausalSelfAttention(nn.Module): """ Vanilla multi-head masked self-attention layer with a projection at the end. @@ -119,6 +122,7 @@ def forward(self, x, attention_masks=None): return y, att +@Deprecated(error=False) class Block(nn.Module): """an unassuming Transformer block""" @@ -149,7 +153,7 @@ def forward(self, x, attention_masks=None): return x, att -@DeveloperAPI +@Deprecated(error=False) def configure_gpt_optimizer( model: nn.Module, learning_rate: float, @@ -215,7 +219,7 @@ def configure_gpt_optimizer( return optimizer -@DeveloperAPI +@Deprecated(error=False) class GPT(nn.Module): """GPT Transformer Model""" diff --git a/rllib/models/torch/model.py b/rllib/models/torch/model.py index 9d363cee1fd73..20a07e8005606 100644 --- a/rllib/models/torch/model.py +++ b/rllib/models/torch/model.py @@ -8,8 +8,10 @@ ) from ray.rllib.models.temp_spec_classes import TensorDict, ModelConfig from ray.rllib.models.base_model import RecurrentModel, Model, ModelIO +from ray.rllib.utils.deprecation import Deprecated +@Deprecated(error=False) class TorchModelIO(ModelIO): """Save/Load mixin for torch models @@ -40,6 +42,7 @@ def load(self, path: str) -> RecurrentModel: self.load_state_dict(torch.load(path)) +@Deprecated(error=False) class TorchRecurrentModel(RecurrentModel, nn.Module, TorchModelIO): """The base class for recurrent pytorch models. @@ -151,6 +154,7 @@ def _initial_state(self) -> TensorDict: ) +@Deprecated(error=False) class TorchModel(Model, nn.Module, TorchModelIO): """The base class for non-recurrent pytorch models. diff --git a/rllib/models/torch/noop.py b/rllib/models/torch/noop.py index 8b0705b11874f..76f0f38196f93 100644 --- a/rllib/models/torch/noop.py +++ b/rllib/models/torch/noop.py @@ -1,8 +1,10 @@ from ray.rllib.models.modelv2 import ModelV2 from ray.rllib.models.torch.torch_modelv2 import TorchModelV2 from ray.rllib.utils.annotations import override +from ray.rllib.utils.deprecation import Deprecated +@Deprecated(error=False) class TorchNoopModel(TorchModelV2): """Trivial model that just returns the obs flattened. diff --git a/rllib/models/torch/primitives.py b/rllib/models/torch/primitives.py index eaa43a6db3d40..3203c52b76a94 100644 --- a/rllib/models/torch/primitives.py +++ b/rllib/models/torch/primitives.py @@ -1,5 +1,7 @@ from typing import List, Optional from ray.rllib.utils.framework import try_import_torch +from ray.rllib.utils.deprecation import deprecation_warning +from ray.util import log_once torch, nn = try_import_torch() @@ -24,6 +26,10 @@ def __init__( output_dim: Optional[int] = None, activation: str = "linear", ): + if log_once("fc_net_torch_deprecation"): + deprecation_warning( + old="ray.rllib.models.torch.fcnet.FCNet", + ) super().__init__() self.input_dim = input_dim self.hidden_layers = hidden_layers diff --git a/rllib/models/torch/recurrent_net.py b/rllib/models/torch/recurrent_net.py index ec3f7b3b797c3..9cef2140e18c8 100644 --- a/rllib/models/torch/recurrent_net.py +++ b/rllib/models/torch/recurrent_net.py @@ -15,6 +15,8 @@ from ray.rllib.utils.spaces.space_utils import get_base_struct_from_space from ray.rllib.utils.torch_utils import flatten_inputs_to_1d_tensor, one_hot from ray.rllib.utils.typing import ModelConfigDict, TensorType +from ray.rllib.utils.deprecation import deprecation_warning +from ray.util.debug import log_once torch, nn = try_import_torch() @@ -74,6 +76,14 @@ def forward( """Adds time dimension to batch before sending inputs to forward_rnn(). You should implement forward_rnn() in your subclass.""" + # Creating a __init__ function that acts as a passthrough and adding the warning + # there led to errors probably due to the multiple inheritance. We encountered + # the same error if we add the Deprecated decorator. We therefore add the + # deprecation warning here. + if log_once("recurrent_network_tf"): + deprecation_warning( + old="ray.rllib.models.torch.recurrent_net.RecurrentNetwork" + ) flat_inputs = input_dict["obs_flat"].float() # Note that max_seq_len != input_dict.max_seq_len != seq_lens.max() # as input_dict may have extra zero-padding beyond seq_lens.max(). @@ -113,6 +123,7 @@ def forward_rnn(self, inputs, state, seq_lens): raise NotImplementedError("You must implement this for an RNN model") +@DeveloperAPI class LSTMWrapper(RecurrentNetwork, nn.Module): """An LSTM wrapper serving as an interface for ModelV2s that set use_lstm.""" @@ -124,6 +135,8 @@ def __init__( model_config: ModelConfigDict, name: str, ): + if log_once("lstm_wrapper_torch"): + deprecation_warning(old="ray.rllib.models.tf.recurrent_net.LSTMWrapper") nn.Module.__init__(self) super(LSTMWrapper, self).__init__( diff --git a/rllib/models/torch/torch_action_dist.py b/rllib/models/torch/torch_action_dist.py index 743cd323e7e94..baa605ca2436c 100644 --- a/rllib/models/torch/torch_action_dist.py +++ b/rllib/models/torch/torch_action_dist.py @@ -12,6 +12,8 @@ from ray.rllib.utils.numpy import SMALL_NUMBER, MIN_LOG_NN_OUTPUT, MAX_LOG_NN_OUTPUT from ray.rllib.utils.spaces.space_utils import get_base_struct_from_space from ray.rllib.utils.typing import TensorType, List, Union, Tuple, ModelConfigDict +from ray.rllib.utils.deprecation import deprecation_warning, Deprecated +from ray.util.debug import log_once torch, nn = try_import_torch() @@ -24,6 +26,14 @@ class TorchDistributionWrapper(ActionDistribution): def __init__(self, inputs: List[TensorType], model: TorchModelV2): # If inputs are not a torch Tensor, make them one and make sure they # are on the correct device. + if log_once("torch_distribution_wrapper_deprecation"): + deprecation_warning( + old=( + "ray.rllib.models.torch.torch_action_dist.Torch" + "DistributionWrapper" + ), + new="ray.rllib.models.torch.torch_distributions.", + ) if not isinstance(inputs, torch.Tensor): inputs = torch.from_numpy(inputs) if isinstance(model, TorchModelV2): @@ -66,6 +76,14 @@ def __init__( model: TorchModelV2 = None, temperature: float = 1.0, ): + if log_once("torch_distribution_wrapper_deprecation"): + deprecation_warning( + old=( + "ray.rllib.models.torch.torch_action_dist.Torch" + "DistributionWrapper" + ), + new=("ray.rllib.models.torch.torch_distributions." "TorchCategorical"), + ) if temperature != 1.0: assert temperature > 0.0, "Categorical `temperature` must be > 0.0!" inputs /= temperature @@ -88,6 +106,12 @@ def required_model_output_shape( @DeveloperAPI def get_torch_categorical_class_with_temperature(t: float): """TorchCategorical distribution class that has customized default temperature.""" + if log_once("torch_distribution_categorial_temp_deprecation"): + deprecation_warning( + old="ray.rllib.models.torch.torch_action_dist." + "get_torch_categorical_class_with_temperature", + new=("ray.rllib.models.torch.torch_distributions." "TorchCategorical"), + ) class TorchCategoricalWithTemperature(TorchCategorical): def __init__(self, inputs, model=None, temperature=t): @@ -108,6 +132,12 @@ def __init__( input_lens: Union[List[int], np.ndarray, Tuple[int, ...]], action_space=None, ): + if log_once("torch_distribution_multi_categorical_deprecation"): + deprecation_warning( + old=( + "ray.rllib.models.torch.torch_action_dist." "TorchMultiCategorical" + ) + ) super().__init__(inputs, model) # If input_lens is np.ndarray or list, force-make it a tuple. inputs_split = self.inputs.split(tuple(input_lens), dim=1) @@ -207,6 +237,13 @@ def __init__( action_space: Optional[gym.spaces.MultiDiscrete] = None, all_slates=None, ): + if log_once("torch_distribution_multi_categorical_slate_deprecation"): + deprecation_warning( + old=( + "ray.rllib.models.torch.torch_action_dist." + "TorchSlateMultiCategorical" + ) + ) assert temperature > 0.0, "Categorical `temperature` must be > 0.0!" # Allow softmax formula w/ temperature != 1.0: # Divide inputs by temperature. @@ -244,6 +281,11 @@ def __init__( *, action_space: Optional[gym.spaces.Space] = None ): + if log_once("torch_action_dist_diag_gaussian_deprecation"): + deprecation_warning( + old="ray.rllib.models.torch.torch_action_dist.TorchDiagGaussian", + new="ray.rllib.models.torch.torch_distributions.TorchDiagGaussian", + ) super().__init__(inputs, model) mean, log_std = torch.chunk(self.inputs, 2, dim=1) self.log_std = log_std @@ -306,6 +348,10 @@ def __init__( high: The highest possible sampling value (excluding this value). """ + if log_once("torch_action_dist_squashed_gaussian_deprecation"): + deprecation_warning( + old="ray.rllib.models.torch.torch_action_dist.TorchSquashedGaussian" + ) super().__init__(inputs, model) # Split inputs into mean and log(std). mean, log_std = torch.chunk(self.inputs, 2, dim=-1) @@ -407,6 +453,10 @@ def __init__( low: float = 0.0, high: float = 1.0, ): + if log_once("torch_action_dist_beta_deprecation"): + deprecation_warning( + old="ray.rllib.models.torch.torch_action_dist.TorchBeta" + ) super().__init__(inputs, model) # Stabilize input parameters (possibly coming from a linear layer). self.inputs = torch.clamp(self.inputs, log(SMALL_NUMBER), -log(SMALL_NUMBER)) @@ -449,7 +499,11 @@ def required_model_output_shape( return np.prod(action_space.shape, dtype=np.int32) * 2 -@DeveloperAPI +@Deprecated( + old="ray.rllib.models.torch.torch_action_dist.TorchDeterministic", + new="ray.rllib.models.torch.torch_distributions.TorchDeterminstic", + error=False, +) class TorchDeterministic(TorchDistributionWrapper): """Action distribution that returns the input values directly. @@ -498,6 +552,14 @@ def __init__(self, inputs, model, *, child_distributions, input_lens, action_spa action_space (Union[gym.spaces.Dict,gym.spaces.Tuple]): The complex and possibly nested action space. """ + if log_once("torch_multi_action_dist_deprecation"): + deprecation_warning( + old=( + "ray.rllib.models.torch.torch_action_dist." + "TorchMultiActionDistribution" + ), + new="ray.rllib.models.torch.torch_distributions.TorchMultiDistribution", + ) if not isinstance(inputs, torch.Tensor): inputs = torch.from_numpy(inputs) if isinstance(model, TorchModelV2): @@ -612,6 +674,10 @@ def __init__(self, inputs, model): See issue #4440 for more details. """ + if log_once("torch_dirichlet_action_dist_deprecation"): + deprecation_warning( + old="ray.rllib.models.torch.torch_action_dist.TorchDirichlet", + ) self.epsilon = torch.tensor(1e-7).to(inputs.device) concentration = torch.exp(inputs) + self.epsilon self.dist = torch.distributions.dirichlet.Dirichlet( diff --git a/rllib/models/torch/torch_modelv2.py b/rllib/models/torch/torch_modelv2.py index b56bf425fb6fb..27a28227d0919 100644 --- a/rllib/models/torch/torch_modelv2.py +++ b/rllib/models/torch/torch_modelv2.py @@ -5,6 +5,8 @@ from ray.rllib.utils.annotations import override, PublicAPI from ray.rllib.utils.framework import try_import_torch from ray.rllib.utils.typing import ModelConfigDict, TensorType +from ray.rllib.utils.deprecation import deprecation_warning +from ray.util import log_once _, nn = try_import_torch() @@ -36,7 +38,11 @@ def __init__(self, *args, **kwargs): self._logits = ... self._value_branch = ... """ - + if log_once("torch_modelv2_deprecation"): + deprecation_warning( + old="ray.rllib.models.torch.torch_modelv2.TorchModelV2", + new="ray.rllib.core.rl_module.rl_module.RLModule", + ) if not isinstance(self, nn.Module): raise ValueError( "Subclasses of TorchModelV2 must also inherit from " diff --git a/rllib/models/torch/visionnet.py b/rllib/models/torch/visionnet.py index 32153b1e2e807..ce755e784b9f4 100644 --- a/rllib/models/torch/visionnet.py +++ b/rllib/models/torch/visionnet.py @@ -13,6 +13,8 @@ from ray.rllib.utils.annotations import override from ray.rllib.utils.framework import try_import_torch from ray.rllib.utils.typing import ModelConfigDict, TensorType +from ray.rllib.utils.deprecation import deprecation_warning +from ray.util import log_once torch, nn = try_import_torch() @@ -28,6 +30,8 @@ def __init__( model_config: ModelConfigDict, name: str, ): + if log_once("torch_visionnet_deprecation"): + deprecation_warning(old="ray.rllib.models.torch.visionnet.VisionNetwork") if not model_config.get("conv_filters"): model_config["conv_filters"] = get_filter_config(obs_space.shape) diff --git a/rllib/models/utils.py b/rllib/models/utils.py index 69a137568fef8..2b040677498d7 100644 --- a/rllib/models/utils.py +++ b/rllib/models/utils.py @@ -2,11 +2,12 @@ from ray.rllib.core.models.specs.specs_base import TensorSpec from ray.rllib.core.models.specs.specs_dict import SpecDict -from ray.rllib.utils.annotations import DeveloperAPI, ExperimentalAPI +from ray.rllib.utils.annotations import DeveloperAPI +from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.framework import try_import_jax, try_import_tf, try_import_torch -@ExperimentalAPI +@Deprecated(error=False) def input_to_output_specs( input_specs: SpecDict, num_input_feature_dims: int, @@ -153,7 +154,7 @@ def get_activation_fn( ) -@DeveloperAPI +@Deprecated(error=False) def get_filter_config(shape): """Returns a default Conv2D filter config (list) for a given image shape. diff --git a/rllib/policy/dynamic_tf_policy.py b/rllib/policy/dynamic_tf_policy.py index fb7a1d6347fcd..7698d66516baa 100644 --- a/rllib/policy/dynamic_tf_policy.py +++ b/rllib/policy/dynamic_tf_policy.py @@ -16,7 +16,11 @@ from ray.rllib.utils import force_list from ray.rllib.utils.annotations import override, DeveloperAPI from ray.rllib.utils.debug import summarize -from ray.rllib.utils.deprecation import deprecation_warning, DEPRECATED_VALUE +from ray.rllib.utils.deprecation import ( + deprecation_warning, + DEPRECATED_VALUE, + Deprecated, +) from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.metrics import ( DIFF_NUM_GRAD_UPDATES_VS_SAMPLER_POLICY, @@ -39,7 +43,7 @@ TOWER_SCOPE_NAME = "tower" -@DeveloperAPI +@Deprecated(error=False) class DynamicTFPolicy(TFPolicy): """A TFPolicy that auto-defines placeholders dynamically at runtime. @@ -919,7 +923,7 @@ def _do_loss_init(self, train_batch: SampleBatch): return losses -@DeveloperAPI +@Deprecated(error=False) class TFMultiGPUTowerStack: """Optimizer that runs in parallel across multiple local devices. diff --git a/rllib/policy/eager_tf_policy.py b/rllib/policy/eager_tf_policy.py index cb258d9a4b945..abc49a06eea9b 100644 --- a/rllib/policy/eager_tf_policy.py +++ b/rllib/policy/eager_tf_policy.py @@ -17,8 +17,12 @@ from ray.rllib.policy.rnn_sequencing import pad_batch_to_sequences_of_same_size from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.utils import add_mixins, force_list -from ray.rllib.utils.annotations import DeveloperAPI, override -from ray.rllib.utils.deprecation import DEPRECATED_VALUE, deprecation_warning +from ray.rllib.utils.annotations import override +from ray.rllib.utils.deprecation import ( + DEPRECATED_VALUE, + deprecation_warning, + Deprecated, +) from ray.rllib.utils.error import ERR_MSG_TF_POLICY_CANNOT_SAVE_KERAS_MODEL from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.metrics import ( @@ -141,7 +145,7 @@ def _func(self_, *args, **kwargs): return _func -@DeveloperAPI +@Deprecated(error=False) class EagerTFPolicy(Policy): """Dummy class to recognize any eagerized TFPolicy by its inheritance.""" @@ -298,6 +302,7 @@ def compute_gradients(self, loss, var_list): return list(zip(self.tape.gradient(loss, var_list), var_list)) +@Deprecated(error=False) def _build_eager_tf_policy( name, loss_fn, diff --git a/rllib/policy/policy_template.py b/rllib/policy/policy_template.py index d9f9aff306e5f..933e369728a3e 100644 --- a/rllib/policy/policy_template.py +++ b/rllib/policy/policy_template.py @@ -21,7 +21,8 @@ from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.policy.torch_policy import TorchPolicy from ray.rllib.utils import add_mixins, NullContextManager -from ray.rllib.utils.annotations import override, DeveloperAPI +from ray.rllib.utils.annotations import override +from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.framework import try_import_torch, try_import_jax from ray.rllib.utils.metrics.learner_info import LEARNER_STATS_KEY from ray.rllib.utils.numpy import convert_to_numpy @@ -35,7 +36,7 @@ # TODO: Deprecate in favor of directly sub-classing from TorchPolicy. -@DeveloperAPI +@Deprecated(error=False) def build_policy_class( name: str, framework: str, diff --git a/rllib/policy/tf_mixins.py b/rllib/policy/tf_mixins.py index 49ba23a0f0395..5530387286ef5 100644 --- a/rllib/policy/tf_mixins.py +++ b/rllib/policy/tf_mixins.py @@ -11,6 +11,7 @@ from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.policy.tf_policy import TFPolicy from ray.rllib.utils.annotations import DeveloperAPI, override +from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.framework import get_variable, try_import_tf from ray.rllib.utils.schedules import PiecewiseSchedule from ray.rllib.utils.tf_utils import make_tf_callable @@ -26,7 +27,7 @@ tf1, tf, tfv = try_import_tf() -@DeveloperAPI +@Deprecated(error=False) class LearningRateSchedule: """Mixin for TFPolicy that adds a learning rate schedule.""" @@ -73,7 +74,7 @@ def optimizer(self): return tf.keras.optimizers.Adam(self.cur_lr) -@DeveloperAPI +@Deprecated(error=False) class EntropyCoeffSchedule: """Mixin for TFPolicy that adds entropy coeff decay.""" @@ -132,6 +133,7 @@ def on_global_var_update(self, global_vars): self.entropy_coeff.assign(new_val, read_value=False) +@Deprecated(error=False) class KLCoeffMixin: """Assigns the `update_kl()` and other KL-related methods to a TFPolicy. @@ -206,6 +208,7 @@ def set_state(self, state: PolicyState) -> None: super().set_state(state) +@Deprecated(error=False) class TargetNetworkMixin: """Assign the `update_target` method to the policy. @@ -281,6 +284,7 @@ def set_weights(self, weights): self.update_target(self.config.get("tau", 1.0)) +@Deprecated(error=False) class ValueNetworkMixin: """Assigns the `_value()` method to a TFPolicy. @@ -362,6 +366,7 @@ def extra_action_out_fn(self) -> Dict[str, TensorType]: return self._cached_extra_action_fetches +@Deprecated(error=False) class GradStatsMixin: def __init__(self): pass @@ -383,7 +388,7 @@ def grad_stats_fn( # TODO: find a better place for this util, since it's not technically MixIns. -@DeveloperAPI +@Deprecated(error=False) def compute_gradients( policy, optimizer: LocalOptimizer, loss: TensorType ) -> ModelGradients: diff --git a/rllib/policy/tf_policy.py b/rllib/policy/tf_policy.py index 8dd541ebf2c4e..02c7bb7bc12b8 100644 --- a/rllib/policy/tf_policy.py +++ b/rllib/policy/tf_policy.py @@ -42,7 +42,7 @@ logger = logging.getLogger(__name__) -@DeveloperAPI +@Deprecated(error=False) class TFPolicy(Policy): """An agent policy and loss implemented in TensorFlow. diff --git a/rllib/policy/tf_policy_template.py b/rllib/policy/tf_policy_template.py index df9d1bde5a52e..2d79007f9a6e6 100644 --- a/rllib/policy/tf_policy_template.py +++ b/rllib/policy/tf_policy_template.py @@ -9,8 +9,12 @@ from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.policy.tf_policy import TFPolicy from ray.rllib.utils import add_mixins, force_list -from ray.rllib.utils.annotations import override, DeveloperAPI -from ray.rllib.utils.deprecation import deprecation_warning, DEPRECATED_VALUE +from ray.rllib.utils.annotations import override +from ray.rllib.utils.deprecation import ( + deprecation_warning, + DEPRECATED_VALUE, + Deprecated, +) from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.metrics.learner_info import LEARNER_STATS_KEY from ray.rllib.utils.typing import ( @@ -26,7 +30,7 @@ tf1, tf, tfv = try_import_tf() -@DeveloperAPI +@Deprecated(error=False) def build_tf_policy( name: str, *, diff --git a/rllib/policy/torch_mixins.py b/rllib/policy/torch_mixins.py index 359eb31089a59..f09b5ce353f3a 100644 --- a/rllib/policy/torch_mixins.py +++ b/rllib/policy/torch_mixins.py @@ -2,13 +2,14 @@ from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.policy.torch_policy import TorchPolicy from ray.rllib.utils.annotations import DeveloperAPI, override +from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.framework import try_import_torch from ray.rllib.utils.schedules import PiecewiseSchedule torch, nn = try_import_torch() -@DeveloperAPI +@Deprecated(error=False) class LearningRateSchedule: """Mixin for TorchPolicy that adds a learning rate schedule.""" @@ -35,7 +36,7 @@ def on_global_var_update(self, global_vars): p["lr"] = self.cur_lr -@DeveloperAPI +@Deprecated(error=False) class EntropyCoeffSchedule: """Mixin for TorchPolicy that adds entropy coeff decay.""" @@ -74,6 +75,7 @@ def on_global_var_update(self, global_vars): ) +@Deprecated(error=False) class KLCoeffMixin: """Assigns the `update_kl()` method to a TorchPolicy. @@ -112,6 +114,7 @@ def set_state(self, state: PolicyState) -> None: super().set_state(state) +@Deprecated(error=False) class ValueNetworkMixin: """Assigns the `_value()` method to a TorchPolicy. @@ -171,6 +174,7 @@ def extra_action_out(self, input_dict, state_batches, model, action_dist): } +@Deprecated(error=False) class TargetNetworkMixin: """Mixin class adding a method for (soft) target net(s) synchronizations. diff --git a/rllib/policy/torch_policy.py b/rllib/policy/torch_policy.py index 9c2d573635e31..1f70283611de1 100644 --- a/rllib/policy/torch_policy.py +++ b/rllib/policy/torch_policy.py @@ -32,6 +32,7 @@ from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.utils import NullContextManager, force_list from ray.rllib.utils.annotations import DeveloperAPI, override +from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.error import ERR_MSG_TORCH_POLICY_CANNOT_SAVE_MODEL from ray.rllib.utils.framework import try_import_torch from ray.rllib.utils.metrics import ( @@ -61,7 +62,7 @@ logger = logging.getLogger(__name__) -@DeveloperAPI +@Deprecated(error=False) class TorchPolicy(Policy): """PyTorch specific Policy class to use with RLlib.""" @@ -1208,7 +1209,7 @@ def _worker(shard_idx, model, sample_batch, device): return outputs -@DeveloperAPI +@Deprecated(error=False) class DirectStepOptimizer: """Typesafe method for indicating `apply_gradients` can directly step the optimizers with in-place gradients. diff --git a/rllib/utils/exploration/curiosity.py b/rllib/utils/exploration/curiosity.py index 2e53fa815df31..c0a604d12bd55 100644 --- a/rllib/utils/exploration/curiosity.py +++ b/rllib/utils/exploration/curiosity.py @@ -2,7 +2,6 @@ import numpy as np from typing import Optional, Tuple, Union -from ray.rllib.utils.annotations import PublicAPI from ray.rllib.models.action_dist import ActionDistribution from ray.rllib.models.catalog import ModelCatalog from ray.rllib.models.modelv2 import ModelV2 @@ -16,6 +15,7 @@ from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.utils import NullContextManager from ray.rllib.utils.annotations import override +from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.exploration.exploration import Exploration from ray.rllib.utils.framework import try_import_tf, try_import_torch from ray.rllib.utils.from_config import from_config @@ -30,7 +30,7 @@ F = nn.functional -@PublicAPI +@Deprecated(error=False) class Curiosity(Exploration): """Implementation of: [1] Curiosity-driven Exploration by Self-supervised Prediction diff --git a/rllib/utils/exploration/epsilon_greedy.py b/rllib/utils/exploration/epsilon_greedy.py index 42f19fab47f29..8a47b2eed3686 100644 --- a/rllib/utils/exploration/epsilon_greedy.py +++ b/rllib/utils/exploration/epsilon_greedy.py @@ -4,10 +4,10 @@ import random from typing import Union, Optional -from ray.rllib.utils.annotations import PublicAPI from ray.rllib.models.torch.torch_action_dist import TorchMultiActionDistribution from ray.rllib.models.action_dist import ActionDistribution from ray.rllib.utils.annotations import override +from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.exploration.exploration import Exploration, TensorType from ray.rllib.utils.framework import try_import_tf, try_import_torch, get_variable from ray.rllib.utils.from_config import from_config @@ -19,7 +19,7 @@ torch, _ = try_import_torch() -@PublicAPI +@Deprecated(error=False) class EpsilonGreedy(Exploration): """Epsilon-greedy Exploration class that produces exploration actions. diff --git a/rllib/utils/exploration/exploration.py b/rllib/utils/exploration/exploration.py index 6203e1006b86a..c872ce6c103f2 100644 --- a/rllib/utils/exploration/exploration.py +++ b/rllib/utils/exploration/exploration.py @@ -19,7 +19,7 @@ _, nn = try_import_torch() -@DeveloperAPI +@Deprecated(error=False) class Exploration: """Implements an exploration strategy for Policies. diff --git a/rllib/utils/exploration/gaussian_noise.py b/rllib/utils/exploration/gaussian_noise.py index fb31454e72ef1..419fb6f60dc21 100644 --- a/rllib/utils/exploration/gaussian_noise.py +++ b/rllib/utils/exploration/gaussian_noise.py @@ -2,10 +2,10 @@ import numpy as np from typing import Union, Optional -from ray.rllib.utils.annotations import PublicAPI from ray.rllib.models.action_dist import ActionDistribution from ray.rllib.models.modelv2 import ModelV2 from ray.rllib.utils.annotations import override +from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.exploration.exploration import Exploration from ray.rllib.utils.exploration.random import Random from ray.rllib.utils.framework import ( @@ -23,7 +23,7 @@ torch, _ = try_import_torch() -@PublicAPI +@Deprecated(error=False) class GaussianNoise(Exploration): """An exploration that adds white noise to continuous actions. diff --git a/rllib/utils/exploration/ornstein_uhlenbeck_noise.py b/rllib/utils/exploration/ornstein_uhlenbeck_noise.py index 2b9daf6dcc048..861b053d8afd0 100644 --- a/rllib/utils/exploration/ornstein_uhlenbeck_noise.py +++ b/rllib/utils/exploration/ornstein_uhlenbeck_noise.py @@ -1,7 +1,7 @@ import numpy as np from typing import Optional, Union -from ray.rllib.utils.annotations import PublicAPI +from ray.rllib.utils.deprecation import Deprecated from ray.rllib.models.action_dist import ActionDistribution from ray.rllib.utils.annotations import override from ray.rllib.utils.exploration.gaussian_noise import GaussianNoise @@ -19,7 +19,7 @@ torch, _ = try_import_torch() -@PublicAPI +@Deprecated(error=False) class OrnsteinUhlenbeckNoise(GaussianNoise): """An exploration that adds Ornstein-Uhlenbeck noise to continuous actions. diff --git a/rllib/utils/exploration/parameter_noise.py b/rllib/utils/exploration/parameter_noise.py index 185c07ac7e6b2..9c2161b3ed308 100644 --- a/rllib/utils/exploration/parameter_noise.py +++ b/rllib/utils/exploration/parameter_noise.py @@ -2,7 +2,6 @@ import numpy as np from typing import Optional, TYPE_CHECKING, Union -from ray.rllib.utils.annotations import PublicAPI from ray.rllib.env.base_env import BaseEnv from ray.rllib.models.action_dist import ActionDistribution from ray.rllib.models.modelv2 import ModelV2 @@ -13,6 +12,7 @@ ) from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.utils.annotations import override +from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.exploration.exploration import Exploration from ray.rllib.utils.framework import get_variable, try_import_tf, try_import_torch from ray.rllib.utils.from_config import from_config @@ -26,7 +26,7 @@ torch, _ = try_import_torch() -@PublicAPI +@Deprecated(error=False) class ParameterNoise(Exploration): """An exploration that changes a Model's parameters. diff --git a/rllib/utils/exploration/per_worker_epsilon_greedy.py b/rllib/utils/exploration/per_worker_epsilon_greedy.py index ff3ecf24ceea0..24ef1409195a5 100644 --- a/rllib/utils/exploration/per_worker_epsilon_greedy.py +++ b/rllib/utils/exploration/per_worker_epsilon_greedy.py @@ -1,12 +1,12 @@ from gymnasium.spaces import Space from typing import Optional -from ray.rllib.utils.annotations import PublicAPI +from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.exploration.epsilon_greedy import EpsilonGreedy from ray.rllib.utils.schedules import ConstantSchedule -@PublicAPI +@Deprecated(error=False) class PerWorkerEpsilonGreedy(EpsilonGreedy): """A per-worker epsilon-greedy class for distributed algorithms. diff --git a/rllib/utils/exploration/per_worker_gaussian_noise.py b/rllib/utils/exploration/per_worker_gaussian_noise.py index e3e5446f8f0da..ba7b7eb2f5d8c 100644 --- a/rllib/utils/exploration/per_worker_gaussian_noise.py +++ b/rllib/utils/exploration/per_worker_gaussian_noise.py @@ -1,12 +1,12 @@ from gymnasium.spaces import Space from typing import Optional -from ray.rllib.utils.annotations import PublicAPI +from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.exploration.gaussian_noise import GaussianNoise from ray.rllib.utils.schedules import ConstantSchedule -@PublicAPI +@Deprecated(error=False) class PerWorkerGaussianNoise(GaussianNoise): """A per-worker Gaussian noise class for distributed algorithms. diff --git a/rllib/utils/exploration/per_worker_ornstein_uhlenbeck_noise.py b/rllib/utils/exploration/per_worker_ornstein_uhlenbeck_noise.py index 52aa7cff119f7..4ffaf1a8c190a 100644 --- a/rllib/utils/exploration/per_worker_ornstein_uhlenbeck_noise.py +++ b/rllib/utils/exploration/per_worker_ornstein_uhlenbeck_noise.py @@ -1,12 +1,12 @@ from gymnasium.spaces import Space from typing import Optional -from ray.rllib.utils.annotations import PublicAPI +from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.exploration.ornstein_uhlenbeck_noise import OrnsteinUhlenbeckNoise from ray.rllib.utils.schedules import ConstantSchedule -@PublicAPI +@Deprecated(error=False) class PerWorkerOrnsteinUhlenbeckNoise(OrnsteinUhlenbeckNoise): """A per-worker Ornstein Uhlenbeck noise class for distributed algorithms. diff --git a/rllib/utils/exploration/random.py b/rllib/utils/exploration/random.py index cf86f6066adb1..fca85eed0a98f 100644 --- a/rllib/utils/exploration/random.py +++ b/rllib/utils/exploration/random.py @@ -3,10 +3,10 @@ import tree # pip install dm_tree from typing import Union, Optional -from ray.rllib.utils.annotations import PublicAPI from ray.rllib.models.action_dist import ActionDistribution from ray.rllib.models.modelv2 import ModelV2 from ray.rllib.utils.annotations import override +from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.exploration.exploration import Exploration from ray.rllib.utils import force_tuple from ray.rllib.utils.framework import try_import_tf, try_import_torch, TensorType @@ -18,7 +18,7 @@ torch, _ = try_import_torch() -@PublicAPI +@Deprecated(error=False) class Random(Exploration): """A random action selector (deterministic/greedy for explore=False). diff --git a/rllib/utils/exploration/random_encoder.py b/rllib/utils/exploration/random_encoder.py index 079d202b41fbe..c7371d9491451 100644 --- a/rllib/utils/exploration/random_encoder.py +++ b/rllib/utils/exploration/random_encoder.py @@ -2,12 +2,12 @@ import numpy as np from typing import List, Optional, Union -from ray.rllib.utils.annotations import PublicAPI from ray.rllib.models.action_dist import ActionDistribution from ray.rllib.models.catalog import ModelCatalog from ray.rllib.models.modelv2 import ModelV2 from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.utils.annotations import override +from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.exploration.exploration import Exploration from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.from_config import from_config @@ -79,7 +79,7 @@ def std(self) -> float: return np.sqrt(self.var) -@PublicAPI +@Deprecated(error=False) def update_beta(beta_schedule: str, beta: float, rho: float, step: int) -> float: """Update beta based on schedule and training step. @@ -97,7 +97,7 @@ def update_beta(beta_schedule: str, beta: float, rho: float, step: int) -> float return beta -@PublicAPI +@Deprecated(error=False) def compute_states_entropy( obs_embeds: np.ndarray, embed_dim: int, k_nn: int ) -> np.ndarray: @@ -117,7 +117,7 @@ def compute_states_entropy( return dist.argsort(axis=-1)[:, :k_nn][:, -1].astype(np.float32) -@PublicAPI +@Deprecated(error=False) class RE3(Exploration): """Random Encoder for Efficient Exploration. diff --git a/rllib/utils/exploration/slate_epsilon_greedy.py b/rllib/utils/exploration/slate_epsilon_greedy.py index abc7b7c12bba0..d4cf3cf59d966 100644 --- a/rllib/utils/exploration/slate_epsilon_greedy.py +++ b/rllib/utils/exploration/slate_epsilon_greedy.py @@ -1,8 +1,8 @@ from typing import Union -from ray.rllib.utils.annotations import PublicAPI from ray.rllib.models.action_dist import ActionDistribution from ray.rllib.utils.annotations import override +from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.exploration.epsilon_greedy import EpsilonGreedy from ray.rllib.utils.exploration.exploration import TensorType from ray.rllib.utils.framework import try_import_tf, try_import_torch @@ -11,7 +11,7 @@ torch, _ = try_import_torch() -@PublicAPI +@Deprecated(error=False) class SlateEpsilonGreedy(EpsilonGreedy): @override(EpsilonGreedy) def _get_tf_exploration_action_op( diff --git a/rllib/utils/exploration/slate_soft_q.py b/rllib/utils/exploration/slate_soft_q.py index 4d43ebee70dc2..f8b859acfaafe 100644 --- a/rllib/utils/exploration/slate_soft_q.py +++ b/rllib/utils/exploration/slate_soft_q.py @@ -1,8 +1,8 @@ from typing import Union -from ray.rllib.utils.annotations import PublicAPI from ray.rllib.models.action_dist import ActionDistribution from ray.rllib.utils.annotations import override +from ray.rllib.utils.deprecation import Deprecated from ray.rllib.utils.exploration.exploration import TensorType from ray.rllib.utils.exploration.soft_q import SoftQ from ray.rllib.utils.framework import try_import_tf, try_import_torch @@ -11,7 +11,7 @@ torch, _ = try_import_torch() -@PublicAPI +@Deprecated(error=False) class SlateSoftQ(SoftQ): @override(SoftQ) def get_exploration_action( diff --git a/rllib/utils/exploration/soft_q.py b/rllib/utils/exploration/soft_q.py index 347e227301d40..c6acad4dca7ed 100644 --- a/rllib/utils/exploration/soft_q.py +++ b/rllib/utils/exploration/soft_q.py @@ -1,7 +1,7 @@ from gymnasium.spaces import Discrete, MultiDiscrete, Space from typing import Union, Optional -from ray.rllib.utils.annotations import PublicAPI +from ray.rllib.utils.deprecation import Deprecated from ray.rllib.models.action_dist import ActionDistribution from ray.rllib.models.tf.tf_action_dist import Categorical from ray.rllib.models.torch.torch_action_dist import TorchCategorical @@ -10,7 +10,7 @@ from ray.rllib.utils.framework import TensorType -@PublicAPI +@Deprecated(error=False) class SoftQ(StochasticSampling): """Special case of StochasticSampling w/ Categorical and temperature param. diff --git a/rllib/utils/exploration/stochastic_sampling.py b/rllib/utils/exploration/stochastic_sampling.py index 3c24f752c576f..748e46f69a187 100644 --- a/rllib/utils/exploration/stochastic_sampling.py +++ b/rllib/utils/exploration/stochastic_sampling.py @@ -3,7 +3,7 @@ import numpy as np from typing import Optional, Union -from ray.rllib.utils.annotations import PublicAPI +from ray.rllib.utils.deprecation import Deprecated from ray.rllib.models.action_dist import ActionDistribution from ray.rllib.models.modelv2 import ModelV2 from ray.rllib.utils.annotations import override @@ -21,7 +21,7 @@ torch, _ = try_import_torch() -@PublicAPI +@Deprecated(error=False) class StochasticSampling(Exploration): """An exploration that simply samples from a distribution. diff --git a/rllib/utils/exploration/thompson_sampling.py b/rllib/utils/exploration/thompson_sampling.py index 8dbf5d3732990..cf4f57b95c9fe 100644 --- a/rllib/utils/exploration/thompson_sampling.py +++ b/rllib/utils/exploration/thompson_sampling.py @@ -1,6 +1,6 @@ from typing import Union -from ray.rllib.utils.annotations import PublicAPI +from ray.rllib.utils.deprecation import Deprecated from ray.rllib.models.action_dist import ActionDistribution from ray.rllib.utils.annotations import override from ray.rllib.utils.exploration.exploration import Exploration @@ -12,7 +12,7 @@ tf1, tf, tfv = try_import_tf() -@PublicAPI +@Deprecated(error=False) class ThompsonSampling(Exploration): @override(Exploration) def get_exploration_action( diff --git a/rllib/utils/exploration/upper_confidence_bound.py b/rllib/utils/exploration/upper_confidence_bound.py index 68cbdd2e84de4..b154a746ce6a5 100644 --- a/rllib/utils/exploration/upper_confidence_bound.py +++ b/rllib/utils/exploration/upper_confidence_bound.py @@ -1,6 +1,6 @@ from typing import Union -from ray.rllib.utils.annotations import PublicAPI +from ray.rllib.utils.deprecation import Deprecated from ray.rllib.models.action_dist import ActionDistribution from ray.rllib.utils.annotations import override from ray.rllib.utils.exploration.exploration import Exploration @@ -12,7 +12,7 @@ tf1, tf, tfv = try_import_tf() -@PublicAPI +@Deprecated(error=False) class UpperConfidenceBound(Exploration): @override(Exploration) def get_exploration_action(