From b9befd2b87dfd6a5984ff796933cce6b5ddb05b0 Mon Sep 17 00:00:00 2001 From: sven1977 Date: Thu, 27 Oct 2022 13:19:48 +0200 Subject: [PATCH 1/3] wip Signed-off-by: sven1977 --- doc/source/rllib/rllib-training.rst | 7 ++--- rllib/algorithms/algorithm.py | 10 +++---- rllib/algorithms/algorithm_config.py | 8 ++++- rllib/algorithms/ars/ars_tf_policy.py | 11 +++---- rllib/algorithms/cql/cql.py | 2 +- rllib/algorithms/cql/cql_tf_policy.py | 6 ++-- rllib/algorithms/ddpg/ddpg_tf_policy.py | 6 ++-- rllib/algorithms/dqn/dqn_tf_policy.py | 2 +- rllib/algorithms/dqn/learner_thread.py | 2 +- rllib/algorithms/es/es_tf_policy.py | 6 ++-- rllib/algorithms/impala/impala.py | 2 +- rllib/algorithms/impala/impala_tf_policy.py | 2 +- rllib/algorithms/marwil/marwil_tf_policy.py | 2 +- rllib/algorithms/ppo/tests/test_ppo.py | 2 +- rllib/algorithms/sac/sac.py | 2 +- rllib/algorithms/sac/sac_tf_policy.py | 6 ++-- rllib/algorithms/sac/tests/test_sac.py | 12 ++++---- rllib/algorithms/slateq/slateq_tf_policy.py | 2 +- rllib/evaluation/rollout_worker.py | 4 +-- rllib/evaluation/sampler.py | 4 +-- rllib/examples/action_masking.py | 4 +-- rllib/examples/attention_net.py | 2 +- rllib/examples/autoregressive_action_dist.py | 2 +- rllib/examples/batch_norm_model.py | 2 +- rllib/examples/cartpole_lstm.py | 4 +-- rllib/examples/centralized_critic.py | 2 +- rllib/examples/centralized_critic_2.py | 2 +- .../examples/checkpoint_by_custom_criteria.py | 6 ++-- rllib/examples/complex_struct_space.py | 4 +-- rllib/examples/curriculum_learning.py | 2 +- rllib/examples/custom_env.py | 2 +- rllib/examples/custom_eval.py | 2 +- rllib/examples/custom_fast_model.py | 2 +- rllib/examples/custom_input_api.py | 2 +- rllib/examples/custom_logger.py | 6 ++-- .../examples/custom_metrics_and_callbacks.py | 2 +- rllib/examples/custom_model_api.py | 2 +- .../examples/custom_model_loss_and_metrics.py | 2 +- rllib/examples/custom_rnn_model.py | 2 +- rllib/examples/custom_train_fn.py | 2 +- rllib/examples/custom_vector_env.py | 2 +- rllib/examples/deterministic_training.py | 2 +- rllib/examples/eager_execution.py | 4 +-- rllib/examples/env_rendering_and_recording.py | 2 +- rllib/examples/fractional_gpus.py | 2 +- rllib/examples/hierarchical_training.py | 2 +- .../policy_inference_after_training.py | 4 +-- ...inference_after_training_with_attention.py | 4 +-- ...licy_inference_after_training_with_lstm.py | 4 +-- .../inference_and_serving/serve_and_rllib.py | 2 +- .../iterated_prisoners_dilemma_env.py | 2 +- rllib/examples/mobilenet_v2_with_lstm.py | 2 +- rllib/examples/multi_agent_cartpole.py | 4 +-- rllib/examples/multi_agent_custom_policy.py | 2 +- ...multi_agent_different_spaces_for_agents.py | 2 +- rllib/examples/multi_agent_two_trainers.py | 2 +- rllib/examples/nested_action_spaces.py | 2 +- .../parallel_evaluation_and_training.py | 6 ++-- rllib/examples/parametric_actions_cartpole.py | 2 +- ...ons_cartpole_embeddings_learnt_by_model.py | 2 +- rllib/examples/preprocessing_disabled.py | 2 +- ...commender_system_with_recsim_and_slateq.py | 2 +- .../remote_base_env_with_custom_api.py | 2 +- ...e_envs_with_inference_done_on_main_node.py | 2 +- rllib/examples/replay_buffer_api.py | 2 +- .../restore_1_of_n_agents_from_checkpoint.py | 2 +- .../rock_paper_scissors_multiagent.py | 3 +- .../self_play_league_based_with_open_spiel.py | 2 +- rllib/examples/self_play_with_open_spiel.py | 2 +- rllib/examples/serving/cartpole_server.py | 2 +- rllib/examples/serving/unity3d_server.py | 2 +- rllib/examples/trajectory_view_api.py | 2 +- 
rllib/examples/two_step_game.py | 2 +- rllib/examples/unity3d_env_local.py | 2 +- rllib/examples/vizdoom_with_attention_net.py | 6 ++-- rllib/execution/learner_thread.py | 2 +- rllib/models/catalog.py | 18 +++++------ rllib/models/modelv2.py | 4 +-- rllib/models/tests/test_distributions.py | 2 +- rllib/models/utils.py | 8 ++--- rllib/policy/eager_tf_policy.py | 8 ++--- rllib/policy/policy.py | 2 +- rllib/policy/rnn_sequencing.py | 4 +-- .../debug_learning_failure_git_bisect.py | 2 +- rllib/tests/run_regression_tests.py | 4 +-- rllib/tests/test_checkpoint_restore.py | 6 ++-- rllib/tests/test_eager_support.py | 2 +- .../tests/test_nn_framework_import_errors.py | 2 +- rllib/tests/test_supported_multi_agent.py | 2 +- rllib/tests/test_supported_spaces.py | 12 ++++---- rllib/utils/debug/deterministic.py | 10 +++---- rllib/utils/exploration/epsilon_greedy.py | 4 +-- rllib/utils/exploration/gaussian_noise.py | 2 +- .../exploration/ornstein_uhlenbeck_noise.py | 4 +-- rllib/utils/exploration/parameter_noise.py | 6 ++-- rllib/utils/exploration/random.py | 4 +-- .../utils/exploration/slate_epsilon_greedy.py | 2 +- .../utils/exploration/stochastic_sampling.py | 2 +- .../exploration/tests/test_parameter_noise.py | 2 +- rllib/utils/framework.py | 2 +- rllib/utils/policy.py | 2 +- rllib/utils/schedules/schedule.py | 2 +- rllib/utils/schedules/tests/test_schedules.py | 10 +++---- rllib/utils/test_utils.py | 30 +++++++------------ rllib/utils/tests/run_memory_leak_tests.py | 4 +-- .../test_framework_agnostic_components.py | 17 +++++------ rllib/utils/tf_utils.py | 4 +-- 107 files changed, 210 insertions(+), 216 deletions(-) diff --git a/doc/source/rllib/rllib-training.rst b/doc/source/rllib/rllib-training.rst index 8c11e8eba54c5..d9cb2beae2163 100644 --- a/doc/source/rllib/rllib-training.rst +++ b/doc/source/rllib/rllib-training.rst @@ -345,20 +345,19 @@ The following is a list of the common algorithm hyper-parameters: # === Deep Learning Framework Settings === # tf: TensorFlow (static-graph) # tf2: TensorFlow 2.x (eager or traced, if eager_tracing=True) - # tfe: TensorFlow eager (or traced, if eager_tracing=True) # torch: PyTorch "framework": "tf", # Enable tracing in eager mode. This greatly improves performance # (speedup ~2x), but makes it slightly harder to debug since Python # code won't be evaluated after the initial eager pass. - # Only possible if framework=[tf2|tfe]. + # Only supported if framework=tf2. "eager_tracing": False, # Maximum number of tf.function re-traces before a runtime error is raised. # This is to prevent unnoticed retraces of methods inside the # `..._eager_traced` Policy, which could slow down execution by a # factor of 4, without the user noticing what the root cause for this # slowdown could be. - # Only necessary for framework=[tf2|tfe]. + # Only supported for framework=tf2. # Set to None to ignore the re-trace count and never throw an error. "eager_max_retraces": 20, @@ -1549,7 +1548,7 @@ Eager Mode Policies built with ``build_tf_policy`` (most of the reference algorithms are) can be run in eager mode by setting the -``"framework": "[tf2|tfe]"`` / ``"eager_tracing": true`` config options or using +``"framework": "tf2"`` / ``"eager_tracing": true`` config options or using ``rllib train --config '{"framework": "tf2"}' [--trace]``. This will tell RLlib to execute the model forward pass, action distribution, loss, and stats functions in eager mode. 
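[Editorial example, not part of the patch] The rllib-training.rst hunk above documents the surviving eager-mode settings ("framework": "tf2", "eager_tracing", "eager_max_retraces"). A minimal sketch of that configuration using the Ray 2.x `AlgorithmConfig` fluent API, assuming PPO purely as an illustrative algorithm and CartPole-v1 as an illustrative environment:

    from ray.rllib.algorithms.ppo import PPOConfig

    # "tfe" is removed by this patch; "tf2" is the only eager TF specifier.
    # eager_tracing=True enables tf.function tracing (~2x speedup, harder to debug);
    # eager_max_retraces guards against silent, repeated re-traces.
    config = (
        PPOConfig()
        .environment("CartPole-v1")
        .framework("tf2", eager_tracing=True, eager_max_retraces=20)
    )
    algo = config.build()
    print(algo.train())

The same settings can still be passed as the plain config dict shown in the docs, e.g. {"framework": "tf2", "eager_tracing": True}.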
diff --git a/rllib/algorithms/algorithm.py b/rllib/algorithms/algorithm.py index 94b12bbb39ad8..a19836f20771e 100644 --- a/rllib/algorithms/algorithm.py +++ b/rllib/algorithms/algorithm.py @@ -2223,7 +2223,7 @@ def validate_framework( _tf1, _tf, _tfv = None, None, None _torch = None framework = config["framework"] - tf_valid_frameworks = {"tf", "tf2", "tfe"} + tf_valid_frameworks = {"tf", "tf2"} if framework not in tf_valid_frameworks and framework != "torch": return elif framework in tf_valid_frameworks: @@ -2257,7 +2257,7 @@ def check_if_correct_nn_framework_installed(): def resolve_tf_settings(): """Check and resolve tf settings.""" - if _tf1 and config["framework"] in ["tf2", "tfe"]: + if _tf1 and config["framework"] == "tf2": if config["framework"] == "tf2" and _tfv < 2: raise ValueError( "You configured `framework`=tf2, but your installed " @@ -2323,7 +2323,7 @@ def validate_config( # TODO: AlphaStar uses >1 GPUs differently (1 per policy actor), so this is # ok for tf2 here. # Remove this hacky check, once we have fully moved to the RLTrainer API. - if framework in ["tfe", "tf2"] and type(self).__name__ != "AlphaStar": + if framework == "tf2" and type(self).__name__ != "AlphaStar": raise ValueError( "`num_gpus` > 1 not supported yet for " "framework={}!".format(framework) @@ -2378,7 +2378,7 @@ def validate_config( # User manually set simple-optimizer to False -> Error if tf-eager. elif simple_optim_setting is False: - if framework in ["tfe", "tf2"]: + if framework == "tf2": raise ValueError( "`simple_optimizer=False` not supported for " "config.framework({})!".format(framework) @@ -2777,7 +2777,7 @@ def _run_one_training_iteration(self) -> Tuple[ResultDict, "TrainIterCtx"]: # we may have to re-enable eager mode here (gets disabled in the # thread). if ( - self.config.get("framework") in ["tf2", "tfe"] + self.config.get("framework") == "tf2" and not tf.executing_eagerly() ): tf1.enable_eager_execution() diff --git a/rllib/algorithms/algorithm_config.py b/rllib/algorithms/algorithm_config.py index f82bd772fbf29..204439346cfe0 100644 --- a/rllib/algorithms/algorithm_config.py +++ b/rllib/algorithms/algorithm_config.py @@ -588,7 +588,7 @@ def framework( methods inside the `..._eager_traced` Policy, which could slow down execution by a factor of 4, without the user noticing what the root cause for this slowdown could be. - Only necessary for framework=[tf2|tfe]. + Only necessary for framework=tf2. Set to None to ignore the re-trace count and never throw an error. tf_session_args: Configures TF for single-process operation by default. local_tf_session_args: Override the following tf session args on the local @@ -598,6 +598,12 @@ def framework( This updated AlgorithmConfig object. """ if framework is not None: + if framework == "tfe": + raise deprecation_warning( + old="AlgorithmConfig.framework('tfe')", + new="AlgorithmConfig.framework('tf2')", + error=True, + ) self.framework_str = framework if eager_tracing is not None: self.eager_tracing = eager_tracing diff --git a/rllib/algorithms/ars/ars_tf_policy.py b/rllib/algorithms/ars/ars_tf_policy.py index 56b9d67144902..0a19f9c50edc4 100644 --- a/rllib/algorithms/ars/ars_tf_policy.py +++ b/rllib/algorithms/ars/ars_tf_policy.py @@ -46,12 +46,13 @@ def __init__(self, obs_space, action_space, config): tf1.enable_eager_execution() self.sess = self.inputs = None if config.get("seed") is not None: - # Tf2.x. + # Non-static-graph TF. if config.get("framework") == "tf2": - tf.random.set_seed(config["seed"]) - # Tf-eager. 
- elif tf1 and config.get("framework") == "tfe": - tf1.set_random_seed(config["seed"]) + # Tf1.x. + if tf1: + tf1.set_random_seed(config["seed"]) + else: + tf.random.set_seed(config["seed"]) # Policy network. self.dist_class, dist_dim = ModelCatalog.get_action_dist( diff --git a/rllib/algorithms/cql/cql.py b/rllib/algorithms/cql/cql.py index e0beffee4c091..6b9fb501507dc 100644 --- a/rllib/algorithms/cql/cql.py +++ b/rllib/algorithms/cql/cql.py @@ -151,7 +151,7 @@ def validate_config(self, config: AlgorithmConfigDict) -> None: if config["simple_optimizer"] is not True and config["framework"] == "torch": config["simple_optimizer"] = True - if config["framework"] in ["tf", "tf2", "tfe"] and tfp is None: + if config["framework"] in ["tf", "tf2"] and tfp is None: logger.warning( "You need `tensorflow_probability` in order to run CQL! " "Install it via `pip install tensorflow_probability`. Your " diff --git a/rllib/algorithms/cql/cql_tf_policy.py b/rllib/algorithms/cql/cql_tf_policy.py index 705026b37863c..f6bf44087096e 100644 --- a/rllib/algorithms/cql/cql_tf_policy.py +++ b/rllib/algorithms/cql/cql_tf_policy.py @@ -299,7 +299,7 @@ def __init__(self, config): super().__init__(config) if config["lagrangian"]: # Eager mode. - if config["framework"] in ["tf2", "tfe"]: + if config["framework"] == "tf2": self._alpha_prime_optimizer = tf.keras.optimizers.Adam( learning_rate=config["optimization"]["critic_learning_rate"] ) @@ -354,7 +354,7 @@ def compute_gradients_fn( if policy.config["lagrangian"]: # Eager: Use GradientTape (which is a property of the `optimizer` # object (an OptimizerWrapper): see rllib/policy/eager_tf_policy.py). - if policy.config["framework"] in ["tf2", "tfe"]: + if policy.config["framework"] == "tf2": tape = optimizer.tape log_alpha_prime = [policy.model.log_alpha_prime] alpha_prime_grads_and_vars = list( @@ -391,7 +391,7 @@ def apply_gradients_fn(policy, optimizer, grads_and_vars): if policy.config["lagrangian"]: # Eager mode -> Just apply and return None. - if policy.config["framework"] in ["tf2", "tfe"]: + if policy.config["framework"] == "tf2": policy._alpha_prime_optimizer.apply_gradients( policy._alpha_prime_grads_and_vars ) diff --git a/rllib/algorithms/ddpg/ddpg_tf_policy.py b/rllib/algorithms/ddpg/ddpg_tf_policy.py index 2d0e3025bd428..034b8357c2edd 100644 --- a/rllib/algorithms/ddpg/ddpg_tf_policy.py +++ b/rllib/algorithms/ddpg/ddpg_tf_policy.py @@ -120,7 +120,7 @@ def optimizer( self, ) -> List["tf.keras.optimizers.Optimizer"]: """Create separate optimizers for actor & critic losses.""" - if self.config["framework"] in ["tf2", "tfe"]: + if self.config["framework"] == "tf2": self.global_step = get_variable(0, tf_name="global_step") self._actor_optimizer = tf.keras.optimizers.Adam( learning_rate=self.config["actor_lr"] @@ -143,7 +143,7 @@ def optimizer( def compute_gradients_fn( self, optimizer: LocalOptimizer, loss: TensorType ) -> ModelGradients: - if self.config["framework"] in ["tf2", "tfe"]: + if self.config["framework"] == "tf2": tape = optimizer.tape pol_weights = self.model.policy_variables() actor_grads_and_vars = list( @@ -203,7 +203,7 @@ def make_apply_op(): self._critic_grads_and_vars ) # Increment global step & apply ops. 
- if self.config["framework"] in ["tf2", "tfe"]: + if self.config["framework"] == "tf2": self.global_step.assign_add(1) return tf.no_op() else: diff --git a/rllib/algorithms/dqn/dqn_tf_policy.py b/rllib/algorithms/dqn/dqn_tf_policy.py index acdbe8015cee3..15e1587c58132 100644 --- a/rllib/algorithms/dqn/dqn_tf_policy.py +++ b/rllib/algorithms/dqn/dqn_tf_policy.py @@ -339,7 +339,7 @@ def build_q_losses(policy: Policy, model, _, train_batch: SampleBatch) -> Tensor def adam_optimizer( policy: Policy, config: AlgorithmConfigDict ) -> "tf.keras.optimizers.Optimizer": - if policy.config["framework"] in ["tf2", "tfe"]: + if policy.config["framework"] == "tf2": return tf.keras.optimizers.Adam( learning_rate=policy.cur_lr, epsilon=config["adam_epsilon"] ) diff --git a/rllib/algorithms/dqn/learner_thread.py b/rllib/algorithms/dqn/learner_thread.py index 918f2d0637f6b..168d703b380ef 100644 --- a/rllib/algorithms/dqn/learner_thread.py +++ b/rllib/algorithms/dqn/learner_thread.py @@ -36,7 +36,7 @@ def __init__(self, local_worker): def run(self): # Switch on eager mode if configured. - if self.local_worker.policy_config.get("framework") in ["tf2", "tfe"]: + if self.local_worker.policy_config.get("framework") == "tf2": tf1.enable_eager_execution() while not self.stopped: self.step() diff --git a/rllib/algorithms/es/es_tf_policy.py b/rllib/algorithms/es/es_tf_policy.py index 213934b260174..120ae6187fe90 100644 --- a/rllib/algorithms/es/es_tf_policy.py +++ b/rllib/algorithms/es/es_tf_policy.py @@ -109,10 +109,10 @@ def __init__(self, obs_space, action_space, config): self.sess = self.inputs = None if config.get("seed") is not None: # Tf2.x. - if config.get("framework") == "tf2": + if tfv == 2: tf.random.set_seed(config["seed"]) - # Tf-eager. - elif tf1 and config.get("framework") == "tfe": + # Tf1.x. + else: tf1.set_random_seed(config["seed"]) # Policy network. diff --git a/rllib/algorithms/impala/impala.py b/rllib/algorithms/impala/impala.py index 4d06325b92c34..f3c5bc5392454 100644 --- a/rllib/algorithms/impala/impala.py +++ b/rllib/algorithms/impala/impala.py @@ -524,7 +524,7 @@ def validate_config(self, config): # TODO(sven): Need to change APPO|IMPALATorchPolicies (and the # models to return separate sets of weights in order to create # the different torch optimizers). - if config["framework"] not in ["tf", "tf2", "tfe"]: + if config["framework"] not in ["tf", "tf2"]: raise ValueError( "`_separate_vf_optimizer` only supported to tf so far!" ) diff --git a/rllib/algorithms/impala/impala_tf_policy.py b/rllib/algorithms/impala/impala_tf_policy.py index 01f2e3a0cef1b..2655013277d00 100644 --- a/rllib/algorithms/impala/impala_tf_policy.py +++ b/rllib/algorithms/impala/impala_tf_policy.py @@ -226,7 +226,7 @@ def optimizer( ) -> Union["tf.keras.optimizers.Optimizer", List["tf.keras.optimizers.Optimizer"]]: config = self.config if config["opt_type"] == "adam": - if config["framework"] in ["tf2", "tfe"]: + if config["framework"] == "tf2": optim = tf.keras.optimizers.Adam(self.cur_lr) if config["_separate_vf_optimizer"]: return optim, tf.keras.optimizers.Adam(config["_lr_vf"]) diff --git a/rllib/algorithms/marwil/marwil_tf_policy.py b/rllib/algorithms/marwil/marwil_tf_policy.py index 11e549af9d53b..e16f19c933aee 100644 --- a/rllib/algorithms/marwil/marwil_tf_policy.py +++ b/rllib/algorithms/marwil/marwil_tf_policy.py @@ -98,7 +98,7 @@ def __init__( # Update averaged advantage norm. # Eager. 
- if policy.config["framework"] in ["tf2", "tfe"]: + if policy.config["framework"] == "tf2": update_term = adv_squared - policy._moving_average_sqd_adv_norm policy._moving_average_sqd_adv_norm.assign_add(rate * update_term) diff --git a/rllib/algorithms/ppo/tests/test_ppo.py b/rllib/algorithms/ppo/tests/test_ppo.py index 00013d060d50a..9cc6d0f9a86c6 100644 --- a/rllib/algorithms/ppo/tests/test_ppo.py +++ b/rllib/algorithms/ppo/tests/test_ppo.py @@ -308,7 +308,7 @@ def test_ppo_loss_function(self): check(train_batch[Postprocessing.VALUE_TARGETS], [0.50005, -0.505, 0.5]) # Calculate actual PPO loss. - if fw in ["tf2", "tfe"]: + if fw == "tf2": PPOTF2Policy.loss(policy, policy.model, Categorical, train_batch) elif fw == "torch": PPOTorchPolicy.loss( diff --git a/rllib/algorithms/sac/sac.py b/rllib/algorithms/sac/sac.py index 01a32b99f93f7..05ffa3abcd4a5 100644 --- a/rllib/algorithms/sac/sac.py +++ b/rllib/algorithms/sac/sac.py @@ -335,7 +335,7 @@ def validate_config(self, config: AlgorithmConfigDict) -> None: if config["grad_clip"] is not None and config["grad_clip"] <= 0.0: raise ValueError("`grad_clip` value must be > 0.0!") - if config["framework"] in ["tf", "tf2", "tfe"] and tfp is None: + if config["framework"] in ["tf", "tf2"] and tfp is None: logger.warning( "You need `tensorflow_probability` in order to run SAC! " "Install it via `pip install tensorflow_probability`. Your " diff --git a/rllib/algorithms/sac/sac_tf_policy.py b/rllib/algorithms/sac/sac_tf_policy.py index e8fd3783675e9..1ae92f2374376 100644 --- a/rllib/algorithms/sac/sac_tf_policy.py +++ b/rllib/algorithms/sac/sac_tf_policy.py @@ -457,7 +457,7 @@ def compute_and_clip_gradients( """ # Eager: Use GradientTape (which is a property of the `optimizer` object # (an OptimizerWrapper): see rllib/policy/eager_tf_policy.py). - if policy.config["framework"] in ["tf2", "tfe"]: + if policy.config["framework"] == "tf2": tape = optimizer.tape pol_weights = policy.model.policy_variables() actor_grads_and_vars = list( @@ -563,7 +563,7 @@ def apply_gradients( critic_apply_ops = [policy._critic_optimizer[0].apply_gradients(cgrads)] # Eager mode -> Just apply and return None. - if policy.config["framework"] in ["tf2", "tfe"]: + if policy.config["framework"] == "tf2": policy._alpha_optimizer.apply_gradients(policy._alpha_grads_and_vars) return # Tf static graph -> Return op. @@ -607,7 +607,7 @@ class ActorCriticOptimizerMixin: def __init__(self, config): # Eager mode. - if config["framework"] in ["tf2", "tfe"]: + if config["framework"] == "tf2": self.global_step = get_variable(0, tf_name="global_step") self._actor_optimizer = tf.keras.optimizers.Adam( learning_rate=config["optimization"]["actor_learning_rate"] diff --git a/rllib/algorithms/sac/tests/test_sac.py b/rllib/algorithms/sac/tests/test_sac.py index 122f22cbb2781..ffc79542a6a52 100644 --- a/rllib/algorithms/sac/tests/test_sac.py +++ b/rllib/algorithms/sac/tests/test_sac.py @@ -260,7 +260,7 @@ def test_sac_loss_function(self): # Set all weights (of all nets) to fixed values. if weights_dict is None: # Start with the tf vars-dict. 
- assert fw in ["tf2", "tf", "tfe"] + assert fw in ["tf2", "tf"] weights_dict_list = ( policy.model.variables() + policy.target_model.variables() @@ -271,9 +271,9 @@ def test_sac_loss_function(self): ) weights_dict = collector.get_weights() - if fw == "tfe": + if fw == "tf2": log_alpha = weights_dict[10] - weights_dict = self._translate_tfe_weights(weights_dict, map_) + weights_dict = self._translate_tf2_weights(weights_dict, map_) else: assert fw == "torch" # Then transfer that to torch Model. model_dict = self._translate_weights_to_torch(weights_dict, map_) @@ -339,7 +339,7 @@ def test_sac_loss_function(self): tf_a_grads = [g for g, v in tf_a_grads] tf_e_grads = [g for g, v in tf_e_grads] - elif fw == "tfe": + elif fw == "tf2": with tf.GradientTape() as tape: tf_loss(policy, policy.model, None, input_) c, a, e, t = ( @@ -680,7 +680,7 @@ def _sac_loss_helper(self, train_batch, weights, ks, log_alpha, fw, gamma, sess) framework=fw, ) else: - assert fw == "tfe" + assert fw == "tf2" q_tp1 = fc( relu( fc( @@ -733,7 +733,7 @@ def _translate_weights_to_torch(self, weights_dict, map_): return model_dict - def _translate_tfe_weights(self, weights_dict, map_): + def _translate_tf2_weights(self, weights_dict, map_): model_dict = { "default_policy/log_alpha": None, "default_policy/log_alpha_target": None, diff --git a/rllib/algorithms/slateq/slateq_tf_policy.py b/rllib/algorithms/slateq/slateq_tf_policy.py index 40ffec283a093..283a19e7ed193 100644 --- a/rllib/algorithms/slateq/slateq_tf_policy.py +++ b/rllib/algorithms/slateq/slateq_tf_policy.py @@ -344,7 +344,7 @@ def setup_late_mixins( def rmsprop_optimizer( policy: Policy, config: AlgorithmConfigDict ) -> "tf.keras.optimizers.Optimizer": - if policy.config["framework"] in ["tf2", "tfe"]: + if policy.config["framework"] == "tf2": return tf.keras.optimizers.RMSprop( learning_rate=policy.cur_lr, epsilon=config["rmsprop_epsilon"], diff --git a/rllib/evaluation/rollout_worker.py b/rllib/evaluation/rollout_worker.py index 06e7efd9b48b9..e90d5dd89c68f 100644 --- a/rllib/evaluation/rollout_worker.py +++ b/rllib/evaluation/rollout_worker.py @@ -488,7 +488,7 @@ def gen_rollouts(): if ( tf1 and ( - config.framework_str in ["tf2", "tfe"] or config.enable_tf1_exec_eagerly + config.framework_str == "tf2" or config.enable_tf1_exec_eagerly ) # This eager check is necessary for certain all-framework tests # that use tf's eager_mode() context generator. @@ -667,7 +667,7 @@ def wrap(env): ): devices = [] - if self.config.framework_str in ["tf2", "tf", "tfe"]: + if self.config.framework_str in ["tf2", "tf"]: devices = get_tf_gpu_devices() elif self.config.framework_str == "torch": devices = list(range(torch.cuda.device_count())) diff --git a/rllib/evaluation/sampler.py b/rllib/evaluation/sampler.py index e14fe8ccf00a1..e2a08e9842c36 100644 --- a/rllib/evaluation/sampler.py +++ b/rllib/evaluation/sampler.py @@ -451,10 +451,10 @@ def run(self): raise e def _run(self): - # We are in a thread: Switch on eager execution mode, iff framework==tf2|tfe. + # We are in a thread: Switch on eager execution mode, iff framework==tf2. 
if ( tf1 - and self.worker.config.framework_str in ["tf2", "tfe"] + and self.worker.config.framework_str == "tf2" and not tf1.executing_eagerly() ): tf1.enable_eager_execution() diff --git a/rllib/examples/action_masking.py b/rllib/examples/action_masking.py index 39ce7861d9061..e5ebb6dd5c8d0 100644 --- a/rllib/examples/action_masking.py +++ b/rllib/examples/action_masking.py @@ -69,7 +69,7 @@ def get_cli_args(): parser.add_argument("--num-cpus", type=int, default=0) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) @@ -131,7 +131,7 @@ def get_cli_args(): # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0. "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")), "framework": args.framework, - # Run with tracing enabled for tfe/tf2? + # Run with tracing enabled for tf2? "eager_tracing": args.eager_tracing, } diff --git a/rllib/examples/attention_net.py b/rllib/examples/attention_net.py index ae21ddb3c1bae..66b440c9e1565 100644 --- a/rllib/examples/attention_net.py +++ b/rllib/examples/attention_net.py @@ -73,7 +73,7 @@ def get_cli_args(): parser.add_argument("--num-cpus", type=int, default=3) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/autoregressive_action_dist.py b/rllib/examples/autoregressive_action_dist.py index 1206b0853b880..c27c1c58f2307 100644 --- a/rllib/examples/autoregressive_action_dist.py +++ b/rllib/examples/autoregressive_action_dist.py @@ -73,7 +73,7 @@ def get_cli_args(): ) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/batch_norm_model.py b/rllib/examples/batch_norm_model.py index 8f349519c9f0a..8e4dfaeee8733 100644 --- a/rllib/examples/batch_norm_model.py +++ b/rllib/examples/batch_norm_model.py @@ -22,7 +22,7 @@ ) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/cartpole_lstm.py b/rllib/examples/cartpole_lstm.py index 55f9db21a1316..c14ed8d9ec362 100644 --- a/rllib/examples/cartpole_lstm.py +++ b/rllib/examples/cartpole_lstm.py @@ -11,7 +11,7 @@ parser.add_argument("--num-cpus", type=int, default=0) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) @@ -70,7 +70,7 @@ "lstm_use_prev_reward": args.use_prev_reward, }, "framework": args.framework, - # Run with tracing enabled for tfe/tf2? + # Run with tracing enabled for tf2? 
"eager_tracing": args.eager_tracing, } ) diff --git a/rllib/examples/centralized_critic.py b/rllib/examples/centralized_critic.py index c343f06d90dd7..726916b8276c9 100644 --- a/rllib/examples/centralized_critic.py +++ b/rllib/examples/centralized_critic.py @@ -50,7 +50,7 @@ parser = argparse.ArgumentParser() parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/centralized_critic_2.py b/rllib/examples/centralized_critic_2.py index 74cb40d7f92ab..567b6b5f26a36 100644 --- a/rllib/examples/centralized_critic_2.py +++ b/rllib/examples/centralized_critic_2.py @@ -28,7 +28,7 @@ parser = argparse.ArgumentParser() parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/checkpoint_by_custom_criteria.py b/rllib/examples/checkpoint_by_custom_criteria.py index 93d8e25752db7..d704c3faeb4b5 100644 --- a/rllib/examples/checkpoint_by_custom_criteria.py +++ b/rllib/examples/checkpoint_by_custom_criteria.py @@ -11,7 +11,7 @@ parser.add_argument("--num-cpus", type=int, default=0) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) @@ -32,8 +32,8 @@ # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0. "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")), "framework": args.framework, - # Run with tracing enabled for tfe/tf2. - "eager_tracing": args.framework in ["tfe", "tf2"], + # Run with tracing enabled for tf2. + "eager_tracing": args.framework == "tf2", } stop = { diff --git a/rllib/examples/complex_struct_space.py b/rllib/examples/complex_struct_space.py index 2a0073524e75d..7d7773b9727f0 100644 --- a/rllib/examples/complex_struct_space.py +++ b/rllib/examples/complex_struct_space.py @@ -4,7 +4,7 @@ - using a custom environment with Repeated / struct observations - using a custom model to view the batched list observations -For PyTorch / TF eager mode, use the `--framework=[torch|tf2|tfe]` flag. +For PyTorch / TF eager mode, use the `--framework=[torch|tf2]` flag. 
""" import argparse @@ -22,7 +22,7 @@ parser = argparse.ArgumentParser() parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf2", help="The DL framework specifier.", ) diff --git a/rllib/examples/curriculum_learning.py b/rllib/examples/curriculum_learning.py index fa7004dc911ed..e260bde3c7e89 100644 --- a/rllib/examples/curriculum_learning.py +++ b/rllib/examples/curriculum_learning.py @@ -31,7 +31,7 @@ ) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/custom_env.py b/rllib/examples/custom_env.py index a692633f0446f..a1e4cf6bf706c 100644 --- a/rllib/examples/custom_env.py +++ b/rllib/examples/custom_env.py @@ -42,7 +42,7 @@ ) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/custom_eval.py b/rllib/examples/custom_eval.py index 69bb60f7750cd..f0b31c9182e31 100644 --- a/rllib/examples/custom_eval.py +++ b/rllib/examples/custom_eval.py @@ -80,7 +80,7 @@ parser.add_argument("--num-cpus", type=int, default=0) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/custom_fast_model.py b/rllib/examples/custom_fast_model.py index 2a2b8a5401f18..edd7b68487fb5 100644 --- a/rllib/examples/custom_fast_model.py +++ b/rllib/examples/custom_fast_model.py @@ -18,7 +18,7 @@ parser.add_argument("--num-cpus", type=int, default=4) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/custom_input_api.py b/rllib/examples/custom_input_api.py index 9071f9f912234..82f563a4fcead 100644 --- a/rllib/examples/custom_input_api.py +++ b/rllib/examples/custom_input_api.py @@ -24,7 +24,7 @@ ) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/custom_logger.py b/rllib/examples/custom_logger.py index d6d69047f0463..b98e297d3c2a6 100644 --- a/rllib/examples/custom_logger.py +++ b/rllib/examples/custom_logger.py @@ -24,7 +24,7 @@ parser.add_argument("--num-cpus", type=int, default=0) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) @@ -80,8 +80,8 @@ def flush(self): # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0. "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")), "framework": args.framework, - # Run with tracing enabled for tfe/tf2. - "eager_tracing": args.framework in ["tfe", "tf2"], + # Run with tracing enabled for tf2. + "eager_tracing": args.framework == "tf2", # Setting up a custom logger config. 
# ---------------------------------- # The following are different examples of custom logging setups: diff --git a/rllib/examples/custom_metrics_and_callbacks.py b/rllib/examples/custom_metrics_and_callbacks.py index c34f80101ac2a..5d113974fcc4f 100644 --- a/rllib/examples/custom_metrics_and_callbacks.py +++ b/rllib/examples/custom_metrics_and_callbacks.py @@ -20,7 +20,7 @@ parser = argparse.ArgumentParser() parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/custom_model_api.py b/rllib/examples/custom_model_api.py index 484eb55bbbfe6..a2f5f48c8ba84 100644 --- a/rllib/examples/custom_model_api.py +++ b/rllib/examples/custom_model_api.py @@ -18,7 +18,7 @@ parser = argparse.ArgumentParser() parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/custom_model_loss_and_metrics.py b/rllib/examples/custom_model_loss_and_metrics.py index 33217551e956c..fdf6424c100c6 100644 --- a/rllib/examples/custom_model_loss_and_metrics.py +++ b/rllib/examples/custom_model_loss_and_metrics.py @@ -33,7 +33,7 @@ ) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/custom_rnn_model.py b/rllib/examples/custom_rnn_model.py index 82c57073a7b55..3d1b7e8bb09ae 100644 --- a/rllib/examples/custom_rnn_model.py +++ b/rllib/examples/custom_rnn_model.py @@ -20,7 +20,7 @@ parser.add_argument("--num-cpus", type=int, default=0) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/custom_train_fn.py b/rllib/examples/custom_train_fn.py index 3ac0498fb98a8..5fed145f41885 100644 --- a/rllib/examples/custom_train_fn.py +++ b/rllib/examples/custom_train_fn.py @@ -15,7 +15,7 @@ parser = argparse.ArgumentParser() parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/custom_vector_env.py b/rllib/examples/custom_vector_env.py index 940d714ed2048..efc5a70e6feb4 100644 --- a/rllib/examples/custom_vector_env.py +++ b/rllib/examples/custom_vector_env.py @@ -16,7 +16,7 @@ ) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/deterministic_training.py b/rllib/examples/deterministic_training.py index 75065739aa8d7..692ac8d14b893 100644 --- a/rllib/examples/deterministic_training.py +++ b/rllib/examples/deterministic_training.py @@ -16,7 +16,7 @@ parser = argparse.ArgumentParser() parser.add_argument("--run", type=str, default="PPO") -parser.add_argument("--framework", choices=["tf2", "tf", "tfe", "torch"], default="tf") +parser.add_argument("--framework", choices=["tf2", "tf", "torch"], default="tf") parser.add_argument("--seed", type=int, default=42) parser.add_argument("--as-test", action="store_true") parser.add_argument("--stop-iters", type=int, default=2) diff --git a/rllib/examples/eager_execution.py b/rllib/examples/eager_execution.py index 21de00a3858d5..1c25284babb22 100644 --- a/rllib/examples/eager_execution.py +++ 
b/rllib/examples/eager_execution.py @@ -25,8 +25,8 @@ # >> x.numpy() # 0.0 -# RLlib will automatically enable eager mode, if you specify your "framework" -# config key to be either "tfe" or "tf2". +# RLlib will automatically enable eager mode, if you set +# AlgorithmConfig.framework("tf2", eager_tracing=False). # If you would like to remain in tf static-graph mode, but still use tf2.x's # new APIs (some of which are not supported by tf1.x), specify your "framework" # as "tf" and check for the version (tfv) to be 2: diff --git a/rllib/examples/env_rendering_and_recording.py b/rllib/examples/env_rendering_and_recording.py index b7d7f599c03f1..0772c6f8a7057 100644 --- a/rllib/examples/env_rendering_and_recording.py +++ b/rllib/examples/env_rendering_and_recording.py @@ -10,7 +10,7 @@ parser = argparse.ArgumentParser() parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/fractional_gpus.py b/rllib/examples/fractional_gpus.py index 6f5b2d25eaf4a..4af2bf49cdac2 100644 --- a/rllib/examples/fractional_gpus.py +++ b/rllib/examples/fractional_gpus.py @@ -24,7 +24,7 @@ ) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/hierarchical_training.py b/rllib/examples/hierarchical_training.py index d24da537ec76b..ff79dae7c32fc 100644 --- a/rllib/examples/hierarchical_training.py +++ b/rllib/examples/hierarchical_training.py @@ -36,7 +36,7 @@ parser.add_argument("--flat", action="store_true") parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/inference_and_serving/policy_inference_after_training.py b/rllib/examples/inference_and_serving/policy_inference_after_training.py index ede328bfa4834..b56e979cb328f 100644 --- a/rllib/examples/inference_and_serving/policy_inference_after_training.py +++ b/rllib/examples/inference_and_serving/policy_inference_after_training.py @@ -20,7 +20,7 @@ parser.add_argument("--num-cpus", type=int, default=0) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) @@ -66,7 +66,7 @@ # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0. "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")), "framework": args.framework, - # Run with tracing enabled for tfe/tf2? + # Run with tracing enabled for tf2? "eager_tracing": args.eager_tracing, } diff --git a/rllib/examples/inference_and_serving/policy_inference_after_training_with_attention.py b/rllib/examples/inference_and_serving/policy_inference_after_training_with_attention.py index e7ce3c14d4867..812b02f3d18fd 100644 --- a/rllib/examples/inference_and_serving/policy_inference_after_training_with_attention.py +++ b/rllib/examples/inference_and_serving/policy_inference_after_training_with_attention.py @@ -21,7 +21,7 @@ parser.add_argument("--num-cpus", type=int, default=0) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) @@ -93,7 +93,7 @@ "attention_memory_training": 10, }, "framework": args.framework, - # Run with tracing enabled for tfe/tf2? + # Run with tracing enabled for tf2? 
"eager_tracing": args.eager_tracing, } diff --git a/rllib/examples/inference_and_serving/policy_inference_after_training_with_lstm.py b/rllib/examples/inference_and_serving/policy_inference_after_training_with_lstm.py index 095fedc6241e5..900b5bfa07a05 100644 --- a/rllib/examples/inference_and_serving/policy_inference_after_training_with_lstm.py +++ b/rllib/examples/inference_and_serving/policy_inference_after_training_with_lstm.py @@ -21,7 +21,7 @@ parser.add_argument("--num-cpus", type=int, default=0) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) @@ -88,7 +88,7 @@ "lstm_use_prev_reward": args.prev_reward, }, "framework": args.framework, - # Run with tracing enabled for tfe/tf2? + # Run with tracing enabled for tf2? "eager_tracing": args.eager_tracing, } diff --git a/rllib/examples/inference_and_serving/serve_and_rllib.py b/rllib/examples/inference_and_serving/serve_and_rllib.py index 8d5f236f3de1a..74b10854a25a4 100644 --- a/rllib/examples/inference_and_serving/serve_and_rllib.py +++ b/rllib/examples/inference_and_serving/serve_and_rllib.py @@ -18,7 +18,7 @@ parser = argparse.ArgumentParser() parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/iterated_prisoners_dilemma_env.py b/rllib/examples/iterated_prisoners_dilemma_env.py index 6c21a1ede649b..a5c1ccbfff7ba 100644 --- a/rllib/examples/iterated_prisoners_dilemma_env.py +++ b/rllib/examples/iterated_prisoners_dilemma_env.py @@ -15,7 +15,7 @@ parser = argparse.ArgumentParser() parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/mobilenet_v2_with_lstm.py b/rllib/examples/mobilenet_v2_with_lstm.py index 81bc17898b398..b3d6d2139b6c0 100644 --- a/rllib/examples/mobilenet_v2_with_lstm.py +++ b/rllib/examples/mobilenet_v2_with_lstm.py @@ -25,7 +25,7 @@ parser = argparse.ArgumentParser() parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/multi_agent_cartpole.py b/rllib/examples/multi_agent_cartpole.py index f690e32879ab5..b5a2b9f6a5eb1 100644 --- a/rllib/examples/multi_agent_cartpole.py +++ b/rllib/examples/multi_agent_cartpole.py @@ -36,7 +36,7 @@ parser.add_argument("--num-cpus", type=int, default=0) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) @@ -64,7 +64,7 @@ # Register the models to use. 
if args.framework == "torch": mod1 = mod2 = TorchSharedWeightsModel - elif args.framework in ["tfe", "tf2"]: + elif args.framework == "tf2": mod1 = mod2 = TF2SharedWeightsModel else: mod1 = SharedWeightsModel1 diff --git a/rllib/examples/multi_agent_custom_policy.py b/rllib/examples/multi_agent_custom_policy.py index 731d889d4d468..6b7202fba390e 100644 --- a/rllib/examples/multi_agent_custom_policy.py +++ b/rllib/examples/multi_agent_custom_policy.py @@ -27,7 +27,7 @@ parser = argparse.ArgumentParser() parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/multi_agent_different_spaces_for_agents.py b/rllib/examples/multi_agent_different_spaces_for_agents.py index 8da1cbae1e806..54ac08f59abd4 100644 --- a/rllib/examples/multi_agent_different_spaces_for_agents.py +++ b/rllib/examples/multi_agent_different_spaces_for_agents.py @@ -78,7 +78,7 @@ def get_cli_args(): parser.add_argument("--num-cpus", type=int, default=0) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/multi_agent_two_trainers.py b/rllib/examples/multi_agent_two_trainers.py index 75a2f5896b00a..22fda86c5099f 100644 --- a/rllib/examples/multi_agent_two_trainers.py +++ b/rllib/examples/multi_agent_two_trainers.py @@ -28,7 +28,7 @@ # Use torch for both policies. parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/nested_action_spaces.py b/rllib/examples/nested_action_spaces.py index c3b116c41cc0d..1c01c4f302afc 100644 --- a/rllib/examples/nested_action_spaces.py +++ b/rllib/examples/nested_action_spaces.py @@ -16,7 +16,7 @@ ) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/parallel_evaluation_and_training.py b/rllib/examples/parallel_evaluation_and_training.py index 89ea126ec6697..8562293ffe600 100644 --- a/rllib/examples/parallel_evaluation_and_training.py +++ b/rllib/examples/parallel_evaluation_and_training.py @@ -41,7 +41,7 @@ parser.add_argument("--num-cpus", type=int, default=0) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) @@ -125,8 +125,8 @@ def on_train_result(self, *, algorithm, result, **kwargs): # Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0. "num_gpus": int(os.environ.get("RLLIB_NUM_GPUS", "0")), "framework": args.framework, - # Run with tracing enabled for tfe/tf2. - "eager_tracing": args.framework in ["tfe", "tf2"], + # Run with tracing enabled for tf2. + "eager_tracing": args.framework == "tf2", # Parallel evaluation+training config. # Switch on evaluation in parallel with training. 
"evaluation_parallel_to_training": True, diff --git a/rllib/examples/parametric_actions_cartpole.py b/rllib/examples/parametric_actions_cartpole.py index 0721baf48ca6b..0d93c62a3f897 100644 --- a/rllib/examples/parametric_actions_cartpole.py +++ b/rllib/examples/parametric_actions_cartpole.py @@ -34,7 +34,7 @@ ) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/parametric_actions_cartpole_embeddings_learnt_by_model.py b/rllib/examples/parametric_actions_cartpole_embeddings_learnt_by_model.py index 5a9e43088d004..e0f1a7c858c09 100644 --- a/rllib/examples/parametric_actions_cartpole_embeddings_learnt_by_model.py +++ b/rllib/examples/parametric_actions_cartpole_embeddings_learnt_by_model.py @@ -33,7 +33,7 @@ parser.add_argument("--run", type=str, default="PPO") parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe"], + choices=["tf", "tf2"], default="tf", help="The DL framework specifier (torch not supported yet " "due to lack of model).", diff --git a/rllib/examples/preprocessing_disabled.py b/rllib/examples/preprocessing_disabled.py index 66cc80970e287..88b3d1539a429 100644 --- a/rllib/examples/preprocessing_disabled.py +++ b/rllib/examples/preprocessing_disabled.py @@ -29,7 +29,7 @@ def get_cli_args(): parser.add_argument("--num-cpus", type=int, default=3) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/recommender_system_with_recsim_and_slateq.py b/rllib/examples/recommender_system_with_recsim_and_slateq.py index 6b555e78e7085..d454338c5ecab 100644 --- a/rllib/examples/recommender_system_with_recsim_and_slateq.py +++ b/rllib/examples/recommender_system_with_recsim_and_slateq.py @@ -34,7 +34,7 @@ ) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/remote_base_env_with_custom_api.py b/rllib/examples/remote_base_env_with_custom_api.py index 6284ffda296d0..30b4bb18034b4 100644 --- a/rllib/examples/remote_base_env_with_custom_api.py +++ b/rllib/examples/remote_base_env_with_custom_api.py @@ -22,7 +22,7 @@ ) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/remote_envs_with_inference_done_on_main_node.py b/rllib/examples/remote_envs_with_inference_done_on_main_node.py index 3ca4f3358eddd..cff0f84aff2b8 100644 --- a/rllib/examples/remote_envs_with_inference_done_on_main_node.py +++ b/rllib/examples/remote_envs_with_inference_done_on_main_node.py @@ -33,7 +33,7 @@ def get_cli_args(): # general args parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/replay_buffer_api.py b/rllib/examples/replay_buffer_api.py index 4ef20ea7b82f4..bfce6734809c1 100644 --- a/rllib/examples/replay_buffer_api.py +++ b/rllib/examples/replay_buffer_api.py @@ -23,7 +23,7 @@ parser.add_argument("--num-cpus", type=int, default=0) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git 
a/rllib/examples/restore_1_of_n_agents_from_checkpoint.py b/rllib/examples/restore_1_of_n_agents_from_checkpoint.py index 4b4018af2a9b2..ef9c345ea31f4 100644 --- a/rllib/examples/restore_1_of_n_agents_from_checkpoint.py +++ b/rllib/examples/restore_1_of_n_agents_from_checkpoint.py @@ -32,7 +32,7 @@ parser.add_argument("--num-cpus", type=int, default=0) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/rock_paper_scissors_multiagent.py b/rllib/examples/rock_paper_scissors_multiagent.py index 09c768747aa2e..6072ef8ff5446 100644 --- a/rllib/examples/rock_paper_scissors_multiagent.py +++ b/rllib/examples/rock_paper_scissors_multiagent.py @@ -37,7 +37,7 @@ parser = argparse.ArgumentParser() parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) @@ -159,7 +159,6 @@ def run_with_custom_entropy_loss(args, stop): "torch": PGTorchPolicy, "tf": PGTF1Policy, "tf2": PGTF2Policy, - "tfe": PGTF2Policy, }[args.framework] class EntropyPolicy(policy_cls): diff --git a/rllib/examples/self_play_league_based_with_open_spiel.py b/rllib/examples/self_play_league_based_with_open_spiel.py index 27ba77fb0fe63..6ee1ee0e06c6b 100644 --- a/rllib/examples/self_play_league_based_with_open_spiel.py +++ b/rllib/examples/self_play_league_based_with_open_spiel.py @@ -50,7 +50,7 @@ parser = argparse.ArgumentParser() parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/self_play_with_open_spiel.py b/rllib/examples/self_play_with_open_spiel.py index 1f6f76da35830..7259316d72a12 100644 --- a/rllib/examples/self_play_with_open_spiel.py +++ b/rllib/examples/self_play_with_open_spiel.py @@ -37,7 +37,7 @@ parser = argparse.ArgumentParser() parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/serving/cartpole_server.py b/rllib/examples/serving/cartpole_server.py index 573900a3d7dd5..9e92af8895074 100755 --- a/rllib/examples/serving/cartpole_server.py +++ b/rllib/examples/serving/cartpole_server.py @@ -83,7 +83,7 @@ def get_cli_args(): parser.add_argument("--num-cpus", type=int, default=3) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/serving/unity3d_server.py b/rllib/examples/serving/unity3d_server.py index 34edc92440918..d98409c026e26 100755 --- a/rllib/examples/serving/unity3d_server.py +++ b/rllib/examples/serving/unity3d_server.py @@ -48,7 +48,7 @@ ) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/trajectory_view_api.py b/rllib/examples/trajectory_view_api.py index 1ef6633b7736e..dda46aebae4af 100644 --- a/rllib/examples/trajectory_view_api.py +++ b/rllib/examples/trajectory_view_api.py @@ -21,7 +21,7 @@ ) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/two_step_game.py 
b/rllib/examples/two_step_game.py index f8d8941804542..3d46d7dfb44cd 100644 --- a/rllib/examples/two_step_game.py +++ b/rllib/examples/two_step_game.py @@ -30,7 +30,7 @@ ) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/unity3d_env_local.py b/rllib/examples/unity3d_env_local.py index 300a992090521..df69718aafbda 100644 --- a/rllib/examples/unity3d_env_local.py +++ b/rllib/examples/unity3d_env_local.py @@ -93,7 +93,7 @@ ) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) diff --git a/rllib/examples/vizdoom_with_attention_net.py b/rllib/examples/vizdoom_with_attention_net.py index abce1f04c3a70..331e1d21e120f 100644 --- a/rllib/examples/vizdoom_with_attention_net.py +++ b/rllib/examples/vizdoom_with_attention_net.py @@ -8,7 +8,7 @@ parser.add_argument("--num-cpus", type=int, default=0) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default="tf", help="The DL framework specifier.", ) @@ -54,8 +54,8 @@ "attention_use_n_prev_rewards": args.use_n_prev_rewards, }, "framework": args.framework, - # Run with tracing enabled for tfe/tf2. - "eager_tracing": args.framework in ["tfe", "tf2"], + # Run with tracing enabled for tf2. + "eager_tracing": args.framework == "tf2", "num_workers": args.num_workers, "vf_loss_coeff": 0.01, } diff --git a/rllib/execution/learner_thread.py b/rllib/execution/learner_thread.py index 8e412f14754fd..de79418d88774 100644 --- a/rllib/execution/learner_thread.py +++ b/rllib/execution/learner_thread.py @@ -68,7 +68,7 @@ def __init__( def run(self) -> None: # Switch on eager mode if configured. - if self.local_worker.policy_config.get("framework") in ["tf2", "tfe"]: + if self.local_worker.policy_config.get("framework") == "tf2": tf1.enable_eager_execution() while not self.stopped: self.step() diff --git a/rllib/models/catalog.py b/rllib/models/catalog.py index 8cdb3e2bad460..fd2d883d0f4fd 100644 --- a/rllib/models/catalog.py +++ b/rllib/models/catalog.py @@ -224,7 +224,7 @@ def get_action_dist( dist_type (Optional[Union[str, Type[ActionDistribution]]]): Identifier of the action distribution (str) interpreted as a hint or the actual ActionDistribution class to use. - framework: One of "tf2", "tf", "tfe", "torch", or "jax". + framework: One of "tf2", "tf", "torch", or "jax". kwargs: Optional kwargs to pass on to the Distribution's constructor. @@ -428,7 +428,7 @@ def get_model_v2( num_outputs: The size of the output vector of the model. model_config: The "model" sub-config dict within the Trainer's config dict. - framework: One of "tf2", "tf", "tfe", "torch", or "jax". + framework: One of "tf2", "tf", "torch", or "jax". name: Name (scope) for the model. model_interface: Interface required for the model default_model: Override the default class for the model. This @@ -472,7 +472,7 @@ def get_model_v2( # Only allow ModelV2 or native keras Models. 
if not issubclass(model_cls, ModelV2): - if framework not in ["tf", "tf2", "tfe"] or not issubclass( + if framework not in ["tf", "tf2"] or not issubclass( model_cls, tf.keras.Model ): raise ValueError( @@ -483,7 +483,7 @@ def get_model_v2( logger.info("Wrapping {} as {}".format(model_cls, model_interface)) model_cls = ModelCatalog._wrap_if_needed(model_cls, model_interface) - if framework in ["tf2", "tf", "tfe"]: + if framework in ["tf2", "tf"]: # Try wrapping custom model with LSTM/attention, if required. if model_config.get("use_lstm") or model_config.get("use_attention"): from ray.rllib.models.tf.attention_net import ( @@ -644,14 +644,14 @@ def track_var_creation(next_creator, **kw): raise e else: raise NotImplementedError( - "`framework` must be 'tf2|tf|tfe|torch', but is " + "`framework` must be 'tf2|tf|torch', but is " "{}!".format(framework) ) return instance # Find a default TFModelV2 and wrap with model_interface. - if framework in ["tf", "tfe", "tf2"]: + if framework in ["tf", "tf2"]: v2_class = None # Try to get a default v2 model. if not model_config.get("custom_model"): @@ -755,7 +755,7 @@ def track_var_creation(next_creator, **kw): ) else: raise NotImplementedError( - "`framework` must be 'tf2|tf|tfe|torch', but is " + "`framework` must be 'tf2|tf|torch', but is " "{}!".format(framework) ) @@ -897,7 +897,7 @@ def _get_v2_model_class( Keras_FCNet = None Keras_VisionNet = None - if framework in ["tf2", "tf", "tfe"]: + if framework in ["tf2", "tf"]: from ray.rllib.models.tf.fcnet import ( FullyConnectedNetwork as FCNet, Keras_FullyConnectedNetwork as Keras_FCNet, @@ -998,7 +998,7 @@ def _validate_config( within the Trainer's config dict. action_space: The action space of the model, whose config are validated. - framework: One of "jax", "tf2", "tf", "tfe", or "torch". + framework: One of "jax", "tf2", "tf", or "torch". Raises: ValueError: If something is wrong with the given config. diff --git a/rllib/models/modelv2.py b/rllib/models/modelv2.py index ef8aaf11e6d23..5b931748b0928 100644 --- a/rllib/models/modelv2.py +++ b/rllib/models/modelv2.py @@ -367,7 +367,7 @@ def from_batch( @DeveloperAPI def flatten(obs: TensorType, framework: str) -> TensorType: """Flatten the given tensor.""" - if framework in ["tf2", "tf", "tfe"]: + if framework in ["tf2", "tf"]: return tf1.keras.layers.Flatten()(obs) elif framework == "torch": assert torch is not None @@ -398,7 +398,7 @@ def restore_original_dimensions( observation space. """ - if tensorlib in ["tf", "tfe", "tf2"]: + if tensorlib in ["tf", "tf2"]: assert tf is not None tensorlib = tf elif tensorlib == "torch": diff --git a/rllib/models/tests/test_distributions.py b/rllib/models/tests/test_distributions.py index e991637625bc1..2453356e006d5 100644 --- a/rllib/models/tests/test_distributions.py +++ b/rllib/models/tests/test_distributions.py @@ -492,7 +492,7 @@ def test_beta(self): def test_gumbel_softmax(self): """Tests the GumbelSoftmax ActionDistribution (tf + eager only).""" for fw, sess in framework_iterator( - frameworks=("tf2", "tf", "tfe"), session=True + frameworks=("tf2", "tf"), session=True ): batch_size = 1000 num_categories = 5 diff --git a/rllib/models/utils.py b/rllib/models/utils.py index 28d582d07c4be..3a63e79ea8ca6 100644 --- a/rllib/models/utils.py +++ b/rllib/models/utils.py @@ -11,7 +11,7 @@ def get_activation_fn(name: Optional[str] = None, framework: str = "tf"): Args: name (Optional[str]): One of "relu" (default), "tanh", "elu", "swish", or "linear" (same as None). - framework: One of "jax", "tf|tfe|tf2" or "torch". 
+ framework: One of "jax", "tf|tf2" or "torch". Returns: A framework-specific activtion function. e.g. tf.nn.tanh or @@ -52,7 +52,7 @@ def get_activation_fn(name: Optional[str] = None, framework: str = "tf"): elif name == "elu": return jax.nn.elu else: - assert framework in ["tf", "tfe", "tf2"], "Unsupported framework `{}`!".format( + assert framework in ["tf", "tf2"], "Unsupported framework `{}`!".format( framework ) if name in ["linear", None]: @@ -146,7 +146,7 @@ def get_initializer(name, framework="tf"): Args: name: One of "xavier_uniform" (default), "xavier_normal". - framework: One of "jax", "tf|tfe|tf2" or "torch". + framework: One of "jax", "tf|tf2" or "torch". Returns: A framework-specific initializer function, e.g. @@ -177,7 +177,7 @@ def get_initializer(name, framework="tf"): elif name == "xavier_normal": return nn.init.xavier_normal_ else: - assert framework in ["tf", "tfe", "tf2"], "Unsupported framework `{}`!".format( + assert framework in ["tf", "tf2"], "Unsupported framework `{}`!".format( framework ) tf1, tf, tfv = try_import_tf() diff --git a/rllib/policy/eager_tf_policy.py b/rllib/policy/eager_tf_policy.py index b12a05745d59b..b867670a06c8b 100644 --- a/rllib/policy/eager_tf_policy.py +++ b/rllib/policy/eager_tf_policy.py @@ -142,7 +142,7 @@ def _traced_eager_policy(eager_policy_cls): """Wrapper class that enables tracing for all eager policy methods. This is enabled by the `--trace`/`eager_tracing=True` config when - framework=[tf2|tfe]. + framework=tf2. """ class TracedEagerPolicy(eager_policy_cls): @@ -296,8 +296,8 @@ def _build_eager_tf_policy( much simpler, but has lower performance. You shouldn't need to call this directly. Rather, prefer to build a TF - graph policy and use set {"framework": "tfe"} in the Algorithm's config to have - it automatically be converted to an eager policy. + graph policy and use set `.framework("tf2", eager_tracing=False) in your + AlgorithmConfig to have it automatically be converted to an eager policy. This has the same signature as build_tf_policy().""" @@ -322,7 +322,7 @@ def __init__(self, observation_space, action_space, config): # have been activated yet. if not tf1.executing_eagerly(): tf1.enable_eager_execution() - self.framework = config.get("framework", "tfe") + self.framework = config.get("framework", "tf2") EagerTFPolicy.__init__(self, observation_space, action_space, config) # Global timestep should be a tensor. diff --git a/rllib/policy/policy.py b/rllib/policy/policy.py index 592a9bc7ba44a..d092d081d7eb9 100644 --- a/rllib/policy/policy.py +++ b/rllib/policy/policy.py @@ -995,7 +995,7 @@ def on_global_var_update(self, global_vars: Dict[str, TensorType]) -> None: # steps). # Make sure, we keep global_timestep as a Tensor for tf-eager # (leads to memory leaks if not doing so). - if self.framework in ["tfe", "tf2"]: + if self.framework == "tf2": self.global_timestep.assign(global_vars["timestep"]) else: self.global_timestep = global_vars["timestep"] diff --git a/rllib/policy/rnn_sequencing.py b/rllib/policy/rnn_sequencing.py index 2b11dd547923a..753109cb4b295 100644 --- a/rllib/policy/rnn_sequencing.py +++ b/rllib/policy/rnn_sequencing.py @@ -173,7 +173,7 @@ def add_time_dimension( A, B, C are sequence elements and * denotes padding. seq_lens: A 1D tensor of sequence lengths, denoting the non-padded length in timesteps of each rollout in the batch. - framework: The framework string ("tf2", "tf", "tfe", "torch"). + framework: The framework string ("tf2", "tf", "torch"). 
time_major: Whether data should be returned in time-major (TxB) format or not (BxT). @@ -184,7 +184,7 @@ def add_time_dimension( # Sequence lengths have to be specified for LSTM batch inputs. The # input batch must be padded to the max seq length given here. That is, # batch_size == len(seq_lens) * max(seq_lens) - if framework in ["tf2", "tf", "tfe"]: + if framework in ["tf2", "tf"]: assert time_major is False, "time-major not supported yet for tf!" padded_batch_size = tf.shape(padded_inputs)[0] # Dynamically reshape the padded batch to introduce a time dimension. diff --git a/rllib/tests/git_bisect/debug_learning_failure_git_bisect.py b/rllib/tests/git_bisect/debug_learning_failure_git_bisect.py index b0cdbb557f672..d05ee85bdf276 100644 --- a/rllib/tests/git_bisect/debug_learning_failure_git_bisect.py +++ b/rllib/tests/git_bisect/debug_learning_failure_git_bisect.py @@ -42,7 +42,7 @@ ) parser.add_argument( "--framework", - choices=["tf", "tf2", "tfe", "torch"], + choices=["tf", "tf2", "torch"], default=None, help="The DL framework specifier.", ) diff --git a/rllib/tests/run_regression_tests.py b/rllib/tests/run_regression_tests.py index cc11436b44630..996d9708a3caa 100644 --- a/rllib/tests/run_regression_tests.py +++ b/rllib/tests/run_regression_tests.py @@ -29,7 +29,7 @@ parser = argparse.ArgumentParser() parser.add_argument( "--framework", - choices=["jax", "tf2", "tf", "tfe", "torch"], + choices=["jax", "tf2", "tf", "torch"], default="tf", help="The deep learning framework to use.", ) @@ -108,7 +108,7 @@ continue # Always run with eager-tracing when framework=tf2 if not in local-mode. - if args.framework in ["tf2", "tfe"] and not args.local_mode: + if args.framework == "tf2" and not args.local_mode: exp["config"]["eager_tracing"] = True # Print out the actual config. diff --git a/rllib/tests/test_checkpoint_restore.py b/rllib/tests/test_checkpoint_restore.py index ad6308ea02adc..7a80829091ade 100644 --- a/rllib/tests/test_checkpoint_restore.py +++ b/rllib/tests/test_checkpoint_restore.py @@ -71,13 +71,13 @@ def get_mean_action(alg, obs): } -def ckpt_restore_test(alg_name, tfe=False, object_store=False, replay_buffer=False): +def ckpt_restore_test(alg_name, tf2=False, object_store=False, replay_buffer=False): config = CONFIGS[alg_name].copy() # If required, store replay buffer data in checkpoints as well. if replay_buffer: config["store_buffer_in_checkpoints"] = True - frameworks = (["tf2"] if tfe else []) + ["torch", "tf"] + frameworks = (["tf2"] if tf2 else []) + ["torch", "tf"] for fw in framework_iterator(config, frameworks=frameworks): for use_object_store in [False, True] if object_store else [False]: print("use_object_store={}".format(use_object_store)) @@ -108,7 +108,7 @@ def ckpt_restore_test(alg_name, tfe=False, object_store=False, replay_buffer=Fal if optim_state: s2 = alg2.get_policy().get_state().get("_optimizer_variables") # Tf -> Compare states 1:1. - if fw in ["tf2", "tf", "tfe"]: + if fw in ["tf2", "tf"]: check(s2, optim_state) # For torch, optimizers have state_dicts with keys=params, # which are different for the two models (ignore these diff --git a/rllib/tests/test_eager_support.py b/rllib/tests/test_eager_support.py index 80afe23f60b5e..dcbf8238a73c9 100644 --- a/rllib/tests/test_eager_support.py +++ b/rllib/tests/test_eager_support.py @@ -10,7 +10,7 @@ def check_support(alg, config, test_eager=False, test_trace=True): - config["framework"] = "tfe" + config["framework"] = "tf2" config["log_level"] = "ERROR" # Test both continuous and discrete actions. 
for cont in [True, False]: diff --git a/rllib/tests/test_nn_framework_import_errors.py b/rllib/tests/test_nn_framework_import_errors.py index beb627abe2cf5..2f490743cc411 100644 --- a/rllib/tests/test_nn_framework_import_errors.py +++ b/rllib/tests/test_nn_framework_import_errors.py @@ -14,7 +14,7 @@ def test_dont_import_tf_error(): os.environ["RLLIB_TEST_NO_TF_IMPORT"] = "1" config = {} - for _ in framework_iterator(config, frameworks=("tf", "tf2", "tfe")): + for _ in framework_iterator(config, frameworks=("tf", "tf2")): with pytest.raises( ImportError, match="However, there was no installation found." ): diff --git a/rllib/tests/test_supported_multi_agent.py b/rllib/tests/test_supported_multi_agent.py index a1142369ee42d..ef634ff452c33 100644 --- a/rllib/tests/test_supported_multi_agent.py +++ b/rllib/tests/test_supported_multi_agent.py @@ -33,7 +33,7 @@ def policy_mapping_fn(agent_id, episode, worker, **kwargs): } for fw in framework_iterator(config): - if fw in ["tf2", "tfe"] and alg in ["A3C", "APEX", "APEX_DDPG", "IMPALA"]: + if fw == "tf2" and alg in ["A3C", "APEX", "APEX_DDPG", "IMPALA"]: continue if alg in ["DDPG", "APEX_DDPG", "SAC"]: a = get_algorithm_class(alg)(config=config, env="multi_agent_mountaincar") diff --git a/rllib/tests/test_supported_spaces.py b/rllib/tests/test_supported_spaces.py index 81e78eabe179f..cabdcce6c4015 100644 --- a/rllib/tests/test_supported_spaces.py +++ b/rllib/tests/test_supported_spaces.py @@ -48,7 +48,7 @@ } -def check_support(alg, config, train=True, check_bounds=False, tfe=False): +def check_support(alg, config, train=True, check_bounds=False, tf2=False): config["log_level"] = "ERROR" config["train_batch_size"] = 10 config["rollout_fragment_length"] = 10 @@ -115,8 +115,8 @@ def _do_check(alg, config, a_name, o_name): print(stat) frameworks = ("tf", "torch") - if tfe: - frameworks += ("tf2", "tfe") + if tf2: + frameworks += ("tf2",) for _ in framework_iterator(config, frameworks=frameworks): # Zip through action- and obs-spaces. for a_name, o_name in zip( @@ -160,11 +160,11 @@ def test_ppo(self): "num_sgd_iter": 1, "sgd_minibatch_size": 10, } - check_support("PPO", config, check_bounds=True, tfe=True) + check_support("PPO", config, check_bounds=True, tf2=True) def test_pg(self): config = {"num_workers": 1, "optimizer": {}} - check_support("PG", config, train=False, check_bounds=True, tfe=True) + check_support("PG", config, train=False, check_bounds=True, tf2=True) class TestSupportedSpacesOffPolicy(unittest.TestCase): @@ -197,7 +197,7 @@ def test_dqn(self): "capacity": 1000, }, } - check_support("DQN", config, tfe=True) + check_support("DQN", config, tf2=True) def test_sac(self): check_support( diff --git a/rllib/utils/debug/deterministic.py b/rllib/utils/debug/deterministic.py index 1976046ed7634..f41fdabf323bf 100644 --- a/rllib/utils/debug/deterministic.py +++ b/rllib/utils/debug/deterministic.py @@ -49,11 +49,11 @@ def update_global_seed_if_necessary( torch.set_deterministic(True) # This is only for Convolution no problem. torch.backends.cudnn.deterministic = True - elif framework == "tf2" or framework == "tfe": - tf1, tf, _ = try_import_tf() + elif framework == "tf2": + tf1, tf, tfv = try_import_tf() # Tf2.x. - if framework == "tf2": + if tfv == 2: tf.random.set_seed(seed) - # Tf-eager. - elif framework == "tfe": + # Tf1.x. 
+ else: tf1.set_random_seed(seed) diff --git a/rllib/utils/exploration/epsilon_greedy.py b/rllib/utils/exploration/epsilon_greedy.py index 1628b04e1a30c..102ae47cf6168 100644 --- a/rllib/utils/exploration/epsilon_greedy.py +++ b/rllib/utils/exploration/epsilon_greedy.py @@ -92,7 +92,7 @@ def get_exploration_action( explore: Optional[Union[bool, TensorType]] = True, ): - if self.framework in ["tf2", "tf", "tfe"]: + if self.framework in ["tf2", "tf"]: return self._get_tf_exploration_action_op( action_distribution, explore, timestep ) @@ -152,7 +152,7 @@ def _get_tf_exploration_action_op( false_fn=lambda: exploit_action, ) - if self.framework in ["tf2", "tfe"] and not self.policy_config["eager_tracing"]: + if self.framework == "tf2" and not self.policy_config["eager_tracing"]: self.last_timestep = timestep return action, tf.zeros_like(action, dtype=tf.float32) else: diff --git a/rllib/utils/exploration/gaussian_noise.py b/rllib/utils/exploration/gaussian_noise.py index 234287852cf32..2adbde60e709f 100644 --- a/rllib/utils/exploration/gaussian_noise.py +++ b/rllib/utils/exploration/gaussian_noise.py @@ -159,7 +159,7 @@ def _get_tf_exploration_action_op( logp = zero_logps_from_actions(deterministic_actions) # Increment `last_timestep` by 1 (or set to `timestep`). - if self.framework in ["tf2", "tfe"]: + if self.framework == "tf2": if timestep is None: self.last_timestep.assign_add(1) else: diff --git a/rllib/utils/exploration/ornstein_uhlenbeck_noise.py b/rllib/utils/exploration/ornstein_uhlenbeck_noise.py index f6ace79d397b5..2b9daf6dcc048 100644 --- a/rllib/utils/exploration/ornstein_uhlenbeck_noise.py +++ b/rllib/utils/exploration/ornstein_uhlenbeck_noise.py @@ -115,7 +115,7 @@ def _get_tf_exploration_action_op( shape=[self.action_space.low.size], stddev=self.stddev ) ou_new = self.ou_theta * -self.ou_state + self.ou_sigma * gaussian_sample - if self.framework in ["tf2", "tfe"]: + if self.framework == "tf2": self.ou_state.assign_add(ou_new) ou_state_new = self.ou_state else: @@ -153,7 +153,7 @@ def _get_tf_exploration_action_op( logp = zero_logps_from_actions(deterministic_actions) # Increment `last_timestep` by 1 (or set to `timestep`). - if self.framework in ["tf2", "tfe"]: + if self.framework == "tf2": if timestep is None: self.last_timestep.assign_add(1) else: diff --git a/rllib/utils/exploration/parameter_noise.py b/rllib/utils/exploration/parameter_noise.py index 41e4a6f611668..39b52ff2dee5d 100644 --- a/rllib/utils/exploration/parameter_noise.py +++ b/rllib/utils/exploration/parameter_noise.py @@ -308,7 +308,7 @@ def _sample_new_noise(self, *, tf_sess=None): """Samples new noise and stores it in `self.noise`.""" if self.framework == "tf": tf_sess.run(self.tf_sample_new_noise_op) - elif self.framework in ["tfe", "tf2"]: + elif self.framework == "tf2": self._tf_sample_new_noise_op() else: for i in range(len(self.noise)): @@ -359,7 +359,7 @@ def _add_stored_noise(self, *, tf_sess=None): # Add stored noise to the model's parameters. if self.framework == "tf": tf_sess.run(self.tf_add_stored_noise_op) - elif self.framework in ["tf2", "tfe"]: + elif self.framework == "tf2": self._tf_add_stored_noise_op() else: for var, noise in zip(self.model_variables, self.noise): @@ -399,7 +399,7 @@ def _remove_noise(self, *, tf_sess=None): # Removes the stored noise from the model's parameters. 
if self.framework == "tf": tf_sess.run(self.tf_remove_noise_op) - elif self.framework in ["tf2", "tfe"]: + elif self.framework == "tf2": self._tf_remove_noise_op() else: for var, noise in zip(self.model_variables, self.noise): diff --git a/rllib/utils/exploration/random.py b/rllib/utils/exploration/random.py index 91782b8613034..0fa7a89e7e5ad 100644 --- a/rllib/utils/exploration/random.py +++ b/rllib/utils/exploration/random.py @@ -34,7 +34,7 @@ def __init__( Args: action_space: The gym action space used by the environment. - framework: One of None, "tf", "tfe", "torch". + framework: One of None, "tf", "torch". """ super().__init__( action_space=action_space, model=model, framework=framework, **kwargs @@ -51,7 +51,7 @@ def get_exploration_action( explore: bool = True ): # Instantiate the distribution object. - if self.framework in ["tf2", "tf", "tfe"]: + if self.framework in ["tf2", "tf"]: return self.get_tf_exploration_action_op(action_distribution, explore) else: return self.get_torch_exploration_action(action_distribution, explore) diff --git a/rllib/utils/exploration/slate_epsilon_greedy.py b/rllib/utils/exploration/slate_epsilon_greedy.py index 94cbea7a2b10e..abc7b7c12bba0 100644 --- a/rllib/utils/exploration/slate_epsilon_greedy.py +++ b/rllib/utils/exploration/slate_epsilon_greedy.py @@ -61,7 +61,7 @@ def _get_tf_exploration_action_op( false_fn=lambda: exploit_action, ) - if self.framework in ["tf2", "tfe"] and not self.policy_config["eager_tracing"]: + if self.framework == "tf2" and not self.policy_config["eager_tracing"]: self.last_timestep = timestep return action, action_logp else: diff --git a/rllib/utils/exploration/stochastic_sampling.py b/rllib/utils/exploration/stochastic_sampling.py index 704779f8f8da7..f229b69e3a72b 100644 --- a/rllib/utils/exploration/stochastic_sampling.py +++ b/rllib/utils/exploration/stochastic_sampling.py @@ -114,7 +114,7 @@ def _get_tf_exploration_action_op(self, action_dist, timestep, explore): ) # Increment `last_timestep` by 1 (or set to `timestep`). - if self.framework in ["tf2", "tfe"]: + if self.framework == "tf2": self.last_timestep.assign_add(1) return action, logp else: diff --git a/rllib/utils/exploration/tests/test_parameter_noise.py b/rllib/utils/exploration/tests/test_parameter_noise.py index 4abe1f1883f24..e1ee16ec28e42 100644 --- a/rllib/utils/exploration/tests/test_parameter_noise.py +++ b/rllib/utils/exploration/tests/test_parameter_noise.py @@ -221,7 +221,7 @@ def _get_current_weight(self, policy, fw): # DDPG model. else: return weights["policy_model.action_0._model.0.weight"][0][0] - key = 0 if fw in ["tf2", "tfe"] else list(weights.keys())[0] + key = 0 if fw == "tf2" else list(weights.keys())[0] return weights[key][0][0] diff --git a/rllib/utils/framework.py b/rllib/utils/framework.py index a711905c908d1..a52293d0721b8 100644 --- a/rllib/utils/framework.py +++ b/rllib/utils/framework.py @@ -239,7 +239,7 @@ def get_variable( A framework-specific variable (tf.Variable, torch.tensor, or python primitive). """ - if framework in ["tf2", "tf", "tfe"]: + if framework in ["tf2", "tf"]: import tensorflow as tf dtype = dtype or getattr( diff --git a/rllib/utils/policy.py b/rllib/utils/policy.py index 48cb30417982e..9847119e0e8d4 100644 --- a/rllib/utils/policy.py +++ b/rllib/utils/policy.py @@ -90,7 +90,7 @@ def create_policy_for_framework( framework = merged_config.get("framework", "tf") # Tf. 
- if framework in ["tf2", "tf", "tfe"]: + if framework in ["tf2", "tf"]: var_scope = policy_id + (f"_wk{worker_index}" if worker_index else "") # For tf static graph, build every policy in its own graph # and create a new session for it. diff --git a/rllib/utils/schedules/schedule.py b/rllib/utils/schedules/schedule.py index 671e7d7cb978d..6b600380f87f1 100644 --- a/rllib/utils/schedules/schedule.py +++ b/rllib/utils/schedules/schedule.py @@ -37,7 +37,7 @@ def value(self, t: Union[int, TensorType]) -> Any: Returns: The calculated value depending on the schedule and `t`. """ - if self.framework in ["tf2", "tf", "tfe"]: + if self.framework in ["tf2", "tf"]: return self._tf_value_op(t) return self._value(t) diff --git a/rllib/utils/schedules/tests/test_schedules.py b/rllib/utils/schedules/tests/test_schedules.py index c8b43178d7289..b3f256d635402 100644 --- a/rllib/utils/schedules/tests/test_schedules.py +++ b/rllib/utils/schedules/tests/test_schedules.py @@ -22,7 +22,7 @@ def test_constant_schedule(self): config = {"value": value} - for fw in framework_iterator(frameworks=["tf2", "tf", "tfe", "torch", None]): + for fw in framework_iterator(frameworks=["tf2", "tf", "torch", None]): constant = from_config(ConstantSchedule, config, framework=fw) for t in ts: out = constant(t) @@ -39,7 +39,7 @@ def test_linear_schedule(self): expected = [2.1 - (min(t, 100) / 100) * (2.1 - 0.6) for t in ts] config = {"schedule_timesteps": 100, "initial_p": 2.1, "final_p": 0.6} - for fw in framework_iterator(frameworks=["tf2", "tf", "tfe", "torch", None]): + for fw in framework_iterator(frameworks=["tf2", "tf", "torch", None]): linear = from_config(LinearSchedule, config, framework=fw) for t, e in zip(ts, expected): out = linear(t) @@ -62,7 +62,7 @@ def test_polynomial_schedule(self): power=2.0, ) - for fw in framework_iterator(frameworks=["tf2", "tf", "tfe", "torch", None]): + for fw in framework_iterator(frameworks=["tf2", "tf", "torch", None]): polynomial = from_config(config, framework=fw) for t, e in zip(ts, expected): out = polynomial(t) @@ -80,7 +80,7 @@ def test_exponential_schedule(self): expected = [2.0 * decay_rate ** (t / 100) for t in ts] config = dict(initial_p=2.0, decay_rate=decay_rate, schedule_timesteps=100) - for fw in framework_iterator(frameworks=["tf2", "tf", "tfe", "torch", None]): + for fw in framework_iterator(frameworks=["tf2", "tf", "torch", None]): exponential = from_config(ExponentialSchedule, config, framework=fw) for t, e in zip(ts, expected): out = exponential(t) @@ -99,7 +99,7 @@ def test_piecewise_schedule(self): endpoints=[(0, 50.0), (25, 100.0), (30, 200.0)], outside_value=14.5 ) - for fw in framework_iterator(frameworks=["tf2", "tf", "tfe", "torch", None]): + for fw in framework_iterator(frameworks=["tf2", "tf", "torch", None]): piecewise = from_config(PiecewiseSchedule, config, framework=fw) for t, e in zip(ts, expected): out = piecewise(t) diff --git a/rllib/utils/test_utils.py b/rllib/utils/test_utils.py index b9d515d0b5549..9e2c497c72a04 100644 --- a/rllib/utils/test_utils.py +++ b/rllib/utils/test_utils.py @@ -50,7 +50,7 @@ def framework_iterator( config: Optional[Union["AlgorithmConfig", PartialAlgorithmConfigDict]] = None, - frameworks: Sequence[str] = ("tf2", "tf", "tfe", "torch"), + frameworks: Sequence[str] = ("tf2", "tf", "torch"), session: bool = False, with_eager_tracing: bool = False, time_iterations: Optional[dict] = None, @@ -58,35 +58,31 @@ def framework_iterator( """An generator that allows for looping through n frameworks for testing. 
Provides the correct config entries ("framework") as well - as the correct eager/non-eager contexts for tfe/tf. + as the correct eager/non-eager contexts for tf/tf2. Args: config: An optional config dict or AlgorithmConfig object. This will be modified (value for "framework" changed) depending on the iteration. frameworks: A list/tuple of the frameworks to be tested. - Allowed are: "tf2", "tf", "tfe", "torch", and None. + Allowed are: "tf2", "tf", "torch", and None. session: If True and only in the tf-case: Enter a tf.Session() and yield that as second return value (otherwise yield (fw, None)). Also sets a seed (42) on the session to make the test deterministic. with_eager_tracing: Include `eager_tracing=True` in the returned - configs, when framework=[tfe|tf2]. + configs, when framework=tf2. time_iterations: If provided, will write to the given dict (by framework key) the times in seconds that each (framework's) iteration takes. Yields: - If `session` is False: The current framework [tf2|tf|tfe|torch] used. + If `session` is False: The current framework [tf2|tf|torch] used. If `session` is True: A tuple consisting of the current framework string and the tf1.Session (if fw="tf", otherwise None). """ config = config or {} frameworks = [frameworks] if isinstance(frameworks, str) else list(frameworks) - # Both tf2 and tfe present -> remove "tfe" or "tf2" depending on version. - if "tf2" in frameworks and "tfe" in frameworks: - frameworks.remove("tfe" if tfv == 2 else "tf2") - for fw in frameworks: # Skip non-installed frameworks. if fw == "torch" and not torch: @@ -97,19 +93,13 @@ def framework_iterator( "framework_iterator skipping {} (tf not installed)!".format(fw) ) continue - elif fw == "tfe" and not eager_mode: - logger.warning( - "framework_iterator skipping tf-eager (could not " - "import `eager_mode` from tensorflow.python)!" - ) - continue elif fw == "tf2" and tfv != 2: logger.warning("framework_iterator skipping tf2.x (tf version is < 2.0)!") continue elif fw == "jax" and not jax: logger.warning("framework_iterator skipping JAX (not installed)!") continue - assert fw in ["tf2", "tf", "tfe", "torch", "jax", None] + assert fw in ["tf2", "tf", "torch", "jax", None] # Do we need a test session? sess = None @@ -124,8 +114,8 @@ def framework_iterator( config.framework(fw) eager_ctx = None - # Enable eager mode for tf2 and tfe. - if fw in ["tf2", "tfe"]: + # Enable eager mode for tf2. + if fw == "tf2": eager_ctx = eager_mode() eager_ctx.__enter__() assert tf1.executing_eagerly() @@ -134,7 +124,7 @@ def framework_iterator( assert not tf1.executing_eagerly() # Additionally loop through eager_tracing=True + False, if necessary. - if fw in ["tf2", "tfe"] and with_eager_tracing: + if fw == "tf2" and with_eager_tracing: for tracing in [True, False]: if isinstance(config, dict): config["eager_tracing"] = tracing @@ -179,7 +169,7 @@ def check(x, y, decimals=5, atol=None, rtol=None, false=False): x: The value to be compared (to the expectation: `y`). This may be a Tensor. y: The expected value to be compared to `x`. This must not - be a tf-Tensor, but may be a tfe/torch-Tensor. + be a tf-Tensor, but may be a tf/torch-Tensor. decimals: The number of digits after the floating point up to which all numeric values have to match. 
atol: Absolute tolerance of the difference between x and y diff --git a/rllib/utils/tests/run_memory_leak_tests.py b/rllib/utils/tests/run_memory_leak_tests.py index 545cdf4f348d0..e7781d347ebb6 100644 --- a/rllib/utils/tests/run_memory_leak_tests.py +++ b/rllib/utils/tests/run_memory_leak_tests.py @@ -30,7 +30,7 @@ parser.add_argument( "--framework", required=False, - choices=["jax", "tf2", "tf", "tfe", "torch", None], + choices=["jax", "tf2", "tf", "torch", None], default=None, help="The deep learning framework to use.", ) @@ -94,7 +94,7 @@ # Create env on local_worker for memory leak testing just the env. experiment["config"]["create_env_on_driver"] = True # Always run with eager-tracing when framework=tf2 if not in local-mode. - if args.framework in ["tf2", "tfe"] and not args.local_mode: + if args.framework == "tf2" and not args.local_mode: experiment["config"]["eager_tracing"] = True # experiment["config"]["callbacks"] = MemoryTrackingCallbacks diff --git a/rllib/utils/tests/test_framework_agnostic_components.py b/rllib/utils/tests/test_framework_agnostic_components.py index 8c617a66d28d9..d5fc18e2c9e78 100644 --- a/rllib/utils/tests/test_framework_agnostic_components.py +++ b/rllib/utils/tests/test_framework_agnostic_components.py @@ -64,23 +64,22 @@ def test_dummy_components(self): abs_path = script_dir.absolute() for fw, sess in framework_iterator(session=True): - fw_ = fw if fw != "tfe" else "tf" # Try to create from an abstract class w/o default constructor. # Expect None. - test = from_config({"type": AbstractDummyComponent, "framework": fw_}) + test = from_config({"type": AbstractDummyComponent, "framework": fw}) check(test, None) # Create a Component via python API (config dict). component = from_config( dict( - type=DummyComponent, prop_a=1.0, prop_d="non_default", framework=fw_ + type=DummyComponent, prop_a=1.0, prop_d="non_default", framework=fw ) ) check(component.prop_d, "non_default") # Create a tf Component from json file. config_file = str(abs_path.joinpath("dummy_config.json")) - component = from_config(config_file, framework=fw_) + component = from_config(config_file, framework=fw) check(component.prop_c, "default") check(component.prop_d, 4) # default value = component.add(3.3) @@ -90,7 +89,7 @@ def test_dummy_components(self): # Create a torch Component from yaml file. config_file = str(abs_path.joinpath("dummy_config.yml")) - component = from_config(config_file, framework=fw_) + component = from_config(config_file, framework=fw) check(component.prop_a, "something else") check(component.prop_d, 3) value = component.add(1.2) @@ -103,7 +102,7 @@ def test_dummy_components(self): '{"type": "ray.rllib.utils.tests.' 'test_framework_agnostic_components.DummyComponent", ' '"prop_a": "A", "prop_b": -1.0, "prop_c": "non-default", ' - '"framework": "' + fw_ + '"}' + '"framework": "' + fw + '"}' ) check(component.prop_a, "A") check(component.prop_d, 4) # default @@ -117,7 +116,7 @@ def test_dummy_components(self): DummyComponent, '{"type": "NonAbstractChildOfDummyComponent", ' '"prop_a": "A", "prop_b": -1.0, "prop_c": "non-default",' - '"framework": "' + fw_ + '"}', + '"framework": "' + fw + '"}', ) check(component.prop_a, "A") check(component.prop_d, 4) # default @@ -136,7 +135,7 @@ def test_dummy_components(self): { "type": "EpsilonGreedy", "action_space": Discrete(2), - "framework": fw_, + "framework": fw, "num_workers": 0, "worker_index": 0, "policy_config": {}, @@ -152,7 +151,7 @@ def test_dummy_components(self): "type: ray.rllib.utils.tests." 
"test_framework_agnostic_components.DummyComponent\n" "prop_a: B\nprop_b: -1.5\nprop_c: non-default\nframework: " - "{}".format(fw_) + "{}".format(fw) ) check(component.prop_a, "B") check(component.prop_d, 4) # default diff --git a/rllib/utils/tf_utils.py b/rllib/utils/tf_utils.py index fda9da630192c..a102fb24b36d6 100644 --- a/rllib/utils/tf_utils.py +++ b/rllib/utils/tf_utils.py @@ -243,10 +243,10 @@ class for. cls = orig_cls framework = config.get("framework", "tf") - if framework in ["tf2", "tf", "tfe"] and not tf1: + if framework in ["tf2", "tf"] and not tf1: raise ImportError("Could not import tensorflow!") - if framework in ["tf2", "tfe"]: + if framework == "tf2": assert tf1.executing_eagerly() from ray.rllib.policy.tf_policy import TFPolicy From 2f411a2e0c39839ecb07df7f351b5de13d89d034 Mon Sep 17 00:00:00 2001 From: sven1977 Date: Thu, 27 Oct 2022 15:55:05 +0200 Subject: [PATCH 2/3] LINT Signed-off-by: sven1977 --- rllib/BUILD | 2 +- rllib/algorithms/algorithm.py | 5 +---- rllib/algorithms/ddpg/ddpg_tf_policy.py | 2 +- rllib/algorithms/dqn/dqn_tf_policy.py | 4 ++-- rllib/evaluation/rollout_worker.py | 4 +--- rllib/models/catalog.py | 15 +++++++++------ rllib/models/tests/test_distributions.py | 4 +--- 7 files changed, 16 insertions(+), 20 deletions(-) diff --git a/rllib/BUILD b/rllib/BUILD index da529557be874..58c1c988d1998 100644 --- a/rllib/BUILD +++ b/rllib/BUILD @@ -2680,7 +2680,7 @@ py_test( tags = ["team:rllib", "exclusive", "examples"], size = "small", srcs = ["examples/complex_struct_space.py"], - args = ["--framework=tfe"], + args = ["--framework=tf2"], ) py_test( diff --git a/rllib/algorithms/algorithm.py b/rllib/algorithms/algorithm.py index a19836f20771e..068e98e7b70a7 100644 --- a/rllib/algorithms/algorithm.py +++ b/rllib/algorithms/algorithm.py @@ -2776,10 +2776,7 @@ def _run_one_training_iteration(self) -> Tuple[ResultDict, "TrainIterCtx"]: # In case we are training (in a thread) parallel to evaluation, # we may have to re-enable eager mode here (gets disabled in the # thread). - if ( - self.config.get("framework") == "tf2" - and not tf.executing_eagerly() - ): + if self.config.get("framework") == "tf2" and not tf.executing_eagerly(): tf1.enable_eager_execution() results = None diff --git a/rllib/algorithms/ddpg/ddpg_tf_policy.py b/rllib/algorithms/ddpg/ddpg_tf_policy.py index 034b8357c2edd..6a1d0fe0bb666 100644 --- a/rllib/algorithms/ddpg/ddpg_tf_policy.py +++ b/rllib/algorithms/ddpg/ddpg_tf_policy.py @@ -326,7 +326,7 @@ def loss( # Compute RHS of bellman equation. q_t_selected_target = tf.stop_gradient( tf.cast(train_batch[SampleBatch.REWARDS], tf.float32) - + gamma**n_step * q_tp1_best_masked + + gamma ** n_step * q_tp1_best_masked ) # Compute the error (potentially clipped). 
diff --git a/rllib/algorithms/dqn/dqn_tf_policy.py b/rllib/algorithms/dqn/dqn_tf_policy.py index 15e1587c58132..0c6f6c00c6a9e 100644 --- a/rllib/algorithms/dqn/dqn_tf_policy.py +++ b/rllib/algorithms/dqn/dqn_tf_policy.py @@ -60,7 +60,7 @@ def __init__( z = v_min + z * (v_max - v_min) / float(num_atoms - 1) # (batch_size, 1) * (1, num_atoms) = (batch_size, num_atoms) - r_tau = tf.expand_dims(rewards, -1) + gamma**n_step * tf.expand_dims( + r_tau = tf.expand_dims(rewards, -1) + gamma ** n_step * tf.expand_dims( 1.0 - done_mask, -1 ) * tf.expand_dims(z, 0) r_tau = tf.clip_by_value(r_tau, v_min, v_max) @@ -100,7 +100,7 @@ def __init__( q_tp1_best_masked = (1.0 - done_mask) * q_tp1_best # compute RHS of bellman equation - q_t_selected_target = rewards + gamma**n_step * q_tp1_best_masked + q_t_selected_target = rewards + gamma ** n_step * q_tp1_best_masked # compute the error (potentially clipped) self.td_error = q_t_selected - tf.stop_gradient(q_t_selected_target) diff --git a/rllib/evaluation/rollout_worker.py b/rllib/evaluation/rollout_worker.py index e90d5dd89c68f..d90feea046732 100644 --- a/rllib/evaluation/rollout_worker.py +++ b/rllib/evaluation/rollout_worker.py @@ -487,9 +487,7 @@ def gen_rollouts(): if ( tf1 - and ( - config.framework_str == "tf2" or config.enable_tf1_exec_eagerly - ) + and (config.framework_str == "tf2" or config.enable_tf1_exec_eagerly) # This eager check is necessary for certain all-framework tests # that use tf's eager_mode() context generator. and not tf1.executing_eagerly() diff --git a/rllib/models/catalog.py b/rllib/models/catalog.py index fd2d883d0f4fd..e8c362c60210c 100644 --- a/rllib/models/catalog.py +++ b/rllib/models/catalog.py @@ -305,10 +305,14 @@ def get_action_dist( else: dist_cls = Categorical # Tuple/Dict Spaces -> MultiAction. 
- elif dist_type in ( - MultiActionDistribution, - TorchMultiActionDistribution, - ) or isinstance(action_space, (Tuple, Dict)): + elif ( + dist_type + in ( + MultiActionDistribution, + TorchMultiActionDistribution, + ) + or isinstance(action_space, (Tuple, Dict)) + ): return ModelCatalog._get_multi_action_distribution( ( MultiActionDistribution @@ -755,8 +759,7 @@ def track_var_creation(next_creator, **kw): ) else: raise NotImplementedError( - "`framework` must be 'tf2|tf|torch', but is " - "{}!".format(framework) + "`framework` must be 'tf2|tf|torch', but is " "{}!".format(framework) ) @staticmethod diff --git a/rllib/models/tests/test_distributions.py b/rllib/models/tests/test_distributions.py index 2453356e006d5..657f56174e97b 100644 --- a/rllib/models/tests/test_distributions.py +++ b/rllib/models/tests/test_distributions.py @@ -491,9 +491,7 @@ def test_beta(self): def test_gumbel_softmax(self): """Tests the GumbelSoftmax ActionDistribution (tf + eager only).""" - for fw, sess in framework_iterator( - frameworks=("tf2", "tf"), session=True - ): + for fw, sess in framework_iterator(frameworks=("tf2", "tf"), session=True): batch_size = 1000 num_categories = 5 input_space = Box(-1.0, 1.0, shape=(batch_size, num_categories)) From 8709bac87445393018337fac866d70afb282c4f1 Mon Sep 17 00:00:00 2001 From: sven1977 Date: Thu, 27 Oct 2022 16:26:40 +0200 Subject: [PATCH 3/3] LINT Signed-off-by: sven1977 --- rllib/algorithms/ddpg/ddpg_tf_policy.py | 2 +- rllib/algorithms/dqn/dqn_tf_policy.py | 4 ++-- rllib/models/catalog.py | 12 ++++-------- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/rllib/algorithms/ddpg/ddpg_tf_policy.py b/rllib/algorithms/ddpg/ddpg_tf_policy.py index 6a1d0fe0bb666..034b8357c2edd 100644 --- a/rllib/algorithms/ddpg/ddpg_tf_policy.py +++ b/rllib/algorithms/ddpg/ddpg_tf_policy.py @@ -326,7 +326,7 @@ def loss( # Compute RHS of bellman equation. q_t_selected_target = tf.stop_gradient( tf.cast(train_batch[SampleBatch.REWARDS], tf.float32) - + gamma ** n_step * q_tp1_best_masked + + gamma**n_step * q_tp1_best_masked ) # Compute the error (potentially clipped). diff --git a/rllib/algorithms/dqn/dqn_tf_policy.py b/rllib/algorithms/dqn/dqn_tf_policy.py index 0c6f6c00c6a9e..15e1587c58132 100644 --- a/rllib/algorithms/dqn/dqn_tf_policy.py +++ b/rllib/algorithms/dqn/dqn_tf_policy.py @@ -60,7 +60,7 @@ def __init__( z = v_min + z * (v_max - v_min) / float(num_atoms - 1) # (batch_size, 1) * (1, num_atoms) = (batch_size, num_atoms) - r_tau = tf.expand_dims(rewards, -1) + gamma ** n_step * tf.expand_dims( + r_tau = tf.expand_dims(rewards, -1) + gamma**n_step * tf.expand_dims( 1.0 - done_mask, -1 ) * tf.expand_dims(z, 0) r_tau = tf.clip_by_value(r_tau, v_min, v_max) @@ -100,7 +100,7 @@ def __init__( q_tp1_best_masked = (1.0 - done_mask) * q_tp1_best # compute RHS of bellman equation - q_t_selected_target = rewards + gamma ** n_step * q_tp1_best_masked + q_t_selected_target = rewards + gamma**n_step * q_tp1_best_masked # compute the error (potentially clipped) self.td_error = q_t_selected - tf.stop_gradient(q_t_selected_target) diff --git a/rllib/models/catalog.py b/rllib/models/catalog.py index e8c362c60210c..f55d2c5bd8b9a 100644 --- a/rllib/models/catalog.py +++ b/rllib/models/catalog.py @@ -305,14 +305,10 @@ def get_action_dist( else: dist_cls = Categorical # Tuple/Dict Spaces -> MultiAction. 
- elif ( - dist_type - in ( - MultiActionDistribution, - TorchMultiActionDistribution, - ) - or isinstance(action_space, (Tuple, Dict)) - ): + elif dist_type in ( + MultiActionDistribution, + TorchMultiActionDistribution, + ) or isinstance(action_space, (Tuple, Dict)): return ModelCatalog._get_multi_action_distribution( ( MultiActionDistribution
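As a closing illustration (not part of the patch), a minimal sketch of how test code loops over the remaining framework specifiers after this change, mirroring the framework_iterator signature shown in the test_utils.py hunk above:

    from ray.rllib.utils.test_utils import framework_iterator

    # Only "tf2", "tf" and "torch" remain; "tfe" would now fail the
    # framework assertion inside framework_iterator.
    for fw, sess in framework_iterator(
        frameworks=("tf2", "tf", "torch"), session=True
    ):
        # `sess` is a tf1.Session when fw == "tf", otherwise None.
        print(fw, sess)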