[RLlib] Enable eager_tracing=True by default. #36556

Merged

Changes from 2 commits
6 changes: 3 additions & 3 deletions rllib/algorithms/algorithm_config.py
@@ -260,7 +260,7 @@ def __init__(self, algo_class=None):

# `self.framework()`
self.framework_str = "torch"
self.eager_tracing = False
self.eager_tracing = True
self.eager_max_retraces = 20
self.tf_session_args = {
# note: overridden by `local_tf_session_args`
@@ -1237,8 +1237,8 @@ def framework(
"""Sets the config's DL framework settings.

Args:
framework: tf: TensorFlow (static-graph); tf2: TensorFlow 2.x
(eager or traced, if eager_tracing=True); torch: PyTorch
framework: torch: PyTorch; tf2: TensorFlow 2.x (eager execution or traced
if eager_tracing=True); tf: TensorFlow (static-graph);
eager_tracing: Enable tracing in eager mode. This greatly improves
performance (speedup ~2x), but makes it slightly harder to debug
since Python code won't be evaluated after the initial eager pass.
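With this default flip, any config that selects the "tf2" framework now traces its eager code path unless tracing is explicitly turned off. A minimal sketch of both modes (APPO and CartPole are stand-ins, not mandated by this PR):

```python
from ray.rllib.algorithms.appo import APPOConfig

# eager_tracing now defaults to True, so this config traces the eager code
# path (roughly ~2x faster, per the docstring above).
config = APPOConfig().environment("CartPole-v1").framework("tf2")

# For step-through debugging of the Python eager code, disable tracing explicitly.
debug_config = (
    APPOConfig()
    .environment("CartPole-v1")
    .framework("tf2", eager_tracing=False)
)
```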
34 changes: 17 additions & 17 deletions rllib/algorithms/appo/tests/test_appo.py
@@ -25,7 +25,7 @@ def test_appo_compilation(self):
config = appo.APPOConfig().rollouts(num_rollout_workers=1)
num_iterations = 2

for _ in framework_iterator(config, with_eager_tracing=True):
for _ in framework_iterator(config):
print("w/o v-trace")
config.vtrace = False
algo = config.build(env="CartPole-v1")
@@ -55,7 +55,7 @@ def test_appo_compilation_use_kl_loss(self):
)
num_iterations = 2

for _ in framework_iterator(config, with_eager_tracing=True):
for _ in framework_iterator(config):
algo = config.build(env="CartPole-v1")
for i in range(num_iterations):
results = algo.train()
@@ -117,30 +117,30 @@ def test_appo_entropy_coeff_schedule(self):
)

def _step_n_times(algo, n: int):
"""Step Algorithm n times.

Returns:
entropy coefficient and number of env steps sampled at the end of the execution.
"""
for _ in range(n):
results = algo.train()
print(algo.workers.local_worker().global_vars)
print(results)
return results["info"][LEARNER_INFO][DEFAULT_POLICY_ID][LEARNER_STATS_KEY][
"entropy_coeff"
]
return (
results["info"][LEARNER_INFO][DEFAULT_POLICY_ID][LEARNER_STATS_KEY][
"entropy_coeff"
],
results["num_env_steps_sampled"],
)

for _ in framework_iterator(config):
algo = config.build(env="CartPole-v1")

coeff = _step_n_times(algo, 10) # 200 timesteps
# Should be close to the starting coeff of 0.01.
self.assertLessEqual(coeff, 0.01)
self.assertGreaterEqual(coeff, 0.001)
coeff, num_env_steps_sampled = _step_n_times(algo, 5) # ~100 timesteps
if num_env_steps_sampled > 300:
self.assertLessEqual(coeff, 0.001)
self.assertGreaterEqual(coeff, 0.0001)
else:
self.assertLessEqual(coeff, 0.01)
self.assertGreaterEqual(coeff, 0.001)
Contributor:

Can we redesign this test a little bit?

For example, it could be simplified by using:

entropy_coeff_schedule=[[0, 0.1], [200, 0.001], [600, 0.0001]]

Also, _step_n_times() should be renamed to "step_until_n_steps_reached()".
We could then reuse it for the entropy-coefficient tests of other algorithms if desired.
The "~100 timesteps" assumption can easily change per algorithm, or when something unrelated to the coefficient schedule changes in the algorithm under test.

Contributor Author:

I need to fix this, yes. I think because eager tracing is much faster, the async sampling also runs faster in the background.
I will add a proper check here to make sure this test asserts the right values based on the actual number of timesteps sampled.
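A possible shape for the suggested helper (an illustrative sketch only; the name `step_until_n_steps_reached` and the iteration cap are assumptions, not part of this PR; it reuses the `LEARNER_INFO`, `DEFAULT_POLICY_ID`, and `LEARNER_STATS_KEY` constants already imported by this test):

```python
def step_until_n_steps_reached(algo, min_env_steps, max_iters=100):
    """Train `algo` until at least `min_env_steps` env steps have been sampled.

    Returns the current entropy coefficient and the actual number of env steps
    sampled, so assertions can be made against the real step count rather than
    a fixed number of training iterations.
    """
    num_env_steps_sampled = 0
    results = None
    for _ in range(max_iters):
        results = algo.train()
        num_env_steps_sampled = results["num_env_steps_sampled"]
        if num_env_steps_sampled >= min_env_steps:
            break
    coeff = results["info"][LEARNER_INFO][DEFAULT_POLICY_ID][LEARNER_STATS_KEY][
        "entropy_coeff"
    ]
    return coeff, num_env_steps_sampled
```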


coeff = _step_n_times(algo, 20) # 400 timesteps
# Should have annealed to the final coeff of 0.0001.
self.assertLessEqual(coeff, 0.001)
coeff, num_env_steps_sampled = _step_n_times(algo, 20) # ~400 timesteps
self.assertLessEqual(coeff, 0.0005)

algo.stop()

1 change: 0 additions & 1 deletion rllib/algorithms/appo/tests/test_appo_learner.py
@@ -116,7 +116,6 @@ def test_kl_coeff_changes(self):
config = (
appo.APPOConfig()
.environment("CartPole-v1")
.framework(eager_tracing=True)
# Asynchronous Algo, make sure we have some results after 1 iteration.
.reporting(min_time_s_per_iteration=10)
.rollouts(
2 changes: 1 addition & 1 deletion rllib/core/learner/learner.py
@@ -96,7 +96,7 @@ class FrameworkHyperparameters:
Module in Torch.
"""

eager_tracing: bool = False
eager_tracing: bool = True
torch_compile_cfg: Optional["TorchCompileConfig"] = None


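The same default flip applies when building the framework hyperparameters for the new Learner stack directly. A minimal sketch, assuming the dataclass fields are exactly the ones visible in this hunk (all with defaults):

```python
from ray.rllib.core.learner.learner import FrameworkHyperparameters

# Tracing is now on by default.
hps = FrameworkHyperparameters()
assert hps.eager_tracing is True

# Opt out explicitly, e.g. while debugging a tf2 Learner.
debug_hps = FrameworkHyperparameters(eager_tracing=False)
```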
2 changes: 1 addition & 1 deletion rllib/core/learner/learner_group_config.py
@@ -55,7 +55,7 @@ def __init__(self, cls: Type[LearnerGroup] = None) -> None:
self.local_gpu_idx = 0

# `self.framework()`
self.eager_tracing = False
self.eager_tracing = True
self.torch_compile_cfg = None

def validate(self) -> None:
6 changes: 2 additions & 4 deletions rllib/core/learner/tests/test_learner.py
@@ -36,7 +36,7 @@ def tearDown(cls) -> None:
def test_end_to_end_update(self):

for fw in framework_iterator(frameworks=("torch", "tf2")):
learner = get_learner(framework=fw, eager_tracing=True, env=self.ENV)
learner = get_learner(framework=fw, env=self.ENV)
reader = get_cartpole_dataset_reader(batch_size=512)

min_loss = float("inf")
@@ -60,7 +60,7 @@ def test_compute_gradients(self):
the weights is all ones.
"""
for fw in framework_iterator(frameworks=("torch", "tf2")):
learner = get_learner(framework=fw, eager_tracing=True, env=self.ENV)
learner = get_learner(framework=fw, env=self.ENV)

params = learner.get_parameters(learner.module[DEFAULT_POLICY_ID])

@@ -94,7 +94,6 @@ def test_postprocess_gradients(self):

learner = get_learner(
framework=fw,
eager_tracing=True,
env=self.ENV,
learner_hps=hps,
)
@@ -119,7 +118,6 @@ def test_postprocess_gradients(self):
hps.grad_clip_by = "norm"
learner = get_learner(
framework=fw,
eager_tracing=True,
env=self.ENV,
learner_hps=hps,
)
30 changes: 9 additions & 21 deletions rllib/core/learner/tests/test_learner_group.py
@@ -62,7 +62,7 @@ def local_training_helper(self, fw, scaling_mode) -> None:
tf.random.set_seed(0)
env = gym.make("CartPole-v1")
scaling_config = LOCAL_SCALING_CONFIGS[scaling_mode]
learner_group = get_learner_group(fw, env, scaling_config, eager_tracing=True)
learner_group = get_learner_group(fw, env, scaling_config)
local_learner = get_learner(framework=fw, env=env)
local_learner.build()

@@ -136,9 +136,7 @@ def test_update_multigpu(self):
env = gym.make("CartPole-v1")

scaling_config = REMOTE_SCALING_CONFIGS[scaling_mode]
learner_group = get_learner_group(
fw, env, scaling_config, eager_tracing=True
)
learner_group = get_learner_group(fw, env, scaling_config)
reader = get_cartpole_dataset_reader(batch_size=1024)

min_loss = float("inf")
@@ -188,9 +186,7 @@ def test_add_remove_module(self):
print(f"Testing framework: {fw}, scaling mode: {scaling_mode}.")
env = gym.make("CartPole-v1")
scaling_config = REMOTE_SCALING_CONFIGS[scaling_mode]
learner_group = get_learner_group(
fw, env, scaling_config, eager_tracing=True
)
learner_group = get_learner_group(fw, env, scaling_config)
reader = get_cartpole_dataset_reader(batch_size=512)
batch = reader.next()

@@ -267,7 +263,7 @@ def test_load_module_state(self):
scaling_mode
) or LOCAL_SCALING_CONFIGS.get(scaling_mode)
learner_group = get_learner_group(
fw, env, scaling_config, eager_tracing=True, is_multi_agent=True
fw, env, scaling_config, is_multi_agent=True
)
spec = get_module_spec(framework=fw, env=env)
learner_group.add_module(module_id="0", module_spec=spec)
@@ -342,7 +338,7 @@ def test_load_module_state_errors(self):

scaling_config = LOCAL_SCALING_CONFIGS["local-cpu"]
learner_group = get_learner_group(
"torch", env, scaling_config, eager_tracing=True, is_multi_agent=True
"torch", env, scaling_config, is_multi_agent=True
)
spec = get_module_spec(framework="torch", env=env)
learner_group.add_module(module_id="0", module_spec=spec)
@@ -404,9 +400,7 @@ def test_save_load_state(self):
scaling_config = REMOTE_SCALING_CONFIGS.get(
scaling_mode
) or LOCAL_SCALING_CONFIGS.get(scaling_mode)
initial_learner_group = get_learner_group(
fw, env, scaling_config, eager_tracing=True
)
initial_learner_group = get_learner_group(fw, env, scaling_config)

# checkpoint the initial learner state for later comparison
initial_learner_checkpoint_dir = tempfile.TemporaryDirectory().name
@@ -424,9 +418,7 @@
# learner into the new one
initial_learner_group.shutdown()
del initial_learner_group
new_learner_group = get_learner_group(
fw, env, scaling_config, eager_tracing=True
)
new_learner_group = get_learner_group(fw, env, scaling_config)
new_learner_group.load_state(learner_after_1_update_checkpoint_dir)

# do another update
@@ -438,9 +430,7 @@
del new_learner_group

# construct a new learner group and load the initial state of the learner
learner_group = get_learner_group(
fw, env, scaling_config, eager_tracing=True
)
learner_group = get_learner_group(fw, env, scaling_config)
learner_group.load_state(initial_learner_checkpoint_dir)
check(learner_group.get_weights(), initial_learner_group_weights)
learner_group.update(batch.as_multi_agent(), reduce_fn=None)
@@ -477,9 +467,7 @@ def test_async_update(self):
print(f"Testing framework: {fw}, scaling mode: {scaling_mode}.")
env = gym.make("CartPole-v1")
scaling_config = REMOTE_SCALING_CONFIGS[scaling_mode]
learner_group = get_learner_group(
fw, env, scaling_config, eager_tracing=True
)
learner_group = get_learner_group(fw, env, scaling_config)
reader = get_cartpole_dataset_reader(batch_size=512)
min_loss = float("inf")
batch = reader.next()
2 changes: 1 addition & 1 deletion rllib/core/testing/utils.py
@@ -138,7 +138,7 @@ def get_learner_group(
env: "gym.Env",
scaling_config: LearnerGroupScalingConfig,
is_multi_agent: bool = False,
eager_tracing: bool = False,
eager_tracing: bool = True,
) -> LearnerGroup:
"""Construct a learner_group for testing.

3 changes: 1 addition & 2 deletions rllib/examples/action_masking.py
@@ -73,7 +73,6 @@ def get_cli_args():
default="torch",
help="The DL framework specifier.",
)
parser.add_argument("--eager-tracing", action="store_true")
parser.add_argument(
"--stop-iters", type=int, default=10, help="Number of iterations to train."
)
@@ -133,7 +132,7 @@ def get_cli_args():
"custom_model_config": {"no_masking": args.no_masking},
},
)
.framework(args.framework, eager_tracing=args.eager_tracing)
.framework(args.framework)
.resources(
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0"))
6 changes: 1 addition & 5 deletions rllib/examples/bandit/lin_ts_train_wheel_env.py
@@ -43,11 +43,7 @@ def plot_model_weights(means, covs):

num_iter = 10
print("Running training for %s time steps" % num_iter)
config = (
BanditLinTSConfig()
.environment(WheelBanditEnv)
.framework(args.framework, eager_tracing=args.framework == "tf2")
)
config = BanditLinTSConfig().environment(WheelBanditEnv).framework(args.framework)
algo = config.build()

policy = algo.get_policy()
6 changes: 1 addition & 5 deletions rllib/examples/bandit/tune_lin_ts_train_wheel_env.py
@@ -43,11 +43,7 @@ def plot_model_weights(means, covs, ax):

ray.init(num_cpus=2)

config = (
BanditLinTSConfig()
.environment(WheelBanditEnv)
.framework(args.framework, eager_tracing=args.framework == "tf2")
)
config = BanditLinTSConfig().environment(WheelBanditEnv).framework(args.framework)

# Actual env steps per `train()` call will be
# 10 * `min_sample_timesteps_per_iteration` (100 by default) = 1,000
(Additional changed example file; file name not shown in this view)
@@ -57,7 +57,7 @@
"user_time_budget": 1.0,
},
)
.framework(args.framework, eager_tracing=args.framework == "tf2")
.framework(args.framework)
# Test with batched inference.
.rollouts(num_envs_per_worker=2)
.evaluation(
2 changes: 1 addition & 1 deletion rllib/examples/bandit/tune_lin_ucb_train_recsim_env.py
@@ -40,7 +40,7 @@
"convert_to_discrete_action_space": True,
"wrap_for_bandits": True,
},
).framework(args.framework, eager_tracing=args.framework == "tf2")
).framework(args.framework)
)

# Actual env timesteps per `train()` call will be
2 changes: 1 addition & 1 deletion rllib/examples/checkpoint_by_custom_criteria.py
@@ -36,7 +36,7 @@
.get_default_config()
.environment("CartPole-v1")
# Run with tracing enabled for tf2.
.framework(args.framework, eager_tracing=args.framework == "tf2")
.framework(args.framework)
# Run 3 trials.
.training(
lr=tune.grid_search([0.01, 0.001, 0.0001]), train_batch_size=2341
2 changes: 1 addition & 1 deletion rllib/examples/custom_logger.py
@@ -83,7 +83,7 @@ def flush(self):
"CartPole-v1" if args.run not in ["DDPG", "TD3"] else "Pendulum-v1"
)
# Run with tracing enabled for tf2.
.framework(args.framework, eager_tracing=args.framework == "tf2")
.framework(args.framework)
# Setting up a custom logger config.
# ----------------------------------
# The following are different examples of custom logging setups:
4 changes: 2 additions & 2 deletions rllib/examples/eager_execution.py
@@ -26,8 +26,8 @@
# >> x.numpy()
# 0.0

# RLlib will automatically enable eager mode, if you set
# AlgorithmConfig.framework("tf2", eager_tracing=False).
# RLlib will enable eager execution mode, if you set
# `AlgorithmConfig.framework("tf2", eager_tracing=False)`.
# If you would like to remain in tf static-graph mode, but still use tf2.x's
# new APIs (some of which are not supported by tf1.x), specify your "framework"
# as "tf" and check for the version (tfv) to be 2:
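For reference, a minimal sketch of the two TensorFlow setups the comment above describes (PPO and CartPole are stand-ins; `try_import_tf` is RLlib's helper returning the tf1 module, the tf module, and the installed major version):

```python
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.utils.framework import try_import_tf

tf1, tf, tfv = try_import_tf()

# "tf2": eager execution, traced by default after this PR; pass
# eager_tracing=False to debug the untraced eager code path.
eager_config = PPOConfig().environment("CartPole-v1").framework("tf2")

# "tf": static-graph mode, which still works with a TF 2.x installation;
# check the reported major version if you rely on tf2.x-only APIs.
static_config = PPOConfig().environment("CartPole-v1").framework("tf")
assert tfv == 2  # sketch assumes TF 2.x is installed
```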
(Additional changed example file; file name not shown in this view)
@@ -25,7 +25,6 @@
default="torch",
help="The DL framework specifier.",
)
parser.add_argument("--eager-tracing", action="store_true")
parser.add_argument(
"--stop-iters",
type=int,
@@ -67,7 +66,7 @@
.get_default_config()
.environment("FrozenLake-v1")
# Run with tracing enabled for tf2?
.framework(args.framework, eager_tracing=args.eager_tracing)
.framework(args.framework)
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
.resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))
)
(Additional changed example file; file name not shown in this view)
@@ -26,12 +26,6 @@
default="torch",
help="The DL framework specifier.",
)
parser.add_argument(
"--eager-tracing",
action="store_true",
help="Use tf eager tracing to speed up execution in tf2.x. Only supported"
" for `framework=tf2`.",
)
parser.add_argument(
"--prev-n-actions",
type=int,
@@ -85,7 +79,7 @@
.get_default_config()
.environment("FrozenLake-v1")
# Run with tracing enabled for tf2?
.framework(args.framework, eager_tracing=args.eager_tracing)
.framework(args.framework)
.training(
model={
"use_attention": True,
(Additional changed example file; file name not shown in this view)
@@ -26,12 +26,6 @@
default="torch",
help="The DL framework specifier.",
)
parser.add_argument(
"--eager-tracing",
action="store_true",
help="Use tf eager tracing to speed up execution in tf2.x. Only supported"
" for `framework=tf2`.",
)
parser.add_argument(
"--prev-action",
action="store_true",
@@ -83,7 +77,7 @@
.get_default_config()
.environment("FrozenLake-v1")
# Run with tracing enabled for tf2?
.framework(args.framework, eager_tracing=args.eager_tracing)
.framework(args.framework)
.training(
model={
"use_lstm": True,