[RLlib] Enable eager_tracing=True by default. #36556

Merged

Changes from 2 commits
6 changes: 3 additions & 3 deletions rllib/algorithms/algorithm_config.py
@@ -260,7 +260,7 @@ def __init__(self, algo_class=None):

# `self.framework()`
self.framework_str = "torch"
self.eager_tracing = False
self.eager_tracing = True
self.eager_max_retraces = 20
self.tf_session_args = {
# note: overridden by `local_tf_session_args`
@@ -1237,8 +1237,8 @@ def framework(
"""Sets the config's DL framework settings.

Args:
framework: tf: TensorFlow (static-graph); tf2: TensorFlow 2.x
(eager or traced, if eager_tracing=True); torch: PyTorch
framework: torch: PyTorch; tf2: TensorFlow 2.x (eager execution or traced
if eager_tracing=True); tf: TensorFlow (static-graph);
eager_tracing: Enable tracing in eager mode. This greatly improves
performance (speedup ~2x), but makes it slightly harder to debug
since Python code won't be evaluated after the initial eager pass.
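With this default flip, any config that selects the "tf2" framework now traces its eager code path unless tracing is explicitly turned off. A minimal sketch of both modes (APPO and CartPole are stand-ins, not mandated by this PR):

```python
from ray.rllib.algorithms.appo import APPOConfig

# eager_tracing now defaults to True, so this config traces the eager code
# path (roughly ~2x faster, per the docstring above).
config = APPOConfig().environment("CartPole-v1").framework("tf2")

# For step-through debugging of the Python eager code, disable tracing explicitly.
debug_config = (
    APPOConfig()
    .environment("CartPole-v1")
    .framework("tf2", eager_tracing=False)
)
```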
34 changes: 17 additions & 17 deletions rllib/algorithms/appo/tests/test_appo.py
@@ -25,7 +25,7 @@ def test_appo_compilation(self):
config = appo.APPOConfig().rollouts(num_rollout_workers=1)
num_iterations = 2

for _ in framework_iterator(config, with_eager_tracing=True):
for _ in framework_iterator(config):
print("w/o v-trace")
config.vtrace = False
algo = config.build(env="CartPole-v1")
@@ -55,7 +55,7 @@ def test_appo_compilation_use_kl_loss(self):
)
num_iterations = 2

for _ in framework_iterator(config, with_eager_tracing=True):
for _ in framework_iterator(config):
algo = config.build(env="CartPole-v1")
for i in range(num_iterations):
results = algo.train()
@@ -117,30 +117,30 @@ def test_appo_entropy_coeff_schedule(self):
)

def _step_n_times(algo, n: int):
"""Step Algorithm n times.

Returns:
entropy coefficient and number of env steps sampled at the end of the execution.
"""
for _ in range(n):
results = algo.train()
print(algo.workers.local_worker().global_vars)
print(results)
return results["info"][LEARNER_INFO][DEFAULT_POLICY_ID][LEARNER_STATS_KEY][
"entropy_coeff"
]
return (
results["info"][LEARNER_INFO][DEFAULT_POLICY_ID][LEARNER_STATS_KEY][
"entropy_coeff"
],
results["num_env_steps_sampled"],
)

for _ in framework_iterator(config):
algo = config.build(env="CartPole-v1")

coeff = _step_n_times(algo, 10) # 200 timesteps
# Should be close to the starting coeff of 0.01.
self.assertLessEqual(coeff, 0.01)
self.assertGreaterEqual(coeff, 0.001)
coeff, num_env_steps_sampled = _step_n_times(algo, 5) # ~100 timesteps
if num_env_steps_sampled > 300:
self.assertLessEqual(coeff, 0.001)
self.assertGreaterEqual(coeff, 0.0001)
else:
self.assertLessEqual(coeff, 0.01)
self.assertGreaterEqual(coeff, 0.001)
Contributor:

Can we redesign this test a little bit?

For example, it could be simplified by using:

entropy_coeff_schedule=[[0, 0.1], [200, 0.001], [600, 0.0001]]

Also, _step_n_times() should be renamed to "step_until_n_steps_reached()".
We could then reuse it for the entropy-coefficient tests of other algorithms if desired.
The "~100 timesteps" assumption can easily change per algorithm, or when something unrelated to the coefficient schedule changes in the algorithm under test.

Contributor Author:

I need to fix this, yes. I think because eager tracing is much faster, the async sampling also runs faster in the background.
I will add a proper check here to make sure this test asserts the right values based on the actual number of timesteps sampled.
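A possible shape for the suggested helper (an illustrative sketch only; the name `step_until_n_steps_reached` and the iteration cap are assumptions, not part of this PR; it reuses the `LEARNER_INFO`, `DEFAULT_POLICY_ID`, and `LEARNER_STATS_KEY` constants already imported by this test):

```python
def step_until_n_steps_reached(algo, min_env_steps, max_iters=100):
    """Train `algo` until at least `min_env_steps` env steps have been sampled.

    Returns the current entropy coefficient and the actual number of env steps
    sampled, so assertions can be made against the real step count rather than
    a fixed number of training iterations.
    """
    num_env_steps_sampled = 0
    results = None
    for _ in range(max_iters):
        results = algo.train()
        num_env_steps_sampled = results["num_env_steps_sampled"]
        if num_env_steps_sampled >= min_env_steps:
            break
    coeff = results["info"][LEARNER_INFO][DEFAULT_POLICY_ID][LEARNER_STATS_KEY][
        "entropy_coeff"
    ]
    return coeff, num_env_steps_sampled
```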


coeff = _step_n_times(algo, 20) # 400 timesteps
# Should have annealed to the final coeff of 0.0001.
self.assertLessEqual(coeff, 0.001)
coeff, num_env_steps_sampled = _step_n_times(algo, 20) # ~400 timesteps
self.assertLessEqual(coeff, 0.0005)

algo.stop()

1 change: 0 additions & 1 deletion rllib/algorithms/appo/tests/test_appo_learner.py
@@ -116,7 +116,6 @@ def test_kl_coeff_changes(self):
config = (
appo.APPOConfig()
.environment("CartPole-v1")
.framework(eager_tracing=True)
# Asynchronous Algo, make sure we have some results after 1 iteration.
.reporting(min_time_s_per_iteration=10)
.rollouts(
2 changes: 1 addition & 1 deletion rllib/core/learner/learner.py
@@ -96,7 +96,7 @@ class FrameworkHyperparameters:
Module in Torch.
"""

eager_tracing: bool = False
eager_tracing: bool = True
torch_compile_cfg: Optional["TorchCompileConfig"] = None


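The same default flip applies when building the framework hyperparameters for the new Learner stack directly. A minimal sketch, assuming the dataclass fields are exactly the ones visible in this hunk (all with defaults):

```python
from ray.rllib.core.learner.learner import FrameworkHyperparameters

# Tracing is now on by default.
hps = FrameworkHyperparameters()
assert hps.eager_tracing is True

# Opt out explicitly, e.g. while debugging a tf2 Learner.
debug_hps = FrameworkHyperparameters(eager_tracing=False)
```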
2 changes: 1 addition & 1 deletion rllib/core/learner/learner_group_config.py
@@ -55,7 +55,7 @@ def __init__(self, cls: Type[LearnerGroup] = None) -> None:
self.local_gpu_idx = 0

# `self.framework()`
self.eager_tracing = False
self.eager_tracing = True
self.torch_compile_cfg = None

def validate(self) -> None:
6 changes: 2 additions & 4 deletions rllib/core/learner/tests/test_learner.py
@@ -36,7 +36,7 @@ def tearDown(cls) -> None:
def test_end_to_end_update(self):

for fw in framework_iterator(frameworks=("torch", "tf2")):
learner = get_learner(framework=fw, eager_tracing=True, env=self.ENV)
learner = get_learner(framework=fw, env=self.ENV)
reader = get_cartpole_dataset_reader(batch_size=512)

min_loss = float("inf")
@@ -60,7 +60,7 @@ def test_compute_gradients(self):
the weights is all ones.
"""
for fw in framework_iterator(frameworks=("torch", "tf2")):
learner = get_learner(framework=fw, eager_tracing=True, env=self.ENV)
learner = get_learner(framework=fw, env=self.ENV)

params = learner.get_parameters(learner.module[DEFAULT_POLICY_ID])

@@ -94,7 +94,6 @@ def test_postprocess_gradients(self):

learner = get_learner(
framework=fw,
eager_tracing=True,
env=self.ENV,
learner_hps=hps,
)
@@ -119,7 +118,6 @@ def test_postprocess_gradients(self):
hps.grad_clip_by = "norm"
learner = get_learner(
framework=fw,
eager_tracing=True,
env=self.ENV,
learner_hps=hps,
)
30 changes: 9 additions & 21 deletions rllib/core/learner/tests/test_learner_group.py
@@ -62,7 +62,7 @@ def local_training_helper(self, fw, scaling_mode) -> None:
tf.random.set_seed(0)
env = gym.make("CartPole-v1")
scaling_config = LOCAL_SCALING_CONFIGS[scaling_mode]
learner_group = get_learner_group(fw, env, scaling_config, eager_tracing=True)
learner_group = get_learner_group(fw, env, scaling_config)
local_learner = get_learner(framework=fw, env=env)
local_learner.build()

@@ -136,9 +136,7 @@ def test_update_multigpu(self):
env = gym.make("CartPole-v1")

scaling_config = REMOTE_SCALING_CONFIGS[scaling_mode]
learner_group = get_learner_group(
fw, env, scaling_config, eager_tracing=True
)
learner_group = get_learner_group(fw, env, scaling_config)
reader = get_cartpole_dataset_reader(batch_size=1024)

min_loss = float("inf")
@@ -188,9 +186,7 @@ def test_add_remove_module(self):
print(f"Testing framework: {fw}, scaling mode: {scaling_mode}.")
env = gym.make("CartPole-v1")
scaling_config = REMOTE_SCALING_CONFIGS[scaling_mode]
learner_group = get_learner_group(
fw, env, scaling_config, eager_tracing=True
)
learner_group = get_learner_group(fw, env, scaling_config)
reader = get_cartpole_dataset_reader(batch_size=512)
batch = reader.next()

@@ -267,7 +263,7 @@ def test_load_module_state(self):
scaling_mode
) or LOCAL_SCALING_CONFIGS.get(scaling_mode)
learner_group = get_learner_group(
fw, env, scaling_config, eager_tracing=True, is_multi_agent=True
fw, env, scaling_config, is_multi_agent=True
)
spec = get_module_spec(framework=fw, env=env)
learner_group.add_module(module_id="0", module_spec=spec)
@@ -342,7 +338,7 @@ def test_load_module_state_errors(self):

scaling_config = LOCAL_SCALING_CONFIGS["local-cpu"]
learner_group = get_learner_group(
"torch", env, scaling_config, eager_tracing=True, is_multi_agent=True
"torch", env, scaling_config, is_multi_agent=True
)
spec = get_module_spec(framework="torch", env=env)
learner_group.add_module(module_id="0", module_spec=spec)
@@ -404,9 +400,7 @@ def test_save_load_state(self):
scaling_config = REMOTE_SCALING_CONFIGS.get(
scaling_mode
) or LOCAL_SCALING_CONFIGS.get(scaling_mode)
initial_learner_group = get_learner_group(
fw, env, scaling_config, eager_tracing=True
)
initial_learner_group = get_learner_group(fw, env, scaling_config)

# checkpoint the initial learner state for later comparison
initial_learner_checkpoint_dir = tempfile.TemporaryDirectory().name
@@ -424,9 +418,7 @@
# learner into the new one
initial_learner_group.shutdown()
del initial_learner_group
new_learner_group = get_learner_group(
fw, env, scaling_config, eager_tracing=True
)
new_learner_group = get_learner_group(fw, env, scaling_config)
new_learner_group.load_state(learner_after_1_update_checkpoint_dir)

# do another update
@@ -438,9 +430,7 @@
del new_learner_group

# construct a new learner group and load the initial state of the learner
learner_group = get_learner_group(
fw, env, scaling_config, eager_tracing=True
)
learner_group = get_learner_group(fw, env, scaling_config)
learner_group.load_state(initial_learner_checkpoint_dir)
check(learner_group.get_weights(), initial_learner_group_weights)
learner_group.update(batch.as_multi_agent(), reduce_fn=None)
@@ -477,9 +467,7 @@ def test_async_update(self):
print(f"Testing framework: {fw}, scaling mode: {scaling_mode}.")
env = gym.make("CartPole-v1")
scaling_config = REMOTE_SCALING_CONFIGS[scaling_mode]
learner_group = get_learner_group(
fw, env, scaling_config, eager_tracing=True
)
learner_group = get_learner_group(fw, env, scaling_config)
reader = get_cartpole_dataset_reader(batch_size=512)
min_loss = float("inf")
batch = reader.next()
2 changes: 1 addition & 1 deletion rllib/core/testing/utils.py
@@ -138,7 +138,7 @@ def get_learner_group(
env: "gym.Env",
scaling_config: LearnerGroupScalingConfig,
is_multi_agent: bool = False,
eager_tracing: bool = False,
eager_tracing: bool = True,
) -> LearnerGroup:
"""Construct a learner_group for testing.

3 changes: 1 addition & 2 deletions rllib/examples/action_masking.py
@@ -73,7 +73,6 @@ def get_cli_args():
default="torch",
help="The DL framework specifier.",
)
parser.add_argument("--eager-tracing", action="store_true")
parser.add_argument(
"--stop-iters", type=int, default=10, help="Number of iterations to train."
)
@@ -133,7 +132,7 @@ def get_cli_args():
"custom_model_config": {"no_masking": args.no_masking},
},
)
.framework(args.framework, eager_tracing=args.eager_tracing)
.framework(args.framework)
.resources(
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0"))
6 changes: 1 addition & 5 deletions rllib/examples/bandit/lin_ts_train_wheel_env.py
@@ -43,11 +43,7 @@ def plot_model_weights(means, covs):

num_iter = 10
print("Running training for %s time steps" % num_iter)
config = (
BanditLinTSConfig()
.environment(WheelBanditEnv)
.framework(args.framework, eager_tracing=args.framework == "tf2")
)
config = BanditLinTSConfig().environment(WheelBanditEnv).framework(args.framework)
algo = config.build()

policy = algo.get_policy()
6 changes: 1 addition & 5 deletions rllib/examples/bandit/tune_lin_ts_train_wheel_env.py
@@ -43,11 +43,7 @@ def plot_model_weights(means, covs, ax):

ray.init(num_cpus=2)

config = (
BanditLinTSConfig()
.environment(WheelBanditEnv)
.framework(args.framework, eager_tracing=args.framework == "tf2")
)
config = BanditLinTSConfig().environment(WheelBanditEnv).framework(args.framework)

# Actual env steps per `train()` call will be
# 10 * `min_sample_timesteps_per_iteration` (100 by default) = 1,000
(Additional changed example file; file name not shown in this view)
@@ -57,7 +57,7 @@
"user_time_budget": 1.0,
},
)
.framework(args.framework, eager_tracing=args.framework == "tf2")
.framework(args.framework)
# Test with batched inference.
.rollouts(num_envs_per_worker=2)
.evaluation(
2 changes: 1 addition & 1 deletion rllib/examples/bandit/tune_lin_ucb_train_recsim_env.py
@@ -40,7 +40,7 @@
"convert_to_discrete_action_space": True,
"wrap_for_bandits": True,
},
).framework(args.framework, eager_tracing=args.framework == "tf2")
).framework(args.framework)
)

# Actual env timesteps per `train()` call will be
2 changes: 1 addition & 1 deletion rllib/examples/checkpoint_by_custom_criteria.py
@@ -36,7 +36,7 @@
.get_default_config()
.environment("CartPole-v1")
# Run with tracing enabled for tf2.
.framework(args.framework, eager_tracing=args.framework == "tf2")
.framework(args.framework)
# Run 3 trials.
.training(
lr=tune.grid_search([0.01, 0.001, 0.0001]), train_batch_size=2341
2 changes: 1 addition & 1 deletion rllib/examples/custom_logger.py
@@ -83,7 +83,7 @@ def flush(self):
"CartPole-v1" if args.run not in ["DDPG", "TD3"] else "Pendulum-v1"
)
# Run with tracing enabled for tf2.
.framework(args.framework, eager_tracing=args.framework == "tf2")
.framework(args.framework)
# Setting up a custom logger config.
# ----------------------------------
# The following are different examples of custom logging setups:
4 changes: 2 additions & 2 deletions rllib/examples/eager_execution.py
@@ -26,8 +26,8 @@
# >> x.numpy()
# 0.0

# RLlib will automatically enable eager mode, if you set
# AlgorithmConfig.framework("tf2", eager_tracing=False).
# RLlib will enable eager execution mode, if you set
# `AlgorithmConfig.framework("tf2", eager_tracing=False)`.
# If you would like to remain in tf static-graph mode, but still use tf2.x's
# new APIs (some of which are not supported by tf1.x), specify your "framework"
# as "tf" and check for the version (tfv) to be 2:
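For reference, a minimal sketch of the two TensorFlow setups the comment above describes (PPO and CartPole are stand-ins; `try_import_tf` is RLlib's helper returning the tf1 module, the tf module, and the installed major version):

```python
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.utils.framework import try_import_tf

tf1, tf, tfv = try_import_tf()

# "tf2": eager execution, traced by default after this PR; pass
# eager_tracing=False to debug the untraced eager code path.
eager_config = PPOConfig().environment("CartPole-v1").framework("tf2")

# "tf": static-graph mode, which still works with a TF 2.x installation;
# check the reported major version if you rely on tf2.x-only APIs.
static_config = PPOConfig().environment("CartPole-v1").framework("tf")
assert tfv == 2  # sketch assumes TF 2.x is installed
```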
(Additional changed example file; file name not shown in this view)
@@ -25,7 +25,6 @@
default="torch",
help="The DL framework specifier.",
)
parser.add_argument("--eager-tracing", action="store_true")
parser.add_argument(
"--stop-iters",
type=int,
@@ -67,7 +66,7 @@
.get_default_config()
.environment("FrozenLake-v1")
# Run with tracing enabled for tf2?
.framework(args.framework, eager_tracing=args.eager_tracing)
.framework(args.framework)
# Use GPUs iff `RLLIB_NUM_GPUS` env var set to > 0.
.resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))
)
(Additional changed example file; file name not shown in this view)
@@ -26,12 +26,6 @@
default="torch",
help="The DL framework specifier.",
)
parser.add_argument(
"--eager-tracing",
action="store_true",
help="Use tf eager tracing to speed up execution in tf2.x. Only supported"
" for `framework=tf2`.",
)
parser.add_argument(
"--prev-n-actions",
type=int,
@@ -85,7 +79,7 @@
.get_default_config()
.environment("FrozenLake-v1")
# Run with tracing enabled for tf2?
.framework(args.framework, eager_tracing=args.eager_tracing)
.framework(args.framework)
.training(
model={
"use_attention": True,
(Additional changed example file; file name not shown in this view)
@@ -26,12 +26,6 @@
default="torch",
help="The DL framework specifier.",
)
parser.add_argument(
"--eager-tracing",
action="store_true",
help="Use tf eager tracing to speed up execution in tf2.x. Only supported"
" for `framework=tf2`.",
)
parser.add_argument(
"--prev-action",
action="store_true",
@@ -83,7 +77,7 @@
.get_default_config()
.environment("FrozenLake-v1")
# Run with tracing enabled for tf2?
.framework(args.framework, eager_tracing=args.eager_tracing)
.framework(args.framework)
.training(
model={
"use_lstm": True,