Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RLlib] AlgorithmConfig: Next steps (volume 01) #29395

Merged
merged 74 commits into from
Oct 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
74 commits
Select commit Hold shift + click to select a range
250a88a
wip
sven1977 Sep 28, 2022
b268d14
Merge branch 'master' into algo_configs_next_steps_1
sven1977 Oct 14, 2022
dc8ea88
wip
sven1977 Oct 16, 2022
1edaf5f
Merge branch 'master' of https://github.com/ray-project/ray into algo…
sven1977 Oct 16, 2022
9b619da
wip
sven1977 Oct 16, 2022
c810b41
wip
sven1977 Oct 17, 2022
e6ee47e
wip
sven1977 Oct 18, 2022
4ac1944
wip
sven1977 Oct 18, 2022
d777f7a
wip
sven1977 Oct 18, 2022
ad2aafc
wip
sven1977 Oct 18, 2022
60b8b41
wip
sven1977 Oct 18, 2022
301ef3d
wip
sven1977 Oct 18, 2022
e735dbb
Merge branch 'master' of https://github.com/ray-project/ray into algo…
sven1977 Oct 18, 2022
7928682
wip
sven1977 Oct 18, 2022
306106b
wip
sven1977 Oct 18, 2022
884d9fb
wip
sven1977 Oct 18, 2022
4ee1959
fix
sven1977 Oct 18, 2022
7106a6d
wip
sven1977 Oct 18, 2022
a538604
wip
sven1977 Oct 18, 2022
36d93c4
wip
sven1977 Oct 18, 2022
ccb8569
wip
sven1977 Oct 18, 2022
8c86ef4
wip
sven1977 Oct 18, 2022
9246fb1
wip
sven1977 Oct 19, 2022
56865ee
wip
sven1977 Oct 19, 2022
3054fac
wip
sven1977 Oct 19, 2022
c0b2941
wip
sven1977 Oct 19, 2022
85267d7
wip
sven1977 Oct 19, 2022
9545cde
wip
sven1977 Oct 19, 2022
2e6d5da
wip
sven1977 Oct 19, 2022
ae8a0e0
wip
sven1977 Oct 19, 2022
b582a22
wip
sven1977 Oct 19, 2022
c829f47
wip
sven1977 Oct 19, 2022
eb75ea4
wip
sven1977 Oct 19, 2022
d12897f
wip
sven1977 Oct 19, 2022
c7d2ab2
Merge branch 'master' of https://github.com/ray-project/ray into algo…
sven1977 Oct 19, 2022
cc3d21e
wip
sven1977 Oct 19, 2022
b4de363
wip
sven1977 Oct 19, 2022
e48b043
wip
sven1977 Oct 20, 2022
d1bd8c7
wip
sven1977 Oct 20, 2022
76d667b
wip
sven1977 Oct 20, 2022
eb30f88
Merge branch 'master' of https://github.com/ray-project/ray into algo…
sven1977 Oct 20, 2022
a740a9c
wip
sven1977 Oct 20, 2022
4cdbfb0
wip
sven1977 Oct 20, 2022
318b73a
wip
sven1977 Oct 20, 2022
9010fba
wip
sven1977 Oct 20, 2022
3ebf10a
Merge branch 'master' of https://github.com/ray-project/ray into algo…
sven1977 Oct 20, 2022
91aefe2
wip
sven1977 Oct 21, 2022
ae21570
wip
sven1977 Oct 21, 2022
d59198f
wip
sven1977 Oct 21, 2022
16f82cc
wip
sven1977 Oct 21, 2022
ebffe52
Merge branch 'master' into algo_configs_next_steps_1
sven1977 Oct 21, 2022
49b4564
wip
sven1977 Oct 21, 2022
be953b8
wip
sven1977 Oct 21, 2022
e01461b
wip
sven1977 Oct 21, 2022
95b3ffa
Merge branch 'master' of https://github.com/ray-project/ray into algo…
sven1977 Oct 21, 2022
f5d1100
wip
sven1977 Oct 21, 2022
2bcd192
wip
sven1977 Oct 24, 2022
96fff6b
Merge branch 'master' of https://github.com/ray-project/ray into algo…
sven1977 Oct 24, 2022
c3d9acd
wip
sven1977 Oct 24, 2022
9ff3ecc
wip
sven1977 Oct 25, 2022
73c7b21
Merge branch 'master' into algo_configs_next_steps_1
sven1977 Oct 25, 2022
1c6f5ec
wip
sven1977 Oct 25, 2022
dfaf935
wip
sven1977 Oct 25, 2022
b4c17a5
wip
sven1977 Oct 25, 2022
6e7a684
wip
sven1977 Oct 25, 2022
d4f8ff7
wip
sven1977 Oct 25, 2022
6da2356
wip
sven1977 Oct 25, 2022
8e81a4d
wip
sven1977 Oct 25, 2022
6443d8e
wip
sven1977 Oct 25, 2022
cab6ddd
wip
sven1977 Oct 25, 2022
1d66f41
wip
sven1977 Oct 25, 2022
e5920ea
wip
sven1977 Oct 25, 2022
1617b46
wip
sven1977 Oct 26, 2022
cdc2157
Merge branch 'master' into algo_configs_next_steps_1
sven1977 Oct 26, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ handle any checkpoints created with Ray 2.0 or any version up to ``V``.

.. code-block:: shell
$ mode rllib_checkpoint.json
$ more rllib_checkpoint.json
{"type": "Algorithm", "checkpoint_version": "1.0"}
Now, let's check out the `policies/` sub-directory:
Expand Down
7 changes: 4 additions & 3 deletions rllib/algorithms/a2c/a2c.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,12 @@ class A2CConfig(A3CConfig):
>>> from ray import tune
>>> config = A2CConfig().training(lr=0.01, grad_clip=30.0)\
... .resources(num_gpus=0)\
... .rollouts(num_rollout_workers=2)
... .rollouts(num_rollout_workers=2)\
... .environment("CartPole-v1")
>>> print(config.to_dict())
>>> # Build an Algorithm object from the config and run 1 training iteration.
>>> trainer = config.build(env="CartPole-v1")
>>> trainer.train()
>>> algo = config.build()
>>> algo.train()
Example:
>>> import ray.air as air
Expand Down
25 changes: 13 additions & 12 deletions rllib/algorithms/a2c/tests/test_a2c.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,14 @@ def test_a2c_compilation(self):
# Test against all frameworks.
for _ in framework_iterator(config, with_eager_tracing=True):
for env in ["CartPole-v0", "Pendulum-v1", "PongDeterministic-v0"]:
trainer = config.build(env=env)
config.environment(env)
algo = config.build()
for i in range(num_iterations):
results = trainer.train()
results = algo.train()
check_train_results(results)
print(results)
check_compute_single_action(trainer)
trainer.stop()
check_compute_single_action(algo)
algo.stop()

def test_a2c_exec_impl(self):
config = (
Expand All @@ -43,12 +44,12 @@ def test_a2c_exec_impl(self):
)

for _ in framework_iterator(config):
trainer = config.build()
results = trainer.train()
algo = config.build()
results = algo.train()
check_train_results(results)
print(results)
check_compute_single_action(trainer)
trainer.stop()
check_compute_single_action(algo)
algo.stop()

def test_a2c_exec_impl_microbatch(self):
config = (
Expand All @@ -59,12 +60,12 @@ def test_a2c_exec_impl_microbatch(self):
)

for _ in framework_iterator(config):
trainer = config.build()
results = trainer.train()
algo = config.build()
results = algo.train()
check_train_results(results)
print(results)
check_compute_single_action(trainer)
trainer.stop()
check_compute_single_action(algo)
algo.stop()


if __name__ == "__main__":
Expand Down
7 changes: 4 additions & 3 deletions rllib/algorithms/a3c/a3c.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,12 @@ class A3CConfig(AlgorithmConfig):
>>> from ray import tune
>>> config = A3CConfig().training(lr=0.01, grad_clip=30.0)\
... .resources(num_gpus=0)\
... .rollouts(num_rollout_workers=4)
... .rollouts(num_rollout_workers=4)\
... .environment("CartPole-v1")
>>> print(config.to_dict())
>>> # Build an Algorithm object from the config and run 1 training iteration.
>>> trainer = config.build(env="CartPole-v1")
>>> trainer.train()
>>> algo = config.build()
>>> algo.train()
Example:
>>> config = A3CConfig()
Expand Down
16 changes: 8 additions & 8 deletions rllib/algorithms/a3c/tests/test_a3c.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@ def test_a3c_compilation(self):
for env in ["CartPole-v1", "Pendulum-v1", "PongDeterministic-v0"]:
print("env={}".format(env))
config.model["use_lstm"] = env == "CartPole-v1"
trainer = config.build(env=env)
algo = config.build(env=env)
for i in range(num_iterations):
results = trainer.train()
results = algo.train()
check_train_results(results)
print(results)
check_compute_single_action(
trainer, include_state=config.model["use_lstm"]
algo, include_state=config.model["use_lstm"]
)
trainer.stop()
algo.stop()

def test_a3c_entropy_coeff_schedule(self):
"""Test A3C entropy coeff schedule support."""
Expand Down Expand Up @@ -78,17 +78,17 @@ def _step_n_times(trainer, n: int):

# Test against all frameworks.
for _ in framework_iterator(config):
trainer = config.build(env="CartPole-v1")
algo = config.build(env="CartPole-v1")

coeff = _step_n_times(trainer, 1) # 20 timesteps
coeff = _step_n_times(algo, 1) # 20 timesteps
# Should be close to the starting coeff of 0.01
self.assertGreaterEqual(coeff, 0.005)

coeff = _step_n_times(trainer, 10) # 200 timesteps
coeff = _step_n_times(algo, 10) # 200 timesteps
# Should have annealed to the final coeff of 0.0001.
self.assertLessEqual(coeff, 0.00011)

trainer.stop()
algo.stop()


if __name__ == "__main__":
Expand Down
Loading