[RLlib] Cleanup examples folder vol. 41: Add new example on async gym vector env. #49527

Merged
185 changes: 96 additions & 89 deletions doc/source/rllib/rllib-examples.rst

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions rllib/BUILD
@@ -2295,6 +2295,14 @@ py_test(
srcs = ["examples/envs/agents_act_in_sequence.py"],
args = ["--enable-new-api-stack", "--num-agents=2", "--stop-iters=3"]
)
py_test(
name = "examples/envs/async_gym_env_vectorization",
main = "examples/envs/async_gym_env_vectorization.py",
tags = ["team:rllib", "exclusive", "examples"],
size = "medium",
srcs = ["examples/envs/async_gym_env_vectorization.py"],
args = ["--enable-new-api-stack", "--as-test", "--vectorize-mode=BOTH"]
)
py_test(
name = "examples/envs/custom_env_render_method",
main = "examples/envs/custom_env_render_method.py",
36 changes: 24 additions & 12 deletions rllib/algorithms/algorithm_config.py
@@ -318,6 +318,9 @@ def __init__(self, algo_class: Optional[type] = None):
self.env_runner_cls = None
self.num_env_runners = 0
self.num_envs_per_env_runner = 1
# TODO (sven): Once the new ormsgpack system is in place, replace the string
# with proper `gym.envs.registration.VectorizeMode.SYNC`.
self.gym_env_vectorize_mode = "SYNC"
self.num_cpus_per_env_runner = 1
self.num_gpus_per_env_runner = 0
self.custom_resources_per_env_runner = {}
@@ -904,25 +907,15 @@ def freeze(self) -> None:
def validate(self) -> None:
"""Validates all values in this config."""

# Check callbacks settings.
self._validate_env_runner_settings()
self._validate_callbacks_settings()
# Check framework specific settings.
self._validate_framework_settings()
# Check resources specific settings.
self._validate_resources_settings()
# Check multi-agent specific settings.
self._validate_multi_agent_settings()
# Check input specific settings.
self._validate_input_settings()
# Check evaluation specific settings.
self._validate_evaluation_settings()
# Check offline specific settings (new API stack).
self._validate_offline_settings()

# Check new API stack specific settings.
self._validate_new_api_stack_settings()

# Check to-be-deprecated settings (however that are still in use).
self._validate_to_be_deprecated_settings()

def build(
@@ -1738,6 +1731,7 @@ def env_runners(
env_runner_cls: Optional[type] = NotProvided,
num_env_runners: Optional[int] = NotProvided,
num_envs_per_env_runner: Optional[int] = NotProvided,
gym_env_vectorize_mode: Optional[str] = NotProvided,
num_cpus_per_env_runner: Optional[int] = NotProvided,
num_gpus_per_env_runner: Optional[Union[float, int]] = NotProvided,
custom_resources_per_env_runner: Optional[dict] = NotProvided,
@@ -1795,6 +1789,11 @@ def env_runners(
(vector-wise) per EnvRunner. This enables batching when computing
actions through RLModule inference, which can improve performance
for inference-bottlenecked workloads.
gym_env_vectorize_mode: The gymnasium vectorization mode for vector envs.
Must be a `gymnasium.envs.registration.VectorizeMode` (enum) value or its
string name, e.g. "SYNC" (default) or "ASYNC". Set this to ASYNC to
parallelize the individual sub-environments within the vector. This can
speed up your EnvRunners significantly when using heavier environments.
num_cpus_per_env_runner: Number of CPUs to allocate per EnvRunner.
num_gpus_per_env_runner: Number of GPUs to allocate per EnvRunner. This can
be fractional. This is usually needed only if your env itself requires a
@@ -1975,7 +1974,8 @@ def env_runners(
"larger 0!"
)
self.num_envs_per_env_runner = num_envs_per_env_runner

if gym_env_vectorize_mode is not NotProvided:
self.gym_env_vectorize_mode = gym_env_vectorize_mode
if num_cpus_per_env_runner is not NotProvided:
self.num_cpus_per_env_runner = num_cpus_per_env_runner
if num_gpus_per_env_runner is not NotProvided:
@@ -4381,6 +4381,18 @@ def _model_config_auto_includes(self) -> Dict[str, Any]:
# -----------------------------------------------------------
# Various validation methods for different types of settings.
# -----------------------------------------------------------
def _validate_env_runner_settings(self) -> None:
allowed_vectorize_modes = set(
list(gym.envs.registration.VectorizeMode.__members__.keys())
+ list(gym.envs.registration.VectorizeMode.__members__.values())
)
if self.gym_env_vectorize_mode not in allowed_vectorize_modes:
raise ValueError(
f"`gym_env_vectorize_mode` ({self.gym_env_vectorize_mode}) must be a "
"member of `gym.envs.registration.VectorizeMode`! Allowed values "
f"are {allowed_vectorize_modes}."
)

def _validate_callbacks_settings(self) -> None:
"""Validates callbacks settings."""
# Old API stack:
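For reference, a minimal usage sketch of the `gym_env_vectorize_mode` option added above, mirroring the example script introduced further below (the env name and vector size are placeholders, not part of the PR):

from ray.rllib.algorithms.ppo import PPOConfig

config = (
    PPOConfig()
    .environment("CartPole-v1")
    .env_runners(
        # Vectorize: run 4 sub-envs per EnvRunner.
        num_envs_per_env_runner=4,
        # "SYNC" (default): sub-envs are stepped sequentially in the EnvRunner
        # process. "ASYNC": each sub-env gets its own process. The
        # `gym.envs.registration.VectorizeMode` enum members are also accepted.
        gym_env_vectorize_mode="ASYNC",
    )
)
# Then build and train as usual, e.g. `algo = config.build(); algo.train()`.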
8 changes: 4 additions & 4 deletions rllib/env/single_agent_env_runner.py
@@ -6,7 +6,6 @@

import gymnasium as gym
from gymnasium.wrappers.vector import DictInfoToList
from gymnasium.envs.registration import VectorizeMode

from ray.rllib.algorithms.algorithm_config import AlgorithmConfig
from ray.rllib.callbacks.callbacks import RLlibCallback
@@ -637,15 +636,16 @@ def make_env(self) -> None:
env_context=env_ctx,
)
gym.register("rllib-single-agent-env-v0", entry_point=entry_point)
vectorize_mode = self.config.gym_env_vectorize_mode

self.env = DictInfoToList(
gym.make_vec(
"rllib-single-agent-env-v0",
num_envs=self.config.num_envs_per_env_runner,
vectorization_mode=(
VectorizeMode.ASYNC
if self.config.remote_worker_envs
else VectorizeMode.SYNC
vectorize_mode
if isinstance(vectorize_mode, gym.envs.registration.VectorizeMode)
else gym.envs.registration.VectorizeMode(vectorize_mode.lower())
),
)
)

Collaborator: Isn't this already validated in AlgorithmConfig? Imo we need to validate only once. Either here or in the AlgorithmConfig.

Contributor Author: True, but an AlgorithmConfig only gets validated in the algo, not when it enters an EnvRunner. So - in theory - users could still run into this if they instantiate an EnvRunner in isolation with some config that has this defined as a string (e.g. "SYNC").
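To illustrate the author's point, here is a rough sketch of the isolated-EnvRunner case in which the string-to-enum fallback above matters (assuming, as in RLlib's own tests, that a `SingleAgentEnvRunner` can be constructed directly from a config):

from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.env.single_agent_env_runner import SingleAgentEnvRunner

config = (
    PPOConfig()
    .environment("CartPole-v1")
    # Passed as a plain string; `AlgorithmConfig.validate()` never runs here,
    # so `make_env()` itself has to map it to a `VectorizeMode` member.
    .env_runners(num_envs_per_env_runner=2, gym_env_vectorize_mode="ASYNC")
)
env_runner = SingleAgentEnvRunner(config=config)
episodes = env_runner.sample(num_timesteps=100)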
137 changes: 137 additions & 0 deletions rllib/examples/envs/async_gym_env_vectorization.py
@@ -0,0 +1,137 @@
"""Example demo'ing async gym vector envs, in which sub-envs have their own process.

Env vectorization is enabled by setting the `config.num_envs_per_env_runner`
value to > 1. However, by default the n sub-environments are stepped through
sequentially, rather than in parallel.

This script shows the effect of setting the `config.gym_env_vectorize_mode` from its
default value of "SYNC" (all sub envs are located in the same EnvRunner process)
to "ASYNC" (all sub envs in each EnvRunner get their own process).

This example:
- shows which config settings to change in order to switch from sub-envs being
stepped in sequence to each sub-env owning its own process (and compute resource)
and thus the vector being stepped in parallel.
- shows how this setup can increase EnvRunner performance significantly, especially
for heavier, slower environments.
- uses an artificially slow CartPole-v1 environment for demonstration purposes.


How to run this script
----------------------
`python [script file name].py --enable-new-api-stack`

Use the `--vectorize-mode=BOTH` option to run both modes (SYNC and ASYNC)
through Tune at the same time and get a better comparison of the throughputs
achieved.

Collaborator: Awesome!

For debugging, use the following additional command line options
`--no-tune --num-env-runners=0`
which should allow you to set breakpoints anywhere in the RLlib code and
have the execution stop there for inspection and debugging.

For logging to your WandB account, use:
`--wandb-key=[your WandB API key] --wandb-project=[some project name]
--wandb-run-name=[optional: WandB run name (within the defined project)]`


Results to expect
-----------------
You should see results similar to the following in your console output
when using the `--vectorize-mode=BOTH` option:

+--------------------------+------------+------------------------+------+
| Trial name | status | gym_env_vectorize_mode | iter |
| | | | |
|--------------------------+------------+------------------------+------+
| PPO_slow-env_6ddf4_00000 | TERMINATED | SYNC | 4 |
| PPO_slow-env_6ddf4_00001 | TERMINATED | ASYNC | 4 |
+--------------------------+------------+------------------------+------+
+------------------+----------------------+------------------------+
| total time (s) | episode_return_mean | num_env_steps_sample |
| | | d_lifetime |
|------------------+----------------------+------------------------+
| 60.8794 | 73.53 | 16040 |
| 19.1203 | 73.86 | 16037 |
+------------------+----------------------+------------------------+

You can see that the ASYNC mode, given that the env is sufficiently slow,
achieves much better results when using vectorization.
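In throughput terms, that is roughly 16040 / 60.88 ≈ 263 env steps per second for SYNC
versus 16037 / 19.12 ≈ 839 env steps per second for ASYNC, i.e. about a 3x speedup for
this artificially slowed-down env.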

You should see no difference, however, when only using
`--num-envs-per-env-runner=1`.
"""
import time

import gymnasium as gym

from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.utils.test_utils import (
add_rllib_example_script_args,
run_rllib_example_script_experiment,
)
from ray import tune

parser = add_rllib_example_script_args(default_reward=60.0)
parser.set_defaults(
enable_new_api_stack=True,
env="CartPole-v1",
num_envs_per_env_runner=6,
)
parser.add_argument(
"--vectorize-mode",
type=str,
default="ASYNC",
help="The value `gym.envs.registration.VectorizeMode` to use for env "
"vectorization. SYNC steps through all sub-envs in sequence. ASYNC (default) "
"parallelizes sub-envs through multiprocessing and can speed up EnvRunners "
"significantly. Use the special value `BOTH` to run both ASYNC and SYNC through a "
"Tune grid-search.",
)


class SlowEnv(gym.ObservationWrapper):
def observation(self, observation):
time.sleep(0.005)
return observation


if __name__ == "__main__":
args = parser.parse_args()

# Wrap the env with the slowness wrapper.
def _env_creator(cfg):
return SlowEnv(gym.make(args.env, **cfg))

tune.register_env("slow-env", _env_creator)

if args.vectorize_mode == "BOTH" and args.no_tune:
raise ValueError(
"`--vectorize-mode=BOTH` and `--no-tune` not allowed in combination!"
)

Collaborator: Why not just set no_tune=False if vectorize_mode="BOTH"?

Contributor Author: Good catch. Added an error instead and simplified the if-statement.

base_config = (
PPOConfig()
.environment("slow-env")
.env_runners(
gym_env_vectorize_mode=(
tune.grid_search(["SYNC", "ASYNC"])
if args.vectorize_mode == "BOTH"
else args.vectorize_mode
),
)
)

results = run_rllib_example_script_experiment(base_config, args)

# Compare the throughputs and assert that ASYNC is much faster than SYNC.
if args.vectorize_mode == "BOTH" and args.as_test:
throughput_sync = (
results[0].metrics["num_env_steps_sampled_lifetime"]
/ results[0].metrics["time_total_s"]
)
throughput_async = (
results[1].metrics["num_env_steps_sampled_lifetime"]
/ results[1].metrics["time_total_s"]
)
assert throughput_async > throughput_sync
4 changes: 3 additions & 1 deletion rllib/utils/test_utils.py
@@ -1087,9 +1087,11 @@ def run_rllib_example_script_experiment(
enable_env_runner_and_connector_v2=False,
)

# Define EnvRunner/RolloutWorker scaling and behavior.
Collaborator: And ... gone :D
# Define EnvRunner scaling and behavior.
if args.num_env_runners is not None:
config.env_runners(num_env_runners=args.num_env_runners)
if args.num_envs_per_env_runner is not None:
config.env_runners(num_envs_per_env_runner=args.num_envs_per_env_runner)

# Define compute resources used automatically (only using the --num-learners
# and --num-gpus-per-learner args).
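With the small `test_utils.py` change above, the vector size of the new example can also be set from the command line, for example (combining the flags already used in the example's docstring and BUILD target):

`python rllib/examples/envs/async_gym_env_vectorization.py --enable-new-api-stack --vectorize-mode=BOTH --num-envs-per-env-runner=8`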