ray-project · sven1977 · Jan 3, 2025 · Jan 1, 2025 · Jan 1, 2025 · Jan 1, 2025
@@ -2295,6 +2295,14 @@ py_test(
     srcs = ["examples/envs/agents_act_in_sequence.py"],
     args = ["--enable-new-api-stack", "--num-agents=2", "--stop-iters=3"]
 )
+py_test(
+    name = "examples/envs/async_gym_env_vectorization",
+    main = "examples/envs/async_gym_env_vectorization.py",
+    tags = ["team:rllib", "exclusive", "examples"],
+    size = "medium",
+    srcs = ["examples/envs/async_gym_env_vectorization.py"],
+    args = ["--enable-new-api-stack", "--as-test", "--vectorize-mode=BOTH"]
+)
 py_test(
     name = "examples/envs/custom_env_render_method",
     main = "examples/envs/custom_env_render_method.py",

@@ -318,6 +318,9 @@ def __init__(self, algo_class: Optional[type] = None):
         self.env_runner_cls = None
         self.num_env_runners = 0
         self.num_envs_per_env_runner = 1
+        # TODO (sven): Once new ormsgpack system in place, reaplce the string
+        #  with proper `gym.envs.registration.VectorizeMode.SYNC`.
+        self.gym_env_vectorize_mode = "SYNC"
         self.num_cpus_per_env_runner = 1
         self.num_gpus_per_env_runner = 0
         self.custom_resources_per_env_runner = {}
@@ -904,25 +907,15 @@ def freeze(self) -> None:
     def validate(self) -> None:
         """Validates all values in this config."""
 
-        # Check callbacks settings.
+        self._validate_env_runner_settings()
         self._validate_callbacks_settings()
-        # Check framework specific settings.
         self._validate_framework_settings()
-        # Check resources specific settings.
         self._validate_resources_settings()
-        # Check multi-agent specific settings.
         self._validate_multi_agent_settings()
-        # Check input specific settings.
         self._validate_input_settings()
-        # Check evaluation specific settings.
         self._validate_evaluation_settings()
-        # Check offline specific settings (new API stack).
         self._validate_offline_settings()
-
-        # Check new API stack specific settings.
         self._validate_new_api_stack_settings()
-
-        # Check to-be-deprecated settings (however that are still in use).
         self._validate_to_be_deprecated_settings()
 
     def build(
@@ -1738,6 +1731,7 @@ def env_runners(
         env_runner_cls: Optional[type] = NotProvided,
         num_env_runners: Optional[int] = NotProvided,
         num_envs_per_env_runner: Optional[int] = NotProvided,
+        gym_env_vectorize_mode: Optional[str] = NotProvided,
         num_cpus_per_env_runner: Optional[int] = NotProvided,
         num_gpus_per_env_runner: Optional[Union[float, int]] = NotProvided,
         custom_resources_per_env_runner: Optional[dict] = NotProvided,
@@ -1795,6 +1789,11 @@ def env_runners(
                 (vector-wise) per EnvRunner. This enables batching when computing
                 actions through RLModule inference, which can improve performance
                 for inference-bottlenecked workloads.
+            gym_env_vectorize_mode: The gymnasium vectorization mode for vector envs.
+                Must be a `gymnasium.envs.registration.VectorizeMode` (enum) value.
+                Default is SYNC. Set this to ASYNC to parallelize the individual sub
+                environments within the vector. This can speed up your EnvRunners
+                significantly when using heavier environments.
             num_cpus_per_env_runner: Number of CPUs to allocate per EnvRunner.
             num_gpus_per_env_runner: Number of GPUs to allocate per EnvRunner. This can
                 be fractional. This is usually needed only if your env itself requires a
@@ -1975,7 +1974,8 @@ def env_runners(
                     "larger 0!"
                 )
             self.num_envs_per_env_runner = num_envs_per_env_runner
-
+        if gym_env_vectorize_mode is not NotProvided:
+            self.gym_env_vectorize_mode = gym_env_vectorize_mode
         if num_cpus_per_env_runner is not NotProvided:
             self.num_cpus_per_env_runner = num_cpus_per_env_runner
         if num_gpus_per_env_runner is not NotProvided:
@@ -4381,6 +4381,18 @@ def _model_config_auto_includes(self) -> Dict[str, Any]:
     # -----------------------------------------------------------
     # Various validation methods for different types of settings.
     # -----------------------------------------------------------
+    def _validate_env_runner_settings(self) -> None:
+        allowed_vectorize_modes = set(
+            list(gym.envs.registration.VectorizeMode.__members__.keys())
+            + list(gym.envs.registration.VectorizeMode.__members__.values())
+        )
+        if self.gym_env_vectorize_mode not in allowed_vectorize_modes:
+            raise ValueError(
+                f"`gym_env_vectorize_mode` ({self.gym_env_vectorize_mode}) must be a "
+                "member of `gym.envs.registration.VectorizeMode`! Allowed values "
+                f"are {allowed_vectorize_modes}."
+            )
+
     def _validate_callbacks_settings(self) -> None:
         """Validates callbacks settings."""
         # Old API stack:

@@ -6,7 +6,6 @@
 
 import gymnasium as gym
 from gymnasium.wrappers.vector import DictInfoToList
-from gymnasium.envs.registration import VectorizeMode
 
 from ray.rllib.algorithms.algorithm_config import AlgorithmConfig
 from ray.rllib.callbacks.callbacks import RLlibCallback
@@ -637,15 +636,16 @@ def make_env(self) -> None:
                 env_context=env_ctx,
             )
         gym.register("rllib-single-agent-env-v0", entry_point=entry_point)
+        vectorize_mode = self.config.gym_env_vectorize_mode
 
         self.env = DictInfoToList(
             gym.make_vec(
                 "rllib-single-agent-env-v0",
                 num_envs=self.config.num_envs_per_env_runner,
                 vectorization_mode=(
-                    VectorizeMode.ASYNC
-                    if self.config.remote_worker_envs
-                    else VectorizeMode.SYNC
+                    vectorize_mode
+                    if isinstance(vectorize_mode, gym.envs.registration.VectorizeMode)
+                    else gym.envs.registration.VectorizeMode(vectorize_mode.lower())
                 ),
             )
         )

@@ -0,0 +1,137 @@
+"""Example demo'ing async gym vector envs, in which sub-envs have their own process.
+
+Setting up env vectorization works through setting the `config.num_envs_per_env_runner`
+value to > 1. However, by default the n sub-environments are stepped through
+sequentially, rather than in parallel.
+
+This script shows the effect of setting the `config.gym_env_vectorize_mode` from its
+default value of "SYNC" (all sub envs are located in the same EnvRunner process)
+to "ASYNC" (all sub envs in each EnvRunner get their own process).
+
+This example:
+    - shows, which config settings to change in order to switch from sub-envs being
+    stepped in sequence to each sub-envs owning its own process (and compute resource)
+    and thus the vector being stepped in parallel.
+    - shows, how this setup can increase EnvRunner performance significantly, especially
+    for heavier, slower environments.
+    - uses an artificially slow CartPole-v1 environment for demonstration purposes.
+
+
+How to run this script
+----------------------
+`python [script file name].py --enable-new-api-stack `
+
+Use the `--vectorize-mode=BOTH` option to run both modes (SYNC and ASYNC)
+through Tune at the same time and get a better comparison of the throughputs
+achieved.
+
+For debugging, use the following additional command line options
+`--no-tune --num-env-runners=0`
+which should allow you to set breakpoints anywhere in the RLlib code and
+have the execution stop there for inspection and debugging.
+
+For logging to your WandB account, use:
+`--wandb-key=[your WandB API key] --wandb-project=[some project name]
+--wandb-run-name=[optional: WandB run name (within the defined project)]`
+
+
+Results to expect
+-----------------
+You should see results similar to the following in your console output
+when using the
+
++--------------------------+------------+------------------------+------+
+| Trial name               | status     | gym_env_vectorize_mode | iter |
+|                          |            |                        |      |
+|--------------------------+------------+------------------------+------+
+| PPO_slow-env_6ddf4_00000 | TERMINATED | SYNC                   |    4 |
+| PPO_slow-env_6ddf4_00001 | TERMINATED | ASYNC                  |    4 |
++--------------------------+------------+------------------------+------+
++------------------+----------------------+------------------------+
+|   total time (s) |  episode_return_mean |   num_env_steps_sample |
+|                  |                      |             d_lifetime |
+|------------------+----------------------+------------------------+
+|          60.8794 |                73.53 |                  16040 |
+|          19.1203 |                73.86 |                  16037 |
++------------------+----------------------+------------------------+
+
+You can see that the ASYNC mode, given that the env is sufficiently slow,
+achieves much better results when using vectorization.
+
+You should see no difference, however, when only using
+`--num-envs-per-env-runner=1`.
+"""
+import time
+
+import gymnasium as gym
+
+from ray.rllib.algorithms.ppo import PPOConfig
+from ray.rllib.utils.test_utils import (
+    add_rllib_example_script_args,
+    run_rllib_example_script_experiment,
+)
+from ray import tune
+
+parser = add_rllib_example_script_args(default_reward=60.0)
+parser.set_defaults(
+    enable_new_api_stack=True,
+    env="CartPole-v1",
+    num_envs_per_env_runner=6,
+)
+parser.add_argument(
+    "--vectorize-mode",
+    type=str,
+    default="ASYNC",
+    help="The value `gym.envs.registration.VectorizeMode` to use for env "
+    "vectorization. SYNC steps through all sub-envs in sequence. ASYNC (default) "
+    "parallelizes sub-envs through multiprocessing and can speed up EnvRunners "
+    "significantly. Use the special value `BOTH` to run both ASYNC and SYNC through a "
+    "Tune grid-search.",
+)
+
+
+class SlowEnv(gym.ObservationWrapper):
+    def observation(self, observation):
+        time.sleep(0.005)
+        return observation
+
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+
+    # Wrap the env with the slowness wrapper.
+    def _env_creator(cfg):
+        return SlowEnv(gym.make(args.env, **cfg))
+
+    tune.register_env("slow-env", _env_creator)
+
+    if args.vectorize_mode == "BOTH" and args.no_tune:
+        raise ValueError(
+            "`--vectorize-mode=BOTH` and `--no-tune` not allowed in combination!"
+        )
+
+    base_config = (
+        PPOConfig()
+        .environment("slow-env")
+        .env_runners(
+            gym_env_vectorize_mode=(
+                tune.grid_search(["SYNC", "ASYNC"])
+                if args.vectorize_mode == "BOTH"
+                else args.vectorize_mode
+            ),
+        )
+    )
+
+    results = run_rllib_example_script_experiment(base_config, args)
+
+    # Compare the throughputs and assert that ASYNC is much faster than SYNC.
+    if args.vectorize_mode == "BOTH" and args.as_test:
+        throughput_sync = (
+            results[0].metrics["num_env_steps_sampled_lifetime"]
+            / results[0].metrics["time_total_s"]
+        )
+        throughput_async = (
+            results[1].metrics["num_env_steps_sampled_lifetime"]
+            / results[1].metrics["time_total_s"]
+        )
+        assert throughput_async > throughput_sync
@@ -1087,9 +1087,11 @@ def run_rllib_example_script_experiment(
                 enable_env_runner_and_connector_v2=False,
             )
 
-        # Define EnvRunner/RolloutWorker scaling and behavior.
+        # Define EnvRunner scaling and behavior.
         if args.num_env_runners is not None:
             config.env_runners(num_env_runners=args.num_env_runners)
+        if args.num_envs_per_env_runner is not None:
+            config.env_runners(num_envs_per_env_runner=args.num_envs_per_env_runner)
 
         # Define compute resources used automatically (only using the --num-learners
         # and --num-gpus-per-learner args).