[RLlib] Rename MultiAgent...RLModule... into MultiRL...Module for more generality. #46840

Merged

Changes from 5 commits
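In short, this PR maps the old multi-agent class names onto shorter, more general ones. The summary below is distilled from the diffs that follow (import paths as they appear in this PR); treat it as a convenience, not an exhaustive migration guide:

# Old names and locations:
from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec
from ray.rllib.core.rl_module.marl_module import (
    MultiAgentRLModule,
    MultiAgentRLModuleConfig,
    MultiAgentRLModuleSpec,
)

# New names and locations, per this PR:
from ray.rllib.core.rl_module.rl_module import RLModuleSpec
from ray.rllib.core.rl_module.multi_rl_module import (
    MultiRLModule,
    MultiRLModuleConfig,
    MultiRLModuleSpec,
)

# Method renames covered by the same diffs:
# RLModule.as_multi_agent()            -> RLModule.as_multi_rl_module()
# AlgorithmConfig.get_marl_module_spec -> AlgorithmConfig.get_multi_rl_module_spec
# MultiAgentRLModule.save_state / load_state -> MultiRLModule.save_to_path / restore_from_path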
6 changes: 2 additions & 4 deletions doc/source/rllib/doc_code/catalog_guide.py
@@ -102,7 +102,7 @@
 # __sphinx_doc_algo_configs_begin__
 from ray.rllib.algorithms.ppo.ppo_catalog import PPOCatalog
 from ray.rllib.algorithms.ppo import PPOConfig
-from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec
+from ray.rllib.core.rl_module.rl_module import RLModuleSpec


 class MyPPOCatalog(PPOCatalog):
@@ -119,9 +119,7 @@ def __init__(self, *args, **kwargs):
 )

 # Specify the catalog to use for the PPORLModule.
-config = config.rl_module(
-    rl_module_spec=SingleAgentRLModuleSpec(catalog_class=MyPPOCatalog)
-)
+config = config.rl_module(rl_module_spec=RLModuleSpec(catalog_class=MyPPOCatalog))
 # This is how RLlib constructs a PPORLModule
 # It will say "Hi from within PPORLModule!".
 ppo = config.build()
67 changes: 32 additions & 35 deletions doc/source/rllib/doc_code/rlmodule_guide.py
@@ -28,12 +28,12 @@

 # __constructing-rlmodules-sa-begin__
 import gymnasium as gym
-from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec
+from ray.rllib.core.rl_module.rl_module import RLModuleSpec
 from ray.rllib.core.testing.torch.bc_module import DiscreteBCTorchModule

 env = gym.make("CartPole-v1")

-spec = SingleAgentRLModuleSpec(
+spec = RLModuleSpec(
     module_class=DiscreteBCTorchModule,
     observation_space=env.observation_space,
     action_space=env.action_space,
@@ -46,19 +46,19 @@

 # __constructing-rlmodules-ma-begin__
 import gymnasium as gym
-from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec
-from ray.rllib.core.rl_module.marl_module import MultiAgentRLModuleSpec
+from ray.rllib.core.rl_module.rl_module import RLModuleSpec
+from ray.rllib.core.rl_module.multi_rl_module import MultiRLModuleSpec
 from ray.rllib.core.testing.torch.bc_module import DiscreteBCTorchModule

-spec = MultiAgentRLModuleSpec(
+spec = MultiRLModuleSpec(
     module_specs={
-        "module_1": SingleAgentRLModuleSpec(
+        "module_1": RLModuleSpec(
             module_class=DiscreteBCTorchModule,
             observation_space=gym.spaces.Box(low=-1, high=1, shape=(10,)),
             action_space=gym.spaces.Discrete(2),
             model_config_dict={"fcnet_hiddens": [32]},
         ),
-        "module_2": SingleAgentRLModuleSpec(
+        "module_2": RLModuleSpec(
             module_class=DiscreteBCTorchModule,
             observation_space=gym.spaces.Box(low=-1, high=1, shape=(5,)),
             action_space=gym.spaces.Discrete(2),
@@ -67,13 +67,13 @@
     },
 )

-marl_module = spec.build()
+multi_rl_module = spec.build()
 # __constructing-rlmodules-ma-end__
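As a usage note (ours, not part of the diff): the built MultiRLModule keeps its sub-modules addressable by module ID. A minimal sketch, assuming the spec constructed above and dict-style access by module ID, which the multi-agent container supported before the rename:

multi_rl_module = spec.build()

# Each sub-RLModule is reachable under the ID it was registered with.
module_1 = multi_rl_module["module_1"]
module_2 = multi_rl_module["module_2"]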


 # __pass-specs-to-configs-sa-begin__
 import gymnasium as gym
-from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec
+from ray.rllib.core.rl_module.rl_module import RLModuleSpec
 from ray.rllib.core.testing.torch.bc_module import DiscreteBCTorchModule
 from ray.rllib.core.testing.bc_algorithm import BCConfigTest

@@ -84,7 +84,7 @@
     .environment("CartPole-v1")
     .rl_module(
         model_config_dict={"fcnet_hiddens": [32, 32]},
-        rl_module_spec=SingleAgentRLModuleSpec(module_class=DiscreteBCTorchModule),
+        rl_module_spec=RLModuleSpec(module_class=DiscreteBCTorchModule),
     )
 )

@@ -94,8 +94,8 @@

 # __pass-specs-to-configs-ma-begin__
 import gymnasium as gym
-from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec
-from ray.rllib.core.rl_module.marl_module import MultiAgentRLModuleSpec
+from ray.rllib.core.rl_module.rl_module import RLModuleSpec
+from ray.rllib.core.rl_module.multi_rl_module import MultiRLModuleSpec
 from ray.rllib.core.testing.torch.bc_module import DiscreteBCTorchModule
 from ray.rllib.core.testing.bc_algorithm import BCConfigTest
 from ray.rllib.examples.envs.classes.multi_agent import MultiAgentCartPole
@@ -107,8 +107,8 @@
     .environment(MultiAgentCartPole, env_config={"num_agents": 2})
     .rl_module(
         model_config_dict={"fcnet_hiddens": [32, 32]},
-        rl_module_spec=MultiAgentRLModuleSpec(
-            module_specs=SingleAgentRLModuleSpec(module_class=DiscreteBCTorchModule)
+        rl_module_spec=MultiRLModuleSpec(
+            module_specs=RLModuleSpec(module_class=DiscreteBCTorchModule)
         ),
     )
 )
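One detail worth surfacing from the two hunks above: module_specs accepts either a single RLModuleSpec, which then applies to every module ID, or a dict keyed by module ID. A compact sketch of both forms, using only names already imported in the diff (the dict keys are placeholders):

# One spec shared by all module IDs, as in the config hunk above ...
shared = MultiRLModuleSpec(
    module_specs=RLModuleSpec(module_class=DiscreteBCTorchModule),
)
# ... or one spec per module ID, as in the earlier construction example.
per_module = MultiRLModuleSpec(
    module_specs={
        "agent_0": RLModuleSpec(module_class=DiscreteBCTorchModule),
        "agent_1": RLModuleSpec(module_class=DiscreteBCTorchModule),
    },
)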
@@ -117,19 +117,19 @@

 # __convert-sa-to-ma-begin__
 import gymnasium as gym
-from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec
+from ray.rllib.core.rl_module.rl_module import RLModuleSpec
 from ray.rllib.core.testing.torch.bc_module import DiscreteBCTorchModule

 env = gym.make("CartPole-v1")
-spec = SingleAgentRLModuleSpec(
+spec = RLModuleSpec(
     module_class=DiscreteBCTorchModule,
     observation_space=env.observation_space,
     action_space=env.action_space,
     model_config_dict={"fcnet_hiddens": [64]},
 )

 module = spec.build()
-marl_module = module.as_multi_agent()
+multi_rl_module = module.as_multi_rl_module()
 # __convert-sa-to-ma-end__
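A note on the conversion (our gloss, with one assumption): as_multi_rl_module() wraps the single module in a MultiRLModule under RLlib's default module ID. We believe that ID is "default_policy", exposed as DEFAULT_MODULE_ID; check your version before relying on it:

# Assumption: DEFAULT_MODULE_ID lives in ray.rllib.core and equals "default_policy".
from ray.rllib.core import DEFAULT_MODULE_ID

multi_rl_module = module.as_multi_rl_module()
assert multi_rl_module[DEFAULT_MODULE_ID] is module  # expected to hold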


@@ -279,12 +279,9 @@ def output_specs_exploration(self) -> SpecType:
 # __extend-spec-checking-type-specs-end__


-# __write-custom-marlmodule-shared-enc-begin__
+# __write-custom-multirlmodule-shared-enc-begin__
 from ray.rllib.core.rl_module.torch.torch_rl_module import TorchRLModule
-from ray.rllib.core.rl_module.marl_module import (
-    MultiAgentRLModuleConfig,
-    MultiAgentRLModule,
-)
+from ray.rllib.core.rl_module.multi_rl_module import MultiRLModuleConfig, MultiRLModule

 import torch
 import torch.nn as nn
@@ -325,8 +322,8 @@ def _common_forward(self, batch):
         return {"action_dist": torch.distributions.Categorical(logits=action_logits)}


-class BCTorchMultiAgentModuleWithSharedEncoder(MultiAgentRLModule):
-    def __init__(self, config: MultiAgentRLModuleConfig) -> None:
+class BCTorchMultiAgentModuleWithSharedEncoder(MultiRLModule):
+    def __init__(self, config: MultiRLModuleConfig) -> None:
         super().__init__(config)

     def setup(self):
@@ -353,18 +350,18 @@ def setup(self):
         self._rl_modules = rl_modules


-# __write-custom-marlmodule-shared-enc-end__
+# __write-custom-multirlmodule-shared-enc-end__


-# __pass-custom-marlmodule-shared-enc-begin__
+# __pass-custom-multirlmodule-shared-enc-begin__
 import gymnasium as gym
-from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec
-from ray.rllib.core.rl_module.marl_module import MultiAgentRLModuleSpec
+from ray.rllib.core.rl_module.rl_module import RLModuleSpec
+from ray.rllib.core.rl_module.multi_rl_module import MultiRLModuleSpec

-spec = MultiAgentRLModuleSpec(
-    marl_module_class=BCTorchMultiAgentModuleWithSharedEncoder,
+spec = MultiRLModuleSpec(
+    multi_rl_module_class=BCTorchMultiAgentModuleWithSharedEncoder,
     module_specs={
-        "local_2d": SingleAgentRLModuleSpec(
+        "local_2d": RLModuleSpec(
             observation_space=gym.spaces.Dict(
                 {
                     "global": gym.spaces.Box(low=-1, high=1, shape=(2,)),
@@ -374,7 +371,7 @@ def setup(self):
             action_space=gym.spaces.Discrete(2),
             model_config_dict={"fcnet_hiddens": [64]},
         ),
-        "local_5d": SingleAgentRLModuleSpec(
+        "local_5d": RLModuleSpec(
             observation_space=gym.spaces.Dict(
                 {
                     "global": gym.spaces.Box(low=-1, high=1, shape=(2,)),
@@ -388,7 +385,7 @@ def setup(self):
 )

 module = spec.build()
-# __pass-custom-marlmodule-shared-enc-end__
+# __pass-custom-multirlmodule-shared-enc-end__


 # __checkpointing-begin__
@@ -398,7 +395,7 @@ def setup(self):
 from ray.rllib.algorithms.ppo import PPOConfig
 from ray.rllib.algorithms.ppo.ppo_catalog import PPOCatalog
 from ray.rllib.algorithms.ppo.torch.ppo_torch_rl_module import PPOTorchRLModule
-from ray.rllib.core.rl_module.rl_module import RLModule, SingleAgentRLModuleSpec
+from ray.rllib.core.rl_module.rl_module import RLModule, RLModuleSpec

 config = (
     PPOConfig()
@@ -407,7 +404,7 @@ def setup(self):
 )
 env = gym.make("CartPole-v1")
 # Create an RL Module that we would like to checkpoint
-module_spec = SingleAgentRLModuleSpec(
+module_spec = RLModuleSpec(
     module_class=PPOTorchRLModule,
     observation_space=env.observation_space,
     action_space=env.action_space,
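To round off the checkpointing snippet (a sketch under assumptions, not part of the diff): given the Checkpointable-style names this PR settles on in the package reference below, save_to_path and restore_from_path, checkpointing a built module would look roughly like this, assuming the single-agent RLModule exposes the same pair of methods:

import tempfile

module = module_spec.build()

# Write the module's state to a checkpoint directory ...
ckpt_dir = tempfile.mkdtemp()
module.save_to_path(ckpt_dir)

# ... and restore it into a freshly built module later.
restored = module_spec.build()
restored.restore_from_path(ckpt_dir)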
2 changes: 1 addition & 1 deletion doc/source/rllib/key-concepts.rst
@@ -122,7 +122,7 @@ implement reinforcement learning policies in RLlib and can therefore be found in
 where their exploration and inference logic is used to sample from an environment.
 The second place in RLlib where RL Modules commonly occur is the :py:class:`~ray.rllib.core.learner.learner.Learner`,
 where their training logic is used in training the neural network.
-RL Modules extend to the multi-agent case, where a single :py:class:`~ray.rllib.core.rl_module.marl_module.MultiAgentRLModule`
+RL Modules extend to the multi-agent case, where a single :py:class:`~ray.rllib.core.rl_module.multi_rl_module.MultiRLModule`
 contains multiple RL Modules. The following figure is a rough sketch of how the above can look in practice:

 .. image:: images/rllib-concepts-rlmodules-sketch.png
2 changes: 1 addition & 1 deletion doc/source/rllib/package_ref/algorithm.rst
@@ -99,7 +99,7 @@ Getter methods
    ~AlgorithmConfig.get_default_learner_class
    ~AlgorithmConfig.get_default_rl_module_spec
    ~AlgorithmConfig.get_evaluation_config_object
-   ~AlgorithmConfig.get_marl_module_spec
+   ~AlgorithmConfig.get_multi_rl_module_spec
    ~AlgorithmConfig.get_multi_agent_setup
    ~AlgorithmConfig.get_rollout_fragment_length
36 changes: 18 additions & 18 deletions doc/source/rllib/package_ref/rl_modules.rst
@@ -23,9 +23,9 @@ Single Agent
    :nosignatures:
    :toctree: doc/

-   SingleAgentRLModuleSpec
-   SingleAgentRLModuleSpec.build
-   SingleAgentRLModuleSpec.get_rl_module_config
+   RLModuleSpec
+   RLModuleSpec.build
+   RLModuleSpec.get_rl_module_config

 RLModule Configuration
 +++++++++++++++++++++++
@@ -39,18 +39,18 @@ RLModule Configuration
    RLModuleConfig.from_dict
    RLModuleConfig.get_catalog

-Multi Agent
-++++++++++++
+Multi RLModule (multi-agent)
+++++++++++++++++++++++++++++

-.. currentmodule:: ray.rllib.core.rl_module.marl_module
+.. currentmodule:: ray.rllib.core.rl_module.multi_rl_module

 .. autosummary::
    :nosignatures:
    :toctree: doc/

-   MultiAgentRLModuleSpec
-   MultiAgentRLModuleSpec.build
-   MultiAgentRLModuleSpec.get_marl_config
+   MultiRLModuleSpec
+   MultiRLModuleSpec.build
+   MultiRLModuleSpec.get_multi_rl_module_config



@@ -68,7 +68,7 @@ Constructor
    :toctree: doc/

    RLModule
-   RLModule.as_multi_agent
+   RLModule.as_multi_rl_module


 Forward methods
@@ -119,7 +119,7 @@ Saving and Loading
 Multi Agent RL Module API
 -------------------------

-.. currentmodule:: ray.rllib.core.rl_module.marl_module
+.. currentmodule:: ray.rllib.core.rl_module.multi_rl_module

 Constructor
 +++++++++++
@@ -128,9 +128,9 @@ Constructor
    :nosignatures:
    :toctree: doc/

-   MultiAgentRLModule
-   MultiAgentRLModule.setup
-   MultiAgentRLModule.as_multi_agent
+   MultiRLModule
+   MultiRLModule.setup
+   MultiRLModule.as_multi_rl_module

 Modifying the underlying RL modules
 ++++++++++++++++++++++++++++++++++++
@@ -139,8 +139,8 @@ Modifying the underlying RL modules
    :nosignatures:
    :toctree: doc/

-   ~MultiAgentRLModule.add_module
-   ~MultiAgentRLModule.remove_module
+   ~MultiRLModule.add_module
+   ~MultiRLModule.remove_module

 Saving and Loading
 ++++++++++++++++++++++
@@ -149,5 +149,5 @@ Saving and Loading
    :nosignatures:
    :toctree: doc/

-   ~MultiAgentRLModule.save_state
-   ~MultiAgentRLModule.load_state
+   ~MultiRLModule.save_to_path
+   ~MultiRLModule.restore_from_path
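A small usage sketch of the renamed multi-module surface (method names as listed above; add_module taking a module ID plus a built RLModule is assumed from the pre-rename API):

import gymnasium as gym
from ray.rllib.core.rl_module.rl_module import RLModuleSpec
from ray.rllib.core.rl_module.multi_rl_module import MultiRLModuleSpec
from ray.rllib.core.testing.torch.bc_module import DiscreteBCTorchModule

spec = MultiRLModuleSpec(
    module_specs={
        "module_1": RLModuleSpec(
            module_class=DiscreteBCTorchModule,
            observation_space=gym.spaces.Box(low=-1, high=1, shape=(10,)),
            action_space=gym.spaces.Discrete(2),
            model_config_dict={"fcnet_hiddens": [32]},
        ),
    },
)
multi_rl_module = spec.build()

# Add another sub-module at runtime, then drop it again.
new_module = RLModuleSpec(
    module_class=DiscreteBCTorchModule,
    observation_space=gym.spaces.Box(low=-1, high=1, shape=(10,)),
    action_space=gym.spaces.Discrete(2),
    model_config_dict={"fcnet_hiddens": [32]},
).build()
multi_rl_module.add_module("module_2", new_module)
multi_rl_module.remove_module("module_2")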
6 changes: 3 additions & 3 deletions doc/source/rllib/rllib-catalogs.rst
@@ -146,9 +146,9 @@ Since Catalogs effectively control what ``models`` and ``distributions`` RLlib u
 they are also part of RLlib’s configurations. As the primary entry point for configuring RLlib,
 :py:class:`~ray.rllib.algorithms.algorithm_config.AlgorithmConfig` is the place where you can configure the
 Catalogs of the RLModules that are created.
-You set the ``catalog class`` by going through the :py:class:`~ray.rllib.core.rl_module.rl_module.SingleAgentRLModuleSpec`
-or :py:class:`~ray.rllib.core.rl_module.marl_module.MultiAgentRLModuleSpec` of an AlgorithmConfig.
-For example, in heterogeneous multi-agent cases, you modify the MultiAgentRLModuleSpec.
+You set the ``catalog class`` by going through the :py:class:`~ray.rllib.core.rl_module.rl_module.RLModuleSpec`
+or :py:class:`~ray.rllib.core.rl_module.multi_rl_module.MultiRLModuleSpec` of an AlgorithmConfig.
+For example, in heterogeneous multi-agent cases, you modify the MultiRLModuleSpec.

 .. image:: images/catalog/catalog_rlmspecs_diagram.svg
    :align: center
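For the heterogeneous multi-agent case that last paragraph mentions, a minimal sketch under the new names (MyCatalogA, MyCatalogB, and the agent IDs are hypothetical placeholders; catalog_class on a sub-spec mirrors the catalog_guide diff above):

from ray.rllib.core.rl_module.rl_module import RLModuleSpec
from ray.rllib.core.rl_module.multi_rl_module import MultiRLModuleSpec

# Hypothetical per-agent catalogs: each sub-spec carries its own catalog_class.
spec = MultiRLModuleSpec(
    module_specs={
        "agent_a": RLModuleSpec(catalog_class=MyCatalogA),
        "agent_b": RLModuleSpec(catalog_class=MyCatalogB),
    },
)
config = config.rl_module(rl_module_spec=spec)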
2 changes: 1 addition & 1 deletion doc/source/rllib/rllib-learner.rst
@@ -115,7 +115,7 @@ and :py:class:`~ray.rllib.core.learner.learner.Learner` APIs via the :py:class:`

     import ray
     from ray.rllib.algorithms.ppo import PPOConfig
-    from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec
+    from ray.rllib.core.rl_module.rl_module import RLModuleSpec
     from ray.rllib.core.learner.learner_group import LearnerGroup
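The learner snippet is cut off by the diff view; for orientation, a rough completion under assumptions (build_learner_group and its env argument come from the surrounding doc page, not from this diff):

import gymnasium as gym

env = gym.make("CartPole-v1")
config = PPOConfig().environment("CartPole-v1")

# Assumed API: construct a LearnerGroup for the config's algorithm.
learner_group = config.build_learner_group(env=env)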