[RLlib] Rename MultiAgent...RLModule... into MultiRL...Module for more generality. #46840

Merged

Changes from 5 commits
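In short, this PR maps the old multi-agent class names onto shorter, more general ones. The summary below is distilled from the diffs that follow (import paths as they appear in this PR); treat it as a convenience, not an exhaustive migration guide:

# Old names and locations:
from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec
from ray.rllib.core.rl_module.marl_module import (
    MultiAgentRLModule,
    MultiAgentRLModuleConfig,
    MultiAgentRLModuleSpec,
)

# New names and locations, per this PR:
from ray.rllib.core.rl_module.rl_module import RLModuleSpec
from ray.rllib.core.rl_module.multi_rl_module import (
    MultiRLModule,
    MultiRLModuleConfig,
    MultiRLModuleSpec,
)

# Method renames covered by the same diffs:
# RLModule.as_multi_agent()            -> RLModule.as_multi_rl_module()
# AlgorithmConfig.get_marl_module_spec -> AlgorithmConfig.get_multi_rl_module_spec
# MultiAgentRLModule.save_state / load_state -> MultiRLModule.save_to_path / restore_from_path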
6 changes: 2 additions & 4 deletions doc/source/rllib/doc_code/catalog_guide.py
@@ -102,7 +102,7 @@
 # __sphinx_doc_algo_configs_begin__
 from ray.rllib.algorithms.ppo.ppo_catalog import PPOCatalog
 from ray.rllib.algorithms.ppo import PPOConfig
-from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec
+from ray.rllib.core.rl_module.rl_module import RLModuleSpec


 class MyPPOCatalog(PPOCatalog):
@@ -119,9 +119,7 @@ def __init__(self, *args, **kwargs):
 )

 # Specify the catalog to use for the PPORLModule.
-config = config.rl_module(
-    rl_module_spec=SingleAgentRLModuleSpec(catalog_class=MyPPOCatalog)
-)
+config = config.rl_module(rl_module_spec=RLModuleSpec(catalog_class=MyPPOCatalog))
 # This is how RLlib constructs a PPORLModule
 # It will say "Hi from within PPORLModule!".
 ppo = config.build()
67 changes: 32 additions & 35 deletions doc/source/rllib/doc_code/rlmodule_guide.py
@@ -28,12 +28,12 @@

 # __constructing-rlmodules-sa-begin__
 import gymnasium as gym
-from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec
+from ray.rllib.core.rl_module.rl_module import RLModuleSpec
 from ray.rllib.core.testing.torch.bc_module import DiscreteBCTorchModule

 env = gym.make("CartPole-v1")

-spec = SingleAgentRLModuleSpec(
+spec = RLModuleSpec(
     module_class=DiscreteBCTorchModule,
     observation_space=env.observation_space,
     action_space=env.action_space,
@@ -46,19 +46,19 @@

 # __constructing-rlmodules-ma-begin__
 import gymnasium as gym
-from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec
-from ray.rllib.core.rl_module.marl_module import MultiAgentRLModuleSpec
+from ray.rllib.core.rl_module.rl_module import RLModuleSpec
+from ray.rllib.core.rl_module.multi_rl_module import MultiRLModuleSpec
 from ray.rllib.core.testing.torch.bc_module import DiscreteBCTorchModule

-spec = MultiAgentRLModuleSpec(
+spec = MultiRLModuleSpec(
     module_specs={
-        "module_1": SingleAgentRLModuleSpec(
+        "module_1": RLModuleSpec(
             module_class=DiscreteBCTorchModule,
             observation_space=gym.spaces.Box(low=-1, high=1, shape=(10,)),
             action_space=gym.spaces.Discrete(2),
             model_config_dict={"fcnet_hiddens": [32]},
         ),
-        "module_2": SingleAgentRLModuleSpec(
+        "module_2": RLModuleSpec(
             module_class=DiscreteBCTorchModule,
             observation_space=gym.spaces.Box(low=-1, high=1, shape=(5,)),
             action_space=gym.spaces.Discrete(2),
@@ -67,13 +67,13 @@
     },
 )

-marl_module = spec.build()
+multi_rl_module = spec.build()
 # __constructing-rlmodules-ma-end__
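As a usage note (ours, not part of the diff): the built MultiRLModule keeps its sub-modules addressable by module ID. A minimal sketch, assuming the spec constructed above and dict-style access by module ID, which the multi-agent container supported before the rename:

multi_rl_module = spec.build()

# Each sub-RLModule is reachable under the ID it was registered with.
module_1 = multi_rl_module["module_1"]
module_2 = multi_rl_module["module_2"]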


 # __pass-specs-to-configs-sa-begin__
 import gymnasium as gym
-from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec
+from ray.rllib.core.rl_module.rl_module import RLModuleSpec
 from ray.rllib.core.testing.torch.bc_module import DiscreteBCTorchModule
 from ray.rllib.core.testing.bc_algorithm import BCConfigTest

@@ -84,7 +84,7 @@
     .environment("CartPole-v1")
     .rl_module(
         model_config_dict={"fcnet_hiddens": [32, 32]},
-        rl_module_spec=SingleAgentRLModuleSpec(module_class=DiscreteBCTorchModule),
+        rl_module_spec=RLModuleSpec(module_class=DiscreteBCTorchModule),
     )
 )

@@ -94,8 +94,8 @@

 # __pass-specs-to-configs-ma-begin__
 import gymnasium as gym
-from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec
-from ray.rllib.core.rl_module.marl_module import MultiAgentRLModuleSpec
+from ray.rllib.core.rl_module.rl_module import RLModuleSpec
+from ray.rllib.core.rl_module.multi_rl_module import MultiRLModuleSpec
 from ray.rllib.core.testing.torch.bc_module import DiscreteBCTorchModule
 from ray.rllib.core.testing.bc_algorithm import BCConfigTest
 from ray.rllib.examples.envs.classes.multi_agent import MultiAgentCartPole
@@ -107,8 +107,8 @@
     .environment(MultiAgentCartPole, env_config={"num_agents": 2})
     .rl_module(
         model_config_dict={"fcnet_hiddens": [32, 32]},
-        rl_module_spec=MultiAgentRLModuleSpec(
-            module_specs=SingleAgentRLModuleSpec(module_class=DiscreteBCTorchModule)
+        rl_module_spec=MultiRLModuleSpec(
+            module_specs=RLModuleSpec(module_class=DiscreteBCTorchModule)
         ),
     )
 )
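One detail worth surfacing from the two hunks above: module_specs accepts either a single RLModuleSpec, which then applies to every module ID, or a dict keyed by module ID. A compact sketch of both forms, using only names already imported in the diff (the dict keys are placeholders):

# One spec shared by all module IDs, as in the config hunk above ...
shared = MultiRLModuleSpec(
    module_specs=RLModuleSpec(module_class=DiscreteBCTorchModule),
)
# ... or one spec per module ID, as in the earlier construction example.
per_module = MultiRLModuleSpec(
    module_specs={
        "agent_0": RLModuleSpec(module_class=DiscreteBCTorchModule),
        "agent_1": RLModuleSpec(module_class=DiscreteBCTorchModule),
    },
)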
@@ -117,19 +117,19 @@

 # __convert-sa-to-ma-begin__
 import gymnasium as gym
-from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec
+from ray.rllib.core.rl_module.rl_module import RLModuleSpec
 from ray.rllib.core.testing.torch.bc_module import DiscreteBCTorchModule

 env = gym.make("CartPole-v1")
-spec = SingleAgentRLModuleSpec(
+spec = RLModuleSpec(
     module_class=DiscreteBCTorchModule,
     observation_space=env.observation_space,
     action_space=env.action_space,
     model_config_dict={"fcnet_hiddens": [64]},
 )

 module = spec.build()
-marl_module = module.as_multi_agent()
+multi_rl_module = module.as_multi_rl_module()
 # __convert-sa-to-ma-end__
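A note on the conversion (our gloss, with one assumption): as_multi_rl_module() wraps the single module in a MultiRLModule under RLlib's default module ID. We believe that ID is "default_policy", exposed as DEFAULT_MODULE_ID; check your version before relying on it:

# Assumption: DEFAULT_MODULE_ID lives in ray.rllib.core and equals "default_policy".
from ray.rllib.core import DEFAULT_MODULE_ID

multi_rl_module = module.as_multi_rl_module()
assert multi_rl_module[DEFAULT_MODULE_ID] is module  # expected to hold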


@@ -279,12 +279,9 @@ def output_specs_exploration(self) -> SpecType:
 # __extend-spec-checking-type-specs-end__


-# __write-custom-marlmodule-shared-enc-begin__
+# __write-custom-multirlmodule-shared-enc-begin__
 from ray.rllib.core.rl_module.torch.torch_rl_module import TorchRLModule
-from ray.rllib.core.rl_module.marl_module import (
-    MultiAgentRLModuleConfig,
-    MultiAgentRLModule,
-)
+from ray.rllib.core.rl_module.multi_rl_module import MultiRLModuleConfig, MultiRLModule

 import torch
 import torch.nn as nn
@@ -325,8 +322,8 @@ def _common_forward(self, batch):
         return {"action_dist": torch.distributions.Categorical(logits=action_logits)}


-class BCTorchMultiAgentModuleWithSharedEncoder(MultiAgentRLModule):
-    def __init__(self, config: MultiAgentRLModuleConfig) -> None:
+class BCTorchMultiAgentModuleWithSharedEncoder(MultiRLModule):
+    def __init__(self, config: MultiRLModuleConfig) -> None:
         super().__init__(config)

     def setup(self):
@@ -353,18 +350,18 @@ def setup(self):
         self._rl_modules = rl_modules


-# __write-custom-marlmodule-shared-enc-end__
+# __write-custom-multirlmodule-shared-enc-end__


-# __pass-custom-marlmodule-shared-enc-begin__
+# __pass-custom-multirlmodule-shared-enc-begin__
 import gymnasium as gym
-from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec
-from ray.rllib.core.rl_module.marl_module import MultiAgentRLModuleSpec
+from ray.rllib.core.rl_module.rl_module import RLModuleSpec
+from ray.rllib.core.rl_module.multi_rl_module import MultiRLModuleSpec

-spec = MultiAgentRLModuleSpec(
-    marl_module_class=BCTorchMultiAgentModuleWithSharedEncoder,
+spec = MultiRLModuleSpec(
+    multi_rl_module_class=BCTorchMultiAgentModuleWithSharedEncoder,
     module_specs={
-        "local_2d": SingleAgentRLModuleSpec(
+        "local_2d": RLModuleSpec(
             observation_space=gym.spaces.Dict(
                 {
                     "global": gym.spaces.Box(low=-1, high=1, shape=(2,)),
@@ -374,7 +371,7 @@ def setup(self):
             action_space=gym.spaces.Discrete(2),
             model_config_dict={"fcnet_hiddens": [64]},
         ),
-        "local_5d": SingleAgentRLModuleSpec(
+        "local_5d": RLModuleSpec(
             observation_space=gym.spaces.Dict(
                 {
                     "global": gym.spaces.Box(low=-1, high=1, shape=(2,)),
@@ -388,7 +385,7 @@ def setup(self):
 )

 module = spec.build()
-# __pass-custom-marlmodule-shared-enc-end__
+# __pass-custom-multirlmodule-shared-enc-end__


 # __checkpointing-begin__
@@ -398,7 +395,7 @@ def setup(self):
 from ray.rllib.algorithms.ppo import PPOConfig
 from ray.rllib.algorithms.ppo.ppo_catalog import PPOCatalog
 from ray.rllib.algorithms.ppo.torch.ppo_torch_rl_module import PPOTorchRLModule
-from ray.rllib.core.rl_module.rl_module import RLModule, SingleAgentRLModuleSpec
+from ray.rllib.core.rl_module.rl_module import RLModule, RLModuleSpec

 config = (
     PPOConfig()
@@ -407,7 +404,7 @@ def setup(self):
 )
 env = gym.make("CartPole-v1")
 # Create an RL Module that we would like to checkpoint
-module_spec = SingleAgentRLModuleSpec(
+module_spec = RLModuleSpec(
     module_class=PPOTorchRLModule,
     observation_space=env.observation_space,
     action_space=env.action_space,
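To round off the checkpointing snippet (a sketch under assumptions, not part of the diff): given the Checkpointable-style names this PR settles on in the package reference below, save_to_path and restore_from_path, checkpointing a built module would look roughly like this, assuming the single-agent RLModule exposes the same pair of methods:

import tempfile

module = module_spec.build()

# Write the module's state to a checkpoint directory ...
ckpt_dir = tempfile.mkdtemp()
module.save_to_path(ckpt_dir)

# ... and restore it into a freshly built module later.
restored = module_spec.build()
restored.restore_from_path(ckpt_dir)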
2 changes: 1 addition & 1 deletion doc/source/rllib/key-concepts.rst
@@ -122,7 +122,7 @@ implement reinforcement learning policies in RLlib and can therefore be found in
 where their exploration and inference logic is used to sample from an environment.
 The second place in RLlib where RL Modules commonly occur is the :py:class:`~ray.rllib.core.learner.learner.Learner`,
 where their training logic is used in training the neural network.
-RL Modules extend to the multi-agent case, where a single :py:class:`~ray.rllib.core.rl_module.marl_module.MultiAgentRLModule`
+RL Modules extend to the multi-agent case, where a single :py:class:`~ray.rllib.core.rl_module.multi_rl_module.MultiRLModule`
 contains multiple RL Modules. The following figure is a rough sketch of how the above can look in practice:

 .. image:: images/rllib-concepts-rlmodules-sketch.png
2 changes: 1 addition & 1 deletion doc/source/rllib/package_ref/algorithm.rst
@@ -99,7 +99,7 @@ Getter methods
    ~AlgorithmConfig.get_default_learner_class
    ~AlgorithmConfig.get_default_rl_module_spec
    ~AlgorithmConfig.get_evaluation_config_object
-   ~AlgorithmConfig.get_marl_module_spec
+   ~AlgorithmConfig.get_multi_rl_module_spec
    ~AlgorithmConfig.get_multi_agent_setup
    ~AlgorithmConfig.get_rollout_fragment_length
36 changes: 18 additions & 18 deletions doc/source/rllib/package_ref/rl_modules.rst
@@ -23,9 +23,9 @@ Single Agent
    :nosignatures:
    :toctree: doc/

-   SingleAgentRLModuleSpec
-   SingleAgentRLModuleSpec.build
-   SingleAgentRLModuleSpec.get_rl_module_config
+   RLModuleSpec
+   RLModuleSpec.build
+   RLModuleSpec.get_rl_module_config

 RLModule Configuration
 +++++++++++++++++++++++
@@ -39,18 +39,18 @@ RLModule Configuration
    RLModuleConfig.from_dict
    RLModuleConfig.get_catalog

-Multi Agent
-++++++++++++
+Multi RLModule (multi-agent)
+++++++++++++++++++++++++++++

-.. currentmodule:: ray.rllib.core.rl_module.marl_module
+.. currentmodule:: ray.rllib.core.rl_module.multi_rl_module

 .. autosummary::
    :nosignatures:
    :toctree: doc/

-   MultiAgentRLModuleSpec
-   MultiAgentRLModuleSpec.build
-   MultiAgentRLModuleSpec.get_marl_config
+   MultiRLModuleSpec
+   MultiRLModuleSpec.build
+   MultiRLModuleSpec.get_multi_rl_module_config



@@ -68,7 +68,7 @@ Constructor
    :toctree: doc/

    RLModule
-   RLModule.as_multi_agent
+   RLModule.as_multi_rl_module


 Forward methods
@@ -119,7 +119,7 @@ Saving and Loading
 Multi Agent RL Module API
 -------------------------

-.. currentmodule:: ray.rllib.core.rl_module.marl_module
+.. currentmodule:: ray.rllib.core.rl_module.multi_rl_module

 Constructor
 +++++++++++
@@ -128,9 +128,9 @@ Constructor
    :nosignatures:
    :toctree: doc/

-   MultiAgentRLModule
-   MultiAgentRLModule.setup
-   MultiAgentRLModule.as_multi_agent
+   MultiRLModule
+   MultiRLModule.setup
+   MultiRLModule.as_multi_rl_module

 Modifying the underlying RL modules
 ++++++++++++++++++++++++++++++++++++
@@ -139,8 +139,8 @@ Modifying the underlying RL modules
    :nosignatures:
    :toctree: doc/

-   ~MultiAgentRLModule.add_module
-   ~MultiAgentRLModule.remove_module
+   ~MultiRLModule.add_module
+   ~MultiRLModule.remove_module

 Saving and Loading
 ++++++++++++++++++++++
@@ -149,5 +149,5 @@ Saving and Loading
    :nosignatures:
    :toctree: doc/

-   ~MultiAgentRLModule.save_state
-   ~MultiAgentRLModule.load_state
+   ~MultiRLModule.save_to_path
+   ~MultiRLModule.restore_from_path
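A small usage sketch of the renamed multi-module surface (method names as listed above; add_module taking a module ID plus a built RLModule is assumed from the pre-rename API):

import gymnasium as gym
from ray.rllib.core.rl_module.rl_module import RLModuleSpec
from ray.rllib.core.rl_module.multi_rl_module import MultiRLModuleSpec
from ray.rllib.core.testing.torch.bc_module import DiscreteBCTorchModule

spec = MultiRLModuleSpec(
    module_specs={
        "module_1": RLModuleSpec(
            module_class=DiscreteBCTorchModule,
            observation_space=gym.spaces.Box(low=-1, high=1, shape=(10,)),
            action_space=gym.spaces.Discrete(2),
            model_config_dict={"fcnet_hiddens": [32]},
        ),
    },
)
multi_rl_module = spec.build()

# Add another sub-module at runtime, then drop it again.
new_module = RLModuleSpec(
    module_class=DiscreteBCTorchModule,
    observation_space=gym.spaces.Box(low=-1, high=1, shape=(10,)),
    action_space=gym.spaces.Discrete(2),
    model_config_dict={"fcnet_hiddens": [32]},
).build()
multi_rl_module.add_module("module_2", new_module)
multi_rl_module.remove_module("module_2")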
6 changes: 3 additions & 3 deletions doc/source/rllib/rllib-catalogs.rst
@@ -146,9 +146,9 @@ Since Catalogs effectively control what ``models`` and ``distributions`` RLlib u
 they are also part of RLlib’s configurations. As the primary entry point for configuring RLlib,
 :py:class:`~ray.rllib.algorithms.algorithm_config.AlgorithmConfig` is the place where you can configure the
 Catalogs of the RLModules that are created.
-You set the ``catalog class`` by going through the :py:class:`~ray.rllib.core.rl_module.rl_module.SingleAgentRLModuleSpec`
-or :py:class:`~ray.rllib.core.rl_module.marl_module.MultiAgentRLModuleSpec` of an AlgorithmConfig.
-For example, in heterogeneous multi-agent cases, you modify the MultiAgentRLModuleSpec.
+You set the ``catalog class`` by going through the :py:class:`~ray.rllib.core.rl_module.rl_module.RLModuleSpec`
+or :py:class:`~ray.rllib.core.rl_module.multi_rl_module.MultiRLModuleSpec` of an AlgorithmConfig.
+For example, in heterogeneous multi-agent cases, you modify the MultiRLModuleSpec.

 .. image:: images/catalog/catalog_rlmspecs_diagram.svg
    :align: center
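For the heterogeneous multi-agent case that last paragraph mentions, a minimal sketch under the new names (MyCatalogA, MyCatalogB, and the agent IDs are hypothetical placeholders; catalog_class on a sub-spec mirrors the catalog_guide diff above):

from ray.rllib.core.rl_module.rl_module import RLModuleSpec
from ray.rllib.core.rl_module.multi_rl_module import MultiRLModuleSpec

# Hypothetical per-agent catalogs: each sub-spec carries its own catalog_class.
spec = MultiRLModuleSpec(
    module_specs={
        "agent_a": RLModuleSpec(catalog_class=MyCatalogA),
        "agent_b": RLModuleSpec(catalog_class=MyCatalogB),
    },
)
config = config.rl_module(rl_module_spec=spec)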
2 changes: 1 addition & 1 deletion doc/source/rllib/rllib-learner.rst
@@ -115,7 +115,7 @@ and :py:class:`~ray.rllib.core.learner.learner.Learner` APIs via the :py:class:`

     import ray
     from ray.rllib.algorithms.ppo import PPOConfig
-    from ray.rllib.core.rl_module.rl_module import SingleAgentRLModuleSpec
+    from ray.rllib.core.rl_module.rl_module import RLModuleSpec
     from ray.rllib.core.learner.learner_group import LearnerGroup
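The learner snippet is cut off by the diff view; for orientation, a rough completion under assumptions (build_learner_group and its env argument come from the surrounding doc page, not from this diff):

import gymnasium as gym

env = gym.make("CartPole-v1")
config = PPOConfig().environment("CartPole-v1")

# Assumed API: construct a LearnerGroup for the config's algorithm.
learner_group = config.build_learner_group(env=env)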