Commit 2cd22aa

[RLlib] Cleanup examples folder vol. 24: Add example script for how to log custom metrics in `training_step()`. (#49976)
sven1977 authored Jan 21, 2025
1 parent a918028 commit 2cd22aa
Showing 10 changed files with 220 additions and 54 deletions.
40 changes: 18 additions & 22 deletions doc/source/rllib/package_ref/algorithm.rst
@@ -56,8 +56,8 @@ Algorithm API

.. currentmodule:: ray.rllib.algorithms.algorithm

Constructor
~~~~~~~~~~~
Construction and setup
~~~~~~~~~~~~~~~~~~~~~~

.. autosummary::
:nosignatures:
@@ -66,50 +66,46 @@ Constructor
~Algorithm
~Algorithm.setup
~Algorithm.get_default_config
~Algorithm.env_runner
~Algorithm.eval_env_runner


Inference and Evaluation
~~~~~~~~~~~~~~~~~~~~~~~~
Training
~~~~~~~~
.. autosummary::
:nosignatures:
:toctree: doc/

~Algorithm.compute_actions
~Algorithm.compute_single_action
~Algorithm.evaluate
~Algorithm.train
~Algorithm.training_step

Saving and Restoring
Saving and restoring
~~~~~~~~~~~~~~~~~~~~
.. autosummary::
:nosignatures:
:toctree: doc/

~Algorithm.save_to_path
~Algorithm.restore_from_path
~Algorithm.from_checkpoint
~Algorithm.from_state
~Algorithm.get_weights
~Algorithm.set_weights
~Algorithm.export_model
~Algorithm.export_policy_checkpoint
~Algorithm.export_policy_model
~Algorithm.restore
~Algorithm.restore_workers
~Algorithm.save
~Algorithm.save_checkpoint
~Algorithm.get_state
~Algorithm.set_state


Training
~~~~~~~~
Evaluation
~~~~~~~~~~
.. autosummary::
:nosignatures:
:toctree: doc/

~Algorithm.train
~Algorithm.training_step
~Algorithm.evaluate

Multi Agent
~~~~~~~~~~~
.. autosummary::
:nosignatures:
:toctree: doc/

~Algorithm.get_module
~Algorithm.add_policy
~Algorithm.remove_policy
4 changes: 2 additions & 2 deletions doc/source/rllib/package_ref/learner.rst
@@ -135,8 +135,8 @@ Gradient computation
Learner.apply_gradients
Learner._get_clip_function

Saving, loading, checkpointing, and restoring states
----------------------------------------------------
Saving and restoring
--------------------

.. autosummary::
:nosignatures:
4 changes: 2 additions & 2 deletions doc/source/rllib/package_ref/rl_modules.rst
@@ -100,8 +100,8 @@ Override these private methods to define your custom model's forward behavior.
~RLModule._forward_train


Saving and Loading
++++++++++++++++++++++
Saving and restoring
++++++++++++++++++++

.. autosummary::
:nosignatures:
36 changes: 36 additions & 0 deletions doc/source/rllib/package_ref/utils.rst
@@ -9,6 +9,42 @@ RLlib Utilities

Here is a list of all the utilities available in RLlib.

MetricsLogger API
-----------------

RLlib uses the MetricsLogger API to log stats and metrics for its various components. Users can also
use this API to log their own custom metrics, for example from within an overridden
`Algorithm.training_step()` or from custom callbacks.
For example:

.. testcode::

from ray.rllib.utils.metrics.metrics_logger import MetricsLogger

logger = MetricsLogger()

# Log a scalar float value under the `loss` key. By default, all logged
# values under that key are averaged, once `reduce()` is called.
logger.log_value("loss", 0.05, reduce="mean", window=2)
logger.log_value("loss", 0.1)
logger.log_value("loss", 0.2)

logger.peek("loss") # expect: 0.15 (mean of last 2 values: 0.1 and 0.2)


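The logger also supports logging whole dicts of stats and timing code blocks. A small, hedged
sketch of ``log_dict`` and ``log_time`` usage (see the method summary below; the exact keyword
arguments shown are assumptions):

.. testcode::

    import time
    from ray.rllib.utils.metrics.metrics_logger import MetricsLogger

    logger = MetricsLogger()

    # Log several related stats at once, nested under a common "sampling" key.
    logger.log_dict({"env_steps": 32, "agent_steps": 32}, key="sampling", reduce="sum")

    # Time a code block: `log_time` works as a context manager and logs the
    # measured duration under the given key.
    with logger.log_time("my_block_time"):
        time.sleep(0.1)
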
.. currentmodule:: ray.rllib.utils.metrics.metrics_logger

.. autosummary::
:nosignatures:
:toctree: doc/

MetricsLogger
MetricsLogger.peek
MetricsLogger.log_value
MetricsLogger.log_dict
MetricsLogger.merge_and_log_n_dicts
MetricsLogger.log_time


Scheduler API
-------------

9 changes: 7 additions & 2 deletions doc/source/rllib/rllib-callback.rst
@@ -225,8 +225,10 @@ The following are two examples showing you how to set up custom callbacks on the
:ref:`EnvRunner <rllib-key-concepts-env-runners>` processes.


.. _rllib-callback-example-on-train-result:

Example 1: `on_train_result`
----------------------------
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The following example demonstrates how to implement a simple custom function writing the replay buffer
contents to disk from time to time.
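A hedged sketch of such a callback (the class name, dump interval, and output path are
hypothetical; the docs page's full example may differ):

.. code-block:: python

    import pickle

    from ray.rllib.callbacks.callbacks import RLlibCallback

    class DumpReplayBuffer(RLlibCallback):
        def on_train_result(self, *, algorithm, metrics_logger=None, result, **kwargs):
            # Every 10th iteration, write the replay buffer state to disk.
            # `local_replay_buffer` exists on off-policy algos such as DQN or SAC.
            if result["training_iteration"] % 10 == 0:
                state = algorithm.local_replay_buffer.get_state()
                with open("/tmp/replay_buffer_state.pkl", "wb") as f:
                    pickle.dump(state, f)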
@@ -272,8 +274,11 @@ controlled fashion through a custom callback could be a good compromise.

See :ref:`Callbacks invoked in Algorithm <rllib-callback-reference-algorithm-bound>` for the exact call signatures of all available callbacks and the argument types that they expect.


.. _rllib-callback-example-on-episode-step-and-end:

Example 2: `on_episode_step` and `on_episode_end`
-------------------------------------------------
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The following example demonstrates how to implement a custom :py:class:`~ray.rllib.callbacks.callbacks.RLlibCallback` class
computing the average "first-joint angle" of the
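A hedged sketch of the `on_episode_step`/`on_episode_end` pattern named above (the observation
indices, key names, and exact callback signatures are assumptions):

.. code-block:: python

    import numpy as np

    from ray.rllib.callbacks.callbacks import RLlibCallback

    class FirstJointAngleLogger(RLlibCallback):
        def on_episode_step(self, *, episode, **kwargs):
            # Assume the first two obs components encode cos/sin of the angle.
            obs = episode.get_observations(-1)
            angle = np.arctan2(obs[1], obs[0])
            # Stash per-step values in the episode's custom data dict.
            episode.custom_data.setdefault("first_joint_angle", []).append(angle)

        def on_episode_end(self, *, episode, metrics_logger=None, **kwargs):
            angles = episode.custom_data.pop("first_joint_angle", [])
            if angles and metrics_logger is not None:
                metrics_logger.log_value("first_joint_angle_mean", float(np.mean(angles)))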
5 changes: 5 additions & 0 deletions doc/source/rllib/rllib-examples.rst
@@ -254,6 +254,11 @@ Learners
Metrics
+++++++

- `Logging custom metrics in Algorithm.training_step <https://github.com/ray-project/ray/blob/master/rllib/examples/metrics/custom_metrics_in_algorithm_training_step.py>`__:
Shows how to log custom metrics inside a custom :py:class:`~ray.rllib.algorithms.algorithm.Algorithm` by overriding
the :py:meth:`~ray.rllib.algorithms.algorithm.Algorithm.training_step` method and calling the :py:meth:`~ray.rllib.utils.metrics.metrics_logger.MetricsLogger.log_value` method
of the :py:class:`~ray.rllib.utils.metrics.metrics_logger.MetricsLogger` instance. A minimal sketch follows after this list.

- `Logging custom metrics in EnvRunners <https://github.com/ray-project/ray/blob/master/rllib/examples/metrics/custom_metrics_in_env_runners.py>`__:
Demonstrates adding custom metrics to :py:class:`~ray.rllib.env.env_runner.EnvRunner` actors, providing a way to track specific
performance and environment indicators beyond the standard RLlib metrics.
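A minimal sketch of the `training_step()` pattern from the first bullet above (the subclass name
and metric key are hypothetical; the commit's actual example script may differ):

.. code-block:: python

    from ray.rllib.algorithms.ppo import PPO
    from ray.rllib.utils.annotations import override

    class PPOWithCustomMetrics(PPO):
        @override(PPO)
        def training_step(self) -> None:
            super().training_step()
            # `self.metrics` is the Algorithm's MetricsLogger instance.
            # Count how often `training_step` has been called so far.
            self.metrics.log_value("num_training_step_calls", 1, reduce="sum")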
8 changes: 8 additions & 0 deletions rllib/BUILD
@@ -2632,6 +2632,14 @@ py_test(
# subdirectory: metrics/
# ....................................

py_test(
name = "examples/metrics/custom_metrics_in_algorithm_training_step",
main = "examples/metrics/custom_metrics_in_algorithm_training_step.py",
tags = ["team:rllib", "exclusive", "examples"],
size = "small",
srcs = ["examples/metrics/custom_metrics_in_algorithm_training_step.py"],
args = ["--enable-new-api-stack"]
)
py_test(
name = "examples/metrics/custom_metrics_in_env_runners",
main = "examples/metrics/custom_metrics_in_env_runners.py",
59 changes: 35 additions & 24 deletions rllib/algorithms/algorithm.py
@@ -183,6 +183,7 @@

if TYPE_CHECKING:
from ray.rllib.core.learner.learner_group import LearnerGroup
from ray.rllib.offline.offline_data import OfflineData

try:
from ray.rllib.extensions import AlgorithmBase
@@ -225,36 +226,45 @@ def _get_learner_bundles(

@PublicAPI
class Algorithm(Checkpointable, Trainable, AlgorithmBase):
"""An RLlib algorithm responsible for optimizing one or more Policies.
Algorithms contain an EnvRunnerGroup under `self.env_runner_group`. An EnvRunnerGroup
is composed of a single local EnvRunner (`self.env_runner_group.local_env_runner`),
serving as the reference copy of the NeuralNetwork(s) to be trained and optionally
one or more remote EnvRunners used to generate environment samples in parallel.
EnvRunnerGroup is fault-tolerant and elastic. It tracks health states for all
the managed remote EnvRunner actors. As a result, Algorithm should never
access the underlying actor handles directly. Instead, always access them
via all the foreach APIs with assigned IDs of the underlying EnvRunners.
Each EnvRunner (remote or local) contains a PolicyMap, which itself
may contain either one policy for single-agent training or one or more
policies for multi-agent training. Policies are synchronized
automatically from time to time using ray.remote calls. The exact
synchronization logic depends on the specific algorithm used,
but this usually happens from local worker to all remote workers and
after each training update.
"""An RLlib algorithm responsible for training one or more neural network models.
You can write your own Algorithm classes by sub-classing from `Algorithm`
or any of its built-in sub-classes.
This allows you to override the `training_step` method to implement
your own algorithm logic. You can find the different built-in
algorithms' `training_step()` methods in their respective main .py files,
e.g. rllib.algorithms.dqn.dqn.py or rllib.algorithms.impala.impala.py.
or any of its built-in subclasses.
Override the `training_step` method to implement your own algorithm logic.
Find the various built-in `training_step()` methods for different algorithms in
their respective [algo name].py files, for example:
`ray.rllib.algorithms.dqn.dqn.py` or `ray.rllib.algorithms.impala.impala.py`.
The most important API methods an Algorithm exposes are `train()`,
`evaluate()`, `save_to_path()` and `restore_from_path()`.
"""

#: The AlgorithmConfig instance of the Algorithm.
config: Optional[AlgorithmConfig] = None
#: The MetricsLogger instance of the Algorithm. RLlib uses this to log
#: metrics from within the `training_step()` method. Users can use it to log
#: metrics from within their custom Algorithm-based callbacks.
metrics: Optional[MetricsLogger] = None
#: The `EnvRunnerGroup` of the Algorithm. An `EnvRunnerGroup` is
#: composed of a single local `EnvRunner` (see: `self.env_runner`), serving as
#: the reference copy of the models to be trained and optionally one or more
#: remote `EnvRunners` used to generate training samples from the RL
#: environment, in parallel. EnvRunnerGroup is fault-tolerant and elastic. It
#: tracks health states for all the managed remote EnvRunner actors. As a
#: result, Algorithm should never access the underlying actor handles directly.
#: Instead, always access them through the foreach APIs, addressing the
#: underlying EnvRunners by their assigned IDs.
env_runner_group: Optional[EnvRunnerGroup] = None
#: A special EnvRunnerGroup only used for evaluation, not to
#: collect training samples.
eval_env_runner_group: Optional[EnvRunnerGroup] = None
#: The `LearnerGroup` instance of the Algorithm, managing either
#: one local `Learner` or one or more remote `Learner` actors. Responsible for
#: updating the models from RL environment (episode) data.
learner_group: Optional["LearnerGroup"] = None
#: An optional OfflineData instance, used for offline RL.
offline_data: Optional["OfflineData"] = None

# Whether to allow unknown top-level config keys.
_allow_unknown_configs = False

@@ -442,7 +452,6 @@ def from_state(state: Dict) -> "Algorithm":
# Return the new algo.
return new_algo

@PublicAPI
def __init__(
self,
config: Optional[AlgorithmConfig] = None,
@@ -3647,10 +3656,12 @@ def __repr__(self):

@property
def env_runner(self):
"""The local EnvRunner instance within the algo's EnvRunnerGroup."""
return self.env_runner_group.local_env_runner

@property
def eval_env_runner(self):
"""The local EnvRunner instance within the algo's evaluation EnvRunnerGroup."""
return self.eval_env_runner_group.local_env_runner

def _record_usage(self, config):
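As context for the docstring above, a minimal usage sketch of the core `Algorithm` entry points
(the environment name is an assumption; this sketch is not part of the diff):

    from ray.rllib.algorithms.ppo import PPOConfig

    # Build an Algorithm from its config.
    algo = PPOConfig().environment("CartPole-v1").build()

    # Run one training iteration; the result dict holds the reduced metrics.
    results = algo.train()

    # Checkpoint the algo, then restore it from that checkpoint.
    path = algo.save_to_path()
    algo.restore_from_path(path)

    # The convenience property documented in this diff: the local EnvRunner.
    local_runner = algo.env_runner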
4 changes: 2 additions & 2 deletions rllib/examples/algorithms/classes/vpg.py
@@ -155,8 +155,8 @@ def training_step(self) -> None:

def _sample_episodes(self):
# How many episodes to sample from each EnvRunner?
num_episodes_per_env_runner = (
self.config.num_episodes_per_train_batch // self.config.num_env_runners
num_episodes_per_env_runner = self.config.num_episodes_per_train_batch // (
self.config.num_env_runners or 1
)
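# Note: the `or 1` above guards against a division by zero when
# `num_env_runners=0`, i.e., when sampling runs only on the local EnvRunner.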
# Send parallel remote requests to sample and get the metrics.
sampled_data = self.env_runner_group.foreach_env_runner(