[RLlib] RLlib-in-60sec and other doc upgrades. (#29544)
sven1977 authored Oct 21, 2022
1 parent fe2e50f commit bf4894d
Showing 3 changed files with 48 additions and 33 deletions.
25 changes: 25 additions & 0 deletions doc/source/rllib/rllib-saving-and-loading-algos-and-policies.rst
@@ -343,6 +343,31 @@ Here is the example code that illustrates these:
    :start-after: __export-models-begin__
    :end-before: __export-models-end__

We can now export the Keras NN model (that our PPOTF1Policy inside the PPO Algorithm uses)
to disk in three ways (a short reload sketch follows the list below):

1) Using the Policy object:

.. literalinclude:: ../../../rllib/examples/documentation/saving_and_loading_algos_and_policies.py
    :language: python
    :start-after: __export-models-1-begin__
    :end-before: __export-models-1-end__

2) Via the Policy's checkpointing method:

.. literalinclude:: ../../../rllib/examples/documentation/saving_and_loading_algos_and_policies.py
    :language: python
    :start-after: __export-models-2-begin__
    :end-before: __export-models-2-end__

3) Via the Algorithm (Policy) checkpoint:

.. literalinclude:: ../../../rllib/examples/documentation/saving_and_loading_algos_and_policies.py
    :language: python
    :start-after: __export-models-3-begin__
    :end-before: __export-models-3-end__


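As a quick, hedged illustration (not part of this commit): once any of the snippets above has run, the exported directory should be recoverable as a regular Keras/TF SavedModel, as the example script's comments suggest. The path matches the example; the load call itself is plain TensorFlow:

.. code-block:: python

    # Reload sketch: RLlib's TF model export writes a SavedModel directory,
    # so standard Keras loading should work.
    import tensorflow as tf

    model = tf.keras.models.load_model("/tmp/my_nn_model")
    model.summary()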
And what about exporting my NN Models in ONNX format?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

39 changes: 20 additions & 19 deletions rllib/examples/documentation/rllib_in_60s.py
@@ -1,34 +1,35 @@
 # __rllib-in-60s-begin__
 # Import the RL algorithm (Algorithm) we would like to use.
-from ray.rllib.algorithms.ppo import PPO
+from ray.rllib.algorithms.ppo import PPOConfig
 
 # Configure the algorithm.
-config = {
+config = (
+    PPOConfig()
     # Environment (RLlib understands openAI gym registered strings).
-    "env": "Taxi-v3",
+    .environment("Taxi-v3")
     # Use 2 environment workers (aka "rollout workers") that parallelly
     # collect samples from their own environment clone(s).
-    "num_workers": 2,
-    # Change this to "framework: torch", if you are using PyTorch.
-    # Also, use "framework: tf2" for tf2.x eager execution.
-    "framework": "tf",
+    .rollouts(num_rollout_workers=2)
+    # Change this to `framework("torch")`, if you are using PyTorch.
+    # Use `framework("tf2", eager_tracing=True)` for tf2.x traced execution.
+    .framework("tf")
     # Tweak the default model provided automatically by RLlib,
     # given the environment's observation- and action spaces.
-    "model": {
-        "fcnet_hiddens": [64, 64],
-        "fcnet_activation": "relu",
-    },
+    .training(
+        model={
+            "fcnet_hiddens": [64, 64],
+            "fcnet_activation": "relu",
+        }
+    )
     # Set up a separate evaluation worker set for the
     # `algo.evaluate()` call after training (see below).
-    "evaluation_num_workers": 1,
-    # Only for evaluation runs, render the env.
-    "evaluation_config": {
-        "render_env": True,
-    },
-}
+    .evaluation(
+        evaluation_num_workers=1,
+    )
+)
 
-# Create our RLlib Trainer.
-algo = PPO(config=config)
+# Create our RLlib Trainer from the config object.
+algo = config.build()
 
 # Run it for n training iterations. A training iteration includes
 # parallel sample collection by the environment workers as well as
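For readability, here is a consolidated sketch of the new, builder-style version of this example (assembled from the added lines above; the short train/evaluate tail is illustrative and simply follows the comments in the file):

from ray.rllib.algorithms.ppo import PPOConfig

config = (
    PPOConfig()
    .environment("Taxi-v3")
    .rollouts(num_rollout_workers=2)
    .framework("tf")
    .training(model={"fcnet_hiddens": [64, 64], "fcnet_activation": "relu"})
    .evaluation(evaluation_num_workers=1)
)

# Build the Algorithm from the config, train a few iterations, then evaluate.
algo = config.build()
for _ in range(3):
    print(algo.train())
print(algo.evaluate())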
17 changes: 3 additions & 14 deletions rllib/examples/documentation/saving_and_loading_algos_and_policies.py
@@ -1,7 +1,6 @@
# flake8: noqa

# __create-algo-checkpoint-begin__

# Create a PPO algorithm object using a config object ..
from ray.rllib.algorithms.ppo import PPOConfig

@@ -40,7 +39,6 @@
my_new_ppo.stop()

# __restore-from-algo-checkpoint-2-begin__

# Re-build a fresh algorithm.
my_new_ppo = my_ppo_config.build()

@@ -55,7 +53,6 @@
my_new_ppo.stop()

# __multi-agent-checkpoints-begin__

import os

# Use our example multi-agent CartPole environment to train in.
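The actual multi-agent setup is collapsed in this view. A rough sketch of what such a config could look like (the policy names match the ones used further down; the two-agent CartPole setup and the mapping function are illustrative assumptions, not part of this diff):

from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.examples.env.multi_agent import MultiAgentCartPole

# Hypothetical sketch: two policies ("pol1", "pol2") trained on a
# 2-agent CartPole environment.
my_ma_config = (
    PPOConfig()
    .environment(MultiAgentCartPole, env_config={"num_agents": 2})
    .multi_agent(
        policies={"pol1", "pol2"},
        # Map even agent IDs to "pol1", odd ones to "pol2".
        policy_mapping_fn=lambda agent_id, episode, worker, **kwargs: (
            "pol1" if agent_id % 2 == 0 else "pol2"
        ),
    )
)
my_ma_algo = my_ma_config.build()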
@@ -107,7 +104,6 @@
my_ma_algo_clone.stop()

# __multi-agent-checkpoints-restore-policy-sub-set-begin__

# Here, we use the same (multi-agent Algorithm) checkpoint as above, but only restore
# it with the first Policy ("pol1").

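The restore call itself is collapsed here. A hedged sketch of what restoring only that one policy could look like (`ma_checkpoint_dir` is an assumed variable holding the path of the multi-agent checkpoint created above):

from ray.rllib.algorithms.algorithm import Algorithm

# Hypothetical sketch, not part of this diff.
my_ma_algo_only_pol1 = Algorithm.from_checkpoint(
    checkpoint=ma_checkpoint_dir,  # assumed: checkpoint path from above
    policy_ids=["pol1"],
    # All agents must now map to the only restored policy.
    policy_mapping_fn=lambda agent_id, episode, worker, **kwargs: "pol1",
)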
@@ -136,7 +132,6 @@
my_ma_algo_only_pol1.stop()

# __create-policy-checkpoint-begin__

# Retrieve the Policy object from an Algorithm.
# Note that for normal, single-agent Algorithms, the Policy ID is "default_policy".
policy1 = my_ma_algo.get_policy(policy_id="pol1")
@@ -148,7 +143,6 @@
# __create-policy-checkpoint-end__

# __restore-policy-begin__

import numpy as np

from ray.rllib.policy.policy import Policy
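The rest of this snippet is collapsed. A minimal, self-contained sketch of restoring a Policy from a policy checkpoint and querying it (the checkpoint path and the dummy CartPole observation are illustrative):

import numpy as np

from ray.rllib.policy.policy import Policy

# Hypothetical sketch, not part of this diff.
restored_policy = Policy.from_checkpoint("/tmp/ppo_policy")
action = restored_policy.compute_single_action(np.zeros((4,), dtype=np.float32))
print(action)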
@@ -166,7 +160,6 @@


# __restore-algorithm-from-checkpoint-with-fewer-policies-begin__

from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.examples.env.multi_agent import MultiAgentCartPole

@@ -225,7 +218,6 @@ def new_policy_mapping_fn(agent_id, episode, worker, **kwargs):


# __export-models-begin__

from ray.rllib.algorithms.ppo import PPOConfig

# Create a new Algorithm (which contains a Policy, which contains a NN Model).
@@ -253,7 +245,6 @@ def new_policy_mapping_fn(agent_id, episode, worker, **kwargs):
# 1) .. using the Policy object:

# __export-models-1-begin__

ppo_policy.export_model("/tmp/my_nn_model")
# .. check /tmp/my_nn_model/ for the keras model files. You should be able to recover
# the keras model via:
@@ -275,9 +266,9 @@ def new_policy_mapping_fn(agent_id, episode, worker, **kwargs):

# __export-models-1-end__

# __export-models-2-begin__

# 2) .. via the Policy's checkpointing method:

# __export-models-2-begin__
checkpoint_dir = ppo_policy.export_checkpoint("/tmp/ppo_policy")
# .. check /tmp/ppo_policy/model/ for the keras model files.
# You should be able to recover the keras model via:
@@ -289,10 +280,9 @@ def new_policy_mapping_fn(agent_id, episode, worker, **kwargs):

# __export-models-2-end__

# 3) .. via the Algorithm (Policy) checkpoint:

# __export-models-3-begin__

# 3) .. via the Algorithm (Policy) checkpoint:
checkpoint_dir = ppo.save()
# .. check `checkpoint_dir` for the Algorithm checkpoint files.
# You should be able to recover the keras model via:
@@ -306,7 +296,6 @@ def new_policy_mapping_fn(agent_id, episode, worker, **kwargs):


# __export-models-as-onnx-begin__

# Using the same Policy object, we can also export our NN Model in the ONNX format:
ppo_policy.export_model("/tmp/my_nn_model", onnx=True)

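A hedged follow-up sketch (not part of this commit): sanity-checking the ONNX export with onnxruntime. The exact file name written into /tmp/my_nn_model depends on the framework and version, so the sketch simply globs for it:

import glob

import onnxruntime

# Hypothetical sketch: load the exported ONNX file and inspect its inputs.
onnx_path = glob.glob("/tmp/my_nn_model/*.onnx")[0]
session = onnxruntime.InferenceSession(onnx_path)
print([inp.name for inp in session.get_inputs()])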
