[RLlib] RLlib-in-60sec and other doc upgrades. (#29544)
sven1977 authored Oct 21, 2022
1 parent fe2e50f commit bf4894d
Showing 3 changed files with 48 additions and 33 deletions.
25 changes: 25 additions & 0 deletions doc/source/rllib/rllib-saving-and-loading-algos-and-policies.rst
@@ -343,6 +343,31 @@ Here is the example code that illustrates these:
    :start-after: __export-models-begin__
    :end-before: __export-models-end__

We can now export the Keras NN model (that our PPOTF1Policy inside the PPO Algorithm uses)
to disk in three ways (a short reload sketch follows the list below):

1) Using the Policy object:

.. literalinclude:: ../../../rllib/examples/documentation/saving_and_loading_algos_and_policies.py
    :language: python
    :start-after: __export-models-1-begin__
    :end-before: __export-models-1-end__

2) Via the Policy's checkpointing method:

.. literalinclude:: ../../../rllib/examples/documentation/saving_and_loading_algos_and_policies.py
    :language: python
    :start-after: __export-models-2-begin__
    :end-before: __export-models-2-end__

3) Via the Algorithm (Policy) checkpoint:

.. literalinclude:: ../../../rllib/examples/documentation/saving_and_loading_algos_and_policies.py
    :language: python
    :start-after: __export-models-3-begin__
    :end-before: __export-models-3-end__


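As a quick, hedged illustration (not part of this commit): once any of the snippets above has run, the exported directory should be recoverable as a regular Keras/TF SavedModel, as the example script's comments suggest. The path matches the example; the load call itself is plain TensorFlow:

.. code-block:: python

    # Reload sketch: RLlib's TF model export writes a SavedModel directory,
    # so standard Keras loading should work.
    import tensorflow as tf

    model = tf.keras.models.load_model("/tmp/my_nn_model")
    model.summary()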
And what about exporting my NN Models in ONNX format?
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

39 changes: 20 additions & 19 deletions rllib/examples/documentation/rllib_in_60s.py
@@ -1,34 +1,35 @@
 # __rllib-in-60s-begin__
 # Import the RL algorithm (Algorithm) we would like to use.
-from ray.rllib.algorithms.ppo import PPO
+from ray.rllib.algorithms.ppo import PPOConfig
 
 # Configure the algorithm.
-config = {
+config = (
+    PPOConfig()
     # Environment (RLlib understands openAI gym registered strings).
-    "env": "Taxi-v3",
+    .environment("Taxi-v3")
     # Use 2 environment workers (aka "rollout workers") that parallelly
     # collect samples from their own environment clone(s).
-    "num_workers": 2,
-    # Change this to "framework: torch", if you are using PyTorch.
-    # Also, use "framework: tf2" for tf2.x eager execution.
-    "framework": "tf",
+    .rollouts(num_rollout_workers=2)
+    # Change this to `framework("torch")`, if you are using PyTorch.
+    # Use `framework("tf2", eager_tracing=True)` for tf2.x traced execution.
+    .framework("tf")
     # Tweak the default model provided automatically by RLlib,
     # given the environment's observation- and action spaces.
-    "model": {
-        "fcnet_hiddens": [64, 64],
-        "fcnet_activation": "relu",
-    },
+    .training(
+        model={
+            "fcnet_hiddens": [64, 64],
+            "fcnet_activation": "relu",
+        }
+    )
     # Set up a separate evaluation worker set for the
     # `algo.evaluate()` call after training (see below).
-    "evaluation_num_workers": 1,
-    # Only for evaluation runs, render the env.
-    "evaluation_config": {
-        "render_env": True,
-    },
-}
+    .evaluation(
+        evaluation_num_workers=1,
+    )
+)
 
-# Create our RLlib Trainer.
-algo = PPO(config=config)
+# Create our RLlib Trainer from the config object.
+algo = config.build()
 
 # Run it for n training iterations. A training iteration includes
 # parallel sample collection by the environment workers as well as
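For readability, here is a consolidated sketch of the new, builder-style version of this example (assembled from the added lines above; the short train/evaluate tail is illustrative and simply follows the comments in the file):

from ray.rllib.algorithms.ppo import PPOConfig

config = (
    PPOConfig()
    .environment("Taxi-v3")
    .rollouts(num_rollout_workers=2)
    .framework("tf")
    .training(model={"fcnet_hiddens": [64, 64], "fcnet_activation": "relu"})
    .evaluation(evaluation_num_workers=1)
)

# Build the Algorithm from the config, train a few iterations, then evaluate.
algo = config.build()
for _ in range(3):
    print(algo.train())
print(algo.evaluate())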
17 changes: 3 additions & 14 deletions rllib/examples/documentation/saving_and_loading_algos_and_policies.py
@@ -1,7 +1,6 @@
# flake8: noqa

# __create-algo-checkpoint-begin__

# Create a PPO algorithm object using a config object ..
from ray.rllib.algorithms.ppo import PPOConfig

@@ -40,7 +39,6 @@
my_new_ppo.stop()

# __restore-from-algo-checkpoint-2-begin__

# Re-build a fresh algorithm.
my_new_ppo = my_ppo_config.build()

@@ -55,7 +53,6 @@
my_new_ppo.stop()

# __multi-agent-checkpoints-begin__

import os

# Use our example multi-agent CartPole environment to train in.
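The actual multi-agent setup is collapsed in this view. A rough sketch of what such a config could look like (the policy names match the ones used further down; the two-agent CartPole setup and the mapping function are illustrative assumptions, not part of this diff):

from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.examples.env.multi_agent import MultiAgentCartPole

# Hypothetical sketch: two policies ("pol1", "pol2") trained on a
# 2-agent CartPole environment.
my_ma_config = (
    PPOConfig()
    .environment(MultiAgentCartPole, env_config={"num_agents": 2})
    .multi_agent(
        policies={"pol1", "pol2"},
        # Map even agent IDs to "pol1", odd ones to "pol2".
        policy_mapping_fn=lambda agent_id, episode, worker, **kwargs: (
            "pol1" if agent_id % 2 == 0 else "pol2"
        ),
    )
)
my_ma_algo = my_ma_config.build()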
@@ -107,7 +104,6 @@
my_ma_algo_clone.stop()

# __multi-agent-checkpoints-restore-policy-sub-set-begin__

# Here, we use the same (multi-agent Algorithm) checkpoint as above, but only restore
# it with the first Policy ("pol1").

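The restore call itself is collapsed here. A hedged sketch of what restoring only that one policy could look like (`ma_checkpoint_dir` is an assumed variable holding the path of the multi-agent checkpoint created above):

from ray.rllib.algorithms.algorithm import Algorithm

# Hypothetical sketch, not part of this diff.
my_ma_algo_only_pol1 = Algorithm.from_checkpoint(
    checkpoint=ma_checkpoint_dir,  # assumed: checkpoint path from above
    policy_ids=["pol1"],
    # All agents must now map to the only restored policy.
    policy_mapping_fn=lambda agent_id, episode, worker, **kwargs: "pol1",
)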
@@ -136,7 +132,6 @@
my_ma_algo_only_pol1.stop()

# __create-policy-checkpoint-begin__

# Retrieve the Policy object from an Algorithm.
# Note that for normal, single-agent Algorithms, the Policy ID is "default_policy".
policy1 = my_ma_algo.get_policy(policy_id="pol1")
@@ -148,7 +143,6 @@
# __create-policy-checkpoint-end__

# __restore-policy-begin__

import numpy as np

from ray.rllib.policy.policy import Policy
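The rest of this snippet is collapsed. A minimal, self-contained sketch of restoring a Policy from a policy checkpoint and querying it (the checkpoint path and the dummy CartPole observation are illustrative):

import numpy as np

from ray.rllib.policy.policy import Policy

# Hypothetical sketch, not part of this diff.
restored_policy = Policy.from_checkpoint("/tmp/ppo_policy")
action = restored_policy.compute_single_action(np.zeros((4,), dtype=np.float32))
print(action)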
@@ -166,7 +160,6 @@


# __restore-algorithm-from-checkpoint-with-fewer-policies-begin__

from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.examples.env.multi_agent import MultiAgentCartPole

@@ -225,7 +218,6 @@ def new_policy_mapping_fn(agent_id, episode, worker, **kwargs):


# __export-models-begin__

from ray.rllib.algorithms.ppo import PPOConfig

# Create a new Algorithm (which contains a Policy, which contains a NN Model).
@@ -253,7 +245,6 @@ def new_policy_mapping_fn(agent_id, episode, worker, **kwargs):
# 1) .. using the Policy object:

# __export-models-1-begin__

ppo_policy.export_model("/tmp/my_nn_model")
# .. check /tmp/my_nn_model/ for the keras model files. You should be able to recover
# the keras model via:
@@ -275,9 +266,9 @@ def new_policy_mapping_fn(agent_id, episode, worker, **kwargs):

# __export-models-1-end__

# __export-models-2-begin__

# 2) .. via the Policy's checkpointing method:

# __export-models-2-begin__
checkpoint_dir = ppo_policy.export_checkpoint("/tmp/ppo_policy")
# .. check /tmp/ppo_policy/model/ for the keras model files.
# You should be able to recover the keras model via:
@@ -289,10 +280,9 @@ def new_policy_mapping_fn(agent_id, episode, worker, **kwargs):

# __export-models-2-end__

# 3) .. via the Algorithm (Policy) checkpoint:

# __export-models-3-begin__

# 3) .. via the Algorithm (Policy) checkpoint:
checkpoint_dir = ppo.save()
# .. check `checkpoint_dir` for the Algorithm checkpoint files.
# You should be able to recover the keras model via:
@@ -306,7 +296,6 @@ def new_policy_mapping_fn(agent_id, episode, worker, **kwargs):


# __export-models-as-onnx-begin__

# Using the same Policy object, we can also export our NN Model in the ONNX format:
ppo_policy.export_model("/tmp/my_nn_model", onnx=True)

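A hedged follow-up sketch (not part of this commit): sanity-checking the ONNX export with onnxruntime. The exact file name written into /tmp/my_nn_model depends on the framework and version, so the sketch simply globs for it:

import glob

import onnxruntime

# Hypothetical sketch: load the exported ONNX file and inspect its inputs.
onnx_path = glob.glob("/tmp/my_nn_model/*.onnx")[0]
session = onnxruntime.InferenceSession(onnx_path)
print([inp.name for inp in session.get_inputs()])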
