From b8d1233a348647ecab76f67c365cea8ce1f3f24c Mon Sep 17 00:00:00 2001
From: zjowowen
Date: Tue, 18 Jun 2024 14:11:52 +0800
Subject: [PATCH] Polish configurations and documents.

---
 docs/source/tutorials/installation/index.rst  |   6 -
 docs/source/tutorials/quick_start/index.rst   |  18 +-
 .../configurations/adroit_penhuman_v2_qgpo.py | 159 -----------------
 .../antmaze_large_diverse_v0_qgpo.py          | 159 -----------------
 .../antmaze_large_play_v0_qgpo.py             | 159 -----------------
 .../antmaze_medium_diverse_v0_qgpo.py         | 159 -----------------
 .../antmaze_medium_play_v0_qgpo.py            | 159 -----------------
 .../antmaze_umaze_diverse_v0_qgpo.py          | 159 -----------------
 .../configurations/antmaze_umaze_v0_qgpo.py   | 159 -----------------
 .../configurations/antmaze_umaze_v2_qgpo.py   | 159 -----------------
 .../configurations/d4rl_halfcheetah_cps.py    | 132 --------------
 .../kitchen_complete_v0_qgpo.py               | 159 -----------------
 .../configurations/kitchen_mixed_v0_qgpo.py   | 159 -----------------
 .../kitchen_partial_v0_qgpo.py                | 159 -----------------
 .../lunarlander_continuous_cps.py             | 127 --------------
 .../lunarlander_continuous_qgpo_oneline.py    | 161 ------------------
 .../configurations/pen_cloned_v1_qgpo.py      | 159 -----------------
 .../configurations/pen_human_v1_qgpo.py       | 159 -----------------
 18 files changed, 9 insertions(+), 2502 deletions(-)
 delete mode 100644 grl_pipelines/diffusion_model/configurations/adroit_penhuman_v2_qgpo.py
 delete mode 100644 grl_pipelines/diffusion_model/configurations/antmaze_large_diverse_v0_qgpo.py
 delete mode 100644 grl_pipelines/diffusion_model/configurations/antmaze_large_play_v0_qgpo.py
 delete mode 100644 grl_pipelines/diffusion_model/configurations/antmaze_medium_diverse_v0_qgpo.py
 delete mode 100644 grl_pipelines/diffusion_model/configurations/antmaze_medium_play_v0_qgpo.py
 delete mode 100644 grl_pipelines/diffusion_model/configurations/antmaze_umaze_diverse_v0_qgpo.py
 delete mode 100644 grl_pipelines/diffusion_model/configurations/antmaze_umaze_v0_qgpo.py
 delete mode 100644 grl_pipelines/diffusion_model/configurations/antmaze_umaze_v2_qgpo.py
 delete mode 100644 grl_pipelines/diffusion_model/configurations/d4rl_halfcheetah_cps.py
 delete mode 100644 grl_pipelines/diffusion_model/configurations/kitchen_complete_v0_qgpo.py
 delete mode 100644 grl_pipelines/diffusion_model/configurations/kitchen_mixed_v0_qgpo.py
 delete mode 100644 grl_pipelines/diffusion_model/configurations/kitchen_partial_v0_qgpo.py
 delete mode 100644 grl_pipelines/diffusion_model/configurations/lunarlander_continuous_cps.py
 delete mode 100644 grl_pipelines/diffusion_model/configurations/lunarlander_continuous_qgpo_oneline.py
 delete mode 100644 grl_pipelines/diffusion_model/configurations/pen_cloned_v1_qgpo.py
 delete mode 100644 grl_pipelines/diffusion_model/configurations/pen_human_v1_qgpo.py

diff --git a/docs/source/tutorials/installation/index.rst b/docs/source/tutorials/installation/index.rst
index b3be7df..000f97a 100644
--- a/docs/source/tutorials/installation/index.rst
+++ b/docs/source/tutorials/installation/index.rst
@@ -9,12 +9,6 @@ GenerativeRL can be installed using pip:
 
 You can also install the latest development version from GitHub:
 
-.. code-block:: console
-
-    $ pip install git+https://github.com/OpenDILab/GenerativeRL.git
-
-If you want to try a preview of the latest features, you can install the latest development version from GitHub:
-
 .. code-block:: console
 
     $ pip install git+https://github.com/opendilab/GenerativeRL.git
diff --git a/docs/source/tutorials/quick_start/index.rst b/docs/source/tutorials/quick_start/index.rst
index fb23e51..1ce4568 100644
--- a/docs/source/tutorials/quick_start/index.rst
+++ b/docs/source/tutorials/quick_start/index.rst
@@ -5,7 +5,7 @@ Generative model in GenerativeRL
 ---------
 
 GenerativeRL support easy-to-use APIs for training and deploying generative model.
-We provide a simple example of how to train a diffusion model on the swiss roll dataset in [Colab](https://colab.research.google.com/drive/18yHUAmcMh_7xq2U6TBCtcLKX2y4YvNyk?usp=drive_link).
+We provide a simple example of how to train a diffusion model on the swiss roll dataset in `Colab <https://colab.research.google.com/drive/18yHUAmcMh_7xq2U6TBCtcLKX2y4YvNyk?usp=drive_link>`_.
 
 More usage examples can be found in the folder `grl_pipelines/tutorials/`.
@@ -41,16 +41,16 @@ Explanation
 
 1. First, we import the necessary components from the GenerativeRL library, including the configuration for the HalfCheetah environment and the QGPO algorithm, as well as the logging utility and the OpenAI Gym environment.
 
-2. The `qgpo_pipeline` function encapsulates the training and deployment process:
+2. The ``qgpo_pipeline`` function encapsulates the training and deployment process:
 
-   - An instance of the `QGPOAlgorithm` is created with the provided configuration.
-   - The `qgpo.train()` method is called to train the QGPO agent on the HalfCheetah environment.
-   - After training, the `qgpo.deploy()` method is called to obtain the trained agent for deployment.
-   - A new instance of the HalfCheetah environment is created using `gym.make`.
-   - The environment is reset to its initial state with `env.reset()`.
-   - A loop is executed for the specified number of steps (`config.deploy.num_deploy_steps`), rendering the environment and stepping through it using the agent's `act` method.
+   - An instance of the ``QGPOAlgorithm`` is created with the provided configuration.
+   - The ``qgpo.train()`` method is called to train the QGPO agent on the HalfCheetah environment.
+   - After training, the ``qgpo.deploy()`` method is called to obtain the trained agent for deployment.
+   - A new instance of the HalfCheetah environment is created using ``gym.make``.
+   - The environment is reset to its initial state with ``env.reset()``.
+   - A loop is executed for the specified number of steps (``config.deploy.num_deploy_steps``), rendering the environment and stepping through it using the agent's ``act`` method.
 
-3. In the `if __name__ == '__main__'` block, the configuration is printed to the console using the logging utility, and the `qgpo_pipeline` function is called with the provided configuration.
+3. In the ``if __name__ == '__main__'`` block, the configuration is printed to the console using the logging utility, and the ``qgpo_pipeline`` function is called with the provided configuration.
 
 This example demonstrates how to utilize the GenerativeRL library to train a QGPO agent on the HalfCheetah environment and then deploy the trained agent for evaluation within the environment. You can modify the configuration and algorithm as needed to suit your specific use case.
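For reference, a minimal sketch of the ``qgpo_pipeline`` workflow that the explanation above describes. The import paths (``grl.algorithms.qgpo.QGPOAlgorithm``, ``grl.utils.log``) and the HalfCheetah configuration module name are assumptions for illustration and may differ from the installed package layout; the rollout loop assumes the classic ``gym`` step API that returns four values. The ``config.deploy.env.env_id`` and ``config.deploy.num_deploy_steps`` fields follow the structure of the configuration files touched by this patch.

.. code-block:: python

    import gym

    # Assumed import paths; adjust to the actual package layout.
    from grl.algorithms.qgpo import QGPOAlgorithm
    from grl.utils.log import log
    from grl_pipelines.diffusion_model.configurations.d4rl_halfcheetah_qgpo import config


    def qgpo_pipeline(config):
        # Create and train the QGPO agent with the provided configuration.
        qgpo = QGPOAlgorithm(config)
        qgpo.train()

        # Obtain the trained agent and evaluate it in a fresh environment.
        agent = qgpo.deploy()
        env = gym.make(config.deploy.env.env_id)
        observation = env.reset()
        for _ in range(config.deploy.num_deploy_steps):
            env.render()
            observation, reward, done, info = env.step(agent.act(observation))


    if __name__ == "__main__":
        log.info("config: \n{}".format(config))
        qgpo_pipeline(config)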
diff --git a/grl_pipelines/diffusion_model/configurations/adroit_penhuman_v2_qgpo.py b/grl_pipelines/diffusion_model/configurations/adroit_penhuman_v2_qgpo.py deleted file mode 100644 index 713884b..0000000 --- a/grl_pipelines/diffusion_model/configurations/adroit_penhuman_v2_qgpo.py +++ /dev/null @@ -1,159 +0,0 @@ -import torch -from easydict import EasyDict - -action_size = 24 -state_size = 45 -device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") -t_embedding_dim = 32 -t_encoder = dict( - type="GaussianFourierProjectionTimeEncoder", - args=dict( - embed_dim=t_embedding_dim, - scale=30.0, - ), -) -solver_type = "DPMSolver" -config = EasyDict( - train=dict( - project="minari-pen-human-v2-qgpo", - simulator=dict( - type="GymEnvSimulator", - args=dict( - env_id="pen-human-v1", - ), - ), - dataset=dict( - type="MinariDataset", - args=dict( - env_id="pen-human-v2", - device=device, - ), - ), - model=dict( - QGPOPolicy=dict( - device=device, - critic=dict( - device=device, - q_alpha=1.0, - DoubleQNetwork=dict( - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[action_size + state_size, 256, 256], - output_size=1, - activation="relu", - ), - ), - ), - ), - diffusion_model=dict( - device=device, - x_size=action_size, - alpha=1.0, - solver=( - dict( - type="DPMSolver", - args=dict( - order=2, - device=device, - steps=17, - ), - ) - if solver_type == "DPMSolver" - else ( - dict( - type="ODESolver", - args=dict( - library="torchdyn", - ), - ) - if solver_type == "ODESolver" - else dict( - type="SDESolver", - args=dict( - library="torchsde", - ), - ) - ) - ), - path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - reverse_path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - model=dict( - type="noise_function", - args=dict( - t_encoder=t_encoder, - backbone=dict( - type="TemporalSpatialResidualNet", - args=dict( - hidden_sizes=[512, 256, 128], - output_dim=action_size, - t_dim=t_embedding_dim, - condition_dim=state_size, - condition_hidden_dim=32, - t_condition_hidden_dim=128, - ), - ), - ), - ), - energy_guidance=dict( - t_encoder=t_encoder, - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[ - action_size + state_size + t_embedding_dim, - 256, - 256, - ], - output_size=1, - activation="silu", - ), - ), - ), - ), - ) - ), - parameter=dict( - behaviour_policy=dict( - batch_size=4096, - learning_rate=1e-4, - iterations=600000, - ), - sample_per_state=16, - fake_data_t_span=None if solver_type == "DPMSolver" else 32, - energy_guided_policy=dict( - batch_size=256, - ), - critic=dict( - stop_training_iterations=500000, - learning_rate=3e-4, - discount_factor=0.99, - update_momentum=0.995, - ), - energy_guidance=dict( - iterations=600000, - learning_rate=3e-4, - ), - evaluation=dict( - evaluation_interval=10000, - guidance_scale=[0.0, 1.0, 2.0, 3.0, 5.0, 8.0, 10.0], - ), - ), - ), - deploy=dict( - device=device, - env=dict( - env_id="pen-human-v1", - seed=0, - ), - num_deploy_steps=1000, - ), -) diff --git a/grl_pipelines/diffusion_model/configurations/antmaze_large_diverse_v0_qgpo.py b/grl_pipelines/diffusion_model/configurations/antmaze_large_diverse_v0_qgpo.py deleted file mode 100644 index 7ce1f95..0000000 --- a/grl_pipelines/diffusion_model/configurations/antmaze_large_diverse_v0_qgpo.py +++ /dev/null @@ -1,159 +0,0 @@ -import torch -from easydict import EasyDict - -action_size = 8 -state_size = 29 -device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") 
-t_embedding_dim = 32 -t_encoder = dict( - type="GaussianFourierProjectionTimeEncoder", - args=dict( - embed_dim=t_embedding_dim, - scale=30.0, - ), -) -solver_type = "DPMSolver" -config = EasyDict( - train=dict( - project="antmaze-large-diverse-v0-qgpo", - simulator=dict( - type="GymEnvSimulator", - args=dict( - env_id="antmaze-large-diverse-v0", - ), - ), - dataset=dict( - type="QGPOD4RLDataset", - args=dict( - env_id="antmaze-large-diverse-v0", - device=device, - ), - ), - model=dict( - QGPOPolicy=dict( - device=device, - critic=dict( - device=device, - q_alpha=1.0, - DoubleQNetwork=dict( - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[action_size + state_size, 256, 256], - output_size=1, - activation="relu", - ), - ), - ), - ), - diffusion_model=dict( - device=device, - x_size=action_size, - alpha=1.0, - solver=( - dict( - type="DPMSolver", - args=dict( - order=2, - device=device, - steps=17, - ), - ) - if solver_type == "DPMSolver" - else ( - dict( - type="ODESolver", - args=dict( - library="torchdyn", - ), - ) - if solver_type == "ODESolver" - else dict( - type="SDESolver", - args=dict( - library="torchsde", - ), - ) - ) - ), - path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - reverse_path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - model=dict( - type="noise_function", - args=dict( - t_encoder=t_encoder, - backbone=dict( - type="TemporalSpatialResidualNet", - args=dict( - hidden_sizes=[512, 256, 128], - output_dim=action_size, - t_dim=t_embedding_dim, - condition_dim=state_size, - condition_hidden_dim=32, - t_condition_hidden_dim=128, - ), - ), - ), - ), - energy_guidance=dict( - t_encoder=t_encoder, - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[ - action_size + state_size + t_embedding_dim, - 256, - 256, - ], - output_size=1, - activation="silu", - ), - ), - ), - ), - ) - ), - parameter=dict( - behaviour_policy=dict( - batch_size=4096, - learning_rate=1e-4, - iterations=600000, - ), - sample_per_state=16, - fake_data_t_span=None if solver_type == "DPMSolver" else 32, - energy_guided_policy=dict( - batch_size=256, - ), - critic=dict( - stop_training_iterations=500000, - learning_rate=3e-4, - discount_factor=0.99, - update_momentum=0.995, - ), - energy_guidance=dict( - iterations=600000, - learning_rate=3e-4, - ), - evaluation=dict( - evaluation_interval=10000, - guidance_scale=[0.0, 1.0, 2.0, 3.0, 5.0, 8.0, 10.0], - ), - ), - ), - deploy=dict( - device=device, - env=dict( - env_id="antmaze-large-diverse-v0", - seed=0, - ), - num_deploy_steps=1000, - ), -) diff --git a/grl_pipelines/diffusion_model/configurations/antmaze_large_play_v0_qgpo.py b/grl_pipelines/diffusion_model/configurations/antmaze_large_play_v0_qgpo.py deleted file mode 100644 index a63d8ad..0000000 --- a/grl_pipelines/diffusion_model/configurations/antmaze_large_play_v0_qgpo.py +++ /dev/null @@ -1,159 +0,0 @@ -import torch -from easydict import EasyDict - -action_size = 8 -state_size = 29 -device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") -t_embedding_dim = 32 -t_encoder = dict( - type="GaussianFourierProjectionTimeEncoder", - args=dict( - embed_dim=t_embedding_dim, - scale=30.0, - ), -) -solver_type = "DPMSolver" -config = EasyDict( - train=dict( - project="antmaze-large-play-v0-qgpo", - simulator=dict( - type="GymEnvSimulator", - args=dict( - env_id="antmaze-large-play-v0", - ), - ), - dataset=dict( - type="QGPOD4RLDataset", - args=dict( - env_id="antmaze-large-play-v0", - device=device, - 
), - ), - model=dict( - QGPOPolicy=dict( - device=device, - critic=dict( - device=device, - q_alpha=1.0, - DoubleQNetwork=dict( - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[action_size + state_size, 256, 256], - output_size=1, - activation="relu", - ), - ), - ), - ), - diffusion_model=dict( - device=device, - x_size=action_size, - alpha=1.0, - solver=( - dict( - type="DPMSolver", - args=dict( - order=2, - device=device, - steps=17, - ), - ) - if solver_type == "DPMSolver" - else ( - dict( - type="ODESolver", - args=dict( - library="torchdyn", - ), - ) - if solver_type == "ODESolver" - else dict( - type="SDESolver", - args=dict( - library="torchsde", - ), - ) - ) - ), - path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - reverse_path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - model=dict( - type="noise_function", - args=dict( - t_encoder=t_encoder, - backbone=dict( - type="TemporalSpatialResidualNet", - args=dict( - hidden_sizes=[512, 256, 128], - output_dim=action_size, - t_dim=t_embedding_dim, - condition_dim=state_size, - condition_hidden_dim=32, - t_condition_hidden_dim=128, - ), - ), - ), - ), - energy_guidance=dict( - t_encoder=t_encoder, - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[ - action_size + state_size + t_embedding_dim, - 256, - 256, - ], - output_size=1, - activation="silu", - ), - ), - ), - ), - ) - ), - parameter=dict( - behaviour_policy=dict( - batch_size=4096, - learning_rate=1e-4, - iterations=600000, - ), - sample_per_state=16, - fake_data_t_span=None if solver_type == "DPMSolver" else 32, - energy_guided_policy=dict( - batch_size=256, - ), - critic=dict( - stop_training_iterations=500000, - learning_rate=3e-4, - discount_factor=0.99, - update_momentum=0.995, - ), - energy_guidance=dict( - iterations=600000, - learning_rate=3e-4, - ), - evaluation=dict( - evaluation_interval=10000, - guidance_scale=[0.0, 1.0, 2.0, 3.0, 5.0, 8.0, 10.0], - ), - ), - ), - deploy=dict( - device=device, - env=dict( - env_id="antmaze-large-play-v0", - seed=0, - ), - num_deploy_steps=1000, - ), -) diff --git a/grl_pipelines/diffusion_model/configurations/antmaze_medium_diverse_v0_qgpo.py b/grl_pipelines/diffusion_model/configurations/antmaze_medium_diverse_v0_qgpo.py deleted file mode 100644 index 790d3a4..0000000 --- a/grl_pipelines/diffusion_model/configurations/antmaze_medium_diverse_v0_qgpo.py +++ /dev/null @@ -1,159 +0,0 @@ -import torch -from easydict import EasyDict - -action_size = 8 -state_size = 29 -device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") -t_embedding_dim = 32 -t_encoder = dict( - type="GaussianFourierProjectionTimeEncoder", - args=dict( - embed_dim=t_embedding_dim, - scale=30.0, - ), -) -solver_type = "DPMSolver" -config = EasyDict( - train=dict( - project="antmaze-medium-diverse-v0-qgpo", - simulator=dict( - type="GymEnvSimulator", - args=dict( - env_id="antmaze-medium-diverse-v0", - ), - ), - dataset=dict( - type="QGPOD4RLDataset", - args=dict( - env_id="antmaze-medium-diverse-v0", - device=device, - ), - ), - model=dict( - QGPOPolicy=dict( - device=device, - critic=dict( - device=device, - q_alpha=1.0, - DoubleQNetwork=dict( - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[action_size + state_size, 256, 256], - output_size=1, - activation="relu", - ), - ), - ), - ), - diffusion_model=dict( - device=device, - x_size=action_size, - alpha=1.0, - solver=( - dict( - type="DPMSolver", - args=dict( - order=2, - device=device, - 
steps=17, - ), - ) - if solver_type == "DPMSolver" - else ( - dict( - type="ODESolver", - args=dict( - library="torchdyn", - ), - ) - if solver_type == "ODESolver" - else dict( - type="SDESolver", - args=dict( - library="torchsde", - ), - ) - ) - ), - path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - reverse_path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - model=dict( - type="noise_function", - args=dict( - t_encoder=t_encoder, - backbone=dict( - type="TemporalSpatialResidualNet", - args=dict( - hidden_sizes=[512, 256, 128], - output_dim=action_size, - t_dim=t_embedding_dim, - condition_dim=state_size, - condition_hidden_dim=32, - t_condition_hidden_dim=128, - ), - ), - ), - ), - energy_guidance=dict( - t_encoder=t_encoder, - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[ - action_size + state_size + t_embedding_dim, - 256, - 256, - ], - output_size=1, - activation="silu", - ), - ), - ), - ), - ) - ), - parameter=dict( - behaviour_policy=dict( - batch_size=4096, - learning_rate=1e-4, - iterations=600000, - ), - sample_per_state=16, - fake_data_t_span=None if solver_type == "DPMSolver" else 32, - energy_guided_policy=dict( - batch_size=256, - ), - critic=dict( - stop_training_iterations=500000, - learning_rate=3e-4, - discount_factor=0.99, - update_momentum=0.995, - ), - energy_guidance=dict( - iterations=600000, - learning_rate=3e-4, - ), - evaluation=dict( - evaluation_interval=10000, - guidance_scale=[0.0, 1.0, 2.0, 3.0, 5.0, 8.0, 10.0], - ), - ), - ), - deploy=dict( - device=device, - env=dict( - env_id="antmaze-medium-diverse-v0", - seed=0, - ), - num_deploy_steps=1000, - ), -) diff --git a/grl_pipelines/diffusion_model/configurations/antmaze_medium_play_v0_qgpo.py b/grl_pipelines/diffusion_model/configurations/antmaze_medium_play_v0_qgpo.py deleted file mode 100644 index 97a210b..0000000 --- a/grl_pipelines/diffusion_model/configurations/antmaze_medium_play_v0_qgpo.py +++ /dev/null @@ -1,159 +0,0 @@ -import torch -from easydict import EasyDict - -action_size = 8 -state_size = 29 -device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") -t_embedding_dim = 32 -t_encoder = dict( - type="GaussianFourierProjectionTimeEncoder", - args=dict( - embed_dim=t_embedding_dim, - scale=30.0, - ), -) -solver_type = "DPMSolver" -config = EasyDict( - train=dict( - project="antmaze-medium-play-v0-qgpo", - simulator=dict( - type="GymEnvSimulator", - args=dict( - env_id="antmaze-medium-play-v0", - ), - ), - dataset=dict( - type="QGPOD4RLDataset", - args=dict( - env_id="antmaze-medium-play-v0", - device=device, - ), - ), - model=dict( - QGPOPolicy=dict( - device=device, - critic=dict( - device=device, - q_alpha=1.0, - DoubleQNetwork=dict( - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[action_size + state_size, 256, 256], - output_size=1, - activation="relu", - ), - ), - ), - ), - diffusion_model=dict( - device=device, - x_size=action_size, - alpha=1.0, - solver=( - dict( - type="DPMSolver", - args=dict( - order=2, - device=device, - steps=17, - ), - ) - if solver_type == "DPMSolver" - else ( - dict( - type="ODESolver", - args=dict( - library="torchdyn", - ), - ) - if solver_type == "ODESolver" - else dict( - type="SDESolver", - args=dict( - library="torchsde", - ), - ) - ) - ), - path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - reverse_path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - model=dict( - type="noise_function", - args=dict( - 
t_encoder=t_encoder, - backbone=dict( - type="TemporalSpatialResidualNet", - args=dict( - hidden_sizes=[512, 256, 128], - output_dim=action_size, - t_dim=t_embedding_dim, - condition_dim=state_size, - condition_hidden_dim=32, - t_condition_hidden_dim=128, - ), - ), - ), - ), - energy_guidance=dict( - t_encoder=t_encoder, - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[ - action_size + state_size + t_embedding_dim, - 256, - 256, - ], - output_size=1, - activation="silu", - ), - ), - ), - ), - ) - ), - parameter=dict( - behaviour_policy=dict( - batch_size=4096, - learning_rate=1e-4, - iterations=600000, - ), - sample_per_state=16, - fake_data_t_span=None if solver_type == "DPMSolver" else 32, - energy_guided_policy=dict( - batch_size=256, - ), - critic=dict( - stop_training_iterations=500000, - learning_rate=3e-4, - discount_factor=0.99, - update_momentum=0.995, - ), - energy_guidance=dict( - iterations=600000, - learning_rate=3e-4, - ), - evaluation=dict( - evaluation_interval=10000, - guidance_scale=[0.0, 1.0, 2.0, 3.0, 5.0, 8.0, 10.0], - ), - ), - ), - deploy=dict( - device=device, - env=dict( - env_id="antmaze-medium-play-v0", - seed=0, - ), - num_deploy_steps=1000, - ), -) diff --git a/grl_pipelines/diffusion_model/configurations/antmaze_umaze_diverse_v0_qgpo.py b/grl_pipelines/diffusion_model/configurations/antmaze_umaze_diverse_v0_qgpo.py deleted file mode 100644 index 64b8cb9..0000000 --- a/grl_pipelines/diffusion_model/configurations/antmaze_umaze_diverse_v0_qgpo.py +++ /dev/null @@ -1,159 +0,0 @@ -import torch -from easydict import EasyDict - -action_size = 8 -state_size = 29 -device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") -t_embedding_dim = 32 -t_encoder = dict( - type="GaussianFourierProjectionTimeEncoder", - args=dict( - embed_dim=t_embedding_dim, - scale=30.0, - ), -) -solver_type = "DPMSolver" -config = EasyDict( - train=dict( - project="antmaze-umaze-diverse-v0-qgpo", - simulator=dict( - type="GymEnvSimulator", - args=dict( - env_id="antmaze-umaze-diverse-v0", - ), - ), - dataset=dict( - type="QGPOD4RLDataset", - args=dict( - env_id="antmaze-umaze-diverse-v0", - device=device, - ), - ), - model=dict( - QGPOPolicy=dict( - device=device, - critic=dict( - device=device, - q_alpha=1.0, - DoubleQNetwork=dict( - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[action_size + state_size, 256, 256], - output_size=1, - activation="relu", - ), - ), - ), - ), - diffusion_model=dict( - device=device, - x_size=action_size, - alpha=1.0, - solver=( - dict( - type="DPMSolver", - args=dict( - order=2, - device=device, - steps=17, - ), - ) - if solver_type == "DPMSolver" - else ( - dict( - type="ODESolver", - args=dict( - library="torchdyn", - ), - ) - if solver_type == "ODESolver" - else dict( - type="SDESolver", - args=dict( - library="torchsde", - ), - ) - ) - ), - path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - reverse_path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - model=dict( - type="noise_function", - args=dict( - t_encoder=t_encoder, - backbone=dict( - type="TemporalSpatialResidualNet", - args=dict( - hidden_sizes=[512, 256, 128], - output_dim=action_size, - t_dim=t_embedding_dim, - condition_dim=state_size, - condition_hidden_dim=32, - t_condition_hidden_dim=128, - ), - ), - ), - ), - energy_guidance=dict( - t_encoder=t_encoder, - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[ - action_size + state_size + t_embedding_dim, - 256, - 256, - 
], - output_size=1, - activation="silu", - ), - ), - ), - ), - ) - ), - parameter=dict( - behaviour_policy=dict( - batch_size=4096, - learning_rate=1e-4, - iterations=600000, - ), - sample_per_state=16, - fake_data_t_span=None if solver_type == "DPMSolver" else 32, - energy_guided_policy=dict( - batch_size=256, - ), - critic=dict( - stop_training_iterations=500000, - learning_rate=3e-4, - discount_factor=0.99, - update_momentum=0.995, - ), - energy_guidance=dict( - iterations=600000, - learning_rate=3e-4, - ), - evaluation=dict( - evaluation_interval=10000, - guidance_scale=[0.0, 1.0, 2.0, 3.0, 5.0, 8.0, 10.0], - ), - ), - ), - deploy=dict( - device=device, - env=dict( - env_id="antmaze-umaze-diverse-v0", - seed=0, - ), - num_deploy_steps=1000, - ), -) diff --git a/grl_pipelines/diffusion_model/configurations/antmaze_umaze_v0_qgpo.py b/grl_pipelines/diffusion_model/configurations/antmaze_umaze_v0_qgpo.py deleted file mode 100644 index 5c2dae9..0000000 --- a/grl_pipelines/diffusion_model/configurations/antmaze_umaze_v0_qgpo.py +++ /dev/null @@ -1,159 +0,0 @@ -import torch -from easydict import EasyDict - -action_size = 8 -state_size = 29 -device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") -t_embedding_dim = 32 -t_encoder = dict( - type="GaussianFourierProjectionTimeEncoder", - args=dict( - embed_dim=t_embedding_dim, - scale=30.0, - ), -) -solver_type = "DPMSolver" -config = EasyDict( - train=dict( - project="antmaze-umaze-v0-qgpo", - simulator=dict( - type="GymEnvSimulator", - args=dict( - env_id="antmaze-umaze-v0", - ), - ), - dataset=dict( - type="QGPOD4RLDataset", - args=dict( - env_id="antmaze-umaze-v0", - device=device, - ), - ), - model=dict( - QGPOPolicy=dict( - device=device, - critic=dict( - device=device, - q_alpha=1.0, - DoubleQNetwork=dict( - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[action_size + state_size, 256, 256], - output_size=1, - activation="relu", - ), - ), - ), - ), - diffusion_model=dict( - device=device, - x_size=action_size, - alpha=1.0, - solver=( - dict( - type="DPMSolver", - args=dict( - order=2, - device=device, - steps=17, - ), - ) - if solver_type == "DPMSolver" - else ( - dict( - type="ODESolver", - args=dict( - library="torchdyn", - ), - ) - if solver_type == "ODESolver" - else dict( - type="SDESolver", - args=dict( - library="torchsde", - ), - ) - ) - ), - path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - reverse_path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - model=dict( - type="noise_function", - args=dict( - t_encoder=t_encoder, - backbone=dict( - type="TemporalSpatialResidualNet", - args=dict( - hidden_sizes=[512, 256, 128], - output_dim=action_size, - t_dim=t_embedding_dim, - condition_dim=state_size, - condition_hidden_dim=32, - t_condition_hidden_dim=128, - ), - ), - ), - ), - energy_guidance=dict( - t_encoder=t_encoder, - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[ - action_size + state_size + t_embedding_dim, - 256, - 256, - ], - output_size=1, - activation="silu", - ), - ), - ), - ), - ) - ), - parameter=dict( - behaviour_policy=dict( - batch_size=4096, - learning_rate=1e-4, - iterations=600000, - ), - sample_per_state=16, - fake_data_t_span=None if solver_type == "DPMSolver" else 32, - energy_guided_policy=dict( - batch_size=256, - ), - critic=dict( - stop_training_iterations=500000, - learning_rate=3e-4, - discount_factor=0.99, - update_momentum=0.995, - ), - energy_guidance=dict( - iterations=600000, - 
learning_rate=3e-4, - ), - evaluation=dict( - evaluation_interval=10000, - guidance_scale=[0.0, 1.0, 2.0, 3.0, 5.0, 8.0, 10.0], - ), - ), - ), - deploy=dict( - device=device, - env=dict( - env_id="antmaze-umaze-v0", - seed=0, - ), - num_deploy_steps=1000, - ), -) diff --git a/grl_pipelines/diffusion_model/configurations/antmaze_umaze_v2_qgpo.py b/grl_pipelines/diffusion_model/configurations/antmaze_umaze_v2_qgpo.py deleted file mode 100644 index e036025..0000000 --- a/grl_pipelines/diffusion_model/configurations/antmaze_umaze_v2_qgpo.py +++ /dev/null @@ -1,159 +0,0 @@ -import torch -from easydict import EasyDict - -action_size = 8 -state_size = 29 -device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") -t_embedding_dim = 32 -t_encoder = dict( - type="GaussianFourierProjectionTimeEncoder", - args=dict( - embed_dim=t_embedding_dim, - scale=30.0, - ), -) -solver_type = "DPMSolver" -config = EasyDict( - train=dict( - project="d4rl-antmaze-v2-qgpo", - simulator=dict( - type="GymEnvSimulator", - args=dict( - env_id="antmaze-umaze-v2", - ), - ), - dataset=dict( - type="QGPOD4RLDataset", - args=dict( - env_id="antmaze-umaze-v2", - device=device, - ), - ), - model=dict( - QGPOPolicy=dict( - device=device, - critic=dict( - device=device, - q_alpha=1.0, - DoubleQNetwork=dict( - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[action_size + state_size, 256, 256], - output_size=1, - activation="relu", - ), - ), - ), - ), - diffusion_model=dict( - device=device, - x_size=action_size, - alpha=1.0, - solver=( - dict( - type="DPMSolver", - args=dict( - order=2, - device=device, - steps=17, - ), - ) - if solver_type == "DPMSolver" - else ( - dict( - type="ODESolver", - args=dict( - library="torchdyn", - ), - ) - if solver_type == "ODESolver" - else dict( - type="SDESolver", - args=dict( - library="torchsde", - ), - ) - ) - ), - path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - reverse_path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - model=dict( - type="noise_function", - args=dict( - t_encoder=t_encoder, - backbone=dict( - type="TemporalSpatialResidualNet", - args=dict( - hidden_sizes=[512, 256, 128], - output_dim=action_size, - t_dim=t_embedding_dim, - condition_dim=state_size, - condition_hidden_dim=32, - t_condition_hidden_dim=128, - ), - ), - ), - ), - energy_guidance=dict( - t_encoder=t_encoder, - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[ - action_size + state_size + t_embedding_dim, - 256, - 256, - ], - output_size=1, - activation="silu", - ), - ), - ), - ), - ) - ), - parameter=dict( - behaviour_policy=dict( - batch_size=4096, - learning_rate=1e-4, - iterations=600000, - ), - sample_per_state=16, - fake_data_t_span=None if solver_type == "DPMSolver" else 32, - energy_guided_policy=dict( - batch_size=256, - ), - critic=dict( - stop_training_iterations=500000, - learning_rate=3e-4, - discount_factor=0.99, - update_momentum=0.995, - ), - energy_guidance=dict( - iterations=600000, - learning_rate=3e-4, - ), - evaluation=dict( - evaluation_interval=10000, - guidance_scale=[0.0, 1.0, 2.0, 3.0, 5.0, 8.0, 10.0], - ), - ), - ), - deploy=dict( - device=device, - env=dict( - env_id="antmaze-umaze-v2", - seed=0, - ), - num_deploy_steps=1000, - ), -) diff --git a/grl_pipelines/diffusion_model/configurations/d4rl_halfcheetah_cps.py b/grl_pipelines/diffusion_model/configurations/d4rl_halfcheetah_cps.py deleted file mode 100644 index 0cf642b..0000000 --- 
a/grl_pipelines/diffusion_model/configurations/d4rl_halfcheetah_cps.py +++ /dev/null @@ -1,132 +0,0 @@ -import torch -from easydict import EasyDict - -action_size = 6 -state_size = 17 -device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") - -t_embedding_dim = 16 # CHANGE -t_encoder = dict( - type="SinusoidalPosEmb", - args=dict(dim=t_embedding_dim), -) - -config = EasyDict( - train=dict( - project="d4rl-halfcheetah-cps", - simulator=dict( - type="GymEnvSimulator", - args=dict( - env_id="HalfCheetah-v2", - ), - ), - dataset=dict( - type="D4RLDataset", - args=dict( - env_id="halfcheetah-medium-expert-v2", - device=device, - ), - ), - model=dict( - CPSPolicy=dict( - device=device, - policy_model=dict( - state_dim=state_size, - action_dim=action_size, - layer=2, - ), - LA=1.0, - LA_min=0, - LA_max=100, - target_kl=0.04, - critic=dict( - device=device, - adim=action_size, - sdim=state_size, - DoubleQNetwork=dict( - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[action_size + state_size, 256, 256, 256], - output_size=1, - activation="mish", - ), - ), - ), - ), - diffusion_model=dict( - device=device, - x_size=action_size, - alpha=1.0, - beta=0.01, - solver=dict( - type="ODESolver", - args=dict( - library="torchdiffeq_adjoint", - ), - ), - path=dict( - type="gvp", - beta_0=0.1, - beta_1=20.0, - ), - model=dict( - type="velocity_function", - args=dict( - t_encoder=t_encoder, - backbone=dict( - type="CONCATMLP", - args=dict( - state_dim=state_size, - action_dim=action_size, - t_dim=t_embedding_dim, - ), - ), - ), - ), - ), - ) - ), - parameter=dict( - behaviour_policy=dict( - batch_size=256, - iterations=2000000, - learning_rate=3e-4, - lr_learning_rate=3e-5, - update_momentum=0.005, - update_target_every=5, - update_policy_every=2, - update_lr_every=1000, - step_start_target=1000, - grad_norm=7.0, - t_max=2000, - ), - sample_per_state=16, - critic=dict( - batch_size=256, - learning_rate=3e-4, - discount_factor=0.99, - update_momentum=0.005, - grad_norm=7.0, - max_action=1.0, - t_max=2000, - ), - actor=dict( - batch_size=256, - iterations=1000000, - learning_rate=3e-4, - ), - evaluation=dict( - evaluation_interval=1000, - ), - ), - ), - deploy=dict( - device=device, - env=dict( - env_id="HalfCheetah-v2", - seed=0, - ), - num_deploy_steps=1000, - ), -) diff --git a/grl_pipelines/diffusion_model/configurations/kitchen_complete_v0_qgpo.py b/grl_pipelines/diffusion_model/configurations/kitchen_complete_v0_qgpo.py deleted file mode 100644 index 2be2cf8..0000000 --- a/grl_pipelines/diffusion_model/configurations/kitchen_complete_v0_qgpo.py +++ /dev/null @@ -1,159 +0,0 @@ -import torch -from easydict import EasyDict - -action_size = 9 -state_size = 60 -device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") -t_embedding_dim = 32 -t_encoder = dict( - type="GaussianFourierProjectionTimeEncoder", - args=dict( - embed_dim=t_embedding_dim, - scale=30.0, - ), -) -solver_type = "DPMSolver" -config = EasyDict( - train=dict( - project="kitchen-complete-v0-qgpo", - simulator=dict( - type="GymEnvSimulator", - args=dict( - env_id="kitchen-complete-v0", - ), - ), - dataset=dict( - type="D4RLDataset", - args=dict( - env_id="kitchen-complete-v0", - device=device, - ), - ), - model=dict( - QGPOPolicy=dict( - device=device, - critic=dict( - device=device, - q_alpha=1.0, - DoubleQNetwork=dict( - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[action_size + state_size, 256, 256], - output_size=1, - activation="relu", - 
), - ), - ), - ), - diffusion_model=dict( - device=device, - x_size=action_size, - alpha=1.0, - solver=( - dict( - type="DPMSolver", - args=dict( - order=2, - device=device, - steps=17, - ), - ) - if solver_type == "DPMSolver" - else ( - dict( - type="ODESolver", - args=dict( - library="torchdyn", - ), - ) - if solver_type == "ODESolver" - else dict( - type="SDESolver", - args=dict( - library="torchsde", - ), - ) - ) - ), - path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - reverse_path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - model=dict( - type="noise_function", - args=dict( - t_encoder=t_encoder, - backbone=dict( - type="TemporalSpatialResidualNet", - args=dict( - hidden_sizes=[512, 256, 128], - output_dim=action_size, - t_dim=t_embedding_dim, - condition_dim=state_size, - condition_hidden_dim=32, - t_condition_hidden_dim=128, - ), - ), - ), - ), - energy_guidance=dict( - t_encoder=t_encoder, - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[ - action_size + state_size + t_embedding_dim, - 256, - 256, - ], - output_size=1, - activation="silu", - ), - ), - ), - ), - ) - ), - parameter=dict( - behaviour_policy=dict( - batch_size=4096, - learning_rate=1e-4, - iterations=600000, - ), - sample_per_state=16, - fake_data_t_span=None if solver_type == "DPMSolver" else 32, - energy_guided_policy=dict( - batch_size=256, - ), - critic=dict( - stop_training_iterations=500000, - learning_rate=3e-4, - discount_factor=0.99, - update_momentum=0.995, - ), - energy_guidance=dict( - iterations=600000, - learning_rate=3e-4, - ), - evaluation=dict( - evaluation_interval=10000, - guidance_scale=[0.0, 1.0, 2.0, 3.0, 5.0, 8.0, 10.0], - ), - ), - ), - deploy=dict( - device=device, - env=dict( - env_id="kitchen-complete-v0", - seed=0, - ), - num_deploy_steps=1000, - ), -) diff --git a/grl_pipelines/diffusion_model/configurations/kitchen_mixed_v0_qgpo.py b/grl_pipelines/diffusion_model/configurations/kitchen_mixed_v0_qgpo.py deleted file mode 100644 index 54f490e..0000000 --- a/grl_pipelines/diffusion_model/configurations/kitchen_mixed_v0_qgpo.py +++ /dev/null @@ -1,159 +0,0 @@ -import torch -from easydict import EasyDict - -action_size = 9 -state_size = 60 -device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") -t_embedding_dim = 32 -t_encoder = dict( - type="GaussianFourierProjectionTimeEncoder", - args=dict( - embed_dim=t_embedding_dim, - scale=30.0, - ), -) -solver_type = "DPMSolver" -config = EasyDict( - train=dict( - project="pen-cloned-v1-qgpo", - simulator=dict( - type="GymEnvSimulator", - args=dict( - env_id="pen-cloned-v1", - ), - ), - dataset=dict( - type="D4RLDataset", - args=dict( - env_id="pen-cloned-v1", - device=device, - ), - ), - model=dict( - QGPOPolicy=dict( - device=device, - critic=dict( - device=device, - q_alpha=1.0, - DoubleQNetwork=dict( - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[action_size + state_size, 256, 256], - output_size=1, - activation="relu", - ), - ), - ), - ), - diffusion_model=dict( - device=device, - x_size=action_size, - alpha=1.0, - solver=( - dict( - type="DPMSolver", - args=dict( - order=2, - device=device, - steps=17, - ), - ) - if solver_type == "DPMSolver" - else ( - dict( - type="ODESolver", - args=dict( - library="torchdyn", - ), - ) - if solver_type == "ODESolver" - else dict( - type="SDESolver", - args=dict( - library="torchsde", - ), - ) - ) - ), - path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - reverse_path=dict( - 
type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - model=dict( - type="noise_function", - args=dict( - t_encoder=t_encoder, - backbone=dict( - type="TemporalSpatialResidualNet", - args=dict( - hidden_sizes=[512, 256, 128], - output_dim=action_size, - t_dim=t_embedding_dim, - condition_dim=state_size, - condition_hidden_dim=32, - t_condition_hidden_dim=128, - ), - ), - ), - ), - energy_guidance=dict( - t_encoder=t_encoder, - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[ - action_size + state_size + t_embedding_dim, - 256, - 256, - ], - output_size=1, - activation="silu", - ), - ), - ), - ), - ) - ), - parameter=dict( - behaviour_policy=dict( - batch_size=4096, - learning_rate=1e-4, - iterations=600000, - ), - sample_per_state=16, - fake_data_t_span=None if solver_type == "DPMSolver" else 32, - energy_guided_policy=dict( - batch_size=256, - ), - critic=dict( - stop_training_iterations=500000, - learning_rate=3e-4, - discount_factor=0.99, - update_momentum=0.995, - ), - energy_guidance=dict( - iterations=600000, - learning_rate=3e-4, - ), - evaluation=dict( - evaluation_interval=10000, - guidance_scale=[0.0, 1.0, 2.0, 3.0, 5.0, 8.0, 10.0], - ), - ), - ), - deploy=dict( - device=device, - env=dict( - env_id="pen-cloned-v1", - seed=0, - ), - num_deploy_steps=1000, - ), -) diff --git a/grl_pipelines/diffusion_model/configurations/kitchen_partial_v0_qgpo.py b/grl_pipelines/diffusion_model/configurations/kitchen_partial_v0_qgpo.py deleted file mode 100644 index 889c116..0000000 --- a/grl_pipelines/diffusion_model/configurations/kitchen_partial_v0_qgpo.py +++ /dev/null @@ -1,159 +0,0 @@ -import torch -from easydict import EasyDict - -action_size = 9 -state_size = 60 -device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") -t_embedding_dim = 32 -t_encoder = dict( - type="GaussianFourierProjectionTimeEncoder", - args=dict( - embed_dim=t_embedding_dim, - scale=30.0, - ), -) -solver_type = "DPMSolver" -config = EasyDict( - train=dict( - project="kitchen-partial-v0-qgpo", - simulator=dict( - type="GymEnvSimulator", - args=dict( - env_id="kitchen-partial-v0", - ), - ), - dataset=dict( - type="D4RLDataset", - args=dict( - env_id="kitchen-partial-v0", - device=device, - ), - ), - model=dict( - QGPOPolicy=dict( - device=device, - critic=dict( - device=device, - q_alpha=1.0, - DoubleQNetwork=dict( - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[action_size + state_size, 256, 256], - output_size=1, - activation="relu", - ), - ), - ), - ), - diffusion_model=dict( - device=device, - x_size=action_size, - alpha=1.0, - solver=( - dict( - type="DPMSolver", - args=dict( - order=2, - device=device, - steps=17, - ), - ) - if solver_type == "DPMSolver" - else ( - dict( - type="ODESolver", - args=dict( - library="torchdyn", - ), - ) - if solver_type == "ODESolver" - else dict( - type="SDESolver", - args=dict( - library="torchsde", - ), - ) - ) - ), - path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - reverse_path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - model=dict( - type="noise_function", - args=dict( - t_encoder=t_encoder, - backbone=dict( - type="TemporalSpatialResidualNet", - args=dict( - hidden_sizes=[512, 256, 128], - output_dim=action_size, - t_dim=t_embedding_dim, - condition_dim=state_size, - condition_hidden_dim=32, - t_condition_hidden_dim=128, - ), - ), - ), - ), - energy_guidance=dict( - t_encoder=t_encoder, - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[ 
- action_size + state_size + t_embedding_dim, - 256, - 256, - ], - output_size=1, - activation="silu", - ), - ), - ), - ), - ) - ), - parameter=dict( - behaviour_policy=dict( - batch_size=4096, - learning_rate=1e-4, - iterations=600000, - ), - sample_per_state=16, - fake_data_t_span=None if solver_type == "DPMSolver" else 32, - energy_guided_policy=dict( - batch_size=256, - ), - critic=dict( - stop_training_iterations=500000, - learning_rate=3e-4, - discount_factor=0.99, - update_momentum=0.995, - ), - energy_guidance=dict( - iterations=600000, - learning_rate=3e-4, - ), - evaluation=dict( - evaluation_interval=10000, - guidance_scale=[0.0, 1.0, 2.0, 3.0, 5.0, 8.0, 10.0], - ), - ), - ), - deploy=dict( - device=device, - env=dict( - env_id="kitchen-partial-v0", - seed=0, - ), - num_deploy_steps=1000, - ), -) diff --git a/grl_pipelines/diffusion_model/configurations/lunarlander_continuous_cps.py b/grl_pipelines/diffusion_model/configurations/lunarlander_continuous_cps.py deleted file mode 100644 index 63d2663..0000000 --- a/grl_pipelines/diffusion_model/configurations/lunarlander_continuous_cps.py +++ /dev/null @@ -1,127 +0,0 @@ -import torch -from easydict import EasyDict - -action_size = 2 -state_size = 8 -device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") - -t_embedding_dim = 64 # CHANGE -t_encoder = dict( - type="GaussianFourierProjectionTimeEncoder", - args=dict( - embed_dim=t_embedding_dim, - scale=30.0, - ), -) - -config = EasyDict( - train=dict( - project="LunarLanderContinuous-cps-srpo", - device=device, - simulator=dict( - type="GymEnvSimulator", - args=dict( - env_id="LunarLanderContinuous-v2", - ), - ), - dataset=dict( - type="QGPOCustomizedDataset", - args=dict( - env_id="LunarLanderContinuous-v2", - device=device, - numpy_data_path="./data.npz", - ), - ), - model=dict( - CPSPolicy=dict( - device=device, - policy_model=dict( - state_dim=state_size, - action_dim=action_size, - layer=2, - ), - critic=dict( - device=device, - adim=action_size, - sdim=state_size, - layers=2, - update_momentum=0.95, - DoubleQNetwork=dict( - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[action_size + state_size, 256, 256], - output_size=1, - activation="relu", - ), - ), - ), - ), - diffusion_model=dict( - device=device, - x_size=action_size, - alpha=1.0, - beta=0.01, - solver=dict( - type="ODESolver", - args=dict( - library="torchdiffeq_adjoint", - ), - ), - path=dict( - type="linear", - beta_0=0.1, - beta_1=20.0, - ), - model=dict( - type="noise_function", - args=dict( - t_encoder=t_encoder, - backbone=dict( - type="ALLCONCATMLP", - args=dict( - input_dim=state_size + action_size, - output_dim=action_size, - num_blocks=3, - ), - ), - ), - ), - ), - ) - ), - parameter=dict( - training_loss_type="score_matching", - behaviour_policy=dict( - batch_size=2048, - learning_rate=3e-4, - iterations=600000, - ), - sample_per_state=16, - critic=dict( - batch_size=256, - iterations=600000, - learning_rate=3e-4, - discount_factor=0.99, - tau=0.7, - moment=0.995, - ), - actor=dict( - batch_size=256, - iterations=1000000, - learning_rate=3e-4, - ), - evaluation=dict( - evaluation_interval=1000, - ), - ), - ), - deploy=dict( - device=device, - env=dict( - env_id="LunarLanderContinuous-v2", - seed=0, - ), - num_deploy_steps=1000, - ), -) diff --git a/grl_pipelines/diffusion_model/configurations/lunarlander_continuous_qgpo_oneline.py b/grl_pipelines/diffusion_model/configurations/lunarlander_continuous_qgpo_oneline.py deleted file mode 100644 index 
ffe7205..0000000 --- a/grl_pipelines/diffusion_model/configurations/lunarlander_continuous_qgpo_oneline.py +++ /dev/null @@ -1,161 +0,0 @@ -import torch -from easydict import EasyDict - -action_size = 2 -state_size = 8 -sample_per_state = 16 -device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") -t_embedding_dim = 32 -t_encoder = dict( - type="GaussianFourierProjectionTimeEncoder", - args=dict( - embed_dim=t_embedding_dim, - scale=30.0, - ), -) -solver_type = "ODESolver" - -config = EasyDict( - train=dict( - project="LunarLanderContinuous-v2-QGPO-Online", - simulator=dict( - type="GymEnvSimulator", - args=dict( - env_id="LunarLanderContinuous-v2", - ), - ), - dataset=dict( - type="QGPOOnlineDataset", - args=dict( - fake_action_shape=sample_per_state, - device=device, - ), - ), - model=dict( - QGPOPolicy=dict( - device=device, - critic=dict( - device=device, - q_alpha=1.0, - DoubleQNetwork=dict( - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[action_size + state_size, 256, 256], - output_size=1, - activation="relu", - ), - ), - ), - ), - diffusion_model=dict( - device=device, - x_size=action_size, - alpha=1.0, - solver=( - dict( - type="DPMSolver", - args=dict( - order=2, - device=device, - steps=17, - ), - ) - if solver_type == "DPMSolver" - else ( - dict( - type="ODESolver", - args=dict( - library="torchdyn", - ), - ) - if solver_type == "ODESolver" - else dict( - type="SDESolver", - args=dict( - library="torchsde", - ), - ) - ) - ), - path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - reverse_path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - model=dict( - type="noise_function", - args=dict( - t_encoder=t_encoder, - backbone=dict( - type="TemporalSpatialResidualNet", - args=dict( - hidden_sizes=[512, 256, 128], - output_dim=action_size, - t_dim=t_embedding_dim, - condition_dim=state_size, - condition_hidden_dim=32, - t_condition_hidden_dim=128, - ), - ), - ), - ), - energy_guidance=dict( - t_encoder=t_encoder, - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[ - action_size + state_size + t_embedding_dim, - 256, - 256, - ], - output_size=1, - activation="silu", - ), - ), - ), - ), - ) - ), - parameter=dict( - online_rl=dict( - iterations=100000, - collect_steps=1, - collect_steps_at_the_beginning=10000, - drop_ratio=0.00001, - batch_size=2000, - ), - behaviour_policy=dict( - learning_rate=1e-4, - ), - sample_per_state=16, - t_span=None if solver_type == "DPMSolver" else 32, - critic=dict( - learning_rate=1e-4, - discount_factor=0.99, - update_momentum=0.995, - ), - energy_guidance=dict( - learning_rate=1e-4, - ), - evaluation=dict( - evaluation_interval=1000, - guidance_scale=[0.0, 1.0, 4.0, 16.0], - ), - ), - ), - deploy=dict( - device=device, - env=dict( - env_id="LunarLanderContinuous-v2", - seed=0, - ), - num_deploy_steps=1000, - ), -) diff --git a/grl_pipelines/diffusion_model/configurations/pen_cloned_v1_qgpo.py b/grl_pipelines/diffusion_model/configurations/pen_cloned_v1_qgpo.py deleted file mode 100644 index 50cac2a..0000000 --- a/grl_pipelines/diffusion_model/configurations/pen_cloned_v1_qgpo.py +++ /dev/null @@ -1,159 +0,0 @@ -import torch -from easydict import EasyDict - -action_size = 24 -state_size = 45 -device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") -t_embedding_dim = 32 -t_encoder = dict( - type="GaussianFourierProjectionTimeEncoder", - args=dict( - embed_dim=t_embedding_dim, - scale=30.0, - ), -) -solver_type = 
"DPMSolver" -config = EasyDict( - train=dict( - project="pen-cloned-v1-qgpo", - simulator=dict( - type="GymEnvSimulator", - args=dict( - env_id="pen-cloned-v1", - ), - ), - dataset=dict( - type="D4RLDataset", - args=dict( - env_id="pen-cloned-v1", - device=device, - ), - ), - model=dict( - QGPOPolicy=dict( - device=device, - critic=dict( - device=device, - q_alpha=1.0, - DoubleQNetwork=dict( - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[action_size + state_size, 256, 256], - output_size=1, - activation="relu", - ), - ), - ), - ), - diffusion_model=dict( - device=device, - x_size=action_size, - alpha=1.0, - solver=( - dict( - type="DPMSolver", - args=dict( - order=2, - device=device, - steps=17, - ), - ) - if solver_type == "DPMSolver" - else ( - dict( - type="ODESolver", - args=dict( - library="torchdyn", - ), - ) - if solver_type == "ODESolver" - else dict( - type="SDESolver", - args=dict( - library="torchsde", - ), - ) - ) - ), - path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - reverse_path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - model=dict( - type="noise_function", - args=dict( - t_encoder=t_encoder, - backbone=dict( - type="TemporalSpatialResidualNet", - args=dict( - hidden_sizes=[512, 256, 128], - output_dim=action_size, - t_dim=t_embedding_dim, - condition_dim=state_size, - condition_hidden_dim=32, - t_condition_hidden_dim=128, - ), - ), - ), - ), - energy_guidance=dict( - t_encoder=t_encoder, - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[ - action_size + state_size + t_embedding_dim, - 256, - 256, - ], - output_size=1, - activation="silu", - ), - ), - ), - ), - ) - ), - parameter=dict( - behaviour_policy=dict( - batch_size=4096, - learning_rate=1e-4, - iterations=600000, - ), - sample_per_state=16, - fake_data_t_span=None if solver_type == "DPMSolver" else 32, - energy_guided_policy=dict( - batch_size=256, - ), - critic=dict( - stop_training_iterations=500000, - learning_rate=3e-4, - discount_factor=0.99, - update_momentum=0.995, - ), - energy_guidance=dict( - iterations=600000, - learning_rate=3e-4, - ), - evaluation=dict( - evaluation_interval=10000, - guidance_scale=[0.0, 1.0, 2.0, 3.0, 5.0, 8.0, 10.0], - ), - ), - ), - deploy=dict( - device=device, - env=dict( - env_id="pen-cloned-v1", - seed=0, - ), - num_deploy_steps=1000, - ), -) diff --git a/grl_pipelines/diffusion_model/configurations/pen_human_v1_qgpo.py b/grl_pipelines/diffusion_model/configurations/pen_human_v1_qgpo.py deleted file mode 100644 index 68d02db..0000000 --- a/grl_pipelines/diffusion_model/configurations/pen_human_v1_qgpo.py +++ /dev/null @@ -1,159 +0,0 @@ -import torch -from easydict import EasyDict - -action_size = 24 -state_size = 45 -device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu") -t_embedding_dim = 32 -t_encoder = dict( - type="GaussianFourierProjectionTimeEncoder", - args=dict( - embed_dim=t_embedding_dim, - scale=30.0, - ), -) -solver_type = "DPMSolver" -config = EasyDict( - train=dict( - project="pen-human-v1-qgpo", - simulator=dict( - type="GymEnvSimulator", - args=dict( - env_id="pen-human-v1", - ), - ), - dataset=dict( - type="D4RLDataset", - args=dict( - env_id="pen-human-v1", - device=device, - ), - ), - model=dict( - QGPOPolicy=dict( - device=device, - critic=dict( - device=device, - q_alpha=1.0, - DoubleQNetwork=dict( - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[action_size + state_size, 256, 256], - output_size=1, - activation="relu", 
- ), - ), - ), - ), - diffusion_model=dict( - device=device, - x_size=action_size, - alpha=1.0, - solver=( - dict( - type="DPMSolver", - args=dict( - order=2, - device=device, - steps=17, - ), - ) - if solver_type == "DPMSolver" - else ( - dict( - type="ODESolver", - args=dict( - library="torchdyn", - ), - ) - if solver_type == "ODESolver" - else dict( - type="SDESolver", - args=dict( - library="torchsde", - ), - ) - ) - ), - path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - reverse_path=dict( - type="linear_vp_sde", - beta_0=0.1, - beta_1=20.0, - ), - model=dict( - type="noise_function", - args=dict( - t_encoder=t_encoder, - backbone=dict( - type="TemporalSpatialResidualNet", - args=dict( - hidden_sizes=[512, 256, 128], - output_dim=action_size, - t_dim=t_embedding_dim, - condition_dim=state_size, - condition_hidden_dim=32, - t_condition_hidden_dim=128, - ), - ), - ), - ), - energy_guidance=dict( - t_encoder=t_encoder, - backbone=dict( - type="ConcatenateMLP", - args=dict( - hidden_sizes=[ - action_size + state_size + t_embedding_dim, - 256, - 256, - ], - output_size=1, - activation="silu", - ), - ), - ), - ), - ) - ), - parameter=dict( - behaviour_policy=dict( - batch_size=4096, - learning_rate=1e-4, - iterations=600000, - ), - sample_per_state=16, - fake_data_t_span=None if solver_type == "DPMSolver" else 32, - energy_guided_policy=dict( - batch_size=256, - ), - critic=dict( - stop_training_iterations=500000, - learning_rate=3e-4, - discount_factor=0.99, - update_momentum=0.995, - ), - energy_guidance=dict( - iterations=600000, - learning_rate=3e-4, - ), - evaluation=dict( - evaluation_interval=10000, - guidance_scale=[0.0, 1.0, 2.0, 3.0, 5.0, 8.0, 10.0], - ), - ), - ), - deploy=dict( - device=device, - env=dict( - env_id="pen-human-v1", - seed=0, - ), - num_deploy_steps=1000, - ), -)