diff --git a/examples/observation_space.py b/examples/observation_space.py index 9a96b7a2..ae070fcc 100644 --- a/examples/observation_space.py +++ b/examples/observation_space.py @@ -1,8 +1,9 @@ import gymnasium as gym import hydra -from omegaconf import DictConfig +from omegaconf import DictConfig, OmegaConf from sheeprl.utils.env import make_env +from sheeprl.utils.utils import dotdict @hydra.main(version_base="1.3", config_path="../sheeprl/configs", config_name="env_config") @@ -23,6 +24,7 @@ def main(cfg: DictConfig) -> None: "droq", "ppo_recurrent", }: + cfg = dotdict(OmegaConf.to_container(cfg, resolve=True)) env: gym.Env = make_env(cfg, cfg.seed, 0)() else: raise ValueError( diff --git a/howto/select_observations.md b/howto/select_observations.md index 69c24ef8..6eb2a5b7 100644 --- a/howto/select_observations.md +++ b/howto/select_observations.md @@ -19,10 +19,10 @@ The algorithms that can work with both image and vector observations are specifi * Plan2Explore (Dreamer-V3) To run one of these algorithms, it is necessary to specify which observations to use: it is possible to select all the vector observations or only some of them or none of them. Moreover, you can select all/some/none of the image observations. -You just need to pass the `mlp_keys` and `cnn_keys` of the encoder and the decoder to the script to select the vector observations and the image observations, respectively. +You just need to pass the `algo.mlp_keys` and `algo.cnn_keys` of the encoder and the decoder to the script to select the vector observations and the image observations, respectively. > **Note** > -> The `mlp_keys` and the `cnn_keys` specified for the encoder are used by default as `mlp_keys` and `cnn_keys` of the decoder, respectively. +> The `algo.mlp_keys` and the `algo.cnn_keys` specified for the encoder are used by default as `algo.mlp_keys` and `algo.cnn_keys` of the decoder, respectively. 
> **Recommended** > @@ -35,34 +35,34 @@ diambra run python sheeprl.py exp=ppo env=diambra env.id=doapp env.num_envs=1 al > **Note** > -> By default the `mlp_keys` and `cnn_keys` arguments are set to `[]` (empty list), so no observations are selected for the training. This might raise an exception: the agent tries to automatically set the *mlp* or *cnn* keys, but it is not always possible, so it is **strongly recommended to properly set them**. +> By default the `algo.mlp_keys` and `algo.cnn_keys` arguments are set to `[]` (empty list), so no observations are selected for the training. This will raise an exception: in fact, **every algorithm must specify at least one of them**. It is important to know the observations the environment provides, for instance, the *DIAMBRA* environments provide both vector observations and image observations, whereas all the atari environments provide only the image observations. > **Note** > -> For some environments provided by Gymnasium, e.g. `LunarLander-v2` or `CartPole-v1`, only vector observations are returned, but it is possible to extract the image observation from the render. To do this, it is sufficient to specify the `rgb` key to the `cnn_keys` args: +> For some environments provided by Gymnasium, e.g. `LunarLander-v2` or `CartPole-v1`, only vector observations are returned, but it is possible to extract the image observation from the render. To do this, it is sufficient to specify the `rgb` key to the `algo.cnn_keys` args: > `python sheeprl.py exp=... algo.cnn_keys.encoder=[rgb]` #### Frame Stack -For image observations, it is possible to stack the last $n$ observations with the argument `frame_stack`. All the observations specified in the `cnn_keys` argument are stacked. +For image observations, it is possible to stack the last $n$ observations with the argument `frame_stack`. All the observations specified in the `algo.cnn_keys` argument are stacked. ```bash python sheeprl.py exp=... 
env=dmc algo.cnn_keys.encoder=[rgb] env.frame_stack=3 ``` #### How to choose the correct keys -When the environment provides both the vector and image observations, you just need to specify which observations you want to use with the `mlp_keys` and `cnn_keys`, respectively. +When the environment provides both the vector and image observations, you just need to specify which observations you want to use with the `algo.mlp_keys` and `algo.cnn_keys`, respectively. Instead, for those environments that natively do not support both types of observations, we provide a method to obtain the **image observations from the vector observations (NOT VICE VERSA)**. It means that if you choose an environment with only vector observations, you can get also the image observations, but if you choose an environment with only image observations, you **cannot** get the vector observations. There can be three possible scenarios: -1. You do **not** want to **use** the **image** observations: you don't have to specify any `cnn_keys` while you have to select the `mlp_keys`: - 1. if the environment provides more than one vector observation, then you have to choose between them; - 2. if the environment provides only one vector observation, you can choose the name of the *mlp key* or use the default one (`state`, used when you do not specify any *mlp keys*). -2. You want to **use only** the **image** observation: you don't have to specify any `mlp_keys` while **you must specify the name of the *cnn key*** (if the image observation has to be created from the vector one, the `make_env` function will automatically bind the observation with the specified key, otherwise you must choose a valid one). -3. You want to **use both** the **vector** and **image** observations: You must specify the *cnn key* (as point 2). Instead, for the vector observations, you have two possibilities: - 1. if the environment provides more than one vector observation, then you **must choose between them**; - 2. 
if the environment provides only one vector observation, you **must specify** the default vector observation key, i.e., **`state`**. +1. You do **not** want to **use** the **image** observations: you don't have to specify any `algo.cnn_keys` while you have to select the `algo.mlp_keys`: + 1. if the environment provides more than one vector observation, then you **must choose between them**; + 2. if the environment provides only one vector observation, you can choose the name of the *mlp key*. +2. You want to **use only** the **image** observation: you don't have to specify any `algo.mlp_keys` while **you must specify the name of the *cnn key*** (if the image observation has to be created from the vector one, the `make_env` function will automatically bind the observation with the specified key, otherwise you must choose a valid one). +3. You want to **use both** the **vector** and **image** observations: you must specify the *cnn key* (as point 2). Instead, for the vector observations, you have two possibilities: + 1. if the environment provides more than one vector observation, then you **must choose between them**; + 2. if the environment provides only one vector observation, you can choose the name of the *mlp key*. 
#### Different observations for the Encoder and the Decoder You can specify different observations for the encoder and the decoder, but there are some constraints: diff --git a/sheeprl/algos/p2e_dv1/p2e_dv1_finetuning.py b/sheeprl/algos/p2e_dv1/p2e_dv1_finetuning.py index b595789e..109f9c83 100644 --- a/sheeprl/algos/p2e_dv1/p2e_dv1_finetuning.py +++ b/sheeprl/algos/p2e_dv1/p2e_dv1_finetuning.py @@ -70,7 +70,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any], exploration_cfg: Dict[str, Any]): cfg.env.num_envs = exploration_cfg.env.num_envs # There must be the same cnn and mlp keys during exploration and finetuning cfg.algo.cnn_keys = exploration_cfg.algo.cnn_keys - cfg.mlp_keys = exploration_cfg.mlp_keys + cfg.algo.mlp_keys = exploration_cfg.algo.mlp_keys # These arguments cannot be changed cfg.env.screen_size = 64 diff --git a/sheeprl/algos/p2e_dv2/p2e_dv2_finetuning.py b/sheeprl/algos/p2e_dv2/p2e_dv2_finetuning.py index 72aff80f..54d5fec1 100644 --- a/sheeprl/algos/p2e_dv2/p2e_dv2_finetuning.py +++ b/sheeprl/algos/p2e_dv2/p2e_dv2_finetuning.py @@ -74,7 +74,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any], exploration_cfg: Dict[str, Any]): cfg.env.num_envs = exploration_cfg.env.num_envs # There must be the same cnn and mlp keys during exploration and finetuning cfg.algo.cnn_keys = exploration_cfg.algo.cnn_keys - cfg.mlp_keys = exploration_cfg.mlp_keys + cfg.algo.mlp_keys = exploration_cfg.algo.mlp_keys # These arguments cannot be changed cfg.env.screen_size = 64 diff --git a/sheeprl/algos/p2e_dv3/p2e_dv3_finetuning.py b/sheeprl/algos/p2e_dv3/p2e_dv3_finetuning.py index 633556de..83debfe3 100644 --- a/sheeprl/algos/p2e_dv3/p2e_dv3_finetuning.py +++ b/sheeprl/algos/p2e_dv3/p2e_dv3_finetuning.py @@ -69,7 +69,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any], exploration_cfg: Dict[str, Any]): cfg.env.num_envs = exploration_cfg.env.num_envs # There must be the same cnn and mlp keys during exploration and finetuning cfg.algo.cnn_keys = exploration_cfg.algo.cnn_keys - 
cfg.mlp_keys = exploration_cfg.mlp_keys + cfg.algo.mlp_keys = exploration_cfg.algo.mlp_keys # These arguments cannot be changed cfg.env.frame_stack = 1 diff --git a/sheeprl/configs/env_config.yaml b/sheeprl/configs/env_config.yaml index d36ba66b..8f17821b 100644 --- a/sheeprl/configs/env_config.yaml +++ b/sheeprl/configs/env_config.yaml @@ -14,9 +14,11 @@ exp_name: "default" root_dir: $env_logs run_name: ${env.id} agent: ??? -cnn_keys: - encoder: [] - decoder: ${algo.cnn_keys.encoder} -mlp_keys: - encoder: [] - decoder: ${algo.mlp_keys.encoder} + +algo: + cnn_keys: + encoder: [] + decoder: ${algo.cnn_keys.encoder} + mlp_keys: + encoder: [] + decoder: ${algo.mlp_keys.encoder}