Update after conversation on gh

Eclectic-Sheep · Nov 24, 2023 · 8fc25ee · 8fc25ee
1 parent 428e875
commit 8fc25ee
Show file tree

Hide file tree

Showing 6 changed files with 27 additions and 23 deletions.
diff --git a/examples/observation_space.py b/examples/observation_space.py
@@ -1,8 +1,9 @@
 import gymnasium as gym
 import hydra
-from omegaconf import DictConfig
+from omegaconf import DictConfig, OmegaConf
 
 from sheeprl.utils.env import make_env
+from sheeprl.utils.utils import dotdict
 
 
 @hydra.main(version_base="1.3", config_path="../sheeprl/configs", config_name="env_config")
@@ -23,6 +24,7 @@ def main(cfg: DictConfig) -> None:
         "droq",
         "ppo_recurrent",
     }:
+        cfg = dotdict(OmegaConf.to_container(cfg, resolve=True))
         env: gym.Env = make_env(cfg, cfg.seed, 0)()
     else:
         raise ValueError(

diff --git a/howto/select_observations.md b/howto/select_observations.md
@@ -19,10 +19,10 @@ The algorithms that can work with both image and vector observations are specifi
 * Plan2Explore (Dreamer-V3)
 
 To run one of these algorithms, it is necessary to specify which observations to use: it is possible to select all the vector observations or only some of them or none of them. Moreover, you can select all/some/none of the image observations.
-You just need to pass the `mlp_keys` and `cnn_keys` of the encoder and the decoder to the script to select the vector observations and the image observations, respectively.
+You just need to pass the  `algo.mlp_keys` and  `algo.cnn_keys` of the encoder and the decoder to the script to select the vector observations and the image observations, respectively.
 > **Note**
 >
-> The `mlp_keys` and the `cnn_keys` specified for the encoder are used by default as `mlp_keys` and `cnn_keys` of the decoder, respectively.
+> The  `algo.mlp_keys` and the  `algo.cnn_keys` specified for the encoder are used by default as  `algo.mlp_keys` and  `algo.cnn_keys` of the decoder, respectively.
 
 > **Recommended**
 >
@@ -35,34 +35,34 @@ diambra run python sheeprl.py exp=ppo env=diambra env.id=doapp env.num_envs=1 al
 
 > **Note**
 >
-> By default the `mlp_keys` and `cnn_keys` arguments are set to `[]` (empty list), so no observations are selected for the training. This might raise an exception: the agent tries to automatically set the *mlp* or *cnn* keys, but it is not always possible, so it is **strongly recommended to properly set them**.
+> By default the `algo.mlp_keys` and `algo.cnn_keys` arguments are set to `[]` (empty list), so no observations are selected for the training. This will raise an exception: in fact, **every algorithm must specify at least one of them**.
 
 It is important to know the observations the environment provides, for instance, the *DIAMBRA* environments provide both vector observations and image observations, whereas all the atari environments provide only the image observations. 
 > **Note**
 >
-> For some environments provided by Gymnasium, e.g. `LunarLander-v2` or `CartPole-v1`, only vector observations are returned, but it is possible to extract the image observation from the render. To do this, it is sufficient to specify the `rgb` key to the `cnn_keys` args:
+> For some environments provided by Gymnasium, e.g. `LunarLander-v2` or `CartPole-v1`, only vector observations are returned, but it is possible to extract the image observation from the render. To do this, it is sufficient to add the `rgb` key to the `algo.cnn_keys` argument:
 > `python sheeprl.py exp=... algo.cnn_keys.encoder=[rgb]`
 
 #### Frame Stack
-For image observations, it is possible to stack the last $n$ observations with the argument `frame_stack`. All the observations specified in the `cnn_keys` argument are stacked.
+For image observations, it is possible to stack the last $n$ observations with the argument `frame_stack`. All the observations specified in the  `algo.cnn_keys` argument are stacked.
 
 ```bash
 python sheeprl.py exp=... env=dmc algo.cnn_keys.encoder=[rgb] env.frame_stack=3
 ```
 
 #### How to choose the correct keys
-When the environment provides both the vector and image observations, you just need to specify which observations you want to use with the `mlp_keys` and `cnn_keys`, respectively.
+When the environment provides both the vector and image observations, you just need to specify which observations you want to use with the  `algo.mlp_keys` and  `algo.cnn_keys`, respectively.
 
 Instead, for those environments that natively do not support both types of observations, we provide a method to obtain the **image observations from the vector observations (NOT VICE VERSA)**. It means that if you choose an environment with only vector observations, you can get also the image observations, but if you choose an environment with only image observations, you **cannot** get the vector observations.
 
 There can be three possible scenarios:
-1. You do **not** want to **use** the **image** observations: you don't have to specify any `cnn_keys` while you have to select the `mlp_keys`:
-   1. if the environment provides more than one vector observation, then you have to choose between them;
-   2. if the environment provides only one vector observation, you can choose the name of the *mlp key* or use the default one (`state`, used when you do not specify any *mlp keys*).
-2. You want to **use only** the **image** observation: you don't have to specify any `mlp_keys` while **you must specify the name of the *cnn key*** (if the image observation has to be created from the vector one, the `make_env` function will automatically bind the observation with the specified key, otherwise you must choose a valid one).
-3. You want to **use both** the **vector** and **image** observations: You must specify the *cnn key* (as point 2). Instead, for the vector observations, you have two possibilities:
-   1. if the environment provides more than one vector observation, then you **must choose between them**; 
-   2. if the environment provides only one vector observation, you **must specify** the default vector observation key, i.e., **`state`**.
+1. You do **not** want to **use** the **image** observations: you don't have to specify any  `algo.cnn_keys` while you have to select the  `algo.mlp_keys`:
+   1. if the environment provides more than one vector observation, then you **must choose between them**;
+   2. if the environment provides only one vector observation, you can choose the name of the *mlp key*.
+2. You want to **use only** the **image** observation: you don't have to specify any  `algo.mlp_keys` while **you must specify the name of the *cnn key*** (if the image observation has to be created from the vector one, the `make_env` function will automatically bind the observation with the specified key, otherwise you must choose a valid one).
+3. You want to **use both** the **vector** and **image** observations: you must specify the *cnn key* (as in point 2). Instead, for the vector observations, you have two possibilities:
+   1. if the environment provides more than one vector observation, then you **must choose between them**;
+   2. if the environment provides only one vector observation, you can choose the name of the *mlp key*.
 
 #### Different observations for the Encoder and the Decoder
 You can specify different observations for the encoder and the decoder, but there are some constraints:

diff --git a/sheeprl/algos/p2e_dv1/p2e_dv1_finetuning.py b/sheeprl/algos/p2e_dv1/p2e_dv1_finetuning.py
@@ -70,7 +70,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any], exploration_cfg: Dict[str, Any]):
         cfg.env.num_envs = exploration_cfg.env.num_envs
     # There must be the same cnn and mlp keys during exploration and finetuning
     cfg.algo.cnn_keys = exploration_cfg.algo.cnn_keys
-    cfg.mlp_keys = exploration_cfg.mlp_keys
+    cfg.algo.mlp_keys = exploration_cfg.algo.mlp_keys
 
     # These arguments cannot be changed
     cfg.env.screen_size = 64

diff --git a/sheeprl/algos/p2e_dv2/p2e_dv2_finetuning.py b/sheeprl/algos/p2e_dv2/p2e_dv2_finetuning.py
@@ -74,7 +74,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any], exploration_cfg: Dict[str, Any]):
         cfg.env.num_envs = exploration_cfg.env.num_envs
     # There must be the same cnn and mlp keys during exploration and finetuning
     cfg.algo.cnn_keys = exploration_cfg.algo.cnn_keys
-    cfg.mlp_keys = exploration_cfg.mlp_keys
+    cfg.algo.mlp_keys = exploration_cfg.algo.mlp_keys
 
     # These arguments cannot be changed
     cfg.env.screen_size = 64

diff --git a/sheeprl/algos/p2e_dv3/p2e_dv3_finetuning.py b/sheeprl/algos/p2e_dv3/p2e_dv3_finetuning.py
@@ -69,7 +69,7 @@ def main(fabric: Fabric, cfg: Dict[str, Any], exploration_cfg: Dict[str, Any]):
         cfg.env.num_envs = exploration_cfg.env.num_envs
     # There must be the same cnn and mlp keys during exploration and finetuning
     cfg.algo.cnn_keys = exploration_cfg.algo.cnn_keys
-    cfg.mlp_keys = exploration_cfg.mlp_keys
+    cfg.algo.mlp_keys = exploration_cfg.algo.mlp_keys
 
     # These arguments cannot be changed
     cfg.env.frame_stack = 1

diff --git a/sheeprl/configs/env_config.yaml b/sheeprl/configs/env_config.yaml
@@ -14,9 +14,11 @@ exp_name: "default"
 root_dir: $env_logs
 run_name: ${env.id}
 agent: ???
-cnn_keys:
-  encoder: []
-  decoder: ${algo.cnn_keys.encoder}
-mlp_keys:
-  encoder: []
-  decoder: ${algo.mlp_keys.encoder}
+
+algo:
+  cnn_keys:
+    encoder: []
+    decoder: ${algo.cnn_keys.encoder}
+  mlp_keys:
+    encoder: []
+    decoder: ${algo.mlp_keys.encoder}