Eclectic-Sheep · belerico · Sep 18, 2023 · Sep 16, 2023
@@ -9,9 +9,8 @@ max_episode_steps: 1000
 env:
   _target_: sheeprl.envs.dmc.DMCWrapper
   id: ${env.id}
-  height: ${env.screen_size}
   width: ${env.screen_size}
-  frame_skip: ${env.action_repeat}
+  height: ${env.screen_size}
   seed: null
   from_pixels: True
   from_vectors: False
@@ -5,15 +5,15 @@
 if not _IS_DMC_AVAILABLE:
     raise ModuleNotFoundError(_IS_DMC_AVAILABLE)
 
-from typing import Any, Dict, Optional, Tuple
+from typing import Any, Dict, Optional, SupportsFloat, Tuple, Union
 
 import numpy as np
 from dm_control import suite
 from dm_env import specs
 from gymnasium import core, spaces
 
 
-def _spec_to_box(spec, dtype) -> spaces.Space:
+def _spec_to_box(spec, dtype) -> spaces.Box:
     def extract_min_max(s):
         assert s.dtype == np.float64 or s.dtype == np.float32
         dim = int(np.prod(s.shape))
@@ -54,7 +54,6 @@ def __init__(
         height: int = 84,
         width: int = 84,
         camera_id: int = 0,
-        frame_skip: int = 1,
         task_kwargs: Optional[Dict[Any, Any]] = None,
         environment_kwargs: Optional[Dict[Any, Any]] = None,
         channels_first: bool = True,
@@ -93,9 +92,6 @@ def __init__(
                 Defaults to 84.
             camera_id (int, optional): the id of the camera from where to take the image observation.
                 Defaults to 0.
-            frame_skip (int, optional): action repeat value. Given an action, `frame_skip` steps will be performed
-                in the environment with the given action.
-                Defaults to 1.
             task_kwargs (Optional[Dict[Any, Any]], optional): Optional dict of keyword arguments for the task.
                 Defaults to None.
             environment_kwargs (Optional[Dict[Any, Any]], optional): Optional dict specifying
@@ -121,7 +117,6 @@ def __init__(
         self._height = height
         self._width = width
         self._camera_id = camera_id
-        self._frame_skip = frame_skip
         self._channels_first = channels_first
 
         # create task
@@ -137,6 +132,10 @@ def __init__(
         self._true_action_space = _spec_to_box([self._env.action_spec()], np.float32)
         self._norm_action_space = spaces.Box(low=-1.0, high=1.0, shape=self._true_action_space.shape, dtype=np.float32)
 
+        # set the reward range
+        reward_space = _spec_to_box([self._env.reward_spec()], np.float32)
+        self._reward_range = (reward_space.low.item(), reward_space.high.item())
+
         # create observation space
         if from_pixels:
             shape = (3, height, width) if channels_first else (height, width, 3)
@@ -163,7 +162,7 @@ def __init__(
     def __getattr__(self, name):
         return getattr(self._env, name)
 
-    def _get_obs(self, time_step):
+    def _get_obs(self, time_step) -> Union[Dict[str, np.ndarray], np.ndarray]:
         if self._from_pixels:
             rgb_obs = self.render(camera_id=self._camera_id)
             if self._channels_first:
@@ -177,7 +176,7 @@ def _get_obs(self, time_step):
         else:
             return rgb_obs
 
-    def _convert_action(self, action):
+    def _convert_action(self, action) -> np.ndarray:
         action = action.astype(np.float64)
         true_delta = self._true_action_space.high - self._true_action_space.low
         norm_delta = self._norm_action_space.high - self._norm_action_space.low
@@ -187,20 +186,20 @@ def _convert_action(self, action):
         return action
 
     @property
-    def observation_space(self):
+    def observation_space(self) -> Union[spaces.Dict, spaces.Box]:
         return self._observation_space
 
     @property
-    def state_space(self):
+    def state_space(self) -> spaces.Box:
         return self._state_space
 
     @property
-    def action_space(self):
+    def action_space(self) -> spaces.Box:
         return self._norm_action_space
 
     @property
-    def reward_range(self):
-        return 0, self._frame_skip
+    def reward_range(self) -> Tuple[float, float]:
+        return self._reward_range
 
     @property
     def render_mode(self) -> str:
@@ -211,33 +210,31 @@ def seed(self, seed: Optional[int] = None):
         self._norm_action_space.seed(seed)
         self._observation_space.seed(seed)
 
-    def step(self, action):
+    def step(
+        self, action: Any
+    ) -> Tuple[Union[Dict[str, np.ndarray], np.ndarray], SupportsFloat, bool, bool, Dict[str, Any]]:
         assert self._norm_action_space.contains(action)
         action = self._convert_action(action)
         assert self._true_action_space.contains(action)
-        reward = 0
-        extra = {"internal_state": self._env.physics.get_state().copy()}
-
-        for _ in range(self._frame_skip):
-            time_step = self._env.step(action)
-            reward += time_step.reward or 0
-            done = time_step.last()
-            if done:
-                break
+        time_step = self._env.step(action)
+        reward = time_step.reward or 0.0
+        done = time_step.last()
         obs = self._get_obs(time_step)
         self.current_state = _flatten_obs(time_step.observation)
+        extra = {}
         extra["discount"] = time_step.discount
+        extra["internal_state"] = self._env.physics.get_state().copy()
         return obs, reward, done, False, extra
 
     def reset(
         self, seed: Optional[int] = None, options: Optional[Dict[str, Any]] = None
-    ) -> Tuple[np.ndarray, Dict[str, Any]]:
+    ) -> Tuple[Union[Dict[str, np.ndarray], np.ndarray], Dict[str, Any]]:
         time_step = self._env.reset()
         self.current_state = _flatten_obs(time_step.observation)
         obs = self._get_obs(time_step)
         return obs, {}
 
-    def render(self, camera_id: Optional[int] = None):
+    def render(self, camera_id: Optional[int] = None) -> np.ndarray:
         return self._env.physics.render(height=self._height, width=self._width, camera_id=camera_id or self._camera_id)
 
     def close(self):

@@ -14,7 +14,7 @@
 if _IS_DIAMBRA_ARENA_AVAILABLE and _IS_DIAMBRA_AVAILABLE:
     from sheeprl.envs.diambra_wrapper import DiambraWrapper
 if _IS_DMC_AVAILABLE:
-    from sheeprl.envs.dmc import DMCWrapper
+    pass
 
 
 def make_env(
@@ -86,14 +86,11 @@ def thunk() -> gym.Env:
         if "rank" in cfg.env.env:
             instantiate_kwargs["rank"] = rank + vector_env_idx
         env = hydra.utils.instantiate(cfg.env.env, **instantiate_kwargs)
-        if "mujoco" in env_spec:
-            env.frame_skip = 0
 
         # action repeat
         if (
             cfg.env.action_repeat > 1
             and "atari" not in env_spec
-            and (not _IS_DMC_AVAILABLE or not isinstance(env, DMCWrapper))
             and (not (_IS_DIAMBRA_ARENA_AVAILABLE and _IS_DIAMBRA_AVAILABLE) or not isinstance(env, DiambraWrapper))
         ):
             env = ActionRepeat(env, cfg.env.action_repeat)
@@ -154,41 +151,38 @@ def thunk() -> gym.Env:
 
         def transform_obs(obs: Dict[str, Any]):
             for k in cnn_keys:
-                shape = obs[k].shape
+                current_obs = obs[k]
+                shape = current_obs.shape
                 is_3d = len(shape) == 3
                 is_grayscale = not is_3d or shape[0] == 1 or shape[-1] == 1
                 channel_first = not is_3d or shape[0] in (1, 3)
 
                 # to 3D image
                 if not is_3d:
-                    obs.update({k: np.expand_dims(obs[k], axis=0)})
+                    current_obs = np.expand_dims(current_obs, axis=0)
 
                 # channel last (opencv needs it)
                 if channel_first:
-                    obs.update({k: obs[k].transpose(1, 2, 0)})
+                    current_obs = np.transpose(current_obs, (1, 2, 0))
 
                 # resize
-                if obs[k].shape[:-1] != (cfg.env.screen_size, cfg.env.screen_size):
-                    obs.update(
-                        {
-                            k: cv2.resize(
-                                obs[k], (cfg.env.screen_size, cfg.env.screen_size), interpolation=cv2.INTER_AREA
-                            )
-                        }
+                if current_obs.shape[:-1] != (cfg.env.screen_size, cfg.env.screen_size):
+                    current_obs = cv2.resize(
+                        current_obs, (cfg.env.screen_size, cfg.env.screen_size), interpolation=cv2.INTER_AREA
                     )
 
                 # to grayscale
                 if cfg.env.grayscale and not is_grayscale:
-                    obs.update({k: cv2.cvtColor(obs[k], cv2.COLOR_RGB2GRAY)})
+                    current_obs = cv2.cvtColor(current_obs, cv2.COLOR_RGB2GRAY)
 
                 # back to 3D
-                if len(obs[k].shape) == 2:
-                    obs.update({k: np.expand_dims(obs[k], axis=-1)})
+                if len(current_obs.shape) == 2:
+                    current_obs = np.expand_dims(current_obs, axis=-1)
                     if not cfg.env.grayscale:
-                        obs.update({k: np.repeat(obs[k], 3, axis=-1)})
+                        current_obs = np.repeat(current_obs, 3, axis=-1)
 
                 # channel first (PyTorch default)
-                obs.update({k: obs[k].transpose(2, 0, 1)})
+                obs[k] = current_obs.transpose(2, 0, 1)
 
             return obs