Skip to content

Commit

Permalink
Fix missing seed / options args in subprocvecenv env resets due to episode termination
Browse files Browse the repository at this point in the history
  • Loading branch information
npit committed Jan 11, 2024
1 parent 7996d3d commit 3e09f4d
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions stable_baselines3/common/vec_env/subproc_vec_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,15 @@ def _worker(
try:
cmd, data = remote.recv()
if cmd == "step":
- observation, reward, terminated, truncated, info = env.step(data)
+ action, seed, options = data
+ observation, reward, terminated, truncated, info = env.step(action)
# convert to SB3 VecEnv api
done = terminated or truncated
info["TimeLimit.truncated"] = truncated and not terminated
if done:
# save final observation where user can get it, then reset
info["terminal_observation"] = observation
- observation, reset_info = env.reset()
+ observation, reset_info = env.reset(seed=seed, options=options)
remote.send((observation, reward, done, info, reset_info))
elif cmd == "reset":
maybe_options = {"options": data[1]} if data[1] else {}
Expand Down Expand Up @@ -121,8 +122,8 @@ def __init__(self, env_fns: List[Callable[[], gym.Env]], start_method: Optional[
super().__init__(len(env_fns), observation_space, action_space)

def step_async(self, actions: np.ndarray) -> None:
- for remote, action in zip(self.remotes, actions):
- remote.send(("step", action))
+ for remote, action, seed, option in zip(self.remotes, actions, self._seeds, self._options):
+ remote.send(("step", (action, seed, option)))
self.waiting = True

def step_wait(self) -> VecEnvStepReturn:
Expand Down

0 comments on commit 3e09f4d

Please sign in to comment.