From cfbace58c1c8650f34546078829764f238083b9b Mon Sep 17 00:00:00 2001
From: Ethan Brooks
Date: Wed, 8 Nov 2017 15:12:45 -0500
Subject: [PATCH 01/10] all envs run offscreen

---
 gym/envs/mujoco/ant.py                      |  8 ++--
 gym/envs/mujoco/half_cheetah.py             |  8 ++--
 gym/envs/mujoco/hopper.py                   |  8 ++--
 gym/envs/mujoco/humanoid.py                 | 16 ++++----
 gym/envs/mujoco/humanoidstandup.py          | 11 ++----
 gym/envs/mujoco/inverted_double_pendulum.py | 16 ++++----
 gym/envs/mujoco/inverted_pendulum.py        |  2 +-
 gym/envs/mujoco/mujoco_env.py               | 41 ++++++++-------------
 gym/envs/mujoco/pusher.py                   |  5 +--
 gym/envs/mujoco/reacher.py                  |  6 +--
 gym/envs/mujoco/striker.py                  |  4 +-
 gym/envs/mujoco/swimmer.py                  |  8 ++--
 gym/envs/mujoco/thrower.py                  |  4 +-
 gym/envs/mujoco/walker2d.py                 |  8 ++--
 14 files changed, 65 insertions(+), 80 deletions(-)

diff --git a/gym/envs/mujoco/ant.py b/gym/envs/mujoco/ant.py
index 57947912582..09ac3cc44af 100644
--- a/gym/envs/mujoco/ant.py
+++ b/gym/envs/mujoco/ant.py
@@ -14,7 +14,7 @@ def _step(self, a):
         forward_reward = (xposafter - xposbefore)/self.dt
         ctrl_cost = .5 * np.square(a).sum()
         contact_cost = 0.5 * 1e-3 * np.sum(
-            np.square(np.clip(self.model.data.cfrc_ext, -1, 1)))
+            np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
         survive_reward = 1.0
         reward = forward_reward - ctrl_cost - contact_cost + survive_reward
         state = self.state_vector()
@@ -30,9 +30,9 @@ def _step(self, a):
 
     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[2:],
-            self.model.data.qvel.flat,
-            np.clip(self.model.data.cfrc_ext, -1, 1).flat,
+            self.sim.data.qpos.flat[2:],
+            self.sim.data.qvel.flat,
+            np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
         ])
 
     def reset_model(self):
diff --git a/gym/envs/mujoco/half_cheetah.py b/gym/envs/mujoco/half_cheetah.py
index 9a49e0a32e0..c2b6ba2209c 100644
--- a/gym/envs/mujoco/half_cheetah.py
+++ b/gym/envs/mujoco/half_cheetah.py
@@ -8,9 +8,9 @@ def __init__(self):
         utils.EzPickle.__init__(self)
 
     def _step(self, action):
-        xposbefore = self.model.data.qpos[0, 0]
+        xposbefore = self.sim.data.qpos[0]
         self.do_simulation(action, self.frame_skip)
-        xposafter = self.model.data.qpos[0, 0]
+        xposafter = self.sim.data.qpos[0]
         ob = self._get_obs()
         reward_ctrl = - 0.1 * np.square(action).sum()
         reward_run = (xposafter - xposbefore)/self.dt
@@ -20,8 +20,8 @@ def _step(self, action):
 
     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[1:],
-            self.model.data.qvel.flat,
+            self.sim.data.qpos.flat[1:],
+            self.sim.data.qvel.flat,
         ])
 
     def reset_model(self):
diff --git a/gym/envs/mujoco/hopper.py b/gym/envs/mujoco/hopper.py
index 2a5a3999fb3..28fb1449b43 100644
--- a/gym/envs/mujoco/hopper.py
+++ b/gym/envs/mujoco/hopper.py
@@ -8,9 +8,9 @@ def __init__(self):
         utils.EzPickle.__init__(self)
 
     def _step(self, a):
-        posbefore = self.model.data.qpos[0, 0]
+        posbefore = self.sim.data.qpos[0]
         self.do_simulation(a, self.frame_skip)
-        posafter, height, ang = self.model.data.qpos[0:3, 0]
+        posafter, height, ang = self.sim.data.qpos[0:3]
         alive_bonus = 1.0
         reward = (posafter - posbefore) / self.dt
         reward += alive_bonus
@@ -23,8 +23,8 @@ def _step(self, a):
 
     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[1:],
-            np.clip(self.model.data.qvel.flat, -10, 10)
+            self.sim.data.qpos.flat[1:],
+            np.clip(self.sim.data.qvel.flat, -10, 10)
         ])
 
     def reset_model(self):
diff --git a/gym/envs/mujoco/humanoid.py b/gym/envs/mujoco/humanoid.py
index 83bb2c13d6a..7c926b60f0c 100644
--- a/gym/envs/mujoco/humanoid.py
+++ b/gym/envs/mujoco/humanoid.py
@@ -2,9 +2,9 @@
 from gym.envs.mujoco import mujoco_env
 from gym import utils
 
-def mass_center(model):
-    mass = model.body_mass
-    xpos = model.data.xipos
+def mass_center(model, sim):
+    mass = np.expand_dims(model.body_mass, 1)
+    xpos = sim.data.xipos
     return (np.sum(mass * xpos, 0) / np.sum(mass))[0]
 
 class HumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle):
@@ -13,7 +13,7 @@ def __init__(self):
         utils.EzPickle.__init__(self)
 
     def _get_obs(self):
-        data = self.model.data
+        data = self.sim.data
         return np.concatenate([data.qpos.flat[2:],
                                data.qvel.flat,
                                data.cinert.flat,
@@ -22,17 +22,17 @@ def _get_obs(self):
                                data.cfrc_ext.flat])
 
     def _step(self, a):
-        pos_before = mass_center(self.model)
+        pos_before = mass_center(self.model, self.sim)
         self.do_simulation(a, self.frame_skip)
-        pos_after = mass_center(self.model)
+        pos_after = mass_center(self.model, self.sim)
         alive_bonus = 5.0
-        data = self.model.data
+        data = self.sim.data
         lin_vel_cost = 0.25 * (pos_after - pos_before) / self.model.opt.timestep
         quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
         quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
         quad_impact_cost = min(quad_impact_cost, 10)
         reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
-        qpos = self.model.data.qpos
+        qpos = self.sim.data.qpos
         done = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))
         return self._get_obs(), reward, done, dict(reward_linvel=lin_vel_cost, reward_quadctrl=-quad_ctrl_cost, reward_alive=alive_bonus, reward_impact=-quad_impact_cost)
 
diff --git a/gym/envs/mujoco/humanoidstandup.py b/gym/envs/mujoco/humanoidstandup.py
index ebc5fb500af..98b771853eb 100644
--- a/gym/envs/mujoco/humanoidstandup.py
+++ b/gym/envs/mujoco/humanoidstandup.py
@@ -2,18 +2,13 @@
 from gym.envs.mujoco import mujoco_env
 from gym import utils
 
-def mass_center(model):
-    mass = model.body_mass
-    xpos = model.data.xipos
-    return (np.sum(mass * xpos, 0) / np.sum(mass))[0]
-
 class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
     def __init__(self):
         mujoco_env.MujocoEnv.__init__(self, 'humanoidstandup.xml', 5)
         utils.EzPickle.__init__(self)
 
     def _get_obs(self):
-        data = self.model.data
+        data = self.sim.data
         return np.concatenate([data.qpos.flat[2:],
                                data.qvel.flat,
                                data.cinert.flat,
@@ -23,8 +18,8 @@ def _get_obs(self):
 
     def _step(self, a):
         self.do_simulation(a, self.frame_skip)
-        pos_after = self.model.data.qpos[2][0]
-        data = self.model.data
+        pos_after = self.sim.data.qpos[2]
+        data = self.sim.data
         uph_cost = (pos_after - 0) / self.model.opt.timestep
 
         quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
diff --git a/gym/envs/mujoco/inverted_double_pendulum.py b/gym/envs/mujoco/inverted_double_pendulum.py
index 0c29659581d..ae9f2e44eed 100644
--- a/gym/envs/mujoco/inverted_double_pendulum.py
+++ b/gym/envs/mujoco/inverted_double_pendulum.py
@@ -11,22 +11,22 @@ def __init__(self):
     def _step(self, action):
         self.do_simulation(action, self.frame_skip)
         ob = self._get_obs()
-        x, _, y = self.model.data.site_xpos[0]
+        x, _, y = self.sim.data.site_xpos[0]
         dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2
-        v1, v2 = self.model.data.qvel[1:3]
+        v1, v2 = self.sim.data.qvel[1:3]
         vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
         alive_bonus = 10
-        r = (alive_bonus - dist_penalty - vel_penalty)[0]
+        r = alive_bonus - dist_penalty - vel_penalty
         done = bool(y <= 1)
         return ob, r, done, {}
 
     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos[:1],  # cart x pos
-            np.sin(self.model.data.qpos[1:]),  # link angles
-            np.cos(self.model.data.qpos[1:]),
-            np.clip(self.model.data.qvel, -10, 10),
-            np.clip(self.model.data.qfrc_constraint, -10, 10)
+            self.sim.data.qpos[:1],  # cart x pos
+            np.sin(self.sim.data.qpos[1:]),  # link angles
+            np.cos(self.sim.data.qpos[1:]),
+            np.clip(self.sim.data.qvel, -10, 10),
+            np.clip(self.sim.data.qfrc_constraint, -10, 10)
         ]).ravel()
 
     def reset_model(self):
diff --git a/gym/envs/mujoco/inverted_pendulum.py b/gym/envs/mujoco/inverted_pendulum.py
index 86a1f27d149..371f156babc 100644
--- a/gym/envs/mujoco/inverted_pendulum.py
+++ b/gym/envs/mujoco/inverted_pendulum.py
@@ -22,7 +22,7 @@ def reset_model(self):
         return self._get_obs()
 
     def _get_obs(self):
-        return np.concatenate([self.model.data.qpos, self.model.data.qvel]).ravel()
+        return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel()
 
     def viewer_setup(self):
         v = self.viewer
diff --git a/gym/envs/mujoco/mujoco_env.py b/gym/envs/mujoco/mujoco_env.py
index 32378c3e33d..e00e120571d 100644
--- a/gym/envs/mujoco/mujoco_env.py
+++ b/gym/envs/mujoco/mujoco_env.py
@@ -9,7 +9,6 @@
 
 try:
     import mujoco_py
-    from mujoco_py.mjlib import mjlib
 except ImportError as e:
     raise error.DependencyNotInstalled("{}. (HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)".format(e))
 
@@ -25,8 +24,9 @@ def __init__(self, model_path, frame_skip):
         if not path.exists(fullpath):
             raise IOError("File %s does not exist" % fullpath)
         self.frame_skip = frame_skip
-        self.model = mujoco_py.MjModel(fullpath)
-        self.data = self.model.data
+        self.model = mujoco_py.load_model_from_path(fullpath)
+        self.sim = mujoco_py.MjSim(self.model)
+        self.data = self.sim.data
         self.viewer = None
 
         self.metadata = {
@@ -34,8 +34,8 @@ def __init__(self, model_path, frame_skip):
             'video.frames_per_second': int(np.round(1.0 / self.dt))
         }
 
-        self.init_qpos = self.model.data.qpos.ravel().copy()
-        self.init_qvel = self.model.data.qvel.ravel().copy()
+        self.init_qpos = self.sim.data.qpos.ravel().copy()
+        self.init_qvel = self.sim.data.qvel.ravel().copy()
         observation, _reward, done, _info = self._step(np.zeros(self.model.nu))
         assert not done
         self.obs_dim = observation.size
@@ -76,28 +76,28 @@ def viewer_setup(self):
     # -----------------------------
 
     def _reset(self):
-        mjlib.mj_resetData(self.model.ptr, self.data.ptr)
+        self.sim.reset()
         ob = self.reset_model()
         if self.viewer is not None:
-            self.viewer.autoscale()
             self.viewer_setup()
         return ob
 
     def set_state(self, qpos, qvel):
         assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)
-        self.model.data.qpos = qpos
-        self.model.data.qvel = qvel
-        self.model._compute_subtree()  # pylint: disable=W0212
-        self.model.forward()
+        old_state = self.sim.get_state()
+        new_state = mujoco_py.MjSimState(old_state.time, qpos, qvel,
+                                         old_state.act, old_state.udd_state)
+        self.sim.set_state(new_state)
+        self.sim.forward()
 
     @property
     def dt(self):
         return self.model.opt.timestep * self.frame_skip
 
     def do_simulation(self, ctrl, n_frames):
-        self.model.data.ctrl = ctrl
+        self.sim.data.ctrl[:] = ctrl
         for _ in range(n_frames):
-            self.model.step()
+            self.sim.step()
 
     def _render(self, mode='human', close=False):
         if close:
@@ -122,19 +122,10 @@ def _get_viewer(self):
         return self.viewer
 
     def get_body_com(self, body_name):
-        idx = self.model.body_names.index(six.b(body_name))
-        return self.model.data.com_subtree[idx]
-
-    def get_body_comvel(self, body_name):
-        idx = self.model.body_names.index(six.b(body_name))
-        return self.model.body_comvels[idx]
-
-    def get_body_xmat(self, body_name):
-        idx = self.model.body_names.index(six.b(body_name))
-        return self.model.data.xmat[idx].reshape((3, 3))
+        return self.data.get_body_xpos(body_name)
 
     def state_vector(self):
         return np.concatenate([
-            self.model.data.qpos.flat,
-            self.model.data.qvel.flat
+            self.sim.data.qpos.flat,
+            self.sim.data.qvel.flat
         ])
diff --git a/gym/envs/mujoco/pusher.py b/gym/envs/mujoco/pusher.py
index 96bfe9dbd5e..c5bed3db668 100644
--- a/gym/envs/mujoco/pusher.py
+++ b/gym/envs/mujoco/pusher.py
@@ -3,7 +3,6 @@
 from gym.envs.mujoco import mujoco_env
 
 import mujoco_py
-from mujoco_py.mjlib import mjlib
 
 class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
     def __init__(self):
@@ -50,8 +49,8 @@ def reset_model(self):
 
     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[:7],
-            self.model.data.qvel.flat[:7],
+            self.sim.data.qpos.flat[:7],
+            self.sim.data.qvel.flat[:7],
             self.get_body_com("tips_arm"),
             self.get_body_com("object"),
             self.get_body_com("goal"),
diff --git a/gym/envs/mujoco/reacher.py b/gym/envs/mujoco/reacher.py
index 1730db9c30b..e288df5ae02 100644
--- a/gym/envs/mujoco/reacher.py
+++ b/gym/envs/mujoco/reacher.py
@@ -33,11 +33,11 @@ def reset_model(self):
         return self._get_obs()
 
     def _get_obs(self):
-        theta = self.model.data.qpos.flat[:2]
+        theta = self.sim.data.qpos.flat[:2]
         return np.concatenate([
             np.cos(theta),
             np.sin(theta),
-            self.model.data.qpos.flat[2:],
-            self.model.data.qvel.flat[:2],
+            self.sim.data.qpos.flat[2:],
+            self.sim.data.qvel.flat[:2],
             self.get_body_com("fingertip") - self.get_body_com("target")
         ])
diff --git a/gym/envs/mujoco/striker.py b/gym/envs/mujoco/striker.py
index 24a01e3761b..7855d801c77 100644
--- a/gym/envs/mujoco/striker.py
+++ b/gym/envs/mujoco/striker.py
@@ -67,8 +67,8 @@ def reset_model(self):
 
     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[:7],
-            self.model.data.qvel.flat[:7],
+            self.sim.data.qpos.flat[:7],
+            self.sim.data.qvel.flat[:7],
             self.get_body_com("tips_arm"),
             self.get_body_com("object"),
             self.get_body_com("goal"),
diff --git a/gym/envs/mujoco/swimmer.py b/gym/envs/mujoco/swimmer.py
index b79829e717e..8d351ab79b3 100644
--- a/gym/envs/mujoco/swimmer.py
+++ b/gym/envs/mujoco/swimmer.py
@@ -9,9 +9,9 @@ def __init__(self):
 
     def _step(self, a):
         ctrl_cost_coeff = 0.0001
-        xposbefore = self.model.data.qpos[0, 0]
+        xposbefore = self.sim.data.qpos[0]
         self.do_simulation(a, self.frame_skip)
-        xposafter = self.model.data.qpos[0, 0]
+        xposafter = self.sim.data.qpos[0]
         reward_fwd = (xposafter - xposbefore) / self.dt
         reward_ctrl = - ctrl_cost_coeff * np.square(a).sum()
         reward = reward_fwd + reward_ctrl
@@ -19,8 +19,8 @@ def _step(self, a):
         return ob, reward, False, dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl)
 
     def _get_obs(self):
-        qpos = self.model.data.qpos
-        qvel = self.model.data.qvel
+        qpos = self.sim.data.qpos
+        qvel = self.sim.data.qvel
         return np.concatenate([qpos.flat[2:], qvel.flat])
 
     def reset_model(self):
diff --git a/gym/envs/mujoco/thrower.py b/gym/envs/mujoco/thrower.py
index 2627e0a8c94..116e43ca295 100644
--- a/gym/envs/mujoco/thrower.py
+++ b/gym/envs/mujoco/thrower.py
@@ -52,8 +52,8 @@ def reset_model(self):
 
     def _get_obs(self):
         return np.concatenate([
-            self.model.data.qpos.flat[:7],
-            self.model.data.qvel.flat[:7],
+            self.sim.data.qpos.flat[:7],
+            self.sim.data.qvel.flat[:7],
             self.get_body_com("r_wrist_roll_link"),
             self.get_body_com("ball"),
             self.get_body_com("goal"),
diff --git a/gym/envs/mujoco/walker2d.py b/gym/envs/mujoco/walker2d.py
index 2fa94593306..728a6b18400 100644
--- a/gym/envs/mujoco/walker2d.py
+++ b/gym/envs/mujoco/walker2d.py
@@ -9,9 +9,9 @@ def __init__(self):
         utils.EzPickle.__init__(self)
 
     def _step(self, a):
-        posbefore = self.model.data.qpos[0, 0]
+        posbefore = self.sim.data.qpos[0]
         self.do_simulation(a, self.frame_skip)
-        posafter, height, ang = self.model.data.qpos[0:3, 0]
+        posafter, height, ang = self.sim.data.qpos[0:3]
         alive_bonus = 1.0
         reward = ((posafter - posbefore) / self.dt)
         reward += alive_bonus
@@ -22,8 +22,8 @@ def _step(self, a):
         return ob, reward, done, {}
 
     def _get_obs(self):
-        qpos = self.model.data.qpos
-        qvel = self.model.data.qvel
+        qpos = self.sim.data.qpos
+        qvel = self.sim.data.qvel
         return np.concatenate([qpos[1:], np.clip(qvel, -10, 10)]).ravel()
 
     def reset_model(self):
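The patch above is the core of the migration. mujoco_py 1.50 drops the old ``MjModel``/``mjlib`` bindings in favor of an explicit ``MjSim`` simulation object, ``qpos``/``qvel`` become flat 1-D arrays (hence ``qpos[0]`` instead of the old ``qpos[0, 0]``), and state is replaced wholesale through an ``MjSimState`` snapshot rather than assigned field-by-field. A minimal sketch of the new access pattern, assuming only some valid MuJoCo XML file (the path below is a placeholder, not something the patch supplies):

.. code:: python

    import mujoco_py
    import numpy as np

    # Load the model and wrap it in a simulation, as the patched
    # MujocoEnv.__init__ now does.
    model = mujoco_py.load_model_from_path('/path/to/model.xml')  # placeholder path
    sim = mujoco_py.MjSim(model)

    # Joint state lives on sim.data as flat 1-D arrays.
    print(sim.data.qpos.shape)  # (model.nq,)
    print(sim.data.qvel.shape)  # (model.nv,)

    # States are immutable snapshots; to overwrite qpos/qvel, build a new
    # MjSimState around the old one, mirroring the patched set_state().
    old_state = sim.get_state()
    new_state = mujoco_py.MjSimState(old_state.time,
                                     np.zeros(model.nq), np.zeros(model.nv),
                                     old_state.act, old_state.udd_state)
    sim.set_state(new_state)
    sim.forward()

One behavioral note visible in the diff: ``get_body_com`` now returns the body frame position via ``get_body_xpos`` rather than the old subtree center of mass, so observations and rewards built from it are close to, but not identical with, the pre-1.50 values.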
From 1730538e2e28cf81903d3defdebb0a12a830b4fc Mon Sep 17 00:00:00 2001
From: Ethan Brooks
Date: Wed, 8 Nov 2017 15:17:25 -0500
Subject: [PATCH 02/10] render works

---
 gym/envs/mujoco/mujoco_env.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/gym/envs/mujoco/mujoco_env.py b/gym/envs/mujoco/mujoco_env.py
index e00e120571d..e67621c67f5 100644
--- a/gym/envs/mujoco/mujoco_env.py
+++ b/gym/envs/mujoco/mujoco_env.py
@@ -102,7 +102,7 @@ def do_simulation(self, ctrl, n_frames):
     def _render(self, mode='human', close=False):
         if close:
             if self.viewer is not None:
-                self._get_viewer().finish()
+                self._get_viewer()
                 self.viewer = None
             return
 
@@ -111,13 +111,11 @@ def _render(self, mode='human', close=False):
             data, width, height = self._get_viewer().get_image()
             return np.fromstring(data, dtype='uint8').reshape(height, width, 3)[::-1, :, :]
         elif mode == 'human':
-            self._get_viewer().loop_once()
+            self._get_viewer().render()
 
     def _get_viewer(self):
         if self.viewer is None:
-            self.viewer = mujoco_py.MjViewer()
-            self.viewer.start()
-            self.viewer.set_model(self.model)
+            self.viewer = mujoco_py.MjViewer(self.sim)
             self.viewer_setup()
         return self.viewer
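The viewer API moves the same way: ``MjViewer`` now wraps the simulation at construction time and is pumped with ``render()``, replacing the old ``start()``/``set_model()``/``loop_once()`` sequence. A sketch of both display paths under 1.50 (the offscreen ``sim.render`` call is the binding's own API, not something this patch uses, and a working GL setup is assumed; in practice you would pick one path per process):

.. code:: python

    import mujoco_py

    model = mujoco_py.load_model_from_path('/path/to/model.xml')  # placeholder path
    sim = mujoco_py.MjSim(model)

    # Offscreen: the sim can rasterize a frame straight to an RGB array,
    # which is what lets the envs run headless.
    frame = sim.render(width=255, height=255)

    # On-screen: the viewer takes the sim in its constructor.
    viewer = mujoco_py.MjViewer(sim)
    viewer.render()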
From 5947270aaaca59556757f00bc9c46ed28a709d10 Mon Sep 17 00:00:00 2001
From: Ethan Brooks
Date: Mon, 13 Nov 2017 10:34:26 -0500
Subject: [PATCH 03/10] changed mujoco-py version

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 666cb8a7543..36c2671f066 100644
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,7 @@
   'board_game' : ['pachi-py>=0.0.19'],
   'box2d': ['Box2D-kengz'],
   'classic_control': ['PyOpenGL'],
-  'mujoco': ['mujoco_py<1.0.0,>=0.4.3', 'imageio'],
+  'mujoco': ['mujoco_py>=1.5', 'imageio'],
   'parameter_tuning': ['keras', 'theano'],
 }

From 7c10d454e6669ea718ca9fe52e3c05e827e64b9a Mon Sep 17 00:00:00 2001
From: Matthias Plappert
Date: Wed, 24 Jan 2018 10:26:05 -0800
Subject: [PATCH 04/10] Bump versions

---
 gym/envs/__init__.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/gym/envs/__init__.py b/gym/envs/__init__.py
index db015440229..fe06cc4779b 100644
--- a/gym/envs/__init__.py
+++ b/gym/envs/__init__.py
@@ -204,89 +204,89 @@
 # 2D
 
 register(
-    id='Reacher-v1',
+    id='Reacher-v2',
     entry_point='gym.envs.mujoco:ReacherEnv',
     max_episode_steps=50,
     reward_threshold=-3.75,
 )
 
 register(
-    id='Pusher-v0',
+    id='Pusher-v1',
     entry_point='gym.envs.mujoco:PusherEnv',
     max_episode_steps=100,
     reward_threshold=0.0,
 )
 
 register(
-    id='Thrower-v0',
+    id='Thrower-v1',
     entry_point='gym.envs.mujoco:ThrowerEnv',
     max_episode_steps=100,
     reward_threshold=0.0,
 )
 
 register(
-    id='Striker-v0',
+    id='Striker-v1',
     entry_point='gym.envs.mujoco:StrikerEnv',
     max_episode_steps=100,
     reward_threshold=0.0,
 )
 
 register(
-    id='InvertedPendulum-v1',
+    id='InvertedPendulum-v2',
     entry_point='gym.envs.mujoco:InvertedPendulumEnv',
     max_episode_steps=1000,
     reward_threshold=950.0,
 )
 
 register(
-    id='InvertedDoublePendulum-v1',
+    id='InvertedDoublePendulum-v2',
     entry_point='gym.envs.mujoco:InvertedDoublePendulumEnv',
     max_episode_steps=1000,
     reward_threshold=9100.0,
 )
 
 register(
-    id='HalfCheetah-v1',
+    id='HalfCheetah-v2',
     entry_point='gym.envs.mujoco:HalfCheetahEnv',
     max_episode_steps=1000,
     reward_threshold=4800.0,
 )
 
 register(
-    id='Hopper-v1',
+    id='Hopper-v2',
     entry_point='gym.envs.mujoco:HopperEnv',
     max_episode_steps=1000,
     reward_threshold=3800.0,
 )
 
 register(
-    id='Swimmer-v1',
+    id='Swimmer-v2',
     entry_point='gym.envs.mujoco:SwimmerEnv',
     max_episode_steps=1000,
     reward_threshold=360.0,
 )
 
 register(
-    id='Walker2d-v1',
+    id='Walker2d-v2',
     max_episode_steps=1000,
     entry_point='gym.envs.mujoco:Walker2dEnv',
 )
 
 register(
-    id='Ant-v1',
+    id='Ant-v2',
     entry_point='gym.envs.mujoco:AntEnv',
     max_episode_steps=1000,
     reward_threshold=6000.0,
 )
 
 register(
-    id='Humanoid-v1',
+    id='Humanoid-v2',
     entry_point='gym.envs.mujoco:HumanoidEnv',
     max_episode_steps=1000,
 )
 
 register(
-    id='HumanoidStandup-v1',
+    id='HumanoidStandup-v2',
     entry_point='gym.envs.mujoco:HumanoidStandupEnv',
     max_episode_steps=1000,
 )
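The id bump means callers have to follow suit, since the registry resolves an environment by name plus version and the old ``-v1`` (and remaining ``-v0``) ids stop resolving once this lands. A usage sketch (the choice of environment is arbitrary):

.. code:: python

    import gym

    # New id required after this patch: gym.make('HalfCheetah-v2') replaces
    # gym.make('HalfCheetah-v1'), which the registry now rejects.
    env = gym.make('HalfCheetah-v2')
    ob = env.reset()
    ob, reward, done, info = env.step(env.action_space.sample())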
From 18b1a6f2b7a455fc25a7e65ffc0faeebabd0eeb5 Mon Sep 17 00:00:00 2001
From: Matthias Plappert
Date: Wed, 24 Jan 2018 13:05:03 -0800
Subject: [PATCH 05/10] Update version and README

---
 README.rst     | 8 ++++++--
 gym/version.py | 2 +-
 setup.py       | 2 +-
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/README.rst b/README.rst
index d9bfb4fda04..b86901fbc2e 100644
--- a/README.rst
+++ b/README.rst
@@ -4,7 +4,7 @@ OpenAI Gym
 **OpenAI Gym is a toolkit for developing and comparing reinforcement learning algorithms.** This is the ``gym`` open-source library, which gives you access to an ever-growing variety of environments.
 
 .. image:: https://travis-ci.org/openai/gym.svg?branch=master
-    :target: https://travis-ci.org/openai/gym
+    :target: https://travis-ci.org/openai/gym
 
 `See What's New section below <#what-s-new>`_
 
@@ -126,7 +126,7 @@ fake display. The easiest way to do this is by running under
 
 .. code:: shell
 
-    xvfb-run -s "-screen 0 1400x900x24" bash
+    xvfb-run -s "-screen 0 1400x900x24" bash
 
 Installing dependencies for specific environments
 -------------------------------------------------
@@ -262,6 +262,10 @@ We are using `pytest <http://doc.pytest.org>`_ for tests. You can run them via:
 What's new
 ==========
 
+- 2018-01-24: All continuous control environments now use mujoco_py >= 1.50.
+  Versions have been updated accordingly to -v2, e.g. HalfCheetah-v2. Performance
+  should be similar (see https://github.com/openai/gym/pull/834) but there likely
+  some differences due to changes in MuJoCo.
 - 2017-06-16: Make env.spec into a property to fix a bug that occurs
   when you try to print out an unregistered Env.
 - 2017-05-13: BACKWARDS INCOMPATIBILITY: The Atari environments are now at
diff --git a/gym/version.py b/gym/version.py
index aa111788f04..a482f45003f 100644
--- a/gym/version.py
+++ b/gym/version.py
@@ -1 +1 @@
-VERSION = '0.9.3'
+VERSION = '0.9.4'
diff --git a/setup.py b/setup.py
index 36c2671f066..79f990906b0 100644
--- a/setup.py
+++ b/setup.py
@@ -11,7 +11,7 @@
   'board_game' : ['pachi-py>=0.0.19'],
   'box2d': ['Box2D-kengz'],
   'classic_control': ['PyOpenGL'],
-  'mujoco': ['mujoco_py>=1.5', 'imageio'],
+  'mujoco': ['mujoco_py>=1.50', 'imageio'],
   'parameter_tuning': ['keras', 'theano'],
 }

From a522943663d0a36aa451668cb6dd1af50035b4ea Mon Sep 17 00:00:00 2001
From: Matthias Plappert
Date: Wed, 24 Jan 2018 13:06:31 -0800
Subject: [PATCH 06/10] Same versioning for all mujoco envs

---
 gym/envs/__init__.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gym/envs/__init__.py b/gym/envs/__init__.py
index fe06cc4779b..32fb6b241da 100644
--- a/gym/envs/__init__.py
+++ b/gym/envs/__init__.py
@@ -211,21 +211,21 @@
 )
 
 register(
-    id='Pusher-v1',
+    id='Pusher-v2',
     entry_point='gym.envs.mujoco:PusherEnv',
     max_episode_steps=100,
     reward_threshold=0.0,
 )
 
 register(
-    id='Thrower-v1',
+    id='Thrower-v2',
     entry_point='gym.envs.mujoco:ThrowerEnv',
     max_episode_steps=100,
     reward_threshold=0.0,
 )
 
 register(
-    id='Striker-v1',
+    id='Striker-v2',
     entry_point='gym.envs.mujoco:StrikerEnv',
     max_episode_steps=100,
     reward_threshold=0.0,
 )

From 1fb945c1612414c721fcc583ca9828689144ba17 Mon Sep 17 00:00:00 2001
From: Matthias Plappert
Date: Wed, 24 Jan 2018 13:09:45 -0800
Subject: [PATCH 07/10] Fix typo

---
 README.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.rst b/README.rst
index b86901fbc2e..21b7cf6a5fa 100644
--- a/README.rst
+++ b/README.rst
@@ -264,7 +264,7 @@ What's new
 
 - 2018-01-24: All continuous control environments now use mujoco_py >= 1.50.
   Versions have been updated accordingly to -v2, e.g. HalfCheetah-v2. Performance
-  should be similar (see https://github.com/openai/gym/pull/834) but there likely
+  should be similar (see https://github.com/openai/gym/pull/834) but there are likely
   some differences due to changes in MuJoCo.
 - 2017-06-16: Make env.spec into a property to fix a bug that occurs
   when you try to print out an unregistered Env.
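With ``setup.py`` now pinning ``mujoco_py>=1.50``, it can be worth confirming which binding an existing environment actually has installed before upgrading gym. A sketch using ``pkg_resources``, shown here as one illustrative option rather than anything these patches add:

.. code:: python

    import pkg_resources

    # The PyPI distribution is named 'mujoco-py'; anything below 1.50 still
    # carries the old MjModel-style API that this series removes.
    version = pkg_resources.get_distribution('mujoco-py').version
    assert pkg_resources.parse_version(version) >= pkg_resources.parse_version('1.50')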
From decc5779811801deb6ae9fad697dfe247d2bdd94 Mon Sep 17 00:00:00 2001
From: Matthias Plappert
Date: Wed, 24 Jan 2018 15:38:47 -0800
Subject: [PATCH 08/10] Fix version

---
 README.rst | 88 +++++++++++++++++++++++++++---------------------------
 1 file changed, 44 insertions(+), 44 deletions(-)

diff --git a/README.rst b/README.rst
index 21b7cf6a5fa..55450ac938e 100644
--- a/README.rst
+++ b/README.rst
@@ -4,7 +4,7 @@ OpenAI Gym
 **OpenAI Gym is a toolkit for developing and comparing reinforcement learning algorithms.** This is the ``gym`` open-source library, which gives you access to an ever-growing variety of environments.
 
 .. image:: https://travis-ci.org/openai/gym.svg?branch=master
-    :target: https://travis-ci.org/openai/gym
+    :target: https://travis-ci.org/openai/gym
 
 `See What's New section below <#what-s-new>`_
 
@@ -15,12 +15,12 @@ If you're not sure where to start, we recommend beginning with the
 A whitepaper for OpenAI Gym is available at http://arxiv.org/abs/1606.01540, and here's a BibTeX entry that you can use to cite it in a publication::
 
-    @misc{1606.01540,
-      Author = {Greg Brockman and Vicki Cheung and Ludwig Pettersson and Jonas Schneider and John Schulman and Jie Tang and Wojciech Zaremba},
-      Title = {OpenAI Gym},
-      Year = {2016},
-      Eprint = {arXiv:1606.01540},
-    }
+    @misc{1606.01540,
+      Author = {Greg Brockman and Vicki Cheung and Ludwig Pettersson and Jonas Schneider and John Schulman and Jie Tang and Wojciech Zaremba},
+      Title = {OpenAI Gym},
+      Year = {2016},
+      Eprint = {arXiv:1606.01540},
+    }
 
 .. contents:: **Contents of this document**
    :depth: 2
@@ -50,15 +50,15 @@ You can perform a minimal install of ``gym`` with:
 
 .. code:: shell
 
-    git clone https://github.com/openai/gym.git
-    cd gym
-    pip install -e .
+    git clone https://github.com/openai/gym.git
+    cd gym
+    pip install -e .
 
 If you prefer, you can do a minimal install of the packaged version directly from PyPI:
 
 .. code:: shell
 
-    pip install gym
+    pip install gym
 
 You'll be able to run a few environments right away:
 
@@ -80,13 +80,13 @@ On OSX:
 
 .. code:: shell
 
-    brew install cmake boost boost-python sdl2 swig wget
+    brew install cmake boost boost-python sdl2 swig wget
 
 On Ubuntu 14.04:
 
 .. code:: shell
 
-    apt-get install -y python-numpy python-dev cmake zlib1g-dev libjpeg-dev xvfb libav-tools xorg-dev python-opengl libboost-all-dev libsdl2-dev swig
+    apt-get install -y python-numpy python-dev cmake zlib1g-dev libjpeg-dev xvfb libav-tools xorg-dev python-opengl libboost-all-dev libsdl2-dev swig
 
 MuJoCo has a proprietary dependency we can't set up for you. Follow
 the
@@ -102,7 +102,7 @@ We currently support Linux and OS X running Python 2.7 or 3.5. Some users on OSX
 
 .. code:: shell
 
-    brew install boost-python --with-python3
+    brew install boost-python --with-python3
 
 If you want to access Gym from languages other than python, we have limited support for non-python frameworks, such as lua/Torch, using the OpenAI Gym `HTTP API <https://github.com/openai/gym-http-api>`_.
 
@@ -126,7 +126,7 @@ fake display. The easiest way to do this is by running under
 
 .. code:: shell
 
-    xvfb-run -s "-screen 0 1400x900x24" bash
+    xvfb-run -s "-screen 0 1400x900x24" bash
 
 Installing dependencies for specific environments
 -------------------------------------------------
@@ -154,10 +154,10 @@ sequence.
 
 .. code:: python
 
-    import gym
-    env = gym.make('Copy-v0')
-    env.reset()
-    env.render()
+    import gym
+    env = gym.make('Copy-v0')
+    env.reset()
+    env.render()
 
 Atari
 -----
@@ -166,10 +166,10 @@ The Atari environments are a variety of Atari video games. If you didn't do the
 
 .. code:: python
 
-    import gym
-    env = gym.make('SpaceInvaders-v0')
-    env.reset()
-    env.render()
+    import gym
+    env = gym.make('SpaceInvaders-v0')
+    env.reset()
+    env.render()
 
 This will install ``atari-py``, which automatically compiles the `Arcade Learning Environment <http://www.arcadelearningenvironment.org/>`_. This can take quite a while (a few minutes on a decent laptop), so just be prepared.
 
@@ -180,10 +180,10 @@ The board game environments are a variety of board games. If you didn't do the f
 
 .. code:: python
 
-    import gym
-    env = gym.make('Go9x9-v0')
-    env.reset()
-    env.render()
+    import gym
+    env = gym.make('Go9x9-v0')
+    env.reset()
+    env.render()
 
 Box2d
 -----------
@@ -192,10 +192,10 @@ Box2d is a 2D physics engine. You can install it via ``pip install -e '.[box2d]
 
 .. code:: python
 
-    import gym
-    env = gym.make('LunarLander-v2')
-    env.reset()
-    env.render()
+    import gym
+    env = gym.make('LunarLander-v2')
+    env.reset()
+    env.render()
 
 Classic control
 ---------------
@@ -204,10 +204,10 @@ These are a variety of classic control tasks, which would appear in a typical re
 
 .. code:: python
 
-    import gym
-    env = gym.make('CartPole-v0')
-    env.reset()
-    env.render()
+    import gym
+    env = gym.make('CartPole-v0')
+    env.reset()
+    env.render()
 
 MuJoCo
 ------
@@ -220,10 +220,10 @@ to set it up. You'll have to also run ``pip install -e '.[mujoco]'`` if you didn
 
 .. code:: python
 
-    import gym
-    env = gym.make('Humanoid-v1')
-    env.reset()
-    env.render()
+    import gym
+    env = gym.make('Humanoid-v1')
+    env.reset()
+    env.render()
 
 Toy text
 --------
@@ -232,10 +232,10 @@ Toy environments which are text-based. There's no extra dependency to install, s
 
 .. code:: python
 
-    import gym
-    env = gym.make('FrozenLake-v0')
-    env.reset()
-    env.render()
+    import gym
+    env = gym.make('FrozenLake-v0')
+    env.reset()
+    env.render()
 
 Examples
 ========
@@ -254,7 +254,7 @@ We are using `pytest <http://doc.pytest.org>`_ for tests. You can run them via:
 
 .. code:: shell
 
-    pytest
+    pytest
 
 
 .. _See What's New section below:
From 26103a5e69ad4e4fb27ac1bc71e8855609149c10 Mon Sep 17 00:00:00 2001
From: Matthias Plappert
Date: Wed, 24 Jan 2018 15:39:59 -0800
Subject: [PATCH 09/10] Bump version again

---
 gym/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gym/version.py b/gym/version.py
index a482f45003f..ce86b2da7d7 100644
--- a/gym/version.py
+++ b/gym/version.py
@@ -1 +1 @@
-VERSION = '0.9.4'
+VERSION = '0.9.5'

From 09a7eff12aebc739b744e43f44c77b8d7906b78a Mon Sep 17 00:00:00 2001
From: Matthias Plappert
Date: Wed, 24 Jan 2018 15:41:07 -0800
Subject: [PATCH 10/10] Revert "Fix version"

This reverts commit decc5779811801deb6ae9fad697dfe247d2bdd94.
---
 README.rst | 88 +++++++++++++++++++++++++++---------------------------
 1 file changed, 44 insertions(+), 44 deletions(-)

diff --git a/README.rst b/README.rst
index 55450ac938e..21b7cf6a5fa 100644
--- a/README.rst
+++ b/README.rst
@@ -4,7 +4,7 @@ OpenAI Gym
 **OpenAI Gym is a toolkit for developing and comparing reinforcement learning algorithms.** This is the ``gym`` open-source library, which gives you access to an ever-growing variety of environments.
 
 .. image:: https://travis-ci.org/openai/gym.svg?branch=master
-    :target: https://travis-ci.org/openai/gym
+    :target: https://travis-ci.org/openai/gym
 
 `See What's New section below <#what-s-new>`_
 
@@ -15,12 +15,12 @@ If you're not sure where to start, we recommend beginning with the
 A whitepaper for OpenAI Gym is available at http://arxiv.org/abs/1606.01540, and here's a BibTeX entry that you can use to cite it in a publication::
 
-    @misc{1606.01540,
-      Author = {Greg Brockman and Vicki Cheung and Ludwig Pettersson and Jonas Schneider and John Schulman and Jie Tang and Wojciech Zaremba},
-      Title = {OpenAI Gym},
-      Year = {2016},
-      Eprint = {arXiv:1606.01540},
-    }
+    @misc{1606.01540,
+      Author = {Greg Brockman and Vicki Cheung and Ludwig Pettersson and Jonas Schneider and John Schulman and Jie Tang and Wojciech Zaremba},
+      Title = {OpenAI Gym},
+      Year = {2016},
+      Eprint = {arXiv:1606.01540},
+    }
 
 .. contents:: **Contents of this document**
    :depth: 2
@@ -50,15 +50,15 @@ You can perform a minimal install of ``gym`` with:
 
 .. code:: shell
 
-    git clone https://github.com/openai/gym.git
-    cd gym
-    pip install -e .
+    git clone https://github.com/openai/gym.git
+    cd gym
+    pip install -e .
 
 If you prefer, you can do a minimal install of the packaged version directly from PyPI:
 
 .. code:: shell
 
-    pip install gym
+    pip install gym
 
 You'll be able to run a few environments right away:
 
@@ -80,13 +80,13 @@ On OSX:
 
 .. code:: shell
 
-    brew install cmake boost boost-python sdl2 swig wget
+    brew install cmake boost boost-python sdl2 swig wget
 
 On Ubuntu 14.04:
 
 .. code:: shell
 
-    apt-get install -y python-numpy python-dev cmake zlib1g-dev libjpeg-dev xvfb libav-tools xorg-dev python-opengl libboost-all-dev libsdl2-dev swig
+    apt-get install -y python-numpy python-dev cmake zlib1g-dev libjpeg-dev xvfb libav-tools xorg-dev python-opengl libboost-all-dev libsdl2-dev swig
 
 MuJoCo has a proprietary dependency we can't set up for you. Follow
 the
@@ -102,7 +102,7 @@ We currently support Linux and OS X running Python 2.7 or 3.5. Some users on OSX
 
 .. code:: shell
 
-    brew install boost-python --with-python3
+    brew install boost-python --with-python3
 
 If you want to access Gym from languages other than python, we have limited support for non-python frameworks, such as lua/Torch, using the OpenAI Gym `HTTP API <https://github.com/openai/gym-http-api>`_.
 
@@ -126,7 +126,7 @@ fake display. The easiest way to do this is by running under
 
 .. code:: shell
 
-    xvfb-run -s "-screen 0 1400x900x24" bash
+    xvfb-run -s "-screen 0 1400x900x24" bash
 
 Installing dependencies for specific environments
 -------------------------------------------------
@@ -154,10 +154,10 @@ sequence.
 
 .. code:: python
 
-    import gym
-    env = gym.make('Copy-v0')
-    env.reset()
-    env.render()
+    import gym
+    env = gym.make('Copy-v0')
+    env.reset()
+    env.render()
 
 Atari
 -----
@@ -166,10 +166,10 @@ The Atari environments are a variety of Atari video games. If you didn't do the
 
 .. code:: python
 
-    import gym
-    env = gym.make('SpaceInvaders-v0')
-    env.reset()
-    env.render()
+    import gym
+    env = gym.make('SpaceInvaders-v0')
+    env.reset()
+    env.render()
 
 This will install ``atari-py``, which automatically compiles the `Arcade Learning Environment <http://www.arcadelearningenvironment.org/>`_. This can take quite a while (a few minutes on a decent laptop), so just be prepared.
 
@@ -180,10 +180,10 @@ The board game environments are a variety of board games. If you didn't do the f
 
 .. code:: python
 
-    import gym
-    env = gym.make('Go9x9-v0')
-    env.reset()
-    env.render()
+    import gym
+    env = gym.make('Go9x9-v0')
+    env.reset()
+    env.render()
 
 Box2d
 -----------
@@ -192,10 +192,10 @@ Box2d is a 2D physics engine. You can install it via ``pip install -e '.[box2d]
 
 .. code:: python
 
-    import gym
-    env = gym.make('LunarLander-v2')
-    env.reset()
-    env.render()
+    import gym
+    env = gym.make('LunarLander-v2')
+    env.reset()
+    env.render()
 
 Classic control
 ---------------
@@ -204,10 +204,10 @@ These are a variety of classic control tasks, which would appear in a typical re
 
 .. code:: python
 
-    import gym
-    env = gym.make('CartPole-v0')
-    env.reset()
-    env.render()
+    import gym
+    env = gym.make('CartPole-v0')
+    env.reset()
+    env.render()
 
 MuJoCo
 ------
@@ -220,10 +220,10 @@ to set it up. You'll have to also run ``pip install -e '.[mujoco]'`` if you didn
 
 .. code:: python
 
-    import gym
-    env = gym.make('Humanoid-v1')
-    env.reset()
-    env.render()
+    import gym
+    env = gym.make('Humanoid-v1')
+    env.reset()
+    env.render()
 
 Toy text
 --------
@@ -232,10 +232,10 @@ Toy environments which are text-based. There's no extra dependency to install, s
 
 .. code:: python
 
-    import gym
-    env = gym.make('FrozenLake-v0')
-    env.reset()
-    env.render()
+    import gym
+    env = gym.make('FrozenLake-v0')
+    env.reset()
+    env.render()
 
 Examples
 ========
@@ -254,7 +254,7 @@ We are using `pytest <http://doc.pytest.org>`_ for tests. You can run them via:
 
 .. code:: shell
 
-    pytest
+    pytest
 
 
 .. _See What's New section below:
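Taken together, the series leaves every MuJoCo environment on a ``-v2`` id, backed by mujoco_py 1.50 and renderable without a display. A short smoke test of the end state, illustrative only and assuming MuJoCo and a GL context are set up:

.. code:: python

    import gym

    env = gym.make('Ant-v2')
    ob = env.reset()
    for _ in range(100):
        ob, reward, done, info = env.step(env.action_space.sample())
        frame = env.render(mode='rgb_array')  # exercises the offscreen path
        if done:
            ob = env.reset()
    env.render(close=True)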