From d199778b9eb06c1370af65a1326238de2ab36746 Mon Sep 17 00:00:00 2001 From: Ariel Kwiatkowski Date: Sat, 25 Sep 2021 20:00:28 +0200 Subject: [PATCH] Pendulum updates (#2423) * Pendulum env updates Simplify the math a bit (no difference in behavior) * Reorder the clipping of angular velocity * Bump version of Pendulum * black * Update mentions of Pendulum-v0 to Pendulum-v1. --- gym/envs/__init__.py | 2 +- gym/envs/classic_control/pendulum.py | 7 ++----- gym/wrappers/frame_stack.py | 2 +- gym/wrappers/test_frame_stack.py | 2 +- gym/wrappers/test_record_episode_statistics.py | 2 +- gym/wrappers/test_rescale_action.py | 4 ++-- gym/wrappers/test_time_aware_observation.py | 2 +- gym/wrappers/test_transform_observation.py | 2 +- gym/wrappers/test_transform_reward.py | 2 +- 9 files changed, 11 insertions(+), 14 deletions(-) diff --git a/gym/envs/__init__.py b/gym/envs/__init__.py index a6b48bdc147..a90b0370730 100644 --- a/gym/envs/__init__.py +++ b/gym/envs/__init__.py @@ -42,7 +42,7 @@ ) register( - id="Pendulum-v0", + id="Pendulum-v1", entry_point="gym.envs.classic_control:PendulumEnv", max_episode_steps=200, ) diff --git a/gym/envs/classic_control/pendulum.py b/gym/envs/classic_control/pendulum.py index d728284b82e..10ced4d43d2 100644 --- a/gym/envs/classic_control/pendulum.py +++ b/gym/envs/classic_control/pendulum.py @@ -41,12 +41,9 @@ def step(self, u): self.last_u = u # for rendering costs = angle_normalize(th) ** 2 + 0.1 * thdot ** 2 + 0.001 * (u ** 2) - newthdot = ( - thdot - + (-3 * g / (2 * l) * np.sin(th + np.pi) + 3.0 / (m * l ** 2) * u) * dt - ) - newth = th + newthdot * dt + newthdot = thdot + (3 * g / (2 * l) * np.sin(th) + 3.0 / (m * l ** 2) * u) * dt newthdot = np.clip(newthdot, -self.max_speed, self.max_speed) + newth = th + newthdot * dt self.state = np.array([newth, newthdot]) return self._get_obs(), -costs, False, {} diff --git a/gym/wrappers/frame_stack.py b/gym/wrappers/frame_stack.py index 5980c9dcd65..e087482c6ae 100644 --- a/gym/wrappers/frame_stack.py +++ b/gym/wrappers/frame_stack.py @@ -64,7 +64,7 @@ class FrameStack(ObservationWrapper): r"""Observation wrapper that stacks the observations in a rolling manner. For example, if the number of stacks is 4, then the returned observation contains - the most recent 4 observations. For environment 'Pendulum-v0', the original observation + the most recent 4 observations. For environment 'Pendulum-v1', the original observation is an array with shape [3], so if we stack 4 observations, the processed observation has shape [4, 3]. diff --git a/gym/wrappers/test_frame_stack.py b/gym/wrappers/test_frame_stack.py index 59eac35d959..d899bb81a52 100644 --- a/gym/wrappers/test_frame_stack.py +++ b/gym/wrappers/test_frame_stack.py @@ -12,7 +12,7 @@ lz4 = None -@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v0", "Pong-v0"]) +@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1", "Pong-v0"]) @pytest.mark.parametrize("num_stack", [2, 3, 4]) @pytest.mark.parametrize( "lz4_compress", diff --git a/gym/wrappers/test_record_episode_statistics.py b/gym/wrappers/test_record_episode_statistics.py index ce68eab4991..9e088064761 100644 --- a/gym/wrappers/test_record_episode_statistics.py +++ b/gym/wrappers/test_record_episode_statistics.py @@ -4,7 +4,7 @@ from gym.wrappers import RecordEpisodeStatistics -@pytest.mark.parametrize("env_id", ["CartPole-v0", "Pendulum-v0"]) +@pytest.mark.parametrize("env_id", ["CartPole-v0", "Pendulum-v1"]) @pytest.mark.parametrize("deque_size", [2, 5]) def test_record_episode_statistics(env_id, deque_size): env = gym.make(env_id) diff --git a/gym/wrappers/test_rescale_action.py b/gym/wrappers/test_rescale_action.py index 4e408aa540f..c33e5adc85e 100644 --- a/gym/wrappers/test_rescale_action.py +++ b/gym/wrappers/test_rescale_action.py @@ -12,8 +12,8 @@ def test_rescale_action(): env = RescaleAction(env, -1, 1) del env - env = gym.make("Pendulum-v0") - wrapped_env = RescaleAction(gym.make("Pendulum-v0"), -1, 1) + env = gym.make("Pendulum-v1") + wrapped_env = RescaleAction(gym.make("Pendulum-v1"), -1, 1) seed = 0 env.seed(seed) diff --git a/gym/wrappers/test_time_aware_observation.py b/gym/wrappers/test_time_aware_observation.py index 018f4971398..a996d608cdc 100644 --- a/gym/wrappers/test_time_aware_observation.py +++ b/gym/wrappers/test_time_aware_observation.py @@ -4,7 +4,7 @@ from gym.wrappers import TimeAwareObservation -@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v0"]) +@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"]) def test_time_aware_observation(env_id): env = gym.make(env_id) wrapped_env = TimeAwareObservation(env) diff --git a/gym/wrappers/test_transform_observation.py b/gym/wrappers/test_transform_observation.py index 8c43cfb68ad..6818cce77ea 100644 --- a/gym/wrappers/test_transform_observation.py +++ b/gym/wrappers/test_transform_observation.py @@ -6,7 +6,7 @@ from gym.wrappers import TransformObservation -@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v0"]) +@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"]) def test_transform_observation(env_id): affine_transform = lambda x: 3 * x + 2 env = gym.make(env_id) diff --git a/gym/wrappers/test_transform_reward.py b/gym/wrappers/test_transform_reward.py index 0f96f2ffed1..6bc380cf2ce 100644 --- a/gym/wrappers/test_transform_reward.py +++ b/gym/wrappers/test_transform_reward.py @@ -6,7 +6,7 @@ from gym.wrappers import TransformReward -@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v0"]) +@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"]) def test_transform_reward(env_id): # use case #1: scale scales = [0.1, 200]