
Merge pull request #167 from toslunar/gym0.23
Make `Monitor` optional
muupan authored Mar 13, 2022
Commit cee0777 (2 parents: ca5eadc + 7267243)
Showing 41 changed files with 154 additions and 140 deletions.
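Aside from the `Monitor` changes, most hunks shown below reformat power expressions from `10 ** n` to `10**n`, presumably to match the style newer Black releases enforce for simple power operands.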
examples/atari/reproduction/a3c/train_a3c.py (3 additions, 3 deletions)
@@ -33,7 +33,7 @@ def main():
parser.add_argument("--t-max", type=int, default=5)
parser.add_argument("--beta", type=float, default=1e-2)
parser.add_argument("--profile", action="store_true")
parser.add_argument("--steps", type=int, default=8 * 10 ** 7)
parser.add_argument("--steps", type=int, default=8 * 10**7)
parser.add_argument(
"--max-frames",
type=int,
@@ -84,15 +84,15 @@ def main():
# If seed=0 and processes=4, subprocess seeds are [0, 1, 2, 3].
# If seed=1 and processes=4, subprocess seeds are [4, 5, 6, 7].
process_seeds = np.arange(args.processes) + args.seed * args.processes
-assert process_seeds.max() < 2 ** 31
+assert process_seeds.max() < 2**31

args.outdir = experiments.prepare_output_dir(args, args.outdir)
print("Output files are saved in {}".format(args.outdir))

def make_env(process_idx, test):
# Use different random seeds for train and test envs
process_seed = process_seeds[process_idx]
-env_seed = 2 ** 31 - 1 - process_seed if test else process_seed
+env_seed = 2**31 - 1 - process_seed if test else process_seed
env = atari_wrappers.wrap_deepmind(
atari_wrappers.make_atari(args.env, max_frames=args.max_frames),
episode_life=not test,
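For readers skimming the seed hunks: the A3C script derives one seed per actor process and mirrors it for test environments. A minimal standalone sketch of that scheme (illustrative only; `derive_env_seeds` is a hypothetical name, not a PFRL helper):

import numpy as np

def derive_env_seeds(base_seed, num_processes, test):
    # Disjoint seed block per run: seed=0, processes=4 -> [0, 1, 2, 3];
    # seed=1, processes=4 -> [4, 5, 6, 7].
    process_seeds = np.arange(num_processes) + base_seed * num_processes
    assert process_seeds.max() < 2**31
    if test:
        # Reflect into the top of the 31-bit range so train and test
        # environments never receive the same seed.
        return [2**31 - 1 - int(s) for s in process_seeds]
    return [int(s) for s in process_seeds]

print(derive_env_seeds(1, 4, test=False))  # [4, 5, 6, 7]
print(derive_env_seeds(1, 4, test=True))   # [2147483643, 2147483642, 2147483641, 2147483640]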
examples/atari/reproduction/dqn/train_dqn.py (6 additions, 6 deletions)
@@ -64,13 +64,13 @@ def main():
parser.add_argument(
"--steps",
type=int,
-default=5 * 10 ** 7,
+default=5 * 10**7,
help="Total number of timesteps to train the agent.",
)
parser.add_argument(
"--replay-start-size",
type=int,
-default=5 * 10 ** 4,
+default=5 * 10**4,
help="Minimum replay buffer size before " + "performing gradient updates.",
)
parser.add_argument("--eval-n-steps", type=int, default=125000)
@@ -87,7 +87,7 @@ def main():

# Set different random seeds for train and test envs.
train_seed = args.seed
-test_seed = 2 ** 31 - 1 - args.seed
+test_seed = 2**31 - 1 - args.seed

args.outdir = experiments.prepare_output_dir(args, args.outdir)
print("Output files are saved in {}".format(args.outdir))
@@ -133,12 +133,12 @@ def make_env(test):
centered=True,
)

-rbuf = replay_buffers.ReplayBuffer(10 ** 6)
+rbuf = replay_buffers.ReplayBuffer(10**6)

explorer = explorers.LinearDecayEpsilonGreedy(
start_epsilon=1.0,
end_epsilon=0.1,
-decay_steps=10 ** 6,
+decay_steps=10**6,
random_action_func=lambda: np.random.randint(n_actions),
)

@@ -155,7 +155,7 @@ def phi(x):
gamma=0.99,
explorer=explorer,
replay_start_size=args.replay_start_size,
-target_update_interval=10 ** 4,
+target_update_interval=10**4,
clip_delta=True,
update_interval=4,
batch_accumulator="sum",
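The explorer configured in this hunk anneals epsilon linearly from 1.0 to 0.1 over the first 10**6 steps. A small sketch of that schedule, assuming the usual linear-decay semantics (`epsilon_at` is a hypothetical helper, not PFRL API):

def epsilon_at(step, start=1.0, end=0.1, decay_steps=10**6):
    # Linear interpolation from `start` to `end`, then held at `end`.
    if step >= decay_steps:
        return end
    return start + (end - start) * step / decay_steps

assert epsilon_at(0) == 1.0
assert abs(epsilon_at(500_000) - 0.55) < 1e-12
assert epsilon_at(2 * 10**6) == 0.1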
examples/atari/reproduction/iqn/train_iqn.py (6 additions, 6 deletions)
@@ -31,18 +31,18 @@ def main():
"--pretrained-type", type=str, default="best", choices=["best", "final"]
)
parser.add_argument("--load", type=str, default=None)
parser.add_argument("--final-exploration-frames", type=int, default=10 ** 6)
parser.add_argument("--final-exploration-frames", type=int, default=10**6)
parser.add_argument("--final-epsilon", type=float, default=0.01)
parser.add_argument("--eval-epsilon", type=float, default=0.001)
parser.add_argument("--steps", type=int, default=5 * 10 ** 7)
parser.add_argument("--steps", type=int, default=5 * 10**7)
parser.add_argument(
"--max-frames",
type=int,
default=30 * 60 * 60, # 30 minutes with 60 fps
help="Maximum number of frames for each episode.",
)
parser.add_argument("--replay-start-size", type=int, default=5 * 10 ** 4)
parser.add_argument("--target-update-interval", type=int, default=10 ** 4)
parser.add_argument("--replay-start-size", type=int, default=5 * 10**4)
parser.add_argument("--target-update-interval", type=int, default=10**4)
parser.add_argument("--eval-interval", type=int, default=250000)
parser.add_argument("--eval-n-steps", type=int, default=125000)
parser.add_argument("--update-interval", type=int, default=4)
@@ -85,7 +85,7 @@ def main():

# Set different random seeds for train and test envs.
train_seed = args.seed
-test_seed = 2 ** 31 - 1 - args.seed
+test_seed = 2**31 - 1 - args.seed

args.outdir = experiments.prepare_output_dir(args, args.outdir)
print("Output files are saved in {}".format(args.outdir))
@@ -138,7 +138,7 @@ def make_env(test):
# Use the same hyper parameters as https://arxiv.org/abs/1710.10044
opt = torch.optim.Adam(q_func.parameters(), lr=5e-5, eps=1e-2 / args.batch_size)

-rbuf = replay_buffers.ReplayBuffer(10 ** 6)
+rbuf = replay_buffers.ReplayBuffer(10**6)

explorer = explorers.LinearDecayEpsilonGreedy(
1.0,
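The optimizer hunk above follows the IQN paper by scaling Adam's eps inversely with batch size. To make the arithmetic concrete, assuming a batch size of 32 (an assumption; the default is not visible in this diff):

batch_size = 32  # assumed; not shown in this hunk
adam_eps = 1e-2 / batch_size
print(adam_eps)  # 0.0003125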
examples/atari/reproduction/rainbow/train_rainbow.py (5 additions, 5 deletions)
@@ -35,14 +35,14 @@ def main():
parser.add_argument("--load", type=str, default=None)
parser.add_argument("--eval-epsilon", type=float, default=0.0)
parser.add_argument("--noisy-net-sigma", type=float, default=0.5)
parser.add_argument("--steps", type=int, default=5 * 10 ** 7)
parser.add_argument("--steps", type=int, default=5 * 10**7)
parser.add_argument(
"--max-frames",
type=int,
default=30 * 60 * 60, # 30 minutes with 60 fps
help="Maximum number of frames for each episode.",
)
parser.add_argument("--replay-start-size", type=int, default=2 * 10 ** 4)
parser.add_argument("--replay-start-size", type=int, default=2 * 10**4)
parser.add_argument("--eval-n-steps", type=int, default=125000)
parser.add_argument("--eval-interval", type=int, default=250000)
parser.add_argument(
@@ -77,7 +77,7 @@ def main():

# Set different random seeds for train and test envs.
train_seed = args.seed
-test_seed = 2 ** 31 - 1 - args.seed
+test_seed = 2**31 - 1 - args.seed

args.outdir = experiments.prepare_output_dir(args, args.outdir)
print("Output files are saved in {}".format(args.outdir))
@@ -123,14 +123,14 @@ def make_env(test):
explorer = explorers.Greedy()

# Use the same hyper parameters as https://arxiv.org/abs/1710.02298
-opt = torch.optim.Adam(q_func.parameters(), 6.25e-5, eps=1.5 * 10 ** -4)
+opt = torch.optim.Adam(q_func.parameters(), 6.25e-5, eps=1.5 * 10**-4)

# Prioritized Replay
# Anneal beta from beta0 to 1 throughout training
update_interval = 4
betasteps = args.steps / update_interval
rbuf = replay_buffers.PrioritizedReplayBuffer(
-10 ** 6,
+10**6,
alpha=0.5,
beta0=0.4,
betasteps=betasteps,
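The prioritized-replay hunk anneals the importance-sampling exponent beta from beta0=0.4 toward 1 over betasteps = steps / update_interval updates (12.5M with the defaults above). A hedged sketch of that linear schedule (`beta_at` is illustrative, not PFRL's internal implementation):

def beta_at(update, beta0=0.4, betasteps=5 * 10**7 / 4):
    # Linear annealing toward 1.0; clamped once betasteps updates have passed.
    return min(1.0, beta0 + (1.0 - beta0) * update / betasteps)

assert beta_at(0) == 0.4
assert beta_at(5 * 10**7 // 4) == 1.0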
examples/atari/train_a2c_ale.py (4 additions, 4 deletions)
@@ -28,7 +28,7 @@ def main():
default=30 * 60 * 60, # 30 minutes with 60 fps
help="Maximum number of frames for each episode.",
)
parser.add_argument("--steps", type=int, default=8 * 10 ** 7)
parser.add_argument("--steps", type=int, default=8 * 10**7)
parser.add_argument("--update-steps", type=int, default=5)
parser.add_argument("--lr", type=float, default=7e-4)
parser.add_argument("--gamma", type=float, default=0.99, help="discount factor")
@@ -43,7 +43,7 @@ def main():
parser.add_argument(
"--alpha", type=float, default=0.99, help="RMSprop optimizer alpha"
)
parser.add_argument("--eval-interval", type=int, default=10 ** 6)
parser.add_argument("--eval-interval", type=int, default=10**6)
parser.add_argument("--eval-n-runs", type=int, default=10)
parser.add_argument("--demo", action="store_true", default=False)
parser.add_argument("--load", type=str, default="")
@@ -92,15 +92,15 @@ def main():
# If seed=0 and processes=4, subprocess seeds are [0, 1, 2, 3].
# If seed=1 and processes=4, subprocess seeds are [4, 5, 6, 7].
process_seeds = np.arange(args.num_envs) + args.seed * args.num_envs
-assert process_seeds.max() < 2 ** 31
+assert process_seeds.max() < 2**31

args.outdir = experiments.prepare_output_dir(args, args.outdir)
print("Output files are saved in {}".format(args.outdir))

def make_env(process_idx, test):
# Use different random seeds for train and test envs
process_seed = process_seeds[process_idx]
-env_seed = 2 ** 31 - 1 - process_seed if test else process_seed
+env_seed = 2**31 - 1 - process_seed if test else process_seed
env = atari_wrappers.wrap_deepmind(
atari_wrappers.make_atari(args.env, max_frames=args.max_frames),
episode_life=not test,
examples/atari/train_acer_ale.py (5 additions, 5 deletions)
@@ -38,15 +38,15 @@ def main():
parser.add_argument("--n-times-replay", type=int, default=4)
parser.add_argument("--beta", type=float, default=1e-2)
parser.add_argument("--profile", action="store_true")
parser.add_argument("--steps", type=int, default=10 ** 7)
parser.add_argument("--steps", type=int, default=10**7)
parser.add_argument(
"--max-frames",
type=int,
default=30 * 60 * 60, # 30 minutes with 60 fps
help="Maximum number of frames for each episode.",
)
parser.add_argument("--lr", type=float, default=7e-4)
parser.add_argument("--eval-interval", type=int, default=10 ** 5)
parser.add_argument("--eval-interval", type=int, default=10**5)
parser.add_argument("--eval-n-runs", type=int, default=10)
parser.add_argument("--use-lstm", action="store_true")
parser.add_argument("--demo", action="store_true", default=False)
@@ -87,7 +87,7 @@ def main():
# If seed=0 and processes=4, subprocess seeds are [0, 1, 2, 3].
# If seed=1 and processes=4, subprocess seeds are [4, 5, 6, 7].
process_seeds = np.arange(args.processes) + args.seed * args.processes
-assert process_seeds.max() < 2 ** 31
+assert process_seeds.max() < 2**31

args.outdir = experiments.prepare_output_dir(args, args.outdir)
print("Output files are saved in {}".format(args.outdir))
@@ -130,7 +130,7 @@ def main():
model.parameters(), lr=args.lr, eps=4e-3, alpha=0.99
)

-replay_buffer = EpisodicReplayBuffer(10 ** 6 // args.processes)
+replay_buffer = EpisodicReplayBuffer(10**6 // args.processes)

def phi(x):
# Feature extractor
@@ -156,7 +156,7 @@ def phi(x):
def make_env(process_idx, test):
# Use different random seeds for train and test envs
process_seed = process_seeds[process_idx]
-env_seed = 2 ** 31 - 1 - process_seed if test else process_seed
+env_seed = 2**31 - 1 - process_seed if test else process_seed
env = atari_wrappers.wrap_deepmind(
atari_wrappers.make_atari(args.env, max_frames=args.max_frames),
episode_life=not test,
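Note that ACER keeps one EpisodicReplayBuffer per actor process, so the script splits the 10**6-transition budget across processes. A quick illustration of that sizing (hypothetical process counts):

total_capacity = 10**6
for processes in (1, 4, 16):
    # Per-process capacity shrinks as more actors are launched,
    # keeping the aggregate replay footprint roughly constant.
    print(processes, total_capacity // processes)
# -> 1 1000000, 4 250000, 16 62500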
examples/atari/train_categorical_dqn_ale.py (7 additions, 7 deletions)
@@ -24,19 +24,19 @@ def main():
parser.add_argument("--gpu", type=int, default=0)
parser.add_argument("--demo", action="store_true", default=False)
parser.add_argument("--load", type=str, default=None)
parser.add_argument("--final-exploration-frames", type=int, default=10 ** 6)
parser.add_argument("--final-exploration-frames", type=int, default=10**6)
parser.add_argument("--final-epsilon", type=float, default=0.1)
parser.add_argument("--eval-epsilon", type=float, default=0.05)
parser.add_argument("--steps", type=int, default=10 ** 7)
parser.add_argument("--steps", type=int, default=10**7)
parser.add_argument(
"--max-frames",
type=int,
default=30 * 60 * 60, # 30 minutes with 60 fps
help="Maximum number of frames for each episode.",
)
parser.add_argument("--replay-start-size", type=int, default=5 * 10 ** 4)
parser.add_argument("--target-update-interval", type=int, default=10 ** 4)
parser.add_argument("--eval-interval", type=int, default=10 ** 5)
parser.add_argument("--replay-start-size", type=int, default=5 * 10**4)
parser.add_argument("--target-update-interval", type=int, default=10**4)
parser.add_argument("--eval-interval", type=int, default=10**5)
parser.add_argument("--update-interval", type=int, default=4)
parser.add_argument("--eval-n-runs", type=int, default=10)
parser.add_argument("--batch-size", type=int, default=32)
@@ -71,7 +71,7 @@ def main():

# Set different random seeds for train and test envs.
train_seed = args.seed
-test_seed = 2 ** 31 - 1 - args.seed
+test_seed = 2**31 - 1 - args.seed

args.outdir = experiments.prepare_output_dir(args, args.outdir)
print("Output files are saved in {}".format(args.outdir))
@@ -120,7 +120,7 @@ def make_env(test):
# Use the same hyper parameters as https://arxiv.org/abs/1707.06887
opt = torch.optim.Adam(q_func.parameters(), 2.5e-4, eps=1e-2 / args.batch_size)

-rbuf = replay_buffers.ReplayBuffer(10 ** 6)
+rbuf = replay_buffers.ReplayBuffer(10**6)

explorer = explorers.LinearDecayEpsilonGreedy(
1.0,
examples/atari/train_dqn_ale.py (8 additions, 8 deletions)
@@ -86,7 +86,7 @@ def main():
parser.add_argument(
"--final-exploration-frames",
type=int,
-default=10 ** 6,
+default=10**6,
help="Timesteps after which we stop " + "annealing exploration rate",
)
parser.add_argument(
Expand All @@ -112,7 +112,7 @@ def main():
parser.add_argument(
"--steps",
type=int,
-default=5 * 10 ** 7,
+default=5 * 10**7,
help="Total number of timesteps to train the agent.",
)
parser.add_argument(
Expand All @@ -124,19 +124,19 @@ def main():
parser.add_argument(
"--replay-start-size",
type=int,
-default=5 * 10 ** 4,
+default=5 * 10**4,
help="Minimum replay buffer size before " + "performing gradient updates.",
)
parser.add_argument(
"--target-update-interval",
type=int,
-default=3 * 10 ** 4,
+default=3 * 10**4,
help="Frequency (in timesteps) at which " + "the target network is updated.",
)
parser.add_argument(
"--eval-interval",
type=int,
-default=10 ** 5,
+default=10**5,
help="Frequency (in timesteps) of evaluation phase.",
)
parser.add_argument(
@@ -196,7 +196,7 @@ def main():

# Set different random seeds for train and test envs.
train_seed = args.seed
-test_seed = 2 ** 31 - 1 - args.seed
+test_seed = 2**31 - 1 - args.seed

args.outdir = experiments.prepare_output_dir(args, args.outdir)
print("Output files are saved in {}".format(args.outdir))
Expand Down Expand Up @@ -254,14 +254,14 @@ def make_env(test):
# Anneal beta from beta0 to 1 throughout training
betasteps = args.steps / args.update_interval
rbuf = replay_buffers.PrioritizedReplayBuffer(
-10 ** 6,
+10**6,
alpha=0.6,
beta0=0.4,
betasteps=betasteps,
num_steps=args.num_step_return,
)
else:
-rbuf = replay_buffers.ReplayBuffer(10 ** 6, args.num_step_return)
+rbuf = replay_buffers.ReplayBuffer(10**6, args.num_step_return)

def phi(x):
# Feature extractor
(Diff truncated: the remaining 33 changed files are not shown.)

