<h3 align="center">
  <img src="assets/atari_icon_web.png" width="300">
</h3>

# Atari

Research Playground built on top of [OpenAI's Atari Gym](https://gym.openai.com/envs/#atari), prepared for implementing various Reinforcement Learning algorithms.

It can emulate any of the following games:

> ['Asterix', 'Asteroids',
> 'MsPacman', 'Kaboom', 'BankHeist', 'Kangaroo',
> 'Skiing', 'FishingDerby', 'Krull', 'Berzerk',
> 'Tutankham', 'Zaxxon', 'Venture', 'Riverraid',
> 'Centipede', 'Adventure', 'BeamRider', 'CrazyClimber',
> 'TimePilot', 'Carnival', 'Tennis', 'Seaquest',
> 'Bowling', 'SpaceInvaders', 'Freeway', 'YarsRevenge',
> 'RoadRunner', 'JourneyEscape', 'WizardOfWor',
> 'Gopher', 'Breakout', 'StarGunner', 'Atlantis',
> 'DoubleDunk', 'Hero', 'BattleZone', 'Solaris',
> 'UpNDown', 'Frostbite', 'KungFuMaster', 'Pooyan',
> 'Pitfall', 'MontezumaRevenge', 'PrivateEye',
> 'AirRaid', 'Amidar', 'Robotank', 'DemonAttack',
> 'Defender', 'NameThisGame', 'Phoenix', 'Gravitar',
> 'ElevatorAction', 'Pong', 'VideoPinball', 'IceHockey',
> 'Boxing', 'Assault', 'Alien', 'Qbert', 'Enduro',
> 'ChopperCommand', 'Jamesbond']
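
For reference, and independently of this project's own training code, any of the listed games can be run directly through Gym with a random agent. The snippet below is a minimal sketch, assuming the classic `gym` API (pre-0.26 reset/step signatures) and an installed `gym[atari]`; the `Breakout-v0` environment id is just an example.

    import gym

    # Each game name above maps to a Gym environment id, e.g. 'Breakout' -> 'Breakout-v0'.
    env = gym.make("Breakout-v0")

    observation = env.reset()
    done = False
    score = 0.0
    while not done:
        action = env.action_space.sample()                  # random policy, just to exercise the env
        observation, reward, done, info = env.step(action)  # classic 4-tuple step API
        score += reward

    print("Random agent finished an episode with score", score)
    env.close()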

## Purpose
The ultimate goal of this project is to implement and compare various RL approaches, with Atari games as a common denominator.

## Usage

1. Clone the repo.
2. Go to the project's root folder.
3. Install the required packages: `pip install -r requirements.txt`.
4. Launch Atari. I recommend starting with the help command to see all available modes: `python atari.py --help`.

## Modes

All modes below were benchmarked with the following training budgets and hyperparameters:

- 10M steps: ~85h on a Tesla K80
- 5M steps: ~40h on a Tesla K80

Breakout (human: 28.3)
- [ ] DDQN
- [ ] GE

SpaceInvaders (human: 372.5)
- [ ] DDQN
- [ ] GE

Hyperparameters used for the benchmark runs:

    GAMMA = 0.99
    MEMORY_SIZE = 900000
    BATCH_SIZE = 32
    TRAINING_FREQUENCY = 4
    TARGET_NETWORK_UPDATE_FREQUENCY = TRAINING_FREQUENCY*10000
    MODEL_PERSISTENCE_UPDATE_FREQUENCY = 10000
    REPLAY_START_SIZE = 50000

    EXPLORATION_MAX = 1.0
    EXPLORATION_MIN = 0.1
    EXPLORATION_TEST = 0.01
    EXPLORATION_STEPS = 850000
    EXPLORATION_DECAY = (EXPLORATION_MAX-EXPLORATION_MIN)/EXPLORATION_STEPS
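
To make the exploration settings concrete: epsilon is annealed linearly from `EXPLORATION_MAX` to `EXPLORATION_MIN` over the first `EXPLORATION_STEPS` training steps. The sketch below just evaluates that schedule; the `epsilon_at` helper is illustrative and not part of this repo's code.

    EXPLORATION_MAX = 1.0
    EXPLORATION_MIN = 0.1
    EXPLORATION_STEPS = 850000
    EXPLORATION_DECAY = (EXPLORATION_MAX - EXPLORATION_MIN) / EXPLORATION_STEPS

    def epsilon_at(step):
        """Probability of acting randomly after `step` training steps."""
        return max(EXPLORATION_MIN, EXPLORATION_MAX - step * EXPLORATION_DECAY)

    for step in (0, 425000, 850000, 2000000):
        print(step, round(epsilon_at(step), 3))
    # 0 1.0
    # 425000 0.55
    # 850000 0.1
    # 2000000 0.1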

The Q-network itself is a Keras model with the standard DQN convolutional architecture: three convolutional layers, a 512-unit dense layer, and one linear output per action (its estimated Q-value).

    # Imports assumed by this excerpt (standalone Keras, TensorFlow 1.x era):
    from keras.models import Sequential
    from keras.layers import Conv2D, Dense, Flatten
    from keras.optimizers import RMSprop

    def __init__(self, input_shape, action_space):
        # Constructor of the network wrapper class (the class definition is omitted in this excerpt).
        # input_shape is the (channels, height, width) shape of the stacked, preprocessed frames.
        self.model = Sequential()
        self.model.add(Conv2D(32,
                              8,
                              strides=(4, 4),
                              padding="valid",
                              activation="relu",
                              input_shape=input_shape,
                              data_format="channels_first"))
        # input_shape on the remaining layers is redundant; Keras only uses it for the first layer.
        self.model.add(Conv2D(64,
                              4,
                              strides=(2, 2),
                              padding="valid",
                              activation="relu",
                              input_shape=input_shape,
                              data_format="channels_first"))
        self.model.add(Conv2D(64,
                              3,
                              strides=(1, 1),
                              padding="valid",
                              activation="relu",
                              input_shape=input_shape,
                              data_format="channels_first"))
        self.model.add(Flatten())
        self.model.add(Dense(512, activation="relu"))
        self.model.add(Dense(action_space))
        self.model.compile(loss="mean_squared_error",
                           optimizer=RMSprop(lr=0.00025,
                                             rho=0.95,
                                             epsilon=0.01),
                           metrics=["accuracy"])
        self.model.summary()
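
For context on the DDQN mode listed above, the sketch below shows how one Double-DQN training step could combine a network of this shape with the `GAMMA` and `BATCH_SIZE` hyperparameters. The `model`/`target_model` names and the minibatch layout are illustrative assumptions, not this repo's actual code.

    import numpy as np

    GAMMA = 0.99       # discount factor from the hyperparameters above
    BATCH_SIZE = 32    # minibatch size from the hyperparameters above

    def ddqn_training_step(model, target_model, batch):
        """One Double-DQN update on a minibatch of (states, actions, rewards, next_states, dones)."""
        states, actions, rewards, next_states, dones = batch

        q_values = model.predict(states)                   # current Q(s, *) estimates to be corrected
        next_q_online = model.predict(next_states)         # online net picks the next action...
        next_q_target = target_model.predict(next_states)  # ...target net evaluates it (the "double" part)

        for i in range(len(actions)):
            target = rewards[i]
            if not dones[i]:
                best_next_action = np.argmax(next_q_online[i])
                target += GAMMA * next_q_target[i][best_next_action]
            q_values[i][actions[i]] = target

        # Move the online network toward the corrected Q-value targets.
        model.fit(states, q_values, batch_size=BATCH_SIZE, verbose=0)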