
Commit 70f76fb

hopefully working memory management of experience replay
1 parent 2cd2cb1 commit 70f76fb

File tree

4 files changed: +58 -33 lines

4 files changed

+58
-33
lines changed

.idea/workspace.xml

+33 -25

Generated IDE file; diff not rendered by default.

atari.py

+15 -1

```diff
@@ -58,7 +58,21 @@ def _main_loop(self, game_model, env, render, total_step_limit):
 
     def _preprocess_observation(self, obs):
         image = Image.fromarray(obs, "RGB").convert("L").resize((FRAME_SIZE, FRAME_SIZE))
-        return np.asarray(image.getdata(), dtype=np.uint8).reshape(image.size[1], image.size[0]) #TODO: possibly memory heavy
+        return np.asarray(image.getdata(), dtype=np.uint8).reshape(image.size[1], image.size[0]) #TODO: possibly memory heavy, we should pass regular lists here
+
+    # class WarpFrame(gym.ObservationWrapper):
+    #     def __init__(self, env):
+    #         """Warp frames to 84x84 as done in the Nature paper and later work."""
+    #         gym.ObservationWrapper.__init__(self, env)
+    #         self.width = 84
+    #         self.height = 84
+    #         self.observation_space = spaces.Box(low=0, high=255,
+    #                                             shape=(self.height, self.width, 1), dtype=np.uint8)
+    #
+    #     def observation(self, frame):
+    #         frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
+    #         frame = cv2.resize(frame, (self.width, self.height), interpolation=cv2.INTER_AREA)
+    #         return frame[:, :, None]
 
     def _args(self):
         parser = argparse.ArgumentParser()
```
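
The commented-out WarpFrame wrapper and the PIL-based method do the same job: grayscale + downscale each frame before it enters the replay memory. What matters for memory is the dtype: kept as uint8, a frame costs FRAME_SIZE² bytes, eight times less than float64. A minimal standalone sketch of the PIL path, assuming FRAME_SIZE = 84 (the resolution the WarpFrame comment uses; the constant itself is defined elsewhere in the repo):

```python
import numpy as np
from PIL import Image

FRAME_SIZE = 84  # assumed; defined elsewhere in the repo

def preprocess_observation(obs):
    # Grayscale ("L") + downscale, as in the method above.
    image = Image.fromarray(obs, "RGB").convert("L").resize((FRAME_SIZE, FRAME_SIZE))
    # np.asarray(image) yields the same (height, width) uint8 array as the
    # getdata()/reshape route, without materialising an intermediate sequence,
    # which is one way to address the TODO.
    return np.asarray(image, dtype=np.uint8)
```

The float64 conversion is deferred to `_train` (see the last hunk in game_models/ddqn_game_model.py below), so the buffer itself stays uint8.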

convolutional_neural_network.py

+3 -1

```diff
@@ -32,6 +32,8 @@ def __init__(self, input_shape, action_space):
         self.model.add(Dense(512, activation="relu"))
         self.model.add(Dense(action_space))
         self.model.compile(loss="mean_squared_error",
-                           optimizer=RMSprop(lr=0.00025, rho=0.95, epsilon=0.01),
+                           optimizer=RMSprop(lr=0.00025,
+                                             rho=0.95,
+                                             epsilon=0.01),
                            metrics=["accuracy"])
         self.model.summary()
```
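
This hunk only reflows the RMSprop arguments; lr=0.00025, rho=0.95, epsilon=0.01 are the hyperparameters from DeepMind's Nature DQN paper. For context, a sketch of the full constructor this tail plausibly belongs to, using the standard Nature-DQN convolutional stack; the Conv2D layers are an assumption, since the hunk does not show them:

```python
from keras.models import Sequential
from keras.layers import Conv2D, Flatten, Dense
from keras.optimizers import RMSprop

def build_model(input_shape, action_space):
    model = Sequential()
    # Assumed Nature-DQN conv stack (not shown in this diff).
    model.add(Conv2D(32, 8, strides=(4, 4), activation="relu", input_shape=input_shape))
    model.add(Conv2D(64, 4, strides=(2, 2), activation="relu"))
    model.add(Conv2D(64, 3, strides=(1, 1), activation="relu"))
    model.add(Flatten())
    model.add(Dense(512, activation="relu"))
    model.add(Dense(action_space))  # linear output: one Q-value per action
    model.compile(loss="mean_squared_error",
                  optimizer=RMSprop(lr=0.00025,  # DeepMind's DQN settings
                                    rho=0.95,
                                    epsilon=0.01),
                  metrics=["accuracy"])
    return model
```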

game_models/ddqn_game_model.py

+7 -6

```diff
@@ -25,7 +25,8 @@
 class DDQNGameModel(BaseGameModel):
 
     def __init__(self, game_name, mode_name, input_shape, action_space, logger_path, model_path):
-        BaseGameModel.__init__(self, game_name,
+        BaseGameModel.__init__(self,
+                               game_name,
                                mode_name,
                                logger_path,
                                input_shape,
@@ -47,7 +48,7 @@ class DDQNSolver(DDQNGameModel):
 
     def __init__(self, game_name, input_shape, action_space):
         testing_model_path = "./output/neural_nets/" + game_name + "/ddqn/testing/model.h5"
-        assert os.path.exists(os.path.dirname(testing_model_path)), "No testing model in: " + str(testing_model_path)
+        assert os.path.exists(os.path.dirname(testing_model_path)), "No model to test in: " + str(testing_model_path)
         DDQNGameModel.__init__(self,
                                game_name,
                                "DDQN testing",
```

```diff
@@ -89,10 +90,10 @@ def move(self, state):
         return np.argmax(q_values[0])
 
     def remember(self, current_state, action, reward, next_state, terminal):
-        self.memory.append({"current_state": current_state, #np.asarray([current_state])
+        self.memory.append({"current_state": np.asarray([current_state]),
                            "action": action,
                            "reward": reward,
-                           "next_state": next_state,
+                           "next_state": np.asarray([next_state]),
                            "terminal": terminal})
         if len(self.memory) > MEMORY_SIZE:
             self.memory.pop(0)
```
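
Trimming with `pop(0)` on a Python list is O(n) per eviction once the buffer reaches MEMORY_SIZE, since every remaining entry shifts down. A bounded `collections.deque` gives the same keep-the-newest semantics with O(1) appends and evictions. A minimal sketch of an equivalent buffer, not what this commit does; the MEMORY_SIZE value is assumed, the constant lives elsewhere in the repo:

```python
from collections import deque

import numpy as np

MEMORY_SIZE = 900000  # assumed value

# maxlen makes the deque drop its oldest entry automatically on append,
# replacing the explicit `if len(...) > MEMORY_SIZE: pop(0)` check.
memory = deque(maxlen=MEMORY_SIZE)

def remember(current_state, action, reward, next_state, terminal):
    memory.append({"current_state": np.asarray([current_state]),
                   "action": action,
                   "reward": reward,
                   "next_state": np.asarray([next_state]),
                   "terminal": terminal})
```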

```diff
@@ -127,9 +128,9 @@ def _train(self):
         max_q_values = []
 
         for entry in batch:
-            current_state = np.expand_dims(entry["current_state"].astype(np.float64), axis=0)
+            current_state = entry["current_state"].astype(np.float64)
             current_states.append(current_state)
-            next_state = np.expand_dims(entry["next_state"].astype(np.float64), axis=0)
+            next_state = entry["next_state"].astype(np.float64)
             next_state_prediction = self.ddqn_target.predict(next_state).ravel()
             next_q_value = np.max(next_state_prediction)
             q = list(self.ddqn.predict(current_state)[0])
```
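
Storing each state as `np.asarray([current_state])` bakes the leading batch axis into the buffer, which is why both `expand_dims` calls disappear: an entry comes out of memory already shaped `(1, ...)` and can be fed to `predict` directly. A small shape check illustrating the equivalence, with a hypothetical 84x84 four-frame stack:

```python
import numpy as np

# Hypothetical stacked-frame state, kept as uint8 in the buffer.
state = np.zeros((4, 84, 84), dtype=np.uint8)

stored = np.asarray([state])  # what remember() keeps after this commit
old = np.expand_dims(state.astype(np.float64), axis=0)  # the removed _train path

assert stored.shape == old.shape == (1, 4, 84, 84)
# stored.astype(np.float64) reproduces the old array exactly, so _train can
# pass entry["current_state"].astype(np.float64) straight into predict().
assert np.array_equal(stored.astype(np.float64), old)
```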
