Fix PPO evaluation: call select_action_from_policy without the evaluation flag

UoA-CARES · Sep 8, 2023 · e491ce6 · e491ce6
1 parent 9581281
commit e491ce6
Showing 1 changed file with 6 additions and 1 deletion.
diff --git a/src/reinforcement_learning/reinforcement_learning/train.py b/src/reinforcement_learning/reinforcement_learning/train.py
@@ -28,6 +28,7 @@ def main():
     global BATCH_SIZE
     global EVALUATE_EVERY_N_STEPS
     global EVALUATE_FOR_M_EPISODES
+    global ALGORITHM
 
     ENVIRONMENT, \
     ALGORITHM, \
@@ -309,7 +310,11 @@ def evaluate_policy(env, agent, num_episodes):
 
         while not truncated and not terminated:
 
-            action = agent.select_action_from_policy(state, evaluation=True)
+            if ALGORITHM == 'PPO':
+                action = agent.select_action_from_policy(state)
+            else:
+                action = agent.select_action_from_policy(state, evaluation=True)
+
             action = hlp.denormalize(action, env.MAX_ACTIONS, env.MIN_ACTIONS)
             next_state, reward, terminated, truncated, _ = env.step(action)