Merge pull request #134 from UoA-CARES/feat/add-parse-observation
Feat/add parse observation
retinfai authored Oct 25, 2023
2 parents e3d5c23 + a7be3ec commit 3a5a0bc
Showing 8 changed files with 204 additions and 23 deletions.
66 changes: 66 additions & 0 deletions src/environments/environments/CarBeatEnvironment.py
@@ -19,6 +19,46 @@

class CarBeatEnvironment(Node):

"""
CarBeat Reinforcement Learning Environment:
Task:
Agent learns to drive a track and overtake a car that is driving at a constant speed.
The second car is using the Follow The Gap algorithm.
Observation:
full:
Car Position (x, y)
Car Orientation (x, y, z, w)
Car Velocity
Car Angular Velocity
Lidar Data
no_position:
Car Orientation (x, y, z, w)
Car Velocity
Car Angular Velocity
Lidar Data
lidar_only:
Car Velocity
Car Angular Velocity
Lidar Data
No. of lidar points is configurable
Action:
Its linear and angular velocity (Twist)
Reward:
+2 if it comes within REWARD_RANGE units of a goal
+200 if it overtakes the Follow The Gap car
-25 if it collides with a wall
Termination Conditions:
When the agent collides with a wall or the Follow The Gap car
Truncation Condition:
When the number of goals reached surpasses MAX_GOALS
"""
def __init__(self,
rl_car_name,
ftg_car_name,
@@ -365,4 +405,30 @@ def update_goal_service(self, x, y):
rclpy.spin_until_future_complete(self, future)

return future.result()

# Parses an observation and returns a human-readable string that can be printed to the terminal
def parse_observation(self, observation):
string = f'CarBeat Observation: \n'

if self.OBSERVATION_MODE == 'full':
string += f'Car Position: {observation[0:2]} \n'
string += f'Car Orientation: {observation[2:6]} \n'
string += f'Car Velocity: {observation[6]} \n'
string += f'Car Angular Velocity: {observation[7]} \n'
string += f'Car Lidar: {observation[8:]} \n'
elif self.OBSERVATION_MODE == 'no_position':
string += f'Car Orientation: {observation[:4]} \n'
string += f'Car Velocity: {observation[4]} \n'
string += f'Car Angular Velocity: {observation[5]} \n'
string += f'Car Lidar: {observation[6:]} \n'
elif self.OBSERVATION_MODE == 'lidar_only':
string += f'Car Velocity: {observation[0]} \n'
string += f'Car Angular Velocity: {observation[1]} \n'
string += f'Car Lidar: {observation[2:]} \n'
else:
raise ValueError(f'Invalid observation mode: {self.OBSERVATION_MODE}')

return string
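To illustrate the slicing above, here is a minimal usage sketch (not part of the commit): it builds a dummy 'full'-mode observation with 10 lidar points, matching the default num_lidar_points declared in the parameter list further down. The dummy values and the env variable are assumptions for illustration.

import numpy as np

num_lidar_points = 10  # default declared in the __declare_params list below

dummy_observation = np.concatenate([
    [1.0, 2.0],                       # Car Position (x, y)
    [0.0, 0.0, 0.0, 1.0],             # Car Orientation (x, y, z, w)
    [0.5],                            # Car Velocity
    [0.1],                            # Car Angular Velocity
    np.full(num_lidar_points, 3.0),   # Lidar Data
])

# Assuming env is a constructed CarBeatEnvironment with OBSERVATION_MODE == 'full':
# print(env.parse_observation(dummy_observation))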



32 changes: 23 additions & 9 deletions src/environments/environments/CarBlockEnvironment.py
@@ -14,25 +14,28 @@

class CarBlockEnvironment(F1tenthEnvironment):
"""
CarWall Reinforcement Learning Environment:
CarBlock Reinforcement Learning Environment:
Task:
Here the agent learns to drive the f1tenth car to a goal position
Agent learns to navigate to a goal position while avoiding obstacles that are dynamically placed at the start of each episode
Observation:
It's position (x, y), orientation (w, x, y, z), lidar points (approx. ~600 rays) and the goal's position (x, y)
Car Position (x, y)
Car Orientation (x, y, z, w)
Car Velocity
Car Angular Velocity
Lidar Data
Action:
It's linear and angular velocity
Its linear and angular velocity (Twist)
Reward:
It's progress toward the goal plus,
100+ if it reaches the goal plus,
-50 if it collides with the wall
Its progress towards the goal * 10
+100 if it reaches the goal
-25 if it collides with an obstacle or flips over
Termination Conditions:
When the agent is within REWARD_RANGE units or,
When the agent is within COLLISION_RANGE units
When the agent collides with a wall or reaches the goal
Truncation Condition:
When the number of steps surpasses MAX_STEPS
@@ -114,5 +117,16 @@ def compute_reward(self, state, next_state):
reward -= 25 # TODO: find optimal value for this

return reward

def parse_observation(self, observation):
string = f'CarBlock Observation\n'
string += f'Position: {observation[:2]}\n'
string += f'Orientation: {observation[2:6]}\n'
string += f'Car Velocity: {observation[6]}\n'
string += f'Car Angular Velocity: {observation[7]}\n'
string += f'Lidar: {observation[8:-2]}\n'
string += f'Goal Position: {observation[-2:]}\n'

return string
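The diff above only shows the tail of compute_reward. As a rough sketch of a reward that matches the docstring (progress towards the goal * 10, +100 at the goal, -25 on collision or flip), one possible shape is below; the function and argument names are hypothetical, not the repository's actual implementation.

import math

def compute_reward_sketch(state, next_state, goal_position, reward_range, collided):
    # Progress towards the goal, scaled by 10
    prev_distance = math.dist(state[:2], goal_position)
    curr_distance = math.dist(next_state[:2], goal_position)
    reward = (prev_distance - curr_distance) * 10

    if curr_distance < reward_range:
        reward += 100   # reached the goal
    if collided:
        reward -= 25    # hit an obstacle or flipped over

    return reward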


31 changes: 22 additions & 9 deletions src/environments/environments/CarGoalEnvironment.py
@@ -8,26 +8,28 @@

class CarGoalEnvironment(F1tenthEnvironment):
"""
CarWall Reinforcement Learning Environment:
CarGoal Reinforcement Learning Environment:
Task:
Here the agent learns to drive the f1tenth car to a goal position.
This happens all within a 10x10 box
This happens in an open area.
Observation:
It's position (x, y), orientation (w, x, y, z), lidar points (approx. ~600 rays) and the goal's position (x, y)
Car Position (x, y)
Car Orientation (x, y, z, w)
Car Velocity
Car Angular Velocity
Goal Position (x, y)
Action:
It's linear and angular velocity
Its linear and angular velocity
Reward:
It's progress toward the goal plus,
100+ if it reaches the goal plus,
-50 if it collides with the wall
Its progress toward the goal plus,
+100 if it reaches the goal
Termination Conditions:
When the agent is within REWARD_RANGE units of the goal or,
When the agent is within COLLISION_RANGE units of a wall
When the agent is within REWARD_RANGE units of the goal
Truncation Condition:
When the number of steps surpasses MAX_STEPS
@@ -101,5 +103,16 @@ def compute_reward(self, state, next_state):

return reward

def parse_observation(self, observation):

string = f'CarGoal Observation\n'
string += f'Position: {observation[:2]}\n'
string += f'Orientation: {observation[2:6]}\n'
string += f'Car Velocity: {observation[6]}\n'
string += f'Car Angular Velocity: {observation[7]}\n'
string += f'Goal Position: {observation[-2:]}\n'

return string



60 changes: 60 additions & 0 deletions src/environments/environments/CarTrackEnvironment.py
@@ -11,6 +11,43 @@

class CarTrackEnvironment(F1tenthEnvironment):

"""
CarTrack Reinforcement Learning Environment:
Task:
Agent learns to drive a track
Observation:
full:
Car Position (x, y)
Car Orientation (x, y, z, w)
Car Velocity
Car Angular Velocity
Lidar Data
no_position:
Car Orientation (x, y, z, w)
Car Velocity
Car Angular Velocity
Lidar Data
lidar_only:
Car Velocity
Car Angular Velocity
Lidar Data
Action:
Its linear and angular velocity (Twist)
Reward:
+2 if it comes within REWARD_RANGE units of a goal
-25 if it collides with a wall
Termination Conditions:
When the agent collides with a wall
Truncation Condition:
When the number of goals reached surpasses MAX_GOALS
"""

def __init__(self,
car_name,
reward_range=0.5,
@@ -208,3 +245,26 @@ def sleep(self):
rclpy.spin_once(self)

self.timer_future = Future()

def parse_observation(self, observation):

string = f'CarTrack Observation\n'

match (self.observation_mode):
case 'no_position':
string += f'Orientation: {observation[:4]}\n'
string += f'Car Velocity: {observation[4]}\n'
string += f'Car Angular Velocity: {observation[5]}\n'
string += f'Lidar: {observation[6:]}\n'
case 'lidar_only':
string += f'Car Velocity: {observation[0]}\n'
string += f'Car Angular Velocity: {observation[1]}\n'
string += f'Lidar: {observation[2:]}\n'
case _:
string += f'Position: {observation[:2]}\n'
string += f'Orientation: {observation[2:6]}\n'
string += f'Car Velocity: {observation[6]}\n'
string += f'Car Angular Velocity: {observation[7]}\n'
string += f'Lidar: {observation[8:]}\n'

return string
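The match statement above requires Python 3.10 or newer. As a hypothetical companion helper (not in the commit), the expected length of the flat observation for each mode can be derived from the slices used above, given the configurable number of lidar points:

def expected_observation_length(observation_mode: str, num_lidar_points: int) -> int:
    # Non-lidar entries per mode, matching the slicing in parse_observation
    base = {
        'full': 2 + 4 + 1 + 1,        # position + orientation + velocity + angular velocity
        'no_position': 4 + 1 + 1,     # orientation + velocity + angular velocity
        'lidar_only': 1 + 1,          # velocity + angular velocity
    }
    if observation_mode not in base:
        raise ValueError(f'Invalid observation mode: {observation_mode}')
    return base[observation_mode] + num_lidar_points

# e.g. expected_observation_length('lidar_only', 10) == 12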
19 changes: 17 additions & 2 deletions src/environments/environments/CarWallEnvironment.py
@@ -18,7 +18,12 @@ class CarWallEnvironment(F1tenthEnvironment):
This happens all within a 10x10 box
Observation:
It's position (x, y), orientation (w, x, y, z), lidar points (approx. ~600 rays) and the goal's position (x, y)
Car Position (x, y)
Car Orientation (x, y, z, w)
Car Velocity
Car Angular Velocity
Lidar Data
Goal Position (x, y)
Action:
Its linear and angular velocity
@@ -111,4 +116,14 @@ def compute_reward(self, state, next_state):

return reward


def parse_observation(self, observation):

string = f'CarWall Observation\n'
string += f'Position: {observation[:2]}\n'
string += f'Orientation: {observation[2:6]}\n'
string += f'Car Velocity: {observation[6]}\n'
string += f'Car Angular Velocity: {observation[7]}\n'
string += f'Lidar Points: {observation[8:-2]}\n'
string += f'Goal Position: {observation[-2:]}\n'

return string
7 changes: 5 additions & 2 deletions src/reinforcement_learning/config/train.yaml
@@ -2,5 +2,8 @@ train:
ros__parameters:
environment: 'CarBeat' # CarGoal, CarWall, CarBlock, CarTrack, CarBeat
track: 'multi_track' # track_1, track_2, track_3, multi_track, multi_track_testing -> only applies for CarTrack
car_name: 'New_Car'
ftg_car_name: 'New_Car_two'
car_name: 'f1tenth'
algorithm: 'TD3'
g: 1
buffer_size: 50000
observation_mode: 'lidar_only'
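A minimal rclpy sketch (not part of the commit) of how a node could declare and read a few of the parameters above; the node name 'train' mirrors the top-level YAML key, while the class name and the parameter subset are illustrative assumptions.

import rclpy
from rclpy.node import Node

class TrainConfig(Node):
    def __init__(self):
        super().__init__('train')
        # Defaults mirror the YAML above; ROS 2 overrides them when the file is loaded
        self.declare_parameter('environment', 'CarBeat')
        self.declare_parameter('observation_mode', 'lidar_only')
        self.declare_parameter('buffer_size', 50000)

    def summary(self) -> str:
        environment = self.get_parameter('environment').value
        observation_mode = self.get_parameter('observation_mode').value
        buffer_size = self.get_parameter('buffer_size').value
        return f'{environment=}, {observation_mode=}, {buffer_size=}'

# rclpy.init(); print(TrainConfig().summary()); rclpy.shutdown()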
@@ -28,7 +28,7 @@ def __declare_params():
('step_length', 0.25),
('reward_range', 0.2),
('collision_range', 0.2),
('observation_mode', 'no_position'),
('observation_mode', 'lidar_only'),
('max_goals', 500),
('num_lidar_points', 10),

@@ -19,6 +19,11 @@ def off_policy_train(env, agent, memory, record, algorithm_config):
evaluate = False

state, _ = env.reset()

obs = env.parse_observation(state)
env.get_logger().info('-----------------------------------')
env.get_logger().info('\n' + obs)
env.get_logger().info('-----------------------------------')

for step_counter in range(max_steps_training):
episode_timesteps += 1
@@ -133,6 +138,11 @@ def ppo_train(env, agent, memory, record, algorithm_config):

state, _ = env.reset()

obs = env.parse_observation(state)
env.get_logger().info('-----------------------------------')
env.get_logger().info('\n' + obs)
env.get_logger().info('-----------------------------------')
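The same four logging lines appear in both off_policy_train and ppo_train; a hypothetical helper (not in the commit) could factor them out:

def log_observation(env, state):
    # Pretty-print the freshly reset observation via the environment's parser
    observation_string = env.parse_observation(state)
    env.get_logger().info('-----------------------------------')
    env.get_logger().info('\n' + observation_string)
    env.get_logger().info('-----------------------------------')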

for step_counter in range(max_steps_training):
episode_timesteps += 1
