Merge pull request #107 from UoA-CARES/fix-termination-condition
Fix termination condition
retinfai authored Aug 29, 2023
2 parents a55248d + c56195d commit e105aab
Showing 25 changed files with 793 additions and 176 deletions.
2 changes: 2 additions & 0 deletions helperScripts/1manual.sh
@@ -18,4 +18,6 @@ export ROS_DOMAIN_ID=50
colcon build
ros2 launch reinforcement_learning sanity_check.launch.py &

gz sim -g &

wait
2 changes: 2 additions & 0 deletions helperScripts/2train.sh
@@ -18,4 +18,6 @@ export ROS_DOMAIN_ID=100
colcon build
ros2 launch reinforcement_learning train.launch.py &

gz sim -g &

wait
2 changes: 2 additions & 0 deletions helperScripts/3test.sh
@@ -18,4 +18,6 @@ export ROS_DOMAIN_ID=150
colcon build
ros2 launch reinforcement_learning test.launch.py &

gz sim -g &

wait
2 changes: 1 addition & 1 deletion src/controllers/controllers/ftg_policy.py
@@ -214,7 +214,7 @@ def select_action(self,state,goal_pos):
dmin = min(border_ranges)
alpha = 1
goal_angle = self.constrain_angle(goal_angle)
final_heading_angle = ((alpha/dmin)*gap_centre_angle+goal_angle)/((alpha/dmin)+1)
final_heading_angle = gap_centre_angle #((alpha/dmin)*gap_centre_angle+goal_angle)/((alpha/dmin)+1)
# Convert to angular velocity
ang = self.angle_to_ang_vel(final_heading_angle, lin)
action = np.asarray([lin, ang])
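For reference, the expression commented out above blended the gap-centre heading with the goal heading, weighting the gap centre more heavily as the nearest obstacle distance dmin shrinks (weight alpha/dmin); after this change the policy steers purely toward the gap centre. A minimal sketch of that blend, with the helper name and example numbers being illustrative rather than taken from the repo:

def blend_heading(gap_centre_angle, goal_angle, dmin, alpha=1.0):
    # Weighted blend from the commented-out line: the closer the nearest
    # obstacle (small dmin), the more the gap-centre angle dominates.
    w = alpha / dmin
    return (w * gap_centre_angle + goal_angle) / (w + 1.0)

print(blend_heading(0.3, -0.2, dmin=0.5))  # obstacle 0.5 m away: ~0.133, gap centre dominates
print(blend_heading(0.3, -0.2, dmin=5.0))  # obstacle 5 m away: ~-0.117, goal angle dominates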
239 changes: 131 additions & 108 deletions src/environments/environments/CarTrackEnvironment.py
@@ -3,48 +3,24 @@
import numpy as np
import rclpy
from rclpy import Future
from sensor_msgs.msg import LaserScan

import random
from environment_interfaces.srv import Reset
from environments.F1tenthEnvironment import F1tenthEnvironment
from .termination import has_collided, has_flipped_over
from .util import process_odom, reduce_lidar
from .track_reset import track_info
from .goal_positions import goal_positions
from .waypoints import waypoints

class CarTrackEnvironment(F1tenthEnvironment):
"""
CarTrack Reinforcement Learning Environment:
Task:
Here the agent learns to drive the f1tenth car to a goal position
Observation:
Its position (x, y), orientation (w, x, y, z), lidar points (approx. 600 rays) and the goal's position (x, y)
Action:
Its linear and angular velocity
Reward:
Its progress toward the goal, plus
+50 if it reaches the goal and
-25 if it collides with the wall
Termination Conditions:
When the agent is within REWARD_RANGE units of the goal, or
when an obstacle is within COLLISION_RANGE units
Truncation Condition:
When the number of steps surpasses MAX_STEPS
"""

def __init__(self,
car_name,
reward_range=1,
reward_range=0.5,
max_steps=500,
collision_range=0.2,
step_length=0.5,
track='track_1',
observation_mode='full',
observation_mode='lidar_only',
):
super().__init__('car_track', car_name, max_steps, step_length)

@@ -63,30 +39,78 @@ def __init__(self,
self.REWARD_RANGE = reward_range

self.observation_mode = observation_mode
self.track = track

# Reset Client -----------------------------------------------
self.goal_number = 0
self.all_goals = track_info[track]['goals']

self.car_reset_positions = track_info[track]['reset']
self.goals_reached = 0
self.start_goal_index = 0
self.steps_since_last_goal = 0

if track != 'multi_track':
self.all_goals = goal_positions[track]
self.car_waypoints = waypoints[track]
else:
austin_gp = goal_positions['austin_track']
budapest_gp = goal_positions['budapest_track']
budapest_gp = [[x + 200, y] for x, y in budapest_gp]
hockenheim_gp = goal_positions['hockenheim_track']
hockenheim_gp = [[x + 300, y] for x, y in hockenheim_gp]

self.all_car_goals = {
'austin_track': austin_gp,
'budapest_track': budapest_gp,
'hockenheim_track': hockenheim_gp
}

austin_wp = waypoints['austin_track']

budapest_wp = []
for x, y, yaw, index in waypoints['budapest_track']:
x += 200
budapest_wp.append((x, y, yaw, index))

hockenheim_wp = []
for x, y, yaw, index in waypoints['hockenheim_track']:
x += 300
hockenheim_wp.append((x, y, yaw, index))

self.all_car_waypoints = {
'austin_track': austin_wp,
'budapest_track': budapest_wp,
'hockenheim_track': hockenheim_wp
}

self.current_track = 'austin_track'

self.all_goals = self.all_car_goals[self.current_track]
self.car_waypoints = self.all_car_waypoints[self.current_track]

self.get_logger().info('Environment Setup Complete')
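A note on the multi_track branch above: shifting every budapest_track goal and waypoint by +200 and every hockenheim_track one by +300 along x lets the three tracks coexist in a single world without overlapping, and reset() can then switch tracks simply by swapping the active goal and waypoint lists. A small sketch of the same transform, with made-up coordinates:

def offset_track(goals, wps, dx):
    # Shift a track's goal positions and (x, y, yaw, index) waypoints along x.
    goals = [[x + dx, y] for x, y in goals]
    wps = [(x + dx, y, yaw, index) for x, y, yaw, index in wps]
    return goals, wps

# e.g. a budapest waypoint (12.3, -4.5, 1.57, 7) would become (212.3, -4.5, 1.57, 7).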

def reset(self):
self.step_counter = 0
self.steps_since_last_goal = 0
self.goals_reached = 0

self.set_velocity(0, 0)

if self.track == 'multi_track':
self.current_track = random.choice(list(self.all_car_goals.keys()))
self.all_goals = self.all_car_goals[self.current_track]
self.car_waypoints = self.all_car_waypoints[self.current_track]

# TODO: Remove Hard coded-ness of 10x10
self.goal_number = 0
self.goal_position = self.generate_goal(self.goal_number)

while not self.timer_future.done():
rclpy.spin_once(self)
# New random starting point for car
car_x, car_y, car_yaw, index = random.choice(self.car_waypoints)

# Update goal pointer to reflect starting position
self.start_goal_index = index
self.goal_position = self.all_goals[self.start_goal_index]

self.timer_future = Future()
self.sleep()

self.call_reset_service()
goal_x, goal_y = self.goal_position
self.call_reset_service(car_x=car_x, car_y=car_y, car_Y=car_yaw, goal_x=goal_x, goal_y=goal_y)

observation, _ = self.get_observation()

@@ -102,66 +126,21 @@ def step(self, action):
lin_vel, ang_vel = action
self.set_velocity(lin_vel, ang_vel)

while not self.timer_future.done():
rclpy.spin_once(self)

self.timer_future = Future()
self.sleep()

next_state, full_next_state = self.get_observation()

reward = self.compute_reward(full_state, full_next_state)
terminated = self.is_terminated(full_next_state)
truncated = self.step_counter >= self.MAX_STEPS
truncated = self.steps_since_last_goal >= 10
info = {}

return next_state, reward, terminated, truncated, info
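To make the new terminated/truncated split concrete: terminated now means the episode genuinely ended (collision, flip, or every goal reached), while truncated fires after ten steps without reaching a new goal. A training loop on top of this environment would typically reset on either flag but only treat terminated as a true end of episode when bootstrapping; a minimal sketch, with the agent object and its method names assumed rather than taken from this repo:

state, _ = env.reset()          # assuming reset() returns (observation, info)
done = False
while not done:
    action = agent.select_action(state)
    next_state, reward, terminated, truncated, _ = env.step(action)
    # Bootstrap past a truncation (episode merely cut short), not past a termination.
    agent.store(state, action, reward, next_state, done=terminated)
    state = next_state
    done = terminated or truncated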

def is_terminated(self, state):
return has_collided(state[8:], self.COLLISION_RANGE) \
or has_flipped_over(state[2:6]) or \
self.goal_number >= len(self.all_goals)


def generate_goal(self, number):
print("Goal", number, "spawned")
return self.all_goals[number % len(self.all_goals)]

def call_reset_service(self):
"""
Reset the car and goal position
"""

x, y = self.goal_position

request = Reset.Request()
request.gx = x
request.gy = y
request.cx = self.car_reset_positions['x']
request.cy = self.car_reset_positions['y']
request.cyaw = self.car_reset_positions['yaw']
request.flag = "car_and_goal"

future = self.reset_client.call_async(request)
rclpy.spin_until_future_complete(self, future)

return future.result()

def update_goal_service(self, number):
"""
Reset the goal position
"""

x, y = self.generate_goal(number)
self.goal_position = [x, y]

request = Reset.Request()
request.gx = x
request.gy = y
request.flag = "goal_only"

future = self.reset_client.call_async(request)
rclpy.spin_until_future_complete(self, future)

return future.result()
self.goals_reached == len(self.all_goals)
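Because the diff lists the removed helpers between the two halves of the change, the updated termination check is easy to misread; pieced together from the lines above, it now reads roughly as follows (a reconstruction, since the unchanged context lines are collapsed):

def is_terminated(self, state):
    # state[8:] holds the reduced lidar ranges, state[2:6] the orientation quaternion.
    return has_collided(state[8:], self.COLLISION_RANGE) \
        or has_flipped_over(state[2:6]) \
        or self.goals_reached == len(self.all_goals)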

def get_observation(self):

@@ -187,32 +166,76 @@ def get_observation(self):

def compute_reward(self, state, next_state):

# TESTING ONLY

# if self.goal_number < len(self.all_goals) - 1:
# self.goal_number += 1
# else:
# self.goal_number = 0

# self.update_goal_service(self.goal_number)
# ==============================================================

reward = 0

goal_position = self.goal_position

prev_distance = math.dist(goal_position, state[:2])
current_distance = math.dist(goal_position, next_state[:2])
reward += 10 * (prev_distance - current_distance) / prev_distance

self.steps_since_last_goal += 1

if current_distance < self.REWARD_RANGE:
reward += 50
self.goal_number += 1
self.step_counter = 0
self.update_goal_service(self.goal_number)
print(f'Goal #{self.goals_reached} Reached')
reward += 2
self.goals_reached += 1

# Updating Goal Position
new_x, new_y = self.all_goals[(self.start_goal_index + self.goals_reached) % len(self.all_goals)]
self.goal_position = [new_x, new_y]

self.update_goal_service(new_x, new_y)

self.steps_since_last_goal = 0

if self.steps_since_last_goal >= 10:
reward -= 10

if self.goals_reached == len(self.all_goals):
reward += 100

if has_collided(next_state[8:], self.COLLISION_RANGE) or has_flipped_over(next_state[2:6]):
reward -= 25 # TODO: find optimal value for this
reward -= 25

return reward
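A quick worked example of the progress term above, with illustrative distances: the dense reward is ten times the fractional reduction in distance to the current goal, while the sparse terms (+2 per goal reached, +100 for reaching every goal, -10 after ten goal-less steps, -25 for a collision or flip) shape the rest.

prev_distance = 4.0       # illustrative: 4.0 m to the goal before the step
current_distance = 3.6    # 3.6 m after the step
reward = 10 * (prev_distance - current_distance) / prev_distance
print(reward)             # 1.0 -- a 10% reduction in distance earns 1.0 reward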


# Utility Functions --------------------------------------------

def call_reset_service(self, car_x, car_y, car_Y, goal_x, goal_y):
"""
Reset the car and goal position
"""

request = Reset.Request()
request.gx = float(goal_x)
request.gy = float(goal_y)
request.cx = float(car_x)
request.cy = float(car_y)
request.cyaw = float(car_Y)
request.flag = "car_and_goal"

future = self.reset_client.call_async(request)
rclpy.spin_until_future_complete(self, future)

return future.result()

def update_goal_service(self, x, y):
"""
Reset the goal position
"""

request = Reset.Request()
request.gx = x
request.gy = y
request.flag = "goal_only"

future = self.reset_client.call_async(request)
rclpy.spin_until_future_complete(self, future)

return future.result()

def sleep(self):
while not self.timer_future.done():
rclpy.spin_once(self)

self.timer_future = Future()
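The sleep() helper above is what enforces the fixed step_length between actions: it spins until self.timer_future resolves, then re-arms the future, and the future is presumably completed by a wall-clock ROS timer in the base class. A generic sketch of that pattern, with the class and callback names assumed rather than copied from F1tenthEnvironment:

import rclpy
from rclpy.node import Node
from rclpy.task import Future

class FixedStepNode(Node):
    def __init__(self, step_length):
        super().__init__('fixed_step_node')
        self.timer_future = Future()
        # Fires every step_length seconds and releases anyone blocked in sleep().
        self.create_timer(step_length, self._on_timer)

    def _on_timer(self):
        if not self.timer_future.done():
            self.timer_future.set_result(True)

    def sleep(self):
        while not self.timer_future.done():
            rclpy.spin_once(self)
        self.timer_future = Future()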