
Merge pull request #103 from UoA-CARES/carbeat-reward
Carbeat reward
retinfai authored Aug 24, 2023
2 parents e3bfc06 + 9519f04 commit f6e0f59
Showing 9 changed files with 193 additions and 100 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -10,4 +10,5 @@ cares_reinforcement_learning
__pycache__
rl_logs
.idea
src/f1tenth/f1tenth_control/f1tenth_control/__pycache__/*
src/f1tenth/f1tenth_control/f1tenth_control/__pycache__/*
Retrain1hTrue
1 change: 1 addition & 0 deletions src/controllers/controllers/ftg_controller.py
@@ -18,6 +18,7 @@ def __init__(self, node_name, car_name, step_length, track_name):
self.goal_position = self.goals[1]

def reset_cb(self, msg):
self.get_logger().info('Reset Detected from FTG Controller')
self.goal_position = self.goals[1]

def get_observation(self):
50 changes: 9 additions & 41 deletions src/controllers/controllers/ftg_policy.py
@@ -25,7 +25,7 @@ def main():
state = controller.get_observation()

while True:
controller.get_logger().info(f"State: {state[:-2]}")
# controller.get_logger().info(f"State: {state[:-2]}")
action = policy.select_action(state[:-2], state[-2:])
state = controller.step(action)

@@ -42,12 +42,13 @@ def calc_func(self):

def calc_border_distances(self,range):
obstacle_buffer = 0.0001 # value needs to be tinkered with
chassis_width = 0.18
d_n = np.sqrt(range**2-(obstacle_buffer+chassis_width)**2)
chassis_width = 0.16

d_n = np.sqrt(max(0.001,range**2-(obstacle_buffer+chassis_width)**2))
return d_n

def angle_to_ang_vel(self, driving_angle, lin):
return driving_angle
return driving_angle*lin
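For context, a minimal sketch of the two changes above (constant and function names are illustrative, not from the repository): the max() guard keeps the square root real when an obstacle sits closer than the chassis width plus buffer, and the steering angle is now scaled by the linear velocity when converted to an angular velocity.

import numpy as np

OBSTACLE_BUFFER = 0.0001  # metres, as in calc_border_distances
CHASSIS_WIDTH = 0.16      # metres, updated in this commit

def border_distance(lidar_range):
    # Clamp the radicand so ranges closer than the safety margin give a
    # small positive distance instead of a NaN from sqrt of a negative.
    radicand = lidar_range**2 - (OBSTACLE_BUFFER + CHASSIS_WIDTH)**2
    return np.sqrt(max(0.001, radicand))

def angle_to_angular_velocity(driving_angle, linear_velocity):
    # A fixed heading correction now turns the car faster at higher speeds.
    return driving_angle * linear_velocity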

def constrain_angle(self, angle):
val = angle
@@ -60,37 +61,28 @@ def constrain_angle(self, angle):
def select_action(self,state,goal_pos):
# Current x: state[0], current y: state[1], orientation w: state[2], orientation x: state[3], orientation y: state[4], orientation z: state[5]
# linear vel x: state[6], angular vel z: state[7], LIDAR points 1-10: state[8-17] where each entry is the 64th LIDAR point
lin = 0.6
lin = 2.5 #0.6
turn_angle = 0.4667
min_turn_radius = 0.625
lidar_angle=1.396
min_lidar_range = 0.08
max_lidar_range = 10
lidar_poss_angles = np.linspace(-1.396, 1.396, 640)
meeting_dist = self.calc_func()
#roll, pitch, rotation = euler_from_quaternion(state[2], state[3], state[4], state[5])


rotation = np.arctan2((2*(state[2]*state[5]+state[3]*state[4])),(1-2*(state[4]**2+state[5]**2)))
if (goal_pos[0] > state[0]):
if (goal_pos[1] > state[1]):
print(f"State 1")
goal_angle = np.arctan((goal_pos[1]-state[1])/(goal_pos[0]-state[0])) - rotation
else:
print(f"State 2")
goal_angle = np.arctan((goal_pos[1]-state[1])/(goal_pos[0]-state[0])) - rotation
else:
if (goal_pos[1] > state[1]):
print(f"State 3")
goal_angle = abs(np.arctan((goal_pos[0]-state[0])/(goal_pos[1]-state[1]))) - rotation + np.pi/2
else:
print(f"State 4")
print(f"goal_pos: {goal_pos}")
print(f"State: {state}")
print(f"Rotation: {rotation}")
goal_angle = np.arctan((goal_pos[0]-state[0])/(goal_pos[1]-state[1]))*-1 - rotation - np.pi/2
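The four branches above compute the bearing from the car to the goal in each quadrant relative to the car's yaw; they reduce to a single np.arctan2 call. A small illustrative sketch (the helper name is hypothetical and not part of this commit):

import numpy as np

def goal_heading(state, goal_pos):
    # Yaw from the quaternion stored at state[2:6] (w, x, y, z), as above.
    rotation = np.arctan2(2 * (state[2] * state[5] + state[3] * state[4]),
                          1 - 2 * (state[4]**2 + state[5]**2))
    # arctan2 resolves the quadrant directly, matching the branch-by-branch
    # result computed in select_action.
    return np.arctan2(goal_pos[1] - state[1], goal_pos[0] - state[0]) - rotation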

# each value in lidar_angles corresponds to a lidar range
obstacles_angles = []
obstacles_ranges = []
obstacle_max_val = 2
@@ -105,7 +97,6 @@ def select_action(self,state,goal_pos):
action = np.asarray([lin, ang])
return action

#print(f"Obstacles are at: {obstacles_angles}")
# Add obstacle border values to array
border_ranges = []
border_angles = []
@@ -117,7 +108,6 @@ def select_action(self,state,goal_pos):
border_angles.append(obstacles_angles[i]-angle)
border_angles.append(obstacles_angles[i]+angle)

#print(f"Obstacles are at: {border_angles}")
# Calculate nonholonomic edge constraints
if (border_ranges[0] < meeting_dist):
angle_constraint_l = turn_angle
@@ -130,30 +120,24 @@ def select_action(self,state,goal_pos):
angle_constraint_r = lidar_angle*-1

r_del_index = -1
for i in range(1, len(border_angles), 2): # Implement on left
for i in range(1, len(border_angles), 2):
if (border_angles[i]<angle_constraint_r):
r_del_index = i+1
if (r_del_index>0):
#print(f"Pre-deletion: {border_angles}")
#print(f"Del index: {del_index}")
del border_angles[0:r_del_index]
del border_ranges[0:r_del_index]
#print(f"Post-deletion: {border_angles}")
if (len(border_ranges) < 1):
ang = self.angle_to_ang_vel(goal_angle, lin)
action = np.asarray([lin, ang])
return action

l_del_index = len(border_angles)
for i in range(len(border_angles)-2, 0, -2): # Implement on left
for i in range(len(border_angles)-2, 0, -2):
if (border_angles[i]>angle_constraint_l):
l_del_index = i
if (l_del_index<len(border_angles)):
#print(f"Pre-deletion: {border_angles}")
#print(f"Del index: {l_del_index}")
del border_angles[l_del_index:]
del border_ranges[l_del_index:]
#print(f"Post-deletion: {border_angles}")

dist_constraint_l = border_ranges[-1]*np.cos(angle_constraint_l)
dist_constraint_r = border_ranges[0]*np.cos(angle_constraint_r)
@@ -188,10 +172,6 @@ def select_action(self,state,goal_pos):
return action
greatest_gap_index = G.index(greatest_gap)

#print(f"Gap array: {G}")
#print(f"Greatest gap: {greatest_gap}")
#print(f"Index: {greatest_gap_index}")

# Find whether greatest gap is between two obstacles or one obstacle and a border
if greatest_gap_index < 1: # Between right constraint and rightmost obstacle
d1 = dist_constraint_r
@@ -208,12 +188,6 @@ def select_action(self,state,goal_pos):
d2 = border_ranges[greatest_gap_index*2]
theta1 = border_angles[greatest_gap_index*2-1]
theta2 = border_angles[greatest_gap_index*2]
print(f"Theta1: {theta1}")
print(f"Theta 2: {theta2}")
print(f"R constraint: {angle_constraint_r}")
print(f"Borders: {border_angles}")
print(f"L constraint: {angle_constraint_l}")
#gap_centre_angle = np.arccos((d1+d2*np.cos(theta1+theta2))/(np.sqrt(d1**2+d2**2+2*d1*d2*np.cos(theta1+theta2))))-theta1
if (theta1 > 0): # Both obstacles to left of robot
phi = theta2-theta1
l = np.sqrt((d1**2+d2**2-2*d1*d2*np.cos(phi))/4)
@@ -235,20 +209,14 @@ def select_action(self,state,goal_pos):
else: # Turning left
gap_centre_angle = np.arccos((d1**2+h**2-l**2)/(2*d1*h))-abs(theta1)
gap_centre_angle = self.constrain_angle(gap_centre_angle)
print(f"Gap centre angle: {gap_centre_angle}")
#print(f"Goal Angle: {goal_angle}")

# Calculate final heading angle
dmin = min(border_ranges)
alpha = 1
print(f"dmin: {dmin}")
print(f"Initial goal_angle: {goal_angle}")
goal_angle = self.constrain_angle(goal_angle)
print(f"Final goal_angle: {goal_angle}")
final_heading_angle = ((alpha/dmin)*gap_centre_angle+goal_angle)/((alpha/dmin)+1)
print(f"Final angle: {final_heading_angle}")
# Convert to angular velocity
ang = self.angle_to_ang_vel(final_heading_angle, lin)
#ang = self.angle_to_ang_vel(-2)
action = np.asarray([lin, ang])
return action
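To illustrate the final blending step above, which weights the gap-centre angle against the goal angle by alpha/dmin so the gap term dominates as the nearest obstacle gets closer, a small standalone sketch (the numeric values are illustrative only):

def blend_heading(gap_centre_angle, goal_angle, dmin, alpha=1.0):
    # Same weighted average as select_action: weight alpha/dmin on the
    # gap-centre term, weight 1 on the goal term.
    w = alpha / dmin
    return (w * gap_centre_angle + goal_angle) / (w + 1)

# Nearby obstacle (dmin = 0.5 m): heading pulled towards the gap centre.
print(blend_heading(0.6, -0.2, dmin=0.5))   # ~0.33 rad
# Distant obstacle (dmin = 5 m): heading stays close to the goal angle.
print(blend_heading(0.6, -0.2, dmin=5.0))   # ~-0.07 rad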

70 changes: 57 additions & 13 deletions src/environments/environments/CarBeatEnvironment.py
@@ -24,7 +24,7 @@

class CarBeatEnvironment(Node):

def __init__(self, car_one_name, car_two_name, reward_range=1, max_steps=50, collision_range=0.2, step_length=0.5, track='track_1'):
def __init__(self, car_one_name, car_two_name, reward_range=1, max_steps=50, collision_range=0.2, step_length=0.5, track='track_1', observation_mode= 'full'):
super().__init__('car_beat_environment')

# Environment Details ----------------------------------------
@@ -35,9 +35,18 @@ def __init__(self, car_one_name, car_two_name, reward_range=1, max_steps=50, col
self.MAX_ACTIONS = np.asarray([3, 3.14])
self.MIN_ACTIONS = np.asarray([0, -3.14])
self.MAX_STEPS_PER_GOAL = max_steps
self.OBSERVATION_MODE = observation_mode

match observation_mode:
case 'full':
self.OBSERVATION_SIZE = 8 + 10
case 'no_position':
self.OBSERVATION_SIZE = 6 + 10
case 'lidar_only':
self.OBSERVATION_SIZE = 2 + 10
case _:
raise ValueError(f'Invalid observation mode: {observation_mode}')

# TODO: Update this
self.OBSERVATION_SIZE = 8 + 10 # Car position + Lidar rays
self.COLLISION_RANGE = collision_range
self.REWARD_RANGE = reward_range
self.ACTION_NUM = 2
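For reference, a minimal sketch of how the three observation modes map onto the RL car's state, assuming the reduced LIDAR scan contributes 10 values and the odometry list is [x, y, qw, qx, qy, qz, lin_vel, ang_vel] as used elsewhere in this file (the helper name is illustrative):

def build_state(odom, lidar, mode):
    # odom: 8 odometry values, lidar: 10 reduced ranges
    match mode:
        case 'full':          # 8 + 10 = 18 values
            return odom + lidar
        case 'no_position':   # drop x, y -> 6 + 10 = 16 values
            return odom[2:] + lidar
        case 'lidar_only':    # keep only the velocities -> 2 + 10 = 12 values
            return odom[-2:] + lidar
        case _:
            raise ValueError(f'Invalid observation mode: {mode}')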
@@ -46,6 +55,8 @@ def __init__(self, car_one_name, car_two_name, reward_range=1, max_steps=50, col

# Goal/Track Info -----------------------------------------------
self.goal_number = 0
self.ftg_goal_number = 1

self.all_goals = track_info[track]['goals']

self.car_reset_positions = track_info[track]['reset']
@@ -117,6 +128,8 @@ def reset(self):

# TODO: Remove Hard coded-ness of 10x10
self.goal_number = 0
self.ftg_goal_number = 1

self.goal_position = self.generate_goal(self.goal_number)

while not self.timer_future.done():
@@ -126,16 +139,16 @@

self.call_reset_service()

observation = self.get_observation()
state, _ = self.get_observation()

info = {}

return observation, info
return state, info

def step(self, action):
self.step_counter += 1

state = self.get_observation()
_, full_state = self.get_observation()

lin_vel, ang_vel = action
self.set_velocity(lin_vel, ang_vel)
@@ -145,9 +158,9 @@

self.timer_future = Future()

next_state = self.get_observation()
reward = self.compute_reward(state, next_state)
terminated = self.is_terminated(next_state)
next_state, full_next_state = self.get_observation()
reward = self.compute_reward(full_state, full_next_state)
terminated = self.is_terminated(full_next_state)
truncated = self.step_counter >= self.MAX_STEPS
info = {}

@@ -182,7 +195,8 @@ def timer_cb(self):

def is_terminated(self, state):
return has_collided(state[8:19], self.COLLISION_RANGE) \
or has_flipped_over(state[2:6])
or has_flipped_over(state[2:6]) \
or self.goal_number > self.ftg_goal_number

def generate_goal(self, number):
print("Goal", number, "spawned")
@@ -250,11 +264,20 @@ def get_observation(self):
lidar_one = reduce_lidar(lidar_one)
lidar_two = reduce_lidar(lidar_two)

self.get_logger().info(
f'odom_one: {odom_one} \n\nlidar_one: {lidar_one} \n\nodom_two: {odom_two} \n\nlidar_two: {lidar_two}')
match self.OBSERVATION_MODE:
case 'full':
state = odom_one + lidar_one
case 'no_position':
state = odom_one[2:] + lidar_one
case 'lidar_only':
state = odom_one[-2:] + lidar_one
case _:
                raise ValueError(f'Invalid observation mode: {self.OBSERVATION_MODE}')

return odom_one + lidar_one + odom_two + lidar_two + self.goal_position
full_state = odom_one + lidar_one + odom_two + lidar_two + self.goal_position

return state, full_state

def compute_reward(self, state, next_state):

reward = 0
@@ -272,7 +295,28 @@ def compute_reward(self, state, next_state):
self.step_counter = 0
self.update_goal_service(self.goal_number)

ftg_current_distance = math.dist(self.all_goals[self.ftg_goal_number], next_state[18:20])

# Keeping track of FTG car goal number
if ftg_current_distance < self.REWARD_RANGE:
self.ftg_goal_number += 1

# If RL car has overtaken FTG car
if self.goal_number > self.ftg_goal_number:
reward += 200

if has_collided(next_state[8:19], self.COLLISION_RANGE) or has_flipped_over(next_state[2:6]):
reward -= 25 # TODO: find optimal value for this

prev_car_one_pos = state[:2]
prev_car_two_pos = state[18:20]

curr_car_one_pos = next_state[:2]
curr_car_two_pos = next_state[18:20]

prev_progress = math.dist(prev_car_one_pos, prev_car_two_pos)
curr_progress = math.dist(curr_car_one_pos, curr_car_two_pos)

reward += (prev_progress - curr_progress) / prev_progress

return reward
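A compact sketch of the reward terms this commit adds, stripped of the ROS plumbing (bonus and penalty values as in the diff above; the function and argument names are illustrative):

import math

def carbeat_reward_terms(overtaken, crashed, prev_state, next_state):
    # prev_state / next_state follow the full-state layout used above:
    # RL car position at [:2], FTG car position at [18:20].
    reward = 0.0
    if overtaken:    # RL car's goal index has passed the FTG car's
        reward += 200
    if crashed:      # collision or flip, as checked in is_terminated
        reward -= 25
    prev_gap = math.dist(prev_state[:2], prev_state[18:20])
    curr_gap = math.dist(next_state[:2], next_state[18:20])
    # Positive when the RL car closes the distance to the FTG car.
    reward += (prev_gap - curr_gap) / prev_gap
    return reward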
2 changes: 1 addition & 1 deletion src/environments/environments/CarTrackEnvironment.py
@@ -55,7 +55,7 @@ def __init__(self,
case 'no_position':
self.OBSERVATION_SIZE = 6 + 10
case 'lidar_only':
self.OBSERVATION_SIZE = 10
self.OBSERVATION_SIZE = 10 + 2
case _:
self.OBSERVATION_SIZE = 8 + 10

12 changes: 7 additions & 5 deletions src/reinforcement_learning/config/train.yaml
@@ -1,18 +1,20 @@
train:
ros__parameters:
environment: 'CarBeat' # CarGoal, CarWall, CarBlock, CarTrack, CarBeat
track: 'track_2' # track_1, track_2, track_3 -> only applies for CarTrack
max_steps_exploration: 5000
track: 'track_1' # track_1, track_2, track_3 -> only applies for CarTrack
algorithm: 'TD3'
max_steps_exploration: 1000
max_steps_training: 1000000
reward_range: 1.0
reward_range: 3.0
reward_range: 3.0
collision_range: 0.2
observation_mode: 'no_position'
observation_mode: 'lidar_only'
# actor_path & critic_path must exist, it can't be commented
# actor_path: 'rl_logs/23_08_02_17:59:13/models/actor_checkpoint.pht'
# critic_path: 'rl_logs/23_08_02_17:59:13/models/critic_checkpoint.pht'
# gamma: 0.95
# tau: 0.005
# g: 5
g: 1
# batch_size: 32
# buffer_size: 1000000
# actor_lr: 0.0001
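The parameters above would typically be declared and read in the training node roughly as in this sketch (parameter names taken from the YAML; the node name and defaults are assumptions, not confirmed by this diff):

import rclpy
from rclpy.node import Node

class Train(Node):
    def __init__(self):
        super().__init__('train')
        # Declare the parameters added or changed in this commit.
        self.declare_parameter('environment', 'CarBeat')
        self.declare_parameter('track', 'track_1')
        self.declare_parameter('algorithm', 'TD3')
        self.declare_parameter('observation_mode', 'lidar_only')
        self.declare_parameter('reward_range', 3.0)
        self.declare_parameter('g', 1)

        self.observation_mode = self.get_parameter('observation_mode').value
        self.reward_range = self.get_parameter('reward_range').value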
14 changes: 2 additions & 12 deletions src/reinforcement_learning/launch/test.launch.py
@@ -11,12 +11,12 @@
'CarGoal': 'cargoal',
'CarWall': 'carwall',
'CarBlock': 'carblock',
'CarTrack': 'cartrack'
'CarTrack': 'cartrack',
'CarBeat': 'carbeat',
}

def generate_launch_description():
pkg_f1tenth_description = get_package_share_directory('f1tenth_description')
pkg_f1tenth_bringup = get_package_share_directory('f1tenth_bringup')
pkg_environments = get_package_share_directory('environments')

config_path = os.path.join(
@@ -35,15 +35,6 @@ def generate_launch_description():
}.items() #TODO: this doesn't do anything
)

f1tenth = IncludeLaunchDescription(
launch_description_source=PythonLaunchDescriptionSource(
os.path.join(pkg_f1tenth_bringup, 'simulation_bringup.launch.py')),
launch_arguments={
'name': 'f1tenth',
'world': 'empty'
}.items()
)

# Launch the Environment
main = Node(
package='reinforcement_learning',
@@ -61,6 +52,5 @@ def generate_launch_description():
SetEnvironmentVariable(name='GZ_SIM_RESOURCE_PATH', value=pkg_f1tenth_description[:-19]),
SetParameter(name='use_sim_time', value=True),
environment,
f1tenth,
main
])
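For context, the environment-name-to-launch-file mapping near the top of this file is typically used to resolve which environment launch file to include; a hedged sketch under that assumption (the helper, the env_to_launch dictionary name, and the path layout are illustrative, not confirmed by this diff):

import os
from launch.actions import IncludeLaunchDescription
from launch.launch_description_sources import PythonLaunchDescriptionSource

def include_environment(pkg_environments, env_to_launch, env_name, track):
    # Map e.g. 'CarBeat' -> 'carbeat.launch.py' inside the environments package.
    launch_file = f"{env_to_launch[env_name]}.launch.py"
    return IncludeLaunchDescription(
        launch_description_source=PythonLaunchDescriptionSource(
            os.path.join(pkg_environments, launch_file)),
        launch_arguments={'track': track}.items(),
    )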