
Merge pull request #103 from UoA-CARES/carbeat-reward
Carbeat reward
retinfai authored Aug 24, 2023
2 parents e3bfc06 + 9519f04 commit f6e0f59
Showing 9 changed files with 193 additions and 100 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -10,4 +10,5 @@ cares_reinforcement_learning
__pycache__
rl_logs
.idea
src/f1tenth/f1tenth_control/f1tenth_control/__pycache__/*
src/f1tenth/f1tenth_control/f1tenth_control/__pycache__/*
Retrain1hTrue
1 change: 1 addition & 0 deletions src/controllers/controllers/ftg_controller.py
@@ -18,6 +18,7 @@ def __init__(self, node_name, car_name, step_length, track_name):
self.goal_position = self.goals[1]

def reset_cb(self, msg):
self.get_logger().info('Reset Detected from FTG Controller')
self.goal_position = self.goals[1]

def get_observation(self):
50 changes: 9 additions & 41 deletions src/controllers/controllers/ftg_policy.py
@@ -25,7 +25,7 @@ def main():
state = controller.get_observation()

while True:
controller.get_logger().info(f"State: {state[:-2]}")
# controller.get_logger().info(f"State: {state[:-2]}")
action = policy.select_action(state[:-2], state[-2:])
state = controller.step(action)

@@ -42,12 +42,13 @@ def calc_func(self):

def calc_border_distances(self,range):
obstacle_buffer = 0.0001 # value needs to be tinkered with
chassis_width = 0.18
d_n = np.sqrt(range**2-(obstacle_buffer+chassis_width)**2)
chassis_width = 0.16

d_n = np.sqrt(max(0.001,range**2-(obstacle_buffer+chassis_width)**2))
return d_n

def angle_to_ang_vel(self, driving_angle, lin):
return driving_angle
return driving_angle*lin
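For context, a minimal sketch of the two changes above (constant and function names are illustrative, not from the repository): the max() guard keeps the square root real when an obstacle sits closer than the chassis width plus buffer, and the steering angle is now scaled by the linear velocity when converted to an angular velocity.

import numpy as np

OBSTACLE_BUFFER = 0.0001  # metres, as in calc_border_distances
CHASSIS_WIDTH = 0.16      # metres, updated in this commit

def border_distance(lidar_range):
    # Clamp the radicand so ranges closer than the safety margin give a
    # small positive distance instead of a NaN from sqrt of a negative.
    radicand = lidar_range**2 - (OBSTACLE_BUFFER + CHASSIS_WIDTH)**2
    return np.sqrt(max(0.001, radicand))

def angle_to_angular_velocity(driving_angle, linear_velocity):
    # A fixed heading correction now turns the car faster at higher speeds.
    return driving_angle * linear_velocity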

def constrain_angle(self, angle):
val = angle
@@ -60,37 +61,28 @@ def constrain_angle(self, angle):
def select_action(self,state,goal_pos):
# Current x: state[0], current y: state[1], orientation w: state[2], orientation x: state[3], orientation y: state[4], orientation z: state[5]
# linear vel x: state[6], angular vel z: state[7], LIDAR points 1-10: state[8-17] where each entry is the 64th LIDAR point
lin = 0.6
lin = 2.5 #0.6
turn_angle = 0.4667
min_turn_radius = 0.625
lidar_angle=1.396
min_lidar_range = 0.08
max_lidar_range = 10
lidar_poss_angles = np.linspace(-1.396, 1.396, 640)
meeting_dist = self.calc_func()
#roll, pitch, rotation = euler_from_quaternion(state[2], state[3], state[4], state[5])


rotation = np.arctan2((2*(state[2]*state[5]+state[3]*state[4])),(1-2*(state[4]**2+state[5]**2)))
if (goal_pos[0] > state[0]):
if (goal_pos[1] > state[1]):
print(f"State 1")
goal_angle = np.arctan((goal_pos[1]-state[1])/(goal_pos[0]-state[0])) - rotation
else:
print(f"State 2")
goal_angle = np.arctan((goal_pos[1]-state[1])/(goal_pos[0]-state[0])) - rotation
else:
if (goal_pos[1] > state[1]):
print(f"State 3")
goal_angle = abs(np.arctan((goal_pos[0]-state[0])/(goal_pos[1]-state[1]))) - rotation + np.pi/2
else:
print(f"State 4")
print(f"goal_pos: {goal_pos}")
print(f"State: {state}")
print(f"Rotation: {rotation}")
goal_angle = np.arctan((goal_pos[0]-state[0])/(goal_pos[1]-state[1]))*-1 - rotation - np.pi/2
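The four branches above compute the bearing from the car to the goal in each quadrant relative to the car's yaw; they reduce to a single np.arctan2 call. A small illustrative sketch (the helper name is hypothetical and not part of this commit):

import numpy as np

def goal_heading(state, goal_pos):
    # Yaw from the quaternion stored at state[2:6] (w, x, y, z), as above.
    rotation = np.arctan2(2 * (state[2] * state[5] + state[3] * state[4]),
                          1 - 2 * (state[4]**2 + state[5]**2))
    # arctan2 resolves the quadrant directly, matching the branch-by-branch
    # result computed in select_action.
    return np.arctan2(goal_pos[1] - state[1], goal_pos[0] - state[0]) - rotation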

# each value in lidar_angles corresponds to a lidar range
obstacles_angles = []
obstacles_ranges = []
obstacle_max_val = 2
@@ -105,7 +97,6 @@ def select_action(self,state,goal_pos):
action = np.asarray([lin, ang])
return action

#print(f"Obstacles are at: {obstacles_angles}")
# Add obstacle border values to array
border_ranges = []
border_angles = []
@@ -117,7 +108,6 @@ def select_action(self,state,goal_pos):
border_angles.append(obstacles_angles[i]-angle)
border_angles.append(obstacles_angles[i]+angle)

#print(f"Obstacles are at: {border_angles}")
# Calculate nonholonomic edge constraints
if (border_ranges[0] < meeting_dist):
angle_constraint_l = turn_angle
@@ -130,30 +120,24 @@ def select_action(self,state,goal_pos):
angle_constraint_r = lidar_angle*-1

r_del_index = -1
for i in range(1, len(border_angles), 2): # Implement on left
for i in range(1, len(border_angles), 2):
if (border_angles[i]<angle_constraint_r):
r_del_index = i+1
if (r_del_index>0):
#print(f"Pre-deletion: {border_angles}")
#print(f"Del index: {del_index}")
del border_angles[0:r_del_index]
del border_ranges[0:r_del_index]
#print(f"Post-deletion: {border_angles}")
if (len(border_ranges) < 1):
ang = self.angle_to_ang_vel(goal_angle, lin)
action = np.asarray([lin, ang])
return action

l_del_index = len(border_angles)
for i in range(len(border_angles)-2, 0, -2): # Implement on left
for i in range(len(border_angles)-2, 0, -2):
if (border_angles[i]>angle_constraint_l):
l_del_index = i
if (l_del_index<len(border_angles)):
#print(f"Pre-deletion: {border_angles}")
#print(f"Del index: {l_del_index}")
del border_angles[l_del_index:]
del border_ranges[l_del_index:]
#print(f"Post-deletion: {border_angles}")

dist_constraint_l = border_ranges[-1]*np.cos(angle_constraint_l)
dist_constraint_r = border_ranges[0]*np.cos(angle_constraint_r)
@@ -188,10 +172,6 @@ def select_action(self,state,goal_pos):
return action
greatest_gap_index = G.index(greatest_gap)

#print(f"Gap array: {G}")
#print(f"Greatest gap: {greatest_gap}")
#print(f"Index: {greatest_gap_index}")

# Find whether greatest gap is between two obstacles or one obstacle and a border
if greatest_gap_index < 1: # Between right constraint and rightmost obstacle
d1 = dist_constraint_r
@@ -208,12 +188,6 @@ def select_action(self,state,goal_pos):
d2 = border_ranges[greatest_gap_index*2]
theta1 = border_angles[greatest_gap_index*2-1]
theta2 = border_angles[greatest_gap_index*2]
print(f"Theta1: {theta1}")
print(f"Theta 2: {theta2}")
print(f"R constraint: {angle_constraint_r}")
print(f"Borders: {border_angles}")
print(f"L constraint: {angle_constraint_l}")
#gap_centre_angle = np.arccos((d1+d2*np.cos(theta1+theta2))/(np.sqrt(d1**2+d2**2+2*d1*d2*np.cos(theta1+theta2))))-theta1
if (theta1 > 0): # Both obstacles to left of robot
phi = theta2-theta1
l = np.sqrt((d1**2+d2**2-2*d1*d2*np.cos(phi))/4)
@@ -235,20 +209,14 @@ def select_action(self,state,goal_pos):
else: # Turning left
gap_centre_angle = np.arccos((d1**2+h**2-l**2)/(2*d1*h))-abs(theta1)
gap_centre_angle = self.constrain_angle(gap_centre_angle)
print(f"Gap centre angle: {gap_centre_angle}")
#print(f"Goal Angle: {goal_angle}")

# Calculate final heading angle
dmin = min(border_ranges)
alpha = 1
print(f"dmin: {dmin}")
print(f"Initial goal_angle: {goal_angle}")
goal_angle = self.constrain_angle(goal_angle)
print(f"Final goal_angle: {goal_angle}")
final_heading_angle = ((alpha/dmin)*gap_centre_angle+goal_angle)/((alpha/dmin)+1)
print(f"Final angle: {final_heading_angle}")
# Convert to angular velocity
ang = self.angle_to_ang_vel(final_heading_angle, lin)
#ang = self.angle_to_ang_vel(-2)
action = np.asarray([lin, ang])
return action
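To illustrate the final blending step above, which weights the gap-centre angle against the goal angle by alpha/dmin so the gap term dominates as the nearest obstacle gets closer, a small standalone sketch (the numeric values are illustrative only):

def blend_heading(gap_centre_angle, goal_angle, dmin, alpha=1.0):
    # Same weighted average as select_action: weight alpha/dmin on the
    # gap-centre term, weight 1 on the goal term.
    w = alpha / dmin
    return (w * gap_centre_angle + goal_angle) / (w + 1)

# Nearby obstacle (dmin = 0.5 m): heading pulled towards the gap centre.
print(blend_heading(0.6, -0.2, dmin=0.5))   # ~0.33 rad
# Distant obstacle (dmin = 5 m): heading stays close to the goal angle.
print(blend_heading(0.6, -0.2, dmin=5.0))   # ~-0.07 rad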

70 changes: 57 additions & 13 deletions src/environments/environments/CarBeatEnvironment.py
@@ -24,7 +24,7 @@

class CarBeatEnvironment(Node):

def __init__(self, car_one_name, car_two_name, reward_range=1, max_steps=50, collision_range=0.2, step_length=0.5, track='track_1'):
def __init__(self, car_one_name, car_two_name, reward_range=1, max_steps=50, collision_range=0.2, step_length=0.5, track='track_1', observation_mode= 'full'):
super().__init__('car_beat_environment')

# Environment Details ----------------------------------------
@@ -35,9 +35,18 @@ def __init__(self, car_one_name, car_two_name, reward_range=1, max_steps=50, col
self.MAX_ACTIONS = np.asarray([3, 3.14])
self.MIN_ACTIONS = np.asarray([0, -3.14])
self.MAX_STEPS_PER_GOAL = max_steps
self.OBSERVATION_MODE = observation_mode

match observation_mode:
case 'full':
self.OBSERVATION_SIZE = 8 + 10
case 'no_position':
self.OBSERVATION_SIZE = 6 + 10
case 'lidar_only':
self.OBSERVATION_SIZE = 2 + 10
case _:
raise ValueError(f'Invalid observation mode: {observation_mode}')

# TODO: Update this
self.OBSERVATION_SIZE = 8 + 10 # Car position + Lidar rays
self.COLLISION_RANGE = collision_range
self.REWARD_RANGE = reward_range
self.ACTION_NUM = 2
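For reference, a minimal sketch of how the three observation modes map onto the RL car's state, assuming the reduced LIDAR scan contributes 10 values and the odometry list is [x, y, qw, qx, qy, qz, lin_vel, ang_vel] as used elsewhere in this file (the helper name is illustrative):

def build_state(odom, lidar, mode):
    # odom: 8 odometry values, lidar: 10 reduced ranges
    match mode:
        case 'full':          # 8 + 10 = 18 values
            return odom + lidar
        case 'no_position':   # drop x, y -> 6 + 10 = 16 values
            return odom[2:] + lidar
        case 'lidar_only':    # keep only the velocities -> 2 + 10 = 12 values
            return odom[-2:] + lidar
        case _:
            raise ValueError(f'Invalid observation mode: {mode}')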
@@ -46,6 +55,8 @@ def __init__(self, car_one_name, car_two_name, reward_range=1, max_steps=50, col

# Goal/Track Info -----------------------------------------------
self.goal_number = 0
self.ftg_goal_number = 1

self.all_goals = track_info[track]['goals']

self.car_reset_positions = track_info[track]['reset']
@@ -117,6 +128,8 @@ def reset(self):

# TODO: Remove Hard coded-ness of 10x10
self.goal_number = 0
self.ftg_goal_number = 1

self.goal_position = self.generate_goal(self.goal_number)

while not self.timer_future.done():
@@ -126,16 +139,16 @@

self.call_reset_service()

observation = self.get_observation()
state, _ = self.get_observation()

info = {}

return observation, info
return state, info

def step(self, action):
self.step_counter += 1

state = self.get_observation()
_, full_state = self.get_observation()

lin_vel, ang_vel = action
self.set_velocity(lin_vel, ang_vel)
@@ -145,9 +158,9 @@

self.timer_future = Future()

next_state = self.get_observation()
reward = self.compute_reward(state, next_state)
terminated = self.is_terminated(next_state)
next_state, full_next_state = self.get_observation()
reward = self.compute_reward(full_state, full_next_state)
terminated = self.is_terminated(full_next_state)
truncated = self.step_counter >= self.MAX_STEPS
info = {}

@@ -182,7 +195,8 @@ def timer_cb(self):

def is_terminated(self, state):
return has_collided(state[8:19], self.COLLISION_RANGE) \
or has_flipped_over(state[2:6])
or has_flipped_over(state[2:6]) \
or self.goal_number > self.ftg_goal_number

def generate_goal(self, number):
print("Goal", number, "spawned")
@@ -250,11 +264,20 @@ def get_observation(self):
lidar_one = reduce_lidar(lidar_one)
lidar_two = reduce_lidar(lidar_two)

self.get_logger().info(
f'odom_one: {odom_one} \n\nlidar_one: {lidar_one} \n\nodom_two: {odom_two} \n\nlidar_two: {lidar_two}')
match self.OBSERVATION_MODE:
case 'full':
state = odom_one + lidar_one
case 'no_position':
state = odom_one[2:] + lidar_one
case 'lidar_only':
state = odom_one[-2:] + lidar_one
case _:
                raise ValueError(f'Invalid observation mode: {self.OBSERVATION_MODE}')

return odom_one + lidar_one + odom_two + lidar_two + self.goal_position
full_state = odom_one + lidar_one + odom_two + lidar_two + self.goal_position

return state, full_state

def compute_reward(self, state, next_state):

reward = 0
@@ -272,7 +295,28 @@ def compute_reward(self, state, next_state):
self.step_counter = 0
self.update_goal_service(self.goal_number)

ftg_current_distance = math.dist(self.all_goals[self.ftg_goal_number], next_state[18:20])

# Keeping track of FTG car goal number
if ftg_current_distance < self.REWARD_RANGE:
self.ftg_goal_number += 1

# If RL car has overtaken FTG car
if self.goal_number > self.ftg_goal_number:
reward += 200

if has_collided(next_state[8:19], self.COLLISION_RANGE) or has_flipped_over(next_state[2:6]):
reward -= 25 # TODO: find optimal value for this

prev_car_one_pos = state[:2]
prev_car_two_pos = state[18:20]

curr_car_one_pos = next_state[:2]
curr_car_two_pos = next_state[18:20]

prev_progress = math.dist(prev_car_one_pos, prev_car_two_pos)
curr_progress = math.dist(curr_car_one_pos, curr_car_two_pos)

reward += (prev_progress - curr_progress) / prev_progress

return reward
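A compact sketch of the reward terms this commit adds, stripped of the ROS plumbing (bonus and penalty values as in the diff above; the function and argument names are illustrative):

import math

def carbeat_reward_terms(overtaken, crashed, prev_state, next_state):
    # prev_state / next_state follow the full-state layout used above:
    # RL car position at [:2], FTG car position at [18:20].
    reward = 0.0
    if overtaken:    # RL car's goal index has passed the FTG car's
        reward += 200
    if crashed:      # collision or flip, as checked in is_terminated
        reward -= 25
    prev_gap = math.dist(prev_state[:2], prev_state[18:20])
    curr_gap = math.dist(next_state[:2], next_state[18:20])
    # Positive when the RL car closes the distance to the FTG car.
    reward += (prev_gap - curr_gap) / prev_gap
    return reward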
2 changes: 1 addition & 1 deletion src/environments/environments/CarTrackEnvironment.py
@@ -55,7 +55,7 @@ def __init__(self,
case 'no_position':
self.OBSERVATION_SIZE = 6 + 10
case 'lidar_only':
self.OBSERVATION_SIZE = 10
self.OBSERVATION_SIZE = 10 + 2
case _:
self.OBSERVATION_SIZE = 8 + 10

12 changes: 7 additions & 5 deletions src/reinforcement_learning/config/train.yaml
@@ -1,18 +1,20 @@
train:
ros__parameters:
environment: 'CarBeat' # CarGoal, CarWall, CarBlock, CarTrack, CarBeat
track: 'track_2' # track_1, track_2, track_3 -> only applies for CarTrack
max_steps_exploration: 5000
track: 'track_1' # track_1, track_2, track_3 -> only applies for CarTrack
algorithm: 'TD3'
max_steps_exploration: 1000
max_steps_training: 1000000
reward_range: 1.0
reward_range: 3.0
reward_range: 3.0
collision_range: 0.2
observation_mode: 'no_position'
observation_mode: 'lidar_only'
# actor_path & critic_path must exist, it can't be commented
# actor_path: 'rl_logs/23_08_02_17:59:13/models/actor_checkpoint.pht'
# critic_path: 'rl_logs/23_08_02_17:59:13/models/critic_checkpoint.pht'
# gamma: 0.95
# tau: 0.005
# g: 5
g: 1
# batch_size: 32
# buffer_size: 1000000
# actor_lr: 0.0001
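The parameters above would typically be declared and read in the training node roughly as in this sketch (parameter names taken from the YAML; the node name and defaults are assumptions, not confirmed by this diff):

import rclpy
from rclpy.node import Node

class Train(Node):
    def __init__(self):
        super().__init__('train')
        # Declare the parameters added or changed in this commit.
        self.declare_parameter('environment', 'CarBeat')
        self.declare_parameter('track', 'track_1')
        self.declare_parameter('algorithm', 'TD3')
        self.declare_parameter('observation_mode', 'lidar_only')
        self.declare_parameter('reward_range', 3.0)
        self.declare_parameter('g', 1)

        self.observation_mode = self.get_parameter('observation_mode').value
        self.reward_range = self.get_parameter('reward_range').value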
14 changes: 2 additions & 12 deletions src/reinforcement_learning/launch/test.launch.py
@@ -11,12 +11,12 @@
'CarGoal': 'cargoal',
'CarWall': 'carwall',
'CarBlock': 'carblock',
'CarTrack': 'cartrack'
'CarTrack': 'cartrack',
'CarBeat': 'carbeat',
}

def generate_launch_description():
pkg_f1tenth_description = get_package_share_directory('f1tenth_description')
pkg_f1tenth_bringup = get_package_share_directory('f1tenth_bringup')
pkg_environments = get_package_share_directory('environments')

config_path = os.path.join(
@@ -35,15 +35,6 @@ def generate_launch_description():
}.items() #TODO: this doesn't do anything
)

f1tenth = IncludeLaunchDescription(
launch_description_source=PythonLaunchDescriptionSource(
os.path.join(pkg_f1tenth_bringup, 'simulation_bringup.launch.py')),
launch_arguments={
'name': 'f1tenth',
'world': 'empty'
}.items()
)

# Launch the Environment
main = Node(
package='reinforcement_learning',
@@ -61,6 +52,5 @@ def generate_launch_description():
SetEnvironmentVariable(name='GZ_SIM_RESOURCE_PATH', value=pkg_f1tenth_description[:-19]),
SetParameter(name='use_sim_time', value=True),
environment,
f1tenth,
main
])
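For context, the environment-name-to-launch-file mapping near the top of this file is typically used to resolve which environment launch file to include; a hedged sketch under that assumption (the helper, the env_to_launch dictionary name, and the path layout are illustrative, not confirmed by this diff):

import os
from launch.actions import IncludeLaunchDescription
from launch.launch_description_sources import PythonLaunchDescriptionSource

def include_environment(pkg_environments, env_to_launch, env_name, track):
    # Map e.g. 'CarBeat' -> 'carbeat.launch.py' inside the environments package.
    launch_file = f"{env_to_launch[env_name]}.launch.py"
    return IncludeLaunchDescription(
        launch_description_source=PythonLaunchDescriptionSource(
            os.path.join(pkg_environments, launch_file)),
        launch_arguments={'track': track}.items(),
    )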