Merge pull request #107 from UoA-CARES/fix-termination-condition
Fix termination condition
retinfai authored Aug 29, 2023
2 parents a55248d + c56195d commit e105aab
Showing 25 changed files with 793 additions and 176 deletions.
2 changes: 2 additions & 0 deletions helperScripts/1manual.sh
@@ -18,4 +18,6 @@ export ROS_DOMAIN_ID=50
colcon build
ros2 launch reinforcement_learning sanity_check.launch.py &

gz sim -g &

wait
2 changes: 2 additions & 0 deletions helperScripts/2train.sh
@@ -18,4 +18,6 @@ export ROS_DOMAIN_ID=100
colcon build
ros2 launch reinforcement_learning train.launch.py &

gz sim -g &

wait
2 changes: 2 additions & 0 deletions helperScripts/3test.sh
@@ -18,4 +18,6 @@ export ROS_DOMAIN_ID=150
colcon build
ros2 launch reinforcement_learning test.launch.py &

gz sim -g &

wait
2 changes: 1 addition & 1 deletion src/controllers/controllers/ftg_policy.py
@@ -214,7 +214,7 @@ def select_action(self,state,goal_pos):
dmin = min(border_ranges)
alpha = 1
goal_angle = self.constrain_angle(goal_angle)
final_heading_angle = ((alpha/dmin)*gap_centre_angle+goal_angle)/((alpha/dmin)+1)
final_heading_angle = gap_centre_angle #((alpha/dmin)*gap_centre_angle+goal_angle)/((alpha/dmin)+1)
# Convert to angular velocity
ang = self.angle_to_ang_vel(final_heading_angle, lin)
action = np.asarray([lin, ang])
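For reference, the expression commented out above blended the gap-centre heading with the goal heading, weighting the gap centre more heavily as the nearest obstacle distance dmin shrinks (weight alpha/dmin); after this change the policy steers purely toward the gap centre. A minimal sketch of that blend, with the helper name and example numbers being illustrative rather than taken from the repo:

def blend_heading(gap_centre_angle, goal_angle, dmin, alpha=1.0):
    # Weighted blend from the commented-out line: the closer the nearest
    # obstacle (small dmin), the more the gap-centre angle dominates.
    w = alpha / dmin
    return (w * gap_centre_angle + goal_angle) / (w + 1.0)

print(blend_heading(0.3, -0.2, dmin=0.5))  # obstacle 0.5 m away: ~0.133, gap centre dominates
print(blend_heading(0.3, -0.2, dmin=5.0))  # obstacle 5 m away: ~-0.117, goal angle dominates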
239 changes: 131 additions & 108 deletions src/environments/environments/CarTrackEnvironment.py
@@ -3,48 +3,24 @@
import numpy as np
import rclpy
from rclpy import Future
from sensor_msgs.msg import LaserScan

import random
from environment_interfaces.srv import Reset
from environments.F1tenthEnvironment import F1tenthEnvironment
from .termination import has_collided, has_flipped_over
from .util import process_odom, reduce_lidar
from .track_reset import track_info
from .goal_positions import goal_positions
from .waypoints import waypoints

class CarTrackEnvironment(F1tenthEnvironment):
"""
CarTrack Reinforcement Learning Environment:
Task:
Here the agent learns to drive the f1tenth car to a goal position
Observation:
Its position (x, y), orientation (w, x, y, z), lidar points (approx. 600 rays) and the goal's position (x, y)
Action:
Its linear and angular velocity
Reward:
Its progress toward the goal, plus
+50 if it reaches the goal and
-25 if it collides with the wall
Termination Conditions:
When the agent is within REWARD_RANGE units of the goal, or
when an obstacle is within COLLISION_RANGE units
Truncation Condition:
When the number of steps surpasses MAX_STEPS
"""

def __init__(self,
car_name,
reward_range=1,
reward_range=0.5,
max_steps=500,
collision_range=0.2,
step_length=0.5,
track='track_1',
observation_mode='full',
observation_mode='lidar_only',
):
super().__init__('car_track', car_name, max_steps, step_length)

@@ -63,30 +39,78 @@ def __init__(self,
self.REWARD_RANGE = reward_range

self.observation_mode = observation_mode
self.track = track

# Reset Client -----------------------------------------------
self.goal_number = 0
self.all_goals = track_info[track]['goals']

self.car_reset_positions = track_info[track]['reset']
self.goals_reached = 0
self.start_goal_index = 0
self.steps_since_last_goal = 0

if track != 'multi_track':
self.all_goals = goal_positions[track]
self.car_waypoints = waypoints[track]
else:
austin_gp = goal_positions['austin_track']
budapest_gp = goal_positions['budapest_track']
budapest_gp = [[x + 200, y] for x, y in budapest_gp]
hockenheim_gp = goal_positions['hockenheim_track']
hockenheim_gp = [[x + 300, y] for x, y in hockenheim_gp]

self.all_car_goals = {
'austin_track': austin_gp,
'budapest_track': budapest_gp,
'hockenheim_track': hockenheim_gp
}

austin_wp = waypoints['austin_track']

budapest_wp = []
for x, y, yaw, index in waypoints['budapest_track']:
x += 200
budapest_wp.append((x, y, yaw, index))

hockenheim_wp = []
for x, y, yaw, index in waypoints['hockenheim_track']:
x += 300
hockenheim_wp.append((x, y, yaw, index))

self.all_car_waypoints = {
'austin_track': austin_wp,
'budapest_track': budapest_wp,
'hockenheim_track': hockenheim_wp
}

self.current_track = 'austin_track'

self.all_goals = self.all_car_goals[self.current_track]
self.car_waypoints = self.all_car_waypoints[self.current_track]

self.get_logger().info('Environment Setup Complete')
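A note on the multi_track branch above: shifting every budapest_track goal and waypoint by +200 and every hockenheim_track one by +300 along x lets the three tracks coexist in a single world without overlapping, and reset() can then switch tracks simply by swapping the active goal and waypoint lists. A small sketch of the same transform, with made-up coordinates:

def offset_track(goals, wps, dx):
    # Shift a track's goal positions and (x, y, yaw, index) waypoints along x.
    goals = [[x + dx, y] for x, y in goals]
    wps = [(x + dx, y, yaw, index) for x, y, yaw, index in wps]
    return goals, wps

# e.g. a budapest waypoint (12.3, -4.5, 1.57, 7) would become (212.3, -4.5, 1.57, 7).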

def reset(self):
self.step_counter = 0
self.steps_since_last_goal = 0
self.goals_reached = 0

self.set_velocity(0, 0)

if self.track == 'multi_track':
self.current_track = random.choice(list(self.all_car_goals.keys()))
self.all_goals = self.all_car_goals[self.current_track]
self.car_waypoints = self.all_car_waypoints[self.current_track]

# TODO: Remove Hard coded-ness of 10x10
self.goal_number = 0
self.goal_position = self.generate_goal(self.goal_number)

while not self.timer_future.done():
rclpy.spin_once(self)
# New random starting point for car
car_x, car_y, car_yaw, index = random.choice(self.car_waypoints)

# Update goal pointer to reflect starting position
self.start_goal_index = index
self.goal_position = self.all_goals[self.start_goal_index]

self.timer_future = Future()
self.sleep()

self.call_reset_service()
goal_x, goal_y = self.goal_position
self.call_reset_service(car_x=car_x, car_y=car_y, car_Y=car_yaw, goal_x=goal_x, goal_y=goal_y)

observation, _ = self.get_observation()

@@ -102,66 +126,21 @@ def step(self, action):
lin_vel, ang_vel = action
self.set_velocity(lin_vel, ang_vel)

while not self.timer_future.done():
rclpy.spin_once(self)

self.timer_future = Future()
self.sleep()

next_state, full_next_state = self.get_observation()

reward = self.compute_reward(full_state, full_next_state)
terminated = self.is_terminated(full_next_state)
truncated = self.step_counter >= self.MAX_STEPS
truncated = self.steps_since_last_goal >= 10
info = {}

return next_state, reward, terminated, truncated, info
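To make the new terminated/truncated split concrete: terminated now means the episode genuinely ended (collision, flip, or every goal reached), while truncated fires after ten steps without reaching a new goal. A training loop on top of this environment would typically reset on either flag but only treat terminated as a true end of episode when bootstrapping; a minimal sketch, with the agent object and its method names assumed rather than taken from this repo:

state, _ = env.reset()          # assuming reset() returns (observation, info)
done = False
while not done:
    action = agent.select_action(state)
    next_state, reward, terminated, truncated, _ = env.step(action)
    # Bootstrap past a truncation (episode merely cut short), not past a termination.
    agent.store(state, action, reward, next_state, done=terminated)
    state = next_state
    done = terminated or truncated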

def is_terminated(self, state):
return has_collided(state[8:], self.COLLISION_RANGE) \
or has_flipped_over(state[2:6]) or \
self.goal_number >= len(self.all_goals)


def generate_goal(self, number):
print("Goal", number, "spawned")
return self.all_goals[number % len(self.all_goals)]

def call_reset_service(self):
"""
Reset the car and goal position
"""

x, y = self.goal_position

request = Reset.Request()
request.gx = x
request.gy = y
request.cx = self.car_reset_positions['x']
request.cy = self.car_reset_positions['y']
request.cyaw = self.car_reset_positions['yaw']
request.flag = "car_and_goal"

future = self.reset_client.call_async(request)
rclpy.spin_until_future_complete(self, future)

return future.result()

def update_goal_service(self, number):
"""
Reset the goal position
"""

x, y = self.generate_goal(number)
self.goal_position = [x, y]

request = Reset.Request()
request.gx = x
request.gy = y
request.flag = "goal_only"

future = self.reset_client.call_async(request)
rclpy.spin_until_future_complete(self, future)

return future.result()
self.goals_reached == len(self.all_goals)
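Because the diff lists the removed helpers between the two halves of the change, the updated termination check is easy to misread; pieced together from the lines above, it now reads roughly as follows (a reconstruction, since the unchanged context lines are collapsed):

def is_terminated(self, state):
    # state[8:] holds the reduced lidar ranges, state[2:6] the orientation quaternion.
    return has_collided(state[8:], self.COLLISION_RANGE) \
        or has_flipped_over(state[2:6]) \
        or self.goals_reached == len(self.all_goals)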

def get_observation(self):

@@ -187,32 +166,76 @@ def get_observation(self):

def compute_reward(self, state, next_state):

# TESTING ONLY

# if self.goal_number < len(self.all_goals) - 1:
# self.goal_number += 1
# else:
# self.goal_number = 0

# self.update_goal_service(self.goal_number)
# ==============================================================

reward = 0

goal_position = self.goal_position

prev_distance = math.dist(goal_position, state[:2])
current_distance = math.dist(goal_position, next_state[:2])
reward += 10 * (prev_distance - current_distance) / prev_distance

self.steps_since_last_goal += 1

if current_distance < self.REWARD_RANGE:
reward += 50
self.goal_number += 1
self.step_counter = 0
self.update_goal_service(self.goal_number)
print(f'Goal #{self.goals_reached} Reached')
reward += 2
self.goals_reached += 1

# Updating Goal Position
new_x, new_y = self.all_goals[(self.start_goal_index + self.goals_reached) % len(self.all_goals)]
self.goal_position = [new_x, new_y]

self.update_goal_service(new_x, new_y)

self.steps_since_last_goal = 0

if self.steps_since_last_goal >= 10:
reward -= 10

if self.goals_reached == len(self.all_goals):
reward += 100

if has_collided(next_state[8:], self.COLLISION_RANGE) or has_flipped_over(next_state[2:6]):
reward -= 25 # TODO: find optimal value for this
reward -= 25

return reward
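A quick worked example of the progress term above, with illustrative distances: the dense reward is ten times the fractional reduction in distance to the current goal, while the sparse terms (+2 per goal reached, +100 for reaching every goal, -10 after ten goal-less steps, -25 for a collision or flip) shape the rest.

prev_distance = 4.0       # illustrative: 4.0 m to the goal before the step
current_distance = 3.6    # 3.6 m after the step
reward = 10 * (prev_distance - current_distance) / prev_distance
print(reward)             # 1.0 -- a 10% reduction in distance earns 1.0 reward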


# Utility Functions --------------------------------------------

def call_reset_service(self, car_x, car_y, car_Y, goal_x, goal_y):
"""
Reset the car and goal position
"""

request = Reset.Request()
request.gx = float(goal_x)
request.gy = float(goal_y)
request.cx = float(car_x)
request.cy = float(car_y)
request.cyaw = float(car_Y)
request.flag = "car_and_goal"

future = self.reset_client.call_async(request)
rclpy.spin_until_future_complete(self, future)

return future.result()

def update_goal_service(self, x, y):
"""
Reset the goal position
"""

request = Reset.Request()
request.gx = x
request.gy = y
request.flag = "goal_only"

future = self.reset_client.call_async(request)
rclpy.spin_until_future_complete(self, future)

return future.result()

def sleep(self):
while not self.timer_future.done():
rclpy.spin_once(self)

self.timer_future = Future()
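The sleep() helper above is what enforces the fixed step_length between actions: it spins until self.timer_future resolves, then re-arms the future, and the future is presumably completed by a wall-clock ROS timer in the base class. A generic sketch of that pattern, with the class and callback names assumed rather than copied from F1tenthEnvironment:

import rclpy
from rclpy.node import Node
from rclpy.task import Future

class FixedStepNode(Node):
    def __init__(self, step_length):
        super().__init__('fixed_step_node')
        self.timer_future = Future()
        # Fires every step_length seconds and releases anyone blocked in sleep().
        self.create_timer(step_length, self._on_timer)

    def _on_timer(self):
        if not self.timer_future.done():
            self.timer_future.set_result(True)

    def sleep(self):
        while not self.timer_future.done():
            rclpy.spin_once(self)
        self.timer_future = Future()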