From 37d93ac7e62b68a8d471b534ce70641532c4f223 Mon Sep 17 00:00:00 2001
From: retinfai
Date: Thu, 3 Aug 2023 10:50:18 +1200
Subject: [PATCH 1/7] feat: add CarBeat reset interface

---
 src/environment_interfaces/CMakeLists.txt       |  1 +
 src/environment_interfaces/srv/CarBeatReset.srv | 11 +++++++++++
 2 files changed, 12 insertions(+)
 create mode 100644 src/environment_interfaces/srv/CarBeatReset.srv

diff --git a/src/environment_interfaces/CMakeLists.txt b/src/environment_interfaces/CMakeLists.txt
index 97ccfdef..7071dd2f 100644
--- a/src/environment_interfaces/CMakeLists.txt
+++ b/src/environment_interfaces/CMakeLists.txt
@@ -27,6 +27,7 @@ find_package(rosidl_default_generators REQUIRED)
 rosidl_generate_interfaces(${PROJECT_NAME}
   "srv/Reset.srv"
+  "srv/CarBeatReset.srv"
 )
 
 ament_package()
diff --git a/src/environment_interfaces/srv/CarBeatReset.srv b/src/environment_interfaces/srv/CarBeatReset.srv
new file mode 100644
index 00000000..6d8d5b61
--- /dev/null
+++ b/src/environment_interfaces/srv/CarBeatReset.srv
@@ -0,0 +1,11 @@
+float64 gx       # goal x
+float64 gy       # goal y
+float64 cx_one   # car_one x
+float64 cy_one   # car_one y
+float64 cyaw_one # car_one yaw
+float64 cx_two   # car_two x
+float64 cy_two   # car_two y
+float64 cyaw_two # car_two yaw
+string flag      # which entities to move: "car_and_goal" or "goal_only"
+---
+bool success
\ No newline at end of file
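For context: once this interface is built, any node can call it like a regular
ROS 2 service. A minimal client sketch — the `car_beat_reset` service name and
the "car_and_goal" flag value come from PATCH 2 below; all pose values here are
placeholders, not values from the repository:

    import rclpy
    from environment_interfaces.srv import CarBeatReset

    rclpy.init()
    node = rclpy.create_node('car_beat_reset_client')
    client = node.create_client(CarBeatReset, 'car_beat_reset')
    client.wait_for_service()

    request = CarBeatReset.Request()
    request.gx, request.gy = 5.0, 5.0            # goal position (placeholder)
    request.cx_one, request.cy_one = 0.0, 0.0    # car_one pose (placeholder)
    request.cyaw_one = 0.0
    request.cx_two, request.cy_two = 0.0, 1.0    # car_two pose (placeholder)
    request.cyaw_two = 0.0
    request.flag = 'car_and_goal'                # move both cars and the goal

    future = client.call_async(request)
    rclpy.spin_until_future_complete(node, future)
    print(future.result().success)
    rclpy.shutdown()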
From 8c13d2d0a17aa691788f58572acf7b8a99936d7a Mon Sep 17 00:00:00 2001
From: retinfai
Date: Thu, 3 Aug 2023 11:09:56 +1200
Subject: [PATCH 2/7] feat: add reset to CarBeat

---
 .../environments/CarBeatEnvironment.py        | 180 ++++++++++++++----
 src/environments/environments/CarBeatReset.py |  13 +-
 src/environments/launch/carbeat.launch.py     |  10 +-
 src/reinforcement_learning/config/train.yaml  |   4 +-
 .../reinforcement_learning/train.py           |   2 +-
 5 files changed, 158 insertions(+), 51 deletions(-)

diff --git a/src/environments/environments/CarBeatEnvironment.py b/src/environments/environments/CarBeatEnvironment.py
index eea92b26..a9e04f64 100644
--- a/src/environments/environments/CarBeatEnvironment.py
+++ b/src/environments/environments/CarBeatEnvironment.py
@@ -4,59 +4,106 @@
 import rclpy
 from rclpy import Future
 from sensor_msgs.msg import LaserScan
 
-from environment_interfaces.srv import Reset
+import numpy as np
+from geometry_msgs.msg import Twist
+from message_filters import Subscriber, ApproximateTimeSynchronizer
+from rclpy.node import Node
+from nav_msgs.msg import Odometry
+
+from environment_interfaces.srv import CarBeatReset
 from environments.F1tenthEnvironment import F1tenthEnvironment
 
 from .termination import has_collided, has_flipped_over
 from .util import process_odom, reduce_lidar
 from .track_reset import track_info
 
-class CarBeatEnvironment(F1tenthEnvironment):
-    """
-    CarTrack Reinforcement Learning Environment:
-
-    Task:
-        Here the agent learns to drive the f1tenth car to a goal position
+class CarBeatEnvironment(Node):
 
-    Observation:
-        It's position (x, y), orientation (w, x, y, z), lidar points (approx. ~600 rays) and the goal's position (x, y)
-
-    Action:
-        It's linear and angular velocity
-
-    Reward:
-        It's progress toward the goal plus,
-        50+ if it reaches the goal plus,
-        -25 if it collides with the wall
-
-    Termination Conditions:
-        When the agent is within REWARD_RANGE units or,
-        When the agent is within COLLISION_RANGE units
-
-    Truncation Condition:
-        When the number of steps surpasses MAX_STEPS
-    """
-
-    def __init__(self, car_name, reward_range=1, max_steps=50, collision_range=0.2, step_length=0.5, track='track_1'):
-        super().__init__('car_beat', car_name, max_steps, step_length)
+    def __init__(self, car_one_name, car_two_name, reward_range=1, max_steps=50, collision_range=0.2, step_length=0.5, track='track_1'):
+        super().__init__('car_beat_environment')
 
         # Environment Details ----------------------------------------
+        self.NAME = car_one_name
+        self.OTHER_CAR_NAME = car_two_name
+        self.MAX_STEPS = max_steps
+        self.STEP_LENGTH = step_length
+        self.MAX_ACTIONS = np.asarray([3, 3.14])
+        self.MIN_ACTIONS = np.asarray([0, -3.14])
         self.MAX_STEPS_PER_GOAL = max_steps
+
+        # TODO: update OBSERVATION_SIZE once both cars' observations are included
         self.OBSERVATION_SIZE = 8 + 10  # Car position + Lidar rays
         self.COLLISION_RANGE = collision_range
         self.REWARD_RANGE = reward_range
+        self.ACTION_NUM = 2
 
-        # Reset Client -----------------------------------------------
+        self.step_counter = 0
+
+        # Goal/Track Info -----------------------------------------------
         self.goal_number = 0
         self.all_goals = track_info[track]['goals']
 
         self.car_reset_positions = track_info[track]['reset']
 
-        self.get_logger().info('Environment Setup Complete')
+        # Pub/Sub ----------------------------------------------------
+        self.cmd_vel_pub = self.create_publisher(
+            Twist,
+            f'/{self.NAME}/cmd_vel',
+            10
+        )
+
+        self.odom_sub_one = Subscriber(
+            self,
+            Odometry,
+            f'/{self.NAME}/odometry',
+        )
+
+        self.lidar_sub_one = Subscriber(
+            self,
+            LaserScan,
+            f'/{self.NAME}/scan',
+        )
+
+        self.odom_sub_two = Subscriber(
+            self,
+            Odometry,
+            f'/{self.OTHER_CAR_NAME}/odometry',
+        )
+
+        self.lidar_sub_two = Subscriber(
+            self,
+            LaserScan,
+            f'/{self.OTHER_CAR_NAME}/scan',
+        )
+
+        self.message_filter = ApproximateTimeSynchronizer(
+            [self.odom_sub_one, self.lidar_sub_one, self.odom_sub_two, self.lidar_sub_two],
+            10,
+            0.1,
+        )
+
+        self.message_filter.registerCallback(self.message_filter_callback)
+
+        self.observation_future = Future()
+
+        # Reset Client -----------------------------------------------
+        self.reset_client = self.create_client(
+            CarBeatReset,
+            'car_beat_reset'
+        )
+
+        while not self.reset_client.wait_for_service(timeout_sec=1.0):
+            self.get_logger().info('reset service not available, waiting again...')
+
+        self.timer = self.create_timer(step_length, self.timer_cb)
+        self.timer_future = Future()
 
     def reset(self):
         # self.get_logger().info('Environment Reset Called')
-
         self.step_counter = 0
 
         self.set_velocity(0, 0)
@@ -83,6 +130,54 @@ def reset(self):
 
         return observation, info
 
+    def step(self, action):
+        self.step_counter += 1
+
+        state = self.get_observation()
+
+        lin_vel, ang_vel = action
+        self.set_velocity(lin_vel, ang_vel)
+
+        while not self.timer_future.done():
+            rclpy.spin_once(self)
+
+        self.timer_future = Future()
+
+        next_state = self.get_observation()
+        reward = self.compute_reward(state, next_state)
+        terminated = self.is_terminated(next_state)
+        truncated = self.step_counter >= self.MAX_STEPS
+        info = {}
+
+        return next_state, reward, terminated, truncated, info
+
+    def message_filter_callback(self, odom_one: Odometry, lidar_one: LaserScan, odom_two: Odometry, lidar_two: LaserScan):
+        self.observation_future.set_result({'odom': odom_one, 'lidar': lidar_one, 'odom_two': odom_two, 'lidar_two': lidar_two})
+
+    def get_data(self):
+        rclpy.spin_until_future_complete(self, self.observation_future)
+        future = self.observation_future
+        self.observation_future = Future()
+        data = future.result()
+        return data['odom_one'], data['lidar_one'], data['odom_two'], data['lidar_two']
+
+    def set_velocity(self, linear, angular):
+        """
+        Publish Twist messages to f1tenth cmd_vel topic
+        """
+        velocity_msg = Twist()
+        velocity_msg.angular.z = float(angular)
+        velocity_msg.linear.x = float(linear)
+
+        self.cmd_vel_pub.publish(velocity_msg)
+
+    def sleep(self):
+        while not self.timer_future.done():
+            rclpy.spin_once(self)
+
+    def timer_cb(self):
+        self.timer_future.set_result(True)
+
     def is_terminated(self, state):
         return has_collided(state[8:], self.COLLISION_RANGE) \
             or has_flipped_over(state[2:6])
@@ -98,12 +193,21 @@ def call_reset_service(self):
 
         x, y = self.goal_position
 
-        request = Reset.Request()
+        request = CarBeatReset.Request()
+
         request.gx = x
         request.gy = y
-        request.cx = self.car_reset_positions['x']
-        request.cy = self.car_reset_positions['y']
-        request.cyaw = self.car_reset_positions['yaw']
+
+        request.cx_one = self.car_reset_positions['x']
+        request.cy_one = self.car_reset_positions['y']
+        request.cyaw_one = self.car_reset_positions['yaw']
+
+        request.cx_two = self.all_goals[0][0]
+        request.cy_two = self.all_goals[0][1]
+
+        # TODO: set a proper yaw for car_two
+        request.cyaw_two = self.car_reset_positions['yaw']
+
         request.flag = "car_and_goal"
 
         future = self.reset_client.call_async(request)
@@ -119,7 +223,7 @@ def update_goal_service(self, number):
         x, y = self.generate_goal(number)
         self.goal_position = [x, y]
 
-        request = Reset.Request()
+        request = CarBeatReset.Request()
         request.gx = x
         request.gy = y
         request.flag = "goal_only"
@@ -132,10 +236,10 @@ def get_observation(self):
 
         # Get Position and Orientation of F1tenth
-        odom, lidar = self.get_data()
+        odom_one, lidar_one, odom_two, lidar_two = self.get_data()
         odom = process_odom(odom)
 
-        reduced_range = reduce_lidar(lidar)
+        reduced_range = reduce_lidar(lidar_one)
 
         # Get Goal Position
         return odom + reduced_range
diff --git a/src/environments/environments/CarBeatReset.py b/src/environments/environments/CarBeatReset.py
index 3471fe0a..710bfbc6 100644
--- a/src/environments/environments/CarBeatReset.py
+++ b/src/environments/environments/CarBeatReset.py
@@ -4,7 +4,7 @@
 from rclpy.callback_groups import MutuallyExclusiveCallbackGroup, ReentrantCallbackGroup
 from rclpy.executors import MultiThreadedExecutor
 
-from environment_interfaces.srv import Reset
+from environment_interfaces.srv import CarBeatReset as CarBeatResetSrv
 from f1tenth_control.SimulationServices import SimulationServices
 from ros_gz_interfaces.srv import SetEntityPose
 from ros_gz_interfaces.msg import Entity
@@ -19,7 +19,7 @@ def __init__(self):
         super().__init__('car_beat_reset')
 
         srv_cb_group = MutuallyExclusiveCallbackGroup()
-        self.srv = self.create_service(Reset, 'car_beat_reset', callback=self.service_callback, callback_group=srv_cb_group)
+        self.srv = self.create_service(CarBeatResetSrv, 'car_beat_reset', callback=self.service_callback, callback_group=srv_cb_group)
 
         set_pose_cb_group = MutuallyExclusiveCallbackGroup()
         self.set_pose_client = self.create_client(
@@ -34,10 +34,11 @@ def __init__(self):
 
     def service_callback(self, request, response):
 
-        self.get_logger().info(f'Reset Service Request Received: relocating goal to x={request.cx} y={request.cy}')
+        self.get_logger().info(f'Reset Service Request Received: relocating car one to x={request.cx_one} y={request.cy_one}')
 
         goal_req = self.create_request('goal', x=request.gx, y=request.gy, z=1)
-        car_req = self.create_request('f1tenth_one', x=request.cx, y=request.cy, z=0.1, yaw=request.cyaw)
+        car_req_one = self.create_request('f1tenth_one', x=request.cx_one, y=request.cy_one, z=0.1, yaw=request.cyaw_one)
+        car_req_two = self.create_request('f1tenth_two', x=request.cx_two, y=request.cy_two, z=0.1, yaw=request.cyaw_two)
 
         while not self.set_pose_client.wait_for_service(timeout_sec=1.0):
             self.get_logger().info('set_pose service not available, waiting again...')
@@ -47,7 +48,9 @@ def service_callback(self, request, response):
             self.set_pose_client.call(goal_req)
         else:
             self.set_pose_client.call(goal_req)
-            self.set_pose_client.call(car_req)
+            self.set_pose_client.call(car_req_one)
+            self.set_pose_client.call(car_req_two)
+
         # self.get_logger().info('Successfully Reset')
 
         response.success = True
diff --git a/src/environments/launch/carbeat.launch.py b/src/environments/launch/carbeat.launch.py
index 73c37328..d9c94682 100644
--- a/src/environments/launch/carbeat.launch.py
+++ b/src/environments/launch/carbeat.launch.py
@@ -52,8 +52,8 @@ def generate_launch_description():
         launch_arguments={
             'name': 'f1tenth_one',
             'world': 'empty',
-            'x': '-5',
-            'y': '-5',
+            'x': '0',
+            'y': '0',
             'z': '1',
         }.items()
     )
@@ -64,9 +64,9 @@ def generate_launch_description():
         launch_arguments={
             'name': 'f1tenth_two',
             'world': 'empty',
-            'x': '5',
-            'y': '5',
-            'z': '0.4',
+            'x': '0',
+            'y': '1',
+            'z': '1',
         }.items()
     )
 
diff --git a/src/reinforcement_learning/config/train.yaml b/src/reinforcement_learning/config/train.yaml
index eacaca3a..10ed53df 100644
--- a/src/reinforcement_learning/config/train.yaml
+++ b/src/reinforcement_learning/config/train.yaml
@@ -10,8 +10,8 @@ train:
     collision_range: 0.2
     observation_mode: 'no_position'
-    # actor_path & critic_path must exist, it can't be commented
-    actor_path: 'rl_logs/23_08_02_17:59:13/models/actor_checkpoint.pht'
-    critic_path: 'rl_logs/23_08_02_17:59:13/models/critic_checkpoint.pht'
+    # actor_path & critic_path are only needed when resuming from a checkpoint
+    # actor_path: 'rl_logs/23_08_02_17:59:13/models/actor_checkpoint.pht'
+    # critic_path: 'rl_logs/23_08_02_17:59:13/models/critic_checkpoint.pht'
     # gamma: 0.95
     # tau: 0.005
     # g: 5
diff --git a/src/reinforcement_learning/reinforcement_learning/train.py b/src/reinforcement_learning/reinforcement_learning/train.py
index 70669dba..8a8ae07e 100644
--- a/src/reinforcement_learning/reinforcement_learning/train.py
+++ b/src/reinforcement_learning/reinforcement_learning/train.py
@@ -81,7 +81,7 @@ def main():
         case 'CarTrack':
             env = CarTrackEnvironment('f1tenth', step_length=STEP_LENGTH, max_steps=MAX_STEPS, reward_range=REWARD_RANGE, collision_range=COLLISION_RANGE, track=TRACK)
         case 'CarBeat':
-            env = CarBeatEnvironment('f1tenth_one', step_length=STEP_LENGTH, max_steps=MAX_STEPS, reward_range=REWARD_RANGE, collision_range=COLLISION_RANGE, track=TRACK)
+            env = CarBeatEnvironment('f1tenth_one', 'f1tenth_two', step_length=STEP_LENGTH, max_steps=MAX_STEPS, reward_range=REWARD_RANGE, collision_range=COLLISION_RANGE, track=TRACK)
         case _:
             env = CarGoalEnvironment('f1tenth', step_length=STEP_LENGTH, max_steps=MAX_STEPS, reward_range=REWARD_RANGE)
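The environment above gates every observation on message_filters'
ApproximateTimeSynchronizer, so a sample is only delivered when both cars'
odometry and lidar arrive within the slop window. A stripped-down sketch of
that pattern — topic names, the queue size of 10, and the 0.1 s slop match the
values used in the patch; the node name is made up for the example:

    import rclpy
    from rclpy.node import Node
    from message_filters import Subscriber, ApproximateTimeSynchronizer
    from nav_msgs.msg import Odometry
    from sensor_msgs.msg import LaserScan

    class SyncDemo(Node):
        def __init__(self):
            super().__init__('sync_demo')
            subs = [
                Subscriber(self, Odometry, '/f1tenth_one/odometry'),
                Subscriber(self, LaserScan, '/f1tenth_one/scan'),
                Subscriber(self, Odometry, '/f1tenth_two/odometry'),
                Subscriber(self, LaserScan, '/f1tenth_two/scan'),
            ]
            # Fire the callback only when all four messages fall within 0.1 s.
            self.sync = ApproximateTimeSynchronizer(subs, 10, 0.1)
            self.sync.registerCallback(self.callback)

        def callback(self, odom_one, lidar_one, odom_two, lidar_two):
            self.get_logger().info('received one synchronized observation')

    def main():
        rclpy.init()
        rclpy.spin(SyncDemo())

    if __name__ == '__main__':
        main()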
From 3653d654e46509aaacc10cd31c1bb05fbb70c653 Mon Sep 17 00:00:00 2001
From: retinfai
Date: Thu, 3 Aug 2023 11:38:35 +1200
Subject: [PATCH 3/7] refactor: raise car spawn height, add random controller
 for car two

---
 src/environments/launch/carbeat.launch.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/environments/launch/carbeat.launch.py b/src/environments/launch/carbeat.launch.py
index d9c94682..3dd26a04 100644
--- a/src/environments/launch/carbeat.launch.py
+++ b/src/environments/launch/carbeat.launch.py
@@ -54,7 +54,7 @@ def generate_launch_description():
             'world': 'empty',
             'x': '0',
             'y': '0',
-            'z': '1',
+            'z': '5',
         }.items()
     )
 
@@ -66,7 +66,7 @@ def generate_launch_description():
             'world': 'empty',
             'x': '0',
             'y': '1',
-            'z': '1',
+            'z': '5',
         }.items()
     )
 
@@ -79,6 +79,15 @@ def generate_launch_description():
         output='screen',
     )
 
+    controller = Node(
+        package='controllers',
+        executable='random_controller',
+        output='screen',
+        parameters=[
+            {'car_name': 'f1tenth_two'},
+        ],
+    )
+
     ld = LaunchDescription([
         track_arg,
         OpaqueFunction(function=launch),
@@ -86,6 +95,7 @@ def generate_launch_description():
         reset,
         f1tenth_one,
         f1tenth_two,
+        controller,
     ])
 
     return ld
\ No newline at end of file
From 45508dfc0efc16606fe46768713524db8c63122f Mon Sep 17 00:00:00 2001
From: retinfai
Date: Thu, 3 Aug 2023 11:38:47 +1200
Subject: [PATCH 4/7] chore: fix import error

---
 src/controllers/controllers/controller.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/controllers/controllers/controller.py b/src/controllers/controllers/controller.py
index 85c1016d..485e71fd 100644
--- a/src/controllers/controllers/controller.py
+++ b/src/controllers/controllers/controller.py
@@ -6,7 +6,7 @@
 from sensor_msgs.msg import LaserScan
 from nav_msgs.msg import Odometry
 
-from environments.utils import process_lidar, process_odom
+from environments.util import process_lidar, process_odom
 
 class Controller(Node):
     def __init__(self, node_name, car_name, step_length):
From 7ec72794f0848c27e0b533961917b97c4ddb251d Mon Sep 17 00:00:00 2001
From: retinfai
Date: Thu, 3 Aug 2023 11:39:07 +1200
Subject: [PATCH 5/7] fix: initialize rclpy in random controller

---
 src/controllers/controllers/random.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/controllers/controllers/random.py b/src/controllers/controllers/random.py
index f9d8f4d9..0c72d78b 100644
--- a/src/controllers/controllers/random.py
+++ b/src/controllers/controllers/random.py
@@ -4,6 +4,8 @@
 import random
 
 def main():
+    rclpy.init()
+
     param_node = rclpy.create_node('params')
 
     param_node.declare_parameters(
From d75c53bd0fbec7ce81a211c2e5953de812ce7417 Mon Sep 17 00:00:00 2001
From: retinfai
Date: Thu, 3 Aug 2023 11:39:31 +1200
Subject: [PATCH 6/7] chore: increase track_2 real-time factor

---
 src/environments/worlds/track_2.sdf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/environments/worlds/track_2.sdf b/src/environments/worlds/track_2.sdf
index d0a2e948..8be32538 100644
--- a/src/environments/worlds/track_2.sdf
+++ b/src/environments/worlds/track_2.sdf
@@ -4,7 +4,7 @@
       <max_step_size>0.001</max_step_size>
-      <real_time_factor>1.5</real_time_factor>
+      <real_time_factor>10</real_time_factor>
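Before the final patch, it helps to pin down the observation layout it builds.
Assuming process_odom returns 8 values (position, quaternion, velocities — the
flip check on state[2:6] supports this) and reduce_lidar returns 10 rays, as
OBSERVATION_SIZE = 8 + 10 in PATCH 2 suggests, the flat vector returned by
get_observation below decomposes as in this hypothetical helper (the index
bounds are inferred from those sizes, not taken from the repository):

    # Layout: odom_one + lidar_one + odom_two + lidar_two + goal_position
    def split_observation(state):
        odom_one = state[0:8]     # car_one x, y, quaternion, velocities
        lidar_one = state[8:18]   # car_one reduced lidar (collision-check slice)
        odom_two = state[18:26]   # car_two odometry
        lidar_two = state[26:36]  # car_two reduced lidar
        goal = state[36:38]       # goal x, y
        return odom_one, lidar_one, odom_two, lidar_two, goal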
From: retinfai
Date: Thu, 3 Aug 2023 11:41:55 +1200
Subject: [PATCH 7/7] feat: add CarBeat Environment

---
 .../environments/CarBeatEnvironment.py        | 34 +++++++------------
 1 file changed, 12 insertions(+), 22 deletions(-)

diff --git a/src/environments/environments/CarBeatEnvironment.py b/src/environments/environments/CarBeatEnvironment.py
index a9e04f64..db7aa859 100644
--- a/src/environments/environments/CarBeatEnvironment.py
+++ b/src/environments/environments/CarBeatEnvironment.py
@@ -103,11 +103,10 @@ def __init__(self, car_one_name, car_two_name, reward_range=1, max_steps=50, col
         self.timer_future = Future()
 
     def reset(self):
-        # self.get_logger().info('Environment Reset Called')
+
         self.step_counter = 0
 
         self.set_velocity(0, 0)
-        # self.get_logger().info('Velocity set')
 
         # TODO: Remove Hard coded-ness of 10x10
         self.goal_number = 0
         self.goal_position = self.generate_goal(self.goal_number)
 
         while not self.timer_future.done():
             rclpy.spin_once(self)
 
-        # self.get_logger().info('Sleep called')
-
         self.timer_future = Future()
 
         self.call_reset_service()
-        # self.get_logger().info('Reset Called and returned')
 
         observation = self.get_observation()
-        # self.get_logger().info('Observation returned')
 
         info = {}
 
         return observation, info
@@ -152,7 +147,7 @@ def step(self, action):
         return next_state, reward, terminated, truncated, info
 
     def message_filter_callback(self, odom_one: Odometry, lidar_one: LaserScan, odom_two: Odometry, lidar_two: LaserScan):
-        self.observation_future.set_result({'odom': odom_one, 'lidar': lidar_one, 'odom_two': odom_two, 'lidar_two': lidar_two})
+        self.observation_future.set_result({'odom_one': odom_one, 'lidar_one': lidar_one, 'odom_two': odom_two, 'lidar_two': lidar_two})
 
     def get_data(self):
         rclpy.spin_until_future_complete(self, self.observation_future)
@@ -179,7 +174,7 @@ def timer_cb(self):
         self.timer_future.set_result(True)
 
     def is_terminated(self, state):
-        return has_collided(state[8:], self.COLLISION_RANGE) \
+        return has_collided(state[8:18], self.COLLISION_RANGE) \
             or has_flipped_over(state[2:6])
 
     def generate_goal(self, number):
@@ -237,24 +232,19 @@ def get_observation(self):
 
         # Get Position and Orientation of F1tenth
         odom_one, lidar_one, odom_two, lidar_two = self.get_data()
 
-        odom = process_odom(odom)
+        odom_one = process_odom(odom_one)
+        odom_two = process_odom(odom_two)
 
-        reduced_range = reduce_lidar(lidar_one)
+        lidar_one = reduce_lidar(lidar_one)
+        lidar_two = reduce_lidar(lidar_two)
 
-        # Get Goal Position
-        return odom + reduced_range
+        self.get_logger().info(
+            f'odom_one: {odom_one} \n\nlidar_one: {lidar_one} \n\nodom_two: {odom_two} \n\nlidar_two: {lidar_two}')
+
+        return odom_one + lidar_one + odom_two + lidar_two + self.goal_position
 
     def compute_reward(self, state, next_state):
 
-        # TESTING ONLY
-
-        # if self.goal_number < len(self.all_goals) - 1:
-        #     self.goal_number += 1
-        # else:
-        #     self.goal_number = 0
-
-        # self.update_goal_service(self.goal_number)
-        # ==============================================================
 
         reward = 0
 
         if current_distance < self.REWARD_RANGE:
             reward += 50
             self.goal_number += 1
             if self.goal_number == len(self.all_goals):
                 self.goal_number = 0
             self.step_counter = 0
             self.update_goal_service(self.goal_number)
 
-        if has_collided(next_state[8:], self.COLLISION_RANGE) or has_flipped_over(next_state[2:6]):
+        if has_collided(next_state[8:18], self.COLLISION_RANGE) or has_flipped_over(next_state[2:6]):
             reward -= 25  # TODO: find optimal value for this
 
         return reward
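With the series applied, CarBeatEnvironment exposes the usual reset/step loop
used by train.py. A minimal interaction sketch — it assumes the carbeat launch
file (and with it the car_beat_reset service and both car simulations) is
already running, and the constant action stands in for a real policy:

    import rclpy
    from environments.CarBeatEnvironment import CarBeatEnvironment

    rclpy.init()
    env = CarBeatEnvironment('f1tenth_one', 'f1tenth_two', step_length=0.5,
                             max_steps=50, reward_range=1, collision_range=0.2,
                             track='track_1')

    state, info = env.reset()
    terminated = truncated = False
    while not (terminated or truncated):
        action = [1.0, 0.0]  # [linear velocity, angular velocity] placeholder
        state, reward, terminated, truncated, info = env.step(action)

    rclpy.shutdown()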