Merge pull request #134 from UoA-CARES/feat/add-parse-observation
Feat/add parse observation
retinfai authored Oct 25, 2023
2 parents e3d5c23 + a7be3ec commit 3a5a0bc
Showing 8 changed files with 204 additions and 23 deletions.
66 changes: 66 additions & 0 deletions src/environments/environments/CarBeatEnvironment.py
@@ -19,6 +19,46 @@

class CarBeatEnvironment(Node):

"""
CarBeat Reinforcement Learning Environment:
Task:
Agent learns to drive a track and overtake a car that is driving at a constant speed.
The second car is using the Follow The Gap algorithm.
Observation:
full:
Car Position (x, y)
Car Orientation (x, y, z, w)
Car Velocity
Car Angular Velocity
Lidar Data
no_position:
Car Orientation (x, y, z, w)
Car Velocity
Car Angular Velocity
Lidar Data
lidar_only:
Car Velocity
Car Angular Velocity
Lidar Data
No. of lidar points is configurable
Action:
Its linear and angular velocity (Twist)
Reward:
+2 if it comes within REWARD_RANGE units of a goal
+200 if it overtakes the Follow The Gap car
-25 if it collides with a wall
Termination Conditions:
When the agent collides with a wall or the Follow The Gap car
Truncation Condition:
When the number of goals reached surpasses MAX_GOALS
"""
def __init__(self,
rl_car_name,
ftg_car_name,
@@ -365,4 +405,30 @@ def update_goal_service(self, x, y):
rclpy.spin_until_future_complete(self, future)

return future.result()

# Parses an observation and returns a human-readable string that can be printed to the terminal
def parse_observation(self, observation):
string = f'CarBeat Observation: \n'

if self.OBSERVATION_MODE == 'full':
string += f'Car Position: {observation[0:2]} \n'
string += f'Car Orientation: {observation[2:6]} \n'
string += f'Car Velocity: {observation[6]} \n'
string += f'Car Angular Velocity: {observation[7]} \n'
string += f'Car Lidar: {observation[8:]} \n'
elif self.OBSERVATION_MODE == 'no_position':
string += f'Car Orientation: {observation[:4]} \n'
string += f'Car Velocity: {observation[4]} \n'
string += f'Car Angular Velocity: {observation[5]} \n'
string += f'Car Lidar: {observation[6:]} \n'
elif self.OBSERVATION_MODE == 'lidar_only':
string += f'Car Velocity: {observation[0]} \n'
string += f'Car Angular Velocity: {observation[1]} \n'
string += f'Car Lidar: {observation[2:]} \n'
else:
raise ValueError(f'Invalid observation mode: {self.OBSERVATION_MODE}')

return string
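To illustrate the slicing above, here is a minimal usage sketch (not part of the commit): it builds a dummy 'full'-mode observation with 10 lidar points, matching the default num_lidar_points declared in the parameter list further down. The dummy values and the env variable are assumptions for illustration.

import numpy as np

num_lidar_points = 10  # default declared in the __declare_params list below

dummy_observation = np.concatenate([
    [1.0, 2.0],                       # Car Position (x, y)
    [0.0, 0.0, 0.0, 1.0],             # Car Orientation (x, y, z, w)
    [0.5],                            # Car Velocity
    [0.1],                            # Car Angular Velocity
    np.full(num_lidar_points, 3.0),   # Lidar Data
])

# Assuming env is a constructed CarBeatEnvironment with OBSERVATION_MODE == 'full':
# print(env.parse_observation(dummy_observation))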



32 changes: 23 additions & 9 deletions src/environments/environments/CarBlockEnvironment.py
@@ -14,25 +14,28 @@

class CarBlockEnvironment(F1tenthEnvironment):
"""
CarWall Reinforcement Learning Environment:
CarBlock Reinforcement Learning Environment:
Task:
Here the agent learns to drive the f1tenth car to a goal position
Agent learns to navigate to a goal position while avoiding obstacles that are dynamically placed at the start of each episode
Observation:
It's position (x, y), orientation (w, x, y, z), lidar points (approx. ~600 rays) and the goal's position (x, y)
Car Position (x, y)
Car Orientation (x, y, z, w)
Car Velocity
Car Angular Velocity
Lidar Data
Action:
It's linear and angular velocity
Its linear and angular velocity (Twist)
Reward:
It's progress toward the goal plus,
100+ if it reaches the goal plus,
-50 if it collides with the wall
Its progress towards the goal * 10
+100 if it reaches the goal
-25 if it collides with an obstacle or flips over
Termination Conditions:
When the agent is within REWARD_RANGE units or,
When the agent is within COLLISION_RANGE units
When the agent collides with a wall or reaches the goal
Truncation Condition:
When the number of steps surpasses MAX_STEPS
@@ -114,5 +117,16 @@ def compute_reward(self, state, next_state):
reward -= 25 # TODO: find optimal value for this

return reward

def parse_observation(self, observation):
string = f'CarBlock Observation\n'
string += f'Position: {observation[:2]}\n'
string += f'Orientation: {observation[2:6]}\n'
string += f'Car Velocity: {observation[6]}\n'
string += f'Car Angular Velocity: {observation[7]}\n'
string += f'Lidar: {observation[8:-2]}\n'
string += f'Goal Position: {observation[-2:]}\n'

return string
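The diff above only shows the tail of compute_reward. As a rough sketch of a reward that matches the docstring (progress towards the goal * 10, +100 at the goal, -25 on collision or flip), one possible shape is below; the function and argument names are hypothetical, not the repository's actual implementation.

import math

def compute_reward_sketch(state, next_state, goal_position, reward_range, collided):
    # Progress towards the goal, scaled by 10
    prev_distance = math.dist(state[:2], goal_position)
    curr_distance = math.dist(next_state[:2], goal_position)
    reward = (prev_distance - curr_distance) * 10

    if curr_distance < reward_range:
        reward += 100   # reached the goal
    if collided:
        reward -= 25    # hit an obstacle or flipped over

    return reward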


31 changes: 22 additions & 9 deletions src/environments/environments/CarGoalEnvironment.py
@@ -8,26 +8,28 @@

class CarGoalEnvironment(F1tenthEnvironment):
"""
CarWall Reinforcement Learning Environment:
CarGoal Reinforcement Learning Environment:
Task:
Here the agent learns to drive the f1tenth car to a goal position.
This happens all within a 10x10 box
This happens in an open area.
Observation:
It's position (x, y), orientation (w, x, y, z), lidar points (approx. ~600 rays) and the goal's position (x, y)
Car Position (x, y)
Car Orientation (x, y, z, w)
Car Velocity
Car Angular Velocity
Goal Position (x, y)
Action:
It's linear and angular velocity
Its linear and angular velocity
Reward:
It's progress toward the goal plus,
100+ if it reaches the goal plus,
-50 if it collides with the wall
Its progress toward the goal plus,
+100 if it reaches the goal
Termination Conditions:
When the agent is within REWARD_RANGE units of the goal or,
When the agent is within COLLISION_RANGE units of a wall
When the agent is within REWARD_RANGE units of the goal
Truncation Condition:
When the number of steps surpasses MAX_STEPS
@@ -101,5 +103,16 @@ def compute_reward(self, state, next_state):

return reward

def parse_observation(self, observation):

string = f'CarGoal Observation\n'
string += f'Position: {observation[:2]}\n'
string += f'Orientation: {observation[2:6]}\n'
string += f'Car Velocity: {observation[6]}\n'
string += f'Car Angular Velocity: {observation[7]}\n'
string += f'Goal Position: {observation[-2:]}\n'

return string



60 changes: 60 additions & 0 deletions src/environments/environments/CarTrackEnvironment.py
@@ -11,6 +11,43 @@

class CarTrackEnvironment(F1tenthEnvironment):

"""
CarTrack Reinforcement Learning Environment:
Task:
Agent learns to drive a track
Observation:
full:
Car Position (x, y)
Car Orientation (x, y, z, w)
Car Velocity
Car Angular Velocity
Lidar Data
no_position:
Car Orientation (x, y, z, w)
Car Velocity
Car Angular Velocity
Lidar Data
lidar_only:
Car Velocity
Car Angular Velocity
Lidar Data
Action:
Its linear and angular velocity (Twist)
Reward:
+2 if it comes within REWARD_RANGE units of a goal
-25 if it collides with a wall
Termination Conditions:
When the agent collides with a wall
Truncation Condition:
When the number of goals reached surpasses MAX_GOALS
"""

def __init__(self,
car_name,
reward_range=0.5,
@@ -208,3 +245,26 @@ def sleep(self):
rclpy.spin_once(self)

self.timer_future = Future()

def parse_observation(self, observation):

string = f'CarTrack Observation\n'

match (self.observation_mode):
case 'no_position':
string += f'Orientation: {observation[:4]}\n'
string += f'Car Velocity: {observation[4]}\n'
string += f'Car Angular Velocity: {observation[5]}\n'
string += f'Lidar: {observation[6:]}\n'
case 'lidar_only':
string += f'Car Velocity: {observation[0]}\n'
string += f'Car Angular Velocity: {observation[1]}\n'
string += f'Lidar: {observation[2:]}\n'
case _:
string += f'Position: {observation[:2]}\n'
string += f'Orientation: {observation[2:6]}\n'
string += f'Car Velocity: {observation[6]}\n'
string += f'Car Angular Velocity: {observation[7]}\n'
string += f'Lidar: {observation[8:]}\n'

return string
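The match statement above requires Python 3.10 or newer. As a hypothetical companion helper (not in the commit), the expected length of the flat observation for each mode can be derived from the slices used above, given the configurable number of lidar points:

def expected_observation_length(observation_mode: str, num_lidar_points: int) -> int:
    # Non-lidar entries per mode, matching the slicing in parse_observation
    base = {
        'full': 2 + 4 + 1 + 1,        # position + orientation + velocity + angular velocity
        'no_position': 4 + 1 + 1,     # orientation + velocity + angular velocity
        'lidar_only': 1 + 1,          # velocity + angular velocity
    }
    if observation_mode not in base:
        raise ValueError(f'Invalid observation mode: {observation_mode}')
    return base[observation_mode] + num_lidar_points

# e.g. expected_observation_length('lidar_only', 10) == 12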
19 changes: 17 additions & 2 deletions src/environments/environments/CarWallEnvironment.py
@@ -18,7 +18,12 @@ class CarWallEnvironment(F1tenthEnvironment):
This happens all within a 10x10 box
Observation:
It's position (x, y), orientation (w, x, y, z), lidar points (approx. ~600 rays) and the goal's position (x, y)
Car Position (x, y)
Car Orientation (x, y, z, w)
Car Velocity
Car Angular Velocity
Lidar Data
Goal Position (x, y)
Action:
Its linear and angular velocity
@@ -111,4 +116,14 @@ def compute_reward(self, state, next_state):

return reward


def parse_observation(self, observation):

string = f'CarWall Observation\n'
string += f'Position: {observation[:2]}\n'
string += f'Orientation: {observation[2:6]}\n'
string += f'Car Velocity: {observation[6]}\n'
string += f'Car Angular Velocity: {observation[7]}\n'
string += f'Lidar Points: {observation[8:-2]}\n'
string += f'Goal Position: {observation[-2:]}\n'

return string
7 changes: 5 additions & 2 deletions src/reinforcement_learning/config/train.yaml
@@ -2,5 +2,8 @@ train:
ros__parameters:
environment: 'CarBeat' # CarGoal, CarWall, CarBlock, CarTrack, CarBeat
track: 'multi_track' # track_1, track_2, track_3, multi_track, multi_track_testing -> only applies for CarTrack
car_name: 'New_Car'
ftg_car_name: 'New_Car_two'
car_name: 'f1tenth'
algorithm: 'TD3'
g: 1
buffer_size: 50000
observation_mode: 'lidar_only'
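A minimal rclpy sketch (not part of the commit) of how a node could declare and read a few of the parameters above; the node name 'train' mirrors the top-level YAML key, while the class name and the parameter subset are illustrative assumptions.

import rclpy
from rclpy.node import Node

class TrainConfig(Node):
    def __init__(self):
        super().__init__('train')
        # Defaults mirror the YAML above; ROS 2 overrides them when the file is loaded
        self.declare_parameter('environment', 'CarBeat')
        self.declare_parameter('observation_mode', 'lidar_only')
        self.declare_parameter('buffer_size', 50000)

    def summary(self) -> str:
        environment = self.get_parameter('environment').value
        observation_mode = self.get_parameter('observation_mode').value
        buffer_size = self.get_parameter('buffer_size').value
        return f'{environment=}, {observation_mode=}, {buffer_size=}'

# rclpy.init(); print(TrainConfig().summary()); rclpy.shutdown()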
@@ -28,7 +28,7 @@ def __declare_params():
('step_length', 0.25),
('reward_range', 0.2),
('collision_range', 0.2),
('observation_mode', 'no_position'),
('observation_mode', 'lidar_only'),
('max_goals', 500),
('num_lidar_points', 10),

@@ -19,6 +19,11 @@ def off_policy_train(env, agent, memory, record, algorithm_config):
evaluate = False

state, _ = env.reset()

obs = env.parse_observation(state)
env.get_logger().info('-----------------------------------')
env.get_logger().info('\n' + obs)
env.get_logger().info('-----------------------------------')

for step_counter in range(max_steps_training):
episode_timesteps += 1
@@ -133,6 +138,11 @@ def ppo_train(env, agent, memory, record, algorithm_config):

state, _ = env.reset()

obs = env.parse_observation(state)
env.get_logger().info('-----------------------------------')
env.get_logger().info('\n' + obs)
env.get_logger().info('-----------------------------------')
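The same four logging lines appear in both off_policy_train and ppo_train; a hypothetical helper (not in the commit) could factor them out:

def log_observation(env, state):
    # Pretty-print the freshly reset observation via the environment's parser
    observation_string = env.parse_observation(state)
    env.get_logger().info('-----------------------------------')
    env.get_logger().info('\n' + observation_string)
    env.get_logger().info('-----------------------------------')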

for step_counter in range(max_steps_training):
episode_timesteps += 1
