Merge pull request #67 from UoA-CARES/58-standardise-parent-class
58 standardise parent class
emilysteiner71 authored Jul 12, 2023
2 parents 196781d + 444219f commit e84856a
Showing 21 changed files with 437 additions and 535 deletions.
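For orientation before the per-file diffs: the point of the standardisation is that every environment now inherits a common gymnasium-style reset/step surface from the new F1tenthEnvironment parent, so training code can treat CarGoal, CarWall and CarBlock interchangeably. A minimal usage sketch under that assumption follows; the car name and the straight-line placeholder policy are made up for illustration, and a running f1tenth simulation plus the matching reset service are assumed.

import rclpy
from environments.CarGoalEnvironment import CarGoalEnvironment

rclpy.init()
env = CarGoalEnvironment('f1tenth', max_steps=50, step_length=0.5)  # car name is hypothetical

observation, info = env.reset()
terminated = truncated = False
while not (terminated or truncated):
    action = [1.0, 0.0]  # placeholder policy: drive straight at 1 m/s
    observation, reward, terminated, truncated, info = env.step(action)

rclpy.shutdown()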
69 changes: 36 additions & 33 deletions src/environments/environments/CarBlockEnvironment.py
@@ -6,10 +6,15 @@
from rclpy import Future
from sensor_msgs.msg import LaserScan

from environments.GeneralCarEnvironment import GeneralCarEnvironment
from environments.F1tenthEnvironment import F1tenthEnvironment
from environments.CarWallEnvironment import CarWallEnvironment

from .util import reduce_lidar, process_odom, generate_position
from .termination import reached_goal, has_collided, has_flipped_over

from environment_interfaces.srv import Reset

class CarBlockEnvironment(GeneralCarEnvironment):
class CarBlockEnvironment(F1tenthEnvironment):
"""
CarWall Reinforcement Learning Environment:
@@ -36,49 +41,58 @@ class CarBlockEnvironment(GeneralCarEnvironment):
"""

def __init__(self, car_name, reward_range=0.2, max_steps=50, collision_range=0.2, step_length=0.5):
super().__init__('car_block', car_name, reward_range, max_steps, collision_range, step_length)
super().__init__('car_block', car_name, max_steps, step_length)

self.OBSERVATION_SIZE = 8 + 10 + 2 # odom + lidar + goal_position
self.COLLISION_RANGE = collision_range
self.REWARD_RANGE = reward_range

self.goal_position = [10, 10]

self.get_logger().info('Environment Setup Complete')

def reset(self):
self.step_counter = 0

self.set_velocity(0, 0)

# TODO: Remove Hard coded-ness of 10x10
self.goal_position = self.generate_goal(11, 3)
self.goal_position = generate_position(11, 13)

while not self.timer_future.done():
rclpy.spin_once(self)
self.sleep()

self.timer_future = Future()

self.call_reset_service()
new_x, new_y = self.goal_position
self.call_reset_service(new_x, new_y)

observation = self.get_observation()

info = {}

return observation, info

def generate_goal(self, inner_bound=3, outer_bound=5):
inner_bound = float(inner_bound)
outer_bound = float(outer_bound)
def is_terminated(self, state):
return \
reached_goal(state[:2], state[-2:],self.REWARD_RANGE) \
or has_collided(state[8:-2], self.COLLISION_RANGE) \
or has_flipped_over(state[2:6])

x_pos = random.uniform(-outer_bound, outer_bound)
x_pos = x_pos + inner_bound if x_pos >= 0 else x_pos - inner_bound
y_pos = random.uniform(-outer_bound, outer_bound)
y_pos = y_pos + inner_bound if y_pos >= 0 else y_pos - inner_bound

return [x_pos, y_pos]
def call_reset_service(self, goal_x, goal_y):
req = Reset.Request()

req.gx = goal_x
req.gy = goal_y

future = self.reset_client.call_async(req)
rclpy.spin_until_future_complete(future=future, node=self)

def get_observation(self):

# Get Position and Orientation of F1tenth
odom, lidar = self.get_data()
odom = self.process_odom(odom)
ranges, _ = self.process_lidar(lidar)
odom = process_odom(odom)

reduced_range = self.reduce_lidar(lidar)
reduced_range = reduce_lidar(lidar)

# Get Goal Position
return odom + reduced_range + self.goal_position
@@ -97,20 +111,9 @@ def compute_reward(self, state, next_state):
if current_distance < self.REWARD_RANGE:
reward += 100

if self.has_collided(next_state) or self.has_flipped_over(next_state):
if has_collided(next_state[8:-2], self.COLLISION_RANGE) or has_flipped_over(next_state[2:6]):
reward -= 25 # TODO: find optimal value for this

return reward

def reduce_lidar(self, lidar: LaserScan):
ranges = lidar.ranges
ranges = np.nan_to_num(ranges, posinf=float(-1), neginf=float(-1))
ranges = list(ranges)

reduced_range = []

for i in range(10):
sample = ranges[i * 64]
reduced_range.append(sample)

return reduced_range
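
The per-environment helpers deleted above now come from shared modules, environments/util.py and environments/termination.py, whose diffs are not reproduced on this page. The sketch below is an approximation pieced together from the deleted methods (generate_goal, reduce_lidar and, further down, process_odom) and from how the new imports are called; it is not the committed code.

# environments/util.py -- approximate reconstruction, not shown in this diff
import random
import numpy as np
from nav_msgs.msg import Odometry
from sensor_msgs.msg import LaserScan

def generate_position(inner_bound, outer_bound):
    # Sample a point at least inner_bound away from the origin on each axis
    inner_bound, outer_bound = float(inner_bound), float(outer_bound)
    x_pos = random.uniform(-outer_bound, outer_bound)
    x_pos = x_pos + inner_bound if x_pos >= 0 else x_pos - inner_bound
    y_pos = random.uniform(-outer_bound, outer_bound)
    y_pos = y_pos + inner_bound if y_pos >= 0 else y_pos - inner_bound
    return [x_pos, y_pos]

def process_odom(odom: Odometry):
    # Flatten an Odometry message into [x, y, qw, qx, qy, qz, lin_vel, ang_vel]
    pose = odom.pose.pose
    position, orientation = pose.position, pose.orientation
    twist = odom.twist.twist
    return [position.x, position.y,
            orientation.w, orientation.x, orientation.y, orientation.z,
            twist.linear.x, twist.angular.z]

def reduce_lidar(lidar: LaserScan):
    # Down-sample the scan to 10 rays, mapping inf readings to -1
    ranges = np.nan_to_num(list(lidar.ranges), posinf=-1.0, neginf=-1.0)
    return [float(ranges[i * 64]) for i in range(10)]

# environments/termination.py -- the bodies below are guesses consistent with the
# call sites (reached_goal(position, goal, range), has_collided(lidar, range),
# has_flipped_over(quaternion)); only reached_goal is directly grounded in the
# removed is_terminated code
import math

def reached_goal(car_position, goal_position, reward_range):
    return math.dist(car_position, goal_position) <= reward_range

def has_collided(lidar_ranges, collision_range):
    return any(0 < r < collision_range for r in lidar_ranges)

def has_flipped_over(quaternion):
    w, x, y, z = quaternion  # state[2:6] holds the orientation as [w, x, y, z]
    return abs(x) > 0.5 or abs(y) > 0.5  # threshold is a guess

Usage matches the new calls above, e.g. generate_position(11, 13) in CarBlockEnvironment.reset().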

189 changes: 38 additions & 151 deletions src/environments/environments/CarGoalEnvironment.py
@@ -1,165 +1,87 @@
import time
import math
import numpy as np
import random

import rclpy
from rclpy.node import Node
from rclpy import Future

from geometry_msgs.msg import Twist
from nav_msgs.msg import Odometry
from std_srvs.srv import Trigger
from .util import process_odom, generate_position
from .termination import reached_goal
from environments.F1tenthEnvironment import F1tenthEnvironment
from environment_interfaces.srv import Reset


class CarGoalEnvironment(Node):
class CarGoalEnvironment(F1tenthEnvironment):
"""
CarGoal Reinforcement Learning Environment:
CarWall Reinforcement Learning Environment:
Task:
Here the agent learns to drive the f1tenth car to a goal position
Here the agent learns to drive the f1tenth car to a goal position.
This happens all within a 10x10 box
Observation:
It's position (x, y), orientation (w, x, y, z) and the goal's position (x, y)
It's position (x, y), orientation (w, x, y, z), lidar points (approx. ~600 rays) and the goal's position (x, y)
Action:
It's linear and angular velocity
Reward:
It's progress toward the goal plus,
100+ if it reaches the goal plus,
-50 if it collides with the wall
Termination Conditions:
When the agent is within REWARD_RANGE units
When the agent is within REWARD_RANGE units of the goal or,
When the agent is within COLLISION_RANGE units of a wall
Truncation Condition:
When the number of steps surpasses MAX_STEPS
"""

def __init__(self, car_name, reward_range=1, max_steps=50, step_length=0.5):
super().__init__('car_goal_environment')
def __init__(self, car_name, reward_range=0.2, max_steps=50, step_length=0.5):
super().__init__('car_goal', car_name, max_steps, step_length)

# Environment Details ----------------------------------------
self.NAME = car_name
self.OBSERVATION_SIZE = 8 + 2 # odom + goal_position
self.REWARD_RANGE = reward_range
self.MAX_STEPS = max_steps
self.STEP_LENGTH = step_length

self.MAX_ACTIONS = np.asarray([3, 1])
self.MIN_ACTIONS = np.asarray([0, -1])

self.OBSERVATION_SIZE = 8 + 0 + 2 # Car position + Lidar rays + goal position
self.ACTION_NUM = 2
self.step_counter = 0

# Pub/Sub ----------------------------------------------------
self.cmd_vel_pub = self.create_publisher(
Twist,
f'/{self.NAME}/cmd_vel',
10
)

self.odom_sub = self.create_subscription(
Odometry,
f'/{self.NAME}/odometry',
self.odom_callback,
10
)

self.odom_future = Future()

# Reset Client -----------------------------------------------
self.reset_client = self.create_client(
Reset,
'car_wall_reset'
)

while not self.reset_client.wait_for_service(timeout_sec=1.0):
self.get_logger().info('reset service not available, waiting again...')

time.sleep(2)

# TODO: generate goal
self.goal_position = [0, 0] # x and y

self.goal_position = [10, 10]

self.get_logger().info('Environment Setup Complete')

def reset(self):
self.step_counter = 0

# Call reset Service
self.set_velocity(0, 0)

self.goal_position = self.generate_goal()

time.sleep(self.STEP_LENGTH)
self.goal_position = generate_position(5, 10)

self.call_reset_service()
self.sleep()

self.timer_future = Future()

time.sleep(self.STEP_LENGTH)
new_x, new_y = self.goal_position

self.call_reset_service(new_x, new_y)

observation = self.get_observation()

info = {}

return observation, info

def generate_goal(self, inner_bound=3, outer_bound=7):
inner_bound = float(inner_bound)
outer_bound = float(outer_bound)

x_pos = random.uniform(-outer_bound, outer_bound)
x_pos = x_pos + inner_bound if x_pos >= 0 else x_pos - inner_bound
y_pos = random.uniform(-outer_bound, outer_bound)
y_pos = y_pos + inner_bound if y_pos >= 0 else y_pos - inner_bound

return [x_pos, y_pos]


def step(self, action):
self.step_counter += 1

state = self.get_observation()

lin_vel, ang_vel = action
self.set_velocity(lin_vel, ang_vel)

time.sleep(self.STEP_LENGTH)
def is_terminated(self, state):
return reached_goal(state[:2], state[-2:],self.REWARD_RANGE)

def call_reset_service(self, goal_x, goal_y):
req = Reset.Request()

req.gx = goal_x
req.gy = goal_y

future = self.reset_client.call_async(req)
rclpy.spin_until_future_complete(future=future, node=self)

next_state = self.get_observation()
reward = self.compute_reward(state, next_state)
terminated = self.is_terminated(next_state)
truncated = self.step_counter >= self.MAX_STEPS
info = {}

return next_state, reward, terminated, truncated, info

def get_observation(self):
odom, _ = self.get_data()
odom = process_odom(odom)

# Get Position and Orientation of F1tenth
odom = self.get_odom()
odom = self.process_odom(odom)

# Get Goal Position
return odom + self.goal_position

def call_reset_service(self):
x, y = self.goal_position

request = Reset.Request()
request.gx = x
request.gy = y

future = self.reset_client.call_async(request)
rclpy.spin_until_future_complete(self, future)

# print(f'Reset Response Recieved: {future.result()}')
return future.result()

def is_terminated(self, observation):
current_distance = math.dist(observation[-2:], observation[:2])
return current_distance <= self.REWARD_RANGE

def compute_reward(self, state, next_state):

goal_position = state[-2:]
@@ -170,7 +92,6 @@ def compute_reward(self, state, next_state):
delta_distance = old_distance - current_distance

reward = -0.25
reward += next_state[6] / 5

if current_distance < self.REWARD_RANGE:
reward += 100
@@ -179,39 +100,5 @@ def compute_reward(self, state, next_state):

return reward

def odom_callback(self, odom):
"""
Callback for listening to Odometry on topic
"""
self.odom_future.set_result(odom)

def get_odom(self):
rclpy.spin_until_future_complete(self, self.odom_future)
future = self.odom_future
self.odom_future = Future()
return future.result()

def process_odom(self, odom: Odometry):
pose = odom.pose.pose
position = pose.position
orientation = pose.orientation

twist = odom.twist.twist
lin_vel = twist.linear
ang_vel = twist.angular

return [position.x, position.y, orientation.w, orientation.x, orientation.y, orientation.z, lin_vel.x, ang_vel.z]

def set_velocity(self, linear: float, angular: float):
"""
Publish Twist messages to f1tenth cmd_vel topic
"""
velocity_msg = Twist()
velocity_msg.angular.z = float(angular)
velocity_msg.linear.x = float(linear)

self.cmd_vel_pub.publish(velocity_msg)
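
Both files now delegate their ROS plumbing to the new F1tenthEnvironment parent, which is among the 21 changed files but is not shown on this page. Based only on the members the child classes call (step_counter, MAX_STEPS, STEP_LENGTH, set_velocity, sleep, get_data, reset_client, timer_future), a plausible outline looks roughly like this; it illustrates the standardisation pattern and is not the actual committed class. The lidar topic and reset service names in particular are guesses.

# environments/F1tenthEnvironment.py -- illustrative outline only
import time
import numpy as np
import rclpy
from rclpy import Future
from rclpy.node import Node
from geometry_msgs.msg import Twist
from nav_msgs.msg import Odometry
from sensor_msgs.msg import LaserScan
from environment_interfaces.srv import Reset

class F1tenthEnvironment(Node):
    def __init__(self, env_name, car_name, max_steps, step_length):
        super().__init__(env_name + '_environment')

        self.NAME = car_name
        self.MAX_STEPS = max_steps
        self.STEP_LENGTH = step_length
        self.MAX_ACTIONS = np.asarray([3, 1])
        self.MIN_ACTIONS = np.asarray([0, -1])
        self.ACTION_NUM = 2
        self.step_counter = 0

        # Shared pub/sub and reset client that previously lived in each child
        self.cmd_vel_pub = self.create_publisher(Twist, f'/{self.NAME}/cmd_vel', 10)
        self.odom_sub = self.create_subscription(Odometry, f'/{self.NAME}/odometry', self.odom_callback, 10)
        self.lidar_sub = self.create_subscription(LaserScan, f'/{self.NAME}/scan', self.lidar_callback, 10)  # topic is a guess
        self.reset_client = self.create_client(Reset, env_name + '_reset')  # service name is a guess

        self.odom_future = Future()
        self.lidar_future = Future()
        self.timer_future = Future()

    def step(self, action):
        # Common step loop; children only supply get_observation, compute_reward,
        # is_terminated (and their own reset / call_reset_service)
        self.step_counter += 1
        state = self.get_observation()

        lin_vel, ang_vel = action
        self.set_velocity(lin_vel, ang_vel)
        self.sleep()

        next_state = self.get_observation()
        reward = self.compute_reward(state, next_state)
        terminated = self.is_terminated(next_state)
        truncated = self.step_counter >= self.MAX_STEPS
        return next_state, reward, terminated, truncated, {}

    def set_velocity(self, linear, angular):
        msg = Twist()
        msg.linear.x = float(linear)
        msg.angular.z = float(angular)
        self.cmd_vel_pub.publish(msg)

    def sleep(self):
        # Stand-in for the parent's wait-one-step helper; the removed child code
        # used time.sleep(self.STEP_LENGTH) for the same purpose
        time.sleep(self.STEP_LENGTH)

    def odom_callback(self, msg):
        self.odom_future.set_result(msg)

    def lidar_callback(self, msg):
        self.lidar_future.set_result(msg)

    def get_data(self):
        # Block until one fresh odometry and one fresh lidar message arrive
        rclpy.spin_until_future_complete(self, self.odom_future)
        rclpy.spin_until_future_complete(self, self.lidar_future)
        odom, lidar = self.odom_future.result(), self.lidar_future.result()
        self.odom_future, self.lidar_future = Future(), Future()
        return odom, lidar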



