Merge pull request #67 from UoA-CARES/58-standardise-parent-class
58 standardise parent class
emilysteiner71 authored Jul 12, 2023
2 parents 196781d + 444219f commit e84856a
Showing 21 changed files with 437 additions and 535 deletions.
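For orientation before the per-file diffs: the point of the standardisation is that every environment now inherits a common gymnasium-style reset/step surface from the new F1tenthEnvironment parent, so training code can treat CarGoal, CarWall and CarBlock interchangeably. A minimal usage sketch under that assumption follows; the car name and the straight-line placeholder policy are made up for illustration, and a running f1tenth simulation plus the matching reset service are assumed.

import rclpy
from environments.CarGoalEnvironment import CarGoalEnvironment

rclpy.init()
env = CarGoalEnvironment('f1tenth', max_steps=50, step_length=0.5)  # car name is hypothetical

observation, info = env.reset()
terminated = truncated = False
while not (terminated or truncated):
    action = [1.0, 0.0]  # placeholder policy: drive straight at 1 m/s
    observation, reward, terminated, truncated, info = env.step(action)

rclpy.shutdown()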
69 changes: 36 additions & 33 deletions src/environments/environments/CarBlockEnvironment.py
@@ -6,10 +6,15 @@
from rclpy import Future
from sensor_msgs.msg import LaserScan

from environments.GeneralCarEnvironment import GeneralCarEnvironment
from environments.F1tenthEnvironment import F1tenthEnvironment
from environments.CarWallEnvironment import CarWallEnvironment

from .util import reduce_lidar, process_odom, generate_position
from .termination import reached_goal, has_collided, has_flipped_over

from environment_interfaces.srv import Reset

class CarBlockEnvironment(GeneralCarEnvironment):
class CarBlockEnvironment(F1tenthEnvironment):
"""
CarWall Reinforcement Learning Environment:
@@ -36,49 +41,58 @@ class CarBlockEnvironment(GeneralCarEnvironment):
"""

def __init__(self, car_name, reward_range=0.2, max_steps=50, collision_range=0.2, step_length=0.5):
super().__init__('car_block', car_name, reward_range, max_steps, collision_range, step_length)
super().__init__('car_block', car_name, max_steps, step_length)

self.OBSERVATION_SIZE = 8 + 10 + 2 # odom + lidar + goal_position
self.COLLISION_RANGE = collision_range
self.REWARD_RANGE = reward_range

self.goal_position = [10, 10]

self.get_logger().info('Environment Setup Complete')

def reset(self):
self.step_counter = 0

self.set_velocity(0, 0)

# TODO: Remove Hard coded-ness of 10x10
self.goal_position = self.generate_goal(11, 3)
self.goal_position = generate_position(11, 13)

while not self.timer_future.done():
rclpy.spin_once(self)
self.sleep()

self.timer_future = Future()

self.call_reset_service()
new_x, new_y = self.goal_position
self.call_reset_service(new_x, new_y)

observation = self.get_observation()

info = {}

return observation, info

def generate_goal(self, inner_bound=3, outer_bound=5):
inner_bound = float(inner_bound)
outer_bound = float(outer_bound)
def is_terminated(self, state):
return \
reached_goal(state[:2], state[-2:],self.REWARD_RANGE) \
or has_collided(state[8:-2], self.COLLISION_RANGE) \
or has_flipped_over(state[2:6])

x_pos = random.uniform(-outer_bound, outer_bound)
x_pos = x_pos + inner_bound if x_pos >= 0 else x_pos - inner_bound
y_pos = random.uniform(-outer_bound, outer_bound)
y_pos = y_pos + inner_bound if y_pos >= 0 else y_pos - inner_bound

return [x_pos, y_pos]
def call_reset_service(self, goal_x, goal_y):
req = Reset.Request()

req.gx = goal_x
req.gy = goal_y

future = self.reset_client.call_async(req)
rclpy.spin_until_future_complete(future=future, node=self)

def get_observation(self):

# Get Position and Orientation of F1tenth
odom, lidar = self.get_data()
odom = self.process_odom(odom)
ranges, _ = self.process_lidar(lidar)
odom = process_odom(odom)

reduced_range = self.reduce_lidar(lidar)
reduced_range = reduce_lidar(lidar)

# Get Goal Position
return odom + reduced_range + self.goal_position
@@ -97,20 +111,9 @@ def compute_reward(self, state, next_state):
if current_distance < self.REWARD_RANGE:
reward += 100

if self.has_collided(next_state) or self.has_flipped_over(next_state):
if has_collided(next_state[8:-2], self.COLLISION_RANGE) or has_flipped_over(next_state[2:6]):
reward -= 25 # TODO: find optimal value for this

return reward

def reduce_lidar(self, lidar: LaserScan):
ranges = lidar.ranges
ranges = np.nan_to_num(ranges, posinf=float(-1), neginf=float(-1))
ranges = list(ranges)

reduced_range = []

for i in range(10):
sample = ranges[i * 64]
reduced_range.append(sample)

return reduced_range
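
The per-environment helpers deleted above now come from shared modules, environments/util.py and environments/termination.py, whose diffs are not reproduced on this page. The sketch below is an approximation pieced together from the deleted methods (generate_goal, reduce_lidar and, further down, process_odom) and from how the new imports are called; it is not the committed code.

# environments/util.py -- approximate reconstruction, not shown in this diff
import random
import numpy as np
from nav_msgs.msg import Odometry
from sensor_msgs.msg import LaserScan

def generate_position(inner_bound, outer_bound):
    # Sample a point at least inner_bound away from the origin on each axis
    inner_bound, outer_bound = float(inner_bound), float(outer_bound)
    x_pos = random.uniform(-outer_bound, outer_bound)
    x_pos = x_pos + inner_bound if x_pos >= 0 else x_pos - inner_bound
    y_pos = random.uniform(-outer_bound, outer_bound)
    y_pos = y_pos + inner_bound if y_pos >= 0 else y_pos - inner_bound
    return [x_pos, y_pos]

def process_odom(odom: Odometry):
    # Flatten an Odometry message into [x, y, qw, qx, qy, qz, lin_vel, ang_vel]
    pose = odom.pose.pose
    position, orientation = pose.position, pose.orientation
    twist = odom.twist.twist
    return [position.x, position.y,
            orientation.w, orientation.x, orientation.y, orientation.z,
            twist.linear.x, twist.angular.z]

def reduce_lidar(lidar: LaserScan):
    # Down-sample the scan to 10 rays, mapping inf readings to -1
    ranges = np.nan_to_num(list(lidar.ranges), posinf=-1.0, neginf=-1.0)
    return [float(ranges[i * 64]) for i in range(10)]

# environments/termination.py -- the bodies below are guesses consistent with the
# call sites (reached_goal(position, goal, range), has_collided(lidar, range),
# has_flipped_over(quaternion)); only reached_goal is directly grounded in the
# removed is_terminated code
import math

def reached_goal(car_position, goal_position, reward_range):
    return math.dist(car_position, goal_position) <= reward_range

def has_collided(lidar_ranges, collision_range):
    return any(0 < r < collision_range for r in lidar_ranges)

def has_flipped_over(quaternion):
    w, x, y, z = quaternion  # state[2:6] holds the orientation as [w, x, y, z]
    return abs(x) > 0.5 or abs(y) > 0.5  # threshold is a guess

Usage matches the new calls above, e.g. generate_position(11, 13) in CarBlockEnvironment.reset().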

189 changes: 38 additions & 151 deletions src/environments/environments/CarGoalEnvironment.py
@@ -1,165 +1,87 @@
import time
import math
import numpy as np
import random

import rclpy
from rclpy.node import Node
from rclpy import Future

from geometry_msgs.msg import Twist
from nav_msgs.msg import Odometry
from std_srvs.srv import Trigger
from .util import process_odom, generate_position
from .termination import reached_goal
from environments.F1tenthEnvironment import F1tenthEnvironment
from environment_interfaces.srv import Reset


class CarGoalEnvironment(Node):
class CarGoalEnvironment(F1tenthEnvironment):
"""
CarGoal Reinforcement Learning Environment:
CarWall Reinforcement Learning Environment:
Task:
Here the agent learns to drive the f1tenth car to a goal position
Here the agent learns to drive the f1tenth car to a goal position.
This happens all within a 10x10 box
Observation:
It's position (x, y), orientation (w, x, y, z) and the goal's position (x, y)
It's position (x, y), orientation (w, x, y, z), lidar points (approx. ~600 rays) and the goal's position (x, y)
Action:
It's linear and angular velocity
Reward:
It's progress toward the goal plus,
100+ if it reaches the goal plus,
-50 if it collides with the wall
Termination Conditions:
When the agent is within REWARD_RANGE units
When the agent is within REWARD_RANGE units of the goal or,
When the agent is within COLLISION_RANGE units of a wall
Truncation Condition:
When the number of steps surpasses MAX_STEPS
"""

def __init__(self, car_name, reward_range=1, max_steps=50, step_length=0.5):
super().__init__('car_goal_environment')
def __init__(self, car_name, reward_range=0.2, max_steps=50, step_length=0.5):
super().__init__('car_goal', car_name, max_steps, step_length)

# Environment Details ----------------------------------------
self.NAME = car_name
self.OBSERVATION_SIZE = 8 + 2 # odom + goal_position
self.REWARD_RANGE = reward_range
self.MAX_STEPS = max_steps
self.STEP_LENGTH = step_length

self.MAX_ACTIONS = np.asarray([3, 1])
self.MIN_ACTIONS = np.asarray([0, -1])

self.OBSERVATION_SIZE = 8 + 0 + 2 # Car position + Lidar rays + goal position
self.ACTION_NUM = 2
self.step_counter = 0

# Pub/Sub ----------------------------------------------------
self.cmd_vel_pub = self.create_publisher(
Twist,
f'/{self.NAME}/cmd_vel',
10
)

self.odom_sub = self.create_subscription(
Odometry,
f'/{self.NAME}/odometry',
self.odom_callback,
10
)

self.odom_future = Future()

# Reset Client -----------------------------------------------
self.reset_client = self.create_client(
Reset,
'car_wall_reset'
)

while not self.reset_client.wait_for_service(timeout_sec=1.0):
self.get_logger().info('reset service not available, waiting again...')

time.sleep(2)

# TODO: generate goal
self.goal_position = [0, 0] # x and y

self.goal_position = [10, 10]

self.get_logger().info('Environment Setup Complete')

def reset(self):
self.step_counter = 0

# Call reset Service
self.set_velocity(0, 0)

self.goal_position = self.generate_goal()

time.sleep(self.STEP_LENGTH)
self.goal_position = generate_position(5, 10)

self.call_reset_service()
self.sleep()

self.timer_future = Future()

time.sleep(self.STEP_LENGTH)
new_x, new_y = self.goal_position

self.call_reset_service(new_x, new_y)

observation = self.get_observation()

info = {}

return observation, info

def generate_goal(self, inner_bound=3, outer_bound=7):
inner_bound = float(inner_bound)
outer_bound = float(outer_bound)

x_pos = random.uniform(-outer_bound, outer_bound)
x_pos = x_pos + inner_bound if x_pos >= 0 else x_pos - inner_bound
y_pos = random.uniform(-outer_bound, outer_bound)
y_pos = y_pos + inner_bound if y_pos >= 0 else y_pos - inner_bound

return [x_pos, y_pos]


def step(self, action):
self.step_counter += 1

state = self.get_observation()

lin_vel, ang_vel = action
self.set_velocity(lin_vel, ang_vel)

time.sleep(self.STEP_LENGTH)
def is_terminated(self, state):
return reached_goal(state[:2], state[-2:],self.REWARD_RANGE)

def call_reset_service(self, goal_x, goal_y):
req = Reset.Request()

req.gx = goal_x
req.gy = goal_y

future = self.reset_client.call_async(req)
rclpy.spin_until_future_complete(future=future, node=self)

next_state = self.get_observation()
reward = self.compute_reward(state, next_state)
terminated = self.is_terminated(next_state)
truncated = self.step_counter >= self.MAX_STEPS
info = {}

return next_state, reward, terminated, truncated, info

def get_observation(self):
odom, _ = self.get_data()
odom = process_odom(odom)

# Get Position and Orientation of F1tenth
odom = self.get_odom()
odom = self.process_odom(odom)

# Get Goal Position
return odom + self.goal_position

def call_reset_service(self):
x, y = self.goal_position

request = Reset.Request()
request.gx = x
request.gy = y

future = self.reset_client.call_async(request)
rclpy.spin_until_future_complete(self, future)

# print(f'Reset Response Recieved: {future.result()}')
return future.result()

def is_terminated(self, observation):
current_distance = math.dist(observation[-2:], observation[:2])
return current_distance <= self.REWARD_RANGE

def compute_reward(self, state, next_state):

goal_position = state[-2:]
@@ -170,7 +92,6 @@ def compute_reward(self, state, next_state):
delta_distance = old_distance - current_distance

reward = -0.25
reward += next_state[6] / 5

if current_distance < self.REWARD_RANGE:
reward += 100
@@ -179,39 +100,5 @@ def compute_reward(self, state, next_state):

return reward

def odom_callback(self, odom):
"""
Callback for listening to Odometry on topic
"""
self.odom_future.set_result(odom)

def get_odom(self):
rclpy.spin_until_future_complete(self, self.odom_future)
future = self.odom_future
self.odom_future = Future()
return future.result()

def process_odom(self, odom: Odometry):
pose = odom.pose.pose
position = pose.position
orientation = pose.orientation

twist = odom.twist.twist
lin_vel = twist.linear
ang_vel = twist.angular

return [position.x, position.y, orientation.w, orientation.x, orientation.y, orientation.z, lin_vel.x, ang_vel.z]

def set_velocity(self, linear: float, angular: float):
"""
Publish Twist messages to f1tenth cmd_vel topic
"""
velocity_msg = Twist()
velocity_msg.angular.z = float(angular)
velocity_msg.linear.x = float(linear)

self.cmd_vel_pub.publish(velocity_msg)
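
Both files now delegate their ROS plumbing to the new F1tenthEnvironment parent, which is among the 21 changed files but is not shown on this page. Based only on the members the child classes call (step_counter, MAX_STEPS, STEP_LENGTH, set_velocity, sleep, get_data, reset_client, timer_future), a plausible outline looks roughly like this; it illustrates the standardisation pattern and is not the actual committed class. The lidar topic and reset service names in particular are guesses.

# environments/F1tenthEnvironment.py -- illustrative outline only
import time
import numpy as np
import rclpy
from rclpy import Future
from rclpy.node import Node
from geometry_msgs.msg import Twist
from nav_msgs.msg import Odometry
from sensor_msgs.msg import LaserScan
from environment_interfaces.srv import Reset

class F1tenthEnvironment(Node):
    def __init__(self, env_name, car_name, max_steps, step_length):
        super().__init__(env_name + '_environment')

        self.NAME = car_name
        self.MAX_STEPS = max_steps
        self.STEP_LENGTH = step_length
        self.MAX_ACTIONS = np.asarray([3, 1])
        self.MIN_ACTIONS = np.asarray([0, -1])
        self.ACTION_NUM = 2
        self.step_counter = 0

        # Shared pub/sub and reset client that previously lived in each child
        self.cmd_vel_pub = self.create_publisher(Twist, f'/{self.NAME}/cmd_vel', 10)
        self.odom_sub = self.create_subscription(Odometry, f'/{self.NAME}/odometry', self.odom_callback, 10)
        self.lidar_sub = self.create_subscription(LaserScan, f'/{self.NAME}/scan', self.lidar_callback, 10)  # topic is a guess
        self.reset_client = self.create_client(Reset, env_name + '_reset')  # service name is a guess

        self.odom_future = Future()
        self.lidar_future = Future()
        self.timer_future = Future()

    def step(self, action):
        # Common step loop; children only supply get_observation, compute_reward,
        # is_terminated (and their own reset / call_reset_service)
        self.step_counter += 1
        state = self.get_observation()

        lin_vel, ang_vel = action
        self.set_velocity(lin_vel, ang_vel)
        self.sleep()

        next_state = self.get_observation()
        reward = self.compute_reward(state, next_state)
        terminated = self.is_terminated(next_state)
        truncated = self.step_counter >= self.MAX_STEPS
        return next_state, reward, terminated, truncated, {}

    def set_velocity(self, linear, angular):
        msg = Twist()
        msg.linear.x = float(linear)
        msg.angular.z = float(angular)
        self.cmd_vel_pub.publish(msg)

    def sleep(self):
        # Stand-in for the parent's wait-one-step helper; the removed child code
        # used time.sleep(self.STEP_LENGTH) for the same purpose
        time.sleep(self.STEP_LENGTH)

    def odom_callback(self, msg):
        self.odom_future.set_result(msg)

    def lidar_callback(self, msg):
        self.lidar_future.set_result(msg)

    def get_data(self):
        # Block until one fresh odometry and one fresh lidar message arrive
        rclpy.spin_until_future_complete(self, self.odom_future)
        rclpy.spin_until_future_complete(self, self.lidar_future)
        odom, lidar = self.odom_future.result(), self.lidar_future.result()
        self.odom_future, self.lidar_future = Future(), Future()
        return odom, lidar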



