UoA-CARES · retinfai · Sep 13, 2023 · Sep 11, 2023 · Sep 13, 2023
diff --git a/.gitignore b/.gitignore
@@ -11,4 +11,5 @@ __pycache__
 rl_logs
 .idea
 src/f1tenth/f1tenth_control/f1tenth_control/__pycache__/*
-Retrain1hTrue
+Retrain1hTrue
+results/
diff --git a/src/controllers/controllers/controller.py b/src/controllers/controllers/controller.py
@@ -82,7 +82,7 @@ def get_data(self):
         data = future.result()
         return data['odom'], data['lidar']
 
-    def set_velocity(self, linear, angular, L):
+    def set_velocity(self, linear, angular):
         """
         Publish Twist messages to f1tenth cmd_vel topic
         """

diff --git a/src/environments/environments/CarBeatEnvironment.py b/src/environments/environments/CarBeatEnvironment.py
@@ -36,7 +36,7 @@ def __init__(self,
         self.OTHER_CAR_NAME = car_two_name
         self.MAX_STEPS = max_steps
         self.STEP_LENGTH = step_length
-        self.MAX_ACTIONS = np.asarray([3, 3.14])
+        self.MAX_ACTIONS = np.asarray([5, 3.14])
         self.MIN_ACTIONS = np.asarray([0, -3.14])
         self.MAX_STEPS_PER_GOAL = max_steps
         self.OBSERVATION_MODE = observation_mode
@@ -159,7 +159,7 @@ def reset(self):
         ftg_x, ftg_y, ftg_yaw, ftg_index = self.car_waypoints[(index + self.ftg_offset) % len(self.car_waypoints)]
 
         self.start_goal_index = index
-        self.ftg_start_goal_index = index
+        self.ftg_start_goal_index = ftg_index
 
         self.goal_position = self.all_goals[self.start_goal_index]
         self.ftg_goal_position = self.all_goals[self.ftg_start_goal_index]
@@ -350,7 +350,7 @@ def compute_reward(self, state, next_state):
             self.steps_since_last_goal = 0
 
         ftg_current_distance = math.dist(self.all_goals[(self.ftg_start_goal_index + self.ftg_goals_reached) % len(self.all_goals)], next_state[18:20])
-        
+
         # Keeping track of FTG car goal number
         if ftg_current_distance < self.REWARD_RANGE:
             self.ftg_goals_reached += 1

diff --git a/src/f1tenth b/src/f1tenth
diff --git a/src/reinforcement_learning/config/test.yaml b/src/reinforcement_learning/config/test.yaml
@@ -1,11 +1,12 @@
 test:
   ros__parameters:
-    environment: 'CarTrack'
-    track: 'multi_track_testing' # track_1, track_2, track_3, multi_track, multi_track_testing -> only applies for CarTrack
+    environment: 'CarBeat'
+    track: 'multi_track' # track_1, track_2, track_3, multi_track, multi_track_testing -> only applies for CarTrack
     max_steps_evaluation: 1000000
     max_steps: 3000
-    actor_path: rl_logs/multi_track_with_speed12/models/actor_checkpoint.pht
-    critic_path: rl_logs/multi_track_with_speed12/models/critic_checkpoint.pht
+    actor_path: results/23_09_08_06:28:51/models/actor_checkpoint.pht
+    critic_path: results/23_09_08_06:28:51/models/critic_checkpoint.pht
+    algorithm: 'TD3'
     step_length: 0.1
     reward_range: 3.0
     collision_range: 0.2

diff --git a/src/reinforcement_learning/reinforcement_learning/test.py b/src/reinforcement_learning/reinforcement_learning/test.py
@@ -5,7 +5,7 @@
 from cares_reinforcement_learning.algorithm.policy import TD3
 from cares_reinforcement_learning.networks.TD3 import Actor, Critic
 from cares_reinforcement_learning.util import Record
-from cares_reinforcement_learning.util import helpers as hlp
+from cares_reinforcement_learning.util import helpers as hlp, NetworkFactory
 
 from environments.CarBlockEnvironment import CarBlockEnvironment
 from environments.CarGoalEnvironment import CarGoalEnvironment
@@ -31,21 +31,8 @@ def main():
     COLLISION_RANGE, \
     ACTOR_PATH, \
     CRITIC_PATH, \
-    OBSERVAION_MODE = [param.value for param in params]
-
-    print(
-        f'---------------------------------------------\n'
-        f'Environment: {ENVIRONMENT}\n'
-        f'Evaluation Steps: {MAX_STEPS_EVALUATION}\n'
-        f'Steps per Episode: {MAX_STEPS}\n'
-        f'Step Length: {STEP_LENGTH}\n'
-        f'Reward Range: {REWARD_RANGE}\n'
-        f'Collision Range: {COLLISION_RANGE}\n'
-        f'Critic Path: {CRITIC_PATH}\n'
-        f'Actor Path: {ACTOR_PATH}\n'
-        f'Observation Mode: {OBSERVAION_MODE}\n'
-        f'---------------------------------------------\n'
-    )
+    OBSERVAION_MODE, \
+    ALGORITHM = [param.value for param in params]
 
     if ACTOR_PATH == '' or CRITIC_PATH == '':
         raise Exception('Actor or Critic path not provided')
@@ -62,34 +49,53 @@ def main():
         case 'CarTrack':
             env = CarTrackEnvironment('f1tenth', step_length=STEP_LENGTH, max_steps=MAX_STEPS, reward_range=REWARD_RANGE, collision_range=COLLISION_RANGE, track=TRACK, observation_mode=OBSERVAION_MODE)
         case 'CarBeat':
-            env = CarBeatEnvironment('f1tenth_one', 'f1tenth_two', step_length=STEP_LENGTH, max_steps=MAX_STEPS, reward_range=REWARD_RANGE, collision_range=COLLISION_RANGE, track=TRACK)
+            env = CarBeatEnvironment('f1tenth_one', 'f1tenth_two', step_length=STEP_LENGTH, max_steps=MAX_STEPS, reward_range=REWARD_RANGE, collision_range=COLLISION_RANGE, track=TRACK, observation_mode=OBSERVAION_MODE)
         case _:
             env = CarGoalEnvironment('f1tenth', step_length=STEP_LENGTH, max_steps=MAX_STEPS, reward_range=REWARD_RANGE)
 
-    actor = Actor(observation_size=env.OBSERVATION_SIZE, num_actions=env.ACTION_NUM, learning_rate=0.1)
-    critic = Critic(observation_size=env.OBSERVATION_SIZE, num_actions=env.ACTION_NUM, learning_rate=0.1)
+    print(
+        f'---------------------------------------------\n'
+        f'Environment: {ENVIRONMENT}\n'
+        f'Evaluation Steps: {MAX_STEPS_EVALUATION}\n'
+        f'Steps per Episode: {MAX_STEPS}\n'
+        f'Step Length: {STEP_LENGTH}\n'
+        f'Reward Range: {REWARD_RANGE}\n'
+        f'Collision Range: {COLLISION_RANGE}\n'
+        f'Critic Path: {CRITIC_PATH}\n'
+        f'Actor Path: {ACTOR_PATH}\n'
+        f'Observation Mode: {OBSERVAION_MODE}\n'
+        f'Observation size: {env.OBSERVATION_SIZE}'
+        f'Algorithm: {ALGORITHM}\n'
+        f'---------------------------------------------\n'
+    )
+
+    network_factory_args = {
+        'observation_size': env.OBSERVATION_SIZE,
+        'action_num': env.ACTION_NUM,
+        'actor_lr': 0.11,
+        'critic_lr': 0.11,
+        'gamma': 0.23,
+        'tau': 0.001,
+        'device': DEVICE
+    }
+
+    agent_factory = NetworkFactory()
+    agent = agent_factory.create_network(ALGORITHM, network_factory_args)
 
     print('Reading saved models into actor and critic')
-    actor.load_state_dict(torch.load(ACTOR_PATH))
-    critic.load_state_dict(torch.load(CRITIC_PATH))
+    agent.actor_net.load_state_dict(torch.load(ACTOR_PATH))
+    agent.critic_net.load_state_dict(torch.load(CRITIC_PATH))
 
     print('Successfully Loaded models')
 
-    agent = TD3(
-        actor_network=actor,
-        critic_network=critic,
-        gamma=0.999,
-        tau=0.002,
-        action_num=env.ACTION_NUM,
-        device=DEVICE
-    )
+
 
     record = Record(checkpoint_freq=100, log_dir="multi_track_with_speed12_evaluation")
 
     test(env=env, agent=agent, record=record)
 
 
-def test(env, agent: TD3, record: Record):
+def test(env, agent, record: Record):
     state, _ = env.reset()
     episode_timesteps = 0
     episode_reward = 0
@@ -145,7 +151,8 @@ def get_params():
             ('collision_range', 0.2),
             ('actor_path', ''),
             ('critic_path', ''),
-            ('observation_mode', 'full')
+            ('observation_mode', 'full'),
+            ('algorithm', 'TD3')
         ]
     )
 
@@ -159,7 +166,8 @@ def get_params():
         'collision_range',
         'actor_path',
         'critic_path',
-        'observation_mode'
+        'observation_mode',
+        'algorithm'
     ])
+19 −24		f1tenth_description/gazebo/lidar.gazebo.xacro
+7 −0		f1tenth_description/urdf/joints.xacro
+5 −1		f1tenth_description/urdf/links.xacro
+25 −0		f1tenth_description/urdf/macro.xacro