diff --git a/src/environments/environments/CarTrackParentEnvironment.py b/src/environments/environments/CarTrackParentEnvironment.py
index ff87e7d2..7ae48a19 100644
--- a/src/environments/environments/CarTrackParentEnvironment.py
+++ b/src/environments/environments/CarTrackParentEnvironment.py
@@ -40,7 +40,6 @@ def __init__(self, car_name, reward_range=1, max_steps=50, collision_range=0.2,
 
         # Environment Details ----------------------------------------
         self.MAX_STEPS_PER_GOAL = max_steps
-        self.MIN_ACTIONS = np.asarray([0, -3.14])
 
         self.OBSERVATION_SIZE = 8 + 10  # Car position + Lidar rays
         self.check_goal = False
@@ -151,8 +150,11 @@ def compute_reward(self, state, next_state):
 
         goal_position = self.goal_position
 
+        prev_distance = math.dist(goal_position, state[:2])
         current_distance = math.dist(goal_position, next_state[:2])
-
+
+        reward += prev_distance - current_distance
+
         if current_distance < self.REWARD_RANGE:
             reward += 50
             self.goal_number += 1
diff --git a/src/environments/environments/ParentCarEnvironment.py b/src/environments/environments/ParentCarEnvironment.py
index b997678e..4135e23e 100644
--- a/src/environments/environments/ParentCarEnvironment.py
+++ b/src/environments/environments/ParentCarEnvironment.py
@@ -49,7 +49,7 @@ def __init__(self, env_name, car_name, reward_range, max_steps, collision_range,
         self.lidar_sub = Subscriber(
             self,
             LaserScan,
-            f'/lidar',
+            f'/{self.NAME}/scan',
         )
 
         self.message_filter = ApproximateTimeSynchronizer(
diff --git a/src/f1tenth b/src/f1tenth
index 017b2304..15ed4c6e 160000
--- a/src/f1tenth
+++ b/src/f1tenth
@@ -1 +1 @@
-Subproject commit 017b23043dbb07f6658c38839d39364c2f3085fe
+Subproject commit 15ed4c6ebafb0f110d67da51a68281bcb141df6d
diff --git a/src/reinforcement_learning/config/test.yaml b/src/reinforcement_learning/config/test.yaml
index 353b4476..ffcb9978 100644
--- a/src/reinforcement_learning/config/test.yaml
+++ b/src/reinforcement_learning/config/test.yaml
@@ -2,9 +2,9 @@ test:
   ros__parameters:
     environment: 'CarTrack1'
     max_steps_evaluation: 1000000
-    actor_path: models/cartrack_1_training-26-06-2023-06:50:49_150000_actor.pht
-    critic_path: models/cartrack_1_training-26-06-2023-06:50:49_150000_critic.pht
-    max_steps: 100
+    actor_path: models/23_07_05_03:52:58/models/actor_checkpoint.pht
+    critic_path: models/23_07_05_03:52:58/models/critic_checkpoint.pht
+    max_steps: 300
     step_length: 0.25
-    reward_range: 0.2
+    reward_range: 2.0
     collision_range: 0.2
\ No newline at end of file
diff --git a/src/reinforcement_learning/reinforcement_learning/train.py b/src/reinforcement_learning/reinforcement_learning/train.py
index 5dc07a5b..4642d64c 100644
--- a/src/reinforcement_learning/reinforcement_learning/train.py
+++ b/src/reinforcement_learning/reinforcement_learning/train.py
@@ -110,7 +110,7 @@ def main():
         'reward_range': REWARD_RANGE,
         'collision_range': COLLISION_RANGE
     }
 
-    record = Record(networks=networks, checkpoint_freq=MAX_STEPS_TRAINING / 10, config=config)
+    record = Record(networks=networks, checkpoint_freq=100, config=config)
 
     train(env=env, agent=agent, record=record)
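
The `compute_reward` hunk above replaces the purely sparse goal reward with a dense progress term (`prev_distance - current_distance`). Below is a minimal standalone sketch of that shaping logic, assuming illustrative names (`shaped_reward`, `state_xy`, `next_state_xy`) that are not part of the repository; only the progress term, the `REWARD_RANGE` check, and the +50 goal bonus come from the diff.

```python
import math

# Illustrative value matching the updated test.yaml reward_range; not the class attribute itself.
REWARD_RANGE = 2.0

def shaped_reward(goal_position, state_xy, next_state_xy):
    """Reward the car for reducing its distance to the current goal (sketch of the new shaping)."""
    prev_distance = math.dist(goal_position, state_xy)
    current_distance = math.dist(goal_position, next_state_xy)

    # Dense shaping term: positive when the step moved the car closer to the goal.
    reward = prev_distance - current_distance

    # Sparse bonus when the goal is reached, as in the diff (+50).
    if current_distance < REWARD_RANGE:
        reward += 50

    return reward

# Example: moving from 5 m away to 3 m away yields a shaping reward of 2.0.
print(shaped_reward((0.0, 0.0), (5.0, 0.0), (3.0, 0.0)))
```

Shaping on the change in distance keeps the per-step reward bounded by the distance actually travelled, so the agent is rewarded for progress toward each goal rather than only for finishing inside `REWARD_RANGE`.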