diff --git a/src/environments/environments/CarGoalEnvironment.py b/src/environments/environments/CarGoalEnvironment.py
index 5e5560bb..61e6b4ad 100644
--- a/src/environments/environments/CarGoalEnvironment.py
+++ b/src/environments/environments/CarGoalEnvironment.py
@@ -37,14 +37,13 @@ class CarGoalEnvironment(Node):
         When the number of steps surpasses MAX_STEPS
     """

-    def __init__(self, car_name, reward_range=1, max_steps=50, collision_range=0.5, step_length=0.5):
+    def __init__(self, car_name, reward_range=1, max_steps=50, step_length=0.5):
         super().__init__('car_goal_environment')

         # Environment Details ----------------------------------------
         self.NAME = car_name
         self.REWARD_RANGE = reward_range
         self.MAX_STEPS = max_steps
-        self.COLLISION_RANGE = collision_range
         self.STEP_LENGTH = step_length

         self.step_counter = 0
diff --git a/src/reinforcement_learning/config/car_goal.yaml b/src/reinforcement_learning/config/car_goal.yaml
index 4d6e4ca6..b4d9fb0d 100644
--- a/src/reinforcement_learning/config/car_goal.yaml
+++ b/src/reinforcement_learning/config/car_goal.yaml
@@ -1,11 +1,12 @@
 meta:
   ros__parameters:
-    mode: evaluation # training
+    mode: training # training

 car_goal_training:
   ros__parameters:
     max_steps_exploration: 5000
     max_steps_training: 1000000
+    reward_range: 2.0
     # gamma: 0.95
     # tau: 0.005
     # g: 5
diff --git a/src/reinforcement_learning/config/car_wall.yaml b/src/reinforcement_learning/config/car_wall.yaml
index 60fa48f6..76c08f43 100644
--- a/src/reinforcement_learning/config/car_wall.yaml
+++ b/src/reinforcement_learning/config/car_wall.yaml
@@ -1,11 +1,13 @@
 meta:
   ros__parameters:
-    mode: evaluation # training
+    mode: training # training

 car_wall_training:
   ros__parameters:
     max_steps_exploration: 5000
     max_steps_training: 1000000
+    reward_range: 1.0
+    collision_range: 2.0
     # gamma: 0.95
     # tau: 0.005
     # g: 5
diff --git a/src/reinforcement_learning/launch/car_goal.launch.py b/src/reinforcement_learning/launch/car_goal.launch.py
index d7ff0064..4bf44dc5 100644
--- a/src/reinforcement_learning/launch/car_goal.launch.py
+++ b/src/reinforcement_learning/launch/car_goal.launch.py
@@ -33,7 +33,7 @@ def generate_launch_description():
     #TODO: include the train launch file here
         'car_goal.yaml'
     )
-    config = yaml.load(open(config_path))
+    config = yaml.load(open(config_path), Loader=yaml.Loader)

     mode = config['meta']['ros__parameters']['mode']
     # Launch the Environment
diff --git a/src/reinforcement_learning/launch/car_wall.launch.py b/src/reinforcement_learning/launch/car_wall.launch.py
index 2839a343..ea607313 100644
--- a/src/reinforcement_learning/launch/car_wall.launch.py
+++ b/src/reinforcement_learning/launch/car_wall.launch.py
@@ -35,7 +35,7 @@


-    config = yaml.load(open(config_path))
+    config = yaml.load(open(config_path), Loader=yaml.Loader)

     mode = config['meta']['ros__parameters']['mode']

     # Launch the Environment
diff --git a/src/reinforcement_learning/reinforcement_learning/car_goal_training.py b/src/reinforcement_learning/reinforcement_learning/car_goal_training.py
index ed956dc3..08c10f43 100644
--- a/src/reinforcement_learning/reinforcement_learning/car_goal_training.py
+++ b/src/reinforcement_learning/reinforcement_learning/car_goal_training.py
@@ -30,7 +30,8 @@
         ('max_steps_training', 1_000_000),
         ('max_steps_exploration', 1_000),
         ('max_steps', 100),
-        ('step_length', 0.25)
+        ('step_length', 0.25),
+        ('reward_range', 0.2)
     ]
 )

@@ -47,6 +48,7 @@
     'critic_lr',
     'max_steps',
     'step_length',
+    'reward_range'
 ])

 MAX_STEPS_TRAINING,\
@@ -60,21 +62,25 @@ ACTOR_LR,\
 CRITIC_LR,\
 MAX_STEPS,\
-STEP_LENGTH = [param.value for param in params]
+STEP_LENGTH,\
+REWARD_RANGE = [param.value for param in params]

 print(
-    f'Exploration Steps: {MAX_STEPS_EXPLORATION}\n',
-    f'Training Steps: {MAX_STEPS_TRAINING}\n',
-    f'Gamma: {GAMMA}\n',
-    f'Tau: {TAU}\n',
-    f'G: {G}\n',
-    f'Batch Size: {BATCH_SIZE}\n',
-    f'Buffer Size: {BUFFER_SIZE}\n',
-    f'Seed: {SEED}\n',
-    f'Actor LR: {ACTOR_LR}\n',
-    f'Critic LR: {CRITIC_LR}\n',
-    f'Steps per Episode: {MAX_STEPS}\n',
+    f'---------------------------------------------\n'
+    f'Exploration Steps: {MAX_STEPS_EXPLORATION}\n'
+    f'Training Steps: {MAX_STEPS_TRAINING}\n'
+    f'Gamma: {GAMMA}\n'
+    f'Tau: {TAU}\n'
+    f'G: {G}\n'
+    f'Batch Size: {BATCH_SIZE}\n'
+    f'Buffer Size: {BUFFER_SIZE}\n'
+    f'Seed: {SEED}\n'
+    f'Actor LR: {ACTOR_LR}\n'
+    f'Critic LR: {CRITIC_LR}\n'
+    f'Steps per Episode: {MAX_STEPS}\n'
     f'Step Length: {STEP_LENGTH}\n'
+    f'Reward Range: {REWARD_RANGE}\n'
+    f'---------------------------------------------\n'
 )

 MAX_ACTIONS = np.asarray([3, 1])
 MIN_ACTIONS = np.asarray([0, -1])
diff --git a/src/reinforcement_learning/reinforcement_learning/car_wall_training.py b/src/reinforcement_learning/reinforcement_learning/car_wall_training.py
index 720ef5fa..07221ec2 100644
--- a/src/reinforcement_learning/reinforcement_learning/car_wall_training.py
+++ b/src/reinforcement_learning/reinforcement_learning/car_wall_training.py
@@ -29,7 +29,9 @@
         ('max_steps_training', 1_000_000),
         ('max_steps_exploration', 1_000),
         ('max_steps', 100),
-        ('step_length', 0.25)
+        ('step_length', 0.25),
+        ('reward_range', 0.2),
+        ('collision_range', 0.2)
     ]
 )

@@ -46,6 +48,8 @@
     'critic_lr',
     'max_steps',
     'step_length',
+    'reward_range',
+    'collision_range'
 ])

 MAX_STEPS_TRAINING,\
@@ -59,21 +63,27 @@ ACTOR_LR,\
 CRITIC_LR,\
 MAX_STEPS,\
-STEP_LENGTH = [param.value for param in params]
+STEP_LENGTH,\
+REWARD_RANGE,\
+COLLISION_RANGE = [param.value for param in params]

 print(
-    f'Exploration Steps: {MAX_STEPS_EXPLORATION}\n',
-    f'Training Steps: {MAX_STEPS_TRAINING}\n',
-    f'Gamma: {GAMMA}\n',
-    f'Tau: {TAU}\n',
-    f'G: {G}\n',
-    f'Batch Size: {BATCH_SIZE}\n',
-    f'Buffer Size: {BUFFER_SIZE}\n',
-    f'Seed: {SEED}\n',
-    f'Actor LR: {ACTOR_LR}\n',
-    f'Critic LR: {CRITIC_LR}\n',
-    f'Steps per Episode: {MAX_STEPS}\n',
+    f'---------------------------------------------\n'
+    f'Exploration Steps: {MAX_STEPS_EXPLORATION}\n'
+    f'Training Steps: {MAX_STEPS_TRAINING}\n'
+    f'Gamma: {GAMMA}\n'
+    f'Tau: {TAU}\n'
+    f'G: {G}\n'
+    f'Batch Size: {BATCH_SIZE}\n'
+    f'Buffer Size: {BUFFER_SIZE}\n'
+    f'Seed: {SEED}\n'
+    f'Actor LR: {ACTOR_LR}\n'
+    f'Critic LR: {CRITIC_LR}\n'
+    f'Steps per Episode: {MAX_STEPS}\n'
     f'Step Length: {STEP_LENGTH}\n'
+    f'Reward Range: {REWARD_RANGE}\n'
+    f'Collision Range: {COLLISION_RANGE}\n'
+    f'---------------------------------------------\n'
 )

 MAX_ACTIONS = np.asarray([3, 3.14])
 MIN_ACTIONS = np.asarray([-0.5, -3.14])
@@ -86,7 +96,7 @@ def main():

     time.sleep(3)

-    env = CarWallEnvironment('f1tenth', step_length=STEP_LENGTH, max_steps=MAX_STEPS)
+    env = CarWallEnvironment('f1tenth', step_length=STEP_LENGTH, max_steps=MAX_STEPS, reward_range=REWARD_RANGE, collision_range=COLLISION_RANGE)

     actor = Actor(observation_size=OBSERVATION_SIZE, num_actions=ACTION_NUM, learning_rate=ACTOR_LR)
     critic = Critic(observation_size=OBSERVATION_SIZE, num_actions=ACTION_NUM, learning_rate=CRITIC_LR)
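Note (not part of the diff above): passing Loader=yaml.Loader silences PyYAML's deprecation warning for a bare yaml.load(). A minimal sketch of an equivalent load using yaml.safe_load, which is enough here because the configs are plain mappings and scalars; the literal config_path below is a hypothetical stand-in for the path the launch files construct.

import yaml

# Hypothetical path for illustration only; the launch files build config_path
# from the package's config directory before this point.
config_path = 'src/reinforcement_learning/config/car_goal.yaml'

with open(config_path) as f:
    # safe_load parses plain YAML without arbitrary object construction,
    # so no Loader argument is needed.
    config = yaml.safe_load(f)

mode = config['meta']['ros__parameters']['mode']  # 'training' after this change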