feat: add actor path to parser #141

Merged · 1 commit · Oct 31, 2023
Changes from all commits
10 changes: 5 additions & 5 deletions src/reinforcement_learning/config/test.yaml
@@ -1,12 +1,12 @@
 test:
   ros__parameters:
-    environment: 'CarBeat'
+    environment: 'CarTrack'
     track: 'multi_track' # track_1, track_2, track_3, multi_track, multi_track_testing -> only applies for CarTrack
     max_steps_evaluation: 1000000
     number_eval_episodes: 100
     max_steps: 3000
-    actor_path: results/23_09_08_06:28:51/models/actor_checkpoint.pht
-    critic_path: results/23_09_08_06:28:51/models/critic_checkpoint.pht
-    algorithm: 'TD3'
+    actor_path: training_logs/SAC-CarTrack-23_10_25_10:41:10/models/SAC-checkpoint-15000_actor.pht
+    critic_path: training_logs/SAC-CarTrack-23_10_25_10:41:10/models/SAC-checkpoint-15000_critic.pht
+    algorithm: 'SAC'
     step_length: 0.1
     reward_range: 3.0
     collision_range: 0.2
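The launch file in the next hunk appears to read this file as a plain YAML document rather than through the ROS parameter server. A minimal sketch of pulling the same keys out with PyYAML, assuming the file path shown in the diff header above:

import yaml

# Assumed path, taken from the diff header above.
CONFIG_PATH = 'src/reinforcement_learning/config/test.yaml'

with open(CONFIG_PATH) as f:
    config = yaml.safe_load(f)

params = config['test']['ros__parameters']
print(params['algorithm'])   # -> 'SAC'
print(params['actor_path'])  # -> 'training_logs/SAC-CarTrack-23_10_25_10:41:10/models/SAC-checkpoint-15000_actor.pht'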
3 changes: 3 additions & 0 deletions src/reinforcement_learning/launch/test.launch.py
@@ -32,6 +32,9 @@ def generate_launch_description():
             os.path.join(pkg_environments, f'{env_launch[env]}.launch.py')),
         launch_arguments={
             'track': TextSubstitution(text=str(config['test']['ros__parameters']['track'])),
+            'car_name': TextSubstitution(text=str(config['test']['ros__parameters']['car_name']) if 'car_name' in config['test']['ros__parameters'] else 'f1tenth'),
+            'car_one': TextSubstitution(text=str(config['test']['ros__parameters']['car_name']) if 'car_name' in config['test']['ros__parameters'] else 'f1tenth'),
+            'car_two': TextSubstitution(text=str(config['test']['ros__parameters']['ftg_car_name']) if 'ftg_car_name' in config['test']['ros__parameters'] else 'ftg_car'),
         }.items() #TODO: this doesn't do anything
     )
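The three repeated conditionals above all implement the same "use the configured name, else a default" lookup. A behaviourally equivalent sketch using dict.get — illustrative only; `config` is the dict loaded from test.yaml as in the earlier sketch:

from launch.substitutions import TextSubstitution

params = config['test']['ros__parameters']

# Fall back to the same defaults the diff hardcodes in each ternary.
car_name = str(params.get('car_name', 'f1tenth'))
ftg_car_name = str(params.get('ftg_car_name', 'ftg_car'))

launch_arguments = {
    'track': TextSubstitution(text=str(params['track'])),
    'car_name': TextSubstitution(text=car_name),
    'car_one': TextSubstitution(text=car_name),
    'car_two': TextSubstitution(text=ftg_car_name),
}.items()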
29 changes: 22 additions & 7 deletions src/reinforcement_learning/reinforcement_learning/parse_args.py
@@ -7,11 +7,13 @@
 def parse_args():
     param_node = __declare_params()

-    env_params = __get_env_params(param_node)
-    algorithm_params = __get_algorithm_params(param_node)
-    network_params = __get_network_params(param_node)
+    env_params, rest_env = __get_env_params(param_node)
+    algorithm_params, rest_alg = __get_algorithm_params(param_node)
+    network_params, rest_params = __get_network_params(param_node)

-    return env_params, algorithm_params, network_params
+    rest = {**rest_env, **rest_alg, **rest_params}
+
+    return env_params, algorithm_params, network_params, rest


 def __declare_params():
@@ -92,7 +94,12 @@ def __get_env_params(param_node: Node):
         case _:
             raise Exception(f'Environment {params_dict["environment"]} not implemented')

-    return config
+    # Collect all the parameters that were not used into a python dictionary
+    rest = set(params_dict.keys()).difference(set(config.dict().keys()))
+    rest = {key: params_dict[key] for key in rest}
+
+    param_node.get_logger().info(f'Rest: {rest}')
+    return config, rest

 def __get_algorithm_params(param_node: Node):
     params = param_node.get_parameters([
@@ -114,7 +121,11 @@

     config = cfg.TrainingConfig(**params_dict)

-    return config
+    rest = set(params_dict.keys()).difference(set(config.dict().keys()))
+    rest = {key: params_dict[key] for key in rest}
+
+    param_node.get_logger().info(f'Rest: {rest}')
+    return config, rest

 def __get_network_params(param_node: Node):
     params = param_node.get_parameters([
@@ -144,5 +155,9 @@
         case _:
             raise Exception(f'Algorithm {params_dict["algorithm"]} not implemented')

-    return config
+    rest = set(params_dict.keys()).difference(set(config.dict().keys()))
+    param_node.get_logger().info(f'Rest: {rest}')
+    rest = {key: params_dict[key] for key in rest}
+
+    return config, rest

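The leftover-parameter collection in each of these hunks is a plain set difference between the declared parameters and the fields the config object actually consumed; `config.dict()` comes from the diff, everything else in this sketch is illustrative stand-in data:

# Stand-ins for the real declared parameters and consumed config fields.
params_dict = {
    'algorithm': 'SAC',
    'gamma': 0.99,
    'actor_path': 'training_logs/SAC-CarTrack-23_10_25_10:41:10/models/SAC-checkpoint-15000_actor.pht',
    'critic_path': 'training_logs/SAC-CarTrack-23_10_25_10:41:10/models/SAC-checkpoint-15000_critic.pht',
}
consumed_keys = {'algorithm', 'gamma'}  # stand-in for set(config.dict().keys())

# Everything declared but not consumed ends up in `rest`.
rest = {key: params_dict[key] for key in set(params_dict) - consumed_keys}
assert set(rest) == {'actor_path', 'critic_path'}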
24 changes: 6 additions & 18 deletions src/reinforcement_learning/reinforcement_learning/test.py
@@ -16,7 +16,7 @@
 def main():
     rclpy.init()

-    env_config, algorithm_config, network_config = parse_args()
+    env_config, algorithm_config, network_config, rest = parse_args()

     print(
         f'Environment Config: ------------------------------------- \n'
@@ -30,26 +30,14 @@ def main():
     env_factory = EnvironmentFactory()
     network_factory = NetworkFactory()

-    match network_config['algorithm']:
-        case 'PPO':
-            config = cfg.PPOConfig(**network_config)
-        case 'DDPG':
-            config = cfg.DDPGConfig(**network_config)
-        case 'SAC':
-            config = cfg.SACConfig(**network_config)
-        case 'TD3':
-            config = cfg.TD3Config(**network_config)
-        case _:
-            raise Exception(f'Algorithm {network_config["algorithm"]} not implemented')
-
     env = env_factory.create(env_config['environment'], env_config)
-    agent = network_factory.create_network(env.OBSERVATION_SIZE, env.ACTION_NUM, config=config)
+    agent = network_factory.create_network(env.OBSERVATION_SIZE, env.ACTION_NUM, config=network_config)

     # Load models if both paths are provided
-    if network_config['actor_path'] and network_config['critic_path']:
+    if rest['actor_path'] and rest['critic_path']:
         print('Reading saved models into actor and critic')
-        agent.actor_net.load_state_dict(torch.load(network_config['actor_path']))
-        agent.critic_net.load_state_dict(torch.load(network_config['critic_path']))
+        agent.actor_net.load_state_dict(torch.load(rest['actor_path']))
+        agent.critic_net.load_state_dict(torch.load(rest['critic_path']))
         print('Successfully Loaded models')
     else:
         raise Exception('Both actor and critic paths must be provided')
@@ -58,7 +46,7 @@ def main():
         case 'PPO':
            ppo_evaluate(env, agent, algorithm_config)
         case _:
-            off_policy_evaluate(env, agent, algorithm_config)
+            off_policy_evaluate(env, agent, algorithm_config['number_eval_episodes'])

 if __name__ == '__main__':
     main()
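Since actor_path and critic_path now arrive via the `rest` dictionary instead of the network config, indexing with rest['actor_path'] would raise a KeyError before the explicit error message if a path was never declared. A defensive variant of the same guard using dict.get — illustrative only, not part of the PR; `rest` and `agent` are the objects from the diff above:

import torch

actor_path = rest.get('actor_path')
critic_path = rest.get('critic_path')

if actor_path and critic_path:
    # Same loading logic as above, but a missing key now reaches the
    # explicit error below instead of raising a KeyError first.
    agent.actor_net.load_state_dict(torch.load(actor_path))
    agent.critic_net.load_state_dict(torch.load(critic_path))
else:
    raise Exception('Both actor and critic paths must be provided')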
2 changes: 1 addition & 1 deletion src/reinforcement_learning/reinforcement_learning/train.py
@@ -19,7 +19,7 @@
 def main():
     rclpy.init()

-    env_config, algorithm_config, network_config = parse_args()
+    env_config, algorithm_config, network_config, _ = parse_args()

     # Set Seeds
     torch.manual_seed(algorithm_config['seed'])