# ppo_pr2.gin
# Source: a fork of HorizonRobotics/alf.
include 'ppo.gin'
import alf.environments.suite_socialbot
# Environment config.
# Selects the "SocialBot-Pr2Gripper-v0" environment and runs 60 copies of it
# in parallel. The loader is referenced as @suite_socialbot.load; presumably it
# is made available to gin by the `import alf.environments.suite_socialbot`
# line in this file -- confirm against that module.
create_environment.env_name="SocialBot-Pr2Gripper-v0"
create_environment.num_parallel_environments=60
create_environment.env_load_fn=@suite_socialbot.load
# Algorithm config.
# PPO loss settings. These bindings appear after the `include 'ppo.gin'` line,
# so they are applied on top of whatever that file binds.
PPOLoss.entropy_regularization=0.0
PPOLoss.gamma=0.99
PPOLoss.normalize_advantages=True
PPOLoss.td_lambda=0.95
PPOLoss.td_error_loss_fn=@element_wise_squared_loss
PPOLoss.check_numerics=True
# NOTE(review): the exact effect of -1. is defined by NormalProjectionNetwork
# (not visible here) -- confirm how the bias is transformed into a std.
NormalProjectionNetwork.std_bias_initializer_value=-1.
# The actor and value networks are configured under separate gin scopes
# ("actor/" and "value/"). Both get the same fc_layer_params; only the actor
# binds an explicit activation here.
actor/ActorDistributionNetwork.fc_layer_params=(100, 50, 25)
actor/ActorDistributionNetwork.activation=@alf.math.softsign
value/ValueNetwork.fc_layer_params=(100, 50, 25)
# Learning rate for the optimizer configured under the "ac/" scope.
ac/AdamTF.lr=2e-4
# Hand the scoped network configurables to the algorithm. These references have
# no trailing "()", so gin passes the configurable itself rather than calling it.
ActorCriticAlgorithm.actor_network_ctor=@actor/ActorDistributionNetwork
ActorCriticAlgorithm.value_network_ctor=@value/ValueNetwork
# The trailing "()" makes gin call ac/AdamTF and bind the returned object.
Agent.optimizer=@ac/AdamTF()
# Training config.
TrainerConfig.mini_batch_length=1
TrainerConfig.unroll_length=100
# 6000 = num_parallel_environments (60) * unroll_length (100).
# NOTE(review): presumably chosen so that one mini-batch spans a complete
# unroll across all parallel environments -- confirm; keep in sync if either
# of those two values changes.
TrainerConfig.mini_batch_size=6000
TrainerConfig.num_iterations=10000
TrainerConfig.num_updates_per_train_iter=25
TrainerConfig.eval_interval=1000
# Summary / debugging output settings.
TrainerConfig.debug_summaries=True
TrainerConfig.summarize_grads_and_vars=True
TrainerConfig.summary_interval=5