-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathbase_sac_ms2.yml
97 lines (82 loc) · 1.76 KB
/
base_sac_ms2.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# Top-level run settings.
jax_env: false
seed: 0
algo: sac
verbose: 1
# Environment configuration
env:
  # NOTE(review): a plain `None` is loaded by YAML as the string "None", not
  # as null (this file writes `data_action_scale: null` elsewhere). Left
  # unchanged because the consumer may depend on it — confirm which is meant.
  env_id: None
  max_episode_steps: 100
  num_envs: 8
  env_type: "gym:cpu"
  # NOTE(review): nesting was restored from a copy that had lost its
  # indentation; the three *_mode keys are assumed to belong to env_kwargs —
  # confirm against the consumer's schema.
  env_kwargs:
    control_mode: "pd_ee_delta_pose"
    render_mode: "rgb_array"
    reward_mode: "sparse"
# Evaluation-environment settings. Kept as their own nested mapping so that
# num_envs / max_episode_steps here do not collide with the same-named keys
# listed for the training environment.
eval_env:
  num_envs: 2
  max_episode_steps: 100
# Settings grouped under `sac` (the value of `algo` at the top of this file).
# NOTE: values such as 5_000 use underscore digit separators, which are
# integer syntax in YAML 1.1 loaders; a strict YAML 1.2 loader may read them
# as strings.
sac:
  num_seed_steps: 5_000
  seed_with_policy: false
  replay_buffer_capacity: 1_000_000
  batch_size: 256
  steps_per_env: 4
  grad_updates_per_step: 16
  actor_update_freq: 1

  num_qs: 2
  num_min_qs: 2

  discount: 0.9
  tau: 0.005
  backup_entropy: false

  eval_freq: 50_000
  eval_steps: 500

  log_freq: 1000
  save_freq: 50_000

  learnable_temp: true
  initial_temperature: 1.0
# Network definitions. `actor` and `critic` are separate nested mappings, each
# with its own `type` and `arch_cfg`, so their identically named keys do not
# overwrite one another.
network:
  actor:
    type: "mlp"
    arch_cfg:
      features: [256, 256, 256]
      output_activation: "relu"
  critic:
    type: "mlp"
    arch_cfg:
      features: [256, 256, 256]
      output_activation: "relu"
      # NOTE(review): assumed to be an arch_cfg option rather than a direct
      # child of `critic`; the nesting was reconstructed — confirm.
      use_layer_norm: true
# Training settings, including the reverse and forward curriculum options.
train:
  # Written with an explicit decimal point: a bare `3e-4` does not match the
  # YAML 1.1 float pattern and is loaded as a string by stock PyYAML.
  actor_lr: 3.0e-4
  critic_lr: 3.0e-4
  steps: 100_000_000
  # NOTE(review): a plain `None` is loaded as the string "None", not as null
  # (compare `data_action_scale: null` below). Left unchanged — confirm which
  # the consumer expects.
  dataset_path: None
  shuffle_demos: true
  num_demos: 1000

  data_action_scale: null

  ## Reverse curriculum configs
  reverse_step_size: 4
  start_step_sampler: "geometric"
  curriculum_method: "per_demo"
  per_demo_buffer_size: 3
  demo_horizon_to_max_steps_ratio: 3
  train_on_demo_actions: true

  load_actor: true
  load_critic: true

  load_as_offline_buffer: true
  load_as_online_buffer: false

  ## Forward curriculum configs
  forward_curriculum: "success_once_score"
  staleness_coef: 0.1
  staleness_temperature: 0.1
  staleness_transform: "rankmin"
  score_transform: "rankmin"
  score_temperature: 0.1
  num_seeds: 1000
# Logging settings: which backends are switched on and how the run is named.
logger:
  tensorboard: true
  wandb: false

  workspace: "exps"
  project_name: "RFCL-Sparse"
  # NOTE(review): `group` is assumed to be the only child of wandb_cfg; the
  # nesting was reconstructed — confirm.
  wandb_cfg:
    group: "RFCL-ManiSkill2-Sparse-Baseline"