Skip to content

Commit

Permalink
Merge pull request #164 from kywch/isaacgym
Browse files Browse the repository at this point in the history
Sync isaacgym
  • Loading branch information
jsuarez5341 authored Feb 13, 2025
2 parents 2a06665 + 161ceef commit 15ccf49
Show file tree
Hide file tree
Showing 11 changed files with 357 additions and 361 deletions.
3 changes: 2 additions & 1 deletion clean_pufferl.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,8 @@ def close(data):
artifact = data.wandb.Artifact(artifact_name, type="model")
model_path = save_checkpoint(data)
artifact.add_file(model_path)
data.wandb.run.log_artifact(artifact)
# NOTE: the PHC model is too large to save for every sweep run
# data.wandb.run.log_artifact(artifact)
data.wandb.finish()

class Profile:
Expand Down
78 changes: 61 additions & 17 deletions config/morph.ini
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,19 @@ policy_name = Policy
# rnn_name = Recurrent

[policy]
input_dim = 934
action_dim = 69
demo_dim = 358
hidden = 2048
demo_size = 358
hidden_size = 1024

[rnn]
input_size = 1024
hidden_size = 1024

[env]
motion_file = "resources/morph/totalcapture_acting_poses.pkl"
#has_self_collision = True
has_self_collision = False
motion_file = "resources/morph/dfaust_one_leg_jump.pkl"
has_self_collision = True
# has_self_collision = False
num_envs = 2048
#num_envs = 32
# num_envs = 32
#headless = False

[train]
Expand All @@ -30,14 +32,14 @@ norm_adv = True
target_kl = None

total_timesteps = 5_000_000_000
eval_timesteps = 100_000
eval_timesteps = 655_000

num_workers = 1
num_envs = 1
batch_size = 65536
minibatch_size = 16384
#batch_size = 1024
#minibatch_size = 256
# batch_size = 1024
# minibatch_size = 256

disc_coef = 5.0

Expand All @@ -46,11 +48,53 @@ bptt_horizon = 8
anneal_lr = False
gae_lambda = 0.95
gamma = 0.99
clip_coef = 0.2
clip_coef = 0.5
clip_vloss = True
vf_coef = 2.0
vf_clip_coef = 0.2
vf_coef = 1.0
vf_clip_coef = 0.5
max_grad_norm = 1.0
ent_coef = 0.0
learning_rate = 2e-5
checkpoint_interval = 10000
ent_coef = 0.0001
learning_rate = 0.000015
checkpoint_interval = 1000


[sweep]
method = bayes
name = sweep

[sweep.metric]
goal = maximize
name = environment/episode_return

[sweep.parameters.train.parameters.total_timesteps]
distribution = log_uniform_values
min = 10_000_000
max = 200_000_000

[sweep.parameters.train.parameters.learning_rate]
distribution = log_uniform_values
min = 1e-5
max = 1e-1

[sweep.parameters.train.parameters.gamma]
distribution = uniform
min = 0.0
max = 1.0

[sweep.parameters.train.parameters.gae_lambda]
distribution = uniform
min = 0.0
max = 1.0

[sweep.parameters.train.parameters.vf_coef]
distribution = uniform
min = 0.0
max = 5.0

[sweep.parameters.train.parameters.max_grad_norm]
distribution = uniform
min = 0.0
max = 20.0

[sweep.parameters.train.parameters.bptt_horizon]
values = [4, 8, 16]
46 changes: 23 additions & 23 deletions demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,7 @@ def make_policy(env, policy_cls, rnn_cls, args):
policy = rnn_cls(env, policy, **args['rnn'])
policy = pufferlib.cleanrl.RecurrentPolicy(policy)
else:
if not isinstance(policy, pufferlib.cleanrl.Policy):
policy = pufferlib.cleanrl.Policy(policy)
policy = pufferlib.cleanrl.Policy(policy)

return policy.to(args['train']['device'])

Expand Down Expand Up @@ -146,11 +145,11 @@ def carbs_param(group, name, space, wandb_params, mmin=None, mmax=None,
param_spaces.append(carbs_param('train', 'total_timesteps', 'log', sweep_parameters,
search_center=min_timesteps, is_integer=True))

batch_param = sweep_parameters['train']['parameters']['batch_size']
default_batch = (batch_param['max'] - batch_param['min']) // 2
# batch_param = sweep_parameters['train']['parameters']['batch_size']
# default_batch = (batch_param['max'] - batch_param['min']) // 2

minibatch_param = sweep_parameters['train']['parameters']['minibatch_size']
default_minibatch = (minibatch_param['max'] - minibatch_param['min']) // 2
# minibatch_param = sweep_parameters['train']['parameters']['minibatch_size']
# default_minibatch = (minibatch_param['max'] - minibatch_param['min']) // 2

if 'env' in sweep_parameters:
env_params = sweep_parameters['env']['parameters']
Expand Down Expand Up @@ -184,19 +183,19 @@ def carbs_param(group, name, space, wandb_params, mmin=None, mmax=None,
carbs_param('train', 'learning_rate', 'log', sweep_parameters, search_center=1e-3),
carbs_param('train', 'gamma', 'logit', sweep_parameters, search_center=0.95),
carbs_param('train', 'gae_lambda', 'logit', sweep_parameters, search_center=0.90),
carbs_param('train', 'update_epochs', 'linear', sweep_parameters,
search_center=1, scale=3, is_integer=True),
carbs_param('train', 'clip_coef', 'logit', sweep_parameters, search_center=0.1),
carbs_param('train', 'vf_coef', 'logit', sweep_parameters, search_center=0.5),
carbs_param('train', 'vf_clip_coef', 'logit', sweep_parameters, search_center=0.1),
carbs_param('train', 'max_grad_norm', 'linear', sweep_parameters, search_center=0.5),
carbs_param('train', 'ent_coef', 'log', sweep_parameters, search_center=0.01),
carbs_param('train', 'batch_size', 'log', sweep_parameters,
search_center=default_batch, is_integer=True),
carbs_param('train', 'minibatch_size', 'log', sweep_parameters,
search_center=default_minibatch, is_integer=True),
# carbs_param('train', 'update_epochs', 'linear', sweep_parameters,
# search_center=1, scale=3, is_integer=True),
# carbs_param('train', 'clip_coef', 'logit', sweep_parameters, search_center=0.5),
carbs_param('train', 'vf_coef', 'linear', sweep_parameters, search_center=1.0),
# carbs_param('train', 'vf_clip_coef', 'logit', sweep_parameters, search_center=0.5),
carbs_param('train', 'max_grad_norm', 'linear', sweep_parameters, search_center=1.0),
# carbs_param('train', 'ent_coef', 'log', sweep_parameters, search_center=0.0001),
# carbs_param('train', 'batch_size', 'log', sweep_parameters,
# search_center=default_batch, is_integer=True),
# carbs_param('train', 'minibatch_size', 'log', sweep_parameters,
# search_center=default_minibatch, is_integer=True),
carbs_param('train', 'bptt_horizon', 'log', sweep_parameters,
search_center=16, is_integer=True),
search_center=8, is_integer=True),
]

carbs_params = CARBSParams(
Expand Down Expand Up @@ -236,10 +235,10 @@ def main():
train_suggestion = {k.split('/')[1]: v for k, v in suggestion.items() if k.startswith('train/')}
env_suggestion = {k.split('/')[1]: v for k, v in suggestion.items() if k.startswith('env/')}
args['train'].update(train_suggestion)
args['train']['batch_size'] = closest_power(
train_suggestion['batch_size'])
args['train']['minibatch_size'] = closest_power(
train_suggestion['minibatch_size'])
# args['train']['batch_size'] = closest_power(
# train_suggestion['batch_size'])
# args['train']['minibatch_size'] = closest_power(
# train_suggestion['minibatch_size'])
args['train']['bptt_horizon'] = closest_power(
train_suggestion['bptt_horizon'])

Expand Down Expand Up @@ -330,7 +329,8 @@ def train(args, make_env, policy_cls, rnn_cls, wandb,

uptime = data.profile.uptime
steps_evaluated = 0
steps_to_eval = int(args['train']['total_timesteps'] * eval_frac)
# steps_to_eval = int(args['train']['total_timesteps'] * eval_frac)
steps_to_eval = int(args['train']['eval_timesteps'])
batch_size = args['train']['batch_size']
while steps_evaluated < steps_to_eval:
stats, _ = clean_pufferl.evaluate(data)
Expand Down
17 changes: 2 additions & 15 deletions pufferlib/environments/morph/__init__.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,12 @@
from .environment import env_creator

try:
import torch
except ImportError:
pass
else:
from .torch import Policy
try:
from .torch import Recurrent
except:
Recurrent = None

'''
try:
import pufferlib.environments.morph.policy as torch
except ImportError:
pass
else:
from .policy import Policy
from pufferlib.environments.morph.policy import Policy
try:
from .policy import Recurrent
from pufferlib.environments.morph.policy import Recurrent
except:
Recurrent = None
'''
131 changes: 0 additions & 131 deletions pufferlib/environments/morph/_torch.py

This file was deleted.

Loading

0 comments on commit 15ccf49

Please sign in to comment.