Skip to content

Commit

Permalink
Small logging fixes
Browse files (browse the repository at this point in the history)
  • Loading branch information
Joseph Suarez committed Feb 8, 2025
1 parent 392010f commit 7d14155
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 5 deletions.
2 changes: 2 additions & 0 deletions clean_pufferl.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ def train(data):
losses.old_approx_kl += old_approx_kl.item() / total_minibatches
losses.approx_kl += approx_kl.item() / total_minibatches
losses.clipfrac += clipfrac.item() / total_minibatches
+ losses.discriminator += disc_loss.item() / total_minibatches

if config.target_kl is not None:
if approx_kl > config.target_kl:
Expand Down Expand Up @@ -423,6 +424,7 @@ def make_losses():
approx_kl=0,
clipfrac=0,
explained_variance=0,
+ discriminator=0,
)

class Experience:
Expand Down
11 changes: 6 additions & 5 deletions pufferlib/environments/morph/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def make(name, **kwargs):

class PHCPufferEnv(pufferlib.PufferEnv):
def __init__(self, motion_file, has_self_collision, num_envs=32, device_type="cuda",
- device_id=0, headless=True, log_interval=128):
+ device_id=0, headless=True, log_interval=32):
cfg = {
'env': {
'num_envs': num_envs,
Expand Down Expand Up @@ -59,6 +59,7 @@ def reset(self, seed=None):
self.env.reset()
self.demo = self.env.demo
self.state = self.env.state
+ self.tick = 0
return self.observations, []

def step(self, actions_np):
Expand All @@ -69,12 +70,9 @@ def step(self, actions_np):
self.demo = self.env.demo
self.state = self.env.state

- # NOTE: rl-games reset done envs in the training script. Keeping this here for now.
- # TODO: Move this into the env
done_indices = torch.nonzero(self.terminals).squeeze(-1)
if len(done_indices) > 0:
self.observations[done_indices] = self.env.reset(done_indices)[done_indices]

self._infos["episode_return"] += self.episode_returns[done_indices].tolist()
self._infos["episode_length"] += self.episode_lengths[done_indices].tolist()
self.episode_returns[done_indices] = 0
Expand All @@ -84,7 +82,10 @@ def step(self, actions_np):
self.episode_lengths += 1

# TODO: self.env.extras has infos. Extract useful info?
- info = self.mean_and_log()
+ info = []
+ self.tick += 1
+ if self.tick % self.log_interval == 0:
+     info = self.mean_and_log()

return self.observations, self.rewards, self.terminals, self.truncations, info

Expand Down

0 comments on commit 7d14155

Please sign in to comment.