Skip to content

Commit

Permalink
Store kineto traces to a manifold bucket
Browse files — browse the repository at this point in the history
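Summary: Store Kineto traces to a Manifold bucket (`hpc_traces`). When replay profiling is enabled, the profiler's `on_trace_ready` callback is now built with `export_trace_func`; if that module cannot be imported, the existing `trace_handler` is used instead.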

Reviewed By: hiwotadese

Differential Revision: D52034905

fbshipit-source-id: 867a3d333781f7777bf37044cb83fc8eea7f3e6c
  • Loading branch information
TaekyungHeo authored and facebook-github-bot committed Dec 11, 2023
1 parent ee4c17c commit 36b22e2
Showing 1 changed file with 15 additions and 1 deletion.
16 changes: 15 additions & 1 deletion train/compute/python/tools/et_replay.py
Original file line number Diff line number Diff line change
Expand Up @@ -1331,6 +1331,20 @@ def benchTime(self):

prev_iter = self.numWarmupIters
if self.profile_replay:
try:
from aiplatform.monitoring.unitrace.upload_manifold import (
export_trace_func,
)

rank = self.comms_env_params["local_rank"]
on_trace_ready = export_trace_func(
"/tmp",
worker_name=f"rank-{rank}",
bucket_name="hpc_traces",
zoomer_request_callsite="hpc",
)
except ImportError:
on_trace_ready = trace_handler
with torch.profiler.profile(
activities=[
torch.profiler.ProfilerActivity.CPU,
Expand All @@ -1340,7 +1354,7 @@ def benchTime(self):
wait=0, warmup=self.numWarmupIters, active=self.numIters
),
record_shapes=True,
on_trace_ready=trace_handler,
on_trace_ready=on_trace_ready,
) as prof:
for iter in range(self.numWarmupIters + self.numIters):
if self.et_profile:
Expand Down

0 comments on commit 36b22e2

Please sign in to comment.