Skip to content

Commit

Permalink
PR comments
Browse files Browse the repository at this point in the history
  • Loading branch information
gjulianm committed Mar 6, 2025
1 parent 7399985 commit cb5962f
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 3 deletions.
2 changes: 2 additions & 0 deletions pkg/collector/corechecks/gpu/gpu.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,8 @@ func (c *Check) Run() error {
// Commit the metrics even in case of an error
defer snd.Commit()

+ // build the mapping of GPU devices -> containers to allow tagging device
+ // metrics with the tags of containers that are using them
gpuToContainersMap := c.getGPUToContainersMap()

if err := c.emitSysprobeMetrics(snd, gpuToContainersMap); err != nil {
Expand Down
7 changes: 5 additions & 2 deletions pkg/collector/corechecks/gpu/model/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,11 @@ type MemoryMetrics struct {

// UtilizationMetrics contains the GPU stats for a given device and process
type UtilizationMetrics struct {
- UsedCores float64 `json:"used_cores"`
- Memory MemoryMetrics `json:"memory"`
+ // UsedCores stores the average number of GPU cores used by this process in the interval
+ UsedCores float64 `json:"used_cores"`
+
+ // Memory stores the memory stats for the process during the interval
+ Memory MemoryMetrics `json:"memory"`
}

// StatsKey is the key used to identify a GPUStats object
Expand Down
2 changes: 1 addition & 1 deletion pkg/gpu/aggregator.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ func (agg *aggregator) processKernelSpan(span *kernelSpan) {
// threads that were enqueued.
//
// An example of a situation where this distinction is important: say we
- // have a kernel launch with 100 threads, but the GPU can only run 500
+ // have a kernel launch with 100 threads, but the GPU can only run 50
// threads, and assume this kernel runs for 1 second and that we want to
// report utilization for the last 2 seconds. If we were looking at the
// actual GPU utilization in real-time, we'd see 100% utilization for the
Expand Down

0 comments on commit cb5962f

Please sign in to comment.