Skip to content

Commit

Permalink
[receiver/hostmetrics] Fix calculation of process.cpu.utilization (#…
Browse files Browse the repository at this point in the history
…19166)

Fixes the calculation of process CPU utilization. Rather than using a single ucal.CPUUtilizationCalculator struct to calculate the utilization over the collection interval, maintain one for each PID. After each scrape, remove any entries for PIDs that are no longer present.
  • Loading branch information
antonblock authored Mar 2, 2023
1 parent d3ef7dc commit e61f8ba
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 10 deletions.
16 changes: 16 additions & 0 deletions .chloggen/fix_hostmetricsreceiver-processutilization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: bug_fix

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: hostmetricsreceiver

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Fix `process.cpu.utilization` calculation by using individual `CPUUtilizationCalculator` instances per-PID.

# One or more tracking issues related to the change
issues: [19119]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext: Update `processscraper` to keep a map of PIDs to `CPUUtilizationCalculator`s, rather than using a single instance globally.
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ func TestCreateResourceMetricsScraper(t *testing.T) {

scraper, err := factory.CreateMetricsScraper(context.Background(), receivertest.NewNopCreateSettings(), cfg)

if runtime.GOOS == "linux" || runtime.GOOS == "windows" {
if runtime.GOOS == "linux" || runtime.GOOS == "windows" || runtime.GOOS == "darwin" {
assert.NoError(t, err)
assert.NotNil(t, scraper)
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ type scraper struct {
includeFS filterset.FilterSet
excludeFS filterset.FilterSet
scrapeProcessDelay time.Duration
ucal *ucal.CPUUtilizationCalculator
ucals map[int32]*ucal.CPUUtilizationCalculator
// for mocking
getProcessCreateTime func(p processHandle) (int64, error)
getProcessHandles func() (processHandles, error)
Expand All @@ -69,7 +69,7 @@ func newProcessScraper(settings receiver.CreateSettings, cfg *Config) (*scraper,
getProcessCreateTime: processHandle.CreateTime,
getProcessHandles: getProcessHandlesInternal,
scrapeProcessDelay: cfg.ScrapeProcessDelay,
ucal: &ucal.CPUUtilizationCalculator{},
ucals: make(map[int32]*ucal.CPUUtilizationCalculator),
}

var err error
Expand Down Expand Up @@ -109,10 +109,14 @@ func (s *scraper) scrape(_ context.Context) (pmetric.Metrics, error) {
errs.AddPartial(partialErr.Failed, partialErr)
}

presentPIDs := make(map[int32]struct{}, len(data))

for _, md := range data {
presentPIDs[md.pid] = struct{}{}

now := pcommon.NewTimestampFromTime(time.Now())

if err = s.scrapeAndAppendCPUTimeMetric(now, md.handle); err != nil {
if err = s.scrapeAndAppendCPUTimeMetric(now, md.handle, md.pid); err != nil {
errs.AddPartial(cpuMetricsLen, fmt.Errorf("error reading cpu times for process %q (pid %v): %w", md.executable.name, md.pid, err))
}

Expand Down Expand Up @@ -152,6 +156,13 @@ func (s *scraper) scrape(_ context.Context) (pmetric.Metrics, error) {
s.mb.EmitForResource(options...)
}

// Cleanup any [ucal.CPUUtilizationCalculator]s for PIDs that are no longer present
for pid := range s.ucals {
if _, ok := presentPIDs[pid]; !ok {
delete(s.ucals, pid)
}
}

return s.mb.Emit(), errs.Combine()
}

Expand Down Expand Up @@ -227,7 +238,7 @@ func (s *scraper) getProcessMetadata() ([]*processMetadata, error) {
return data, errs.Combine()
}

func (s *scraper) scrapeAndAppendCPUTimeMetric(now pcommon.Timestamp, handle processHandle) error {
func (s *scraper) scrapeAndAppendCPUTimeMetric(now pcommon.Timestamp, handle processHandle, pid int32) error {
if !s.config.MetricsBuilderConfig.Metrics.ProcessCPUTime.Enabled {
return nil
}
Expand All @@ -238,8 +249,11 @@ func (s *scraper) scrapeAndAppendCPUTimeMetric(now pcommon.Timestamp, handle pro
}

s.recordCPUTimeMetric(now, times)
if _, ok := s.ucals[pid]; !ok {
s.ucals[pid] = &ucal.CPUUtilizationCalculator{}
}

err = s.ucal.CalculateAndRecord(now, times, s.recordCPUUtilization)
err = s.ucals[pid].CalculateAndRecord(now, times, s.recordCPUUtilization)
return err
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,13 @@ func (c *CPUUtilizationCalculator) CalculateAndRecord(now pcommon.Timestamp, cur

// cpuUtilization calculates the difference between 2 cpu.TimesStat using spent time between them
func cpuUtilization(startStats *cpu.TimesStat, startTime pcommon.Timestamp, endStats *cpu.TimesStat, endTime pcommon.Timestamp) CPUUtilization {
elapsedTime := time.Duration(endTime - startTime)
elapsedTime := time.Duration(endTime - startTime).Seconds()
if elapsedTime <= 0 {
return CPUUtilization{}
}
return CPUUtilization{
User: (endStats.User - startStats.User) / elapsedTime.Seconds(),
System: (endStats.System - startStats.System) / elapsedTime.Seconds(),
Iowait: (endStats.Iowait - startStats.Iowait) / elapsedTime.Seconds(),
User: (endStats.User - startStats.User) / elapsedTime,
System: (endStats.System - startStats.System) / elapsedTime,
Iowait: (endStats.Iowait - startStats.Iowait) / elapsedTime,
}
}

0 comments on commit e61f8ba

Please sign in to comment.