Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[receiver/hostmetrics] Fix calculation of process.cpu.utilization #19166

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .chloggen/fix_hostmetricsreceiver-processutilization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: bug_fix

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: hostmetricsreceiver

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Fix `process.cpu.utilization` calculation by using individual `CPUUtilizationCalculator` instances per-PID.

# One or more tracking issues related to the change
issues: [19119]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext: Update `processscraper` to keep a map of PIDs to `CPUUtilizationCalculator`s, rather than using a single instance globally.
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ func TestCreateResourceMetricsScraper(t *testing.T) {

scraper, err := factory.CreateMetricsScraper(context.Background(), receivertest.NewNopCreateSettings(), cfg)

if runtime.GOOS == "linux" || runtime.GOOS == "windows" {
if runtime.GOOS == "linux" || runtime.GOOS == "windows" || runtime.GOOS == "darwin" {
assert.NoError(t, err)
assert.NotNil(t, scraper)
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ type scraper struct {
includeFS filterset.FilterSet
excludeFS filterset.FilterSet
scrapeProcessDelay time.Duration
ucal *ucal.CPUUtilizationCalculator
ucals map[int32]*ucal.CPUUtilizationCalculator
// for mocking
getProcessCreateTime func(p processHandle) (int64, error)
getProcessHandles func() (processHandles, error)
Expand All @@ -69,7 +69,7 @@ func newProcessScraper(settings receiver.CreateSettings, cfg *Config) (*scraper,
getProcessCreateTime: processHandle.CreateTime,
getProcessHandles: getProcessHandlesInternal,
scrapeProcessDelay: cfg.ScrapeProcessDelay,
ucal: &ucal.CPUUtilizationCalculator{},
ucals: make(map[int32]*ucal.CPUUtilizationCalculator),
}

var err error
Expand Down Expand Up @@ -109,10 +109,14 @@ func (s *scraper) scrape(_ context.Context) (pmetric.Metrics, error) {
errs.AddPartial(partialErr.Failed, partialErr)
}

presentPIDs := make(map[int32]struct{}, len(data))

for _, md := range data {
presentPIDs[md.pid] = struct{}{}

now := pcommon.NewTimestampFromTime(time.Now())

if err = s.scrapeAndAppendCPUTimeMetric(now, md.handle); err != nil {
if err = s.scrapeAndAppendCPUTimeMetric(now, md.handle, md.pid); err != nil {
errs.AddPartial(cpuMetricsLen, fmt.Errorf("error reading cpu times for process %q (pid %v): %w", md.executable.name, md.pid, err))
}

Expand Down Expand Up @@ -152,6 +156,13 @@ func (s *scraper) scrape(_ context.Context) (pmetric.Metrics, error) {
s.mb.EmitForResource(options...)
}

// Cleanup any [ucal.CPUUtilizationCalculator]s for PIDs that are no longer present
for pid := range s.ucals {
if _, ok := presentPIDs[pid]; !ok {
delete(s.ucals, pid)
}
}

return s.mb.Emit(), errs.Combine()
}

Expand Down Expand Up @@ -227,7 +238,7 @@ func (s *scraper) getProcessMetadata() ([]*processMetadata, error) {
return data, errs.Combine()
}

func (s *scraper) scrapeAndAppendCPUTimeMetric(now pcommon.Timestamp, handle processHandle) error {
func (s *scraper) scrapeAndAppendCPUTimeMetric(now pcommon.Timestamp, handle processHandle, pid int32) error {
if !s.config.MetricsBuilderConfig.Metrics.ProcessCPUTime.Enabled {
return nil
}
Expand All @@ -238,8 +249,11 @@ func (s *scraper) scrapeAndAppendCPUTimeMetric(now pcommon.Timestamp, handle pro
}

s.recordCPUTimeMetric(now, times)
if _, ok := s.ucals[pid]; !ok {
s.ucals[pid] = &ucal.CPUUtilizationCalculator{}
}

err = s.ucal.CalculateAndRecord(now, times, s.recordCPUUtilization)
err = s.ucals[pid].CalculateAndRecord(now, times, s.recordCPUUtilization)
return err
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,13 @@ func (c *CPUUtilizationCalculator) CalculateAndRecord(now pcommon.Timestamp, cur

// cpuUtilization calculates the difference between 2 cpu.TimesStat using spent time between them
func cpuUtilization(startStats *cpu.TimesStat, startTime pcommon.Timestamp, endStats *cpu.TimesStat, endTime pcommon.Timestamp) CPUUtilization {
elapsedTime := time.Duration(endTime - startTime)
elapsedTime := time.Duration(endTime - startTime).Seconds()
if elapsedTime <= 0 {
return CPUUtilization{}
}
return CPUUtilization{
User: (endStats.User - startStats.User) / elapsedTime.Seconds(),
System: (endStats.System - startStats.System) / elapsedTime.Seconds(),
Iowait: (endStats.Iowait - startStats.Iowait) / elapsedTime.Seconds(),
User: (endStats.User - startStats.User) / elapsedTime,
System: (endStats.System - startStats.System) / elapsedTime,
Iowait: (endStats.Iowait - startStats.Iowait) / elapsedTime,
}
}