From 712c688a545b0541d8bf6290785671711288a3a2 Mon Sep 17 00:00:00 2001 From: Nico Stewart Date: Mon, 27 Feb 2023 13:09:50 -0500 Subject: [PATCH 1/3] WIP: Keep individual calculators per-PID --- .../scraper/processscraper/process_scraper.go | 24 +++++++++++++++---- .../ucal/cpu_utilization_calculator.go | 8 +++---- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go index 7b2259932c62..7ebf00731cbc 100644 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go @@ -55,7 +55,7 @@ type scraper struct { includeFS filterset.FilterSet excludeFS filterset.FilterSet scrapeProcessDelay time.Duration - ucal *ucal.CPUUtilizationCalculator + ucals map[int32]*ucal.CPUUtilizationCalculator // for mocking getProcessCreateTime func(p processHandle) (int64, error) getProcessHandles func() (processHandles, error) @@ -69,7 +69,7 @@ func newProcessScraper(settings receiver.CreateSettings, cfg *Config) (*scraper, getProcessCreateTime: processHandle.CreateTime, getProcessHandles: getProcessHandlesInternal, scrapeProcessDelay: cfg.ScrapeProcessDelay, - ucal: &ucal.CPUUtilizationCalculator{}, + ucals: make(map[int32]*ucal.CPUUtilizationCalculator), } var err error @@ -109,10 +109,14 @@ func (s *scraper) scrape(_ context.Context) (pmetric.Metrics, error) { errs.AddPartial(partialErr.Failed, partialErr) } + presentPIDs := make(map[int32]struct{}, len(data)) + for _, md := range data { + presentPIDs[md.pid] = struct{}{} + now := pcommon.NewTimestampFromTime(time.Now()) - if err = s.scrapeAndAppendCPUTimeMetric(now, md.handle); err != nil { + if err = s.scrapeAndAppendCPUTimeMetric(now, md.handle, md.pid); err != nil { errs.AddPartial(cpuMetricsLen, fmt.Errorf("error reading cpu times for process %q (pid %v): %w", md.executable.name, md.pid, err)) } @@ -152,6 +156,13 @@ func (s *scraper) scrape(_ context.Context) (pmetric.Metrics, error) { s.mb.EmitForResource(options...) } + // Cleanup any [ucal.CPUUtilizationCalculator]s for PIDs that are no longer present + for pid := range s.ucals { + if _, ok := presentPIDs[pid]; !ok { + delete(s.ucals, pid) + } + } + return s.mb.Emit(), errs.Combine() } @@ -227,7 +238,7 @@ func (s *scraper) getProcessMetadata() ([]*processMetadata, error) { return data, errs.Combine() } -func (s *scraper) scrapeAndAppendCPUTimeMetric(now pcommon.Timestamp, handle processHandle) error { +func (s *scraper) scrapeAndAppendCPUTimeMetric(now pcommon.Timestamp, handle processHandle, processHandle, pid int32) error { if !s.config.MetricsBuilderConfig.Metrics.ProcessCPUTime.Enabled { return nil } @@ -238,8 +249,11 @@ func (s *scraper) scrapeAndAppendCPUTimeMetric(now pcommon.Timestamp, handle pro } s.recordCPUTimeMetric(now, times) + if _, ok := s.ucals[pid]; !ok { + s.ucals[pid] = &ucal.CPUUtilizationCalculator{} + } - err = s.ucal.CalculateAndRecord(now, times, s.recordCPUUtilization) + err = s.ucals[pid].CalculateAndRecord(now, times, s.recordCPUUtilization) return err } diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/ucal/cpu_utilization_calculator.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/ucal/cpu_utilization_calculator.go index 7d6da3cd63e4..85699b067ce0 100644 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/ucal/cpu_utilization_calculator.go +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/ucal/cpu_utilization_calculator.go @@ -50,13 +50,13 @@ func (c *CPUUtilizationCalculator) CalculateAndRecord(now pcommon.Timestamp, cur // cpuUtilization calculates the difference between 2 cpu.TimesStat using spent time between them func cpuUtilization(startStats *cpu.TimesStat, startTime pcommon.Timestamp, endStats *cpu.TimesStat, endTime pcommon.Timestamp) CPUUtilization { - elapsedTime := time.Duration(endTime - startTime) + elapsedTime := time.Duration(endTime - startTime).Seconds() if elapsedTime <= 0 { return CPUUtilization{} } return CPUUtilization{ - User: (endStats.User - startStats.User) / elapsedTime.Seconds(), - System: (endStats.System - startStats.System) / elapsedTime.Seconds(), - Iowait: (endStats.Iowait - startStats.Iowait) / elapsedTime.Seconds(), + User: (endStats.User - startStats.User) / elapsedTime, + System: (endStats.System - startStats.System) / elapsedTime, + Iowait: (endStats.Iowait - startStats.Iowait) / elapsedTime, } } From 57749c8c59d43beae7a0db318e8cdce7d42cd1ef Mon Sep 17 00:00:00 2001 From: Nico Stewart Date: Tue, 28 Feb 2023 10:55:21 -0500 Subject: [PATCH 2/3] Fix processscraper/TestCreateResourceMetricsScraper - Error is no longer returned for darwin GOOS --- .../internal/scraper/processscraper/factory_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/factory_test.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/factory_test.go index aad03ae6e94f..2406005d1d5f 100644 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/factory_test.go +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/factory_test.go @@ -35,7 +35,7 @@ func TestCreateResourceMetricsScraper(t *testing.T) { scraper, err := factory.CreateMetricsScraper(context.Background(), receivertest.NewNopCreateSettings(), cfg) - if runtime.GOOS == "linux" || runtime.GOOS == "windows" { + if runtime.GOOS == "linux" || runtime.GOOS == "windows" || runtime.GOOS == "darwin" { assert.NoError(t, err) assert.NotNil(t, scraper) } else { From 59ab8c2a774e717b0f451605859e06d3888c5c6f Mon Sep 17 00:00:00 2001 From: Nico Stewart Date: Wed, 1 Mar 2023 10:58:24 -0500 Subject: [PATCH 3/3] Add chloggen entry --- ...x_hostmetricsreceiver-processutilization.yaml | 16 ++++++++++++++++ .../scraper/processscraper/process_scraper.go | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) create mode 100755 .chloggen/fix_hostmetricsreceiver-processutilization.yaml diff --git a/.chloggen/fix_hostmetricsreceiver-processutilization.yaml b/.chloggen/fix_hostmetricsreceiver-processutilization.yaml new file mode 100755 index 000000000000..5f7676d53f0b --- /dev/null +++ b/.chloggen/fix_hostmetricsreceiver-processutilization.yaml @@ -0,0 +1,16 @@ +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: bug_fix + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: hostmetricsreceiver + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Fix `process.cpu.utilization` calculation by using individual `CPUUtilizationCalculator` instances per-PID. + +# One or more tracking issues related to the change +issues: [19119] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: Update `processscraper` to keep a map of PIDs to `CPUUtilizationCalculator`s, rather than using a single instance globally. diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go index 7ebf00731cbc..2a4378beb18d 100644 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go @@ -238,7 +238,7 @@ func (s *scraper) getProcessMetadata() ([]*processMetadata, error) { return data, errs.Combine() } -func (s *scraper) scrapeAndAppendCPUTimeMetric(now pcommon.Timestamp, handle processHandle, processHandle, pid int32) error { +func (s *scraper) scrapeAndAppendCPUTimeMetric(now pcommon.Timestamp, handle processHandle, pid int32) error { if !s.config.MetricsBuilderConfig.Metrics.ProcessCPUTime.Enabled { return nil }