From 512e086dec6fd5d3ebc30974066f69f69b1f85b9 Mon Sep 17 00:00:00 2001 From: david Date: Wed, 15 Dec 2021 12:07:38 +0100 Subject: [PATCH 01/10] Implement #2250: Add "isolated" label on cpu collector on linux Signed-off-by: david --- collector/cpu_linux.go | 99 +++++++++++++++++++++++++++++++------ collector/cpu_linux_test.go | 28 +++++++++++ 2 files changed, 113 insertions(+), 14 deletions(-) diff --git a/collector/cpu_linux.go b/collector/cpu_linux.go index 84ad373b00..eec6c4ebe7 100644 --- a/collector/cpu_linux.go +++ b/collector/cpu_linux.go @@ -18,9 +18,11 @@ package collector import ( "fmt" + "io/ioutil" "path/filepath" "regexp" "strconv" + "strings" "sync" "github.com/go-kit/log" @@ -42,6 +44,7 @@ type cpuCollector struct { logger log.Logger cpuStats []procfs.CPUStat cpuStatsMutex sync.Mutex + isolatedCpus []uint16 cpuFlagsIncludeRegexp *regexp.Regexp cpuBugsIncludeRegexp *regexp.Regexp @@ -62,15 +65,69 @@ func init() { registerCollector("cpu", defaultEnabled, NewCPUCollector) } +func parseIsolCpus(data []byte) ([]uint16, error) { + isolcpus_str := strings.TrimRight(string(data), "\n") + + var isolcpus_int = []uint16{} + + for _, cpu := range strings.Split(isolcpus_str, ",") { + if cpu == "" { + continue + } + if strings.Contains(cpu, "-") { + ranges := strings.Split(cpu, "-") + startRange, err := strconv.Atoi(ranges[0]) + if err != nil { + return nil, err + } + endRange, err := strconv.Atoi(ranges[1]) + if err != nil { + return nil, err + } + + for i := startRange; i <= endRange; i++ { + isolcpus_int = append(isolcpus_int, uint16(i)) + } + continue + } + + _cpu, err := strconv.Atoi(cpu) + if err != nil { + return nil, err + } + isolcpus_int = append(isolcpus_int, uint16(_cpu)) + } + return isolcpus_int, nil +} + +func readIsolCpus() ([]uint16, error) { + isolcpus, err := ioutil.ReadFile(sysFilePath("devices/system/cpu/isolated")) + if err != nil { + return nil, fmt.Errorf("failed to read isolcpus from sysfs: %w", err) + } + + return parseIsolCpus(isolcpus) +} + // NewCPUCollector returns a new Collector exposing kernel/system statistics. func NewCPUCollector(logger log.Logger) (Collector, error) { fs, err := procfs.NewFS(*procPath) if err != nil { return nil, fmt.Errorf("failed to open procfs: %w", err) } + + isolcpus, err := readIsolCpus() + if err != nil { + return nil, fmt.Errorf("failed to open procfs: %w", err) + } + c := &cpuCollector{ - fs: fs, - cpu: nodeCPUSecondsDesc, + fs: fs, + cpu: prometheus.NewDesc( + prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "seconds_total"), + "Seconds the CPUs spent in each mode.", + []string{"cpu", "mode", "isolated"}, nil, + ), cpuInfo: prometheus.NewDesc( prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "info"), "CPU information from /proc/cpuinfo.", @@ -89,7 +146,7 @@ func NewCPUCollector(logger log.Logger) (Collector, error) { cpuGuest: prometheus.NewDesc( prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "guest_seconds_total"), "Seconds the CPUs spent in guests (VMs) for each mode.", - []string{"cpu", "mode"}, nil, + []string{"cpu", "mode", "isolated"}, nil, ), cpuCoreThrottle: prometheus.NewDesc( prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "core_throttles_total"), @@ -101,7 +158,8 @@ func NewCPUCollector(logger log.Logger) (Collector, error) { "Number of times this CPU package has been throttled.", []string{"package"}, nil, ), - logger: logger, + logger: logger, + isolatedCpus: isolcpus, } err = c.compileIncludeFlags(flagsInclude, bugsInclude) if err != nil { @@ -276,6 +334,15 @@ func (c *cpuCollector) updateThermalThrottle(ch chan<- prometheus.Metric) error return nil } +func contains(s []uint16, e uint16) bool { + for _, a := range s { + if a == e { + return true + } + } + return false +} + // updateStat reads /proc/stat through procfs and exports CPU-related metrics. func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error { stats, err := c.fs.Stat() @@ -290,19 +357,23 @@ func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error { defer c.cpuStatsMutex.Unlock() for cpuID, cpuStat := range c.cpuStats { cpuNum := strconv.Itoa(cpuID) - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.User, cpuNum, "user") - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Nice, cpuNum, "nice") - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.System, cpuNum, "system") - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Idle, cpuNum, "idle") - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Iowait, cpuNum, "iowait") - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.IRQ, cpuNum, "irq") - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.SoftIRQ, cpuNum, "softirq") - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Steal, cpuNum, "steal") + isIsolated := "0" + if contains(c.isolatedCpus, uint16(cpuID)) { + isIsolated = "1" + } + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.User, cpuNum, "user", isIsolated) + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Nice, cpuNum, "nice", isIsolated) + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.System, cpuNum, "system", isIsolated) + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Idle, cpuNum, "idle", isIsolated) + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Iowait, cpuNum, "iowait", isIsolated) + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.IRQ, cpuNum, "irq", isIsolated) + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.SoftIRQ, cpuNum, "softirq", isIsolated) + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Steal, cpuNum, "steal", isIsolated) if *enableCPUGuest { // Guest CPU is also accounted for in cpuStat.User and cpuStat.Nice, expose these as separate metrics. - ch <- prometheus.MustNewConstMetric(c.cpuGuest, prometheus.CounterValue, cpuStat.Guest, cpuNum, "user") - ch <- prometheus.MustNewConstMetric(c.cpuGuest, prometheus.CounterValue, cpuStat.GuestNice, cpuNum, "nice") + ch <- prometheus.MustNewConstMetric(c.cpuGuest, prometheus.CounterValue, cpuStat.Guest, cpuNum, "user", isIsolated) + ch <- prometheus.MustNewConstMetric(c.cpuGuest, prometheus.CounterValue, cpuStat.GuestNice, cpuNum, "nice", isIsolated) } } diff --git a/collector/cpu_linux_test.go b/collector/cpu_linux_test.go index 93b493b2e9..7bcd5e4917 100644 --- a/collector/cpu_linux_test.go +++ b/collector/cpu_linux_test.go @@ -104,3 +104,31 @@ func TestCPU(t *testing.T) { t.Fatalf("should have %v CPU Stat: got %v", resetIdle, got) } } +func TestIsolatedParsingCPU(t *testing.T) { + var testParams = []struct { + in []byte + res []uint16 + err error + }{ + {[]byte(""), []uint16{}, nil}, + {[]byte("1\n"), []uint16{1}, nil}, + {[]byte("1"), []uint16{1}, nil}, + {[]byte("1,2"), []uint16{1, 2}, nil}, + {[]byte("1-2"), []uint16{1, 2}, nil}, + {[]byte("1-3"), []uint16{1, 2, 3}, nil}, + {[]byte("1,2-4"), []uint16{1, 2, 3, 4}, nil}, + {[]byte("1,3-4"), []uint16{1, 3, 4}, nil}, + {[]byte("1,3-4,7,20-21"), []uint16{1, 3, 4, 7, 20, 21}, nil}, + } + for _, params := range testParams { + t.Run("blabla", func(t *testing.T) { + res, err := parseIsolCpus(params.in) + if !reflect.DeepEqual(res, params.res) { + t.Fatalf("should have %v result: got %v", params.res, res) + } + if err != params.err { + t.Fatalf("should have %v error: got %v", params.err, err) + } + }) + } +} From 5d68d5b9ad4d4d953e08b8fdde1e4f8549058f8f Mon Sep 17 00:00:00 2001 From: david Date: Wed, 15 Dec 2021 14:35:19 +0100 Subject: [PATCH 02/10] move logic to procfs; create a new metric for isolation Signed-off-by: david --- collector/cpu_linux.go | 123 ++++++++++++++++-------------------- collector/cpu_linux_test.go | 28 -------- 2 files changed, 54 insertions(+), 97 deletions(-) diff --git a/collector/cpu_linux.go b/collector/cpu_linux.go index eec6c4ebe7..63c3768f23 100644 --- a/collector/cpu_linux.go +++ b/collector/cpu_linux.go @@ -18,17 +18,16 @@ package collector import ( "fmt" - "io/ioutil" "path/filepath" "regexp" "strconv" - "strings" "sync" "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/procfs" + "github.com/prometheus/procfs/sysfs" "gopkg.in/alecthomas/kingpin.v2" ) @@ -41,6 +40,7 @@ type cpuCollector struct { cpuGuest *prometheus.Desc cpuCoreThrottle *prometheus.Desc cpuPackageThrottle *prometheus.Desc + cpuIsolated *prometheus.Desc logger log.Logger cpuStats []procfs.CPUStat cpuStatsMutex sync.Mutex @@ -65,50 +65,6 @@ func init() { registerCollector("cpu", defaultEnabled, NewCPUCollector) } -func parseIsolCpus(data []byte) ([]uint16, error) { - isolcpus_str := strings.TrimRight(string(data), "\n") - - var isolcpus_int = []uint16{} - - for _, cpu := range strings.Split(isolcpus_str, ",") { - if cpu == "" { - continue - } - if strings.Contains(cpu, "-") { - ranges := strings.Split(cpu, "-") - startRange, err := strconv.Atoi(ranges[0]) - if err != nil { - return nil, err - } - endRange, err := strconv.Atoi(ranges[1]) - if err != nil { - return nil, err - } - - for i := startRange; i <= endRange; i++ { - isolcpus_int = append(isolcpus_int, uint16(i)) - } - continue - } - - _cpu, err := strconv.Atoi(cpu) - if err != nil { - return nil, err - } - isolcpus_int = append(isolcpus_int, uint16(_cpu)) - } - return isolcpus_int, nil -} - -func readIsolCpus() ([]uint16, error) { - isolcpus, err := ioutil.ReadFile(sysFilePath("devices/system/cpu/isolated")) - if err != nil { - return nil, fmt.Errorf("failed to read isolcpus from sysfs: %w", err) - } - - return parseIsolCpus(isolcpus) -} - // NewCPUCollector returns a new Collector exposing kernel/system statistics. func NewCPUCollector(logger log.Logger) (Collector, error) { fs, err := procfs.NewFS(*procPath) @@ -116,18 +72,19 @@ func NewCPUCollector(logger log.Logger) (Collector, error) { return nil, fmt.Errorf("failed to open procfs: %w", err) } - isolcpus, err := readIsolCpus() + sysfs, err := sysfs.NewFS(*sysPath) if err != nil { - return nil, fmt.Errorf("failed to open procfs: %w", err) + return nil, fmt.Errorf("failed to open sysfs: %w", err) + } + + isolcpus, err := sysfs.IsolatedCPUs() + if err != nil { + return nil, fmt.Errorf("failed to read isolcpus from sysfs: %w", err) } c := &cpuCollector{ - fs: fs, - cpu: prometheus.NewDesc( - prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "seconds_total"), - "Seconds the CPUs spent in each mode.", - []string{"cpu", "mode", "isolated"}, nil, - ), + fs: fs, + cpu: nodeCPUSecondsDesc, cpuInfo: prometheus.NewDesc( prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "info"), "CPU information from /proc/cpuinfo.", @@ -146,7 +103,7 @@ func NewCPUCollector(logger log.Logger) (Collector, error) { cpuGuest: prometheus.NewDesc( prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "guest_seconds_total"), "Seconds the CPUs spent in guests (VMs) for each mode.", - []string{"cpu", "mode", "isolated"}, nil, + []string{"cpu", "mode"}, nil, ), cpuCoreThrottle: prometheus.NewDesc( prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "core_throttles_total"), @@ -158,6 +115,11 @@ func NewCPUCollector(logger log.Logger) (Collector, error) { "Number of times this CPU package has been throttled.", []string{"package"}, nil, ), + cpuIsolated: prometheus.NewDesc( + prometheus.BuildFQName(namespace, cpuCollectorSubsystem, "isolated"), + "Whether each core is isolated, information from /sys/devices/system/cpu/isolated.", + []string{"cpu"}, nil, + ), logger: logger, isolatedCpus: isolcpus, } @@ -200,6 +162,9 @@ func (c *cpuCollector) Update(ch chan<- prometheus.Metric) error { if err := c.updateStat(ch); err != nil { return err } + if err := c.updateIsolated(ch); err != nil { + return err + } return c.updateThermalThrottle(ch) } @@ -344,7 +309,7 @@ func contains(s []uint16, e uint16) bool { } // updateStat reads /proc/stat through procfs and exports CPU-related metrics. -func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error { +func (c *cpuCollector) updateIsolated(ch chan<- prometheus.Metric) error { stats, err := c.fs.Stat() if err != nil { return err @@ -355,25 +320,45 @@ func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error { // Acquire a lock to read the stats. c.cpuStatsMutex.Lock() defer c.cpuStatsMutex.Unlock() - for cpuID, cpuStat := range c.cpuStats { + for cpuID, _ := range c.cpuStats { cpuNum := strconv.Itoa(cpuID) - isIsolated := "0" + isIsolated := 0.0 if contains(c.isolatedCpus, uint16(cpuID)) { - isIsolated = "1" + isIsolated = 1.0 } - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.User, cpuNum, "user", isIsolated) - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Nice, cpuNum, "nice", isIsolated) - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.System, cpuNum, "system", isIsolated) - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Idle, cpuNum, "idle", isIsolated) - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Iowait, cpuNum, "iowait", isIsolated) - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.IRQ, cpuNum, "irq", isIsolated) - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.SoftIRQ, cpuNum, "softirq", isIsolated) - ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Steal, cpuNum, "steal", isIsolated) + ch <- prometheus.MustNewConstMetric(c.cpuIsolated, prometheus.GaugeValue, isIsolated, cpuNum) + } + + return nil +} + +// updateStat reads /proc/stat through procfs and exports CPU-related metrics. +func (c *cpuCollector) updateStat(ch chan<- prometheus.Metric) error { + stats, err := c.fs.Stat() + if err != nil { + return err + } + + c.updateCPUStats(stats.CPU) + + // Acquire a lock to read the stats. + c.cpuStatsMutex.Lock() + defer c.cpuStatsMutex.Unlock() + for cpuID, cpuStat := range c.cpuStats { + cpuNum := strconv.Itoa(cpuID) + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.User, cpuNum, "user") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Nice, cpuNum, "nice") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.System, cpuNum, "system") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Idle, cpuNum, "idle") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Iowait, cpuNum, "iowait") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.IRQ, cpuNum, "irq") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.SoftIRQ, cpuNum, "softirq") + ch <- prometheus.MustNewConstMetric(c.cpu, prometheus.CounterValue, cpuStat.Steal, cpuNum, "steal") if *enableCPUGuest { // Guest CPU is also accounted for in cpuStat.User and cpuStat.Nice, expose these as separate metrics. - ch <- prometheus.MustNewConstMetric(c.cpuGuest, prometheus.CounterValue, cpuStat.Guest, cpuNum, "user", isIsolated) - ch <- prometheus.MustNewConstMetric(c.cpuGuest, prometheus.CounterValue, cpuStat.GuestNice, cpuNum, "nice", isIsolated) + ch <- prometheus.MustNewConstMetric(c.cpuGuest, prometheus.CounterValue, cpuStat.Guest, cpuNum, "user") + ch <- prometheus.MustNewConstMetric(c.cpuGuest, prometheus.CounterValue, cpuStat.GuestNice, cpuNum, "nice") } } diff --git a/collector/cpu_linux_test.go b/collector/cpu_linux_test.go index 7bcd5e4917..93b493b2e9 100644 --- a/collector/cpu_linux_test.go +++ b/collector/cpu_linux_test.go @@ -104,31 +104,3 @@ func TestCPU(t *testing.T) { t.Fatalf("should have %v CPU Stat: got %v", resetIdle, got) } } -func TestIsolatedParsingCPU(t *testing.T) { - var testParams = []struct { - in []byte - res []uint16 - err error - }{ - {[]byte(""), []uint16{}, nil}, - {[]byte("1\n"), []uint16{1}, nil}, - {[]byte("1"), []uint16{1}, nil}, - {[]byte("1,2"), []uint16{1, 2}, nil}, - {[]byte("1-2"), []uint16{1, 2}, nil}, - {[]byte("1-3"), []uint16{1, 2, 3}, nil}, - {[]byte("1,2-4"), []uint16{1, 2, 3, 4}, nil}, - {[]byte("1,3-4"), []uint16{1, 3, 4}, nil}, - {[]byte("1,3-4,7,20-21"), []uint16{1, 3, 4, 7, 20, 21}, nil}, - } - for _, params := range testParams { - t.Run("blabla", func(t *testing.T) { - res, err := parseIsolCpus(params.in) - if !reflect.DeepEqual(res, params.res) { - t.Fatalf("should have %v result: got %v", params.res, res) - } - if err != params.err { - t.Fatalf("should have %v error: got %v", params.err, err) - } - }) - } -} From 698670bb6eabccc4ee6da426a391ed158166650d Mon Sep 17 00:00:00 2001 From: david Date: Thu, 16 Dec 2021 10:59:26 +0100 Subject: [PATCH 03/10] add fixture & e2e output Signed-off-by: david --- collector/fixtures/e2e-output.txt | 10 ++++++++++ collector/fixtures/sys.ttar | 5 +++++ 2 files changed, 15 insertions(+) diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index 97c899acb2..d377115fd8 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -319,6 +319,16 @@ node_cpu_info{cachesize="8192 KB",core="2",cpu="2",family="6",microcode="0xb4",m node_cpu_info{cachesize="8192 KB",core="2",cpu="6",family="6",microcode="0xb4",model="142",model_name="Intel(R) Core(TM) i7-8650U CPU @ 1.90GHz",package="0",stepping="10",vendor="GenuineIntel"} 1 node_cpu_info{cachesize="8192 KB",core="3",cpu="3",family="6",microcode="0xb4",model="142",model_name="Intel(R) Core(TM) i7-8650U CPU @ 1.90GHz",package="0",stepping="10",vendor="GenuineIntel"} 1 node_cpu_info{cachesize="8192 KB",core="3",cpu="7",family="6",microcode="0xb4",model="142",model_name="Intel(R) Core(TM) i7-8650U CPU @ 1.90GHz",package="0",stepping="10",vendor="GenuineIntel"} 1 +# HELP node_cpu_isolated Whether each core is isolated, information from /sys/devices/system/cpu/isolated. +# TYPE node_cpu_isolated gauge +node_cpu_isolated{cpu="0"} 0 +node_cpu_isolated{cpu="1"} 1 +node_cpu_isolated{cpu="2"} 0 +node_cpu_isolated{cpu="3"} 1 +node_cpu_isolated{cpu="4"} 1 +node_cpu_isolated{cpu="5"} 1 +node_cpu_isolated{cpu="6"} 0 +node_cpu_isolated{cpu="7"} 0 # HELP node_cpu_package_throttles_total Number of times this CPU package has been throttled. # TYPE node_cpu_package_throttles_total counter node_cpu_package_throttles_total{package="0"} 30 diff --git a/collector/fixtures/sys.ttar b/collector/fixtures/sys.ttar index 72a97b4133..b837f3dd47 100644 --- a/collector/fixtures/sys.ttar +++ b/collector/fixtures/sys.ttar @@ -3540,6 +3540,11 @@ Lines: 1 1 Mode: 644 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +Path: sys/devices/system/cpu/isolated +Lines: 1 +1,3-5,9 +Mode: 664 +# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Directory: sys/devices/system/edac Mode: 755 # ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - From 9ea9a5f029c62d49a4d2e31d7f516ace67d73760 Mon Sep 17 00:00:00 2001 From: david Date: Thu, 16 Dec 2021 13:20:20 +0100 Subject: [PATCH 04/10] only publish metrics for isolated cpus Signed-off-by: david --- collector/cpu_linux.go | 29 +++-------------------------- collector/fixtures/e2e-output.txt | 5 +---- 2 files changed, 4 insertions(+), 30 deletions(-) diff --git a/collector/cpu_linux.go b/collector/cpu_linux.go index 63c3768f23..017ffec6df 100644 --- a/collector/cpu_linux.go +++ b/collector/cpu_linux.go @@ -299,34 +299,11 @@ func (c *cpuCollector) updateThermalThrottle(ch chan<- prometheus.Metric) error return nil } -func contains(s []uint16, e uint16) bool { - for _, a := range s { - if a == e { - return true - } - } - return false -} - // updateStat reads /proc/stat through procfs and exports CPU-related metrics. func (c *cpuCollector) updateIsolated(ch chan<- prometheus.Metric) error { - stats, err := c.fs.Stat() - if err != nil { - return err - } - - c.updateCPUStats(stats.CPU) - - // Acquire a lock to read the stats. - c.cpuStatsMutex.Lock() - defer c.cpuStatsMutex.Unlock() - for cpuID, _ := range c.cpuStats { - cpuNum := strconv.Itoa(cpuID) - isIsolated := 0.0 - if contains(c.isolatedCpus, uint16(cpuID)) { - isIsolated = 1.0 - } - ch <- prometheus.MustNewConstMetric(c.cpuIsolated, prometheus.GaugeValue, isIsolated, cpuNum) + for _, cpu := range c.isolatedCpus { + cpuNum := strconv.Itoa(int(cpu)) + ch <- prometheus.MustNewConstMetric(c.cpuIsolated, prometheus.GaugeValue, 1.0, cpuNum) } return nil diff --git a/collector/fixtures/e2e-output.txt b/collector/fixtures/e2e-output.txt index d377115fd8..8a98a16155 100644 --- a/collector/fixtures/e2e-output.txt +++ b/collector/fixtures/e2e-output.txt @@ -321,14 +321,11 @@ node_cpu_info{cachesize="8192 KB",core="3",cpu="3",family="6",microcode="0xb4",m node_cpu_info{cachesize="8192 KB",core="3",cpu="7",family="6",microcode="0xb4",model="142",model_name="Intel(R) Core(TM) i7-8650U CPU @ 1.90GHz",package="0",stepping="10",vendor="GenuineIntel"} 1 # HELP node_cpu_isolated Whether each core is isolated, information from /sys/devices/system/cpu/isolated. # TYPE node_cpu_isolated gauge -node_cpu_isolated{cpu="0"} 0 node_cpu_isolated{cpu="1"} 1 -node_cpu_isolated{cpu="2"} 0 node_cpu_isolated{cpu="3"} 1 node_cpu_isolated{cpu="4"} 1 node_cpu_isolated{cpu="5"} 1 -node_cpu_isolated{cpu="6"} 0 -node_cpu_isolated{cpu="7"} 0 +node_cpu_isolated{cpu="9"} 1 # HELP node_cpu_package_throttles_total Number of times this CPU package has been throttled. # TYPE node_cpu_package_throttles_total counter node_cpu_package_throttles_total{package="0"} 30 From c05af934af4796954268a109779efccc681e93ae Mon Sep 17 00:00:00 2001 From: david Date: Thu, 16 Dec 2021 14:00:54 +0100 Subject: [PATCH 05/10] warn if isolcpus cannot be read and default to an empty slice Signed-off-by: david --- collector/cpu_linux.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/collector/cpu_linux.go b/collector/cpu_linux.go index 017ffec6df..565d4c3088 100644 --- a/collector/cpu_linux.go +++ b/collector/cpu_linux.go @@ -79,7 +79,8 @@ func NewCPUCollector(logger log.Logger) (Collector, error) { isolcpus, err := sysfs.IsolatedCPUs() if err != nil { - return nil, fmt.Errorf("failed to read isolcpus from sysfs: %w", err) + level.Warn(logger).Log("msg", "Unable to get isolated cpus, defaulting to []") + isolcpus = []uint16{} } c := &cpuCollector{ From 5340d1ec373e6c47493901dc70a318719781673d Mon Sep 17 00:00:00 2001 From: david Date: Fri, 17 Dec 2021 21:31:53 +0100 Subject: [PATCH 06/10] add debug log for not existent file Signed-off-by: david --- collector/cpu_linux.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/collector/cpu_linux.go b/collector/cpu_linux.go index 565d4c3088..f680871da5 100644 --- a/collector/cpu_linux.go +++ b/collector/cpu_linux.go @@ -18,6 +18,7 @@ package collector import ( "fmt" + "os" "path/filepath" "regexp" "strconv" @@ -79,8 +80,11 @@ func NewCPUCollector(logger log.Logger) (Collector, error) { isolcpus, err := sysfs.IsolatedCPUs() if err != nil { - level.Warn(logger).Log("msg", "Unable to get isolated cpus, defaulting to []") - isolcpus = []uint16{} + if os.IsNotExist(err) { + level.Debug(logger).Log("msg", "Could not open isolated file", "error", err) + } else { + return nil, fmt.Errorf("Unable to get isolated cpus: %w", err) + } } c := &cpuCollector{ From 840d32622fbfc6b3a796a4b7d3e7b48f54da8c37 Mon Sep 17 00:00:00 2001 From: david Date: Fri, 17 Dec 2021 21:32:17 +0100 Subject: [PATCH 07/10] check for nil isolatedCpus before calling updateIsolated Signed-off-by: david --- collector/cpu_linux.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/collector/cpu_linux.go b/collector/cpu_linux.go index f680871da5..afc3a23c39 100644 --- a/collector/cpu_linux.go +++ b/collector/cpu_linux.go @@ -167,8 +167,8 @@ func (c *cpuCollector) Update(ch chan<- prometheus.Metric) error { if err := c.updateStat(ch); err != nil { return err } - if err := c.updateIsolated(ch); err != nil { - return err + if c.isolatedCpus != nil { + c.updateIsolated(ch) } return c.updateThermalThrottle(ch) } From 75c05f3d97827a55282452089a6f20c2279319a9 Mon Sep 17 00:00:00 2001 From: david Date: Fri, 17 Dec 2021 21:32:37 +0100 Subject: [PATCH 08/10] remove error from signature; update doc for function Signed-off-by: david --- collector/cpu_linux.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/collector/cpu_linux.go b/collector/cpu_linux.go index afc3a23c39..a8089c3169 100644 --- a/collector/cpu_linux.go +++ b/collector/cpu_linux.go @@ -304,14 +304,12 @@ func (c *cpuCollector) updateThermalThrottle(ch chan<- prometheus.Metric) error return nil } -// updateStat reads /proc/stat through procfs and exports CPU-related metrics. -func (c *cpuCollector) updateIsolated(ch chan<- prometheus.Metric) error { +// updateIsolated reads /sys/devices/system/cpu/isolated through sysfs and exports isolation level metrics. +func (c *cpuCollector) updateIsolated(ch chan<- prometheus.Metric) { for _, cpu := range c.isolatedCpus { cpuNum := strconv.Itoa(int(cpu)) ch <- prometheus.MustNewConstMetric(c.cpuIsolated, prometheus.GaugeValue, 1.0, cpuNum) } - - return nil } // updateStat reads /proc/stat through procfs and exports CPU-related metrics. From c2085cf8caf8bbac753539911aad7e53dcaacd16 Mon Sep 17 00:00:00 2001 From: david Date: Mon, 20 Dec 2021 18:28:12 +0100 Subject: [PATCH 09/10] flip branches for early return Signed-off-by: david --- collector/cpu_linux.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/collector/cpu_linux.go b/collector/cpu_linux.go index a8089c3169..933774bc3e 100644 --- a/collector/cpu_linux.go +++ b/collector/cpu_linux.go @@ -80,11 +80,10 @@ func NewCPUCollector(logger log.Logger) (Collector, error) { isolcpus, err := sysfs.IsolatedCPUs() if err != nil { - if os.IsNotExist(err) { - level.Debug(logger).Log("msg", "Could not open isolated file", "error", err) - } else { + if !os.IsNotExist(err) { return nil, fmt.Errorf("Unable to get isolated cpus: %w", err) } + level.Debug(logger).Log("msg", "Could not open isolated file", "error", err) } c := &cpuCollector{ From 6477a197da1a4c68ab7cef9468c66a21c078d266 Mon Sep 17 00:00:00 2001 From: DavidVentura Date: Tue, 26 Jul 2022 12:25:23 +0200 Subject: [PATCH 10/10] adjust expected output for 64k file Signed-off-by: DavidVentura --- collector/fixtures/e2e-64k-page-output.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/collector/fixtures/e2e-64k-page-output.txt b/collector/fixtures/e2e-64k-page-output.txt index 7e0ef4c64a..fba44e551b 100644 --- a/collector/fixtures/e2e-64k-page-output.txt +++ b/collector/fixtures/e2e-64k-page-output.txt @@ -297,6 +297,13 @@ node_cpu_guest_seconds_total{cpu="6",mode="nice"} 0.07 node_cpu_guest_seconds_total{cpu="6",mode="user"} 0.08 node_cpu_guest_seconds_total{cpu="7",mode="nice"} 0.08 node_cpu_guest_seconds_total{cpu="7",mode="user"} 0.09 +# HELP node_cpu_isolated Whether each core is isolated, information from /sys/devices/system/cpu/isolated. +# TYPE node_cpu_isolated gauge +node_cpu_isolated{cpu="1"} 1 +node_cpu_isolated{cpu="3"} 1 +node_cpu_isolated{cpu="4"} 1 +node_cpu_isolated{cpu="5"} 1 +node_cpu_isolated{cpu="9"} 1 # HELP node_cpu_package_throttles_total Number of times this CPU package has been throttled. # TYPE node_cpu_package_throttles_total counter node_cpu_package_throttles_total{package="0"} 30