diff --git a/cadvisor.go b/cadvisor.go index f35af67fff..ff84b4d516 100644 --- a/cadvisor.go +++ b/cadvisor.go @@ -72,6 +72,7 @@ var ( container.NetworkUsageMetrics: struct{}{}, container.NetworkTcpUsageMetrics: struct{}{}, container.NetworkUdpUsageMetrics: struct{}{}, + container.PerCpuUsageMetrics: struct{}{}, } ) @@ -103,7 +104,7 @@ func (ml *metricSetValue) Set(value string) error { } func init() { - flag.Var(&ignoreMetrics, "disable_metrics", "comma-separated list of `metrics` to be disabled. Options are 'disk', 'network', 'tcp', 'udp'. Note: tcp and udp are disabled by default due to high CPU usage.") + flag.Var(&ignoreMetrics, "disable_metrics", "comma-separated list of `metrics` to be disabled. Options are 'disk', 'network', 'tcp', 'udp', 'percpu'. Note: tcp and udp are disabled by default due to high CPU usage.") // Default logging verbosity to V(2) flag.Set("v", "2") diff --git a/container/factory.go b/container/factory.go index befb4a9e63..07445f9dd7 100644 --- a/container/factory.go +++ b/container/factory.go @@ -42,6 +42,7 @@ type MetricKind string const ( CpuUsageMetrics MetricKind = "cpu" + PerCpuUsageMetrics MetricKind = "percpu" MemoryUsageMetrics MetricKind = "memory" CpuLoadMetrics MetricKind = "cpuLoad" DiskIOMetrics MetricKind = "diskIO" diff --git a/container/libcontainer/helpers.go b/container/libcontainer/helpers.go index c2194ac34a..2f9270758b 100644 --- a/container/libcontainer/helpers.go +++ b/container/libcontainer/helpers.go @@ -113,7 +113,8 @@ func GetStats(cgroupManager cgroups.Manager, rootFs string, pid int, ignoreMetri libcontainerStats := &libcontainer.Stats{ CgroupStats: cgroupStats, } - stats := newContainerStats(libcontainerStats) + withPerCPU := !ignoreMetrics.Has(container.PerCpuUsageMetrics) + stats := newContainerStats(libcontainerStats, withPerCPU) // If we know the pid then get network stats from /proc/<pid>/net/dev if pid == 0 { @@ -467,14 +468,17 @@ func minUint32(x, y uint32) uint32 { var numCpusFunc = getNumberOnlineCPUs 
// Convert libcontainer stats to info.ContainerStats. -func setCpuStats(s *cgroups.Stats, ret *info.ContainerStats) { +func setCpuStats(s *cgroups.Stats, ret *info.ContainerStats, withPerCPU bool) { ret.Cpu.Usage.User = s.CpuStats.CpuUsage.UsageInUsermode ret.Cpu.Usage.System = s.CpuStats.CpuUsage.UsageInKernelmode - ret.Cpu.Usage.Total = 0 + ret.Cpu.Usage.Total = s.CpuStats.CpuUsage.TotalUsage ret.Cpu.CFS.Periods = s.CpuStats.ThrottlingData.Periods ret.Cpu.CFS.ThrottledPeriods = s.CpuStats.ThrottlingData.ThrottledPeriods ret.Cpu.CFS.ThrottledTime = s.CpuStats.ThrottlingData.ThrottledTime + if !withPerCPU { + return + } if len(s.CpuStats.CpuUsage.PercpuUsage) == 0 { // libcontainer's 'GetStats' can leave 'PercpuUsage' nil if it skipped the // cpuacct subsystem. @@ -501,7 +505,6 @@ func setCpuStats(s *cgroups.Stats, ret *info.ContainerStats) { for i := uint32(0); i < numActual; i++ { ret.Cpu.Usage.PerCpu[i] = s.CpuStats.CpuUsage.PercpuUsage[i] - ret.Cpu.Usage.Total += s.CpuStats.CpuUsage.PercpuUsage[i] } } @@ -587,13 +590,13 @@ func setNetworkStats(libcontainerStats *libcontainer.Stats, ret *info.ContainerS } } -func newContainerStats(libcontainerStats *libcontainer.Stats) *info.ContainerStats { +func newContainerStats(libcontainerStats *libcontainer.Stats, withPerCPU bool) *info.ContainerStats { ret := &info.ContainerStats{ Timestamp: time.Now(), } if s := libcontainerStats.CgroupStats; s != nil { - setCpuStats(s, ret) + setCpuStats(s, ret, withPerCPU) setDiskIoStats(s, ret) setMemoryStats(s, ret) } diff --git a/container/libcontainer/helpers_test.go b/container/libcontainer/helpers_test.go index ffde8e2bd6..b2d15a502f 100644 --- a/container/libcontainer/helpers_test.go +++ b/container/libcontainer/helpers_test.go @@ -122,7 +122,7 @@ func TestMorePossibleCPUs(t *testing.T) { }, } var ret info.ContainerStats - setCpuStats(s, &ret) + setCpuStats(s, &ret, true) expected := info.ContainerStats{ Cpu: info.CpuStats{ @@ -130,7 +130,7 @@ func TestMorePossibleCPUs(t 
*testing.T) { PerCpu: perCpuUsage[0:realNumCPUs], User: s.CpuStats.CpuUsage.UsageInUsermode, System: s.CpuStats.CpuUsage.UsageInKernelmode, - Total: 8562955455524 * uint64(realNumCPUs), + Total: 33802947350272, }, }, } diff --git a/metrics/prometheus.go b/metrics/prometheus.go index 2dd7747b83..4354071b81 100644 --- a/metrics/prometheus.go +++ b/metrics/prometheus.go @@ -150,10 +150,18 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc) *PrometheusCo }, }, { name: "container_cpu_usage_seconds_total", - help: "Cumulative cpu time consumed per cpu in seconds.", + help: "Cumulative cpu time consumed in seconds.", valueType: prometheus.CounterValue, extraLabels: []string{"cpu"}, getValues: func(s *info.ContainerStats) metricValues { + if len(s.Cpu.Usage.PerCpu) == 0 { + if s.Cpu.Usage.Total > 0 { + return metricValues{{ + value: float64(s.Cpu.Usage.Total) / float64(time.Second), + labels: []string{"total"}, + }} + } + } values := make(metricValues, 0, len(s.Cpu.Usage.PerCpu)) for i, value := range s.Cpu.Usage.PerCpu { if value > 0 { diff --git a/metrics/testdata/prometheus_metrics b/metrics/testdata/prometheus_metrics index 5b8d0ac9bd..26438abd86 100644 --- a/metrics/testdata/prometheus_metrics +++ b/metrics/testdata/prometheus_metrics @@ -28,7 +28,7 @@ container_cpu_load_average_10s{container_env_foo_env="prod",container_label_foo_ # HELP container_cpu_system_seconds_total Cumulative system cpu time consumed in seconds. # TYPE container_cpu_system_seconds_total counter container_cpu_system_seconds_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 7e-09 -# HELP container_cpu_usage_seconds_total Cumulative cpu time consumed per cpu in seconds. +# HELP container_cpu_usage_seconds_total Cumulative cpu time consumed in seconds. 
# TYPE container_cpu_usage_seconds_total counter container_cpu_usage_seconds_total{container_env_foo_env="prod",container_label_foo_label="bar",cpu="cpu00",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 2e-09 container_cpu_usage_seconds_total{container_env_foo_env="prod",container_label_foo_label="bar",cpu="cpu01",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 3e-09