From 3688650a6b39df50917437a1ed8661dbf5e52309 Mon Sep 17 00:00:00 2001 From: Lantao Liu Date: Fri, 23 Feb 2018 22:20:38 +0000 Subject: [PATCH] Add CPU/Memory pod stats for CRI stats. Signed-off-by: Lantao Liu --- pkg/kubelet/stats/BUILD | 1 + pkg/kubelet/stats/cadvisor_stats_provider.go | 6 +-- pkg/kubelet/stats/cri_stats_provider.go | 48 ++++++++++++-------- pkg/kubelet/stats/cri_stats_provider_test.go | 22 +++++++++ 4 files changed, 55 insertions(+), 22 deletions(-) diff --git a/pkg/kubelet/stats/BUILD b/pkg/kubelet/stats/BUILD index 1a991f578246e..8bbca05b29ebb 100644 --- a/pkg/kubelet/stats/BUILD +++ b/pkg/kubelet/stats/BUILD @@ -99,6 +99,7 @@ go_test( "//pkg/kubelet/apis/cri/testing:go_default_library", "//pkg/kubelet/apis/stats/v1alpha1:go_default_library", "//pkg/kubelet/cadvisor/testing:go_default_library", + "//pkg/kubelet/cm:go_default_library", "//pkg/kubelet/container:go_default_library", "//pkg/kubelet/container/testing:go_default_library", "//pkg/kubelet/kuberuntime:go_default_library", diff --git a/pkg/kubelet/stats/cadvisor_stats_provider.go b/pkg/kubelet/stats/cadvisor_stats_provider.go index 891a38f5915ba..67da08d408d4c 100644 --- a/pkg/kubelet/stats/cadvisor_stats_provider.go +++ b/pkg/kubelet/stats/cadvisor_stats_provider.go @@ -133,7 +133,7 @@ func (p *cadvisorStatsProvider) ListPodStats() ([]statsapi.PodStats, error) { } podStats.EphemeralStorage = calcEphemeralStorage(podStats.Containers, ephemeralStats, &rootFsInfo) // Lookup the pod-level cgroup's CPU and memory stats - podInfo := getcadvisorPodInfoFromPodUID(podUID, allInfos) + podInfo := getCadvisorPodInfoFromPodUID(podUID, allInfos) if podInfo != nil { cpu, memory := cadvisorInfoToCPUandMemoryStats(podInfo) podStats.CPU = cpu @@ -251,8 +251,8 @@ func isPodManagedContainer(cinfo *cadvisorapiv2.ContainerInfo) bool { return managed } -// getcadvisorPodInfoFromPodUID returns a pod cgroup information by matching the podUID with its CgroupName identifier base name -func getcadvisorPodInfoFromPodUID(podUID types.UID, infos map[string]cadvisorapiv2.ContainerInfo) *cadvisorapiv2.ContainerInfo { +// getCadvisorPodInfoFromPodUID returns a pod cgroup information by matching the podUID with its CgroupName identifier base name +func getCadvisorPodInfoFromPodUID(podUID types.UID, infos map[string]cadvisorapiv2.ContainerInfo) *cadvisorapiv2.ContainerInfo { for key, info := range infos { if cm.IsSystemdStyleName(key) { key = cm.RevertFromSystemdToCgroupStyleName(key) diff --git a/pkg/kubelet/stats/cri_stats_provider.go b/pkg/kubelet/stats/cri_stats_provider.go index 64955edb47055..1ed2e300efd09 100644 --- a/pkg/kubelet/stats/cri_stats_provider.go +++ b/pkg/kubelet/stats/cri_stats_provider.go @@ -119,23 +119,22 @@ func (p *criStatsProvider) ListPodStats() ([]statsapi.PodStats, error) { containerMap[c.Id] = c } - caInfos, err := getCRICadvisorStats(p.cadvisor) + allInfos, err := getCadvisorContainerInfo(p.cadvisor) if err != nil { - return nil, fmt.Errorf("failed to get container info from cadvisor: %v", err) + return nil, fmt.Errorf("failed to fetch cadvisor stats: %v", err) } + caInfos := getCRICadvisorStats(allInfos) for _, stats := range resp { containerID := stats.Attributes.Id container, found := containerMap[containerID] if !found { - glog.Errorf("Unable to find container id %q in container stats list", containerID) continue } podSandboxID := container.PodSandboxId podSandbox, found := podSandboxMap[podSandboxID] if !found { - glog.Errorf("Unable to find pod sandbox id %q in pod stats list", podSandboxID) continue } @@ -145,12 +144,8 @@ func (p *criStatsProvider) ListPodStats() ([]statsapi.PodStats, error) { if !found { ps = buildPodStats(podSandbox) // Fill stats from cadvisor is available for full set of required pod stats - caPodSandbox, found := caInfos[podSandboxID] - if !found { - glog.V(4).Infof("Unable to find cadvisor stats for sandbox %q", podSandboxID) - } else { - p.addCadvisorPodStats(ps, &caPodSandbox) - } + p.addCadvisorPodNetworkStats(ps, podSandboxID, caInfos) + p.addCadvisorPodCPUMemoryStats(ps, types.UID(podSandbox.Metadata.Uid), allInfos) sandboxIDToPodStats[podSandboxID] = ps } cs := p.makeContainerStats(stats, container, &rootFsInfo, fsIDtoInfo, podSandbox.GetMetadata().GetUid()) @@ -269,11 +264,30 @@ func (p *criStatsProvider) makePodStorageStats(s *statsapi.PodStats, rootFsInfo return s } -func (p *criStatsProvider) addCadvisorPodStats( +func (p *criStatsProvider) addCadvisorPodNetworkStats( ps *statsapi.PodStats, - caPodSandbox *cadvisorapiv2.ContainerInfo, + podSandboxID string, + caInfos map[string]cadvisorapiv2.ContainerInfo, ) { - ps.Network = cadvisorInfoToNetworkStats(ps.PodRef.Name, caPodSandbox) + caPodSandbox, found := caInfos[podSandboxID] + if found { + ps.Network = cadvisorInfoToNetworkStats(ps.PodRef.Name, &caPodSandbox) + } else { + glog.V(4).Infof("Unable to find cadvisor stats for sandbox %q", podSandboxID) + } +} + +func (p *criStatsProvider) addCadvisorPodCPUMemoryStats( + ps *statsapi.PodStats, + podUID types.UID, + allInfos map[string]cadvisorapiv2.ContainerInfo, +) { + podCgroupInfo := getCadvisorPodInfoFromPodUID(podUID, allInfos) + if podCgroupInfo != nil { + cpu, memory := cadvisorInfoToCPUandMemoryStats(podCgroupInfo) + ps.CPU = cpu + ps.Memory = memory + } } func (p *criStatsProvider) makeContainerStats( @@ -395,12 +409,8 @@ func (p *criStatsProvider) addCadvisorContainerStats( } } -func getCRICadvisorStats(ca cadvisor.Interface) (map[string]cadvisorapiv2.ContainerInfo, error) { +func getCRICadvisorStats(infos map[string]cadvisorapiv2.ContainerInfo) map[string]cadvisorapiv2.ContainerInfo { stats := make(map[string]cadvisorapiv2.ContainerInfo) - infos, err := getCadvisorContainerInfo(ca) - if err != nil { - return nil, fmt.Errorf("failed to fetch cadvisor stats: %v", err) - } infos = removeTerminatedContainerInfo(infos) for key, info := range infos { // On systemd using devicemapper each mount into the container has an @@ -416,7 +426,7 @@ func getCRICadvisorStats(ca cadvisor.Interface) (map[string]cadvisorapiv2.Contai } stats[path.Base(key)] = info } - return stats, nil + return stats } // TODO Cache the metrics in container log manager diff --git a/pkg/kubelet/stats/cri_stats_provider_test.go b/pkg/kubelet/stats/cri_stats_provider_test.go index 1a790f5563128..3ce0b2378519e 100644 --- a/pkg/kubelet/stats/cri_stats_provider_test.go +++ b/pkg/kubelet/stats/cri_stats_provider_test.go @@ -31,6 +31,7 @@ import ( critest "k8s.io/kubernetes/pkg/kubelet/apis/cri/testing" statsapi "k8s.io/kubernetes/pkg/kubelet/apis/stats/v1alpha1" cadvisortest "k8s.io/kubernetes/pkg/kubelet/cadvisor/testing" + "k8s.io/kubernetes/pkg/kubelet/cm" kubecontainertest "k8s.io/kubernetes/pkg/kubelet/container/testing" "k8s.io/kubernetes/pkg/kubelet/kuberuntime" "k8s.io/kubernetes/pkg/kubelet/leaky" @@ -78,6 +79,7 @@ func TestCRIListPodStats(t *testing.T) { rootFsInfo = getTestFsInfo(1000) sandbox0 = makeFakePodSandbox("sandbox0-name", "sandbox0-uid", "sandbox0-ns") + sandbox0Cgroup = "/" + cm.GetPodCgroupNameSuffix(types.UID(sandbox0.PodSandboxStatus.Metadata.Uid)) container0 = makeFakeContainer(sandbox0, cName0, 0, false) containerStats0 = makeFakeContainerStats(container0, imageFsMountpoint) containerLogStats0 = makeFakeLogStats(1000) @@ -86,11 +88,13 @@ func TestCRIListPodStats(t *testing.T) { containerLogStats1 = makeFakeLogStats(2000) sandbox1 = makeFakePodSandbox("sandbox1-name", "sandbox1-uid", "sandbox1-ns") + sandbox1Cgroup = "/" + cm.GetPodCgroupNameSuffix(types.UID(sandbox1.PodSandboxStatus.Metadata.Uid)) container2 = makeFakeContainer(sandbox1, cName2, 0, false) containerStats2 = makeFakeContainerStats(container2, imageFsMountpoint) containerLogStats2 = makeFakeLogStats(3000) sandbox2 = makeFakePodSandbox("sandbox2-name", "sandbox2-uid", "sandbox2-ns") + sandbox2Cgroup = "/" + cm.GetPodCgroupNameSuffix(types.UID(sandbox2.PodSandboxStatus.Metadata.Uid)) container3 = makeFakeContainer(sandbox2, cName3, 0, true) containerStats3 = makeFakeContainerStats(container3, imageFsMountpoint) container4 = makeFakeContainer(sandbox2, cName3, 1, false) @@ -112,11 +116,14 @@ func TestCRIListPodStats(t *testing.T) { "/kubelet": getTestContainerInfo(seedKubelet, "", "", ""), "/system": getTestContainerInfo(seedMisc, "", "", ""), sandbox0.PodSandboxStatus.Id: getTestContainerInfo(seedSandbox0, pName0, sandbox0.PodSandboxStatus.Metadata.Namespace, leaky.PodInfraContainerName), + sandbox0Cgroup: getTestContainerInfo(seedSandbox0, "", "", ""), container0.ContainerStatus.Id: getTestContainerInfo(seedContainer0, pName0, sandbox0.PodSandboxStatus.Metadata.Namespace, cName0), container1.ContainerStatus.Id: getTestContainerInfo(seedContainer1, pName0, sandbox0.PodSandboxStatus.Metadata.Namespace, cName1), sandbox1.PodSandboxStatus.Id: getTestContainerInfo(seedSandbox1, pName1, sandbox1.PodSandboxStatus.Metadata.Namespace, leaky.PodInfraContainerName), + sandbox1Cgroup: getTestContainerInfo(seedSandbox1, "", "", ""), container2.ContainerStatus.Id: getTestContainerInfo(seedContainer2, pName1, sandbox1.PodSandboxStatus.Metadata.Namespace, cName2), sandbox2.PodSandboxStatus.Id: getTestContainerInfo(seedSandbox2, pName2, sandbox2.PodSandboxStatus.Metadata.Namespace, leaky.PodInfraContainerName), + sandbox2Cgroup: getTestContainerInfo(seedSandbox2, "", "", ""), container4.ContainerStatus.Id: getTestContainerInfo(seedContainer3, pName2, sandbox2.PodSandboxStatus.Metadata.Namespace, cName3), } @@ -199,6 +206,7 @@ func TestCRIListPodStats(t *testing.T) { checkCRIRootfsStats(assert, c1, containerStats1, nil) checkCRILogsStats(assert, c1, &rootFsInfo, containerLogStats1) checkCRINetworkStats(assert, p0.Network, infos[sandbox0.PodSandboxStatus.Id].Stats[0].Network) + checkCRIPodCPUAndMemoryStats(assert, p0, infos[sandbox0Cgroup].Stats[0]) p1 := podStatsMap[statsapi.PodReference{Name: "sandbox1-name", UID: "sandbox1-uid", Namespace: "sandbox1-ns"}] assert.Equal(sandbox1.CreatedAt, p1.StartTime.UnixNano()) @@ -212,6 +220,7 @@ func TestCRIListPodStats(t *testing.T) { checkCRIRootfsStats(assert, c2, containerStats2, &imageFsInfo) checkCRILogsStats(assert, c2, &rootFsInfo, containerLogStats2) checkCRINetworkStats(assert, p1.Network, infos[sandbox1.PodSandboxStatus.Id].Stats[0].Network) + checkCRIPodCPUAndMemoryStats(assert, p1, infos[sandbox1Cgroup].Stats[0]) p2 := podStatsMap[statsapi.PodReference{Name: "sandbox2-name", UID: "sandbox2-uid", Namespace: "sandbox2-ns"}] assert.Equal(sandbox2.CreatedAt, p2.StartTime.UnixNano()) @@ -227,6 +236,7 @@ func TestCRIListPodStats(t *testing.T) { checkCRILogsStats(assert, c3, &rootFsInfo, containerLogStats4) checkCRINetworkStats(assert, p2.Network, infos[sandbox2.PodSandboxStatus.Id].Stats[0].Network) + checkCRIPodCPUAndMemoryStats(assert, p2, infos[sandbox2Cgroup].Stats[0]) mockCadvisor.AssertExpectations(t) } @@ -453,6 +463,18 @@ func checkCRINetworkStats(assert *assert.Assertions, actual *statsapi.NetworkSta assert.Equal(expected.Interfaces[0].TxErrors, *actual.TxErrors) } +func checkCRIPodCPUAndMemoryStats(assert *assert.Assertions, actual statsapi.PodStats, cs *cadvisorapiv2.ContainerStats) { + assert.Equal(cs.Timestamp.UnixNano(), actual.CPU.Time.UnixNano()) + assert.Equal(cs.Cpu.Usage.Total, *actual.CPU.UsageCoreNanoSeconds) + assert.Equal(cs.CpuInst.Usage.Total, *actual.CPU.UsageNanoCores) + + assert.Equal(cs.Memory.Usage, *actual.Memory.UsageBytes) + assert.Equal(cs.Memory.WorkingSet, *actual.Memory.WorkingSetBytes) + assert.Equal(cs.Memory.RSS, *actual.Memory.RSSBytes) + assert.Equal(cs.Memory.ContainerData.Pgfault, *actual.Memory.PageFaults) + assert.Equal(cs.Memory.ContainerData.Pgmajfault, *actual.Memory.MajorPageFaults) +} + func makeFakeLogStats(seed int) *volume.Metrics { m := &volume.Metrics{} m.Used = resource.NewQuantity(int64(seed+offsetUsage), resource.BinarySI)