diff --git a/internal/aws/containerinsight/const.go b/internal/aws/containerinsight/const.go index adaaa589321d..11e68f2f0706 100644 --- a/internal/aws/containerinsight/const.go +++ b/internal/aws/containerinsight/const.go @@ -120,6 +120,9 @@ const ( TypeContainer = "Container" TypeContainerFS = "ContainerFS" TypeContainerDiskIO = "ContainerDiskIO" + // Special type for the pause container, + // because containerd does not set the pause container's name to POD like docker does. + TypeInfraContainer = "InfraContainer" // unit UnitBytes = "Bytes" diff --git a/receiver/awscontainerinsightreceiver/README.md b/receiver/awscontainerinsightreceiver/README.md index e783a71340ad..ab90715383bb 100644 --- a/receiver/awscontainerinsightreceiver/README.md +++ b/receiver/awscontainerinsightreceiver/README.md @@ -272,6 +272,9 @@ spec: - name: varlibdocker mountPath: /var/lib/docker readOnly: true + - name: containerdsock + mountPath: /run/containerd/containerd.sock + readOnly: true - name: sys mountPath: /sys readOnly: true @@ -303,6 +306,9 @@ spec: - name: varlibdocker hostPath: path: /var/lib/docker + - name: containerdsock + hostPath: + path: /run/containerd/containerd.sock - name: sys hostPath: path: /sys diff --git a/receiver/awscontainerinsightreceiver/internal/cadvisor/container_info_processor.go b/receiver/awscontainerinsightreceiver/internal/cadvisor/container_info_processor.go index 0b7c9f3cdf8e..6bae8f75203d 100644 --- a/receiver/awscontainerinsightreceiver/internal/cadvisor/container_info_processor.go +++ b/receiver/awscontainerinsightreceiver/internal/cadvisor/container_info_processor.go @@ -117,7 +117,9 @@ func processContainer(info *cInfo.ContainerInfo, mInfo extractors.CPUMemInfoProv namespace := info.Spec.Labels[namespaceLabel] podName := info.Spec.Labels[podNameLabel] podID := info.Spec.Labels[podIDLabel] - if containerName == "" || namespace == "" || podName == "" { + // NOTE: containerName can be empty for pause container on containerd + // 
https://github.com/containerd/cri/issues/922#issuecomment-423729537 + if namespace == "" || podName == "" { logger.Debug("Container labels are missing", zap.String("containerName", containerName), zap.String("namespace", namespace), @@ -136,16 +138,23 @@ func processContainer(info *cInfo.ContainerInfo, mInfo extractors.CPUMemInfoProv tags[ci.PodIDKey] = podID tags[ci.K8sPodNameKey] = podName tags[ci.K8sNamespace] = namespace - if containerName != infraContainerName { + switch containerName { + // For docker, pause container name is set to POD while containerd does not set it. + // See https://github.com/aws/amazon-cloudwatch-agent/issues/188 + case "", infraContainerName: + // NOTE: the pod here is only used by NetMetricExtractor, + // other pod info like CPU, Mem are dealt with in processPod. + containerType = ci.TypeInfraContainer + default: tags[ci.ContainerNamekey] = containerName containerID := path.Base(info.Name) tags[ci.ContainerIDkey] = containerID pKey.containerIds = []string{containerID} containerType = ci.TypeContainer - } else { - // NOTE: the pod here is only used by NetMetricExtractor, - // other pod info like CPU, Mem are dealt within in processPod. - containerType = ci.TypePod + // TODO(pvasir): wait for upstream fix https://github.com/google/cadvisor/issues/2785 + if !info.Spec.HasFilesystem { + logger.Debug("D! 
containerd does not have container filesystem metrics from cadvisor, See https://github.com/google/cadvisor/issues/2785") + } } } else { containerType = ci.TypeNode diff --git a/receiver/awscontainerinsightreceiver/internal/cadvisor/extractors/cpu_extractor.go b/receiver/awscontainerinsightreceiver/internal/cadvisor/extractors/cpu_extractor.go index a6638f68e73c..034ed34a4280 100644 --- a/receiver/awscontainerinsightreceiver/internal/cadvisor/extractors/cpu_extractor.go +++ b/receiver/awscontainerinsightreceiver/internal/cadvisor/extractors/cpu_extractor.go @@ -37,13 +37,15 @@ func (c *CPUMetricExtractor) HasValue(info *cInfo.ContainerInfo) bool { func (c *CPUMetricExtractor) GetValue(info *cInfo.ContainerInfo, mInfo CPUMemInfoProvider, containerType string) []*CAdvisorMetric { var metrics []*CAdvisorMetric - if info.Spec.Labels[containerNameLable] == infraContainerName { + // Skip infra container and handle node, pod, other containers in pod + if containerType == ci.TypeInfraContainer { return metrics } // When there is more than one stats point, always use the last one curStats := GetStats(info) metric := newCadvisorMetric(containerType, c.logger) + metric.cgroupPath = info.Name multiplier := float64(decimalToMillicores) assignRateValueToField(&c.rateCalculator, metric.fields, ci.MetricName(containerType, ci.CPUTotal), info.Name, float64(curStats.Cpu.Usage.Total), curStats.Timestamp, multiplier) assignRateValueToField(&c.rateCalculator, metric.fields, ci.MetricName(containerType, ci.CPUUser), info.Name, float64(curStats.Cpu.Usage.User), curStats.Timestamp, multiplier) diff --git a/receiver/awscontainerinsightreceiver/internal/cadvisor/extractors/extractor.go b/receiver/awscontainerinsightreceiver/internal/cadvisor/extractors/extractor.go index b583373f812e..9cc9940dc735 100644 --- a/receiver/awscontainerinsightreceiver/internal/cadvisor/extractors/extractor.go +++ b/receiver/awscontainerinsightreceiver/internal/cadvisor/extractors/extractor.go @@ -26,12 +26,6 @@ 
import ( awsmetrics "github.com/open-telemetry/opentelemetry-collector-contrib/internal/aws/metrics" ) -const ( - containerNameLable = "io.kubernetes.container.name" - // TODO: https://github.com/containerd/cri/issues/922#issuecomment-423729537 the container name can be empty on containerd - infraContainerName = "POD" -) - func GetStats(info *cinfo.ContainerInfo) *cinfo.ContainerStats { if len(info.Stats) == 0 { return nil @@ -47,10 +41,12 @@ type CPUMemInfoProvider interface { type MetricExtractor interface { HasValue(*cinfo.ContainerInfo) bool - GetValue(*cinfo.ContainerInfo, CPUMemInfoProvider, string) []*CAdvisorMetric + GetValue(info *cinfo.ContainerInfo, mInfo CPUMemInfoProvider, containerType string) []*CAdvisorMetric } type CAdvisorMetric struct { + // source of the metric for debugging merge conflict + cgroupPath string //key/value pairs that are typed and contain the metric (numerical) data fields map[string]interface{} //key/value string pairs that are used to identify the metrics @@ -120,6 +116,7 @@ func (c *CAdvisorMetric) Merge(src *CAdvisorMetric) { for k, v := range src.fields { if _, ok := c.fields[k]; ok { c.logger.Debug(fmt.Sprintf("metric being merged has conflict in fields, src: %v, dest: %v \n", *src, *c)) + c.logger.Debug("metric being merged has conflict in fields", zap.String("src", src.cgroupPath), zap.String("dest", c.cgroupPath)) if c.tags[ci.Timestamp] < src.tags[ci.Timestamp] { continue } diff --git a/receiver/awscontainerinsightreceiver/internal/cadvisor/extractors/fs_extractor.go b/receiver/awscontainerinsightreceiver/internal/cadvisor/extractors/fs_extractor.go index c51d445a071d..e7153bca217d 100644 --- a/receiver/awscontainerinsightreceiver/internal/cadvisor/extractors/fs_extractor.go +++ b/receiver/awscontainerinsightreceiver/internal/cadvisor/extractors/fs_extractor.go @@ -39,7 +39,7 @@ func (f *FileSystemMetricExtractor) HasValue(info *cinfo.ContainerInfo) bool { func (f *FileSystemMetricExtractor) GetValue(info 
*cinfo.ContainerInfo, _ CPUMemInfoProvider, containerType string) []*CAdvisorMetric { var metrics []*CAdvisorMetric - if containerType == ci.TypePod || info.Spec.Labels[containerNameLable] == infraContainerName { + if containerType == ci.TypePod || containerType == ci.TypeInfraContainer { return metrics } @@ -71,6 +71,7 @@ func (f *FileSystemMetricExtractor) GetValue(info *cinfo.ContainerInfo, _ CPUMem metric.fields[ci.MetricName(containerType, ci.FSInodesfree)] = v.InodesFree } + metric.cgroupPath = info.Name metrics = append(metrics, metric) } return metrics diff --git a/receiver/awscontainerinsightreceiver/internal/cadvisor/extractors/mem_extractor.go b/receiver/awscontainerinsightreceiver/internal/cadvisor/extractors/mem_extractor.go index 53d68435f8e5..36e7f03b1c35 100644 --- a/receiver/awscontainerinsightreceiver/internal/cadvisor/extractors/mem_extractor.go +++ b/receiver/awscontainerinsightreceiver/internal/cadvisor/extractors/mem_extractor.go @@ -35,11 +35,12 @@ func (m *MemMetricExtractor) HasValue(info *cinfo.ContainerInfo) bool { func (m *MemMetricExtractor) GetValue(info *cinfo.ContainerInfo, mInfo CPUMemInfoProvider, containerType string) []*CAdvisorMetric { var metrics []*CAdvisorMetric - if info.Spec.Labels[containerNameLable] == infraContainerName { + if containerType == ci.TypeInfraContainer { return metrics } metric := newCadvisorMetric(containerType, m.logger) + metric.cgroupPath = info.Name curStats := GetStats(info) metric.fields[ci.MetricName(containerType, ci.MemUsage)] = curStats.Memory.Usage diff --git a/receiver/awscontainerinsightreceiver/internal/cadvisor/extractors/net_extractor.go b/receiver/awscontainerinsightreceiver/internal/cadvisor/extractors/net_extractor.go index 3ef70132ba9a..8338439c3018 100644 --- a/receiver/awscontainerinsightreceiver/internal/cadvisor/extractors/net_extractor.go +++ b/receiver/awscontainerinsightreceiver/internal/cadvisor/extractors/net_extractor.go @@ -46,10 +46,15 @@ func (n *NetMetricExtractor) 
GetValue(info *cinfo.ContainerInfo, _ CPUMemInfoPro var metrics []*CAdvisorMetric // Just a protection here, there is no Container level Net metrics - if (containerType == ci.TypePod && info.Spec.Labels[containerNameLable] != infraContainerName) || containerType == ci.TypeContainer { + if containerType == ci.TypePod || containerType == ci.TypeContainer { return metrics } + // Rename type to pod so the metric name prefix is pod_ + if containerType == ci.TypeInfraContainer { + containerType = ci.TypePod + } + curStats := GetStats(info) curIfceStats := getInterfacesStats(curStats) diff --git a/unreleased/detect-pod.yaml b/unreleased/detect-pod.yaml new file mode 100755 index 000000000000..59e196fbcd4c --- /dev/null +++ b/unreleased/detect-pod.yaml @@ -0,0 +1,16 @@ +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: awscontainerinsightreceiver + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: "Pod Detection changes to support Containerd runtime in K8s" + +# One or more tracking issues related to the change +issues: [ 12638 ] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: