Overhaul all metrics
- Fix names to comply with the [official
  guidelines](https://prometheus.io/docs/practices/naming/#metric-and-label-naming)
  and to better mirror the names of similar timeseries from the
  much-more-popular cAdvisor, when reasonable. And don't use the word
  "svc" to refer to tasks, as it is just not correct.
- Improve `help` strings.
- Stop reporting per-CPU usage metrics. They're empirically only
  available in Fargate, but the current collector implementation assumes
  they're available everywhere. (They were previously available in EC2 but
  that stopped being the case when ecs-agent was upgraded to use cgroups
  v2.)  Given that it's not clear why per-CPU numbers are useful in
  general, remove them everywhere instead of exposing disjoint metrics for
  Fargate and EC2. This will also prevent Fargate from potentially
  spontaneously breaking in the same way EC2 did.
- Fix task-level memory limit to actually be in bytes (it previously
  said "bytes" but was in fact MiB; see the conversion sketch at the end
  of this message).
- Correctly report container-level memory limits in all cases - the
  stats `limit` is nonsense if, as in Fargate, there is no container-level
  limit configured in the task definition. While the right data for all
  cases is hiding in the stats response somewhere, I have instead opted to
  cut out the stats middleman and use the task metadata directly to drive
  this metric. I think it's substantially less likely that ECS fails to
  effect the configured limits upon cgroups correctly than it is that we
  fail to interrogate cgroups output correctly: the latter empirically
  happens with some frequency :^).
- Add metrics concerning Fargate ephemeral storage, and one for task
  image pull duration.
- Add more labels for task- and container-level metrics. While we should
  always be cautious when adding common labels to timeseries, I think
  the existing ones were insufficient for doing basic aggregations (e.g.
  "average memory usage for a given task family grouped by revision";
  see the example query below). These labels are comparable in scope to
  those used by cAdvisor for its own container timeseries.

I have tested these changes in both Fargate and EC2, and they look
correct to me.
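
For reference, below is a minimal, self-contained Go sketch of the unit
conversions the new collector code relies on. The `mebibytes` and
`nanoseconds` constants mirror the ones added in collector.go; the
sample values are invented for illustration:

```
package main

import (
	"fmt"
	"time"
)

// Constants mirroring those added in ecscollector/collector.go.
const (
	mebibytes   = 1024 * 1024 // task definition memory is configured in MiB
	nanoseconds = 1 / 1.0e9   // multiply a nanosecond count by this to get seconds
)

func main() {
	// Task-level memory limit: the metadata endpoint reports MiB, but the
	// metric is now exported in bytes.
	taskMemoryMiB := int64(512)
	fmt.Println(float64(taskMemoryMiB * mebibytes)) // 5.36870912e+08

	// Image pull duration: subtracting two timestamps yields a
	// time.Duration (an int64 nanosecond count), which is scaled to
	// seconds the same way the collector does.
	pullStarted := time.Now()
	pullStopped := pullStarted.Add(1500 * time.Millisecond)
	fmt.Println(float64(pullStopped.Sub(pullStarted)) * nanoseconds) // 1.5
}
```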

Signed-off-by: Ian Kerins <git@isk.haus>
isker committed Oct 13, 2024
1 parent 593ea5f commit 5c7aa78
Showing 2 changed files with 159 additions and 90 deletions.
14 changes: 11 additions & 3 deletions README.md
@@ -37,12 +37,20 @@ from App Runner services.

## Labels

* **container**: Container associated with a metric.
* **cpu**: Available to CPU metrics, helps to breakdown metrics by CPU.
* **device**: Network interface device associated with the metric. Only
### On task-level metrics
* **task_arn**: [ARN of the task](https://docs.aws.amazon.com/service-authorization/latest/reference/list_amazonelasticcontainerservice.html#amazonelasticcontainerservice-resources-for-iam-policies) associated with a metric.
* **family**: Task definition family associated with a metric.
* **revision**: Revision of the task definition family associated with a metric.

### On container-level metrics

* **container**: Name of the container (as in the ECS task definition) associated with a metric.
* **image**: Docker image identifier (e.g. `name:tag`, `name@digest`) of the container.
* **interface**: Network interface device associated with the metric. Only
available for several network metrics.

## Example output
TODO update

```
# HELP ecs_cpu_seconds_total Total CPU usage in seconds.
235 changes: 148 additions & 87 deletions ecscollector/collector.go
@@ -17,7 +17,6 @@ package ecscollector

import (
"context"
"fmt"
"log/slog"
"time"

@@ -27,94 +26,116 @@ import (

// ECS cpu_stats are from upstream docker/moby. These values are in nanoseconds.
// https://github.com/moby/moby/blob/49f021ebf00a76d74f5ce158244083e2dfba26fb/api/types/stats.go#L18-L40
const nanoSeconds = 1.0e9
const nanoseconds = 1 / 1.0e9

var (
metadataDesc = prometheus.NewDesc(
"ecs_metadata_info",
"ECS service metadata.",
metadataLabels, nil)

svcCPULimitDesc = prometheus.NewDesc(
"ecs_svc_cpu_limit",
"Total CPU Limit.",
svcLabels, nil)
// Task definition memory parameters are defined in MiB, while Prometheus
// standard metrics use bytes.
const mebibytes = 1024 * 1024

svcMemLimitDesc = prometheus.NewDesc(
"ecs_svc_memory_limit_bytes",
"Total MEM Limit in bytes.",
svcLabels, nil)
var (
taskMetadataDesc = prometheus.NewDesc(
"ecs_task_metadata_info",
"ECS task metadata, sourced from the task metadata endpoint version 4.",
taskMetadataLabels, nil)

taskCPULimitDesc = prometheus.NewDesc(
"ecs_task_cpu_limit_vcpus",
"Configured task CPU limit in vCPUs (1 vCPU = 1024 CPU units). This is optional when running on EC2; if no limit is set, this metric has no value.",
taskLabels, nil)

taskMemLimitDesc = prometheus.NewDesc(
"ecs_task_memory_limit_bytes",
"Configured task memory limit in bytes. This is optional when running on EC2; if no limit is set, this metric has no value.",
taskLabels, nil)

taskEphemeralStorageUsedDesc = prometheus.NewDesc(
"ecs_task_ephemeral_storage_used_bytes",
"Current Fargate task ephemeral storage usage in bytes.",
taskLabels, nil)

taskEphemeralStorageAllocatedDesc = prometheus.NewDesc(
"ecs_task_ephemeral_storage_allocated_bytes",
"Configured Fargate task ephemeral storage allocated size in bytes.",
taskLabels, nil)

taskImagePullDurationDesc = prometheus.NewDesc(
"ecs_task_image_pull_duration_seconds",
"How long container image pulling took for the task on startup.",
taskLabels, nil)

cpuTotalDesc = prometheus.NewDesc(
"ecs_cpu_seconds_total",
"Total CPU usage in seconds.",
cpuLabels, nil)
"ecs_container_cpu_usage_seconds_total",
"Cumulative total container CPU usage in seconds.",
containerLabels, nil)

memUsageDesc = prometheus.NewDesc(
"ecs_memory_bytes",
"Memory usage in bytes.",
labels, nil)
"ecs_container_memory_usage_bytes",
"Current container memory usage in bytes.",
containerLabels, nil)

memLimitDesc = prometheus.NewDesc(
"ecs_memory_limit_bytes",
"Memory limit in bytes.",
labels, nil)
"ecs_container_memory_limit_bytes",
"Configured container memory limit in bytes, set from the container-level limit in the task definition if any, otherwise the task-level limit.",
containerLabels, nil)

memCacheUsageDesc = prometheus.NewDesc(
"ecs_memory_cache_usage",
"Memory cache usage in bytes.",
labels, nil)
memCacheSizeDesc = prometheus.NewDesc(
"ecs_container_memory_page_cache_size_bytes",
"Current container memory page cache size in bytes. This is not a subset of used bytes.",
containerLabels, nil)

networkRxBytesDesc = prometheus.NewDesc(
"ecs_network_receive_bytes_total",
"Network received in bytes.",
networkLabels, nil)
"ecs_container_network_receive_bytes_total",
"Cumulative total size of container network packets received in bytes.",
containerNetworkLabels, nil)

networkRxPacketsDesc = prometheus.NewDesc(
"ecs_network_receive_packets_total",
"Network packets received.",
networkLabels, nil)
"ecs_container_network_receive_packets_total",
"Cumulative total count of container network packets received.",
containerNetworkLabels, nil)

networkRxDroppedDesc = prometheus.NewDesc(
"ecs_network_receive_dropped_total",
"Network packets dropped in receiving.",
networkLabels, nil)
"ecs_container_network_receive_packets_dropped_total",
"Cumulative total count of container network packets dropped in receiving.",
containerNetworkLabels, nil)

networkRxErrorsDesc = prometheus.NewDesc(
"ecs_network_receive_errors_total",
"Network errors in receiving.",
networkLabels, nil)
"ecs_container_network_receive_errors_total",
"Cumulative total count of container network errors in receiving.",
containerNetworkLabels, nil)

networkTxBytesDesc = prometheus.NewDesc(
"ecs_network_transmit_bytes_total",
"Network transmitted in bytes.",
networkLabels, nil)
"ecs_container_network_transmit_bytes_total",
"Cumulative total size of container network packets transmitted in bytes.",
containerNetworkLabels, nil)

networkTxPacketsDesc = prometheus.NewDesc(
"ecs_network_transmit_packets_total",
"Network packets transmitted.",
networkLabels, nil)
"ecs_container_network_transmit_packets_total",
"Cumulative total count of container network packets transmitted.",
containerNetworkLabels, nil)

networkTxDroppedDesc = prometheus.NewDesc(
"ecs_network_transmit_dropped_total",
"Network packets dropped in transmit.",
networkLabels, nil)
"ecs_container_network_transmit_dropped_total",
"Cumulative total count of container network packets dropped in transmit.",
containerNetworkLabels, nil)

networkTxErrorsDesc = prometheus.NewDesc(
"ecs_network_transmit_errors_total",
"Network errors in transmit.",
networkLabels, nil)
"ecs_container_network_transmit_errors_total",
"Cumulative total count of container network errors in transmit.",
containerNetworkLabels, nil)
)

var labels = []string{
"container",
var containerLabels = []string{
"container_name",
"image",
}

var svcLabels = []string{
var taskLabels = []string{
"task_arn",
"family",
"revision",
}

var metadataLabels = []string{
var taskMetadataLabels = []string{
"cluster",
"task_arn",
"family",
@@ -127,14 +148,9 @@ var metadataLabels = []string{
"launch_type",
}

var cpuLabels = append(
labels,
"cpu",
)

var networkLabels = append(
labels,
"device",
var containerNetworkLabels = append(
containerLabels,
"interface",
)

// NewCollector returns a new Collector that queries ECS metadata server
@@ -149,10 +165,16 @@ type collector struct {
}

func (c *collector) Describe(ch chan<- *prometheus.Desc) {
ch <- taskMetadataDesc
ch <- taskCPULimitDesc
ch <- taskMemLimitDesc
ch <- taskEphemeralStorageUsedDesc
ch <- taskEphemeralStorageAllocatedDesc
ch <- taskImagePullDurationDesc
ch <- cpuTotalDesc
ch <- memUsageDesc
ch <- memLimitDesc
ch <- memCacheUsageDesc
ch <- memCacheSizeDesc
ch <- networkRxBytesDesc
ch <- networkRxPacketsDesc
ch <- networkRxDroppedDesc
@@ -173,7 +195,7 @@ func (c *collector) Collect(ch chan<- prometheus.Metric) {
c.logger.Debug("Got ECS task metadata response", "stats", metadata)

ch <- prometheus.MustNewConstMetric(
metadataDesc,
taskMetadataDesc,
prometheus.GaugeValue,
1.0,
metadata.Cluster,
@@ -188,27 +210,57 @@ func (c *collector) Collect(ch chan<- prometheus.Metric) {
metadata.LaunchType,
)

taskLabelVals := []string{
metadata.TaskARN,
metadata.Family,
metadata.Revision,
}

// Task CPU/memory limits are optional when running on EC2 - the relevant
// limits may only exist at the container level.
if metadata.Limits != nil {
if metadata.Limits.CPU != nil {
ch <- prometheus.MustNewConstMetric(
svcCPULimitDesc,
taskCPULimitDesc,
prometheus.GaugeValue,
*metadata.Limits.CPU,
metadata.TaskARN,
taskLabelVals...,
)
}
if metadata.Limits.Memory != nil {
ch <- prometheus.MustNewConstMetric(
svcMemLimitDesc,
taskMemLimitDesc,
prometheus.GaugeValue,
float64(*metadata.Limits.Memory),
metadata.TaskARN,
float64(*metadata.Limits.Memory*mebibytes),
taskLabelVals...,
)
}
}

if metadata.EphemeralStorageMetrics != nil {
ch <- prometheus.MustNewConstMetric(
taskEphemeralStorageUsedDesc,
prometheus.GaugeValue,
float64(metadata.EphemeralStorageMetrics.UtilizedMiBs*mebibytes),
taskLabelVals...,
)
ch <- prometheus.MustNewConstMetric(
taskEphemeralStorageAllocatedDesc,
prometheus.GaugeValue,
float64(metadata.EphemeralStorageMetrics.ReservedMiBs*mebibytes),
taskLabelVals...,
)
}

if metadata.PullStartedAt != nil && metadata.PullStoppedAt != nil {
ch <- prometheus.MustNewConstMetric(
taskImagePullDurationDesc,
prometheus.GaugeValue,
float64(metadata.PullStoppedAt.Sub(*metadata.PullStartedAt))*nanoseconds,
taskLabelVals...,
)
}

stats, err := c.client.RetrieveTaskStats(ctx)
if err != nil {
c.logger.Debug("Failed to retrieve container stats", "error", err)
@@ -223,41 +275,50 @@ func (c *collector) Collect(ch chan<- prometheus.Metric) {
continue
}

labelVals := []string{
containerLabelVals := []string{
container.Name,
container.Image,
}

for i, cpuUsage := range s.CPUStats.CPUUsage.PercpuUsage {
cpu := fmt.Sprintf("%d", i)
ch <- prometheus.MustNewConstMetric(
cpuTotalDesc,
prometheus.CounterValue,
float64(cpuUsage)/nanoSeconds,
append(labelVals, cpu)...,
)
}
ch <- prometheus.MustNewConstMetric(
cpuTotalDesc,
prometheus.CounterValue,
float64(s.CPUStats.CPUUsage.TotalUsage)*nanoseconds,
containerLabelVals...,
)

cacheValue := 0.0
if val, ok := s.MemoryStats.Stats["cache"]; ok {
cacheValue = float64(val)
}

// Report the container's memory limit as its own, if any, otherwise the
// task's limit. This is correct in that this is the precise logic used
// to configure the cgroups limit for the container.
var containerMemoryLimitMib int64
if container.Limits.Memory != nil {
containerMemoryLimitMib = *container.Limits.Memory
} else {
// This must be set if the container limit is not set, and thus is
// safe to dereference.
containerMemoryLimitMib = *metadata.Limits.Memory
}
for desc, value := range map[*prometheus.Desc]float64{
memUsageDesc: float64(s.MemoryStats.Usage),
memLimitDesc: float64(s.MemoryStats.Limit),
memCacheUsageDesc: cacheValue,
memUsageDesc: float64(s.MemoryStats.Usage),
memLimitDesc: float64(containerMemoryLimitMib * mebibytes),
memCacheSizeDesc: cacheValue,
} {
ch <- prometheus.MustNewConstMetric(
desc,
prometheus.GaugeValue,
value,
labelVals...,
containerLabelVals...,
)
}

// Network metrics per interface.
for iface, netStats := range s.Networks {
networkLabelVals := append(labelVals, iface)
networkLabelVals := append(containerLabelVals, iface)

for desc, value := range map[*prometheus.Desc]float64{
networkRxBytesDesc: float64(netStats.RxBytes),
