Skip to content

Commit

Permalink
Add swap memory checks to cadvisor kubelet checks
Browse files Browse the repository at this point in the history
  • Loading branch information
adammw committed Jun 1, 2019
1 parent 913cc86 commit a762a18
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 3 deletions.
3 changes: 2 additions & 1 deletion kubelet/datadog_checks/kubelet/cadvisor.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
NAMESPACE = "kubernetes"
DEFAULT_MAX_DEPTH = 10
DEFAULT_ENABLED_RATES = ['diskio.io_service_bytes.stats.total', 'network.??_bytes', 'cpu.*.total']
DEFAULT_ENABLED_GAUGES = ['memory.usage', 'memory.working_set', 'memory.rss', 'filesystem.usage']
DEFAULT_ENABLED_GAUGES = ['memory.cache', 'memory.usage', 'memory.swap', 'memory.working_set',
'memory.rss', 'filesystem.usage']
DEFAULT_POD_LEVEL_METRICS = ['network.*']

NET_ERRORS = ['rx_errors', 'tx_errors', 'rx_dropped', 'tx_dropped']
Expand Down
24 changes: 23 additions & 1 deletion kubelet/datadog_checks/kubelet/prometheus.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def __init__(self, *args, **kwargs):
# and container_<metric-name>_limit_<metric-unit> reads it to compute <metric-name>usage_pct
self.fs_usage_bytes = {}
self.mem_usage_bytes = {}
self.swap_usage_bytes = {}

self.CADVISOR_METRIC_TRANSFORMERS = {
'container_cpu_usage_seconds_total': self.container_cpu_usage_seconds_total,
Expand All @@ -53,8 +54,11 @@ def __init__(self, *args, **kwargs):
'container_fs_limit_bytes': self.container_fs_limit_bytes,
'container_memory_usage_bytes': self.container_memory_usage_bytes,
'container_memory_working_set_bytes': self.container_memory_working_set_bytes,
'container_memory_cache': self.container_memory_cache,
'container_memory_rss': self.container_memory_rss,
'container_memory_swap': self.container_memory_swap,
'container_spec_memory_limit_bytes': self.container_spec_memory_limit_bytes,
'container_spec_memory_swap_limit_bytes': self.container_spec_memory_swap_limit_bytes,
}

def _create_cadvisor_prometheus_instance(self, instance):
Expand Down Expand Up @@ -87,7 +91,6 @@ def _create_cadvisor_prometheus_instance(self, instance):
'container_fs_writes_total',
'container_last_seen',
'container_start_time_seconds',
'container_spec_memory_swap_limit_bytes',
'container_scrape_error',
],
# Defaults that were set when CadvisorPrometheusScraper was based on PrometheusScraper
Expand Down Expand Up @@ -526,14 +529,33 @@ def container_memory_working_set_bytes(self, metric, scraper_config):
metric_name = scraper_config['namespace'] + '.memory.working_set'
self._process_container_metric('gauge', metric_name, metric, scraper_config)

def container_memory_cache(self, metric, scraper_config):
    """
    Transformer for the scraped `container_memory_cache` metric.

    Forwards the metric to `_process_container_metric` as a gauge named
    `<namespace>.memory.cache`, where the namespace is read from
    `scraper_config['namespace']`.
    """
    namespace = scraper_config['namespace']
    gauge_name = namespace + '.memory.cache'
    self._process_container_metric('gauge', gauge_name, metric, scraper_config)

def container_memory_rss(self, metric, scraper_config):
    """
    Transformer for the scraped `container_memory_rss` metric.

    Forwards the metric to `_process_container_metric` as a gauge named
    `<namespace>.memory.rss`, where the namespace is read from
    `scraper_config['namespace']`.
    """
    namespace = scraper_config['namespace']
    gauge_name = namespace + '.memory.rss'
    self._process_container_metric('gauge', gauge_name, metric, scraper_config)

def container_memory_swap(self, metric, scraper_config):
    """
    Transformer for the scraped `container_memory_swap` metric.

    Hands the metric to `_process_usage_metric` under the name
    `<namespace>.memory.swap`, together with the `self.swap_usage_bytes`
    cache. Metrics whose type is not listed in METRIC_TYPES are logged as
    an error and dropped.
    """
    metric_name = scraper_config['namespace'] + '.memory.swap'
    if metric.type not in METRIC_TYPES:
        # Pass the values as logger arguments so the message is only
        # formatted when the record is actually emitted.
        self.log.error("Metric type %s unsupported for metric %s", metric.type, metric.name)
        return
    # NOTE(review): presumably the usage helper fills self.swap_usage_bytes so the
    # swap limit transformer can derive a usage ratio from it -- confirm in
    # _process_usage_metric.
    self._process_usage_metric(metric_name, metric, self.swap_usage_bytes, scraper_config)

def container_spec_memory_limit_bytes(self, metric, scraper_config):
    """
    Transformer for the scraped `container_spec_memory_limit_bytes` metric.

    Hands the metric to `_process_limit_metric` under the name
    `<namespace>.memory.limits`, together with the `self.mem_usage_bytes`
    cache and `<namespace>.memory.usage_pct` as the percentage metric name.
    Metrics whose type is not listed in METRIC_TYPES are logged as an error
    and dropped.
    """
    metric_name = scraper_config['namespace'] + '.memory.limits'
    pct_m_name = scraper_config['namespace'] + '.memory.usage_pct'
    if metric.type not in METRIC_TYPES:
        # Pass the values as logger arguments so the message is only
        # formatted when the record is actually emitted.
        self.log.error("Metric type %s unsupported for metric %s", metric.type, metric.name)
        return
    self._process_limit_metric(metric_name, metric, self.mem_usage_bytes, scraper_config, pct_m_name=pct_m_name)

def container_spec_memory_swap_limit_bytes(self, metric, scraper_config):
    """
    Transformer for the scraped `container_spec_memory_swap_limit_bytes` metric.

    Hands the metric to `_process_limit_metric` under the name
    `<namespace>.memory.sw_limit`, together with the `self.swap_usage_bytes`
    cache and `<namespace>.memory.sw_in_use` as the percentage metric name.
    Metrics whose type is not listed in METRIC_TYPES are logged as an error
    and dropped.
    """
    metric_name = scraper_config['namespace'] + '.memory.sw_limit'
    pct_m_name = scraper_config['namespace'] + '.memory.sw_in_use'
    if metric.type not in METRIC_TYPES:
        # Pass the values as logger arguments so the message is only
        # formatted when the record is actually emitted.
        self.log.error("Metric type %s unsupported for metric %s", metric.type, metric.name)
        return
    # NOTE(review): confirm whether the limit reported by cAdvisor is swap-only or
    # the combined memory+swap limit; in the latter case sw_in_use would not be a
    # plain "swap used / swap available" ratio.
    self._process_limit_metric(metric_name, metric, self.swap_usage_bytes, scraper_config, pct_m_name=pct_m_name)
4 changes: 4 additions & 0 deletions kubelet/metadata.csv
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,15 @@ kubernetes.io.read_bytes,gauge,,byte,,The amount of bytes read from the disk,0,k
kubernetes.io.write_bytes,gauge,,byte,,The amount of bytes written to the disk,0,kubelet,k8_io_write_bytes
kubernetes.memory.capacity,gauge,,byte,,The amount of memory (in bytes) in this machine,0,kubelet,k8s.mem.capacity
kubernetes.memory.limits,gauge,,byte,,The limit of memory set,0,kubelet,k8s.mem.limits
kubernetes.memory.sw_limit,gauge,,byte,,The limit of swap space set,0,kubelet,k8s.mem.sw_limit
kubernetes.memory.requests,gauge,,byte,,The requested memory,0,kubelet,k8s.mem.requests
kubernetes.memory.usage,gauge,,byte,,Current memory usage in bytes including all memory regardless of when it was accessed,-1,kubelet,k8s.mem
kubernetes.memory.working_set,gauge,,byte,,Current working set in bytes - this is what the OOM killer is watching for,-1,kubelet,k8s.mem.ws
kubernetes.memory.cache,gauge,,byte,,The amount of memory that is being used to cache data from disk (e.g. memory contents that can be associated precisely with a block on a block device),-1,kubelet,k8s.mem.cache
kubernetes.memory.rss,gauge,,byte,,Size of RSS in bytes,-1,kubelet,k8s.mem.rss
kubernetes.memory.swap,gauge,,byte,,The amount of swap currently used by processes in this cgroup,-1,kubelet,k8s.mem.swap
kubernetes.memory.usage_pct,gauge,,fraction,,The percentage of memory used,-1,kubelet,k8s.mem.used_pct
kubernetes.memory.sw_in_use,gauge,,fraction,,The percentage of swap space used,-1,kubelet,k8s.mem.sw_in_use
kubernetes.network.rx_bytes,gauge,,byte,second,The amount of bytes per second received,0,kubelet,k8s.net.rx
kubernetes.network.rx_dropped,gauge,,packet,second,The amount of rx packets dropped per second,-1,kubelet,k8s.net.rx.drop
kubernetes.network.rx_errors,gauge,,error,second,The amount of rx errors per second,-1,kubelet,k8s.net.rx.errors
Expand Down
Loading

0 comments on commit a762a18

Please sign in to comment.