From 3f6e16eeb00ab5e714b1bc289ae5719e2581daec Mon Sep 17 00:00:00 2001 From: Hippolyte HENRY Date: Fri, 28 Jun 2019 13:44:40 -0400 Subject: [PATCH] Revert "Revert "Add swap memory checks to cadvisor kubelet checks (#3808)" (#4007)" This reverts commit 16276b8fa01e6d34ff252e54493f1eab1226615e. --- kubelet/datadog_checks/kubelet/cadvisor.py | 9 ++++- kubelet/datadog_checks/kubelet/prometheus.py | 24 ++++++++++- kubelet/metadata.csv | 4 ++ kubelet/tests/fixtures/cadvisor_1.2.json | 42 ++++++++++++++++++++ kubelet/tests/test_kubelet.py | 3 ++ 5 files changed, 80 insertions(+), 2 deletions(-) diff --git a/kubelet/datadog_checks/kubelet/cadvisor.py b/kubelet/datadog_checks/kubelet/cadvisor.py index 7cc303f9b7ffe..03dbe9865a863 100644 --- a/kubelet/datadog_checks/kubelet/cadvisor.py +++ b/kubelet/datadog_checks/kubelet/cadvisor.py @@ -23,7 +23,14 @@ NAMESPACE = "kubernetes" DEFAULT_MAX_DEPTH = 10 DEFAULT_ENABLED_RATES = ['diskio.io_service_bytes.stats.total', 'network.??_bytes', 'cpu.*.total'] -DEFAULT_ENABLED_GAUGES = ['memory.usage', 'memory.working_set', 'memory.rss', 'filesystem.usage'] +DEFAULT_ENABLED_GAUGES = [ + 'memory.cache', + 'memory.usage', + 'memory.swap', + 'memory.working_set', + 'memory.rss', + 'filesystem.usage', +] DEFAULT_POD_LEVEL_METRICS = ['network.*'] NET_ERRORS = ['rx_errors', 'tx_errors', 'rx_dropped', 'tx_dropped'] diff --git a/kubelet/datadog_checks/kubelet/prometheus.py b/kubelet/datadog_checks/kubelet/prometheus.py index aef18953b7fd2..497678d0fab28 100644 --- a/kubelet/datadog_checks/kubelet/prometheus.py +++ b/kubelet/datadog_checks/kubelet/prometheus.py @@ -31,6 +31,7 @@ def __init__(self, *args, **kwargs): # and container__limit_ reads it to compute usage_pct self.fs_usage_bytes = {} self.mem_usage_bytes = {} + self.swap_usage_bytes = {} self.CADVISOR_METRIC_TRANSFORMERS = { 'container_cpu_usage_seconds_total': self.container_cpu_usage_seconds_total, @@ -51,8 +52,11 @@ def __init__(self, *args, **kwargs): 'container_fs_limit_bytes': 
self.container_fs_limit_bytes, 'container_memory_usage_bytes': self.container_memory_usage_bytes, 'container_memory_working_set_bytes': self.container_memory_working_set_bytes, + 'container_memory_cache': self.container_memory_cache, 'container_memory_rss': self.container_memory_rss, + 'container_memory_swap': self.container_memory_swap, 'container_spec_memory_limit_bytes': self.container_spec_memory_limit_bytes, + 'container_spec_memory_swap_limit_bytes': self.container_spec_memory_swap_limit_bytes, } def _create_cadvisor_prometheus_instance(self, instance): @@ -85,7 +89,6 @@ def _create_cadvisor_prometheus_instance(self, instance): 'container_fs_writes_total', 'container_last_seen', 'container_start_time_seconds', - 'container_spec_memory_swap_limit_bytes', 'container_scrape_error', ], # Defaults that were set when CadvisorPrometheusScraper was based on PrometheusScraper @@ -514,10 +517,21 @@ def container_memory_working_set_bytes(self, metric, scraper_config): metric_name = scraper_config['namespace'] + '.memory.working_set' self._process_container_metric('gauge', metric_name, metric, scraper_config) + def container_memory_cache(self, metric, scraper_config): + metric_name = scraper_config['namespace'] + '.memory.cache' + self._process_container_metric('gauge', metric_name, metric, scraper_config) + def container_memory_rss(self, metric, scraper_config): metric_name = scraper_config['namespace'] + '.memory.rss' self._process_container_metric('gauge', metric_name, metric, scraper_config) + def container_memory_swap(self, metric, scraper_config): + metric_name = scraper_config['namespace'] + '.memory.swap' + if metric.type not in METRIC_TYPES: + self.log.error("Metric type %s unsupported for metric %s" % (metric.type, metric.name)) + return + self._process_usage_metric(metric_name, metric, self.swap_usage_bytes, scraper_config) + def container_spec_memory_limit_bytes(self, metric, scraper_config): metric_name = scraper_config['namespace'] + '.memory.limits' 
pct_m_name = scraper_config['namespace'] + '.memory.usage_pct' @@ -525,3 +539,11 @@ def container_spec_memory_limit_bytes(self, metric, scraper_config): self.log.error("Metric type %s unsupported for metric %s" % (metric.type, metric.name)) return self._process_limit_metric(metric_name, metric, self.mem_usage_bytes, scraper_config, pct_m_name=pct_m_name) + + def container_spec_memory_swap_limit_bytes(self, metric, scraper_config): + metric_name = scraper_config['namespace'] + '.memory.sw_limit' + pct_m_name = scraper_config['namespace'] + '.memory.sw_in_use' + if metric.type not in METRIC_TYPES: + self.log.error("Metric type %s unsupported for metric %s" % (metric.type, metric.name)) + return + self._process_limit_metric(metric_name, metric, self.swap_usage_bytes, scraper_config, pct_m_name=pct_m_name) diff --git a/kubelet/metadata.csv b/kubelet/metadata.csv index 7107349ab82b1..fe2123c41df49 100644 --- a/kubelet/metadata.csv +++ b/kubelet/metadata.csv @@ -21,11 +21,15 @@ kubernetes.io.read_bytes,gauge,,byte,,The amount of bytes read from the disk,0,k kubernetes.io.write_bytes,gauge,,byte,,The amount of bytes written to the disk,0,kubelet,k8_io_write_bytes kubernetes.memory.capacity,gauge,,byte,,The amount of memory (in bytes) in this machine,0,kubelet,k8s.mem.capacity kubernetes.memory.limits,gauge,,byte,,The limit of memory set,0,kubelet,k8s.mem.limits +kubernetes.memory.sw_limit,gauge,,byte,,The limit of swap space set,0,kubelet,k8s.mem.sw_limit kubernetes.memory.requests,gauge,,byte,,The requested memory,0,kubelet,k8s.mem.requests kubernetes.memory.usage,gauge,,byte,,Current memory usage in bytes including all memory regardless of when it was accessed,-1,kubelet,k8s.mem kubernetes.memory.working_set,gauge,,byte,,Current working set in bytes - this is what the OOM killer is watching for,-1,kubelet,k8s.mem.ws +kubernetes.memory.cache,gauge,,byte,,The amount of memory that is being used to cache data from disk (e.g. 
memory contents that can be associated precisely with a block on a block device),-1,kubelet,k8s.mem.cache kubernetes.memory.rss,gauge,,byte,,Size of RSS in bytes,-1,kubelet,k8s.mem.rss +kubernetes.memory.swap,gauge,,byte,,The amount of swap currently used by processes in this cgroup,-1,kubelet,k8s.mem.swap kubernetes.memory.usage_pct,gauge,,fraction,,The percentage of memory used,-1,kubelet,k8s.mem.used_pct +kubernetes.memory.sw_in_use,gauge,,fraction,,The percentage of swap space used,-1,kubelet,k8s.mem.sw_in_use kubernetes.network.rx_bytes,gauge,,byte,second,The amount of bytes per second received,0,kubelet,k8s.net.rx kubernetes.network.rx_dropped,gauge,,packet,second,The amount of rx packets dropped per second,-1,kubelet,k8s.net.rx.drop kubernetes.network.rx_errors,gauge,,error,second,The amount of rx errors per second,-1,kubelet,k8s.net.rx.errors diff --git a/kubelet/tests/fixtures/cadvisor_1.2.json b/kubelet/tests/fixtures/cadvisor_1.2.json index 081331c59033c..0a659bba90382 100644 --- a/kubelet/tests/fixtures/cadvisor_1.2.json +++ b/kubelet/tests/fixtures/cadvisor_1.2.json @@ -135,6 +135,7 @@ "usage": 1068859392, "cache": 1025363968, "rss": 43405312, + "swap": 0, "working_set": 382758912, "failcnt": 0, "container_data": { @@ -307,6 +308,7 @@ "usage": 1068822528, "cache": 1025363968, "rss": 43409408, + "swap": 0, "working_set": 382722048, "failcnt": 0, "container_data": { @@ -479,6 +481,7 @@ "usage": 1068773376, "cache": 1025363968, "rss": 43409408, + "swap": 0, "working_set": 382672896, "failcnt": 0, "container_data": { @@ -651,6 +654,7 @@ "usage": 1068765184, "cache": 1025363968, "rss": 43401216, + "swap": 0, "working_set": 382664704, "failcnt": 0, "container_data": { @@ -823,6 +827,7 @@ "usage": 1068773376, "cache": 1025363968, "rss": 43409408, + "swap": 0, "working_set": 382672896, "failcnt": 0, "container_data": { @@ -995,6 +1000,7 @@ "usage": 1068769280, "cache": 1025363968, "rss": 43405312, + "swap": 0, "working_set": 382668800, "failcnt": 0,
"container_data": { @@ -1167,6 +1173,7 @@ "usage": 1068761088, "cache": 1025363968, "rss": 43397120, + "swap": 0, "working_set": 382660608, "failcnt": 0, "container_data": { @@ -1339,6 +1346,7 @@ "usage": 1068773376, "cache": 1025363968, "rss": 43409408, + "swap": 0, "working_set": 382672896, "failcnt": 0, "container_data": { @@ -1561,6 +1569,7 @@ "usage": 97558528, "cache": 19447808, "rss": 78110720, + "swap": 0, "working_set": 90456064, "failcnt": 10636, "container_data": { @@ -1755,6 +1764,7 @@ "usage": 100651008, "cache": 19836928, "rss": 80814080, + "swap": 0, "working_set": 93540352, "failcnt": 10636, "container_data": { @@ -1949,6 +1959,7 @@ "usage": 102555648, "cache": 20086784, "rss": 82468864, + "swap": 0, "working_set": 95449088, "failcnt": 10636, "container_data": { @@ -2143,6 +2154,7 @@ "usage": 100925440, "cache": 20119552, "rss": 80805888, + "swap": 0, "working_set": 93818880, "failcnt": 10636, "container_data": { @@ -2337,6 +2349,7 @@ "usage": 117256192, "cache": 23101440, "rss": 94154752, + "swap": 0, "working_set": 110149632, "failcnt": 10636, "container_data": { @@ -2531,6 +2544,7 @@ "usage": 101531648, "cache": 23130112, "rss": 78155776, + "swap": 0, "working_set": 94425088, "failcnt": 10636, "container_data": { @@ -2725,6 +2739,7 @@ "usage": 101298176, "cache": 23142400, "rss": 78155776, + "swap": 0, "working_set": 94191616, "failcnt": 10636, "container_data": { @@ -2919,6 +2934,7 @@ "usage": 101560320, "cache": 23179264, "rss": 78155776, + "swap": 0, "working_set": 94449664, "failcnt": 10636, "container_data": { @@ -3194,6 +3210,7 @@ "usage": 3849244672, "cache": 1538514944, "rss": 26042368, + "swap": 0, "working_set": 1948114944, "failcnt": 0, "container_data": { @@ -3412,6 +3429,7 @@ "usage": 3850002432, "cache": 1538514944, "rss": 26042368, + "swap": 0, "working_set": 1948868608, "failcnt": 0, "container_data": { @@ -3630,6 +3648,7 @@ "usage": 3855687680, "cache": 1538514944, "rss": 26042368, + "swap": 0, "working_set": 1954549760, 
"failcnt": 0, "container_data": { @@ -3848,6 +3867,7 @@ "usage": 3851542528, "cache": 1538514944, "rss": 26042368, + "swap": 0, "working_set": 1950404608, "failcnt": 0, "container_data": { @@ -4066,6 +4086,7 @@ "usage": 3870437376, "cache": 1538514944, "rss": 26042368, + "swap": 0, "working_set": 1969303552, "failcnt": 0, "container_data": { @@ -4284,6 +4305,7 @@ "usage": 3870306304, "cache": 1538514944, "rss": 26042368, + "swap": 0, "working_set": 1969172480, "failcnt": 0, "container_data": { @@ -4502,6 +4524,7 @@ "usage": 3854356480, "cache": 1538514944, "rss": 26042368, + "swap": 0, "working_set": 1953222656, "failcnt": 0, "container_data": { @@ -4720,6 +4743,7 @@ "usage": 3854516224, "cache": 1538514944, "rss": 26042368, + "swap": 0, "working_set": 1953378304, "failcnt": 0, "container_data": { @@ -4938,6 +4962,7 @@ "usage": 3854888960, "cache": 1538514944, "rss": 26042368, + "swap": 0, "working_set": 1953751040, "failcnt": 0, "container_data": { @@ -5109,6 +5134,7 @@ "usage": 1482752, "cache": 12288, "rss": 1470464, + "swap": 0, "working_set": 1482752, "failcnt": 0, "container_data": { @@ -5219,6 +5245,7 @@ "usage": 1482752, "cache": 12288, "rss": 1470464, + "swap": 0, "working_set": 1482752, "failcnt": 0, "container_data": { @@ -5329,6 +5356,7 @@ "usage": 1482752, "cache": 12288, "rss": 1470464, + "swap": 0, "working_set": 1482752, "failcnt": 0, "container_data": { @@ -5439,6 +5467,7 @@ "usage": 1482752, "cache": 12288, "rss": 1470464, + "swap": 0, "working_set": 1482752, "failcnt": 0, "container_data": { @@ -5549,6 +5578,7 @@ "usage": 1482752, "cache": 12288, "rss": 1470464, + "swap": 0, "working_set": 1482752, "failcnt": 0, "container_data": { @@ -5659,6 +5689,7 @@ "usage": 1482752, "cache": 12288, "rss": 1470464, + "swap": 0, "working_set": 1482752, "failcnt": 0, "container_data": { @@ -5769,6 +5800,7 @@ "usage": 1482752, "cache": 12288, "rss": 1470464, + "swap": 0, "working_set": 1482752, "failcnt": 0, "container_data": { @@ -5879,6 +5911,7 @@ "usage": 
1482752, "cache": 12288, "rss": 1470464, + "swap": 0, "working_set": 1482752, "failcnt": 0, "container_data": { @@ -6108,6 +6141,7 @@ "usage": 28196864, "cache": 2928640, "rss": 25137152, + "swap": 0, "working_set": 28028928, "failcnt": 0, "container_data": { @@ -6280,6 +6314,7 @@ "usage": 28065792, "cache": 2928640, "rss": 25137152, + "swap": 0, "working_set": 27897856, "failcnt": 0, "container_data": { @@ -6452,6 +6487,7 @@ "usage": 28188672, "cache": 2928640, "rss": 25137152, + "swap": 0, "working_set": 28020736, "failcnt": 0, "container_data": { @@ -6624,6 +6660,7 @@ "usage": 28151808, "cache": 2928640, "rss": 25137152, + "swap": 0, "working_set": 27983872, "failcnt": 0, "container_data": { @@ -6796,6 +6833,7 @@ "usage": 28188672, "cache": 2928640, "rss": 25137152, + "swap": 0, "working_set": 28020736, "failcnt": 0, "container_data": { @@ -6968,6 +7006,7 @@ "usage": 28065792, "cache": 2928640, "rss": 25137152, + "swap": 0, "working_set": 27897856, "failcnt": 0, "container_data": { @@ -7140,6 +7179,7 @@ "usage": 28192768, "cache": 2932736, "rss": 25137152, + "swap": 0, "working_set": 28020736, "failcnt": 0, "container_data": { @@ -7312,6 +7352,7 @@ "usage": 28164096, "cache": 2932736, "rss": 25137152, + "swap": 0, "working_set": 27992064, "failcnt": 0, "container_data": { @@ -7484,6 +7525,7 @@ "usage": 28364800, "cache": 2932736, "rss": 25309184, + "swap": 0, "working_set": 28196864, "failcnt": 0, "container_data": { diff --git a/kubelet/tests/test_kubelet.py b/kubelet/tests/test_kubelet.py index 0aa39f530874d..a410902d8617f 100644 --- a/kubelet/tests/test_kubelet.py +++ b/kubelet/tests/test_kubelet.py @@ -49,7 +49,9 @@ 'kubernetes.memory.requests', 'kubernetes.memory.usage', 'kubernetes.memory.working_set', + 'kubernetes.memory.cache', 'kubernetes.memory.rss', + 'kubernetes.memory.swap', 'kubernetes.network.rx_bytes', 'kubernetes.network.tx_bytes', ] @@ -61,6 +63,7 @@ 'kubernetes.cpu.cfs.throttled.periods', 'kubernetes.cpu.cfs.throttled.seconds', 
'kubernetes.memory.usage_pct', + 'kubernetes.memory.sw_limit', 'kubernetes.network.rx_dropped', 'kubernetes.network.rx_errors', 'kubernetes.network.tx_dropped',