From 25415d9ef8e46682c95d9b562dc90e301b6bb7c2 Mon Sep 17 00:00:00 2001 From: David Bouchare Date: Fri, 7 Jun 2019 18:18:01 +0200 Subject: [PATCH 1/3] Add coredns prometheus health and ksm cronjob sc --- coredns/assets/service_checks.json | 12 +++++++++++- kubernetes_state/assets/service_checks.json | 9 +++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/coredns/assets/service_checks.json b/coredns/assets/service_checks.json index fe51488c7066f..73bffca1904d8 100644 --- a/coredns/assets/service_checks.json +++ b/coredns/assets/service_checks.json @@ -1 +1,11 @@ -[] +[ + { + "agent_version": "6.11.0", + "integration":"coredns", + "check": "coredns.prometheus.health", + "statuses": ["ok", "critical"], + "groups": ["endpoint"], + "name": "CoreDNS prometheus health", + "description": "Returns `CRITICAL` if the check cannot access the metrics endpoint. Returns `OK` otherwise." + } +] diff --git a/kubernetes_state/assets/service_checks.json b/kubernetes_state/assets/service_checks.json index 0aea1183f062c..2eb1557cde7da 100644 --- a/kubernetes_state/assets/service_checks.json +++ b/kubernetes_state/assets/service_checks.json @@ -43,5 +43,14 @@ "groups": ["host", "node"], "name": "Node Network Unavailable", "description": "Returns `CRITICAL` if a cluster node is in a network unavailable state. Returns `UNKNOWN` if status is unknown. Returns `OK` otherwise." + }, + { + "agent_version": "5.6.0", + "integration":"kubernetes", + "check": "kubernetes_state.cronjob.next_schedule_time", + "statuses": ["ok", "unknown", "critical"], + "groups": ["host", "node"], + "name": "CronJob next scheduled time", + "description": "Returns `CRITICAL` if a cron job does not have a next scheduled time for execution. Returns `UNKNOWN` if the scheduled time is unknown. Returns `OK` otherwise." } ] From fafcf6d5e1661980e6ce7ccafdc5de8e9e83a656 Mon Sep 17 00:00:00 2001 From: David Bouchare Date: Wed, 12 Jun 2019 14:55:50 +0200 Subject: [PATCH 2/3] Add service checks in READMEs --- coredns/README.md | 4 +++- kubernetes_state/README.md | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/coredns/README.md b/coredns/README.md index 32abbb89f1a5e..05c08164958f6 100644 --- a/coredns/README.md +++ b/coredns/README.md @@ -46,7 +46,9 @@ The CoreDNS check does not include any events. ### Service Checks -The CoreDNS check does not include any service checks. +`coredns.prometheus.health`: + +Returns CRITICAL if the Agent cannot reach the metrics endpoints. ## Troubleshooting diff --git a/kubernetes_state/README.md b/kubernetes_state/README.md index 7e30346470d31..fba1c465dc9ed 100644 --- a/kubernetes_state/README.md +++ b/kubernetes_state/README.md @@ -53,6 +53,10 @@ Returns `OK` otherwise. Returns `CRITICAL` if a cluster node is in a network unavailable state. Returns `OK` otherwise. +**kubernetes_state.cronjob.next_schedule_time** +Returns `CRITICAL` if a cron job does not have a next scheduled time for execution. +Returns `OK` otherwise. + ## Troubleshooting Need help? Contact [Datadog support][6]. From a8dcb1d7377e44cef697c2f7d8fb51b676835578 Mon Sep 17 00:00:00 2001 From: David Bouchare Date: Tue, 18 Jun 2019 17:13:49 +0200 Subject: [PATCH 3/3] Markdown for critical - coredns --- coredns/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coredns/README.md b/coredns/README.md index 05c08164958f6..a0bc286a28f13 100644 --- a/coredns/README.md +++ b/coredns/README.md @@ -48,7 +48,7 @@ The CoreDNS check does not include any events. `coredns.prometheus.health`: -Returns CRITICAL if the Agent cannot reach the metrics endpoints. +Returns `CRITICAL` if the Agent cannot reach the metrics endpoints. ## Troubleshooting