From 89d1bf32b11005182a485d586ce9eaa530b9de26 Mon Sep 17 00:00:00 2001 From: Shai Katz Date: Tue, 28 Aug 2018 14:11:15 +0300 Subject: [PATCH 1/5] add pods deleted metric and update client_golang/prometheus version to latest --- Gopkg.lock | 6 +++--- Gopkg.toml | 2 +- chaoskube/chaoskube.go | 12 +++++++++--- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/Gopkg.lock b/Gopkg.lock index b9cd806e..fa049ae7 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -156,10 +156,10 @@ name = "github.com/prometheus/client_golang" packages = [ "prometheus", + "prometheus/promauto", "prometheus/promhttp" ] - revision = "c5b7fccd204277076155f10851dad72b76a49317" - version = "v0.8.0" + revision = "3653aff4d509dd87a6ba41b82d57c0c662733e14" [[projects]] branch = "master" @@ -461,6 +461,6 @@ [solve-meta] analyzer-name = "dep" analyzer-version = 1 - inputs-digest = "bf2032d0afbea09aafb0db352827c9efe8c39e5479d51fbe38b1d42c70d6b104" + inputs-digest = "e6d36cabd191aaf85d9202563752292e05e0f7372bd611af9c4f366aef4050dc" solver-name = "gps-cdcl" solver-version = 1 diff --git a/Gopkg.toml b/Gopkg.toml index a71d25e4..15118835 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -24,7 +24,7 @@ [[constraint]] name = "github.com/prometheus/client_golang" - version = "0.8.0" + revision = "3653aff4d509dd87a6ba41b82d57c0c662733e14" [[override]] name = "github.com/golang/protobuf" diff --git a/chaoskube/chaoskube.go b/chaoskube/chaoskube.go index d734f8db..7bf25f45 100644 --- a/chaoskube/chaoskube.go +++ b/chaoskube/chaoskube.go @@ -7,15 +7,15 @@ import ( "math/rand" "time" + "github.com/linki/chaoskube/util" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" log "github.com/sirupsen/logrus" - "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/selection" "k8s.io/client-go/kubernetes" - - "github.com/linki/chaoskube/util" ) // Chaoskube represents an instance of chaoskube 
@@ -57,6 +57,11 @@ var ( msgTimeOfDayExcluded = "time of day excluded" // msgDayOfYearExcluded is the log message when termination is suspended due to the day of year filter msgDayOfYearExcluded = "day of year excluded" + // podsDeleted is the pods deleted counter + podsDeleted = promauto.NewCounter(prometheus.CounterOpts{ + Name: "pods_deleted", + Help: "The total number of pods deleted", + }) ) // New returns a new instance of Chaoskube. It expects: @@ -91,6 +96,7 @@ func (c *Chaoskube) Run(ctx context.Context, next <-chan time.Time) { c.Logger.WithField("err", err).Error("failed to terminate victim") } + podsDeleted.Inc() c.Logger.Debug("sleeping...") select { case <-next: From f3eb70d6b186567cc7ed82c210a08bfebcc07412 Mon Sep 17 00:00:00 2001 From: Shai Katz Date: Tue, 28 Aug 2018 14:38:59 +0300 Subject: [PATCH 2/5] add some more counters --- chaoskube/chaoskube.go | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/chaoskube/chaoskube.go b/chaoskube/chaoskube.go index 7bf25f45..1f2bb513 100644 --- a/chaoskube/chaoskube.go +++ b/chaoskube/chaoskube.go @@ -7,9 +7,8 @@ import ( "math/rand" "time" + "github.com/linki/chaoskube/metrics" "github.com/linki/chaoskube/util" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" log "github.com/sirupsen/logrus" "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -57,11 +56,6 @@ var ( msgTimeOfDayExcluded = "time of day excluded" // msgDayOfYearExcluded is the log message when termination is suspended due to the day of year filter msgDayOfYearExcluded = "day of year excluded" - // podsDeleted is the pods deleted counter - podsDeleted = promauto.NewCounter(prometheus.CounterOpts{ - Name: "pods_deleted", - Help: "The total number of pods deleted", - }) ) // New returns a new instance of Chaoskube. 
It expects: @@ -92,11 +86,14 @@ func New(client kubernetes.Interface, labels, annotations, namespaces labels.Sel // described by channel next. It returns when the given context is canceled. func (c *Chaoskube) Run(ctx context.Context, next <-chan time.Time) { for { + metrics.RunCounter.Inc() if err := c.TerminateVictim(); err != nil { + metrics.ErrorCounter.Inc() c.Logger.WithField("err", err).Error("failed to terminate victim") + } else { + metrics.PodsDeletedCounter.Inc() } - podsDeleted.Inc() c.Logger.Debug("sleeping...") select { case <-next: From 68f4ede9746260492c382f59ac22e4fa391a4734 Mon Sep 17 00:00:00 2001 From: Shai Katz Date: Tue, 28 Aug 2018 15:10:38 +0300 Subject: [PATCH 3/5] adding delete pod timing --- chaoskube/chaoskube.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/chaoskube/chaoskube.go b/chaoskube/chaoskube.go index 1f2bb513..434efbcd 100644 --- a/chaoskube/chaoskube.go +++ b/chaoskube/chaoskube.go @@ -87,6 +87,7 @@ func New(client kubernetes.Interface, labels, annotations, namespaces labels.Sel func (c *Chaoskube) Run(ctx context.Context, next <-chan time.Time) { for { metrics.RunCounter.Inc() + if err := c.TerminateVictim(); err != nil { metrics.ErrorCounter.Inc() c.Logger.WithField("err", err).Error("failed to terminate victim") @@ -194,7 +195,11 @@ func (c *Chaoskube) DeletePod(victim v1.Pod) error { return nil } - return c.Client.CoreV1().Pods(victim.Namespace).Delete(victim.Name, nil) + start := time.Now() + e := c.Client.CoreV1().Pods(victim.Namespace).Delete(victim.Name, nil) + metrics.TerminationHistogram.Observe(time.Since(start).Seconds()) + + return e } // filterByNamespaces filters a list of pods by a given namespace selector. 
From 7e9655ccf2fbb07ff37263ea88da357f57886bd2 Mon Sep 17 00:00:00 2001 From: Shai Katz Date: Tue, 28 Aug 2018 15:14:13 +0300 Subject: [PATCH 4/5] add missing file --- metrics/metrics.go | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 metrics/metrics.go diff --git a/metrics/metrics.go b/metrics/metrics.go new file mode 100644 index 00000000..585f9b68 --- /dev/null +++ b/metrics/metrics.go @@ -0,0 +1,29 @@ +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +var ( + // PodsDeletedCounter is the pods deleted counter + PodsDeletedCounter = promauto.NewCounter(prometheus.CounterOpts{ + Name: "pods_deleted", + Help: "The total number of pods deleted", + }) + // RunCounter is the run function executions counter + RunCounter = promauto.NewCounter(prometheus.CounterOpts{ + Name: "run_counts", + Help: "The total number of pod termination logic runs", + }) + // ErrorCounter is the failed victim terminations counter + ErrorCounter = promauto.NewCounter(prometheus.CounterOpts{ + Name: "termination_errors", + Help: "The total number of errors on terminate victim operation", + }) + // TerminationHistogram is the pod termination duration histogram + TerminationHistogram = promauto.NewHistogram(prometheus.HistogramOpts{ + Name: "termination_time_milliseconds", + Help: "The time took single pod termination to finish", + }) +) From 16a02969596ac4b6a53484bee2235fa32403333f Mon Sep 17 00:00:00 2001 From: Shai Katz Date: Mon, 3 Sep 2018 16:41:33 +0300 Subject: [PATCH 5/5] Update metrics.go --- metrics/metrics.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/metrics.go b/metrics/metrics.go index 585f9b68..9d86d8ae 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -23,7 +23,7 @@ var ( }) // TerminationHistogram is the pod termination duration histogram TerminationHistogram = promauto.NewHistogram(prometheus.HistogramOpts{ - Name: 
"termination_time_milliseconds", + Name: "termination_time_seconds", Help: "The time took single pod termination to finish", }) )