Skip to content

Commit

Permalink
ref(metrics): slightly change implementation and naming of metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
Martin Linkhorst committed Jan 8, 2019
1 parent 54e75ab commit 0ee6ac9
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 28 deletions.
12 changes: 6 additions & 6 deletions chaoskube/chaoskube.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ import (
"k8s.io/client-go/tools/record"
"k8s.io/client-go/tools/reference"

"github.com/linki/chaoskube/util"
"github.com/linki/chaoskube/metrics"
"github.com/linki/chaoskube/util"
)

// Chaoskube represents an instance of chaoskube
Expand Down Expand Up @@ -102,15 +102,13 @@ func New(client kubernetes.Interface, labels, annotations, namespaces labels.Sel
// described by channel next. It returns when the given context is canceled.
func (c *Chaoskube) Run(ctx context.Context, next <-chan time.Time) {
for {
metrics.RunCounter.Inc()
if err := c.TerminateVictim(); err != nil {
metrics.ErrorCounter.Inc()
c.Logger.WithField("err", err).Error("failed to terminate victim")
} else {
metrics.PodsDeletedCounter.Inc()
metrics.ErrorsTotal.Inc()
}

c.Logger.Debug("sleeping...")
metrics.IntervalsTotal.Inc()
select {
case <-next:
case <-ctx.Done():
Expand Down Expand Up @@ -212,11 +210,13 @@ func (c *Chaoskube) DeletePod(victim v1.Pod) error {

start := time.Now()
err := c.Client.CoreV1().Pods(victim.Namespace).Delete(victim.Name, deleteOptions(c.GracePeriod))
metrics.TerminationHistogram.Observe(time.Since(start).Seconds())
metrics.TerminationDurationSeconds.Observe(time.Since(start).Seconds())
if err != nil {
return err
}

metrics.PodsDeletedTotal.Inc()

ref, err := reference.GetReference(scheme.Scheme, &victim)
if err != nil {
return err
Expand Down
20 changes: 20 additions & 0 deletions chaoskube/chaoskube_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,26 @@ func (suite *Suite) TestDeletePod() {
}
}

// TestDeletePodNotFound checks that DeletePod reports an error, rather than
// succeeding silently, when the pod it is asked to delete cannot be found.
func (suite *Suite) TestDeletePodNotFound() {
	var (
		noWeekdays    = []time.Weekday{}
		noTimePeriods = []util.TimePeriod{}
		noDays        = []time.Time{}
	)

	subject := suite.setup(
		labels.Everything(),
		labels.Everything(),
		labels.Everything(),
		noWeekdays,
		noTimePeriods,
		noDays,
		time.UTC,
		time.Duration(0),
		false,
		10,
	)

	// The pod is only constructed here; this test makes no call that creates
	// it through the client before attempting the deletion.
	missing := util.NewPod("default", "foo", v1.PodRunning)

	suite.EqualError(subject.DeletePod(missing), `pods "foo" not found`)
}

func (suite *Suite) TestTerminateVictim() {
midnight := util.NewTimePeriod(
ThankGodItsFriday{}.Now().Add(-16*time.Hour),
Expand Down
48 changes: 26 additions & 22 deletions metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -1,29 +1,33 @@
package metrics

import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)

var (
// PodsDeletedCounter counts the total number of pods deleted.
PodsDeletedCounter = promauto.NewCounter(prometheus.CounterOpts{
Name: "chaoskube_pods_deleted",
Help: "The total number of pods deleted",
})
// RunCounter counts the total number of pod termination logic runs.
RunCounter = promauto.NewCounter(prometheus.CounterOpts{
Name: "chaoskube_run_counts",
Help: "The total number of pod termination logic runs",
})
// ErrorCounter counts the total number of errors on the terminate victim operation.
ErrorCounter = promauto.NewCounter(prometheus.CounterOpts{
Name: "chaoskube_termination_errors",
Help: "The total number of errors on terminate victim operation",
})
// TerminationHistogram is a histogram over the time, in seconds, a single pod termination took.
TerminationHistogram = promauto.NewHistogram(prometheus.HistogramOpts{
Name: "chaoskube_termination_time_seconds",
Help: "The time took single pod termination to finish",
})
// PodsDeletedTotal is the total number of deleted pods.
// It is exported as "chaoskube_pods_deleted_total" (Namespace and Name joined by "_").
PodsDeletedTotal = promauto.NewCounter(prometheus.CounterOpts{
Namespace: "chaoskube",
Name: "pods_deleted_total",
Help: "The total number of pods deleted",
})
// IntervalsTotal is the total number of intervals, i.e. iterations of the
// termination loop inside Run() (it is incremented once per loop pass, not
// once per call to Run()).
IntervalsTotal = promauto.NewCounter(prometheus.CounterOpts{
Namespace: "chaoskube",
Name: "intervals_total",
Help: "The total number of pod termination logic runs",
})
// ErrorsTotal is the total number of errors encountered while trying to terminate pods.
ErrorsTotal = promauto.NewCounter(prometheus.CounterOpts{
Namespace: "chaoskube",
Name: "errors_total",
Help: "The total number of errors on terminate victim operation",
})
// TerminationDurationSeconds is a histogram over the time, in seconds, it took to terminate pods.
TerminationDurationSeconds = promauto.NewHistogram(prometheus.HistogramOpts{
Namespace: "chaoskube",
Name: "termination_duration_seconds",
Help: "The time it took a single pod termination to finish",
})
)

0 comments on commit 0ee6ac9

Please sign in to comment.