Skip to content

Commit

Permalink
✨ added reconciles_total metric
Browse files Browse the repository at this point in the history
  • Loading branch information
droot committed Dec 14, 2018
1 parent 43351af commit f497653
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 7 deletions.
5 changes: 4 additions & 1 deletion pkg/internal/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,13 +214,15 @@ func (c *Controller) processNextWorkItem() bool {
c.Queue.AddRateLimited(req)
log.Error(err, "Reconciler error", "controller", c.Name, "request", req)
ctrlmetrics.ReconcileErrors.WithLabelValues(c.Name).Inc()

ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, "error").Inc()
return false
} else if result.RequeueAfter > 0 {
c.Queue.AddAfter(req, result.RequeueAfter)
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, "requeue_after").Inc()
return true
} else if result.Requeue {
c.Queue.AddRateLimited(req)
ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, "requeue").Inc()
return true
}

Expand All @@ -231,6 +233,7 @@ func (c *Controller) processNextWorkItem() bool {
// TODO(directxman12): What does 1 mean? Do we want level constants? Do we want levels at all?
log.V(1).Info("Successfully Reconciled", "controller", c.Name, "request", req)

ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, "success").Inc()
// Return true, don't take a break
return true
}
Expand Down
129 changes: 126 additions & 3 deletions pkg/internal/controller/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -408,14 +408,137 @@ var _ = Describe("controller", func() {
// TODO(community): write this test
})

Context("prometheus metric reconcile_total", func() {
	var reconcileTotal dto.Metric

	// totalFor reads the reconcile_total counter for this controller and the
	// given result label, returning its current value. Errors from Write are
	// propagated instead of being silently dropped.
	totalFor := func(result string) (float64, error) {
		if err := ctrlmetrics.ReconcileTotal.WithLabelValues(ctrl.Name, result).Write(&reconcileTotal); err != nil {
			return 0, err
		}
		return reconcileTotal.GetCounter().GetValue(), nil
	}

	// expectTotal builds a check that fails with the given message unless the
	// counter for the result label currently equals want. The returned func can
	// be invoked directly or polled by Eventually.
	expectTotal := func(result string, want float64, failure string) func() error {
		return func() error {
			got, err := totalFor(result)
			if err != nil {
				return err
			}
			if got != want {
				return fmt.Errorf("%s", failure)
			}
			return nil
		}
	}

	// reconcileOnce starts the controller in a goroutine, enqueues request and
	// blocks until that request is received on the reconciled channel.
	reconcileOnce := func(step string) {
		go func() {
			defer GinkgoRecover()
			Expect(ctrl.Start(stop)).NotTo(HaveOccurred())
		}()
		By(step)
		ctrl.Queue.Add(request)

		Expect(<-reconciled).To(Equal(request))
	}

	BeforeEach(func() {
		// Start every spec from a clean counter so the 0 -> 1 assertions hold.
		ctrlmetrics.ReconcileTotal.Reset()
	})

	It("should get updated on successful reconciliation", func(done Done) {
		Expect(expectTotal("success", 0.0, "metric reconcile total not reset")()).Should(Succeed())

		reconcileOnce("Invoking Reconciler which will succeed")

		Eventually(expectTotal("success", 1.0, "metric reconcile total not updated"), 2.0).Should(Succeed())

		close(done)
	}, 2.0)

	It("should get updated on reconcile errors", func(done Done) {
		Expect(expectTotal("error", 0.0, "metric reconcile total not reset")()).Should(Succeed())

		fakeReconcile.Err = fmt.Errorf("expected error: reconcile")
		reconcileOnce("Invoking Reconciler which will give an error")

		Eventually(expectTotal("error", 1.0, "metric reconcile total not updated"), 2.0).Should(Succeed())

		close(done)
	}, 2.0)

	It("should get updated when reconcile returns with retry enabled", func(done Done) {
		// The precondition must read the same "requeue" series that is asserted
		// on below; the controller never emits a "retry" label.
		Expect(expectTotal("requeue", 0.0, "metric reconcile total not reset")()).Should(Succeed())

		fakeReconcile.Result.Requeue = true
		reconcileOnce("Invoking Reconciler which will return result with Requeue enabled")

		Eventually(expectTotal("requeue", 1.0, "metric reconcile total not updated"), 2.0).Should(Succeed())

		close(done)
	}, 2.0)

	It("should get updated when reconcile returns with retryAfter enabled", func(done Done) {
		// The precondition must read the same "requeue_after" series that is
		// asserted on below; the controller never emits a "retry_after" label.
		Expect(expectTotal("requeue_after", 0.0, "metric reconcile total not reset")()).Should(Succeed())

		// NOTE(review): the 5h delay presumably keeps the item from being
		// re-processed within the spec's 2s window — confirm.
		fakeReconcile.Result.RequeueAfter = 5 * time.Hour
		reconcileOnce("Invoking Reconciler which will return result with requeueAfter enabled")

		Eventually(expectTotal("requeue_after", 1.0, "metric reconcile total not updated"), 2.0).Should(Succeed())

		close(done)
	}, 2.0)
})

Context("should update prometheus metrics", func() {
It("should requeue a Request if there is an error and continue processing items", func(done Done) {
var queueLength, reconcileErrs dto.Metric
ctrlmetrics.QueueLength.Reset()
Expect(func() error {
ctrlmetrics.QueueLength.WithLabelValues(ctrl.Name).Write(&queueLength)
if queueLength.GetGauge().GetValue() != 0.0 {
return fmt.Errorf("metrics not reset")
return fmt.Errorf("metric queue length not reset")
}
return nil
}()).Should(Succeed())
Expand All @@ -424,7 +547,7 @@ var _ = Describe("controller", func() {
Expect(func() error {
ctrlmetrics.ReconcileErrors.WithLabelValues(ctrl.Name).Write(&reconcileErrs)
if reconcileErrs.GetCounter().GetValue() != 0.0 {
return fmt.Errorf("metrics not reset")
return fmt.Errorf("metric reconcile errors not reset")
}
return nil
}()).Should(Succeed())
Expand All @@ -444,7 +567,7 @@ var _ = Describe("controller", func() {
Eventually(func() error {
ctrlmetrics.QueueLength.WithLabelValues(ctrl.Name).Write(&queueLength)
if queueLength.GetGauge().GetValue() != 1.0 {
return fmt.Errorf("metrics not updated")
return fmt.Errorf("metric queue length not updated")
}
return nil
}, 2.0).Should(Succeed())
Expand Down
16 changes: 13 additions & 3 deletions pkg/internal/controller/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,24 +29,34 @@ var (
Help: "Length of reconcile queue per controller",
}, []string{"controller"})

// ReconcileTotal is a prometheus counter metric which holds the total
// number of reconciliations per controller. It has two labels: the controller
// label refers to the controller name and the result label refers to the
// reconcile result, i.e. one of success, error, requeue or requeue_after.
ReconcileTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "controller_runtime_reconcile_total",
Help: "Total number of reconciliations per controller",
}, []string{"controller", "result"})

// ReconcileErrors is a prometheus counter metrics which holds the total
// number of errors from the Reconciler
ReconcileErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "controller_runtime_reconcile_errors_total",
Help: "Total number of reconcile errors per controller",
Help: "Total number of reconciliation errors per controller",
}, []string{"controller"})

// ReconcileTime is a prometheus metric which keeps track of the duration
// of reconciles
// of reconciliations
ReconcileTime = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Name: "controller_runtime_reconcile_time_seconds",
Help: "Length of time per reconcile per controller",
Help: "Length of time per reconciliation per controller",
}, []string{"controller"})
)

func init() {
metrics.Registry.MustRegister(
QueueLength,
ReconcileTotal,
ReconcileErrors,
ReconcileTime,
// expose process metrics like CPU, Memory, file descriptor usage etc.
Expand Down

0 comments on commit f497653

Please sign in to comment.