diff --git a/pkg/daemon/drain.go b/pkg/daemon/drain.go index c10ddfa6dc..ba5adb3fc1 100644 --- a/pkg/daemon/drain.go +++ b/pkg/daemon/drain.go @@ -73,6 +73,7 @@ func (dn *Daemon) drain() error { done <- true failMsg := fmt.Sprintf("failed to drain node : %s after 1 hour", dn.node.Name) dn.recorder.Eventf(getNodeRef(dn.node), corev1.EventTypeWarning, "FailedToDrain", failMsg) + MCDDrainErr.Set(1) return errors.New(failMsg) case <-drainer(): return nil @@ -98,7 +99,6 @@ func (dn *Daemon) performDrain() error { } // We are here, that means we need to cordon and drain node - MCDDrainErr.WithLabelValues(dn.node.Name, "").Set(0) dn.logSystem("Update prepared; beginning drain") startTime := time.Now() @@ -111,7 +111,7 @@ func (dn *Daemon) performDrain() error { dn.logSystem("drain complete") t := time.Since(startTime).Seconds() glog.Infof("Successful drain took %v seconds", t) - MCDDrainErr.WithLabelValues(dn.node.Name, "").Set(0) + MCDDrainErr.Set(0) return nil } diff --git a/pkg/daemon/metrics.go b/pkg/daemon/metrics.go index 5923c257c3..8f6786d2a5 100644 --- a/pkg/daemon/metrics.go +++ b/pkg/daemon/metrics.go @@ -27,12 +27,12 @@ var ( Help: "indicates a successful SSH login", }) - // MCDDrainErr logs errors received during failed drain - MCDDrainErr = prometheus.NewGaugeVec( + // MCDDrainErr logs failed drain + MCDDrainErr = prometheus.NewGauge( prometheus.GaugeOpts{ Name: "mcd_drain_err", - Help: "errors from failed drain", - }, []string{"node", "err"}) + Help: "logs failed drain", + }) // MCDPivotErr shows errors encountered during pivot MCDPivotErr = prometheus.NewGaugeVec( @@ -88,6 +88,7 @@ func registerMCDMetrics() error { } } + MCDDrainErr.Set(0) MCDPivotErr.WithLabelValues("", "", "").Set(0) KubeletHealthState.Set(0) MCDRebootErr.WithLabelValues("", "", "").Set(0)