Skip to content

Commit

Permalink
metrics: update MCDRebootErr & MCDPivotErr to add correct node label
Browse files: browse the repository at this point in the history
  • Loading branch information
kikisdeliveryservice committed Oct 26, 2020
1 parent fe5f137 commit a330d1d
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 6 deletions.
8 changes: 4 additions & 4 deletions pkg/daemon/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ var (
prometheus.GaugeOpts{
Name: "mcd_pivot_err",
Help: "errors encountered during pivot",
}, []string{"pivot_target", "err"})
}, []string{"node", "pivot_target", "err"})

// MCDState is state of mcd for indicated node (ex: degraded)
MCDState = prometheus.NewGaugeVec(
Expand All @@ -59,7 +59,7 @@ var (
MCDRebootErr = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "mcd_reboot_err",
}, []string{"message", "err"})
}, []string{"node", "message", "err"})

// MCDUpdateState logs completed update or error
MCDUpdateState = prometheus.NewGaugeVec(
Expand Down Expand Up @@ -88,9 +88,9 @@ func registerMCDMetrics() error {
}
}

MCDPivotErr.WithLabelValues("", "").Set(0)
MCDPivotErr.WithLabelValues("", "", "").Set(0)
KubeletHealthState.Set(0)
MCDRebootErr.WithLabelValues("", "").Set(0)
MCDRebootErr.WithLabelValues("", "", "").Set(0)
MCDUpdateState.WithLabelValues("", "").Set(0)

return nil
Expand Down
4 changes: 2 additions & 2 deletions pkg/daemon/update.go
Original file line number Diff line number Diff line change
Expand Up @@ -1614,13 +1614,13 @@ func (dn *Daemon) reboot(rationale string) error {
// either, we just have one for the MCD itself.
if err := rebootCmd.Run(); err != nil {
dn.logSystem("failed to run reboot: %v", err)
MCDRebootErr.WithLabelValues("failed to run reboot", err.Error()).SetToCurrentTime()
MCDRebootErr.WithLabelValues(dn.node.Name, "failed to run reboot", err.Error()).SetToCurrentTime()
}

// wait to be killed via SIGTERM from the kubelet shutting down
time.Sleep(defaultRebootTimeout)

// if everything went well, this should be unreachable.
MCDRebootErr.WithLabelValues("reboot failed", "this error should be unreachable, something is seriously wrong").SetToCurrentTime()
MCDRebootErr.WithLabelValues(dn.node.Name, "reboot failed", "this error should be unreachable, something is seriously wrong").SetToCurrentTime()
return fmt.Errorf("reboot failed; this error should be unreachable, something is seriously wrong")
}

0 comments on commit a330d1d

Please sign in to comment.