diff --git a/notify/notify.go b/notify/notify.go index d3f13fb962..fc04cbbe32 100644 --- a/notify/notify.go +++ b/notify/notify.go @@ -262,7 +262,7 @@ func NewMetrics(r prometheus.Registerer) *Metrics { Namespace: "alertmanager", Name: "notifications_failed_total", Help: "The total number of failed notifications.", - }, []string{"integration", "code"}), + }, []string{"integration", "reason"}), numNotificationRequestsTotal: prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: "alertmanager", Name: "notification_requests_total", @@ -297,8 +297,8 @@ func NewMetrics(r prometheus.Registerer) *Metrics { m.numNotificationRequestsFailedTotal.WithLabelValues(integration) m.notificationLatencySeconds.WithLabelValues(integration) - for _, code := range possibleFailureStatusCategory { - m.numTotalFailedNotifications.WithLabelValues(integration, code) + for _, reason := range possibleFailureReasonCategory { + m.numTotalFailedNotifications.WithLabelValues(integration, reason) } } r.MustRegister( @@ -666,10 +666,10 @@ func (r RetryStage) Exec(ctx context.Context, l log.Logger, alerts ...*types.Ale r.metrics.numNotifications.WithLabelValues(r.integration.Name()).Inc() ctx, alerts, err := r.exec(ctx, l, alerts...) - failureReason := defaultFailureReason + failureReason := DefaultReason.String() if err != nil { if e, ok := errors.Cause(err).(*ErrorWithReason); ok { - failureReason = e.Reason + failureReason = e.Reason.String() } r.metrics.numTotalFailedNotifications.WithLabelValues(r.integration.Name(), failureReason).Inc() } diff --git a/notify/notify_test.go b/notify/notify_test.go index 6950eadd15..fb4a9a253d 100644 --- a/notify/notify_test.go +++ b/notify/notify_test.go @@ -425,27 +425,25 @@ func TestRetryStageWithError(t *testing.T) { func TestRetryStageWithErrorCode(t *testing.T) { testcases := map[string]struct { - reason string - reasonlabel string - expectedCount int + isNewErrorWithReason bool + reason Reason + reasonlabel string + expectedCount int }{ - "for clientError": {reason: clientError, reasonlabel: clientError, expectedCount: 1}, - "for serverError": {reason: serverError, reasonlabel: serverError, expectedCount: 1}, - "for unexpected code": {reason: "unexpected", reasonlabel: defaultFailureReason, expectedCount: 1}, + "for clientError": {isNewErrorWithReason: true, reason: ClientErrorReason, reasonlabel: ClientErrorReason.String(), expectedCount: 1}, + "for serverError": {isNewErrorWithReason: true, reason: ServerErrorReason, reasonlabel: ServerErrorReason.String(), expectedCount: 1}, + "for unexpected code": {isNewErrorWithReason: false, reason: DefaultReason, reasonlabel: DefaultReason.String(), expectedCount: 1}, } for _, testData := range testcases { - fail, retry := true, false - sent := []*types.Alert{} + retry := false testData := testData i := Integration{ name: "test", notifier: notifierFunc(func(ctx context.Context, alerts ...*types.Alert) (bool, error) { - if fail { - fail = false - return retry, NewErrorWithReason(testData.reason, errors.New("fail to deliver notification")) + if !testData.isNewErrorWithReason { + return retry, errors.New("fail to deliver notification") } - sent = append(sent, alerts...) - return false, nil + return retry, NewErrorWithReason(testData.reason, errors.New("fail to deliver notification")) }), rs: sendResolved(false), } diff --git a/notify/util.go b/notify/util.go index 98d8943641..21c41a0538 100644 --- a/notify/util.go +++ b/notify/util.go @@ -30,17 +30,6 @@ import ( "github.com/prometheus/alertmanager/types" ) -// defaultFailureReason is the default reason for numTotalFailedNotifications metric -const defaultFailureReason = "other" - -const ( - clientError = "clientError" - serverError = "ServerError" -) - -// possibleFailureStatusCategory is a list of possible failure reason -var possibleFailureStatusCategory = []string{clientError, serverError, defaultFailureReason} - // UserAgentHeader is the default User-Agent for notification requests var UserAgentHeader = fmt.Sprintf("Alertmanager/%s", version.Version) @@ -225,10 +214,10 @@ type ErrorWithReason struct { Err error // The reason of the failure. - Reason string + Reason Reason } -func NewErrorWithReason(reason string, err error) *ErrorWithReason { +func NewErrorWithReason(reason Reason, err error) *ErrorWithReason { return &ErrorWithReason{ Err: err, Reason: reason, @@ -239,16 +228,41 @@ func (e *ErrorWithReason) Error() string { return e.Err.Error() } +// Reason is the failure reason +type Reason int + +const ( + DefaultReason Reason = iota + ClientErrorReason + ServerErrorReason +) + +func (s Reason) String() string { + switch s { + case DefaultReason: + return "other" + case ClientErrorReason: + return "clientError" + case ServerErrorReason: + return "serverError" + default: + panic(fmt.Sprintf("unknown Reason: %d", s)) + } +} + +// possibleFailureReasonCategory is a list of possible failure reason +var possibleFailureReasonCategory = []string{DefaultReason.String(), ClientErrorReason.String(), ServerErrorReason.String()} + // GetFailureReasonFromStatusCode return the reason for failure request // the status starts with 4 will return 4xx and starts with 5 will return 5xx // other than 4xx and 5xx input status will return an 5xx. -func GetFailureReasonFromStatusCode(statusCode int) string { +func GetFailureReasonFromStatusCode(statusCode int) Reason { if statusCode/100 == 4 { - return clientError + return ClientErrorReason } if statusCode/100 == 5 { - return serverError + return ServerErrorReason } - return defaultFailureReason + return DefaultReason }