Skip to content

Commit

Permalink
use status reason rather than code
Browse files Browse the repository at this point in the history
Signed-off-by: Yijie Qin <qinyijie@amazon.com>
  • Loading branch information
qinxx108 committed Jan 12, 2023
1 parent ebbb2f7 commit e10015d
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 48 deletions.
33 changes: 4 additions & 29 deletions notify/notify.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,6 @@ type Peer interface {
// to a notification pipeline.
const MinTimeout = 10 * time.Second

// defaultStatusCodeCategory is the default status code category for numTotalFailedNotifications metric
const defaultStatusCodeCategory = "5xx"

const (
failure4xxCategoryCode = "4xx"
failure5xxCategoryCode = "5xx"
)

// possibleFailureStatusCategory is a list of possible failure status code category
var possibleFailureStatusCategory = []string{failure4xxCategoryCode, failure5xxCategoryCode}

// Notifier notifies about alerts under constraints of the given context. It
// returns an error if unsuccessful and a flag whether the error is
// recoverable. This information is useful for a retry logic.
Expand Down Expand Up @@ -677,12 +666,12 @@ func (r RetryStage) Exec(ctx context.Context, l log.Logger, alerts ...*types.Ale
r.metrics.numNotifications.WithLabelValues(r.integration.Name()).Inc()
ctx, alerts, err := r.exec(ctx, l, alerts...)

statusCodeCategory := defaultStatusCodeCategory
failureReason := defaultFailureReason
if err != nil {
if e, ok := errors.Cause(err).(*ErrorWithStatusCode); ok {
statusCodeCategory = getFailureStatusCodeCategory(e.StatusCode)
if e, ok := errors.Cause(err).(*ErrorWithReason); ok {
failureReason = e.Reason
}
r.metrics.numTotalFailedNotifications.WithLabelValues(r.integration.Name(), statusCodeCategory).Inc()
r.metrics.numTotalFailedNotifications.WithLabelValues(r.integration.Name(), failureReason).Inc()
}
return ctx, alerts, err
}
Expand Down Expand Up @@ -895,17 +884,3 @@ func inTimeIntervals(now time.Time, intervals map[string][]timeinterval.TimeInte
}
return false, nil
}

// getFailureStatusCodeCategory return the status code category for failure request
// the status starts with 4 will return 4xx and starts with 5 will return 5xx
// other than 4xx and 5xx input status will return an 5xx.
func getFailureStatusCodeCategory(statusCode int) string {
if statusCode/100 == 4 {
return failure4xxCategoryCode
}
if statusCode/100 == 5 {
return failure5xxCategoryCode
}

return defaultStatusCodeCategory
}
16 changes: 7 additions & 9 deletions notify/notify_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -425,15 +425,13 @@ func TestRetryStageWithError(t *testing.T) {

func TestRetryStageWithErrorCode(t *testing.T) {
testcases := map[string]struct {
errorcode int
codelabel string
reason string
reasonlabel string
expectedCount int
}{
"for 400": {errorcode: 400, codelabel: failure4xxCategoryCode, expectedCount: 1},
"for 402": {errorcode: 402, codelabel: failure4xxCategoryCode, expectedCount: 1},
"for 500": {errorcode: 500, codelabel: failure5xxCategoryCode, expectedCount: 1},
"for 502": {errorcode: 502, codelabel: failure5xxCategoryCode, expectedCount: 1},
"for expected code 100": {errorcode: 100, codelabel: failure5xxCategoryCode, expectedCount: 1},
"for clientError": {reason: clientError, reasonlabel: clientError, expectedCount: 1},
"for serverError": {reason: serverError, reasonlabel: serverError, expectedCount: 1},
"for unexpected code": {reason: "unexpected", reasonlabel: defaultFailureReason, expectedCount: 1},
}
for _, testData := range testcases {
fail, retry := true, false
Expand All @@ -444,7 +442,7 @@ func TestRetryStageWithErrorCode(t *testing.T) {
notifier: notifierFunc(func(ctx context.Context, alerts ...*types.Alert) (bool, error) {
if fail {
fail = false
return retry, NewErrorWithStatusCode(testData.errorcode, errors.New("fail to deliver notification"))
return retry, NewErrorWithReason(testData.reason, errors.New("fail to deliver notification"))
}
sent = append(sent, alerts...)
return false, nil
Expand All @@ -471,7 +469,7 @@ func TestRetryStageWithErrorCode(t *testing.T) {
resctx, _, err := r.Exec(ctx, log.NewNopLogger(), alerts...)
counter := r.metrics.numTotalFailedNotifications

require.Equal(t, testData.expectedCount, int(prom_testutil.ToFloat64(counter.WithLabelValues(r.integration.Name(), testData.codelabel))))
require.Equal(t, testData.expectedCount, int(prom_testutil.ToFloat64(counter.WithLabelValues(r.integration.Name(), testData.reasonlabel))))

require.NotNil(t, err)
require.NotNil(t, resctx)
Expand Down
4 changes: 2 additions & 2 deletions notify/sns/sns.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ func (n *Notifier) Notify(ctx context.Context, alert ...*types.Alert) (bool, err
if e, ok := err.(awserr.RequestFailure); ok {
retryable, error := n.retrier.Check(e.StatusCode(), strings.NewReader(e.Message()))

statusErr := notify.NewErrorWithStatusCode(e.StatusCode(), error)
return retryable, statusErr
reasonErr := notify.NewErrorWithReason(notify.GetFailureReasonFromStatusCode(e.StatusCode()), error)
return retryable, reasonErr
}
return true, err
}
Expand Down
41 changes: 33 additions & 8 deletions notify/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,17 @@ import (
"github.com/prometheus/alertmanager/types"
)

// defaultFailureReason is the default reason for numTotalFailedNotifications metric
const defaultFailureReason = "other"

const (
clientError = "clientError"
serverError = "ServerError"
)

// possibleFailureStatusCategory is a list of possible failure reason
var possibleFailureStatusCategory = []string{clientError, serverError, defaultFailureReason}

// UserAgentHeader is the default User-Agent for notification requests
var UserAgentHeader = fmt.Sprintf("Alertmanager/%s", version.Version)

Expand Down Expand Up @@ -210,20 +221,34 @@ func (r *Retrier) Check(statusCode int, body io.Reader) (bool, error) {
return retry, errors.New(s)
}

type ErrorWithStatusCode struct {
type ErrorWithReason struct {
Err error

// The status code of the HTTP response.
StatusCode int
// The reason of the failure.
Reason string
}

func NewErrorWithStatusCode(statusCode int, err error) *ErrorWithStatusCode {
return &ErrorWithStatusCode{
Err: err,
StatusCode: statusCode,
func NewErrorWithReason(reason string, err error) *ErrorWithReason {
return &ErrorWithReason{
Err: err,
Reason: reason,
}
}

func (e *ErrorWithStatusCode) Error() string {
func (e *ErrorWithReason) Error() string {
return e.Err.Error()
}

// GetFailureReasonFromStatusCode return the reason for failure request
// the status starts with 4 will return 4xx and starts with 5 will return 5xx
// other than 4xx and 5xx input status will return an 5xx.
func GetFailureReasonFromStatusCode(statusCode int) string {
if statusCode/100 == 4 {
return clientError
}
if statusCode/100 == 5 {
return serverError
}

return defaultFailureReason
}

0 comments on commit e10015d

Please sign in to comment.