From 655947d7e0ea0a15815a6348fcf74a7ca79f8e91 Mon Sep 17 00:00:00 2001 From: Simon Pasquier Date: Fri, 2 Aug 2019 16:17:40 +0200 Subject: [PATCH] notify: refactor code to retry requests (#1974) * notify: refactor code to retry requests Signed-off-by: Simon Pasquier * s/Process/Check/ Signed-off-by: Simon Pasquier --- notify/hipchat/hipchat.go | 26 +++----- notify/hipchat/hipchat_test.go | 11 +++- notify/opsgenie/opsgenie.go | 38 ++++-------- notify/opsgenie/opsgenie_test.go | 43 +++---------- notify/pagerduty/pagerduty.go | 63 +++++++------------ notify/pagerduty/pagerduty_test.go | 44 ++++++++------ notify/pushover/pushover.go | 37 ++++-------- notify/pushover/pushover_test.go | 11 +++- notify/slack/slack.go | 37 ++++-------- notify/slack/slack_test.go | 50 +++------------ notify/util.go | 57 +++++++++++++++++- notify/util_test.go | 97 ++++++++++++++++++++++++++++++ notify/victorops/victorops.go | 26 +++----- notify/victorops/victorops_test.go | 12 +++- notify/webhook/webhook.go | 29 +++++---- notify/webhook/webhook_test.go | 18 +++++- 16 files changed, 334 insertions(+), 265 deletions(-) diff --git a/notify/hipchat/hipchat.go b/notify/hipchat/hipchat.go index 9dc2c63416..98bb1476a4 100644 --- a/notify/hipchat/hipchat.go +++ b/notify/hipchat/hipchat.go @@ -31,10 +31,11 @@ import ( // Notifier implements a Notifier for Hipchat notifications. type Notifier struct { - conf *config.HipchatConfig - tmpl *template.Template - logger log.Logger - client *http.Client + conf *config.HipchatConfig + tmpl *template.Template + logger log.Logger + client *http.Client + retrier *notify.Retrier } // New returns a new Hipchat notification handler. @@ -48,6 +49,10 @@ func New(c *config.HipchatConfig, t *template.Template, l log.Logger) (*Notifier tmpl: t, logger: l, client: client, + // Response codes 429 (rate limiting) and 5xx can potentially recover. + // 2xx response codes indicate successful requests. + // https://developer.atlassian.com/hipchat/guide/hipchat-rest-api/api-response-codes + retrier: ¬ify.Retrier{RetryCodes: []int{http.StatusTooManyRequests}}, }, nil } @@ -103,16 +108,5 @@ func (n *Notifier) Notify(ctx context.Context, as ...*types.Alert) (bool, error) } defer notify.Drain(resp) - return n.retry(resp.StatusCode) -} - -func (n *Notifier) retry(statusCode int) (bool, error) { - // Response codes 429 (rate limiting) and 5xx can potentially recover. - // 2xx response codes indicate successful requests. - // https://developer.atlassian.com/hipchat/guide/hipchat-rest-api/api-response-codes - if statusCode/100 != 2 { - return (statusCode == 429 || statusCode/100 == 5), fmt.Errorf("unexpected status code %v", statusCode) - } - - return false, nil + return n.retrier.Check(resp.StatusCode, nil) } diff --git a/notify/hipchat/hipchat_test.go b/notify/hipchat/hipchat_test.go index 24522be806..7c35430ee8 100644 --- a/notify/hipchat/hipchat_test.go +++ b/notify/hipchat/hipchat_test.go @@ -27,10 +27,17 @@ import ( ) func TestHipchatRetry(t *testing.T) { - notifier := new(Notifier) + notifier, err := New( + &config.HipchatConfig{ + HTTPConfig: &commoncfg.HTTPClientConfig{}, + }, + test.CreateTmpl(t), + log.NewNopLogger(), + ) + require.NoError(t, err) retryCodes := append(test.DefaultRetryCodes(), http.StatusTooManyRequests) for statusCode, expected := range test.RetryTests(retryCodes) { - actual, _ := notifier.retry(statusCode) + actual, _ := notifier.retrier.Check(statusCode, nil) require.Equal(t, expected, actual, fmt.Sprintf("error on status %d", statusCode)) } } diff --git a/notify/opsgenie/opsgenie.go b/notify/opsgenie/opsgenie.go index 3e23a435fe..41185ed9ab 100644 --- a/notify/opsgenie/opsgenie.go +++ b/notify/opsgenie/opsgenie.go @@ -18,8 +18,6 @@ import ( "context" "encoding/json" "fmt" - "io" - "io/ioutil" "net/http" "strings" @@ -37,10 +35,11 @@ import ( // Notifier implements a Notifier for OpsGenie notifications. type Notifier struct { - conf *config.OpsGenieConfig - tmpl *template.Template - logger log.Logger - client *http.Client + conf *config.OpsGenieConfig + tmpl *template.Template + logger log.Logger + client *http.Client + retrier *notify.Retrier } // New returns a new OpsGenie notifier. @@ -49,7 +48,13 @@ func New(c *config.OpsGenieConfig, t *template.Template, l log.Logger) (*Notifie if err != nil { return nil, err } - return &Notifier{conf: c, tmpl: t, logger: l, client: client}, nil + return &Notifier{ + conf: c, + tmpl: t, + logger: l, + client: client, + retrier: ¬ify.Retrier{RetryCodes: []int{http.StatusTooManyRequests}}, + }, nil } type opsGenieCreateMessage struct { @@ -88,7 +93,7 @@ func (n *Notifier) Notify(ctx context.Context, as ...*types.Alert) (bool, error) } defer notify.Drain(resp) - return n.retry(resp.StatusCode, resp.Body) + return n.retrier.Check(resp.StatusCode, resp.Body) } // Like Split but filter out empty strings. @@ -191,20 +196,3 @@ func (n *Notifier) createRequest(ctx context.Context, as ...*types.Alert) (*http req.Header.Set("Authorization", fmt.Sprintf("GenieKey %s", apiKey)) return req.WithContext(ctx), true, nil } - -func (n *Notifier) retry(statusCode int, body io.Reader) (bool, error) { - if statusCode/100 == 2 { - return false, nil - } - - err := errors.Errorf("unexpected status code %v", statusCode) - if body != nil { - if bs, errRead := ioutil.ReadAll(body); errRead == nil { - err = errors.Errorf("%s: %s", err, string(bs)) - } - } - - // https://docs.opsgenie.com/docs/response#section-response-codes - // Response codes 429 (rate limiting) and 5xx are potentially recoverable - return statusCode/100 == 5 || statusCode == 429, err -} diff --git a/notify/opsgenie/opsgenie_test.go b/notify/opsgenie/opsgenie_test.go index fc16c06420..7fb3489e43 100644 --- a/notify/opsgenie/opsgenie_test.go +++ b/notify/opsgenie/opsgenie_test.go @@ -14,10 +14,8 @@ package opsgenie import ( - "bytes" "context" "fmt" - "io" "io/ioutil" "net/http" "net/url" @@ -36,45 +34,22 @@ import ( ) func TestOpsGenieRetry(t *testing.T) { - notifier := new(Notifier) + notifier, err := New( + &config.OpsGenieConfig{ + HTTPConfig: &commoncfg.HTTPClientConfig{}, + }, + test.CreateTmpl(t), + log.NewNopLogger(), + ) + require.NoError(t, err) retryCodes := append(test.DefaultRetryCodes(), http.StatusTooManyRequests) for statusCode, expected := range test.RetryTests(retryCodes) { - actual, _ := notifier.retry(statusCode, nil) + actual, _ := notifier.retrier.Check(statusCode, nil) require.Equal(t, expected, actual, fmt.Sprintf("error on status %d", statusCode)) } } -func TestOpsGenieErr(t *testing.T) { - notifier := new(Notifier) - for _, tc := range []struct { - status int - body io.Reader - expected string - }{ - { - status: http.StatusUnprocessableEntity, - body: nil, - expected: "unexpected status code 422", - }, - { - status: http.StatusUnprocessableEntity, - body: bytes.NewBuffer([]byte(`{"message":"Request body is not processable. Please check the errors.","errors":{"priority":"should be one of [ P1, P2, P3, P4, P5 ]"},"took":0.002,"requestId":"865a4f83-99d9-48c8-9550-42a375a3a387"}`)), - expected: `unexpected status code 422: {"message":"Request body is not processable. Please check the errors.","errors":{"priority":"should be one of [ P1, P2, P3, P4, P5 ]"},"took":0.002,"requestId":"865a4f83-99d9-48c8-9550-42a375a3a387"}`, - }, - { - status: http.StatusInternalServerError, - body: bytes.NewBuffer([]byte("internal error")), - expected: "unexpected status code 500: internal error", - }, - } { - t.Run("", func(t *testing.T) { - _, err := notifier.retry(tc.status, tc.body) - require.Equal(t, err.Error(), tc.expected) - }) - } -} - func TestOpsGenieRedactedURL(t *testing.T) { ctx, u, fn := test.GetContextWithCancelingURL() defer fn() diff --git a/notify/pagerduty/pagerduty.go b/notify/pagerduty/pagerduty.go index b839d94b05..04207fe9aa 100644 --- a/notify/pagerduty/pagerduty.go +++ b/notify/pagerduty/pagerduty.go @@ -35,11 +35,12 @@ import ( // Notifier implements a Notifier for PagerDuty notifications. type Notifier struct { - conf *config.PagerdutyConfig - tmpl *template.Template - logger log.Logger - apiV1 string // for tests. - client *http.Client + conf *config.PagerdutyConfig + tmpl *template.Template + logger log.Logger + apiV1 string // for tests. + client *http.Client + retrier *notify.Retrier } // New returns a new PagerDuty notifier. @@ -51,6 +52,13 @@ func New(c *config.PagerdutyConfig, t *template.Template, l log.Logger) (*Notifi n := &Notifier{conf: c, tmpl: t, logger: l, client: client} if c.ServiceKey != "" { n.apiV1 = "https://events.pagerduty.com/generic/2010-04-15/create_event.json" + // Retrying can solve the issue on 403 (rate limiting) and 5xx response codes. + // https://v2.developer.pagerduty.com/docs/trigger-events + n.retrier = ¬ify.Retrier{RetryCodes: []int{http.StatusForbidden}, CustomDetailsFunc: errDetails} + } else { + // Retrying can solve the issue on 429 (rate limiting) and 5xx response codes. + // https://v2.developer.pagerduty.com/docs/events-api-v2#api-response-codes--retry-logic + n.retrier = ¬ify.Retrier{RetryCodes: []int{http.StatusTooManyRequests}, CustomDetailsFunc: errDetails} } return n, nil } @@ -142,7 +150,7 @@ func (n *Notifier) notifyV1( } defer notify.Drain(resp) - return n.retryV1(resp) + return n.retrier.Check(resp.StatusCode, resp.Body) } func (n *Notifier) notifyV2( @@ -210,7 +218,7 @@ func (n *Notifier) notifyV2( } defer notify.Drain(resp) - return n.retryV2(resp) + return n.retrier.Check(resp.StatusCode, resp.Body) } // Notify implements the Notifier interface. @@ -248,44 +256,19 @@ func (n *Notifier) Notify(ctx context.Context, as ...*types.Alert) (bool, error) return n.notifyV2(ctx, eventType, key, data, details, as...) } -func pagerDutyErr(status int, body io.Reader) error { +func errDetails(status int, body io.Reader) string { // See https://v2.developer.pagerduty.com/docs/trigger-events for the v1 events API. // See https://v2.developer.pagerduty.com/docs/send-an-event-events-api-v2 for the v2 events API. - type pagerDutyResponse struct { + if status != http.StatusBadRequest || body == nil { + return "" + } + var pgr struct { Status string `json:"status"` Message string `json:"message"` Errors []string `json:"errors"` } - if status == http.StatusBadRequest && body != nil { - var r pagerDutyResponse - if err := json.NewDecoder(body).Decode(&r); err == nil { - return fmt.Errorf("%s: %s", r.Message, strings.Join(r.Errors, ",")) - } - } - return fmt.Errorf("unexpected status code: %v", status) -} - -func (n *Notifier) retryV1(resp *http.Response) (bool, error) { - // Retrying can solve the issue on 403 (rate limiting) and 5xx response codes. - // 2xx response codes indicate a successful request. - // https://v2.developer.pagerduty.com/docs/trigger-events - statusCode := resp.StatusCode - - if statusCode/100 != 2 { - return (statusCode == http.StatusForbidden || statusCode/100 == 5), pagerDutyErr(statusCode, resp.Body) + if err := json.NewDecoder(body).Decode(&pgr); err != nil { + return "" } - return false, nil -} - -func (n *Notifier) retryV2(resp *http.Response) (bool, error) { - // Retrying can solve the issue on 429 (rate limiting) and 5xx response codes. - // 2xx response codes indicate a successful request. - // https://v2.developer.pagerduty.com/docs/events-api-v2#api-response-codes--retry-logic - statusCode := resp.StatusCode - - if statusCode/100 != 2 { - return (statusCode == http.StatusTooManyRequests || statusCode/100 == 5), pagerDutyErr(statusCode, resp.Body) - } - - return false, nil + return fmt.Sprintf("%s: %s", pgr.Message, strings.Join(pgr.Errors, ",")) } diff --git a/notify/pagerduty/pagerduty_test.go b/notify/pagerduty/pagerduty_test.go index 1a50eff98e..27d85bb735 100644 --- a/notify/pagerduty/pagerduty_test.go +++ b/notify/pagerduty/pagerduty_test.go @@ -29,27 +29,37 @@ import ( ) func TestPagerDutyRetryV1(t *testing.T) { - notifier := new(Notifier) + notifier, err := New( + &config.PagerdutyConfig{ + ServiceKey: config.Secret("01234567890123456789012345678901"), + HTTPConfig: &commoncfg.HTTPClientConfig{}, + }, + test.CreateTmpl(t), + log.NewNopLogger(), + ) + require.NoError(t, err) retryCodes := append(test.DefaultRetryCodes(), http.StatusForbidden) for statusCode, expected := range test.RetryTests(retryCodes) { - resp := &http.Response{ - StatusCode: statusCode, - } - actual, _ := notifier.retryV1(resp) + actual, _ := notifier.retrier.Check(statusCode, nil) require.Equal(t, expected, actual, fmt.Sprintf("retryv1 - error on status %d", statusCode)) } } func TestPagerDutyRetryV2(t *testing.T) { - notifier := new(Notifier) + notifier, err := New( + &config.PagerdutyConfig{ + RoutingKey: config.Secret("01234567890123456789012345678901"), + HTTPConfig: &commoncfg.HTTPClientConfig{}, + }, + test.CreateTmpl(t), + log.NewNopLogger(), + ) + require.NoError(t, err) retryCodes := append(test.DefaultRetryCodes(), http.StatusTooManyRequests) for statusCode, expected := range test.RetryTests(retryCodes) { - resp := &http.Response{ - StatusCode: statusCode, - } - actual, _ := notifier.retryV2(resp) + actual, _ := notifier.retrier.Check(statusCode, nil) require.Equal(t, expected, actual, fmt.Sprintf("retryv2 - error on status %d", statusCode)) } } @@ -92,7 +102,7 @@ func TestPagerDutyRedactedURLV2(t *testing.T) { test.AssertNotifyLeaksNoSecret(t, ctx, notifier, key) } -func TestPagerDutyErr(t *testing.T) { +func TestErrDetails(t *testing.T) { for _, tc := range []struct { status int body io.Reader @@ -111,25 +121,23 @@ func TestPagerDutyErr(t *testing.T) { status: http.StatusBadRequest, body: bytes.NewBuffer([]byte(`{"status"}`)), - exp: "unexpected status code: 400", + exp: "", }, { status: http.StatusBadRequest, - body: nil, - exp: "unexpected status code: 400", + exp: "", }, { status: http.StatusTooManyRequests, - body: bytes.NewBuffer([]byte("")), - exp: "unexpected status code: 429", + exp: "", }, } { tc := tc t.Run("", func(t *testing.T) { - err := pagerDutyErr(tc.status, tc.body) - require.Contains(t, err.Error(), tc.exp) + err := errDetails(tc.status, tc.body) + require.Contains(t, err, tc.exp) }) } } diff --git a/notify/pushover/pushover.go b/notify/pushover/pushover.go index c828756cf6..f5e421c9a8 100644 --- a/notify/pushover/pushover.go +++ b/notify/pushover/pushover.go @@ -33,11 +33,12 @@ import ( // Notifier implements a Notifier for Pushover notifications. type Notifier struct { - conf *config.PushoverConfig - tmpl *template.Template - logger log.Logger - client *http.Client - apiURL string // for tests. + conf *config.PushoverConfig + tmpl *template.Template + logger log.Logger + client *http.Client + retrier *notify.Retrier + apiURL string // for tests. } // New returns a new Pushover notifier. @@ -47,11 +48,12 @@ func New(c *config.PushoverConfig, t *template.Template, l log.Logger) (*Notifie return nil, err } return &Notifier{ - conf: c, - tmpl: t, - logger: l, - client: client, - apiURL: "https://api.pushover.net/1/messages.json", + conf: c, + tmpl: t, + logger: l, + client: client, + retrier: ¬ify.Retrier{}, + apiURL: "https://api.pushover.net/1/messages.json", }, nil } @@ -128,18 +130,5 @@ func (n *Notifier) Notify(ctx context.Context, as ...*types.Alert) (bool, error) } defer notify.Drain(resp) - return n.retry(resp.StatusCode) -} - -func (n *Notifier) retry(statusCode int) (bool, error) { - // Only documented behaviour is that 2xx response codes are successful and - // 4xx are unsuccessful, therefore assuming only 5xx are recoverable. - // https://pushover.net/api#response - if statusCode/100 == 5 { - return true, fmt.Errorf("unexpected status code %v", statusCode) - } else if statusCode/100 != 2 { - return false, fmt.Errorf("unexpected status code %v", statusCode) - } - - return false, nil + return n.retrier.Check(resp.StatusCode, nil) } diff --git a/notify/pushover/pushover_test.go b/notify/pushover/pushover_test.go index 8076e67de6..260c94f40c 100644 --- a/notify/pushover/pushover_test.go +++ b/notify/pushover/pushover_test.go @@ -26,9 +26,16 @@ import ( ) func TestPushoverRetry(t *testing.T) { - notifier := new(Notifier) + notifier, err := New( + &config.PushoverConfig{ + HTTPConfig: &commoncfg.HTTPClientConfig{}, + }, + test.CreateTmpl(t), + log.NewNopLogger(), + ) + require.NoError(t, err) for statusCode, expected := range test.RetryTests(test.DefaultRetryCodes()) { - actual, _ := notifier.retry(statusCode) + actual, _ := notifier.retrier.Check(statusCode, nil) require.Equal(t, expected, actual, fmt.Sprintf("error on status %d", statusCode)) } } diff --git a/notify/slack/slack.go b/notify/slack/slack.go index 2f5dea3f19..b5d41592d3 100644 --- a/notify/slack/slack.go +++ b/notify/slack/slack.go @@ -17,9 +17,6 @@ import ( "bytes" "context" "encoding/json" - "fmt" - "io" - "io/ioutil" "net/http" "github.com/go-kit/kit/log" @@ -33,10 +30,11 @@ import ( // Notifier implements a Notifier for Slack notifications. type Notifier struct { - conf *config.SlackConfig - tmpl *template.Template - logger log.Logger - client *http.Client + conf *config.SlackConfig + tmpl *template.Template + logger log.Logger + client *http.Client + retrier *notify.Retrier } // New returns a new Slack notification handler. @@ -47,10 +45,11 @@ func New(c *config.SlackConfig, t *template.Template, l log.Logger) (*Notifier, } return &Notifier{ - conf: c, - tmpl: t, - logger: l, - client: client, + conf: c, + tmpl: t, + logger: l, + client: client, + retrier: ¬ify.Retrier{}, }, nil } @@ -181,22 +180,8 @@ func (n *Notifier) Notify(ctx context.Context, as ...*types.Alert) (bool, error) } defer notify.Drain(resp) - return n.retry(resp.StatusCode, resp.Body) -} - -func (n *Notifier) retry(statusCode int, body io.Reader) (bool, error) { // Only 5xx response codes are recoverable and 2xx codes are successful. // https://api.slack.com/incoming-webhooks#handling_errors // https://api.slack.com/changelog/2016-05-17-changes-to-errors-for-incoming-webhooks - if statusCode/100 == 2 { - return false, nil - } - - err := fmt.Errorf("unexpected status code %v", statusCode) - if body != nil { - if bs, errRead := ioutil.ReadAll(body); errRead == nil { - err = fmt.Errorf("%s: %q", err, string(bs)) - } - } - return statusCode/100 == 5, err + return n.retrier.Check(resp.StatusCode, resp.Body) } diff --git a/notify/slack/slack_test.go b/notify/slack/slack_test.go index 1f2305d65d..2b77e63ee0 100644 --- a/notify/slack/slack_test.go +++ b/notify/slack/slack_test.go @@ -14,10 +14,7 @@ package slack import ( - "bytes" "fmt" - "io" - "net/http" "testing" "github.com/go-kit/kit/log" @@ -29,48 +26,21 @@ import ( ) func TestSlackRetry(t *testing.T) { - notifier := new(Notifier) + notifier, err := New( + &config.SlackConfig{ + HTTPConfig: &commoncfg.HTTPClientConfig{}, + }, + test.CreateTmpl(t), + log.NewNopLogger(), + ) + require.NoError(t, err) + for statusCode, expected := range test.RetryTests(test.DefaultRetryCodes()) { - actual, _ := notifier.retry(statusCode, nil) + actual, _ := notifier.retrier.Check(statusCode, nil) require.Equal(t, expected, actual, fmt.Sprintf("error on status %d", statusCode)) } } -func TestSlackErr(t *testing.T) { - notifier := new(Notifier) - for _, tc := range []struct { - status int - body io.Reader - expected string - }{ - { - status: http.StatusBadRequest, - body: nil, - expected: "unexpected status code 400", - }, - { - status: http.StatusBadRequest, - body: bytes.NewBuffer([]byte("invalid_payload")), - expected: "unexpected status code 400: \"invalid_payload\"", - }, - { - status: http.StatusNotFound, - body: bytes.NewBuffer([]byte("channel_not_found")), - expected: "unexpected status code 404: \"channel_not_found\"", - }, - { - status: http.StatusInternalServerError, - body: bytes.NewBuffer([]byte("rollup_error")), - expected: "unexpected status code 500: \"rollup_error\"", - }, - } { - t.Run("", func(t *testing.T) { - _, err := notifier.retry(tc.status, tc.body) - require.Contains(t, err.Error(), tc.expected) - }) - } -} - func TestSlackRedactedURL(t *testing.T) { ctx, u, fn := test.GetContextWithCancelingURL() defer fn() diff --git a/notify/util.go b/notify/util.go index 20f8b5f6d3..06f3940831 100644 --- a/notify/util.go +++ b/notify/util.go @@ -24,6 +24,7 @@ import ( "github.com/go-kit/kit/log" "github.com/go-kit/kit/log/level" + "github.com/pkg/errors" "github.com/prometheus/alertmanager/template" "github.com/prometheus/alertmanager/types" @@ -108,7 +109,7 @@ type Key string func ExtractGroupKey(ctx context.Context) (Key, error) { key, ok := GroupKey(ctx) if !ok { - return "", fmt.Errorf("group key missing") + return "", errors.Errorf("group key missing") } return Key(key), nil } @@ -139,3 +140,57 @@ func GetTemplateData(ctx context.Context, tmpl *template.Template, alerts []*typ } return tmpl.Data(recv, groupLabels, alerts...) } + +func readAll(r io.Reader) string { + if r == nil { + return "" + } + bs, err := ioutil.ReadAll(r) + if err != nil { + return "" + } + return string(bs) +} + +// Retrier knows when to retry an HTTP request to a receiver. 2xx status codes +// are successful, anything else is a failure and only 5xx status codes should +// be retried. +type Retrier struct { + // Function to return additional information in the error message. + CustomDetailsFunc func(code int, body io.Reader) string + // Additional HTTP status codes that should be retried. + RetryCodes []int +} + +// Check returns a boolean indicating whether the request should be retried +// and an optional error if the request has failed. If body is not nil, it will +// be included in the error message. +func (r *Retrier) Check(statusCode int, body io.Reader) (bool, error) { + // 2xx responses are considered to be always successful. + if statusCode/100 == 2 { + return false, nil + } + + // 5xx responses are considered to be always retried. + retry := statusCode/100 == 5 + if !retry { + for _, code := range r.RetryCodes { + if code == statusCode { + retry = true + break + } + } + } + + s := fmt.Sprintf("unexpected status code %v", statusCode) + var details string + if r.CustomDetailsFunc != nil { + details = r.CustomDetailsFunc(statusCode, body) + } else { + details = readAll(body) + } + if details != "" { + s = fmt.Sprintf("%s: %s", s, details) + } + return retry, errors.New(s) +} diff --git a/notify/util_test.go b/notify/util_test.go index 3d033b2ca9..d0ac8667a0 100644 --- a/notify/util_test.go +++ b/notify/util_test.go @@ -14,7 +14,11 @@ package notify import ( + "bytes" "fmt" + "io" + "io/ioutil" + "net/http" "testing" "github.com/stretchr/testify/require" @@ -80,3 +84,96 @@ func TestTruncate(t *testing.T) { }) } } + +type brokenReader struct{} + +func (b brokenReader) Read([]byte) (int, error) { + return 0, fmt.Errorf("some error") +} + +func TestRetrierCheck(t *testing.T) { + for _, tc := range []struct { + retrier Retrier + status int + body io.Reader + + retry bool + expectedErr string + }{ + { + retrier: Retrier{}, + status: http.StatusOK, + body: bytes.NewBuffer([]byte("ok")), + + retry: false, + }, + { + retrier: Retrier{}, + status: http.StatusNoContent, + + retry: false, + }, + { + retrier: Retrier{}, + status: http.StatusBadRequest, + + retry: false, + expectedErr: "unexpected status code 400", + }, + { + retrier: Retrier{RetryCodes: []int{http.StatusTooManyRequests}}, + status: http.StatusBadRequest, + body: bytes.NewBuffer([]byte("invalid request")), + + retry: false, + expectedErr: "unexpected status code 400: invalid request", + }, + { + retrier: Retrier{RetryCodes: []int{http.StatusTooManyRequests}}, + status: http.StatusTooManyRequests, + + retry: true, + expectedErr: "unexpected status code 429", + }, + { + retrier: Retrier{}, + status: http.StatusServiceUnavailable, + body: bytes.NewBuffer([]byte("retry later")), + + retry: true, + expectedErr: "unexpected status code 503: retry later", + }, + { + retrier: Retrier{}, + status: http.StatusBadGateway, + body: &brokenReader{}, + + retry: true, + expectedErr: "unexpected status code 502", + }, + { + retrier: Retrier{CustomDetailsFunc: func(status int, b io.Reader) string { + if status != http.StatusServiceUnavailable { + return "invalid" + } + bs, _ := ioutil.ReadAll(b) + return fmt.Sprintf("server response is %q", string(bs)) + }}, + status: http.StatusServiceUnavailable, + body: bytes.NewBuffer([]byte("retry later")), + + retry: true, + expectedErr: "unexpected status code 503: server response is \"retry later\"", + }, + } { + t.Run("", func(t *testing.T) { + retry, err := tc.retrier.Check(tc.status, tc.body) + require.Equal(t, tc.retry, retry) + if tc.expectedErr == "" { + require.NoError(t, err) + return + } + require.EqualError(t, err, tc.expectedErr) + }) + } +} diff --git a/notify/victorops/victorops.go b/notify/victorops/victorops.go index 725a7e86cc..89046484a4 100644 --- a/notify/victorops/victorops.go +++ b/notify/victorops/victorops.go @@ -33,10 +33,11 @@ import ( // Notifier implements a Notifier for VictorOps notifications. type Notifier struct { - conf *config.VictorOpsConfig - tmpl *template.Template - logger log.Logger - client *http.Client + conf *config.VictorOpsConfig + tmpl *template.Template + logger log.Logger + client *http.Client + retrier *notify.Retrier } // New returns a new VictorOps notifier. @@ -50,6 +51,9 @@ func New(c *config.VictorOpsConfig, t *template.Template, l log.Logger) (*Notifi tmpl: t, logger: l, client: client, + // Missing documentation therefore assuming only 5xx response codes are + // recoverable. + retrier: ¬ify.Retrier{}, }, nil } @@ -80,7 +84,7 @@ func (n *Notifier) Notify(ctx context.Context, as ...*types.Alert) (bool, error) } defer notify.Drain(resp) - return n.retry(resp.StatusCode) + return n.retrier.Check(resp.StatusCode, nil) } // Create the JSON payload to be sent to the VictorOps API. @@ -144,15 +148,3 @@ func (n *Notifier) createVictorOpsPayload(ctx context.Context, as ...*types.Aler } return &buf, nil } - -func (n *Notifier) retry(statusCode int) (bool, error) { - // Missing documentation therefore assuming only 5xx response codes are - // recoverable. - if statusCode/100 == 5 { - return true, fmt.Errorf("unexpected status code %v", statusCode) - } else if statusCode/100 != 2 { - return false, fmt.Errorf("unexpected status code %v", statusCode) - } - - return false, nil -} diff --git a/notify/victorops/victorops_test.go b/notify/victorops/victorops_test.go index aceee19a1f..59aad7f539 100644 --- a/notify/victorops/victorops_test.go +++ b/notify/victorops/victorops_test.go @@ -83,9 +83,17 @@ func TestVictorOpsCustomFields(t *testing.T) { } func TestVictorOpsRetry(t *testing.T) { - notifier := new(Notifier) + notifier, err := New( + &config.VictorOpsConfig{ + APIKey: config.Secret("secret"), + HTTPConfig: &commoncfg.HTTPClientConfig{}, + }, + test.CreateTmpl(t), + log.NewNopLogger(), + ) + require.NoError(t, err) for statusCode, expected := range test.RetryTests(test.DefaultRetryCodes()) { - actual, _ := notifier.retry(statusCode) + actual, _ := notifier.retrier.Check(statusCode, nil) require.Equal(t, expected, actual, fmt.Sprintf("error on status %d", statusCode)) } } diff --git a/notify/webhook/webhook.go b/notify/webhook/webhook.go index 347bd9f8ae..01ef383bee 100644 --- a/notify/webhook/webhook.go +++ b/notify/webhook/webhook.go @@ -18,6 +18,7 @@ import ( "context" "encoding/json" "fmt" + "io" "net/http" "github.com/go-kit/kit/log" @@ -35,10 +36,11 @@ var userAgentHeader = fmt.Sprintf("Alertmanager/%s", version.Version) // Notifier implements a Notifier for generic webhooks. type Notifier struct { - conf *config.WebhookConfig - tmpl *template.Template - logger log.Logger - client *http.Client + conf *config.WebhookConfig + tmpl *template.Template + logger log.Logger + client *http.Client + retrier *notify.Retrier } // New returns a new Webhook. @@ -52,6 +54,13 @@ func New(conf *config.WebhookConfig, t *template.Template, l log.Logger) (*Notif tmpl: t, logger: l, client: client, + // Webhooks are assumed to respond with 2xx response codes on a successful + // request and 5xx response codes are assumed to be recoverable. + retrier: ¬ify.Retrier{ + CustomDetailsFunc: func(int, io.Reader) string { + return conf.URL.String() + }, + }, }, nil } @@ -97,15 +106,5 @@ func (n *Notifier) Notify(ctx context.Context, alerts ...*types.Alert) (bool, er } notify.Drain(resp) - return n.retry(resp.StatusCode) -} - -func (n *Notifier) retry(statusCode int) (bool, error) { - // Webhooks are assumed to respond with 2xx response codes on a successful - // request and 5xx response codes are assumed to be recoverable. - if statusCode/100 != 2 { - return (statusCode/100 == 5), fmt.Errorf("unexpected status code %v from %s", statusCode, n.conf.URL) - } - - return false, nil + return n.retrier.Check(resp.StatusCode, nil) } diff --git a/notify/webhook/webhook_test.go b/notify/webhook/webhook_test.go index 6c66b4c35d..2816af1fe1 100644 --- a/notify/webhook/webhook_test.go +++ b/notify/webhook/webhook_test.go @@ -18,6 +18,8 @@ import ( "net/url" "testing" + "github.com/go-kit/kit/log" + commoncfg "github.com/prometheus/common/config" "github.com/stretchr/testify/require" "github.com/prometheus/alertmanager/config" @@ -27,11 +29,21 @@ import ( func TestWebhookRetry(t *testing.T) { u, err := url.Parse("http://example.com") if err != nil { - t.Fatalf("failed to parse URL: %v", err) + require.NoError(t, err) + } + notifier, err := New( + &config.WebhookConfig{ + URL: &config.URL{URL: u}, + HTTPConfig: &commoncfg.HTTPClientConfig{}, + }, + test.CreateTmpl(t), + log.NewNopLogger(), + ) + if err != nil { + require.NoError(t, err) } - notifier := &Notifier{conf: &config.WebhookConfig{URL: &config.URL{URL: u}}} for statusCode, expected := range test.RetryTests(test.DefaultRetryCodes()) { - actual, _ := notifier.retry(statusCode) + actual, _ := notifier.retrier.Check(statusCode, nil) require.Equal(t, expected, actual, fmt.Sprintf("error on status %d", statusCode)) } }