Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle errors in Slack's postMessage API #3160

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 29 additions & 1 deletion notify/slack/slack.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,11 @@ type attachment struct {
MrkdwnIn []string `json:"mrkdwn_in,omitempty"`
}

type postMessageResponse struct {
Ok bool `json:"ok"`
Error string `json:"error"`
}

// Notify implements the Notifier interface.
func (n *Notifier) Notify(ctx context.Context, as ...*types.Alert) (bool, error) {
var err error
Expand Down Expand Up @@ -205,10 +210,33 @@ func (n *Notifier) Notify(ctx context.Context, as ...*types.Alert) (bool, error)
}
defer notify.Drain(resp)

// Only 5xx response codes are recoverable and 2xx codes are successful.
// Only 5xx response codes are recoverable and 2xx codes are successful (unless using the
// postMessage API, see below).
// https://api.slack.com/incoming-webhooks#handling_errors
// https://api.slack.com/changelog/2016-05-17-changes-to-errors-for-incoming-webhooks
retry, err := n.retrier.Check(resp.StatusCode, resp.Body)
err = errors.Wrap(err, fmt.Sprintf("channel %q", req.Channel))

// For the postMessage API, if the status code is 200, we need to do
// additional checks, since the API returns 200 even for circumstances
// which might typically have been a 4xx response (eg invalid channel)
// https://api.slack.com/methods/chat.postMessage#errors
if resp.StatusCode == 200 && strings.HasSuffix(u, "api/chat.postMessage") {
apiResp := postMessageResponse{}
d := json.NewDecoder(resp.Body)
decErr := d.Decode(&apiResp)
if decErr != nil {
// This should only happen if the Slack API is returning non-json,
// which would presumably only happen in major outages where they
// do not process calls(?). Always retry.
err = errors.Wrap(errors.New("could not decode slack API response"), fmt.Sprintf("channel %q", req.Channel))
retry = true
}
if !apiResp.Ok {
err = errors.Wrap(errors.New(apiResp.Error), fmt.Sprintf("channel %q", req.Channel))
retry = false
}
}

return retry, err
}
70 changes: 70 additions & 0 deletions notify/slack/slack_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,23 @@
package slack

import (
"context"
"fmt"
"net/http"
"net/http/httptest"
"net/url"
"os"
"testing"
"time"

"github.com/go-kit/log"
commoncfg "github.com/prometheus/common/config"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"

"github.com/prometheus/alertmanager/config"
"github.com/prometheus/alertmanager/notify/test"
"github.com/prometheus/alertmanager/types"
)

func TestSlackRetry(t *testing.T) {
Expand All @@ -42,6 +49,69 @@ func TestSlackRetry(t *testing.T) {
}
}

func TestSlackPostMessageHappyPath(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
fmt.Fprintln(w, `{"ok": true}`)
}))
defer ts.Close()
tsUrl, err := url.Parse(ts.URL)
require.NoError(t, err)
tsUrl.Path = "api/chat.postMessage"

notifier, err := New(
&config.SlackConfig{
HTTPConfig: &commoncfg.HTTPClientConfig{},
APIURL: &config.SecretURL{URL: tsUrl},
},
test.CreateTmpl(t),
log.NewNopLogger(),
)
require.NoError(t, err)

_, err = notifier.Notify(context.TODO(), &types.Alert{
Alert: model.Alert{
Labels: model.LabelSet{
"lbl1": "val1",
},
StartsAt: time.Now(),
EndsAt: time.Now().Add(time.Hour),
},
})
require.NoError(t, err)
}

func TestSlackPostMessageErrorHandling(t *testing.T) {
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
fmt.Fprintln(w, `{"ok": false, "error": "no_text"}`)
}))
defer ts.Close()
tsUrl, err := url.Parse(ts.URL)
require.NoError(t, err)
tsUrl.Path = "api/chat.postMessage"

notifier, err := New(
&config.SlackConfig{
HTTPConfig: &commoncfg.HTTPClientConfig{},
APIURL: &config.SecretURL{URL: tsUrl},
},
test.CreateTmpl(t),
log.NewNopLogger(),
)
require.NoError(t, err)

retry, err := notifier.Notify(context.TODO(), &types.Alert{
Alert: model.Alert{
Labels: model.LabelSet{
"lbl1": "val1",
},
StartsAt: time.Now(),
EndsAt: time.Now().Add(time.Hour),
},
})
require.False(t, retry, "Should not retry configuration errors")
require.Error(t, err)
}

func TestSlackRedactedURL(t *testing.T) {
ctx, u, fn := test.GetContextWithCancelingURL()
defer fn()
Expand Down