Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add timeout option for webhook notifier. #4137

Merged
merged 2 commits into from
Dec 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions config/notifiers.go
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,10 @@ type WebhookConfig struct {
// Alerts exceeding this threshold will be truncated. Setting this to 0
// allows an unlimited number of alerts.
MaxAlerts uint64 `yaml:"max_alerts" json:"max_alerts"`

// Timeout is the maximum time allowed to invoke the webhook. Setting this to 0
// does not impose a timeout.
Timeout time.Duration `yaml:"timeout" json:"timeout"`
}

// UnmarshalYAML implements the yaml.Unmarshaler interface.
Expand Down
7 changes: 7 additions & 0 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -1594,6 +1594,13 @@ url_file: <filepath>
# above this threshold are truncated. When leaving this at its default value of
# 0, all alerts are included.
[ max_alerts: <int> | default = 0 ]

# The maximum time to wait for a webhook request to complete, before failing the
# request and allowing it to be retried. The default value of 0s indicates that
# no timeout should be applied.
# NOTE: This will have no effect if set higher than the group_interval.
[ timeout: <duration> | default = 0s ]

```

The Alertmanager
Expand Down
9 changes: 9 additions & 0 deletions notify/webhook/webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,17 @@ func (n *Notifier) Notify(ctx context.Context, alerts ...*types.Alert) (bool, er
url = strings.TrimSpace(string(content))
}

if n.conf.Timeout > 0 {
postCtx, cancel := context.WithTimeoutCause(ctx, n.conf.Timeout, fmt.Errorf("configured webhook timeout reached (%s)", n.conf.Timeout))
defer cancel()
ctx = postCtx
}

resp, err := notify.PostJSON(ctx, n.client, url, &buf)
if err != nil {
if ctx.Err() != nil {
err = fmt.Errorf("%w: %w", err, context.Cause(ctx))
}
return true, notify.RedactURL(err)
}
defer notify.Drain(resp)
Expand Down
49 changes: 49 additions & 0 deletions test/with_api_v2/acceptance/send_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -464,3 +464,52 @@ receivers:

t.Log(co.Check())
}

func TestWebhookTimeout(t *testing.T) {
t.Parallel()

// This integration test uses an extended group_interval to check that
// the webhook level timeout has the desired effect, and that notification
// sending is retried in this case.
conf := `
route:
receiver: "default"
group_by: [alertname]
group_wait: 1s
group_interval: 1m
repeat_interval: 1m

receivers:
- name: "default"
webhook_configs:
- url: 'http://%s'
timeout: 500ms
`

at := NewAcceptanceTest(t, &AcceptanceOpts{
Tolerance: 150 * time.Millisecond,
})

co := at.Collector("webhook")
wh := NewWebhook(t, co)

wh.Func = func(ts float64) bool {
// Make some webhook requests slow enough to hit the webhook
// timeout, but not so slow as to hit the dispatcher timeout.
if ts < 3 {
time.Sleep(time.Second)
return true
}
return false
}

am := at.AlertmanagerCluster(fmt.Sprintf(conf, wh.Address()), 1)

am.Push(At(1), Alert("alertname", "test1"))

co.Want(Between(3, 4), Alert("alertname", "test1").Active(1))

at.Run()

t.Log(co.Check())
}