From 1c13650f49569ce1bfb75c735f805e692c04ef41 Mon Sep 17 00:00:00 2001 From: Steve Simpson Date: Wed, 27 Nov 2024 16:04:51 +0100 Subject: [PATCH] Add timeout option for webhook notifier. Signed-off-by: Steve Simpson --- config/notifiers.go | 3 ++ notify/webhook/webhook.go | 10 +++++ test/with_api_v2/acceptance/send_test.go | 49 ++++++++++++++++++++++++ 3 files changed, 62 insertions(+) diff --git a/config/notifiers.go b/config/notifiers.go index fe28ca05c4..3ff062c1ad 100644 --- a/config/notifiers.go +++ b/config/notifiers.go @@ -535,6 +535,9 @@ type WebhookConfig struct { // Alerts exceeding this threshold will be truncated. Setting this to 0 // allows an unlimited number of alerts. MaxAlerts uint64 `yaml:"max_alerts" json:"max_alerts"` + + // Timeout is the maximum time allowed to invoke the webhook. + Timeout *time.Duration `yaml:"timeout" json:"timeout"` } // UnmarshalYAML implements the yaml.Unmarshaler interface. diff --git a/notify/webhook/webhook.go b/notify/webhook/webhook.go index 1b5546ddbe..801d5c0f5b 100644 --- a/notify/webhook/webhook.go +++ b/notify/webhook/webhook.go @@ -17,6 +17,7 @@ import ( "bytes" "context" "encoding/json" + "errors" "fmt" "log/slog" "net/http" @@ -112,8 +113,17 @@ func (n *Notifier) Notify(ctx context.Context, alerts ...*types.Alert) (bool, er url = strings.TrimSpace(string(content)) } + if n.conf.Timeout != nil { + postCtx, cancel := context.WithTimeoutCause(ctx, *n.conf.Timeout, fmt.Errorf("configured webhook timeout (%s) reached", *n.conf.Timeout)) + defer cancel() + ctx = postCtx + } + resp, err := notify.PostJSON(ctx, n.client, url, &buf) if err != nil { + if errors.Is(err, context.DeadlineExceeded) && ctx.Err() != nil { + err = context.Cause(ctx) + } return true, notify.RedactURL(err) } defer notify.Drain(resp) diff --git a/test/with_api_v2/acceptance/send_test.go b/test/with_api_v2/acceptance/send_test.go index 4e13d37ca5..fa190f44c1 100644 --- a/test/with_api_v2/acceptance/send_test.go +++ b/test/with_api_v2/acceptance/send_test.go @@ -464,3 +464,52 @@ receivers: t.Log(co.Check()) } + +func TestWebhookTimeout(t *testing.T) { + t.Parallel() + + // This integration test uses an extended group_interval to check that + // the webhook level timeout has the desired effect, and that notification + // sending is retried in this case. + conf := ` +route: + receiver: "default" + group_by: [alertname] + group_wait: 1s + group_interval: 1m + repeat_interval: 1m + +receivers: +- name: "default" + webhook_configs: + - url: 'http://%s' + timeout: 500ms +` + + at := NewAcceptanceTest(t, &AcceptanceOpts{ + Tolerance: 150 * time.Millisecond, + }) + + co := at.Collector("webhook") + wh := NewWebhook(t, co) + + wh.Func = func(ts float64) bool { + // Make some webhook requests slow enough to hit the webhook + // timeout, but not so slow as to hit the dispatcher timeout. + if ts < 3 { + time.Sleep(time.Second) + return true + } + return false + } + + am := at.AlertmanagerCluster(fmt.Sprintf(conf, wh.Address()), 1) + + am.Push(At(1), Alert("alertname", "test1")) + + co.Want(Between(3, 4), Alert("alertname", "test1").Active(1)) + + at.Run() + + t.Log(co.Check()) +}