diff --git a/CHANGELOG.md b/CHANGELOG.md index cafc70b890e..f9ccc6fade6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### Grafana Mimir +* [ENHANCEMENT] Alertmanager: Adds metric `cortex_alertmanager_notifications_suppressed_total` that counts the total number of notifications suppressed for being silenced, inhibited, outside of active time intervals or within muted time intervals. * [CHANGE] Alertmanager: Deprecates the `v1` API. All `v1` API endpoints now respond with a JSON deprecation notice and a status code of `410`. All endpoints have a `v2` equivalent. The list of endpoints is: #7103 * `/api/v1/alerts` * `/api/v1/receivers` diff --git a/go.mod b/go.mod index 848a017b112..8777d4d725b 100644 --- a/go.mod +++ b/go.mod @@ -31,7 +31,7 @@ require ( github.com/opentracing-contrib/go-stdlib v1.0.0 github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b github.com/pkg/errors v0.9.1 - github.com/prometheus/alertmanager v0.26.1-0.20240208095903-f69a5086657b + github.com/prometheus/alertmanager v0.26.1-0.20240215111258-80b3cb072fbd github.com/prometheus/client_golang v1.18.0 github.com/prometheus/client_model v0.5.0 github.com/prometheus/common v0.46.0 diff --git a/go.sum b/go.sum index 238e597ae83..2e5b0ff060f 100644 --- a/go.sum +++ b/go.sum @@ -769,8 +769,8 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSgv7Sy7s/s= -github.com/prometheus/alertmanager v0.26.1-0.20240208095903-f69a5086657b h1:qdLfwUabfhvvvOhnObLgRfXo5wq2V3pZSdLhUMgN4QE= -github.com/prometheus/alertmanager v0.26.1-0.20240208095903-f69a5086657b/go.mod h1:8Ia/R3urPmbzJ8OsdvmZvIprDwvwmYCmUbwBL+jlPOE= +github.com/prometheus/alertmanager v0.26.1-0.20240215111258-80b3cb072fbd h1:41+1zd8AibDiY4xov0REU1rNW+Kg+ioVVQRynAXRvZg= +github.com/prometheus/alertmanager v0.26.1-0.20240215111258-80b3cb072fbd/go.mod h1:8Ia/R3urPmbzJ8OsdvmZvIprDwvwmYCmUbwBL+jlPOE= github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU= diff --git a/pkg/alertmanager/alertmanager_config_test.go b/pkg/alertmanager/alertmanager_config_test.go index 82c8a09b3c5..bc49d828673 100644 --- a/pkg/alertmanager/alertmanager_config_test.go +++ b/pkg/alertmanager/alertmanager_config_test.go @@ -102,11 +102,11 @@ inhibit_rules: }, expected: []string{ `level=debug user=4 msg="Parsing with UTF-8 matchers parser, with fallback to classic matchers parser" input="foo=" origin=test`, - `level=warn user=4 msg="Alertmanager is moving to a new parser for labels and matchers, and this input is incompatible. Alertmanager has instead parsed the input using the old matchers parser as a fallback. To make this input compatible with the new parser please make sure all regular expressions and values are double-quoted. If you are still seeing this message please open an issue." input="foo=" origin=test err="end of input: expected label value" suggestion="foo=\"\""`, + `level=warn user=4 msg="Alertmanager is moving to a new parser for labels and matchers, and this input is incompatible. Alertmanager has instead parsed the input using the classic matchers parser as a fallback. To make this input compatible with the UTF-8 matchers parser please make sure all regular expressions and values are double-quoted. If you are still seeing this message please open an issue." input="foo=" origin=test err="end of input: expected label value" suggestion="foo=\"\""`, `level=debug user=4 msg="Parsing with UTF-8 matchers parser, with fallback to classic matchers parser" input="bar=" origin=test`, - `level=warn user=4 msg="Alertmanager is moving to a new parser for labels and matchers, and this input is incompatible. Alertmanager has instead parsed the input using the old matchers parser as a fallback. To make this input compatible with the new parser please make sure all regular expressions and values are double-quoted. If you are still seeing this message please open an issue." input="bar=" origin=test err="end of input: expected label value" suggestion="bar=\"\""`, + `level=warn user=4 msg="Alertmanager is moving to a new parser for labels and matchers, and this input is incompatible. Alertmanager has instead parsed the input using the classic matchers parser as a fallback. To make this input compatible with the UTF-8 matchers parser please make sure all regular expressions and values are double-quoted. If you are still seeing this message please open an issue." input="bar=" origin=test err="end of input: expected label value" suggestion="bar=\"\""`, `level=debug user=4 msg="Parsing with UTF-8 matchers parser, with fallback to classic matchers parser" input="baz=" origin=test`, - `level=warn user=4 msg="Alertmanager is moving to a new parser for labels and matchers, and this input is incompatible. Alertmanager has instead parsed the input using the old matchers parser as a fallback. To make this input compatible with the new parser please make sure all regular expressions and values are double-quoted. If you are still seeing this message please open an issue." input="baz=" origin=test err="end of input: expected label value" suggestion="baz=\"\""`, + `level=warn user=4 msg="Alertmanager is moving to a new parser for labels and matchers, and this input is incompatible. Alertmanager has instead parsed the input using the classic matchers parser as a fallback. To make this input compatible with the UTF-8 matchers parser please make sure all regular expressions and values are double-quoted. If you are still seeing this message please open an issue." input="baz=" origin=test err="end of input: expected label value" suggestion="baz=\"\""`, }, }, { name: "config contains disagreement", diff --git a/pkg/alertmanager/alertmanager_metrics.go b/pkg/alertmanager/alertmanager_metrics.go index d68a5a4ca55..79c59b4a07f 100644 --- a/pkg/alertmanager/alertmanager_metrics.go +++ b/pkg/alertmanager/alertmanager_metrics.go @@ -26,6 +26,7 @@ type alertmanagerMetrics struct { numFailedNotifications *prometheus.Desc numNotificationRequestsTotal *prometheus.Desc numNotificationRequestsFailedTotal *prometheus.Desc + numNotificationSuppressedTotal *prometheus.Desc notificationLatencySeconds *prometheus.Desc // exported metrics, gathered from Alertmanager nflog @@ -107,6 +108,10 @@ func newAlertmanagerMetrics() *alertmanagerMetrics { "cortex_alertmanager_notification_requests_failed_total", "The total number of failed notification requests.", []string{"user", "integration"}, nil), + numNotificationSuppressedTotal: prometheus.NewDesc( + "cortex_alertmanager_notifications_suppressed_total", + "The total number of notifications suppressed for being silenced, inhibited, outside of active time intervals or within muted time intervals.", + []string{"user", "reason"}, nil), notificationLatencySeconds: prometheus.NewDesc( "cortex_alertmanager_notification_latency_seconds", "The latency of notifications in seconds.", @@ -287,6 +292,7 @@ func (m *alertmanagerMetrics) Describe(out chan<- *prometheus.Desc) { out <- m.numFailedNotifications out <- m.numNotificationRequestsTotal out <- m.numNotificationRequestsFailedTotal + out <- m.numNotificationSuppressedTotal out <- m.notificationLatencySeconds out <- m.markerAlerts out <- m.nflogGCDuration @@ -339,6 +345,7 @@ func (m *alertmanagerMetrics) Collect(out chan<- prometheus.Metric) { data.SendSumOfCountersPerTenant(out, m.numFailedNotifications, "alertmanager_notifications_failed_total", dskit_metrics.WithLabels("integration", "reason"), dskit_metrics.WithSkipZeroValueMetrics) data.SendSumOfCountersPerTenant(out, m.numNotificationRequestsTotal, "alertmanager_notification_requests_total", dskit_metrics.WithLabels("integration"), dskit_metrics.WithSkipZeroValueMetrics) data.SendSumOfCountersPerTenant(out, m.numNotificationRequestsFailedTotal, "alertmanager_notification_requests_failed_total", dskit_metrics.WithLabels("integration"), dskit_metrics.WithSkipZeroValueMetrics) + data.SendSumOfCountersPerTenant(out, m.numNotificationSuppressedTotal, "alertmanager_notifications_suppressed_total", dskit_metrics.WithLabels("reason"), dskit_metrics.WithSkipZeroValueMetrics) data.SendSumOfHistograms(out, m.notificationLatencySeconds, "alertmanager_notification_latency_seconds") data.SendSumOfGaugesPerTenantWithLabels(out, m.markerAlerts, "alertmanager_alerts", "state") diff --git a/pkg/alertmanager/alertmanager_metrics_test.go b/pkg/alertmanager/alertmanager_metrics_test.go index a09a3169681..7c69c9c0e91 100644 --- a/pkg/alertmanager/alertmanager_metrics_test.go +++ b/pkg/alertmanager/alertmanager_metrics_test.go @@ -145,6 +145,17 @@ func TestAlertmanagerMetricsStore(t *testing.T) { cortex_alertmanager_notifications_failed_total{integration="telegram",reason="clientError",user="user1"} 9 cortex_alertmanager_notifications_failed_total{integration="telegram",reason="clientError",user="user2"} 90 cortex_alertmanager_notifications_failed_total{integration="telegram",reason="clientError",user="user3"} 900 + # HELP cortex_alertmanager_notifications_suppressed_total The total number of notifications suppressed for being silenced, inhibited, outside of active time intervals or within muted time intervals. + # TYPE cortex_alertmanager_notifications_suppressed_total counter + cortex_alertmanager_notifications_suppressed_total{reason="active_time_interval",user="user1"} 3 + cortex_alertmanager_notifications_suppressed_total{reason="active_time_interval",user="user2"} 30 + cortex_alertmanager_notifications_suppressed_total{reason="active_time_interval",user="user3"} 300 + cortex_alertmanager_notifications_suppressed_total{reason="inhibition",user="user1"} 1 + cortex_alertmanager_notifications_suppressed_total{reason="inhibition",user="user2"} 10 + cortex_alertmanager_notifications_suppressed_total{reason="inhibition",user="user3"} 100 + cortex_alertmanager_notifications_suppressed_total{reason="mute_time_interval",user="user1"} 2 + cortex_alertmanager_notifications_suppressed_total{reason="mute_time_interval",user="user2"} 20 + cortex_alertmanager_notifications_suppressed_total{reason="mute_time_interval",user="user3"} 200 # HELP cortex_alertmanager_notification_requests_total The total number of attempted notification requests. # TYPE cortex_alertmanager_notification_requests_total counter cortex_alertmanager_notification_requests_total{integration="opsgenie",user="user1"} 5 @@ -531,6 +542,18 @@ func TestAlertmanagerMetricsRemoval(t *testing.T) { cortex_alertmanager_notifications_failed_total{integration="telegram",reason="clientError",user="user2"} 90 cortex_alertmanager_notifications_failed_total{integration="telegram",reason="clientError",user="user3"} 900 + # HELP cortex_alertmanager_notifications_suppressed_total The total number of notifications suppressed for being silenced, inhibited, outside of active time intervals or within muted time intervals. + # TYPE cortex_alertmanager_notifications_suppressed_total counter + cortex_alertmanager_notifications_suppressed_total{reason="active_time_interval",user="user1"} 3 + cortex_alertmanager_notifications_suppressed_total{reason="active_time_interval",user="user2"} 30 + cortex_alertmanager_notifications_suppressed_total{reason="active_time_interval",user="user3"} 300 + cortex_alertmanager_notifications_suppressed_total{reason="inhibition",user="user1"} 1 + cortex_alertmanager_notifications_suppressed_total{reason="inhibition",user="user2"} 10 + cortex_alertmanager_notifications_suppressed_total{reason="inhibition",user="user3"} 100 + cortex_alertmanager_notifications_suppressed_total{reason="mute_time_interval",user="user1"} 2 + cortex_alertmanager_notifications_suppressed_total{reason="mute_time_interval",user="user2"} 20 + cortex_alertmanager_notifications_suppressed_total{reason="mute_time_interval",user="user3"} 200 + # HELP cortex_alertmanager_notifications_total The total number of attempted notifications. # TYPE cortex_alertmanager_notifications_total counter cortex_alertmanager_notifications_total{integration="opsgenie",user="user1"} 5 @@ -822,6 +845,15 @@ func TestAlertmanagerMetricsRemoval(t *testing.T) { cortex_alertmanager_notifications_failed_total{integration="telegram",reason="clientError",user="user1"} 9 cortex_alertmanager_notifications_failed_total{integration="telegram",reason="clientError",user="user2"} 90 + # HELP cortex_alertmanager_notifications_suppressed_total The total number of notifications suppressed for being silenced, inhibited, outside of active time intervals or within muted time intervals. + # TYPE cortex_alertmanager_notifications_suppressed_total counter + cortex_alertmanager_notifications_suppressed_total{reason="active_time_interval",user="user1"} 3 + cortex_alertmanager_notifications_suppressed_total{reason="active_time_interval",user="user2"} 30 + cortex_alertmanager_notifications_suppressed_total{reason="inhibition",user="user1"} 1 + cortex_alertmanager_notifications_suppressed_total{reason="inhibition",user="user2"} 10 + cortex_alertmanager_notifications_suppressed_total{reason="mute_time_interval",user="user1"} 2 + cortex_alertmanager_notifications_suppressed_total{reason="mute_time_interval",user="user2"} 20 + # HELP cortex_alertmanager_notifications_total The total number of attempted notifications. # TYPE cortex_alertmanager_notifications_total counter cortex_alertmanager_notifications_total{integration="opsgenie",user="user1"} 5 @@ -989,6 +1021,9 @@ func populateAlertmanager(base float64) *prometheus.Registry { nm.numNotificationRequestsFailedTotal.WithLabelValues(integration).Add(base * float64(i)) nm.notificationLatencySeconds.WithLabelValues(integration).Observe(base * float64(i) * 0.025) } + for i, reason := range possibleSuppressedReason { + nm.numNotificationSuppressedTotal.WithLabelValues(reason).Add(base * float64(i)) + } m := newMarkerMetrics(reg) m.alerts.WithLabelValues(string(types.AlertStateActive)).Add(base) @@ -1157,6 +1192,7 @@ type notifyMetrics struct { numTotalFailedNotifications *prometheus.CounterVec numNotificationRequestsTotal *prometheus.CounterVec numNotificationRequestsFailedTotal *prometheus.CounterVec + numNotificationSuppressedTotal *prometheus.CounterVec notificationLatencySeconds *prometheus.HistogramVec } @@ -1182,6 +1218,11 @@ func newNotifyMetrics(r prometheus.Registerer) *notifyMetrics { Name: "notification_requests_failed_total", Help: "The total number of failed notification requests.", }, []string{"integration"}), + numNotificationSuppressedTotal: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ + Namespace: "alertmanager", + Name: "notifications_suppressed_total", + Help: "The total number of notifications suppressed for being silenced, inhibited, outside of active time intervals or within muted time intervals.", + }, []string{"reason"}), notificationLatencySeconds: promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{ Namespace: "alertmanager", Name: "notification_latency_seconds", @@ -1210,12 +1251,16 @@ func newNotifyMetrics(r prometheus.Registerer) *notifyMetrics { m.numTotalFailedNotifications.WithLabelValues(integration, reason) } } + for _, reason := range possibleSuppressedReason { + m.numNotificationSuppressedTotal.WithLabelValues(reason) + } return m } // Copied from github.com/alertmanager/notify/util.go // possibleFailureReasonCategory is a list of possible failure reason. var possibleFailureReasonCategory = []string{notify.DefaultReason.String(), notify.ClientErrorReason.String(), notify.ServerErrorReason.String()} +var possibleSuppressedReason = []string{notify.SuppressedReasonSilence, notify.SuppressedReasonInhibition, notify.SuppressedReasonMuteTimeInterval, notify.SuppressedReasonActiveTimeInterval} type markerMetrics struct { alerts *prometheus.GaugeVec diff --git a/vendor/github.com/prometheus/alertmanager/featurecontrol/featurecontrol.go b/vendor/github.com/prometheus/alertmanager/featurecontrol/featurecontrol.go index a8a5585267f..9ff7a2d8fd2 100644 --- a/vendor/github.com/prometheus/alertmanager/featurecontrol/featurecontrol.go +++ b/vendor/github.com/prometheus/alertmanager/featurecontrol/featurecontrol.go @@ -97,7 +97,7 @@ func NewFlags(logger log.Logger, features string) (Flagger, error) { level.Warn(logger).Log("msg", "Classic mode enabled") case FeatureUTF8StrictMode: opts = append(opts, enableUTF8StrictMode()) - level.Warn(logger).Log("msg", "UTF-8 mode enabled") + level.Warn(logger).Log("msg", "UTF-8 strict mode enabled") default: return nil, fmt.Errorf("Unknown option '%s' for --enable-feature", feature) } diff --git a/vendor/github.com/prometheus/alertmanager/matchers/compat/parse.go b/vendor/github.com/prometheus/alertmanager/matchers/compat/parse.go index 7aa4e2d95b5..0c0dfffb1fd 100644 --- a/vendor/github.com/prometheus/alertmanager/matchers/compat/parse.go +++ b/vendor/github.com/prometheus/alertmanager/matchers/compat/parse.go @@ -133,7 +133,7 @@ func FallbackMatcherParser(l log.Logger) ParseMatcher { // The input is valid in the pkg/labels parser, but not the matchers/parse // parser. This means the input is not forwards compatible. suggestion := cMatcher.String() - level.Warn(l).Log("msg", "Alertmanager is moving to a new parser for labels and matchers, and this input is incompatible. Alertmanager has instead parsed the input using the old matchers parser as a fallback. To make this input compatible with the new parser please make sure all regular expressions and values are double-quoted. If you are still seeing this message please open an issue.", "input", input, "origin", origin, "err", nErr, "suggestion", suggestion) + level.Warn(l).Log("msg", "Alertmanager is moving to a new parser for labels and matchers, and this input is incompatible. Alertmanager has instead parsed the input using the classic matchers parser as a fallback. To make this input compatible with the UTF-8 matchers parser please make sure all regular expressions and values are double-quoted. If you are still seeing this message please open an issue.", "input", input, "origin", origin, "err", nErr, "suggestion", suggestion) return cMatcher, nil } // If the input is valid in both parsers, but produces different results, @@ -173,7 +173,7 @@ func FallbackMatchersParser(l log.Logger) ParseMatchers { suggestion := sb.String() // The input is valid in the pkg/labels parser, but not the // new matchers/parse parser. - level.Warn(l).Log("msg", "Alertmanager is moving to a new parser for labels and matchers, and this input is incompatible. Alertmanager has instead parsed the input using the old matchers parser as a fallback. To make this input compatible with the new parser please make sure all regular expressions and values are double-quoted. If you are still seeing this message please open an issue.", "input", input, "origin", origin, "err", nErr, "suggestion", suggestion) + level.Warn(l).Log("msg", "Alertmanager is moving to a new parser for labels and matchers, and this input is incompatible. Alertmanager has instead parsed the input using the classic matchers parser as a fallback. To make this input compatible with the UTF-8 matchers parser please make sure all regular expressions and values are double-quoted. If you are still seeing this message please open an issue.", "input", input, "origin", origin, "err", nErr, "suggestion", suggestion) return cMatchers, nil } // If the input is valid in both parsers, but produces different results, diff --git a/vendor/github.com/prometheus/alertmanager/notify/notify.go b/vendor/github.com/prometheus/alertmanager/notify/notify.go index 0a2b0d032b3..1d7597c9c68 100644 --- a/vendor/github.com/prometheus/alertmanager/notify/notify.go +++ b/vendor/github.com/prometheus/alertmanager/notify/notify.go @@ -251,6 +251,7 @@ type Metrics struct { numTotalFailedNotifications *prometheus.CounterVec numNotificationRequestsTotal *prometheus.CounterVec numNotificationRequestsFailedTotal *prometheus.CounterVec + numNotificationSuppressedTotal *prometheus.CounterVec notificationLatencySeconds *prometheus.HistogramVec ff featurecontrol.Flagger @@ -284,6 +285,11 @@ func NewMetrics(r prometheus.Registerer, ff featurecontrol.Flagger) *Metrics { Name: "notification_requests_failed_total", Help: "The total number of failed notification requests.", }, labels), + numNotificationSuppressedTotal: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: "alertmanager", + Name: "notifications_suppressed_total", + Help: "The total number of notifications suppressed for being silenced, inhibited, outside of active time intervals or within muted time intervals.", + }, []string{"reason"}), notificationLatencySeconds: prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: "alertmanager", Name: "notification_latency_seconds", @@ -296,7 +302,7 @@ func NewMetrics(r prometheus.Registerer, ff featurecontrol.Flagger) *Metrics { r.MustRegister( m.numNotifications, m.numTotalFailedNotifications, m.numNotificationRequestsTotal, m.numNotificationRequestsFailedTotal, - m.notificationLatencySeconds, + m.numNotificationSuppressedTotal, m.notificationLatencySeconds, ) return m @@ -381,10 +387,10 @@ func (pb *PipelineBuilder) New( rs := make(RoutingStage, len(receivers)) ms := NewGossipSettleStage(peer) - is := NewMuteStage(inhibitor) - tas := NewTimeActiveStage(intervener) - tms := NewTimeMuteStage(intervener) - ss := NewMuteStage(silencer) + is := NewMuteStage(inhibitor, pb.metrics) + tas := NewTimeActiveStage(intervener, pb.metrics) + tms := NewTimeMuteStage(intervener, pb.metrics) + ss := NewMuteStage(silencer, pb.metrics) for name := range receivers { st := createReceiverStage(name, receivers[name], wait, notificationLog, pb.metrics) @@ -507,14 +513,22 @@ func (n *GossipSettleStage) Exec(ctx context.Context, _ log.Logger, alerts ...*t return ctx, alerts, nil } +const ( + SuppressedReasonSilence = "silence" + SuppressedReasonInhibition = "inhibition" + SuppressedReasonMuteTimeInterval = "mute_time_interval" + SuppressedReasonActiveTimeInterval = "active_time_interval" +) + // MuteStage filters alerts through a Muter. type MuteStage struct { - muter types.Muter + muter types.Muter + metrics *Metrics } // NewMuteStage return a new MuteStage. -func NewMuteStage(m types.Muter) *MuteStage { - return &MuteStage{muter: m} +func NewMuteStage(m types.Muter, metrics *Metrics) *MuteStage { + return &MuteStage{muter: m, metrics: metrics} } // Exec implements the Stage interface. @@ -534,8 +548,19 @@ func (n *MuteStage) Exec(ctx context.Context, logger log.Logger, alerts ...*type // TODO(fabxc): increment muted alerts counter if muted. } if len(muted) > 0 { - level.Debug(logger).Log("msg", "Notifications will not be sent for muted alerts", "alerts", fmt.Sprintf("%v", muted)) + + var reason string + switch n.muter.(type) { + case *silence.Silencer: + reason = SuppressedReasonSilence + case *inhibit.Inhibitor: + reason = SuppressedReasonInhibition + default: + } + n.metrics.numNotificationSuppressedTotal.WithLabelValues(reason).Add(float64(len(muted))) + level.Debug(logger).Log("msg", "Notifications will not be sent for muted alerts", "alerts", fmt.Sprintf("%v", muted), "reason", reason) } + return ctx, filtered, nil } @@ -894,13 +919,14 @@ func (n SetNotifiesStage) Exec(ctx context.Context, l log.Logger, alerts ...*typ } type timeStage struct { - muter types.TimeMuter + muter types.TimeMuter + metrics *Metrics } type TimeMuteStage timeStage -func NewTimeMuteStage(m types.TimeMuter) *TimeMuteStage { - return &TimeMuteStage{m} +func NewTimeMuteStage(m types.TimeMuter, metrics *Metrics) *TimeMuteStage { + return &TimeMuteStage{m, metrics} } // Exec implements the stage interface for TimeMuteStage. @@ -927,7 +953,8 @@ func (tms TimeMuteStage) Exec(ctx context.Context, l log.Logger, alerts ...*type // If the current time is inside a mute time, all alerts are removed from the pipeline. if muted { - level.Debug(l).Log("msg", "Notifications not sent, route is within mute time") + tms.metrics.numNotificationSuppressedTotal.WithLabelValues(SuppressedReasonMuteTimeInterval).Add(float64(len(alerts))) + level.Debug(l).Log("msg", "Notifications not sent, route is within mute time", "alerts", len(alerts)) return ctx, nil, nil } return ctx, alerts, nil @@ -935,8 +962,8 @@ func (tms TimeMuteStage) Exec(ctx context.Context, l log.Logger, alerts ...*type type TimeActiveStage timeStage -func NewTimeActiveStage(m types.TimeMuter) *TimeActiveStage { - return &TimeActiveStage{m} +func NewTimeActiveStage(m types.TimeMuter, metrics *Metrics) *TimeActiveStage { + return &TimeActiveStage{m, metrics} } // Exec implements the stage interface for TimeActiveStage. @@ -964,7 +991,8 @@ func (tas TimeActiveStage) Exec(ctx context.Context, l log.Logger, alerts ...*ty // If the current time is not inside an active time, all alerts are removed from the pipeline if !muted { - level.Debug(l).Log("msg", "Notifications not sent, route is not within active time") + tas.metrics.numNotificationSuppressedTotal.WithLabelValues(SuppressedReasonActiveTimeInterval).Add(float64(len(alerts))) + level.Debug(l).Log("msg", "Notifications not sent, route is not within active time", "alerts", len(alerts)) return ctx, nil, nil } diff --git a/vendor/modules.txt b/vendor/modules.txt index 50e88ea4265..816fca7df4d 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -823,7 +823,7 @@ github.com/pkg/errors # github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 ## explicit github.com/pmezard/go-difflib/difflib -# github.com/prometheus/alertmanager v0.26.1-0.20240208095903-f69a5086657b +# github.com/prometheus/alertmanager v0.26.1-0.20240215111258-80b3cb072fbd ## explicit; go 1.21 github.com/prometheus/alertmanager/api github.com/prometheus/alertmanager/api/metrics