Skip to content

Commit

Permalink
Alertmanager: Update Alertmanager to commit 80b3cb0 (#7384)
Browse files Browse the repository at this point in the history
* Alertmanager: Update Alertmanager to commit 80b3cb0

This commit updates Alertmanager from f69a508 to 80b3cb0.
It has the following changes:

- prometheus/alertmanager#3565
- prometheus/alertmanager#3718
- prometheus/alertmanager#3707
- prometheus/alertmanager#3719
- prometheus/alertmanager#3592
- prometheus/alertmanager#3572
- prometheus/alertmanager#3722
  • Loading branch information
grobinson-grafana authored Feb 15, 2024
1 parent cd77d1a commit 1ae1641
Show file tree
Hide file tree
Showing 10 changed files with 107 additions and 26 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

### Grafana Mimir

* [ENHANCEMENT] Alertmanager: Adds metric `cortex_alertmanager_notifications_suppressed_total` that counts the total number of notifications suppressed for being silenced, inhibited, outside of active time intervals or within muted time intervals. #7384
* [CHANGE] Alertmanager: Deprecates the `v1` API. All `v1` API endpoints now respond with a JSON deprecation notice and a status code of `410`. All endpoints have a `v2` equivalent. The list of endpoints is: #7103
* `<alertmanager-web.external-url>/api/v1/alerts`
* `<alertmanager-web.external-url>/api/v1/receivers`
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ require (
github.com/opentracing-contrib/go-stdlib v1.0.0
github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b
github.com/pkg/errors v0.9.1
github.com/prometheus/alertmanager v0.26.1-0.20240208095903-f69a5086657b
github.com/prometheus/alertmanager v0.26.1-0.20240215111258-80b3cb072fbd
github.com/prometheus/client_golang v1.18.0
github.com/prometheus/client_model v0.5.0
github.com/prometheus/common v0.46.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -769,8 +769,8 @@ github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRI
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI=
github.com/posener/complete v1.2.3/go.mod h1:WZIdtGGp+qx0sLrYKtIRAruyNpv6hFCicSgv7Sy7s/s=
github.com/prometheus/alertmanager v0.26.1-0.20240208095903-f69a5086657b h1:qdLfwUabfhvvvOhnObLgRfXo5wq2V3pZSdLhUMgN4QE=
github.com/prometheus/alertmanager v0.26.1-0.20240208095903-f69a5086657b/go.mod h1:8Ia/R3urPmbzJ8OsdvmZvIprDwvwmYCmUbwBL+jlPOE=
github.com/prometheus/alertmanager v0.26.1-0.20240215111258-80b3cb072fbd h1:41+1zd8AibDiY4xov0REU1rNW+Kg+ioVVQRynAXRvZg=
github.com/prometheus/alertmanager v0.26.1-0.20240215111258-80b3cb072fbd/go.mod h1:8Ia/R3urPmbzJ8OsdvmZvIprDwvwmYCmUbwBL+jlPOE=
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
github.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU=
Expand Down
6 changes: 3 additions & 3 deletions pkg/alertmanager/alertmanager_config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,11 +102,11 @@ inhibit_rules:
},
expected: []string{
`level=debug user=4 msg="Parsing with UTF-8 matchers parser, with fallback to classic matchers parser" input="foo=" origin=test`,
`level=warn user=4 msg="Alertmanager is moving to a new parser for labels and matchers, and this input is incompatible. Alertmanager has instead parsed the input using the old matchers parser as a fallback. To make this input compatible with the new parser please make sure all regular expressions and values are double-quoted. If you are still seeing this message please open an issue." input="foo=" origin=test err="end of input: expected label value" suggestion="foo=\"\""`,
`level=warn user=4 msg="Alertmanager is moving to a new parser for labels and matchers, and this input is incompatible. Alertmanager has instead parsed the input using the classic matchers parser as a fallback. To make this input compatible with the UTF-8 matchers parser please make sure all regular expressions and values are double-quoted. If you are still seeing this message please open an issue." input="foo=" origin=test err="end of input: expected label value" suggestion="foo=\"\""`,
`level=debug user=4 msg="Parsing with UTF-8 matchers parser, with fallback to classic matchers parser" input="bar=" origin=test`,
`level=warn user=4 msg="Alertmanager is moving to a new parser for labels and matchers, and this input is incompatible. Alertmanager has instead parsed the input using the old matchers parser as a fallback. To make this input compatible with the new parser please make sure all regular expressions and values are double-quoted. If you are still seeing this message please open an issue." input="bar=" origin=test err="end of input: expected label value" suggestion="bar=\"\""`,
`level=warn user=4 msg="Alertmanager is moving to a new parser for labels and matchers, and this input is incompatible. Alertmanager has instead parsed the input using the classic matchers parser as a fallback. To make this input compatible with the UTF-8 matchers parser please make sure all regular expressions and values are double-quoted. If you are still seeing this message please open an issue." input="bar=" origin=test err="end of input: expected label value" suggestion="bar=\"\""`,
`level=debug user=4 msg="Parsing with UTF-8 matchers parser, with fallback to classic matchers parser" input="baz=" origin=test`,
`level=warn user=4 msg="Alertmanager is moving to a new parser for labels and matchers, and this input is incompatible. Alertmanager has instead parsed the input using the old matchers parser as a fallback. To make this input compatible with the new parser please make sure all regular expressions and values are double-quoted. If you are still seeing this message please open an issue." input="baz=" origin=test err="end of input: expected label value" suggestion="baz=\"\""`,
`level=warn user=4 msg="Alertmanager is moving to a new parser for labels and matchers, and this input is incompatible. Alertmanager has instead parsed the input using the classic matchers parser as a fallback. To make this input compatible with the UTF-8 matchers parser please make sure all regular expressions and values are double-quoted. If you are still seeing this message please open an issue." input="baz=" origin=test err="end of input: expected label value" suggestion="baz=\"\""`,
},
}, {
name: "config contains disagreement",
Expand Down
7 changes: 7 additions & 0 deletions pkg/alertmanager/alertmanager_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ type alertmanagerMetrics struct {
numFailedNotifications *prometheus.Desc
numNotificationRequestsTotal *prometheus.Desc
numNotificationRequestsFailedTotal *prometheus.Desc
numNotificationSuppressedTotal *prometheus.Desc
notificationLatencySeconds *prometheus.Desc

// exported metrics, gathered from Alertmanager nflog
Expand Down Expand Up @@ -107,6 +108,10 @@ func newAlertmanagerMetrics() *alertmanagerMetrics {
"cortex_alertmanager_notification_requests_failed_total",
"The total number of failed notification requests.",
[]string{"user", "integration"}, nil),
numNotificationSuppressedTotal: prometheus.NewDesc(
"cortex_alertmanager_notifications_suppressed_total",
"The total number of notifications suppressed for being silenced, inhibited, outside of active time intervals or within muted time intervals.",
[]string{"user", "reason"}, nil),
notificationLatencySeconds: prometheus.NewDesc(
"cortex_alertmanager_notification_latency_seconds",
"The latency of notifications in seconds.",
Expand Down Expand Up @@ -287,6 +292,7 @@ func (m *alertmanagerMetrics) Describe(out chan<- *prometheus.Desc) {
out <- m.numFailedNotifications
out <- m.numNotificationRequestsTotal
out <- m.numNotificationRequestsFailedTotal
out <- m.numNotificationSuppressedTotal
out <- m.notificationLatencySeconds
out <- m.markerAlerts
out <- m.nflogGCDuration
Expand Down Expand Up @@ -339,6 +345,7 @@ func (m *alertmanagerMetrics) Collect(out chan<- prometheus.Metric) {
data.SendSumOfCountersPerTenant(out, m.numFailedNotifications, "alertmanager_notifications_failed_total", dskit_metrics.WithLabels("integration", "reason"), dskit_metrics.WithSkipZeroValueMetrics)
data.SendSumOfCountersPerTenant(out, m.numNotificationRequestsTotal, "alertmanager_notification_requests_total", dskit_metrics.WithLabels("integration"), dskit_metrics.WithSkipZeroValueMetrics)
data.SendSumOfCountersPerTenant(out, m.numNotificationRequestsFailedTotal, "alertmanager_notification_requests_failed_total", dskit_metrics.WithLabels("integration"), dskit_metrics.WithSkipZeroValueMetrics)
data.SendSumOfCountersPerTenant(out, m.numNotificationSuppressedTotal, "alertmanager_notifications_suppressed_total", dskit_metrics.WithLabels("reason"), dskit_metrics.WithSkipZeroValueMetrics)
data.SendSumOfHistograms(out, m.notificationLatencySeconds, "alertmanager_notification_latency_seconds")
data.SendSumOfGaugesPerTenantWithLabels(out, m.markerAlerts, "alertmanager_alerts", "state")

Expand Down
45 changes: 45 additions & 0 deletions pkg/alertmanager/alertmanager_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,17 @@ func TestAlertmanagerMetricsStore(t *testing.T) {
cortex_alertmanager_notifications_failed_total{integration="telegram",reason="clientError",user="user1"} 9
cortex_alertmanager_notifications_failed_total{integration="telegram",reason="clientError",user="user2"} 90
cortex_alertmanager_notifications_failed_total{integration="telegram",reason="clientError",user="user3"} 900
# HELP cortex_alertmanager_notifications_suppressed_total The total number of notifications suppressed for being silenced, inhibited, outside of active time intervals or within muted time intervals.
# TYPE cortex_alertmanager_notifications_suppressed_total counter
cortex_alertmanager_notifications_suppressed_total{reason="active_time_interval",user="user1"} 3
cortex_alertmanager_notifications_suppressed_total{reason="active_time_interval",user="user2"} 30
cortex_alertmanager_notifications_suppressed_total{reason="active_time_interval",user="user3"} 300
cortex_alertmanager_notifications_suppressed_total{reason="inhibition",user="user1"} 1
cortex_alertmanager_notifications_suppressed_total{reason="inhibition",user="user2"} 10
cortex_alertmanager_notifications_suppressed_total{reason="inhibition",user="user3"} 100
cortex_alertmanager_notifications_suppressed_total{reason="mute_time_interval",user="user1"} 2
cortex_alertmanager_notifications_suppressed_total{reason="mute_time_interval",user="user2"} 20
cortex_alertmanager_notifications_suppressed_total{reason="mute_time_interval",user="user3"} 200
# HELP cortex_alertmanager_notification_requests_total The total number of attempted notification requests.
# TYPE cortex_alertmanager_notification_requests_total counter
cortex_alertmanager_notification_requests_total{integration="opsgenie",user="user1"} 5
Expand Down Expand Up @@ -531,6 +542,18 @@ func TestAlertmanagerMetricsRemoval(t *testing.T) {
cortex_alertmanager_notifications_failed_total{integration="telegram",reason="clientError",user="user2"} 90
cortex_alertmanager_notifications_failed_total{integration="telegram",reason="clientError",user="user3"} 900
# HELP cortex_alertmanager_notifications_suppressed_total The total number of notifications suppressed for being silenced, inhibited, outside of active time intervals or within muted time intervals.
# TYPE cortex_alertmanager_notifications_suppressed_total counter
cortex_alertmanager_notifications_suppressed_total{reason="active_time_interval",user="user1"} 3
cortex_alertmanager_notifications_suppressed_total{reason="active_time_interval",user="user2"} 30
cortex_alertmanager_notifications_suppressed_total{reason="active_time_interval",user="user3"} 300
cortex_alertmanager_notifications_suppressed_total{reason="inhibition",user="user1"} 1
cortex_alertmanager_notifications_suppressed_total{reason="inhibition",user="user2"} 10
cortex_alertmanager_notifications_suppressed_total{reason="inhibition",user="user3"} 100
cortex_alertmanager_notifications_suppressed_total{reason="mute_time_interval",user="user1"} 2
cortex_alertmanager_notifications_suppressed_total{reason="mute_time_interval",user="user2"} 20
cortex_alertmanager_notifications_suppressed_total{reason="mute_time_interval",user="user3"} 200
# HELP cortex_alertmanager_notifications_total The total number of attempted notifications.
# TYPE cortex_alertmanager_notifications_total counter
cortex_alertmanager_notifications_total{integration="opsgenie",user="user1"} 5
Expand Down Expand Up @@ -822,6 +845,15 @@ func TestAlertmanagerMetricsRemoval(t *testing.T) {
cortex_alertmanager_notifications_failed_total{integration="telegram",reason="clientError",user="user1"} 9
cortex_alertmanager_notifications_failed_total{integration="telegram",reason="clientError",user="user2"} 90
# HELP cortex_alertmanager_notifications_suppressed_total The total number of notifications suppressed for being silenced, inhibited, outside of active time intervals or within muted time intervals.
# TYPE cortex_alertmanager_notifications_suppressed_total counter
cortex_alertmanager_notifications_suppressed_total{reason="active_time_interval",user="user1"} 3
cortex_alertmanager_notifications_suppressed_total{reason="active_time_interval",user="user2"} 30
cortex_alertmanager_notifications_suppressed_total{reason="inhibition",user="user1"} 1
cortex_alertmanager_notifications_suppressed_total{reason="inhibition",user="user2"} 10
cortex_alertmanager_notifications_suppressed_total{reason="mute_time_interval",user="user1"} 2
cortex_alertmanager_notifications_suppressed_total{reason="mute_time_interval",user="user2"} 20
# HELP cortex_alertmanager_notifications_total The total number of attempted notifications.
# TYPE cortex_alertmanager_notifications_total counter
cortex_alertmanager_notifications_total{integration="opsgenie",user="user1"} 5
Expand Down Expand Up @@ -989,6 +1021,9 @@ func populateAlertmanager(base float64) *prometheus.Registry {
nm.numNotificationRequestsFailedTotal.WithLabelValues(integration).Add(base * float64(i))
nm.notificationLatencySeconds.WithLabelValues(integration).Observe(base * float64(i) * 0.025)
}
for i, reason := range possibleSuppressedReason {
nm.numNotificationSuppressedTotal.WithLabelValues(reason).Add(base * float64(i))
}

m := newMarkerMetrics(reg)
m.alerts.WithLabelValues(string(types.AlertStateActive)).Add(base)
Expand Down Expand Up @@ -1157,6 +1192,7 @@ type notifyMetrics struct {
numTotalFailedNotifications *prometheus.CounterVec
numNotificationRequestsTotal *prometheus.CounterVec
numNotificationRequestsFailedTotal *prometheus.CounterVec
numNotificationSuppressedTotal *prometheus.CounterVec
notificationLatencySeconds *prometheus.HistogramVec
}

Expand All @@ -1182,6 +1218,11 @@ func newNotifyMetrics(r prometheus.Registerer) *notifyMetrics {
Name: "notification_requests_failed_total",
Help: "The total number of failed notification requests.",
}, []string{"integration"}),
numNotificationSuppressedTotal: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
Namespace: "alertmanager",
Name: "notifications_suppressed_total",
Help: "The total number of notifications suppressed for being silenced, inhibited, outside of active time intervals or within muted time intervals.",
}, []string{"reason"}),
notificationLatencySeconds: promauto.With(r).NewHistogramVec(prometheus.HistogramOpts{
Namespace: "alertmanager",
Name: "notification_latency_seconds",
Expand Down Expand Up @@ -1210,12 +1251,16 @@ func newNotifyMetrics(r prometheus.Registerer) *notifyMetrics {
m.numTotalFailedNotifications.WithLabelValues(integration, reason)
}
}
for _, reason := range possibleSuppressedReason {
m.numNotificationSuppressedTotal.WithLabelValues(reason)
}
return m
}

// Copied from github.com/prometheus/alertmanager/notify/util.go
// possibleFailureReasonCategory is a list of possible failure reasons.
var possibleFailureReasonCategory = []string{notify.DefaultReason.String(), notify.ClientErrorReason.String(), notify.ServerErrorReason.String()}
// possibleSuppressedReason is the list of suppression reasons used in this
// file as values for the "reason" label of numNotificationSuppressedTotal.
var possibleSuppressedReason = []string{notify.SuppressedReasonSilence, notify.SuppressedReasonInhibition, notify.SuppressedReasonMuteTimeInterval, notify.SuppressedReasonActiveTimeInterval}

type markerMetrics struct {
alerts *prometheus.GaugeVec
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 1ae1641

Please sign in to comment.