Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#3513: Add GroupMarker interface #3792

Merged
2 changes: 1 addition & 1 deletion dispatch/dispatch.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func NewDispatcher(
ap provider.Alerts,
r *Route,
s notify.Stage,
mk types.Marker,
mk types.AlertMarker,
to func(time.Duration) time.Duration,
lim Limits,
l log.Logger,
Expand Down
4 changes: 2 additions & 2 deletions inhibit/inhibit.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,15 @@ import (
type Inhibitor struct {
alerts provider.Alerts
rules []*InhibitRule
marker types.Marker
marker types.AlertMarker
logger log.Logger

mtx sync.RWMutex
cancel func()
}

// NewInhibitor returns a new Inhibitor.
func NewInhibitor(ap provider.Alerts, rs []config.InhibitRule, mk types.Marker, logger log.Logger) *Inhibitor {
func NewInhibitor(ap provider.Alerts, rs []config.InhibitRule, mk types.AlertMarker, logger log.Logger) *Inhibitor {
ih := &Inhibitor{
alerts: ap,
marker: mk,
Expand Down
4 changes: 2 additions & 2 deletions provider/mem/mem.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ type Alerts struct {
cancel context.CancelFunc

alerts *store.Alerts
marker types.Marker
marker types.AlertMarker

mtx sync.Mutex
listeners map[int]listeningAlerts
Expand Down Expand Up @@ -85,7 +85,7 @@ func (a *Alerts) registerMetrics(r prometheus.Registerer) {
}

// NewAlerts returns a new alert provider.
func NewAlerts(ctx context.Context, m types.Marker, intervalGC time.Duration, alertCallback AlertStoreCallback, l log.Logger, r prometheus.Registerer) (*Alerts, error) {
func NewAlerts(ctx context.Context, m types.AlertMarker, intervalGC time.Duration, alertCallback AlertStoreCallback, l log.Logger, r prometheus.Registerer) (*Alerts, error) {
if alertCallback == nil {
alertCallback = noopCallback{}
}
Expand Down
6 changes: 3 additions & 3 deletions silence/silence.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,16 +92,16 @@ func (c matcherCache) add(s *pb.Silence) (labels.Matchers, error) {
return ms, nil
}

// Silencer binds together a Marker and a Silences to implement the Muter
// Silencer binds together a AlertMarker and a Silences to implement the Muter
// interface.
type Silencer struct {
silences *Silences
marker types.Marker
marker types.AlertMarker
logger log.Logger
}

// NewSilencer returns a new Silencer.
func NewSilencer(s *Silences, m types.Marker, l log.Logger) *Silencer {
func NewSilencer(s *Silences, m types.AlertMarker, l log.Logger) *Silencer {
return &Silencer{
silences: s,
marker: m,
Expand Down
123 changes: 86 additions & 37 deletions types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,17 @@ type AlertStatus struct {
silencesVersion int
}

// Marker helps to mark alerts as silenced and/or inhibited.
// groupStatus stores the state of the group, and, as applicable, the names
// of all active and mute time intervals that are muting it.
Comment on lines +55 to +56
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This claims to store the state of the group but its only attribute is mutedBy is this correct?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes! I copied it from here:

// AlertStatus stores the state of an alert and, as applicable, the IDs of
// silences silencing the alert and of other alerts inhibiting the alert.

type groupStatus struct {
// mutedBy contains the names of all active and mute time intervals that
// are muting it.
mutedBy []string
}

// AlertMarker helps to mark alerts as silenced and/or inhibited.
// All methods are goroutine-safe.
type Marker interface {
type AlertMarker interface {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I renamed this interface to AlertMarker so it could be better differentiated from GroupMarker.

// SetActiveOrSilenced replaces the previous SilencedBy by the provided IDs of
// active and pending silences, including the version number of the
// silences state. The set of provided IDs is supposed to represent the
Expand Down Expand Up @@ -92,24 +100,65 @@ type Marker interface {
Inhibited(model.Fingerprint) ([]string, bool)
}

// NewMarker returns an instance of a Marker implementation.
func NewMarker(r prometheus.Registerer) Marker {
m := &memMarker{
m: map[model.Fingerprint]*AlertStatus{},
}
// GroupMarker helps to mark groups as active or muted.
// All methods are goroutine-safe.
//
// TODO(grobinson): routeID is used in Muted and SetMuted because groupKey
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a TODO in the source code to make it clear that this is not how I want the interface to look, but fixing this is blocked due to #3817 that we will fix in another PR.

// is not unique (see #3817). Once groupKey uniqueness is fixed routeID can
// be removed from the GroupMarker interface.
type GroupMarker interface {
// Muted returns true if the group is muted, otherwise false. If the group
// is muted then it also returns the names of the time intervals that muted
// it.
Muted(routeID, groupKey string) ([]string, bool)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added routeID to avoid the issue of non unique group keys from causing groups to be incorrectly marked as muted. See #3817 for more information.


// SetMuted marks the group as muted, and sets the names of the time
// intervals that mute it. If the list of names is nil or the empty slice
// then the muted marker is removed.
SetMuted(routeID, groupKey string, timeIntervalNames []string)
}

// NewMarker returns an instance of a AlertMarker implementation.
func NewMarker(r prometheus.Registerer) *MemMarker {
m := &MemMarker{
alerts: map[model.Fingerprint]*AlertStatus{},
groups: map[string]*groupStatus{},
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How is this gargage collected? As in, how does the marker now that it no longer needs to store the status of an alert.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks.

}
m.registerMetrics(r)

return m
}

type memMarker struct {
m map[model.Fingerprint]*AlertStatus
type MemMarker struct {
alerts map[model.Fingerprint]*AlertStatus
groups map[string]*groupStatus

mtx sync.RWMutex
}

func (m *memMarker) registerMetrics(r prometheus.Registerer) {
// Muted implements GroupMarker.
func (m *MemMarker) Muted(routeID, groupKey string) ([]string, bool) {
m.mtx.Lock()
defer m.mtx.Unlock()
status, ok := m.groups[routeID+groupKey]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since routeID will be removed in the future I have chosen to just concatenate the two strings together.

if !ok {
return nil, false
}
return status.mutedBy, len(status.mutedBy) > 0
}

// SetMuted implements GroupMarker.
func (m *MemMarker) SetMuted(routeID, groupKey string, timeIntervalNames []string) {
m.mtx.Lock()
defer m.mtx.Unlock()
status, ok := m.groups[routeID+groupKey]
if !ok {
status = &groupStatus{}
m.groups[routeID+groupKey] = status
}
status.mutedBy = timeIntervalNames
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't we need to aggregate all the time-intervals that this has been muted by?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't – the reason is that the Mutes method we just merged returns them all:

func (i *Intervener) Mutes(names []string, now time.Time) (bool, []string, error) {
var in []string
for _, name := range names {
interval, ok := i.intervals[name]
if !ok {
return false, nil, fmt.Errorf("time interval %s doesn't exist in config", name)
}
for _, ti := range interval {
if ti.ContainsTime(now.UTC()) {
in = append(in, name)
}
}
}
return len(in) > 0, in, nil
}
.

}

func (m *MemMarker) registerMetrics(r prometheus.Registerer) {
newMarkedAlertMetricByState := func(st AlertState) prometheus.GaugeFunc {
return prometheus.NewGaugeFunc(
prometheus.GaugeOpts{
Expand All @@ -132,17 +181,17 @@ func (m *memMarker) registerMetrics(r prometheus.Registerer) {
r.MustRegister(alertStateUnprocessed)
}

// Count implements Marker.
func (m *memMarker) Count(states ...AlertState) int {
// Count implements AlertMarker.
func (m *MemMarker) Count(states ...AlertState) int {
m.mtx.RLock()
defer m.mtx.RUnlock()

if len(states) == 0 {
return len(m.m)
return len(m.alerts)
}

var count int
for _, status := range m.m {
for _, status := range m.alerts {
for _, state := range states {
if status.State == state {
count++
Expand All @@ -152,15 +201,15 @@ func (m *memMarker) Count(states ...AlertState) int {
return count
}

// SetActiveOrSilenced implements Marker.
func (m *memMarker) SetActiveOrSilenced(alert model.Fingerprint, version int, activeIDs, pendingIDs []string) {
// SetActiveOrSilenced implements AlertMarker.
func (m *MemMarker) SetActiveOrSilenced(alert model.Fingerprint, version int, activeIDs, pendingIDs []string) {
m.mtx.Lock()
defer m.mtx.Unlock()

s, found := m.m[alert]
s, found := m.alerts[alert]
if !found {
s = &AlertStatus{}
m.m[alert] = s
m.alerts[alert] = s
}
s.SilencedBy = activeIDs
s.pendingSilences = pendingIDs
Expand All @@ -177,15 +226,15 @@ func (m *memMarker) SetActiveOrSilenced(alert model.Fingerprint, version int, ac
s.State = AlertStateSuppressed
}

// SetInhibited implements Marker.
func (m *memMarker) SetInhibited(alert model.Fingerprint, ids ...string) {
// SetInhibited implements AlertMarker.
func (m *MemMarker) SetInhibited(alert model.Fingerprint, ids ...string) {
m.mtx.Lock()
defer m.mtx.Unlock()

s, found := m.m[alert]
s, found := m.alerts[alert]
if !found {
s = &AlertStatus{}
m.m[alert] = s
m.alerts[alert] = s
}
s.InhibitedBy = ids

Expand All @@ -200,12 +249,12 @@ func (m *memMarker) SetInhibited(alert model.Fingerprint, ids ...string) {
s.State = AlertStateSuppressed
}

// Status implements Marker.
func (m *memMarker) Status(alert model.Fingerprint) AlertStatus {
// Status implements AlertMarker.
func (m *MemMarker) Status(alert model.Fingerprint) AlertStatus {
m.mtx.RLock()
defer m.mtx.RUnlock()

if s, found := m.m[alert]; found {
if s, found := m.alerts[alert]; found {
return *s
}
return AlertStatus{
Expand All @@ -215,26 +264,26 @@ func (m *memMarker) Status(alert model.Fingerprint) AlertStatus {
}
}

// Delete implements Marker.
func (m *memMarker) Delete(alert model.Fingerprint) {
// Delete implements AlertMarker.
func (m *MemMarker) Delete(alert model.Fingerprint) {
m.mtx.Lock()
defer m.mtx.Unlock()

delete(m.m, alert)
delete(m.alerts, alert)
}

// Unprocessed implements Marker.
func (m *memMarker) Unprocessed(alert model.Fingerprint) bool {
// Unprocessed implements AlertMarker.
func (m *MemMarker) Unprocessed(alert model.Fingerprint) bool {
return m.Status(alert).State == AlertStateUnprocessed
}

// Active implements Marker.
func (m *memMarker) Active(alert model.Fingerprint) bool {
// Active implements AlertMarker.
func (m *MemMarker) Active(alert model.Fingerprint) bool {
return m.Status(alert).State == AlertStateActive
}

// Inhibited implements Marker.
func (m *memMarker) Inhibited(alert model.Fingerprint) ([]string, bool) {
// Inhibited implements AlertMarker.
func (m *MemMarker) Inhibited(alert model.Fingerprint) ([]string, bool) {
s := m.Status(alert)
return s.InhibitedBy,
s.State == AlertStateSuppressed && len(s.InhibitedBy) > 0
Expand All @@ -243,7 +292,7 @@ func (m *memMarker) Inhibited(alert model.Fingerprint) ([]string, bool) {
// Silenced returns whether the alert for the given Fingerprint is in the
// Silenced state, any associated silence IDs, and the silences state version
// the result is based on.
func (m *memMarker) Silenced(alert model.Fingerprint) (activeIDs, pendingIDs []string, version int, silenced bool) {
func (m *MemMarker) Silenced(alert model.Fingerprint) (activeIDs, pendingIDs []string, version int, silenced bool) {
s := m.Status(alert)
return s.SilencedBy, s.pendingSilences, s.silencesVersion,
s.State == AlertStateSuppressed && len(s.SilencedBy) > 0
Expand Down Expand Up @@ -410,7 +459,7 @@ func (a *Alert) Merge(o *Alert) *Alert {
}

// A Muter determines whether a given label set is muted. Implementers that
// maintain an underlying Marker are expected to update it during a call of
// maintain an underlying AlertMarker are expected to update it during a call of
// Mutes.
type Muter interface {
Mutes(model.LabelSet) bool
Expand Down
32 changes: 32 additions & 0 deletions types/types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,38 @@ import (
"github.com/prometheus/alertmanager/matchers/compat"
)

func TestMemMarker_Muted(t *testing.T) {
r := prometheus.NewRegistry()
marker := NewMarker(r)

// No groups should be muted.
timeIntervalNames, isMuted := marker.Muted("route1", "group1")
require.False(t, isMuted)
require.Empty(t, timeIntervalNames)

// Mark the group as muted because it's the weekend.
marker.SetMuted("route1", "group1", []string{"weekends"})
timeIntervalNames, isMuted = marker.Muted("route1", "group1")
require.True(t, isMuted)
require.Equal(t, []string{"weekends"}, timeIntervalNames)

// Other groups should not be marked as muted.
timeIntervalNames, isMuted = marker.Muted("route1", "group2")
require.False(t, isMuted)
require.Empty(t, timeIntervalNames)

// Other routes should not be marked as muted either.
timeIntervalNames, isMuted = marker.Muted("route2", "group1")
require.False(t, isMuted)
require.Empty(t, timeIntervalNames)

// The group is no longer muted.
marker.SetMuted("route1", "group1", nil)
timeIntervalNames, isMuted = marker.Muted("route1", "group1")
require.False(t, isMuted)
require.Empty(t, timeIntervalNames)
}

func TestMemMarker_Count(t *testing.T) {
r := prometheus.NewRegistry()
marker := NewMarker(r)
Expand Down
Loading