Skip to content

Commit

Permalink
Merge pull request #998 from iksaif/silence-alert-counters
Browse files Browse the repository at this point in the history
silence|alerts: add metrics about current silences and alerts
  • Loading branch information
brancz authored Oct 5, 2017
2 parents 5328885 + bff889b commit d47f8b9
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 3 deletions.
24 changes: 24 additions & 0 deletions cmd/alertmanager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ var (
Name: "alertmanager_config_last_reload_success_timestamp_seconds",
Help: "Timestamp of the last successful configuration reload.",
})
alertsActive prometheus.GaugeFunc
alertsSuppressed prometheus.GaugeFunc
)

func init() {
Expand All @@ -72,6 +74,27 @@ func init() {
prometheus.MustRegister(version.NewCollector("alertmanager"))
}

// newAlertMetricByState builds the "alertmanager_alerts" gauge for a single
// alert state. The state is attached as the constant "state" label, and the
// gauge value is produced by calling marker.Count(st) from the gauge callback.
func newAlertMetricByState(marker types.Marker, st types.AlertState) prometheus.GaugeFunc {
	opts := prometheus.GaugeOpts{
		Name:        "alertmanager_alerts",
		Help:        "How many alerts by state.",
		ConstLabels: prometheus.Labels{"state": string(st)},
	}
	// The callback closes over marker and st so each gauge reports its own state.
	countAlerts := func() float64 {
		return float64(marker.Count(st))
	}
	return prometheus.NewGaugeFunc(opts, countAlerts)
}

// newMarkerMetrics creates the per-state alert gauges backed by marker, stores
// them in the package-level alertsActive and alertsSuppressed variables, and
// registers both with the default Prometheus registry.
func newMarkerMetrics(marker types.Marker) {
	alertsActive = newAlertMetricByState(marker, types.AlertStateActive)
	alertsSuppressed = newAlertMetricByState(marker, types.AlertStateSuppressed)

	// Registration order is kept: active first, then suppressed.
	prometheus.MustRegister(alertsActive, alertsSuppressed)
}

func main() {
peers := &stringset{}
var (
Expand Down Expand Up @@ -148,6 +171,7 @@ func main() {
}

marker := types.NewMarker()
newMarkerMetrics(marker)

silenceOpts := silence.Options{
SnapshotFile: filepath.Join(*dataDir, "silences"),
Expand Down
43 changes: 41 additions & 2 deletions silence/silence.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,29 @@ type metrics struct {
queriesTotal prometheus.Counter
queryErrorsTotal prometheus.Counter
queryDuration prometheus.Histogram
silencesActive prometheus.GaugeFunc
silencesPending prometheus.GaugeFunc
silencesExpired prometheus.GaugeFunc
}

// newSilenceMetricByState builds the "alertmanager_silences" gauge for a
// single silence state. The state is attached as the constant "state" label,
// and the gauge value is produced by calling s.CountState(st) from the gauge
// callback. A counting error is logged and the count returned alongside the
// error is still reported.
func newSilenceMetricByState(s *Silences, st SilenceState) prometheus.GaugeFunc {
	opts := prometheus.GaugeOpts{
		Name:        "alertmanager_silences",
		Help:        "How many silences by state.",
		ConstLabels: prometheus.Labels{"state": string(st)},
	}
	return prometheus.NewGaugeFunc(opts, func() float64 {
		n, err := s.CountState(st)
		if err != nil {
			// The callback cannot return an error, so it is only logged.
			s.logger.With("err", err).Error("counting silences failed")
		}
		return float64(n)
	})
}

func newMetrics(r prometheus.Registerer) *metrics {
func newMetrics(r prometheus.Registerer, s *Silences) *metrics {
m := &metrics{}

m.gcDuration = prometheus.NewSummary(prometheus.SummaryOpts{
Expand All @@ -138,6 +158,11 @@ func newMetrics(r prometheus.Registerer) *metrics {
Name: "alertmanager_silences_query_duration_seconds",
Help: "Duration of silence query evaluation.",
})
if s != nil {
m.silencesActive = newSilenceMetricByState(s, StateActive)
m.silencesPending = newSilenceMetricByState(s, StatePending)
m.silencesExpired = newSilenceMetricByState(s, StateExpired)
}

if r != nil {
r.MustRegister(
Expand All @@ -146,6 +171,9 @@ func newMetrics(r prometheus.Registerer) *metrics {
m.queriesTotal,
m.queryErrorsTotal,
m.queryDuration,
m.silencesActive,
m.silencesPending,
m.silencesExpired,
)
}
return m
Expand Down Expand Up @@ -195,12 +223,13 @@ func New(o Options) (*Silences, error) {
s := &Silences{
mc: matcherCache{},
logger: log.NewNopLogger(),
metrics: newMetrics(o.Metrics),
retention: o.Retention,
now: utcNow,
gossip: nopGossip{},
st: newGossipData(),
}
s.metrics = newMetrics(o.Metrics, s)

if o.Logger != nil {
s.logger = o.Logger
}
Expand Down Expand Up @@ -587,6 +616,16 @@ func (s *Silences) Query(params ...QueryParam) ([]*pb.Silence, error) {
return sils, err
}

// CountState returns the number of silences matched by a query restricted
// with QState(states...). If the query fails it returns -1 and the error.
func (s *Silences) CountState(states ...SilenceState) (int, error) {
	// Running a full query just to take its length could probably be
	// optimized.
	matched, queryErr := s.Query(QState(states...))
	if queryErr != nil {
		return -1, queryErr
	}

	total := len(matched)
	return total, nil
}

func (s *Silences) query(q *query, now time.Time) ([]*pb.Silence, error) {
// If we have an ID constraint, all silences are our base set.
// This and the use of post-filter functions is the
Expand Down
11 changes: 10 additions & 1 deletion silence/silence_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ func TestSilencesSnapshot(t *testing.T) {
f, err := ioutil.TempFile("", "snapshot")
require.NoError(t, err, "creating temp file failed")

s1 := &Silences{st: newGossipData(), metrics: newMetrics(nil)}
s1 := &Silences{st: newGossipData(), metrics: newMetrics(nil, nil)}
// Setup internal state manually.
for _, e := range c.entries {
s1.st.data[e.Silence.Id] = e
Expand Down Expand Up @@ -778,6 +778,10 @@ func TestSilenceExpire(t *testing.T) {
},
}

count, err := s.CountState(StatePending)
require.NoError(t, err)
require.Equal(t, 1, count)

require.NoError(t, s.expire("pending"))
require.NoError(t, s.expire("active"))

Expand All @@ -794,6 +798,11 @@ func TestSilenceExpire(t *testing.T) {
EndsAt: now,
UpdatedAt: now,
}, sil)

count, err = s.CountState(StatePending)
require.NoError(t, err)
require.Equal(t, 0, count)

// Expiring a pending Silence should make the API return the
// SilenceStateExpired Silence state.
silenceState := types.CalcSilenceState(sil.StartsAt, sil.EndsAt)
Expand Down
23 changes: 23 additions & 0 deletions types/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ type Marker interface {
SetInhibited(alert model.Fingerprint, ids ...string)
SetSilenced(alert model.Fingerprint, ids ...string)

Count(...AlertState) int

Status(model.Fingerprint) AlertStatus
Delete(model.Fingerprint)

Expand All @@ -67,6 +69,27 @@ type memMarker struct {
mtx sync.RWMutex
}

// Count returns the number of alerts held by the marker whose status is in
// one of the given states. When no states are given it returns the total
// number of entries in the marker. Each alert is counted at most once, even
// if the same state is passed more than once.
func (m *memMarker) Count(states ...AlertState) int {
	m.mtx.RLock()
	defer m.mtx.RUnlock()

	// No filter requested: every entry counts.
	if len(states) == 0 {
		return len(m.m)
	}

	count := 0
	for _, status := range m.m {
		for _, state := range states {
			if status.State == state {
				count++
				// Stop at the first match so a state listed repeatedly
				// cannot count the same alert several times.
				break
			}
		}
	}
	return count
}

// SetSilenced sets the AlertStatus to suppressed and stores the associated silence IDs.
func (m *memMarker) SetSilenced(alert model.Fingerprint, ids ...string) {
m.mtx.Lock()
Expand Down

0 comments on commit d47f8b9

Please sign in to comment.