Rename Alertmanager -> GrafanaAlertmanager and remove KVStore #4

Merged · 2 commits · Jul 14, 2022
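To summarize the change: the `Alertmanager` struct becomes `GrafanaAlertmanager`, and the constructor no longer receives a `kvstore.KVStore`. Snapshot persistence is instead delegated to the caller through a `GrafanaAlertmanagerConfig` holding two `MaintenanceOptions` values, one for silences and one for the notification log (both defined in the diff below). A minimal sketch of what a caller-side implementation might look like; everything here except the four interface methods is an illustrative assumption, not part of the PR:

```go
package main

import (
	"fmt"
	"time"
)

// fileMaintenanceOptions is a hypothetical implementation of the
// MaintenanceOptions interface introduced by this PR.
type fileMaintenanceOptions struct {
	path      string
	retention time.Duration
	frequency time.Duration
	persist   func() (int64, error) // invoked on every maintenance tick
}

func (o fileMaintenanceOptions) Filepath() string                    { return o.path }
func (o fileMaintenanceOptions) Retention() time.Duration            { return o.retention }
func (o fileMaintenanceOptions) MaintenanceFrequency() time.Duration { return o.frequency }
func (o fileMaintenanceOptions) MaintenanceFunc() (int64, error)     { return o.persist() }

func main() {
	silenceOpts := fileMaintenanceOptions{
		path:      "/var/lib/grafana/alerting/1/silences", // assumed layout
		retention: 5 * 24 * time.Hour,                     // mirrors Grafana's retentionNotificationsAndSilences
		frequency: 15 * time.Minute,                       // mirrors Grafana's maintenanceNotificationAndSilences
		persist: func() (int64, error) {
			// The caller now decides where state goes: a file, a KV store,
			// object storage, etc. Return the number of bytes persisted.
			return 0, nil
		},
	}
	// An nflog counterpart would be built the same way, and both would be
	// wired into the new config type:
	//   cfg := &GrafanaAlertmanagerConfig{Silences: silenceOpts, Nflog: nflogOpts}
	fmt.Printf("snapshot %s every %s, keep %s\n",
		silenceOpts.Filepath(), silenceOpts.MaintenanceFrequency(), silenceOpts.Retention())
}
```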
96 changes: 49 additions & 47 deletions alerting/grafana_alertmanager.go
@@ -33,7 +33,6 @@ import (

pb "github.com/prometheus/alertmanager/silence/silencepb"

"github.com/grafana/grafana/pkg/infra/kvstore"
"github.com/grafana/grafana/pkg/infra/log"
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
@@ -91,12 +90,11 @@ type AlertingStore interface {
store.ImageStore
}

type Alertmanager struct {
type GrafanaAlertmanager struct {
logger log.Logger

Settings *setting.Cfg
Store AlertingStore
fileStore *FileStore
Metrics *metrics.Alertmanager
NotificationService notifications.Service

@@ -133,9 +131,21 @@ type Alertmanager struct {
decryptFn channels.GetDecryptedValueFn
}

func newAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store AlertingStore, kvStore kvstore.KVStore,
peer ClusterPeer, decryptFn channels.GetDecryptedValueFn, ns notifications.Service, m *metrics.Alertmanager) (*Alertmanager, error) {
am := &Alertmanager{
type MaintenanceOptions interface {
Filepath() string
Retention() time.Duration
MaintenanceFrequency() time.Duration
MaintenanceFunc() (int64, error)
}

type GrafanaAlertmanagerConfig struct {
Silences MaintenanceOptions
Nflog MaintenanceOptions
}

func NewGrafanaAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store AlertingStore, config *GrafanaAlertmanagerConfig,
peer ClusterPeer, decryptFn channels.GetDecryptedValueFn, ns notifications.Service, m *metrics.Alertmanager) (*GrafanaAlertmanager, error) {
am := &GrafanaAlertmanager{
Settings: cfg,
stopc: make(chan struct{}),
logger: log.New("alertmanager", "org", orgID),
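For callers, the visible signature change is in the constructor: the `kvstore.KVStore` parameter is replaced by the `*GrafanaAlertmanagerConfig` shown above. A hedged call-site sketch (the surrounding variables cfg, store, peer, decryptFn, ns, m, and both options values are assumed from context, not from this diff):

```go
// Before: snapshots were written through a Grafana KV-store-backed file store.
//   am, err := newAlertmanager(ctx, orgID, cfg, store, kvStore, peer, decryptFn, ns, m)

// After: the caller supplies maintenance behaviour explicitly.
amCfg := &GrafanaAlertmanagerConfig{
	Silences: silenceOpts, // implements MaintenanceOptions
	Nflog:    nflogOpts,   // implements MaintenanceOptions
}
am, err := NewGrafanaAlertmanager(ctx, orgID, cfg, store, amCfg, peer, decryptFn, ns, m)
if err != nil {
	return nil, err
}
```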
@@ -151,24 +161,16 @@ func newAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store A
decryptFn: decryptFn,
}

am.fileStore = NewFileStore(am.orgID, kvStore, am.WorkingDirPath())

nflogFilepath, err := am.fileStore.FilepathFor(ctx, notificationLogFilename)
if err != nil {
return nil, err
}
silencesFilePath, err := am.fileStore.FilepathFor(ctx, silencesFilename)
if err != nil {
return nil, err
}
var err error

// Initialize the notification log
am.wg.Add(1)
am.notificationLog, err = nflog.New(
nflog.WithRetention(retentionNotificationsAndSilences),
nflog.WithSnapshot(nflogFilepath),
nflog.WithMaintenance(maintenanceNotificationAndSilences, am.stopc, am.wg.Done, func() (int64, error) {
return am.fileStore.Persist(ctx, notificationLogFilename, am.notificationLog)
nflog.WithRetention(config.Nflog.Retention()),
nflog.WithSnapshot(config.Nflog.Filepath()),
nflog.WithMaintenance(config.Nflog.MaintenanceFrequency(), am.stopc, am.wg.Done, func() (int64, error) {
// TODO: There's a bug here; we need to call GC to ensure we clean up old entries: https://github.com/grafana/alerting/issues/3
return config.Nflog.MaintenanceFunc()
}),
)
if err != nil {
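Note the TODO above: the notification-log maintenance closure never calls `GC()`, so expired entries keep being re-snapshotted (tracked in grafana/alerting#3). Until that lands, a caller's `MaintenanceFunc` could compact before persisting. A minimal sketch against the upstream `nflog` package; the helper name and file handling are assumptions, not part of the PR:

```go
package main

import (
	"os"

	"github.com/prometheus/alertmanager/nflog"
)

// maintainNflog garbage-collects expired notification-log entries, then
// snapshots the log to disk and returns the snapshot size in bytes.
func maintainNflog(l *nflog.Log, path string) (int64, error) {
	// Drop expired entries first so they don't end up in the snapshot.
	if _, err := l.GC(); err != nil {
		return 0, err
	}
	f, err := os.Create(path)
	if err != nil {
		return 0, err
	}
	defer f.Close()
	return l.Snapshot(f)
}
```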
@@ -180,8 +182,8 @@ func newAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store A
// Initialize silences
am.silences, err = silence.New(silence.Options{
Metrics: m.Registerer,
SnapshotFile: silencesFilePath,
Retention: retentionNotificationsAndSilences,
SnapshotFile: config.Silences.Filepath(),
Retention: config.Silences.Retention(),
})
if err != nil {
return nil, fmt.Errorf("unable to initialize the silencing component of alerting: %w", err)
@@ -192,15 +194,15 @@ func newAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store A

am.wg.Add(1)
go func() {
am.silences.Maintenance(silenceMaintenanceInterval, silencesFilePath, am.stopc, func() (int64, error) {
am.silences.Maintenance(config.Silences.MaintenanceFrequency(), config.Silences.Filepath(), am.stopc, func() (int64, error) {
// Delete silences older than the retention period.
if _, err := am.silences.GC(); err != nil {
am.logger.Error("silence garbage collection", "err", err)
// Don't return here - we need to snapshot our state first.
}

// Snapshot our silences to the Grafana KV store
Review comment (Member): I think the comment might need an update too as it's not the Grafana KV store exclusively.

Reply (Collaborator, Author): Noted in #8

return am.fileStore.Persist(ctx, silencesFilename, am.silences)
return config.Silences.MaintenanceFunc()
})
am.wg.Done()
}()
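Here the goroutine still runs `GC()` itself before invoking the caller's `MaintenanceFunc`, so a silences maintenance function only needs to snapshot; the removed `fileStore.Persist` call is what it replaces. A rough file-backed equivalent using the upstream `silence` package; the helper name and the atomic temp-file-plus-rename choice are mine, not from the PR:

```go
package main

import (
	"os"
	"path/filepath"

	"github.com/prometheus/alertmanager/silence"
)

// persistSilences is a hypothetical silences MaintenanceFunc body: it writes
// a snapshot to a temporary file and renames it into place, so a crash
// mid-write cannot corrupt the previous snapshot.
func persistSilences(s *silence.Silences, path string) (int64, error) {
	tmp, err := os.CreateTemp(filepath.Dir(path), "silences-*.tmp")
	if err != nil {
		return 0, err
	}
	defer os.Remove(tmp.Name()) // best-effort cleanup if we fail before the rename

	n, err := s.Snapshot(tmp)
	if err != nil {
		tmp.Close()
		return 0, err
	}
	if err := tmp.Close(); err != nil {
		return 0, err
	}
	return n, os.Rename(tmp.Name(), path)
}
```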
@@ -214,7 +216,7 @@ func newAlertmanager(ctx context.Context, orgID int64, cfg *setting.Cfg, store A
return am, nil
}

func (am *Alertmanager) Ready() bool {
func (am *GrafanaAlertmanager) Ready() bool {
// We consider AM as ready only when the config has been
// applied at least once successfully. Until then, some objects
// can still be nil.
@@ -224,11 +226,11 @@ func (am *Alertmanager) Ready() bool {
return am.ready()
}

func (am *Alertmanager) ready() bool {
func (am *GrafanaAlertmanager) ready() bool {
return am.config != nil
}

func (am *Alertmanager) StopAndWait() {
func (am *GrafanaAlertmanager) StopAndWait() {
if am.dispatcher != nil {
am.dispatcher.Stop()
}
@@ -244,9 +246,9 @@ func (am *Alertmanager) StopAndWait() {
am.wg.Wait()
}

// SaveAndApplyDefaultConfig saves the default configuration to the database and applies the configuration to the Alertmanager.
// SaveAndApplyDefaultConfig saves the default configuration to the database and applies the configuration to the GrafanaAlertmanager.
// It rolls back the save if we fail to apply the configuration.
func (am *Alertmanager) SaveAndApplyDefaultConfig(ctx context.Context) error {
func (am *GrafanaAlertmanager) SaveAndApplyDefaultConfig(ctx context.Context) error {
func (am *GrafanaAlertmanager) SaveAndApplyDefaultConfig(ctx context.Context) error {
am.reloadConfigMtx.Lock()
defer am.reloadConfigMtx.Unlock()

@@ -275,12 +277,12 @@ func (am *Alertmanager) SaveAndApplyDefaultConfig(ctx context.Context) error {
return nil
}

// SaveAndApplyConfig saves the configuration to the database and applies the configuration to the Alertmanager.
// SaveAndApplyConfig saves the configuration to the database and applies the configuration to the GrafanaAlertmanager.
// It rolls back the save if we fail to apply the configuration.
func (am *Alertmanager) SaveAndApplyConfig(ctx context.Context, cfg *apimodels.PostableUserConfig) error {
func (am *GrafanaAlertmanager) SaveAndApplyConfig(ctx context.Context, cfg *apimodels.PostableUserConfig) error {
rawConfig, err := json.Marshal(&cfg)
if err != nil {
return fmt.Errorf("failed to serialize to the Alertmanager configuration: %w", err)
return fmt.Errorf("failed to serialize to the GrafanaAlertmanager configuration: %w", err)
}

am.reloadConfigMtx.Lock()
@@ -305,12 +307,12 @@ func (am *Alertmanager) SaveAndApplyConfig(ctx context.Context, cfg *apimodels.P
return nil
}

// ApplyConfig applies the configuration to the Alertmanager.
func (am *Alertmanager) ApplyConfig(dbCfg *ngmodels.AlertConfiguration) error {
// ApplyConfig applies the configuration to the GrafanaAlertmanager.
func (am *GrafanaAlertmanager) ApplyConfig(dbCfg *ngmodels.AlertConfiguration) error {
var err error
cfg, err := Load([]byte(dbCfg.AlertmanagerConfiguration))
if err != nil {
return fmt.Errorf("failed to parse Alertmanager config: %w", err)
return fmt.Errorf("failed to parse GrafanaAlertmanager config: %w", err)
}

am.reloadConfigMtx.Lock()
@@ -322,7 +324,7 @@ func (am *Alertmanager) ApplyConfig(dbCfg *ngmodels.AlertConfiguration) error {
return nil
}

func (am *Alertmanager) getTemplate() (*template.Template, error) {
func (am *GrafanaAlertmanager) getTemplate() (*template.Template, error) {
am.reloadConfigMtx.RLock()
defer am.reloadConfigMtx.RUnlock()
if !am.ready() {
@@ -335,7 +337,7 @@ func (am *Alertmanager) getTemplate() (*template.Template, error) {
return am.templateFromPaths(paths...)
}

func (am *Alertmanager) templateFromPaths(paths ...string) (*template.Template, error) {
func (am *GrafanaAlertmanager) templateFromPaths(paths ...string) (*template.Template, error) {
tmpl, err := template.FromGlobs(paths...)
if err != nil {
return nil, err
@@ -348,7 +350,7 @@ func (am *Alertmanager) templateFromPaths(paths ...string) (*template.Template,
return tmpl, nil
}

func (am *Alertmanager) buildMuteTimesMap(muteTimeIntervals []config.MuteTimeInterval) map[string][]timeinterval.TimeInterval {
func (am *GrafanaAlertmanager) buildMuteTimesMap(muteTimeIntervals []config.MuteTimeInterval) map[string][]timeinterval.TimeInterval {
muteTimes := make(map[string][]timeinterval.TimeInterval, len(muteTimeIntervals))
for _, ti := range muteTimeIntervals {
muteTimes[ti.Name] = ti.TimeIntervals
@@ -358,7 +360,7 @@ func (am *Alertmanager) buildMuteTimesMap(muteTimeIntervals []config.MuteTimeInt

// applyConfig applies a new configuration by re-initializing all components using the configuration provided.
// It is not safe to call concurrently.
func (am *Alertmanager) applyConfig(cfg *apimodels.PostableUserConfig, rawConfig []byte) (err error) {
func (am *GrafanaAlertmanager) applyConfig(cfg *apimodels.PostableUserConfig, rawConfig []byte) (err error) {
// First, let's make sure this config is not already loaded
var configChanged bool
if rawConfig == nil {
@@ -447,12 +449,12 @@ func (am *Alertmanager) applyConfig(cfg *apimodels.PostableUserConfig, rawConfig
return nil
}

func (am *Alertmanager) WorkingDirPath() string {
func (am *GrafanaAlertmanager) WorkingDirPath() string {
return filepath.Join(am.Settings.DataPath, workingDir, strconv.Itoa(int(am.orgID)))
}

// buildIntegrationsMap builds a map of name to the list of Grafana integration notifiers off of a list of receiver config.
func (am *Alertmanager) buildIntegrationsMap(receivers []*apimodels.PostableApiReceiver, templates *template.Template) (map[string][]notify.Integration, error) {
func (am *GrafanaAlertmanager) buildIntegrationsMap(receivers []*apimodels.PostableApiReceiver, templates *template.Template) (map[string][]notify.Integration, error) {
integrationsMap := make(map[string][]notify.Integration, len(receivers))
for _, receiver := range receivers {
integrations, err := am.buildReceiverIntegrations(receiver, templates)
@@ -466,7 +468,7 @@ func (am *Alertmanager) buildIntegrationsMap(receivers []*apimodels.PostableApiR
}

// buildReceiverIntegrations builds a list of integration notifiers off of a receiver config.
func (am *Alertmanager) buildReceiverIntegrations(receiver *apimodels.PostableApiReceiver, tmpl *template.Template) ([]notify.Integration, error) {
func (am *GrafanaAlertmanager) buildReceiverIntegrations(receiver *apimodels.PostableApiReceiver, tmpl *template.Template) ([]notify.Integration, error) {
var integrations []notify.Integration
for i, r := range receiver.GrafanaManagedReceivers {
n, err := am.buildReceiverIntegration(r, tmpl)
@@ -478,7 +480,7 @@ func (am *Alertmanager) buildReceiverIntegrations(receiver *apimodels.PostableAp
return integrations, nil
}

func (am *Alertmanager) buildReceiverIntegration(r *apimodels.PostableGrafanaReceiver, tmpl *template.Template) (channels.NotificationChannel, error) {
func (am *GrafanaAlertmanager) buildReceiverIntegration(r *apimodels.PostableGrafanaReceiver, tmpl *template.Template) (channels.NotificationChannel, error) {
// secure settings are already encrypted at this point
secureSettings := make(map[string][]byte, len(r.SecureSettings))

@@ -529,7 +531,7 @@ func (am *Alertmanager) buildReceiverIntegration(r *apimodels.PostableGrafanaRec
}

// PutAlerts receives the alerts and then sends them through the corresponding route based on whether the alert has a receiver embedded or not
func (am *Alertmanager) PutAlerts(postableAlerts apimodels.PostableAlerts) error {
func (am *GrafanaAlertmanager) PutAlerts(postableAlerts apimodels.PostableAlerts) error {
now := time.Now()
alerts := make([]*types.Alert, 0, len(postableAlerts.PostableAlerts))
var validationErr *AlertValidationError
@@ -667,7 +669,7 @@ func (e AlertValidationError) Error() string {
}

// createReceiverStage creates a pipeline of stages for a receiver.
func (am *Alertmanager) createReceiverStage(name string, integrations []notify.Integration, wait func() time.Duration, notificationLog notify.NotificationLog) notify.Stage {
func (am *GrafanaAlertmanager) createReceiverStage(name string, integrations []notify.Integration, wait func() time.Duration, notificationLog notify.NotificationLog) notify.Stage {
var fs notify.FanoutStage
for i := range integrations {
recv := &nflogpb.Receiver{
@@ -686,11 +688,11 @@ func (am *Alertmanager) createReceiverStage(name string, integrations []notify.I
return fs
}

func (am *Alertmanager) waitFunc() time.Duration {
func (am *GrafanaAlertmanager) waitFunc() time.Duration {
return time.Duration(am.peer.Position()) * am.peerTimeout
}

func (am *Alertmanager) timeoutFunc(d time.Duration) time.Duration {
func (am *GrafanaAlertmanager) timeoutFunc(d time.Duration) time.Duration {
// time.Duration d relates to the receiver's group_interval. Even with a group interval of 1s,
// we need to make sure (non-position-0) peers in the cluster wait before flushing the notifications.
if d < notify.MinTimeout {
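The interplay of `waitFunc` and `timeoutFunc` is plain position arithmetic: a peer at position N delays its notification pipeline by N × peerTimeout, so position 0 flushes immediately and later peers only fire if earlier ones have not already gossiped a resolution. A toy illustration of that reasoning; the 10s floor matches upstream `notify.MinTimeout`, while the 15s peer timeout and the exact shape of the truncated `timeoutFunc` body are assumptions:

```go
package main

import (
	"fmt"
	"time"
)

const minTimeout = 10 * time.Second // stands in for notify.MinTimeout

// wait mirrors waitFunc: position in the cluster times the peer timeout.
func wait(position int, peerTimeout time.Duration) time.Duration {
	return time.Duration(position) * peerTimeout
}

// timeout mirrors timeoutFunc: clamp tiny group_intervals up to the floor,
// then add the peer wait so non-position-0 peers hold back their flush.
func timeout(d, peerWait time.Duration) time.Duration {
	if d < minTimeout {
		d = minTimeout
	}
	return d + peerWait
}

func main() {
	peerTimeout := 15 * time.Second
	groupInterval := time.Second // even a 1s group_interval gets clamped
	for pos := 0; pos < 3; pos++ {
		w := wait(pos, peerTimeout)
		fmt.Printf("peer %d: wait %s, timeout %s\n", pos, w, timeout(groupInterval, w))
	}
}
```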