feat(DMVP-5017): refactor alerts related config and sub-module structure

dasmeta · Aug 30, 2024 · 4022e6f · 4022e6f
1 parent 8f63b2a
commit 4022e6f
Show file tree

Hide file tree

Showing 71 changed files with 895 additions and 489 deletions.
diff --git a/README.md b/README.md
diff --git a/dashboard.tf b/dashboard.tf
diff --git a/main.tf b/main.tf
@@ -1,23 +1,22 @@
-module "alerts" {
-  source = "./modules/alerts"
-
-  alert_interval_seconds = var.alert_interval_seconds
-  alert_rules            = var.alert_rules
-}
+module "application_dashboard" {
+  source = "./modules/dashboard/"
 
-module "contact_points" {
-  source = "./modules/contact-points"
+  count = length(var.application_dashboard) > 0 ? 1 : 0 # instantiate the dashboard module only when var.application_dashboard is non-empty
 
-  count = length(var.alert_rules) != 0 ? 1 : 0
-
-  slack_endpoints    = var.slack_endpoints
-  opsgenie_endpoints = var.opsgenie_endpoints
+  name        = var.name
+  rows        = var.application_dashboard.rows
+  data_source = var.application_dashboard.data_source
+  variables   = var.application_dashboard.variables
 }
 
-module "notifications" {
-  source = "./modules/notifications"
+module "alerts" {
+  source = "./modules/alerts"
 
-  count = length(var.alert_rules) != 0 ? 1 : 0
+  count = var.alerts != null ? 1 : 0 # skip the whole alerts module when var.alerts is null
 
-  notifications = var.notifications
+  alert_interval_seconds = var.alerts.alert_interval_seconds
+  disable_provenance     = var.alerts.disable_provenance
+  rules                  = var.alerts.rules
+  contact_points         = var.alerts.contact_points
+  notifications          = var.alerts.notifications
 }
diff --git a/modules/alert-contact-points/README.md b/modules/alert-contact-points/README.md
@@ -0,0 +1,43 @@
+## Usage
+This Terraform module enables the creation of Grafana contact points for various integrations such as Slack and OpsGenie. Contact points allow you to configure alert notifications to different services based on your requirements.
+
+There are numerous integrations available for Grafana, but currently this module supports only Slack, OpsGenie, and generic webhook contact points. We are continuously working to add more integrations. If you have any questions or need assistance, feel free to open an issue or contact our team.
+<!-- BEGINNING OF PRE-COMMIT-TERRAFORM DOCS HOOK -->
+## Requirements
+
+| Name | Version |
+|------|---------|
+| <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 1.3.0 |
+| <a name="requirement_grafana"></a> [grafana](#requirement\_grafana) | >= 1.40.0 |
+
+## Providers
+
+| Name | Version |
+|------|---------|
+| <a name="provider_grafana"></a> [grafana](#provider\_grafana) | >= 1.40.0 |
+
+## Modules
+
+No modules.
+
+## Resources
+
+| Name | Type |
+|------|------|
+| [grafana_contact_point.opsgenie_contact_point](https://registry.terraform.io/providers/grafana/grafana/latest/docs/resources/contact_point) | resource |
+| [grafana_contact_point.slack_contact_point](https://registry.terraform.io/providers/grafana/grafana/latest/docs/resources/contact_point) | resource |
+| [grafana_contact_point.webhook_contact_point](https://registry.terraform.io/providers/grafana/grafana/latest/docs/resources/contact_point) | resource |
+
+## Inputs
+
+| Name | Description | Type | Default | Required |
+|------|-------------|------|---------|:--------:|
+| <a name="input_disable_provenance"></a> [disable\_provenance](#input\_disable\_provenance) | Allow modifying the contact point from other sources than Terraform or the Grafana API. | `bool` | `true` | no |
+| <a name="input_opsgenie_endpoints"></a> [opsgenie\_endpoints](#input\_opsgenie\_endpoints) | OpsGenie contact points list. | <pre>list(object({<br>    name                    = string                                                 # The name of the contact point<br>    api_key                 = string                                                 # The OpsGenie API key to use<br>    auto_close              = optional(bool, false)                                  # Whether to auto-close alerts in OpsGenie when they resolve in the Alert manager<br>    message                 = optional(string, "")                                   # The templated content of the message<br>    api_url                 = optional(string, "https://api.opsgenie.com/v2/alerts") # Allows customization of the OpsGenie API URL<br>    disable_resolve_message = optional(bool, false)                                  # Whether to disable sending resolve messages<br>  }))</pre> | `[]` | no |
+| <a name="input_slack_endpoints"></a> [slack\_endpoints](#input\_slack\_endpoints) | Slack contact points list. | <pre>list(object({<br>    name                    = string                                                     # The name of the contact point<br>    endpoint_url            = optional(string, "https://slack.com/api/chat.postMessage") # Use this to override the Slack API endpoint URL to send requests to<br>    icon_emoji              = optional(string, "")                                       # The name of a Slack workspace emoji to use as the bot icon<br>    icon_url                = optional(string, "")                                       # A URL of an image to use as the bot icon<br>    recipient               = optional(string, null)                                     # Channel, private group, or IM channel (can be an encoded ID or a name) to send messages to<br>    text                    = optional(string, "")                                       # Templated content of the message<br>    title                   = optional(string, "")                                       # Templated title of the message<br>    token                   = optional(string, "")                                       # A Slack API token, for sending messages directly without the webhook method<br>    webhook_url             = optional(string, "")                                       # A Slack webhook URL, for sending messages via the webhook method<br>    username                = optional(string, "")                                       # Username for the bot to use<br>    disable_resolve_message = optional(bool, false)                                      # Whether to disable sending resolve messages<br>  }))</pre> | `[]` | no |
+| <a name="input_webhook_endpoints"></a> [webhook\_endpoints](#input\_webhook\_endpoints) | Contact points that send notifications to an arbitrary webhook, using the Prometheus webhook format. | <pre>list(object({<br>    name                      = string                 # The name of the contact point<br>    url                       = string                 # The URL to send webhook requests to<br>    authorization_credentials = optional(string, null) # Allows a custom authorization scheme - attaches an auth header with this value. Do not use in conjunction with basic auth parameters<br>    authorization_scheme      = optional(string, null) # Allows a custom authorization scheme - attaches an auth header with this name. Do not use in conjunction with basic auth parameters<br>    basic_auth_password       = optional(string, null) # The password component of the basic auth credentials to use<br>    basic_auth_user           = optional(string, null) # The username component of the basic auth credentials to use<br>    disable_resolve_message   = optional(bool, false)  # Whether to disable sending resolve messages. Defaults to false<br>    settings                  = any                    # Additional custom properties to attach to the notifier<br>  }))</pre> | `[]` | no |
+
+## Outputs
+
+No outputs.
+<!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK -->
diff --git a/modules/contact-points/main.tf → modules/alert-contact-points/main.tf b/modules/contact-points/main.tf → modules/alert-contact-points/main.tf
@@ -1,8 +1,9 @@
-//Slack Integration
+# Slack Integration
 resource "grafana_contact_point" "slack_contact_point" {
   for_each = { for cp in var.slack_endpoints : cp.name => cp }
 
-  name = each.key
+  name               = each.key
+  disable_provenance = var.disable_provenance
 
   slack {
     endpoint_url            = each.value.webhook_url
@@ -18,11 +19,12 @@ resource "grafana_contact_point" "slack_contact_point" {
   }
 }
 
-//OpsGenie Integration
+# OpsGenie Integration
 resource "grafana_contact_point" "opsgenie_contact_point" {
   for_each = { for cp in var.opsgenie_endpoints : cp.name => cp }
 
-  name = each.key
+  name               = each.key
+  disable_provenance = var.disable_provenance
 
   opsgenie {
     api_key                 = each.value.api_key
@@ -32,3 +34,22 @@ resource "grafana_contact_point" "opsgenie_contact_point" {
     disable_resolve_message = each.value.disable_resolve_message
   }
 }
+
+
+# Webhook integration: one Grafana contact point per entry in var.webhook_endpoints
+resource "grafana_contact_point" "webhook_contact_point" {
+  for_each = { for cp in var.webhook_endpoints : cp.name => cp } # map keyed by contact point name
+
+  name               = each.key
+  disable_provenance = var.disable_provenance
+
+  webhook {
+    url                       = each.value.url
+    authorization_credentials = each.value.authorization_credentials
+    authorization_scheme      = each.value.authorization_scheme
+    basic_auth_password       = each.value.basic_auth_password
+    basic_auth_user           = each.value.basic_auth_user
+    disable_resolve_message   = each.value.disable_resolve_message
+    settings                  = each.value.settings
+  }
+}
diff --git a/...nts/tests/mixed-contact-points/0-setup.tf → ...nts/tests/mixed-contact-points/0-setup.tf b/...nts/tests/mixed-contact-points/0-setup.tf → ...nts/tests/mixed-contact-points/0-setup.tf
diff --git a/...s/tests/mixed-contact-points/1-example.tf → ...s/tests/mixed-contact-points/1-example.tf b/...s/tests/mixed-contact-points/1-example.tf → ...s/tests/mixed-contact-points/1-example.tf
diff --git a/...s/tests/autoscaling-max-usage/2-assert.tf → ...ts/tests/mixed-contact-points/2-assert.tf b/...s/tests/autoscaling-max-usage/2-assert.tf → ...ts/tests/mixed-contact-points/2-assert.tf
diff --git a/...ints/tests/mixed-contact-points/README.md → ...ints/tests/mixed-contact-points/README.md b/...ints/tests/mixed-contact-points/README.md → ...ints/tests/mixed-contact-points/README.md
diff --git a/.../contact-points/tests/opsgenie/0-setup.tf → ...-contact-points/tests/opsgenie/0-setup.tf b/.../contact-points/tests/opsgenie/0-setup.tf → ...-contact-points/tests/opsgenie/0-setup.tf
diff --git a/...ontact-points/tests/opsgenie/1-example.tf → ...ontact-points/tests/opsgenie/1-example.tf b/...ontact-points/tests/opsgenie/1-example.tf → ...ontact-points/tests/opsgenie/1-example.tf
diff --git a/...tests/available-replica-count/2-assert.tf → ...contact-points/tests/opsgenie/2-assert.tf b/...tests/available-replica-count/2-assert.tf → ...contact-points/tests/opsgenie/2-assert.tf
diff --git a/...s/contact-points/tests/opsgenie/README.md → ...t-contact-points/tests/opsgenie/README.md b/...s/contact-points/tests/opsgenie/README.md → ...t-contact-points/tests/opsgenie/README.md
diff --git a/...les/contact-points/tests/slack/0-setup.tf → ...ert-contact-points/tests/slack/0-setup.tf b/...les/contact-points/tests/slack/0-setup.tf → ...ert-contact-points/tests/slack/0-setup.tf
diff --git a/...s/contact-points/tests/slack/1-example.tf → ...t-contact-points/tests/slack/1-example.tf b/...s/contact-points/tests/slack/1-example.tf → ...t-contact-points/tests/slack/1-example.tf
diff --git a/...erts/tests/container-restarts/2-assert.tf → ...rt-contact-points/tests/slack/2-assert.tf b/...erts/tests/container-restarts/2-assert.tf → ...rt-contact-points/tests/slack/2-assert.tf
diff --git a/modules/contact-points/tests/slack/README.md → ...lert-contact-points/tests/slack/README.md b/modules/contact-points/tests/slack/README.md → ...lert-contact-points/tests/slack/README.md
diff --git a/modules/alert-contact-points/variables.tf b/modules/alert-contact-points/variables.tf
@@ -0,0 +1,51 @@
+variable "disable_provenance" {
+  type        = bool
+  default     = true # passed to the slack, opsgenie and webhook contact point resources of this module
+  description = "Allow modifying the contact point from other sources than Terraform or the Grafana API."
+}
+
+variable "slack_endpoints" {
+  type = list(object({
+    name                    = string                                                     # The name of the contact point
+    endpoint_url            = optional(string, "https://slack.com/api/chat.postMessage") # Use this to override the Slack API endpoint URL to send requests to
+    icon_emoji              = optional(string, "")                                       # The name of a Slack workspace emoji to use as the bot icon
+    icon_url                = optional(string, "")                                       # A URL of an image to use as the bot icon
+    recipient               = optional(string, null)                                     # Channel, private group, or IM channel (can be an encoded ID or a name) to send messages to
+    text                    = optional(string, "")                                       # Templated content of the message
+    title                   = optional(string, "")                                       # Templated title of the message
+    token                   = optional(string, "")                                       # A Slack API token, for sending messages directly without the webhook method
+    webhook_url             = optional(string, "")                                       # A Slack webhook URL, for sending messages via the webhook method
+    username                = optional(string, "")                                       # Username for the bot to use
+    disable_resolve_message = optional(bool, false)                                      # Whether to disable sending resolve messages
+  }))
+  default     = []
+  description = "Slack contact points list."
+}
+
+variable "opsgenie_endpoints" {
+  type = list(object({
+    name                    = string                                                 # The name of the contact point
+    api_key                 = string                                                 # The OpsGenie API key to use
+    auto_close              = optional(bool, false)                                  # Whether to auto-close alerts in OpsGenie when they resolve in the Alertmanager
+    message                 = optional(string, "")                                   # The templated content of the message
+    api_url                 = optional(string, "https://api.opsgenie.com/v2/alerts") # Allows customization of the OpsGenie API URL
+    disable_resolve_message = optional(bool, false)                                  # Whether to disable sending resolve messages
+  }))
+  default     = [] # no OpsGenie contact points are created unless entries are supplied
+  description = "OpsGenie contact points list."
+}
+
+variable "webhook_endpoints" {
+  type = list(object({
+    name                      = string                 # The name of the contact point
+    url                       = string                 # The URL to send webhook requests to
+    authorization_credentials = optional(string, null) # Allows a custom authorization scheme - attaches an auth header with this value. Do not use in conjunction with basic auth parameters
+    authorization_scheme      = optional(string, null) # Allows a custom authorization scheme - attaches an auth header with this name. Do not use in conjunction with basic auth parameters
+    basic_auth_password       = optional(string, null) # The password component of the basic auth credentials to use
+    basic_auth_user           = optional(string, null) # The username component of the basic auth credentials to use
+    disable_resolve_message   = optional(bool, false)  # Whether to disable sending resolve messages. Defaults to false
+    settings                  = any                    # Additional custom properties to attach to the notifier (required: declared without optional())
+  }))
+  default     = []
+  description = "Contact points that send notifications to an arbitrary webhook, using the Prometheus webhook format."
+}
diff --git a/modules/contact-points/versions.tf → modules/alert-contact-points/versions.tf b/modules/contact-points/versions.tf → modules/alert-contact-points/versions.tf
diff --git a/modules/alert-notifications/README.md b/modules/alert-notifications/README.md
@@ -0,0 +1,42 @@
+## Usage
+This Terraform module helps you manage Grafana notification policies, making it easier to configure alert notifications for different contact points and conditions.
+
+Notification policies can be created for various contact points. Additionally, you can have nested policies.
+
+Please refer to the `tests` folder for real examples.
+<!-- BEGINNING OF PRE-COMMIT-TERRAFORM DOCS HOOK -->
+## Requirements
+
+| Name | Version |
+|------|---------|
+| <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 1.3.0 |
+| <a name="requirement_grafana"></a> [grafana](#requirement\_grafana) | >= 1.40.0 |
+
+## Providers
+
+| Name | Version |
+|------|---------|
+| <a name="provider_grafana"></a> [grafana](#provider\_grafana) | >= 1.40.0 |
+
+## Modules
+
+No modules.
+
+## Resources
+
+| Name | Type |
+|------|------|
+| [grafana_mute_timing.this](https://registry.terraform.io/providers/grafana/grafana/latest/docs/resources/mute_timing) | resource |
+| [grafana_notification_policy.this](https://registry.terraform.io/providers/grafana/grafana/latest/docs/resources/notification_policy) | resource |
+
+## Inputs
+
+| Name | Description | Type | Default | Required |
+|------|-------------|------|---------|:--------:|
+| <a name="input_disable_provenance"></a> [disable\_provenance](#input\_disable\_provenance) | Allow modifying the notification policy from other sources than Terraform or the Grafana API. | `bool` | `true` | no |
+| <a name="input_notifications"></a> [notifications](#input\_notifications) | Represents the configuration options for Grafana notification policies. This config is global for grafana and overrides existing configs. | <pre>object({<br>    contact_point   = optional(string, "Slack")       # The default contact point to route all unmatched notifications to<br>    group_by        = optional(list(string), ["..."]) # A list of alert labels to group alerts into notifications by<br>    group_interval  = optional(string, "5m")          # Minimum time interval between two notifications for the same group<br>    repeat_interval = optional(string, "4h")          # Minimum time interval for re-sending a notification if an alert is still firing<br><br>    mute_timing = optional(object({                  # Mute timing config, which will be applied on all policies<br>      name = optional(string, "Default mute timing") # the name of mute timing<br>      intervals = optional(list(object({             # the mute timing interval configs<br>        weekdays      = optional(string, null)<br>        days_of_month = optional(string, null)<br>        months        = optional(string, null)<br>        years         = optional(string, null)<br>        location      = optional(string, null)<br>        times = optional(object({<br>          start = optional(string, "00:00")<br>          end   = optional(string, "24:59")<br>        }), null)<br>      })), [])<br>    }), null)<br><br>    policies = optional(list(object({<br>      contact_point = optional(string, null) # The contact point to route notifications that match this rule to<br>      continue      = optional(bool, true)   # Whether to continue matching subsequent rules if an alert matches the current rule. 
Otherwise, the rule will be 'consumed' by the first policy to match it<br>      group_by      = optional(list(string), ["..."])<br><br>      matchers = optional(list(object({<br>        label = optional(string, "priority") # The name of the label to match against<br>        match = optional(string, "=")        # The operator to apply when matching values of the given label. Allowed operators are = for equality, != for negated equality, =~ for regex equality, and !~ for negated regex equality<br>        value = optional(string, "P1")       # The label value to match against<br>      })), [])<br>      policies = optional(list(object({ # sub-policies(there is also possibility to implement also ability for sub.sub.sub-policies, but for not seems existing configs are enough)<br>        contact_point = optional(string, null)<br>        continue      = optional(bool, true)<br>        group_by      = optional(list(string), ["..."])<br>        mute_timings  = optional(list(string), [])<br><br>        matchers = optional(list(object({<br>          label = optional(string, "priority")<br>          match = optional(string, "=")<br>          value = optional(string, "P1")<br>        })), [])<br>      })), [])<br>    })), [])<br>  })</pre> | `null` | no |
+
+## Outputs
+
+No outputs.
+<!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK -->
diff --git a/modules/alert-notifications/main.tf b/modules/alert-notifications/main.tf
@@ -0,0 +1,80 @@
+resource "grafana_notification_policy" "this" {
+  count = var.notifications != null ? 1 : 0 # create the policy tree only when var.notifications is provided
+
+  contact_point      = var.notifications.contact_point
+  group_by           = var.notifications.group_by
+  group_interval     = var.notifications.group_interval
+  repeat_interval    = var.notifications.repeat_interval
+  disable_provenance = var.disable_provenance
+
+  dynamic "policy" {
+    for_each = var.notifications.policies # one first-level policy block per configured policy
+
+    content {
+      contact_point = policy.value.contact_point
+      continue      = policy.value.continue
+      group_by      = policy.value.group_by
+      mute_timings  = grafana_mute_timing.this.*.name # name of this module's mute timing; empty list when it is not created
+
+      dynamic "matcher" {
+        for_each = policy.value.matchers
+
+        content {
+          label = matcher.value.label
+          match = matcher.value.match
+          value = matcher.value.value
+        }
+      }
+
+      dynamic "policy" {
+        for_each = try(policy.value.policies, []) # nested sub-policies; none when the attribute is absent
+
+        content {
+          contact_point = policy.value.contact_point # inside this block `policy` is the inner (sub-policy) iterator, shadowing the outer one
+          continue      = policy.value.continue
+          group_by      = policy.value.group_by
+          mute_timings  = grafana_mute_timing.this.*.name # NOTE(review): a per-sub-policy mute_timings input, if declared, is not read here — confirm intended
+
+          dynamic "matcher" {
+            for_each = policy.value.matchers
+
+            content {
+              label = matcher.value.label
+              match = matcher.value.match
+              value = matcher.value.value
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+resource "grafana_mute_timing" "this" { # optional mute timing referenced by the notification policies of this module
+  count = try(var.notifications.mute_timing, null) != null ? 1 : 0 # created only when notifications.mute_timing is set
+
+  name               = try(var.notifications.mute_timing.name, "Default mute timing") # honor the configured name; fall back to the previously hard-coded value
+  disable_provenance = var.disable_provenance
+
+
+  dynamic "intervals" {
+    for_each = try(var.notifications.mute_timing.intervals, []) # one intervals block per configured interval
+
+    content {
+      weekdays      = try(intervals.value.weekdays, null)
+      days_of_month = try(intervals.value.days_of_month, null)
+      months        = try(intervals.value.months, null)
+      years         = try(intervals.value.years, null)
+      location      = try(intervals.value.location, null)
+
+      dynamic "times" {
+        for_each = try(intervals.value.times[*], []) # splat wraps the single times object into a one-element list; null or missing yields no times block
+
+        content {
+          start = try(times.value.start, "00:00")
+          end   = try(times.value.end, "24:59") # NOTE(review): "24:59" is not a valid clock time — confirm whether Grafana expects "24:00" here
+        }
+      }
+    }
+  }
+}
diff --git a/.../alerts/tests/node-autoscaling/0-setup.tf → ...ions/tests/slack-with-opsgenie/0-setup.tf b/.../alerts/tests/node-autoscaling/0-setup.tf → ...ions/tests/slack-with-opsgenie/0-setup.tf
diff --git a/...ns/tests/slack-with-opsgenie/1-example.tf → ...ns/tests/slack-with-opsgenie/1-example.tf b/...ns/tests/slack-with-opsgenie/1-example.tf → ...ns/tests/slack-with-opsgenie/1-example.tf
diff --git a/modules/alerts/tests/expressions/2-assert.tf → ...ons/tests/slack-with-opsgenie/2-assert.tf b/modules/alerts/tests/expressions/2-assert.tf → ...ons/tests/slack-with-opsgenie/2-assert.tf
diff --git a/...tions/tests/slack-with-opsgenie/README.md → ...tions/tests/slack-with-opsgenie/README.md b/...tions/tests/slack-with-opsgenie/README.md → ...tions/tests/slack-with-opsgenie/README.md
diff --git a/...ions/tests/slack-with-opsgenie/0-setup.tf → ...lert-notifications/tests/slack/0-setup.tf b/...ions/tests/slack-with-opsgenie/0-setup.tf → ...lert-notifications/tests/slack/0-setup.tf
diff --git a/...es/notifications/tests/slack/1-example.tf → ...rt-notifications/tests/slack/1-example.tf b/...es/notifications/tests/slack/1-example.tf → ...rt-notifications/tests/slack/1-example.tf
diff --git a/...es/alerts/tests/mixed-metrics/2-assert.tf → ...ert-notifications/tests/slack/2-assert.tf b/...es/alerts/tests/mixed-metrics/2-assert.tf → ...ert-notifications/tests/slack/2-assert.tf
diff --git a/modules/notifications/tests/slack/README.md → ...alert-notifications/tests/slack/README.md b/modules/notifications/tests/slack/README.md → ...alert-notifications/tests/slack/README.md