From 9c03150271605314b6229c2012dd7cd2dba36da1 Mon Sep 17 00:00:00 2001 From: The Magician Date: Wed, 28 Oct 2020 15:33:50 -0700 Subject: [PATCH] Add MQL based alerts (#4157) (#7664) Signed-off-by: Modular Magician --- .changelog/4157.txt | 3 + google/resource_monitoring_alert_policy.go | 217 +++++++++++++++++- .../resource_monitoring_alert_policy_test.go | 51 ++++ .../r/monitoring_alert_policy.html.markdown | 56 +++++ 4 files changed, 323 insertions(+), 4 deletions(-) create mode 100644 .changelog/4157.txt diff --git a/.changelog/4157.txt b/.changelog/4157.txt new file mode 100644 index 00000000000..dc362e7f56d --- /dev/null +++ b/.changelog/4157.txt @@ -0,0 +1,3 @@ +```release-note:enhancement +monitoring: Added Monitoring Query Language based alerting for `google_monitoring_alert_policy` +``` diff --git a/google/resource_monitoring_alert_policy.go b/google/resource_monitoring_alert_policy.go index 92df934f116..0aaab474343 100644 --- a/google/resource_monitoring_alert_policy.go +++ b/google/resource_monitoring_alert_policy.go @@ -237,6 +237,71 @@ condition to be triggered.`, Optional: true, Description: `The percentage of time series that must fail the predicate for the +condition to be triggered.`, + }, + }, + }, + }, + }, + }, + }, + "condition_monitoring_query_language": { + Type: schema.TypeList, + Optional: true, + Description: `A Monitoring Query Language query that outputs a boolean stream`, + MaxItems: 1, + Elem: &schema.Resource{ + Schema: map[string]*schema.Schema{ + "duration": { + Type: schema.TypeString, + Required: true, + Description: `The amount of time that a time series must +violate the threshold to be considered +failing. Currently, only values that are a +multiple of a minute--e.g., 0, 60, 120, or +300 seconds--are supported. If an invalid +value is given, an error will be returned. 
+When choosing a duration, it is useful to +keep in mind the frequency of the underlying +time series data (which may also be affected +by any alignments specified in the +aggregations field); a good duration is long +enough so that a single outlier does not +generate spurious alerts, but short enough +that unhealthy states are detected and +alerted on quickly.`, + }, + "query": { + Type: schema.TypeString, + Required: true, + Description: `Monitoring Query Language query that outputs a boolean stream.`, + }, + "trigger": { + Type: schema.TypeList, + Optional: true, + Description: `The number/percent of time series for which +the comparison must hold in order for the +condition to trigger. If unspecified, then +the condition will trigger if the comparison +is true for any of the time series that have +been identified by filter and aggregations, +or by the ratio, if denominator_filter and +denominator_aggregations are specified.`, + MaxItems: 1, + Elem: &schema.Resource{ + Schema: map[string]*schema.Schema{ + "count": { + Type: schema.TypeInt, + Optional: true, + Description: `The absolute number of time series +that must fail the predicate for the +condition to be triggered.`, + }, + "percent": { + Type: schema.TypeFloat, + Optional: true, + Description: `The percentage of time series that +must fail the predicate for the condition to be triggered.`, }, }, @@ -1123,10 +1188,11 @@ func flattenMonitoringAlertPolicyConditions(v interface{}, d *schema.ResourceDat continue } transformed = append(transformed, map[string]interface{}{ - "condition_absent": flattenMonitoringAlertPolicyConditionsConditionAbsent(original["conditionAbsent"], d, config), - "name": flattenMonitoringAlertPolicyConditionsName(original["name"], d, config), - "condition_threshold": flattenMonitoringAlertPolicyConditionsConditionThreshold(original["conditionThreshold"], d, config), - "display_name": flattenMonitoringAlertPolicyConditionsDisplayName(original["displayName"], d, config), + 
"condition_absent": flattenMonitoringAlertPolicyConditionsConditionAbsent(original["conditionAbsent"], d, config), + "name": flattenMonitoringAlertPolicyConditionsName(original["name"], d, config), + "condition_monitoring_query_language": flattenMonitoringAlertPolicyConditionsConditionMonitoringQueryLanguage(original["conditionMonitoringQueryLanguage"], d, config), + "condition_threshold": flattenMonitoringAlertPolicyConditionsConditionThreshold(original["conditionThreshold"], d, config), + "display_name": flattenMonitoringAlertPolicyConditionsDisplayName(original["displayName"], d, config), }) } return transformed @@ -1235,6 +1301,67 @@ func flattenMonitoringAlertPolicyConditionsName(v interface{}, d *schema.Resourc return v } +func flattenMonitoringAlertPolicyConditionsConditionMonitoringQueryLanguage(v interface{}, d *schema.ResourceData, config *Config) interface{} { + if v == nil { + return nil + } + original := v.(map[string]interface{}) + if len(original) == 0 { + return nil + } + transformed := make(map[string]interface{}) + transformed["query"] = + flattenMonitoringAlertPolicyConditionsConditionMonitoringQueryLanguageQuery(original["query"], d, config) + transformed["duration"] = + flattenMonitoringAlertPolicyConditionsConditionMonitoringQueryLanguageDuration(original["duration"], d, config) + transformed["trigger"] = + flattenMonitoringAlertPolicyConditionsConditionMonitoringQueryLanguageTrigger(original["trigger"], d, config) + return []interface{}{transformed} +} +func flattenMonitoringAlertPolicyConditionsConditionMonitoringQueryLanguageQuery(v interface{}, d *schema.ResourceData, config *Config) interface{} { + return v +} + +func flattenMonitoringAlertPolicyConditionsConditionMonitoringQueryLanguageDuration(v interface{}, d *schema.ResourceData, config *Config) interface{} { + return v +} + +func flattenMonitoringAlertPolicyConditionsConditionMonitoringQueryLanguageTrigger(v interface{}, d *schema.ResourceData, config *Config) interface{} { + if v == 
nil { + return nil + } + original := v.(map[string]interface{}) + if len(original) == 0 { + return nil + } + transformed := make(map[string]interface{}) + transformed["percent"] = + flattenMonitoringAlertPolicyConditionsConditionMonitoringQueryLanguageTriggerPercent(original["percent"], d, config) + transformed["count"] = + flattenMonitoringAlertPolicyConditionsConditionMonitoringQueryLanguageTriggerCount(original["count"], d, config) + return []interface{}{transformed} +} +func flattenMonitoringAlertPolicyConditionsConditionMonitoringQueryLanguageTriggerPercent(v interface{}, d *schema.ResourceData, config *Config) interface{} { + return v +} + +func flattenMonitoringAlertPolicyConditionsConditionMonitoringQueryLanguageTriggerCount(v interface{}, d *schema.ResourceData, config *Config) interface{} { + // Handles the string fixed64 format + if strVal, ok := v.(string); ok { + if intVal, err := strconv.ParseInt(strVal, 10, 64); err == nil { + return intVal + } + } + + // number values are represented as float64 + if floatVal, ok := v.(float64); ok { + intVal := int(floatVal) + return intVal + } + + return v // let terraform core handle it otherwise +} + func flattenMonitoringAlertPolicyConditionsConditionThreshold(v interface{}, d *schema.ResourceData, config *Config) interface{} { if v == nil { return nil @@ -1463,6 +1590,13 @@ func expandMonitoringAlertPolicyConditions(v interface{}, d TerraformResourceDat transformed["name"] = transformedName } + transformedConditionMonitoringQueryLanguage, err := expandMonitoringAlertPolicyConditionsConditionMonitoringQueryLanguage(original["condition_monitoring_query_language"], d, config) + if err != nil { + return nil, err + } else if val := reflect.ValueOf(transformedConditionMonitoringQueryLanguage); val.IsValid() && !isEmptyValue(val) { + transformed["conditionMonitoringQueryLanguage"] = transformedConditionMonitoringQueryLanguage + } + transformedConditionThreshold, err := 
expandMonitoringAlertPolicyConditionsConditionThreshold(original["condition_threshold"], d, config) if err != nil { return nil, err @@ -1627,6 +1761,81 @@ func expandMonitoringAlertPolicyConditionsName(v interface{}, d TerraformResourc return v, nil } +func expandMonitoringAlertPolicyConditionsConditionMonitoringQueryLanguage(v interface{}, d TerraformResourceData, config *Config) (interface{}, error) { + l := v.([]interface{}) + if len(l) == 0 || l[0] == nil { + return nil, nil + } + raw := l[0] + original := raw.(map[string]interface{}) + transformed := make(map[string]interface{}) + + transformedQuery, err := expandMonitoringAlertPolicyConditionsConditionMonitoringQueryLanguageQuery(original["query"], d, config) + if err != nil { + return nil, err + } else if val := reflect.ValueOf(transformedQuery); val.IsValid() && !isEmptyValue(val) { + transformed["query"] = transformedQuery + } + + transformedDuration, err := expandMonitoringAlertPolicyConditionsConditionMonitoringQueryLanguageDuration(original["duration"], d, config) + if err != nil { + return nil, err + } else if val := reflect.ValueOf(transformedDuration); val.IsValid() && !isEmptyValue(val) { + transformed["duration"] = transformedDuration + } + + transformedTrigger, err := expandMonitoringAlertPolicyConditionsConditionMonitoringQueryLanguageTrigger(original["trigger"], d, config) + if err != nil { + return nil, err + } else if val := reflect.ValueOf(transformedTrigger); val.IsValid() && !isEmptyValue(val) { + transformed["trigger"] = transformedTrigger + } + + return transformed, nil +} + +func expandMonitoringAlertPolicyConditionsConditionMonitoringQueryLanguageQuery(v interface{}, d TerraformResourceData, config *Config) (interface{}, error) { + return v, nil +} + +func expandMonitoringAlertPolicyConditionsConditionMonitoringQueryLanguageDuration(v interface{}, d TerraformResourceData, config *Config) (interface{}, error) { + return v, nil +} + +func 
expandMonitoringAlertPolicyConditionsConditionMonitoringQueryLanguageTrigger(v interface{}, d TerraformResourceData, config *Config) (interface{}, error) { + l := v.([]interface{}) + if len(l) == 0 || l[0] == nil { + return nil, nil + } + raw := l[0] + original := raw.(map[string]interface{}) + transformed := make(map[string]interface{}) + + transformedPercent, err := expandMonitoringAlertPolicyConditionsConditionMonitoringQueryLanguageTriggerPercent(original["percent"], d, config) + if err != nil { + return nil, err + } else if val := reflect.ValueOf(transformedPercent); val.IsValid() && !isEmptyValue(val) { + transformed["percent"] = transformedPercent + } + + transformedCount, err := expandMonitoringAlertPolicyConditionsConditionMonitoringQueryLanguageTriggerCount(original["count"], d, config) + if err != nil { + return nil, err + } else if val := reflect.ValueOf(transformedCount); val.IsValid() && !isEmptyValue(val) { + transformed["count"] = transformedCount + } + + return transformed, nil +} + +func expandMonitoringAlertPolicyConditionsConditionMonitoringQueryLanguageTriggerPercent(v interface{}, d TerraformResourceData, config *Config) (interface{}, error) { + return v, nil +} + +func expandMonitoringAlertPolicyConditionsConditionMonitoringQueryLanguageTriggerCount(v interface{}, d TerraformResourceData, config *Config) (interface{}, error) { + return v, nil +} + func expandMonitoringAlertPolicyConditionsConditionThreshold(v interface{}, d TerraformResourceData, config *Config) (interface{}, error) { l := v.([]interface{}) if len(l) == 0 || l[0] == nil { diff --git a/google/resource_monitoring_alert_policy_test.go b/google/resource_monitoring_alert_policy_test.go index 09af729168f..b7351ceab38 100644 --- a/google/resource_monitoring_alert_policy_test.go +++ b/google/resource_monitoring_alert_policy_test.go @@ -16,6 +16,7 @@ func TestAccMonitoringAlertPolicy(t *testing.T) { "basic": testAccMonitoringAlertPolicy_basic, "full": 
testAccMonitoringAlertPolicy_full, "update": testAccMonitoringAlertPolicy_update, + "mql": testAccMonitoringAlertPolicy_mql, } for name, tc := range testCases { @@ -110,6 +111,28 @@ func testAccMonitoringAlertPolicy_full(t *testing.T) { }) } +func testAccMonitoringAlertPolicy_mql(t *testing.T) { + + alertName := fmt.Sprintf("tf-test-%s", randString(t, 10)) + conditionName := fmt.Sprintf("tf-test-%s", randString(t, 10)) + + vcrTest(t, resource.TestCase{ + PreCheck: func() { testAccPreCheck(t) }, + Providers: testAccProviders, + CheckDestroy: testAccCheckAlertPolicyDestroyProducer(t), + Steps: []resource.TestStep{ + { + Config: testAccMonitoringAlertPolicy_mqlCfg(alertName, conditionName), + }, + { + ResourceName: "google_monitoring_alert_policy.mql", + ImportState: true, + ImportStateVerify: true, + }, + }, + }) +} + func testAccCheckAlertPolicyDestroyProducer(t *testing.T) func(s *terraform.State) error { return func(s *terraform.State) error { config := googleProviderConfig(t) @@ -226,3 +249,31 @@ resource "google_monitoring_alert_policy" "full" { } `, alertName, conditionName1, conditionName2) } + +func testAccMonitoringAlertPolicy_mqlCfg(alertName, conditionName string) string { + return fmt.Sprintf(` +resource "google_monitoring_alert_policy" "mql" { + display_name = "%s" + combiner = "OR" + enabled = true + + conditions { + display_name = "%s" + + condition_monitoring_query_language { + query = "fetch gce_instance::compute.googleapis.com/instance/cpu/utilization | align mean_aligner() | window 5m | condition value.utilization > .15 '10^2.%%'" + duration = "60s" + + trigger { + count = 2 + } + } + } + + documentation { + content = "test content" + mime_type = "text/markdown" + } +} +`, alertName, conditionName) +} diff --git a/website/docs/r/monitoring_alert_policy.html.markdown b/website/docs/r/monitoring_alert_policy.html.markdown index 996329b6214..9807b0103e0 100644 --- a/website/docs/r/monitoring_alert_policy.html.markdown +++ 
b/website/docs/r/monitoring_alert_policy.html.markdown @@ -104,6 +104,11 @@ The `conditions` block supports: the condition is created as part of a new or updated alerting policy. +* `condition_monitoring_query_language` - + (Optional) + A Monitoring Query Language query that outputs a boolean stream. + Structure is documented below. + * `condition_threshold` - (Optional) A condition that compares a time series against a @@ -259,6 +264,57 @@ The `aggregations` block supports: The `trigger` block supports: +* `percent` - + (Optional) + The percentage of time series that + must fail the predicate for the + condition to be triggered. + +* `count` - + (Optional) + The absolute number of time series + that must fail the predicate for the + condition to be triggered. + +The `condition_monitoring_query_language` block supports: + +* `query` - + (Required) + Monitoring Query Language query that outputs a boolean stream. + +* `duration` - + (Required) + The amount of time that a time series must + violate the threshold to be considered + failing. Currently, only values that are a + multiple of a minute--e.g., 0, 60, 120, or + 300 seconds--are supported. If an invalid + value is given, an error will be returned. + When choosing a duration, it is useful to + keep in mind the frequency of the underlying + time series data (which may also be affected + by any alignments specified in the + aggregations field); a good duration is long + enough so that a single outlier does not + generate spurious alerts, but short enough + that unhealthy states are detected and + alerted on quickly. + +* `trigger` - + (Optional) + The number/percent of time series for which + the comparison must hold in order for the + condition to trigger. If unspecified, then + the condition will trigger if the comparison + is true for any of the time series that have + been identified by filter and aggregations, + or by the ratio, if denominator_filter and + denominator_aggregations are specified. 
+ Structure is documented below. + + +The `trigger` block supports: + * `percent` - (Optional) The percentage of time series that