From 444811eda11ccaa559ece40bc4aae73b8060bdb4 Mon Sep 17 00:00:00 2001
From: Robert Kenny
Date: Wed, 30 Oct 2024 11:24:55 +0000
Subject: [PATCH] add message on queue age alarm

---
Reviewer notes (this section is ignored when the patch is applied):

* The dlq_not_empty alarm name now appends
  local.dlq_not_empty_alarm_name_suffix. The local and its variable were
  declared and documented in RELEASE.md but never used. When the variable is
  null the local is "", so existing alarm names are unchanged.
* Added the missing full stop in the main_q_age_alarm_topic_arn description.
* Line breaks and indentation were reconstructed (terraform fmt style);
  verify context-line whitespace against the target files before applying.
* The abbreviated blob hashes on the `index` lines for queue/alarms.tf and
  queue/variables.tf are carried over unchanged and were NOT recomputed for
  the amended content.

 RELEASE.md         | 15 +++++++++++++++
 queue/alarms.tf    | 36 +++++++++++++++++++++++++++++++++---
 queue/variables.tf | 26 ++++++++++++++++++++++++++
 3 files changed, 74 insertions(+), 3 deletions(-)
 create mode 100644 RELEASE.md

diff --git a/RELEASE.md b/RELEASE.md
new file mode 100644
index 0000000..3df04b4
--- /dev/null
+++ b/RELEASE.md
@@ -0,0 +1,15 @@
+RELEASE_TYPE: minor
+
+This change adds a main queue alarm that triggers when the main queue has messages older than a certain age.
+
+This is useful for monitoring the health of the queue and ensuring that messages are being processed in a timely manner.
+
+Adds the variables:
+
+- `dlq_alarm_topic_arn` - The ARN of the SNS topic to send DLQ alarm notifications to
+- `main_q_age_alarm_topic_arn` - The ARN of the SNS topic to send main queue age alarm notifications to
+- `max_age_in_hours` - The maximum age of a message in the main queue before the alarm triggers
+- `queue_age_alarm_name_suffix` - The suffix to append to the age alarm name, used to allow EventBridge to filter on the alarm name
+- `dlq_not_empty_alarm_name_suffix` - The suffix to append to the dlq not empty alarm name, used to allow EventBridge to filter on the alarm name
+
+We deprecate the `alarm_topic_arn` variable in favour of the new `dlq_alarm_topic_arn` and `main_q_age_alarm_topic_arn` variables.
\ No newline at end of file
diff --git a/queue/alarms.tf b/queue/alarms.tf
index 4845538..7b1e51b 100644
--- a/queue/alarms.tf
+++ b/queue/alarms.tf
@@ -1,5 +1,17 @@
+locals {
+  max_age_in_seconds = var.max_age_in_hours * 3600
+
+  # Allows for deprecation of alarm_topic_arn in favor of dlq_alarm_topic_arn
+  dlq_alarm_topic_arn = var.dlq_alarm_topic_arn != null ? var.dlq_alarm_topic_arn : var.alarm_topic_arn
+
+  # Name suffix allows for EventBridge rules to pick up alarms using wildcard
+  queue_age_alarm_name_suffix     = var.queue_age_alarm_name_suffix != null ? "_${var.queue_age_alarm_name_suffix}" : ""
+  dlq_not_empty_alarm_name_suffix = var.dlq_not_empty_alarm_name_suffix != null ? "_${var.dlq_not_empty_alarm_name_suffix}" : ""
+}
+
+
 resource "aws_cloudwatch_metric_alarm" "dlq_not_empty" {
-  count = var.alarm_topic_arn != null ? 1 : 0
+  count = local.dlq_alarm_topic_arn != null ? 1 : 0
 
-  alarm_name          = "${aws_sqs_queue.dlq.name}_not_empty"
+  alarm_name          = "${aws_sqs_queue.dlq.name}_not_empty${local.dlq_not_empty_alarm_name_suffix}"
   comparison_operator = "GreaterThanThreshold"
@@ -14,6 +26,24 @@ resource "aws_cloudwatch_metric_alarm" "dlq_not_empty" {
     QueueName = aws_sqs_queue.dlq.name
   }
 
-  alarm_actions = [var.alarm_topic_arn]
+  alarm_actions = [local.dlq_alarm_topic_arn]
 }
 
+resource "aws_cloudwatch_metric_alarm" "queue_age" {
+  count = var.main_q_age_alarm_topic_arn != null ? 1 : 0
+
+  alarm_name          = "${aws_sqs_queue.q.name}_age${local.queue_age_alarm_name_suffix}"
+  comparison_operator = "GreaterThanThreshold"
+  evaluation_periods  = 1
+  metric_name         = "ApproximateAgeOfOldestMessage"
+  namespace           = "AWS/SQS"
+  period              = 60
+  threshold           = local.max_age_in_seconds
+  statistic           = "Maximum"
+
+  dimensions = {
+    QueueName = aws_sqs_queue.q.name
+  }
+
+  alarm_actions = [var.main_q_age_alarm_topic_arn]
+}
\ No newline at end of file
diff --git a/queue/variables.tf b/queue/variables.tf
index 7e0150b..3d97006 100644
--- a/queue/variables.tf
+++ b/queue/variables.tf
@@ -38,10 +38,36 @@ variable "max_receive_count" {
 }
 
 variable "alarm_topic_arn" {
+  description = "DEPRECATED, use dlq_alarm_topic_arn: ARN of the topic where to send notification for DLQs not being empty. If null, no alarm will be created."
+  default     = null
+}
+
+variable "dlq_alarm_topic_arn" {
   description = "ARN of the topic where to send notification for DLQs not being empty. If null, no alarm will be created."
   default     = null
 }
 
+variable "main_q_age_alarm_topic_arn" {
+  description = "ARN of the topic where to send notification for messages exceeding max_age_in_hours. If null, no alarm will be created."
+  default     = null
+}
+
+variable "max_age_in_hours" {
+  description = "The maximum age of a message in hours"
+  type        = number
+  default     = 6
+}
+
+variable "queue_age_alarm_name_suffix" {
+  description = "Suffix to append to the queue name for the age alarm"
+  default     = null
+}
+
+variable "dlq_not_empty_alarm_name_suffix" {
+  description = "Suffix to append to the DLQ name for the not empty alarm"
+  default     = null
+}
+
 variable "fifo_queue" {
   description = "Boolean designating a FIFO queue"
   default     = false