-
Notifications
You must be signed in to change notification settings - Fork 299
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix "Continue escalation if >X alerts per Y minutes" escalation step #2636
Changes from 6 commits
dcc473b
31adc7c
3d63e4f
1812548
3e460d7
0927711
14723eb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -198,9 +198,7 @@ def _deserialize_escalation_snapshot(self, raw_escalation_snapshot) -> Escalatio | |
|
||
@property | ||
def escalation_chain_exists(self) -> bool: | ||
if self.pause_escalation: | ||
return False | ||
elif not self.channel_filter: | ||
if not self.channel_filter: | ||
return False | ||
return self.channel_filter.escalation_chain is not None | ||
|
||
|
@@ -232,17 +230,11 @@ def start_escalation_if_needed(self, countdown=START_ESCALATION_DELAY, eta=None) | |
|
||
is_on_maintenace_or_debug_mode = self.channel.maintenance_mode is not None | ||
|
||
if ( | ||
self.is_restricted | ||
or is_on_maintenace_or_debug_mode | ||
or self.pause_escalation | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Escalations for paused alert groups should not be skipped. Paused alert groups should be escalated further and get handled by the "Continue escalation if >X alerts per Y minutes" step. |
||
or not self.escalation_chain_exists | ||
): | ||
if self.is_restricted or is_on_maintenace_or_debug_mode or not self.escalation_chain_exists: | ||
logger.debug( | ||
f"Not escalating alert group w/ pk: {self.pk}\n" | ||
f"is_restricted: {self.is_restricted}\n" | ||
f"is_on_maintenace_or_debug_mode: {is_on_maintenace_or_debug_mode}\n" | ||
f"pause_escalation: {self.pause_escalation}\n" | ||
f"escalation_chain_exists: {self.escalation_chain_exists}" | ||
) | ||
return | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,17 +14,23 @@ def distribute_alert(alert_id): | |
""" | ||
We need this task to make task processing async and to make sure the task is delivered. | ||
""" | ||
from apps.alerts.models import Alert, AlertGroup | ||
from apps.alerts.models import Alert | ||
|
||
alert = Alert.objects.get(pk=alert_id) | ||
task_logger.debug(f"Start distribute_alert for alert {alert_id} from alert_group {alert.group_id}") | ||
|
||
send_alert_create_signal.apply_async((alert_id,)) | ||
# If it's the first alert, let's launch the escalation! | ||
|
||
# Launch escalation for the group if it's the first alert, or if the group is paused. | ||
# "paused" means that the current escalation step is "Continue escalation if >X alerts per Y minutes" and there are | ||
# not enough alerts to trigger the escalation further. Launching escalation for a paused group will re-evaluate | ||
# the threshold and advance the escalation if needed, or go back to the same "paused" state if the threshold is | ||
# still not reached. | ||
if alert.is_the_first_alert_in_group or alert.group.pause_escalation: | ||
alert.group.start_escalation_if_needed(countdown=TASK_DELAY_SECONDS) | ||
Comment on lines
+29
to
+30
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The main change of the PR: run |
||
|
||
if alert.is_the_first_alert_in_group: | ||
alert_group = AlertGroup.objects.filter(pk=alert.group_id).get() | ||
alert_group.start_escalation_if_needed(countdown=TASK_DELAY_SECONDS) | ||
alert_group_escalation_snapshot_built.send(sender=distribute_alert, alert_group=alert_group) | ||
alert_group_escalation_snapshot_built.send(sender=distribute_alert, alert_group=alert.group) | ||
|
||
updated_rows = Alert.objects.filter(pk=alert_id, delivered=True).update(delivered=True) | ||
if updated_rows != 1: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,9 @@ | ||
from unittest.mock import PropertyMock, patch | ||
|
||
import pytest | ||
|
||
from apps.alerts.models import Alert | ||
from apps.alerts.models import Alert, EscalationPolicy | ||
from apps.alerts.tasks import distribute_alert, escalate_alert_group | ||
|
||
|
||
@pytest.mark.django_db | ||
|
@@ -41,3 +44,96 @@ def test_alert_create_custom_channel_filter(make_organization, make_alert_receiv | |
) | ||
|
||
assert alert.group.channel_filter == other_channel_filter | ||
|
||
|
||
@pytest.mark.django_db | ||
def test_distribute_alert_escalate_alert_group( | ||
make_organization, | ||
make_alert_receive_channel, | ||
make_channel_filter, | ||
make_alert_group, | ||
make_alert, | ||
make_escalation_chain, | ||
make_escalation_policy, | ||
): | ||
""" | ||
Check escalate_alert_group is called for the first alert in the group and not called for the second alert in the group. | ||
""" | ||
organization = make_organization() | ||
escalation_chain = make_escalation_chain(organization) | ||
make_escalation_policy( | ||
escalation_chain=escalation_chain, | ||
escalation_policy_step=EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS, | ||
) | ||
alert_receive_channel = make_alert_receive_channel(organization) | ||
channel_filter = make_channel_filter(alert_receive_channel, escalation_chain=escalation_chain) | ||
alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) | ||
|
||
# Check escalate_alert_group is called for the first alert in the group | ||
alert_1 = make_alert( | ||
alert_group=alert_group, | ||
is_the_first_alert_in_group=True, | ||
raw_request_data=alert_receive_channel.config.example_payload, | ||
) | ||
with patch.object(escalate_alert_group, "apply_async") as mock_escalate_alert_group_1: | ||
distribute_alert(alert_1.pk) | ||
mock_escalate_alert_group_1.assert_called_once() | ||
|
||
# Check escalate_alert_group is not called for the second alert in the group | ||
alert_2 = make_alert( | ||
alert_group=alert_group, | ||
is_the_first_alert_in_group=False, | ||
raw_request_data=alert_receive_channel.config.example_payload, | ||
) | ||
with patch.object(escalate_alert_group, "apply_async") as mock_escalate_alert_group_2: | ||
distribute_alert(alert_2.pk) | ||
mock_escalate_alert_group_2.assert_not_called() | ||
|
||
|
||
@pytest.mark.django_db | ||
def test_distribute_alert_escalate_alert_group_when_escalation_paused( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Adding a test to check that |
||
make_organization, | ||
make_alert_receive_channel, | ||
make_channel_filter, | ||
make_alert_group, | ||
make_alert, | ||
make_escalation_chain, | ||
make_escalation_policy, | ||
): | ||
""" | ||
Check escalate_alert_group is called for the first alert in the group and for the second alert in the group when | ||
escalation is paused. | ||
""" | ||
organization = make_organization() | ||
escalation_chain = make_escalation_chain(organization) | ||
make_escalation_policy( | ||
escalation_chain=escalation_chain, | ||
escalation_policy_step=EscalationPolicy.STEP_NOTIFY_MULTIPLE_USERS, | ||
) | ||
alert_receive_channel = make_alert_receive_channel(organization) | ||
channel_filter = make_channel_filter(alert_receive_channel, escalation_chain=escalation_chain) | ||
alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) | ||
|
||
# Check escalate_alert_group is called for the first alert in the group | ||
alert_1 = make_alert( | ||
alert_group=alert_group, | ||
is_the_first_alert_in_group=True, | ||
raw_request_data=alert_receive_channel.config.example_payload, | ||
) | ||
with patch.object(escalate_alert_group, "apply_async") as mock_escalate_alert_group_1: | ||
distribute_alert(alert_1.pk) | ||
mock_escalate_alert_group_1.assert_called_once() | ||
|
||
# Check escalate_alert_group is called for the second alert in the group when escalation is paused | ||
alert_2 = make_alert( | ||
alert_group=alert_group, | ||
is_the_first_alert_in_group=False, | ||
raw_request_data=alert_receive_channel.config.example_payload, | ||
) | ||
with patch( | ||
"apps.alerts.escalation_snapshot.escalation_snapshot_mixin.EscalationSnapshotMixin.pause_escalation", | ||
new_callable=PropertyMock(return_value=True), | ||
): | ||
with patch.object(escalate_alert_group, "apply_async") as mock_escalate_alert_group_2: | ||
distribute_alert(alert_2.pk) | ||
mock_escalate_alert_group_2.assert_called_once() |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -77,7 +77,8 @@ def test_build_raw_escalation_snapshot_escalation_chain_does_not_exist_escalatio | |
channel_filter = make_channel_filter(alert_receive_channel, escalation_chain=escalation_chain) | ||
alert_group = make_alert_group(alert_receive_channel, channel_filter=channel_filter) | ||
|
||
assert alert_group.build_raw_escalation_snapshot() == EMPTY_RAW_ESCALATION_SNAPSHOT | ||
# Check that setting pause_escalation to True doesn't make the snapshot empty | ||
assert alert_group.build_raw_escalation_snapshot() != EMPTY_RAW_ESCALATION_SNAPSHOT | ||
Comment on lines
+80
to
+81
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updating the test to reflect this change. |
||
|
||
|
||
@pytest.mark.django_db | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
escalation_chain_exists
should not depend onpause_escalation
, these are two separate properties that are not dependent on each other. For example, an alert group that has a proper escalation chain could be paused, and it would be a totally valid state.