Skip to content

Commit

Permalink
address more PR comments
Browse files Browse the repository at this point in the history
  • Loading branch information
joeyorlando committed Mar 17, 2023
1 parent bc87356 commit 5e96d7d
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 47 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,6 @@ def next_user_in_sorted_queue(self) -> User:
next_user = users_queue[(last_user_index + 1) % len(users_queue)]
return next_user

def has_triggered_log_record(self, alert_group_id: int) -> bool:
return AlertGroupLogRecord.objects.filter(
escalation_policy_id=self.id,
alert_group_id=alert_group_id,
type=AlertGroupLogRecord.TYPE_ESCALATION_TRIGGERED,
).exists()

def execute(self, alert_group, reason) -> StepExecutionResultData:
action_map = {
EscalationPolicy.STEP_WAIT: self._escalation_step_wait,
Expand Down
33 changes: 23 additions & 10 deletions engine/apps/alerts/tasks/check_escalation_finished.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ def send_alert_group_escalation_auditor_task_heartbeat() -> None:


def audit_alert_group_escalation(alert_group: "AlertGroup") -> None:
AlertGroupLogRecord = apps.get_model("alerts", "AlertGroupLogRecord")

escalation_snapshot = alert_group.escalation_snapshot
alert_group_id = alert_group.id
base_msg = f"Alert group {alert_group_id}"
Expand Down Expand Up @@ -69,18 +71,29 @@ def audit_alert_group_escalation(alert_group: "AlertGroup") -> None:
f"{base_msg}'s escalation snapshot has {num_of_executed_escalation_policy_snapshots} executed escalation policies"
)

for executed_escalation_policy_snapshot in executed_escalation_policy_snapshots:
escalation_policy_id = executed_escalation_policy_snapshot.id

if not executed_escalation_policy_snapshot.has_triggered_log_record(alert_group_id):
raise AlertGroupEscalationPolicyExecutionAuditException(
f"{base_msg}'s escalation policy snapshot {escalation_policy_id} does not have a triggered alert group log record associated with it"
)
# compare number of triggered/failed alert group log records to the number of executed
# escalation policy snapshot steps
num_of_relevant_log_records = AlertGroupLogRecord.objects.filter(
alert_group_id=alert_group_id,
type__in=[AlertGroupLogRecord.TYPE_ESCALATION_TRIGGERED, AlertGroupLogRecord.TYPE_ESCALATION_FAILED],
).count()

task_logger.info(
f"{base_msg}'s escalation policy snapshot {escalation_policy_id} has a triggered alert group log record associated with it"
if num_of_relevant_log_records < num_of_executed_escalation_policy_snapshots:
raise AlertGroupEscalationPolicyExecutionAuditException(
f"{base_msg}'s number of triggered/failed alert group log records ({num_of_relevant_log_records}) is less "
f"than the number of executed escalation policy snapshot steps ({num_of_executed_escalation_policy_snapshots})"
)

task_logger.info(
f"{base_msg}'s number of triggered/failed alert group log records ({num_of_relevant_log_records}) is greater "
f"than or equal to the number of executed escalation policy snapshot steps ({num_of_executed_escalation_policy_snapshots})"
)

# TODO: check following steps:
# STEP_NOTIFY_IF_NUM_ALERTS_IN_TIME_WINDOW
# STEP_REPEAT_ESCALATION_N_TIMES
# not sure what to do here?

task_logger.info(f"{base_msg} passed the audit checks")


Expand All @@ -99,7 +112,7 @@ def get_auditable_alert_groups_started_at_range() -> typing.Tuple[datetime.datet
alert groups, whose integration did not have an escalation chain at the time the alert group was created
we would raise errors
"""
return (datetime.datetime(2023, 3, 5), timezone.now() - timezone.timedelta(days=2))
return (datetime.datetime(2023, 3, 25), timezone.now() - timezone.timedelta(days=2))


# don't retry this task as the AlertGroup DB query is rather expensive
Expand Down
15 changes: 10 additions & 5 deletions engine/apps/alerts/tests/test_check_escalation_finished_task.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import random
from unittest.mock import Mock, PropertyMock, patch

import pytest
Expand All @@ -16,6 +17,10 @@
MOCKED_HEARTBEAT_URL = "https://hello.com/lsdjjkf"


def _get_relevant_log_record_type() -> int:
return random.choice([AlertGroupLogRecord.TYPE_ESCALATION_TRIGGERED, AlertGroupLogRecord.TYPE_ESCALATION_FAILED])


def test_send_alert_group_escalation_auditor_task_heartbeat_does_not_call_the_heartbeat_url_if_one_is_not_configured():
with patch("apps.alerts.tasks.check_escalation_finished.requests") as mock_requests:
send_alert_group_escalation_auditor_task_heartbeat()
Expand Down Expand Up @@ -127,9 +132,9 @@ def test_audit_alert_group_escalation_all_executed_escalation_policy_snapshots_h

for escalation_policy_snapshot in escalation_policies_snapshots:
escalation_policy = EscalationPolicy.objects.get(id=escalation_policy_snapshot.id)
make_alert_group_log_record(
alert_group, AlertGroupLogRecord.TYPE_ESCALATION_TRIGGERED, user, escalation_policy=escalation_policy
)
log_record_type = _get_relevant_log_record_type()

make_alert_group_log_record(alert_group, log_record_type, user, escalation_policy=escalation_policy)

with patch(
"apps.alerts.escalation_snapshot.snapshot_classes.escalation_snapshot.EscalationSnapshot.executed_escalation_policy_snapshots",
Expand All @@ -150,12 +155,12 @@ def test_audit_alert_group_escalation_one_executed_escalation_policy_snapshot_do
alert_group, _, _, _ = escalation_snapshot_test_setup
escalation_policies_snapshots = alert_group.escalation_snapshot.escalation_policies_snapshots

# let's skip creating a TRIGGERED alert group log record for the first executed escalation policy
# let's skip creating a relevant alert group log record for the first executed escalation policy
for idx, escalation_policy_snapshot in enumerate(escalation_policies_snapshots):
if idx != 0:
escalation_policy = EscalationPolicy.objects.get(id=escalation_policy_snapshot.id)
make_alert_group_log_record(
alert_group, AlertGroupLogRecord.TYPE_ESCALATION_TRIGGERED, user, escalation_policy=escalation_policy
alert_group, _get_relevant_log_record_type(), user, escalation_policy=escalation_policy
)

with patch(
Expand Down
25 changes: 0 additions & 25 deletions engine/apps/alerts/tests/test_escalation_policy_snapshot.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,28 +568,3 @@ def test_escalation_step_with_deleted_user(

deserialized_escalation_snapshot = EscalationPolicySnapshotSerializer().to_internal_value(raw_snapshot)
assert deserialized_escalation_snapshot["notify_to_users_queue"] == [user]


@pytest.mark.django_db
@pytest.mark.parametrize(
"log_record_type,expected",
[
(AlertGroupLogRecord.TYPE_ESCALATION_TRIGGERED, True),
(AlertGroupLogRecord.TYPE_ESCALATION_FAILED, False),
(AlertGroupLogRecord.TYPE_ESCALATION_FINISHED, False),
],
)
def has_triggered_log_record(
escalation_step_test_setup, make_escalation_policy, make_alert_group_log_record, log_record_type, expected
):
_, user, _, channel_filter, alert_group, _ = escalation_step_test_setup
wait_step = make_escalation_policy(
escalation_chain=channel_filter.escalation_chain,
escalation_policy_step=EscalationPolicy.STEP_WAIT,
wait_delay=EscalationPolicy.FIFTEEN_MINUTES,
)

make_alert_group_log_record(alert_group, log_record_type, user, escalation_policy=wait_step)

escalation_policy_snapshot = get_escalation_policy_snapshot_from_model(wait_step)
assert escalation_policy_snapshot.has_triggered_log_record() == expected

0 comments on commit 5e96d7d

Please sign in to comment.