From 0aa0c05af3e9be3e4c35a6c9baeee939d79cb313 Mon Sep 17 00:00:00 2001 From: Matias Bordese Date: Fri, 8 Sep 2023 17:31:08 -0300 Subject: [PATCH 1/2] Trigger alert group signal after transaction commit --- engine/apps/alerts/models/alert.py | 5 +- engine/apps/alerts/models/alert_group.py | 13 +-- engine/apps/alerts/models/invitation.py | 7 +- engine/apps/api/tests/test_alert_group.py | 102 ++++++++++++---------- 4 files changed, 71 insertions(+), 56 deletions(-) diff --git a/engine/apps/alerts/models/alert.py b/engine/apps/alerts/models/alert.py index 50efdd84a3..060c961b39 100644 --- a/engine/apps/alerts/models/alert.py +++ b/engine/apps/alerts/models/alert.py @@ -1,11 +1,12 @@ import hashlib import logging import typing +from functools import partial from uuid import uuid4 from django.conf import settings from django.core.validators import MinLengthValidator -from django.db import models +from django.db import models, transaction from django.db.models import JSONField from apps.alerts import tasks @@ -163,7 +164,7 @@ def create( f"log record {log_record_for_root_incident.pk} with type " f"'{log_record_for_root_incident.get_type_display()}'" ) - tasks.send_alert_group_signal.apply_async((log_record_for_root_incident.pk,)) + transaction.on_commit(partial(tasks.send_alert_group_signal.delay, log_record_for_root_incident.pk)) except AlertGroup.DoesNotExist: pass diff --git a/engine/apps/alerts/models/alert_group.py b/engine/apps/alerts/models/alert_group.py index fb64d83c70..9888e8be4c 100644 --- a/engine/apps/alerts/models/alert_group.py +++ b/engine/apps/alerts/models/alert_group.py @@ -3,6 +3,7 @@ import typing import urllib from collections import namedtuple +from functools import partial from urllib.parse import urljoin from celery import uuid as celery_uuid @@ -1215,7 +1216,7 @@ def _bulk_acknowledge(user: User, alert_groups_to_acknowledge: "QuerySet[AlertGr alert_group.start_ack_reminder_if_needed() log_record = alert_group.log_records.create(type=AlertGroupLogRecord.TYPE_ACK, author=user) - send_alert_group_signal.apply_async((log_record.pk,)) + transaction.on_commit(partial(send_alert_group_signal.delay, log_record.pk)) @staticmethod def bulk_acknowledge(user: User, alert_groups: "QuerySet[AlertGroup]") -> None: @@ -1287,7 +1288,7 @@ def _bulk_resolve(user: User, alert_groups_to_resolve: "QuerySet[AlertGroup]") - state=AlertGroupState.RESOLVED, ) log_record = alert_group.log_records.create(type=AlertGroupLogRecord.TYPE_RESOLVED, author=user) - send_alert_group_signal.apply_async((log_record.pk,)) + transaction.on_commit(partial(send_alert_group_signal.delay, log_record.pk)) @staticmethod def bulk_resolve(user: User, alert_groups: "QuerySet[AlertGroup]") -> None: @@ -1360,7 +1361,7 @@ def _bulk_restart_unack(user: User, alert_groups_to_restart_unack: "QuerySet[Ale if alert_group.is_root_alert_group: alert_group.start_escalation_if_needed() - send_alert_group_signal.apply_async((log_record.pk,)) + transaction.on_commit(partial(send_alert_group_signal.delay, log_record.pk)) @staticmethod def _bulk_restart_unresolve(user: User, alert_groups_to_restart_unresolve: "QuerySet[AlertGroup]") -> None: @@ -1403,7 +1404,7 @@ def _bulk_restart_unresolve(user: User, alert_groups_to_restart_unresolve: "Quer if alert_group.is_root_alert_group: alert_group.start_escalation_if_needed() - send_alert_group_signal.apply_async((log_record.pk,)) + transaction.on_commit(partial(send_alert_group_signal.delay, log_record.pk)) @staticmethod def _bulk_restart_unsilence(user: User, alert_groups_to_restart_unsilence: "QuerySet[AlertGroup]") -> None: @@ -1442,7 +1443,7 @@ def _bulk_restart_unsilence(user: User, alert_groups_to_restart_unsilence: "Quer ) alert_group.start_escalation_if_needed() - send_alert_group_signal.apply_async((log_record.pk,)) + transaction.on_commit(partial(send_alert_group_signal.delay, log_record.pk)) @staticmethod def bulk_restart(user: User, alert_groups: "QuerySet[AlertGroup]") -> None: @@ -1578,7 +1579,7 @@ def _bulk_silence(user: User, alert_groups_to_silence: "QuerySet[AlertGroup]", s reason="Bulk action silence", ) - send_alert_group_signal.apply_async((log_record.pk,)) + transaction.on_commit(partial(send_alert_group_signal.delay, log_record.pk)) if silence_for_period and alert_group.is_root_alert_group: alert_group.start_unsilence_task(countdown=silence_delay) diff --git a/engine/apps/alerts/models/invitation.py b/engine/apps/alerts/models/invitation.py index afc26ccecf..80f8525f1e 100644 --- a/engine/apps/alerts/models/invitation.py +++ b/engine/apps/alerts/models/invitation.py @@ -1,5 +1,6 @@ import datetime import logging +from functools import partial from django.db import models, transaction @@ -92,8 +93,8 @@ def invite_user(invitee_user, alert_group, user): f"log record {log_record.pk} with type '{log_record.get_type_display()}'" ) - tasks.send_alert_group_signal.apply_async((log_record.pk,)) - tasks.invite_user_to_join_incident.apply_async((invitation.pk,)) + transaction.on_commit(partial(tasks.send_alert_group_signal.delay, log_record.pk)) + transaction.on_commit(partial(tasks.invite_user_to_join_incident.delay, invitation.pk)) @staticmethod def stop_invitation(invitation_pk, user): @@ -119,4 +120,4 @@ def stop_invitation(invitation_pk, user): f"call send_alert_group_signal for alert_group {invitation.alert_group.pk}, " f"log record {log_record.pk} with type '{log_record.get_type_display()}'" ) - tasks.send_alert_group_signal.apply_async((log_record.pk,)) + transaction.on_commit(partial(tasks.send_alert_group_signal.delay, log_record.pk)) diff --git a/engine/apps/api/tests/test_alert_group.py b/engine/apps/api/tests/test_alert_group.py index f83c782209..680209514d 100644 --- a/engine/apps/api/tests/test_alert_group.py +++ b/engine/apps/api/tests/test_alert_group.py @@ -1374,7 +1374,7 @@ def test_invalid_bulk_action( assert response.status_code == status.HTTP_400_BAD_REQUEST -@patch("apps.alerts.tasks.send_alert_group_signal.send_alert_group_signal.apply_async", return_value=None) +@patch("apps.alerts.tasks.send_alert_group_signal.send_alert_group_signal.delay", return_value=None) @patch("apps.alerts.tasks.send_update_log_report_signal.send_update_log_report_signal.apply_async", return_value=None) @patch("apps.alerts.models.AlertGroup.start_escalation_if_needed", return_value=None) @pytest.mark.django_db @@ -1384,6 +1384,7 @@ def test_bulk_action_restart( mocked_start_escalate_alert, make_user_auth_headers, alert_group_internal_api_setup, + django_capture_on_commit_callbacks, ): client = APIClient() user, token, alert_groups = alert_group_internal_api_setup @@ -1406,18 +1407,20 @@ def test_bulk_action_restart( author=user, ).exists() - # restart alert groups - response = client.post( - url, - data={ - "alert_group_pks": [alert_group.public_primary_key for alert_group in alert_groups], - "action": AlertGroup.RESTART, - }, - format="json", - **make_user_auth_headers(user, token), - ) + with django_capture_on_commit_callbacks(execute=True) as callbacks: + # restart alert groups + response = client.post( + url, + data={ + "alert_group_pks": [alert_group.public_primary_key for alert_group in alert_groups], + "action": AlertGroup.RESTART, + }, + format="json", + **make_user_auth_headers(user, token), + ) assert response.status_code == status.HTTP_200_OK + assert len(callbacks) == 3 assert resolved_alert_group.log_records.filter( type=AlertGroupLogRecord.TYPE_UN_RESOLVED, @@ -1439,7 +1442,7 @@ def test_bulk_action_restart( assert mocked_start_escalate_alert.called -@patch("apps.alerts.tasks.send_alert_group_signal.send_alert_group_signal.apply_async", return_value=None) +@patch("apps.alerts.tasks.send_alert_group_signal.send_alert_group_signal.delay", return_value=None) @patch("apps.alerts.tasks.send_update_log_report_signal.send_update_log_report_signal.apply_async", return_value=None) @pytest.mark.django_db def test_bulk_action_acknowledge( @@ -1447,6 +1450,7 @@ def test_bulk_action_acknowledge( mocked_log_report_signal_task, make_user_auth_headers, alert_group_internal_api_setup, + django_capture_on_commit_callbacks, ): client = APIClient() user, token, alert_groups = alert_group_internal_api_setup @@ -1459,18 +1463,20 @@ def test_bulk_action_acknowledge( author=user, ).exists() - # acknowledge alert groups - response = client.post( - url, - data={ - "alert_group_pks": [alert_group.public_primary_key for alert_group in alert_groups], - "action": AlertGroup.ACKNOWLEDGE, - }, - format="json", - **make_user_auth_headers(user, token), - ) + with django_capture_on_commit_callbacks(execute=True) as callbacks: + # acknowledge alert groups + response = client.post( + url, + data={ + "alert_group_pks": [alert_group.public_primary_key for alert_group in alert_groups], + "action": AlertGroup.ACKNOWLEDGE, + }, + format="json", + **make_user_auth_headers(user, token), + ) assert response.status_code == status.HTTP_200_OK + assert len(callbacks) == 3 assert new_alert_group.log_records.filter( type=AlertGroupLogRecord.TYPE_ACK, @@ -1496,7 +1502,7 @@ def test_bulk_action_acknowledge( assert mocked_log_report_signal_task.called -@patch("apps.alerts.tasks.send_alert_group_signal.send_alert_group_signal.apply_async", return_value=None) +@patch("apps.alerts.tasks.send_alert_group_signal.send_alert_group_signal.delay", return_value=None) @patch("apps.alerts.tasks.send_update_log_report_signal.send_update_log_report_signal.apply_async", return_value=None) @pytest.mark.django_db def test_bulk_action_resolve( @@ -1504,6 +1510,7 @@ def test_bulk_action_resolve( mocked_log_report_signal_task, make_user_auth_headers, alert_group_internal_api_setup, + django_capture_on_commit_callbacks, ): client = APIClient() user, token, alert_groups = alert_group_internal_api_setup @@ -1516,18 +1523,20 @@ def test_bulk_action_resolve( author=user, ).exists() - # resolve alert groups - response = client.post( - url, - data={ - "alert_group_pks": [alert_group.public_primary_key for alert_group in alert_groups], - "action": AlertGroup.RESOLVE, - }, - format="json", - **make_user_auth_headers(user, token), - ) + with django_capture_on_commit_callbacks(execute=True) as callbacks: + # resolve alert groups + response = client.post( + url, + data={ + "alert_group_pks": [alert_group.public_primary_key for alert_group in alert_groups], + "action": AlertGroup.RESOLVE, + }, + format="json", + **make_user_auth_headers(user, token), + ) assert response.status_code == status.HTTP_200_OK + assert len(callbacks) == 3 assert new_alert_group.log_records.filter( type=AlertGroupLogRecord.TYPE_RESOLVED, @@ -1548,7 +1557,7 @@ def test_bulk_action_resolve( assert mocked_log_report_signal_task.called -@patch("apps.alerts.tasks.send_alert_group_signal.send_alert_group_signal.apply_async", return_value=None) +@patch("apps.alerts.tasks.send_alert_group_signal.send_alert_group_signal.delay", return_value=None) @patch("apps.alerts.tasks.send_update_log_report_signal.send_update_log_report_signal.apply_async", return_value=None) @patch("apps.alerts.models.AlertGroup.start_unsilence_task", return_value=None) @pytest.mark.django_db @@ -1558,6 +1567,7 @@ def test_bulk_action_silence( mocked_start_unsilence_task, make_user_auth_headers, alert_group_internal_api_setup, + django_capture_on_commit_callbacks, ): client = APIClient() user, token, alert_groups = alert_group_internal_api_setup @@ -1570,19 +1580,21 @@ def test_bulk_action_silence( author=user, ).exists() - # silence alert groups - response = client.post( - url, - data={ - "alert_group_pks": [alert_group.public_primary_key for alert_group in alert_groups], - "action": AlertGroup.SILENCE, - "delay": 180, - }, - format="json", - **make_user_auth_headers(user, token), - ) + with django_capture_on_commit_callbacks(execute=True) as callbacks: + # silence alert groups + response = client.post( + url, + data={ + "alert_group_pks": [alert_group.public_primary_key for alert_group in alert_groups], + "action": AlertGroup.SILENCE, + "delay": 180, + }, + format="json", + **make_user_auth_headers(user, token), + ) assert response.status_code == status.HTTP_200_OK + assert len(callbacks) == 4 assert new_alert_group.log_records.filter( type=AlertGroupLogRecord.TYPE_SILENCE, From 36894aecf11fa6d942f8816d69af44fc1248869b Mon Sep 17 00:00:00 2001 From: Matias Bordese Date: Fri, 8 Sep 2023 17:35:16 -0300 Subject: [PATCH 2/2] Update changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f57225ab0..3d90a9524d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Notify user via Slack/mobile push-notification when their shift swap request is taken by @joeyorlando ([#2992](https://github.com/grafana/oncall/pull/2992)) +### Fixed + +- Avoid task retries because of missing AlertGroupLogRecord on send_alert_group_signal ([#3001](https://github.com/grafana/oncall/pull/3001)) + ## v1.3.36 (2023-09-07) ### Added