diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b67f826dd..ad0128255e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - Fix Slack access token length issue by @toolchainX ([#3016](https://github.com/grafana/oncall/pull/3016)) +- Handle Slack ratelimit on alert group deletion by @vadimkerr ([#3038](https://github.com/grafana/oncall/pull/3038)) ## v1.3.37 (2023-09-12) diff --git a/engine/apps/alerts/tasks/delete_alert_group.py b/engine/apps/alerts/tasks/delete_alert_group.py index 452fe3fc86..f8a1d2ae6a 100644 --- a/engine/apps/alerts/tasks/delete_alert_group.py +++ b/engine/apps/alerts/tasks/delete_alert_group.py @@ -1,6 +1,7 @@ from celery.utils.log import get_task_logger from django.conf import settings +from apps.slack.errors import SlackAPIRatelimitError from common.custom_celery_tasks import shared_dedicated_queue_retry_task logger = get_task_logger(__name__) @@ -23,4 +24,8 @@ def delete_alert_group(alert_group_pk, user_pk): logger.debug("User not found, skipping delete_alert_group") return - alert_group.delete_by_user(user) + try: + alert_group.delete_by_user(user) + except SlackAPIRatelimitError as e: + # Handle Slack API ratelimit raised in apps.slack.scenarios.distribute_alerts.DeleteGroupStep.process_signal + delete_alert_group.apply_async((alert_group_pk, user_pk), countdown=e.retry_after) diff --git a/engine/apps/alerts/tests/test_alert_group.py b/engine/apps/alerts/tests/test_alert_group.py index 450665e75b..5748c44f72 100644 --- a/engine/apps/alerts/tests/test_alert_group.py +++ b/engine/apps/alerts/tests/test_alert_group.py @@ -1,11 +1,14 @@ -from unittest.mock import patch +from unittest.mock import call, patch import pytest from apps.alerts.incident_appearance.renderers.phone_call_renderer import AlertGroupPhoneCallRenderer from apps.alerts.models import AlertGroup, AlertGroupLogRecord from apps.alerts.tasks.delete_alert_group import delete_alert_group +from apps.slack.client import SlackClient +from apps.slack.errors import SlackAPIMessageNotFoundError, SlackAPIRatelimitError from apps.slack.models import SlackMessage +from apps.slack.tests.conftest import build_slack_response @pytest.mark.django_db @@ -46,56 +49,180 @@ def test_render_for_phone_call( assert expected_verbose_name in rendered_text +@patch.object(SlackClient, "reactions_remove") +@patch.object(SlackClient, "chat_delete") @pytest.mark.django_db def test_delete( + mock_chat_delete, + mock_reactions_remove, make_organization_with_slack_team_identity, make_user, - make_slack_channel, make_alert_receive_channel, make_alert_group, make_alert, + make_slack_message, + make_resolution_note_slack_message, ): """test alert group deleting""" organization, slack_team_identity = make_organization_with_slack_team_identity() - slack_channel = make_slack_channel(slack_team_identity, name="general", slack_id="CWER1ASD") user = make_user(organization=organization) alert_receive_channel = make_alert_receive_channel(organization) alert_group = make_alert_group(alert_receive_channel) - SlackMessage.objects.create(channel_id="CWER1ASD", alert_group=alert_group) + make_alert(alert_group, raw_request_data={}) - make_alert( - alert_group, - raw_request_data={ - "evalMatches": [ - {"value": 100, "metric": "High value", "tags": None}, - {"value": 200, "metric": "Higher Value", "tags": None}, - ], - "message": "Someone is testing the alert notification within grafana.", - "ruleId": 0, - "ruleName": "Test notification", - "ruleUrl": "http://localhost:3000/", - "state": "alerting", - "title": f"Incident for channel <#{slack_channel.slack_id}> Where a > b & c < d", - }, + # Create Slack messages + slack_message = make_slack_message(alert_group=alert_group, channel_id="test_channel_id", slack_id="test_slack_id") + resolution_note_1 = make_resolution_note_slack_message( + alert_group=alert_group, + user=user, + added_by_user=user, + posted_by_bot=True, + slack_channel_id="test1_channel_id", + ts="test1_ts", + ) + resolution_note_2 = make_resolution_note_slack_message( + alert_group=alert_group, + user=user, + added_by_user=user, + added_to_resolution_note=True, + slack_channel_id="test2_channel_id", + ts="test2_ts", ) - alerts = alert_group.alerts - slack_messages = alert_group.slack_messages - - assert alerts.count() > 0 - assert slack_messages.count() > 0 + assert alert_group.alerts.count() == 1 + assert alert_group.slack_messages.count() == 1 + assert alert_group.resolution_note_slack_messages.count() == 2 delete_alert_group(alert_group.pk, user.pk) - assert alerts.count() == 0 - assert slack_messages.count() == 0 + assert not alert_group.alerts.exists() + assert not alert_group.slack_messages.exists() + assert not alert_group.resolution_note_slack_messages.exists() with pytest.raises(AlertGroup.DoesNotExist): alert_group.refresh_from_db() + # Check that appropriate Slack API calls are made + assert mock_chat_delete.call_count == 2 + assert mock_chat_delete.call_args_list[0] == call( + channel=resolution_note_1.slack_channel_id, ts=resolution_note_1.ts + ) + assert mock_chat_delete.call_args_list[1] == call(channel=slack_message.channel_id, ts=slack_message.slack_id) + mock_reactions_remove.assert_called_once_with( + channel=resolution_note_2.slack_channel_id, name="memo", timestamp=resolution_note_2.ts + ) + + +@pytest.mark.parametrize("api_method", ["reactions_remove", "chat_delete"]) +@patch.object(delete_alert_group, "apply_async") +@pytest.mark.django_db +def test_delete_slack_ratelimit( + mock_delete_alert_group, + api_method, + make_organization_with_slack_team_identity, + make_user, + make_alert_receive_channel, + make_alert_group, + make_alert, + make_slack_message, + make_resolution_note_slack_message, +): + organization, slack_team_identity = make_organization_with_slack_team_identity() + user = make_user(organization=organization) + + alert_receive_channel = make_alert_receive_channel(organization) + + alert_group = make_alert_group(alert_receive_channel) + make_alert(alert_group, raw_request_data={}) + + # Create Slack messages + make_slack_message(alert_group=alert_group, channel_id="test_channel_id", slack_id="test_slack_id") + make_resolution_note_slack_message( + alert_group=alert_group, + user=user, + added_by_user=user, + posted_by_bot=True, + slack_channel_id="test1_channel_id", + ts="test1_ts", + ) + make_resolution_note_slack_message( + alert_group=alert_group, + user=user, + added_by_user=user, + added_to_resolution_note=True, + slack_channel_id="test2_channel_id", + ts="test2_ts", + ) + + with patch.object( + SlackClient, + api_method, + side_effect=SlackAPIRatelimitError( + response=build_slack_response({"ok": False, "error": "ratelimited"}, headers={"Retry-After": 42}) + ), + ): + delete_alert_group(alert_group.pk, user.pk) + + # Check task is retried gracefully + mock_delete_alert_group.assert_called_once_with((alert_group.pk, user.pk), countdown=42) + + +@pytest.mark.parametrize("api_method", ["reactions_remove", "chat_delete"]) +@patch.object(delete_alert_group, "apply_async") +@pytest.mark.django_db +def test_delete_slack_api_error_other_than_ratelimit( + mock_delete_alert_group, + api_method, + make_organization_with_slack_team_identity, + make_user, + make_alert_receive_channel, + make_alert_group, + make_alert, + make_slack_message, + make_resolution_note_slack_message, +): + organization, slack_team_identity = make_organization_with_slack_team_identity() + user = make_user(organization=organization) + + alert_receive_channel = make_alert_receive_channel(organization) + + alert_group = make_alert_group(alert_receive_channel) + make_alert(alert_group, raw_request_data={}) + + # Create Slack messages + make_slack_message(alert_group=alert_group, channel_id="test_channel_id", slack_id="test_slack_id") + make_resolution_note_slack_message( + alert_group=alert_group, + user=user, + added_by_user=user, + posted_by_bot=True, + slack_channel_id="test1_channel_id", + ts="test1_ts", + ) + make_resolution_note_slack_message( + alert_group=alert_group, + user=user, + added_by_user=user, + added_to_resolution_note=True, + slack_channel_id="test2_channel_id", + ts="test2_ts", + ) + + with patch.object( + SlackClient, + api_method, + side_effect=SlackAPIMessageNotFoundError( + response=build_slack_response({"ok": False, "error": "message_not_found"}) + ), + ): + delete_alert_group(alert_group.pk, user.pk) # check no exception is raised + + # Check task is not retried + mock_delete_alert_group.assert_not_called() + @pytest.mark.django_db def test_alerts_count_gt( diff --git a/engine/apps/slack/scenarios/distribute_alerts.py b/engine/apps/slack/scenarios/distribute_alerts.py index 1718ce43d6..3f551025cc 100644 --- a/engine/apps/slack/scenarios/distribute_alerts.py +++ b/engine/apps/slack/scenarios/distribute_alerts.py @@ -880,34 +880,38 @@ class DeleteGroupStep(scenario_step.ScenarioStep): def process_signal(self, log_record: AlertGroupLogRecord) -> None: alert_group = log_record.alert_group - self.remove_resolution_note_reaction(alert_group) - - bot_messages_ts: typing.List[str] = [] - bot_messages_ts.extend(alert_group.slack_messages.values_list("slack_id", flat=True)) - bot_messages_ts.extend( - alert_group.resolution_note_slack_messages.filter(posted_by_bot=True).values_list("ts", flat=True) - ) - channel_id = alert_group.slack_message.channel_id + # Remove "memo" emoji from resolution note messages + for message in alert_group.resolution_note_slack_messages.filter(added_to_resolution_note=True): + try: + self._slack_client.reactions_remove(channel=message.slack_channel_id, name="memo", timestamp=message.ts) + except SlackAPIRatelimitError: + # retries on ratelimit are handled in apps.alerts.tasks.delete_alert_group.delete_alert_group + raise + except SlackAPIError: + pass + message.delete() - for message_ts in bot_messages_ts: + # Remove resolution note messages posted by OnCall bot + for message in alert_group.resolution_note_slack_messages.filter(posted_by_bot=True): try: - self._slack_client.chat_delete(channel=channel_id, ts=message_ts) - except ( - SlackAPITokenError, - SlackAPIChannelNotFoundError, - SlackAPIMessageNotFoundError, - SlackAPIChannelArchivedError, - ): + self._slack_client.chat_delete(channel=message.slack_channel_id, ts=message.ts) + except SlackAPIRatelimitError: + # retries on ratelimit are handled in apps.alerts.tasks.delete_alert_group.delete_alert_group + raise + except SlackAPIError: pass + message.delete() - def remove_resolution_note_reaction(self, alert_group: AlertGroup) -> None: - for message in alert_group.resolution_note_slack_messages.filter(added_to_resolution_note=True): - message.added_to_resolution_note = False - message.save(update_fields=["added_to_resolution_note"]) + # Remove alert group Slack messages + for message in alert_group.slack_messages.all(): try: - self._slack_client.reactions_remove(channel=message.slack_channel_id, name="memo", timestamp=message.ts) + self._slack_client.chat_delete(channel=message.channel_id, ts=message.slack_id) + except SlackAPIRatelimitError: + # retries on ratelimit are handled in apps.alerts.tasks.delete_alert_group.delete_alert_group + raise except SlackAPIError: pass + message.delete() class UpdateLogReportMessageStep(scenario_step.ScenarioStep):