Skip to content

Commit

Permalink
Handle Slack ratelimit on alert group deletion
Browse files Browse the repository at this point in the history
  • Loading branch information
vstpme committed Sep 18, 2023
1 parent 69fcb58 commit ed38cd9
Show file tree
Hide file tree
Showing 4 changed files with 184 additions and 47 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed

- Fix Slack access token length issue by @toolchainX ([#3016](https://github.com/grafana/oncall/pull/3016))
- Handle Slack ratelimit on alert group deletion by @vadimkerr ([#3038](https://github.com/grafana/oncall/pull/3038))

## v1.3.37 (2023-09-12)

Expand Down
7 changes: 6 additions & 1 deletion engine/apps/alerts/tasks/delete_alert_group.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from celery.utils.log import get_task_logger
from django.conf import settings

from apps.slack.errors import SlackAPIRatelimitError
from common.custom_celery_tasks import shared_dedicated_queue_retry_task

logger = get_task_logger(__name__)
Expand All @@ -23,4 +24,8 @@ def delete_alert_group(alert_group_pk, user_pk):
logger.debug("User not found, skipping delete_alert_group")
return

alert_group.delete_by_user(user)
try:
alert_group.delete_by_user(user)
except SlackAPIRatelimitError as e:
# Handle Slack API ratelimit raised in apps.slack.scenarios.distribute_alerts.DeleteGroupStep.process_signal
delete_alert_group.apply_async((alert_group_pk, user_pk), countdown=e.retry_after)
177 changes: 152 additions & 25 deletions engine/apps/alerts/tests/test_alert_group.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
from unittest.mock import patch
from unittest.mock import call, patch

import pytest

from apps.alerts.incident_appearance.renderers.phone_call_renderer import AlertGroupPhoneCallRenderer
from apps.alerts.models import AlertGroup, AlertGroupLogRecord
from apps.alerts.tasks.delete_alert_group import delete_alert_group
from apps.slack.client import SlackClient
from apps.slack.errors import SlackAPIMessageNotFoundError, SlackAPIRatelimitError
from apps.slack.models import SlackMessage
from apps.slack.tests.conftest import build_slack_response


@pytest.mark.django_db
Expand Down Expand Up @@ -46,56 +49,180 @@ def test_render_for_phone_call(
assert expected_verbose_name in rendered_text


@patch.object(SlackClient, "reactions_remove")
@patch.object(SlackClient, "chat_delete")
@pytest.mark.django_db
def test_delete(
mock_chat_delete,
mock_reactions_remove,
make_organization_with_slack_team_identity,
make_user,
make_slack_channel,
make_alert_receive_channel,
make_alert_group,
make_alert,
make_slack_message,
make_resolution_note_slack_message,
):
"""test alert group deleting"""

organization, slack_team_identity = make_organization_with_slack_team_identity()
slack_channel = make_slack_channel(slack_team_identity, name="general", slack_id="CWER1ASD")
user = make_user(organization=organization)

alert_receive_channel = make_alert_receive_channel(organization)

alert_group = make_alert_group(alert_receive_channel)
SlackMessage.objects.create(channel_id="CWER1ASD", alert_group=alert_group)
make_alert(alert_group, raw_request_data={})

make_alert(
alert_group,
raw_request_data={
"evalMatches": [
{"value": 100, "metric": "High value", "tags": None},
{"value": 200, "metric": "Higher Value", "tags": None},
],
"message": "Someone is testing the alert notification within grafana.",
"ruleId": 0,
"ruleName": "Test notification",
"ruleUrl": "http://localhost:3000/",
"state": "alerting",
"title": f"Incident for channel <#{slack_channel.slack_id}> Where a > b & c < d",
},
# Create Slack messages
slack_message = make_slack_message(alert_group=alert_group, channel_id="test_channel_id", slack_id="test_slack_id")
resolution_note_1 = make_resolution_note_slack_message(
alert_group=alert_group,
user=user,
added_by_user=user,
posted_by_bot=True,
slack_channel_id="test1_channel_id",
ts="test1_ts",
)
resolution_note_2 = make_resolution_note_slack_message(
alert_group=alert_group,
user=user,
added_by_user=user,
added_to_resolution_note=True,
slack_channel_id="test2_channel_id",
ts="test2_ts",
)

alerts = alert_group.alerts
slack_messages = alert_group.slack_messages

assert alerts.count() > 0
assert slack_messages.count() > 0
assert alert_group.alerts.count() == 1
assert alert_group.slack_messages.count() == 1
assert alert_group.resolution_note_slack_messages.count() == 2

delete_alert_group(alert_group.pk, user.pk)

assert alerts.count() == 0
assert slack_messages.count() == 0
assert not alert_group.alerts.exists()
assert not alert_group.slack_messages.exists()
assert not alert_group.resolution_note_slack_messages.exists()

with pytest.raises(AlertGroup.DoesNotExist):
alert_group.refresh_from_db()

# Check that appropriate Slack API calls are made
assert mock_chat_delete.call_count == 2
assert mock_chat_delete.call_args_list[0] == call(
channel=resolution_note_1.slack_channel_id, ts=resolution_note_1.ts
)
assert mock_chat_delete.call_args_list[1] == call(channel=slack_message.channel_id, ts=slack_message.slack_id)
mock_reactions_remove.assert_called_once_with(
channel=resolution_note_2.slack_channel_id, name="memo", timestamp=resolution_note_2.ts
)


@pytest.mark.parametrize("api_method", ["reactions_remove", "chat_delete"])
@patch.object(delete_alert_group, "apply_async")
@pytest.mark.django_db
def test_delete_slack_ratelimit(
mock_delete_alert_group,
api_method,
make_organization_with_slack_team_identity,
make_user,
make_alert_receive_channel,
make_alert_group,
make_alert,
make_slack_message,
make_resolution_note_slack_message,
):
organization, slack_team_identity = make_organization_with_slack_team_identity()
user = make_user(organization=organization)

alert_receive_channel = make_alert_receive_channel(organization)

alert_group = make_alert_group(alert_receive_channel)
make_alert(alert_group, raw_request_data={})

# Create Slack messages
make_slack_message(alert_group=alert_group, channel_id="test_channel_id", slack_id="test_slack_id")
make_resolution_note_slack_message(
alert_group=alert_group,
user=user,
added_by_user=user,
posted_by_bot=True,
slack_channel_id="test1_channel_id",
ts="test1_ts",
)
make_resolution_note_slack_message(
alert_group=alert_group,
user=user,
added_by_user=user,
added_to_resolution_note=True,
slack_channel_id="test2_channel_id",
ts="test2_ts",
)

with patch.object(
SlackClient,
api_method,
side_effect=SlackAPIRatelimitError(
response=build_slack_response({"ok": False, "error": "ratelimited"}, headers={"Retry-After": 42})
),
):
delete_alert_group(alert_group.pk, user.pk)

# Check task is retried gracefully
mock_delete_alert_group.assert_called_once_with((alert_group.pk, user.pk), countdown=42)


@pytest.mark.parametrize("api_method", ["reactions_remove", "chat_delete"])
@patch.object(delete_alert_group, "apply_async")
@pytest.mark.django_db
def test_delete_slack_api_error_other_than_ratelimit(
mock_delete_alert_group,
api_method,
make_organization_with_slack_team_identity,
make_user,
make_alert_receive_channel,
make_alert_group,
make_alert,
make_slack_message,
make_resolution_note_slack_message,
):
organization, slack_team_identity = make_organization_with_slack_team_identity()
user = make_user(organization=organization)

alert_receive_channel = make_alert_receive_channel(organization)

alert_group = make_alert_group(alert_receive_channel)
make_alert(alert_group, raw_request_data={})

# Create Slack messages
make_slack_message(alert_group=alert_group, channel_id="test_channel_id", slack_id="test_slack_id")
make_resolution_note_slack_message(
alert_group=alert_group,
user=user,
added_by_user=user,
posted_by_bot=True,
slack_channel_id="test1_channel_id",
ts="test1_ts",
)
make_resolution_note_slack_message(
alert_group=alert_group,
user=user,
added_by_user=user,
added_to_resolution_note=True,
slack_channel_id="test2_channel_id",
ts="test2_ts",
)

with patch.object(
SlackClient,
api_method,
side_effect=SlackAPIMessageNotFoundError(
response=build_slack_response({"ok": False, "error": "message_not_found"})
),
):
delete_alert_group(alert_group.pk, user.pk) # check no exception is raised

# Check task is not retried
mock_delete_alert_group.assert_not_called()


@pytest.mark.django_db
def test_alerts_count_gt(
Expand Down
46 changes: 25 additions & 21 deletions engine/apps/slack/scenarios/distribute_alerts.py
Original file line number Diff line number Diff line change
Expand Up @@ -880,34 +880,38 @@ class DeleteGroupStep(scenario_step.ScenarioStep):
def process_signal(self, log_record: AlertGroupLogRecord) -> None:
alert_group = log_record.alert_group

self.remove_resolution_note_reaction(alert_group)

bot_messages_ts: typing.List[str] = []
bot_messages_ts.extend(alert_group.slack_messages.values_list("slack_id", flat=True))
bot_messages_ts.extend(
alert_group.resolution_note_slack_messages.filter(posted_by_bot=True).values_list("ts", flat=True)
)
channel_id = alert_group.slack_message.channel_id
# Remove "memo" emoji from resolution note messages
for message in alert_group.resolution_note_slack_messages.filter(added_to_resolution_note=True):
try:
self._slack_client.reactions_remove(channel=message.slack_channel_id, name="memo", timestamp=message.ts)
except SlackAPIRatelimitError:
# retries on ratelimit are handled in apps.alerts.tasks.delete_alert_group.delete_alert_group
raise
except SlackAPIError:
pass
message.delete()

for message_ts in bot_messages_ts:
# Remove resolution note messages posted by OnCall bot
for message in alert_group.resolution_note_slack_messages.filter(posted_by_bot=True):
try:
self._slack_client.chat_delete(channel=channel_id, ts=message_ts)
except (
SlackAPITokenError,
SlackAPIChannelNotFoundError,
SlackAPIMessageNotFoundError,
SlackAPIChannelArchivedError,
):
self._slack_client.chat_delete(channel=message.slack_channel_id, ts=message.ts)
except SlackAPIRatelimitError:
# retries on ratelimit are handled in apps.alerts.tasks.delete_alert_group.delete_alert_group
raise
except SlackAPIError:
pass
message.delete()

def remove_resolution_note_reaction(self, alert_group: AlertGroup) -> None:
for message in alert_group.resolution_note_slack_messages.filter(added_to_resolution_note=True):
message.added_to_resolution_note = False
message.save(update_fields=["added_to_resolution_note"])
# Remove alert group Slack messages
for message in alert_group.slack_messages.all():
try:
self._slack_client.reactions_remove(channel=message.slack_channel_id, name="memo", timestamp=message.ts)
self._slack_client.chat_delete(channel=message.channel_id, ts=message.slack_id)
except SlackAPIRatelimitError:
# retries on ratelimit are handled in apps.alerts.tasks.delete_alert_group.delete_alert_group
raise
except SlackAPIError:
pass
message.delete()


class UpdateLogReportMessageStep(scenario_step.ScenarioStep):
Expand Down

0 comments on commit ed38cd9

Please sign in to comment.