Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle Slack ratelimit on alert group deletion #3038

Merged
merged 2 commits into from
Sep 19, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed

- Fix Slack access token length issue by @toolchainX ([#3016](https://github.com/grafana/oncall/pull/3016))
- Handle Slack ratelimit on alert group deletion by @vadimkerr ([#3038](https://github.com/grafana/oncall/pull/3038))

## v1.3.37 (2023-09-12)

Expand Down
7 changes: 6 additions & 1 deletion engine/apps/alerts/tasks/delete_alert_group.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from celery.utils.log import get_task_logger
from django.conf import settings

from apps.slack.errors import SlackAPIRatelimitError
from common.custom_celery_tasks import shared_dedicated_queue_retry_task

logger = get_task_logger(__name__)
Expand All @@ -23,4 +24,8 @@ def delete_alert_group(alert_group_pk, user_pk):
logger.debug("User not found, skipping delete_alert_group")
return

alert_group.delete_by_user(user)
try:
alert_group.delete_by_user(user)
except SlackAPIRatelimitError as e:
# Handle Slack API ratelimit raised in apps.slack.scenarios.distribute_alerts.DeleteGroupStep.process_signal
delete_alert_group.apply_async((alert_group_pk, user_pk), countdown=e.retry_after)
177 changes: 152 additions & 25 deletions engine/apps/alerts/tests/test_alert_group.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
from unittest.mock import patch
from unittest.mock import call, patch

import pytest

from apps.alerts.incident_appearance.renderers.phone_call_renderer import AlertGroupPhoneCallRenderer
from apps.alerts.models import AlertGroup, AlertGroupLogRecord
from apps.alerts.tasks.delete_alert_group import delete_alert_group
from apps.slack.client import SlackClient
from apps.slack.errors import SlackAPIMessageNotFoundError, SlackAPIRatelimitError
from apps.slack.models import SlackMessage
from apps.slack.tests.conftest import build_slack_response


@pytest.mark.django_db
Expand Down Expand Up @@ -46,56 +49,180 @@ def test_render_for_phone_call(
assert expected_verbose_name in rendered_text


@patch.object(SlackClient, "reactions_remove")
@patch.object(SlackClient, "chat_delete")
@pytest.mark.django_db
def test_delete(
mock_chat_delete,
mock_reactions_remove,
make_organization_with_slack_team_identity,
make_user,
make_slack_channel,
make_alert_receive_channel,
make_alert_group,
make_alert,
make_slack_message,
make_resolution_note_slack_message,
):
"""test alert group deleting"""

organization, slack_team_identity = make_organization_with_slack_team_identity()
slack_channel = make_slack_channel(slack_team_identity, name="general", slack_id="CWER1ASD")
user = make_user(organization=organization)

alert_receive_channel = make_alert_receive_channel(organization)

alert_group = make_alert_group(alert_receive_channel)
SlackMessage.objects.create(channel_id="CWER1ASD", alert_group=alert_group)
make_alert(alert_group, raw_request_data={})

make_alert(
alert_group,
raw_request_data={
"evalMatches": [
{"value": 100, "metric": "High value", "tags": None},
{"value": 200, "metric": "Higher Value", "tags": None},
],
"message": "Someone is testing the alert notification within grafana.",
"ruleId": 0,
"ruleName": "Test notification",
"ruleUrl": "http://localhost:3000/",
"state": "alerting",
"title": f"Incident for channel <#{slack_channel.slack_id}> Where a > b & c < d",
},
# Create Slack messages
slack_message = make_slack_message(alert_group=alert_group, channel_id="test_channel_id", slack_id="test_slack_id")
resolution_note_1 = make_resolution_note_slack_message(
alert_group=alert_group,
user=user,
added_by_user=user,
posted_by_bot=True,
slack_channel_id="test1_channel_id",
ts="test1_ts",
)
resolution_note_2 = make_resolution_note_slack_message(
alert_group=alert_group,
user=user,
added_by_user=user,
added_to_resolution_note=True,
slack_channel_id="test2_channel_id",
ts="test2_ts",
)

alerts = alert_group.alerts
slack_messages = alert_group.slack_messages

assert alerts.count() > 0
assert slack_messages.count() > 0
assert alert_group.alerts.count() == 1
assert alert_group.slack_messages.count() == 1
assert alert_group.resolution_note_slack_messages.count() == 2

delete_alert_group(alert_group.pk, user.pk)

assert alerts.count() == 0
assert slack_messages.count() == 0
assert not alert_group.alerts.exists()
assert not alert_group.slack_messages.exists()
assert not alert_group.resolution_note_slack_messages.exists()

with pytest.raises(AlertGroup.DoesNotExist):
alert_group.refresh_from_db()

# Check that appropriate Slack API calls are made
assert mock_chat_delete.call_count == 2
assert mock_chat_delete.call_args_list[0] == call(
channel=resolution_note_1.slack_channel_id, ts=resolution_note_1.ts
)
assert mock_chat_delete.call_args_list[1] == call(channel=slack_message.channel_id, ts=slack_message.slack_id)
mock_reactions_remove.assert_called_once_with(
channel=resolution_note_2.slack_channel_id, name="memo", timestamp=resolution_note_2.ts
)


@pytest.mark.parametrize("api_method", ["reactions_remove", "chat_delete"])
@patch.object(delete_alert_group, "apply_async")
@pytest.mark.django_db
def test_delete_slack_ratelimit(
mock_delete_alert_group,
api_method,
make_organization_with_slack_team_identity,
make_user,
make_alert_receive_channel,
make_alert_group,
make_alert,
make_slack_message,
make_resolution_note_slack_message,
):
organization, slack_team_identity = make_organization_with_slack_team_identity()
user = make_user(organization=organization)

alert_receive_channel = make_alert_receive_channel(organization)

alert_group = make_alert_group(alert_receive_channel)
make_alert(alert_group, raw_request_data={})

# Create Slack messages
make_slack_message(alert_group=alert_group, channel_id="test_channel_id", slack_id="test_slack_id")
make_resolution_note_slack_message(
alert_group=alert_group,
user=user,
added_by_user=user,
posted_by_bot=True,
slack_channel_id="test1_channel_id",
ts="test1_ts",
)
make_resolution_note_slack_message(
alert_group=alert_group,
user=user,
added_by_user=user,
added_to_resolution_note=True,
slack_channel_id="test2_channel_id",
ts="test2_ts",
)

with patch.object(
SlackClient,
api_method,
side_effect=SlackAPIRatelimitError(
response=build_slack_response({"ok": False, "error": "ratelimited"}, headers={"Retry-After": 42})
),
):
delete_alert_group(alert_group.pk, user.pk)

# Check task is retried gracefully
mock_delete_alert_group.assert_called_once_with((alert_group.pk, user.pk), countdown=42)


@pytest.mark.parametrize("api_method", ["reactions_remove", "chat_delete"])
@patch.object(delete_alert_group, "apply_async")
@pytest.mark.django_db
def test_delete_slack_api_error_other_than_ratelimit(
mock_delete_alert_group,
api_method,
make_organization_with_slack_team_identity,
make_user,
make_alert_receive_channel,
make_alert_group,
make_alert,
make_slack_message,
make_resolution_note_slack_message,
):
organization, slack_team_identity = make_organization_with_slack_team_identity()
user = make_user(organization=organization)

alert_receive_channel = make_alert_receive_channel(organization)

alert_group = make_alert_group(alert_receive_channel)
make_alert(alert_group, raw_request_data={})

# Create Slack messages
make_slack_message(alert_group=alert_group, channel_id="test_channel_id", slack_id="test_slack_id")
make_resolution_note_slack_message(
alert_group=alert_group,
user=user,
added_by_user=user,
posted_by_bot=True,
slack_channel_id="test1_channel_id",
ts="test1_ts",
)
make_resolution_note_slack_message(
alert_group=alert_group,
user=user,
added_by_user=user,
added_to_resolution_note=True,
slack_channel_id="test2_channel_id",
ts="test2_ts",
)

with patch.object(
SlackClient,
api_method,
side_effect=SlackAPIMessageNotFoundError(
response=build_slack_response({"ok": False, "error": "message_not_found"})
),
):
delete_alert_group(alert_group.pk, user.pk) # check no exception is raised

# Check task is not retried
mock_delete_alert_group.assert_not_called()


@pytest.mark.django_db
def test_alerts_count_gt(
Expand Down
46 changes: 25 additions & 21 deletions engine/apps/slack/scenarios/distribute_alerts.py
Original file line number Diff line number Diff line change
Expand Up @@ -880,34 +880,38 @@ class DeleteGroupStep(scenario_step.ScenarioStep):
def process_signal(self, log_record: AlertGroupLogRecord) -> None:
alert_group = log_record.alert_group

self.remove_resolution_note_reaction(alert_group)

bot_messages_ts: typing.List[str] = []
bot_messages_ts.extend(alert_group.slack_messages.values_list("slack_id", flat=True))
bot_messages_ts.extend(
alert_group.resolution_note_slack_messages.filter(posted_by_bot=True).values_list("ts", flat=True)
)
channel_id = alert_group.slack_message.channel_id
# Remove "memo" emoji from resolution note messages
for message in alert_group.resolution_note_slack_messages.filter(added_to_resolution_note=True):
try:
self._slack_client.reactions_remove(channel=message.slack_channel_id, name="memo", timestamp=message.ts)
except SlackAPIRatelimitError:
# retries on ratelimit are handled in apps.alerts.tasks.delete_alert_group.delete_alert_group
raise
except SlackAPIError:
pass
message.delete()

for message_ts in bot_messages_ts:
# Remove resolution note messages posted by OnCall bot
for message in alert_group.resolution_note_slack_messages.filter(posted_by_bot=True):
try:
self._slack_client.chat_delete(channel=channel_id, ts=message_ts)
except (
SlackAPITokenError,
SlackAPIChannelNotFoundError,
SlackAPIMessageNotFoundError,
SlackAPIChannelArchivedError,
):
self._slack_client.chat_delete(channel=message.slack_channel_id, ts=message.ts)
except SlackAPIRatelimitError:
# retries on ratelimit are handled in apps.alerts.tasks.delete_alert_group.delete_alert_group
raise
except SlackAPIError:
pass
message.delete()

def remove_resolution_note_reaction(self, alert_group: AlertGroup) -> None:
for message in alert_group.resolution_note_slack_messages.filter(added_to_resolution_note=True):
message.added_to_resolution_note = False
message.save(update_fields=["added_to_resolution_note"])
# Remove alert group Slack messages
for message in alert_group.slack_messages.all():
try:
self._slack_client.reactions_remove(channel=message.slack_channel_id, name="memo", timestamp=message.ts)
self._slack_client.chat_delete(channel=message.channel_id, ts=message.slack_id)
except SlackAPIRatelimitError:
# retries on ratelimit are handled in apps.alerts.tasks.delete_alert_group.delete_alert_group
raise
except SlackAPIError:
pass
message.delete()


class UpdateLogReportMessageStep(scenario_step.ScenarioStep):
Expand Down