Skip to content

Commit

Permalink
feat(uptime): Allow regions to be configured as shadow mode
Browse files Browse the repository at this point in the history
This introduces shadow mode to our uptime region system. Configs are partitioned separately between shadow and active regions. For example, if regions 1 and 2 are active and regions 3 and 4 are shadow, then configs going to regions 1/2 will have just regions 1/2 as their active regions, and configs going to regions 3/4 will likewise have only regions 3/4 as their active regions.

This uses the same override system we use to disable regions. Regions will be slowly migrated over to shadow mode over time.

Note that this PR doesn't handle the last piece required here: we still need to ignore results produced by shadow regions. That will be handled in a follow-up PR.
  • Loading branch information
wedamija committed Feb 21, 2025
1 parent 8811fc1 commit de9ac7d
Show file tree
Hide file tree
Showing 10 changed files with 341 additions and 106 deletions.
8 changes: 6 additions & 2 deletions src/sentry/testutils/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -2021,10 +2021,14 @@ def create_project_uptime_subscription(

@staticmethod
def create_uptime_subscription_region(
subscription: UptimeSubscription, region_slug: str
subscription: UptimeSubscription,
region_slug: str,
mode: UptimeSubscriptionRegion.RegionMode,
) -> UptimeSubscriptionRegion:
return UptimeSubscriptionRegion.objects.create(
uptime_subscription=subscription, region_slug=region_slug
uptime_subscription=subscription,
region_slug=region_slug,
mode=mode,
)

@staticmethod
Expand Down
11 changes: 10 additions & 1 deletion src/sentry/testutils/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
ProjectUptimeSubscriptionMode,
UptimeStatus,
UptimeSubscription,
UptimeSubscriptionRegion,
)
from sentry.users.models.identity import Identity, IdentityProvider
from sentry.users.models.user import User
Expand Down Expand Up @@ -716,10 +717,18 @@ def create_uptime_subscription(
trace_sampling=trace_sampling,
)
for region_slug in region_slugs:
Factories.create_uptime_subscription_region(subscription, region_slug)
self.create_uptime_subscription_region(subscription, region_slug)

return subscription

def create_uptime_subscription_region(
    self,
    subscription: UptimeSubscription,
    region_slug: str,
    mode: UptimeSubscriptionRegion.RegionMode = UptimeSubscriptionRegion.RegionMode.ACTIVE,
) -> UptimeSubscriptionRegion:
    """Create a region row for ``subscription`` via the ``Factories`` helper.

    Args:
        subscription: The uptime subscription the region is attached to.
        region_slug: Slug of the region to associate with the subscription.
        mode: Mode to store on the region row. Defaults to ``ACTIVE``.

    Returns:
        The ``UptimeSubscriptionRegion`` produced by
        ``Factories.create_uptime_subscription_region``.
    """
    # Hand the created row back to the caller instead of discarding it, so
    # tests can inspect or mutate the region they just created.
    return Factories.create_uptime_subscription_region(subscription, region_slug, mode)

def create_project_uptime_subscription(
self,
project: Project | None = None,
Expand Down
38 changes: 26 additions & 12 deletions src/sentry/uptime/consumers/results_consumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,24 +136,38 @@ def check_and_update_regions(self, subscription: UptimeSubscription, result: Che
if subscription_run != current_run:
return

subscription_region_slugs = {r.region_slug for r in subscription.regions.all()}
active_region_slugs = {c.slug for c in get_active_region_configs()}
if subscription_region_slugs == active_region_slugs:
subscription_regions = {
r.region_slug: UptimeSubscriptionRegion.RegionMode(r.mode)
for r in subscription.regions.all()
}
active_regions = {c.slug: mode for c, mode in get_active_region_configs()}
if subscription_regions == active_regions:
# Regions haven't changed, exit early.
return

new_region_slugs = active_region_slugs - subscription_region_slugs
removed_region_slugs = subscription_region_slugs - active_region_slugs
if new_region_slugs:
new_regions = [
UptimeSubscriptionRegion(uptime_subscription=subscription, region_slug=slug)
for slug in new_region_slugs
new_or_updated_regions = {
slug: mode
for slug, mode in active_regions.items()
if slug not in subscription_regions or subscription_regions[slug] != mode
}
removed_regions = subscription_regions.keys() - active_regions.keys()
if new_or_updated_regions:
new_or_updated_region_objs = [
UptimeSubscriptionRegion(
uptime_subscription=subscription, region_slug=slug, mode=mode
)
for slug, mode in new_or_updated_regions.items()
]
UptimeSubscriptionRegion.objects.bulk_create(new_regions, ignore_conflicts=True)
UptimeSubscriptionRegion.objects.bulk_create(
new_or_updated_region_objs,
update_conflicts=True,
update_fields=["mode"],
unique_fields=["uptime_subscription", "region_slug"],
)

if removed_region_slugs:
if removed_regions:
for deleted_region in UptimeSubscriptionRegion.objects.filter(
uptime_subscription=subscription, region_slug__in=removed_region_slugs
uptime_subscription=subscription, region_slug__in=removed_regions
):
if subscription.subscription_id:
# We need to explicitly send deletes here before we remove the region
Expand Down
5 changes: 5 additions & 0 deletions src/sentry/uptime/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,13 @@ class UptimeSubscriptionRegion(DefaultFieldsModel):
__relocation_scope__ = RelocationScope.Excluded

class RegionMode(enum.StrEnum):
# Region is running as usual
ACTIVE = "active"
# Region is disabled and not running
INACTIVE = "inactive"
# Region is running in shadow mode. This means it is performing checks, but results are
# ignored.
SHADOW = "shadow"

uptime_subscription = FlexibleForeignKey("uptime.UptimeSubscription", related_name="regions")
region_slug = models.CharField(max_length=255, db_index=True, db_default="")
Expand Down
13 changes: 10 additions & 3 deletions src/sentry/uptime/subscriptions/regions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,22 @@
from sentry.uptime.models import UptimeSubscriptionRegion


def get_active_region_configs() -> list[UptimeRegionConfig]:
def get_active_region_configs() -> (
list[tuple[UptimeRegionConfig, UptimeSubscriptionRegion.RegionMode]]
):
configured_regions: Sequence[UptimeRegionConfig] = settings.UPTIME_REGIONS
region_mode_override: Mapping[str, str] = options.get("uptime.checker-regions-mode-override")

return [
c
(
c,
UptimeSubscriptionRegion.RegionMode(
region_mode_override.get(c.slug, UptimeSubscriptionRegion.RegionMode.ACTIVE)
),
)
for c in configured_regions
if region_mode_override.get(c.slug, UptimeSubscriptionRegion.RegionMode.ACTIVE)
== UptimeSubscriptionRegion.RegionMode.ACTIVE
in [UptimeSubscriptionRegion.RegionMode.ACTIVE, UptimeSubscriptionRegion.RegionMode.SHADOW]
]


Expand Down
10 changes: 6 additions & 4 deletions src/sentry/uptime/subscriptions/subscriptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,17 +155,19 @@ def get_or_create_uptime_subscription(
if subscription.status == UptimeSubscription.Status.DELETING.value:
# This is pretty unlikely to happen, but we should avoid deleting the subscription here and just confirm it
# exists in the checker.
subscription.update(status=UptimeSubscription.Status.CREATING.value)
created = True

# Associate active regions with this subscription
for region_config in get_active_region_configs():
for region_config, region_mode in get_active_region_configs():
# If we add a region here we need to resend the subscriptions
created |= UptimeSubscriptionRegion.objects.get_or_create(
uptime_subscription=subscription, region_slug=region_config.slug
created |= UptimeSubscriptionRegion.objects.update_or_create(
uptime_subscription=subscription,
region_slug=region_config.slug,
defaults={"mode": region_mode},
)[1]

if created:
subscription.update(status=UptimeSubscription.Status.CREATING.value)
create_remote_uptime_subscription.delay(subscription.id)
fetch_subscription_rdap_info.delay(subscription.id)
return subscription
Expand Down
35 changes: 22 additions & 13 deletions src/sentry/uptime/subscriptions/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@
from sentry.snuba.models import QuerySubscription
from sentry.tasks.base import instrumented_task
from sentry.uptime.config_producer import produce_config, produce_config_removal
from sentry.uptime.models import UptimeRegionScheduleMode, UptimeSubscription
from sentry.uptime.models import (
UptimeRegionScheduleMode,
UptimeSubscription,
UptimeSubscriptionRegion,
)
from sentry.utils import metrics

logger = logging.getLogger(__name__)
Expand All @@ -37,10 +41,8 @@ def create_remote_uptime_subscription(uptime_subscription_id, **kwargs):
metrics.incr("uptime.subscriptions.create.incorrect_status", sample_rate=1.0)
return

region_slugs = [s.region_slug for s in subscription.regions.all()]

for region_slug in region_slugs:
send_uptime_subscription_config(region_slug, subscription)
for region in subscription.regions.all():
send_uptime_subscription_config(region, subscription)
subscription.update(
status=QuerySubscription.Status.ACTIVE.value,
subscription_id=subscription.subscription_id,
Expand All @@ -66,10 +68,8 @@ def update_remote_uptime_subscription(uptime_subscription_id, **kwargs):
metrics.incr("uptime.subscriptions.update.incorrect_status", sample_rate=1.0)
return

region_slugs = [s.region_slug for s in subscription.regions.all()]

for region_slug in region_slugs:
send_uptime_subscription_config(region_slug, subscription)
for region in subscription.regions.all():
send_uptime_subscription_config(region, subscription)
subscription.update(
status=QuerySubscription.Status.ACTIVE.value,
subscription_id=subscription.subscription_id,
Expand Down Expand Up @@ -109,16 +109,25 @@ def delete_remote_uptime_subscription(uptime_subscription_id, **kwargs):
send_uptime_config_deletion(region_slug, subscription_id)


def send_uptime_subscription_config(region_slug: str, subscription: UptimeSubscription):
def send_uptime_subscription_config(
region: UptimeSubscriptionRegion, subscription: UptimeSubscription
):
if subscription.subscription_id is None:
subscription.subscription_id = uuid4().hex
produce_config(
region_slug, uptime_subscription_to_check_config(subscription, subscription.subscription_id)
region.region_slug,
uptime_subscription_to_check_config(
subscription,
subscription.subscription_id,
UptimeSubscriptionRegion.RegionMode(region.mode),
),
)


def uptime_subscription_to_check_config(
subscription: UptimeSubscription, subscription_id: str
subscription: UptimeSubscription,
subscription_id: str,
region_mode: UptimeSubscriptionRegion.RegionMode,
) -> CheckConfig:
headers = subscription.headers
# XXX: Temporary translation code. We want to support headers with the same keys, so convert to a list
Expand All @@ -133,7 +142,7 @@ def uptime_subscription_to_check_config(
"request_method": subscription.method,
"request_headers": headers,
"trace_sampling": subscription.trace_sampling,
"active_regions": [r.region_slug for r in subscription.regions.all()],
"active_regions": [r.region_slug for r in subscription.regions.filter(mode=region_mode)],
"region_schedule_mode": UptimeRegionScheduleMode.ROUND_ROBIN.value,
}
if subscription.body is not None:
Expand Down
Loading

0 comments on commit de9ac7d

Please sign in to comment.