Skip to content

Commit

Permalink
feat: Errors migration (#1801)
Browse files Browse the repository at this point in the history
Convert the errors backfill script into a migration. Also adds a migration
to truncate the events table once the data is copied to the new table.

This also flips the switch on the errors rollout so that we are now using the
new table. This will affect onpremise, Sentry dev and CI environments.
  • Loading branch information
lynnagara authored Apr 12, 2021
1 parent f9c5c97 commit efc62e8
Show file tree
Hide file tree
Showing 8 changed files with 125 additions and 72 deletions.
11 changes: 0 additions & 11 deletions scripts/backfill_errors.py

This file was deleted.

2 changes: 2 additions & 0 deletions snuba/migrations/groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ def get_migrations(self) -> Sequence[str]:
"0011_rebuild_errors",
"0012_errors_make_level_nullable",
"0013_errors_add_hierarchical_hashes",
"0014_backfill_errors",
"0015_truncate_events",
]


Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,11 @@
"""\
Backfills the errors table from events.
This script will eventually be moved to a migration - after we have a multistorage
consumer running in all environments populating new events into both tables.
Errors replacements should be turned off while this script is running.
"""
from datetime import date, datetime, timedelta
from typing import Sequence

from snuba.clusters.cluster import ClickhouseClientSettings
from snuba.clusters.storage_sets import StorageSetKey
from snuba.datasets.storages import StorageKey
from snuba.datasets.storages.factory import get_writable_storage
from snuba.migrations import migration, operations
from snuba.migrations.operations import InsertIntoSelect


Expand Down Expand Up @@ -160,6 +154,9 @@ def backfill_errors() -> None:

timestamp = get_monday(ts.date())

total_partitions = int((timestamp - BEGINNING_OF_TIME).days / 7)
migrated_partitions = 0

print(f"Starting migration from {format_date(timestamp)}")

while True:
Expand All @@ -176,7 +173,11 @@ def backfill_errors() -> None:
)
clickhouse.execute(operation.format_sql())

print(f"Migrated {format_date(timestamp)}.")
migrated_partitions += 1

print(
f"Migrated {format_date(timestamp)}. ({migrated_partitions} of {total_partitions} partitions done)"
)

timestamp -= WINDOW

Expand Down Expand Up @@ -206,3 +207,24 @@ def backfill_errors() -> None:
PARTITION {partition} FINAL DEDUPLICATE
"""
)


class Migration(migration.CodeMigration):
    """
    Copies error events from the events table into the errors table.

    Errors replacements should be turned off while this migration runs.
    Note this migration is not reversible.
    """

    # Blocking: the backfill copies data partition by partition and must
    # complete before dependent migrations can run.
    blocking = True

    def forwards_global(self) -> Sequence[operations.RunPython]:
        backfill_op = operations.RunPython(
            func=backfill_errors, description="Backfill errors table from events"
        )
        return [backfill_op]

    def backwards_global(self) -> Sequence[operations.RunPython]:
        # Irreversible: there is no backwards operation.
        return []
28 changes: 28 additions & 0 deletions snuba/migrations/snuba_migrations/events/0015_truncate_events.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from typing import Sequence

from snuba.clusters.storage_sets import StorageSetKey
from snuba.migrations import migration, operations


class Migration(migration.ClickhouseNodeMigration):
    """
    Truncate the events table. Cannot be reversed.
    """

    blocking = False

    def forwards_local(self) -> Sequence[operations.SqlOperation]:
        truncate = operations.TruncateTable(
            storage_set=StorageSetKey.EVENTS, table_name="sentry_local"
        )
        return [truncate]

    def backwards_local(self) -> Sequence[operations.SqlOperation]:
        # Truncation cannot be undone.
        return []

    def forwards_dist(self) -> Sequence[operations.SqlOperation]:
        # No operation on distributed nodes.
        return []

    def backwards_dist(self) -> Sequence[operations.SqlOperation]:
        return []
3 changes: 0 additions & 3 deletions snuba/settings_ci.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,3 @@
USE_REDIS_CLUSTER = env("USE_REDIS_CLUSTER", "1") != "0"
REDIS_DB = 0
REDIS_PORT = int(env("REDIS_PORT", "7000"))

ERRORS_ROLLOUT_ALL = False
ERRORS_ROLLOUT_WRITABLE_STORAGE = False
3 changes: 0 additions & 3 deletions snuba/settings_docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,3 @@
DOGSTATSD_PORT = env("DOGSTATSD_PORT")

SENTRY_DSN = env("SENTRY_DSN")

ERRORS_ROLLOUT_ALL = False
ERRORS_ROLLOUT_WRITABLE_STORAGE = False
45 changes: 0 additions & 45 deletions tests/migrations/test_backfill_errors.py

This file was deleted.

65 changes: 64 additions & 1 deletion tests/migrations/test_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
from snuba.migrations.status import Status
from snuba.utils.metrics.backends.dummy import DummyMetricsBackend
from snuba.writer import BatchWriterEncoderWrapper
from tests.fixtures import get_raw_transaction
from tests.fixtures import get_raw_event, get_raw_transaction
from tests.helpers import write_unprocessed_events


def _drop_all_tables() -> None:
Expand Down Expand Up @@ -327,6 +328,68 @@ def test_groupedmessages_compatibility() -> None:
) == [("project_id, id",)]


def test_backfill_errors() -> None:
    """
    Runs the events migrations up to (but excluding) 0014_backfill_errors,
    writes raw events into the events table, then runs the backfill and
    verifies the rows were copied into the errors table.
    """
    backfill_migration_id = "0014_backfill_errors"
    runner = Runner()
    runner.run_migration(MigrationKey(MigrationGroup.SYSTEM, "0001_migrations"))

    events_migrations = next(
        group_migrations
        for (group, group_migrations) in runner.show_all()
        if group == MigrationGroup.EVENTS
    )

    # Run migrations up to (but not including) 0014_backfill_errors.
    # NOTE: renamed the loop variable from `migration` to avoid shadowing
    # the `migration` module name.
    for event_migration in events_migrations:
        if event_migration.migration_id == backfill_migration_id:
            break

        runner.run_migration(
            MigrationKey(MigrationGroup.EVENTS, event_migration.migration_id),
            force=True,
        )

    errors_storage = get_writable_storage(StorageKey.ERRORS)
    clickhouse = errors_storage.get_cluster().get_query_connection(
        ClickhouseClientSettings.QUERY
    )
    errors_table_name = errors_storage.get_table_writer().get_schema().get_table_name()

    def get_errors_count() -> int:
        # Row count of the errors table at this point in the test.
        return clickhouse.execute(f"SELECT count() from {errors_table_name}")[0][0]

    # Write 10 raw events into the (old) events table.
    raw_events = [get_raw_event() for _ in range(10)]

    events_storage = get_writable_storage(StorageKey.EVENTS)

    write_unprocessed_events(events_storage, raw_events)

    # Nothing has been backfilled into the errors table yet.
    assert get_errors_count() == 0

    # Run 0014_backfill_errors
    runner.run_migration(
        MigrationKey(MigrationGroup.EVENTS, backfill_migration_id), force=True
    )

    # All 10 events should now have been copied to the errors table.
    assert get_errors_count() == 10

    # Spot-check that the contexts nested columns were carried over intact.
    assert clickhouse.execute(
        f"SELECT contexts.key, contexts.value from {errors_table_name} LIMIT 1;"
    )[0] == (
        (
            "device.model_id",
            "geo.city",
            "geo.country_code",
            "geo.region",
            "os.kernel_version",
        ),
        ("Galaxy", "San Francisco", "US", "CA", "1.1.1"),
    )


def test_settings_skipped_group() -> None:
from snuba.migrations import groups, runner

Expand Down

0 comments on commit efc62e8

Please sign in to comment.