Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat(metrics): Crash free breakdown on metrics backend [INGEST-249] #28945

Merged
merged 5 commits into from
Oct 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/sentry/api/endpoints/project_release_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from sentry.api.exceptions import ResourceDoesNotExist
from sentry.api.serializers import serialize
from sentry.models import Release, ReleaseProject
from sentry.snuba.sessions import get_crash_free_breakdown, get_project_release_stats
from sentry.snuba.sessions import get_project_release_stats
from sentry.utils.dates import get_rollup_from_request


Expand Down Expand Up @@ -74,7 +74,7 @@ def get(self, request, project, version):
)

users_breakdown = []
for data in get_crash_free_breakdown(
for data in release_health.get_crash_free_breakdown(
project_id=params["project_id"][0],
release=version,
environments=params.get("environment"),
Expand Down
18 changes: 18 additions & 0 deletions src/sentry/release_health/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,14 @@ class ReleaseAdoption(TypedDict):
ReleasesAdoption = Mapping[Tuple[ProjectId, ReleaseName], ReleaseAdoption]


class CrashFreeBreakdown(TypedDict):
date: datetime
total_users: int
crash_free_users: Optional[float]
total_sessions: int
crash_free_sessions: Optional[float]


class ReleaseHealthBackend(Service): # type: ignore
"""Abstraction layer for all release health related queries"""

Expand All @@ -63,6 +71,7 @@ class ReleaseHealthBackend(Service): # type: ignore
"check_has_health_data",
"get_release_sessions_time_bounds",
"check_releases_have_health_data",
"get_crash_free_breakdown",
"get_changed_project_release_model_adoptions",
"get_oldest_health_data_for_releases",
)
Expand Down Expand Up @@ -178,6 +187,15 @@ def check_releases_have_health_data(
"""
raise NotImplementedError()

def get_crash_free_breakdown(
self,
project_id: ProjectId,
release: ReleaseName,
start: datetime,
environments: Optional[Sequence[EnvironmentName]] = None,
) -> Sequence[CrashFreeBreakdown]:
"""Get stats about crash free sessions and stats for the last 1, 2, 7, 14 and 30 days"""

def get_changed_project_release_model_adoptions(
self,
project_ids: Sequence[ProjectId],
Expand Down
159 changes: 143 additions & 16 deletions src/sentry/release_health/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from sentry.models.project import Project
from sentry.release_health.base import (
CrashFreeBreakdown,
CurrentAndPreviousCrashFreeRates,
EnvironmentName,
OrganizationId,
Expand All @@ -22,13 +23,17 @@
)
from sentry.sentry_metrics import indexer
from sentry.snuba.dataset import Dataset, EntityKey
from sentry.utils.snuba import raw_snql_query
from sentry.utils.snuba import QueryOutsideRetentionError, raw_snql_query


class MetricIndexNotFound(Exception):
pass


def get_tag_values_list(org_id: int, values: Sequence[str]) -> Sequence[int]:
return [x for x in [try_get_string_index(org_id, x) for x in values] if x is not None]


def metric_id(org_id: int, name: str) -> int:
index = indexer.resolve(org_id, name) # type: ignore
if index is None:
Expand All @@ -50,7 +55,7 @@ def tag_value(org_id: int, name: str) -> int:
return index # type: ignore


def try_get_tag_value(org_id: int, name: str) -> Optional[int]:
def try_get_string_index(org_id: int, name: str) -> Optional[int]:
return indexer.resolve(org_id, name) # type: ignore


Expand Down Expand Up @@ -382,13 +387,7 @@ def get_release_sessions_time_bounds(
]

if environments is not None:
env_filter = [
x
for x in [
try_get_tag_value(org_id, environment) for environment in environments
]
if x is not None
]
env_filter = get_tag_values_list(org_id, environments)
if not env_filter:
raise MetricIndexNotFound()

Expand Down Expand Up @@ -524,11 +523,7 @@ def check_has_health_data(
if includes_releases:
releases = [x[1] for x in projects_list] # type: ignore
release_column_name = tag_key(org_id, "release")
releases_ids = [
release_id
for release_id in [try_get_tag_value(org_id, release) for release in releases]
if release_id is not None
]
releases_ids = get_tag_values_list(org_id, releases)
where_clause.append(Condition(Column(release_column_name), Op.IN, releases_ids))
column_names = ["project_id", release_column_name]

Expand Down Expand Up @@ -578,7 +573,7 @@ def check_releases_have_health_data(
releases_ids = [
release_id
for release_id in [
try_get_tag_value(organization_id, release) for release in release_versions
try_get_string_index(organization_id, release) for release in release_versions
]
if release_id is not None
]
Expand Down Expand Up @@ -608,6 +603,138 @@ def extract_row_info(row: Mapping[str, Union[OrganizationId, str]]) -> ReleaseNa

return {extract_row_info(row) for row in result["data"]}

def _get_crash_free_breakdown_fn(
self,
org_id: int,
project_id: ProjectId,
release: ReleaseName,
start: datetime,
environments: Optional[Sequence[EnvironmentName]] = None,
) -> Callable[[datetime], CrashFreeBreakdown]:
def generate_defaults(end: datetime) -> CrashFreeBreakdown:
"""Function to use if querying snuba is not necessary"""
return {
"crash_free_sessions": None,
"crash_free_users": None,
"date": end,
"total_sessions": 0,
"total_users": 0,
}

# 1) Get required string indexes
try:
release_key = tag_key(org_id, "release")
release_value = tag_value(org_id, release)
environment_key = tag_key(org_id, "environment")
status_key = tag_key(org_id, "session.status")
except MetricIndexNotFound:
# No need to query snuba if any of these is missing
return generate_defaults

environment_values = None
if environments is not None:
environment_values = get_tag_values_list(org_id, environments)

if environment_values == []:
# No need to query snuba with an empty list
return generate_defaults

status_init = tag_value(org_id, "init")
status_crashed = tag_value(org_id, "crashed")

conditions = [
Condition(Column("org_id"), Op.EQ, org_id),
Condition(Column("project_id"), Op.EQ, project_id),
Condition(Column(release_key), Op.EQ, release_value),
Condition(Column("timestamp"), Op.GTE, start),
Condition(Column(status_key), Op.IN, [status_init, status_crashed]),
]
if environment_values is not None:
conditions.append(Condition(Column(environment_key), Op.IN, environment_values))

def query_stats(end: datetime) -> CrashFreeBreakdown:
def _get_data(entity_key: EntityKey, metric_name: str) -> Tuple[int, int]:
total = 0
crashed = 0
metric_id = try_get_string_index(org_id, metric_name)
if metric_id is not None:
where = conditions + [
Condition(Column("metric_id"), Op.EQ, metric_id),
Condition(Column("timestamp"), Op.LT, end),
]
data = raw_snql_query(
Query(
dataset=Dataset.Metrics.value,
match=Entity(entity_key.value),
select=[Column("value")],
where=where,
groupby=[Column(status_key)],
),
referrer="release_health.metrics.crash-free-breakdown.session",
)["data"]
for row in data:
if row[status_key] == status_init:
total = int(row["value"])
elif row[status_key] == status_crashed:
crashed = int(row["value"])

return total, crashed

sessions_total, sessions_crashed = _get_data(EntityKey.MetricsCounters, "session")
users_total, users_crashed = _get_data(EntityKey.MetricsSets, "user")

return {
"date": end,
"total_users": users_total,
"crash_free_users": 100 - users_crashed / float(users_total) * 100
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would unify how this is calculated everywhere because if i recall correctly @untitaker might have been doing this differently?

Copy link
Member

@untitaker untitaker Oct 4, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No I did exactly this... we're just porting the formulas from sessions.py where it was also duplicated but also consistent.

not opposed to deduping in any case

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if users_total
else None,
"total_sessions": sessions_total,
"crash_free_sessions": 100 - sessions_crashed / float(sessions_total) * 100
if sessions_total
else None,
}

return query_stats

def get_crash_free_breakdown(
self,
project_id: ProjectId,
release: ReleaseName,
start: datetime,
environments: Optional[Sequence[EnvironmentName]] = None,
) -> Sequence[CrashFreeBreakdown]:

org_id = self._get_org_id([project_id])

now = datetime.now(pytz.utc)
query_fn = self._get_crash_free_breakdown_fn(
org_id, project_id, release, start, environments
)

last: Optional[datetime] = None
rv = []
for offset in (
timedelta(days=1),
timedelta(days=2),
timedelta(days=7),
timedelta(days=14),
Copy link
Contributor

@ahmedetefy ahmedetefy Sep 29, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • What is the reason behind looping over a tuple here?
  • Not a big fan of looping over the timedelta, so much repitition, why don't you just loop over the number of days and thencall timedelta once outside of the loop

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tuples are cheaper to construct

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure but how expensive really is it to have a list of 4 items?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I get the argument that lists are supposed to be the homogenous container and tuples the heterogenous but I don't buy it. Not a lot of code works this way and it seems like a style guideline that was invented long after tuples and lists were. For all practical purposes tuples are immutable lists as long as mypy doesn't get in your way about it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW I deliberately did not touch this code but simply copied it over from sessions.py.

timedelta(days=30),
):
try:
end = start + offset
if end > now:
if last is None or (end - last).days > 1:
rv.append(query_fn(now))
break
rv.append(query_fn(end))
last = end
except QueryOutsideRetentionError:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

instead of having dummy_fn, what about letting the tag indexer exceptions bubble up until here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanted to do the whole "does tag exist" dance once instead of in every loop iteration.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, makes sense. I think we may want an in-process cache around indexer anyway, because we have some endpoints that just call one method on the releasehealth service after the other.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the in-memory cache should be part of the indexer service IMO. I'm writing all my code under the assumption that lookups will be optimized there.

# cannot query for these
pass

return rv

def get_changed_project_release_model_adoptions(
self,
project_ids: Sequence[ProjectId],
Expand Down Expand Up @@ -667,7 +794,7 @@ def get_oldest_health_data_for_releases(
releases = [x[1] for x in project_releases]
releases_ids = [
release_id
for release_id in [try_get_tag_value(org_id, release) for release in releases]
for release_id in [try_get_string_index(org_id, release) for release in releases]
if release_id is not None
]

Expand Down
13 changes: 13 additions & 0 deletions src/sentry/release_health/sessions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import Mapping, Optional, Sequence, Set, Tuple

from sentry.release_health.base import (
CrashFreeBreakdown,
CurrentAndPreviousCrashFreeRates,
EnvironmentName,
OrganizationId,
Expand All @@ -17,6 +18,7 @@
_check_has_health_data,
_check_releases_have_health_data,
_get_changed_project_release_model_adoptions,
_get_crash_free_breakdown,
_get_oldest_health_data_for_releases,
_get_release_adoption,
_get_release_sessions_time_bounds,
Expand Down Expand Up @@ -89,6 +91,17 @@ def check_releases_have_health_data(
end,
)

def get_crash_free_breakdown(
self,
project_id: ProjectId,
release: ReleaseName,
start: datetime,
environments: Optional[Sequence[EnvironmentName]] = None,
) -> Sequence[CrashFreeBreakdown]:
return _get_crash_free_breakdown( # type: ignore
project_id=project_id, release=release, start=start, environments=environments
)

def get_changed_project_release_model_adoptions(
self,
project_ids: Sequence[ProjectId],
Expand Down
2 changes: 1 addition & 1 deletion src/sentry/snuba/sessions.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ def get_release_health_data_overview(
return rv


def get_crash_free_breakdown(project_id, release, start, environments=None):
def _get_crash_free_breakdown(project_id, release, start, environments=None):
filter_keys = {"project_id": [project_id]}
conditions = [["release", "=", release]]
if environments is not None:
Expand Down
32 changes: 32 additions & 0 deletions tests/sentry/api/endpoints/test_project_release_stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from datetime import datetime

from django.urls import reverse

from sentry.models import Release
from sentry.testutils import APITestCase


class ProjectReleaseStatsTest(APITestCase):
def test_simple(self):
"""Minimal test to ensure code coverage of the endpoint"""
self.login_as(user=self.user)

project = self.create_project(name="foo")
release = Release.objects.create(
organization_id=project.organization_id,
version="1",
date_added=datetime(2013, 8, 13, 3, 8, 24, 880386),
)
release.add_project(project)

url = reverse(
"sentry-api-0-project-release-stats",
kwargs={
"organization_slug": project.organization.slug,
"project_slug": project.slug,
"version": "1",
},
)
response = self.client.get(url, format="json")

assert response.status_code == 200, response.content
Loading