Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(discover): Use SnQL for some of event-stats #29471

Merged
merged 4 commits into from
Oct 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/sentry/api/endpoints/organization_events_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ def has_chart_interpolation(self, organization: Organization, request: Request)
def has_top_events(self, organization: Organization, request: Request) -> bool:
return features.has("organizations:discover-top-events", organization, actor=request.user)

def has_discover_snql(self, organization: Organization, request: Request) -> bool:
    """Return whether the ``discover-use-snql`` feature flag is enabled.

    Checked per-organization, on behalf of the requesting user, so the
    SnQL code path can be rolled out gradually.
    """
    flag_enabled: bool = features.has(
        "organizations:discover-use-snql", organization, actor=request.user
    )
    return flag_enabled

def get(self, request: Request, organization: Organization) -> Response:
with sentry_sdk.start_span(op="discover.endpoint", description="filter_params") as span:
span.set_data("organization", organization)
Expand Down Expand Up @@ -118,6 +121,7 @@ def get_event_stats(
referrer=referrer,
zerofill_results=zerofill_results,
comparison_delta=comparison_delta,
use_snql=self.has_discover_snql(organization, request),
)

try:
Expand Down
13 changes: 10 additions & 3 deletions src/sentry/discover/arithmetic.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import re
from dataclasses import dataclass
from typing import Any, List, Optional, Tuple, Union

from parsimonious.exceptions import ParseError
Expand Down Expand Up @@ -76,6 +77,12 @@ def __repr__(self) -> str:
return repr([self.operator, self.lhs, self.rhs])


@dataclass(frozen=True)
class ParsedEquation:
    """An equation parsed by ``parse_arithmetic``, paired with metadata.

    Produced by ``resolve_equation_list`` so callers (e.g. the SnQL query
    builder) can decide whether a resolved equation belongs with the
    aggregates or the plain columns.
    """

    # The parsed operation tree for the equation.
    equation: Operation
    # True when the equation referenced at least one function (set from
    # ``len(functions) > 0`` at construction time), i.e. it aggregates.
    contains_functions: bool


def flatten(remaining):
"""Take all the remaining terms and reduce them to a single tree"""
term = remaining.pop(0)
Expand Down Expand Up @@ -305,7 +312,7 @@ def resolve_equation_list(
auto_add: Optional[bool] = False,
plain_math: Optional[bool] = False,
use_snql: Optional[bool] = False,
) -> Tuple[List[JsonQueryType], List[str], List[Operation]]:
) -> Tuple[List[JsonQueryType], List[str], List[Operation], List[bool]]:
"""Given a list of equation strings, resolve them to their equivalent snuba json query formats
:param equations: list of equations strings that haven't been parsed yet
:param selected_columns: list of public aliases from the endpoint, can be a mix of fields and aggregates
Expand All @@ -317,7 +324,7 @@ def resolve_equation_list(
:param use_snql: Whether we're resolving for snql or not
"""
resolved_equations: List[JsonQueryType] = []
parsed_equations: List[Operation] = []
parsed_equations: List[ParsedEquation] = []
resolved_columns: List[str] = selected_columns[:]
for index, equation in enumerate(equations):
parsed_equation, fields, functions = parse_arithmetic(equation, use_snql=use_snql)
Expand Down Expand Up @@ -349,7 +356,7 @@ def resolve_equation_list(
resolved_equations.append(parsed_equation.to_snuba_json(f"equation[{index}]"))
# TODO: currently returning "resolved_equations" for the json syntax
# once we're converted to SnQL this should only return parsed_equations
parsed_equations.append(parsed_equation)
parsed_equations.append(ParsedEquation(parsed_equation, len(functions) > 0))
return resolved_equations, resolved_columns, parsed_equations


Expand Down
4 changes: 3 additions & 1 deletion src/sentry/search/events/base.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List, Mapping, Optional, Set
from typing import Dict, List, Mapping, Optional, Set

from django.utils.functional import cached_property
from snuba_sdk.aliased_expression import AliasedExpression
Expand All @@ -18,11 +18,13 @@ def __init__(
params: ParamsType,
auto_fields: bool = False,
functions_acl: Optional[List[str]] = None,
equation_config: Optional[Dict[str, bool]] = None,
):
self.dataset = dataset
self.params = params
self.auto_fields = auto_fields
self.functions_acl = set() if functions_acl is None else functions_acl
self.equation_config = equation_config if equation_config is not None else {}

# Function is a subclass of CurriedFunction
self.where: List[WhereType] = []
Expand Down
60 changes: 53 additions & 7 deletions src/sentry/search/events/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
from snuba_sdk.column import Column
from snuba_sdk.conditions import Condition
from snuba_sdk.entity import Entity
from snuba_sdk.expressions import Limit, Offset
from snuba_sdk.expressions import Granularity, Limit, Offset
from snuba_sdk.function import CurriedFunction
from snuba_sdk.orderby import LimitBy
from snuba_sdk.orderby import Direction, LimitBy, OrderBy
from snuba_sdk.query import Query

from sentry.search.events.fields import InvalidSearchQuery
Expand Down Expand Up @@ -53,10 +53,6 @@ def __init__(
self.columns = self.resolve_select(selected_columns, equations)
self.orderby = self.resolve_orderby(orderby)

@property
def select(self) -> Optional[List[SelectType]]:
return self.columns

def resolve_limitby(self, limitby: Optional[Tuple[str, int]]) -> Optional[LimitBy]:
if limitby is None:
return None
Expand Down Expand Up @@ -125,7 +121,7 @@ def get_snql_query(self) -> Query:
return Query(
dataset=self.dataset.value,
match=Entity(self.dataset.value),
select=self.select,
select=self.columns,
where=self.where,
having=self.having,
groupby=self.groupby,
Expand All @@ -134,3 +130,53 @@ def get_snql_query(self) -> Query:
offset=self.offset,
limitby=self.limitby,
)


class TimeseriesQueryBuilder(QueryFilter):
    """Query builder for timeseries (event-stats) SnQL queries.

    Unlike the table-oriented ``QueryBuilder``, results are always grouped
    and ordered by the time bucket, with a fixed granularity.

    NOTE(review): the fixed ``groupby=[time]`` assumption does not hold for
    top-N charts; those should use a subclass that adjusts the groupby.
    """

    # Snuba exposes the time bucket as a virtual "time" column.
    time_column = Column("time")

    def __init__(
        self,
        dataset: Dataset,
        params: ParamsType,
        granularity: int,
        query: Optional[str] = None,
        selected_columns: Optional[List[str]] = None,
        equations: Optional[List[str]] = None,
        limit: Optional[int] = 10000,
    ):
        """
        :param dataset: The snuba dataset to query.
        :param params: Filter params (start/end, project ids, etc.).
        :param granularity: Time bucket size, in seconds.
        :param query: Raw discover query string to resolve into conditions.
        :param selected_columns: Public aliases for the y-axis columns.
        :param equations: Unparsed equation strings.
        :param limit: Max rows returned; ``None`` for no limit.
        """
        super().__init__(
            dataset,
            params,
            auto_fields=False,
            functions_acl=[],
            # Timeseries equations may only reference aggregates, and
            # missing fields are auto-added rather than rejected.
            equation_config={"auto_add": True, "aggregates_only": True},
        )
        self.where, self.having = self.resolve_conditions(query, use_aggregate_conditions=True)

        self.limit = None if limit is None else Limit(limit)

        # params depends on parse_query, and conditions being resolved first since there may be projects in conditions
        self.where += self.resolve_params()
        self.columns = self.resolve_select(selected_columns, equations)
        self.granularity = Granularity(granularity)

    @property
    def select(self) -> List[SelectType]:
        """The aggregate columns forming the y-axis.

        :raises InvalidSearchQuery: if no aggregate was selected, since a
            timeseries without a y-axis is meaningless.
        """
        if not self.aggregates:
            raise InvalidSearchQuery("Cannot query a timeseries without a Y-Axis")
        return self.aggregates

    def get_snql_query(self) -> Query:
        """Build the final SnQL ``Query`` for this timeseries."""
        return Query(
            dataset=self.dataset.value,
            match=Entity(self.dataset.value),
            select=self.select,
            where=self.where,
            having=self.having,
            # This is a timeseries, the groupby will always be time
            groupby=[self.time_column],
            orderby=[OrderBy(self.time_column, Direction.ASC)],
            granularity=self.granularity,
            limit=self.limit,
        )
18 changes: 10 additions & 8 deletions src/sentry/search/events/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -2313,8 +2313,9 @@ def __init__(
params: ParamsType,
auto_fields: bool = False,
functions_acl: Optional[List[str]] = None,
equation_config: Optional[Dict[str, bool]] = None,
):
super().__init__(dataset, params, auto_fields, functions_acl)
super().__init__(dataset, params, auto_fields, functions_acl, equation_config)

self.function_alias_map: Dict[str, FunctionDetails] = {}
self.field_alias_converter: Mapping[str, Callable[[str], SelectType]] = {
Expand Down Expand Up @@ -2812,14 +2813,15 @@ def resolve_select(

if equations:
_, _, parsed_equations = resolve_equation_list(
equations, stripped_columns, use_snql=True
)
resolved_columns.extend(
[
self.resolve_equation(equation, f"equation[{index}]")
for index, equation in enumerate(parsed_equations)
]
equations, stripped_columns, use_snql=True, **self.equation_config
)
for index, parsed_equation in enumerate(parsed_equations):
resolved_equation = self.resolve_equation(
parsed_equation.equation, f"equation[{index}]"
)
resolved_columns.append(resolved_equation)
if parsed_equation.contains_functions:
self.aggregates.append(resolved_equation)

# Add threshold config alias if there's a function that depends on it
# TODO: this should be replaced with an explicit request for the project_threshold_config as a column
Expand Down
5 changes: 3 additions & 2 deletions src/sentry/search/events/filter.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from datetime import datetime
from typing import Callable, List, Mapping, Optional, Sequence, Tuple, Union
from typing import Callable, Dict, List, Mapping, Optional, Sequence, Tuple, Union

from parsimonious.exceptions import ParseError
from sentry_relay import parse_release as parse_release_relay
Expand Down Expand Up @@ -1045,8 +1045,9 @@ def __init__(
params: ParamsType,
auto_fields: bool = False,
functions_acl: Optional[List[str]] = None,
equation_config: Optional[Dict[str, bool]] = None,
):
super().__init__(dataset, params, auto_fields, functions_acl)
super().__init__(dataset, params, auto_fields, functions_acl, equation_config)

self.search_filter_converter: Mapping[
str, Callable[[SearchFilter], Optional[WhereType]]
Expand Down
32 changes: 31 additions & 1 deletion src/sentry/snuba/discover.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@
from typing import Dict, Optional, Sequence

import sentry_sdk
from dateutil.parser import parse as parse_datetime

from sentry import options
from sentry.discover.arithmetic import categorize_columns, resolve_equation_list
from sentry.models import Group
from sentry.models.transaction_threshold import ProjectTransactionThreshold
from sentry.search.events.builder import QueryBuilder
from sentry.search.events.builder import QueryBuilder, TimeseriesQueryBuilder
from sentry.search.events.constants import CONFIGURABLE_AGGREGATES, DEFAULT_PROJECT_THRESHOLD
from sentry.search.events.fields import (
FIELD_ALIASES,
Expand All @@ -24,6 +25,7 @@
from sentry.search.events.filter import get_filter
from sentry.tagstore.base import TOP_VALUES_DEFAULT_LIMIT
from sentry.utils.compat import filter
from sentry.utils.dates import to_timestamp
from sentry.utils.math import mean, nice_int
from sentry.utils.snuba import (
SNUBA_AND,
Expand Down Expand Up @@ -106,6 +108,9 @@ def zerofill(data, start, end, rollup, orderby):
data_by_time = {}

for obj in data:
# This is needed for SnQL, and was originally done in utils.snuba.get_snuba_translators
if isinstance(obj["time"], str):
obj["time"] = int(to_timestamp(parse_datetime(obj["time"])))
Comment on lines +112 to +113
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this needed? Does snql return a str now instead of a int like it did previously?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so the query has always returned the date as a str, but there was a reverse processor here
Which we're no longer using.

if obj["time"] in data_by_time:
data_by_time[obj["time"]].append(obj)
else:
Expand Down Expand Up @@ -467,6 +472,7 @@ def timeseries_query(
referrer: Optional[str] = None,
zerofill_results: bool = True,
comparison_delta: Optional[timedelta] = None,
use_snql: Optional[bool] = False,
):
"""
High-level API for doing arbitrary user timeseries queries against events.
Expand All @@ -490,6 +496,30 @@ def timeseries_query(
query time-shifted back by comparison_delta, and compare the results to get the % change for each
time bucket. Requires that we only pass
"""
sentry_sdk.set_tag("discover.use_snql", use_snql and comparison_delta is None)
if use_snql and comparison_delta is None:
# temporarily add snql to referrer
referrer = f"{referrer}.wip-snql"
equations, columns = categorize_columns(selected_columns)
builder = TimeseriesQueryBuilder(
Dataset.Discover,
params,
rollup,
query=query,
selected_columns=columns,
equations=equations,
)
snql_query = builder.get_snql_query()

query_results = raw_snql_query(snql_query, referrer)

result = (
zerofill(query_results["data"], params["start"], params["end"], rollup, "time")
if zerofill_results
else query_results["data"]
)
return SnubaTSResult({"data": result}, params["start"], params["end"], rollup)

with sentry_sdk.start_span(
op="discover.discover", description="timeseries.filter_transform"
) as span:
Expand Down
6 changes: 3 additions & 3 deletions tests/sentry/search/events/test_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def test_simple_query(self):
],
)
self.assertCountEqual(
query.select,
query.columns,
[
AliasedExpression(Column("email"), "user.email"),
Column("release"),
Expand Down Expand Up @@ -232,7 +232,7 @@ def test_project_alias_column(self):
],
)
self.assertCountEqual(
query.select,
query.columns,
[
Function(
"transform",
Expand Down Expand Up @@ -268,7 +268,7 @@ def test_project_alias_column_with_project_condition(self):
)
# Because of the condition on project there should only be 1 project in the transform
self.assertCountEqual(
query.select,
query.columns,
[
Function(
"transform",
Expand Down
Loading