Skip to content

Commit

Permalink
feat(discover): Add snql support to the events-facets endpoint (#29674)
Browse files Browse the repository at this point in the history
- This also adds support for turbo + sample rate to the QueryBuilder
- This updates the tests so feature modifying for snql is easier
- Otherwise mostly copy paste from the existing code
  • Loading branch information
wmak authored Nov 3, 2021
1 parent 9ca1ef0 commit 0128740
Show file tree
Hide file tree
Showing 7 changed files with 237 additions and 42 deletions.
5 changes: 4 additions & 1 deletion src/sentry/api/endpoints/organization_events_facets.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import sentry_sdk
from rest_framework.response import Response

from sentry import tagstore
from sentry import features, tagstore
from sentry.api.bases import NoProjects, OrganizationEventsV2EndpointBase
from sentry.snuba import discover

Expand All @@ -24,6 +24,9 @@ def get(self, request, organization):
query=request.GET.get("query"),
params=params,
referrer="api.organization-events-facets.top-tags",
use_snql=features.has(
"organizations:discover-use-snql", organization, actor=request.user
),
)

with sentry_sdk.start_span(op="discover.endpoint", description="populate_results") as span:
Expand Down
9 changes: 7 additions & 2 deletions src/sentry/search/events/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from snuba_sdk.column import Column
from snuba_sdk.conditions import And, Condition, Op, Or
from snuba_sdk.entity import Entity
from snuba_sdk.expressions import Granularity, Limit, Offset
from snuba_sdk.expressions import Granularity, Limit, Offset, Turbo
from snuba_sdk.function import CurriedFunction, Function
from snuba_sdk.orderby import Direction, LimitBy, OrderBy
from snuba_sdk.query import Query
Expand Down Expand Up @@ -35,6 +35,8 @@ def __init__(
limit: Optional[int] = 50,
offset: Optional[int] = 0,
limitby: Optional[Tuple[str, int]] = None,
turbo: Optional[bool] = False,
sample_rate: Optional[float] = None,
):
super().__init__(dataset, params, auto_fields, functions_acl)

Expand All @@ -44,6 +46,8 @@ def __init__(
self.offset = None if offset is None else Offset(offset)

self.limitby = self.resolve_limitby(limitby)
self.turbo = Turbo(turbo)
self.sample_rate = sample_rate

self.where, self.having = self.resolve_conditions(
query, use_aggregate_conditions=use_aggregate_conditions
Expand Down Expand Up @@ -130,7 +134,7 @@ def get_snql_query(self) -> Query:

return Query(
dataset=self.dataset.value,
match=Entity(self.dataset.value),
match=Entity(self.dataset.value, sample=self.sample_rate),
select=self.columns,
array_join=self.array_join,
where=self.where,
Expand All @@ -140,6 +144,7 @@ def get_snql_query(self) -> Query:
limit=self.limit,
offset=self.offset,
limitby=self.limitby,
turbo=self.turbo,
)


Expand Down
137 changes: 135 additions & 2 deletions src/sentry/snuba/discover.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
resolve_field_list,
)
from sentry.search.events.filter import get_filter
from sentry.search.events.types import ParamsType
from sentry.tagstore.base import TOP_VALUES_DEFAULT_LIMIT
from sentry.utils.compat import filter
from sentry.utils.dates import to_timestamp
Expand Down Expand Up @@ -73,6 +74,7 @@
resolve_discover_column = resolve_column(Dataset.Discover)

OTHER_KEY = "Other"
TOP_KEYS_DEFAULT_LIMIT = 10


def is_real_column(col):
Expand Down Expand Up @@ -1019,7 +1021,13 @@ def get_id(result):
return result[1]


def get_facets(query, params, limit=10, referrer=None):
def get_facets(
query: str,
params: ParamsType,
referrer: str,
limit: Optional[int] = TOP_KEYS_DEFAULT_LIMIT,
use_snql: Optional[bool] = False,
):
"""
High-level API for getting 'facet map' results.
Expand All @@ -1029,11 +1037,136 @@ def get_facets(query, params, limit=10, referrer=None):
query (str) Filter query string to create conditions from.
params (Dict[str, str]) Filtering parameters with start, end, project_id, environment
referrer (str) A referrer string to help locate the origin of this query.
limit (int) The number of records to fetch.
referrer (str|None) A referrer string to help locate the origin of this query.
Returns Sequence[FacetResult]
"""
sentry_sdk.set_tag("discover.use_snql", use_snql)
if use_snql:
# temporarily add snql to referrer
referrer = f"{referrer}.wip-snql"
sample = len(params["project_id"]) > 2

with sentry_sdk.start_span(op="discover.discover", description="facets.frequent_tags"):
key_name_builder = QueryBuilder(
Dataset.Discover,
params,
query=query,
selected_columns=["tags_key", "count()"],
orderby=["-count()", "tags_key"],
limit=limit,
turbo=sample,
)
key_names = raw_snql_query(key_name_builder.get_snql_query(), referrer=referrer)
# Sampling keys for multi-project results as we don't need accuracy
# with that much data.
top_tags = [r["tags_key"] for r in key_names["data"]]
if not top_tags:
return []

# TODO: Make the sampling rate scale based on the result size and scaling factor in
# sentry.options. To test the lowest acceptable sampling rate, we use 0.1 which
# is equivalent to turbo. We don't use turbo though as we need to re-scale data, and
# using turbo could cause results to be wrong if the value of turbo is changed in snuba.
sample_rate = 0.1 if (key_names["data"][0]["count"] > 10000) else None
# Rescale the results if we're sampling
multiplier = 1 / sample_rate if sample_rate is not None else 1

fetch_projects = False
if len(params.get("project_id", [])) > 1:
if len(top_tags) == limit:
top_tags.pop()
fetch_projects = True

results = []
if fetch_projects:
with sentry_sdk.start_span(op="discover.discover", description="facets.projects"):
project_value_builder = QueryBuilder(
Dataset.Discover,
params,
query=query,
selected_columns=["count()", "project_id"],
orderby=["-count()"],
# Ensures Snuba will not apply FINAL
turbo=sample_rate is not None,
sample_rate=sample_rate,
)
project_values = raw_snql_query(
project_value_builder.get_snql_query(), referrer=referrer
)
results.extend(
[
FacetResult("project", r["project_id"], int(r["count"]) * multiplier)
for r in project_values["data"]
]
)

# Get tag counts for our top tags. Fetching them individually
# allows snuba to leverage promoted tags better and enables us to get
# the value count we want.
individual_tags = []
aggregate_tags = []
for i, tag in enumerate(top_tags):
if tag == "environment":
# Add here tags that you want to be individual
individual_tags.append(tag)
elif i >= len(top_tags) - TOP_KEYS_DEFAULT_LIMIT:
aggregate_tags.append(tag)
else:
individual_tags.append(tag)

with sentry_sdk.start_span(
op="discover.discover", description="facets.individual_tags"
) as span:
span.set_data("tag_count", len(individual_tags))
for tag_name in individual_tags:
tag = f"tags[{tag_name}]"
tag_value_builder = QueryBuilder(
Dataset.Discover,
params,
query=query,
selected_columns=["count()", tag],
orderby=["-count()"],
limit=TOP_VALUES_DEFAULT_LIMIT,
# Ensures Snuba will not apply FINAL
turbo=sample_rate is not None,
sample_rate=sample_rate,
)
tag_values = raw_snql_query(tag_value_builder.get_snql_query(), referrer=referrer)
results.extend(
[
FacetResult(tag_name, r[tag], int(r["count"]) * multiplier)
for r in tag_values["data"]
]
)

if aggregate_tags:
with sentry_sdk.start_span(op="discover.discover", description="facets.aggregate_tags"):
aggregate_value_builder = QueryBuilder(
Dataset.Discover,
params,
query=(query if query is not None else "")
+ f" tags_key:[{','.join(aggregate_tags)}]",
selected_columns=["count()", "tags_key", "tags_value"],
orderby=["tags_key", "-count()"],
limitby=("tags_key", TOP_VALUES_DEFAULT_LIMIT),
# Ensures Snuba will not apply FINAL
turbo=sample_rate is not None,
sample_rate=sample_rate,
)
aggregate_values = raw_snql_query(
aggregate_value_builder.get_snql_query(), referrer=referrer
)
results.extend(
[
FacetResult(r["tags_key"], r["tags_value"], int(r["count"]) * multiplier)
for r in aggregate_values["data"]
]
)

return results

with sentry_sdk.start_span(
op="discover.discover", description="facets.filter_transform"
) as span:
Expand Down
4 changes: 3 additions & 1 deletion src/sentry/snuba/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ class Columns(Enum):
TAGS_KEY = Column("events.tags.key", "tags.key", "tags.key", "tags.key", "tags.key")
TAGS_VALUE = Column("events.tags.value", "tags.value", "tags.value", "tags.value", "tags.value")
TAGS_KEYS = Column("events.tags_key", "tags_key", "tags_key", "tags_key", "tags_key")
TAGS_VALUES = Column("events.tags_value", "tags_value", "tags_value", None, "tags_value")
TAGS_VALUES = Column(
"events.tags_value", "tags_value", "tags_value", "tags_value", "tags_value"
)
TRANSACTION = Column(
"events.transaction", "transaction", "transaction_name", "transaction", "transaction"
)
Expand Down
30 changes: 30 additions & 0 deletions tests/sentry/search/events/test_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,3 +455,33 @@ def test_array_join_clause(self):
)
assert query.array_join == Column("spans.op")
query.get_snql_query().validate()

def test_sample_rate(self):
query = QueryBuilder(
Dataset.Discover,
self.params,
"",
selected_columns=[
"count()",
],
sample_rate=0.1,
)
assert query.sample_rate == 0.1
snql_query = query.get_snql_query()
snql_query.validate()
assert snql_query.match.sample == 0.1

def test_turbo(self):
query = QueryBuilder(
Dataset.Discover,
self.params,
"",
selected_columns=[
"count()",
],
turbo=True,
)
assert query.turbo.value
snql_query = query.get_snql_query()
snql_query.validate()
assert snql_query.turbo.value
26 changes: 15 additions & 11 deletions tests/sentry/snuba/test_discover.py
Original file line number Diff line number Diff line change
Expand Up @@ -6212,12 +6212,16 @@ def setUp(self):
def test_invalid_query(self):
with pytest.raises(InvalidSearchQuery):
discover.get_facets(
"\n", {"project_id": [self.project.id], "end": self.min_ago, "start": self.day_ago}
"\n",
{"project_id": [self.project.id], "end": self.min_ago, "start": self.day_ago},
"testing.get-facets-test",
)

def test_no_results(self):
results = discover.get_facets(
"", {"project_id": [self.project.id], "end": self.min_ago, "start": self.day_ago}
"",
{"project_id": [self.project.id], "end": self.min_ago, "start": self.day_ago},
"testing.get-facets-test",
)
assert results == []

Expand All @@ -6241,7 +6245,7 @@ def test_single_project(self):
project_id=self.project.id,
)
params = {"project_id": [self.project.id], "start": self.day_ago, "end": self.min_ago}
result = discover.get_facets("", params)
result = discover.get_facets("", params, "testing.get-facets-test")
assert len(result) == 5
assert {r.key for r in result} == {"color", "paying", "level"}
assert {r.value for r in result} == {"red", "blue", "1", "0", "error"}
Expand All @@ -6268,7 +6272,7 @@ def test_project_filter(self):
project_id=other_project.id,
)
params = {"project_id": [self.project.id], "start": self.day_ago, "end": self.min_ago}
result = discover.get_facets("", params)
result = discover.get_facets("", params, "testing.get-facets-test")
keys = {r.key for r in result}
assert keys == {"color", "level"}

Expand All @@ -6278,7 +6282,7 @@ def test_project_filter(self):
"start": self.day_ago,
"end": self.min_ago,
}
result = discover.get_facets("", params)
result = discover.get_facets("", params, "testing.get-facets-test")
keys = {r.key for r in result}
assert keys == {"level", "toy", "color", "project"}

Expand All @@ -6297,7 +6301,7 @@ def test_environment_promoted_tag(self):
project_id=self.project.id,
)
params = {"project_id": [self.project.id], "start": self.day_ago, "end": self.min_ago}
result = discover.get_facets("", params)
result = discover.get_facets("", params, "testing.get-facets-test")
keys = {r.key for r in result}
assert keys == {"environment", "level"}
assert {None, "prod", "staging"} == {f.value for f in result if f.key == "environment"}
Expand All @@ -6323,12 +6327,12 @@ def test_query_string(self):
project_id=self.project.id,
)
params = {"project_id": [self.project.id], "start": self.day_ago, "end": self.min_ago}
result = discover.get_facets("bad", params)
result = discover.get_facets("bad", params, "testing.get-facets-test")
keys = {r.key for r in result}
assert "color" in keys
assert "toy" not in keys

result = discover.get_facets("color:red", params)
result = discover.get_facets("color:red", params, "testing.get-facets-test")
keys = {r.key for r in result}
assert "color" in keys
assert "toy" not in keys
Expand All @@ -6353,12 +6357,12 @@ def test_query_string_with_aggregate_condition(self):
project_id=self.project.id,
)
params = {"project_id": [self.project.id], "start": self.day_ago, "end": self.min_ago}
result = discover.get_facets("bad", params)
result = discover.get_facets("bad", params, "testing.get-facets-test")
keys = {r.key for r in result}
assert "color" in keys
assert "toy" not in keys

result = discover.get_facets("color:red p95():>1", params)
result = discover.get_facets("color:red p95():>1", params, "testing.get-facets-test")
keys = {r.key for r in result}
assert "color" in keys
assert "toy" not in keys
Expand All @@ -6383,7 +6387,7 @@ def test_date_params(self):
project_id=self.project.id,
)
params = {"project_id": [self.project.id], "start": self.day_ago, "end": self.min_ago}
result = discover.get_facets("", params)
result = discover.get_facets("", params, "testing.get-facets-test")
keys = {r.key for r in result}
assert "color" in keys
assert "toy" not in keys
Expand Down
Loading

0 comments on commit 0128740

Please sign in to comment.