Skip to content

Commit

Permalink
fix: Fix queries on columns with different underlying types (#2113)
Browse files Browse the repository at this point in the history
Queries involving string columns stored internally as UUID did not behave
properly when it included a condition involving one of the >, >=, <. <=,
LIKE or NOT LIKE operators.

The BaseTypeConverter contained an optimization that attempted to use
the internal column for the comparison however this is not correct
as the comparison returns completely different results depending on
the type.

This change drops the optimizations on all comparisons except =, !=,
IS NULL and IS NOT NULL. Queries containing a mix of optimizable and
non optimizable conditions are treated as unoptimized and the
external type is used.
  • Loading branch information
lynnagara authored Sep 27, 2021
1 parent df36224 commit 92e15e4
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 10 deletions.
60 changes: 54 additions & 6 deletions snuba/query/processors/type_converters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from snuba.clickhouse.processors import QueryProcessor
from snuba.clickhouse.query import Query
from snuba.query.conditions import FUNCTION_TO_OPERATOR, ConditionFunctions
from snuba.query.conditions import ConditionFunctions
from snuba.query.exceptions import ValidationException
from snuba.query.expressions import Column, Expression, FunctionCall, Literal
from snuba.query.matchers import Any as AnyMatch
Expand All @@ -19,19 +19,46 @@ class ColumnTypeError(ValidationException):


class BaseTypeConverter(QueryProcessor, ABC):
def __init__(self, columns: Set[str]):
def __init__(self, columns: Set[str], optimize_ordering: bool = False):
self.columns = columns
self.optimize_ordering = optimize_ordering
column_match = Or([String(col) for col in columns])

literal = Param("literal", LiteralMatch(AnyMatch(str)))

ordering_operators = (
ConditionFunctions.GT,
ConditionFunctions.GTE,
ConditionFunctions.LT,
ConditionFunctions.LTE,
)

operator = Param(
"operator",
Or(
[
String(op)
for op in FUNCTION_TO_OPERATOR
if op not in (ConditionFunctions.IN, ConditionFunctions.NOT_IN)
for op in (
ConditionFunctions.EQ,
ConditionFunctions.NEQ,
ConditionFunctions.IS_NULL,
ConditionFunctions.IS_NOT_NULL,
*(ordering_operators if self.optimize_ordering else ()),
)
]
),
)

unoptimizable_operator = Param(
"operator",
Or(
[
String(op)
for op in (
ConditionFunctions.LIKE,
ConditionFunctions.NOT_LIKE,
*(() if self.optimize_ordering else ordering_operators),
)
]
),
)
Expand Down Expand Up @@ -62,16 +89,26 @@ def __init__(self, columns: Set[str]):
),
)

self.__unoptimizable_condition_matcher = Or(
[
FunctionCallMatch(unoptimizable_operator, (literal, col)),
FunctionCallMatch(unoptimizable_operator, (col, literal)),
]
)

def process_query(self, query: Query, request_settings: RequestSettings) -> None:
query.transform_expressions(
self._process_expressions, skip_transform_condition=True
)

condition = query.get_condition()
if condition is not None:
processed = condition.transform(self.__process_optimizable_condition)
if processed == condition:
if self.__contains_unoptimizable_condition(condition):
processed = condition.transform(self._process_expressions)
else:
processed = condition.transform(self.__process_optimizable_condition)
if condition == processed:
processed = processed.transform(self._process_expressions)

query.set_ast_condition(processed)

Expand All @@ -81,6 +118,17 @@ def __strip_column_alias(self, exp: Expression) -> Expression:
alias=None, table_name=exp.table_name, column_name=exp.column_name
)

def __contains_unoptimizable_condition(self, exp: Expression) -> bool:
"""
Returns true if there is an unoptimizable condition, otherwise false.
"""
for e in exp:
match = self.__unoptimizable_condition_matcher.match(e)
if match is not None:
return True

return False

def __process_optimizable_condition(self, exp: Expression) -> Expression:
def assert_literal(lit: Expression) -> Literal:
assert isinstance(lit, Literal)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
class FixedStringArrayColumnProcessor(BaseTypeConverter):
def __init__(self, columns: Set[str], fixed_length: int):
self.fixed_length = fixed_length
super().__init__(columns)
super().__init__(columns, optimize_ordering=True)

def _translate_literal(self, exp: Literal) -> Expression:
try:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
from typing import Set

from snuba.query.expressions import Column, Expression, FunctionCall, Literal
from snuba.query.processors.type_converters import BaseTypeConverter, ColumnTypeError


class HexIntColumnProcessor(BaseTypeConverter):
def __init__(self, columns: Set[str]) -> None:
super().__init__(columns, optimize_ordering=True)

def _translate_literal(self, exp: Literal) -> Literal:
try:
assert isinstance(exp.value, str)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import uuid
from typing import Set

from snuba.query.expressions import (
Argument,
Expand All @@ -12,6 +13,9 @@


class UUIDArrayColumnProcessor(BaseTypeConverter):
def __init__(self, columns: Set[str]) -> None:
super().__init__(columns)

def _translate_literal(self, exp: Literal) -> Expression:
try:
assert isinstance(exp.value, str)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import uuid
from typing import Set

from snuba.query.expressions import Column, Expression, FunctionCall, Literal
from snuba.query.processors.type_converters import BaseTypeConverter, ColumnTypeError


class UUIDColumnProcessor(BaseTypeConverter):
def __init__(self, columns: Set[str]) -> None:
super().__init__(columns, optimize_ordering=False)

def _translate_literal(self, exp: Literal) -> Literal:
try:
assert isinstance(exp.value, str)
Expand Down
79 changes: 76 additions & 3 deletions tests/query/processors/test_uuid_column_processor.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
import pytest
import uuid

import pytest

from snuba.clickhouse.columns import ColumnSet
from snuba.clickhouse.formatter.expression import ClickhouseExpressionFormatter
from snuba.clickhouse.query import Query
from snuba.query import SelectedExpression
from snuba.query.conditions import (
binary_condition,
BooleanFunctions,
ConditionFunctions,
binary_condition,
)
from snuba.query.data_source.simple import Table
from snuba.query.expressions import Column, Expression, FunctionCall, Literal
from snuba.clickhouse.query import Query
from snuba.query.processors.type_converters import ColumnTypeError
from snuba.query.processors.type_converters.uuid_column_processor import (
UUIDColumnProcessor,
Expand Down Expand Up @@ -107,6 +108,78 @@
"equals(column1, 'a7d67cf7-9677-4551-a95b-e6543cacd459')",
id="equals(column1, 'a7d67cf7-9677-4551-a95b-e6543cacd459')",
),
pytest.param(
binary_condition(
ConditionFunctions.GTE,
Column(None, None, "column1"),
Literal(None, "a7d67cf796774551a95be6543cacd459"),
),
binary_condition(
ConditionFunctions.GTE,
FunctionCall(
None,
"replaceAll",
(
FunctionCall(None, "toString", (Column(None, None, "column1"),),),
Literal(None, "-"),
Literal(None, ""),
),
),
Literal(None, "a7d67cf796774551a95be6543cacd459"),
),
"greaterOrEquals(replaceAll(toString(column1), '-', ''), 'a7d67cf796774551a95be6543cacd459')",
id="Use string UUID if GTE",
),
pytest.param(
binary_condition(
BooleanFunctions.OR,
binary_condition(
ConditionFunctions.EQ,
Column(None, None, "column1"),
Literal(None, "a7d67cf796774551a95be6543cacd458"),
),
binary_condition(
ConditionFunctions.GTE,
Column(None, None, "column1"),
Literal(None, "a7d67cf796774551a95be6543cacd459"),
),
),
binary_condition(
BooleanFunctions.OR,
binary_condition(
ConditionFunctions.EQ,
FunctionCall(
None,
"replaceAll",
(
FunctionCall(
None, "toString", (Column(None, None, "column1"),),
),
Literal(None, "-"),
Literal(None, ""),
),
),
Literal(None, "a7d67cf796774551a95be6543cacd458"),
),
binary_condition(
ConditionFunctions.GTE,
FunctionCall(
None,
"replaceAll",
(
FunctionCall(
None, "toString", (Column(None, None, "column1"),),
),
Literal(None, "-"),
Literal(None, ""),
),
),
Literal(None, "a7d67cf796774551a95be6543cacd459"),
),
),
"(equals(replaceAll(toString(column1), '-', ''), 'a7d67cf796774551a95be6543cacd458') OR greaterOrEquals(replaceAll(toString(column1), '-', ''), 'a7d67cf796774551a95be6543cacd459'))",
id="Both optimizable and unoptimizable conditions in query",
),
pytest.param(
binary_condition(
ConditionFunctions.EQ,
Expand Down

0 comments on commit 92e15e4

Please sign in to comment.