From 92e15e47b180897ef974609a86bf215d47a6321d Mon Sep 17 00:00:00 2001 From: Lyn Nagara Date: Mon, 27 Sep 2021 13:49:11 -0700 Subject: [PATCH] fix: Fix queries on columns with different underlying types (#2113) Queries involving string columns stored internally as UUID did not behave properly when it included a condition involving one of the >, >=, <. <=, LIKE or NOT LIKE operators. The BaseTypeConverter contained an optimization that attempted to use the internal column for the comparison however this is not correct as the comparison returns completely different results depending on the type. This change drops the optimizations on all comparisons except =, !=, IS NULL and IS NOT NULL. Queries containing a mix of optimizable and non optimizable conditions are treated as unoptimized and the external type is used. --- .../processors/type_converters/__init__.py | 60 ++++++++++++-- .../fixedstring_array_column_processor.py | 2 +- .../hexint_column_processor.py | 5 ++ .../uuid_array_column_processor.py | 4 + .../type_converters/uuid_column_processor.py | 4 + .../processors/test_uuid_column_processor.py | 79 ++++++++++++++++++- 6 files changed, 144 insertions(+), 10 deletions(-) diff --git a/snuba/query/processors/type_converters/__init__.py b/snuba/query/processors/type_converters/__init__.py index 7d482dcf5df..9e72d1d60db 100644 --- a/snuba/query/processors/type_converters/__init__.py +++ b/snuba/query/processors/type_converters/__init__.py @@ -3,7 +3,7 @@ from snuba.clickhouse.processors import QueryProcessor from snuba.clickhouse.query import Query -from snuba.query.conditions import FUNCTION_TO_OPERATOR, ConditionFunctions +from snuba.query.conditions import ConditionFunctions from snuba.query.exceptions import ValidationException from snuba.query.expressions import Column, Expression, FunctionCall, Literal from snuba.query.matchers import Any as AnyMatch @@ -19,19 +19,46 @@ class ColumnTypeError(ValidationException): class BaseTypeConverter(QueryProcessor, ABC): - def __init__(self, columns: Set[str]): + def __init__(self, columns: Set[str], optimize_ordering: bool = False): self.columns = columns + self.optimize_ordering = optimize_ordering column_match = Or([String(col) for col in columns]) literal = Param("literal", LiteralMatch(AnyMatch(str))) + ordering_operators = ( + ConditionFunctions.GT, + ConditionFunctions.GTE, + ConditionFunctions.LT, + ConditionFunctions.LTE, + ) + operator = Param( "operator", Or( [ String(op) - for op in FUNCTION_TO_OPERATOR - if op not in (ConditionFunctions.IN, ConditionFunctions.NOT_IN) + for op in ( + ConditionFunctions.EQ, + ConditionFunctions.NEQ, + ConditionFunctions.IS_NULL, + ConditionFunctions.IS_NOT_NULL, + *(ordering_operators if self.optimize_ordering else ()), + ) + ] + ), + ) + + unoptimizable_operator = Param( + "operator", + Or( + [ + String(op) + for op in ( + ConditionFunctions.LIKE, + ConditionFunctions.NOT_LIKE, + *(() if self.optimize_ordering else ordering_operators), + ) ] ), ) @@ -62,6 +89,13 @@ def __init__(self, columns: Set[str]): ), ) + self.__unoptimizable_condition_matcher = Or( + [ + FunctionCallMatch(unoptimizable_operator, (literal, col)), + FunctionCallMatch(unoptimizable_operator, (col, literal)), + ] + ) + def process_query(self, query: Query, request_settings: RequestSettings) -> None: query.transform_expressions( self._process_expressions, skip_transform_condition=True @@ -69,9 +103,12 @@ def process_query(self, query: Query, request_settings: RequestSettings) -> None condition = query.get_condition() if condition is not None: - processed = condition.transform(self.__process_optimizable_condition) - if processed == condition: + if self.__contains_unoptimizable_condition(condition): processed = condition.transform(self._process_expressions) + else: + processed = condition.transform(self.__process_optimizable_condition) + if condition == processed: + processed = processed.transform(self._process_expressions) query.set_ast_condition(processed) @@ -81,6 +118,17 @@ def __strip_column_alias(self, exp: Expression) -> Expression: alias=None, table_name=exp.table_name, column_name=exp.column_name ) + def __contains_unoptimizable_condition(self, exp: Expression) -> bool: + """ + Returns true if there is an unoptimizable condition, otherwise false. + """ + for e in exp: + match = self.__unoptimizable_condition_matcher.match(e) + if match is not None: + return True + + return False + def __process_optimizable_condition(self, exp: Expression) -> Expression: def assert_literal(lit: Expression) -> Literal: assert isinstance(lit, Literal) diff --git a/snuba/query/processors/type_converters/fixedstring_array_column_processor.py b/snuba/query/processors/type_converters/fixedstring_array_column_processor.py index 5a38c18e228..6cbe958b1d3 100644 --- a/snuba/query/processors/type_converters/fixedstring_array_column_processor.py +++ b/snuba/query/processors/type_converters/fixedstring_array_column_processor.py @@ -7,7 +7,7 @@ class FixedStringArrayColumnProcessor(BaseTypeConverter): def __init__(self, columns: Set[str], fixed_length: int): self.fixed_length = fixed_length - super().__init__(columns) + super().__init__(columns, optimize_ordering=True) def _translate_literal(self, exp: Literal) -> Expression: try: diff --git a/snuba/query/processors/type_converters/hexint_column_processor.py b/snuba/query/processors/type_converters/hexint_column_processor.py index 9aab32318ee..2efc1bb78ed 100644 --- a/snuba/query/processors/type_converters/hexint_column_processor.py +++ b/snuba/query/processors/type_converters/hexint_column_processor.py @@ -1,8 +1,13 @@ +from typing import Set + from snuba.query.expressions import Column, Expression, FunctionCall, Literal from snuba.query.processors.type_converters import BaseTypeConverter, ColumnTypeError class HexIntColumnProcessor(BaseTypeConverter): + def __init__(self, columns: Set[str]) -> None: + super().__init__(columns, optimize_ordering=True) + def _translate_literal(self, exp: Literal) -> Literal: try: assert isinstance(exp.value, str) diff --git a/snuba/query/processors/type_converters/uuid_array_column_processor.py b/snuba/query/processors/type_converters/uuid_array_column_processor.py index 1957b585e3e..e2e63e1624d 100644 --- a/snuba/query/processors/type_converters/uuid_array_column_processor.py +++ b/snuba/query/processors/type_converters/uuid_array_column_processor.py @@ -1,4 +1,5 @@ import uuid +from typing import Set from snuba.query.expressions import ( Argument, @@ -12,6 +13,9 @@ class UUIDArrayColumnProcessor(BaseTypeConverter): + def __init__(self, columns: Set[str]) -> None: + super().__init__(columns) + def _translate_literal(self, exp: Literal) -> Expression: try: assert isinstance(exp.value, str) diff --git a/snuba/query/processors/type_converters/uuid_column_processor.py b/snuba/query/processors/type_converters/uuid_column_processor.py index e274737ddeb..cf1c6ed51b6 100644 --- a/snuba/query/processors/type_converters/uuid_column_processor.py +++ b/snuba/query/processors/type_converters/uuid_column_processor.py @@ -1,10 +1,14 @@ import uuid +from typing import Set from snuba.query.expressions import Column, Expression, FunctionCall, Literal from snuba.query.processors.type_converters import BaseTypeConverter, ColumnTypeError class UUIDColumnProcessor(BaseTypeConverter): + def __init__(self, columns: Set[str]) -> None: + super().__init__(columns, optimize_ordering=False) + def _translate_literal(self, exp: Literal) -> Literal: try: assert isinstance(exp.value, str) diff --git a/tests/query/processors/test_uuid_column_processor.py b/tests/query/processors/test_uuid_column_processor.py index 90107008a26..6d581c031c8 100644 --- a/tests/query/processors/test_uuid_column_processor.py +++ b/tests/query/processors/test_uuid_column_processor.py @@ -1,17 +1,18 @@ -import pytest import uuid +import pytest + from snuba.clickhouse.columns import ColumnSet from snuba.clickhouse.formatter.expression import ClickhouseExpressionFormatter +from snuba.clickhouse.query import Query from snuba.query import SelectedExpression from snuba.query.conditions import ( - binary_condition, BooleanFunctions, ConditionFunctions, + binary_condition, ) from snuba.query.data_source.simple import Table from snuba.query.expressions import Column, Expression, FunctionCall, Literal -from snuba.clickhouse.query import Query from snuba.query.processors.type_converters import ColumnTypeError from snuba.query.processors.type_converters.uuid_column_processor import ( UUIDColumnProcessor, @@ -107,6 +108,78 @@ "equals(column1, 'a7d67cf7-9677-4551-a95b-e6543cacd459')", id="equals(column1, 'a7d67cf7-9677-4551-a95b-e6543cacd459')", ), + pytest.param( + binary_condition( + ConditionFunctions.GTE, + Column(None, None, "column1"), + Literal(None, "a7d67cf796774551a95be6543cacd459"), + ), + binary_condition( + ConditionFunctions.GTE, + FunctionCall( + None, + "replaceAll", + ( + FunctionCall(None, "toString", (Column(None, None, "column1"),),), + Literal(None, "-"), + Literal(None, ""), + ), + ), + Literal(None, "a7d67cf796774551a95be6543cacd459"), + ), + "greaterOrEquals(replaceAll(toString(column1), '-', ''), 'a7d67cf796774551a95be6543cacd459')", + id="Use string UUID if GTE", + ), + pytest.param( + binary_condition( + BooleanFunctions.OR, + binary_condition( + ConditionFunctions.EQ, + Column(None, None, "column1"), + Literal(None, "a7d67cf796774551a95be6543cacd458"), + ), + binary_condition( + ConditionFunctions.GTE, + Column(None, None, "column1"), + Literal(None, "a7d67cf796774551a95be6543cacd459"), + ), + ), + binary_condition( + BooleanFunctions.OR, + binary_condition( + ConditionFunctions.EQ, + FunctionCall( + None, + "replaceAll", + ( + FunctionCall( + None, "toString", (Column(None, None, "column1"),), + ), + Literal(None, "-"), + Literal(None, ""), + ), + ), + Literal(None, "a7d67cf796774551a95be6543cacd458"), + ), + binary_condition( + ConditionFunctions.GTE, + FunctionCall( + None, + "replaceAll", + ( + FunctionCall( + None, "toString", (Column(None, None, "column1"),), + ), + Literal(None, "-"), + Literal(None, ""), + ), + ), + Literal(None, "a7d67cf796774551a95be6543cacd459"), + ), + ), + "(equals(replaceAll(toString(column1), '-', ''), 'a7d67cf796774551a95be6543cacd458') OR greaterOrEquals(replaceAll(toString(column1), '-', ''), 'a7d67cf796774551a95be6543cacd459'))", + id="Both optimizable and unoptimizable conditions in query", + ), pytest.param( binary_condition( ConditionFunctions.EQ,