diff --git a/snuba/query/processors/type_converters/__init__.py b/snuba/query/processors/type_converters/__init__.py index 7d482dcf5df..9e72d1d60db 100644 --- a/snuba/query/processors/type_converters/__init__.py +++ b/snuba/query/processors/type_converters/__init__.py @@ -3,7 +3,7 @@ from snuba.clickhouse.processors import QueryProcessor from snuba.clickhouse.query import Query -from snuba.query.conditions import FUNCTION_TO_OPERATOR, ConditionFunctions +from snuba.query.conditions import ConditionFunctions from snuba.query.exceptions import ValidationException from snuba.query.expressions import Column, Expression, FunctionCall, Literal from snuba.query.matchers import Any as AnyMatch @@ -19,19 +19,46 @@ class ColumnTypeError(ValidationException): class BaseTypeConverter(QueryProcessor, ABC): - def __init__(self, columns: Set[str]): + def __init__(self, columns: Set[str], optimize_ordering: bool = False): self.columns = columns + self.optimize_ordering = optimize_ordering column_match = Or([String(col) for col in columns]) literal = Param("literal", LiteralMatch(AnyMatch(str))) + ordering_operators = ( + ConditionFunctions.GT, + ConditionFunctions.GTE, + ConditionFunctions.LT, + ConditionFunctions.LTE, + ) + operator = Param( "operator", Or( [ String(op) - for op in FUNCTION_TO_OPERATOR - if op not in (ConditionFunctions.IN, ConditionFunctions.NOT_IN) + for op in ( + ConditionFunctions.EQ, + ConditionFunctions.NEQ, + ConditionFunctions.IS_NULL, + ConditionFunctions.IS_NOT_NULL, + *(ordering_operators if self.optimize_ordering else ()), + ) + ] + ), + ) + + unoptimizable_operator = Param( + "operator", + Or( + [ + String(op) + for op in ( + ConditionFunctions.LIKE, + ConditionFunctions.NOT_LIKE, + *(() if self.optimize_ordering else ordering_operators), + ) ] ), ) @@ -62,6 +89,13 @@ def __init__(self, columns: Set[str]): ), ) + self.__unoptimizable_condition_matcher = Or( + [ + FunctionCallMatch(unoptimizable_operator, (literal, col)), + FunctionCallMatch(unoptimizable_operator, (col, literal)), + ] + ) + def process_query(self, query: Query, request_settings: RequestSettings) -> None: query.transform_expressions( self._process_expressions, skip_transform_condition=True @@ -69,9 +103,12 @@ def process_query(self, query: Query, request_settings: RequestSettings) -> None condition = query.get_condition() if condition is not None: - processed = condition.transform(self.__process_optimizable_condition) - if processed == condition: + if self.__contains_unoptimizable_condition(condition): processed = condition.transform(self._process_expressions) + else: + processed = condition.transform(self.__process_optimizable_condition) + if condition == processed: + processed = processed.transform(self._process_expressions) query.set_ast_condition(processed) @@ -81,6 +118,17 @@ def __strip_column_alias(self, exp: Expression) -> Expression: alias=None, table_name=exp.table_name, column_name=exp.column_name ) + def __contains_unoptimizable_condition(self, exp: Expression) -> bool: + """ + Returns true if there is an unoptimizable condition, otherwise false. + """ + for e in exp: + match = self.__unoptimizable_condition_matcher.match(e) + if match is not None: + return True + + return False + def __process_optimizable_condition(self, exp: Expression) -> Expression: def assert_literal(lit: Expression) -> Literal: assert isinstance(lit, Literal) diff --git a/snuba/query/processors/type_converters/fixedstring_array_column_processor.py b/snuba/query/processors/type_converters/fixedstring_array_column_processor.py index 5a38c18e228..6cbe958b1d3 100644 --- a/snuba/query/processors/type_converters/fixedstring_array_column_processor.py +++ b/snuba/query/processors/type_converters/fixedstring_array_column_processor.py @@ -7,7 +7,7 @@ class FixedStringArrayColumnProcessor(BaseTypeConverter): def __init__(self, columns: Set[str], fixed_length: int): self.fixed_length = fixed_length - super().__init__(columns) + super().__init__(columns, optimize_ordering=True) def _translate_literal(self, exp: Literal) -> Expression: try: diff --git a/snuba/query/processors/type_converters/hexint_column_processor.py b/snuba/query/processors/type_converters/hexint_column_processor.py index 9aab32318ee..2efc1bb78ed 100644 --- a/snuba/query/processors/type_converters/hexint_column_processor.py +++ b/snuba/query/processors/type_converters/hexint_column_processor.py @@ -1,8 +1,13 @@ +from typing import Set + from snuba.query.expressions import Column, Expression, FunctionCall, Literal from snuba.query.processors.type_converters import BaseTypeConverter, ColumnTypeError class HexIntColumnProcessor(BaseTypeConverter): + def __init__(self, columns: Set[str]) -> None: + super().__init__(columns, optimize_ordering=True) + def _translate_literal(self, exp: Literal) -> Literal: try: assert isinstance(exp.value, str) diff --git a/snuba/query/processors/type_converters/uuid_array_column_processor.py b/snuba/query/processors/type_converters/uuid_array_column_processor.py index 1957b585e3e..e2e63e1624d 100644 --- a/snuba/query/processors/type_converters/uuid_array_column_processor.py +++ b/snuba/query/processors/type_converters/uuid_array_column_processor.py @@ -1,4 +1,5 @@ import uuid +from typing import Set from snuba.query.expressions import ( Argument, @@ -12,6 +13,9 @@ class UUIDArrayColumnProcessor(BaseTypeConverter): + def __init__(self, columns: Set[str]) -> None: + super().__init__(columns) + def _translate_literal(self, exp: Literal) -> Expression: try: assert isinstance(exp.value, str) diff --git a/snuba/query/processors/type_converters/uuid_column_processor.py b/snuba/query/processors/type_converters/uuid_column_processor.py index e274737ddeb..cf1c6ed51b6 100644 --- a/snuba/query/processors/type_converters/uuid_column_processor.py +++ b/snuba/query/processors/type_converters/uuid_column_processor.py @@ -1,10 +1,14 @@ import uuid +from typing import Set from snuba.query.expressions import Column, Expression, FunctionCall, Literal from snuba.query.processors.type_converters import BaseTypeConverter, ColumnTypeError class UUIDColumnProcessor(BaseTypeConverter): + def __init__(self, columns: Set[str]) -> None: + super().__init__(columns, optimize_ordering=False) + def _translate_literal(self, exp: Literal) -> Literal: try: assert isinstance(exp.value, str) diff --git a/tests/query/processors/test_uuid_column_processor.py b/tests/query/processors/test_uuid_column_processor.py index 90107008a26..6d581c031c8 100644 --- a/tests/query/processors/test_uuid_column_processor.py +++ b/tests/query/processors/test_uuid_column_processor.py @@ -1,17 +1,18 @@ -import pytest import uuid +import pytest + from snuba.clickhouse.columns import ColumnSet from snuba.clickhouse.formatter.expression import ClickhouseExpressionFormatter +from snuba.clickhouse.query import Query from snuba.query import SelectedExpression from snuba.query.conditions import ( - binary_condition, BooleanFunctions, ConditionFunctions, + binary_condition, ) from snuba.query.data_source.simple import Table from snuba.query.expressions import Column, Expression, FunctionCall, Literal -from snuba.clickhouse.query import Query from snuba.query.processors.type_converters import ColumnTypeError from snuba.query.processors.type_converters.uuid_column_processor import ( UUIDColumnProcessor, @@ -107,6 +108,78 @@ "equals(column1, 'a7d67cf7-9677-4551-a95b-e6543cacd459')", id="equals(column1, 'a7d67cf7-9677-4551-a95b-e6543cacd459')", ), + pytest.param( + binary_condition( + ConditionFunctions.GTE, + Column(None, None, "column1"), + Literal(None, "a7d67cf796774551a95be6543cacd459"), + ), + binary_condition( + ConditionFunctions.GTE, + FunctionCall( + None, + "replaceAll", + ( + FunctionCall(None, "toString", (Column(None, None, "column1"),),), + Literal(None, "-"), + Literal(None, ""), + ), + ), + Literal(None, "a7d67cf796774551a95be6543cacd459"), + ), + "greaterOrEquals(replaceAll(toString(column1), '-', ''), 'a7d67cf796774551a95be6543cacd459')", + id="Use string UUID if GTE", + ), + pytest.param( + binary_condition( + BooleanFunctions.OR, + binary_condition( + ConditionFunctions.EQ, + Column(None, None, "column1"), + Literal(None, "a7d67cf796774551a95be6543cacd458"), + ), + binary_condition( + ConditionFunctions.GTE, + Column(None, None, "column1"), + Literal(None, "a7d67cf796774551a95be6543cacd459"), + ), + ), + binary_condition( + BooleanFunctions.OR, + binary_condition( + ConditionFunctions.EQ, + FunctionCall( + None, + "replaceAll", + ( + FunctionCall( + None, "toString", (Column(None, None, "column1"),), + ), + Literal(None, "-"), + Literal(None, ""), + ), + ), + Literal(None, "a7d67cf796774551a95be6543cacd458"), + ), + binary_condition( + ConditionFunctions.GTE, + FunctionCall( + None, + "replaceAll", + ( + FunctionCall( + None, "toString", (Column(None, None, "column1"),), + ), + Literal(None, "-"), + Literal(None, ""), + ), + ), + Literal(None, "a7d67cf796774551a95be6543cacd459"), + ), + ), + "(equals(replaceAll(toString(column1), '-', ''), 'a7d67cf796774551a95be6543cacd458') OR greaterOrEquals(replaceAll(toString(column1), '-', ''), 'a7d67cf796774551a95be6543cacd459'))", + id="Both optimizable and unoptimizable conditions in query", + ), pytest.param( binary_condition( ConditionFunctions.EQ,