Skip to content

Commit

Permalink
Improved IN filter error messages and tests (#793)
Browse files Browse the repository at this point in the history
  • Loading branch information
vicilliar authored Mar 27, 2024
1 parent 1a9ca83 commit 9302d98
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 47 deletions.
13 changes: 11 additions & 2 deletions src/marqo/core/search/search_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,12 @@ def _term_divider_is_IN(self, i: int, filter_string: str) -> bool:
Given 'i' and the full filter string, determine if 'i' is at the beginning of the IN term divider.
If any of the spaces or parenthesis are missing, this will return False.
"""
return filter_string[i:i + len(self.IN_TERM_DIVIDER)].upper() == self.IN_TERM_DIVIDER
candidate_substring = filter_string[i:i + len(self.IN_TERM_DIVIDER)].upper()
# Extra check if substring has " IN " but is missing the open parenthesis
if candidate_substring[:4] == ' IN ' and candidate_substring[4] != '(':
self._error("Expected ( after ' IN '", filter_string, i)

return candidate_substring == self.IN_TERM_DIVIDER

def _append_to_term_value(self, c: str):
"""
Expand Down Expand Up @@ -394,8 +399,8 @@ def parse(self, filter_string: str) -> SearchFilter:
self._current_raw_token.append(c)
escape = True
elif c == ' ':
# Found the ' IN ' operator. Look for a list starting with '(' on the next pass.
if self._term_divider_is_IN(i, filter_string):
# Found the ' IN ' operator. Look for a list starting with '(' on the next pass.
self._read_term_value = True
self._term_type = MarqoFilterStringParser._TermType.In
self._term_value = [[]]
Expand All @@ -407,7 +412,11 @@ def parse(self, filter_string: str) -> SearchFilter:
# Skip the next 4 characters (they were used in ' IN (' operator).
i += len(self.IN_TERM_DIVIDER) - 1

elif not self._reached_term_end and self._term_type == MarqoFilterStringParser._TermType.In:
# Found UNGROUPED white space in IN term list. This should be an error.
self._error('Unexpected white space in IN term list', filter_string, i)
else:
# Found the space AFTER a full term/token.
self._push_token(
stack,
filter_string,
Expand Down
2 changes: 1 addition & 1 deletion src/marqo/version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "2.3.0"
__version__ = "2.4.0"


def get_version() -> str:
Expand Down
99 changes: 55 additions & 44 deletions tests/core/search/test_search_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def test_parse_successful(self):
right=Not(EqualityTerm('c', '3', 'c:3'))
)
),
'extra parantheses'
'extra parentheses'
),
(
'a:1 AND (b:2 OR c:3)',
Expand Down Expand Up @@ -247,15 +247,6 @@ def test_parse_successful(self):
)),
"Parenthesis around equality and IN terms"
),
(
"((float_field_1:[0 TO 1])) AND ((text_field_1 in ((some text))))",
SearchFilter(
root=And(
left=RangeTerm('float_field_1', 0, 1, 'float_field_1:[0 TO 1]'),
right=InTerm('text_field_1', ['some text'], 'text_field_1 IN ((some text))')
)),
"Even more parenthesis around equality and IN terms"
),
(
"((float_field_1:[0 TO 1])) AND ((text_field_1 in ((some text)) OR text_field_2 IN (1,2,3)))",
SearchFilter(
Expand All @@ -268,6 +259,23 @@ def test_parse_successful(self):
)),
"Many parenthesis, nested groupings"
),
# A bit of everything
(
'(a:1 AND NOT (b:[1 TO 10] OR (c IN (x, y, (hello world)))))',
SearchFilter(
root=And(
left=EqualityTerm('a', '1', 'a:1'),
right=Not(
Or(
left=RangeTerm('b', 1, 10, 'b:[1 TO 10]'),
right=InTerm('c', ['x', 'y', 'hello world'], 'c IN (x,y,(hello world))')
)
)
)
),
'Complex filter string, all types'
)

]

for filter_string, expected_filter, msg in test_cases:
Expand All @@ -281,49 +289,52 @@ def test_parse_malformedString_fails(self):
# modifier not valid after term, after modifier, at the end of exp, end of string
# after modifier, can only have a term
test_cases = [
('AND a:1 OR b:2', 'string starts with operator'),
('a:(b))', 'extra parenthesis after grouped text'),
('a:1 AND b:2 (OR c:3)', 'expression starts with operator'),
('a:1 AND b:2 OR', 'string ends with operator'),
('a:1 AND (b:2 OR c:3 AND) OR e:5', 'expression ends with operator'),
('a:1 AND b:2 OR OR c:3', 'operator after operator'),
('a:1 AND b:2 OR NOT OR c:3', 'operator after modifier'),
('a:1 AND b:2 OR NOT', 'string ends with a modifier'),
('a:1 AND (b:2 OR c:3 NOT) OR e:5', 'expression ends with a modifier'),
('a:1 AND b:2 OR NOT NOT c:3', 'modifier after modifier'),
('a:1 NOT a:1', 'modifier after term'),
('a:1 a:1', 'term after term'),
('(a:1 AND b:2) b:2', 'term after expression'),
('(a:1 AND b:2)(c:3 AND d:4)', 'expression after expression'),
('a:1 (c:3 AND d:4)', 'expression after term'),
('a:1 AND b:2)', 'expression not opened'),
('(a:1 AND b:2', 'expression not closed'),
('', 'empty expression'),
(' ', 'empty expression'),
(' ', 'empty expression'),
('(', '('),
(')', ')'),
('()', '()'),
('a:1 AND (b:2 OR (c:3 AND (d:4 OR e:5)) OR d:6', 'imbalanced parentheses, not closed'),
('a:1 AND b:2 OR (c:3 AND (d:4 OR e:5))) OR d:6', 'imbalanced parentheses, not opened'),
('AND a:1 OR b:2', 'Unexpected AND', 'string starts with operator'),
('a:(b))', 'Unexpected )', 'extra parenthesis after grouped text'),
('a:1 AND b:2 (OR c:3)', 'Unexpected OR', 'expression starts with operator'),
('a:1 AND b:2 OR', 'but found OR', 'string ends with operator'),
('a:1 AND (b:2 OR c:3 AND) OR e:5', 'but found AND', 'expression ends with operator'),
('a:1 AND b:2 OR OR c:3', 'but found OR', 'operator after operator'),
('a:1 AND b:2 OR NOT OR c:3', 'but found OR', 'operator after modifier'),
('a:1 AND b:2 OR NOT', 'but found NOT', 'string ends with a modifier'),
('a:1 AND (b:2 OR c:3 NOT) OR e:5', "Unexpected modifier 'NOT'", 'expression ends with a modifier'),
('a:1 AND b:2 OR NOT NOT c:3', "Unexpected modifier 'NOT'", 'modifier after modifier'),
('a:1 NOT a:1', "Unexpected modifier 'NOT'", 'modifier after term'),
('a:1 a:1', 'Expected an operator', 'term after term'),
('(a:1 AND b:2) b:2', 'Expected an operator', 'term after expression'),
('(a:1 AND b:2)(c:3 AND d:4)', 'Unexpected expression ending', 'expression after expression'),
('a:1 (c:3 AND d:4)', 'Unexpected expression ending', 'expression after term'),
('a:1 AND b:2)', 'Unexpected )', 'expression not opened'),
('(a:1 AND b:2', 'Unbalanced parentheses', 'expression not closed'),
('', 'empty filter string', 'empty expression'),
(' ', 'Empty filter string', 'empty expression'),
(' ', 'Empty filter string', 'empty expression'),
('(', 'Unbalanced parentheses', 'unbalanced parenthesis'),
(')', 'Unexpected )', 'unexpected )'),
('()', 'Empty expression', '()'),
('a:1 AND (b:2 OR (c:3 AND (d:4 OR e:5)) OR d:6', 'Unbalanced parentheses', 'imbalanced parentheses, not closed'),
('a:1 AND b:2 OR (c:3 AND (d:4 OR e:5))) OR d:6', 'Unexpected )', 'imbalanced parentheses, not opened'),

# In term tests
('a IN (1, 2 OR 3)', 'OR in IN term'),
('a IN (1, 2 AND 3)', 'AND in IN term'),
('a IN (1, 2 NOT 3)', 'NOT in IN term'),
('a IN (1, 2, 3, [0 TO 1])', 'RANGE in IN term'),
('a IN (1, 2, 3))', 'extra parenthesis in IN term'),
('a IN (val1, val 2, val3)', 'ungrouped space in IN term'),
('a IN 1, 2, 3)', 'IN term with no opening parenthesis'),
('a IN (1, 2 OR 3)', 'Unexpected white space', 'OR in IN term'),
('a IN (1, 2 AND 3)', 'Unexpected white space', 'AND in IN term'),
('a IN (1, 2 NOT 3)', 'Unexpected white space', 'NOT in IN term'),
('a IN (1, 2, 3, [0 TO 1])', 'Unexpected [ after IN operator', 'RANGE in IN term'),
('a IN (1, 2, 3))', 'Unexpected )', 'extra parenthesis in IN term'),
('a IN (val1, val 2, val3)', 'Unexpected white space', 'ungrouped space in IN term'),
('a IN 1, 2, 3)', 'Expected (', 'IN term with no opening parenthesis'),
]

for filter_string, msg in test_cases:
for filter_string, expected_error_msg, msg in test_cases:
with self.subTest(msg):
with self.assertRaises(FilterStringParsingError,
msg=f"Did not raise error on malformed filter string '{filter_string}'"):
msg=f"Did not raise error on malformed filter string '{filter_string}'") as cm:
parser = MarqoFilterStringParser()
parser.parse(filter_string)

actual_error = str(cm.exception)
self.assertIn(expected_error_msg, actual_error)

def test_equality(self):
"""
Test all equality functions
Expand Down

0 comments on commit 9302d98

Please sign in to comment.