Skip to content

Commit

Permalink
Merge pull request #213 from marqo-ai/space_filtering_fix
Browse files Browse the repository at this point in the history
Space filtering fix
  • Loading branch information
pandu-k authored Dec 8, 2022
2 parents 0b31f02 + 000443c commit bb54a78
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 1 deletion.
6 changes: 5 additions & 1 deletion src/marqo/tensor_search/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,11 @@ def contextualise_filter(filter_string: str, simple_properties: typing.Iterable)
"""
contextualised_filter = filter_string
for field in simple_properties:
contextualised_filter = contextualised_filter.replace(f'{field}:', f'{enums.TensorField.chunks}.{field}:')
if ' ' in field:
field_with_escaped_space = field.replace(' ', '\ ')
contextualised_filter = contextualised_filter.replace(f'{field_with_escaped_space}:', f'{enums.TensorField.chunks}.{field_with_escaped_space}:')
else:
contextualised_filter = contextualised_filter.replace(f'{field}:', f'{enums.TensorField.chunks}.{field}:')
return contextualised_filter


Expand Down
29 changes: 29 additions & 0 deletions tests/tensor_search/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,35 @@ def test_filtering(self):
filter="*:*", verbose=1
)["hits"])

def test_filter_spaced_fields(self):
    """Filter searches on field names that contain a space.

    Indexes four documents, three of which carry the field
    ``"other field"`` and one the field ``"Floaty Field"``, then runs
    filtered searches in which the space inside the field name is escaped
    with a backslash (``other\\ field:...``).

    The filter strings are raw literals so the backslash reaches the
    search call unchanged; ``"\\ "`` in a non-raw literal is an invalid
    escape sequence and triggers a warning on current Python versions.
    """
    tensor_search.add_documents(
        config=self.config, index_name=self.index_name_1, docs=[
            {"abc": "some text", "other field": "baaadd", "_id": "5678", "my_string": "b"},
            {"abc": "some text", "other field": "Close match hehehe", "_id": "1234", "an_int": 2},
            {"abc": "some text", "other field": "Close match hehehe", "_id": "1233", "my_bool": True},
            {"abc": "some text", "Floaty Field": 0.548, "_id": "344", "my_bool": True},
        ], auto_refresh=True)

    # Single-term match on the spaced field: only one document has "baaadd".
    res = tensor_search.search(
        config=self.config, index_name=self.index_name_1, text='',
        filter=r"other\ field:baaadd")

    assert len(res['hits']) == 1
    assert res['hits'][0]['_id'] == "5678"

    # Multi-term value on the spaced field: two documents share this value.
    res_mult = tensor_search.search(
        config=self.config, index_name=self.index_name_1, text='',
        filter=r"other\ field:(Close match hehehe)")
    assert len(res_mult['hits']) == 2
    # Exactly two hits with two distinct expected ids, in either order.
    assert {hit['_id'] for hit in res_mult['hits']} == {'1234', '1233'}

    # Range filter on a spaced numeric field combined with a plain field.
    res_float = tensor_search.search(
        config=self.config, index_name=self.index_name_1, text='',
        filter=r"(Floaty\ Field:[0 TO 1]) AND (abc:(some text))")
    # The document is also fetched directly by id; the result is not inspected.
    tensor_search.get_document_by_id(
        config=self.config, index_name=self.index_name_1, document_id='344')

    assert len(res_float['hits']) == 1
    assert res_float['hits'][0]['_id'] == "344"

def test_filtering_bad_syntax(self):
tensor_search.add_documents(
config=self.config, index_name=self.index_name_1, docs=[
Expand Down

0 comments on commit bb54a78

Please sign in to comment.