Skip to content

Commit

Permalink
fix existing filter tests, add `IN` filter tests, bump version (#224)
Browse files Browse the repository at this point in the history
* Fix a test that uses different dimensions
* fix filter tests, add `IN` filter tests
* bump marqo version
* Fix an error message

---------

Co-authored-by: Li Wan <lwan3@student.unimelb.edu.au>
Co-authored-by: Li Wan <49334982+wanliAlex@users.noreply.github.com>
  • Loading branch information
3 people authored Apr 5, 2024
1 parent a4576d0 commit 10aefd4
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 38 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
"tox"
],
name="marqo",
version="3.2.0",
version="3.2.1",
author="marqo org",
author_email="org@marqo.io",
description="Tensor search for humans",
Expand Down
2 changes: 1 addition & 1 deletion src/marqo/version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__marqo_version__ = "2.3.0"
__marqo_version__ = "2.4.0"
__marqo_release_page__ = f"https://github.com/marqo-ai/marqo/releases/tag/{__marqo_version__}"

__minimum_supported_marqo_version__ = "2.0"
Expand Down
71 changes: 38 additions & 33 deletions tests/v2_tests/test_custom_vector_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,30 +41,34 @@ def setUp(self) -> None:
def search_with_context(self, context_vector: Optional[Dict[str, List[Dict[str, Any]]]] = None) -> Dict[str, Any]:
return self.client.index(self.test_index_name).search(
q=self.query,
context = context_vector
context=context_vector
)

def test_custom_vector_search_format(self):
if self.IS_MULTI_INSTANCE:
self.warm_request(lambda: self.search_with_context({"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 0}, {"vector": [2, ] * self.vector_dim, "weight": 0}], }))
self.warm_request(lambda: self.search_with_context({"tensor": [
{"vector": [1, ] * self.vector_dim, "weight": 0}, {"vector": [2, ] * self.vector_dim, "weight": 0}], }))

custom_res = self.search_with_context({"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 0}, {"vector": [2, ] * self.vector_dim, "weight": 0}], })
custom_res = self.search_with_context({"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 0},
{"vector": [2, ] * self.vector_dim, "weight": 0}], })

if self.IS_MULTI_INSTANCE:
self.warm_request(lambda: self.search_with_context())

original_res = self.search_with_context()

original_res.pop('processingTimeMs', None)
custom_res.pop('processingTimeMs', None)

self.assertEqual(custom_res, original_res)

def test_custom_search_results(self):
if self.IS_MULTI_INSTANCE:
self.warm_request(lambda: self.search_with_context({"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 0}, {"vector": [2, ] * self.vector_dim, "weight": 0}], }))

custom_res = self.search_with_context({"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 0}, {"vector": [2, ] * self.vector_dim, "weight": 0}], })
self.warm_request(lambda: self.search_with_context({"tensor": [
{"vector": [1, ] * self.vector_dim, "weight": 0}, {"vector": [2, ] * self.vector_dim, "weight": 0}], }))

custom_res = self.search_with_context({"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 0},
{"vector": [2, ] * self.vector_dim, "weight": 0}], })

if self.IS_MULTI_INSTANCE:
self.warm_request(lambda: self.search_with_context())
Expand All @@ -80,47 +84,48 @@ def test_custom_vector_search_query_format(self):
try:
if self.IS_MULTI_INSTANCE:
self.warm_request(lambda: self.search_with_context({
"tensor": [
{"vector": [1, ] * self.vector_dim, "weight": 0},
{"vector": [2, ] * self.vector_dim, "weight": 0}
],
}))
"tensor": [
{"vector": [1, ] * self.vector_dim, "weight": 0},
{"vector": [2, ] * self.vector_dim, "weight": 0}
],
}))

self.search_with_context({
"tensorss": [
{"vector": [1, ] * self.vector_dim, "weight": 0},
{"vector": [2, ] * self.vector_dim, "weight": 0}
],
],
})
raise AssertionError
except MarqoWebError:
pass

def test_context_dimension_have_different_dimensions_to_index(self):
correct_context = {"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 1}]}
wrong_context = {"tensor": [{"vector": [1, ] * 2, "weight": 1}]}
if self.IS_MULTI_INSTANCE:
self.warm_request(lambda: self.search_with_context(correct_context))
try:
self.search_with_context(wrong_context)
raise AssertionError
except MarqoWebError as e:
assert "The provided vectors are not in the same dimension of the index" in str(e)
correct_context = {"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 1}]}
wrong_context = {"tensor": [{"vector": [1, ] * 2, "weight": 1}]}
if self.IS_MULTI_INSTANCE:
self.warm_request(lambda: self.search_with_context(correct_context))
with self.assertRaises(MarqoWebError) as e:
self.search_with_context(wrong_context)
self.assertIn("The dimension of the vectors returned by the model or given by the context "
"vectors does not match the expected dimension", str(e.exception))

def test_context_dimension_have_inconsistent_dimensions(self):
correct_context = {"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 1}, {"vector": [2, ] * self.vector_dim, "weight": 0}]}
wrong_context = {"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 1}, {"vector": [2, ] * (self.vector_dim + 1), "weight": 0}]}
if self.IS_MULTI_INSTANCE:
self.warm_request(lambda: self.search_with_context(correct_context))
try:
self.search_with_context(wrong_context)
raise AssertionError
except MarqoWebError as e:
assert "The provided vectors are not in the same dimension of the index" in str(e)
correct_context = {"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 1},
{"vector": [2, ] * self.vector_dim, "weight": 0}]}
wrong_context = {"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 1},
{"vector": [2, ] * (self.vector_dim + 1), "weight": 0}]}
if self.IS_MULTI_INSTANCE:
self.warm_request(lambda: self.search_with_context(correct_context))
with self.assertRaises(MarqoWebError) as e:
self.search_with_context(wrong_context)
self.assertIn("The dimension of the vectors returned by the model or given by the context "
"vectors does not match the expected dimension", str(e.exception))

def test_context_vector_with_flat_query(self):
self.query = "What are the best pets"
context = {"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 1}, {"vector": [2, ] * self.vector_dim, "weight": 0}]}
context = {"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 1},
{"vector": [2, ] * self.vector_dim, "weight": 0}]}
try:
result = self.search_with_context(context)
raise AssertionError(f"The query should not be accepted. Returned: {result}")
Expand All @@ -129,4 +134,4 @@ def test_context_vector_with_flat_query(self):
finally:

## Ensure other tests are not affected
self.query = {"What are the best pets": 1}
self.query = {"What are the best pets": 1}
24 changes: 21 additions & 3 deletions tests/v2_tests/test_tensor_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ def run():
args, kwargs0 = mock__post.call_args_list[0]
assert "device" not in kwargs0["path"]

@mark.fixed
def test_filter_string_and_searchable_attributes(self):
self.test_cases = [
(CloudTestIndex.structured_text, self.structured_index_name),
Expand All @@ -223,13 +224,13 @@ def test_filter_string_and_searchable_attributes(self):
"_id": "0", # content in field_a
"text_field_1": "random content",
"text_field_2": "apple",
"int_field_1": 0,
"int_filter_field_1": 0,
},
{
"_id": "1", # content in field_b
"text_field_3": "random content",
"text_field_2": "banana",
"int_field_1": 0,
"int_filter_field_1": 0,
},
{
"_id": "2", # content in both
Expand Down Expand Up @@ -268,6 +269,24 @@ def test_filter_string_and_searchable_attributes(self):
"searchable_attributes": None,
"expected": ["3"]
},
{ # filter string only (IN with AND)
"query": "random content",
"filter_string": "text_field_2 in (banana, orange) AND int_filter_field_1 in (0, 1)",
"searchable_attributes": None,
"expected": ["1", "3"]
},
{ # filter string (IN with OR)
"query": "random content",
"filter_string": "text_field_2 in (banana, orange) OR int_filter_field_1 in (1)",
"searchable_attributes": None,
"expected": ["1", "2", "3"]
},
{ # filter string (IN with _id)
"query": "random content",
"filter_string": "_id in (1, 2)",
"searchable_attributes": None,
"expected": ["1", "2"]
},
{ # searchable attributes only (one)
"query": "random content",
"filter_string": None,
Expand Down Expand Up @@ -307,7 +326,6 @@ def test_filter_string_and_searchable_attributes(self):
filter_string=case.get("filter_string", ""),
searchable_attributes=case.get("searchable_attributes", None)
)
print(search_res, case["expected"], case["query"])
assert len(search_res["hits"]) == len(case["expected"])
assert set([hit["_id"] for hit in search_res["hits"]]) == set(case["expected"])

Expand Down

0 comments on commit 10aefd4

Please sign in to comment.