Fix existing filter tests, add `IN` filter tests, bump version (#224)

* Fix a test that uses different dimensions
* Fix filter tests, add `IN` filter tests
* Bump marqo version
* Fix an error message

---------

Co-authored-by: Li Wan <lwan3@student.unimelb.edu.au>
Co-authored-by: Li Wan <49334982+wanliAlex@users.noreply.github.com>
marqo-ai · Apr 5, 2024 · 10aefd4 · 10aefd4
1 parent a4576d0
commit 10aefd4
Show file tree

Hide file tree

Showing 4 changed files with 61 additions and 38 deletions.
diff --git a/setup.py b/setup.py
@@ -18,7 +18,7 @@
         "tox"
     ],
     name="marqo",
-    version="3.2.0",
+    version="3.2.1",
     author="marqo org",
     author_email="org@marqo.io",
     description="Tensor search for humans",

diff --git a/src/marqo/version.py b/src/marqo/version.py
@@ -1,4 +1,4 @@
-__marqo_version__ = "2.3.0"
+__marqo_version__ = "2.4.0"
 __marqo_release_page__ = f"https://github.com/marqo-ai/marqo/releases/tag/{__marqo_version__}"
 
 __minimum_supported_marqo_version__ = "2.0"

diff --git a/tests/v2_tests/test_custom_vector_search.py b/tests/v2_tests/test_custom_vector_search.py
@@ -41,30 +41,34 @@ def setUp(self) -> None:
     def search_with_context(self, context_vector: Optional[Dict[str, List[Dict[str, Any]]]] = None) -> Dict[str, Any]:
         return self.client.index(self.test_index_name).search(
             q=self.query,
-            context = context_vector
+            context=context_vector
         )
 
     def test_custom_vector_search_format(self):
         if self.IS_MULTI_INSTANCE:
-            self.warm_request(lambda: self.search_with_context({"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 0}, {"vector": [2, ] * self.vector_dim, "weight": 0}], }))
+            self.warm_request(lambda: self.search_with_context({"tensor": [
+                {"vector": [1, ] * self.vector_dim, "weight": 0}, {"vector": [2, ] * self.vector_dim, "weight": 0}], }))
 
-        custom_res = self.search_with_context({"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 0}, {"vector": [2, ] * self.vector_dim, "weight": 0}], })
+        custom_res = self.search_with_context({"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 0},
+                                                          {"vector": [2, ] * self.vector_dim, "weight": 0}], })
 
         if self.IS_MULTI_INSTANCE:
             self.warm_request(lambda: self.search_with_context())
 
         original_res = self.search_with_context()
-        
+
         original_res.pop('processingTimeMs', None)
         custom_res.pop('processingTimeMs', None)
 
         self.assertEqual(custom_res, original_res)
 
     def test_custom_search_results(self):
         if self.IS_MULTI_INSTANCE:
-            self.warm_request(lambda: self.search_with_context({"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 0}, {"vector": [2, ] * self.vector_dim, "weight": 0}], }))
-
-        custom_res = self.search_with_context({"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 0}, {"vector": [2, ] * self.vector_dim, "weight": 0}], })
+            self.warm_request(lambda: self.search_with_context({"tensor": [
+                {"vector": [1, ] * self.vector_dim, "weight": 0}, {"vector": [2, ] * self.vector_dim, "weight": 0}], }))
+
+        custom_res = self.search_with_context({"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 0},
+                                                          {"vector": [2, ] * self.vector_dim, "weight": 0}], })
 
         if self.IS_MULTI_INSTANCE:
             self.warm_request(lambda: self.search_with_context())
@@ -80,47 +84,48 @@ def test_custom_vector_search_query_format(self):
         try:
             if self.IS_MULTI_INSTANCE:
                 self.warm_request(lambda: self.search_with_context({
-                "tensor": [
-                    {"vector": [1, ] * self.vector_dim, "weight": 0},
-                    {"vector": [2, ] * self.vector_dim, "weight": 0}
-                ], 
-            }))
+                    "tensor": [
+                        {"vector": [1, ] * self.vector_dim, "weight": 0},
+                        {"vector": [2, ] * self.vector_dim, "weight": 0}
+                    ],
+                }))
 
             self.search_with_context({
                 "tensorss": [
                     {"vector": [1, ] * self.vector_dim, "weight": 0},
                     {"vector": [2, ] * self.vector_dim, "weight": 0}
-                ], 
+                ],
             })
             raise AssertionError
         except MarqoWebError:
             pass
 
     def test_context_dimension_have_different_dimensions_to_index(self):
-         correct_context = {"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 1}]}
-         wrong_context = {"tensor": [{"vector": [1, ] * 2, "weight": 1}]}
-         if self.IS_MULTI_INSTANCE:
-             self.warm_request(lambda: self.search_with_context(correct_context))
-         try:
-             self.search_with_context(wrong_context)
-             raise AssertionError
-         except MarqoWebError as e:
-            assert "The provided vectors are not in the same dimension of the index" in str(e)
+        correct_context = {"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 1}]}
+        wrong_context = {"tensor": [{"vector": [1, ] * 2, "weight": 1}]}
+        if self.IS_MULTI_INSTANCE:
+            self.warm_request(lambda: self.search_with_context(correct_context))
+        with self.assertRaises(MarqoWebError) as e:
+            self.search_with_context(wrong_context)
+        self.assertIn("The dimension of the vectors returned by the model or given by the context "
+                      "vectors does not match the expected dimension", str(e.exception))
 
     def test_context_dimension_have_inconsistent_dimensions(self):
-         correct_context = {"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 1}, {"vector": [2, ] * self.vector_dim, "weight": 0}]}
-         wrong_context = {"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 1}, {"vector": [2, ] * (self.vector_dim + 1), "weight": 0}]}
-         if self.IS_MULTI_INSTANCE:
-             self.warm_request(lambda: self.search_with_context(correct_context))
-         try:
-             self.search_with_context(wrong_context)
-             raise AssertionError
-         except MarqoWebError as e:
-            assert "The provided vectors are not in the same dimension of the index" in str(e)
+        correct_context = {"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 1},
+                                      {"vector": [2, ] * self.vector_dim, "weight": 0}]}
+        wrong_context = {"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 1},
+                                    {"vector": [2, ] * (self.vector_dim + 1), "weight": 0}]}
+        if self.IS_MULTI_INSTANCE:
+            self.warm_request(lambda: self.search_with_context(correct_context))
+        with self.assertRaises(MarqoWebError) as e:
+            self.search_with_context(wrong_context)
+        self.assertIn("The dimension of the vectors returned by the model or given by the context "
+                      "vectors does not match the expected dimension", str(e.exception))
 
     def test_context_vector_with_flat_query(self):
         self.query = "What are the best pets"
-        context = {"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 1}, {"vector": [2, ] * self.vector_dim, "weight": 0}]}
+        context = {"tensor": [{"vector": [1, ] * self.vector_dim, "weight": 1},
+                              {"vector": [2, ] * self.vector_dim, "weight": 0}]}
         try:
             result = self.search_with_context(context)
             raise AssertionError(f"The query should not be accepted. Returned: {result}")
@@ -129,4 +134,4 @@ def test_context_vector_with_flat_query(self):
         finally:
 
             ## Ensure other tests are not affected
-            self.query = {"What are the best pets": 1}
+            self.query = {"What are the best pets": 1}
diff --git a/tests/v2_tests/test_tensor_search.py b/tests/v2_tests/test_tensor_search.py
@@ -209,6 +209,7 @@ def run():
         args, kwargs0 = mock__post.call_args_list[0]
         assert "device" not in kwargs0["path"]
 
+    @mark.fixed
     def test_filter_string_and_searchable_attributes(self):
         self.test_cases = [
             (CloudTestIndex.structured_text, self.structured_index_name),
@@ -223,13 +224,13 @@ def test_filter_string_and_searchable_attributes(self):
                     "_id": "0",                     # content in field_a
                     "text_field_1": "random content",
                     "text_field_2": "apple",
-                    "int_field_1": 0,
+                    "int_filter_field_1": 0,
                 },
                 {
                     "_id": "1",                     # content in field_b
                     "text_field_3": "random content",
                     "text_field_2": "banana",
-                    "int_field_1": 0,
+                    "int_filter_field_1": 0,
                 },
                 {
                     "_id": "2",                     # content in both
@@ -268,6 +269,24 @@ def test_filter_string_and_searchable_attributes(self):
                     "searchable_attributes": None,
                     "expected": ["3"]
                 },
+                {  # filter string only (IN with AND)
+                    "query": "random content",
+                    "filter_string": "text_field_2 in (banana, orange) AND int_filter_field_1 in (0, 1)",
+                    "searchable_attributes": None,
+                    "expected": ["1", "3"]
+                },
+                {  # filter string (IN with OR)
+                    "query": "random content",
+                    "filter_string": "text_field_2 in (banana, orange) OR int_filter_field_1 in (1)",
+                    "searchable_attributes": None,
+                    "expected": ["1", "2", "3"]
+                },
+                {   # filter string (IN with _id)
+                    "query": "random content",
+                    "filter_string": "_id in (1, 2)",
+                    "searchable_attributes": None,
+                    "expected": ["1", "2"]
+                },
                 {   # searchable attributes only (one)
                     "query": "random content",
                     "filter_string": None,
@@ -307,7 +326,6 @@ def test_filter_string_and_searchable_attributes(self):
                     filter_string=case.get("filter_string", ""),
                     searchable_attributes=case.get("searchable_attributes", None)
                 )
-                print(search_res, case["expected"], case["query"])
                 assert len(search_res["hits"]) == len(case["expected"])
                 assert set([hit["_id"] for hit in search_res["hits"]]) == set(case["expected"])