Skip to content

Commit

Permalink
Prevent extra fields in index settings (#365)
Browse files Browse the repository at this point in the history
* prevent xtra fields in index_settings

* removed incorrect field from index_defaults in test
  • Loading branch information
pandu-k authored Mar 3, 2023
1 parent 73bd429 commit 55bbb9c
Show file tree
Hide file tree
Showing 3 changed files with 151 additions and 18 deletions.
2 changes: 2 additions & 0 deletions src/marqo/tensor_search/models/settings_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
NsFields.index_defaults,
NsFields.number_of_shards
],
"additionalProperties": False,
"properties": {
NsFields.index_defaults: {
"type": "object",
Expand All @@ -18,6 +19,7 @@
NsFields.text_preprocessing,
NsFields.image_preprocessing
],
"additionalProperties": False,
"properties": {
NsFields.treat_urls_and_pointers_as_images: {
"type": "boolean",
Expand Down
2 changes: 0 additions & 2 deletions tests/s2_inference/test_generic_clip_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,6 @@ def test_vectorise_without_clip_type(self):


def test_validate_model_properties_unknown_model_error(self):
pass
"""_validate_model_properties should throw an error if model is not in registry,
and if model_properties have not been given in index
"""
Expand All @@ -372,7 +371,6 @@ def test_validate_model_properties_unknown_model_error(self):
index_settings={
"index_defaults": {
'model': model_name,
'type' : "clip"
}
}
)
Expand Down
165 changes: 149 additions & 16 deletions tests/tensor_search/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,27 +325,65 @@ def get_good_index_settings():
}

def test_validate_index_settings(self):
good_settings = {
"index_defaults": {
"treat_urls_and_pointers_as_images": False,
"model": "hf/all_datasets_v4_MiniLM-L6",
"normalize_embeddings": True,
"text_preprocessing": {
"split_length": 2,
"split_overlap": 0,
"split_method": "sentence"

good_settings =[
{
"index_defaults": {
"treat_urls_and_pointers_as_images": False,
"model": "hf/all_datasets_v4_MiniLM-L6",
"normalize_embeddings": True,
"text_preprocessing": {
"split_length": 2,
"split_overlap": 0,
"split_method": "sentence"
},
"image_preprocessing": {
"patch_method": None
}
},
"image_preprocessing": {
"patch_method": None
}
"number_of_shards": 5
},
"number_of_shards": 5
}
assert good_settings == validation.validate_settings_object(good_settings)
{ # extra field in text_preprocessing: OK
"index_defaults": {
"treat_urls_and_pointers_as_images": False,
"model": "hf/all_datasets_v4_MiniLM-L6",
"normalize_embeddings": True,
"text_preprocessing": {
"split_length": 2,
"split_overlap": 0,
"split_method": "sentence",
"blah blah blah": "woohoo"
},
"image_preprocessing": {
"patch_method": None
}
},
"number_of_shards": 5
},
{ # extra field in image_preprocessing: OK
"index_defaults": {
"treat_urls_and_pointers_as_images": False,
"model": "hf/all_datasets_v4_MiniLM-L6",
"normalize_embeddings": True,
"text_preprocessing": {
"split_length": 2,
"split_overlap": 0,
"split_method": "sentence",
},
"image_preprocessing": {
"patch_method": None,
"blah blah blah": "woohoo"
}
},
"number_of_shards": 5,
}
]
for settings in good_settings:
assert settings == validation.validate_settings_object(settings)

def test_validate_index_settings_model_properties(self):
good_settings = self.get_good_index_settings()
good_settings['model_properties'] = dict()
good_settings['index_defaults']['model_properties'] = dict()
assert good_settings == validation.validate_settings_object(good_settings)

def test_validate_index_settings_bad(self):
Expand Down Expand Up @@ -422,6 +460,101 @@ def test_validate_index_settings_img_preprocessing(self):
settings['index_defaults']['image_preprocessing']["path_method"] = "frcnn"
assert settings == validation.validate_settings_object(settings)

def test_validate_index_settings_misplaced_fields(self):
bad_settings = [
{
"index_defaults": {
"treat_urls_and_pointers_as_images": False,
"model": "hf/all_datasets_v4_MiniLM-L6",
"normalize_embeddings": True,
"text_preprocessing": {
"split_length": 2,
"split_overlap": 0,
"split_method": "sentence"
},
"image_preprocessing": {
"patch_method": None
}
},
"number_of_shards": 5,
"model": "hf/all_datasets_v4_MiniLM-L6" # model is also outside, here...
},
{
"index_defaults": {
"image_preprocessing": {
"patch_method": None # no models here
},
"normalize_embeddings": True,
"text_preprocessing": {
"split_length": 2,
"split_method": "sentence",
"split_overlap": 0
},
"treat_urls_and_pointers_as_images": False
},
"model": "open_clip/ViT-L-14/laion2b_s32b_b82k", # model here (bad)
"number_of_shards": 5,
"treat_urls_and_pointers_as_images": True
},
{
"index_defaults": {
"image_preprocessing": {
"patch_method": None,
"model": "open_clip/ViT-L-14/laion2b_s32b_b82k",
},
"normalize_embeddings": True,
"text_preprocessing": {
"split_length": 2,
"split_method": "sentence",
"split_overlap": 0
},
"treat_urls_and_pointers_as_images": False,
"number_of_shards": 5, # shouldn't be here
},
"treat_urls_and_pointers_as_images": True
},
{ # good, BUT extra field in index_defaults
"index_defaults": {
"number_of_shards": 5,
"treat_urls_and_pointers_as_images": False,
"model": "hf/all_datasets_v4_MiniLM-L6",
"normalize_embeddings": True,
"text_preprocessing": {
"split_length": 2,
"split_overlap": 0,
"split_method": "sentence"
},
"image_preprocessing": {
"patch_method": None
}
},
"number_of_shards": 5
},
{ # good, BUT extra field in root
"model": "hf/all_datasets_v4_MiniLM-L6",
"index_defaults": {
"treat_urls_and_pointers_as_images": False,
"model": "hf/all_datasets_v4_MiniLM-L6",
"normalize_embeddings": True,
"text_preprocessing": {
"split_length": 2,
"split_overlap": 0,
"split_method": "sentence"
},
"image_preprocessing": {
"patch_method": None
}
},
"number_of_shards": 5
}
]
for bad_set in bad_settings:
try:
validation.validate_settings_object(bad_set)
raise AssertionError
except InvalidArgError as e:
pass

def test_validate_mappings(self):
mappings = [
{
Expand Down

0 comments on commit 55bbb9c

Please sign in to comment.