diff --git a/src/marqo/tensor_search/backend.py b/src/marqo/tensor_search/backend.py index fe69101f1..53dfd9af0 100644 --- a/src/marqo/tensor_search/backend.py +++ b/src/marqo/tensor_search/backend.py @@ -77,6 +77,7 @@ def add_customer_field_properties(config: Config, index_name: str, HTTP Response """ engine = "lucene" + existing_info = get_cache()[index_name] # check if there is multimodal fie;ds and convert the fields name to a list with the same # format of customer_field_names @@ -94,21 +95,13 @@ def add_customer_field_properties(config: Config, index_name: str, utils.generate_vector_name(field_name[0])): { "type": "knn_vector", "dimension": model_properties["dimensions"], - "method": { - "name": "hnsw", - "space_type": "cosinesimil", - "engine": engine, - "parameters": { - "ef_construction": 128, - "m": 16 - } - } + "method": existing_info.get_ann_parameters() } for field_name in knn_field_names } } } } - existing_info = get_cache()[index_name] + new_index_properties = existing_info.properties.copy() # copy fields to the chunk for prefiltering. If it is text, convert it to a keyword type to save space diff --git a/src/marqo/tensor_search/configs.py b/src/marqo/tensor_search/configs.py index 6e654874e..7562e47e1 100644 --- a/src/marqo/tensor_search/configs.py +++ b/src/marqo/tensor_search/configs.py @@ -19,12 +19,26 @@ def get_default_index_settings(): # TODO move these into a processing dict with sub-dicts NsFields.image_preprocessing: { NsFields.patch_method: None - } + }, + NsFields.ann_parameters: get_default_ann_parameters() }, NsFields.number_of_shards: 5, NsFields.number_of_replicas : 1, } +def get_default_ann_parameters(): + return { + NsFields.ann_method: "hnsw", + NsFields.ann_metric: "cosinesimil", + + # `ann_engine` not exposed to customer (via index settings).
+ NsFields.ann_engine: "lucene", + NsFields.ann_method_parameters: { + NsFields.hnsw_ef_construction: 128, + NsFields.hnsw_m: 24 + } + } + def default_env_vars() -> dict: """Returns a dict of default env vars. diff --git a/src/marqo/tensor_search/enums.py b/src/marqo/tensor_search/enums.py index 13721b82a..8381feffb 100644 --- a/src/marqo/tensor_search/enums.py +++ b/src/marqo/tensor_search/enums.py @@ -56,6 +56,16 @@ class IndexSettingsField: number_of_shards = "number_of_shards" number_of_replicas = "number_of_replicas" + ann_parameters = "ann_parameters" + ann_method = "name" # key names follow the knn_vector "method" mapping, which receives this dict as-is + ann_metric = "space_type" + ann_engine = "engine" + ann_method_parameters = "parameters" + + # keys of the "parameters" object when "name" is "hnsw" + hnsw_ef_construction = "ef_construction" + hnsw_m = "m" + class SplitMethod: # consider moving this enum into processing diff --git a/src/marqo/tensor_search/models/index_info.py b/src/marqo/tensor_search/models/index_info.py index 14a3b8cdd..074115f14 100644 --- a/src/marqo/tensor_search/models/index_info.py +++ b/src/marqo/tensor_search/models/index_info.py @@ -1,7 +1,8 @@ import pprint -from typing import NamedTuple, Any +from typing import NamedTuple, Any, Dict from marqo.tensor_search import enums - +from marqo.tensor_search.enums import IndexSettingsField as NsFields +from marqo.tensor_search import configs class IndexInfo(NamedTuple): """ @@ -66,4 +67,28 @@ def get_true_text_properties(self) -> dict: true_text_props[text_field] = text_props except KeyError: continue - return true_text_props \ No newline at end of file + return true_text_props + + def get_ann_parameters(self) -> Dict[str, Any]: + """Gets the ANN parameters to use as the default for the index. + + Preferentially use index settings over generic defaults, when index settings exist. + + Returns: + Dict of ann parameters. Structure can be seen at `configs.get_default_ann_parameters`. The dict is used as-is as the knn_vector "method" mapping.
+ + """ + ann_default = configs.get_default_ann_parameters() + index_ann_defaults = self.index_settings[NsFields.index_defaults].get(NsFields.ann_parameters, {}) + + # index defaults override generic defaults + ann_params = { + **ann_default, + **index_ann_defaults + } + ann_params[NsFields.ann_method_parameters] = { + **ann_default[NsFields.ann_method_parameters], + **index_ann_defaults.get(NsFields.ann_method_parameters, {}) + } + + return ann_params \ No newline at end of file diff --git a/src/marqo/tensor_search/models/settings_object.py b/src/marqo/tensor_search/models/settings_object.py index c9d5ef21c..5e992acd3 100644 --- a/src/marqo/tensor_search/models/settings_object.py +++ b/src/marqo/tensor_search/models/settings_object.py @@ -92,6 +92,56 @@ "examples": [{ NsFields.patch_method: None }] + }, + NsFields.ann_parameters: { + "type": "object", + "required": [ + # No fields required, for backwards compatibility + ], + "properties": { + NsFields.ann_method: { + "type": "string", + "examples": [ + "hnsw" + ] + }, + NsFields.ann_metric: { + "type": "string", + "examples": [ + "cosinesimil" + ] + }, + NsFields.ann_method_parameters: { + "type": "object", + "required": [], + "properties": { + NsFields.hnsw_ef_construction: { + "type": "integer", + "examples": [ + 128 + ] + }, + NsFields.hnsw_m: { + "type": "integer", + "examples": [ + 24 + ] + }, + }, + "examples": [{ + NsFields.hnsw_ef_construction: 128, + NsFields.hnsw_m: 24 + }] + } + }, + "examples": [{ + NsFields.ann_method: "hnsw", + NsFields.ann_metric: "cosinesimil", + NsFields.ann_method_parameters: { + NsFields.hnsw_ef_construction: 128, + NsFields.hnsw_m: 24 + } + }] } }, "examples": [{ @@ -105,6 +155,14 @@ }, NsFields.image_preprocessing: { NsFields.patch_method: None + }, + NsFields.ann_parameters: { + NsFields.ann_method: "hnsw", + NsFields.ann_metric: "cosinesimil", + NsFields.ann_method_parameters: { + NsFields.hnsw_ef_construction: 128, + NsFields.hnsw_m: 24 + } } }] }, @@ -135,6 +193,14 @@ },
NsFields.image_preprocessing: { NsFields.patch_method: None + }, + NsFields.ann_parameters: { + NsFields.ann_method: "hnsw", + NsFields.ann_metric: "cosinesimil", + NsFields.ann_method_parameters: { + NsFields.hnsw_ef_construction: 128, + NsFields.hnsw_m: 24 + } } }, NsFields.number_of_shards: 5, diff --git a/tests/tensor_search/test_backend.py b/tests/tensor_search/test_backend.py index 62c8d5d20..0a30fecc4 100644 --- a/tests/tensor_search/test_backend.py +++ b/tests/tensor_search/test_backend.py @@ -5,6 +5,7 @@ import requests from marqo.tensor_search import enums, backend, utils from marqo.tensor_search import tensor_search +from marqo.tensor_search.configs import get_default_ann_parameters from marqo.errors import MarqoApiError, IndexNotFoundError from tests.marqo_test import MarqoTestCase from unittest import mock @@ -68,4 +69,53 @@ def run(): args, kwargs0 = mock__put.call_args_list[0] sent_dict = json.loads(kwargs0["body"]) assert "lucene" == sent_dict["properties"][enums.TensorField.chunks - ]["properties"][utils.generate_vector_name(field_name="f1")]["method"]["engine"] \ No newline at end of file + ]["properties"][utils.generate_vector_name(field_name="f1")]["method"]["engine"] + + def test_add_customer_field_properties_default_ann_parameters(self): + mock_config = copy.deepcopy(self.config) + mock__put = mock.MagicMock() + + tensor_search.create_vector_index( + config=mock_config, index_name=self.index_name_1) + @mock.patch("marqo._httprequests.HttpRequests.put", mock__put) + def run(): + tensor_search.add_documents(config=mock_config, docs=[{"f1": "doc"}, {"f2":"C"}], + index_name=self.index_name_1, auto_refresh=True) + return True + assert run() + args, kwargs0 = mock__put.call_args_list[0] + sent_dict = json.loads(kwargs0["body"]) + assert sent_dict["properties"][enums.TensorField.chunks]["properties"][utils.generate_vector_name(field_name="f1")]["method"] == get_default_ann_parameters() + + + def
test_add_customer_field_properties_index_ann_parameters(self): + mock_config = copy.deepcopy(self.config) + mock__put = mock.MagicMock() + + tensor_search.create_vector_index( + config=mock_config, + index_name=self.index_name_1, + index_settings={ + enums.IndexSettingsField.index_defaults: { + enums.IndexSettingsField.ann_parameters: { + enums.IndexSettingsField.ann_method_parameters: { + enums.IndexSettingsField.hnsw_ef_construction: 1, + enums.IndexSettingsField.hnsw_m: 2 + } + } + } + } + ) + @mock.patch("marqo._httprequests.HttpRequests.put", mock__put) + def run(): + tensor_search.add_documents(config=mock_config, docs=[{"f1": "doc"}, {"f2":"C"}], + index_name=self.index_name_1, auto_refresh=True) + return True + assert run() + args, kwargs0 = mock__put.call_args_list[0] + sent_dict = json.loads(kwargs0["body"]) + assert sent_dict["properties"][enums.TensorField.chunks]["properties"][utils.generate_vector_name(field_name="f1")]["method"]['engine'] == "lucene" + assert sent_dict["properties"][enums.TensorField.chunks]["properties"][utils.generate_vector_name(field_name="f1")]["method"][enums.IndexSettingsField.ann_method_parameters] == { + enums.IndexSettingsField.hnsw_ef_construction: 1, + enums.IndexSettingsField.hnsw_m: 2 + } \ No newline at end of file diff --git a/tests/tensor_search/test_index_info.py b/tests/tensor_search/test_index_info.py index 4bfac6171..f56dbafaf 100644 --- a/tests/tensor_search/test_index_info.py +++ b/tests/tensor_search/test_index_info.py @@ -2,7 +2,7 @@ import unittest from marqo.tensor_search.models.index_info import IndexInfo from marqo.tensor_search.models import index_info -from marqo.tensor_search.enums import TensorField +from marqo.tensor_search.enums import IndexSettingsField as NsFields, TensorField from marqo.tensor_search import configs @@ -98,3 +98,63 @@ def test_get_text_properties_some_text_props(self): index_settings=configs.get_default_index_settings() ) assert {"some_text_prop": {1:2334}, "cat": {"hat": "ter"}} == ii.get_text_properties()
+ + def test_get_ann_parameters_default_index_param(self): + ii = IndexInfo( + model_name='some model', + properties={}, + index_settings=configs.get_default_index_settings() + ) + assert ii.get_ann_parameters() == configs.get_default_ann_parameters() + + def test_get_ann_parameters_without_default_ann_parameters_use_defaults(self): + index_settings = configs.get_default_index_settings() + del index_settings[NsFields.index_defaults][NsFields.ann_parameters] + + ii = IndexInfo( + model_name='some model', + properties={}, + index_settings=index_settings + ) + assert ii.get_ann_parameters() == configs.get_default_ann_parameters() + + def test_get_ann_parameters_use_specified_index_settings(self): + index_settings = configs.get_default_index_settings() + index_settings[NsFields.index_defaults][NsFields.ann_parameters][NsFields.ann_method] = "not-hnsw" + + ii = IndexInfo( + model_name='some model', + properties={}, + index_settings=index_settings + ) + actual = ii.get_ann_parameters() + default = configs.get_default_ann_parameters() + assert actual[NsFields.ann_method] == "not-hnsw" + + del actual[NsFields.ann_method] + del default[NsFields.ann_method] + + assert actual == default + + def test_get_ann_parameters_use_specified_ann_method_parameters(self): + index_settings = configs.get_default_index_settings() + index_settings[NsFields.index_defaults][NsFields.ann_parameters][NsFields.ann_method_parameters] = { + NsFields.hnsw_ef_construction: 1, + NsFields.hnsw_m: 2 + } + + ii = IndexInfo( + model_name='some model', + properties={}, + index_settings=index_settings + ) + default = configs.get_default_ann_parameters() + actual = ii.get_ann_parameters() + assert actual[NsFields.ann_method_parameters] == { + NsFields.hnsw_ef_construction: 1, + NsFields.hnsw_m: 2 + } + del actual[NsFields.ann_method_parameters] + del default[NsFields.ann_method_parameters] + + assert actual == default \ No newline at end of file