Skip to content

Commit

Permalink
Preload / Prewarm custom models via URL on startup (#475)
Browse files Browse the repository at this point in the history
* initial draft work

* initial draft work

* updated error messages and added try catch to warmup

* made preload_model an outside function

* draft test work

* fixed unit tests for preload function

* fixed debug messages

* Update version.py to 0.0.20

* updated error message
  • Loading branch information
vicilliar authored May 16, 2023
1 parent 7acd249 commit fb8d22d
Show file tree
Hide file tree
Showing 4 changed files with 152 additions and 10 deletions.
2 changes: 1 addition & 1 deletion src/marqo/s2_inference/s2_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def _update_available_models(model_cache_key: str, model_name: str, validated_mo
f"and Marqo has access to the weights file.")
else:
most_recently_used_time = datetime.datetime.now()
logger.debug(f'renew {model_name} on device {device} with new time={most_recently_used_time}.')
logger.debug(f'renewed {model_name} on device {device} with new most recently time={most_recently_used_time}.')
try:
available_models[model_cache_key][AvailableModelsKey.most_recently_used_time] = most_recently_used_time
except KeyError:
Expand Down
48 changes: 43 additions & 5 deletions src/marqo/tensor_search/on_start_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from marqo import errors
from marqo.tensor_search.throttling.redis_throttle import throttle
from marqo.connections import redis_driver
from marqo.s2_inference.s2_inference import vectorise


def on_start(marqo_os_url: str):
Expand Down Expand Up @@ -95,6 +96,7 @@ def id_to_device(id):
device_names.append( {'id':device_id, 'name':id_to_device(device_id)})
self.logger.info(f"found devices {device_names}")


class ModelsForCacheing:
"""warms the in-memory model cache by preloading good defaults
"""
Expand All @@ -109,10 +111,12 @@ def __init__(self):
try:
self.models = json.loads(warmed_models)
except json.JSONDecodeError as e:
# TODO: Change error message to match new format
raise errors.EnvVarError(
f"Could not parse environment variable `{EnvVars.MARQO_MODELS_TO_PRELOAD}`. "
f"Please ensure that this a JSON-encoded array of strings. For example:\n"
f"Please ensure that this a JSON-encoded array of strings or dicts. For example:\n"
f"""export {EnvVars.MARQO_MODELS_TO_PRELOAD}='["ViT-L/14", "onnx/all_datasets_v4_MiniLM-L6"]'"""
f"""To add a custom model, it must be a dict with keys `model` and `model_properties` as defined in `https://marqo.pages.dev/0.0.20/Models-Reference/bring_your_own_model/`"""
) from e
else:
self.models = warmed_models
Expand All @@ -123,32 +127,66 @@ def __init__(self):
self.logger.info(f"pre-loading {self.models} onto devices={self.default_devices}")

def run(self):
    """Warm the in-memory model cache for every configured model/device pair.

    For each (model, device) combination this loads the model by vectorising
    a short test string once, then times N further vectorise calls and logs
    the average latency per pair. Loading failures surface as exceptions from
    `_preload_model` (e.g. `errors.EnvVarError` for malformed custom models).

    NOTE(review): the scraped diff showed both the old direct `vectorise(...)`
    calls and the new `_preload_model(...)` calls interleaved; this body keeps
    only the post-commit `_preload_model` path so each warm-up runs once.
    """
    test_string = 'this is a test string'
    N = 10  # number of timed vectorise calls used to compute the average latency
    messages = []
    for model in self.models:
        for device in self.default_devices:
            self.logger.debug(f"Beginning loading for model: {model} on device: {device}")

            # First call loads the model into the cache (untimed warm-up).
            _ = _preload_model(model=model, content=test_string, device=device)

            t = 0
            for n in range(N):
                t0 = time.time()
                _ = _preload_model(model=model, content=test_string, device=device)
                t1 = time.time()
                t += (t1 - t0)
            message = f"{(t)/float((N))} for {model} and {device}"
            messages.append(message)
            self.logger.debug(f"{model} {device} vectorise run {N} times.")
            self.logger.info(f"{model} {device} run successfully!")

    # Emit all per-pair average latencies together at the end.
    for message in messages:
        self.logger.info(message)
    self.logger.info("completed loading models")


def _preload_model(model, content, device):
    """Load one model into the cache by calling vectorise on it once.

    Calls vectorise for a model once. This will load in the model if it
    isn't already loaded.

    Args:
        model: Either a str (a model name in the registry) or a dict with
            keys `model` and `model_properties` (a custom model from URL).
        content: Content to vectorise; only used to trigger the model load.
        device: Device to load the model onto (e.g. "cpu", "cuda").

    Returns:
        None. The side effect is the model being loaded into the cache.

    Raises:
        errors.EnvVarError: If `model` is a dict missing `model` or
            `model_properties`, or is neither a str nor a dict.
    """
    if isinstance(model, str):
        # For models IN REGISTRY
        _ = vectorise(
            model_name=model,
            content=content,
            device=device
        )
    elif isinstance(model, dict):
        # For models from URL
        # TODO: include validation from on start script (model name properties etc)
        # _check_model_name(index_settings)
        try:
            _ = vectorise(
                model_name=model["model"],
                model_properties=model["model_properties"],
                content=content,
                device=device
            )
        except KeyError as e:
            # Fixed: trailing space added so the two concatenated sentences
            # don't run together in the rendered error message.
            raise errors.EnvVarError(
                f"Your custom model {model} is missing either `model` or `model_properties`. "
                f"""To add a custom model, it must be a dict with keys `model` and `model_properties` as defined in `https://marqo.pages.dev/0.0.20/Advanced-Usage/configuration/#configuring-preloaded-models`"""
            ) from e
    else:
        # Previously an unexpected type was silently skipped, hiding env-var
        # misconfiguration; fail loudly instead.
        raise errors.EnvVarError(
            f"Invalid model to preload: {model}. Each model must be either a string "
            f"(a model name in the registry) or a dict with keys `model` and `model_properties`."
        )


class InitializeRedis:

def __init__(self, host: str, port: int):
Expand Down
2 changes: 1 addition & 1 deletion src/marqo/version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.0.19"
__version__ = "0.0.20"


def get_version() -> str:
Expand Down
110 changes: 107 additions & 3 deletions tests/tensor_search/test_on_start_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

class TestOnStartScript(MarqoTestCase):

def test_preload_models(self):
def test_preload_registry_models(self):
environ_expected_models = [
({enums.EnvVars.MARQO_MODELS_TO_PRELOAD: []}, []),
({enums.EnvVars.MARQO_MODELS_TO_PRELOAD: ""}, []),
Expand All @@ -28,11 +28,11 @@ def test_preload_models(self):
for mock_environ, expected in environ_expected_models:
mock_vectorise = mock.MagicMock()
@mock.patch("os.environ", mock_environ)
@mock.patch("marqo.s2_inference.s2_inference.vectorise", mock_vectorise)
@mock.patch("marqo.tensor_search.on_start_script.vectorise", mock_vectorise)
def run():
model_caching_script = on_start_script.ModelsForCacheing()
model_caching_script.run()
loaded_models = {args[0] for args, kwargs in mock_vectorise.call_args_list}
loaded_models = {kwargs["model_name"] for args, kwargs in mock_vectorise.call_args_list}
assert loaded_models == set(expected)
return True
assert run()
Expand All @@ -47,7 +47,111 @@ def run():
print(str(e))
return True
assert run()

def test_preload_url_models(self):
    """Custom models (dicts with `model` + `model_properties`) are preloaded.

    Covers one clip and one open_clip model, with the env var supplied both
    as a python list and as a JSON-encoded string.
    """
    clip_model_object = {
        "model": "generic-clip-test-model-2",
        "model_properties": {
            "name": "ViT-B/32",
            "dimensions": 512,
            "type": "clip",
            "url": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt"
        }
    }

    open_clip_model_object = {
        "model": "random-open-clip-1",
        "model_properties": {
            "name": "ViT-B-32-quickgelu",
            "dimensions": 512,
            "type": "open_clip",
            "url": "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_avg-8a00ab3c.pt"
        }
    }

    def flatten(model_obj):
        # Collapse a model object into an immutable tuple so it can live in a set.
        props = model_obj["model_properties"]
        return (
            model_obj["model"],
            props["name"],
            props["dimensions"],
            props["type"],
            props["url"],
        )

    model_list = [clip_model_object, open_clip_model_object]
    expected = {flatten(clip_model_object), flatten(open_clip_model_object)}

    # The env var may arrive as a python list or as a JSON string; both must work.
    for env_value in (model_list, json.dumps(model_list)):
        mock_vectorise = mock.MagicMock()
        mock_environ = {enums.EnvVars.MARQO_MODELS_TO_PRELOAD: env_value}
        with mock.patch("os.environ", mock_environ), \
                mock.patch("marqo.tensor_search.on_start_script.vectorise", mock_vectorise):
            model_caching_script = on_start_script.ModelsForCacheing()
            model_caching_script.run()
            loaded_models = {
                (
                    kwargs["model_name"],
                    kwargs["model_properties"]["name"],
                    kwargs["model_properties"]["dimensions"],
                    kwargs["model_properties"]["type"],
                    kwargs["model_properties"]["url"]
                )
                for args, kwargs in mock_vectorise.call_args_list
            }
            assert loaded_models == expected

def test_preload_url_missing_model(self):
    """A custom model dict with no `model` key raises EnvVarError during run()."""
    open_clip_model_object = {
        "model_properties": {
            "name": "ViT-B-32-quickgelu",
            "dimensions": 512,
            "type": "open_clip",
            "url": "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_avg-8a00ab3c.pt"
        }
    }
    mock_environ = {enums.EnvVars.MARQO_MODELS_TO_PRELOAD: [open_clip_model_object]}
    with mock.patch("marqo.tensor_search.on_start_script.vectorise", mock.MagicMock()), \
            mock.patch("os.environ", mock_environ):
        model_caching_script = on_start_script.ModelsForCacheing()
        raised = False
        try:
            # The missing `model` key surfaces as KeyError -> EnvVarError when
            # the preload attempts the vectorise call.
            model_caching_script.run()
        except errors.EnvVarError:
            raised = True
        assert raised

def test_preload_url_missing_model_properties(self):
    """A custom model dict with no `model_properties` key raises EnvVarError during run()."""
    open_clip_model_object = {
        "model": "random-open-clip-1"
    }
    mock_environ = {enums.EnvVars.MARQO_MODELS_TO_PRELOAD: [open_clip_model_object]}
    with mock.patch("marqo.tensor_search.on_start_script.vectorise", mock.MagicMock()), \
            mock.patch("os.environ", mock_environ):
        model_caching_script = on_start_script.ModelsForCacheing()
        raised = False
        try:
            # The missing `model_properties` key surfaces as KeyError ->
            # EnvVarError when the preload attempts the vectorise call.
            model_caching_script.run()
        except errors.EnvVarError:
            raised = True
        assert raised

# TODO: test bad/no names/URLS in end-to-end tests, as this logic is done in vectorise call



Expand Down

0 comments on commit fb8d22d

Please sign in to comment.