Skip to content

Commit

Permalink
Preload / Prewarm custom models via URL on startup (#475)
Browse files Browse the repository at this point in the history
* initial draft work

* initial draft work

* updated error messages and added try catch to warmup

* made preload_model an outside function

* draft test work

* fixed unit tests for preload function

* fixed debug messages

* Update version.py to 0.0.20

* updated error message
  • Loading branch information
vicilliar authored May 16, 2023
1 parent 7acd249 commit fb8d22d
Show file tree
Hide file tree
Showing 4 changed files with 152 additions and 10 deletions.
2 changes: 1 addition & 1 deletion src/marqo/s2_inference/s2_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def _update_available_models(model_cache_key: str, model_name: str, validated_mo
f"and Marqo has access to the weights file.")
else:
most_recently_used_time = datetime.datetime.now()
logger.debug(f'renew {model_name} on device {device} with new time={most_recently_used_time}.')
logger.debug(f'renewed {model_name} on device {device} with new most recently time={most_recently_used_time}.')
try:
available_models[model_cache_key][AvailableModelsKey.most_recently_used_time] = most_recently_used_time
except KeyError:
Expand Down
48 changes: 43 additions & 5 deletions src/marqo/tensor_search/on_start_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from marqo import errors
from marqo.tensor_search.throttling.redis_throttle import throttle
from marqo.connections import redis_driver
from marqo.s2_inference.s2_inference import vectorise


def on_start(marqo_os_url: str):
Expand Down Expand Up @@ -95,6 +96,7 @@ def id_to_device(id):
device_names.append( {'id':device_id, 'name':id_to_device(device_id)})
self.logger.info(f"found devices {device_names}")


class ModelsForCacheing:
"""warms the in-memory model cache by preloading good defaults
"""
Expand All @@ -109,10 +111,12 @@ def __init__(self):
try:
self.models = json.loads(warmed_models)
except json.JSONDecodeError as e:
# TODO: Change error message to match new format
raise errors.EnvVarError(
f"Could not parse environment variable `{EnvVars.MARQO_MODELS_TO_PRELOAD}`. "
f"Please ensure that this a JSON-encoded array of strings. For example:\n"
f"Please ensure that this a JSON-encoded array of strings or dicts. For example:\n"
f"""export {EnvVars.MARQO_MODELS_TO_PRELOAD}='["ViT-L/14", "onnx/all_datasets_v4_MiniLM-L6"]'"""
f"""To add a custom model, it must be a dict with keys `model` and `model_properties` as defined in `https://marqo.pages.dev/0.0.20/Models-Reference/bring_your_own_model/`"""
) from e
else:
self.models = warmed_models
Expand All @@ -123,32 +127,66 @@ def __init__(self):
self.logger.info(f"pre-loading {self.models} onto devices={self.default_devices}")

def run(self):
    """Warm the in-memory model cache for every configured model/device pair.

    For each (model, device) combination this loads the model by vectorising
    a short test string once, then times N further vectorise calls and logs
    the average latency per pair. Loading failures surface as exceptions from
    `_preload_model` (e.g. `errors.EnvVarError` for malformed custom models).

    NOTE(review): the scraped diff showed both the old direct `vectorise(...)`
    calls and the new `_preload_model(...)` calls interleaved; this body keeps
    only the post-commit `_preload_model` path so each warm-up runs once.
    """
    test_string = 'this is a test string'
    N = 10  # number of timed vectorise calls used to compute the average latency
    messages = []
    for model in self.models:
        for device in self.default_devices:
            self.logger.debug(f"Beginning loading for model: {model} on device: {device}")

            # First call loads the model into the cache (untimed warm-up).
            _ = _preload_model(model=model, content=test_string, device=device)

            t = 0
            for n in range(N):
                t0 = time.time()
                _ = _preload_model(model=model, content=test_string, device=device)
                t1 = time.time()
                t += (t1 - t0)
            message = f"{(t)/float((N))} for {model} and {device}"
            messages.append(message)
            self.logger.debug(f"{model} {device} vectorise run {N} times.")
            self.logger.info(f"{model} {device} run successfully!")

    # Emit all per-pair average latencies together at the end.
    for message in messages:
        self.logger.info(message)
    self.logger.info("completed loading models")


def _preload_model(model, content, device):
    """Load one model into the cache by calling vectorise on it once.

    Calls vectorise for a model once. This will load in the model if it
    isn't already loaded.

    Args:
        model: Either a str (a model name in the registry) or a dict with
            keys `model` and `model_properties` (a custom model from URL).
        content: Content to vectorise; only used to trigger the model load.
        device: Device to load the model onto (e.g. "cpu", "cuda").

    Returns:
        None. The side effect is the model being loaded into the cache.

    Raises:
        errors.EnvVarError: If `model` is a dict missing `model` or
            `model_properties`, or is neither a str nor a dict.
    """
    if isinstance(model, str):
        # For models IN REGISTRY
        _ = vectorise(
            model_name=model,
            content=content,
            device=device
        )
    elif isinstance(model, dict):
        # For models from URL
        # TODO: include validation from on start script (model name properties etc)
        # _check_model_name(index_settings)
        try:
            _ = vectorise(
                model_name=model["model"],
                model_properties=model["model_properties"],
                content=content,
                device=device
            )
        except KeyError as e:
            # Fixed: trailing space added so the two concatenated sentences
            # don't run together in the rendered error message.
            raise errors.EnvVarError(
                f"Your custom model {model} is missing either `model` or `model_properties`. "
                f"""To add a custom model, it must be a dict with keys `model` and `model_properties` as defined in `https://marqo.pages.dev/0.0.20/Advanced-Usage/configuration/#configuring-preloaded-models`"""
            ) from e
    else:
        # Previously an unexpected type was silently skipped, hiding env-var
        # misconfiguration; fail loudly instead.
        raise errors.EnvVarError(
            f"Invalid model to preload: {model}. Each model must be either a string "
            f"(a model name in the registry) or a dict with keys `model` and `model_properties`."
        )


class InitializeRedis:

def __init__(self, host: str, port: int):
Expand Down
2 changes: 1 addition & 1 deletion src/marqo/version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.0.19"
__version__ = "0.0.20"


def get_version() -> str:
Expand Down
110 changes: 107 additions & 3 deletions tests/tensor_search/test_on_start_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

class TestOnStartScript(MarqoTestCase):

def test_preload_models(self):
def test_preload_registry_models(self):
environ_expected_models = [
({enums.EnvVars.MARQO_MODELS_TO_PRELOAD: []}, []),
({enums.EnvVars.MARQO_MODELS_TO_PRELOAD: ""}, []),
Expand All @@ -28,11 +28,11 @@ def test_preload_models(self):
for mock_environ, expected in environ_expected_models:
mock_vectorise = mock.MagicMock()
@mock.patch("os.environ", mock_environ)
@mock.patch("marqo.s2_inference.s2_inference.vectorise", mock_vectorise)
@mock.patch("marqo.tensor_search.on_start_script.vectorise", mock_vectorise)
def run():
model_caching_script = on_start_script.ModelsForCacheing()
model_caching_script.run()
loaded_models = {args[0] for args, kwargs in mock_vectorise.call_args_list}
loaded_models = {kwargs["model_name"] for args, kwargs in mock_vectorise.call_args_list}
assert loaded_models == set(expected)
return True
assert run()
Expand All @@ -47,7 +47,111 @@ def run():
print(str(e))
return True
assert run()

def test_preload_url_models(self):
    """Custom models (dicts with `model` + `model_properties`) are preloaded.

    Covers one clip and one open_clip model, with the env var supplied both
    as a python list and as a JSON-encoded string.
    """
    clip_model_object = {
        "model": "generic-clip-test-model-2",
        "model_properties": {
            "name": "ViT-B/32",
            "dimensions": 512,
            "type": "clip",
            "url": "https://openaipublic.azureedge.net/clip/models/40d365715913c9da98579312b702a82c18be219cc2a73407c4526f58eba950af/ViT-B-32.pt"
        }
    }

    open_clip_model_object = {
        "model": "random-open-clip-1",
        "model_properties": {
            "name": "ViT-B-32-quickgelu",
            "dimensions": 512,
            "type": "open_clip",
            "url": "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_avg-8a00ab3c.pt"
        }
    }

    def flatten(model_obj):
        # Collapse a model object into an immutable tuple so it can live in a set.
        props = model_obj["model_properties"]
        return (
            model_obj["model"],
            props["name"],
            props["dimensions"],
            props["type"],
            props["url"],
        )

    model_list = [clip_model_object, open_clip_model_object]
    expected = {flatten(clip_model_object), flatten(open_clip_model_object)}

    # The env var may arrive as a python list or as a JSON string; both must work.
    for env_value in (model_list, json.dumps(model_list)):
        mock_vectorise = mock.MagicMock()
        mock_environ = {enums.EnvVars.MARQO_MODELS_TO_PRELOAD: env_value}
        with mock.patch("os.environ", mock_environ), \
                mock.patch("marqo.tensor_search.on_start_script.vectorise", mock_vectorise):
            model_caching_script = on_start_script.ModelsForCacheing()
            model_caching_script.run()
            loaded_models = {
                (
                    kwargs["model_name"],
                    kwargs["model_properties"]["name"],
                    kwargs["model_properties"]["dimensions"],
                    kwargs["model_properties"]["type"],
                    kwargs["model_properties"]["url"]
                )
                for args, kwargs in mock_vectorise.call_args_list
            }
            assert loaded_models == expected

def test_preload_url_missing_model(self):
    """A custom model dict with no `model` key raises EnvVarError during run()."""
    open_clip_model_object = {
        "model_properties": {
            "name": "ViT-B-32-quickgelu",
            "dimensions": 512,
            "type": "open_clip",
            "url": "https://github.com/mlfoundations/open_clip/releases/download/v0.2-weights/vit_b_32-quickgelu-laion400m_avg-8a00ab3c.pt"
        }
    }
    mock_environ = {enums.EnvVars.MARQO_MODELS_TO_PRELOAD: [open_clip_model_object]}
    with mock.patch("marqo.tensor_search.on_start_script.vectorise", mock.MagicMock()), \
            mock.patch("os.environ", mock_environ):
        model_caching_script = on_start_script.ModelsForCacheing()
        raised = False
        try:
            # The missing `model` key surfaces as KeyError -> EnvVarError when
            # the preload attempts the vectorise call.
            model_caching_script.run()
        except errors.EnvVarError:
            raised = True
        assert raised

def test_preload_url_missing_model_properties(self):
    """A custom model dict with no `model_properties` key raises EnvVarError during run()."""
    open_clip_model_object = {
        "model": "random-open-clip-1"
    }
    mock_environ = {enums.EnvVars.MARQO_MODELS_TO_PRELOAD: [open_clip_model_object]}
    with mock.patch("marqo.tensor_search.on_start_script.vectorise", mock.MagicMock()), \
            mock.patch("os.environ", mock_environ):
        model_caching_script = on_start_script.ModelsForCacheing()
        raised = False
        try:
            # The missing `model_properties` key surfaces as KeyError ->
            # EnvVarError when the preload attempts the vectorise call.
            model_caching_script.run()
        except errors.EnvVarError:
            raised = True
        assert raised

# TODO: test bad/no names/URLS in end-to-end tests, as this logic is done in vectorise call



Expand Down

0 comments on commit fb8d22d

Please sign in to comment.