From d260fd577fbb4d84fb0e469075a56cc59a1ae338 Mon Sep 17 00:00:00 2001 From: Oliver Lade Date: Wed, 16 Oct 2024 15:18:57 +1100 Subject: [PATCH 1/3] Bump Pydantic to v2 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index c8cc3a3..5daae26 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -pydantic~=1.9.1 +pydantic>=2.0.0 jsonschema==4.17.1 setuptools~=65.5.1 From 64c535b509da367972ce86b17f0a9786c6a7d659 Mon Sep 17 00:00:00 2001 From: Oliver Lade Date: Wed, 16 Oct 2024 17:21:28 +1100 Subject: [PATCH 2/3] Bump Pydantic in setup.py as well --- setup.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/setup.py b/setup.py index 6b2b800..ca258d1 100644 --- a/setup.py +++ b/setup.py @@ -1,19 +1,19 @@ -from setuptools import setup, find_packages +from setuptools import find_packages, setup with open("README.md", "r", encoding="utf8") as fh: long_description = fh.read() setup( install_requires=[ - "pydantic==1.9.1", + "pydantic>=2.0.0", "jsonschema==4.17.1", "setuptools~=65.5.1", ], name="marqo-commons", - version="1.0.0", - author="marqo org", - author_email="org@marqo.io", - description="Commons for marqo projects", + version="1.0.1", + author="Marqo", + author_email="support@marqo.ai", + description="Commons for Marqo projects", long_description=long_description, long_description_content_type="text/markdown", packages=find_packages(where="src", exclude=("tests*",)), @@ -27,4 +27,4 @@ include_package_data=True, python_requires=">=3", package_dir={"": "src"}, -) \ No newline at end of file +) From d6f0f882c9d2870494a516a2c4beb8ce09320db4 Mon Sep 17 00:00:00 2001 From: Oliver Lade Date: Wed, 16 Oct 2024 17:36:48 +1100 Subject: [PATCH 3/3] Fix tests --- .../model_properties_data/clip_properties.py | 32 +++-- .../fp16_clip_properties.py | 16 ++- .../model_properties_data/hf_properties.py | 12 +- .../languagebind_model_properties.py | 35 +++-- .../multilingual_clip_properties.py | 13 +- .../model_properties_data/no_model.py | 18 ++- .../onnx_clip_properties.py | 16 ++- .../open_clip_properties.py | 70 +++++----- .../random_properties.py | 14 +- .../sbert_onnx_properties.py | 11 +- .../model_properties_data/sbert_properties.py | 12 +- .../model_properties_data/test_properties.py | 14 +- .../model_registry/model_properties_object.py | 14 +- tests/test_model_properties.py | 127 ++++++++++++------ 14 files changed, 266 insertions(+), 138 deletions(-) diff --git a/src/marqo_commons/model_registry/model_properties_data/clip_properties.py b/src/marqo_commons/model_registry/model_properties_data/clip_properties.py index 016cddd..fcb6501 100644 --- a/src/marqo_commons/model_registry/model_properties_data/clip_properties.py +++ b/src/marqo_commons/model_registry/model_properties_data/clip_properties.py @@ -4,9 +4,15 @@ This file defines properties for CLIP models. It is intended to be used in conjunction with the model registry and should not be used in isolation. """ -from marqo_commons.model_registry.model_properties_object import ModelProperties, VectorNumericType, Modality, \ - ModelType, T + from typing import Dict, List + +from marqo_commons.model_registry.model_properties_object import ( + Modality, + ModelProperties, + ModelType, + VectorNumericType, +) from marqo_commons.model_registry.utils import convert_model_properties_to_dict @@ -16,57 +22,57 @@ class ClipProperties(ModelProperties): default_memory_size: float = 1.0 type: ModelType = ModelType.clip - @classmethod - def get_all_model_properties_objects(cls) -> Dict[str, T]: + def get_all_model_properties_objects(cls) -> Dict[str, "ClipProperties"]: return { - 'RN50': ClipProperties( + "RN50": ClipProperties( name="RN50", dimensions=1024, notes="CLIP resnet50", ), - 'RN101': ClipProperties( + "RN101": ClipProperties( name="RN101", dimensions=512, notes="CLIP resnet101", ), - 'RN50x4': ClipProperties( + "RN50x4": ClipProperties( name="RN50x4", dimensions=640, notes="CLIP resnet50x4", ), - 'RN50x16': ClipProperties( + "RN50x16": ClipProperties( name="RN50x16", dimensions=768, notes="CLIP resnet50x16", ), - 'RN50x64': ClipProperties( + "RN50x64": ClipProperties( name="RN50x64", dimensions=1024, notes="CLIP resnet50x64", ), - 'ViT-B/32': ClipProperties( + "ViT-B/32": ClipProperties( name="ViT-B/32", dimensions=512, notes="CLIP ViT-B/32", ), - 'ViT-B/16': ClipProperties( + "ViT-B/16": ClipProperties( name="ViT-B/16", dimensions=512, notes="CLIP ViT-B/16", ), - 'ViT-L/14': ClipProperties( + "ViT-L/14": ClipProperties( name="ViT-L/14", dimensions=768, notes="CLIP ViT-L/14", ), - 'ViT-L/14@336px': ClipProperties( + "ViT-L/14@336px": ClipProperties( name="ViT-L/14@336px", dimensions=768, notes="CLIP ViT-L/14@336px", ), } + @convert_model_properties_to_dict def _get_clip_properties() -> Dict: return ClipProperties.get_all_model_properties_objects() diff --git a/src/marqo_commons/model_registry/model_properties_data/fp16_clip_properties.py b/src/marqo_commons/model_registry/model_properties_data/fp16_clip_properties.py index c2f9395..1b47503 100644 --- a/src/marqo_commons/model_registry/model_properties_data/fp16_clip_properties.py +++ b/src/marqo_commons/model_registry/model_properties_data/fp16_clip_properties.py @@ -4,10 +4,15 @@ This file defines properties for FP16 Clip models. It is intended to be used in conjunction with the model registry and should not be used in isolation. """ + from typing import Dict, List -from marqo_commons.model_registry.model_properties_object import ModelProperties, VectorNumericType, Modality, \ - ModelType, T +from marqo_commons.model_registry.model_properties_object import ( + Modality, + ModelProperties, + ModelType, + VectorNumericType, +) from marqo_commons.model_registry.utils import convert_model_properties_to_dict @@ -17,25 +22,26 @@ class FP16ClipModelProperties(ModelProperties): type: ModelType = ModelType.fp16_clip @classmethod - def get_all_model_properties_objects(cls) -> Dict[str, T]: + def get_all_model_properties_objects(cls) -> Dict[str, "FP16ClipModelProperties"]: return { "fp16/ViT-L/14": FP16ClipModelProperties( name="fp16/ViT-L/14", dimensions=768, notes="The faster version (fp16, load from `cuda`) of openai clip model", ), - 'fp16/ViT-B/32': FP16ClipModelProperties( + "fp16/ViT-B/32": FP16ClipModelProperties( name="fp16/ViT-B/32", dimensions=512, notes="The faster version (fp16, load from `cuda`) of openai clip model", ), - 'fp16/ViT-B/16': FP16ClipModelProperties( + "fp16/ViT-B/16": FP16ClipModelProperties( name="fp16/ViT-B/16", dimensions=512, notes="The faster version (fp16, load from `cuda`) of openai clip model", ), } + @convert_model_properties_to_dict def _get_fp16_clip_properties() -> Dict: return FP16ClipModelProperties.get_all_model_properties_objects() diff --git a/src/marqo_commons/model_registry/model_properties_data/hf_properties.py b/src/marqo_commons/model_registry/model_properties_data/hf_properties.py index 285a4c8..c99820f 100644 --- a/src/marqo_commons/model_registry/model_properties_data/hf_properties.py +++ b/src/marqo_commons/model_registry/model_properties_data/hf_properties.py @@ -4,10 +4,15 @@ This file defines properties for HF models. It is intended to be used in conjunction with the model registry and should not be used in isolation. """ + from typing import Dict, List -from marqo_commons.model_registry.model_properties_object import ModelProperties, VectorNumericType, Modality, \ - ModelType, T +from marqo_commons.model_registry.model_properties_object import ( + Modality, + ModelProperties, + ModelType, + VectorNumericType, +) from marqo_commons.model_registry.utils import convert_model_properties_to_dict @@ -22,7 +27,7 @@ class HFModelProperties(ModelProperties): text_chunk_prefix: str = "" @classmethod - def get_all_model_properties_objects(cls) -> Dict[str, T]: + def get_all_model_properties_objects(cls) -> Dict[str, "HFModelProperties"]: return { "hf/all-MiniLM-L6-v1": HFModelProperties( name="sentence-transformers/all-MiniLM-L6-v1", @@ -245,7 +250,6 @@ def get_all_model_properties_objects(cls) -> Dict[str, T]: pass - @convert_model_properties_to_dict def _get_hf_properties() -> Dict: return HFModelProperties.get_all_model_properties_objects() diff --git a/src/marqo_commons/model_registry/model_properties_data/languagebind_model_properties.py b/src/marqo_commons/model_registry/model_properties_data/languagebind_model_properties.py index 603e66a..e0b9aff 100644 --- a/src/marqo_commons/model_registry/model_properties_data/languagebind_model_properties.py +++ b/src/marqo_commons/model_registry/model_properties_data/languagebind_model_properties.py @@ -4,11 +4,15 @@ This file contains properties for OpenCLIP models. It is intended to be used in conjunction with the model registry and should not be used in isolation. """ + from typing import Dict, List, Optional -from marqo_commons.model_registry.model_properties_data.onnx_clip_properties import OnnxClipModelProperties -from marqo_commons.model_registry.model_properties_object import ModelProperties, VectorNumericType, Modality, \ - ModelType, T +from marqo_commons.model_registry.model_properties_object import ( + Modality, + ModelProperties, + ModelType, + VectorNumericType, +) from marqo_commons.model_registry.utils import convert_model_properties_to_dict @@ -17,40 +21,47 @@ class LanguagebindModelProperties(ModelProperties): default_memory_size: float = 8 modality: List[Modality] type: ModelType = ModelType.languagebind - pretrained: Optional[str] + pretrained: Optional[str] = None notes: str = "" @classmethod - def get_all_model_properties_objects(cls) -> Dict[str, T]: + def get_all_model_properties_objects( + cls, + ) -> Dict[str, "LanguagebindModelProperties"]: # use this link to find all the model_configs # https://github.com/mlfoundations/open_clip/tree/main/src/open_clip/model_configs return { - 'LanguageBind/Video_V1.5_FT_Audio_FT_Image': LanguagebindModelProperties( + "LanguageBind/Video_V1.5_FT_Audio_FT_Image": LanguagebindModelProperties( name="LanguageBind/Video_V1.5_FT_Audio_FT_Image", dimensions=768, - modality=[Modality.video, Modality.audio, Modality.text, Modality.image], + modality=[ + Modality.video, + Modality.audio, + Modality.text, + Modality.image, + ], ), - 'LanguageBind/Video_V1.5_FT_Audio_FT': LanguagebindModelProperties( + "LanguageBind/Video_V1.5_FT_Audio_FT": LanguagebindModelProperties( name="LanguageBind/Video_V1.5_FT_Audio_FT", dimensions=768, modality=[Modality.video, Modality.audio, Modality.text], ), - 'LanguageBind/Video_V1.5_FT_Image': LanguagebindModelProperties( + "LanguageBind/Video_V1.5_FT_Image": LanguagebindModelProperties( name="LanguageBind/Video_V1.5_FT_Image", dimensions=768, modality=[Modality.video, Modality.text, Modality.image], ), - 'LanguageBind/Audio_FT_Image': LanguagebindModelProperties( + "LanguageBind/Audio_FT_Image": LanguagebindModelProperties( name="LanguageBind/Audio_FT_Image", dimensions=768, modality=[Modality.audio, Modality.text, Modality.image], ), - 'LanguageBind/Audio_FT': LanguagebindModelProperties( + "LanguageBind/Audio_FT": LanguagebindModelProperties( name="LanguageBind/Audio_FT", dimensions=768, modality=[Modality.audio, Modality.text], ), - 'LanguageBind/Video_V1.5_FT': LanguagebindModelProperties( + "LanguageBind/Video_V1.5_FT": LanguagebindModelProperties( name="LanguageBind/Video_V1.5_FT", dimensions=768, modality=[Modality.video, Modality.text], diff --git a/src/marqo_commons/model_registry/model_properties_data/multilingual_clip_properties.py b/src/marqo_commons/model_registry/model_properties_data/multilingual_clip_properties.py index 727d6f1..48da165 100644 --- a/src/marqo_commons/model_registry/model_properties_data/multilingual_clip_properties.py +++ b/src/marqo_commons/model_registry/model_properties_data/multilingual_clip_properties.py @@ -5,10 +5,15 @@ It is intended to be used in conjunction with the model registry and should not be used in isolation. """ + from typing import Dict, List -from marqo_commons.model_registry.model_properties_object import ModelProperties, VectorNumericType, Modality, \ - ModelType, T +from marqo_commons.model_registry.model_properties_object import ( + Modality, + ModelProperties, + ModelType, + VectorNumericType, +) from marqo_commons.model_registry.utils import convert_model_properties_to_dict @@ -22,7 +27,9 @@ class MultilingualClipModelProperties(ModelProperties): notes: str = "" @classmethod - def get_all_model_properties_objects(cls) -> Dict[str, T]: + def get_all_model_properties_objects( + cls, + ) -> Dict[str, "MultilingualClipModelProperties"]: """This is moved here from the model registry to avoid a circular import""" # Models are from github repo # https://github.com/FreddeFrallan/Multilingual-CLIP diff --git a/src/marqo_commons/model_registry/model_properties_data/no_model.py b/src/marqo_commons/model_registry/model_properties_data/no_model.py index d845017..3ca87c1 100644 --- a/src/marqo_commons/model_registry/model_properties_data/no_model.py +++ b/src/marqo_commons/model_registry/model_properties_data/no_model.py @@ -4,10 +4,15 @@ This file contains random model properties. It is intended to be used in conjunction with the model registry and should not be used in isolation. """ + from typing import Dict, List -from marqo_commons.model_registry.model_properties_object import ModelProperties, VectorNumericType, Modality, \ - ModelType, T +from marqo_commons.model_registry.model_properties_object import ( + Modality, + ModelProperties, + ModelType, + VectorNumericType, +) from marqo_commons.model_registry.utils import convert_model_properties_to_dict @@ -20,17 +25,18 @@ class NoModelProperties(ModelProperties): notes: str = "" @classmethod - def get_all_model_properties_objects(cls) -> Dict[str, T]: + def get_all_model_properties_objects(cls) -> Dict[str, "NoModelProperties"]: return { - 'no_model': NoModelProperties( - name='no_model', + "no_model": NoModelProperties( + name="no_model", dimensions=0, notes="This is a special model no_model that requires users to provide 'dimensions'", type=ModelType.no_model, - tokens=0 # Assuming default value for tokens + tokens=0, # Assuming default value for tokens ) } + @convert_model_properties_to_dict def _get_no_model_properties() -> Dict: return NoModelProperties.get_all_model_properties_objects() diff --git a/src/marqo_commons/model_registry/model_properties_data/onnx_clip_properties.py b/src/marqo_commons/model_registry/model_properties_data/onnx_clip_properties.py index ce4c4e8..26ac124 100644 --- a/src/marqo_commons/model_registry/model_properties_data/onnx_clip_properties.py +++ b/src/marqo_commons/model_registry/model_properties_data/onnx_clip_properties.py @@ -4,10 +4,15 @@ This file defines properties for Onnx Clip models. It is intended to be used in conjunction with the model registry and should not be used in isolation. """ -from typing import Dict, Optional, List, Tuple -from marqo_commons.model_registry.model_properties_object import ModelProperties, VectorNumericType, Modality, \ - ModelType, T +from typing import Dict, List, Optional, Tuple + +from marqo_commons.model_registry.model_properties_object import ( + Modality, + ModelProperties, + ModelType, + VectorNumericType, +) from marqo_commons.model_registry.utils import convert_model_properties_to_dict @@ -25,7 +30,7 @@ class OnnxClipModelProperties(ModelProperties): image_std: Optional[Tuple[float, ...]] = None @classmethod - def get_all_model_properties_objects(cls) -> Dict[str, T]: + def get_all_model_properties_objects(cls) -> Dict[str, "OnnxClipModelProperties"]: return { "onnx32/openai/ViT-L/14": OnnxClipModelProperties( name="onnx32/openai/ViT-L/14", @@ -257,7 +262,7 @@ def get_all_model_properties_objects(cls) -> Dict[str, T]: visual_file="onnx32-open_clip-ViT-B-32-quickgelu-laion400m_e32-visual.onnx", textual_file="onnx32-open_clip-ViT-B-32-quickgelu-laion400m_e32-textual.onnx", resolution=224, - pretrained="laion400m_e32" + pretrained="laion400m_e32", ), "onnx16/open_clip/ViT-B-32-quickgelu/laion400m_e32": OnnxClipModelProperties( name="onnx16/open_clip/ViT-B-32-quickgelu/laion400m_e32", @@ -672,6 +677,7 @@ def get_all_model_properties_objects(cls) -> Dict[str, T]: } pass + @convert_model_properties_to_dict def _get_onnx_clip_properties() -> Dict: return OnnxClipModelProperties.get_all_model_properties_objects() diff --git a/src/marqo_commons/model_registry/model_properties_data/open_clip_properties.py b/src/marqo_commons/model_registry/model_properties_data/open_clip_properties.py index e01c1b9..c5bc50f 100644 --- a/src/marqo_commons/model_registry/model_properties_data/open_clip_properties.py +++ b/src/marqo_commons/model_registry/model_properties_data/open_clip_properties.py @@ -4,11 +4,15 @@ This file contains properties for OpenCLIP models. It is intended to be used in conjunction with the model registry and should not be used in isolation. """ + from typing import Dict, List, Optional -from marqo_commons.model_registry.model_properties_data.onnx_clip_properties import OnnxClipModelProperties -from marqo_commons.model_registry.model_properties_object import ModelProperties, VectorNumericType, Modality, \ - ModelType, T +from marqo_commons.model_registry.model_properties_object import ( + Modality, + ModelProperties, + ModelType, + VectorNumericType, +) from marqo_commons.model_registry.utils import convert_model_properties_to_dict @@ -17,11 +21,11 @@ class OpenClipModelProperties(ModelProperties): default_memory_size: float = 1.0 modality: List[Modality] = [Modality.text, Modality.image] type: ModelType = ModelType.open_clip - pretrained: Optional[str] + pretrained: Optional[str] = None notes: str = "" @classmethod - def get_all_model_properties_objects(cls) -> Dict[str, T]: + def get_all_model_properties_objects(cls) -> Dict[str, "OpenClipModelProperties"]: # use this link to find all the model_configs # https://github.com/mlfoundations/open_clip/tree/main/src/open_clip/model_configs return { @@ -319,19 +323,19 @@ def get_all_model_properties_objects(cls) -> Dict[str, T]: name="open_clip/convnext_xxlarge/laion2b_s34b_b82k_augreg", dimensions=1024, notes="open_clip models", - pretrained="laion2b_s34b_b82k_augreg" + pretrained="laion2b_s34b_b82k_augreg", ), "open_clip/convnext_xxlarge/laion2b_s34b_b82k_augreg_rewind": OpenClipModelProperties( name="open_clip/convnext_xxlarge/laion2b_s34b_b82k_augreg_rewind", dimensions=1024, notes="open_clip models", - pretrained="laion2b_s34b_b82k_augreg_rewind" + pretrained="laion2b_s34b_b82k_augreg_rewind", ), "open_clip/convnext_xxlarge/laion2b_s34b_b82k_augreg_soup": OpenClipModelProperties( name="open_clip/convnext_xxlarge/laion2b_s34b_b82k_augreg_soup", dimensions=1024, notes="open_clip models", - pretrained="laion2b_s34b_b82k_augreg_soup" + pretrained="laion2b_s34b_b82k_augreg_soup", ), "open_clip/coca_ViT-B-32/laion2b_s13b_b90k": OpenClipModelProperties( name="open_clip/coca_ViT-B-32/laion2b_s13b_b90k", @@ -355,7 +359,7 @@ def get_all_model_properties_objects(cls) -> Dict[str, T]: name="open_clip/coca_ViT-L-14/mscoco_finetuned_laion2b_s13b_b90k", dimensions=768, notes="open_clip models", - pretrained="mscoco_finetuned_laion2b_s13b_b90k" + pretrained="mscoco_finetuned_laion2b_s13b_b90k", ), # New models as of Marqo 2.7.0 # Added for: Open CLIP 2.24.0 @@ -363,107 +367,107 @@ def get_all_model_properties_objects(cls) -> Dict[str, T]: name="open_clip/ViT-SO400M-14-SigLIP-384/webli", dimensions=1152, notes="open_clip model: ViT-SO400M-14-SigLIP-384/webli", - pretrained="webli" + pretrained="webli", ), "open_clip/ViT-H-14-378-quickgelu/dfn5b": OpenClipModelProperties( name="open_clip/ViT-H-14-378-quickgelu/dfn5b", dimensions=1024, notes="open_clip model: ViT-H-14-378-quickgelu/dfn5b", - pretrained="dfn5b" + pretrained="dfn5b", ), "open_clip/ViT-L-16-SigLIP-384/webli": OpenClipModelProperties( name="open_clip/ViT-L-16-SigLIP-384/webli", dimensions=1024, notes="open_clip model: ViT-L-16-SigLIP-384/webli", - pretrained="webli" + pretrained="webli", ), "open_clip/ViT-H-14-quickgelu/dfn5b": OpenClipModelProperties( name="open_clip/ViT-H-14-quickgelu/dfn5b", dimensions=1024, notes="open_clip model: ViT-H-14-quickgelu/dfn5b", - pretrained="dfn5b" + pretrained="dfn5b", ), "open_clip/ViT-L-16-SigLIP-256/webli": OpenClipModelProperties( name="open_clip/ViT-L-16-SigLIP-256/webli", dimensions=1024, notes="open_clip model: ViT-L-16-SigLIP-256/webli", - pretrained="webli" + pretrained="webli", ), "open_clip/ViT-B-16-SigLIP-512/webli": OpenClipModelProperties( name="open_clip/ViT-B-16-SigLIP-512/webli", dimensions=768, notes="open_clip model: ViT-B-16-SigLIP-512/webli", - pretrained="webli" + pretrained="webli", ), "open_clip/ViT-B-16-SigLIP-384/webli": OpenClipModelProperties( name="open_clip/ViT-B-16-SigLIP-384/webli", dimensions=768, notes="open_clip model: ViT-B-16-SigLIP-384/webli", - pretrained="webli" + pretrained="webli", ), "open_clip/ViT-B-16-SigLIP-256/webli": OpenClipModelProperties( name="open_clip/ViT-B-16-SigLIP-256/webli", dimensions=768, notes="open_clip model: ViT-B-16-SigLIP-256/webli", - pretrained="webli" + pretrained="webli", ), "open_clip/ViT-B-16-SigLIP/webli": OpenClipModelProperties( name="open_clip/ViT-B-16-SigLIP/webli", dimensions=768, notes="open_clip model: ViT-B-16-SigLIP/webli", - pretrained="webli" + pretrained="webli", ), "open_clip/ViT-L-14-quickgelu/dfn2b": OpenClipModelProperties( name="open_clip/ViT-L-14-quickgelu/dfn2b", dimensions=768, notes="open_clip model: ViT-L-14-quickgelu/dfn2b", - pretrained="dfn2b" + pretrained="dfn2b", ), "open_clip/EVA02-L-14-336/merged2b_s6b_b61k": OpenClipModelProperties( name="open_clip/EVA02-L-14-336/merged2b_s6b_b61k", dimensions=768, notes="open_clip model: EVA02-L-14-336/merged2b_s6b_b61k", - pretrained="merged2b_s6b_b61k" + pretrained="merged2b_s6b_b61k", ), "open_clip/EVA02-B-16/merged2b_s8b_b131k": OpenClipModelProperties( name="open_clip/EVA02-B-16/merged2b_s8b_b131k", dimensions=512, notes="open_clip model: EVA02-B-16/merged2b_s8b_b131k", - pretrained="merged2b_s8b_b131k" + pretrained="merged2b_s8b_b131k", ), "open_clip/EVA02-L-14/merged2b_s4b_b131k": OpenClipModelProperties( name="open_clip/EVA02-L-14/merged2b_s4b_b131k", dimensions=768, notes="open_clip model: EVA02-L-14/merged2b_s4b_b131k", - pretrained="merged2b_s4b_b131k" + pretrained="merged2b_s4b_b131k", ), "open_clip/ViT-B-16-quickgelu/metaclip_fullcc": OpenClipModelProperties( name="open_clip/ViT-B-16-quickgelu/metaclip_fullcc", dimensions=512, notes="open_clip model: ViT-B-16-quickgelu/metaclip_fullcc", - pretrained="metaclip_fullcc" + pretrained="metaclip_fullcc", ), "open_clip/ViT-L-14-CLIPA-336/datacomp1b": OpenClipModelProperties( name="open_clip/ViT-L-14-CLIPA-336/datacomp1b", dimensions=768, notes="open_clip model: ViT-L-14-CLIPA-336/datacomp1b", - pretrained="datacomp1b" + pretrained="datacomp1b", ), "open_clip/ViT-B-32-256/datacomp_s34b_b86k": OpenClipModelProperties( name="open_clip/ViT-B-32-256/datacomp_s34b_b86k", dimensions=512, notes="open_clip model: ViT-B-32-256/datacomp_s34b_b86k", - pretrained="datacomp_s34b_b86k" + pretrained="datacomp_s34b_b86k", ), "Marqo/marqo-fashionCLIP": OpenClipModelProperties( - name = "hf-hub:Marqo/marqo-fashionCLIP", - dimensions = 512, - note = "Marqo's fashionCLIP model", + name="hf-hub:Marqo/marqo-fashionCLIP", + dimensions=512, + note="Marqo's fashionCLIP model", ), "Marqo/marqo-fashionSigLIP": OpenClipModelProperties( - name = "hf-hub:Marqo/marqo-fashionSigLIP", - dimensions = 768, - note = "Marqo's fashionSigLIP model", + name="hf-hub:Marqo/marqo-fashionSigLIP", + dimensions=768, + note="Marqo's fashionSigLIP model", ), "visheratin/nllb-clip-base-siglip": OpenClipModelProperties( name="hf-hub:visheratin/nllb-clip-base-siglip", @@ -484,8 +488,8 @@ def get_all_model_properties_objects(cls) -> Dict[str, T]: name="hf-hub:visheratin/nllb-siglip-mrl-large", dimensions=1152, note="A multilingual CLIP model", - ) - } + ), + } @convert_model_properties_to_dict diff --git a/src/marqo_commons/model_registry/model_properties_data/random_properties.py b/src/marqo_commons/model_registry/model_properties_data/random_properties.py index f929c87..5525fb2 100644 --- a/src/marqo_commons/model_registry/model_properties_data/random_properties.py +++ b/src/marqo_commons/model_registry/model_properties_data/random_properties.py @@ -4,10 +4,15 @@ This file contains random model properties. It is intended to be used in conjunction with the model registry and should not be used in isolation. """ + from typing import Dict, List -from marqo_commons.model_registry.model_properties_object import ModelProperties, VectorNumericType, Modality, \ - ModelType, T +from marqo_commons.model_registry.model_properties_object import ( + Modality, + ModelProperties, + ModelType, + VectorNumericType, +) from marqo_commons.model_registry.utils import convert_model_properties_to_dict @@ -20,8 +25,8 @@ class RandomModelProperties(ModelProperties): notes: str = "" @classmethod - def get_all_model_properties_objects(cls) -> Dict[str, T]: - return { + def get_all_model_properties_objects(cls) -> Dict[str, "RandomModelProperties"]: + return { "random": RandomModelProperties( name="random", dimensions=384, @@ -44,6 +49,7 @@ def get_all_model_properties_objects(cls) -> Dict[str, T]: ), } + @convert_model_properties_to_dict def _get_random_properties() -> Dict: return RandomModelProperties.get_all_model_properties_objects() diff --git a/src/marqo_commons/model_registry/model_properties_data/sbert_onnx_properties.py b/src/marqo_commons/model_registry/model_properties_data/sbert_onnx_properties.py index bf07169..a325c7e 100644 --- a/src/marqo_commons/model_registry/model_properties_data/sbert_onnx_properties.py +++ b/src/marqo_commons/model_registry/model_properties_data/sbert_onnx_properties.py @@ -4,10 +4,15 @@ This file contains properties for SBERT ONNX models. It is intended to be used in conjunction with the model registry and should not be used in isolation. """ + from typing import Dict, List -from marqo_commons.model_registry.model_properties_object import ModelProperties, VectorNumericType, Modality, \ - ModelType, T +from marqo_commons.model_registry.model_properties_object import ( + Modality, + ModelProperties, + ModelType, + VectorNumericType, +) from marqo_commons.model_registry.utils import convert_model_properties_to_dict @@ -20,7 +25,7 @@ class SbertOnnxProperties(ModelProperties): notes: str = "" @classmethod - def get_all_model_properties_objects(cls) -> Dict[str, T]: + def get_all_model_properties_objects(cls) -> Dict[str, "SbertOnnxProperties"]: return { "onnx/all-MiniLM-L6-v1": SbertOnnxProperties( name="sentence-transformers/all-MiniLM-L6-v1", diff --git a/src/marqo_commons/model_registry/model_properties_data/sbert_properties.py b/src/marqo_commons/model_registry/model_properties_data/sbert_properties.py index e7a2982..5ab2f00 100644 --- a/src/marqo_commons/model_registry/model_properties_data/sbert_properties.py +++ b/src/marqo_commons/model_registry/model_properties_data/sbert_properties.py @@ -4,10 +4,15 @@ This file contains SBERT model properties. It is intended to be used in conjunction with the model registry and should not be used in isolation. """ + from typing import Dict, List -from marqo_commons.model_registry.model_properties_object import ModelProperties, VectorNumericType, Modality, \ - ModelType, T +from marqo_commons.model_registry.model_properties_object import ( + Modality, + ModelProperties, + ModelType, + VectorNumericType, +) from marqo_commons.model_registry.utils import convert_model_properties_to_dict @@ -20,7 +25,7 @@ class SbertProperties(ModelProperties): notes: str = "" @classmethod - def get_all_model_properties_objects(cls) -> Dict[str, T]: + def get_all_model_properties_objects(cls) -> Dict[str, "SbertProperties"]: return { "sentence-transformers/all-MiniLM-L6-v1": SbertProperties( name="sentence-transformers/all-MiniLM-L6-v1", @@ -84,6 +89,7 @@ def get_all_model_properties_objects(cls) -> Dict[str, T]: ), } + @convert_model_properties_to_dict def _get_sbert_properties() -> Dict: return SbertProperties.get_all_model_properties_objects() diff --git a/src/marqo_commons/model_registry/model_properties_data/test_properties.py b/src/marqo_commons/model_registry/model_properties_data/test_properties.py index ea1995c..c133ba8 100644 --- a/src/marqo_commons/model_registry/model_properties_data/test_properties.py +++ b/src/marqo_commons/model_registry/model_properties_data/test_properties.py @@ -4,10 +4,15 @@ This file contains test model properties. It is intended to be used in conjunction with the model registry and should not be used in isolation. """ + from typing import Dict, List -from marqo_commons.model_registry.model_properties_object import ModelProperties, VectorNumericType, Modality, \ - ModelType, T +from marqo_commons.model_registry.model_properties_object import ( + Modality, + ModelProperties, + ModelType, + VectorNumericType, +) from marqo_commons.model_registry.utils import convert_model_properties_to_dict @@ -21,7 +26,7 @@ class SbertTestModelProperties(ModelProperties): text_chunk_prefix: str = "" @classmethod - def get_all_model_properties_objects(cls) -> Dict[str, T]: + def get_all_model_properties_objects(cls) -> Dict[str, "SbertTestModelProperties"]: return { "sentence-transformers/test": SbertTestModelProperties( name="sentence-transformers/all-MiniLM-L6-v1", @@ -40,9 +45,10 @@ def get_all_model_properties_objects(cls) -> Dict[str, T]: type=ModelType.test, text_query_prefix="test query: ", text_chunk_prefix="test passage: ", - ) + ), } + @convert_model_properties_to_dict def _get_sbert_test_properties() -> Dict: return SbertTestModelProperties.get_all_model_properties_objects() diff --git a/src/marqo_commons/model_registry/model_properties_object.py b/src/marqo_commons/model_registry/model_properties_object.py index 298506e..6e1dc8c 100644 --- a/src/marqo_commons/model_registry/model_properties_object.py +++ b/src/marqo_commons/model_registry/model_properties_object.py @@ -1,11 +1,13 @@ from abc import ABC, abstractmethod -from typing import List, TypeVar, Dict +from typing import Dict, List, TypeVar from pydantic import BaseModel, Field -from marqo_commons.shared_utils.enums import Modality, VectorNumericType, ModelType + from marqo_commons.shared_utils import constants +from marqo_commons.shared_utils.enums import Modality, ModelType, VectorNumericType + +T = TypeVar("T", bound="ModelProperties") -T = TypeVar('T', bound='ModelProperties') class ModelProperties(BaseModel, ABC): name: str = Field(..., title="Model name") @@ -18,7 +20,9 @@ class ModelProperties(BaseModel, ABC): memory_size: float = Field(..., title="Model memory size") modality: List[Modality] = Field(..., title="Model modality") - vector_numeric_type: VectorNumericType = Field(..., title="Model vector numeric type") + vector_numeric_type: VectorNumericType = Field( + ..., title="Model vector numeric type" + ) def __init__(self, **kwargs): if "memory_size" not in kwargs: @@ -51,7 +55,7 @@ def _get_default_model_size(cls, name) -> float: return cls.__fields__["default_memory_size"].default def to_dict(self): - """ Function returns a dict of the model properties without the default values. + """Function returns a dict of the model properties without the default values. Handles deletion of default values by collecting the keys of the dict that start with "default". And then deleting them from the dict. Implemented this way to avoid dict size changes during iteration. diff --git a/tests/test_model_properties.py b/tests/test_model_properties.py index 87dbfa0..5444015 100644 --- a/tests/test_model_properties.py +++ b/tests/test_model_properties.py @@ -1,32 +1,60 @@ import json from unittest import TestCase -from marqo_commons.model_registry.model_properties_data.clip_properties import _get_clip_properties -from marqo_commons.model_registry.model_properties_data.fp16_clip_properties import _get_fp16_clip_properties -from marqo_commons.model_registry.model_properties_data.hf_properties import _get_hf_properties -from marqo_commons.model_registry.model_properties_data.multilingual_clip_properties import _get_multilingual_clip_properties -from marqo_commons.model_registry.model_properties_data.no_model import _get_no_model_properties -from marqo_commons.model_registry.model_properties_data.onnx_clip_properties import _get_onnx_clip_properties -from marqo_commons.model_registry.model_properties_data.open_clip_properties import _get_open_clip_properties -from marqo_commons.model_registry.model_properties_data.random_properties import _get_random_properties -from marqo_commons.model_registry.model_properties_data.sbert_onnx_properties import _get_sbert_onnx_properties -from marqo_commons.model_registry.model_properties_data.sbert_properties import _get_sbert_properties -from marqo_commons.model_registry.model_properties_data.test_properties import _get_sbert_test_properties -from marqo_commons.model_registry.model_properties_data.languagebind_model_properties import _get_languagebind_properties - -from marqo_commons.model_registry.model_registry import get_model_properties_dict, get_model_properties_json +from marqo_commons.model_registry.model_properties_data.clip_properties import ( + _get_clip_properties, +) +from marqo_commons.model_registry.model_properties_data.fp16_clip_properties import ( + _get_fp16_clip_properties, +) +from marqo_commons.model_registry.model_properties_data.hf_properties import ( + _get_hf_properties, +) +from marqo_commons.model_registry.model_properties_data.languagebind_model_properties import ( + _get_languagebind_properties, +) +from marqo_commons.model_registry.model_properties_data.multilingual_clip_properties import ( + _get_multilingual_clip_properties, +) +from marqo_commons.model_registry.model_properties_data.no_model import ( + _get_no_model_properties, +) +from marqo_commons.model_registry.model_properties_data.onnx_clip_properties import ( + _get_onnx_clip_properties, +) +from marqo_commons.model_registry.model_properties_data.open_clip_properties import ( + _get_open_clip_properties, +) +from marqo_commons.model_registry.model_properties_data.random_properties import ( + _get_random_properties, +) +from marqo_commons.model_registry.model_properties_data.sbert_onnx_properties import ( + _get_sbert_onnx_properties, +) +from marqo_commons.model_registry.model_properties_data.sbert_properties import ( + _get_sbert_properties, +) +from marqo_commons.model_registry.model_properties_data.test_properties import ( + _get_sbert_test_properties, +) +from marqo_commons.model_registry.model_registry import ( + get_model_properties_dict, + get_model_properties_json, +) from marqo_commons.shared_utils.enums import ModelType class TestModelProperties(TestCase): def test_models_count(self): - """ this test ensures that model names are not duplicated and thus the model registry is valid """ + """this test ensures that model names are not duplicated and thus the model registry is valid""" model_properties = get_model_properties_dict() total_count_from_model_registry = len(model_properties) total_count_from_all_model_properties = 0 sbert_model_properties = _get_sbert_properties() - sbert_model_properties.update({k.split('/')[-1]: v for k, v in sbert_model_properties.items()}) + sbert_model_properties.update( + {k.split("/")[-1]: v for k, v in sbert_model_properties.items()} + ) total_count_from_all_model_properties += len(sbert_model_properties) total_count_from_all_model_properties += len(_get_sbert_onnx_properties()) @@ -36,63 +64,83 @@ def test_models_count(self): total_count_from_all_model_properties += len(_get_hf_properties()) total_count_from_all_model_properties += len(_get_open_clip_properties()) total_count_from_all_model_properties += len(_get_onnx_clip_properties()) - total_count_from_all_model_properties += len(_get_multilingual_clip_properties()) + total_count_from_all_model_properties += len( + _get_multilingual_clip_properties() + ) total_count_from_all_model_properties += len(_get_fp16_clip_properties()) total_count_from_all_model_properties += len(_get_no_model_properties()) total_count_from_all_model_properties += len(_get_languagebind_properties()) self.assertEqual( - total_count_from_all_model_properties, total_count_from_model_registry, + total_count_from_all_model_properties, + total_count_from_model_registry, "Number of models in get_model_properties_dict is not equal to total " - "concatenated number of models from all model_properties.py files") + "concatenated number of models from all model_properties.py files", + ) def test_all_models_have_text_modality(self): - """ this test ensures that all models have text modality """ + """this test ensures that all models have text modality""" model_properties = get_model_properties_dict() for model_name, model_property in model_properties.items(): - self.assertIn("text", model_property["modality"], f"Model {model_name} does not have text modality") + self.assertIn( + "text", + model_property["modality"], + f"Model {model_name} does not have text modality", + ) def test_all_models_have_memory_size(self): - """ this test ensures that all models have memory_size """ + """this test ensures that all models have memory_size""" model_properties = get_model_properties_dict() for model_name, model_property in model_properties.items(): - self.assertNotIn(model_property["memory_size"], [None, 0], f"Model {model_name} does not have memory_size") + self.assertNotIn( + model_property["memory_size"], + [None, 0], + f"Model {model_name} does not have memory_size", + ) def test_all_model_types_are_used(self): - """ this test ensures that all model types are used """ + """this test ensures that all model types are used""" model_properties = get_model_properties_dict() - model_types = [model_property["type"] for model_property in model_properties.values()] + model_types = [ + model_property["type"] for model_property in model_properties.values() + ] for model_type in ModelType: - self.assertIn(model_type, model_types, f"Model type {model_type} is not used") + self.assertIn( + model_type, model_types, f"Model type {model_type} is not used" + ) def test_serialized_and_deserialized_model_properties_are_equal(self): - """ this test ensures that all models passed to json are correct and all keys are unique """ + """this test ensures that all models passed to json are correct and all keys are unique""" model_properties = get_model_properties_dict() model_properties_json = get_model_properties_json() deserialized_model_properties_json = json.loads(model_properties_json) for model_name, model_property in model_properties.items(): # check if model name is in deserialized model properties self.assertIn( - model_name, deserialized_model_properties_json, - f"Model {model_name} is not in deserialized model properties" + model_name, + deserialized_model_properties_json, + f"Model {model_name} is not in deserialized model properties", ) for key, value in model_property.items(): # check if key is in deserialized model properties self.assertIn( - key, deserialized_model_properties_json[model_name], - f"Model {model_name} does not have key {key} in deserialized model properties" + key, + deserialized_model_properties_json[model_name], + f"Model {model_name} does not have key {key} in deserialized model properties", ) if type(deserialized_model_properties_json[model_name][key]) is list: # check if all items are in deserialized model properties for item in value: self.assertIn( - item, deserialized_model_properties_json[model_name][key], - f"Model {model_name} has item {item} for key {key} but deserialized model properties does not have it" + item, + deserialized_model_properties_json[model_name][key], + f"Model {model_name} has item {item} for key {key} but deserialized model properties does not have it", ) else: # check if value is equal to deserialized model properties self.assertEqual( - value, deserialized_model_properties_json[model_name][key], - f"Model {model_name} has value {value} for key {key} but deserialized model properties has value {deserialized_model_properties_json[model_name][key]}" + value, + deserialized_model_properties_json[model_name][key], + f"Model {model_name} has value {value} for key {key} but deserialized model properties has value {deserialized_model_properties_json[model_name][key]}", ) def test_old_model_registry_matches_new(self): @@ -100,7 +148,10 @@ def test_old_model_registry_matches_new(self): "model_size": "memory_size", "note": "notes", } - with open("tests/data/old_serialized_model_registry_0df0edd2400a1b5b40598ee109f72a6ea261441b.json", "r") as f: + with open( + "tests/data/old_serialized_model_registry_0df0edd2400a1b5b40598ee109f72a6ea261441b.json", + "r", + ) as f: old_model_registry_dict = json.load(f) new_model_registry_json = get_model_properties_json() new_model_registry_dict = json.loads(new_model_registry_json) @@ -110,7 +161,7 @@ def test_old_model_registry_matches_new(self): self.assertEqual( old_model_registry_dict[model][key], new_model_registry_dict[model][old_to_new_values_mappings[key]], - f"Model {model} has different value for key {key} in old and new model registry" + f"Model {model} has different value for key {key} in old and new model registry", ) else: if key == "token": # token key was removed from model registry @@ -118,5 +169,5 @@ def test_old_model_registry_matches_new(self): self.assertEqual( old_model_registry_dict[model][key], new_model_registry_dict[model][key], - f"Model {model} has different value for key {key} in old and new model registry" + f"Model {model} has different value for key {key} in old and new model registry", )