Add SigLIP imagePreprocessor config
SigLIP models do not use the default OpenCLIP preprocessor, which can
result in failure to load an index properly.

Also set size as per the base model's image_size configuration.

See https://docs.marqo.ai/latest/models/marqo/bring-your-own-model/#example-2-load-a-custom-openclip-model-from-a-public-url-with-custom-configurations

> It is very important to provide the correct imagePreprocessor
> configuration to match the model architecture as Marqo can not infer
> the correct configuration from the model name when you load a
> checkpoint file and will use the default configuration ("OpenCLIP")
ishaaq committed Dec 11, 2024
1 parent 33cfb7d commit c0d5278
Showing 2 changed files with 42 additions and 0 deletions.
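
For context, the linked doc's bring-your-own-model flow is where these two fields are consumed. The sketch below, against the Marqo Python client, shows a custom SigLIP checkpoint registered with an explicit imagePreprocessor and size; the index name, checkpoint URL, and client address are illustrative placeholders, and the exact settings schema may differ across Marqo versions:

import marqo

mq = marqo.Client(url="http://localhost:8882")  # placeholder address

mq.create_index(
    "my-siglip-index",  # placeholder name
    settings_dict={
        "treatUrlsAndPointersAsImages": True,
        "model": "my-custom-siglip-model",
        "modelProperties": {
            "name": "ViT-B-16-SigLIP",
            "dimensions": 768,
            "url": "https://example.com/checkpoints/siglip.pt",  # placeholder URL
            "type": "open_clip",
            # Without the next two fields Marqo falls back to the default
            # "OpenCLIP" preprocessor, which mismatches SigLIP checkpoints.
            "imagePreprocessor": "SigLIP",
            "size": 224,
        },
    },
)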
@@ -22,6 +22,8 @@ class OpenClipModelProperties(ModelProperties):
     modality: List[Modality] = [Modality.text, Modality.image]
     type: ModelType = ModelType.open_clip
     pretrained: Optional[str] = None
+    imagePreprocessor: Optional[str] = None  # if unspecified Marqo will default to "OpenCLIP"
+    size: Optional[int] = None  # image_size for image preprocessor
     notes: str = ""

     @classmethod
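
The practical reason these fields matter: SigLIP checkpoints are trained with different image normalization statistics than the OpenAI-CLIP defaults applied by the stock OpenCLIP preprocessor, so running images through the wrong transform can degrade embeddings or, as the commit message notes, break index loading. A simplified sketch of the two schemes, assuming the constants open_clip publishes (resize/crop steps omitted):

# Simplified comparison; real open_clip preprocessing also resizes and
# crops according to the model's configured image size.
from torchvision import transforms

# Default "OpenCLIP" preprocessor: OpenAI CLIP dataset statistics.
openclip_normalize = transforms.Normalize(
    mean=(0.48145466, 0.4578275, 0.40821073),
    std=(0.26862954, 0.26130258, 0.27577711),
)

# "SigLIP" preprocessor: inception-style statistics.
siglip_normalize = transforms.Normalize(
    mean=(0.5, 0.5, 0.5),
    std=(0.5, 0.5, 0.5),
)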
@@ -368,6 +370,8 @@ def get_all_model_properties_objects(cls) -> Dict[str, "OpenClipModelProperties"
                 dimensions=1152,
                 notes="open_clip model: ViT-SO400M-14-SigLIP-384/webli",
                 pretrained="webli",
+                imagePreprocessor="SigLIP",
+                size=384,
             ),
             "open_clip/ViT-H-14-378-quickgelu/dfn5b": OpenClipModelProperties(
                 name="open_clip/ViT-H-14-378-quickgelu/dfn5b",
@@ -380,6 +384,8 @@ def get_all_model_properties_objects(cls) -> Dict[str, "OpenClipModelProperties"
                 dimensions=1024,
                 notes="open_clip model: ViT-L-16-SigLIP-384/webli",
                 pretrained="webli",
+                imagePreprocessor="SigLIP",
+                size=384,
             ),
             "open_clip/ViT-H-14-quickgelu/dfn5b": OpenClipModelProperties(
                 name="open_clip/ViT-H-14-quickgelu/dfn5b",
@@ -392,30 +398,40 @@ def get_all_model_properties_objects(cls) -> Dict[str, "OpenClipModelProperties"
                 dimensions=1024,
                 notes="open_clip model: ViT-L-16-SigLIP-256/webli",
                 pretrained="webli",
+                imagePreprocessor="SigLIP",
+                size=256,
             ),
             "open_clip/ViT-B-16-SigLIP-512/webli": OpenClipModelProperties(
                 name="open_clip/ViT-B-16-SigLIP-512/webli",
                 dimensions=768,
                 notes="open_clip model: ViT-B-16-SigLIP-512/webli",
                 pretrained="webli",
+                imagePreprocessor="SigLIP",
+                size=512,
             ),
             "open_clip/ViT-B-16-SigLIP-384/webli": OpenClipModelProperties(
                 name="open_clip/ViT-B-16-SigLIP-384/webli",
                 dimensions=768,
                 notes="open_clip model: ViT-B-16-SigLIP-384/webli",
                 pretrained="webli",
+                imagePreprocessor="SigLIP",
+                size=384,
             ),
             "open_clip/ViT-B-16-SigLIP-256/webli": OpenClipModelProperties(
                 name="open_clip/ViT-B-16-SigLIP-256/webli",
                 dimensions=768,
                 notes="open_clip model: ViT-B-16-SigLIP-256/webli",
                 pretrained="webli",
+                imagePreprocessor="SigLIP",
+                size=256,
             ),
             "open_clip/ViT-B-16-SigLIP/webli": OpenClipModelProperties(
                 name="open_clip/ViT-B-16-SigLIP/webli",
                 dimensions=768,
                 notes="open_clip model: ViT-B-16-SigLIP/webli",
                 pretrained="webli",
+                imagePreprocessor="SigLIP",
+                size=224,
             ),
             "open_clip/ViT-L-14-quickgelu/dfn2b": OpenClipModelProperties(
                 name="open_clip/ViT-L-14-quickgelu/dfn2b",
@@ -468,26 +484,36 @@ def get_all_model_properties_objects(cls) -> Dict[str, "OpenClipModelProperties"
name="hf-hub:Marqo/marqo-fashionSigLIP",
dimensions=768,
note="Marqo's fashionSigLIP model",
imagePreprocessor="SigLIP",
size=224,
),
"visheratin/nllb-clip-base-siglip": OpenClipModelProperties(
name="hf-hub:visheratin/nllb-clip-base-siglip",
dimensions=768,
note="A multilingual CLIP model",
imagePreprocessor="SigLIP",
size=384,
),
"visheratin/nllb-siglip-mrl-base": OpenClipModelProperties(
name="hf-hub:visheratin/nllb-siglip-mrl-base",
dimensions=768,
note="A multilingual CLIP model",
imagePreprocessor="SigLIP",
size=384,
),
"visheratin/nllb-clip-large-siglip": OpenClipModelProperties(
name="hf-hub:visheratin/nllb-clip-large-siglip",
dimensions=1152,
note="A multilingual CLIP model",
imagePreprocessor="SigLIP",
size=384,
),
"visheratin/nllb-siglip-mrl-large": OpenClipModelProperties(
name="hf-hub:visheratin/nllb-siglip-mrl-large",
dimensions=1152,
note="A multilingual CLIP model",
imagePreprocessor="SigLIP",
size=384,
),
}
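
By contrast, when one of the registry models above is loaded by name rather than from a checkpoint URL, open_clip resolves the matching preprocessor from its built-in model config, which is why the quoted doc only requires the explicit fields for checkpoint loads. A minimal sketch of that name-based inference:

# open_clip derives SigLIP preprocessing (normalization and image size)
# from the named model's config, with no extra hints required.
import open_clip

model, _, preprocess = open_clip.create_model_and_transforms(
    "ViT-B-16-SigLIP", pretrained="webli"
)
tokenizer = open_clip.get_tokenizer("ViT-B-16-SigLIP")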

tests/test_model_properties.py (16 additions, 0 deletions)
@@ -171,3 +171,19 @@ def test_old_model_registry_matches_new(self):
                     new_model_registry_dict[model][key],
                     f"Model {model} has different value for key {key} in old and new model registry",
                 )
+
+    def test_siglip_models_have_imagepreprocessor_set(self):
+        open_clip_properties = _get_open_clip_properties()
+        for model_name, model_property in open_clip_properties.items():
+            if "siglip" in model_name.lower():
+                self.assertEqual(
+                    model_property["imagePreprocessor"],
+                    "SigLIP",
+                    f"Model {model_name} does not have imagePreprocessor set to SigLIP",
+                )
+            else:
+                # not a SigLIP model, imagePreprocessor should be None
+                self.assertIsNone(
+                    model_property.get("imagePreprocessor"),
+                    f"Model {model_name} has imagePreprocessor set even though it is not a SigLIP model",
+                )
