Merge branch 'mainline' into farshid/disable-auto-create-index
farshidz committed Jun 26, 2023
2 parents 90fa67c + 41de2fb commit 8e7035b
Showing 54 changed files with 1,539 additions and 604 deletions.
6 changes: 0 additions & 6 deletions src/marqo/config.py
@@ -7,8 +7,6 @@ def __init__(
self,
url: str,
timeout: Optional[int] = None,
indexing_device: Optional[Union[enums.Device, str]] = None,
search_device: Optional[Union[enums.Device, str]] = None,
backend: Optional[Union[enums.SearchDb, str]] = None,
) -> None:
"""
@@ -20,10 +18,6 @@ def __init__(
self.cluster_is_remote = False
self.url = self.set_url(url)
self.timeout = timeout
default_device = enums.Device.cpu

self.indexing_device = indexing_device if indexing_device is not None else default_device
self.search_device = search_device if search_device is not None else default_device
self.backend = backend if backend is not None else enums.SearchDb.opensearch

def set_url(self, url):
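A hedged sketch (not part of the diff) of how the slimmed-down constructor above is used once indexing_device and search_device are gone; the class name Config and the URL value are assumptions for illustration:

    from marqo.config import Config

    # Only connection-level settings remain; device selection now happens
    # per request instead of being fixed on the config object.
    config = Config(url="http://localhost:9200", timeout=30)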
19 changes: 15 additions & 4 deletions src/marqo/s2_inference/clip_utils.py
@@ -17,6 +17,7 @@
from marqo.s2_inference.processing.custom_clip_utils import HFTokenizer, download_model
from torchvision.transforms import InterpolationMode
from marqo.s2_inference.configs import ModelCache
from marqo.errors import InternalError
from marqo.tensor_search.telemetry import RequestMetrics, RequestMetricsStore

logger = get_logger(__name__)
@@ -199,10 +200,13 @@ class CLIP:
conveniance class wrapper to make clip work easily for both text and image encoding
"""

def __init__(self, model_type: str = "ViT-B/32", device: str = 'cpu', embedding_dim: int = None,
def __init__(self, model_type: str = "ViT-B/32", device: str = None, embedding_dim: int = None,
truncate: bool = True, **kwargs) -> None:

self.model_type = model_type

if not device:
raise InternalError("`device` is required for loading CLIP models!")
self.device = device
self.model = None
self.tokenizer = None
@@ -247,6 +251,7 @@ def load(self) -> None:
path = self.model_properties.get("localpath", None) or self.model_properties.get("url",None)

if path is None and not model_location_presence:
# We must load the model into CPU then transfer it to the desired device, always
# The original method to load the openai clip model
# https://github.com/openai/CLIP/issues/30
self.model, self.preprocess = clip.load(self.model_type, device='cpu', jit=False, download_root=ModelCache.clip_cache_path)
@@ -281,6 +286,7 @@ def custom_clip_load(self):
self.model_name = self.model_properties.get("name", None)

logger.info(f"The name of the custom clip model is {self.model_name}. We use openai clip load")
# We must load the model into CPU then transfer it to the desired device, always
model, preprocess = clip.load(name=self.model_path, device="cpu", jit= self.jit, download_root=ModelCache.clip_cache_path)
model = model.to(self.device)
return model, preprocess
@@ -364,7 +370,7 @@ def encode(self, inputs: Union[str, ImageType, List[Union[str, ImageType]]],


class FP16_CLIP(CLIP):
def __init__(self, model_type: str = "fp16/ViT-B/32", device: str = 'cuda', embedding_dim: int = None,
def __init__(self, model_type: str = "fp16/ViT-B/32", device: str = None, embedding_dim: int = None,
truncate: bool = True, **kwargs) -> None:
super().__init__(model_type, device, embedding_dim, truncate, **kwargs)
'''This class loads the provided clip model directly from cuda in float16 version. The inference time is halved
@@ -390,7 +396,7 @@ def load(self) -> None:


class OPEN_CLIP(CLIP):
def __init__(self, model_type: str = "open_clip/ViT-B-32-quickgelu/laion400m_e32", device: str = 'cpu', embedding_dim: int = None,
def __init__(self, model_type: str = "open_clip/ViT-B-32-quickgelu/laion400m_e32", device: str = None, embedding_dim: int = None,
truncate: bool = True, **kwargs) -> None:
super().__init__(model_type, device, embedding_dim, truncate , **kwargs)
self.model_name = model_type.split("/", 3)[1] if model_type.startswith("open_clip/") else model_type
@@ -511,9 +517,12 @@ def encode_text(self, sentence: Union[str, List[str]], normalize=True) -> FloatT


class MULTILINGUAL_CLIP(CLIP):
def __init__(self, model_type: str = "multilingual-clip/ViT-L/14", device: str = 'cpu', embedding_dim: int = None,
def __init__(self, model_type: str = "multilingual-clip/ViT-L/14", device: str = None, embedding_dim: int = None,
truncate: bool = True, **kwargs) -> None:

if not device:
raise InternalError("`device` is required for loading MULTILINGUAL CLIP models!")

self.model_name = model_type
self.model_info = get_multilingual_clip_properties()[self.model_name]
self.visual_name = self.model_info["visual_model"]
@@ -526,6 +535,8 @@ def __init__(self, model_type: str = "multilingual-clip/ViT-L/14", device: str =
def load(self) -> None:
if self.visual_name.startswith("openai/"):
clip_name = self.visual_name.replace("openai/", "")
# We must load the model into CPU then transfer it to the desired device, always
# The reason is this issue: https://github.com/openai/CLIP/issues/30
self.visual_model, self.preprocess = clip.load(name = clip_name, device = "cpu", jit = False, download_root=ModelCache.clip_cache_path)
self.visual_model = self.visual_model.to(self.device)
self.visual_model = self.visual_model.visual
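The repeated "load the model into CPU then transfer it to the desired device" comments added above refer to https://github.com/openai/CLIP/issues/30. A minimal sketch of that pattern, with an illustrative model name and cache path:

    import clip

    def load_clip_on(device: str, model_type: str = "ViT-B/32", cache_path: str = None):
        # Load on CPU first (see the issue linked above), never directly onto the target device ...
        model, preprocess = clip.load(model_type, device="cpu", jit=False, download_root=cache_path)
        # ... then move the weights to the device the caller asked for.
        model = model.to(device)
        return model, preprocess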
5 changes: 0 additions & 5 deletions src/marqo/s2_inference/configs.py
@@ -27,11 +27,6 @@ class Ignore:

files = ('flax_model.msgpack', 'rust_model.ot', 'tf_model.h5')



def get_default_device():
return 'cpu'

def get_default_normalization():
return True

2 changes: 1 addition & 1 deletion src/marqo/s2_inference/hf_utils.py
@@ -25,7 +25,7 @@ class HF_MODEL(Model):

def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)

if self.max_seq_length is None:
self.max_seq_length = 128
self.model_properties = kwargs.get("model_properties", dict())
5 changes: 4 additions & 1 deletion src/marqo/s2_inference/onnx_clip_utils.py
@@ -18,6 +18,7 @@
from zipfile import ZipFile
from huggingface_hub.utils import RevisionNotFoundError,RepositoryNotFoundError, EntryNotFoundError, LocalEntryNotFoundError
from marqo.s2_inference.errors import ModelDownloadError
from marqo.errors import InternalError

# Loading shared functions from clip_utils.py. This part should be decoupled from models in the future
from marqo.s2_inference.clip_utils import get_allowed_image_types, format_and_load_CLIP_image, \
@@ -56,11 +57,13 @@ class CLIP_ONNX(object):
Load a clip model and convert it to onnx version for faster inference
"""

def __init__(self, model_name="onnx32/openai/ViT-L/14", device="cpu", embedding_dim: int = None,
def __init__(self, model_name: str ="onnx32/openai/ViT-L/14", device: str = None, embedding_dim: int = None,
truncate: bool = True,
load=True, **kwargs):
self.model_name = model_name
self.onnx_type, self.source, self.clip_model = self.model_name.split("/", 2)
if not device:
raise InternalError("`device` is required for loading CLIP ONNX models!")
self.device = device
self.truncate = truncate
self.provider = ['CUDAExecutionProvider', "CPUExecutionProvider"] if self.device.startswith("cuda") else [
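The provider list built in the constructor above is what onnxruntime uses to choose an execution backend. A hedged sketch of how such a list is typically consumed (the model file name and input shape are placeholders, not files from this repo):

    import numpy as np
    import onnxruntime as ort

    providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]  # CUDA first, CPU as fallback
    session = ort.InferenceSession("clip_visual.onnx", providers=providers)  # placeholder file
    input_name = session.get_inputs()[0].name
    outputs = session.run(None, {input_name: np.zeros((1, 3, 224, 224), dtype=np.float32)})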
8 changes: 6 additions & 2 deletions src/marqo/s2_inference/processing/DINO_utils.py
@@ -7,6 +7,7 @@
from marqo.s2_inference.s2_inference import get_logger
from marqo.s2_inference.types import Dict, List, Union, ImageType, Tuple, FloatTensor, ndarray, Any, Literal
from marqo.s2_inference.errors import ModelLoadError
from marqo.errors import InternalError

logger = get_logger(__name__)

@@ -82,20 +83,23 @@ def _get_DINO_transform(image_size: Tuple = (224, 224)) -> Any:
])

def DINO_inference(model: Any, transform: Any, img: ImageType = None,
patch_size: int = None, device: str = "cpu") -> FloatTensor:
patch_size: int = None, device: str = None) -> FloatTensor:
"""runs inference for a model, transform and image
Args:
model (Any): ('vit_small', 'vit_base')
transform (Any): _get_DINO_transform
img (ImageType, optional): the image to infer on. Defaults to None.
patch_size (int, optional): the patch size the model architecture uses. Defaults to None.
device (str, optional): device for the model to run on. Defaults to "cpu".
device (str): device for the model to run on. Required to be set
Returns:
FloatTensor: returns N x w x h tensor
"""

if not device:
raise InternalError("`device` is required for DINO inference!")

img = transform(img)

# make the image divisible by the patch size
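The "make the image divisible by the patch size" step above is the usual ViT-style constraint; a minimal sketch of one way to enforce it (an assumption, not necessarily the repo's exact code):

    import torch

    def crop_to_patch_multiple(img: torch.Tensor, patch_size: int) -> torch.Tensor:
        # img is a (C, H, W) tensor; trim H and W down to the nearest multiple of patch_size.
        h = img.shape[1] - img.shape[1] % patch_size
        w = img.shape[2] - img.shape[2] % patch_size
        return img[:, :h, :w]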
9 changes: 7 additions & 2 deletions src/marqo/s2_inference/processing/image.py
@@ -37,6 +37,8 @@
generate_boxes
)

from marqo.errors import InternalError

logger = get_logger(__name__)


@@ -151,13 +153,13 @@ def process(self):
class PatchifyModel:
"""class to do the patching. this is the base class for model based chunking
"""
def __init__(self, device: str = 'cpu', size: Tuple = (224, 224), min_area: float = 60*60,
def __init__(self, device: str = None, size: Tuple = (224, 224), min_area: float = 60*60,
nms: bool = True, replace_small: bool = True, top_k: int = 10,
filter_bb: bool = True, min_area_replace: float = 60*60, **kwargs):
"""_summary_
Args:
device (str, optional): the device to run the model on. Defaults to 'cpu'.
device (str): the device to run the model on. Required to be set.
size (Tuple, optional): the final image size to go to the model. Defaults to (224, 224).
min_area (float, optional): the min area (pixels) that a box must meet to be kept.
areas lower than this are removed. Defaults to 60*60.
@@ -172,6 +174,9 @@ def __init__(self, device: str = 'cpu', size: Tuple = (224, 224), min_area: floa

# this is the resized size
self.size = size

if not device:
raise InternalError("`device` is required for loading CLIP models!")
self.device = device

self.min_area = min_area
2 changes: 2 additions & 0 deletions src/marqo/s2_inference/processing/pytorch_utils.py
@@ -32,6 +32,7 @@ def load_pytorch(model_name: str, device: str):

def load_pretrained_mobilenet():
""""
TODO: Remove, not used anywhere in the repo
loads marqo trained model
"""
model = fasterrcnn_mobilenet_v3_large_fpn(device='cpu', num_classes=1204,
@@ -51,6 +52,7 @@ def load_pretrained_mobilenet():

def load_pretrained_mobilenet320():
""""
TODO: Remove, not used anywhere in the repo
loads marqo trained model
"""
model = fasterrcnn_mobilenet_v3_large_fpn(device='cpu', num_classes=1204,
2 changes: 1 addition & 1 deletion src/marqo/s2_inference/reranking/cross_encoders.py
@@ -225,7 +225,7 @@ class ReRankerText(ReRanker):
"""
class for reranking with hf based text models
"""
def __init__(self, model_name: str, device: str = 'cpu', max_length: int = 512, num_highlights: int = 1,
def __init__(self, model_name: str, device: str, max_length: int = 512, num_highlights: int = 1,
split_params: Dict = get_default_text_processing_parameters()):
super().__init__()

14 changes: 7 additions & 7 deletions src/marqo/s2_inference/reranking/model_utils.py
@@ -98,12 +98,12 @@ def _verify_model_inputs(list_of_lists: List[List]) -> bool:
"""
return all(isinstance(x, (list, tuple)) for x in list_of_lists)

def convert_device_id_to_int(device: str = 'cpu'):
def convert_device_id_to_int(device: str):
"""maps the string device, 'cpu', 'cuda', 'cuda:#'
to an int for HF pipelines device representation
Args:
device (str, optional): _description_. Defaults to 'cpu'.
device (str, optional): No default.
Raises:
ValueError: _description_
@@ -153,7 +153,7 @@ class HFClassificationOnnx:
_type_: _description_
"""

def __init__(self, model_name: str, device: str = 'cpu', max_length: int = 512) -> None:
def __init__(self, model_name: str, device: str, max_length: int = 512) -> None:

self.model_name = model_name
self.save_path = None
@@ -239,7 +239,7 @@ def predict(self, inputs: List[Dict]) -> List[Dict]:
return self.outputs


def load_sbert_cross_encoder_model(model_name: str, device: str = 'cpu', max_length: int = 512) -> Dict:
def load_sbert_cross_encoder_model(model_name: str, device: str, max_length: int = 512) -> Dict:
"""
https://huggingface.co/cross-encoder/ms-marco-TinyBERT-L-2
scores = model.predict([('Query', 'Paragraph1'), ('Query', 'Paragraph2') , ('Query', 'Paragraph3')])
@@ -273,7 +273,7 @@ def load_sbert_cross_encoder_model(model_name: str, device: str = 'cpu', max_len
return {'model':model}


def load_hf_cross_encoder_model(model_name: str, device: str = 'cpu') -> Dict:
def load_hf_cross_encoder_model(model_name: str, device: str) -> Dict:
"""
features = tokenizer(['How many people live in Berlin?', 'How many people live in Berlin?'], ['Berlin has a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers.', 'New York City is famous for the Metropolitan Museum of Art.'], padding=True, truncation=True, return_tensors="pt")
@@ -301,12 +301,12 @@ def load_hf_cross_encoder_model(model_name: str, device: str = 'cpu') -> Dict:

return {'model':model, 'tokenizer':tokenizer}

def load_owl_vit(model_name: str, device: str = 'cpu') -> Dict:
def load_owl_vit(model_name: str, device: str) -> Dict:
"""loader for owl vit for image reranking
Args:
model_name (str): _description_
device (str, optional): _description_. Defaults to 'cpu'.
device (str, optional): _description_. No default.
Returns:
Dict: _description_
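convert_device_id_to_int, shown above, maps 'cpu'/'cuda'/'cuda:#' strings to the integer ids Hugging Face pipelines expect. A hedged sketch of that mapping (the function below is illustrative, not the repo's implementation):

    def device_string_to_hf_id(device: str) -> int:
        # HF pipelines take -1 for CPU and a non-negative index for a CUDA device.
        if device == "cpu":
            return -1
        if device == "cuda":
            return 0
        if device.startswith("cuda:"):
            return int(device.split(":", 1)[1])
        raise ValueError(f"unexpected device string: {device}")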
24 changes: 12 additions & 12 deletions src/marqo/s2_inference/s2_inference.py
@@ -2,26 +2,24 @@
The functions defined here would have endpoints, later on.
"""
import numpy as np
from marqo.errors import ModelCacheManagementError
from marqo.errors import ModelCacheManagementError, InvalidArgError, ConfigurationError, InternalError
from marqo.s2_inference.errors import (
VectoriseError, InvalidModelPropertiesError, ModelLoadError,
UnknownModelError, ModelNotInCacheError, ModelDownloadError)
from PIL import UnidentifiedImageError
from marqo.s2_inference.model_registry import load_model_properties
from marqo.s2_inference.configs import get_default_device, get_default_normalization, get_default_seq_length
from marqo.s2_inference.configs import get_default_normalization, get_default_seq_length
from marqo.s2_inference.types import *
from marqo.s2_inference.logger import get_logger
import torch
import datetime
from marqo.s2_inference import constants
from marqo.tensor_search.utils import read_env_vars_and_defaults
from marqo.tensor_search.enums import AvailableModelsKey
from marqo.tensor_search.configs import EnvVars
from marqo.tensor_search.models.private_models import ModelAuth
import threading
from marqo.tensor_search.utils import read_env_vars_and_defaults, generate_batches
from marqo.tensor_search.configs import EnvVars
from marqo.errors import ConfigurationError

logger = get_logger(__name__)

@@ -34,7 +32,7 @@


def vectorise(model_name: str, content: Union[str, List[str]], model_properties: dict = None,
device: str = get_default_device(), normalize_embeddings: bool = get_default_normalization(),
device: str = None, normalize_embeddings: bool = get_default_normalization(),
model_auth: ModelAuth = None, **kwargs) -> List[List[float]]:
"""vectorizes the content by model name
@@ -55,6 +53,9 @@ def vectorise(model_name: str, content: Union[str, List[str]], model_properties:
VectoriseError: if the content can't be vectorised, for some reason.
"""

if not device:
raise InternalError(message=f"vectorise (internal function) cannot be called without setting device!")

validated_model_properties = _validate_model_properties(model_name, model_properties)
model_cache_key = _create_model_cache_key(model_name, device, validated_model_properties)

@@ -311,15 +312,15 @@ def get_model_size(model_name: str, model_properties: dict) -> (int, float):


def _load_model(
model_name: str, model_properties: dict, device: Optional[str] = None,
model_name: str, model_properties: dict, device: str,
calling_func: str = None, model_auth: Optional[ModelAuth] = None
) -> Any:
"""_summary_
Args:
model_name (str): Actual model_name to be fetched from external library
prefer passing it in the form of model_properties['name']
device (str, optional): _description_. Defaults to 'cpu'.
device (str): Required. Should always be passed when loading model
model_auth: Authorisation details for downloading a model (if required)
Returns:
@@ -330,7 +331,6 @@ def _load_model(
f"`unit_test` or `_update_available_models` for threading safeness.")

print(f"loading for: model_name={model_name} and properties={model_properties}")
if device is None: device = get_default_device()
loader = _get_model_loader(model_properties.get('name', None), model_properties)

max_sequence_length = model_properties.get('tokens', get_default_seq_length())
@@ -402,18 +402,18 @@ def _check_output_type(output: List[List[float]]) -> bool:
return True


def _float_tensor_to_list(output: FloatTensor, device: str = get_default_device()) -> Union[
def _float_tensor_to_list(output: FloatTensor) -> Union[
List[List[float]], List[float]]:
"""
Args:
output (FloatTensor): _description_
Returns:
List[List[float]]: _description_
"""

return output.detach().to(device).tolist()

# Hardcoded to CPU always
return output.detach().to("cpu").tolist()


def _nd_array_to_list(output: ndarray) -> Union[List[List[float]], List[float]]:
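With this change, vectorise no longer falls back to a default device and raises InternalError when none is given. A hedged usage sketch (model name and content are illustrative):

    from marqo.s2_inference.s2_inference import vectorise

    embeddings = vectorise(
        model_name="ViT-B/32",
        content=["a photo of a cat"],
        device="cpu",  # now required; there is no implicit default
        normalize_embeddings=True,
    )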
