Merge branch 'mainline' into farshid/disable-auto-create-index
farshidz committed Jun 26, 2023
2 parents 90fa67c + 41de2fb commit 8e7035b
Showing 54 changed files with 1,539 additions and 604 deletions.
6 changes: 0 additions & 6 deletions src/marqo/config.py
@@ -7,8 +7,6 @@ def __init__(
self,
url: str,
timeout: Optional[int] = None,
indexing_device: Optional[Union[enums.Device, str]] = None,
search_device: Optional[Union[enums.Device, str]] = None,
backend: Optional[Union[enums.SearchDb, str]] = None,
) -> None:
"""
@@ -20,10 +18,6 @@ def __init__(
self.cluster_is_remote = False
self.url = self.set_url(url)
self.timeout = timeout
default_device = enums.Device.cpu

self.indexing_device = indexing_device if indexing_device is not None else default_device
self.search_device = search_device if search_device is not None else default_device
self.backend = backend if backend is not None else enums.SearchDb.opensearch

def set_url(self, url):
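A hedged sketch (not part of the diff) of how the slimmed-down constructor above is used once indexing_device and search_device are gone; the class name Config and the URL value are assumptions for illustration:

    from marqo.config import Config

    # Only connection-level settings remain; device selection now happens
    # per request instead of being fixed on the config object.
    config = Config(url="http://localhost:9200", timeout=30)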
19 changes: 15 additions & 4 deletions src/marqo/s2_inference/clip_utils.py
@@ -17,6 +17,7 @@
from marqo.s2_inference.processing.custom_clip_utils import HFTokenizer, download_model
from torchvision.transforms import InterpolationMode
from marqo.s2_inference.configs import ModelCache
from marqo.errors import InternalError
from marqo.tensor_search.telemetry import RequestMetrics, RequestMetricsStore

logger = get_logger(__name__)
@@ -199,10 +200,13 @@ class CLIP:
conveniance class wrapper to make clip work easily for both text and image encoding
"""

def __init__(self, model_type: str = "ViT-B/32", device: str = 'cpu', embedding_dim: int = None,
def __init__(self, model_type: str = "ViT-B/32", device: str = None, embedding_dim: int = None,
truncate: bool = True, **kwargs) -> None:

self.model_type = model_type

if not device:
raise InternalError("`device` is required for loading CLIP models!")
self.device = device
self.model = None
self.tokenizer = None
@@ -247,6 +251,7 @@ def load(self) -> None:
path = self.model_properties.get("localpath", None) or self.model_properties.get("url",None)

if path is None and not model_location_presence:
# We must load the model into CPU then transfer it to the desired device, always
# The original method to load the openai clip model
# https://github.com/openai/CLIP/issues/30
self.model, self.preprocess = clip.load(self.model_type, device='cpu', jit=False, download_root=ModelCache.clip_cache_path)
@@ -281,6 +286,7 @@ def custom_clip_load(self):
self.model_name = self.model_properties.get("name", None)

logger.info(f"The name of the custom clip model is {self.model_name}. We use openai clip load")
# We must load the model into CPU then transfer it to the desired device, always
model, preprocess = clip.load(name=self.model_path, device="cpu", jit= self.jit, download_root=ModelCache.clip_cache_path)
model = model.to(self.device)
return model, preprocess
@@ -364,7 +370,7 @@ def encode(self, inputs: Union[str, ImageType, List[Union[str, ImageType]]],


class FP16_CLIP(CLIP):
def __init__(self, model_type: str = "fp16/ViT-B/32", device: str = 'cuda', embedding_dim: int = None,
def __init__(self, model_type: str = "fp16/ViT-B/32", device: str = None, embedding_dim: int = None,
truncate: bool = True, **kwargs) -> None:
super().__init__(model_type, device, embedding_dim, truncate, **kwargs)
'''This class loads the provided clip model directly from cuda in float16 version. The inference time is halved
@@ -390,7 +396,7 @@ def load(self) -> None:


class OPEN_CLIP(CLIP):
def __init__(self, model_type: str = "open_clip/ViT-B-32-quickgelu/laion400m_e32", device: str = 'cpu', embedding_dim: int = None,
def __init__(self, model_type: str = "open_clip/ViT-B-32-quickgelu/laion400m_e32", device: str = None, embedding_dim: int = None,
truncate: bool = True, **kwargs) -> None:
super().__init__(model_type, device, embedding_dim, truncate , **kwargs)
self.model_name = model_type.split("/", 3)[1] if model_type.startswith("open_clip/") else model_type
@@ -511,9 +517,12 @@ def encode_text(self, sentence: Union[str, List[str]], normalize=True) -> FloatT


class MULTILINGUAL_CLIP(CLIP):
def __init__(self, model_type: str = "multilingual-clip/ViT-L/14", device: str = 'cpu', embedding_dim: int = None,
def __init__(self, model_type: str = "multilingual-clip/ViT-L/14", device: str = None, embedding_dim: int = None,
truncate: bool = True, **kwargs) -> None:

if not device:
raise InternalError("`device` is required for loading MULTILINGUAL CLIP models!")

self.model_name = model_type
self.model_info = get_multilingual_clip_properties()[self.model_name]
self.visual_name = self.model_info["visual_model"]
@@ -526,6 +535,8 @@ def __init__(self, model_type: str = "multilingual-clip/ViT-L/14", device: str =
def load(self) -> None:
if self.visual_name.startswith("openai/"):
clip_name = self.visual_name.replace("openai/", "")
# We must load the model into CPU then transfer it to the desired device, always
# The reason is this issue: https://github.com/openai/CLIP/issues/30
self.visual_model, self.preprocess = clip.load(name = clip_name, device = "cpu", jit = False, download_root=ModelCache.clip_cache_path)
self.visual_model = self.visual_model.to(self.device)
self.visual_model = self.visual_model.visual
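The repeated "load the model into CPU then transfer it to the desired device" comments added above refer to https://github.com/openai/CLIP/issues/30. A minimal sketch of that pattern, with an illustrative model name and cache path:

    import clip

    def load_clip_on(device: str, model_type: str = "ViT-B/32", cache_path: str = None):
        # Load on CPU first (see the issue linked above), never directly onto the target device ...
        model, preprocess = clip.load(model_type, device="cpu", jit=False, download_root=cache_path)
        # ... then move the weights to the device the caller asked for.
        model = model.to(device)
        return model, preprocess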
5 changes: 0 additions & 5 deletions src/marqo/s2_inference/configs.py
@@ -27,11 +27,6 @@ class Ignore:

files = ('flax_model.msgpack', 'rust_model.ot', 'tf_model.h5')



def get_default_device():
return 'cpu'

def get_default_normalization():
return True

2 changes: 1 addition & 1 deletion src/marqo/s2_inference/hf_utils.py
@@ -25,7 +25,7 @@ class HF_MODEL(Model):

def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)

if self.max_seq_length is None:
self.max_seq_length = 128
self.model_properties = kwargs.get("model_properties", dict())
5 changes: 4 additions & 1 deletion src/marqo/s2_inference/onnx_clip_utils.py
@@ -18,6 +18,7 @@
from zipfile import ZipFile
from huggingface_hub.utils import RevisionNotFoundError,RepositoryNotFoundError, EntryNotFoundError, LocalEntryNotFoundError
from marqo.s2_inference.errors import ModelDownloadError
from marqo.errors import InternalError

# Loading shared functions from clip_utils.py. This part should be decoupled from models in the future
from marqo.s2_inference.clip_utils import get_allowed_image_types, format_and_load_CLIP_image, \
@@ -56,11 +57,13 @@ class CLIP_ONNX(object):
Load a clip model and convert it to onnx version for faster inference
"""

def __init__(self, model_name="onnx32/openai/ViT-L/14", device="cpu", embedding_dim: int = None,
def __init__(self, model_name: str ="onnx32/openai/ViT-L/14", device: str = None, embedding_dim: int = None,
truncate: bool = True,
load=True, **kwargs):
self.model_name = model_name
self.onnx_type, self.source, self.clip_model = self.model_name.split("/", 2)
if not device:
raise InternalError("`device` is required for loading CLIP ONNX models!")
self.device = device
self.truncate = truncate
self.provider = ['CUDAExecutionProvider', "CPUExecutionProvider"] if self.device.startswith("cuda") else [
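The provider list built in the constructor above is what onnxruntime uses to choose an execution backend. A hedged sketch of how such a list is typically consumed (the model file name and input shape are placeholders, not files from this repo):

    import numpy as np
    import onnxruntime as ort

    providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]  # CUDA first, CPU as fallback
    session = ort.InferenceSession("clip_visual.onnx", providers=providers)  # placeholder file
    input_name = session.get_inputs()[0].name
    outputs = session.run(None, {input_name: np.zeros((1, 3, 224, 224), dtype=np.float32)})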
8 changes: 6 additions & 2 deletions src/marqo/s2_inference/processing/DINO_utils.py
@@ -7,6 +7,7 @@
from marqo.s2_inference.s2_inference import get_logger
from marqo.s2_inference.types import Dict, List, Union, ImageType, Tuple, FloatTensor, ndarray, Any, Literal
from marqo.s2_inference.errors import ModelLoadError
from marqo.errors import InternalError

logger = get_logger(__name__)

@@ -82,20 +83,23 @@ def _get_DINO_transform(image_size: Tuple = (224, 224)) -> Any:
])

def DINO_inference(model: Any, transform: Any, img: ImageType = None,
patch_size: int = None, device: str = "cpu") -> FloatTensor:
patch_size: int = None, device: str = None) -> FloatTensor:
"""runs inference for a model, transform and image
Args:
model (Any): ('vit_small', 'vit_base')
transform (Any): _get_DINO_transform
img (ImageType, optional): the image to infer on. Defaults to None.
patch_size (int, optional): the patch size the model architecture uses. Defaults to None.
device (str, optional): device for the model to run on. Defaults to "cpu".
device (str): device for the model to run on. Required to be set
Returns:
FloatTensor: returns N x w x h tensor
"""

if not device:
raise InternalError("`device` is required for DINO inference!")

img = transform(img)

# make the image divisible by the patch size
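The "make the image divisible by the patch size" step above is the usual ViT-style constraint; a minimal sketch of one way to enforce it (an assumption, not necessarily the repo's exact code):

    import torch

    def crop_to_patch_multiple(img: torch.Tensor, patch_size: int) -> torch.Tensor:
        # img is a (C, H, W) tensor; trim H and W down to the nearest multiple of patch_size.
        h = img.shape[1] - img.shape[1] % patch_size
        w = img.shape[2] - img.shape[2] % patch_size
        return img[:, :h, :w]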
9 changes: 7 additions & 2 deletions src/marqo/s2_inference/processing/image.py
@@ -37,6 +37,8 @@
generate_boxes
)

from marqo.errors import InternalError

logger = get_logger(__name__)


@@ -151,13 +153,13 @@ def process(self):
class PatchifyModel:
"""class to do the patching. this is the base class for model based chunking
"""
def __init__(self, device: str = 'cpu', size: Tuple = (224, 224), min_area: float = 60*60,
def __init__(self, device: str = None, size: Tuple = (224, 224), min_area: float = 60*60,
nms: bool = True, replace_small: bool = True, top_k: int = 10,
filter_bb: bool = True, min_area_replace: float = 60*60, **kwargs):
"""_summary_
Args:
device (str, optional): the device to run the model on. Defaults to 'cpu'.
device (str): the device to run the model on. Required to be set.
size (Tuple, optional): the final image size to go to the model. Defaults to (224, 224).
min_area (float, optional): the min area (pixels) that a box must meet to be kept.
areas lower than this are removed. Defaults to 60*60.
@@ -172,6 +174,9 @@ def __init__(self, device: str = 'cpu', size: Tuple = (224, 224), min_area: floa

# this is the resized size
self.size = size

if not device:
raise InternalError("`device` is required for loading CLIP models!")
self.device = device

self.min_area = min_area
2 changes: 2 additions & 0 deletions src/marqo/s2_inference/processing/pytorch_utils.py
@@ -32,6 +32,7 @@ def load_pytorch(model_name: str, device: str):

def load_pretrained_mobilenet():
""""
TODO: Remove, not used anywhere in the repo
loads marqo trained model
"""
model = fasterrcnn_mobilenet_v3_large_fpn(device='cpu', num_classes=1204,
@@ -51,6 +52,7 @@ def load_pretrained_mobilenet():

def load_pretrained_mobilenet320():
""""
TODO: Remove, not used anywhere in the repo
loads marqo trained model
"""
model = fasterrcnn_mobilenet_v3_large_fpn(device='cpu', num_classes=1204,
2 changes: 1 addition & 1 deletion src/marqo/s2_inference/reranking/cross_encoders.py
@@ -225,7 +225,7 @@ class ReRankerText(ReRanker):
"""
class for reranking with hf based text models
"""
def __init__(self, model_name: str, device: str = 'cpu', max_length: int = 512, num_highlights: int = 1,
def __init__(self, model_name: str, device: str, max_length: int = 512, num_highlights: int = 1,
split_params: Dict = get_default_text_processing_parameters()):
super().__init__()

14 changes: 7 additions & 7 deletions src/marqo/s2_inference/reranking/model_utils.py
@@ -98,12 +98,12 @@ def _verify_model_inputs(list_of_lists: List[List]) -> bool:
"""
return all(isinstance(x, (list, tuple)) for x in list_of_lists)

def convert_device_id_to_int(device: str = 'cpu'):
def convert_device_id_to_int(device: str):
"""maps the string device, 'cpu', 'cuda', 'cuda:#'
to an int for HF pipelines device representation
Args:
device (str, optional): _description_. Defaults to 'cpu'.
device (str, optional): No default.
Raises:
ValueError: _description_
@@ -153,7 +153,7 @@ class HFClassificationOnnx:
_type_: _description_
"""

def __init__(self, model_name: str, device: str = 'cpu', max_length: int = 512) -> None:
def __init__(self, model_name: str, device: str, max_length: int = 512) -> None:

self.model_name = model_name
self.save_path = None
@@ -239,7 +239,7 @@ def predict(self, inputs: List[Dict]) -> List[Dict]:
return self.outputs


def load_sbert_cross_encoder_model(model_name: str, device: str = 'cpu', max_length: int = 512) -> Dict:
def load_sbert_cross_encoder_model(model_name: str, device: str, max_length: int = 512) -> Dict:
"""
https://huggingface.co/cross-encoder/ms-marco-TinyBERT-L-2
scores = model.predict([('Query', 'Paragraph1'), ('Query', 'Paragraph2') , ('Query', 'Paragraph3')])
@@ -273,7 +273,7 @@ def load_sbert_cross_encoder_model(model_name: str, device: str = 'cpu', max_len
return {'model':model}


def load_hf_cross_encoder_model(model_name: str, device: str = 'cpu') -> Dict:
def load_hf_cross_encoder_model(model_name: str, device: str) -> Dict:
"""
features = tokenizer(['How many people live in Berlin?', 'How many people live in Berlin?'], ['Berlin has a population of 3,520,031 registered inhabitants in an area of 891.82 square kilometers.', 'New York City is famous for the Metropolitan Museum of Art.'], padding=True, truncation=True, return_tensors="pt")
@@ -301,12 +301,12 @@ def load_hf_cross_encoder_model(model_name: str, device: str = 'cpu') -> Dict:

return {'model':model, 'tokenizer':tokenizer}

def load_owl_vit(model_name: str, device: str = 'cpu') -> Dict:
def load_owl_vit(model_name: str, device: str) -> Dict:
"""loader for owl vit for image reranking
Args:
model_name (str): _description_
device (str, optional): _description_. Defaults to 'cpu'.
device (str, optional): _description_. No default.
Returns:
Dict: _description_
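convert_device_id_to_int, shown above, maps 'cpu'/'cuda'/'cuda:#' strings to the integer ids Hugging Face pipelines expect. A hedged sketch of that mapping (the function below is illustrative, not the repo's implementation):

    def device_string_to_hf_id(device: str) -> int:
        # HF pipelines take -1 for CPU and a non-negative index for a CUDA device.
        if device == "cpu":
            return -1
        if device == "cuda":
            return 0
        if device.startswith("cuda:"):
            return int(device.split(":", 1)[1])
        raise ValueError(f"unexpected device string: {device}")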
24 changes: 12 additions & 12 deletions src/marqo/s2_inference/s2_inference.py
@@ -2,26 +2,24 @@
The functions defined here would have endpoints, later on.
"""
import numpy as np
from marqo.errors import ModelCacheManagementError
from marqo.errors import ModelCacheManagementError, InvalidArgError, ConfigurationError, InternalError
from marqo.s2_inference.errors import (
VectoriseError, InvalidModelPropertiesError, ModelLoadError,
UnknownModelError, ModelNotInCacheError, ModelDownloadError)
from PIL import UnidentifiedImageError
from marqo.s2_inference.model_registry import load_model_properties
from marqo.s2_inference.configs import get_default_device, get_default_normalization, get_default_seq_length
from marqo.s2_inference.configs import get_default_normalization, get_default_seq_length
from marqo.s2_inference.types import *
from marqo.s2_inference.logger import get_logger
import torch
import datetime
from marqo.s2_inference import constants
from marqo.tensor_search.utils import read_env_vars_and_defaults
from marqo.tensor_search.enums import AvailableModelsKey
from marqo.tensor_search.configs import EnvVars
from marqo.tensor_search.models.private_models import ModelAuth
import threading
from marqo.tensor_search.utils import read_env_vars_and_defaults, generate_batches
from marqo.tensor_search.configs import EnvVars
from marqo.errors import ConfigurationError

logger = get_logger(__name__)

@@ -34,7 +32,7 @@


def vectorise(model_name: str, content: Union[str, List[str]], model_properties: dict = None,
device: str = get_default_device(), normalize_embeddings: bool = get_default_normalization(),
device: str = None, normalize_embeddings: bool = get_default_normalization(),
model_auth: ModelAuth = None, **kwargs) -> List[List[float]]:
"""vectorizes the content by model name
@@ -55,6 +53,9 @@ def vectorise(model_name: str, content: Union[str, List[str]], model_properties:
VectoriseError: if the content can't be vectorised, for some reason.
"""

if not device:
raise InternalError(message=f"vectorise (internal function) cannot be called without setting device!")

validated_model_properties = _validate_model_properties(model_name, model_properties)
model_cache_key = _create_model_cache_key(model_name, device, validated_model_properties)

@@ -311,15 +312,15 @@ def get_model_size(model_name: str, model_properties: dict) -> (int, float):


def _load_model(
model_name: str, model_properties: dict, device: Optional[str] = None,
model_name: str, model_properties: dict, device: str,
calling_func: str = None, model_auth: Optional[ModelAuth] = None
) -> Any:
"""_summary_
Args:
model_name (str): Actual model_name to be fetched from external library
prefer passing it in the form of model_properties['name']
device (str, optional): _description_. Defaults to 'cpu'.
device (str): Required. Should always be passed when loading model
model_auth: Authorisation details for downloading a model (if required)
Returns:
@@ -330,7 +331,6 @@ def _load_model(
f"`unit_test` or `_update_available_models` for threading safeness.")

print(f"loading for: model_name={model_name} and properties={model_properties}")
if device is None: device = get_default_device()
loader = _get_model_loader(model_properties.get('name', None), model_properties)

max_sequence_length = model_properties.get('tokens', get_default_seq_length())
@@ -402,18 +402,18 @@ def _check_output_type(output: List[List[float]]) -> bool:
return True


def _float_tensor_to_list(output: FloatTensor, device: str = get_default_device()) -> Union[
def _float_tensor_to_list(output: FloatTensor) -> Union[
List[List[float]], List[float]]:
"""
Args:
output (FloatTensor): _description_
Returns:
List[List[float]]: _description_
"""

return output.detach().to(device).tolist()

# Hardcoded to CPU always
return output.detach().to("cpu").tolist()


def _nd_array_to_list(output: ndarray) -> Union[List[List[float]], List[float]]:
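With this change, vectorise no longer falls back to a default device and raises InternalError when none is given. A hedged usage sketch (model name and content are illustrative):

    from marqo.s2_inference.s2_inference import vectorise

    embeddings = vectorise(
        model_name="ViT-B/32",
        content=["a photo of a cat"],
        device="cpu",  # now required; there is no implicit default
        normalize_embeddings=True,
    )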
