openvinotoolkit · samet-akcay · May 17, 2024 · May 10, 2024 · May 10, 2024 · May 15, 2024
@@ -39,7 +39,7 @@ core = [
     "torchmetrics>=1.3.2",
     "open-clip-torch>=2.23.0",
 ]
-openvino = ["openvino-dev>=2023.1", "nncf>=2.6.0", "onnx>=1.16.0"]
+openvino = ["openvino>=2024.0", "nncf>=2.10.0", "onnx>=1.16.0"]
 loggers = [
     "comet-ml>=3.31.7",
     "gradio>=4",

@@ -3,11 +3,12 @@
 # Copyright (C) 2022-2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-from .export import ExportType, export_to_onnx, export_to_openvino, export_to_torch
+from .export import CompressionType, ExportType, export_to_onnx, export_to_openvino, export_to_torch
 from .inferencers import Inferencer, OpenVINOInferencer, TorchInferencer
 
 __all__ = [
     "ExportType",
+    "CompressionType",
     "Inferencer",
     "OpenVINOInferencer",
     "TorchInferencer",

@@ -16,6 +16,7 @@
 from torchvision.transforms.v2 import CenterCrop, Compose, Resize, Transform
 
 from anomalib import TaskType
+from anomalib.data import AnomalibDataModule
 from anomalib.data.transforms import ExportableCenterCrop
 from anomalib.models.components import AnomalyModule
 from anomalib.utils.exceptions import try_import
@@ -44,6 +45,29 @@ class ExportType(str, Enum):
     TORCH = "torch"
 
 
+class CompressionType(str, Enum):
+    """Model compression type when exporting to OpenVINO.
+
+    Examples:
+        >>> from anomalib.deploy import CompressionType
+        >>> CompressionType.INT8_PTQ.value
+        'int8_ptq'
+    """
+
+    FP16 = "fp16"
+    """
+    Weight compression (FP16)
+    """
+    INT8 = "int8"
+    """
+    Weight compression (INT8)
+    """
+    INT8_PTQ = "int8_ptq"
+    """
+    Full integer quantization (INT8)
+    """
+
+
 class InferenceModel(nn.Module):
     """Inference model for export.
 
@@ -235,6 +259,8 @@ def export_to_openvino(
     export_root: Path | str,
     input_size: tuple[int, int] | None = None,
     transform: Transform | None = None,
+    compression_type: CompressionType | None = CompressionType.FP16,
+    datamodule: AnomalibDataModule | None = None,
     ov_args: dict[str, Any] | None = None,
     task: TaskType | None = None,
 ) -> Path:
@@ -248,7 +274,11 @@ def export_to_openvino(
         transform (Transform, optional): Input transforms used for the model. If not provided, the transform is taken
             from the model.
             Defaults to ``None``.
-        ov_args: Model optimizer arguments for OpenVINO model conversion.
+        compression_type (CompressionType, optional): Compression type for better inference performance.
+            Defaults to ``CompressionType.FP16``.
+        datamodule (AnomalibDataModule | None, optional): Lightning datamodule.
+            Must be provided if CompressionType.INT8_PTQ is selected. Defaults to None.
+        ov_args (dict | None): Model optimizer arguments for OpenVINO model conversion.
             Defaults to ``None``.
         task (TaskType | None): Task type.
             Defaults to ``None``.
@@ -299,15 +329,28 @@ def export_to_openvino(
         logger.exception("Could not find OpenVINO. Please check OpenVINO installation.")
         raise ModuleNotFoundError
 
+    import nncf
     import openvino as ov
 
     model_path = export_to_onnx(model, export_root, input_size, transform, task, ExportType.OPENVINO)
     ov_model_path = model_path.with_suffix(".xml")
     ov_args = {} if ov_args is None else ov_args
-    # fp16 compression is enabled by default
-    compress_to_fp16 = ov_args.get("compress_to_fp16", True)
 
     model = ov.convert_model(model_path, **ov_args)
+    if compression_type == CompressionType.INT8:
+        model = nncf.compress_weights(model)
+    elif compression_type == CompressionType.INT8_PTQ:
+        assert datamodule is not None, "datamodule must be provided for OpenVINO INT8_PTQ compression"
+        dataloader = datamodule.val_dataloader()
+        if len(dataloader.dataset) < 300:
+            logger.warning(
+                f">300 images recommended for INT8 quantization, found only {len(dataloader.dataset)} images",
+            )
+        calibration_dataset = nncf.Dataset(dataloader, lambda x: x["image"])
+        model = nncf.quantize(model, calibration_dataset)
+
+    # fp16 compression is enabled by default
+    compress_to_fp16 = compression_type == CompressionType.FP16
     ov.save_model(model, ov_model_path, compress_to_fp16=compress_to_fp16)
 
     return ov_model_path

@@ -23,10 +23,10 @@
 logger = logging.getLogger("anomalib")
 
 if find_spec("openvino") is not None:
-    import openvino.runtime as ov
+    import openvino as ov
 
     if TYPE_CHECKING:
-        from openvino.runtime import CompiledModel
+        from openvino import CompiledModel
 else:
     logger.warning("OpenVINO is not installed. Please install OpenVINO to use OpenVINOInferencer.")
 

@@ -26,7 +26,7 @@
 from anomalib.callbacks.timer import TimerCallback
 from anomalib.callbacks.visualizer import _VisualizationCallback
 from anomalib.data import AnomalibDataModule, AnomalibDataset, PredictDataset
-from anomalib.deploy.export import ExportType, export_to_onnx, export_to_openvino, export_to_torch
+from anomalib.deploy.export import CompressionType, ExportType, export_to_onnx, export_to_openvino, export_to_torch
 from anomalib.models import AnomalyModule
 from anomalib.utils.normalization import NormalizationMethod
 from anomalib.utils.path import create_versioned_dir
@@ -869,6 +869,8 @@ def export(
         export_root: str | Path | None = None,
         input_size: tuple[int, int] | None = None,
         transform: Transform | None = None,
+        compression_type: CompressionType | None = CompressionType.FP16,
+        datamodule: AnomalibDataModule | None = None,
         ov_args: dict[str, Any] | None = None,
         ckpt_path: str | Path | None = None,
     ) -> Path | None:
@@ -882,7 +884,11 @@ def export(
             input_size (tuple[int, int] | None, optional): A statis input shape for the model, which is exported to ONNX
                 and OpenVINO format. Defaults to None.
             transform (Transform | None, optional): Input transform to include in the exported model. If not provided,
-                the engine will try to use the transform from the datamodule or dataset. Defaults to None.
+                the transform is taken from the model. Defaults to None.
+            compression_type (CompressionType | None, optional): Compression type for OpenVINO exporting only.
+                Defaults to ``CompressionType.FP16``.
+            datamodule (AnomalibDataModule | None, optional): Lightning datamodule.
+                Must be provided if CompressionType.INT8_PTQ is selected. Defaults to None.
             ov_args (dict[str, Any] | None, optional): This is optional and used only for OpenVINO's model optimizer.
                 Defaults to None.
             ckpt_path (str | Path | None): Checkpoint path. If provided, the model will be loaded from this path.
@@ -944,6 +950,8 @@ def export(
                 input_size=input_size,
                 transform=transform,
                 task=self.task,
+                compression_type=compression_type,
+                datamodule=datamodule,
                 ov_args=ov_args,
             )
         else: