diff --git a/CHANGELOG.md b/CHANGELOG.md
index 29cff8fc46..5c3c2ebb0c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 - 🚀 Update OpenVINO and ONNX export to support fixed input shape by @adrianboguszewski in https://github.com/openvinotoolkit/anomalib/pull/2006
 - Add data_path argument to predict entrypoint and add properties for retrieving model path by @djdameln in https://github.com/openvinotoolkit/anomalib/pull/2018
+- 🚀 Add compression and quantization for OpenVINO export by @adrianboguszewski in https://github.com/openvinotoolkit/anomalib/pull/2052
 
 ### Changed
diff --git a/README.md b/README.md
index 34a8f26c39..8c336e07b4 100644
--- a/README.md
+++ b/README.md
@@ -38,7 +38,7 @@ Anomalib is a deep learning library that aims to collect state-of-the-art anomal
 - Simple and modular API and CLI for training, inference, benchmarking, and hyperparameter optimization.
 - The largest public collection of ready-to-use deep learning anomaly detection algorithms and benchmark datasets.
 - [**Lightning**](https://www.lightning.ai/) based model implementations to reduce boilerplate code and limit the implementation efforts to the bare essentials.
-- All models can be exported to [**OpenVINO**](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/overview.html) Intermediate Representation (IR) for accelerated inference on intel hardware.
+- The majority of models can be exported to [**OpenVINO**](https://www.intel.com/content/www/us/en/developer/tools/openvino-toolkit/overview.html) Intermediate Representation (IR) for accelerated inference on Intel hardware.
 - A set of [inference tools](tools) for quick and easy deployment of the standard or custom anomaly detection models.
 
 # 📦 Installation
diff --git a/docs/source/index.md b/docs/source/index.md
index b53269e4d0..eea06f1275 100644
--- a/docs/source/index.md
+++ b/docs/source/index.md
@@ -8,7 +8,7 @@ Anomalib is a deep learning library that aims to collect state-of-the-art anomal
 - Simple and modular API and CLI for training, inference, benchmarking, and hyperparameter optimization.
 - The largest public collection of ready-to-use deep learning anomaly detection algorithms and benchmark datasets.
 - Lightning based model implementations to reduce boilerplate code and limit the implementation efforts to the bare essentials.
-- All models can be exported to OpenVINO Intermediate Representation (IR) for accelerated inference on intel hardware.
+- The majority of models can be exported to OpenVINO Intermediate Representation (IR) for accelerated inference on Intel hardware.
 - A set of inference tools for quick and easy deployment of the standard or custom anomaly detection models.
 :::
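
Reviewer note: the feature bullets above advertise OpenVINO export, and the changelog entry adds compression on top of it. For orientation, a minimal sketch of the plain export path through the Python API (hedged: `Padim` and the checkpoint path are illustrative placeholders, not part of this diff):

```python
from anomalib.deploy import ExportType
from anomalib.engine import Engine
from anomalib.models import Padim

# Export a previously trained model to OpenVINO IR.
engine = Engine()
model = Padim()
ov_model_path = engine.export(
    model=model,
    export_type=ExportType.OPENVINO,
    ckpt_path="path/to/model.ckpt",  # placeholder checkpoint path
)
```
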
diff --git a/pyproject.toml b/pyproject.toml
index a8a57bfc1e..69b767601a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,7 +39,7 @@ core = [
     "torchmetrics>=1.3.2",
     "open-clip-torch>=2.23.0",
 ]
-openvino = ["openvino-dev>=2023.1", "nncf>=2.6.0", "onnx>=1.16.0"]
+openvino = ["openvino>=2024.0", "nncf>=2.10.0", "onnx>=1.16.0"]
 loggers = [
     "comet-ml>=3.31.7",
     "gradio>=4",
diff --git a/src/anomalib/cli/cli.py b/src/anomalib/cli/cli.py
index d13ed039ec..b619b8317c 100644
--- a/src/anomalib/cli/cli.py
+++ b/src/anomalib/cli/cli.py
@@ -229,10 +229,15 @@ def add_export_arguments(self, parser: ArgumentParser) -> None:
             fail_untyped=False,
             required=True,
         )
+        parser.add_argument(
+            "--data",
+            type=AnomalibDataModule,
+            required=False,
+        )
         added = parser.add_method_arguments(
             Engine,
             "export",
-            skip={"ov_args", "model"},
+            skip={"ov_args", "model", "datamodule"},
         )
         self.subcommand_method_arguments["export"] = added
         add_openvino_export_arguments(parser)
diff --git a/src/anomalib/deploy/__init__.py b/src/anomalib/deploy/__init__.py
index 45581bd8dd..e2bec10b1f 100644
--- a/src/anomalib/deploy/__init__.py
+++ b/src/anomalib/deploy/__init__.py
@@ -3,7 +3,7 @@
 # Copyright (C) 2022-2024 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 
-from .export import ExportType
+from .export import CompressionType, ExportType
 from .inferencers import Inferencer, OpenVINOInferencer, TorchInferencer
 
-__all__ = ["Inferencer", "OpenVINOInferencer", "TorchInferencer", "ExportType"]
+__all__ = ["Inferencer", "OpenVINOInferencer", "TorchInferencer", "ExportType", "CompressionType"]
diff --git a/src/anomalib/deploy/export.py b/src/anomalib/deploy/export.py
index 2430413fbc..87066c9ef9 100644
--- a/src/anomalib/deploy/export.py
+++ b/src/anomalib/deploy/export.py
@@ -33,6 +33,32 @@ class ExportType(str, Enum):
     TORCH = "torch"
 
 
+class CompressionType(str, Enum):
+    """Model compression type when exporting to OpenVINO.
+
+    Examples:
+        >>> from anomalib.deploy import CompressionType
+        >>> CompressionType.INT8_PTQ
+        'int8_ptq'
+    """
+
+    FP16 = "fp16"
+    """
+    Weight compression (FP16)
+    All weights are converted to FP16.
+    """
+    INT8 = "int8"
+    """
+    Weight compression (INT8)
+    All weights are quantized to INT8, but are dequantized to floating point before inference.
+    """
+    INT8_PTQ = "int8_ptq"
+    """
+    Full integer post-training quantization (INT8)
+    All weights and operations are quantized to INT8. Inference is done in INT8 precision.
+    """
+
+
 class InferenceModel(nn.Module):
     """Inference model for export.
diff --git a/src/anomalib/deploy/inferencers/openvino_inferencer.py b/src/anomalib/deploy/inferencers/openvino_inferencer.py
index 3206b39e30..91970a0adf 100644
--- a/src/anomalib/deploy/inferencers/openvino_inferencer.py
+++ b/src/anomalib/deploy/inferencers/openvino_inferencer.py
@@ -23,10 +23,10 @@
 logger = logging.getLogger("anomalib")
 
 if find_spec("openvino") is not None:
-    import openvino.runtime as ov
+    import openvino as ov
 
     if TYPE_CHECKING:
-        from openvino.runtime import CompiledModel
+        from openvino import CompiledModel
 else:
     logger.warning("OpenVINO is not installed. Please install OpenVINO to use OpenVINOInferencer.")
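
Reviewer note: since `CompressionType` is a `str`-based `Enum` (like `ExportType` above), its members compare equal to their plain string values, which is what lets string arguments from the CLI round-trip cleanly. A quick sketch:

```python
from anomalib.deploy import CompressionType

# str-Enum members behave like their string values...
assert CompressionType.INT8_PTQ == "int8_ptq"
# ...and plain strings can be looked up back to members.
assert CompressionType("fp16") is CompressionType.FP16
```
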
diff --git a/src/anomalib/engine/engine.py b/src/anomalib/engine/engine.py
index 6dbaa15a10..c41ecdf531 100644
--- a/src/anomalib/engine/engine.py
+++ b/src/anomalib/engine/engine.py
@@ -26,7 +26,7 @@ from anomalib.callbacks.timer import TimerCallback
 from anomalib.callbacks.visualizer import _VisualizationCallback
 from anomalib.data import AnomalibDataModule, AnomalibDataset, PredictDataset
-from anomalib.deploy import ExportType
+from anomalib.deploy import CompressionType, ExportType
 from anomalib.models import AnomalyModule
 from anomalib.utils.normalization import NormalizationMethod
 from anomalib.utils.path import create_versioned_dir
@@ -869,6 +869,8 @@ def export(
         export_root: str | Path | None = None,
         input_size: tuple[int, int] | None = None,
         transform: Transform | None = None,
+        compression_type: CompressionType | None = None,
+        datamodule: AnomalibDataModule | None = None,
         ov_args: dict[str, Any] | None = None,
         ckpt_path: str | Path | None = None,
     ) -> Path | None:
@@ -884,6 +886,11 @@ def export(
             transform (Transform | None, optional): Input transform to include in the exported model.
                 If not provided, the engine will try to use the default transform from the model.
                 Defaults to ``None``.
+            compression_type (CompressionType | None, optional): Compression type for OpenVINO exporting only.
+                Defaults to ``None``.
+            datamodule (AnomalibDataModule | None, optional): Lightning datamodule.
+                Must be provided if CompressionType.INT8_PTQ is selected.
+                Defaults to ``None``.
             ov_args (dict[str, Any] | None, optional): This is optional and used only for OpenVINO's model optimizer.
                 Defaults to None.
             ckpt_path (str | Path | None): Checkpoint path. If provided, the model will be loaded from this path.
@@ -910,7 +917,7 @@ def export(
                 anomalib export --model Padim --export_mode openvino --ckpt_path \
                 --input_size "[256,256]"
                 ```
-            4. You can also overrride OpenVINO model optimizer by adding the ``--ov_args.`` arguments.
+            4. You can also override OpenVINO model optimizer by adding the ``--ov_args.`` arguments.
                 ```python
                 anomalib export --model Padim --export_mode openvino --ckpt_path \
                 --input_size "[256,256]" --ov_args.compress_to_fp16 False
                 ```
@@ -945,6 +952,8 @@ def export(
                 input_size=input_size,
                 transform=transform,
                 task=self.task,
+                compression_type=compression_type,
+                datamodule=datamodule,
                 ov_args=ov_args,
             )
         else:
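
Reviewer note: a short sketch of how the two new `export()` arguments combine for full INT8 post-training quantization (hedged: the model, datamodule, and checkpoint path are illustrative; per the docstring above, `datamodule` is only required for `INT8_PTQ`):

```python
from anomalib.data import MVTec
from anomalib.deploy import CompressionType, ExportType
from anomalib.engine import Engine
from anomalib.models import Padim

engine = Engine()
model = Padim()
datamodule = MVTec()  # supplies the calibration dataloader for PTQ

engine.export(
    model=model,
    export_type=ExportType.OPENVINO,
    compression_type=CompressionType.INT8_PTQ,
    datamodule=datamodule,  # required for INT8_PTQ calibration
    ckpt_path="path/to/model.ckpt",  # placeholder checkpoint path
)
```
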
diff --git a/src/anomalib/models/components/base/export_mixin.py b/src/anomalib/models/components/base/export_mixin.py
index b0ac4449ef..9b0c2d41e2 100644
--- a/src/anomalib/models/components/base/export_mixin.py
+++ b/src/anomalib/models/components/base/export_mixin.py
@@ -17,7 +17,8 @@
 from torchvision.transforms.v2 import Transform
 
 from anomalib import TaskType
-from anomalib.deploy.export import ExportType, InferenceModel
+from anomalib.data import AnomalibDataModule
+from anomalib.deploy.export import CompressionType, ExportType, InferenceModel
 from anomalib.utils.exceptions import try_import
 
 if TYPE_CHECKING:
@@ -156,6 +157,8 @@ def to_openvino(
         export_root: Path | str,
         input_size: tuple[int, int] | None = None,
         transform: Transform | None = None,
+        compression_type: CompressionType | None = None,
+        datamodule: AnomalibDataModule | None = None,
         ov_args: dict[str, Any] | None = None,
         task: TaskType | None = None,
     ) -> Path:
@@ -168,7 +171,12 @@ def to_openvino(
             transform (Transform, optional): Input transforms used for the model.
                 If not provided, the transform is taken from the model.
                 Defaults to ``None``.
-            ov_args: Model optimizer arguments for OpenVINO model conversion.
+            compression_type (CompressionType, optional): Compression type for better inference performance.
+                Defaults to ``None``.
+            datamodule (AnomalibDataModule | None, optional): Lightning datamodule.
+                Must be provided if CompressionType.INT8_PTQ is selected.
+                Defaults to ``None``.
+            ov_args (dict | None): Model optimizer arguments for OpenVINO model conversion.
                 Defaults to ``None``.
             task (TaskType | None): Task type.
                 Defaults to ``None``.
@@ -213,7 +221,11 @@ def to_openvino(
         if not try_import("openvino"):
             logger.exception("Could not find OpenVINO. Please check OpenVINO installation.")
             raise ModuleNotFoundError
+        if not try_import("nncf"):
+            logger.exception("Could not find NNCF. Please check NNCF installation.")
+            raise ModuleNotFoundError
 
+        import nncf
         import openvino as ov
 
         with TemporaryDirectory() as onnx_directory:
@@ -221,10 +233,25 @@ def to_openvino(
             export_root = _create_export_root(export_root, ExportType.OPENVINO)
             ov_model_path = export_root / "model.xml"
             ov_args = {} if ov_args is None else ov_args
-            # fp16 compression is enabled by default
-            compress_to_fp16 = ov_args.get("compress_to_fp16", True)
 
             model = ov.convert_model(model_path, **ov_args)
+            if compression_type == CompressionType.INT8:
+                model = nncf.compress_weights(model)
+            elif compression_type == CompressionType.INT8_PTQ:
+                if datamodule is None:
+                    msg = "Datamodule must be provided for OpenVINO INT8_PTQ compression"
+                    raise ValueError(msg)
+
+                dataloader = datamodule.val_dataloader()
+                if len(dataloader.dataset) < 300:
+                    logger.warning(
+                        f">300 images recommended for INT8 quantization, found only {len(dataloader.dataset)} images",
+                    )
+                calibration_dataset = nncf.Dataset(dataloader, lambda x: x["image"])
+                model = nncf.quantize(model, calibration_dataset)
+
+            # fp16 compression is enabled by default
+            compress_to_fp16 = compression_type == CompressionType.FP16
             ov.save_model(model, ov_model_path, compress_to_fp16=compress_to_fp16)
             _write_metadata_to_json(self._get_metadata(task), export_root)
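
Reviewer note: for reference, the three compression branches above map onto these standalone NNCF/OpenVINO calls (a sketch assuming an existing `model.onnx` and a dataloader that yields `{"image": ...}` batches; `nncf.quantize` calibrates on roughly 300 samples by default, which is what the `< 300` warning is guarding):

```python
import nncf
import openvino as ov

model = ov.convert_model("model.onnx")  # placeholder ONNX model

# CompressionType.INT8: weight-only compression; weights are stored as INT8
# and dequantized to floating point at inference time.
model_int8 = nncf.compress_weights(model)

# CompressionType.INT8_PTQ: full post-training quantization; weights and
# operations run in INT8, so calibration data is required:
#   calibration = nncf.Dataset(dataloader, lambda batch: batch["image"])
#   model_ptq = nncf.quantize(model, calibration)

# CompressionType.FP16: handled by OpenVINO itself at save time.
ov.save_model(model, "model.xml", compress_to_fp16=True)
```
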