From e750fd4c72bec27bbb98f7784bb2b50a25da7a5d Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Fri, 7 Jul 2023 16:06:20 +0900 Subject: [PATCH 01/12] Initial commit --- src/otx/algorithms/visual_prompting/tasks/openvino.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/otx/algorithms/visual_prompting/tasks/openvino.py b/src/otx/algorithms/visual_prompting/tasks/openvino.py index e2d24c9d14a..363f426034c 100644 --- a/src/otx/algorithms/visual_prompting/tasks/openvino.py +++ b/src/otx/algorithms/visual_prompting/tasks/openvino.py @@ -335,4 +335,5 @@ def optimize( optimization_parameters: Optional[OptimizationParameters] = None, ): """Optimize function of OpenVINOVisualPromptingTask.""" + logger.info("Start PTQ optimization") raise NotImplementedError From bddcf67d14b4e0ffb469b4d1203e4736ba0b0ab8 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Wed, 12 Jul 2023 14:32:23 +0900 Subject: [PATCH 02/12] Update block --- .../models/visual_prompters/segment_anything.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/models/visual_prompters/segment_anything.py b/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/models/visual_prompters/segment_anything.py index 3dbe568091f..2460df0aec3 100644 --- a/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/models/visual_prompters/segment_anything.py +++ b/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/models/visual_prompters/segment_anything.py @@ -174,9 +174,9 @@ def replace_state_dict_keys(state_dict, revise_keys): state_dict = replace_state_dict_keys(state_dict, revise_keys) self.load_state_dict(state_dict) - ################################################# - # forward for inference (export/deploy) # - ################################################# + ########################################################## + # forward for inference (export/deploy/optimize) # + ########################################################## @torch.no_grad() def forward( self, From c59398c81432d64ecad3c29d5c796680672bd0ca Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Wed, 12 Jul 2023 16:06:57 +0900 Subject: [PATCH 03/12] (WIP) otx optimize --- .../config/visual_prompting_config.py | 4 +- .../datasets/pipelines/sam_transforms.py | 9 +- .../configs/base/configuration.py | 21 ++- .../configs/sam_vit_b/configuration.yaml | 60 +------- .../visual_prompting/tasks/openvino.py | 143 +++++++++++++++++- src/otx/cli/tools/optimize.py | 8 +- 6 files changed, 171 insertions(+), 74 deletions(-) diff --git a/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/config/visual_prompting_config.py b/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/config/visual_prompting_config.py index ddd4d4dc070..e3382f25526 100644 --- a/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/config/visual_prompting_config.py +++ b/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/config/visual_prompting_config.py @@ -97,8 +97,8 @@ def update_visual_prompting_config( if groups: for group in groups: if group in ["learning_parameters", "nncf_optimization", "pot_parameters", "postprocessing"]: - if group in ["nncf_optimization", "pot_parameters"]: - # TODO (sungchul): Consider pot_parameters, nncf_optimization, and postprocessing + if group in ["nncf_optimization"]: + # TODO (sungchul): Consider nncf_optimization logger.warning(f"{group} will be implemented.") continue 
update_visual_prompting_config(visual_prompting_config, getattr(otx_config, group)) diff --git a/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/datasets/pipelines/sam_transforms.py b/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/datasets/pipelines/sam_transforms.py index 74e80f1b383..c3cae7d78a7 100644 --- a/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/datasets/pipelines/sam_transforms.py +++ b/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/datasets/pipelines/sam_transforms.py @@ -36,7 +36,8 @@ def __call__(self, item: Dict[str, Union[List, Tensor]]) -> Dict[str, Union[List Dict[str, Union[List, Tensor]]: Dictionary of batch data. """ item["images"] = torch.as_tensor( - self.apply_image(item["images"]).transpose((2, 0, 1)), dtype=torch.get_default_dtype() + self.apply_image(item["images"], self.target_length).transpose((2, 0, 1)), + dtype=torch.get_default_dtype() ) item["gt_masks"] = [torch.as_tensor(gt_mask) for gt_mask in item["gt_masks"]] item["bboxes"] = self.apply_boxes(item["bboxes"], item["original_size"]) @@ -44,16 +45,18 @@ def __call__(self, item: Dict[str, Union[List, Tensor]]) -> Dict[str, Union[List item["points"] = self.apply_coords(item["points"], item["original_size"]) return item - def apply_image(self, image: np.ndarray) -> np.ndarray: + @staticmethod + def apply_image(image: np.ndarray, target_length: int) -> np.ndarray: """Expects a numpy array with shape HxWxC in uint8 format. Args: image (np.ndarray): Image array. + target_length (int): The length of the longest side of the image. Returns: np.ndarray: Resized image. """ - target_size = self.get_preprocess_shape(image.shape[0], image.shape[1], self.target_length) + target_size = ResizeLongestSide.get_preprocess_shape(image.shape[0], image.shape[1], target_length) return np.array(resize(to_pil_image(image), target_size)) def apply_coords(self, coords: np.ndarray, original_size: Union[List[Any], Tensor]) -> np.ndarray: diff --git a/src/otx/algorithms/visual_prompting/configs/base/configuration.py b/src/otx/algorithms/visual_prompting/configs/base/configuration.py index eeb174c4875..f89e7ac9896 100644 --- a/src/otx/algorithms/visual_prompting/configs/base/configuration.py +++ b/src/otx/algorithms/visual_prompting/configs/base/configuration.py @@ -15,15 +15,19 @@ # and limitations under the License. 
+from sys import maxsize + from attr import attrs -from otx.algorithms.common.configs import BaseConfig +from otx.algorithms.common.configs import BaseConfig, POTQuantizationPreset from otx.api.configuration.elements import ( ParameterGroup, add_parameter_group, + boolean_attribute, configurable_boolean, configurable_float, configurable_integer, + selectable, string_attribute, ) from otx.api.configuration.model_lifecycle import ModelLifecycle @@ -95,5 +99,20 @@ class __Postprocessing(ParameterGroup): affects_outcome_of=ModelLifecycle.INFERENCE, ) + @attrs + class __POTParameter(BaseConfig.BasePOTParameter): + header = string_attribute("POT Parameters") + description = header + visible_in_ui = boolean_attribute(False) + + preset = selectable( + default_value=POTQuantizationPreset.MIXED, + header="Preset", + description="Quantization preset that defines quantization scheme", + editable=True, + visible_in_ui=True, + ) + learning_parameters = add_parameter_group(__LearningParameters) postprocessing = add_parameter_group(__Postprocessing) + pot_parameters = add_parameter_group(__POTParameter) diff --git a/src/otx/algorithms/visual_prompting/configs/sam_vit_b/configuration.yaml b/src/otx/algorithms/visual_prompting/configs/sam_vit_b/configuration.yaml index e20429f60b2..8a867588912 100644 --- a/src/otx/algorithms/visual_prompting/configs/sam_vit_b/configuration.yaml +++ b/src/otx/algorithms/visual_prompting/configs/sam_vit_b/configuration.yaml @@ -85,62 +85,6 @@ learning_parameters: visible_in_ui: true warning: null auto_hpo_state: NOT_POSSIBLE -nncf_optimization: - description: Optimization by NNCF - enable_pruning: - affects_outcome_of: NONE - auto_hpo_state: not_possible - auto_hpo_value: null - default_value: false - description: Enable filter pruning algorithm - editable: true - header: Enable filter pruning algorithm - type: BOOLEAN - ui_rules: - action: DISABLE_EDITING - operator: AND - rules: [] - type: UI_RULES - value: false - visible_in_ui: true - warning: null - enable_quantization: - affects_outcome_of: NONE - auto_hpo_state: not_possible - auto_hpo_value: null - default_value: true - description: Enable quantization algorithm - editable: true - header: Enable quantization algorithm - type: BOOLEAN - ui_rules: - action: DISABLE_EDITING - operator: AND - rules: [] - type: UI_RULES - value: true - visible_in_ui: true - warning: null - header: Optimization by NNCF - pruning_supported: - affects_outcome_of: TRAINING - auto_hpo_state: not_possible - auto_hpo_value: null - default_value: false - description: Whether filter pruning is supported - editable: false - header: Whether filter pruning is supported - type: BOOLEAN - ui_rules: - action: DISABLE_EDITING - operator: AND - rules: [] - type: UI_RULES - value: false - visible_in_ui: false - warning: null - type: PARAMETER_GROUP - visible_in_ui: true pot_parameters: description: POT Parameters header: POT Parameters @@ -148,7 +92,7 @@ pot_parameters: affects_outcome_of: NONE auto_hpo_state: not_possible auto_hpo_value: null - default_value: Performance + default_value: Mixed description: Quantization preset that defines quantization scheme editable: true enum_name: POTQuantizationPreset @@ -162,7 +106,7 @@ pot_parameters: operator: AND rules: [] type: UI_RULES - value: Performance + value: Mixed visible_in_ui: true warning: null stat_subset_size: diff --git a/src/otx/algorithms/visual_prompting/tasks/openvino.py b/src/otx/algorithms/visual_prompting/tasks/openvino.py index 363f426034c..f0a0d9a4747 100644 --- 
a/src/otx/algorithms/visual_prompting/tasks/openvino.py +++ b/src/otx/algorithms/visual_prompting/tasks/openvino.py @@ -17,16 +17,22 @@ import io import json import os +import tempfile import time +import random from pathlib import Path from typing import Any, Dict, List, Optional, Tuple, Union from zipfile import ZipFile import attr +import nncf import numpy as np +import openvino.runtime as ov +from nncf.common.quantization.structs import QuantizationPreset from openvino.model_api.adapters import create_core from openvino.model_api.models import Model +from otx.algorithms.common.utils.ir import check_if_quantized from otx.algorithms.common.utils.logger import get_logger from otx.algorithms.common.utils.utils import get_default_async_reqs_num from otx.algorithms.visual_prompting.adapters.openvino import model_wrappers @@ -37,6 +43,7 @@ OTXVisualPromptingDataset, get_transform, ) +from otx.algorithms.visual_prompting.adapters.pytorch_lightning.datasets.pipelines import ResizeLongestSide from otx.algorithms.visual_prompting.configs.base import VisualPromptingBaseConfig from otx.api.entities.annotation import Annotation from otx.api.entities.dataset_item import DatasetItemEntity @@ -46,12 +53,19 @@ default_progress_callback, ) from otx.api.entities.label_schema import LabelSchemaEntity -from otx.api.entities.model import ModelEntity +from otx.api.entities.model import ( + ModelEntity, + ModelFormat, + ModelOptimizationType, + ModelPrecision, + OptimizationMethod, +) from otx.api.entities.model_template import TaskType from otx.api.entities.optimization_parameters import OptimizationParameters from otx.api.entities.resultset import ResultSetEntity +from otx.api.entities.subset import Subset from otx.api.entities.task_environment import TaskEnvironment -from otx.api.serialization.label_mapper import LabelSchemaMapper +from otx.api.serialization.label_mapper import LabelSchemaMapper, label_schema_to_bytes from otx.api.usecases.evaluation.metrics_helper import MetricsHelper from otx.api.usecases.exportable_code import demo from otx.api.usecases.exportable_code.inference import BaseInferencer @@ -129,8 +143,7 @@ def pre_process(self, dataset_item: DatasetItemEntity) -> Dict[str, Any]: # typ images, meta = self.model["image_encoder"].preprocess(dataset_item.numpy) prompts = OTXVisualPromptingDataset.get_prompts(dataset_item, self.labels) # to be replaced prompts = self.model["decoder"].preprocess(prompts, meta) - items = {**images, **meta, "prompts": prompts} - return items + return images, meta, prompts def post_process( self, prediction: Dict[str, np.ndarray], metadata: Dict[str, Any] @@ -143,13 +156,13 @@ def post_process( def predict(self, dataset_item: DatasetItemEntity) -> List[Annotation]: # type: ignore """Perform a prediction for a given input image.""" # forward image encoder - items = self.pre_process(dataset_item) - image_embeddings = self.forward({"images": items["images"]}) + images, meta, prompts = self.pre_process(dataset_item) + image_embeddings = self.forward(images) annotations: List[Annotation] = [] hard_predictions: List[np.ndarray] = [] soft_predictions: List[np.ndarray] = [] - for prompt in items["prompts"]: + for prompt in prompts: label = prompt.pop("label") prompt.update(image_embeddings) @@ -178,6 +191,54 @@ def await_all(self) -> None: self.model["decoder"].await_all() +class OTXOpenVinoDataLoader: + """DataLoader implementation for VisualPromptingOpenVINOTask.""" + + def __init__(self, dataset: Any, inferencer: BaseInferencer, shuffle: bool = True, is_encoder: bool = 
True, output_model: Optional[ModelEntity] = None): + self.dataset = dataset + self.inferencer = inferencer + self.shuffler = None + if shuffle: + self.shuffler = list(range(len(dataset))) + random.shuffle(self.shuffler) + + self.is_encoder = is_encoder + self.target_length = self.inferencer.model["image_encoder"].orig_width + if not self.is_encoder: + core = ov.Core() + compressed_model = core.read_model( + output_model.get_data("visual_prompting_image_encoder.xml"), + output_model.get_data("visual_prompting_image_encoder.bin")) + self.compressed_model = core.compile_model( + model=compressed_model, + device_name=inferencer.model["image_encoder"].inference_adapter.device) + + def __getitem__(self, index: int): + """Get item from dataset.""" + if self.shuffler is not None: + index = self.shuffler[index] + + items = self.dataset[index] + images, _, prompts = self.inferencer.pre_process(items) + processed_image = ResizeLongestSide.apply_image(images["images"][0], self.target_length).transpose(2, 0, 1) + _, h, w = processed_image.shape + pad_width = ((0, 0), (0, self.target_length - h), (0, self.target_length - w)) + processed_image = np.pad(processed_image, pad_width, mode="constant", constant_values=0) + if self.is_encoder: + return {"images": processed_image[None]} + else: + image_embeddings = self.compressed_model(processed_image[None]) + prompt = prompts[0] # only use the first prompt + prompt.pop("label") + prompt.update({"image_embeddings": image_embeddings["image_embeddings"]}) + return prompt + # TODO (sungchul): change has_mask_input + + def __len__(self): + """Get length of dataset.""" + return len(self.dataset) + + class OpenVINOVisualPromptingTask(IInferenceTask, IEvaluationTask, IOptimizationTask, IDeploymentTask): """Task implementation for Visual Prompting using OpenVINO backend.""" @@ -336,4 +397,70 @@ def optimize( ): """Optimize function of OpenVINOVisualPromptingTask.""" logger.info("Start PTQ optimization") - raise NotImplementedError + if self.model is None: + raise RuntimeError("PTQ optimize failed, model is None") + + if optimization_type is not OptimizationType.POT: + raise ValueError("PTQ is the only supported optimization type for OpenVino models") + + dataset = dataset.get_subset(Subset.TRAINING) + + for i, (name, is_encoder) in enumerate( + zip(["image_encoder", "decoder"], [True, False]), 1 + ): + data_loader = OTXOpenVinoDataLoader(dataset, self.inferencer, is_encoder=is_encoder, output_model=output_model) + quantization_dataset = nncf.Dataset(data_loader, lambda data: data) + + with tempfile.TemporaryDirectory() as tempdir: + xml_path = os.path.join(tempdir, f"visual_prompting_{name}.xml") + bin_path = os.path.join(tempdir, f"visual_prompting_{name}.bin") + with open(xml_path, "wb") as f: + f.write(self.model.get_data(f"visual_prompting_{name}.xml")) + with open(bin_path, "wb") as f: + f.write(self.model.get_data(f"visual_prompting_{name}.bin")) + + ov_model = ov.Core().read_model(xml_path, bin_path) + if check_if_quantized(ov_model): + raise RuntimeError("Model is already optimized by PTQ") + + if optimization_parameters is not None: + optimization_parameters.update_progress(10 * i + 35 * (i - 1), None) + + stat_subset_size = self.hparams.pot_parameters.stat_subset_size + preset = QuantizationPreset(self.hparams.pot_parameters.preset.name.lower()) + from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters + advanced_parameters = AdvancedQuantizationParameters(backend_params={"use_pot": True}) + + compressed_model = nncf.quantize( + 
ov_model, quantization_dataset, subset_size=min(stat_subset_size, len(data_loader)), preset=preset, advanced_parameters=advanced_parameters + ) + + if optimization_parameters is not None: + optimization_parameters.update_progress(45 * i, None) + + with tempfile.TemporaryDirectory() as tempdir: + xml_path = os.path.join(tempdir, f"visual_prompting_{name}.xml") + bin_path = os.path.join(tempdir, f"visual_prompting_{name}.bin") + ov.serialize(compressed_model, xml_path) + with open(xml_path, "rb") as f: + output_model.set_data(f"visual_prompting_{name}.xml", f.read()) + with open(bin_path, "rb") as f: + output_model.set_data(f"visual_prompting_{name}.bin", f.read()) + + output_model.set_data( + "label_schema.json", + label_schema_to_bytes(self.task_environment.label_schema), + ) + + # set model attributes for quantized model + output_model.model_format = ModelFormat.OPENVINO + output_model.optimization_type = ModelOptimizationType.POT + output_model.optimization_methods = [OptimizationMethod.QUANTIZATION] + output_model.precision = [ModelPrecision.INT8] + + self.model = output_model + self.inferencer = self.load_inferencer() + + if optimization_parameters is not None: + optimization_parameters.update_progress(100, None) + logger.info("POT optimization completed") diff --git a/src/otx/cli/tools/optimize.py b/src/otx/cli/tools/optimize.py index eaa9cc2e7c3..df866c63009 100644 --- a/src/otx/cli/tools/optimize.py +++ b/src/otx/cli/tools/optimize.py @@ -19,6 +19,7 @@ from otx.api.entities.inference_parameters import InferenceParameters from otx.api.entities.model import ModelEntity +from otx.api.entities.model_template import TaskType from otx.api.entities.optimization_parameters import OptimizationParameters from otx.api.entities.resultset import ResultSetEntity from otx.api.entities.subset import Subset @@ -140,8 +141,11 @@ def main(): validation_dataset = dataset.get_subset(Subset.VALIDATION) predicted_validation_dataset = task.infer( - validation_dataset.with_empty_annotations(), - InferenceParameters(is_evaluation=True), + # temp (sungchul): remain annotation for visual prompting + validation_dataset + if getattr(task, "task_type", None) == TaskType.VISUAL_PROMPTING + else validation_dataset.with_empty_annotations(), + InferenceParameters(is_evaluation=False), ) resultset = ResultSetEntity( From dd8f2879afe1757508325efa3b12cae778bac101 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Wed, 12 Jul 2023 17:37:53 +0900 Subject: [PATCH 04/12] Fix --- .../adapters/openvino/model_wrappers/openvino_models.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/otx/algorithms/visual_prompting/adapters/openvino/model_wrappers/openvino_models.py b/src/otx/algorithms/visual_prompting/adapters/openvino/model_wrappers/openvino_models.py index 83f327a7eca..5d4ba5e8917 100644 --- a/src/otx/algorithms/visual_prompting/adapters/openvino/model_wrappers/openvino_models.py +++ b/src/otx/algorithms/visual_prompting/adapters/openvino/model_wrappers/openvino_models.py @@ -63,7 +63,6 @@ def __init__( preload: bool = False, ): super().__init__(model_adapter, configuration, preload) - self.output_blob_name = "low_res_masks" @classmethod def parameters(cls): # noqa: D102 @@ -71,6 +70,9 @@ def parameters(cls): # noqa: D102 parameters.update({"image_size": NumericalValue(value_type=int, default_value=1024, min=0, max=2048)}) return parameters + def _get_outputs(self): + return "low_res_masks" + def preprocess(self, inputs: Dict[str, Any], meta: Dict[str, Any]): """Preprocess prompts.""" 
processed_prompts = [] From 2aa149ae6b92c9544529d3d1f99264f689e20d1f Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Wed, 12 Jul 2023 21:55:33 +0900 Subject: [PATCH 05/12] WIP --- .../visual_prompters/segment_anything.py | 20 ++++++++++--------- .../visual_prompting/tasks/openvino.py | 15 ++++++++++---- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/models/visual_prompters/segment_anything.py b/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/models/visual_prompters/segment_anything.py index 2460df0aec3..efa3f792265 100644 --- a/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/models/visual_prompters/segment_anything.py +++ b/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/models/visual_prompters/segment_anything.py @@ -185,7 +185,7 @@ def forward( point_labels: Tensor, mask_input: Tensor, has_mask_input: Tensor, - orig_size: Tensor, + # orig_size: Tensor, ): """Forward method for SAM inference (export/deploy). @@ -227,16 +227,18 @@ def forward( if self.config.model.return_single_mask: masks, scores = self.select_masks(masks, scores, point_coords.shape[1]) - upscaled_masks = self.mask_postprocessing(masks, orig_size[0]) + return scores, masks + # TODO (sungchul): apply inner postprocessing + # upscaled_masks = self.mask_postprocessing(masks, orig_size[0]) - if self.config.model.return_extra_metrics: - stability_scores = self.calculate_stability_score( - upscaled_masks, self.config.model.mask_threshold, self.config.model.stability_score_offset - ) - areas = (upscaled_masks > self.config.model.mask_threshold).sum(-1).sum(-1) - return upscaled_masks, scores, stability_scores, areas, masks + # if self.config.model.return_extra_metrics: + # stability_scores = self.calculate_stability_score( + # upscaled_masks, self.config.model.mask_threshold, self.config.model.stability_score_offset + # ) + # areas = (upscaled_masks > self.config.model.mask_threshold).sum(-1).sum(-1) + # return upscaled_masks, scores, stability_scores, areas, masks - return upscaled_masks, scores, masks + # return upscaled_masks, scores, masks def _embed_points(self, point_coords: Tensor, point_labels: Tensor) -> Tensor: """Embed sparse input prompts. 
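With the in-graph mask_postprocessing call disabled above, the exported decoder now returns only IoU scores and low-resolution mask logits, so upscaling to the original image size has to happen on the host after inference. A minimal sketch of that step, assuming the standard SAM layout (256x256 low-res masks, a 1024-pixel padded input, and the same longest-side resize rule as ResizeLongestSide.get_preprocess_shape); the helper name postprocess_masks is illustrative and not part of this patch:

import torch
import torch.nn.functional as F

def postprocess_masks(low_res_masks: torch.Tensor, orig_size, image_size: int = 1024) -> torch.Tensor:
    # low_res_masks: (B, 1, 256, 256) logits; orig_size: (H, W) ints of the source image
    masks = F.interpolate(low_res_masks, (image_size, image_size), mode="bilinear", align_corners=False)
    # drop the zero padding that was added after the longest-side resize
    scale = image_size / max(orig_size)
    h, w = int(orig_size[0] * scale + 0.5), int(orig_size[1] * scale + 0.5)
    masks = masks[..., :h, :w]
    # resize back to the original resolution and binarize (mask_threshold assumed to be 0.0)
    masks = F.interpolate(masks, orig_size, mode="bilinear", align_corners=False)
    return masks > 0.0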
diff --git a/src/otx/algorithms/visual_prompting/tasks/openvino.py b/src/otx/algorithms/visual_prompting/tasks/openvino.py index f0a0d9a4747..4ea8349cc77 100644 --- a/src/otx/algorithms/visual_prompting/tasks/openvino.py +++ b/src/otx/algorithms/visual_prompting/tasks/openvino.py @@ -164,11 +164,12 @@ def predict(self, dataset_item: DatasetItemEntity) -> List[Annotation]: # type: soft_predictions: List[np.ndarray] = [] for prompt in prompts: label = prompt.pop("label") + orig_size = prompt.pop("orig_size") prompt.update(image_embeddings) # forward decoder to get predicted mask prediction = self.forward_decoder(prompt) - metadata = {"label": label, "original_size": prompt["orig_size"]} + metadata = {"label": label, "original_size": orig_size} # set annotation for eval annotation, hard_prediction, soft_prediction = self.post_process(prediction, metadata) @@ -230,6 +231,7 @@ def __getitem__(self, index: int): image_embeddings = self.compressed_model(processed_image[None]) prompt = prompts[0] # only use the first prompt prompt.pop("label") + prompt.pop("orig_size") prompt.update({"image_embeddings": image_embeddings["image_embeddings"]}) return prompt # TODO (sungchul): change has_mask_input @@ -408,6 +410,13 @@ def optimize( for i, (name, is_encoder) in enumerate( zip(["image_encoder", "decoder"], [True, False]), 1 ): + if name == "decoder": + # TODO (sungchul): quantize decoder, too + logger.info(f"{name} won't do PTQ.") + output_model.set_data(f"visual_prompting_{name}.xml", self.model.get_data(f"visual_prompting_{name}.xml")) + output_model.set_data(f"visual_prompting_{name}.bin", self.model.get_data(f"visual_prompting_{name}.bin")) + continue + data_loader = OTXOpenVinoDataLoader(dataset, self.inferencer, is_encoder=is_encoder, output_model=output_model) quantization_dataset = nncf.Dataset(data_loader, lambda data: data) @@ -428,11 +437,9 @@ def optimize( stat_subset_size = self.hparams.pot_parameters.stat_subset_size preset = QuantizationPreset(self.hparams.pot_parameters.preset.name.lower()) - from nncf.quantization.advanced_parameters import AdvancedQuantizationParameters - advanced_parameters = AdvancedQuantizationParameters(backend_params={"use_pot": True}) compressed_model = nncf.quantize( - ov_model, quantization_dataset, subset_size=min(stat_subset_size, len(data_loader)), preset=preset, advanced_parameters=advanced_parameters + ov_model, quantization_dataset, subset_size=min(stat_subset_size, len(data_loader)), preset=preset ) if optimization_parameters is not None: From d94f33b673e8322700d8c58bb93dcd7e85f7fbd0 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Thu, 13 Jul 2023 10:58:55 +0900 Subject: [PATCH 06/12] Update configs & exported outputs --- .../algorithms/visual_prompting/configs/configuration.yaml | 4 ++-- .../algorithms/visual_prompting/configs/sam_vit_b/config.yaml | 4 ++-- src/otx/algorithms/visual_prompting/tasks/inference.py | 3 +-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/otx/algorithms/visual_prompting/configs/configuration.yaml b/src/otx/algorithms/visual_prompting/configs/configuration.yaml index e20429f60b2..1949d14f2a3 100644 --- a/src/otx/algorithms/visual_prompting/configs/configuration.yaml +++ b/src/otx/algorithms/visual_prompting/configs/configuration.yaml @@ -148,7 +148,7 @@ pot_parameters: affects_outcome_of: NONE auto_hpo_state: not_possible auto_hpo_value: null - default_value: Performance + default_value: Mixed description: Quantization preset that defines quantization scheme editable: true enum_name: 
POTQuantizationPreset @@ -162,7 +162,7 @@ pot_parameters: operator: AND rules: [] type: UI_RULES - value: Performance + value: Mixed visible_in_ui: true warning: null stat_subset_size: diff --git a/src/otx/algorithms/visual_prompting/configs/sam_vit_b/config.yaml b/src/otx/algorithms/visual_prompting/configs/sam_vit_b/config.yaml index 393cfa468a2..3738303c911 100644 --- a/src/otx/algorithms/visual_prompting/configs/sam_vit_b/config.yaml +++ b/src/otx/algorithms/visual_prompting/configs/sam_vit_b/config.yaml @@ -1,6 +1,6 @@ dataset: task: visual_prompting - train_batch_size: 2 + train_batch_size: 4 val_batch_size: 1 test_batch_size: 1 num_workers: 4 @@ -35,7 +35,7 @@ model: optimizer: name: Adam - lr: 0.0001 + lr: 0.000001 callback: checkpoint: # arguments for ModelCheckpoint diff --git a/src/otx/algorithms/visual_prompting/tasks/inference.py b/src/otx/algorithms/visual_prompting/tasks/inference.py index 6c93a05caa9..b84984e5fef 100644 --- a/src/otx/algorithms/visual_prompting/tasks/inference.py +++ b/src/otx/algorithms/visual_prompting/tasks/inference.py @@ -281,9 +281,8 @@ def _export_to_onnx(self, onnx_path: Dict[str, str]): "point_labels": torch.randint(low=0, high=4, size=(1, 2), dtype=torch.float), "mask_input": torch.randn(1, 1, *mask_input_size, dtype=torch.float), "has_mask_input": torch.tensor([[1]], dtype=torch.float), - "orig_size": torch.tensor([[height, width]], dtype=torch.float), } - output_names = ["masks", "iou_predictions", "low_res_masks"] + output_names = ["iou_predictions", "low_res_masks"] model_to_export = self.model with warnings.catch_warnings(): From 451f5d13f1cfab2f21019d2d2177b6027a887f93 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Thu, 13 Jul 2023 11:41:46 +0900 Subject: [PATCH 07/12] Remove unused modules for torch --- .../datasets/pipelines/sam_transforms.py | 50 ------------------- 1 file changed, 50 deletions(-) diff --git a/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/datasets/pipelines/sam_transforms.py b/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/datasets/pipelines/sam_transforms.py index c3cae7d78a7..1170c4efebb 100644 --- a/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/datasets/pipelines/sam_transforms.py +++ b/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/datasets/pipelines/sam_transforms.py @@ -91,56 +91,6 @@ def apply_boxes(self, boxes: np.ndarray, original_size: Union[List[Any], Tensor] boxes = self.apply_coords(boxes.reshape(-1, 2, 2), original_size) return boxes.reshape(-1, 4) - def apply_image_torch(self, image: torch.Tensor) -> torch.Tensor: - """Expects batched images with shape BxCxHxW and float format. - - This transformation may not exactly match apply_image. - apply_image is the transformation expected by the model. - - Args: - image (torch.Tensor): Image tensor. - - Returns: - torch.Tensor: Resized image. - """ - # Expects an image in BCHW format. May not exactly match apply_image. - target_size = self.get_preprocess_shape(image.shape[2], image.shape[3], self.target_length) - return F.interpolate(image, target_size, mode="bilinear", align_corners=False, antialias=True) - - def apply_coords_torch(self, coords: torch.Tensor, original_size: Tuple[int, ...]) -> torch.Tensor: - """Expects a torch tensor with length 2 in the last dimension. - - Requires the original image size in (H, W) format. - - Args: - coords (torch.Tensor): Coordinates tensor. - original_size (Tuple[int, ...]): Original size of image. - - Returns: - torch.Tensor: Resized coordinates. 
- """ - old_h, old_w = original_size - new_h, new_w = self.get_preprocess_shape(original_size[0], original_size[1], self.target_length) - coords = deepcopy(coords).to(torch.float) - coords[..., 0] = coords[..., 0] * (new_w / old_w) - coords[..., 1] = coords[..., 1] * (new_h / old_h) - return coords - - def apply_boxes_torch(self, boxes: torch.Tensor, original_size: Tuple[int, ...]) -> torch.Tensor: - """Expects a torch tensor with shape Bx4. - - Requires the original image size in (H, W) format. - - Args: - boxes (torch.Tensor): Boxes tensor. - original_size (Tuple[int, ...]): Original size of image. - - Returns: - torch.Tensor: Resized boxes. - """ - boxes = self.apply_coords_torch(boxes.reshape(-1, 2, 2), original_size) - return boxes.reshape(-1, 4) - @staticmethod def get_preprocess_shape(oldh: int, oldw: int, long_side_length: int) -> Tuple[int, int]: """Compute the output size given input size and target long side length. From a06dd5c38c4f0ebfc8636b215298a33c975d24cf Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Thu, 13 Jul 2023 13:37:29 +0900 Subject: [PATCH 08/12] Add unit tests --- .../model_wrappers/test_openvino_models.py | 7 ++ .../config/test_visual_prompting_config.py | 10 +- .../datasets/pipelines/test_sam_transforms.py | 95 ++++++++++--------- .../visual_prompting/tasks/test_openvino.py | 95 +++++++++++++++++-- 4 files changed, 151 insertions(+), 56 deletions(-) diff --git a/tests/unit/algorithms/visual_prompting/adapters/openvino/model_wrappers/test_openvino_models.py b/tests/unit/algorithms/visual_prompting/adapters/openvino/model_wrappers/test_openvino_models.py index efdf2c0b495..437e4f9d326 100644 --- a/tests/unit/algorithms/visual_prompting/adapters/openvino/model_wrappers/test_openvino_models.py +++ b/tests/unit/algorithms/visual_prompting/adapters/openvino/model_wrappers/test_openvino_models.py @@ -64,6 +64,13 @@ def test_parameters(self): assert isinstance(params.get("image_size"), NumericalValue) assert params.get("image_size").default_value == 1024 + @e2e_pytest_unit + def test_get_outputs(self): + """Test _get_outputs.""" + results = self.decoder._get_outputs() + + assert "low_res_masks" == results + @e2e_pytest_unit def test_preprocess(self): """Test preprocess""" diff --git a/tests/unit/algorithms/visual_prompting/adapters/pytorch_lightning/config/test_visual_prompting_config.py b/tests/unit/algorithms/visual_prompting/adapters/pytorch_lightning/config/test_visual_prompting_config.py index d5d57119ca2..e7ae231d157 100644 --- a/tests/unit/algorithms/visual_prompting/adapters/pytorch_lightning/config/test_visual_prompting_config.py +++ b/tests/unit/algorithms/visual_prompting/adapters/pytorch_lightning/config/test_visual_prompting_config.py @@ -1,3 +1,4 @@ + """Tests the methods in config.""" # Copyright (C) 2023 Intel Corporation @@ -61,13 +62,18 @@ def test_update_visual_prompting_config(): """Test update_visual_prompting_config.""" otx_config = OmegaConf.create( { - "groups": ["learning_parameters"], + "groups": ["learning_parameters", "pot_parameters", "postprocessing"], "learning_parameters": {"parameters": ["param1"], "param1": "updated_value1"}, + "pot_parameters": {"parameters": ["param2"], "param2": "updated_value2"}, + "postprocessing": {"parameters": ["param3"], "param3": "updated_value3"}, "parameters": [], } ) - visual_prompting_config = OmegaConf.create({"param1": "value1", "param2": "value2"}) + visual_prompting_config = OmegaConf.create({"param1": "value1", "param2": "value2", "param3": "value3", "param4": "value4"}) 
update_visual_prompting_config(visual_prompting_config, otx_config) assert visual_prompting_config["param1"] == "updated_value1" + assert visual_prompting_config["param2"] == "updated_value2" + assert visual_prompting_config["param3"] == "updated_value3" + assert visual_prompting_config["param4"] == "value4" diff --git a/tests/unit/algorithms/visual_prompting/adapters/pytorch_lightning/datasets/pipelines/test_sam_transforms.py b/tests/unit/algorithms/visual_prompting/adapters/pytorch_lightning/datasets/pipelines/test_sam_transforms.py index c79be668f22..82355933ed5 100644 --- a/tests/unit/algorithms/visual_prompting/adapters/pytorch_lightning/datasets/pipelines/test_sam_transforms.py +++ b/tests/unit/algorithms/visual_prompting/adapters/pytorch_lightning/datasets/pipelines/test_sam_transforms.py @@ -5,7 +5,8 @@ # import numpy as np -import torch +from typing import Tuple +import pytest from otx.algorithms.visual_prompting.adapters.pytorch_lightning.datasets.pipelines.sam_transforms import ( ResizeLongestSide, ) @@ -14,60 +15,62 @@ class TestResizeLongestSide: - @e2e_pytest_unit - def test_apply_boxes(self): - """Test apply_boxes.""" - resize_longest_side = ResizeLongestSide(100) - boxes = np.array([[10, 20, 30, 40], [50, 60, 70, 80]]) - original_size = (200, 200) - expected_result = np.array([[5, 10, 15, 20], [25, 30, 35, 40]]) - - result = resize_longest_side.apply_boxes(boxes, original_size) - - assert np.array_equal(result, expected_result) + @pytest.fixture(autouse=True) + def setup(self): + self.resize_longest_side = ResizeLongestSide(8) @e2e_pytest_unit - def test_apply_image_torch(self): - """Test apply_image_torch.""" - resize_longest_side = ResizeLongestSide(100) - image = torch.zeros((1, 3, 200, 300), dtype=torch.float32) - expected_result_shape = (1, 3, 67, 100) - - result = resize_longest_side.apply_image_torch(image) - - assert result.shape == expected_result_shape + def test_call(self): + """Test __call__.""" @e2e_pytest_unit - def test_apply_coords_torch(self): - """Test apply_coords_torch.""" - resize_longest_side = ResizeLongestSide(100) - coords = torch.Tensor([[50, 50], [100, 100]]) - original_size = (200, 200) - expected_result = torch.Tensor([[25, 25], [50, 50]]) - - result = resize_longest_side.apply_coords_torch(coords, original_size) - - assert torch.allclose(result, expected_result) + @pytest.mark.parametrize("image,expected", + [ + (np.zeros((2, 4, 3), dtype=np.uint8), (4, 8, 3)), + (np.zeros((12, 16, 3), dtype=np.uint8), (6, 8, 3)), + ] + ) + def test_apply_image(self, image: np.ndarray, expected: Tuple[int, int, int]): + """Test apply_image.""" + results = self.resize_longest_side.apply_image(image, self.resize_longest_side.target_length) + + assert results.shape == expected @e2e_pytest_unit - def test_apply_boxes_torch(self): - """Test apply_boxes_torch.""" - resize_longest_side = ResizeLongestSide(100) - boxes = torch.Tensor([[10, 20, 30, 40], [50, 60, 70, 80]]) - original_size = (200, 200) - expected_result = torch.Tensor([[5, 10, 15, 20], [25, 30, 35, 40]]) + @pytest.mark.parametrize("coords,original_size,expected", + [ + (np.array([[1, 1], [2, 2]]), (4, 4), np.array([[2, 2], [4, 4]])), + (np.array([[4, 4], [8, 8]]), (16, 16), np.array([[2, 2], [4, 4]])), + ] + ) + def test_apply_coords(self, coords: np.ndarray, original_size: Tuple[int, int], expected: np.ndarray): + """Test apply_coords.""" + result = self.resize_longest_side.apply_coords(coords, original_size) + + assert np.array_equal(result, expected) - result = 
resize_longest_side.apply_boxes_torch(boxes, original_size) + @e2e_pytest_unit + @pytest.mark.parametrize("boxes,original_size,expected", + [ + (np.array([[1, 1, 2, 2], [2, 2, 3, 3]]), (4, 4), np.array([[2, 2, 4, 4], [4, 4, 6, 6]])), + (np.array([[4, 4, 8, 8], [8, 8, 12, 12]]), (16, 16), np.array([[2, 2, 4, 4], [4, 4, 6, 6]])), + ] + ) + def test_apply_boxes(self, boxes: np.ndarray, original_size: Tuple[int, int], expected: np.ndarray): + """Test apply_boxes.""" + result = self.resize_longest_side.apply_boxes(boxes, original_size) - assert torch.allclose(result, expected_result) + assert np.array_equal(result, expected) @e2e_pytest_unit - def test_get_preprocess_shape(self): + @pytest.mark.parametrize("oldh,oldw,expected", + [ + (3, 4, (6, 8)), + (12, 16, (6, 8)), + ] + ) + def test_get_preprocess_shape(self, oldh: int, oldw: int, expected: Tuple[int, int]): """Test get_preprocess_shape.""" - resize_longest_side = ResizeLongestSide(100) - oldh, oldw = 200, 300 - expected_result = (67, 100) - - result = resize_longest_side.get_preprocess_shape(oldh, oldw, resize_longest_side.target_length) + result = self.resize_longest_side.get_preprocess_shape(oldh, oldw, self.resize_longest_side.target_length) - assert result == expected_result + assert result == expected diff --git a/tests/unit/algorithms/visual_prompting/tasks/test_openvino.py b/tests/unit/algorithms/visual_prompting/tasks/test_openvino.py index d7e97499649..17588177bd2 100644 --- a/tests/unit/algorithms/visual_prompting/tasks/test_openvino.py +++ b/tests/unit/algorithms/visual_prompting/tasks/test_openvino.py @@ -5,11 +5,15 @@ # from copy import deepcopy +from typing import Optional import numpy as np +import pathlib import pytest +from otx.api.usecases.tasks.interfaces.optimization_interface import OptimizationType import torch from openvino.model_api.models import Model +from otx.api.entities.subset import Subset from otx.algorithms.visual_prompting.adapters.pytorch_lightning.datasets.dataset import ( OTXVisualPromptingDataset, @@ -18,6 +22,7 @@ from otx.algorithms.visual_prompting.tasks.openvino import ( OpenVINOVisualPromptingInferencer, OpenVINOVisualPromptingTask, + OTXOpenVinoDataLoader ) from otx.api.configuration.configurable_parameters import ConfigurableParameters from otx.api.entities.annotation import Annotation @@ -86,7 +91,7 @@ def test_pre_process(self, mocker): returned_value = self.visual_prompting_ov_inferencer.pre_process(fake_input) - assert isinstance(returned_value, dict) + assert isinstance(returned_value, tuple) mocker_get_prompts.assert_called_once() @e2e_pytest_unit @@ -112,10 +117,10 @@ def test_predict(self, mocker): mocker_pre_process = mocker.patch.object( OpenVINOVisualPromptingInferencer, "pre_process", - return_value={ - "index": 0, - "images": torch.rand((1, 3, 2, 2)), - "prompts": [ + return_value=( + torch.zeros((1, 3, 2, 2)), + {}, + [ { "point_coords": [np.array([[[1, 1], [2, 2]]])], "point_labels": [1, 2], @@ -123,7 +128,7 @@ def test_predict(self, mocker): "orig_size": (4, 4), } ], - }, + ), ) mocker_forward = mocker.patch.object( OpenVINOVisualPromptingInferencer, "forward", return_value={"image_embeddings": np.empty((4, 2, 2))} @@ -165,6 +170,48 @@ def test_forward_decoder(self): assert returned_value == fake_output +class TestOTXOpenVinoDataLoader: + @pytest.fixture + def load_dataloader(self, mocker): + def _load_dataloader(is_encoder: bool = True, output_model: Optional[ModelEntity] = None): + dataset = generate_visual_prompting_dataset() + dataset = dataset.get_subset(Subset.TRAINING) + 
return OTXOpenVinoDataLoader(dataset, self.mocker_inferencer, is_encoder=is_encoder, output_model=output_model) + return _load_dataloader + + @pytest.fixture(autouse=True) + def setup(self, mocker): + self.mocker_read_model = mocker.patch("otx.algorithms.visual_prompting.tasks.openvino.ov.Core.read_model") + self.mocker_compile_model = mocker.patch("otx.algorithms.visual_prompting.tasks.openvino.ov.Core.compile_model") + self.mocker_inferencer = mocker.patch.object(OpenVINOVisualPromptingInferencer, "__init__") + + @e2e_pytest_unit + @pytest.mark.parametrize("is_encoder", [True, False]) + def test_getitem(self, mocker, load_dataloader, is_encoder: bool): + """Test __getitem__.""" + mocker_output_model = mocker.patch("otx.api.entities.model.ModelEntity") + if not is_encoder: + mocker.patch.object(mocker_output_model, "get_data") + self.mocker_read_model.reset_mock() + self.mocker_compile_model.reset_mock() + + dataloader = load_dataloader(is_encoder, mocker_output_model) + + setattr(dataloader, "target_length", 8) + mocker.patch.object(dataloader.inferencer, "pre_process", return_value=({"images": np.zeros((1, 4, 3, 3), dtype=np.uint8)}, None, [{"label": 1, "orig_size": 1}])) + + results = dataloader.__getitem__(0) + + if is_encoder: + assert results['images'].shape == (1, 3, 8, 8) + else: + self.mocker_read_model.assert_called_once() + self.mocker_compile_model.assert_called_once() + assert "label" not in results + assert "orig_size" not in results + assert "image_embeddings" in results + + class TestOpenVINOVisualPromptingTask: @pytest.fixture def otx_model(self): @@ -240,11 +287,43 @@ def test_evaluate(self, mocker): @e2e_pytest_unit def test_deploy(self): + """Test deploy.""" output_model = deepcopy(self.task_environment.model) - self.visual_prompting_ov_task.model.set_data("visual_prompting_image_encoder.bin", b"image_encoder_bin") self.visual_prompting_ov_task.model.set_data("visual_prompting_image_encoder.xml", b"image_encoder_xml") + self.visual_prompting_ov_task.model.set_data("visual_prompting_image_encoder.bin", b"image_encoder_bin") + self.visual_prompting_ov_task.model.set_data("visual_prompting_decoder.xml", b"decoder_xml") self.visual_prompting_ov_task.model.set_data("visual_prompting_decoder.bin", b"decoder_bin") - self.visual_prompting_ov_task.model.set_data("visual_prompting_decoder.xml", b"deocder_xml") + self.visual_prompting_ov_task.deploy(output_model) assert output_model.exportable_code is not None + + @e2e_pytest_unit + def test_optimize(self, mocker): + """Test optimize.""" + def patch_save_model(model, output_xml): + with open(output_xml, "wb") as f: + f.write(b"compressed_image_encoder_xml") + bin_path = pathlib.Path(output_xml).parent / pathlib.Path(str(pathlib.Path(output_xml).stem) + ".bin") + with open(bin_path, "wb") as f: + f.write(b"compressed_image_encoder_bin") + + dataset = generate_visual_prompting_dataset() + output_model = deepcopy(self.task_environment.model) + self.visual_prompting_ov_task.model.set_data("visual_prompting_image_encoder.xml", b"image_encoder_xml") + self.visual_prompting_ov_task.model.set_data("visual_prompting_image_encoder.bin", b"image_encoder_bin") + self.visual_prompting_ov_task.model.set_data("visual_prompting_decoder.xml", b"decoder_xml") + self.visual_prompting_ov_task.model.set_data("visual_prompting_decoder.bin", b"decoder_bin") + mocker.patch("otx.algorithms.visual_prompting.tasks.openvino.ov.Core.read_model", autospec=True) + mocker.patch("otx.algorithms.visual_prompting.tasks.openvino.ov.serialize", new=patch_save_model) 
+ fake_quantize = mocker.patch("otx.algorithms.visual_prompting.tasks.openvino.nncf.quantize", autospec=True) + + self.visual_prompting_ov_task.optimize(OptimizationType.POT, dataset=dataset, output_model=output_model) + + fake_quantize.assert_called_once() + # check if only image encoder was compressed + assert self.visual_prompting_ov_task.model.get_data("visual_prompting_image_encoder.xml") == b"compressed_image_encoder_xml" + assert self.visual_prompting_ov_task.model.get_data("visual_prompting_image_encoder.bin") == b"compressed_image_encoder_bin" + assert self.visual_prompting_ov_task.model.get_data("visual_prompting_decoder.xml") == b"decoder_xml" + assert self.visual_prompting_ov_task.model.get_data("visual_prompting_decoder.bin") == b"decoder_bin" + From ff80cd4b9634a40a9f962a5a2313d92bf557e51b Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Thu, 13 Jul 2023 13:53:36 +0900 Subject: [PATCH 09/12] pre-commit --- .../datasets/pipelines/sam_transforms.py | 4 +- .../configs/base/configuration.py | 2 - .../visual_prompting/tasks/openvino.py | 40 +++++++++++++------ .../config/test_visual_prompting_config.py | 5 ++- .../datasets/pipelines/test_sam_transforms.py | 20 ++++++---- .../visual_prompting/tasks/test_openvino.py | 31 +++++++++----- 6 files changed, 65 insertions(+), 37 deletions(-) diff --git a/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/datasets/pipelines/sam_transforms.py b/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/datasets/pipelines/sam_transforms.py index 1170c4efebb..f09275d4a21 100644 --- a/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/datasets/pipelines/sam_transforms.py +++ b/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/datasets/pipelines/sam_transforms.py @@ -10,7 +10,6 @@ import numpy as np import torch from torch import Tensor -from torch.nn import functional as F from torchvision.transforms.functional import resize, to_pil_image # type: ignore @@ -36,8 +35,7 @@ def __call__(self, item: Dict[str, Union[List, Tensor]]) -> Dict[str, Union[List Dict[str, Union[List, Tensor]]: Dictionary of batch data. """ item["images"] = torch.as_tensor( - self.apply_image(item["images"], self.target_length).transpose((2, 0, 1)), - dtype=torch.get_default_dtype() + self.apply_image(item["images"], self.target_length).transpose((2, 0, 1)), dtype=torch.get_default_dtype() ) item["gt_masks"] = [torch.as_tensor(gt_mask) for gt_mask in item["gt_masks"]] item["bboxes"] = self.apply_boxes(item["bboxes"], item["original_size"]) diff --git a/src/otx/algorithms/visual_prompting/configs/base/configuration.py b/src/otx/algorithms/visual_prompting/configs/base/configuration.py index f89e7ac9896..63dc1e726a2 100644 --- a/src/otx/algorithms/visual_prompting/configs/base/configuration.py +++ b/src/otx/algorithms/visual_prompting/configs/base/configuration.py @@ -15,8 +15,6 @@ # and limitations under the License. 
-from sys import maxsize - from attr import attrs from otx.algorithms.common.configs import BaseConfig, POTQuantizationPreset diff --git a/src/otx/algorithms/visual_prompting/tasks/openvino.py b/src/otx/algorithms/visual_prompting/tasks/openvino.py index 4ea8349cc77..7d5ad6fcd2e 100644 --- a/src/otx/algorithms/visual_prompting/tasks/openvino.py +++ b/src/otx/algorithms/visual_prompting/tasks/openvino.py @@ -17,9 +17,9 @@ import io import json import os +import random import tempfile import time -import random from pathlib import Path from typing import Any, Dict, List, Optional, Tuple, Union from zipfile import ZipFile @@ -138,12 +138,14 @@ def __init__( self.labels = label_schema.get_labels(include_empty=False) self.transform = get_transform() # TODO (sungchul): insert args - def pre_process(self, dataset_item: DatasetItemEntity) -> Dict[str, Any]: # type: ignore + def pre_process( # type: ignore + self, dataset_item: DatasetItemEntity + ) -> Tuple[Dict[str, Any], Dict[str, Any], List[Dict[str, Any]]]: """Pre-process function of OpenVINO Visual Prompting Inferencer for image encoder.""" images, meta = self.model["image_encoder"].preprocess(dataset_item.numpy) prompts = OTXVisualPromptingDataset.get_prompts(dataset_item, self.labels) # to be replaced prompts = self.model["decoder"].preprocess(prompts, meta) - return images, meta, prompts + return images, meta, prompts # type: ignore def post_process( self, prediction: Dict[str, np.ndarray], metadata: Dict[str, Any] @@ -195,7 +197,14 @@ def await_all(self) -> None: class OTXOpenVinoDataLoader: """DataLoader implementation for VisualPromptingOpenVINOTask.""" - def __init__(self, dataset: Any, inferencer: BaseInferencer, shuffle: bool = True, is_encoder: bool = True, output_model: Optional[ModelEntity] = None): + def __init__( + self, + dataset: Any, + inferencer: OpenVINOVisualPromptingInferencer, + shuffle: bool = True, + is_encoder: bool = True, + output_model: Optional[ModelEntity] = None, + ): self.dataset = dataset self.inferencer = inferencer self.shuffler = None @@ -209,10 +218,11 @@ def __init__(self, dataset: Any, inferencer: BaseInferencer, shuffle: bool = Tru core = ov.Core() compressed_model = core.read_model( output_model.get_data("visual_prompting_image_encoder.xml"), - output_model.get_data("visual_prompting_image_encoder.bin")) + output_model.get_data("visual_prompting_image_encoder.bin"), + ) self.compressed_model = core.compile_model( - model=compressed_model, - device_name=inferencer.model["image_encoder"].inference_adapter.device) + model=compressed_model, device_name=inferencer.model["image_encoder"].inference_adapter.device + ) def __getitem__(self, index: int): """Get item from dataset.""" @@ -407,17 +417,21 @@ def optimize( dataset = dataset.get_subset(Subset.TRAINING) - for i, (name, is_encoder) in enumerate( - zip(["image_encoder", "decoder"], [True, False]), 1 - ): + for i, (name, is_encoder) in enumerate(zip(["image_encoder", "decoder"], [True, False]), 1): if name == "decoder": # TODO (sungchul): quantize decoder, too logger.info(f"{name} won't do PTQ.") - output_model.set_data(f"visual_prompting_{name}.xml", self.model.get_data(f"visual_prompting_{name}.xml")) - output_model.set_data(f"visual_prompting_{name}.bin", self.model.get_data(f"visual_prompting_{name}.bin")) + output_model.set_data( + f"visual_prompting_{name}.xml", self.model.get_data(f"visual_prompting_{name}.xml") + ) + output_model.set_data( + f"visual_prompting_{name}.bin", self.model.get_data(f"visual_prompting_{name}.bin") + ) continue - 
data_loader = OTXOpenVinoDataLoader(dataset, self.inferencer, is_encoder=is_encoder, output_model=output_model) + data_loader = OTXOpenVinoDataLoader( + dataset, self.inferencer, is_encoder=is_encoder, output_model=output_model + ) quantization_dataset = nncf.Dataset(data_loader, lambda data: data) with tempfile.TemporaryDirectory() as tempdir: diff --git a/tests/unit/algorithms/visual_prompting/adapters/pytorch_lightning/config/test_visual_prompting_config.py b/tests/unit/algorithms/visual_prompting/adapters/pytorch_lightning/config/test_visual_prompting_config.py index e7ae231d157..c61e6b46589 100644 --- a/tests/unit/algorithms/visual_prompting/adapters/pytorch_lightning/config/test_visual_prompting_config.py +++ b/tests/unit/algorithms/visual_prompting/adapters/pytorch_lightning/config/test_visual_prompting_config.py @@ -1,4 +1,3 @@ - """Tests the methods in config.""" # Copyright (C) 2023 Intel Corporation @@ -69,7 +68,9 @@ def test_update_visual_prompting_config(): "parameters": [], } ) - visual_prompting_config = OmegaConf.create({"param1": "value1", "param2": "value2", "param3": "value3", "param4": "value4"}) + visual_prompting_config = OmegaConf.create( + {"param1": "value1", "param2": "value2", "param3": "value3", "param4": "value4"} + ) update_visual_prompting_config(visual_prompting_config, otx_config) diff --git a/tests/unit/algorithms/visual_prompting/adapters/pytorch_lightning/datasets/pipelines/test_sam_transforms.py b/tests/unit/algorithms/visual_prompting/adapters/pytorch_lightning/datasets/pipelines/test_sam_transforms.py index 82355933ed5..35c00c0198b 100644 --- a/tests/unit/algorithms/visual_prompting/adapters/pytorch_lightning/datasets/pipelines/test_sam_transforms.py +++ b/tests/unit/algorithms/visual_prompting/adapters/pytorch_lightning/datasets/pipelines/test_sam_transforms.py @@ -24,11 +24,12 @@ def test_call(self): """Test __call__.""" @e2e_pytest_unit - @pytest.mark.parametrize("image,expected", + @pytest.mark.parametrize( + "image,expected", [ (np.zeros((2, 4, 3), dtype=np.uint8), (4, 8, 3)), (np.zeros((12, 16, 3), dtype=np.uint8), (6, 8, 3)), - ] + ], ) def test_apply_image(self, image: np.ndarray, expected: Tuple[int, int, int]): """Test apply_image.""" @@ -37,11 +38,12 @@ def test_apply_image(self, image: np.ndarray, expected: Tuple[int, int, int]): assert results.shape == expected @e2e_pytest_unit - @pytest.mark.parametrize("coords,original_size,expected", + @pytest.mark.parametrize( + "coords,original_size,expected", [ (np.array([[1, 1], [2, 2]]), (4, 4), np.array([[2, 2], [4, 4]])), (np.array([[4, 4], [8, 8]]), (16, 16), np.array([[2, 2], [4, 4]])), - ] + ], ) def test_apply_coords(self, coords: np.ndarray, original_size: Tuple[int, int], expected: np.ndarray): """Test apply_coords.""" @@ -50,11 +52,12 @@ def test_apply_coords(self, coords: np.ndarray, original_size: Tuple[int, int], assert np.array_equal(result, expected) @e2e_pytest_unit - @pytest.mark.parametrize("boxes,original_size,expected", + @pytest.mark.parametrize( + "boxes,original_size,expected", [ (np.array([[1, 1, 2, 2], [2, 2, 3, 3]]), (4, 4), np.array([[2, 2, 4, 4], [4, 4, 6, 6]])), (np.array([[4, 4, 8, 8], [8, 8, 12, 12]]), (16, 16), np.array([[2, 2, 4, 4], [4, 4, 6, 6]])), - ] + ], ) def test_apply_boxes(self, boxes: np.ndarray, original_size: Tuple[int, int], expected: np.ndarray): """Test apply_boxes.""" @@ -63,11 +66,12 @@ def test_apply_boxes(self, boxes: np.ndarray, original_size: Tuple[int, int], ex assert np.array_equal(result, expected) @e2e_pytest_unit - 
@pytest.mark.parametrize("oldh,oldw,expected", + @pytest.mark.parametrize( + "oldh,oldw,expected", [ (3, 4, (6, 8)), (12, 16, (6, 8)), - ] + ], ) def test_get_preprocess_shape(self, oldh: int, oldw: int, expected: Tuple[int, int]): """Test get_preprocess_shape.""" diff --git a/tests/unit/algorithms/visual_prompting/tasks/test_openvino.py b/tests/unit/algorithms/visual_prompting/tasks/test_openvino.py index 17588177bd2..f44e25e1b1c 100644 --- a/tests/unit/algorithms/visual_prompting/tasks/test_openvino.py +++ b/tests/unit/algorithms/visual_prompting/tasks/test_openvino.py @@ -22,7 +22,7 @@ from otx.algorithms.visual_prompting.tasks.openvino import ( OpenVINOVisualPromptingInferencer, OpenVINOVisualPromptingTask, - OTXOpenVinoDataLoader + OTXOpenVinoDataLoader, ) from otx.api.configuration.configurable_parameters import ConfigurableParameters from otx.api.entities.annotation import Annotation @@ -176,7 +176,10 @@ def load_dataloader(self, mocker): def _load_dataloader(is_encoder: bool = True, output_model: Optional[ModelEntity] = None): dataset = generate_visual_prompting_dataset() dataset = dataset.get_subset(Subset.TRAINING) - return OTXOpenVinoDataLoader(dataset, self.mocker_inferencer, is_encoder=is_encoder, output_model=output_model) + return OTXOpenVinoDataLoader( + dataset, self.mocker_inferencer, is_encoder=is_encoder, output_model=output_model + ) + return _load_dataloader @pytest.fixture(autouse=True) @@ -198,12 +201,16 @@ def test_getitem(self, mocker, load_dataloader, is_encoder: bool): dataloader = load_dataloader(is_encoder, mocker_output_model) setattr(dataloader, "target_length", 8) - mocker.patch.object(dataloader.inferencer, "pre_process", return_value=({"images": np.zeros((1, 4, 3, 3), dtype=np.uint8)}, None, [{"label": 1, "orig_size": 1}])) - + mocker.patch.object( + dataloader.inferencer, + "pre_process", + return_value=({"images": np.zeros((1, 4, 3, 3), dtype=np.uint8)}, None, [{"label": 1, "orig_size": 1}]), + ) + results = dataloader.__getitem__(0) - + if is_encoder: - assert results['images'].shape == (1, 3, 8, 8) + assert results["images"].shape == (1, 3, 8, 8) else: self.mocker_read_model.assert_called_once() self.mocker_compile_model.assert_called_once() @@ -301,6 +308,7 @@ def test_deploy(self): @e2e_pytest_unit def test_optimize(self, mocker): """Test optimize.""" + def patch_save_model(model, output_xml): with open(output_xml, "wb") as f: f.write(b"compressed_image_encoder_xml") @@ -322,8 +330,13 @@ def patch_save_model(model, output_xml): fake_quantize.assert_called_once() # check if only image encoder was compressed - assert self.visual_prompting_ov_task.model.get_data("visual_prompting_image_encoder.xml") == b"compressed_image_encoder_xml" - assert self.visual_prompting_ov_task.model.get_data("visual_prompting_image_encoder.bin") == b"compressed_image_encoder_bin" + assert ( + self.visual_prompting_ov_task.model.get_data("visual_prompting_image_encoder.xml") + == b"compressed_image_encoder_xml" + ) + assert ( + self.visual_prompting_ov_task.model.get_data("visual_prompting_image_encoder.bin") + == b"compressed_image_encoder_bin" + ) assert self.visual_prompting_ov_task.model.get_data("visual_prompting_decoder.xml") == b"decoder_xml" assert self.visual_prompting_ov_task.model.get_data("visual_prompting_decoder.bin") == b"decoder_bin" - From 40ce5976780091b61cb77732df771d865406bf18 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Thu, 13 Jul 2023 14:16:34 +0900 Subject: [PATCH 10/12] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) 
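The image-encoder quantization exercised by the test above reduces to the standard NNCF post-training flow implemented in optimize(). A condensed, self-contained sketch for reference, where data_loader stands for the OTXOpenVinoDataLoader defined earlier (or any iterable yielding {"images": ...} input dicts) and the file paths are placeholders:

import nncf
import openvino.runtime as ov
from nncf.common.quantization.structs import QuantizationPreset

core = ov.Core()
fp_model = core.read_model("visual_prompting_image_encoder.xml")  # FP IR; weights picked up from the .bin next to it
calibration = nncf.Dataset(data_loader, lambda item: item)        # identity transform, as in the task code
quantized = nncf.quantize(
    fp_model,
    calibration,
    subset_size=min(300, len(data_loader)),  # 300 mirrors the default stat_subset_size
    preset=QuantizationPreset.MIXED,
)
ov.serialize(quantized, "visual_prompting_image_encoder_int8.xml")  # writes the quantized .xml/.bin pair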
diff --git a/CHANGELOG.md b/CHANGELOG.md index fcb7d47864d..7bbe22f40e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ All notable changes to this project will be documented in this file. - Add new visual prompting task: train/eval (https://github.com/openvinotoolkit/training_extensions/pull/2203) - Add new visual prompting task: export (https://github.com/openvinotoolkit/training_extensions/pull/2274) - Add new visual prompting task: deploy (https://github.com/openvinotoolkit/training_extensions/pull/2311) +- Add new visual prompting task: optimize (PTQ) (https://github.com/openvinotoolkit/training_extensions/pull/2318) - Add new object detector ResNeXt101-ATSS () ### Enhancements From 60ae7ebe6044ba41d040d7692e02cf0e052ef46f Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Thu, 13 Jul 2023 15:45:02 +0900 Subject: [PATCH 11/12] Update from staticmethod to classmethod --- .../pytorch_lightning/datasets/pipelines/sam_transforms.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/datasets/pipelines/sam_transforms.py b/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/datasets/pipelines/sam_transforms.py index f09275d4a21..aeb0cc98baf 100644 --- a/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/datasets/pipelines/sam_transforms.py +++ b/src/otx/algorithms/visual_prompting/adapters/pytorch_lightning/datasets/pipelines/sam_transforms.py @@ -43,8 +43,8 @@ def __call__(self, item: Dict[str, Union[List, Tensor]]) -> Dict[str, Union[List item["points"] = self.apply_coords(item["points"], item["original_size"]) return item - @staticmethod - def apply_image(image: np.ndarray, target_length: int) -> np.ndarray: + @classmethod + def apply_image(cls, image: np.ndarray, target_length: int) -> np.ndarray: """Expects a numpy array with shape HxWxC in uint8 format. Args: @@ -54,7 +54,7 @@ def apply_image(image: np.ndarray, target_length: int) -> np.ndarray: Returns: np.ndarray: Resized image. 
""" - target_size = ResizeLongestSide.get_preprocess_shape(image.shape[0], image.shape[1], target_length) + target_size = cls.get_preprocess_shape(image.shape[0], image.shape[1], target_length) return np.array(resize(to_pil_image(image), target_size)) def apply_coords(self, coords: np.ndarray, original_size: Union[List[Any], Tensor]) -> np.ndarray: From f8b307ea861caf93312dff1c1df5aa4db5225110 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Thu, 13 Jul 2023 18:29:09 +0900 Subject: [PATCH 12/12] Move `ResizeLongestSide` into preprocess --- .../model_wrappers/openvino_models.py | 10 +++++++- .../visual_prompting/tasks/openvino.py | 23 ++++++++++--------- .../visual_prompting/tasks/test_openvino.py | 2 +- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/otx/algorithms/visual_prompting/adapters/openvino/model_wrappers/openvino_models.py b/src/otx/algorithms/visual_prompting/adapters/openvino/model_wrappers/openvino_models.py index 5d4ba5e8917..ee18acd4bd6 100644 --- a/src/otx/algorithms/visual_prompting/adapters/openvino/model_wrappers/openvino_models.py +++ b/src/otx/algorithms/visual_prompting/adapters/openvino/model_wrappers/openvino_models.py @@ -23,6 +23,7 @@ from openvino.model_api.models import ImageModel, SegmentationModel from openvino.model_api.models.types import NumericalValue, StringValue +from otx.algorithms.visual_prompting.adapters.pytorch_lightning.datasets.pipelines import ResizeLongestSide from otx.api.utils.segmentation_utils import create_hard_prediction_from_soft_prediction @@ -40,13 +41,20 @@ def parameters(cls) -> Dict[str, Any]: # noqa: D102 parameters.update( { "resize_type": StringValue(default_value="fit_to_window"), + "image_size": NumericalValue(value_type=int, default_value=1024, min=0, max=2048), } ) return parameters - def preprocess(self, inputs: np.ndarray) -> Tuple[Dict[str, np.ndarray], Dict[str, Any]]: + def preprocess( + self, inputs: np.ndarray, extra_processing: bool = False + ) -> Tuple[Dict[str, np.ndarray], Dict[str, Any]]: """Update meta for image encoder.""" dict_inputs, meta = super().preprocess(inputs) + if extra_processing: + dict_inputs["images"] = ResizeLongestSide.apply_image(dict_inputs["images"][0], self.image_size).transpose( + 2, 0, 1 + )[None] meta["resize_type"] = self.resize_type return dict_inputs, meta diff --git a/src/otx/algorithms/visual_prompting/tasks/openvino.py b/src/otx/algorithms/visual_prompting/tasks/openvino.py index 7d5ad6fcd2e..f7d045f1e6c 100644 --- a/src/otx/algorithms/visual_prompting/tasks/openvino.py +++ b/src/otx/algorithms/visual_prompting/tasks/openvino.py @@ -43,7 +43,6 @@ OTXVisualPromptingDataset, get_transform, ) -from otx.algorithms.visual_prompting.adapters.pytorch_lightning.datasets.pipelines import ResizeLongestSide from otx.algorithms.visual_prompting.configs.base import VisualPromptingBaseConfig from otx.api.entities.annotation import Annotation from otx.api.entities.dataset_item import DatasetItemEntity @@ -115,13 +114,16 @@ def __init__( self.model = {} model_parameters = {"decoder": {"input_layouts": "image_embeddings:NCHW"}} self.configuration = { + "image_encoder": { + **attr.asdict(hparams.postprocessing, filter=lambda attr, value: attr.name in ["image_size"]) + }, "decoder": { **attr.asdict( hparams.postprocessing, filter=lambda attr, value: attr.name not in ["header", "description", "type", "visible_in_ui", "class_name"], ) - } + }, } for name in ["image_encoder", "decoder"]: model_adapter = VisualPromptingOpenvinoAdapter( @@ -139,10 +141,10 @@ def __init__( 
self.transform = get_transform() # TODO (sungchul): insert args def pre_process( # type: ignore - self, dataset_item: DatasetItemEntity + self, dataset_item: DatasetItemEntity, extra_processing: bool = False ) -> Tuple[Dict[str, Any], Dict[str, Any], List[Dict[str, Any]]]: """Pre-process function of OpenVINO Visual Prompting Inferencer for image encoder.""" - images, meta = self.model["image_encoder"].preprocess(dataset_item.numpy) + images, meta = self.model["image_encoder"].preprocess(dataset_item.numpy, extra_processing) prompts = OTXVisualPromptingDataset.get_prompts(dataset_item, self.labels) # to be replaced prompts = self.model["decoder"].preprocess(prompts, meta) return images, meta, prompts # type: ignore @@ -230,15 +232,14 @@ def __getitem__(self, index: int): index = self.shuffler[index] items = self.dataset[index] - images, _, prompts = self.inferencer.pre_process(items) - processed_image = ResizeLongestSide.apply_image(images["images"][0], self.target_length).transpose(2, 0, 1) - _, h, w = processed_image.shape - pad_width = ((0, 0), (0, self.target_length - h), (0, self.target_length - w)) - processed_image = np.pad(processed_image, pad_width, mode="constant", constant_values=0) + images, _, prompts = self.inferencer.pre_process(items, extra_processing=True) + _, _, h, w = images["images"].shape + pad_width = ((0, 0), (0, 0), (0, self.target_length - h), (0, self.target_length - w)) + images["images"] = np.pad(images["images"], pad_width, mode="constant", constant_values=0) if self.is_encoder: - return {"images": processed_image[None]} + return images else: - image_embeddings = self.compressed_model(processed_image[None]) + image_embeddings = self.compressed_model(images["images"]) prompt = prompts[0] # only use the first prompt prompt.pop("label") prompt.pop("orig_size") diff --git a/tests/unit/algorithms/visual_prompting/tasks/test_openvino.py b/tests/unit/algorithms/visual_prompting/tasks/test_openvino.py index f44e25e1b1c..8a8229a9bf9 100644 --- a/tests/unit/algorithms/visual_prompting/tasks/test_openvino.py +++ b/tests/unit/algorithms/visual_prompting/tasks/test_openvino.py @@ -204,7 +204,7 @@ def test_getitem(self, mocker, load_dataloader, is_encoder: bool): mocker.patch.object( dataloader.inferencer, "pre_process", - return_value=({"images": np.zeros((1, 4, 3, 3), dtype=np.uint8)}, None, [{"label": 1, "orig_size": 1}]), + return_value=({"images": np.zeros((1, 3, 4, 4), dtype=np.uint8)}, None, [{"label": 1, "orig_size": 1}]), ) results = dataloader.__getitem__(0)
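
Editor's note on the optimization flow added by these patches: the pieces visible above follow the usual NNCF post-training quantization recipe. OTXOpenVinoDataLoader yields padded image-encoder inputs (and, on the decoder branch, prompts paired with embeddings produced by the already-compressed encoder), nncf.Dataset wraps that loader with an identity transform, and the quantized IR is serialized into a temporary directory before being written back onto the model entity. The unit tests in the optimize test above assert that only the visual_prompting_image_encoder.xml/.bin artifacts change while the decoder blobs stay untouched. The sketch below restates that flow outside the task class as a minimal, self-contained illustration; it is written under assumptions (NNCF's nncf.Dataset/nncf.quantize and OpenVINO's Core.read_model/serialize APIs behaving as used in the snippets above), it is not the PR's actual optimize() body, and quantize_image_encoder, DummyCalibrationLoader, and the file paths are hypothetical names introduced only for the example.

    # Hypothetical, minimal sketch of the PTQ flow used by the patches above.
    # Assumes nncf (nncf.Dataset, nncf.quantize) and openvino.runtime are installed.
    import nncf
    import numpy as np
    from openvino.runtime import Core, serialize


    class DummyCalibrationLoader:
        """Stand-in for OTXOpenVinoDataLoader(is_encoder=True): yields padded encoder inputs."""

        def __init__(self, length: int = 8, target_length: int = 1024) -> None:
            self.length = length
            self.target_length = target_length

        def __len__(self) -> int:
            return self.length

        def __getitem__(self, index: int) -> dict:
            # Pre-processed image with the longest side resized to target_length,
            # then bottom/right zero-padded to a square, as in __getitem__ above.
            image = np.zeros((1, 3, 684, self.target_length), dtype=np.float32)
            _, _, h, w = image.shape
            pad_width = ((0, 0), (0, 0), (0, self.target_length - h), (0, self.target_length - w))
            return {"images": np.pad(image, pad_width, mode="constant", constant_values=0)}


    def quantize_image_encoder(encoder_xml: str, output_xml: str) -> None:
        """Quantize only the image encoder IR, mirroring the behaviour checked in test_optimize."""
        model = Core().read_model(encoder_xml)

        # Each loader item is already a {"input_name": np.ndarray} dict,
        # so the transform function simply forwards it unchanged.
        calibration_dataset = nncf.Dataset(DummyCalibrationLoader(), lambda data: data)

        compressed = nncf.quantize(model, calibration_dataset, subset_size=8)
        serialize(compressed, output_xml)  # the .bin is written next to the .xml

Usage would amount to calling quantize_image_encoder("visual_prompting_image_encoder.xml", "compressed_encoder.xml") inside a temporary directory and then reading the resulting .xml/.bin back into the output model, which matches the save-then-set_data pattern the optimize test patches with patch_save_model. The decoder is left in its original precision here, consistent with the assertions that visual_prompting_decoder.xml/.bin remain unchanged.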