diff --git a/CHANGELOG.md b/CHANGELOG.md
index 48aea3c4e23..42efa008f6f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,8 @@ All notable changes to this project will be documented in this file.
   (https://github.com/openvinotoolkit/training_extensions/pull/3749)
 - Enable torch.compile to work with classification
   (https://github.com/openvinotoolkit/training_extensions/pull/3758)
+- Add `otx benchmark` subcommand
+  (https://github.com/openvinotoolkit/training_extensions/pull/3762)
 
 ### Enhancements
 
diff --git a/docs/source/guide/get_started/api_tutorial.rst b/docs/source/guide/get_started/api_tutorial.rst
index 9c05405efeb..b170875affc 100644
--- a/docs/source/guide/get_started/api_tutorial.rst
+++ b/docs/source/guide/get_started/api_tutorial.rst
@@ -107,10 +107,10 @@ If you want to use other models offered by OpenVINO™ Training Extension beside
     model_lists = list_models(task="DETECTION", print_table=True)
 
     '''
-    ┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
-    ┃ Task ┃ Model Name ┃ Recipe Path ┃
-    ┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
-    │ DETECTION │ yolox_tiny │ src/otx/recipe/detection/yolox_tiny.yaml │
+    ┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
+    ┃ Task ┃ Model Name ┃ Recipe Path ┃
+    ┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
+    │ DETECTION │ yolox_tiny │ src/otx/recipe/detection/yolox_tiny.yaml │
     │ ... │ │ │
     └───────────┴───────────────────────┴────────────────────────────────────────────────────────────────┘
     '''
@@ -328,7 +328,7 @@ The datamodule used by the Engine is of type ``otx.core.data.module.OTXDataModul
     engine.train(precision="16")
 
 .. note::
-
+
     This uses lightning's precision value. You can use the values below:
     - "64", "32", "16", "bf16",
     - 64, 32, 16
@@ -518,4 +518,32 @@ You can validate the optimized model as the usual model. For example for the NNC
 
     engine.test(checkpoint="")
 
-That's it. Now, we can use OpenVINO™ Training Extensions APIs to create, train, and deploy deep learning models using the OpenVINO™ Training Extension.
+************
+Benchmarking
+************
+
+``Engine`` allows you to benchmark the trained model and, in the case of a torch model, also reports its theoretical complexity.
+The performance estimated by ``Engine.benchmark()`` may differ from that of the deployed model, since the measurements are conducted
+via the OTX inference API, which can introduce additional overhead.
+
+.. tab-set::
+
+    .. tab-item:: Benchmark Model
+
+        .. code-block:: python
+
+            engine.benchmark()
+
+    .. tab-item:: Benchmark OpenVINO™ IR model
+
+        .. code-block:: python
+
+            engine.benchmark(checkpoint="")
+
+    .. note::
+
+        Specifying a checkpoint only makes sense for OpenVINO™ IR models.
+
+Conclusion
+"""""""""""
+That's it! Now we can use the OpenVINO™ Training Extensions APIs to create, train, and deploy deep learning models.
diff --git a/docs/source/guide/get_started/cli_commands.rst b/docs/source/guide/get_started/cli_commands.rst
index 0ea6d1ad479..0b79956025f 100644
--- a/docs/source/guide/get_started/cli_commands.rst
+++ b/docs/source/guide/get_started/cli_commands.rst
@@ -50,6 +50,7 @@ Help
 │ export Export the trained model to OpenVINO Intermediate Representation (IR) or ONNX formats.
│ │ optimize Applies NNCF.PTQ to the underlying models (now works only for OV models). │ │ explain Run XAI using the specified model and data (test subset). │ + | benchmark Executes model micro benchmarking on random data. | │ │ ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ @@ -232,25 +233,25 @@ Example to find ready-to-use recipes for the detection task: .. code-block:: shell (otx) ...$ otx find --task DETECTION - ┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ - ┃ Task ┃ Model Name ┃ Recipe Path ┃ - ┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ - │ DETECTION │ yolox_tiny │ recipe/detection/yolox_tiny.yaml │ - │ DETECTION │ atss_mobilenetv2_tile │ recipe/detection/atss_mobilenetv2_tile.yaml │ - │ DETECTION │ openvino_model │ recipe/detection/openvino_model.yaml │ - │ DETECTION │ atss_mobilenetv2 │ recipe/detection/atss_mobilenetv2.yaml │ - │ DETECTION │ atss_resnext101 │ recipe/detection/atss_resnext101.yaml │ - │ DETECTION │ yolox_l_tile │ recipe/detection/yolox_l_tile.yaml │ - │ DETECTION │ ssd_mobilenetv2_tile │ recipe/detection/ssd_mobilenetv2_tile.yaml │ - │ DETECTION │ atss_r50_fpn │ recipe/detection/atss_r50_fpn.yaml │ - │ DETECTION │ yolox_tiny_tile │ recipe/detection/yolox_tiny_tile.yaml │ - │ DETECTION │ yolox_s │ recipe/detection/yolox_s.yaml │ - │ DETECTION │ yolox_s_tile │ recipe/detection/yolox_s_tile.yaml │ - │ DETECTION │ rtmdet_tiny │ recipe/detection/rtmdet_tiny.yaml │ - │ DETECTION │ yolox_x │ recipe/detection/yolox_x.yaml │ - │ DETECTION │ yolox_x_tile │ recipe/detection/yolox_x_tile.yaml │ - │ DETECTION │ ssd_mobilenetv2 │ recipe/detection/ssd_mobilenetv2.yaml │ - │ DETECTION │ yolox_l │ recipe/detection/yolox_l.yaml │ + ┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ + ┃ Task ┃ Model Name ┃ Recipe Path ┃ + ┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ + │ DETECTION │ yolox_tiny │ recipe/detection/yolox_tiny.yaml │ + │ DETECTION │ atss_mobilenetv2_tile │ recipe/detection/atss_mobilenetv2_tile.yaml │ + │ DETECTION │ openvino_model │ recipe/detection/openvino_model.yaml │ + │ DETECTION │ atss_mobilenetv2 │ recipe/detection/atss_mobilenetv2.yaml │ + │ DETECTION │ atss_resnext101 │ recipe/detection/atss_resnext101.yaml │ + │ DETECTION │ yolox_l_tile │ recipe/detection/yolox_l_tile.yaml │ + │ DETECTION │ ssd_mobilenetv2_tile │ recipe/detection/ssd_mobilenetv2_tile.yaml │ + │ DETECTION │ atss_r50_fpn │ recipe/detection/atss_r50_fpn.yaml │ + │ DETECTION │ yolox_tiny_tile │ recipe/detection/yolox_tiny_tile.yaml │ + │ DETECTION │ yolox_s │ recipe/detection/yolox_s.yaml │ + │ DETECTION │ yolox_s_tile │ recipe/detection/yolox_s_tile.yaml │ + │ DETECTION │ rtmdet_tiny │ recipe/detection/rtmdet_tiny.yaml │ + │ DETECTION │ yolox_x │ recipe/detection/yolox_x.yaml │ + │ DETECTION │ yolox_x_tile │ recipe/detection/yolox_x_tile.yaml │ + │ DETECTION │ ssd_mobilenetv2 │ recipe/detection/ssd_mobilenetv2.yaml │ + │ DETECTION │ yolox_l │ recipe/detection/yolox_l.yaml │ └───────────┴───────────────────────┴─────────────────────────────────────────────┘ Example to find yolo named model for the detection task: @@ -258,17 +259,17 @@ Example to find yolo named model for the detection task: .. 
code-block:: shell
 
     (otx) ...$ otx find --task DETECTION --pattern 'yolo*'
-    ┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
-    ┃ Task ┃ Model Name ┃ Recipe Path ┃
-    ┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
-    │ DETECTION │ yolox_tiny │ recipe/detection/yolox_tiny.yaml │
-    │ DETECTION │ yolox_x │ recipe/detection/yolox_x.yaml │
-    │ DETECTION │ yolox_l_tile │ recipe/detection/yolox_l_tile.yaml │
-    │ DETECTION │ yolox_s │ recipe/detection/yolox_s.yaml │
-    │ DETECTION │ yolox_l │ recipe/detection/yolox_l.yaml │
-    │ DETECTION │ yolox_x_tile │ recipe/detection/yolox_x_tile.yaml │
-    │ DETECTION │ yolox_s_tile │ recipe/detection/yolox_s_tile.yaml │
-    │ DETECTION │ yolox_tiny_tile │ recipe/detection/yolox_tiny_tile.yaml │
+    ┏━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
+    ┃ Task ┃ Model Name ┃ Recipe Path ┃
+    ┡━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
+    │ DETECTION │ yolox_tiny │ recipe/detection/yolox_tiny.yaml │
+    │ DETECTION │ yolox_x │ recipe/detection/yolox_x.yaml │
+    │ DETECTION │ yolox_l_tile │ recipe/detection/yolox_l_tile.yaml │
+    │ DETECTION │ yolox_s │ recipe/detection/yolox_s.yaml │
+    │ DETECTION │ yolox_l │ recipe/detection/yolox_l.yaml │
+    │ DETECTION │ yolox_x_tile │ recipe/detection/yolox_x_tile.yaml │
+    │ DETECTION │ yolox_s_tile │ recipe/detection/yolox_s_tile.yaml │
+    │ DETECTION │ yolox_tiny_tile │ recipe/detection/yolox_tiny_tile.yaml │
     └───────────┴─────────────────┴───────────────────────────────────────┘
 
@@ -476,7 +477,7 @@ The command below will generate saliency maps (heatmaps with red colored areas o
 
 .. note::
 
-    It is possible to pass both PyTorch weights ``.ckpt`` or OpenVINO™ IR ``exported_model.xml`` to ``--load-weights`` option.
+    It is possible to pass either PyTorch weights ``.ckpt`` or an OpenVINO™ IR model ``exported_model.xml`` to the ``--checkpoint`` option.
 
 By default, the model is exported to the OpenVINO™ IR format without extra feature information needed for the ``explain`` function. To use OpenVINO™ IR model in ``otx explain``, please first export it with ``--explain`` parameter:
 
     (otx) ...$ otx export ... --explain True
     (otx) ...$ otx explain ... --checkpoint outputs/openvino/with_features \
 
+*******************
+Micro-benchmarking
+*******************
+
+The ``otx benchmark`` tool performs fast in-place benchmarking on randomly generated data. The benchmark excludes the data loading cost, but takes into account the extra overhead of the OTX API and ModelAPI (in the case of OpenVINO™ IR models).
+
+The command requires a checkpoint path for OpenVINO™ IR models; for torch models this parameter is optional. In addition, for torch models the number of trainable parameters and the theoretical computational complexity are estimated when the model's structure allows it.
+It is worth noting that latency and throughput depend on the batch size. By varying the batch size parameter, one can quickly explore this trade-off for the considered model. For OpenVINO™ IR models, batching is imitated by using the async inference API.
+
+.. code-block:: shell
+
+    (otx) ...$ otx benchmark ... --data_root \
+                 --checkpoint \
+                 --batch_size 1
+
+.. note::
+
+    It is possible to pass either PyTorch weights ``.ckpt`` or an OpenVINO™ IR model ``exported_model.xml`` to the ``--checkpoint`` option.
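For illustration, with a placeholder recipe and placeholder paths, the batch-size trade-off can be explored by re-running the command with different values (a sketch, not part of the patch itself):

.. code-block:: shell

    # torch model: the checkpoint is optional; parameter count and complexity are also reported
    (otx) ...$ otx benchmark --config src/otx/recipe/detection/yolox_tiny.yaml \
                             --data_root data/my_dataset --batch_size 1 --n_iters 10
    (otx) ...$ otx benchmark --config src/otx/recipe/detection/yolox_tiny.yaml \
                             --data_root data/my_dataset --batch_size 8 --n_iters 10

    # OpenVINO™ IR model: the checkpoint is required; batching is imitated via the async inference API
    (otx) ...$ otx benchmark --config src/otx/recipe/detection/yolox_tiny.yaml \
                             --data_root data/my_dataset --batch_size 8 \
                             --checkpoint outputs/.latest/export/exported_model.xml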
*********** Workspace diff --git a/src/otx/algo/classification/dino_v2.py b/src/otx/algo/classification/dino_v2.py index a24adf76d2a..fedbec588f5 100644 --- a/src/otx/algo/classification/dino_v2.py +++ b/src/otx/algo/classification/dino_v2.py @@ -280,7 +280,7 @@ def _customize_inputs(self, inputs: MulticlassClsBatchDataEntity) -> dict[str, A "mode": mode, } return { - "images": inputs.images, + "images": inputs.stacked_images, "labels": torch.cat(inputs.labels, dim=0), "imgs_info": inputs.imgs_info, "mode": mode, diff --git a/src/otx/algo/classification/efficientnet.py b/src/otx/algo/classification/efficientnet.py index c939e1b1421..bba5dafd61b 100644 --- a/src/otx/algo/classification/efficientnet.py +++ b/src/otx/algo/classification/efficientnet.py @@ -145,7 +145,7 @@ def _exporter(self) -> OTXModelExporter: """Creates OTXModelExporter object that can export the model.""" return OTXNativeModelExporter( task_level_export_parameters=self._export_parameters, - input_size=(1, 3, 224, 224), + input_size=self.image_size, mean=(123.675, 116.28, 103.53), std=(58.395, 57.12, 57.375), resize_mode="standard", @@ -230,7 +230,7 @@ def _customize_inputs(self, inputs: MulticlassClsBatchDataEntity) -> dict[str, A "mode": mode, } return { - "images": inputs.images, + "images": inputs.stacked_images, "labels": torch.cat(inputs.labels, dim=0), "imgs_info": inputs.imgs_info, "mode": mode, diff --git a/src/otx/algo/classification/efficientnet_v2.py b/src/otx/algo/classification/efficientnet_v2.py index 3d6ed09369b..cc88cc3281b 100644 --- a/src/otx/algo/classification/efficientnet_v2.py +++ b/src/otx/algo/classification/efficientnet_v2.py @@ -140,7 +140,7 @@ def _exporter(self) -> OTXModelExporter: """Creates OTXModelExporter object that can export the model.""" return OTXNativeModelExporter( task_level_export_parameters=self._export_parameters, - input_size=(1, 3, 224, 224), + input_size=self.image_size, mean=(123.675, 116.28, 103.53), std=(58.395, 57.12, 57.375), resize_mode="standard", @@ -222,7 +222,7 @@ def _customize_inputs(self, inputs: MulticlassClsBatchDataEntity) -> dict[str, A "mode": mode, } return { - "images": inputs.images, + "images": inputs.stacked_images, "labels": torch.cat(inputs.labels, dim=0), "imgs_info": inputs.imgs_info, "mode": mode, diff --git a/src/otx/algo/classification/heads/vision_transformer_head.py b/src/otx/algo/classification/heads/vision_transformer_head.py index 831d46a21b4..849913a2dce 100644 --- a/src/otx/algo/classification/heads/vision_transformer_head.py +++ b/src/otx/algo/classification/heads/vision_transformer_head.py @@ -141,4 +141,4 @@ def predict( def _get_predictions(self, cls_score: torch.Tensor) -> torch.Tensor: """Get the score from the classification score.""" - return functional.softmax(cls_score, dim=1) + return functional.softmax(cls_score, dim=-1) diff --git a/src/otx/algo/classification/huggingface_model.py b/src/otx/algo/classification/huggingface_model.py index 8f088b668c4..906b65b4123 100644 --- a/src/otx/algo/classification/huggingface_model.py +++ b/src/otx/algo/classification/huggingface_model.py @@ -110,7 +110,7 @@ def _exporter(self) -> OTXModelExporter: """Creates OTXModelExporter object that can export the model.""" return OTXNativeModelExporter( task_level_export_parameters=self._export_parameters, - input_size=(1, 3, 224, 224), + input_size=self.image_size, mean=(123.675, 116.28, 103.53), std=(58.395, 57.12, 57.375), resize_mode="standard", diff --git a/src/otx/algo/classification/mobilenet_v3.py b/src/otx/algo/classification/mobilenet_v3.py 
index b5808a29a00..70eeebff731 100644 --- a/src/otx/algo/classification/mobilenet_v3.py +++ b/src/otx/algo/classification/mobilenet_v3.py @@ -152,7 +152,7 @@ def _exporter(self) -> OTXModelExporter: """Creates OTXModelExporter object that can export the model.""" return OTXNativeModelExporter( task_level_export_parameters=self._export_parameters, - input_size=(1, 3, 224, 224), + input_size=self.image_size, mean=(123.675, 116.28, 103.53), std=(58.395, 57.12, 57.375), resize_mode="standard", @@ -237,7 +237,7 @@ def _customize_inputs(self, inputs: MulticlassClsBatchDataEntity) -> dict[str, A "mode": mode, } return { - "images": inputs.images, + "images": inputs.stacked_images, "labels": torch.cat(inputs.labels, dim=0), "imgs_info": inputs.imgs_info, "mode": mode, diff --git a/src/otx/algo/classification/torchvision_model.py b/src/otx/algo/classification/torchvision_model.py index 65f542d767f..7a81b536965 100644 --- a/src/otx/algo/classification/torchvision_model.py +++ b/src/otx/algo/classification/torchvision_model.py @@ -427,6 +427,7 @@ def __init__( self.freeze_backbone = freeze_backbone self.train_type = train_type self.task = task + self.image_size: tuple[int, ...] = (1, 3, 224, 224) # TODO(@harimkang): Need to make it configurable. if task == OTXTaskType.MULTI_CLASS_CLS: @@ -552,7 +553,7 @@ def _exporter(self) -> OTXModelExporter: """Creates OTXModelExporter object that can export the model.""" return OTXNativeModelExporter( task_level_export_parameters=self._export_parameters, - input_size=(1, 3, 224, 224), + input_size=self.image_size, mean=(123.675, 116.28, 103.53), std=(58.395, 57.12, 57.375), resize_mode="standard", @@ -647,3 +648,17 @@ def _convert_pred_entity_to_compute_metric( "preds": pred, "target": target, } + + def get_dummy_input(self, batch_size: int = 1) -> CLASSIFICATION_BATCH_DATA_ENTITY: + """Returns a dummy input for classification model.""" + images = [torch.rand(*self.image_size[1:]) for _ in range(batch_size)] + labels = [torch.LongTensor([0])] * batch_size + + if self.task == OTXTaskType.MULTI_CLASS_CLS: + return MulticlassClsBatchDataEntity(batch_size, images, [], labels=labels) + if self.task == OTXTaskType.MULTI_LABEL_CLS: + return MultilabelClsBatchDataEntity(batch_size, images, [], labels=labels) + if self.task == OTXTaskType.H_LABEL_CLS: + return HlabelClsBatchDataEntity(batch_size, images, [], labels=labels) + msg = f"Task type {self.task} is not supported." 
+ raise NotImplementedError(msg) diff --git a/src/otx/algo/classification/vit.py b/src/otx/algo/classification/vit.py index 86d05b71218..31d21a76635 100644 --- a/src/otx/algo/classification/vit.py +++ b/src/otx/algo/classification/vit.py @@ -346,7 +346,7 @@ def _exporter(self) -> OTXModelExporter: """Creates OTXModelExporter object that can export the model.""" return OTXNativeModelExporter( task_level_export_parameters=self._export_parameters, - input_size=(1, 3, 224, 224), + input_size=self.image_size, mean=(123.675, 116.28, 103.53), std=(58.395, 57.12, 57.375), resize_mode="standard", @@ -413,7 +413,7 @@ def _customize_inputs(self, inputs: MulticlassClsBatchDataEntity) -> dict[str, A "mode": mode, } return { - "images": inputs.images, + "images": inputs.stacked_images, "labels": torch.cat(inputs.labels, dim=0), "imgs_info": inputs.imgs_info, "mode": mode, diff --git a/src/otx/algo/detection/atss.py b/src/otx/algo/detection/atss.py index b6febc9f981..2733205e55b 100644 --- a/src/otx/algo/detection/atss.py +++ b/src/otx/algo/detection/atss.py @@ -32,7 +32,8 @@ class ATSS(ExplainableOTXDetModel): def _exporter(self) -> OTXModelExporter: """Creates OTXModelExporter object that can export the model.""" if self.image_size is None: - raise ValueError(self.image_size) + msg = f"Image size attribute is not set for {self.__class__}" + raise ValueError(msg) return OTXNativeModelExporter( task_level_export_parameters=self._export_parameters, diff --git a/src/otx/algo/detection/rtdetr.py b/src/otx/algo/detection/rtdetr.py index 1a7fdb6eba9..7367e12d67f 100644 --- a/src/otx/algo/detection/rtdetr.py +++ b/src/otx/algo/detection/rtdetr.py @@ -164,7 +164,8 @@ def _get_optim_params(cfg: list[dict[str, Any]] | None, model: nn.Module) -> lis def _exporter(self) -> OTXModelExporter: """Creates OTXModelExporter object that can export the model.""" if self.image_size is None: - raise ValueError(self.image_size) + msg = f"Image size attribute is not set for {self.__class__}" + raise ValueError(msg) return OTXNativeModelExporter( task_level_export_parameters=self._export_parameters, diff --git a/src/otx/algo/detection/rtmdet.py b/src/otx/algo/detection/rtmdet.py index 75e2e956f55..051df93020c 100644 --- a/src/otx/algo/detection/rtmdet.py +++ b/src/otx/algo/detection/rtmdet.py @@ -28,7 +28,8 @@ class RTMDet(ExplainableOTXDetModel): def _exporter(self) -> OTXModelExporter: """Creates OTXModelExporter object that can export the model.""" if self.image_size is None: - raise ValueError(self.image_size) + msg = f"Image size attribute is not set for {self.__class__}" + raise ValueError(msg) return OTXNativeModelExporter( task_level_export_parameters=self._export_parameters, diff --git a/src/otx/algo/detection/ssd.py b/src/otx/algo/detection/ssd.py index 3b23ded94f0..09c85a82599 100644 --- a/src/otx/algo/detection/ssd.py +++ b/src/otx/algo/detection/ssd.py @@ -267,7 +267,9 @@ def load_state_dict_pre_hook(self, state_dict: dict[str, torch.Tensor], prefix: def _exporter(self) -> OTXModelExporter: """Creates OTXModelExporter object that can export the model.""" if self.image_size is None: - raise ValueError(self.image_size) + msg = f"Image size attribute is not set for {self.__class__}" + raise ValueError(msg) + return OTXNativeModelExporter( task_level_export_parameters=self._export_parameters, input_size=self.image_size, diff --git a/src/otx/algo/detection/yolox.py b/src/otx/algo/detection/yolox.py index d7af5f7a806..7f5adb573d5 100644 --- a/src/otx/algo/detection/yolox.py +++ b/src/otx/algo/detection/yolox.py @@ -41,7 
+41,8 @@ def _customize_inputs( def _exporter(self) -> OTXModelExporter: """Creates OTXModelExporter object that can export the model.""" if self.image_size is None: - raise ValueError(self.image_size) + msg = f"Image size attribute is not set for {self.__class__}" + raise ValueError(msg) swap_rgb = not isinstance(self, YOLOXTINY) # only YOLOX-TINY uses RGB input_size = self.tile_image_size if self.tile_config.enable_tiler else self.image_size diff --git a/src/otx/algo/instance_segmentation/maskrcnn.py b/src/otx/algo/instance_segmentation/maskrcnn.py index a8910e91bb3..6a9f95c9974 100644 --- a/src/otx/algo/instance_segmentation/maskrcnn.py +++ b/src/otx/algo/instance_segmentation/maskrcnn.py @@ -33,7 +33,8 @@ class MaskRCNN(ExplainableOTXInstanceSegModel): def _exporter(self) -> OTXModelExporter: """Creates OTXModelExporter object that can export the model.""" if self.image_size is None: - raise ValueError(self.image_size) + msg = f"Image size attribute is not set for {self.__class__}" + raise ValueError(msg) input_size = self.tile_image_size if self.tile_config.enable_tiler else self.image_size diff --git a/src/otx/algo/instance_segmentation/maskrcnn_tv.py b/src/otx/algo/instance_segmentation/maskrcnn_tv.py index d6f5bea1bda..8ebe35386a5 100644 --- a/src/otx/algo/instance_segmentation/maskrcnn_tv.py +++ b/src/otx/algo/instance_segmentation/maskrcnn_tv.py @@ -219,7 +219,8 @@ def _customize_outputs( def _exporter(self) -> OTXModelExporter: """Creates OTXModelExporter object that can export the model.""" if self.image_size is None: - raise ValueError(self.image_size) + msg = f"Image size attribute is not set for {self.__class__}" + raise ValueError(msg) input_size = self.tile_image_size if self.tile_config.enable_tiler else self.image_size diff --git a/src/otx/algo/instance_segmentation/rtmdet_inst.py b/src/otx/algo/instance_segmentation/rtmdet_inst.py index 60c6bea25ca..53f028e0f9e 100644 --- a/src/otx/algo/instance_segmentation/rtmdet_inst.py +++ b/src/otx/algo/instance_segmentation/rtmdet_inst.py @@ -32,7 +32,8 @@ class RTMDetInst(ExplainableOTXInstanceSegModel): def _exporter(self) -> OTXModelExporter: """Creates OTXModelExporter object that can export the model.""" if self.image_size is None: - raise ValueError(self.image_size) + msg = f"Image size attribute is not set for {self.__class__}" + raise ValueError(msg) return OTXNativeModelExporter( task_level_export_parameters=self._export_parameters, diff --git a/src/otx/algo/segmentation/litehrnet.py b/src/otx/algo/segmentation/litehrnet.py index b24ea9bd77d..6684c63ed3b 100644 --- a/src/otx/algo/segmentation/litehrnet.py +++ b/src/otx/algo/segmentation/litehrnet.py @@ -558,6 +558,10 @@ def _optimization_config(self) -> dict[str, Any]: @property def _exporter(self) -> OTXModelExporter: """Creates OTXModelExporter object that can export the model.""" + if self.image_size is None: + msg = f"Image size attribute is not set for {self.__class__}" + raise ValueError(msg) + return OTXNativeModelExporter( task_level_export_parameters=self._export_parameters, input_size=self.image_size, diff --git a/src/otx/cli/cli.py b/src/otx/cli/cli.py index 983c2fe927b..c1a3756714a 100644 --- a/src/otx/cli/cli.py +++ b/src/otx/cli/cli.py @@ -220,6 +220,7 @@ def engine_subcommands() -> dict[str, set[str]]: "export": device_kwargs, "optimize": {"datamodule"}.union(device_kwargs), "explain": {"datamodule"}.union(device_kwargs), + "benchmark": device_kwargs, } def add_subcommands(self) -> None: diff --git a/src/otx/core/data/entity/base.py 
b/src/otx/core/data/entity/base.py index d33ebbcf7c8..bd8a922f941 100644 --- a/src/otx/core/data/entity/base.py +++ b/src/otx/core/data/entity/base.py @@ -47,8 +47,12 @@ def custom_wrap(wrappee: Tensor, *, like: tv_tensors.TVTensor, **kwargs) -> tv_t ) elif isinstance(like, Points): # noqa: RET505 return Points._wrap(wrappee, canvas_size=kwargs.get("canvas_size", like.canvas_size)) # noqa: SLF001 - else: - return wrappee.as_subclass(type(like)) + + # TODO(Vlad): remove this after torch upgrade. This workaround prevents a failure when like is also a Tensor + if type(like) == type(wrappee): + return wrappee + + return wrappee.as_subclass(type(like)) tv_tensors.wrap = custom_wrap diff --git a/src/otx/core/model/action_classification.py b/src/otx/core/model/action_classification.py index 08e2553a895..8727308fbad 100644 --- a/src/otx/core/model/action_classification.py +++ b/src/otx/core/model/action_classification.py @@ -14,7 +14,7 @@ from otx.algo.action_classification.utils.data_sample import ActionDataSample from otx.core.data.entity.action_classification import ActionClsBatchDataEntity, ActionClsBatchPredEntity -from otx.core.data.entity.base import OTXBatchLossEntity +from otx.core.data.entity.base import ImageInfo, OTXBatchLossEntity from otx.core.exporter.native import OTXNativeModelExporter from otx.core.metrics import MetricInput from otx.core.metrics.accuracy import MultiClassClsMetricCallable @@ -165,6 +165,21 @@ def get_classification_layers(self, prefix: str = "model.") -> dict[str, dict[st classification_layers[prefix + key] = {"stride": stride, "num_extra_classes": num_extra_classes} return classification_layers + def get_dummy_input(self, batch_size: int = 1) -> ActionClsBatchDataEntity: + """Returns a dummy input for action classification model.""" + images = torch.rand(batch_size, *self.image_size[1:]) + labels = [torch.LongTensor([0])] * batch_size + infos = [] + for i, img in enumerate(images): + infos.append( + ImageInfo( + img_idx=i, + img_shape=img.shape, + ori_shape=img.shape, + ), + ) + return ActionClsBatchDataEntity(batch_size, images, infos, labels=labels) + class MMActionCompatibleModel(OTXActionClsModel): """Action classification model compitible for MMAction. 
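Aside on the ``tv_tensors.wrap`` workaround above: its effect can be checked in isolation. A minimal sketch, assuming the monkey-patch is applied as an import side effect of ``otx.core.data.entity.base`` (per the ``tv_tensors.wrap = custom_wrap`` assignment shown in that hunk):

.. code-block:: python

    # When `like` is a plain torch.Tensor of the same type as `wrappee`,
    # the patched wrap() returns the tensor unchanged instead of routing
    # it through Tensor.as_subclass().
    import torch
    from torchvision import tv_tensors

    import otx.core.data.entity.base  # noqa: F401  # applies the wrap patch on import

    plain = torch.rand(2, 3)
    wrapped = tv_tensors.wrap(plain.clone(), like=plain)
    assert type(wrapped) is torch.Tensor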
@@ -355,3 +370,19 @@ def transform_fn(self, data_batch: ActionClsBatchDataEntity) -> np.array: def model_adapter_parameters(self) -> dict: """Model parameters for export.""" return {"input_layouts": "NSCTHW"} + + def get_dummy_input(self, batch_size: int = 1) -> ActionClsBatchDataEntity: + """Returns a dummy input for action classification OV model.""" + # Resize is embedded to the OV model, which means we don't need to know the actual size + images = [torch.rand(8, 3, 224, 224) for _ in range(batch_size)] + labels = [torch.LongTensor([0])] * batch_size + infos = [] + for i, img in enumerate(images): + infos.append( + ImageInfo( + img_idx=i, + img_shape=img.shape, + ori_shape=img.shape, + ), + ) + return ActionClsBatchDataEntity(batch_size, images, infos, labels=labels) diff --git a/src/otx/core/model/anomaly.py b/src/otx/core/model/anomaly.py index 44edf869b0b..b2c87945c84 100644 --- a/src/otx/core/model/anomaly.py +++ b/src/otx/core/model/anomaly.py @@ -24,6 +24,7 @@ AnomalySegmentationBatchPrediction, AnomalySegmentationDataBatch, ) +from otx.core.data.entity.base import ImageInfo from otx.core.exporter.anomaly import OTXAnomalyModelExporter from otx.core.types.export import OTXExportFormatType from otx.core.types.precision import OTXPrecisionType @@ -37,6 +38,7 @@ from lightning.pytorch import Trainer from lightning.pytorch.callbacks.callback import Callback from lightning.pytorch.cli import LRSchedulerCallable, OptimizerCallable + from lightning.pytorch.utilities.types import STEP_OUTPUT from torchmetrics import Metric @@ -221,13 +223,25 @@ def configure_optimizers(self) -> tuple[list[torch.optim.Optimizer], list[torch. return optimizer(params=params) return super().configure_optimizers() # type: ignore[misc] + def validation_step( + self, + inputs: AnomalyModelInputs, + batch_idx: int = 0, + ) -> STEP_OUTPUT: + """Call validation step of the anomalib model.""" + raise NotImplementedError + def forward( self, inputs: AnomalyModelInputs, ) -> AnomalyModelOutputs: """Wrap forward method of the Anomalib model.""" - _inputs: dict = self._customize_inputs(inputs) - outputs = self.model.model.forward(_inputs) + outputs = self.validation_step(inputs) + # TODO(Ashwin): update forward implementation to comply with other OTX models + _PostProcessorCallback._post_process(outputs) # noqa: SLF001 + _PostProcessorCallback._compute_scores_and_labels(self, outputs) # noqa: SLF001 + _MinMaxNormalizationCallback._normalize_batch(outputs, self) # noqa: SLF001 + return self._customize_outputs(outputs=outputs, inputs=inputs) def _customize_inputs( @@ -353,3 +367,44 @@ def export( precision=precision, to_exportable_code=to_exportable_code, ) + + def get_dummy_input(self, batch_size: int = 1) -> AnomalyModelInputs: + """Returns a dummy input for anomaly model.""" + image_size, _, _ = self._get_values_from_transforms() + images = torch.rand(batch_size, 3, *image_size) + infos = [] + for i, img in enumerate(images): + infos.append( + ImageInfo( + img_idx=i, + img_shape=img.shape, + ori_shape=img.shape, + ), + ) + if self.task == AnomalibTaskType.CLASSIFICATION: + return AnomalyClassificationDataBatch( + batch_size=batch_size, + images=images, + imgs_info=infos, + labels=[torch.LongTensor(0)], + ) + if self.task == AnomalibTaskType.SEGMENTATION: + return AnomalySegmentationDataBatch( + batch_size=batch_size, + images=images, + imgs_info=infos, + labels=[torch.LongTensor(0)], + masks=torch.tensor(0), + ) + if self.task == AnomalibTaskType.DETECTION: + return AnomalyDetectionDataBatch( + batch_size=batch_size, + 
images=images, + imgs_info=infos, + labels=[torch.LongTensor(0)], + boxes=torch.tensor(0), + masks=torch.tensor(0), + ) + + msg = "Wrong anomaly task type" + raise RuntimeError(msg) diff --git a/src/otx/core/model/base.py b/src/otx/core/model/base.py index 8e2a26acf6d..4f483021ce3 100644 --- a/src/otx/core/model/base.py +++ b/src/otx/core/model/base.py @@ -32,6 +32,8 @@ from otx import __version__ from otx.core.config.data import TileConfig from otx.core.data.entity.base import ( + ImageInfo, + OTXBatchDataEntity, OTXBatchLossEntity, T_OTXBatchDataEntity, T_OTXBatchPredEntity, @@ -784,6 +786,17 @@ def tile_config(self, tile_config: TileConfig) -> None: self._tile_config = tile_config + def get_dummy_input(self, batch_size: int = 1) -> OTXBatchDataEntity[Any]: + """Generates a dummy input, suitable for launching forward() on it. + + Args: + batch_size (int, optional): number of elements in a dummy input sequence. Defaults to 1. + + Returns: + OTXBatchDataEntity[Any]: An entity containing randomly generated inference data. + """ + raise NotImplementedError + @staticmethod def _dispatch_label_info(label_info: LabelInfoTypes) -> LabelInfo: if isinstance(label_info, int): @@ -1095,3 +1108,18 @@ def _create_label_info_from_ov_ir(self) -> LabelInfo: msg = "Cannot construct LabelInfo from OpenVINO IR. Please check this model is trained by OTX." raise ValueError(msg) + + def get_dummy_input(self, batch_size: int = 1) -> OTXBatchDataEntity: + """Returns a dummy input for base OV model.""" + # Resize is embedded to the OV model, which means we don't need to know the actual size + images = [torch.rand(3, 224, 224) for _ in range(batch_size)] + infos = [] + for i, img in enumerate(images): + infos.append( + ImageInfo( + img_idx=i, + img_shape=img.shape, + ori_shape=img.shape, + ), + ) + return OTXBatchDataEntity(batch_size=batch_size, images=images, imgs_info=infos) diff --git a/src/otx/core/model/classification.py b/src/otx/core/model/classification.py index c189b9b9e32..4a43a7888ee 100644 --- a/src/otx/core/model/classification.py +++ b/src/otx/core/model/classification.py @@ -63,6 +63,7 @@ def __init__( metric=metric, torch_compile=torch_compile, ) + self.image_size = (1, 3, 224, 224) @property def _export_parameters(self) -> TaskLevelExportParameters: @@ -86,6 +87,12 @@ def _convert_pred_entity_to_compute_metric( "target": target, } + def get_dummy_input(self, batch_size: int = 1) -> MulticlassClsBatchDataEntity: + """Returns a dummy input for classification model.""" + images = [torch.rand(*self.image_size[1:]) for _ in range(batch_size)] + labels = [torch.LongTensor([0])] * batch_size + return MulticlassClsBatchDataEntity(batch_size, images, [], labels=labels) + class MMPretrainMulticlassClsModel(OTXMulticlassClsModel): """Multi-class Classification model compatible for MMPretrain. 
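All of these hooks implement the same contract declared on ``OTXModel.get_dummy_input()``: build a random batch that ``forward()`` can consume. A standalone sketch mirroring the ``OVModel`` implementation above (the helper name is illustrative, not from the patch):

.. code-block:: python

    import torch

    from otx.core.data.entity.base import ImageInfo, OTXBatchDataEntity

    def make_dummy_batch(batch_size: int = 1) -> OTXBatchDataEntity:
        """Build a random batch shaped like real inference inputs."""
        # Resize is embedded into OV models, so a fixed 224x224 input is sufficient.
        images = [torch.rand(3, 224, 224) for _ in range(batch_size)]
        infos = [
            ImageInfo(img_idx=i, img_shape=img.shape, ori_shape=img.shape)
            for i, img in enumerate(images)
        ]
        return OTXBatchDataEntity(batch_size=batch_size, images=images, imgs_info=infos)

    batch = make_dummy_batch(2)
    assert batch.batch_size == 2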
@@ -107,7 +114,6 @@ def __init__( config = inplace_num_classes(cfg=config, num_classes=self._dispatch_label_info(label_info).num_classes) self.config = config self.load_from = config.pop("load_from", None) - self.image_size = (1, 3, 224, 224) super().__init__( label_info=label_info, optimizer=optimizer, @@ -255,6 +261,7 @@ def __init__( metric=metric, torch_compile=torch_compile, ) + self.image_size = (1, 3, 224, 224) @property def _export_parameters(self) -> TaskLevelExportParameters: @@ -281,6 +288,12 @@ def forward_for_tracing(self, image: Tensor) -> Tensor | dict[str, Tensor]: """Model forward function used for the model tracing during model exportation.""" return self.model.forward(image, mode="tensor") + def get_dummy_input(self, batch_size: int = 1) -> MultilabelClsBatchDataEntity: + """Returns a dummy input for classification OV model.""" + images = [torch.rand(*self.image_size[1:]) for _ in range(batch_size)] + labels = [torch.LongTensor([0])] * batch_size + return MultilabelClsBatchDataEntity(batch_size, images, [], labels=labels) + class MMPretrainMultilabelClsModel(OTXMultilabelClsModel): """Multi-label Classification model compatible for MMPretrain. @@ -302,7 +315,6 @@ def __init__( config = inplace_num_classes(cfg=config, num_classes=self._dispatch_label_info(label_info).num_classes) self.config = config self.load_from = config.pop("load_from", None) - self.image_size = (1, 3, 224, 224) super().__init__( label_info=label_info, optimizer=optimizer, @@ -444,6 +456,7 @@ def __init__( metric=metric, torch_compile=torch_compile, ) + self.image_size = (1, 3, 224, 224) @property def _export_parameters(self) -> TaskLevelExportParameters: @@ -481,6 +494,12 @@ def _dispatch_label_info(label_info: LabelInfoTypes) -> LabelInfo: return label_info + def get_dummy_input(self, batch_size: int = 1) -> HlabelClsBatchDataEntity: + """Returns a dummy input for classification OV model.""" + images = [torch.rand(*self.image_size[1:]) for _ in range(batch_size)] + labels = [torch.LongTensor([0])] * batch_size + return HlabelClsBatchDataEntity(batch_size, images, [], labels=labels) + class MMPretrainHlabelClsModel(OTXHlabelClsModel): """H-label Classification model compatible for MMPretrain. @@ -509,7 +528,6 @@ def __init__( self.config = config self.load_from = config.pop("load_from", None) - self.image_size = (1, 3, 224, 224) super().__init__( label_info=label_info, optimizer=optimizer, diff --git a/src/otx/core/model/detection.py b/src/otx/core/model/detection.py index 167157324f3..e52b66b0602 100644 --- a/src/otx/core/model/detection.py +++ b/src/otx/core/model/detection.py @@ -18,7 +18,7 @@ from otx.algo.utils.mmengine_utils import InstanceData, load_checkpoint from otx.core.config.data import TileConfig -from otx.core.data.entity.base import OTXBatchLossEntity +from otx.core.data.entity.base import ImageInfo, OTXBatchLossEntity from otx.core.data.entity.detection import DetBatchDataEntity, DetBatchPredEntity from otx.core.data.entity.tile import OTXTileBatchDataEntity from otx.core.data.entity.utils import stack_batch @@ -45,6 +45,8 @@ class OTXDetectionModel(OTXModel[DetBatchDataEntity, DetBatchPredEntity]): """Base class for the detection models used in OTX.""" + image_size: tuple[int, int, int, int] | None = None + def test_step(self, batch: DetBatchDataEntity, batch_idx: int) -> None: """Perform a single test step on a batch of data from the test set. 
@@ -364,6 +366,24 @@ def best_confidence_threshold(self) -> float: self._best_confidence_threshold = 0.5 return self._best_confidence_threshold + def get_dummy_input(self, batch_size: int = 1) -> DetBatchDataEntity: + """Returns a dummy input for detection model.""" + if self.image_size is None: + msg = f"Image size attribute is not set for {self.__class__}" + raise ValueError(msg) + + images = [torch.rand(*self.image_size[1:]) for _ in range(batch_size)] + infos = [] + for i, img in enumerate(images): + infos.append( + ImageInfo( + img_idx=i, + img_shape=img.shape, + ori_shape=img.shape, + ), + ) + return DetBatchDataEntity(batch_size, images, infos, bboxes=[], labels=[]) + class ExplainableOTXDetModel(OTXDetectionModel): """OTX detection model which can attach a XAI (Explainable AI) branch.""" @@ -530,7 +550,6 @@ def __init__( config = inplace_num_classes(cfg=config, num_classes=self._dispatch_label_info(label_info).num_classes) self.config = config self.load_from = config.pop("load_from", None) - self.image_size: tuple[int, int, int, int] | None = None super().__init__( label_info=label_info, optimizer=optimizer, diff --git a/src/otx/core/model/instance_segmentation.py b/src/otx/core/model/instance_segmentation.py index 9a838ceae0b..4cd78530813 100644 --- a/src/otx/core/model/instance_segmentation.py +++ b/src/otx/core/model/instance_segmentation.py @@ -21,7 +21,7 @@ from otx.algo.instance_segmentation.two_stage import TwoStageDetector from otx.algo.utils.mmengine_utils import InstanceData, load_checkpoint from otx.core.config.data import TileConfig -from otx.core.data.entity.base import OTXBatchLossEntity +from otx.core.data.entity.base import ImageInfo, OTXBatchLossEntity from otx.core.data.entity.instance_segmentation import InstanceSegBatchDataEntity, InstanceSegBatchPredEntity from otx.core.data.entity.tile import OTXTileBatchDataEntity from otx.core.data.entity.utils import stack_batch @@ -50,6 +50,8 @@ class OTXInstanceSegModel(OTXModel[InstanceSegBatchDataEntity, InstanceSegBatchPredEntity]): """Base class for the Instance Segmentation models used in OTX.""" + image_size: tuple[int, int, int, int] | None = None + def __init__( self, label_info: LabelInfoTypes, @@ -361,6 +363,24 @@ def _convert_pred_entity_to_compute_metric( ) return {"preds": pred_info, "target": target_info} + def get_dummy_input(self, batch_size: int = 1) -> InstanceSegBatchDataEntity: + """Returns a dummy input for instance segmentation model.""" + if self.image_size is None: + msg = f"Image size attribute is not set for {self.__class__}" + raise ValueError(msg) + + images = [torch.rand(*self.image_size[1:]) for _ in range(batch_size)] + infos = [] + for i, img in enumerate(images): + infos.append( + ImageInfo( + img_idx=i, + img_shape=img.shape, + ori_shape=img.shape, + ), + ) + return InstanceSegBatchDataEntity(batch_size, images, infos, bboxes=[], masks=[], labels=[], polygons=[]) + class ExplainableOTXInstanceSegModel(OTXInstanceSegModel): """OTX Instance Segmentation model which can attach a XAI (Explainable AI) branch.""" @@ -522,7 +542,6 @@ def __init__( config = inplace_num_classes(cfg=config, num_classes=self._dispatch_label_info(label_info).num_classes) self.config = config self.load_from = self.config.pop("load_from", None) - self.image_size: tuple[int, int, int, int] | None = None super().__init__( label_info=label_info, optimizer=optimizer, diff --git a/src/otx/core/model/segmentation.py b/src/otx/core/model/segmentation.py index 0e8cbf85e06..085f7824452 100644 --- 
a/src/otx/core/model/segmentation.py +++ b/src/otx/core/model/segmentation.py @@ -12,7 +12,7 @@ import torch from torchvision import tv_tensors -from otx.core.data.entity.base import OTXBatchLossEntity +from otx.core.data.entity.base import ImageInfo, OTXBatchLossEntity from otx.core.data.entity.segmentation import SegBatchDataEntity, SegBatchPredEntity from otx.core.exporter.base import OTXModelExporter from otx.core.exporter.native import OTXNativeModelExporter @@ -37,6 +37,8 @@ class OTXSegmentationModel(OTXModel[SegBatchDataEntity, SegBatchPredEntity]): """Base class for the semantic segmentation models used in OTX.""" + image_size: tuple[int, ...] | None = None + def __init__( self, label_info: LabelInfoTypes, @@ -105,6 +107,24 @@ def forward_for_tracing(self, image: Tensor) -> Tensor | dict[str, Tensor]: """Model forward function used for the model tracing during model exportation.""" return self.model(inputs=image, mode="tensor") + def get_dummy_input(self, batch_size: int = 1) -> SegBatchDataEntity: + """Returns a dummy input for semantic segmentation model.""" + if self.image_size is None: + msg = f"Image size attribute is not set for {self.__class__}" + raise ValueError(msg) + + images = torch.rand(batch_size, *self.image_size[1:]) + infos = [] + for i, img in enumerate(images): + infos.append( + ImageInfo( + img_idx=i, + img_shape=img.shape, + ori_shape=img.shape, + ), + ) + return SegBatchDataEntity(batch_size, images, infos, masks=[]) + class TorchVisionCompatibleModel(OTXSegmentationModel): """Segmentation model compatible with torchvision data pipeline.""" @@ -193,6 +213,10 @@ def _customize_outputs( @property def _exporter(self) -> OTXModelExporter: """Creates OTXModelExporter object that can export the model.""" + if self.image_size is None: + msg = f"Image size attribute is not set for {self.__class__}" + raise ValueError(msg) + return OTXNativeModelExporter( task_level_export_parameters=self._export_parameters, input_size=self.image_size, diff --git a/src/otx/core/model/visual_prompting.py b/src/otx/core/model/visual_prompting.py index a27bd2a179c..991f6531a0c 100644 --- a/src/otx/core/model/visual_prompting.py +++ b/src/otx/core/model/visual_prompting.py @@ -25,12 +25,13 @@ from torch import Tensor from torchvision import tv_tensors -from otx.core.data.entity.base import Points +from otx.core.data.entity.base import ImageInfo, Points from otx.core.data.entity.visual_prompting import ( VisualPromptingBatchDataEntity, VisualPromptingBatchPredEntity, ZeroShotVisualPromptingBatchDataEntity, ZeroShotVisualPromptingBatchPredEntity, + ZeroShotVisualPromptingLabel, ) from otx.core.exporter.base import OTXModelExporter from otx.core.exporter.visual_prompting import OTXVisualPromptingModelExporter @@ -260,6 +261,22 @@ def _set_label_info(self, _: LabelInfoTypes) -> None: msg = f"Reconfiguring label_info has no effect on {self.__class__.__name__}." 
log.warning(msg) + def get_dummy_input(self, batch_size: int = 1) -> VisualPromptingBatchDataEntity: + """Returns a dummy input for VPT model.""" + images = [torch.rand(3, self.model.image_size, self.model.image_size) for _ in range(batch_size)] + labels = [{"points": torch.LongTensor([0] * batch_size)}] * batch_size + prompts = [torch.zeros((1, 2))] * batch_size + return VisualPromptingBatchDataEntity( + batch_size, + images, + imgs_info=[], + labels=labels, + points=prompts, + masks=[None] * batch_size, + polygons=[[None]] * batch_size, + bboxes=[None] * batch_size, + ) + class OTXZeroShotVisualPromptingModel( OTXModel[ZeroShotVisualPromptingBatchDataEntity, ZeroShotVisualPromptingBatchPredEntity], @@ -426,6 +443,30 @@ def _set_label_info(self, _: LabelInfoTypes) -> None: msg = f"Reconfiguring label_info has no effect on {self.__class__.__name__}." log.warning(msg) + def get_dummy_input(self, batch_size: int = 1) -> ZeroShotVisualPromptingBatchDataEntity: + """Returns a dummy input for ZSL VPT model.""" + images = [torch.rand(3, self.model.image_size, self.model.image_size) for _ in range(batch_size)] + labels = [ZeroShotVisualPromptingLabel(prompts=torch.LongTensor([0]))] * batch_size + prompts = [torch.zeros((1, 2))] * batch_size + infos = [] + for i, img in enumerate(images): + infos.append( + ImageInfo( + img_idx=i, + img_shape=img.shape, + ori_shape=img.shape, + ), + ) + return ZeroShotVisualPromptingBatchDataEntity( + batch_size, + images, + imgs_info=infos, + labels=labels, + prompts=prompts, + masks=[], + polygons=[], + ) + class OVVisualPromptingModel( OVModel[ @@ -749,6 +790,23 @@ def _set_label_info(self, _: LabelInfoTypes) -> None: msg = f"Reconfiguring label_info has no effect on {self.__class__.__name__}." log.warning(msg) + def get_dummy_input(self, batch_size: int = 1) -> VisualPromptingBatchDataEntity: + """Returns a dummy input for classification OV model.""" + # Resize is embedded to the OV model, which means we don't need to know the actual size + images = [torch.rand(3, 224, 224) for _ in range(batch_size)] + labels = [{"points": torch.LongTensor([0] * batch_size)}] * batch_size + prompts = [torch.zeros((1, 2))] * batch_size + return VisualPromptingBatchDataEntity( + batch_size, + images, + imgs_info=[], + labels=labels, + points=prompts, + masks=[None] * batch_size, + polygons=[[None]] * batch_size, + bboxes=[None] * batch_size, + ) + class OVZeroShotVisualPromptingModel( OVModel[ @@ -1330,3 +1388,28 @@ def _create_label_info_from_ov_ir(self) -> LabelInfo: def _set_label_info(self, _: LabelInfoTypes) -> None: msg = f"Reconfiguring label_info has no effect on {self.__class__.__name__}." 
        log.warning(msg)
+
+    def get_dummy_input(self, batch_size: int = 1) -> ZeroShotVisualPromptingBatchDataEntity:
+        """Returns a dummy input for classification OV model."""
+        # Resize is embedded to the OV model, which means we don't need to know the actual size
+        images = [torch.rand(3, 224, 224) for _ in range(batch_size)]
+        labels = [ZeroShotVisualPromptingLabel(prompts=torch.LongTensor([0]))] * batch_size
+        prompts = [torch.zeros((1, 2))] * batch_size
+        infos = []
+        for i, img in enumerate(images):
+            infos.append(
+                ImageInfo(
+                    img_idx=i,
+                    img_shape=img.shape,
+                    ori_shape=img.shape,
+                ),
+            )
+        return ZeroShotVisualPromptingBatchDataEntity(
+            batch_size,
+            images,
+            imgs_info=infos,
+            labels=labels,
+            prompts=prompts,
+            masks=[None] * batch_size,
+            polygons=[[None]] * batch_size,
+        )
diff --git a/src/otx/engine/engine.py b/src/otx/engine/engine.py
index a730d77f572..065b9fc48bc 100644
--- a/src/otx/engine/engine.py
+++ b/src/otx/engine/engine.py
@@ -5,9 +5,11 @@
 from __future__ import annotations
 
+import csv
 import inspect
 import logging
 import tempfile
+import time
 from contextlib import contextmanager
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, ClassVar, Iterable, Iterator, Literal
@@ -28,7 +30,7 @@
 from otx.core.types.precision import OTXPrecisionType
 from otx.core.types.task import OTXTaskType
 from otx.core.utils.cache import TrainerArgumentsCache
-from otx.utils.utils import is_xpu_available
+from otx.utils.utils import is_xpu_available, measure_flops
 
 from .adaptive_bs import adapt_batch_size
 from .hpo import execute_hpo, update_hyper_parameter
@@ -564,7 +566,7 @@ def export(
         export_demo_package = False
 
         if is_ir_ckpt and not export_demo_package:
-            msg = "IR model is passed as a checkpoint, export automaticaly switched to exportable code."
+            msg = "IR model is passed as a checkpoint, export automatically switched to exportable code."
             warn(msg, stacklevel=1)
             export_demo_package = True
@@ -781,6 +783,116 @@ def explain(
         model.explain_mode = False
         return predict_result
 
+    def benchmark(
+        self,
+        checkpoint: PathLike | None = None,
+        batch_size: int = 1,
+        n_iters: int = 10,
+        extended_stats: bool = False,
+    ) -> dict[str, str]:
+        """Executes model micro benchmarking on random data.
+
+        Benchmark can provide latency, throughput, number of parameters,
+        and theoretical computational complexity with batch size 1.
+        The latter two characteristics are available for torch model recipes only.
+        Before the measurements, a warm-up is done.
+
+        Args:
+            checkpoint (PathLike | None, optional): Path to checkpoint. Optional for torch models. Defaults to None.
+            batch_size (int, optional): Batch size for benchmarking. Defaults to 1.
+            n_iters (int, optional): Number of iterations to average on. Defaults to 10.
+            extended_stats (bool, optional): Flag that enables printing of per module complexity for torch model.
+                Defaults to False.
+
+        Returns:
+            dict[str, str]: a dict with the benchmark results.
+
+        Example:
+            >>> engine.benchmark(
+            ...     batch_size=1,
+            ...     n_iters=20,
+            ...     extended_stats=True,
+            ... )
+
+        CLI Usage:
+            To run benchmark using the configuration, launch
+            ```shell
+            >>> otx benchmark \
+            ...     --config --data_root \
+            ... 
--checkpoint + ``` + """ + checkpoint = checkpoint if checkpoint is not None else self.checkpoint + + if checkpoint is not None: + is_ir_ckpt = Path(checkpoint).suffix in [".xml"] + if is_ir_ckpt and not isinstance(self.model, OVModel): + # create OVModel + self.model = self._auto_configurator.get_ov_model( + model_name=str(checkpoint), + label_info=self.datamodule.label_info, + ) + + if not is_ir_ckpt: + model_cls = self.model.__class__ + self.model = model_cls.load_from_checkpoint( + checkpoint_path=checkpoint, + map_location="cpu", + **self.model.hparams, + ) + elif isinstance(self.model, OVModel): + msg = "To run benchmark on OV model, checkpoint must be specified." + raise RuntimeError(msg) + + self.model.eval() + + def dummy_infer(model: OTXModel, batch_size: int = 1) -> float: + input_batch = model.get_dummy_input(batch_size) + start = time.perf_counter() + model.forward(input_batch) + end = time.perf_counter() + return end - start + + warmup_iters = max(1, int(n_iters / 10)) + for _ in range(warmup_iters): + dummy_infer(self.model, batch_size) + + total_time = 0.0 + for _ in range(n_iters): + total_time += dummy_infer(self.model, batch_size) + latency = total_time / n_iters + fps = batch_size / latency + + final_stats = {"latency": f"{latency:.3f} s", "throughput": f"{(fps):.3f} FPS"} + + if not isinstance(self.model, OVModel): + try: + from torch.utils.flop_counter import convert_num_with_suffix, get_suffix_str + + input_batch = self.model.get_dummy_input(1) + model_fwd = lambda: self.model.forward(input_batch) + depth = 3 if extended_stats else 0 + fwd_flops = measure_flops(self.model.model, model_fwd, print_stats_depth=depth) + flops_str = convert_num_with_suffix(fwd_flops, get_suffix_str(fwd_flops * 10**3)) + final_stats["complexity"] = flops_str + " MACs" + except Exception as e: + logging.warning(f"Failed to complete complexity estimation: {e}") + + params_num = sum(p.numel() for p in self.model.parameters() if p.requires_grad) + params_num_str = convert_num_with_suffix(params_num, get_suffix_str(params_num * 100)) + final_stats["parameters_number"] = params_num_str + + for name, val in final_stats.items(): + print(f"{name:<20} | {val}") + + with (Path(self.work_dir) / "benchmark_report.csv").open("w") as f: + writer = csv.writer(f) + writer.writerow(list(final_stats)) + writer.writerow(list(final_stats.values())) + + return final_stats + @classmethod def from_config( cls, diff --git a/src/otx/utils/utils.py b/src/otx/utils/utils.py index 053bd086deb..e084eb7bd95 100644 --- a/src/otx/utils/utils.py +++ b/src/otx/utils/utils.py @@ -11,7 +11,7 @@ from decimal import Decimal from functools import partial from types import LambdaType -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Callable import torch @@ -260,3 +260,21 @@ def check_pickleable(obj: Any) -> bool: # noqa: ANN401 except Exception: return False return True + + +def measure_flops( + model: torch.nn.Module, + forward_fn: Callable[[], torch.Tensor], + loss_fn: Callable[[torch.Tensor], torch.Tensor] | None = None, + print_stats_depth: int = 0, +) -> int: + """Utility to compute the total number of FLOPs used by a module during training or during inference.""" + from torch.utils.flop_counter import FlopCounterMode + + flop_counter = FlopCounterMode(model, display=print_stats_depth > 0, depth=print_stats_depth) + with flop_counter: + if loss_fn is None: + forward_fn() + else: + loss_fn(forward_fn()).backward() + return flop_counter.get_total_flops() diff --git 
a/tests/integration/cli/test_export_inference.py b/tests/integration/cli/test_export_inference.py index 9047892c392..fdb46e90ea8 100644 --- a/tests/integration/cli/test_export_inference.py +++ b/tests/integration/cli/test_export_inference.py @@ -3,6 +3,7 @@ from __future__ import annotations import logging +from copy import copy from pathlib import Path import pandas as pd @@ -147,6 +148,7 @@ def run_cli_test( work_dir: Path, device: str = fxt_accelerator, cli_override_command: list[str] | None = None, + with_benchmark: bool = False, ) -> Path: tmp_path_test = tmp_path / f"otx_test_{model_name}" command_cfg = [ @@ -168,12 +170,25 @@ def run_cli_test( if cli_override_command is not None: command_cfg.extend(cli_override_command) + if with_benchmark: + benchmark_command_cfg = copy(command_cfg) + updated = False + for i, term in enumerate(benchmark_command_cfg): + if term == "test": + benchmark_command_cfg[i] = "benchmark" + updated = True + break + + assert updated + benchmark_command_cfg.extend(["--n_iters", "1", "--batch_size", "1"]) + run_main(command_cfg=benchmark_command_cfg, open_subprocess=fxt_open_subprocess) + run_main(command_cfg=command_cfg, open_subprocess=fxt_open_subprocess) return tmp_path_test checkpoint_path: str = str(ckpt_files[-1]) - tmp_path_test = run_cli_test(recipe, checkpoint_path, Path("outputs") / "torch") + tmp_path_test = run_cli_test(recipe, checkpoint_path, Path("outputs") / "torch", with_benchmark=True) if task == "zero_shot_visual_prompting": # Check when using reference infos obtained by otx train @@ -191,6 +206,7 @@ def run_cli_test( ) assert (tmp_path_test / "outputs").exists() + assert (tmp_path_test / "outputs" / "torch" / ".latest" / "benchmark" / "benchmark_report.csv").exists() # 3) otx export format_to_ext = {"OPENVINO": "xml"} # [TODO](@Vlad): extend to "ONNX": "onnx" diff --git a/tests/unit/core/model/test_base.py b/tests/unit/core/model/test_base.py index 8b33b22d7e9..dc164a577be 100644 --- a/tests/unit/core/model/test_base.py +++ b/tests/unit/core/model/test_base.py @@ -124,3 +124,8 @@ def test_forward(self, model, input_batch) -> None: assert isinstance(outputs, list) assert len(outputs) == 3 assert isinstance(outputs[2], ClassificationResult) + + def test_dummy_input(self, model: OVModel): + batch_size = 2 + batch = model.get_dummy_input(batch_size) + assert batch.batch_size == batch_size diff --git a/tests/unit/core/model/test_classification.py b/tests/unit/core/model/test_classification.py index fdfa26453f7..362cd048f72 100644 --- a/tests/unit/core/model/test_classification.py +++ b/tests/unit/core/model/test_classification.py @@ -249,6 +249,11 @@ def test_forward_for_tracing(self, otx_model): output = otx_model.forward_for_tracing(torch.randn(1, 3, 32, 32)) assert len(output) == 1 + def test_dummy_input(self, otx_model: MMPretrainMulticlassClsModel): + batch_size = 2 + batch = otx_model.get_dummy_input(batch_size) + assert batch.batch_size == batch_size + class TestOTXMultilabelClsModel: @pytest.fixture() diff --git a/tests/unit/core/model/test_detection.py b/tests/unit/core/model/test_detection.py index df92e1e40fe..3cc654e8750 100644 --- a/tests/unit/core/model/test_detection.py +++ b/tests/unit/core/model/test_detection.py @@ -132,3 +132,8 @@ def test_export_parameters(self, otx_model): parameters = otx_model._export_parameters assert isinstance(parameters, TaskLevelExportParameters) assert parameters.task_type == "detection" + + def test_dummy_input(self, otx_model: MobileNetV2ATSS): + batch_size = 2 + batch = 
otx_model.get_dummy_input(batch_size) + assert batch.batch_size == batch_size diff --git a/tests/unit/core/model/test_inst_segmentation.py b/tests/unit/core/model/test_inst_segmentation.py index 8da37a16221..cd213e81bd6 100644 --- a/tests/unit/core/model/test_inst_segmentation.py +++ b/tests/unit/core/model/test_inst_segmentation.py @@ -69,3 +69,8 @@ def test_export_parameters(self, otx_model): parameters = otx_model._export_parameters assert isinstance(parameters, TaskLevelExportParameters) assert parameters.task_type == "instance_segmentation" + + def test_dummy_input(self, otx_model: MMDetInstanceSegCompatibleModel): + batch_size = 2 + batch = otx_model.get_dummy_input(batch_size) + assert batch.batch_size == batch_size diff --git a/tests/unit/core/model/test_segmentation.py b/tests/unit/core/model/test_segmentation.py index dd5ffd7d9e0..573e11d773d 100644 --- a/tests/unit/core/model/test_segmentation.py +++ b/tests/unit/core/model/test_segmentation.py @@ -73,7 +73,7 @@ def test_dispatch_label_info(self, model, label_info, expected_label_info): class TestTorchVisionCompatibleModel: @pytest.fixture() - def model(self, label_info, optimizer, scheduler, metric, torch_compile): + def model(self, label_info, optimizer, scheduler, metric, torch_compile) -> TorchVisionCompatibleModel: return TorchVisionCompatibleModel(label_info, optimizer, scheduler, metric, torch_compile) @pytest.fixture() @@ -108,3 +108,8 @@ def test_customize_outputs_predict(self, model, batch_data_entity): assert len(customized_outputs.scores) == 0 assert customized_outputs.images.shape == (2, 3, 224, 224) assert customized_outputs.imgs_info == [] + + def test_dummy_input(self, model: TorchVisionCompatibleModel): + batch_size = 2 + batch = model.get_dummy_input(batch_size) + assert batch.batch_size == batch_size diff --git a/tests/unit/core/model/test_visual_prompting.py b/tests/unit/core/model/test_visual_prompting.py index 30e9c8a04bc..e8cc1e2fe93 100644 --- a/tests/unit/core/model/test_visual_prompting.py +++ b/tests/unit/core/model/test_visual_prompting.py @@ -179,6 +179,11 @@ def test_optimization_config(self, otx_visual_prompting_model) -> None: }, } + def test_dummy_input(self, otx_visual_prompting_model): + batch_size = 2 + batch = otx_visual_prompting_model.get_dummy_input(batch_size) + assert batch.batch_size == batch_size + class TestOTXZeroShotVisualPromptingModel: def test_exporter(self, otx_zero_shot_visual_prompting_model) -> None: @@ -265,6 +270,11 @@ def test_on_train_epoch_end(self, mocker, tmpdir, otx_zero_shot_visual_prompting otx_zero_shot_visual_prompting_model.on_train_epoch_end() + def test_dummy_input(self, otx_zero_shot_visual_prompting_model): + batch_size = 2 + batch = otx_zero_shot_visual_prompting_model.get_dummy_input(batch_size) + assert batch.batch_size == batch_size + class TestOVVisualPromptingModel: @pytest.fixture() @@ -367,6 +377,12 @@ def test_optimize(self, tmpdir, mocker, set_ov_visual_prompting_model) -> None: assert "image_encoder" in results assert "decoder" in results + def test_dummy_input(self, set_ov_visual_prompting_model): + batch_size = 2 + ov_visual_prompting_model = set_ov_visual_prompting_model() + batch = ov_visual_prompting_model.get_dummy_input(batch_size) + assert batch.batch_size == batch_size + class TestOVZeroShotVisualPromptingModel: @pytest.fixture() @@ -440,6 +456,11 @@ def test_forward( mocker_fn.assert_called_once() mocker_customize_outputs.assert_called_once() + def test_dummy_input(self, ov_zero_shot_visual_prompting_model: 
OVZeroShotVisualPromptingModel): + batch_size = 2 + batch = ov_zero_shot_visual_prompting_model.get_dummy_input(batch_size) + assert batch.batch_size == batch_size + def test_learn(self, mocker, ov_zero_shot_visual_prompting_model, fxt_zero_shot_vpm_data_entity) -> None: """Test learn.""" entity = deepcopy(fxt_zero_shot_vpm_data_entity[1]) diff --git a/tests/unit/engine/test_engine.py b/tests/unit/engine/test_engine.py index b3f72255aba..b4b34fb4b2c 100644 --- a/tests/unit/engine/test_engine.py +++ b/tests/unit/engine/test_engine.py @@ -368,6 +368,34 @@ def test_from_config(self, tmp_path) -> None: assert engine.datamodule.train_subset.batch_size == 3 assert engine.datamodule.test_subset.subset_name == "TESTING" + @pytest.mark.parametrize( + "checkpoint", + [ + "path/to/checkpoint.ckpt", + "path/to/checkpoint.xml", + ], + ) + def test_benchmark(self, fxt_engine, checkpoint, mocker: MockerFixture) -> None: + _ = mocker.patch("otx.engine.engine.AutoConfigurator.update_ov_subset_pipeline") + mock_get_ov_model = mocker.patch("otx.engine.engine.AutoConfigurator.get_ov_model") + mock_load_from_checkpoint = mocker.patch.object(fxt_engine.model.__class__, "load_from_checkpoint") + + ext = Path(checkpoint).suffix + + if ext == ".ckpt": + mock_model = mocker.create_autospec(OTXModel) + + mock_load_from_checkpoint.return_value = mock_model + else: + mock_model = mocker.create_autospec(OVModel) + + mock_get_ov_model.return_value = mock_model + + # Correct label_info from the checkpoint + mock_model.label_info = fxt_engine.datamodule.label_info + result = fxt_engine.benchmark(checkpoint=checkpoint) + assert "latency" in result + def test_num_devices(self, fxt_engine, tmp_path) -> None: assert fxt_engine.num_devices == 1 assert fxt_engine._cache.args.get("devices") == 1
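As a final note, the ``measure_flops`` helper added in ``src/otx/utils/utils.py`` can also be exercised outside of ``Engine.benchmark()``. A minimal sketch on a toy module (the model below is illustrative, not an OTX recipe):

.. code-block:: python

    import torch

    from otx.utils.utils import measure_flops

    model = torch.nn.Sequential(
        torch.nn.Conv2d(3, 8, kernel_size=3, padding=1),
        torch.nn.ReLU(),
        torch.nn.AdaptiveAvgPool2d(1),
        torch.nn.Flatten(),
        torch.nn.Linear(8, 10),
    )
    x = torch.rand(1, 3, 64, 64)

    # Forward-only FLOPs; pass loss_fn to include the backward pass as well.
    # print_stats_depth > 0 prints a per-module breakdown via FlopCounterMode.
    flops = measure_flops(model, lambda: model(x), print_stats_depth=1)
    print(f"forward FLOPs: {flops}")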