From 1d529f75b2bd101f130596bc911634e4b64af1ce Mon Sep 17 00:00:00 2001
From: Jaeguk Hyun <jaeguk.hyun@intel.com>
Date: Fri, 3 May 2024 13:37:21 +0900
Subject: [PATCH 01/15] Change default fmeasure confidence_threshold from 0 to
 None (#3441)

Change default confidence_threshold from 0 to None
---
 src/otx/core/model/detection.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/otx/core/model/detection.py b/src/otx/core/model/detection.py
index 79154e75c83..0246ebc0959 100644
--- a/src/otx/core/model/detection.py
+++ b/src/otx/core/model/detection.py
@@ -105,7 +105,7 @@ def _export_parameters(self) -> TaskLevelExportParameters:
         return super()._export_parameters.wrap(
             model_type="ssd",
             task_type="detection",
-            confidence_threshold=self.hparams.get("best_confidence_threshold", 0.0),
+            confidence_threshold=self.hparams.get("best_confidence_threshold", None),
             iou_threshold=0.5,
             tile_config=self.tile_config if self.tile_config.enable_tiler else None,
         )
@@ -545,10 +545,10 @@ def _create_model(self) -> Model:
                 "Cannot get best_confidence_threshold from OpenVINO IR's rt_info. "
                 "Please check whether this model is trained by OTX or not. "
                 "Without this information, it can produce a wrong F1 metric score. "
-                "At this time, it will be set as the default value = 0.0."
+                "At this time, it will be set as the default value = None."
             )
             log.warning(msg)
-            self.hparams["best_confidence_threshold"] = 0.0
+            self.hparams["best_confidence_threshold"] = None
 
         return Model.create_model(model_adapter, model_type=self.model_type, configuration=self.model_api_configuration)
 
@@ -643,6 +643,6 @@ def _convert_pred_entity_to_compute_metric(
         }
 
     def _log_metrics(self, meter: Metric, key: Literal["val", "test"], **compute_kwargs) -> None:
-        best_confidence_threshold = self.hparams.get("best_confidence_threshold", 0.0)
+        best_confidence_threshold = self.hparams.get("best_confidence_threshold", None)
         compute_kwargs = {"best_confidence_threshold": best_confidence_threshold}
         return super()._log_metrics(meter, key, **compute_kwargs)

From 8f0dd3f018ee665bd35d5b88c6011796ca34afd3 Mon Sep 17 00:00:00 2001
From: Yunchu Lee <yunchu.lee@intel.com>
Date: Fri, 3 May 2024 15:26:07 +0900
Subject: [PATCH 02/15] Modify label info comparison (#3442)

---
 src/otx/engine/engine.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/otx/engine/engine.py b/src/otx/engine/engine.py
index 55bcaf2ad02..11853e36589 100644
--- a/src/otx/engine/engine.py
+++ b/src/otx/engine/engine.py
@@ -371,7 +371,7 @@ def test(
             model_cls = self.model.__class__
             model = model_cls.load_from_checkpoint(checkpoint_path=checkpoint)
 
-        if model.label_info != self.datamodule.label_info:
+        if model.label_info.as_dict() != self.datamodule.label_info.as_dict():
             msg = (
                 "To launch a test pipeline, the label information should be same "
                 "between the training and testing datasets. "
@@ -452,7 +452,7 @@ def predict(
             model_cls = self.model.__class__
             model = model_cls.load_from_checkpoint(checkpoint_path=checkpoint)
 
-        if model.label_info != self.datamodule.label_info:
+        if model.label_info.as_dict() != self.datamodule.label_info.as_dict():
             msg = (
                 "To launch a predict pipeline, the label information should be same "
                 "between the training and testing datasets. "
@@ -691,7 +691,7 @@ def explain(
             model_cls = model.__class__
             model = model_cls.load_from_checkpoint(checkpoint_path=checkpoint)
 
-        if model.label_info != self.datamodule.label_info:
+        if model.label_info.as_dict() != self.datamodule.label_info.as_dict():
             msg = (
                 "To launch a explain pipeline, the label information should be same "
                 "between the training and testing datasets. "

From f6d03c348ceb81a77ebc85caa2104f504b70c90f Mon Sep 17 00:00:00 2001
From: Prokofiev Kirill <kirill.prokofiev@intel.com>
Date: Fri, 3 May 2024 10:22:48 +0200
Subject: [PATCH 03/15] Update segmentation documentation (#3425)

* remove unnecessary test

* fix documentation

* fix some issues during forward

* added dino

* fix pre-commit

* added numbers for some models

* updated numbers

* fix merge problems

* revert changes back

* update final accuracy
---
 .../segmentation/semantic_segmentation.rst    | 76 +++++++++----------
 src/otx/algo/segmentation/base_model.py       |  2 +-
 src/otx/core/model/segmentation.py            |  2 +-
 3 files changed, 38 insertions(+), 42 deletions(-)

diff --git a/docs/source/guide/explanation/algorithms/segmentation/semantic_segmentation.rst b/docs/source/guide/explanation/algorithms/segmentation/semantic_segmentation.rst
index e631301702d..a41013ef3ea 100644
--- a/docs/source/guide/explanation/algorithms/segmentation/semantic_segmentation.rst
+++ b/docs/source/guide/explanation/algorithms/segmentation/semantic_segmentation.rst
@@ -14,16 +14,17 @@ The output of semantic segmentation is typically an image where each pixel is co
 
 |
 
-We solve this task by utilizing `FCN Head <https://arxiv.org/pdf/1411.4038.pdf>`_ with implementation from `MMSegmentation <https://mmsegmentation.readthedocs.io/en/latest/_modules/mmseg/models/decode_heads/fcn_head.html>`_ on the multi-level image features obtained by the feature extractor backbone (`Lite-HRNet <https://arxiv.org/abs/2104.06403>`_).
+We solve this task by utilizing segmentation decoder heads on the multi-level image features obtained by the feature extractor backbone.
 For the supervised training we use the following algorithms components:
 
 .. _semantic_segmentation_supervised_pipeline:
 
 - ``Augmentations``: Besides basic augmentations like random flip, random rotate and random crop, we use mixing images technique with different `photometric distortions <https://mmsegmentation.readthedocs.io/en/latest/api.html#mmseg.datasets.pipelines.PhotoMetricDistortion>`_.
 
-- ``Optimizer``: We use `Adam <https://arxiv.org/abs/1412.6980>`_ optimizer with weight decay set to zero and gradient clipping with maximum quadratic norm equals to 40.
+- ``Optimizer``: We use `Adam <https://arxiv.org/abs/1412.6980>`_ and `AdamW <https://arxiv.org/abs/1711.05101>`_ optimizers.
 
-- ``Learning rate schedule``: For scheduling training process we use **ReduceLROnPlateau** with linear learning rate warmup for 100 iterations. This method monitors a target metric (in our case we use metric on the validation set) and if no improvement is seen for a ``patience`` number of epochs, the learning rate is reduced.
+- ``Learning rate schedule``: For scheduling training process we use **ReduceLROnPlateau** with linear learning rate warmup for 100 iterations for `Lite-HRNet <https://arxiv.org/abs/2104.06403>`_ family. This method monitors a target metric (in our case we use metric on the validation set) and if no improvement is seen for a ``patience`` number of epochs, the learning rate is reduced.
+  For `SegNext <https://arxiv.org/abs/2209.08575>`_ and `DinoV2 <https://arxiv.org/abs/2304.07193>`_ models we use `PolynomialLR <https://pytorch.org/docs/stable/generated/torch.optim.lr_scheduler.PolynomialLR.html>`_ scheduler.
 
 - ``Loss function``: We use standard `Cross Entropy Loss <https://en.wikipedia.org/wiki/Cross_entropy>`_  to train a model.
 
@@ -39,14 +40,6 @@ For the dataset handling inside OpenVINO™ Training Extensions, we use `Dataset
 At this end we support `Common Semantic Segmentation <https://github.com/openvinotoolkit/datumaro/blob/develop/docs/source/docs/data-formats/formats/common_semantic_segmentation.md>`_ data format.
 If you organized supported dataset format, starting training will be very simple. We just need to pass a path to the root folder and desired model recipe to start training:
 
-.. note::
-
-    Due to some internal limitations, the dataset should always consist of a "background" label. If your dataset doesn't have a background label, rename the first label to "background" in the ``meta.json`` file.
-
-
-.. note::
-
-    Currently, metrics with models trained with our OTX dataset adapter can differ from popular benchmarks. To avoid this and train the model on exactly the same segmentation masks as intended by the authors, please, set the parameter ``use_otx_adapter`` to ``False``.
 
 ******
 Models
@@ -55,43 +48,46 @@ Models
 
 We support the following ready-to-use model recipes:
 
-+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------+---------------------+-----------------+
-| Recipe ID                                                                                                                                                                            | Name                   | Complexity (GFLOPs) | Model size (MB) |
-+======================================================================================================================================================================================+========================+=====================+=================+
-| `Custom_Semantic_Segmentation_Lite-HRNet-s-mod2_OCR <https://github.com/openvinotoolkit/training_extensions/blob/develop/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml>`_    | Lite-HRNet-s-mod2      | 1.44                | 3.2             |
-+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------+---------------------+-----------------+
-| `Custom_Semantic_Segmentation_Lite-HRNet-18-mod2_OCR <https://github.com/openvinotoolkit/training_extensions/blob/develop/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml>`_  | Lite-HRNet-18-mod2     | 2.82                | 4.3             |
-+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------+---------------------+-----------------+
-| `Custom_Semantic_Segmentation_Lite-HRNet-x-mod3_OCR <https://github.com/openvinotoolkit/training_extensions/blob/develop/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml>`_    | Lite-HRNet-x-mod3      | 9.20                | 5.7             |
-+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------+---------------------+-----------------+
-| `Custom_Semantic_Segmentation_SegNext_T <https://github.com/openvinotoolkit/training_extensions/blob/develop/src/otx/recipe/semantic_segmentation/segnext_t.yaml>`_                  | SegNext-t              | 6.07                | 4.23            |
-+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------+---------------------+-----------------+
-| `Custom_Semantic_Segmentation_SegNext_S <https://github.com/openvinotoolkit/training_extensions/blob/develop/src/otx/recipe/semantic_segmentation/segnext_s.yaml>`_                  | SegNext-s              | 15.35               | 13.9            |
-+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------+---------------------+-----------------+
-| `Custom_Semantic_Segmentation_SegNext_B <https://github.com/openvinotoolkit/training_extensions/blob/develop/src/otx/recipe/semantic_segmentation/segnext_b.yaml>`_                  | SegNext-b              |   32.08             | 27.56           |
-+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------------------+---------------------+-----------------+
-
-All of these models are members of the same `Lite-HRNet <https://arxiv.org/abs/2104.06403>`_ backbones family. They differ in the trade-off between accuracy and inference/training speed. ``Lite-HRNet-x-mod3`` is the recipe with heavy-size architecture for accurate predictions but it requires long training.
-Whereas the ``Lite-HRNet-s-mod2`` is the lightweight architecture for fast inference and training. It is the best choice for the scenario of a limited amount of data. The ``Lite-HRNet-18-mod2`` model is the middle-sized architecture for the balance between fast inference and training time.
-
-Use `SegNext <https://arxiv.org/abs/2209.08575>`_ model which can achieve superior perfomance while preserving fast inference and fast training.
-
-In the table below the `Dice score <https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient>`_ on some academic datasets using our :ref:`supervised pipeline <semantic_segmentation_supervised_pipeline>` is presented. We use 512x512 image crop resolution, for other hyperparameters, please, refer to the related recipe. We trained each model with single Nvidia GeForce RTX3090.
++--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------+-----------------+-----------------+-----------------+
+| Recipe Path                                                                                                                                                                          | Complexity (GFLOPs) | Model size (M)  | FPS (GPU)       | iter time (sec) |
++======================================================================================================================================================================================+=====================+=================+=================+=================+
+| `Lite-HRNet-s-mod2 <https://github.com/openvinotoolkit/training_extensions/blob/develop/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml>`_                                     | 1.44                | 0.82            |  37.68          |     0.151       |
++--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------+-----------------+-----------------+-----------------+
+| `Lite-HRNet-18-mod2 <https://github.com/openvinotoolkit/training_extensions/blob/develop/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml>`_                                   | 2.63                | 1.10            |  31.17          |     0.176       |
++--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------+-----------------+-----------------+-----------------+
+| `Lite-HRNet-x-mod3 <https://github.com/openvinotoolkit/training_extensions/blob/develop/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml>`_                                     | 9.20                | 1.50            |  15.07          |     0.347       |
++--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------+-----------------+-----------------+-----------------+
+| `SegNext_T <https://github.com/openvinotoolkit/training_extensions/blob/develop/src/otx/recipe/semantic_segmentation/segnext_t.yaml>`_                                               | 12.44               | 4.23            |  104.90         |     0.126       |
++--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------+-----------------+-----------------+-----------------+
+| `SegNext_S <https://github.com/openvinotoolkit/training_extensions/blob/develop/src/otx/recipe/semantic_segmentation/segnext_s.yaml>`_                                               | 30.93               | 13.90           |  85.67          |     0.134       |
++--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------+-----------------+-----------------+-----------------+
+| `SegNext_B <https://github.com/openvinotoolkit/training_extensions/blob/develop/src/otx/recipe/semantic_segmentation/segnext_b.yaml>`_                                               | 64.65               | 27.56           |  61.91          |     0.215       |
++--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------+-----------------+-----------------+-----------------+
+| `DinoV2 <https://github.com/openvinotoolkit/training_extensions/blob/develop/src/otx/recipe/semantic_segmentation/dino_v2.yaml>`_                                                    | 124.01              | 24.40           |  3.52           |     0.116       |
++--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------------+-----------------+-----------------+-----------------+
+
+All of these models differ in the trade-off between accuracy and inference/training speed. For example, ``SegNext_B`` is the recipe with heavy-size architecture for more accurate predictions, but it requires longer training.
+Whereas the ``Lite-HRNet-s-mod2`` is the lightweight architecture for fast inference and training. It is the best choice for the scenario of a limited amount of data. The ``Lite-HRNet-18-mod2`` and ``SegNext_S``  models are the middle-sized architectures for the balance between fast inference and training time.
+``DinoV2`` is the state-of-the-art model producing universal features suitable for all image-level and pixel-level visual tasks. This model doesn't require fine-tuning of the whole backbone, but only of the segmentation decode head. Because of that, it provides faster training while preserving high accuracy.
+
+In the table below the `Dice score <https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient>`_ on some academic datasets using our :ref:`supervised pipeline <semantic_segmentation_supervised_pipeline>` is presented. We use 512x512 (560x560 for DinoV2) image crop resolution; for other hyperparameters, please refer to the related recipe. We trained each model with a single Nvidia GeForce RTX3090.
 
 +-----------------------+--------------------------------------------------------------+-----------------------------------------------------+----------------------------------------------------------------------+-----------------------------------------------------------------+--------+
-| Model name            | `DIS5K <https://xuebinqin.github.io/dis/index.html>`_        | `Cityscapes <https://www.cityscapes-dataset.com/>`_ | `Pascal-VOC 2012 <http://host.robots.ox.ac.uk/pascal/VOC/voc2012/>`_ | `KITTI full <https://www.cvlibs.net/datasets/kitti/index.php>`_ | Mean   |
+| Model name            | `DIS5K <https://xuebinqin.github.io/dis/index.html>`_        | `Cityscapes <https://www.cityscapes-dataset.com/>`_ | `Pascal-VOC 2012 <http://host.robots.ox.ac.uk/pascal/VOC/voc2012/>`_ | `KITTI <https://www.cvlibs.net/datasets/kitti/index.php>`_      | Mean   |
 +=======================+==============================================================+=====================================================+======================================================================+=================================================================+========+
-| Lite-HRNet-s-mod2     | 79.95                                                        | 62.38                                               | 58.26                                                                | 36.06                                                           | 59.16  |
+| Lite-HRNet-s-mod2     | 78.73                                                        | 69.25                                               | 63.26                                                                | 41.73                                                           | 63.24  |
++-----------------------+--------------------------------------------------------------+-----------------------------------------------------+----------------------------------------------------------------------+-----------------------------------------------------------------+--------+
+| Lite-HRNet-18-mod2    | 81.43                                                        | 72.66                                               | 62.10                                                                | 46.73                                                           | 65.73  |
 +-----------------------+--------------------------------------------------------------+-----------------------------------------------------+----------------------------------------------------------------------+-----------------------------------------------------------------+--------+
-| Lite-HRNet-18-mod2    | 81.12                                                        | 65.04                                               | 63.48                                                                | 39.14                                                           | 62.20  |
+| Lite-HRNet-x-mod3     | 82.36                                                        | 74.57                                               | 59.55                                                                | 49.97                                                           | 66.61  |
 +-----------------------+--------------------------------------------------------------+-----------------------------------------------------+----------------------------------------------------------------------+-----------------------------------------------------------------+--------+
-| Lite-HRNet-x-mod3     | 79.98                                                        | 59.97                                               | 61.9                                                                 | 41.55                                                           | 60.85  |
+| SegNext-t             | 83.99                                                        | 77.09                                               | 84.05                                                                | 48.99                                                           | 73.53  |
 +-----------------------+--------------------------------------------------------------+-----------------------------------------------------+----------------------------------------------------------------------+-----------------------------------------------------------------+--------+
-| SegNext-t             | 85.05                                                        | 70.67                                               | 80.73                                                                | 51.25                                                           | 68.99  |
+| SegNext-s             | 85.54                                                        | 79.45                                               | 86.00                                                                | 52.19                                                           | 75.80  |
 +-----------------------+--------------------------------------------------------------+-----------------------------------------------------+----------------------------------------------------------------------+-----------------------------------------------------------------+--------+
-| SegNext-s             | 85.62                                                        | 70.91                                               | 82.31                                                                | 52.94                                                           | 69.82  |
+| SegNext-b             | 86.76                                                        | 76.14                                               | 87.92                                                                | 57.73                                                           | 77.14  |
 +-----------------------+--------------------------------------------------------------+-----------------------------------------------------+----------------------------------------------------------------------+-----------------------------------------------------------------+--------+
-| SegNext-b             | 87.92                                                        | 76.94                                               | 85.01                                                                | 55.49                                                           | 73.45  |
+| DinoV2                | 84.87                                                        | 73.58                                               | 88.15                                                                | 65.91                                                           | 78.13  |
 +-----------------------+--------------------------------------------------------------+-----------------------------------------------------+----------------------------------------------------------------------+-----------------------------------------------------------------+--------+
 
 .. note::
diff --git a/src/otx/algo/segmentation/base_model.py b/src/otx/algo/segmentation/base_model.py
index 76de6df2047..057c9b3c1b4 100644
--- a/src/otx/algo/segmentation/base_model.py
+++ b/src/otx/algo/segmentation/base_model.py
@@ -67,7 +67,7 @@ def forward(
                 - Otherwise, returns the model outputs after interpolation.
         """
         enc_feats = self.backbone(inputs)
-        outputs = self.decode_head(enc_feats)
+        outputs = self.decode_head(inputs=enc_feats)
 
         if mode == "tensor":
             return outputs
diff --git a/src/otx/core/model/segmentation.py b/src/otx/core/model/segmentation.py
index 80578929aca..6d3c63cad7b 100644
--- a/src/otx/core/model/segmentation.py
+++ b/src/otx/core/model/segmentation.py
@@ -164,7 +164,7 @@ def __init__(
     def _customize_inputs(self, entity: SegBatchDataEntity) -> dict[str, Any]:
         mode = "loss" if self.training else "predict"
 
-        masks = torch.stack(entity.masks).long()
+        masks = torch.stack(entity.masks).long() if mode == "loss" else None
 
         return {"inputs": entity.images, "img_metas": entity.imgs_info, "masks": masks, "mode": mode}
 

From 91ac4bba5cc0104525f20d0e04075a84972cc9bd Mon Sep 17 00:00:00 2001
From: Jaeguk Hyun <jaeguk.hyun@intel.com>
Date: Fri, 3 May 2024 17:40:35 +0900
Subject: [PATCH 04/15] Generate label info from v1 checkpoint (#3444)

* Generate label info from v1 ckpt

* Add unit tests
---
 src/otx/algo/detection/atss.py         |  2 +-
 src/otx/algo/detection/ssd.py          |  2 +-
 src/otx/algo/detection/yolox.py        |  2 +-
 src/otx/core/model/base.py             | 16 ++++++++++++----
 tests/unit/algo/detection/test_atss.py |  2 +-
 tests/unit/core/model/test_base.py     | 10 ++++++++++
 6 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/src/otx/algo/detection/atss.py b/src/otx/algo/detection/atss.py
index 2a318c355ed..b9e8326f47d 100644
--- a/src/otx/algo/detection/atss.py
+++ b/src/otx/algo/detection/atss.py
@@ -218,7 +218,7 @@ def forward_for_tracing(self, inputs: Tensor) -> list[InstanceData]:
         meta_info_list = [meta_info] * len(inputs)
         return self.model.export(inputs, meta_info_list, explain_mode=self.explain_mode)
 
-    def load_from_otx_v1_ckpt(self, state_dict: dict, add_prefix: str = "model.model.") -> dict:
+    def load_from_otx_v1_ckpt(self, state_dict: dict, add_prefix: str = "model.") -> dict:
         """Load the previous OTX ckpt according to OTX2.0."""
         return OTXv1Helper.load_det_ckpt(state_dict, add_prefix)
 
diff --git a/src/otx/algo/detection/ssd.py b/src/otx/algo/detection/ssd.py
index 0c8c0706468..fa745a29e23 100644
--- a/src/otx/algo/detection/ssd.py
+++ b/src/otx/algo/detection/ssd.py
@@ -692,6 +692,6 @@ def on_load_checkpoint(self, checkpoint: dict[str, Any]) -> None:
 
         return super().on_load_checkpoint(checkpoint)
 
-    def load_from_otx_v1_ckpt(self, state_dict: dict, add_prefix: str = "model.model.") -> dict:
+    def load_from_otx_v1_ckpt(self, state_dict: dict, add_prefix: str = "model.") -> dict:
         """Load the previous OTX ckpt according to OTX2.0."""
         return OTXv1Helper.load_ssd_ckpt(state_dict, add_prefix)
diff --git a/src/otx/algo/detection/yolox.py b/src/otx/algo/detection/yolox.py
index e2736833e6a..baefdb1255b 100644
--- a/src/otx/algo/detection/yolox.py
+++ b/src/otx/algo/detection/yolox.py
@@ -206,7 +206,7 @@ def forward_for_tracing(self, inputs: Tensor) -> list[InstanceData]:
         meta_info_list = [meta_info] * len(inputs)
         return self.model.export(inputs, meta_info_list, explain_mode=self.explain_mode)
 
-    def load_from_otx_v1_ckpt(self, state_dict: dict, add_prefix: str = "model.model.") -> dict:
+    def load_from_otx_v1_ckpt(self, state_dict: dict, add_prefix: str = "model.") -> dict:
         """Load the previous OTX ckpt according to OTX2.0."""
         return OTXv1Helper.load_det_ckpt(state_dict, add_prefix)
 
diff --git a/src/otx/core/model/base.py b/src/otx/core/model/base.py
index c706e2a0ab9..d5fcc8bc32e 100644
--- a/src/otx/core/model/base.py
+++ b/src/otx/core/model/base.py
@@ -19,6 +19,7 @@
 import numpy as np
 import openvino
 import torch
+from datumaro import LabelCategories
 from jsonargparse import ArgumentParser
 from lightning import LightningModule, Trainer
 from model_api.models import Model
@@ -369,7 +370,9 @@ def on_load_checkpoint(self, checkpoint: dict[str, Any]) -> None:
 
     def load_state_dict_incrementally(self, ckpt: dict[str, Any], *args, **kwargs) -> None:
         """Load state dict incrementally."""
-        ckpt_label_info: LabelInfo | None = ckpt.get("label_info", None)
+        ckpt_label_info: LabelInfo | None = (
+            ckpt.get("label_info", None) if not is_ckpt_from_otx_v1(ckpt) else self.get_ckpt_label_info_v1(ckpt)
+        )
 
         if ckpt_label_info is None:
             msg = "Checkpoint should have `label_info`."
@@ -388,11 +391,11 @@ def load_state_dict_incrementally(self, ckpt: dict[str, Any], *args, **kwargs) -
             )
 
         # Model weights
-        state_dict: dict[str, Any] = ckpt.get("state_dict", None)
+        state_dict: dict[str, Any] = ckpt.get("state_dict", None) if not is_ckpt_from_otx_v1(ckpt) else ckpt
 
-        if ckpt_label_info is None:
+        if state_dict is None:
             msg = "Checkpoint should have `state_dict`."
-            raise ValueError(msg, ckpt_label_info)
+            raise ValueError(msg, state_dict)
 
         self.load_state_dict(state_dict, *args, **kwargs)
 
@@ -419,6 +422,11 @@ def load_from_otx_v1_ckpt(self, ckpt: dict[str, Any]) -> dict:
         """Load the previous OTX ckpt according to OTX2.0."""
         raise NotImplementedError
 
+    @staticmethod
+    def get_ckpt_label_info_v1(ckpt: dict) -> LabelInfo:
+        """Generate label info from OTX v1 checkpoint."""
+        return LabelInfo.from_dm_label_groups(LabelCategories.from_iterable(ckpt["labels"].keys()))
+
     @property
     def label_info(self) -> LabelInfo:
         """Get this model label information."""
diff --git a/tests/unit/algo/detection/test_atss.py b/tests/unit/algo/detection/test_atss.py
index 007fda1d181..0c906cb5426 100644
--- a/tests/unit/algo/detection/test_atss.py
+++ b/tests/unit/algo/detection/test_atss.py
@@ -13,7 +13,7 @@ def test(self, mocker) -> None:
         model = MobileNetV2ATSS(2)
         mock_load_ckpt = mocker.patch.object(OTXv1Helper, "load_det_ckpt")
         model.load_from_otx_v1_ckpt({})
-        mock_load_ckpt.assert_called_once_with({}, "model.model.")
+        mock_load_ckpt.assert_called_once_with({}, "model.")
 
         assert isinstance(model._export_parameters, TaskLevelExportParameters)
         assert isinstance(model._exporter, OTXModelExporter)
diff --git a/tests/unit/core/model/test_base.py b/tests/unit/core/model/test_base.py
index dcd7a5c2142..6a82da35505 100644
--- a/tests/unit/core/model/test_base.py
+++ b/tests/unit/core/model/test_base.py
@@ -88,6 +88,16 @@ def test_lr_scheduler_step(self, mocker: MockerFixture) -> None:
         # Regardless of the activation status, LinearWarmupScheduler can be called
         assert mock_linear_warmup_scheduler.step.call_count == 2
 
+    def test_v1_checkpoint_loading(self, mocker):
+        model = OTXModel(label_info=3)
+        mocker.patch.object(model, "load_from_otx_v1_ckpt", return_value={})
+        v1_ckpt = {
+            "model": {"state_dict": {"backbone": torch.randn(2, 2)}},
+            "labels": {"label_0": (), "label_1": (), "label_2": ()},
+            "VERSION": 1,
+        }
+        assert model.load_state_dict_incrementally(v1_ckpt) is None
+
 
 class TestOVModel:
     @pytest.fixture()

From 610ad20d56bf483ef4c0275c06a414b606b88047 Mon Sep 17 00:00:00 2001
From: Prokofiev Kirill <kirill.prokofiev@intel.com>
Date: Fri, 3 May 2024 10:49:13 +0200
Subject: [PATCH 05/15] Add polygons support for semantic segmentation (#3439)

* remove unnecessary test

* initial experiment

* add support for mask polygons

* fix pre-commit

* fill mask with background label

* fill with background label
---
 src/otx/core/data/dataset/segmentation.py | 50 +++++++++++++++--------
 1 file changed, 32 insertions(+), 18 deletions(-)

diff --git a/src/otx/core/data/dataset/segmentation.py b/src/otx/core/data/dataset/segmentation.py
index 8822209ec97..3d9f92884da 100644
--- a/src/otx/core/data/dataset/segmentation.py
+++ b/src/otx/core/data/dataset/segmentation.py
@@ -10,7 +10,7 @@
 
 import cv2
 import numpy as np
-from datumaro.components.annotation import Image, Mask
+from datumaro.components.annotation import Ellipse, Image, Mask, Polygon
 from torchvision import tv_tensors
 
 from otx.core.data.dataset.base import Transforms
@@ -96,36 +96,50 @@ def _extract_class_mask(item: DatasetItem, img_shape: tuple[int, int], ignore_in
         msg = "It is not currently support an ignore index which is more than 255."
         raise ValueError(msg, ignore_index)
 
-    class_mask = np.full(shape=img_shape[:2], fill_value=ignore_index, dtype=np.uint8)
+    # fill mask with background label if we have Polygon/Ellipse annotations
+    fill_value = 0 if isinstance(item.annotations[0], (Ellipse, Polygon)) else ignore_index
+    class_mask = np.full(shape=img_shape[:2], fill_value=fill_value, dtype=np.uint8)
+
     for mask in sorted(
-        [ann for ann in item.annotations if isinstance(ann, Mask)],
+        [ann for ann in item.annotations if isinstance(ann, (Mask, Ellipse, Polygon))],
         key=lambda ann: ann.z_order,
     ):
-        binary_mask = mask.image
         index = mask.label
 
         if index is None:
             msg = "Mask's label index should not be None."
             raise ValueError(msg)
 
-        if index > 255:
-            msg = "Mask's label index should not be more than 255."
-            raise ValueError(msg, index)
+        if isinstance(mask, (Ellipse, Polygon)):
+            polygons = np.asarray(mask.as_polygon(), dtype=np.int32).reshape((-1, 1, 2))
+            class_index = index + 1  # NOTE: disregard the background index. Objects start from index=1
+            this_class_mask = cv2.drawContours(class_mask, [polygons], 0, (class_index, class_index, class_index))
 
-        this_class_mask = _make_index_mask(
-            binary_mask=binary_mask,
-            index=index,
-            ignore_index=ignore_index,
-            dtype=np.uint8,
-        )
+        elif isinstance(mask, Mask):
+            binary_mask = mask.image
 
-        if this_class_mask.shape != img_shape:
-            this_class_mask = cv2.resize(
-                this_class_mask,
-                dsize=(img_shape[1], img_shape[0]),  # NOTE: cv2.resize() uses (width, height) format
-                interpolation=cv2.INTER_NEAREST,
+            if index is None:
+                msg = "Mask's label index should not be None."
+                raise ValueError(msg)
+
+            if index > 255:
+                msg = "Mask's label index should not be more than 255."
+                raise ValueError(msg, index)
+
+            this_class_mask = _make_index_mask(
+                binary_mask=binary_mask,
+                index=index,
+                ignore_index=ignore_index,
+                dtype=np.uint8,
             )
 
+            if this_class_mask.shape != img_shape:
+                this_class_mask = cv2.resize(
+                    this_class_mask,
+                    dsize=(img_shape[1], img_shape[0]),  # NOTE: cv2.resize() uses (width, height) format
+                    interpolation=cv2.INTER_NEAREST,
+                )
+
         class_mask = np.where(this_class_mask != ignore_index, this_class_mask, class_mask)
 
     return class_mask

From c21e26e3281e830af879ecbf09f168d38210c113 Mon Sep 17 00:00:00 2001
From: Jaeguk Hyun <jaeguk.hyun@intel.com>
Date: Fri, 3 May 2024 17:52:58 +0900
Subject: [PATCH 06/15] Add unit tests for detectors' forward function (#3447)

---
 tests/unit/algo/detection/test_atss.py  | 28 ++++++++++++++++++++++++-
 tests/unit/algo/detection/test_ssd.py   | 24 +++++++++++++++++++++
 tests/unit/algo/detection/test_yolox.py | 28 ++++++++++++++++++++++++-
 3 files changed, 78 insertions(+), 2 deletions(-)

diff --git a/tests/unit/algo/detection/test_atss.py b/tests/unit/algo/detection/test_atss.py
index 0c906cb5426..9dfa7659be7 100644
--- a/tests/unit/algo/detection/test_atss.py
+++ b/tests/unit/algo/detection/test_atss.py
@@ -2,8 +2,11 @@
 # SPDX-License-Identifier: Apache-2.0
 """Test of OTX SSD architecture."""
 
-from otx.algo.detection.atss import MobileNetV2ATSS
+import pytest
+import torch
+from otx.algo.detection.atss import MobileNetV2ATSS, ResNeXt101ATSS
 from otx.algo.utils.support_otx_v1 import OTXv1Helper
+from otx.core.data.entity.detection import DetBatchPredEntity
 from otx.core.exporter.native import OTXModelExporter
 from otx.core.types.export import TaskLevelExportParameters
 
@@ -17,3 +20,26 @@ def test(self, mocker) -> None:
 
         assert isinstance(model._export_parameters, TaskLevelExportParameters)
         assert isinstance(model._exporter, OTXModelExporter)
+
+    @pytest.mark.parametrize("model", [MobileNetV2ATSS(3), ResNeXt101ATSS(3)])
+    def test_loss(self, model, fxt_data_module):
+        data = next(iter(fxt_data_module.train_dataloader()))
+        data.images = [torch.randn(3, 32, 32), torch.randn(3, 48, 48)]
+        output = model(data)
+        assert "loss_cls" in output
+        assert "loss_bbox" in output
+        assert "loss_centerness" in output
+
+    @pytest.mark.parametrize("model", [MobileNetV2ATSS(3), ResNeXt101ATSS(3)])
+    def test_predict(self, model, fxt_data_module):
+        data = next(iter(fxt_data_module.train_dataloader()))
+        data.images = [torch.randn(3, 32, 32), torch.randn(3, 48, 48)]
+        model.eval()
+        output = model(data)
+        assert isinstance(output, DetBatchPredEntity)
+
+    @pytest.mark.parametrize("model", [MobileNetV2ATSS(3), ResNeXt101ATSS(3)])
+    def test_export(self, model):
+        model.eval()
+        output = model.forward_for_tracing(torch.randn(1, 3, 32, 32))
+        assert len(output) == 2
diff --git a/tests/unit/algo/detection/test_ssd.py b/tests/unit/algo/detection/test_ssd.py
index 36018446e87..7a69dc4b172 100644
--- a/tests/unit/algo/detection/test_ssd.py
+++ b/tests/unit/algo/detection/test_ssd.py
@@ -5,8 +5,10 @@
 from pathlib import Path
 
 import pytest
+import torch
 from lightning import Trainer
 from otx.algo.detection.ssd import SSD
+from otx.core.data.entity.detection import DetBatchPredEntity
 
 
 class TestSSD:
@@ -36,3 +38,25 @@ def test_save_and_load_anchors(self, fxt_checkpoint) -> None:
 
         assert loaded_model.model.bbox_head.anchor_generator.widths[0][0] == 40
         assert loaded_model.model.bbox_head.anchor_generator.heights[0][0] == 50
+
+    def test_loss(self, fxt_data_module):
+        model = SSD(3)
+        data = next(iter(fxt_data_module.train_dataloader()))
+        data.images = [torch.randn(3, 32, 32), torch.randn(3, 48, 48)]
+        output = model(data)
+        assert "loss_cls" in output
+        assert "loss_bbox" in output
+
+    def test_predict(self, fxt_data_module):
+        model = SSD(3)
+        data = next(iter(fxt_data_module.train_dataloader()))
+        data.images = [torch.randn(3, 32, 32), torch.randn(3, 48, 48)]
+        model.eval()
+        output = model(data)
+        assert isinstance(output, DetBatchPredEntity)
+
+    def test_export(self):
+        model = SSD(3)
+        model.eval()
+        output = model.forward_for_tracing(torch.randn(1, 3, 32, 32))
+        assert len(output) == 2
diff --git a/tests/unit/algo/detection/test_yolox.py b/tests/unit/algo/detection/test_yolox.py
index 25d35efca9f..c5ba277c1da 100644
--- a/tests/unit/algo/detection/test_yolox.py
+++ b/tests/unit/algo/detection/test_yolox.py
@@ -2,10 +2,13 @@
 # SPDX-License-Identifier: Apache-2.0
 """Test of OTX YOLOX architecture."""
 
+import pytest
+import torch
 from otx.algo.detection.backbones.csp_darknet import CSPDarknet
 from otx.algo.detection.heads.yolox_head import YOLOXHead
 from otx.algo.detection.necks.yolox_pafpn import YOLOXPAFPN
-from otx.algo.detection.yolox import YOLOXL, YOLOXTINY
+from otx.algo.detection.yolox import YOLOXL, YOLOXS, YOLOXTINY, YOLOXX
+from otx.core.data.entity.detection import DetBatchPredEntity
 from otx.core.exporter.native import OTXNativeModelExporter
 
 
@@ -32,3 +35,26 @@ def test_exporter(self) -> None:
         otx_yolox_tiny_exporter = otx_yolox_tiny._exporter
         assert isinstance(otx_yolox_tiny_exporter, OTXNativeModelExporter)
         assert otx_yolox_tiny_exporter.swap_rgb is False
+
+    @pytest.mark.parametrize("model", [YOLOXTINY(3), YOLOXS(3), YOLOXL(3), YOLOXX(3)])
+    def test_loss(self, model, fxt_data_module):
+        data = next(iter(fxt_data_module.train_dataloader()))
+        data.images = [torch.randn(3, 32, 32), torch.randn(3, 48, 48)]
+        output = model(data)
+        assert "loss_cls" in output
+        assert "loss_bbox" in output
+        assert "loss_obj" in output
+
+    @pytest.mark.parametrize("model", [YOLOXTINY(3), YOLOXS(3), YOLOXL(3), YOLOXX(3)])
+    def test_predict(self, model, fxt_data_module):
+        data = next(iter(fxt_data_module.train_dataloader()))
+        data.images = [torch.randn(3, 32, 32), torch.randn(3, 48, 48)]
+        model.eval()
+        output = model(data)
+        assert isinstance(output, DetBatchPredEntity)
+
+    @pytest.mark.parametrize("model", [YOLOXTINY(3), YOLOXS(3), YOLOXL(3), YOLOXX(3)])
+    def test_export(self, model):
+        model.eval()
+        output = model.forward_for_tracing(torch.randn(1, 3, 32, 32))
+        assert len(output) == 2

From ad1a5ec534b663f96151dddc8afb6172ead68537 Mon Sep 17 00:00:00 2001
From: Jaeguk Hyun <jaeguk.hyun@intel.com>
Date: Mon, 6 May 2024 22:13:53 +0900
Subject: [PATCH 07/15] Change load_state_dict to on_load_checkpoint (#3443)

---
 src/otx/core/model/detection.py             |  6 +++---
 src/otx/core/model/instance_segmentation.py | 14 +++++++-------
 tests/unit/core/model/test_detection.py     |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/otx/core/model/detection.py b/src/otx/core/model/detection.py
index 0246ebc0959..53d45a474d7 100644
--- a/src/otx/core/model/detection.py
+++ b/src/otx/core/model/detection.py
@@ -137,7 +137,7 @@ def _convert_pred_entity_to_compute_metric(
             ],
         }
 
-    def load_state_dict(self, ckpt: dict[str, Any], *args, **kwargs) -> None:
+    def on_load_checkpoint(self, ckpt: dict[str, Any]) -> None:
         """Load state_dict from checkpoint.
 
         For detection, it is need to update confidence threshold information when
@@ -148,7 +148,7 @@ def load_state_dict(self, ckpt: dict[str, Any], *args, **kwargs) -> None:
             and (best_confidence_threshold := hyper_parameters.get("best_confidence_threshold", None))
         ):
             self.hparams["best_confidence_threshold"] = best_confidence_threshold
-        super().load_state_dict(ckpt, *args, **kwargs)
+        super().on_load_checkpoint(ckpt)
 
     def _log_metrics(self, meter: Metric, key: Literal["val", "test"], **compute_kwargs) -> None:
         if key == "val":
@@ -539,7 +539,7 @@ def _create_model(self) -> Model:
 
         if model_adapter.model.has_rt_info(["model_info", "confidence_threshold"]):
             best_confidence_threshold = model_adapter.model.get_rt_info(["model_info", "confidence_threshold"]).value
-            self.hparams["best_confidence_threshold"] = best_confidence_threshold
+            self.hparams["best_confidence_threshold"] = float(best_confidence_threshold)
         else:
             msg = (
                 "Cannot get best_confidence_threshold from OpenVINO IR's rt_info. "
diff --git a/src/otx/core/model/instance_segmentation.py b/src/otx/core/model/instance_segmentation.py
index 58ca328bc95..8cea389d233 100644
--- a/src/otx/core/model/instance_segmentation.py
+++ b/src/otx/core/model/instance_segmentation.py
@@ -113,12 +113,12 @@ def _export_parameters(self) -> TaskLevelExportParameters:
         return super()._export_parameters.wrap(
             model_type="MaskRCNN",
             task_type="instance_segmentation",
-            confidence_threshold=self.hparams.get("best_confidence_threshold", 0.0),
+            confidence_threshold=self.hparams.get("best_confidence_threshold", None),
             iou_threshold=0.5,
             tile_config=self.tile_config if self.tile_config.enable_tiler else None,
         )
 
-    def load_state_dict(self, ckpt: dict[str, Any], *args, **kwargs) -> None:
+    def on_load_checkpoint(self, ckpt: dict[str, Any]) -> None:
         """Load state_dict from checkpoint.
 
         For detection, it is need to update confidence threshold information when
@@ -129,7 +129,7 @@ def load_state_dict(self, ckpt: dict[str, Any], *args, **kwargs) -> None:
             and (best_confidence_threshold := hyper_parameters.get("best_confidence_threshold", None))
         ):
             self.hparams["best_confidence_threshold"] = best_confidence_threshold
-        super().load_state_dict(ckpt, *args, **kwargs)
+        super().on_load_checkpoint(ckpt)
 
     def _log_metrics(self, meter: Metric, key: Literal["val", "test"], **compute_kwargs) -> None:
         if key == "val":
@@ -597,16 +597,16 @@ def _create_model(self) -> Model:
 
         if model_adapter.model.has_rt_info(["model_info", "confidence_threshold"]):
             best_confidence_threshold = model_adapter.model.get_rt_info(["model_info", "confidence_threshold"]).value
-            self.hparams["best_confidence_threshold"] = best_confidence_threshold
+            self.hparams["best_confidence_threshold"] = float(best_confidence_threshold)
         else:
             msg = (
                 "Cannot get best_confidence_threshold from OpenVINO IR's rt_info. "
                 "Please check whether this model is trained by OTX or not. "
                 "Without this information, it can produce a wrong F1 metric score. "
-                "At this time, it will be set as the default value = 0.0."
+                "At this time, it will be set as the default value = None."
             )
             log.warning(msg)
-            self.hparams["best_confidence_threshold"] = 0.0
+            self.hparams["best_confidence_threshold"] = None
 
         return Model.create_model(model_adapter, model_type=self.model_type, configuration=self.model_api_configuration)
 
@@ -729,6 +729,6 @@ def _convert_pred_entity_to_compute_metric(
         return {"preds": pred_info, "target": target_info}
 
     def _log_metrics(self, meter: Metric, key: Literal["val", "test"], **compute_kwargs) -> None:
-        best_confidence_threshold = self.hparams.get("best_confidence_threshold", 0.0)
+        best_confidence_threshold = self.hparams.get("best_confidence_threshold", None)
         compute_kwargs = {"best_confidence_threshold": best_confidence_threshold}
         return super()._log_metrics(meter, key, **compute_kwargs)
diff --git a/tests/unit/core/model/test_detection.py b/tests/unit/core/model/test_detection.py
index 7ef81129cf1..61fac037603 100644
--- a/tests/unit/core/model/test_detection.py
+++ b/tests/unit/core/model/test_detection.py
@@ -72,7 +72,7 @@ def test_configure_metric_with_ckpt(
             metric=FMeasureCallable,
         )
 
-        model.load_state_dict(mock_ckpt)
+        model.on_load_checkpoint(mock_ckpt)
 
         assert model.hparams["best_confidence_threshold"] == 0.35
 

From 440e61a1a24975c8f212d9a5408030e8ef611ec7 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 7 May 2024 11:00:50 +0900
Subject: [PATCH 08/15] Bump jinja2 from 3.1.3 to 3.1.4 in
 /.ci/requirements/benchmark (#3456)

---
 .ci/requirements/benchmark/requirements.txt | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/.ci/requirements/benchmark/requirements.txt b/.ci/requirements/benchmark/requirements.txt
index 1d0b2716af9..513357742c0 100644
--- a/.ci/requirements/benchmark/requirements.txt
+++ b/.ci/requirements/benchmark/requirements.txt
@@ -171,20 +171,20 @@ fonttools==4.51.0 \
 ipykernel==6.29.4 \
     --hash=sha256:1181e653d95c6808039c509ef8e67c4126b3b3af7781496c7cbfb5ed938a27da \
     --hash=sha256:3d44070060f9475ac2092b760123fadf105d2e2493c24848b6691a7c4f42af5c
-    # via -r .ci/requirements/benchmark/requirements.in
+    # via -r requirements.in
 ipython==8.23.0 \
     --hash=sha256:07232af52a5ba146dc3372c7bf52a0f890a23edf38d77caef8d53f9cdc2584c1 \
     --hash=sha256:7468edaf4f6de3e1b912e57f66c241e6fd3c7099f2ec2136e239e142e800274d
     # via
-    #   -r .ci/requirements/benchmark/requirements.in
+    #   -r requirements.in
     #   ipykernel
 jedi==0.19.1 \
     --hash=sha256:cf0496f3651bc65d7174ac1b7d043eff454892c708a87d1b683e57b569927ffd \
     --hash=sha256:e983c654fe5c02867aef4cdfce5a2fbb4a50adc0af145f70504238f18ef5e7e0
     # via ipython
-jinja2==3.1.3 \
-    --hash=sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa \
-    --hash=sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90
+jinja2==3.1.4 \
+    --hash=sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369 \
+    --hash=sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d
     # via nbconvert
 jsonschema==4.21.1 \
     --hash=sha256:7996507afae316306f9e2290407761157c6f78002dcf7419acb99822143d1c6f \
@@ -412,7 +412,7 @@ matplotlib==3.8.4 \
     --hash=sha256:ecd79298550cba13a43c340581a3ec9c707bd895a6a061a78fa2524660482fc0 \
     --hash=sha256:f51c4c869d4b60d769f7b4406eec39596648d9d70246428745a681c327a8ad30 \
     --hash=sha256:fb44f53af0a62dc80bba4443d9b27f2fde6acfdac281d95bc872dc148a6509cc
-    # via -r .ci/requirements/benchmark/requirements.in
+    # via -r requirements.in
 matplotlib-inline==0.1.7 \
     --hash=sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90 \
     --hash=sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca
@@ -430,7 +430,7 @@ nbclient==0.10.0 \
 nbconvert==7.16.3 \
     --hash=sha256:a6733b78ce3d47c3f85e504998495b07e6ea9cf9bf6ec1c98dda63ec6ad19142 \
     --hash=sha256:ddeff14beeeedf3dd0bc506623e41e4507e551736de59df69a91f86700292b3b
-    # via -r .ci/requirements/benchmark/requirements.in
+    # via -r requirements.in
 nbformat==5.10.4 \
     --hash=sha256:322168b14f937a5d11362988ecac2a4952d3d8e3a2cbeb2319584631226d5b3a \
     --hash=sha256:3b48d6c8fbca4b299bf3982ea7db1af21580e4fec269ad087b9e81588891200b
@@ -485,7 +485,7 @@ numpy==1.26.4 \
 openpyxl==3.1.2 \
     --hash=sha256:a6f5977418eff3b2d5500d54d9db50c8277a368436f4e4f8ddb1be3422870184 \
     --hash=sha256:f91456ead12ab3c6c2e9491cf33ba6d08357d802192379bb482f1033ade496f5
-    # via -r .ci/requirements/benchmark/requirements.in
+    # via -r requirements.in
 packaging==24.0 \
     --hash=sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5 \
     --hash=sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9
@@ -523,7 +523,7 @@ pandas==2.2.2 \
     --hash=sha256:ddf818e4e6c7c6f4f7c8a12709696d193976b591cc7dc50588d3d1a6b5dc8772 \
     --hash=sha256:e9b79011ff7a0f4b1d6da6a61aa1aa604fb312d6647de5bad20013682d1429ce \
     --hash=sha256:eee3a87076c0756de40b05c5e9a6069c035ba43e8dd71c379e68cab2c20f16ad
-    # via -r .ci/requirements/benchmark/requirements.in
+    # via -r requirements.in
 pandocfilters==1.5.1 \
     --hash=sha256:002b4a555ee4ebc03f8b66307e287fa492e4a77b4ea14d3f934328297bb4939e \
     --hash=sha256:93be382804a9cdb0a7267585f157e5d1731bbe5545a85b268d6f5fe6232de2bc

From 0c37ec5b6739fc7bbc1d09cd7cc3e8e3598544e3 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 7 May 2024 11:01:04 +0900
Subject: [PATCH 09/15] Bump werkzeug from 3.0.1 to 3.0.3 in
 /for_developers/regression_test (#3454)

---
 for_developers/regression_test/requirements.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/for_developers/regression_test/requirements.txt b/for_developers/regression_test/requirements.txt
index 0dcbb2a2245..5bb88aac9f4 100644
--- a/for_developers/regression_test/requirements.txt
+++ b/for_developers/regression_test/requirements.txt
@@ -1051,9 +1051,9 @@ websocket-client==1.7.0 \
     --hash=sha256:10e511ea3a8c744631d3bd77e61eb17ed09304c413ad42cf6ddfa4c7787e8fe6 \
     --hash=sha256:f4c3d22fec12a2461427a29957ff07d35098ee2d976d3ba244e688b8b4057588
     # via docker
-werkzeug==3.0.1 \
-    --hash=sha256:507e811ecea72b18a404947aded4b3390e1db8f826b494d76550ef45bb3b1dcc \
-    --hash=sha256:90a285dc0e42ad56b34e696398b8122ee4c681833fb35b8334a095d82c56da10
+werkzeug==3.0.3 \
+    --hash=sha256:097e5bfda9f0aba8da6b8545146def481d06aa7d3266e7448e2cccf67dd8bd18 \
+    --hash=sha256:fc9645dc43e03e4d630d23143a04a7f947a9a3b5727cd535fdfe155a17cc48c8
     # via flask
 zipp==3.17.0 \
     --hash=sha256:0e923e726174922dce09c53c59ad483ff7bbb8e572e00c7f7c46b88556409f31 \

From 308a0f372af3a1fb745b5366ed5a27b3ed752caa Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 7 May 2024 15:36:47 +0900
Subject: [PATCH 10/15] Bump jinja2 from 3.1.3 to 3.1.4 in
 /for_developers/regression_test (#3455)

---
 for_developers/regression_test/requirements.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/for_developers/regression_test/requirements.txt b/for_developers/regression_test/requirements.txt
index 5bb88aac9f4..051c24a73f7 100644
--- a/for_developers/regression_test/requirements.txt
+++ b/for_developers/regression_test/requirements.txt
@@ -326,9 +326,9 @@ itsdangerous==2.1.2 \
     --hash=sha256:2c2349112351b88699d8d4b6b075022c0808887cb7ad10069318a8b0bc88db44 \
     --hash=sha256:5dbbc68b317e5e42f327f9021763545dc3fc3bfe22e6deb96aaf1fc38874156a
     # via flask
-jinja2==3.1.3 \
-    --hash=sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa \
-    --hash=sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90
+jinja2==3.1.4 \
+    --hash=sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369 \
+    --hash=sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d
     # via
     #   flask
     #   mlflow

From 434697d465bfe1e87f8c501e91d596d078d6f7df Mon Sep 17 00:00:00 2001
From: Emily Chun <emily.chun@intel.com>
Date: Tue, 7 May 2024 15:56:44 +0900
Subject: [PATCH 11/15] Add try-except to keep running the remaining tests
 (#3448)

* Add try-except to keep running the remaining tests

* Raise aggregated errors
---
 tests/perf/benchmark.py | 228 ++++++++++++++++++++++------------------
 1 file changed, 123 insertions(+), 105 deletions(-)

diff --git a/tests/perf/benchmark.py b/tests/perf/benchmark.py
index 837982bd53d..d590ba993a2 100644
--- a/tests/perf/benchmark.py
+++ b/tests/perf/benchmark.py
@@ -21,6 +21,16 @@
 log = logging.getLogger(__name__)
 
 
+class AggregateError(Exception):
+    def __init__(self, errors):
+        error_messages = []
+        for seed, error in errors:
+            error_messages.append(f"Seed {seed}: {error}")
+        error_message = "\n".join(error_messages)
+
+        super().__init__(f"Exceptions occurred in the following seeds:\n{error_message}")
+
+
 class Benchmark:
     """Benchmark runner for OTX2.x.
 
@@ -151,122 +161,51 @@ def run(
         if self.num_repeat > 0:
             num_repeat = self.num_repeat  # Override by global setting
 
+        exceptions = []
         for seed in range(num_repeat):
-            sub_work_dir = work_dir / str(seed)
-            tags["seed"] = str(seed)
-
-            # Train & test
-            command = [
-                "otx",
-                "train",
-                "--config",
-                f"src/otx/recipe/{model.task}/{model.name}.yaml",
-                "--data_root",
-                str(data_root),
-                "--work_dir",
-                str(sub_work_dir),
-                "--engine.device",
-                self.accelerator,
-            ]
-            for key, value in dataset.extra_overrides.get("train", {}).items():
-                command.append(f"--{key}")
-                command.append(str(value))
-            command.extend(["--seed", str(seed)])
-            # TODO(someone): Disable deterministic for instance segmentation as it causes OOM.
-            # https://github.com/pytorch/vision/issues/8168#issuecomment-1890599205
-            command.extend(["--deterministic", str(self.deterministic)])
-            if self.num_epoch > 0:
-                command.extend(["--max_epochs", str(self.num_epoch)])
-            start_time = time()
-            self._run_command(command)
-            extra_metrics = {"train/e2e_time": time() - start_time}
-            self._rename_raw_data(
-                work_dir=sub_work_dir / ".latest" / "train",
-                replaces={"train_": "train/", "{pre}": "train/"},
-            )
-            self._log_metrics(
-                work_dir=sub_work_dir / ".latest" / "train",
-                tags=tags,
-                criteria=criteria,
-                extra_metrics=extra_metrics,
-            )
-
-            command = [
-                "otx",
-                "test",
-                "--work_dir",
-                str(sub_work_dir),
-            ]
-            for key, value in dataset.extra_overrides.get("test", {}).items():
-                command.append(f"--{key}")
-                command.append(str(value))
-            self._run_command(command)
-            self._rename_raw_data(
-                work_dir=sub_work_dir / ".latest" / "test",
-                replaces={"test_": "test/", "{pre}": "test/"},
-            )
-            self._log_metrics(work_dir=sub_work_dir / ".latest" / "test", tags=tags, criteria=criteria)
-
-            # Export & test
-            if self.eval_upto in ["export", "optimize"]:
-                command = [
-                    "otx",
-                    "export",
-                    "--work_dir",
-                    str(sub_work_dir),
-                ]
-                for key, value in dataset.extra_overrides.get("export", {}).items():
-                    command.append(f"--{key}")
-                    command.append(str(value))
-                self._run_command(command)
-
-                exported_model_path = sub_work_dir / ".latest" / "export" / "exported_model.xml"
-                if not exported_model_path.exists():
-                    exported_model_path = sub_work_dir / ".latest" / "export" / "exported_model_decoder.xml"
+            try:
+                sub_work_dir = work_dir / str(seed)
+                tags["seed"] = str(seed)
 
-                command = [  # NOTE: not working for h_label_cls. to be fixed
+                # Train & test
+                command = [
                     "otx",
-                    "test",
-                    "--checkpoint",
-                    str(exported_model_path),
+                    "train",
+                    "--config",
+                    f"src/otx/recipe/{model.task}/{model.name}.yaml",
+                    "--data_root",
+                    str(data_root),
                     "--work_dir",
                     str(sub_work_dir),
+                    "--engine.device",
+                    self.accelerator,
                 ]
-                for key, value in dataset.extra_overrides.get("test", {}).items():
+                for key, value in dataset.extra_overrides.get("train", {}).items():
                     command.append(f"--{key}")
                     command.append(str(value))
+                command.extend(["--seed", str(seed)])
+                # TODO(someone): Disable deterministic for instance segmentation as it causes OOM.
+                # https://github.com/pytorch/vision/issues/8168#issuecomment-1890599205
+                command.extend(["--deterministic", str(self.deterministic)])
+                if self.num_epoch > 0:
+                    command.extend(["--max_epochs", str(self.num_epoch)])
+                start_time = time()
                 self._run_command(command)
-
+                extra_metrics = {"train/e2e_time": time() - start_time}
                 self._rename_raw_data(
-                    work_dir=sub_work_dir / ".latest" / "test",
-                    replaces={"test": "export", "{pre}": "export/"},
+                    work_dir=sub_work_dir / ".latest" / "train",
+                    replaces={"train_": "train/", "{pre}": "train/"},
+                )
+                self._log_metrics(
+                    work_dir=sub_work_dir / ".latest" / "train",
+                    tags=tags,
+                    criteria=criteria,
+                    extra_metrics=extra_metrics,
                 )
-                self._log_metrics(work_dir=sub_work_dir / ".latest" / "test", tags=tags, criteria=criteria)
-
-            # Optimize & test
-            if self.eval_upto == "optimize":
-                command = [
-                    "otx",
-                    "optimize",
-                    "--checkpoint",
-                    str(exported_model_path),
-                    "--work_dir",
-                    str(sub_work_dir),
-                ]
-                for key, value in dataset.extra_overrides.get("optimize", {}).items():
-                    command.append(f"--{key}")
-                    command.append(str(value))
-                self._run_command(command)
-
-                optimized_model_path = sub_work_dir / ".latest" / "optimize" / "optimized_model.xml"
-                if not optimized_model_path.exists():
-                    optimized_model_path = sub_work_dir / ".latest" / "optimize" / "optimized_model_decoder.xml"
 
                 command = [
                     "otx",
                     "test",
-                    "--checkpoint",
-                    str(optimized_model_path),
                     "--work_dir",
                     str(sub_work_dir),
                 ]
@@ -274,15 +213,94 @@ def run(
                     command.append(f"--{key}")
                     command.append(str(value))
                 self._run_command(command)
-
                 self._rename_raw_data(
                     work_dir=sub_work_dir / ".latest" / "test",
-                    replaces={"test": "optimize", "{pre}": "optimize/"},
+                    replaces={"test_": "test/", "{pre}": "test/"},
                 )
                 self._log_metrics(work_dir=sub_work_dir / ".latest" / "test", tags=tags, criteria=criteria)
 
-            # Force memory clean up
-            gc.collect()
+                # Export & test
+                if self.eval_upto in ["export", "optimize"]:
+                    command = [
+                        "otx",
+                        "export",
+                        "--work_dir",
+                        str(sub_work_dir),
+                    ]
+                    for key, value in dataset.extra_overrides.get("export", {}).items():
+                        command.append(f"--{key}")
+                        command.append(str(value))
+                    self._run_command(command)
+
+                    exported_model_path = sub_work_dir / ".latest" / "export" / "exported_model.xml"
+                    if not exported_model_path.exists():
+                        exported_model_path = sub_work_dir / ".latest" / "export" / "exported_model_decoder.xml"
+
+                    command = [  # NOTE: not working for h_label_cls yet; to be fixed
+                        "otx",
+                        "test",
+                        "--checkpoint",
+                        str(exported_model_path),
+                        "--work_dir",
+                        str(sub_work_dir),
+                    ]
+                    for key, value in dataset.extra_overrides.get("test", {}).items():
+                        command.append(f"--{key}")
+                        command.append(str(value))
+                    self._run_command(command)
+
+                    self._rename_raw_data(
+                        work_dir=sub_work_dir / ".latest" / "test",
+                        replaces={"test": "export", "{pre}": "export/"},
+                    )
+                    self._log_metrics(work_dir=sub_work_dir / ".latest" / "test", tags=tags, criteria=criteria)
+
+                # Optimize & test
+                if self.eval_upto == "optimize":
+                    command = [
+                        "otx",
+                        "optimize",
+                        "--checkpoint",
+                        str(exported_model_path),
+                        "--work_dir",
+                        str(sub_work_dir),
+                    ]
+                    for key, value in dataset.extra_overrides.get("optimize", {}).items():
+                        command.append(f"--{key}")
+                        command.append(str(value))
+                    self._run_command(command)
+
+                    optimized_model_path = sub_work_dir / ".latest" / "optimize" / "optimized_model.xml"
+                    if not optimized_model_path.exists():
+                        optimized_model_path = sub_work_dir / ".latest" / "optimize" / "optimized_model_decoder.xml"
+
+                    command = [
+                        "otx",
+                        "test",
+                        "--checkpoint",
+                        str(optimized_model_path),
+                        "--work_dir",
+                        str(sub_work_dir),
+                    ]
+                    for key, value in dataset.extra_overrides.get("test", {}).items():
+                        command.append(f"--{key}")
+                        command.append(str(value))
+                    self._run_command(command)
+
+                    self._rename_raw_data(
+                        work_dir=sub_work_dir / ".latest" / "test",
+                        replaces={"test": "optimize", "{pre}": "optimize/"},
+                    )
+                    self._log_metrics(work_dir=sub_work_dir / ".latest" / "test", tags=tags, criteria=criteria)
+
+                # Force memory clean up
+                gc.collect()
+            except Exception as e:  # noqa: PERF203
+                exceptions.append((seed, str(e)))
+
+        if exceptions:
+            # Raise the custom exception with all collected errors
+            raise AggregateError(exceptions)
 
         result = self.load_result(work_dir)
         if result is None:

From 1c5da52501bc9f111419cdc2f4bebd0ff8df85eb Mon Sep 17 00:00:00 2001
From: Evgeny Tsykunov <evgeny.tsykunov@intel.com>
Date: Wed, 8 May 2024 01:50:13 +0200
Subject: [PATCH 12/15] Fix e2e xai tests (#3440)

* Fix data type

* Disable mrcnn in otx_explain

* Revert mrcnn for test_otx_explain_e2e_cli
---
 tests/e2e/cli/test_cli.py | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/tests/e2e/cli/test_cli.py b/tests/e2e/cli/test_cli.py
index 480a984362f..cbe69b07cee 100644
--- a/tests/e2e/cli/test_cli.py
+++ b/tests/e2e/cli/test_cli.py
@@ -284,6 +284,11 @@ def test_otx_explain_e2e_cli(
     if ("_cls" not in task) and (task not in ["detection", "instance_segmentation"]):
         pytest.skip("Supported only for classification, detection and instance segmentation task.")
 
+    deterministic = "True"
+    if task == "instance_segmentation":
+        # Determinism is not required for this test for instance_segmentation models.
+        deterministic = "False"
+
     if "dino" in model_name:
         pytest.skip("DINO is not supported.")
 
@@ -303,7 +308,7 @@ def test_otx_explain_e2e_cli(
         "--seed",
         "0",
         "--deterministic",
-        "True",
+        deterministic,
         "--dump",
         "True",
         *fxt_cli_override_command_per_task[task],
@@ -326,29 +331,29 @@ def test_otx_explain_e2e_cli(
     reference_sal_vals = {
         # Classification
         "multi_label_cls_efficientnet_v2_light": (
-            np.array([201, 209, 196, 158, 157, 119, 77], dtype=np.uint8),
+            np.array([201, 209, 196, 158, 157, 119, 77], dtype=np.int16),
             "American_Crow_0031_25433_class_0_saliency_map.png",
         ),
         "h_label_cls_efficientnet_v2_light": (
-            np.array([102, 141, 134, 79, 66, 92, 84], dtype=np.uint8),
+            np.array([102, 141, 134, 79, 66, 92, 84], dtype=np.int16),
             "108_class_4_saliency_map.png",
         ),
         # Detection
         "detection_yolox_tiny": (
-            np.array([182, 194, 187, 179, 188, 206, 215, 207, 177, 130], dtype=np.uint8),
+            np.array([182, 194, 187, 179, 188, 206, 215, 207, 177, 130], dtype=np.int16),
             "img_371_jpg_rf_a893e0bdc6fda0ba1b2a7f07d56cec23_class_0_saliency_map.png",
         ),
         "detection_ssd_mobilenetv2": (
-            np.array([118, 188, 241, 213, 160, 120, 86, 94, 111, 138], dtype=np.uint8),
+            np.array([113, 139, 211, 190, 135, 91, 70, 103, 102, 89], dtype=np.int16),
             "img_371_jpg_rf_a893e0bdc6fda0ba1b2a7f07d56cec23_class_0_saliency_map.png",
         ),
         "detection_atss_mobilenetv2": (
-            np.array([29, 39, 55, 69, 80, 88, 92, 86, 100, 88], dtype=np.uint8),
+            np.array([60, 95, 128, 107, 86, 111, 127, 125, 117, 116], dtype=np.int16),
             "img_371_jpg_rf_a893e0bdc6fda0ba1b2a7f07d56cec23_class_0_saliency_map.png",
         ),
         # Instance Segmentation
         "instance_segmentation_maskrcnn_efficientnetb2b": (
-            np.array([5, 5, 5, 5, 5, 5, 5, 5, 5, 5], dtype=np.uint8),
+            np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.int16),
             "CDY_2018_class_0_saliency_map.png",
         ),
     }
@@ -357,9 +362,9 @@ def test_otx_explain_e2e_cli(
         actual_sal_vals = cv2.imread(str(latest_dir / "saliency_map" / reference_sal_vals[test_case_name][1]))
         if test_case_name == "instance_segmentation_maskrcnn_efficientnetb2b":
             # Take lower corner values due to map sparsity of InstSeg
-            actual_sal_vals = (actual_sal_vals[-10:, -1, 0]).astype(np.uint16)
+            actual_sal_vals = (actual_sal_vals[-10:, -1, 0]).astype(np.int16)
         else:
-            actual_sal_vals = (actual_sal_vals[:10, 0, 0]).astype(np.uint16)
+            actual_sal_vals = (actual_sal_vals[:10, 0, 0]).astype(np.int16)
         ref_sal_vals = reference_sal_vals[test_case_name][0]
         assert np.max(np.abs(actual_sal_vals - ref_sal_vals) <= sal_diff_thresh)
 

From 3a5327a6bd9b726d100c6bdbc06af1caf5902e49 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 9 May 2024 10:28:31 +0900
Subject: [PATCH 13/15] Bump actions/upload-artifact from 4.3.2 to 4.3.3
 (#3423)

---
 .github/workflows/code_scan.yaml      | 4 ++--
 .github/workflows/daily.yaml          | 2 +-
 .github/workflows/perf_benchmark.yaml | 4 ++--
 .github/workflows/publish.yaml        | 4 ++--
 .github/workflows/scorecard.yaml      | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/code_scan.yaml b/.github/workflows/code_scan.yaml
index 6f7ae66d3b4..12beaaa0c97 100644
--- a/.github/workflows/code_scan.yaml
+++ b/.github/workflows/code_scan.yaml
@@ -35,7 +35,7 @@ jobs:
           scan-ref: .
           scanners: vuln,secret
       - name: Upload Trivy results artifact
-        uses: actions/upload-artifact@1746f4ab65b179e0ea60a494b83293b640dd5bba # v4.3.2
+        uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
         with:
           name: trivy-results
           path: "${{ github.workspace }}/trivy-results.csv"
@@ -53,7 +53,7 @@ jobs:
       - name: Bandit Scanning
         run: tox -e bandit-scan
       - name: Upload Bandit artifact
-        uses: actions/upload-artifact@1746f4ab65b179e0ea60a494b83293b640dd5bba # v4.3.2
+        uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
         with:
           name: bandit-report
           path: .tox/bandit-report.txt
diff --git a/.github/workflows/daily.yaml b/.github/workflows/daily.yaml
index 0e19a2d89c1..9d6056fbdcd 100644
--- a/.github/workflows/daily.yaml
+++ b/.github/workflows/daily.yaml
@@ -40,7 +40,7 @@ jobs:
       - name: Run E2E Test
         run: tox -vv -e e2e-test-${{ matrix.task }}
       - name: Upload test results
-        uses: actions/upload-artifact@1746f4ab65b179e0ea60a494b83293b640dd5bba # v4.3.2
+        uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
         with:
           name: ${{ matrix.task }}-py310
           path: .tox/e2e-test-${{ matrix.task }}.csv
diff --git a/.github/workflows/perf_benchmark.yaml b/.github/workflows/perf_benchmark.yaml
index 3d28f82d568..c3f659af1e3 100644
--- a/.github/workflows/perf_benchmark.yaml
+++ b/.github/workflows/perf_benchmark.yaml
@@ -146,7 +146,7 @@ jobs:
           --user-name ${{ github.triggering_actor }}
           --otx-ref ${{ inputs.otx-ref }}
       - name: Upload test results
-        uses: actions/upload-artifact@1746f4ab65b179e0ea60a494b83293b640dd5bba # v4.3.2
+        uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
         with:
           name: perf-benchmark-${{ matrix.task-short }}
           path: .tox/perf-benchmark-*.*
@@ -177,7 +177,7 @@ jobs:
           python tests/perf/history/summary.py tests/perf/history ./perf-benchmark-summary --pattern "*raw*.csv" --normalize
           jupyter nbconvert --execute --to html --no-input tests/perf/history/summary.ipynb --output-dir ./perf-benchmark-summary --output perf-benchmark-summary
       - name: Upload benchmark summary
-        uses: actions/upload-artifact@1746f4ab65b179e0ea60a494b83293b640dd5bba # v4.3.2
+        uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
         with:
           name: perf-benchmark-summary
           path: perf-benchmark-summary
diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml
index 3d9a66b99cf..03acf19faf1 100644
--- a/.github/workflows/publish.yaml
+++ b/.github/workflows/publish.yaml
@@ -17,7 +17,7 @@ jobs:
         uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
       - name: Build wheels
         uses: pypa/cibuildwheel@0ecddd92b62987d7a2ae8911f4bb8ec9e2e4496a # v2.13.1
-      - uses: actions/upload-artifact@1746f4ab65b179e0ea60a494b83293b640dd5bba # v4.3.2
+      - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
         with:
           name: artifact-wheels
           path: ./wheelhouse/*.whl
@@ -36,7 +36,7 @@ jobs:
         run: python -m pip install --require-hashes --no-deps -r .ci/requirements/publish/requirements.txt
       - name: Build sdist
         run: python -m build --sdist
-      - uses: actions/upload-artifact@1746f4ab65b179e0ea60a494b83293b640dd5bba # v4.3.2
+      - uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
         with:
           name: artifact-sdist
           path: dist/*.tar.gz
diff --git a/.github/workflows/scorecard.yaml b/.github/workflows/scorecard.yaml
index cd986e32295..224a4d6b0f1 100644
--- a/.github/workflows/scorecard.yaml
+++ b/.github/workflows/scorecard.yaml
@@ -59,7 +59,7 @@ jobs:
       # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
       # format to the repository Actions tab.
       - name: "Upload artifact"
-        uses: actions/upload-artifact@1746f4ab65b179e0ea60a494b83293b640dd5bba # v4.3.2
+        uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
         with:
           name: SARIF file
           path: results.sarif

From 2a9f0be9e70a1b540bde0326a3d0ad72ef829333 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 9 May 2024 10:28:40 +0900
Subject: [PATCH 14/15] Bump tox from 4.4.5 to 4.14.2 (#3197)

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 2856bb94b0d..5c2ceac5d2c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,7 +41,7 @@ dependencies = [
 
 [project.optional-dependencies]
 dev = [
-    "tox==4.4.5",
+    "tox==4.14.2",
     "pre-commit==3.7.0",
     "pylint",
     "pytest",

From 250d6d13472ebdee60f58722898ec36805d34283 Mon Sep 17 00:00:00 2001
From: Harim Kang <harim.kang@intel.com>
Date: Mon, 13 May 2024 18:24:45 +0900
Subject: [PATCH 15/15] Fix conflicts between develop and 2.0.0 (#3490)

Fix conflict in develop with 2.0.0
---
 tests/e2e/cli/test_cli.py | 47 ++++++++++++++++++++++++++-------------
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/tests/e2e/cli/test_cli.py b/tests/e2e/cli/test_cli.py
index cbe69b07cee..2cc35b161ec 100644
--- a/tests/e2e/cli/test_cli.py
+++ b/tests/e2e/cli/test_cli.py
@@ -41,9 +41,15 @@ def test_otx_e2e_cli(
     Returns:
         None
     """
-    task = recipe.split("/")[-2]
+    task = recipe.split("/")[-2].upper()
     model_name = recipe.split("/")[-1].split(".")[0]
 
+    if task == OTXTaskType.INSTANCE_SEGMENTATION:
+        is_tiling = "tile" in recipe
+        dataset_path = fxt_target_dataset_per_task[task]["tiling" if is_tiling else "non_tiling"]
+    else:
+        dataset_path = fxt_target_dataset_per_task[task]
+
     if task == OTXTaskType.ACTION_DETECTION:
         pytest.xfail("Fix for action detection issue will be low priority. Refer to issue #3267.")
 
@@ -55,13 +61,13 @@ def test_otx_e2e_cli(
         "--config",
         recipe,
         "--data_root",
-        str(fxt_target_dataset_per_task[task]),
+        str(dataset_path),
         "--work_dir",
         str(tmp_path_train / "outputs"),
         "--engine.device",
         fxt_accelerator,
         "--max_epochs",
-        "1" if task in ("zero_shot_visual_prompting") else "2",
+        "1" if task in (OTXTaskType.ZERO_SHOT_VISUAL_PROMPTING) else "2",
         *fxt_cli_override_command_per_task[task],
     ]
 
@@ -94,7 +100,7 @@ def test_otx_e2e_cli(
         "--config",
         recipe,
         "--data_root",
-        str(fxt_target_dataset_per_task[task]),
+        str(dataset_path),
         "--work_dir",
         str(tmp_path_test / "outputs"),
         "--engine.device",
@@ -152,7 +158,7 @@ def test_otx_e2e_cli(
             "--config",
             recipe,
             "--data_root",
-            str(fxt_target_dataset_per_task[task]),
+            str(dataset_path),
             "--work_dir",
             str(tmp_path_test / "outputs" / fmt),
             *overrides,
@@ -190,7 +196,7 @@ def test_otx_e2e_cli(
         "--config",
         recipe,
         "--data_root",
-        str(fxt_target_dataset_per_task[task]),
+        str(dataset_path),
         "--work_dir",
         str(tmp_path_test / "outputs"),
         "--engine.device",
@@ -230,7 +236,7 @@ def test_otx_e2e_cli(
             "--config",
             recipe,
             "--data_root",
-            str(fxt_target_dataset_per_task[task]),
+            str(dataset_path),
             "--work_dir",
             str(tmp_path_test / "outputs" / fmt),
             *fxt_cli_override_command_per_task[task],
@@ -278,16 +284,20 @@ def test_otx_explain_e2e_cli(
     """
     import cv2
 
-    task = recipe.split("/")[-2]
+    task = recipe.split("/")[-2].upper()
     model_name = recipe.split("/")[-1].split(".")[0]
 
-    if ("_cls" not in task) and (task not in ["detection", "instance_segmentation"]):
+    if ("_cls" not in task) and (task not in [OTXTaskType.DETECTION, OTXTaskType.INSTANCE_SEGMENTATION]):
         pytest.skip("Supported only for classification, detection and instance segmentation task.")
 
     deterministic = "True"
-    if task == "instance_segmentation":
+    if task == OTXTaskType.INSTANCE_SEGMENTATION:
         # Determinism is not required for this test for instance_segmentation models.
         deterministic = "False"
+        is_tiling = "tile" in recipe
+        dataset_path = fxt_target_dataset_per_task[task]["tiling" if is_tiling else "non_tiling"]
+    else:
+        dataset_path = fxt_target_dataset_per_task[task]
 
     if "dino" in model_name:
         pytest.skip("DINO is not supported.")
@@ -300,7 +310,7 @@ def test_otx_explain_e2e_cli(
         "--config",
         recipe,
         "--data_root",
-        str(fxt_target_dataset_per_task[task]),
+        str(dataset_path),
         "--work_dir",
         str(tmp_path_explain / "outputs"),
         "--engine.device",
@@ -388,6 +398,7 @@ def test_otx_hpo_e2e_cli(
     Returns:
         None
     """
+    task = task.upper()
     if task not in DEFAULT_CONFIG_PER_TASK:
         pytest.skip(f"Task {task} is not supported in the auto-configuration.")
     if task == OTXTaskType.ZERO_SHOT_VISUAL_PROMPTING:
@@ -404,8 +415,12 @@ def test_otx_hpo_e2e_cli(
     }:
         model_cfg = ["--config", str(DEFAULT_CONFIG_PER_TASK[task].parent / "stfpm.yaml")]
 
-    task = task.lower()
-    tmp_path_hpo = tmp_path / f"otx_hpo_{task}"
+    if task == OTXTaskType.INSTANCE_SEGMENTATION:
+        dataset_path = fxt_target_dataset_per_task[task]["non_tiling"]
+    else:
+        dataset_path = fxt_target_dataset_per_task[task]
+
+    tmp_path_hpo = tmp_path / f"otx_hpo_{task.lower()}"
     tmp_path_hpo.mkdir(parents=True)
 
     command_cfg = [
@@ -413,15 +428,15 @@ def test_otx_hpo_e2e_cli(
         "train",
         *model_cfg,
         "--task",
-        task.upper(),
+        task,
         "--data_root",
-        str(fxt_target_dataset_per_task[task]),
+        str(dataset_path),
         "--work_dir",
         str(tmp_path_hpo),
         "--engine.device",
         fxt_accelerator,
         "--max_epochs",
-        "1" if task in ("zero_shot_visual_prompting") else "2",
+        "1" if task in (OTXTaskType.ZERO_SHOT_VISUAL_PROMPTING) else "2",
         "--run_hpo",
         "true",
         "--hpo_config.expected_time_ratio",