From 9d5818a6178fbbf0d1c88bf495997fae49919ff9 Mon Sep 17 00:00:00 2001
From: "Kim, Vinnam" <vinnam.kim@intel.com>
Date: Thu, 11 Jan 2024 13:40:51 +0900
Subject: [PATCH 1/8] Add export() and register_explain_hook() to OTXModel

Signed-off-by: Kim, Vinnam <vinnam.kim@intel.com>
---
 src/otx/core/model/entity/base.py | 48 +++++++++++++++++++++++++++++++
 src/otx/core/model/module/base.py | 12 ++++++++
 src/otx/core/types/export.py      | 16 +++++++++++
 3 files changed, 76 insertions(+)
 create mode 100644 src/otx/core/types/export.py

diff --git a/src/otx/core/model/entity/base.py b/src/otx/core/model/entity/base.py
index 19a8cff1a31..dea02ecb65e 100644
--- a/src/otx/core/model/entity/base.py
+++ b/src/otx/core/model/entity/base.py
@@ -15,8 +15,11 @@
     T_OTXBatchDataEntity,
     T_OTXBatchPredEntity,
 )
+from otx.core.types.export import OTXExportFormat
 
 if TYPE_CHECKING:
+    from pathlib import Path
+
     import torch
 
 
@@ -116,3 +119,48 @@ def map_class_names(src_classes: list[str], dst_classes: list[str]) -> list[int]
             else:
                 src2dst.append(-1)
         return src2dst
+
+    def export(self, output_dir: Path, export_format: OTXExportFormat) -> None:
+        """Export this model to the specified output directory via the format-specific hook.
+
+        Args:
+            output_dir: Directory path to save exported binary files.
+            export_format: Format in which this `OTXModel` is exported; an unmatched value does nothing.
+        """
+        if export_format == OTXExportFormat.OPENVINO:
+            self._export_to_openvino(output_dir)
+        elif export_format == OTXExportFormat.ONNX:
+            self._export_to_onnx(output_dir)  # every hook receives the destination directory
+        elif export_format == OTXExportFormat.EXPORTABLE_CODE:
+            self._export_to_exportable_code(output_dir)
+
+    def _export_to_openvino(self, output_dir: Path) -> None:
+        """Export to OpenVINO Intermediate Representation format.
+
+        Args:
+            output_dir: Directory path to save exported binary files
+        """
+        raise NotImplementedError
+
+    def _export_to_onnx(self, output_dir: Path) -> None:
+        """Export to ONNX format.
+
+        Args:
+            output_dir: Directory path to save exported binary files
+        """
+        raise NotImplementedError
+
+    def _export_to_exportable_code(self, output_dir: Path) -> None:
+        """Export to exportable code format.
+
+        Args:
+            output_dir: Directory path to save exported binary files
+        """
+        raise NotImplementedError
+
+    def register_explain_hook(self) -> None:
+        """Register explain hook.
+
+        TBD
+        """
+        raise NotImplementedError
diff --git a/src/otx/core/model/module/base.py b/src/otx/core/model/module/base.py
index 2305ed2715a..f1a85943705 100644
--- a/src/otx/core/model/module/base.py
+++ b/src/otx/core/model/module/base.py
@@ -14,8 +14,11 @@
 
 from otx.core.data.entity.base import OTXBatchDataEntity
 from otx.core.model.entity.base import OTXModel
+from otx.core.types.export import OTXExportFormat
 
 if TYPE_CHECKING:
+    from pathlib import Path
+
     from otx.core.data.dataset.base import DataMetaInfo
 
 
@@ -172,3 +175,12 @@ def meta_info(self) -> DataMetaInfo:
     @meta_info.setter
     def meta_info(self, meta_info: DataMetaInfo) -> None:
         self._meta_info = meta_info
+
+    def export(self, output_dir: Path, export_format: OTXExportFormat) -> None:
+        """Export the member `OTXModel` of this module to the specified output directory.
+
+        Args:
+            output_dir: Directory path to save exported binary files.
+            export_format: Format in which this `OTXModel` is exported.
+        """
+        self.model.export(output_dir, export_format)
diff --git a/src/otx/core/types/export.py b/src/otx/core/types/export.py
new file mode 100644
index 00000000000..923c36601c1
--- /dev/null
+++ b/src/otx/core/types/export.py
@@ -0,0 +1,16 @@
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+"""OTX export type definition."""
+
+from __future__ import annotations
+
+from enum import Enum
+
+
+class OTXExportFormat(str, Enum):
+    """OTX export type definition."""
+
+    OPENVINO = "OPENVINO"
+    ONNX = "ONNX"
+    EXPORTABLE_CODE = "EXPORTABLE_CODE"

From bc6e7fcb47f4c2af54de4b3c1a103d6a0f633fab Mon Sep 17 00:00:00 2001
From: "Kim, Vinnam" <vinnam.kim@intel.com>
Date: Thu, 11 Jan 2024 15:48:19 +0900
Subject: [PATCH 2/8] Add num_classes to OTXModel and revisit DataMetaInfo

Signed-off-by: Kim, Vinnam <vinnam.kim@intel.com>
---
 src/otx/algo/classification/otx_dino_v2.py    |  4 +-
 src/otx/config/model/default.yaml             |  3 ++
 src/otx/config/model/mmdet_inst_seg.yaml      |  3 ++
 src/otx/config/model/mmseg.yaml               |  1 +
 src/otx/core/data/dataset/base.py             | 26 ++++++++---
 src/otx/core/data/dataset/classification.py   |  6 +--
 src/otx/core/data/module.py                   |  6 +--
 .../model/entity/action_classification.py     |  6 ++-
 src/otx/core/model/entity/action_detection.py |  6 ++-
 src/otx/core/model/entity/base.py             | 46 ++++++++++++++++++-
 src/otx/core/model/entity/classification.py   | 21 +++++----
 src/otx/core/model/entity/detection.py        | 10 ++--
 .../model/entity/instance_segmentation.py     | 11 +++--
 src/otx/core/model/entity/segmentation.py     | 11 +++--
 src/otx/core/model/module/base.py             | 24 +++++-----
 src/otx/core/model/module/classification.py   |  8 ++--
 src/otx/core/utils/build.py                   |  2 +-
 src/otx/core/utils/config.py                  | 35 ++++++++++++--
 src/otx/recipe/detection/openvino_model.yaml  |  1 +
 .../instance_segmentation/openvino_model.yaml |  1 +
 .../openvino_model.yaml                       |  1 +
 .../recipe/segmentation/openvino_model.yaml   |  1 +
 tests/integration/cli/test_cli.py             | 17 ++++---
 tests/regression/test_regression.py           |  7 +--
 tests/unit/core/model/entity/test_base.py     |  4 +-
 .../core/model/entity/test_segmentation.py    |  4 +-
 26 files changed, 183 insertions(+), 82 deletions(-)

diff --git a/src/otx/algo/classification/otx_dino_v2.py b/src/otx/algo/classification/otx_dino_v2.py
index be00231560a..e6c37c43844 100644
--- a/src/otx/algo/classification/otx_dino_v2.py
+++ b/src/otx/algo/classification/otx_dino_v2.py
@@ -65,9 +65,9 @@ def forward(self, imgs: torch.Tensor, labels: torch.Tensor = None) -> torch.Tens
 class DINOv2RegisterClassifier(OTXMulticlassClsModel):
     """DINO-v2 Classification Model with register."""
 
-    def __init__(self, config: DictConfig) -> None:
+    def __init__(self, num_classes: int, config: DictConfig) -> None:
         self.config = config
-        super().__init__()  # create the model
+        super().__init__(num_classes=num_classes)  # create the model
 
     def _create_model(self) -> nn.Module:
         """Create the model."""
diff --git a/src/otx/config/model/default.yaml b/src/otx/config/model/default.yaml
index 6c54cd1e680..c938721f066 100644
--- a/src/otx/config/model/default.yaml
+++ b/src/otx/config/model/default.yaml
@@ -10,3 +10,6 @@ scheduler:
   mode: min
   factor: 0.1
   patience: 10
+
+otx_model:
+  num_classes: ???
diff --git a/src/otx/config/model/mmdet_inst_seg.yaml b/src/otx/config/model/mmdet_inst_seg.yaml
index a4a1c9fd4d2..716e1555e39 100644
--- a/src/otx/config/model/mmdet_inst_seg.yaml
+++ b/src/otx/config/model/mmdet_inst_seg.yaml
@@ -1,3 +1,6 @@
+defaults:
+  - default
+
 _target_: otx.core.model.module.instance_segmentation.OTXInstanceSegLitModule
 
 optimizer:
diff --git a/src/otx/config/model/mmseg.yaml b/src/otx/config/model/mmseg.yaml
index 72a859130f0..7072fee469e 100644
--- a/src/otx/config/model/mmseg.yaml
+++ b/src/otx/config/model/mmseg.yaml
@@ -14,6 +14,7 @@ scheduler:
 otx_model:
   _target_: otx.core.model.entity.segmentation.MMSegCompatibleModel
   config: ???
+  num_classes: ???
 
 # compile model for faster training with pytorch 2.0
 torch_compile: false
diff --git a/src/otx/core/data/dataset/base.py b/src/otx/core/data/dataset/base.py
index caa4e24b1ba..21981e5d305 100644
--- a/src/otx/core/data/dataset/base.py
+++ b/src/otx/core/data/dataset/base.py
@@ -30,15 +30,27 @@
 
 
 @dataclass
-class DataMetaInfo:
-    """Meta information of each subset datasets."""
+class LabelInfo:
+    """Object to represent label information."""
 
-    class_names: list[str]
+    label_names: list[str]
 
     @property
     def num_classes(self) -> int:
-        """Return number of classes."""
-        return len(self.class_names)
+        """Return number of labels."""
+        return len(self.label_names)
+
+    @classmethod
+    def from_num_classes(cls, num_classes: int) -> LabelInfo:
+        """Create this object from the number of classes.
+
+        Args:
+            num_classes: Number of classes
+
+        Returns:
+            LabelInfo(label_names=["label_0", ...])
+        """
+        return LabelInfo(label_names=[f"label_{idx}" for idx in range(num_classes)])
 
 
 class OTXDataset(Dataset, Generic[T_OTXDataEntity]):
@@ -59,8 +71,8 @@ def __init__(
         self.mem_cache_img_max_size = mem_cache_img_max_size
         self.max_refetch = max_refetch
 
-        self.meta_info = DataMetaInfo(
-            class_names=[category.name for category in self.dm_subset.categories()[AnnotationType.label]],
+        self.meta_info = LabelInfo(
+            label_names=[category.name for category in self.dm_subset.categories()[AnnotationType.label]],
         )
 
     def __len__(self) -> int:
diff --git a/src/otx/core/data/dataset/classification.py b/src/otx/core/data/dataset/classification.py
index 5b996b36c72..0e6d98d3168 100644
--- a/src/otx/core/data/dataset/classification.py
+++ b/src/otx/core/data/dataset/classification.py
@@ -13,7 +13,7 @@
 from datumaro.components.annotation import AnnotationType
 from torch.nn import functional
 
-from otx.core.data.dataset.base import DataMetaInfo, OTXDataset
+from otx.core.data.dataset.base import LabelInfo, OTXDataset
 from otx.core.data.entity.base import ImageInfo
 from otx.core.data.entity.classification import (
     HlabelClsBatchDataEntity,
@@ -27,7 +27,7 @@
 
 
 @dataclass
-class HLabelMetaInfo(DataMetaInfo):
+class HLabelMetaInfo(LabelInfo):
     """Meta information of hlabel classification."""
 
     hlabel_info: HLabelInfo
@@ -110,7 +110,7 @@ def __init__(self, **kwargs) -> None:
 
         # Hlabel classification used HLabelMetaInfo to insert the HLabelInfo.
         self.meta_info = HLabelMetaInfo(
-            class_names=[category.name for category in self.dm_categories],
+            label_names=[category.name for category in self.dm_categories],
             hlabel_info=HLabelInfo.from_dm_label_groups(self.dm_categories),
         )
 
diff --git a/src/otx/core/data/module.py b/src/otx/core/data/module.py
index 98b599f826d..69b24771d7a 100644
--- a/src/otx/core/data/module.py
+++ b/src/otx/core/data/module.py
@@ -12,7 +12,7 @@
 from omegaconf import DictConfig, OmegaConf
 from torch.utils.data import DataLoader
 
-from otx.core.data.dataset.base import DataMetaInfo
+from otx.core.data.dataset.base import LabelInfo
 from otx.core.data.factory import OTXDatasetFactory
 from otx.core.data.mem_cache import (
     MemCacheHandlerSingleton,
@@ -68,7 +68,7 @@ def __init__(
             mem_size=mem_size,
         )
 
-        meta_infos: list[DataMetaInfo] = []
+        meta_infos: list[LabelInfo] = []
         for name, dm_subset in dataset.subsets().items():
             if name not in config_mapping:
                 log.warning(f"{name} is not available. Skip it")
@@ -91,7 +91,7 @@ def __init__(
 
         self.meta_info = next(iter(meta_infos))
 
-    def _is_meta_info_valid(self, meta_infos: list[DataMetaInfo]) -> bool:
+    def _is_meta_info_valid(self, meta_infos: list[LabelInfo]) -> bool:
         """Check whether there are mismatches in the metainfo for the all subsets."""
         if all(meta_info == meta_infos[0] for meta_info in meta_infos):
             return True
diff --git a/src/otx/core/model/entity/action_classification.py b/src/otx/core/model/entity/action_classification.py
index 7ac5ac016bb..048c690ea7c 100644
--- a/src/otx/core/model/entity/action_classification.py
+++ b/src/otx/core/model/entity/action_classification.py
@@ -14,6 +14,7 @@
 from otx.core.data.entity.base import OTXBatchLossEntity
 from otx.core.model.entity.base import OTXModel
 from otx.core.utils.build import build_mm_model, get_classification_layers
+from otx.core.utils.config import inplace_num_classes
 
 if TYPE_CHECKING:
     from omegaconf import DictConfig
@@ -32,10 +33,11 @@ class MMActionCompatibleModel(OTXActionClsModel):
     compatible for OTX pipelines.
     """
 
-    def __init__(self, config: DictConfig) -> None:
+    def __init__(self, num_classes: int, config: DictConfig) -> None:
+        config = inplace_num_classes(cfg=config, num_classes=num_classes)
         self.config = config
         self.load_from = config.pop("load_from", None)
-        super().__init__()
+        super().__init__(num_classes=num_classes)
 
     def _create_model(self) -> nn.Module:
         from mmaction.models.data_preprocessors import (
diff --git a/src/otx/core/model/entity/action_detection.py b/src/otx/core/model/entity/action_detection.py
index 67cb7272e22..f980b0dfd56 100644
--- a/src/otx/core/model/entity/action_detection.py
+++ b/src/otx/core/model/entity/action_detection.py
@@ -13,6 +13,7 @@
 from otx.core.data.entity.base import OTXBatchLossEntity
 from otx.core.model.entity.base import OTXModel
 from otx.core.utils.build import build_mm_model, get_classification_layers
+from otx.core.utils.config import inplace_num_classes
 
 if TYPE_CHECKING:
     from omegaconf import DictConfig
@@ -31,10 +32,11 @@ class MMActionCompatibleModel(OTXActionDetModel):
     compatible for OTX pipelines.
     """
 
-    def __init__(self, config: DictConfig) -> None:
+    def __init__(self, num_classes: int, config: DictConfig) -> None:
+        config = inplace_num_classes(cfg=config, num_classes=num_classes)
         self.config = config
         self.load_from = config.pop("load_from", None)
-        super().__init__()
+        super().__init__(num_classes=num_classes)
 
     def _create_model(self) -> nn.Module:
         from mmaction.models.data_preprocessors import (
diff --git a/src/otx/core/model/entity/base.py b/src/otx/core/model/entity/base.py
index dea02ecb65e..05a855f22e7 100644
--- a/src/otx/core/model/entity/base.py
+++ b/src/otx/core/model/entity/base.py
@@ -5,11 +5,13 @@
 
 from __future__ import annotations
 
+import warnings
 from abc import abstractmethod
 from typing import TYPE_CHECKING, Any, Generic
 
 from torch import nn
 
+from otx.core.data.dataset.base import LabelInfo
 from otx.core.data.entity.base import (
     OTXBatchLossEntity,
     T_OTXBatchDataEntity,
@@ -24,13 +26,45 @@
 
 
 class OTXModel(nn.Module, Generic[T_OTXBatchDataEntity, T_OTXBatchPredEntity]):
-    """Base class for the models used in OTX."""
+    """Base class for the models used in OTX.
 
-    def __init__(self) -> None:
+    Args:
+        num_classes: Number of classes this model can predict.
+    """
+
+    def __init__(self, num_classes: int) -> None:
         super().__init__()
+
+        self._label_info = LabelInfo.from_num_classes(num_classes)
         self.classification_layers: dict[str, dict[str, Any]] = {}
         self.model = self._create_model()
 
+    @property
+    def label_info(self) -> LabelInfo:
+        """Get this model label information."""
+        return self._label_info
+
+    @label_info.setter
+    def label_info(self, label_info: LabelInfo | list[str]) -> None:
+        """Set this model label information, resetting the prediction layer if the class count changes."""
+        if isinstance(label_info, list):
+            label_info = LabelInfo(label_names=label_info)
+
+        old_num_classes = self._label_info.num_classes
+        new_num_classes = label_info.num_classes
+
+        if old_num_classes != new_num_classes:
+            msg = (
+                f"Given LabelInfo has the different number of classes "
+                f"({old_num_classes}!={new_num_classes}). "
+                "The model prediction layer is reset to the new number of classes "
+                f"(={new_num_classes})."
+            )
+            warnings.warn(msg, stacklevel=2)  # stacklevel=2: report the line that assigned `label_info`
+            self._reset_prediction_layer(num_classes=label_info.num_classes)
+
+        self._label_info = label_info  # stored only after the reset above did not raise
+
     @abstractmethod
     def _create_model(self) -> nn.Module:
         """Create a PyTorch model for this class."""
@@ -164,3 +198,11 @@ def register_explain_hook(self) -> None:
         TBD
         """
         raise NotImplementedError
+
+    def _reset_prediction_layer(self, num_classes: int) -> None:
+        """Reset its prediction layer with a given number of classes.
+
+        Args:
+            num_classes: Number of classes
+        """
+        raise NotImplementedError
diff --git a/src/otx/core/model/entity/classification.py b/src/otx/core/model/entity/classification.py
index 86448f3d2b9..8a4cac402e5 100644
--- a/src/otx/core/model/entity/classification.py
+++ b/src/otx/core/model/entity/classification.py
@@ -21,6 +21,7 @@
 )
 from otx.core.model.entity.base import OTXModel
 from otx.core.utils.build import build_mm_model, get_classification_layers
+from otx.core.utils.config import inplace_num_classes
 
 if TYPE_CHECKING:
     from mmpretrain.models.utils import ClsDataPreprocessor
@@ -63,10 +64,11 @@ class MMPretrainMulticlassClsModel(OTXMulticlassClsModel):
     compatible for OTX pipelines.
     """
 
-    def __init__(self, config: DictConfig) -> None:
+    def __init__(self, num_classes: int, config: DictConfig) -> None:
+        config = inplace_num_classes(cfg=config, num_classes=num_classes)
         self.config = config
         self.load_from = config.pop("load_from", None)
-        super().__init__()
+        super().__init__(num_classes=num_classes)
 
     def _create_model(self) -> nn.Module:
         model, classification_layers = _create_mmpretrain_model(self.config, self.load_from)
@@ -155,10 +157,11 @@ class MMPretrainMultilabelClsModel(OTXMultilabelClsModel):
     compatible for OTX pipelines.
     """
 
-    def __init__(self, config: DictConfig) -> None:
+    def __init__(self, num_classes: int, config: DictConfig) -> None:
+        config = inplace_num_classes(cfg=config, num_classes=num_classes)
         self.config = config
         self.load_from = config.pop("load_from", None)
-        super().__init__()
+        super().__init__(num_classes=num_classes)
 
     def _create_model(self) -> nn.Module:
         model, classification_layers = _create_mmpretrain_model(self.config, self.load_from)
@@ -241,10 +244,11 @@ class MMPretrainHlabelClsModel(OTXHlabelClsModel):
     compatible for OTX pipelines.
     """
 
-    def __init__(self, config: DictConfig) -> None:
+    def __init__(self, num_classes: int, config: DictConfig) -> None:
+        config = inplace_num_classes(cfg=config, num_classes=num_classes)
         self.config = config
         self.load_from = config.pop("load_from", None)
-        super().__init__()
+        super().__init__(num_classes=num_classes)
 
     def _create_model(self) -> nn.Module:
         model, classification_layers = _create_mmpretrain_model(self.config, self.load_from)
@@ -322,10 +326,11 @@ class OVClassificationCompatibleModel(OTXMulticlassClsModel):
     and create the OTX classification model compatible for OTX testing pipeline.
     """
 
-    def __init__(self, config: DictConfig) -> None:
+    def __init__(self, num_classes: int, config: DictConfig) -> None:
         self.model_name = config.pop("model_name")
+        config = inplace_num_classes(cfg=config, num_classes=num_classes)
         self.config = config
-        super().__init__()
+        super().__init__(num_classes=num_classes)
 
     def _create_model(self) -> nn.Module:
         from openvino.model_api.models import ClassificationModel
diff --git a/src/otx/core/model/entity/detection.py b/src/otx/core/model/entity/detection.py
index 4b51af8eb07..64857355208 100644
--- a/src/otx/core/model/entity/detection.py
+++ b/src/otx/core/model/entity/detection.py
@@ -16,6 +16,7 @@
 from otx.core.data.entity.detection import DetBatchDataEntity, DetBatchPredEntity
 from otx.core.model.entity.base import OTXModel
 from otx.core.utils.build import build_mm_model, get_classification_layers
+from otx.core.utils.config import inplace_num_classes
 
 if TYPE_CHECKING:
     from mmdet.models.data_preprocessors import DetDataPreprocessor
@@ -35,10 +36,11 @@ class MMDetCompatibleModel(OTXDetectionModel):
     compatible for OTX pipelines.
     """
 
-    def __init__(self, config: DictConfig) -> None:
+    def __init__(self, num_classes: int, config: DictConfig) -> None:
+        config = inplace_num_classes(cfg=config, num_classes=num_classes)
         self.config = config
         self.load_from = config.pop("load_from", None)
-        super().__init__()
+        super().__init__(num_classes=num_classes)
 
     def _create_model(self) -> nn.Module:
         from mmdet.models.data_preprocessors import (
@@ -154,10 +156,10 @@ class OVDetectionCompatibleModel(OTXDetectionModel):
     and create the OTX detection model compatible for OTX testing pipeline.
     """
 
-    def __init__(self, config: DictConfig) -> None:
+    def __init__(self, num_classes: int, config: DictConfig) -> None:
         self.model_name = config.pop("model_name")
         self.config = config
-        super().__init__()
+        super().__init__(num_classes=num_classes)
 
     def _create_model(self) -> nn.Module:
         from openvino.model_api.models import DetectionModel
diff --git a/src/otx/core/model/entity/instance_segmentation.py b/src/otx/core/model/entity/instance_segmentation.py
index 10fcc7f69a9..e51513dddf0 100644
--- a/src/otx/core/model/entity/instance_segmentation.py
+++ b/src/otx/core/model/entity/instance_segmentation.py
@@ -18,6 +18,7 @@
 )
 from otx.core.model.entity.base import OTXModel
 from otx.core.utils.build import build_mm_model, get_classification_layers
+from otx.core.utils.config import inplace_num_classes
 
 if TYPE_CHECKING:
     from mmdet.models.data_preprocessors import DetDataPreprocessor
@@ -34,10 +35,11 @@ class OTXInstanceSegModel(
 class MMDetInstanceSegCompatibleModel(OTXInstanceSegModel):
     """Instance Segmentation model compatible for MMDet."""
 
-    def __init__(self, config: DictConfig) -> None:
+    def __init__(self, num_classes: int, config: DictConfig) -> None:
+        config = inplace_num_classes(cfg=config, num_classes=num_classes)
         self.config = config
         self.load_from = self.config.pop("load_from", None)
-        super().__init__()
+        super().__init__(num_classes=num_classes)
 
     def _create_model(self) -> nn.Module:
         from mmdet.models.data_preprocessors import (
@@ -176,11 +178,12 @@ class OVInstanceSegCompatibleModel(OTXInstanceSegModel):
     and create the OTX detection model compatible for OTX testing pipeline.
     """
 
-    def __init__(self, config: DictConfig) -> None:
+    def __init__(self, num_classes: int, config: DictConfig) -> None:
         self.model_name = config.pop("model_name")
         self.model_type = config.pop("model_type")
+        config = inplace_num_classes(cfg=config, num_classes=num_classes)
         self.config = config
-        super().__init__()
+        super().__init__(num_classes=num_classes)
 
     def _create_model(self) -> nn.Module:
         from openvino.model_api.models import Model
diff --git a/src/otx/core/model/entity/segmentation.py b/src/otx/core/model/entity/segmentation.py
index 467cc3bde66..bbef77170d5 100644
--- a/src/otx/core/model/entity/segmentation.py
+++ b/src/otx/core/model/entity/segmentation.py
@@ -14,6 +14,7 @@
 from otx.core.data.entity.segmentation import SegBatchDataEntity, SegBatchPredEntity
 from otx.core.model.entity.base import OTXModel
 from otx.core.utils.build import build_mm_model, get_classification_layers
+from otx.core.utils.config import inplace_num_classes
 
 if TYPE_CHECKING:
     from mmseg.models.data_preprocessor import SegDataPreProcessor
@@ -33,10 +34,11 @@ class MMSegCompatibleModel(OTXSegmentationModel):
     compatible for OTX pipelines.
     """
 
-    def __init__(self, config: DictConfig) -> None:
+    def __init__(self, num_classes: int, config: DictConfig) -> None:
+        config = inplace_num_classes(cfg=config, num_classes=num_classes)
         self.config = config
         self.load_from = self.config.pop("load_from", None)
-        super().__init__()
+        super().__init__(num_classes=num_classes)
 
     def _create_model(self) -> nn.Module:
         from mmengine.registry import MODELS as MMENGINE_MODELS
@@ -130,10 +132,11 @@ class OVSegmentationCompatibleModel(OTXSegmentationModel):
     and create the OTX segmentation model compatible for OTX testing pipeline.
     """
 
-    def __init__(self, config: DictConfig) -> None:
+    def __init__(self, num_classes: int, config: DictConfig) -> None:
         self.model_name = config.pop("model_name")
+        config = inplace_num_classes(cfg=config, num_classes=num_classes)
         self.config = config
-        super().__init__()
+        super().__init__(num_classes=num_classes)
 
     def _create_model(self) -> nn.Module:
         from openvino.model_api.models import SegmentationModel
diff --git a/src/otx/core/model/module/base.py b/src/otx/core/model/module/base.py
index f1a85943705..cde6566f065 100644
--- a/src/otx/core/model/module/base.py
+++ b/src/otx/core/model/module/base.py
@@ -19,7 +19,7 @@
 if TYPE_CHECKING:
     from pathlib import Path
 
-    from otx.core.data.dataset.base import DataMetaInfo
+    from otx.core.data.dataset.base import LabelInfo
 
 
 class OTXLitModule(LightningModule):
@@ -151,10 +151,10 @@ def load_state_dict(self, state_dict: dict[str, Any], *args, **kwargs) -> None:
             logger = logging.getLogger()
             logger.info(
                 f"Data classes from checkpoint: {ckpt_meta_info.class_names} -> "
-                f"Data classes from training data: {self.meta_info.class_names}",
+                f"Data classes from training data: {self.meta_info.label_names}",
             )
             self.register_load_state_dict_pre_hook(
-                self.meta_info.class_names,
+                self.meta_info.label_names,
                 ckpt_meta_info.class_names,
             )
         return super().load_state_dict(state_dict, *args, **kwargs)
@@ -165,16 +165,14 @@ def lr_scheduler_monitor_key(self) -> str:
         return "val/loss"
 
     @property
-    def meta_info(self) -> DataMetaInfo:
-        """Meta information of OTXLitModule."""
-        if self._meta_info is None:
-            err_msg = "meta_info is referenced before assignment"
-            raise ValueError(err_msg)
-        return self._meta_info
-
-    @meta_info.setter
-    def meta_info(self, meta_info: DataMetaInfo) -> None:
-        self._meta_info = meta_info
+    def label_info(self) -> LabelInfo:
+        """Get the member `OTXModel` label information."""
+        return self.model.label_info
+
+    @label_info.setter
+    def label_info(self, label_info: LabelInfo | list[str]) -> None:
+        """Set the member `OTXModel` label information."""
+        self.model.label_info = label_info  # type: ignore[assignment]
 
     def export(self, output_dir: Path, export_format: OTXExportFormat) -> None:
         """Export the member `OTXModel` of this module to the specified output directory.
diff --git a/src/otx/core/model/module/classification.py b/src/otx/core/model/module/classification.py
index b444654ee92..97a22cf5189 100644
--- a/src/otx/core/model/module/classification.py
+++ b/src/otx/core/model/module/classification.py
@@ -25,7 +25,7 @@
 from otx.core.model.module.base import OTXLitModule
 
 if TYPE_CHECKING:
-    from otx.core.data.dataset.base import DataMetaInfo
+    from otx.core.data.dataset.base import LabelInfo
 
 
 class OTXMulticlassClsLitModule(OTXLitModule):
@@ -220,7 +220,7 @@ def _set_hlabel_setup(self) -> None:
         self.model.model.head.set_hlabel_info(self.hlabel_info)
 
         # Set the OTXHlabelClsLitModule params.
-        self.num_labels = len(self.meta_info.class_names)
+        self.num_labels = len(self.meta_info.label_names)
         self.num_multiclass_heads = self.hlabel_info.num_multiclass_heads
         self.num_multilabel_classes = self.hlabel_info.num_multilabel_classes
         self.num_singlelabel_classes = self.num_labels - self.num_multilabel_classes
@@ -310,7 +310,7 @@ def lr_scheduler_monitor_key(self) -> str:
         return "train/loss"
 
     @property
-    def meta_info(self) -> DataMetaInfo:
+    def meta_info(self) -> LabelInfo:
         """Meta information of OTXLitModule."""
         if self._meta_info is None:
             err_msg = "meta_info is referenced before assignment"
@@ -318,6 +318,6 @@ def meta_info(self) -> DataMetaInfo:
         return self._meta_info
 
     @meta_info.setter
-    def meta_info(self, meta_info: DataMetaInfo) -> None:
+    def meta_info(self, meta_info: LabelInfo) -> None:
         self._meta_info = meta_info
         self._set_hlabel_setup()
diff --git a/src/otx/core/utils/build.py b/src/otx/core/utils/build.py
index 3dcba680693..49d8af4cb16 100644
--- a/src/otx/core/utils/build.py
+++ b/src/otx/core/utils/build.py
@@ -8,7 +8,6 @@
 from copy import deepcopy
 from typing import TYPE_CHECKING
 
-from mmengine.logging import MMLogger
 from omegaconf import DictConfig
 
 from otx.core.utils.config import convert_conf_to_mmconfig_dict
@@ -20,6 +19,7 @@
 
 def build_mm_model(config: DictConfig, model_registry: Registry, load_from: str | None = None) -> nn.Module:
     """Build a model by using the registry."""
+    from mmengine.logging import MMLogger
     from mmengine.runner import load_checkpoint
 
     from otx import algo  # noqa: F401
diff --git a/src/otx/core/utils/config.py b/src/otx/core/utils/config.py
index 9cf44b95115..18a64e0caa3 100644
--- a/src/otx/core/utils/config.py
+++ b/src/otx/core/utils/config.py
@@ -6,13 +6,12 @@
 from __future__ import annotations
 
 from numbers import Number
-from typing import TYPE_CHECKING, Literal
+from typing import TYPE_CHECKING, Any, Literal
 
-from mmengine.config import Config as MMConfig
-from omegaconf import OmegaConf
+from omegaconf import DictConfig, ListConfig, OmegaConf
 
 if TYPE_CHECKING:
-    from omegaconf import DictConfig
+    from mmengine.config import Config as MMConfig
 
 
 def to_tuple(dict_: dict) -> dict:
@@ -48,6 +47,8 @@ def convert_conf_to_mmconfig_dict(
     to: Literal["tuple", "list"] = "tuple",
 ) -> MMConfig:
     """Convert OTX format config object to MMEngine config object."""
+    from mmengine.config import Config as MMConfig
+
     dict_cfg = OmegaConf.to_container(cfg)
 
     if to == "tuple":
@@ -66,3 +67,29 @@ def mmconfig_dict_to_dict(obj: MMConfig | list[MMConfig]) -> list | dict:
         return {k: mmconfig_dict_to_dict(v) for k, v in obj.to_dict().items()}
 
     return obj
+
+
+def inplace_num_classes(
+    cfg: DictConfig | ListConfig | Any,  # noqa: ANN401
+    num_classes: int,
+) -> DictConfig | ListConfig | Any:  # noqa: ANN401
+    """Recursively overwrite, in place, every integer `num_classes` entry of a config object.
+
+    Args:
+        cfg: Config node to update; nested `DictConfig`/`ListConfig` nodes are walked recursively
+        num_classes: Value written to each `num_classes` key that currently holds an `int`
+    Returns:
+        The same `cfg` object (mutated in place); any other kind of value is returned unchanged
+    """
+    if isinstance(cfg, DictConfig):
+        for key in cfg:
+            if key == "num_classes" and isinstance(cfg[key], int):  # only integer leaves are replaced
+                cfg[key] = num_classes
+            else:
+                cfg[key] = inplace_num_classes(cfg[key], num_classes)  # descend into the child value
+
+    if isinstance(cfg, ListConfig):
+        for idx in range(len(cfg)):
+            cfg[idx] = inplace_num_classes(cfg[idx], num_classes)
+
+    return cfg
diff --git a/src/otx/recipe/detection/openvino_model.yaml b/src/otx/recipe/detection/openvino_model.yaml
index c55b0c2bb4e..72b79ad0984 100644
--- a/src/otx/recipe/detection/openvino_model.yaml
+++ b/src/otx/recipe/detection/openvino_model.yaml
@@ -13,3 +13,4 @@ model:
     _target_: otx.core.model.entity.detection.OVDetectionCompatibleModel
     config:
       model_name: ssd300
+    num_classes: 20
diff --git a/src/otx/recipe/instance_segmentation/openvino_model.yaml b/src/otx/recipe/instance_segmentation/openvino_model.yaml
index 5925b7c19b3..d766a4437a3 100644
--- a/src/otx/recipe/instance_segmentation/openvino_model.yaml
+++ b/src/otx/recipe/instance_segmentation/openvino_model.yaml
@@ -14,3 +14,4 @@ model:
     config:
       model_name: yolact-resnet50-fpn-pytorch
       model_type: YOLACT
+    num_classes: 80
diff --git a/src/otx/recipe/multiclass_classification/openvino_model.yaml b/src/otx/recipe/multiclass_classification/openvino_model.yaml
index 27e0862f214..727ef9e3f17 100644
--- a/src/otx/recipe/multiclass_classification/openvino_model.yaml
+++ b/src/otx/recipe/multiclass_classification/openvino_model.yaml
@@ -15,3 +15,4 @@ model:
       model_name: efficientnet-b0-pytorch
       head:
         num_classes: 1000
+    num_classes: 1000
diff --git a/src/otx/recipe/segmentation/openvino_model.yaml b/src/otx/recipe/segmentation/openvino_model.yaml
index d031a396761..5df6f080cd4 100644
--- a/src/otx/recipe/segmentation/openvino_model.yaml
+++ b/src/otx/recipe/segmentation/openvino_model.yaml
@@ -15,3 +15,4 @@ model:
       model_name: drn-d-38
       decode_head:
         num_classes: 19
+    num_classes: 19
diff --git a/tests/integration/cli/test_cli.py b/tests/integration/cli/test_cli.py
index 4d347138327..85167aae76c 100644
--- a/tests/integration/cli/test_cli.py
+++ b/tests/integration/cli/test_cli.py
@@ -22,46 +22,45 @@
     "multiclass_classification": {
         "data_dir": "tests/assets/classification_dataset",
         "overrides": [
-            "model.otx_model.config.head.num_classes=2",
+            "model.otx_model.num_classes=2",
         ],
     },
     "multilabel_classification": {
         "data_dir": "tests/assets/multilabel_classification",
         "overrides": [
-            "model.otx_model.config.head.num_classes=2",
+            "model.otx_model.num_classes=2",
         ],
     },
     "hlabel_classification": {
         "data_dir": "tests/assets/hlabel_classification",
         "overrides": [
-            "model.otx_model.config.head.num_classes=7",
+            "model.otx_model.num_classes=7",
             "model.otx_model.config.head.num_multiclass_heads=2",
             "model.otx_model.config.head.num_multilabel_classes=3",
         ],
     },
     "detection": {
         "data_dir": "tests/assets/car_tree_bug",
-        "overrides": ["model.otx_model.config.bbox_head.num_classes=3"],
+        "overrides": ["model.otx_model.num_classes=3"],
     },
     "instance_segmentation": {
         "data_dir": "tests/assets/car_tree_bug",
         "overrides": [
-            "model.otx_model.config.roi_head.bbox_head.num_classes=3",
-            "model.otx_model.config.roi_head.mask_head.num_classes=3",
+            "model.otx_model.num_classes=3",
         ],
     },
     "segmentation": {
         "data_dir": "tests/assets/common_semantic_segmentation_dataset/supervised",
-        "overrides": ["model.otx_model.config.decode_head.num_classes=2"],
+        "overrides": ["model.otx_model.num_classes=2"],
     },
     "action_classification": {
         "data_dir": "tests/assets/action_classification_dataset/",
-        "overrides": ["model.otx_model.config.cls_head.num_classes=2"],
+        "overrides": ["model.otx_model.num_classes=2"],
     },
     "action_detection": {
         "data_dir": "tests/assets/action_detection_dataset/",
         "overrides": [
-            "model.otx_model.config.roi_head.bbox_head.num_classes=5",
+            "model.otx_model.num_classes=5",
             "+model.otx_model.config.roi_head.bbox_head.topk=3",
         ],
     },
diff --git a/tests/regression/test_regression.py b/tests/regression/test_regression.py
index 020d5fbd8f5..ebcd234e05d 100644
--- a/tests/regression/test_regression.py
+++ b/tests/regression/test_regression.py
@@ -45,7 +45,6 @@ def _test_regression(
         fxt_num_repeat: int,
         fxt_accelerator: str,
         tmpdir: pytest.TempdirFactory,
-        head_name: str,
     ) -> None:
         for seed in range(fxt_num_repeat):
             test_case = RegressionTestCase(
@@ -70,7 +69,7 @@ def _test_regression(
             with mlflow.start_run(tags=tags, run_name=run_name):
                 overrides = [
                     f"+recipe={test_case.model.task}/{test_case.model.name}",
-                    f"model.otx_model.config.{head_name}.num_classes={test_case.dataset.num_classes}",
+                    f"model.otx_model.num_classes={test_case.dataset.num_classes}",
                     f"data.data_root={data_root}",
                     f"data.data_format={test_case.dataset.data_format}",
                     f"base.output_dir={test_case.output_dir}",
@@ -152,7 +151,6 @@ def test_regression(
             fxt_num_repeat=fxt_num_repeat,
             fxt_accelerator=fxt_accelerator,
             tmpdir=tmpdir,
-            head_name="head",
         )
 
 
@@ -219,7 +217,6 @@ def test_regression(
             fxt_num_repeat=fxt_num_repeat,
             fxt_accelerator=fxt_accelerator,
             tmpdir=tmpdir,
-            head_name="head",
         )
 
 
@@ -271,7 +268,6 @@ def test_regression(
             fxt_num_repeat=fxt_num_repeat,
             fxt_accelerator=fxt_accelerator,
             tmpdir=tmpdir,
-            head_name="head",
         )
 
 class TestObjectDetection(BaseTest):
@@ -338,5 +334,4 @@ def test_regression(
             fxt_tags=fxt_tags,
             fxt_num_repeat=fxt_num_repeat,
             tmpdir=tmpdir,
-            head_name="bbox_head",
         )
diff --git a/tests/unit/core/model/entity/test_base.py b/tests/unit/core/model/entity/test_base.py
index 4b126f9250c..85641ba629e 100644
--- a/tests/unit/core/model/entity/test_base.py
+++ b/tests/unit/core/model/entity/test_base.py
@@ -12,10 +12,10 @@ def __init__(self, num_classes):
 class TestOTXModel:
     def test_smart_weight_loading(self, mocker) -> None:
         mocker.patch.object(OTXModel, "_create_model", return_value=MockNNModule(2))
-        prev_model = OTXModel()
+        prev_model = OTXModel(num_classes=2)
 
         mocker.patch.object(OTXModel, "_create_model", return_value=MockNNModule(3))
-        current_model = OTXModel()
+        current_model = OTXModel(num_classes=3)
         current_model.classification_layers = ["model.head.weight", "model.head.bias"]
         current_model.classification_layers = {
             "model.head.weight": {"stride": 1, "num_extra_classes": 0},
diff --git a/tests/unit/core/model/entity/test_segmentation.py b/tests/unit/core/model/entity/test_segmentation.py
index cacfd0ec7b2..41293669c01 100644
--- a/tests/unit/core/model/entity/test_segmentation.py
+++ b/tests/unit/core/model/entity/test_segmentation.py
@@ -19,11 +19,11 @@
 class TestOTXSegmentationModel:
     @pytest.fixture()
     def config(self) -> DictConfig:
-        return OmegaConf.load("src/otx/recipe/segmentation/segnext_s.yaml")
+        return OmegaConf.load("src/otx/recipe/segmentation/segnext_s.yaml").model.otx_model.config
 
     @pytest.fixture()
     def model(self, config) -> MMSegCompatibleModel:
-        return MMSegCompatibleModel(config.model.otx_model.config)
+        return MMSegCompatibleModel(num_classes=1, config=config)
 
     def test_create_model(self, model) -> None:
         mmseg_model = model._create_model()

From 3e4e7df18be1ffc8e6089028f4b53506de92ea42 Mon Sep 17 00:00:00 2001
From: "Kim, Vinnam" <vinnam.kim@intel.com>
Date: Thu, 11 Jan 2024 15:56:34 +0900
Subject: [PATCH 3/8] Fix test error

Signed-off-by: Kim, Vinnam <vinnam.kim@intel.com>
---
 tests/integration/detection/test_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/detection/test_model.py b/tests/integration/detection/test_model.py
index 60776f45a45..cd450d595ae 100644
--- a/tests/integration/detection/test_model.py
+++ b/tests/integration/detection/test_model.py
@@ -15,7 +15,7 @@ def fxt_rtmdet_tiny_model_config(self, fxt_rtmdet_tiny_config) -> DictConfig:
 
     @pytest.fixture()
     def fxt_model(self, fxt_rtmdet_tiny_model_config) -> MMDetCompatibleModel:
-        return MMDetCompatibleModel(config=fxt_rtmdet_tiny_model_config)
+        return MMDetCompatibleModel(num_classes=3, config=fxt_rtmdet_tiny_model_config)
 
     def test_forward_train(
         self,

From 1afea455fb92b16844a4b90c1d949856dd6a6ffc Mon Sep 17 00:00:00 2001
From: "Kim, Vinnam" <vinnam.kim@intel.com>
Date: Thu, 11 Jan 2024 17:56:25 +0900
Subject: [PATCH 4/8] Change action cls and det

Signed-off-by: Kim, Vinnam <vinnam.kim@intel.com>
---
 .../algo/action_classification/__init__.py    |  4 ++
 .../action_classification/mmconfigs/x3d.yaml  | 28 ++++++++++
 src/otx/algo/action_classification/x3d.py     | 15 +++++
 src/otx/algo/action_detection/__init__.py     |  4 ++
 .../mmconfigs/x3d_fastrcnn.yaml               | 51 +++++++++++++++++
 src/otx/algo/action_detection/template.py     | 16 ++++++
 src/otx/algo/action_detection/x3d_fastrcnn.py | 17 ++++++
 src/otx/algo/utils/mmconfig.py                | 31 +++++++++++
 .../config/model/mmaction_classification.yaml |  2 +-
 src/otx/config/model/mmaction_detection.yaml  |  2 +-
 src/otx/recipe/action_classification/x3d.yaml | 30 +---------
 .../recipe/action_detection/x3d_fastrcnn.yaml | 55 +------------------
 tests/integration/cli/test_cli.py             |  2 +-
 13 files changed, 173 insertions(+), 84 deletions(-)
 create mode 100644 src/otx/algo/action_classification/__init__.py
 create mode 100644 src/otx/algo/action_classification/mmconfigs/x3d.yaml
 create mode 100644 src/otx/algo/action_classification/x3d.py
 create mode 100644 src/otx/algo/action_detection/__init__.py
 create mode 100644 src/otx/algo/action_detection/mmconfigs/x3d_fastrcnn.yaml
 create mode 100644 src/otx/algo/action_detection/template.py
 create mode 100644 src/otx/algo/action_detection/x3d_fastrcnn.py
 create mode 100644 src/otx/algo/utils/mmconfig.py

diff --git a/src/otx/algo/action_classification/__init__.py b/src/otx/algo/action_classification/__init__.py
new file mode 100644
index 00000000000..2f4d6dc99d5
--- /dev/null
+++ b/src/otx/algo/action_classification/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+"""Module for OTX action classification models, hooks, utils, etc."""
diff --git a/src/otx/algo/action_classification/mmconfigs/x3d.yaml b/src/otx/algo/action_classification/mmconfigs/x3d.yaml
new file mode 100644
index 00000000000..aad0dcf8f8c
--- /dev/null
+++ b/src/otx/algo/action_classification/mmconfigs/x3d.yaml
@@ -0,0 +1,28 @@
+load_from: https://download.openmmlab.com/mmaction/recognition/x3d/facebook/x3d_m_facebook_16x5x1_kinetics400_rgb_20201027-3f42382a.pth
+backbone:
+  gamma_b: 2.25
+  gamma_d: 2.2
+  gamma_w: 1
+  type: X3D
+cls_head:
+  average_clips: prob
+  dropout_ratio: 0.5
+  fc1_bias: false
+  in_channels: 432
+  num_classes: 400
+  spatial_type: avg
+  type: X3DHead
+data_preprocessor:
+  format_shape: NCTHW
+  mean:
+    - 114.75
+    - 114.75
+    - 114.75
+  std:
+    - 57.38
+    - 57.38
+    - 57.38
+  type: ActionDataPreprocessor
+test_cfg: null
+train_cfg: null
+type: Recognizer3D
diff --git a/src/otx/algo/action_classification/x3d.py b/src/otx/algo/action_classification/x3d.py
new file mode 100644
index 00000000000..d7fbddc94ad
--- /dev/null
+++ b/src/otx/algo/action_classification/x3d.py
@@ -0,0 +1,15 @@
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+"""X3D model implementation."""
+
+from otx.algo.utils.mmconfig import read_mmconfig
+from otx.core.model.entity.action_classification import MMActionCompatibleModel
+
+
+class X3D(MMActionCompatibleModel):
+    """X3D Model."""
+
+    def __init__(self, num_classes: int) -> None:
+        config = read_mmconfig("x3d")
+        super().__init__(num_classes=num_classes, config=config)
diff --git a/src/otx/algo/action_detection/__init__.py b/src/otx/algo/action_detection/__init__.py
new file mode 100644
index 00000000000..4b58f3a3c7b
--- /dev/null
+++ b/src/otx/algo/action_detection/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+"""Module for OTX action detection models, hooks, utils, etc."""
diff --git a/src/otx/algo/action_detection/mmconfigs/x3d_fastrcnn.yaml b/src/otx/algo/action_detection/mmconfigs/x3d_fastrcnn.yaml
new file mode 100644
index 00000000000..a9e49c244c1
--- /dev/null
+++ b/src/otx/algo/action_detection/mmconfigs/x3d_fastrcnn.yaml
@@ -0,0 +1,51 @@
+type: FastRCNN
+_scope_: mmdet
+init_cfg:
+  type: Pretrained
+  checkpoint: https://download.openmmlab.com/mmaction/recognition/x3d/facebook/x3d_m_facebook_16x5x1_kinetics400_rgb_20201027-3f42382a.pth
+backbone:
+  type: mmaction.X3D
+  gamma_b: 2.25
+  gamma_d: 2.2
+  gamma_w: 1
+roi_head:
+  type: AVARoIHead
+  bbox_roi_extractor:
+    type: SingleRoIExtractor3D
+    roi_layer_type: RoIAlign
+    output_size: 8
+    with_temporal_pool: true
+  bbox_head:
+    type: BBoxHeadAVA
+    background_class: true
+    in_channels: 432
+    num_classes: 81
+    multilabel: false
+    dropout_ratio: 0.5
+data_preprocessor:
+  type: ActionDataPreprocessor
+  mean:
+    - 123.675
+    - 116.28
+    - 103.53
+  std:
+    - 58.395
+    - 57.12
+    - 57.375
+  format_shape: NCTHW
+train_cfg:
+  rcnn:
+    assigner:
+      type: MaxIoUAssignerAVA
+      pos_iou_thr: 0.9
+      neg_iou_thr: 0.9
+      min_pos_iou: 0.9
+    sampler:
+      type: RandomSampler
+      num: 32
+      pos_fraction: 1
+      neg_pos_ub: -1
+      add_gt_as_proposals: true
+    pos_weight: 1.0
+test_cfg:
+  rcnn: null
diff --git a/src/otx/algo/action_detection/template.py b/src/otx/algo/action_detection/template.py
new file mode 100644
index 00000000000..374800094d8
--- /dev/null
+++ b/src/otx/algo/action_detection/template.py
@@ -0,0 +1,16 @@
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+"""X3DFastRCNN model implementation."""
+
+from otx.core.model.entity.action_detection import MMActionCompatibleModel
+
+_MM_CONFIG = """
+
+"""
+
+
+class X3DFastRCNN(MMActionCompatibleModel):
+    """X3D Model."""
+
+    MM_CONFIG = _MM_CONFIG
diff --git a/src/otx/algo/action_detection/x3d_fastrcnn.py b/src/otx/algo/action_detection/x3d_fastrcnn.py
new file mode 100644
index 00000000000..af0f3402f78
--- /dev/null
+++ b/src/otx/algo/action_detection/x3d_fastrcnn.py
@@ -0,0 +1,17 @@
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+"""X3DFastRCNN model implementation."""
+from __future__ import annotations
+
+from otx.algo.utils.mmconfig import read_mmconfig
+from otx.core.model.entity.action_detection import MMActionCompatibleModel
+
+
+class X3DFastRCNN(MMActionCompatibleModel):
+    """X3DFastRCNN Model."""
+
+    def __init__(self, num_classes: int, topk: int | tuple[int]):
+        config = read_mmconfig("x3d_fastrcnn")
+        config.roi_head.bbox_head.topk = topk
+        super().__init__(num_classes=num_classes, config=config)
diff --git a/src/otx/algo/utils/mmconfig.py b/src/otx/algo/utils/mmconfig.py
new file mode 100644
index 00000000000..4860950f1f7
--- /dev/null
+++ b/src/otx/algo/utils/mmconfig.py
@@ -0,0 +1,31 @@
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+"""Utils used for MMConfigs."""
+
+import inspect
+from pathlib import Path
+
+from omegaconf import DictConfig, OmegaConf
+
+
+def read_mmconfig(model_name: str) -> DictConfig:
+    """Read MMConfig.
+
+    It tries to read MMConfig from the yaml file which exists in
+    `<Directory path of __file__ who calls this function>/mmconfigs/<model_name>.yaml`
+    """
+    frame = inspect.stack()[1]
+    module = inspect.getmodule(frame[0])
+
+    if module is None or module.__file__ is None:
+        msg = "Cannot get valid model from stack"
+        raise RuntimeError(msg)
+    # Resolve relative to the calling module's file, not the current working directory.
+    root_dir = Path(module.__file__).parent / "mmconfigs"
+    fpath = root_dir / f"{model_name}.yaml"
+
+    if not fpath.exists():
+        raise FileNotFoundError(fpath)
+
+    return OmegaConf.load(fpath)
diff --git a/src/otx/config/model/mmaction_classification.yaml b/src/otx/config/model/mmaction_classification.yaml
index a5c1ca5a4e4..17b41f7784a 100644
--- a/src/otx/config/model/mmaction_classification.yaml
+++ b/src/otx/config/model/mmaction_classification.yaml
@@ -5,7 +5,7 @@ _target_: otx.core.model.module.action_classification.OTXActionClsLitModule
 
 otx_model:
   _target_: otx.core.model.entity.action_classification.MMActionCompatibleModel
-  config: ???
+  num_classes: ???
 
 # compile model for faster training with pytorch 2.0
 torch_compile: false
diff --git a/src/otx/config/model/mmaction_detection.yaml b/src/otx/config/model/mmaction_detection.yaml
index 217c56f5032..6a8fe289b78 100644
--- a/src/otx/config/model/mmaction_detection.yaml
+++ b/src/otx/config/model/mmaction_detection.yaml
@@ -5,7 +5,7 @@ _target_: otx.core.model.module.action_detection.OTXActionDetLitModule
 
 otx_model:
   _target_: otx.core.model.entity.action_detection.MMActionCompatibleModel
-  config: ???
+  num_classes: ???
 
 # compile model for faster training with pytorch 2.0
 torch_compile: false
diff --git a/src/otx/recipe/action_classification/x3d.yaml b/src/otx/recipe/action_classification/x3d.yaml
index a7e4b02e00e..46f575e12d7 100644
--- a/src/otx/recipe/action_classification/x3d.yaml
+++ b/src/otx/recipe/action_classification/x3d.yaml
@@ -78,35 +78,7 @@ data:
           - scale_factor
 model:
   otx_model:
-    config:
-      load_from: https://download.openmmlab.com/mmaction/recognition/x3d/facebook/x3d_m_facebook_16x5x1_kinetics400_rgb_20201027-3f42382a.pth
-      backbone:
-        gamma_b: 2.25
-        gamma_d: 2.2
-        gamma_w: 1
-        type: X3D
-      cls_head:
-        average_clips: prob
-        dropout_ratio: 0.5
-        fc1_bias: false
-        in_channels: 432
-        num_classes: 400
-        spatial_type: avg
-        type: X3DHead
-      data_preprocessor:
-        format_shape: NCTHW
-        mean:
-          - 114.75
-          - 114.75
-          - 114.75
-        std:
-          - 57.38
-          - 57.38
-          - 57.38
-        type: ActionDataPreprocessor
-      test_cfg: null
-      train_cfg: null
-      type: Recognizer3D
+    _target_: otx.algo.action_classification.x3d.X3D
   optimizer:
     _target_: torch.optim.AdamW
     lr: 0.001
diff --git a/src/otx/recipe/action_detection/x3d_fastrcnn.yaml b/src/otx/recipe/action_detection/x3d_fastrcnn.yaml
index 328d2a657a7..b602798ea7e 100644
--- a/src/otx/recipe/action_detection/x3d_fastrcnn.yaml
+++ b/src/otx/recipe/action_detection/x3d_fastrcnn.yaml
@@ -67,58 +67,9 @@ data:
       - type: PackActionInputs
 model:
   otx_model:
-    config:
-      type: FastRCNN
-      _scope_: mmdet
-      init_cfg:
-        type: Pretrained
-        checkpoint: https://download.openmmlab.com/mmaction/recognition/x3d/facebook/x3d_m_facebook_16x5x1_kinetics400_rgb_20201027-3f42382a.pth
-      backbone:
-        type: mmaction.X3D
-        gamma_b: 2.25
-        gamma_d: 2.2
-        gamma_w: 1
-      roi_head:
-        type: AVARoIHead
-        bbox_roi_extractor:
-          type: SingleRoIExtractor3D
-          roi_layer_type: RoIAlign
-          output_size: 8
-          with_temporal_pool: true
-        bbox_head:
-          type: BBoxHeadAVA
-          background_class: true
-          in_channels: 432
-          num_classes: 81
-          multilabel: false
-          dropout_ratio: 0.5
-      data_preprocessor:
-        type: ActionDataPreprocessor
-        mean:
-          - 123.675
-          - 116.28
-          - 103.53
-        std:
-          - 58.395
-          - 57.12
-          - 57.375
-        format_shape: NCTHW
-      train_cfg:
-        rcnn:
-          assigner:
-            type: MaxIoUAssignerAVA
-            pos_iou_thr: 0.9
-            neg_iou_thr: 0.9
-            min_pos_iou: 0.9
-          sampler:
-            type: RandomSampler
-            num: 32
-            pos_fraction: 1
-            neg_pos_ub: -1
-            add_gt_as_proposals: true
-          pos_weight: 1.0
-      test_cfg:
-        rcnn: null
+    _target_: otx.algo.action_detection.x3d_fastrcnn.X3DFastRCNN
+    topk: ???
+
   optimizer:
     _target_: torch.optim.SGD
     lr: 0.005
diff --git a/tests/integration/cli/test_cli.py b/tests/integration/cli/test_cli.py
index 85167aae76c..fa1c126678a 100644
--- a/tests/integration/cli/test_cli.py
+++ b/tests/integration/cli/test_cli.py
@@ -61,7 +61,7 @@
         "data_dir": "tests/assets/action_detection_dataset/",
         "overrides": [
             "model.otx_model.num_classes=5",
-            "+model.otx_model.config.roi_head.bbox_head.topk=3",
+            "model.otx_model.topk=3",
         ],
     },
 }

From 03345ba2ac2a2ad7081a366e56dc5370b525793a Mon Sep 17 00:00:00 2001
From: "Kim, Vinnam" <vinnam.kim@intel.com>
Date: Thu, 11 Jan 2024 19:30:58 +0900
Subject: [PATCH 5/8] Change classification models

Signed-off-by: Kim, Vinnam <vinnam.kim@intel.com>
---
 src/otx/algo/action_detection/template.py     | 16 -------
 src/otx/algo/classification/deit_tiny.py      | 37 ++++++++++++++++
 .../algo/classification/efficientnet_b0.py    | 38 +++++++++++++++++
 .../algo/classification/efficientnet_v2.py    | 38 +++++++++++++++++
 .../hlabel_classification/deit_tiny.yaml      | 40 ++++++++++++++++++
 .../efficientnet_b0_light.yaml                | 32 ++++++++++++++
 .../efficientnet_v2_light.yaml                | 33 +++++++++++++++
 .../mobilenet_v3_large_light.yaml             | 30 +++++++++++++
 .../multiclass_classification/deit_tiny.yaml  | 33 +++++++++++++++
 .../efficientnet_b0.yaml                      | 39 +++++++++++++++++
 .../efficientnet_b0_light.yaml                | 28 +++++++++++++
 .../efficientnet_v2.yaml                      | 38 +++++++++++++++++
 .../efficientnet_v2_light.yaml                | 27 ++++++++++++
 .../mobilenet_v3_large.yaml                   | 37 ++++++++++++++++
 .../mobilenet_v3_large_light.yaml             | 26 ++++++++++++
 .../multilabel_classification/deit_tiny.yaml  | 32 ++++++++++++++
 .../efficientnet_b0_light.yaml                | 29 +++++++++++++
 .../efficientnet_v2_light.yaml                | 28 +++++++++++++
 .../mobilenet_v3_large_light.yaml             | 30 +++++++++++++
 .../algo/classification/mobilenet_v3_large.py | 38 +++++++++++++++++
 src/otx/algo/utils/mmconfig.py                | 14 +++----
 src/otx/config/model/hlabel_mmpretrain.yaml   |  4 +-
 .../config/model/multiclass_mmpretrain.yaml   |  2 +-
 .../config/model/multilabel_mmpretrain.yaml   |  2 +-
 .../efficientnet_b0_light.yaml                | 34 +--------------
 .../efficientnet_v2_light.yaml                | 35 +---------------
 .../mobilenet_v3_large_light.yaml             | 32 +-------------
 .../hlabel_classification/otx_deit_tiny.yaml  | 42 +------------------
 .../efficientnet_b0_light.yaml                | 31 +-------------
 .../efficientnet_v2_light.yaml                | 30 +------------
 .../mobilenet_v3_large_light.yaml             | 29 +------------
 .../otx_deit_tiny.yaml                        | 35 +---------------
 .../otx_efficientnet_b0.yaml                  | 41 +-----------------
 .../otx_efficientnet_v2.yaml                  | 40 +-----------------
 .../otx_mobilenet_v3_large.yaml               | 39 +----------------
 .../efficientnet_b0_light.yaml                | 31 +-------------
 .../efficientnet_v2_light.yaml                | 30 +------------
 .../mobilenet_v3_large_light.yaml             | 32 +-------------
 .../otx_deit_tiny.yaml                        | 34 +--------------
 tests/integration/cli/test_cli.py             |  4 +-
 40 files changed, 665 insertions(+), 525 deletions(-)
 delete mode 100644 src/otx/algo/action_detection/template.py
 create mode 100644 src/otx/algo/classification/deit_tiny.py
 create mode 100644 src/otx/algo/classification/efficientnet_b0.py
 create mode 100644 src/otx/algo/classification/efficientnet_v2.py
 create mode 100644 src/otx/algo/classification/mmconfigs/hlabel_classification/deit_tiny.yaml
 create mode 100644 src/otx/algo/classification/mmconfigs/hlabel_classification/efficientnet_b0_light.yaml
 create mode 100644 src/otx/algo/classification/mmconfigs/hlabel_classification/efficientnet_v2_light.yaml
 create mode 100644 src/otx/algo/classification/mmconfigs/hlabel_classification/mobilenet_v3_large_light.yaml
 create mode 100644 src/otx/algo/classification/mmconfigs/multiclass_classification/deit_tiny.yaml
 create mode 100644 src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_b0.yaml
 create mode 100644 src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_b0_light.yaml
 create mode 100644 src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_v2.yaml
 create mode 100644 src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_v2_light.yaml
 create mode 100644 src/otx/algo/classification/mmconfigs/multiclass_classification/mobilenet_v3_large.yaml
 create mode 100644 src/otx/algo/classification/mmconfigs/multiclass_classification/mobilenet_v3_large_light.yaml
 create mode 100644 src/otx/algo/classification/mmconfigs/multilabel_classification/deit_tiny.yaml
 create mode 100644 src/otx/algo/classification/mmconfigs/multilabel_classification/efficientnet_b0_light.yaml
 create mode 100644 src/otx/algo/classification/mmconfigs/multilabel_classification/efficientnet_v2_light.yaml
 create mode 100644 src/otx/algo/classification/mmconfigs/multilabel_classification/mobilenet_v3_large_light.yaml
 create mode 100644 src/otx/algo/classification/mobilenet_v3_large.py

diff --git a/src/otx/algo/action_detection/template.py b/src/otx/algo/action_detection/template.py
deleted file mode 100644
index 374800094d8..00000000000
--- a/src/otx/algo/action_detection/template.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Copyright (C) 2023 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-#
-"""X3DFastRCNN model implementation."""
-
-from otx.core.model.entity.action_detection import MMActionCompatibleModel
-
-_MM_CONFIG = """
-
-"""
-
-
-class X3DFastRCNN(MMActionCompatibleModel):
-    """X3D Model."""
-
-    MM_CONFIG = _MM_CONFIG
diff --git a/src/otx/algo/classification/deit_tiny.py b/src/otx/algo/classification/deit_tiny.py
new file mode 100644
index 00000000000..7107b99e232
--- /dev/null
+++ b/src/otx/algo/classification/deit_tiny.py
@@ -0,0 +1,37 @@
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+"""DeitTiny model implementation."""
+
+from otx.algo.utils.mmconfig import read_mmconfig
+from otx.core.model.entity.classification import (
+    MMPretrainHlabelClsModel,
+    MMPretrainMulticlassClsModel,
+    MMPretrainMultilabelClsModel,
+)
+
+
+class DeitTinyForHLabelCls(MMPretrainHlabelClsModel):
+    """DeitTiny Model for hierarchical label classification task."""
+
+    def __init__(self, num_classes: int, num_multiclass_heads: int, num_multilabel_classes: int) -> None:
+        config = read_mmconfig(model_name="deit_tiny", subdir_name="hlabel_classification")
+        config.head.num_multiclass_heads = num_multiclass_heads
+        config.head.num_multilabel_classes = num_multilabel_classes
+        super().__init__(num_classes=num_classes, config=config)
+
+
+class DeitTinyForMulticlassCls(MMPretrainMulticlassClsModel):
+    """DeitTiny Model for multi-class classification task."""
+
+    def __init__(self, num_classes: int) -> None:
+        config = read_mmconfig("deit_tiny", subdir_name="multiclass_classification")
+        super().__init__(num_classes=num_classes, config=config)
+
+
+class DeitTinyForMultilabelCls(MMPretrainMultilabelClsModel):
+    """DeitTiny Model for multi-label classification task."""
+
+    def __init__(self, num_classes: int) -> None:
+        config = read_mmconfig("deit_tiny", subdir_name="multilabel_classification")
+        super().__init__(num_classes=num_classes, config=config)
diff --git a/src/otx/algo/classification/efficientnet_b0.py b/src/otx/algo/classification/efficientnet_b0.py
new file mode 100644
index 00000000000..3259d1e0921
--- /dev/null
+++ b/src/otx/algo/classification/efficientnet_b0.py
@@ -0,0 +1,38 @@
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+"""EfficientNetB0 model implementation."""
+
+from otx.algo.utils.mmconfig import read_mmconfig
+from otx.core.model.entity.classification import (
+    MMPretrainHlabelClsModel,
+    MMPretrainMulticlassClsModel,
+    MMPretrainMultilabelClsModel,
+)
+
+
+class EfficientNetB0ForHLabelCls(MMPretrainHlabelClsModel):
+    """EfficientNetB0 Model for hierarchical label classification task."""
+
+    def __init__(self, num_classes: int, num_multiclass_heads: int, num_multilabel_classes: int) -> None:
+        config = read_mmconfig(model_name="efficientnet_b0_light", subdir_name="hlabel_classification")
+        config.head.num_multiclass_heads = num_multiclass_heads
+        config.head.num_multilabel_classes = num_multilabel_classes
+        super().__init__(num_classes=num_classes, config=config)
+
+
+class EfficientNetB0ForMulticlassCls(MMPretrainMulticlassClsModel):
+    """EfficientNetB0 Model for multi-class classification task; `light` picks the light MMConfig."""
+
+    def __init__(self, num_classes: int, light: bool = True) -> None:
+        model_name = "efficientnet_b0_light" if light else "efficientnet_b0"
+        config = read_mmconfig(model_name=model_name, subdir_name="multiclass_classification")
+        super().__init__(num_classes=num_classes, config=config)
+
+
+class EfficientNetB0ForMultilabelCls(MMPretrainMultilabelClsModel):
+    """EfficientNetB0 Model for multi-label classification task."""
+
+    def __init__(self, num_classes: int) -> None:
+        config = read_mmconfig(model_name="efficientnet_b0_light", subdir_name="multilabel_classification")
+        super().__init__(num_classes=num_classes, config=config)
diff --git a/src/otx/algo/classification/efficientnet_v2.py b/src/otx/algo/classification/efficientnet_v2.py
new file mode 100644
index 00000000000..861278ebba3
--- /dev/null
+++ b/src/otx/algo/classification/efficientnet_v2.py
@@ -0,0 +1,38 @@
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+"""EfficientNetV2 model implementation."""
+
+from otx.algo.utils.mmconfig import read_mmconfig
+from otx.core.model.entity.classification import (
+    MMPretrainHlabelClsModel,
+    MMPretrainMulticlassClsModel,
+    MMPretrainMultilabelClsModel,
+)
+
+
+class EfficientNetV2ForHLabelCls(MMPretrainHlabelClsModel):
+    """EfficientNetV2 Model for hierarchical label classification task."""
+
+    def __init__(self, num_classes: int, num_multiclass_heads: int, num_multilabel_classes: int) -> None:
+        config = read_mmconfig("efficientnet_v2_light", subdir_name="hlabel_classification")
+        config.head.num_multiclass_heads = num_multiclass_heads
+        config.head.num_multilabel_classes = num_multilabel_classes
+        super().__init__(num_classes=num_classes, config=config)
+
+
+class EfficientNetV2ForMulticlassCls(MMPretrainMulticlassClsModel):
+    """EfficientNetV2 Model for multi-class classification task; `light` picks the light MMConfig."""
+
+    def __init__(self, num_classes: int, light: bool = True) -> None:
+        model_name = "efficientnet_v2_light" if light else "efficientnet_v2"
+        config = read_mmconfig(model_name=model_name, subdir_name="multiclass_classification")
+        super().__init__(num_classes=num_classes, config=config)
+
+
+class EfficientNetV2ForMultilabelCls(MMPretrainMultilabelClsModel):
+    """EfficientNetV2 Model for multi-label classification task."""
+
+    def __init__(self, num_classes: int) -> None:
+        config = read_mmconfig("efficientnet_v2_light", subdir_name="multilabel_classification")
+        super().__init__(num_classes=num_classes, config=config)
diff --git a/src/otx/algo/classification/mmconfigs/hlabel_classification/deit_tiny.yaml b/src/otx/algo/classification/mmconfigs/hlabel_classification/deit_tiny.yaml
new file mode 100644
index 00000000000..5e2585fc4ca
--- /dev/null
+++ b/src/otx/algo/classification/mmconfigs/hlabel_classification/deit_tiny.yaml
@@ -0,0 +1,40 @@
+load_from: https://download.openmmlab.com/mmclassification/v0/deit/deit-tiny_pt-4xb256_in1k_20220218-13b382a0.pth
+backbone:
+  arch: deit-tiny
+  type: VisionTransformer
+  img_size: 224
+  patch_size: 16
+head:
+  num_multiclass_heads: 0
+  num_multilabel_classes: 0
+  in_channels: 192
+  num_classes: 1000
+  multiclass_loss_cfg:
+    loss_weight: 1.0
+    type: CrossEntropyLoss
+  multilabel_loss_cfg:
+    reduction: sum
+    gamma_neg: 1.0
+    gamma_pos: 0.0
+    type: AsymmetricAngularLossWithIgnore
+  type: CustomHierarchicalClsHead
+data_preprocessor:
+  mean:
+    - 123.675
+    - 116.28
+    - 103.53
+  std:
+    - 58.395
+    - 57.12
+    - 57.375
+  to_rgb: False
+  type: ClsDataPreprocessor
+init_cfg:
+  - std: 0.2
+    layer: Linear
+    type: TruncNormal
+  - bias: 0.
+    val: 1.
+    layer: LayerNorm
+    type: Constant
+type: ImageClassifier
diff --git a/src/otx/algo/classification/mmconfigs/hlabel_classification/efficientnet_b0_light.yaml b/src/otx/algo/classification/mmconfigs/hlabel_classification/efficientnet_b0_light.yaml
new file mode 100644
index 00000000000..1f9e6b14ec8
--- /dev/null
+++ b/src/otx/algo/classification/mmconfigs/hlabel_classification/efficientnet_b0_light.yaml
@@ -0,0 +1,32 @@
+backbone:
+  version: b0
+  pretrained: true
+  type: OTXEfficientNet
+head:
+  num_multiclass_heads: 0
+  num_multilabel_classes: 0
+  in_channels: 1280
+  num_classes: 1000
+  multiclass_loss_cfg:
+    loss_weight: 1.0
+    type: CrossEntropyLoss
+  multilabel_loss_cfg:
+    reduction: sum
+    gamma_neg: 1.0
+    gamma_pos: 0.0
+    type: AsymmetricAngularLossWithIgnore
+  type: CustomHierarchicalClsHead
+neck:
+  type: GlobalAveragePooling
+data_preprocessor:
+  mean:
+    - 123.675
+    - 116.28
+    - 103.53
+  std:
+    - 58.395
+    - 57.12
+    - 57.375
+  to_rgb: False
+  type: ClsDataPreprocessor
+type: ImageClassifier
diff --git a/src/otx/algo/classification/mmconfigs/hlabel_classification/efficientnet_v2_light.yaml b/src/otx/algo/classification/mmconfigs/hlabel_classification/efficientnet_v2_light.yaml
new file mode 100644
index 00000000000..57e9ef6dd29
--- /dev/null
+++ b/src/otx/algo/classification/mmconfigs/hlabel_classification/efficientnet_v2_light.yaml
@@ -0,0 +1,33 @@
+backbone:
+  pretrained: true
+  type: OTXEfficientNetV2
+head:
+  num_multiclass_heads: 0
+  num_multilabel_classes: 0
+  in_channels: 1280
+  num_classes: 1000
+  multiclass_loss_cfg:
+    loss_weight: 1.0
+    type: CrossEntropyLoss
+  multilabel_loss_cfg:
+    reduction: sum
+    gamma_neg: 1.0
+    gamma_pos: 0.0
+    type: AsymmetricAngularLossWithIgnore
+  normalized: true
+  scale: 7.0
+  type: CustomHierarchicalClsHead
+neck:
+  type: GlobalAveragePooling
+data_preprocessor:
+  mean:
+    - 123.675
+    - 116.28
+    - 103.53
+  std:
+    - 58.395
+    - 57.12
+    - 57.375
+  to_rgb: False
+  type: ClsDataPreprocessor
+type: ImageClassifier
diff --git a/src/otx/algo/classification/mmconfigs/hlabel_classification/mobilenet_v3_large_light.yaml b/src/otx/algo/classification/mmconfigs/hlabel_classification/mobilenet_v3_large_light.yaml
new file mode 100644
index 00000000000..edac4f3c49c
--- /dev/null
+++ b/src/otx/algo/classification/mmconfigs/hlabel_classification/mobilenet_v3_large_light.yaml
@@ -0,0 +1,30 @@
+backbone:
+  type: OTXMobileNetV3
+head:
+  num_multiclass_heads: 0
+  num_multilabel_classes: 0
+  in_channels: 960
+  num_classes: 1000
+  multiclass_loss_cfg:
+    loss_weight: 1.0
+    type: CrossEntropyLoss
+  multilabel_loss_cfg:
+    reduction: sum
+    gamma_neg: 1.0
+    gamma_pos: 0.0
+    type: AsymmetricAngularLossWithIgnore
+  type: CustomHierarchicalClsHead
+neck:
+  type: GlobalAveragePooling
+data_preprocessor:
+  mean:
+    - 123.675
+    - 116.28
+    - 103.53
+  std:
+    - 58.395
+    - 57.12
+    - 57.375
+  to_rgb: False
+  type: ClsDataPreprocessor
+type: ImageClassifier
diff --git a/src/otx/algo/classification/mmconfigs/multiclass_classification/deit_tiny.yaml b/src/otx/algo/classification/mmconfigs/multiclass_classification/deit_tiny.yaml
new file mode 100644
index 00000000000..3832334c0d0
--- /dev/null
+++ b/src/otx/algo/classification/mmconfigs/multiclass_classification/deit_tiny.yaml
@@ -0,0 +1,33 @@
+load_from: https://download.openmmlab.com/mmclassification/v0/deit/deit-tiny_pt-4xb256_in1k_20220218-13b382a0.pth
+backbone:
+  arch: deit-tiny
+  type: VisionTransformer
+  img_size: 224
+  patch_size: 16
+head:
+  loss:
+    loss_weight: 1.0
+    type: CrossEntropyLoss
+  in_channels: 192
+  num_classes: 1000
+  type: VisionTransformerClsHead
+data_preprocessor:
+  mean:
+    - 123.675
+    - 116.28
+    - 103.53
+  std:
+    - 58.395
+    - 57.12
+    - 57.375
+  to_rgb: False
+  type: ClsDataPreprocessor
+init_cfg:
+  - std: 0.2
+    layer: Linear
+    type: TruncNormal
+  - bias: 0.
+    val: 1.
+    layer: LayerNorm
+    type: Constant
+type: ImageClassifier
diff --git a/src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_b0.yaml b/src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_b0.yaml
new file mode 100644
index 00000000000..4a6496424a7
--- /dev/null
+++ b/src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_b0.yaml
@@ -0,0 +1,39 @@
+backbone:
+  version: b0
+  pretrained: true
+  type: OTXEfficientNet
+head:
+  act_cfg:
+    type: HSwish
+  dropout_rate: 0.2
+  in_channels: 1280
+  init_cfg:
+    bias: 0.0
+    layer: Linear
+    mean: 0.0
+    std: 0.01
+    type: Normal
+  loss:
+    loss_weight: 1.0
+    type: CrossEntropyLoss
+  mid_channels:
+    - 1280
+  num_classes: 1000
+  topk:
+    - 1
+    - 5
+  type: StackedLinearClsHead
+neck:
+  type: GlobalAveragePooling
+data_preprocessor:
+  mean:
+    - 123.675
+    - 116.28
+    - 103.53
+  std:
+    - 58.395
+    - 57.12
+    - 57.375
+  to_rgb: False
+  type: ClsDataPreprocessor
+type: ImageClassifier
diff --git a/src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_b0_light.yaml b/src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_b0_light.yaml
new file mode 100644
index 00000000000..38b5e8c373b
--- /dev/null
+++ b/src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_b0_light.yaml
@@ -0,0 +1,28 @@
+backbone:
+  version: b0
+  pretrained: true
+  type: OTXEfficientNet
+head:
+  in_channels: 1280
+  loss:
+    loss_weight: 1.0
+    type: CrossEntropyLoss
+  num_classes: 1000
+  topk:
+    - 1
+    - 5
+  type: LinearClsHead
+neck:
+  type: GlobalAveragePooling
+data_preprocessor:
+  mean:
+    - 123.675
+    - 116.28
+    - 103.53
+  std:
+    - 58.395
+    - 57.12
+    - 57.375
+  to_rgb: False
+  type: ClsDataPreprocessor
+type: ImageClassifier
diff --git a/src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_v2.yaml b/src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_v2.yaml
new file mode 100644
index 00000000000..077d00d3a6c
--- /dev/null
+++ b/src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_v2.yaml
@@ -0,0 +1,38 @@
+backbone:
+  pretrained: true
+  type: OTXEfficientNetV2
+head:
+  act_cfg:
+    type: HSwish
+  dropout_rate: 0.2
+  in_channels: 1280
+  init_cfg:
+    bias: 0.0
+    layer: Linear
+    mean: 0.0
+    std: 0.01
+    type: Normal
+  loss:
+    loss_weight: 1.0
+    type: CrossEntropyLoss
+  mid_channels:
+    - 1280
+  num_classes: 1000
+  topk:
+    - 1
+    - 5
+  type: StackedLinearClsHead
+neck:
+  type: GlobalAveragePooling
+data_preprocessor:
+  mean:
+    - 123.675
+    - 116.28
+    - 103.53
+  std:
+    - 58.395
+    - 57.12
+    - 57.375
+  to_rgb: False
+  type: ClsDataPreprocessor
+type: ImageClassifier
diff --git a/src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_v2_light.yaml b/src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_v2_light.yaml
new file mode 100644
index 00000000000..c2599fa9605
--- /dev/null
+++ b/src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_v2_light.yaml
@@ -0,0 +1,27 @@
+backbone:
+  pretrained: true
+  type: OTXEfficientNetV2
+head:
+  in_channels: 1280
+  loss:
+    loss_weight: 1.0
+    type: CrossEntropyLoss
+  num_classes: 1000
+  topk:
+    - 1
+    - 5
+  type: LinearClsHead
+neck:
+  type: GlobalAveragePooling
+data_preprocessor:
+  mean:
+    - 123.675
+    - 116.28
+    - 103.53
+  std:
+    - 58.395
+    - 57.12
+    - 57.375
+  to_rgb: False
+  type: ClsDataPreprocessor
+type: ImageClassifier
diff --git a/src/otx/algo/classification/mmconfigs/multiclass_classification/mobilenet_v3_large.yaml b/src/otx/algo/classification/mmconfigs/multiclass_classification/mobilenet_v3_large.yaml
new file mode 100644
index 00000000000..73e539b12fa
--- /dev/null
+++ b/src/otx/algo/classification/mmconfigs/multiclass_classification/mobilenet_v3_large.yaml
@@ -0,0 +1,37 @@
+backbone:
+  type: OTXMobileNetV3
+head:
+  act_cfg:
+    type: HSwish
+  dropout_rate: 0.2
+  in_channels: 960
+  init_cfg:
+    bias: 0.0
+    layer: Linear
+    mean: 0.0
+    std: 0.01
+    type: Normal
+  loss:
+    loss_weight: 1.0
+    type: CrossEntropyLoss
+  mid_channels:
+    - 1280
+  num_classes: 1000
+  topk:
+    - 1
+    - 5
+  type: StackedLinearClsHead
+neck:
+  type: GlobalAveragePooling
+data_preprocessor:
+  mean:
+    - 123.675
+    - 116.28
+    - 103.53
+  std:
+    - 58.395
+    - 57.12
+    - 57.375
+  to_rgb: False
+  type: ClsDataPreprocessor
+type: ImageClassifier
diff --git a/src/otx/algo/classification/mmconfigs/multiclass_classification/mobilenet_v3_large_light.yaml b/src/otx/algo/classification/mmconfigs/multiclass_classification/mobilenet_v3_large_light.yaml
new file mode 100644
index 00000000000..5da80351d46
--- /dev/null
+++ b/src/otx/algo/classification/mmconfigs/multiclass_classification/mobilenet_v3_large_light.yaml
@@ -0,0 +1,26 @@
+backbone:
+  type: OTXMobileNetV3
+head:
+  in_channels: 960
+  loss:
+    loss_weight: 1.0
+    type: CrossEntropyLoss
+  num_classes: 1000
+  topk:
+    - 1
+    - 5
+  type: LinearClsHead
+neck:
+  type: GlobalAveragePooling
+data_preprocessor:
+  mean:
+    - 123.675
+    - 116.28
+    - 103.53
+  std:
+    - 58.395
+    - 57.12
+    - 57.375
+  to_rgb: False
+  type: ClsDataPreprocessor
+type: ImageClassifier
diff --git a/src/otx/algo/classification/mmconfigs/multilabel_classification/deit_tiny.yaml b/src/otx/algo/classification/mmconfigs/multilabel_classification/deit_tiny.yaml
new file mode 100644
index 00000000000..7074faf4632
--- /dev/null
+++ b/src/otx/algo/classification/mmconfigs/multilabel_classification/deit_tiny.yaml
@@ -0,0 +1,32 @@
+load_from: https://download.openmmlab.com/mmclassification/v0/deit/deit-tiny_pt-4xb256_in1k_20220218-13b382a0.pth
+backbone:
+  arch: deit-tiny
+  type: VisionTransformer
+  img_size: 224
+  patch_size: 16
+head:
+  in_channels: 192
+  num_classes: 1000
+  loss:
+    type: AsymmetricAngularLossWithIgnore
+  type: CustomMultiLabelLinearClsHead
+data_preprocessor:
+  mean:
+    - 123.675
+    - 116.28
+    - 103.53
+  std:
+    - 58.395
+    - 57.12
+    - 57.375
+  to_rgb: False
+  type: ClsDataPreprocessor
+init_cfg:
+  - std: 0.2
+    layer: Linear
+    type: TruncNormal
+  - bias: 0.
+    val: 1.
+    layer: LayerNorm
+    type: Constant
+type: ImageClassifier
diff --git a/src/otx/algo/classification/mmconfigs/multilabel_classification/efficientnet_b0_light.yaml b/src/otx/algo/classification/mmconfigs/multilabel_classification/efficientnet_b0_light.yaml
new file mode 100644
index 00000000000..8e4cc2af946
--- /dev/null
+++ b/src/otx/algo/classification/mmconfigs/multilabel_classification/efficientnet_b0_light.yaml
@@ -0,0 +1,29 @@
+backbone:
+  version: b0
+  pretrained: true
+  type: OTXEfficientNet
+head:
+  num_classes: 1000
+  in_channels: 1280
+  loss:
+    reduction: sum
+    gamma_neg: 1.0
+    gamma_pos: 0.0
+    type: AsymmetricAngularLossWithIgnore
+  normalized: true
+  scale: 7.0
+  type: CustomMultiLabelLinearClsHead
+neck:
+  type: GlobalAveragePooling
+data_preprocessor:
+  mean:
+    - 123.675
+    - 116.28
+    - 103.53
+  std:
+    - 58.395
+    - 57.12
+    - 57.375
+  to_rgb: False
+  type: ClsDataPreprocessor
+type: ImageClassifier
diff --git a/src/otx/algo/classification/mmconfigs/multilabel_classification/efficientnet_v2_light.yaml b/src/otx/algo/classification/mmconfigs/multilabel_classification/efficientnet_v2_light.yaml
new file mode 100644
index 00000000000..29eb048563c
--- /dev/null
+++ b/src/otx/algo/classification/mmconfigs/multilabel_classification/efficientnet_v2_light.yaml
@@ -0,0 +1,28 @@
+backbone:
+  pretrained: true
+  type: OTXEfficientNetV2
+head:
+  in_channels: 1280
+  num_classes: 1000
+  loss:
+    reduction: sum
+    gamma_neg: 1.0
+    gamma_pos: 0.0
+    type: AsymmetricAngularLossWithIgnore
+  normalized: true
+  scale: 7.0
+  type: CustomMultiLabelLinearClsHead
+neck:
+  type: GlobalAveragePooling
+data_preprocessor:
+  mean:
+    - 123.675
+    - 116.28
+    - 103.53
+  std:
+    - 58.395
+    - 57.12
+    - 57.375
+  to_rgb: False
+  type: ClsDataPreprocessor
+type: ImageClassifier
diff --git a/src/otx/algo/classification/mmconfigs/multilabel_classification/mobilenet_v3_large_light.yaml b/src/otx/algo/classification/mmconfigs/multilabel_classification/mobilenet_v3_large_light.yaml
new file mode 100644
index 00000000000..b183d39773c
--- /dev/null
+++ b/src/otx/algo/classification/mmconfigs/multilabel_classification/mobilenet_v3_large_light.yaml
@@ -0,0 +1,30 @@
+backbone:
+  type: OTXMobileNetV3
+head:
+  num_classes: 1000
+  in_channels: 960
+  hid_channels: 1280
+  loss:
+    reduction: sum
+    gamma_neg: 1.0
+    gamma_pos: 0.0
+    type: AsymmetricAngularLossWithIgnore
+  normalized: true
+  scale: 7.0
+  act_cfg:
+    type: PReLU
+  type: CustomMultiLabelNonLinearClsHead
+neck:
+  type: GlobalAveragePooling
+data_preprocessor:
+  mean:
+    - 123.675
+    - 116.28
+    - 103.53
+  std:
+    - 58.395
+    - 57.12
+    - 57.375
+  to_rgb: False
+  type: ClsDataPreprocessor
+type: ImageClassifier
diff --git a/src/otx/algo/classification/mobilenet_v3_large.py b/src/otx/algo/classification/mobilenet_v3_large.py
new file mode 100644
index 00000000000..126ccb00d44
--- /dev/null
+++ b/src/otx/algo/classification/mobilenet_v3_large.py
@@ -0,0 +1,38 @@
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+"""MobileNetV3 model implementation."""
+
+from otx.algo.utils.mmconfig import read_mmconfig
+from otx.core.model.entity.classification import (
+    MMPretrainHlabelClsModel,
+    MMPretrainMulticlassClsModel,
+    MMPretrainMultilabelClsModel,
+)
+
+
+class MobileNetV3ForHLabelCls(MMPretrainHlabelClsModel):
+    """MobileNetV3 model for the hierarchical-label classification task (built from the light mmconfig)."""
+
+    def __init__(self, num_classes: int, num_multiclass_heads: int, num_multilabel_classes: int) -> None:
+        mmconfig = read_mmconfig("mobilenet_v3_large_light", subdir_name="hlabel_classification")
+        mmconfig.head.num_multiclass_heads = num_multiclass_heads  # replaces the yaml value
+        mmconfig.head.num_multilabel_classes = num_multilabel_classes  # replaces the yaml value
+        super().__init__(config=mmconfig, num_classes=num_classes)
+
+
+class MobileNetV3ForMulticlassCls(MMPretrainMulticlassClsModel):
+    """MobileNetV3 Model for multi-class classification task (`light` picks the light or full mmconfig)."""
+
+    def __init__(self, num_classes: int, light: bool = True) -> None:
+        model_name = "mobilenet_v3_large_light" if light else "mobilenet_v3_large"  # must match mmconfigs/*.yaml
+        config = read_mmconfig(model_name=model_name, subdir_name="multiclass_classification")
+        super().__init__(num_classes=num_classes, config=config)
+
+
+class MobileNetV3ForMultilabelCls(MMPretrainMultilabelClsModel):
+    """MobileNetV3 Model for multi-label classification task (built from the light mmconfig)."""
+
+    def __init__(self, num_classes: int) -> None:
+        config = read_mmconfig(model_name="mobilenet_v3_large_light", subdir_name="multilabel_classification")
+        super().__init__(num_classes=num_classes, config=config)
diff --git a/src/otx/algo/utils/mmconfig.py b/src/otx/algo/utils/mmconfig.py
index 4860950f1f7..501bbeb5242 100644
--- a/src/otx/algo/utils/mmconfig.py
+++ b/src/otx/algo/utils/mmconfig.py
@@ -9,23 +9,36 @@
 from omegaconf import DictConfig, OmegaConf
 
 
-def read_mmconfig(model_name: str) -> DictConfig:
+def read_mmconfig(model_name: str, subdir_name: str = ".") -> DictConfig:
     """Read MMConfig.
 
-    It try to read MMConfig from the yaml file which exists in
-    `<Directory path of __file__ who calls this function>/mmconfigs/<model_name>.yaml`
+    It tries to read MMConfig from the yaml file which exists in
+    `<Directory path of __file__ who calls this function>/mmconfigs/<subdir_name>/<model_name>.yaml`
+
+    Args:
+        model_name: File name (without the `.yaml` suffix) of the config to load.
+        subdir_name: Sub-directory under `mmconfigs` that holds the yaml file.
+            The default `"."` looks directly in `mmconfigs`.
+
+    Returns:
+        Config loaded from the yaml file by `OmegaConf.load`.
+
+    Raises:
+        RuntimeError: If the caller's module or its `__file__` cannot be resolved.
+        FileNotFoundError: If the yaml file does not exist.
     """
     frame = inspect.stack()[1]
     module = inspect.getmodule(frame[0])
 
-    if module is None or module.__file__ is None:
+    if module is None or (mod_fpath := module.__file__) is None:
         msg = "Cannot get valid model from stack"
         raise RuntimeError(msg)
 
-    root_dir = Path().parent / "mmconfigs"
-    fpath = root_dir / f"{model_name}.yaml"
+    root_dir = Path(mod_fpath).parent / "mmconfigs" / subdir_name
+    yaml_fpath = root_dir / f"{model_name}.yaml"
 
-    if not fpath.exists():
-        raise FileNotFoundError
+    if not yaml_fpath.exists():
+        # Carry the missing path so a wrong model or sub-directory name is easy to diagnose.
+        raise FileNotFoundError(yaml_fpath)
 
-    return OmegaConf.load(fpath)
+    return OmegaConf.load(yaml_fpath)
diff --git a/src/otx/config/model/hlabel_mmpretrain.yaml b/src/otx/config/model/hlabel_mmpretrain.yaml
index aa4cbca9c7f..ba9ba3d6dd5 100644
--- a/src/otx/config/model/hlabel_mmpretrain.yaml
+++ b/src/otx/config/model/hlabel_mmpretrain.yaml
@@ -5,7 +5,9 @@ _target_: otx.core.model.module.classification.OTXHlabelClsLitModule
 
 otx_model:
   _target_: otx.core.model.entity.classification.MMPretrainHlabelClsModel
-  config: ???
+  num_classes: ???
+  num_multiclass_heads: ???
+  num_multilabel_classes: ???
 
 # compile model for faster training with pytorch 2.0
 torch_compile: false
diff --git a/src/otx/config/model/multiclass_mmpretrain.yaml b/src/otx/config/model/multiclass_mmpretrain.yaml
index 6688c5a1650..b51b2832179 100644
--- a/src/otx/config/model/multiclass_mmpretrain.yaml
+++ b/src/otx/config/model/multiclass_mmpretrain.yaml
@@ -5,7 +5,7 @@ _target_: otx.core.model.module.classification.OTXMulticlassClsLitModule
 
 otx_model:
   _target_: otx.core.model.entity.classification.MMPretrainMulticlassClsModel
-  config: ???
+  num_classes: ???
 
 # compile model for faster training with pytorch 2.0
 torch_compile: false
diff --git a/src/otx/config/model/multilabel_mmpretrain.yaml b/src/otx/config/model/multilabel_mmpretrain.yaml
index f86b8bf081e..320007faa95 100644
--- a/src/otx/config/model/multilabel_mmpretrain.yaml
+++ b/src/otx/config/model/multilabel_mmpretrain.yaml
@@ -5,7 +5,7 @@ _target_: otx.core.model.module.classification.OTXMultilabelClsLitModule
 
 otx_model:
   _target_: otx.core.model.entity.classification.MMPretrainMultilabelClsModel
-  config: ???
+  num_classes: ???
 
 # compile model for faster training with pytorch 2.0
 torch_compile: false
diff --git a/src/otx/recipe/hlabel_classification/efficientnet_b0_light.yaml b/src/otx/recipe/hlabel_classification/efficientnet_b0_light.yaml
index 227c5224ba4..2f8b3d1e64d 100644
--- a/src/otx/recipe/hlabel_classification/efficientnet_b0_light.yaml
+++ b/src/otx/recipe/hlabel_classification/efficientnet_b0_light.yaml
@@ -37,39 +37,7 @@ data:
       - type: PackInputs
 model:
   otx_model:
-    config:
-      backbone:
-        version: b0
-        pretrained: true
-        type: OTXEfficientNet
-      head:
-        num_multiclass_heads: 0
-        num_multilabel_classes: 0
-        in_channels: 1280
-        num_classes: 1000
-        multiclass_loss_cfg:
-          loss_weight: 1.0
-          type: CrossEntropyLoss
-        multilabel_loss_cfg:
-          reduction: sum
-          gamma_neg: 1.0
-          gamma_pos: 0.0
-          type: AsymmetricAngularLossWithIgnore
-        type: CustomHierarchicalClsHead
-      neck:
-        type: GlobalAveragePooling
-      data_preprocessor:
-        mean:
-          - 123.675
-          - 116.28
-          - 103.53
-        std:
-          - 58.395
-          - 57.12
-          - 57.375
-        to_rgb: False
-        type: ClsDataPreprocessor
-      type: ImageClassifier
+    _target_: otx.algo.classification.efficientnet_b0.EfficientNetB0ForHLabelCls
   optimizer:
     _target_: torch.optim.SGD
     lr: 0.0049
diff --git a/src/otx/recipe/hlabel_classification/efficientnet_v2_light.yaml b/src/otx/recipe/hlabel_classification/efficientnet_v2_light.yaml
index 0872a998655..38545bdaa9c 100644
--- a/src/otx/recipe/hlabel_classification/efficientnet_v2_light.yaml
+++ b/src/otx/recipe/hlabel_classification/efficientnet_v2_light.yaml
@@ -40,40 +40,7 @@ data:
       - type: PackInputs
 model:
   otx_model:
-    config:
-      backbone:
-        pretrained: true
-        type: OTXEfficientNetV2
-      head:
-        num_multiclass_heads: 0
-        num_multilabel_classes: 0
-        in_channels: 1280
-        num_classes: 1000
-        multiclass_loss_cfg:
-          loss_weight: 1.0
-          type: CrossEntropyLoss
-        multilabel_loss_cfg:
-          reduction: sum
-          gamma_neg: 1.0
-          gamma_pos: 0.0
-          type: AsymmetricAngularLossWithIgnore
-        normalized: true
-        scale: 7.0
-        type: CustomHierarchicalClsHead
-      neck:
-        type: GlobalAveragePooling
-      data_preprocessor:
-        mean:
-          - 123.675
-          - 116.28
-          - 103.53
-        std:
-          - 58.395
-          - 57.12
-          - 57.375
-        to_rgb: False
-        type: ClsDataPreprocessor
-      type: ImageClassifier
+    _target_: otx.algo.classification.efficientnet_v2.EfficientNetV2ForHLabelCls
   optimizer:
     _target_: torch.optim.SGD
     _partial_: true
diff --git a/src/otx/recipe/hlabel_classification/mobilenet_v3_large_light.yaml b/src/otx/recipe/hlabel_classification/mobilenet_v3_large_light.yaml
index 20e63b6a33a..52c5089ac72 100644
--- a/src/otx/recipe/hlabel_classification/mobilenet_v3_large_light.yaml
+++ b/src/otx/recipe/hlabel_classification/mobilenet_v3_large_light.yaml
@@ -40,37 +40,7 @@ data:
       - type: PackInputs
 model:
   otx_model:
-    config:
-      backbone:
-        type: OTXMobileNetV3
-      head:
-        num_multiclass_heads: 0
-        num_multilabel_classes: 0
-        in_channels: 960
-        num_classes: 1000
-        multiclass_loss_cfg:
-          loss_weight: 1.0
-          type: CrossEntropyLoss
-        multilabel_loss_cfg:
-          reduction: sum
-          gamma_neg: 1.0
-          gamma_pos: 0.0
-          type: AsymmetricAngularLossWithIgnore
-        type: CustomHierarchicalClsHead
-      neck:
-        type: GlobalAveragePooling
-      data_preprocessor:
-        mean:
-          - 123.675
-          - 116.28
-          - 103.53
-        std:
-          - 58.395
-          - 57.12
-          - 57.375
-        to_rgb: False
-        type: ClsDataPreprocessor
-      type: ImageClassifier
+    _target_: otx.algo.classification.mobilenet_v3_large.MobileNetV3ForHLabelCls
   optimizer:
     _target_: torch.optim.SGD
     _partial_: true
diff --git a/src/otx/recipe/hlabel_classification/otx_deit_tiny.yaml b/src/otx/recipe/hlabel_classification/otx_deit_tiny.yaml
index 6a8749ecfba..ec37e0d30c2 100644
--- a/src/otx/recipe/hlabel_classification/otx_deit_tiny.yaml
+++ b/src/otx/recipe/hlabel_classification/otx_deit_tiny.yaml
@@ -37,47 +37,7 @@ data:
       - type: PackInputs
 model:
   otx_model:
-    config:
-      load_from: https://download.openmmlab.com/mmclassification/v0/deit/deit-tiny_pt-4xb256_in1k_20220218-13b382a0.pth
-      backbone:
-        arch: deit-tiny
-        type: VisionTransformer
-        img_size: 224
-        patch_size: 16
-      head:
-        num_multiclass_heads: 0
-        num_multilabel_classes: 0
-        in_channels: 192
-        num_classes: 1000
-        multiclass_loss_cfg:
-          loss_weight: 1.0
-          type: CrossEntropyLoss
-        multilabel_loss_cfg:
-          reduction: sum
-          gamma_neg: 1.0
-          gamma_pos: 0.0
-          type: AsymmetricAngularLossWithIgnore
-        type: CustomHierarchicalClsHead
-      data_preprocessor:
-        mean:
-          - 123.675
-          - 116.28
-          - 103.53
-        std:
-          - 58.395
-          - 57.12
-          - 57.375
-        to_rgb: False
-        type: ClsDataPreprocessor
-      init_cfg:
-        - std: 0.2
-          layer: Linear
-          type: TruncNormal
-        - bias: 0.
-          val: 1.
-          layer: LayerNorm
-          type: Constant
-      type: ImageClassifier
+    _target_: otx.algo.classification.deit_tiny.DeitTinyForHLabelCls
   optimizer:
     _target_: torch.optim.SGD
     _partial_: true
diff --git a/src/otx/recipe/multiclass_classification/efficientnet_b0_light.yaml b/src/otx/recipe/multiclass_classification/efficientnet_b0_light.yaml
index 4af293e853c..6a53ceb5403 100644
--- a/src/otx/recipe/multiclass_classification/efficientnet_b0_light.yaml
+++ b/src/otx/recipe/multiclass_classification/efficientnet_b0_light.yaml
@@ -37,35 +37,8 @@ data:
       - type: PackInputs
 model:
   otx_model:
-    config:
-      backbone:
-        version: b0
-        pretrained: true
-        type: OTXEfficientNet
-      head:
-        in_channels: 1280
-        loss:
-          loss_weight: 1.0
-          type: CrossEntropyLoss
-        num_classes: 1000
-        topk:
-          - 1
-          - 5
-        type: LinearClsHead
-      neck:
-        type: GlobalAveragePooling
-      data_preprocessor:
-        mean:
-          - 123.675
-          - 116.28
-          - 103.53
-        std:
-          - 58.395
-          - 57.12
-          - 57.375
-        to_rgb: False
-        type: ClsDataPreprocessor
-      type: ImageClassifier
+    _target_: otx.algo.classification.efficientnet_b0.EfficientNetB0ForMulticlassCls
+    light: True
   optimizer:
     _target_: torch.optim.SGD
     _partial_: true
diff --git a/src/otx/recipe/multiclass_classification/efficientnet_v2_light.yaml b/src/otx/recipe/multiclass_classification/efficientnet_v2_light.yaml
index bcba891ed3f..2490ac766d2 100644
--- a/src/otx/recipe/multiclass_classification/efficientnet_v2_light.yaml
+++ b/src/otx/recipe/multiclass_classification/efficientnet_v2_light.yaml
@@ -40,34 +40,8 @@ data:
       - type: PackInputs
 model:
   otx_model:
-    config:
-      backbone:
-        pretrained: true
-        type: OTXEfficientNetV2
-      head:
-        in_channels: 1280
-        loss:
-          loss_weight: 1.0
-          type: CrossEntropyLoss
-        num_classes: 1000
-        topk:
-          - 1
-          - 5
-        type: LinearClsHead
-      neck:
-        type: GlobalAveragePooling
-      data_preprocessor:
-        mean:
-          - 123.675
-          - 116.28
-          - 103.53
-        std:
-          - 58.395
-          - 57.12
-          - 57.375
-        to_rgb: False
-        type: ClsDataPreprocessor
-      type: ImageClassifier
+    _target_: otx.algo.classification.efficientnet_v2.EfficientNetV2ForMulticlassCls
+    light: True
   optimizer:
     _target_: torch.optim.SGD
     _partial_: true
diff --git a/src/otx/recipe/multiclass_classification/mobilenet_v3_large_light.yaml b/src/otx/recipe/multiclass_classification/mobilenet_v3_large_light.yaml
index 3b5d70f9cd1..4bdcc22c2b6 100644
--- a/src/otx/recipe/multiclass_classification/mobilenet_v3_large_light.yaml
+++ b/src/otx/recipe/multiclass_classification/mobilenet_v3_large_light.yaml
@@ -40,33 +40,8 @@ data:
       - type: PackInputs
 model:
   otx_model:
-    config:
-      backbone:
-        type: OTXMobileNetV3
-      head:
-        in_channels: 960
-        loss:
-          loss_weight: 1.0
-          type: CrossEntropyLoss
-        num_classes: 1000
-        topk:
-          - 1
-          - 5
-        type: LinearClsHead
-      neck:
-        type: GlobalAveragePooling
-      data_preprocessor:
-        mean:
-          - 123.675
-          - 116.28
-          - 103.53
-        std:
-          - 58.395
-          - 57.12
-          - 57.375
-        to_rgb: False
-        type: ClsDataPreprocessor
-      type: ImageClassifier
+    _target_: otx.algo.classification.mobilenet_v3_large.MobileNetV3ForMulticlassCls
+    light: True
   optimizer:
     _target_: torch.optim.SGD
     _partial_: true
diff --git a/src/otx/recipe/multiclass_classification/otx_deit_tiny.yaml b/src/otx/recipe/multiclass_classification/otx_deit_tiny.yaml
index a3670eccb1d..ff11b2f3c6f 100644
--- a/src/otx/recipe/multiclass_classification/otx_deit_tiny.yaml
+++ b/src/otx/recipe/multiclass_classification/otx_deit_tiny.yaml
@@ -37,40 +37,7 @@ data:
       - type: PackInputs
 model:
   otx_model:
-    config:
-      load_from: https://download.openmmlab.com/mmclassification/v0/deit/deit-tiny_pt-4xb256_in1k_20220218-13b382a0.pth
-      backbone:
-        arch: deit-tiny
-        type: VisionTransformer
-        img_size: 224
-        patch_size: 16
-      head:
-        loss:
-          loss_weight: 1.0
-          type: CrossEntropyLoss
-        in_channels: 192
-        num_classes: 1000
-        type: VisionTransformerClsHead
-      data_preprocessor:
-        mean:
-          - 123.675
-          - 116.28
-          - 103.53
-        std:
-          - 58.395
-          - 57.12
-          - 57.375
-        to_rgb: False
-        type: ClsDataPreprocessor
-      init_cfg:
-        - std: 0.2
-          layer: Linear
-          type: TruncNormal
-        - bias: 0.
-          val: 1.
-          layer: LayerNorm
-          type: Constant
-      type: ImageClassifier
+    _target_: otx.algo.classification.deit_tiny.DeitTinyForMulticlassCls
   optimizer:
     _target_: torch.optim.SGD
     _partial_: true
diff --git a/src/otx/recipe/multiclass_classification/otx_efficientnet_b0.yaml b/src/otx/recipe/multiclass_classification/otx_efficientnet_b0.yaml
index 813490b79b4..dd7a80dc6fd 100644
--- a/src/otx/recipe/multiclass_classification/otx_efficientnet_b0.yaml
+++ b/src/otx/recipe/multiclass_classification/otx_efficientnet_b0.yaml
@@ -37,46 +37,7 @@ data:
       - type: PackInputs
 model:
   otx_model:
-    config:
-      backbone:
-        version: b0
-        pretrained: true
-        type: OTXEfficientNet
-      head:
-        act_cfg:
-          type: HSwish
-        dropout_rate: 0.2
-        in_channels: 1280
-        init_cfg:
-          bias: 0.0
-          layer: Linear
-          mean: 0.0
-          std: 0.01
-          type: Normal
-        loss:
-          loss_weight: 1.0
-          type: CrossEntropyLoss
-        mid_channels:
-          - 1280
-        num_classes: 1000
-        topk:
-          - 1
-          - 5
-        type: StackedLinearClsHead
-      neck:
-        type: GlobalAveragePooling
-      data_preprocessor:
-        mean:
-          - 123.675
-          - 116.28
-          - 103.53
-        std:
-          - 58.395
-          - 57.12
-          - 57.375
-        to_rgb: False
-        type: ClsDataPreprocessor
-      type: ImageClassifier
+    _target_: otx.algo.classification.efficientnet_b0.EfficientNetB0ForMulticlassCls
   optimizer:
     _target_: torch.optim.SGD
     _partial_: true
diff --git a/src/otx/recipe/multiclass_classification/otx_efficientnet_v2.yaml b/src/otx/recipe/multiclass_classification/otx_efficientnet_v2.yaml
index a8f8a1f72e9..31c1d603967 100644
--- a/src/otx/recipe/multiclass_classification/otx_efficientnet_v2.yaml
+++ b/src/otx/recipe/multiclass_classification/otx_efficientnet_v2.yaml
@@ -40,45 +40,7 @@ data:
       - type: PackInputs
 model:
   otx_model:
-    config:
-      backbone:
-        pretrained: true
-        type: OTXEfficientNetV2
-      head:
-        act_cfg:
-          type: HSwish
-        dropout_rate: 0.2
-        in_channels: 1280
-        init_cfg:
-          bias: 0.0
-          layer: Linear
-          mean: 0.0
-          std: 0.01
-          type: Normal
-        loss:
-          loss_weight: 1.0
-          type: CrossEntropyLoss
-        mid_channels:
-          - 1280
-        num_classes: 1000
-        topk:
-          - 1
-          - 5
-        type: StackedLinearClsHead
-      neck:
-        type: GlobalAveragePooling
-      data_preprocessor:
-        mean:
-          - 123.675
-          - 116.28
-          - 103.53
-        std:
-          - 58.395
-          - 57.12
-          - 57.375
-        to_rgb: False
-        type: ClsDataPreprocessor
-      type: ImageClassifier
+    _target_: otx.algo.classification.efficientnet_v2.EfficientNetV2ForMulticlassCls
   optimizer:
     _target_: torch.optim.SGD
     _partial_: true
diff --git a/src/otx/recipe/multiclass_classification/otx_mobilenet_v3_large.yaml b/src/otx/recipe/multiclass_classification/otx_mobilenet_v3_large.yaml
index d102ad1017e..09ceb9e555b 100644
--- a/src/otx/recipe/multiclass_classification/otx_mobilenet_v3_large.yaml
+++ b/src/otx/recipe/multiclass_classification/otx_mobilenet_v3_large.yaml
@@ -40,44 +40,7 @@ data:
       - type: PackInputs
 model:
   otx_model:
-    config:
-      backbone:
-        type: OTXMobileNetV3
-      head:
-        act_cfg:
-          type: HSwish
-        dropout_rate: 0.2
-        in_channels: 960
-        init_cfg:
-          bias: 0.0
-          layer: Linear
-          mean: 0.0
-          std: 0.01
-          type: Normal
-        loss:
-          loss_weight: 1.0
-          type: CrossEntropyLoss
-        mid_channels:
-          - 1280
-        num_classes: 1000
-        topk:
-          - 1
-          - 5
-        type: StackedLinearClsHead
-      neck:
-        type: GlobalAveragePooling
-      data_preprocessor:
-        mean:
-          - 123.675
-          - 116.28
-          - 103.53
-        std:
-          - 58.395
-          - 57.12
-          - 57.375
-        to_rgb: False
-        type: ClsDataPreprocessor
-      type: ImageClassifier
+    _target_: otx.algo.classification.mobilenet_v3_large.MobileNetV3ForMulticlassCls
   optimizer:
     _target_: torch.optim.SGD
     _partial_: true
diff --git a/src/otx/recipe/multilabel_classification/efficientnet_b0_light.yaml b/src/otx/recipe/multilabel_classification/efficientnet_b0_light.yaml
index 60c8f196fcd..a1e6cadc31c 100644
--- a/src/otx/recipe/multilabel_classification/efficientnet_b0_light.yaml
+++ b/src/otx/recipe/multilabel_classification/efficientnet_b0_light.yaml
@@ -37,36 +37,7 @@ data:
       - type: PackInputs
 model:
   otx_model:
-    config:
-      backbone:
-        version: b0
-        pretrained: true
-        type: OTXEfficientNet
-      head:
-        num_classes: 1000
-        in_channels: 1280
-        loss:
-          reduction: sum
-          gamma_neg: 1.0
-          gamma_pos: 0.0
-          type: AsymmetricAngularLossWithIgnore
-        normalized: true
-        scale: 7.0
-        type: CustomMultiLabelLinearClsHead
-      neck:
-        type: GlobalAveragePooling
-      data_preprocessor:
-        mean:
-          - 123.675
-          - 116.28
-          - 103.53
-        std:
-          - 58.395
-          - 57.12
-          - 57.375
-        to_rgb: False
-        type: ClsDataPreprocessor
-      type: ImageClassifier
+    _target_: otx.algo.classification.efficientnet_b0.EfficientNetB0ForMultilabelCls
   optimizer:
     _target_: torch.optim.SGD
     lr: 0.0049
diff --git a/src/otx/recipe/multilabel_classification/efficientnet_v2_light.yaml b/src/otx/recipe/multilabel_classification/efficientnet_v2_light.yaml
index 4ab9ba2e2d1..b7e8e238dae 100644
--- a/src/otx/recipe/multilabel_classification/efficientnet_v2_light.yaml
+++ b/src/otx/recipe/multilabel_classification/efficientnet_v2_light.yaml
@@ -40,35 +40,7 @@ data:
       - type: PackInputs
 model:
   otx_model:
-    config:
-      backbone:
-        pretrained: true
-        type: OTXEfficientNetV2
-      head:
-        in_channels: 1280
-        num_classes: 1000
-        loss:
-          reduction: sum
-          gamma_neg: 1.0
-          gamma_pos: 0.0
-          type: AsymmetricAngularLossWithIgnore
-        normalized: true
-        scale: 7.0
-        type: CustomMultiLabelLinearClsHead
-      neck:
-        type: GlobalAveragePooling
-      data_preprocessor:
-        mean:
-          - 123.675
-          - 116.28
-          - 103.53
-        std:
-          - 58.395
-          - 57.12
-          - 57.375
-        to_rgb: False
-        type: ClsDataPreprocessor
-      type: ImageClassifier
+    _target_: otx.algo.classification.efficientnet_v2.EfficientNetV2ForMultilabelCls
   optimizer:
     _target_: torch.optim.SGD
     _partial_: true
diff --git a/src/otx/recipe/multilabel_classification/mobilenet_v3_large_light.yaml b/src/otx/recipe/multilabel_classification/mobilenet_v3_large_light.yaml
index 204edbdc6c5..53695f65cb0 100644
--- a/src/otx/recipe/multilabel_classification/mobilenet_v3_large_light.yaml
+++ b/src/otx/recipe/multilabel_classification/mobilenet_v3_large_light.yaml
@@ -40,37 +40,7 @@ data:
       - type: PackInputs
 model:
   otx_model:
-    config:
-      backbone:
-        type: OTXMobileNetV3
-      head:
-        num_classes: 1000
-        in_channels: 960
-        hid_channels: 1280
-        loss:
-          reduction: sum
-          gamma_neg: 1.0
-          gamma_pos: 0.0
-          type: AsymmetricAngularLossWithIgnore
-        normalized: true
-        scale: 7.0
-        act_cfg:
-          type: PReLU
-        type: CustomMultiLabelNonLinearClsHead
-      neck:
-        type: GlobalAveragePooling
-      data_preprocessor:
-        mean:
-          - 123.675
-          - 116.28
-          - 103.53
-        std:
-          - 58.395
-          - 57.12
-          - 57.375
-        to_rgb: False
-        type: ClsDataPreprocessor
-      type: ImageClassifier
+    _target_: otx.algo.classification.mobilenet_v3_large.MobileNetV3ForMultilabelCls
   optimizer:
     _target_: torch.optim.SGD
     _partial_: true
diff --git a/src/otx/recipe/multilabel_classification/otx_deit_tiny.yaml b/src/otx/recipe/multilabel_classification/otx_deit_tiny.yaml
index e39d06ccb40..67f02b66077 100644
--- a/src/otx/recipe/multilabel_classification/otx_deit_tiny.yaml
+++ b/src/otx/recipe/multilabel_classification/otx_deit_tiny.yaml
@@ -37,39 +37,7 @@ data:
       - type: PackInputs
 model:
   otx_model:
-    config:
-      load_from: https://download.openmmlab.com/mmclassification/v0/deit/deit-tiny_pt-4xb256_in1k_20220218-13b382a0.pth
-      backbone:
-        arch: deit-tiny
-        type: VisionTransformer
-        img_size: 224
-        patch_size: 16
-      head:
-        in_channels: 192
-        num_classes: 1000
-        loss:
-          type: AsymmetricAngularLossWithIgnore
-        type: CustomMultiLabelLinearClsHead
-      data_preprocessor:
-        mean:
-          - 123.675
-          - 116.28
-          - 103.53
-        std:
-          - 58.395
-          - 57.12
-          - 57.375
-        to_rgb: False
-        type: ClsDataPreprocessor
-      init_cfg:
-        - std: 0.2
-          layer: Linear
-          type: TruncNormal
-        - bias: 0.
-          val: 1.
-          layer: LayerNorm
-          type: Constant
-      type: ImageClassifier
+    _target_: otx.algo.classification.deit_tiny.DeitTinyForMultilabelCls
   optimizer:
     _target_: torch.optim.SGD
     _partial_: true
diff --git a/tests/integration/cli/test_cli.py b/tests/integration/cli/test_cli.py
index fa1c126678a..852f1776def 100644
--- a/tests/integration/cli/test_cli.py
+++ b/tests/integration/cli/test_cli.py
@@ -35,8 +35,8 @@
         "data_dir": "tests/assets/hlabel_classification",
         "overrides": [
             "model.otx_model.num_classes=7",
-            "model.otx_model.config.head.num_multiclass_heads=2",
-            "model.otx_model.config.head.num_multilabel_classes=3",
+            "model.otx_model.num_multiclass_heads=2",
+            "model.otx_model.num_multilabel_classes=3",
         ],
     },
     "detection": {

From 9dfaa941e2a1c5dd4159a2c1f5ddfc5dc079c32b Mon Sep 17 00:00:00 2001
From: "Kim, Vinnam" <vinnam.kim@intel.com>
Date: Thu, 11 Jan 2024 19:57:59 +0900
Subject: [PATCH 6/8] Fix light=False as default

Signed-off-by: Kim, Vinnam <vinnam.kim@intel.com>
---
 src/otx/algo/classification/efficientnet_b0.py                | 4 ++--
 src/otx/algo/classification/efficientnet_v2.py                | 4 ++--
 src/otx/algo/classification/mobilenet_v3_large.py             | 4 ++--
 .../recipe/multiclass_classification/otx_efficientnet_b0.yaml | 1 +
 .../recipe/multiclass_classification/otx_efficientnet_v2.yaml | 1 +
 .../multiclass_classification/otx_mobilenet_v3_large.yaml     | 1 +
 6 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/otx/algo/classification/efficientnet_b0.py b/src/otx/algo/classification/efficientnet_b0.py
index 3259d1e0921..a7c92b4433d 100644
--- a/src/otx/algo/classification/efficientnet_b0.py
+++ b/src/otx/algo/classification/efficientnet_b0.py
@@ -24,8 +24,8 @@ def __init__(self, num_classes: int, num_multiclass_heads: int, num_multilabel_c
 class EfficientNetB0ForMulticlassCls(MMPretrainMulticlassClsModel):
     """EfficientNetB0 Model for multi-label classification task."""
 
-    def __init__(self, num_classes: int, light: bool = True) -> None:
-        model_name = "efficientnet_b0_light" if light else "otx_efficientnet_b0"
+    def __init__(self, num_classes: int, light: bool = False) -> None:
+        model_name = "efficientnet_b0_light" if light else "efficientnet_b0"
         config = read_mmconfig(model_name=model_name, subdir_name="multiclass_classification")
         super().__init__(num_classes=num_classes, config=config)
 
diff --git a/src/otx/algo/classification/efficientnet_v2.py b/src/otx/algo/classification/efficientnet_v2.py
index 861278ebba3..56829c2a1a3 100644
--- a/src/otx/algo/classification/efficientnet_v2.py
+++ b/src/otx/algo/classification/efficientnet_v2.py
@@ -24,8 +24,8 @@ def __init__(self, num_classes: int, num_multiclass_heads: int, num_multilabel_c
 class EfficientNetV2ForMulticlassCls(MMPretrainMulticlassClsModel):
     """EfficientNetV2 Model for multi-label classification task."""
 
-    def __init__(self, num_classes: int, light: bool = True) -> None:
-        model_name = "efficientnet_v2_light" if light else "otx_efficientnet_v2"
+    def __init__(self, num_classes: int, light: bool = False) -> None:
+        model_name = "efficientnet_v2_light" if light else "efficientnet_v2"
         config = read_mmconfig(model_name=model_name, subdir_name="multiclass_classification")
         super().__init__(num_classes=num_classes, config=config)
 
diff --git a/src/otx/algo/classification/mobilenet_v3_large.py b/src/otx/algo/classification/mobilenet_v3_large.py
index 126ccb00d44..049b9bf238f 100644
--- a/src/otx/algo/classification/mobilenet_v3_large.py
+++ b/src/otx/algo/classification/mobilenet_v3_large.py
@@ -24,8 +24,8 @@ def __init__(self, num_classes: int, num_multiclass_heads: int, num_multilabel_c
 class MobileNetV3ForMulticlassCls(MMPretrainMulticlassClsModel):
     """MobileNetV3 Model for multi-label classification task."""
 
-    def __init__(self, num_classes: int, light: bool = True) -> None:
-        model_name = "mobilenet_v3_large_light" if light else "otx_mobilenet_v3_large"
+    def __init__(self, num_classes: int, light: bool = False) -> None:
+        model_name = "mobilenet_v3_large_light" if light else "mobilenet_v3_large"
         config = read_mmconfig(model_name=model_name, subdir_name="multiclass_classification")
         super().__init__(num_classes=num_classes, config=config)
 
diff --git a/src/otx/recipe/multiclass_classification/otx_efficientnet_b0.yaml b/src/otx/recipe/multiclass_classification/otx_efficientnet_b0.yaml
index dd7a80dc6fd..7498be0bfb0 100644
--- a/src/otx/recipe/multiclass_classification/otx_efficientnet_b0.yaml
+++ b/src/otx/recipe/multiclass_classification/otx_efficientnet_b0.yaml
@@ -38,6 +38,7 @@ data:
 model:
   otx_model:
     _target_: otx.algo.classification.efficientnet_b0.EfficientNetB0ForMulticlassCls
+    light: false
   optimizer:
     _target_: torch.optim.SGD
     _partial_: true
diff --git a/src/otx/recipe/multiclass_classification/otx_efficientnet_v2.yaml b/src/otx/recipe/multiclass_classification/otx_efficientnet_v2.yaml
index 31c1d603967..d5c6d454171 100644
--- a/src/otx/recipe/multiclass_classification/otx_efficientnet_v2.yaml
+++ b/src/otx/recipe/multiclass_classification/otx_efficientnet_v2.yaml
@@ -41,6 +41,7 @@ data:
 model:
   otx_model:
     _target_: otx.algo.classification.efficientnet_v2.EfficientNetV2ForMulticlassCls
+    light: false
   optimizer:
     _target_: torch.optim.SGD
     _partial_: true
diff --git a/src/otx/recipe/multiclass_classification/otx_mobilenet_v3_large.yaml b/src/otx/recipe/multiclass_classification/otx_mobilenet_v3_large.yaml
index 09ceb9e555b..e6b749832ac 100644
--- a/src/otx/recipe/multiclass_classification/otx_mobilenet_v3_large.yaml
+++ b/src/otx/recipe/multiclass_classification/otx_mobilenet_v3_large.yaml
@@ -41,6 +41,7 @@ data:
 model:
   otx_model:
     _target_: otx.algo.classification.mobilenet_v3_large.MobileNetV3ForMulticlassCls
+    light: false
   optimizer:
     _target_: torch.optim.SGD
     _partial_: true

From 669d935209ac14f857b52b25ce3957fc9ce03f15 Mon Sep 17 00:00:00 2001
From: "Kim, Vinnam" <vinnam.kim@intel.com>
Date: Fri, 12 Jan 2024 10:31:18 +0900
Subject: [PATCH 7/8] Make read_mmconfig() more descriptive

Signed-off-by: Kim, Vinnam <vinnam.kim@intel.com>
---
 src/otx/algo/action_classification/__init__.py |  2 +-
 src/otx/algo/action_detection/__init__.py      |  2 +-
 src/otx/algo/utils/mmconfig.py                 | 15 +++++++++++----
 3 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/src/otx/algo/action_classification/__init__.py b/src/otx/algo/action_classification/__init__.py
index 2f4d6dc99d5..231852979c0 100644
--- a/src/otx/algo/action_classification/__init__.py
+++ b/src/otx/algo/action_classification/__init__.py
@@ -1,4 +1,4 @@
 # Copyright (C) 2023 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
-"""Module for OTX action classification models, hooks, utils, etc."""
+"""Module for OTX action classification models."""
diff --git a/src/otx/algo/action_detection/__init__.py b/src/otx/algo/action_detection/__init__.py
index 4b58f3a3c7b..0510329eab3 100644
--- a/src/otx/algo/action_detection/__init__.py
+++ b/src/otx/algo/action_detection/__init__.py
@@ -1,4 +1,4 @@
 # Copyright (C) 2023 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0
 #
-"""Module for OTX action detection models, hooks, utils, etc."""
+"""Module for OTX action detection models."""
diff --git a/src/otx/algo/utils/mmconfig.py b/src/otx/algo/utils/mmconfig.py
index 501bbeb5242..55dc5cda5a9 100644
--- a/src/otx/algo/utils/mmconfig.py
+++ b/src/otx/algo/utils/mmconfig.py
@@ -14,18 +14,25 @@ def read_mmconfig(model_name: str, subdir_name: str = ".") -> DictConfig:
 
     It try to read MMConfig from the yaml file which exists in
     `<Directory path of __file__ who calls this function>/mmconfigs/<subdir_name>/<model_name>.yaml`
+
+    For example, if this function is called in `otx/algo/action_classification/x3d.py`,
+    `otx/algo/action_classification/mmconfigs/x3d.yaml` will be read.
     """
     frame = inspect.stack()[1]
     module = inspect.getmodule(frame[0])
 
-    if module is None or (mod_fpath := module.__file__) is None:
-        msg = "Cannot get valid model from stack"
+    if module is None or (module_file_path := module.__file__) is None:
+        msg = (
+            "Cannot get a valid module from Python function stack. "
+            "Please refer to this function docstring to see how to use correctly."
+        )
         raise RuntimeError(msg)
 
-    root_dir = Path(mod_fpath).parent / "mmconfigs" / subdir_name
+    root_dir = Path(module_file_path).parent / "mmconfigs" / subdir_name
     yaml_fpath = root_dir / f"{model_name}.yaml"
 
     if not yaml_fpath.exists():
-        raise FileNotFoundError
+        msg = f"mmconfig file for {model_name} is not found in {yaml_fpath}"
+        raise FileNotFoundError(msg)
 
     return OmegaConf.load(yaml_fpath)

From 360decc81b41328cccb3cb266350baa559d0b254 Mon Sep 17 00:00:00 2001
From: "Kim, Vinnam" <vinnam.kim@intel.com>
Date: Fri, 12 Jan 2024 11:57:32 +0900
Subject: [PATCH 8/8] Change inst seg task

Signed-off-by: Kim, Vinnam <vinnam.kim@intel.com>
---
 .../algo/instance_segmentation/__init__.py    |   4 +
 .../algo/instance_segmentation/maskrcnn.py    |  18 ++
 .../mmconfigs/maskrcnn_efficientnetb2b.yaml   | 198 ++++++++++++++++
 .../mmconfigs/maskrcnn_r50.yaml               | 198 ++++++++++++++++
 .../mmconfigs/maskrcnn_swint.yaml             | 211 +++++++++++++++++
 src/otx/config/model/mmdet_inst_seg.yaml      |   1 -
 src/otx/config/model/mmseg.yaml               |   1 -
 .../maskrcnn_efficientnetb2b.yaml             | 201 +---------------
 .../instance_segmentation/maskrcnn_r50.yaml   | 201 +---------------
 .../instance_segmentation/maskrcnn_swint.yaml | 214 +-----------------
 10 files changed, 635 insertions(+), 612 deletions(-)
 create mode 100644 src/otx/algo/instance_segmentation/__init__.py
 create mode 100644 src/otx/algo/instance_segmentation/maskrcnn.py
 create mode 100644 src/otx/algo/instance_segmentation/mmconfigs/maskrcnn_efficientnetb2b.yaml
 create mode 100644 src/otx/algo/instance_segmentation/mmconfigs/maskrcnn_r50.yaml
 create mode 100644 src/otx/algo/instance_segmentation/mmconfigs/maskrcnn_swint.yaml

diff --git a/src/otx/algo/instance_segmentation/__init__.py b/src/otx/algo/instance_segmentation/__init__.py
new file mode 100644
index 00000000000..61cbaeff261
--- /dev/null
+++ b/src/otx/algo/instance_segmentation/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+"""Module for OTX instance segmentation models."""
diff --git a/src/otx/algo/instance_segmentation/maskrcnn.py b/src/otx/algo/instance_segmentation/maskrcnn.py
new file mode 100644
index 00000000000..7e590f255db
--- /dev/null
+++ b/src/otx/algo/instance_segmentation/maskrcnn.py
@@ -0,0 +1,18 @@
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+"""MaskRCNN model implementations."""
+
+from typing import Literal
+
+from otx.algo.utils.mmconfig import read_mmconfig
+from otx.core.model.entity.instance_segmentation import MMDetInstanceSegCompatibleModel
+
+
+class MaskRCNN(MMDetInstanceSegCompatibleModel):
+    """MaskRCNN model whose mmconfig (`maskrcnn_<variant>.yaml`) is selected by `variant`."""
+
+    def __init__(self, num_classes: int, variant: Literal["efficientnetb2b", "r50", "swint"]) -> None:
+        model_name = f"maskrcnn_{variant}"
+        config = read_mmconfig(model_name=model_name)
+        super().__init__(num_classes=num_classes, config=config)
diff --git a/src/otx/algo/instance_segmentation/mmconfigs/maskrcnn_efficientnetb2b.yaml b/src/otx/algo/instance_segmentation/mmconfigs/maskrcnn_efficientnetb2b.yaml
new file mode 100644
index 00000000000..ae1e8a885ef
--- /dev/null
+++ b/src/otx/algo/instance_segmentation/mmconfigs/maskrcnn_efficientnetb2b.yaml
@@ -0,0 +1,198 @@
+load_from: https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/models/instance_segmentation/v2/efficientnet_b2b-mask_rcnn-576x576.pth
+data_preprocessor:
+  type: "DetDataPreprocessor"
+  bgr_to_rgb: false
+  mean:
+    - 123.675
+    - 116.28
+    - 103.53
+  pad_mask: true
+  pad_size_divisor: 32
+  std:
+    - 1.0
+    - 1.0
+    - 1.0
+type: MaskRCNN
+backbone:
+  type: efficientnet_b2b
+  out_indices:
+    - 2
+    - 3
+    - 4
+    - 5
+  frozen_stages: -1
+  pretrained: true
+  activation_cfg:
+    type: torch_swish
+  norm_cfg:
+    type: BN
+    requires_grad: true
+neck:
+  type: FPN
+  in_channels:
+    - 24
+    - 48
+    - 120
+    - 352
+  out_channels: 80
+  num_outs: 5
+rpn_head:
+  type: RPNHead
+  in_channels: 80
+  feat_channels: 80
+  anchor_generator:
+    type: AnchorGenerator
+    scales:
+      - 8
+    ratios:
+      - 0.5
+      - 1.0
+      - 2.0
+    strides:
+      - 4
+      - 8
+      - 16
+      - 32
+      - 64
+  bbox_coder:
+    type: DeltaXYWHBBoxCoder
+    target_means:
+      - 0.0
+      - 0.0
+      - 0.0
+      - 0.0
+    target_stds:
+      - 1.0
+      - 1.0
+      - 1.0
+      - 1.0
+  loss_cls:
+    type: CrossEntropyLoss
+    use_sigmoid: true
+    loss_weight: 1.0
+  loss_bbox:
+    type: L1Loss
+    loss_weight: 1.0
+roi_head:
+  type: StandardRoIHead
+  bbox_roi_extractor:
+    type: SingleRoIExtractor
+    roi_layer:
+      type: RoIAlign
+      output_size: 7
+      sampling_ratio: 0
+    out_channels: 80
+    featmap_strides:
+      - 4
+      - 8
+      - 16
+      - 32
+  bbox_head:
+    type: Shared2FCBBoxHead
+    in_channels: 80
+    fc_out_channels: 1024
+    roi_feat_size: 7
+    num_classes: 80
+    bbox_coder:
+      type: DeltaXYWHBBoxCoder
+      target_means:
+        - 0.0
+        - 0.0
+        - 0.0
+        - 0.0
+      target_stds:
+        - 0.1
+        - 0.1
+        - 0.2
+        - 0.2
+    reg_class_agnostic: false
+    loss_cls:
+      type: CrossEntropyLoss
+      use_sigmoid: false
+      loss_weight: 1.0
+    loss_bbox:
+      type: L1Loss
+      loss_weight: 1.0
+  mask_roi_extractor:
+    type: SingleRoIExtractor
+    roi_layer:
+      type: RoIAlign
+      output_size: 14
+      sampling_ratio: 0
+    out_channels: 80
+    featmap_strides:
+      - 4
+      - 8
+      - 16
+      - 32
+  mask_head:
+    type: FCNMaskHead
+    num_convs: 4
+    in_channels: 80
+    conv_out_channels: 80
+    num_classes: 80
+    loss_mask:
+      type: CrossEntropyLoss
+      use_mask: true
+      loss_weight: 1.0
+train_cfg:
+  rpn:
+    assigner:
+      type: MaxIoUAssigner
+      pos_iou_thr: 0.7
+      neg_iou_thr: 0.3
+      min_pos_iou: 0.3
+      match_low_quality: true
+      ignore_iof_thr: -1
+      gpu_assign_thr: 300
+    sampler:
+      type: RandomSampler
+      num: 256
+      pos_fraction: 0.5
+      neg_pos_ub: -1
+      add_gt_as_proposals: false
+    allowed_border: -1
+    pos_weight: -1
+    debug: false
+  rpn_proposal:
+    nms_across_levels: false
+    nms_pre: 2000
+    max_per_img: 1000
+    nms:
+      type: nms
+      iou_threshold: 0.8
+    min_bbox_size: 0
+  rcnn:
+    assigner:
+      type: MaxIoUAssigner
+      pos_iou_thr: 0.5
+      neg_iou_thr: 0.5
+      min_pos_iou: 0.5
+      match_low_quality: true
+      ignore_iof_thr: -1
+      gpu_assign_thr: 300
+    sampler:
+      type: RandomSampler
+      num: 256
+      pos_fraction: 0.25
+      neg_pos_ub: -1
+      add_gt_as_proposals: true
+    mask_size: 28
+    pos_weight: -1
+    debug: false
+test_cfg:
+  rpn:
+    nms_across_levels: false
+    nms_pre: 800
+    max_per_img: 500
+    nms:
+      type: nms
+      iou_threshold: 0.8
+    min_bbox_size: 0
+  rcnn:
+    score_thr: 0.05
+    nms:
+      type: nms
+      iou_threshold: 0.7
+    max_per_img: 500
+    mask_thr_binary: 0.5
diff --git a/src/otx/algo/instance_segmentation/mmconfigs/maskrcnn_r50.yaml b/src/otx/algo/instance_segmentation/mmconfigs/maskrcnn_r50.yaml
new file mode 100644
index 00000000000..7d9534e14f1
--- /dev/null
+++ b/src/otx/algo/instance_segmentation/mmconfigs/maskrcnn_r50.yaml
@@ -0,0 +1,198 @@
+load_from: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_fpn_mstrain-poly_3x_coco_20210524_201154-21b550bb.pth
+backbone:
+  depth: 50
+  frozen_stages: 1
+  init_cfg:
+    checkpoint: "torchvision://resnet50"
+    type: "Pretrained"
+  norm_cfg:
+    requires_grad: true
+    type: "BN"
+  norm_eval: true
+  num_stages: 4
+  out_indices:
+    - 0
+    - 1
+    - 2
+    - 3
+  style: "pytorch"
+  type: "ResNet"
+data_preprocessor:
+  bgr_to_rgb: false
+  mean:
+    - 123.675
+    - 116.28
+    - 103.53
+  pad_mask: true
+  pad_size_divisor: 32
+  std:
+    - 58.395
+    - 57.12
+    - 57.375
+  type: "DetDataPreprocessor"
+neck:
+  in_channels:
+    - 256
+    - 512
+    - 1024
+    - 2048
+  num_outs: 5
+  out_channels: 256
+  type: "FPN"
+roi_head:
+  bbox_head:
+    bbox_coder:
+      target_means:
+        - 0.0
+        - 0.0
+        - 0.0
+        - 0.0
+      target_stds:
+        - 0.1
+        - 0.1
+        - 0.2
+        - 0.2
+      type: "DeltaXYWHBBoxCoder"
+    fc_out_channels: 1024
+    in_channels: 256
+    loss_bbox:
+      loss_weight: 1.0
+      type: "L1Loss"
+    loss_cls:
+      loss_weight: 1.0
+      type: "CrossEntropyLoss"
+      use_sigmoid: false
+    num_classes: 5
+    reg_class_agnostic: false
+    roi_feat_size: 7
+    type: "Shared2FCBBoxHead"
+  bbox_roi_extractor:
+    featmap_strides:
+      - 4
+      - 8
+      - 16
+      - 32
+    out_channels: 256
+    roi_layer:
+      output_size: 7
+      sampling_ratio: 0
+      type: "RoIAlign"
+    type: "SingleRoIExtractor"
+  mask_head:
+    conv_out_channels: 256
+    in_channels: 256
+    loss_mask:
+      loss_weight: 1.0
+      type: "CrossEntropyLoss"
+      use_mask: true
+    num_classes: 5
+    num_convs: 4
+    type: "FCNMaskHead"
+  mask_roi_extractor:
+    featmap_strides:
+      - 4
+      - 8
+      - 16
+      - 32
+    out_channels: 256
+    roi_layer:
+      output_size: 14
+      sampling_ratio: 0
+      type: "RoIAlign"
+    type: "SingleRoIExtractor"
+  type: "StandardRoIHead"
+rpn_head:
+  anchor_generator:
+    ratios:
+      - 0.5
+      - 1.0
+      - 2.0
+    scales:
+      - 8
+    strides:
+      - 4
+      - 8
+      - 16
+      - 32
+      - 64
+    type: "AnchorGenerator"
+  bbox_coder:
+    target_means:
+      - 0.0
+      - 0.0
+      - 0.0
+      - 0.0
+    target_stds:
+      - 1.0
+      - 1.0
+      - 1.0
+      - 1.0
+    type: "DeltaXYWHBBoxCoder"
+  feat_channels: 256
+  in_channels: 256
+  loss_bbox:
+    loss_weight: 1.0
+    type: "L1Loss"
+  loss_cls:
+    loss_weight: 1.0
+    type: "CrossEntropyLoss"
+    use_sigmoid: true
+  type: "RPNHead"
+test_cfg:
+  rcnn:
+    mask_thr_binary: 0.5
+    max_per_img: 100
+    nms:
+      iou_threshold: 0.5
+      type: "nms"
+    score_thr: 0.05
+  rpn:
+    max_per_img: 1000
+    min_bbox_size: 0
+    nms:
+      iou_threshold: 0.7
+      type: "nms"
+    nms_pre: 1000
+train_cfg:
+  rcnn:
+    assigner:
+      ignore_iof_thr: -1
+      match_low_quality: true
+      min_pos_iou: 0.5
+      neg_iou_thr: 0.5
+      pos_iou_thr: 0.5
+      type: "MaxIoUAssigner"
+    debug: false
+    mask_size: 28
+    pos_weight: -1
+    sampler:
+      add_gt_as_proposals: true
+      neg_pos_ub: -1
+      num: 512
+      pos_fraction: 0.25
+      type: "RandomSampler"
+  rpn:
+    allowed_border: -1
+    assigner:
+      ignore_iof_thr: -1
+      match_low_quality: true
+      min_pos_iou: 0.3
+      neg_iou_thr: 0.3
+      pos_iou_thr: 0.7
+      type: "MaxIoUAssigner"
+    debug: false
+    pos_weight: -1
+    sampler:
+      add_gt_as_proposals: false
+      neg_pos_ub: -1
+      num: 256
+      pos_fraction: 0.5
+      type: "RandomSampler"
+  rpn_proposal:
+    max_per_img: 1000
+    min_bbox_size: 0
+    nms:
+      iou_threshold: 0.7
+      type: "nms"
+    nms_pre: 2000
+type: "MaskRCNN"
diff --git a/src/otx/algo/instance_segmentation/mmconfigs/maskrcnn_swint.yaml b/src/otx/algo/instance_segmentation/mmconfigs/maskrcnn_swint.yaml
new file mode 100644
index 00000000000..a6a8459cc97
--- /dev/null
+++ b/src/otx/algo/instance_segmentation/mmconfigs/maskrcnn_swint.yaml
@@ -0,0 +1,211 @@
+load_from: https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco_20210908_165006-90a4008c.pth
+backbone:
+  attn_drop_rate: 0.0
+  convert_weights: true
+  depths:
+    - 2
+    - 2
+    - 6
+    - 2
+  drop_path_rate: 0.2
+  drop_rate: 0.0
+  embed_dims: 96
+  init_cfg:
+    checkpoint: https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth
+    type: Pretrained
+  mlp_ratio: 4
+  num_heads:
+    - 3
+    - 6
+    - 12
+    - 24
+  out_indices:
+    - 0
+    - 1
+    - 2
+    - 3
+  patch_norm: true
+  qk_scale: null
+  qkv_bias: true
+  type: SwinTransformer
+  window_size: 7
+  with_cp: false
+data_preprocessor:
+  bgr_to_rgb: false
+  mean:
+    - 123.675
+    - 116.28
+    - 103.53
+  pad_mask: true
+  pad_size_divisor: 32
+  std:
+    - 58.395
+    - 57.12
+    - 57.375
+  type: DetDataPreprocessor
+neck:
+  in_channels:
+    - 96
+    - 192
+    - 384
+    - 768
+  num_outs: 5
+  out_channels: 256
+  type: FPN
+roi_head:
+  bbox_head:
+    bbox_coder:
+      target_means:
+        - 0.0
+        - 0.0
+        - 0.0
+        - 0.0
+      target_stds:
+        - 0.1
+        - 0.1
+        - 0.2
+        - 0.2
+      type: DeltaXYWHBBoxCoder
+    fc_out_channels: 1024
+    in_channels: 256
+    loss_bbox:
+      loss_weight: 1.0
+      type: L1Loss
+    loss_cls:
+      loss_weight: 1.0
+      type: CrossEntropyLoss
+      use_sigmoid: false
+    num_classes: 80
+    reg_class_agnostic: false
+    roi_feat_size: 7
+    type: Shared2FCBBoxHead
+  bbox_roi_extractor:
+    featmap_strides:
+      - 4
+      - 8
+      - 16
+      - 32
+    out_channels: 256
+    roi_layer:
+      output_size: 7
+      sampling_ratio: 0
+      type: RoIAlign
+    type: SingleRoIExtractor
+  mask_head:
+    conv_out_channels: 256
+    in_channels: 256
+    loss_mask:
+      loss_weight: 1.0
+      type: CrossEntropyLoss
+      use_mask: true
+    num_classes: 80
+    num_convs: 4
+    type: FCNMaskHead
+  mask_roi_extractor:
+    featmap_strides:
+      - 4
+      - 8
+      - 16
+      - 32
+    out_channels: 256
+    roi_layer:
+      output_size: 14
+      sampling_ratio: 0
+      type: RoIAlign
+    type: SingleRoIExtractor
+  type: StandardRoIHead
+rpn_head:
+  anchor_generator:
+    ratios:
+      - 0.5
+      - 1.0
+      - 2.0
+    scales:
+      - 8
+    strides:
+      - 4
+      - 8
+      - 16
+      - 32
+      - 64
+    type: AnchorGenerator
+  bbox_coder:
+    target_means:
+      - 0.0
+      - 0.0
+      - 0.0
+      - 0.0
+    target_stds:
+      - 1.0
+      - 1.0
+      - 1.0
+      - 1.0
+    type: DeltaXYWHBBoxCoder
+  feat_channels: 256
+  in_channels: 256
+  loss_bbox:
+    loss_weight: 1.0
+    type: L1Loss
+  loss_cls:
+    loss_weight: 1.0
+    type: CrossEntropyLoss
+    use_sigmoid: true
+  type: RPNHead
+test_cfg:
+  rcnn:
+    mask_thr_binary: 0.5
+    max_per_img: 100
+    nms:
+      iou_threshold: 0.5
+      type: nms
+    score_thr: 0.05
+  rpn:
+    max_per_img: 1000
+    min_bbox_size: 0
+    nms:
+      iou_threshold: 0.7
+      type: nms
+    nms_pre: 1000
+train_cfg:
+  rcnn:
+    assigner:
+      ignore_iof_thr: -1
+      match_low_quality: true
+      min_pos_iou: 0.5
+      neg_iou_thr: 0.5
+      pos_iou_thr: 0.5
+      type: MaxIoUAssigner
+    debug: false
+    mask_size: 28
+    pos_weight: -1
+    sampler:
+      add_gt_as_proposals: true
+      neg_pos_ub: -1
+      num: 512
+      pos_fraction: 0.25
+      type: RandomSampler
+  rpn:
+    allowed_border: -1
+    assigner:
+      ignore_iof_thr: -1
+      match_low_quality: true
+      min_pos_iou: 0.3
+      neg_iou_thr: 0.3
+      pos_iou_thr: 0.7
+      type: MaxIoUAssigner
+    debug: false
+    pos_weight: -1
+    sampler:
+      add_gt_as_proposals: false
+      neg_pos_ub: -1
+      num: 256
+      pos_fraction: 0.5
+      type: RandomSampler
+  rpn_proposal:
+    max_per_img: 1000
+    min_bbox_size: 0
+    nms:
+      iou_threshold: 0.7
+      type: nms
+    nms_pre: 2000
+type: MaskRCNN
diff --git a/src/otx/config/model/mmdet_inst_seg.yaml b/src/otx/config/model/mmdet_inst_seg.yaml
index 716e1555e39..8a1bc66deb2 100644
--- a/src/otx/config/model/mmdet_inst_seg.yaml
+++ b/src/otx/config/model/mmdet_inst_seg.yaml
@@ -17,7 +17,6 @@ scheduler:
 
 otx_model:
   _target_: otx.core.model.entity.instance_segmentation.MMDetInstanceSegCompatibleModel
-  config: ???
 
 # compile model for faster training with pytorch 2.0
 torch_compile: false
diff --git a/src/otx/config/model/mmseg.yaml b/src/otx/config/model/mmseg.yaml
index 7072fee469e..0c7f6b93b70 100644
--- a/src/otx/config/model/mmseg.yaml
+++ b/src/otx/config/model/mmseg.yaml
@@ -13,7 +13,6 @@ scheduler:
 
 otx_model:
   _target_: otx.core.model.entity.segmentation.MMSegCompatibleModel
-  config: ???
   num_classes: ???
 
 # compile model for faster training with pytorch 2.0
diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml
index e6088dfe1c6..446996a4a07 100644
--- a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml
+++ b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml
@@ -65,205 +65,8 @@ data:
 
 model:
   otx_model:
-    config:
-      load_from: https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/models/instance_segmentation/v2/efficientnet_b2b-mask_rcnn-576x576.pth
-      data_preprocessor:
-        type: "DetDataPreprocessor"
-        bgr_to_rgb: false
-        mean:
-          - 123.675
-          - 116.28
-          - 103.53
-        pad_mask: true
-        pad_size_divisor: 32
-        std:
-          - 1.0
-          - 1.0
-          - 1.0
-      type: MaskRCNN
-      backbone:
-        type: efficientnet_b2b
-        out_indices:
-          - 2
-          - 3
-          - 4
-          - 5
-        frozen_stages: -1
-        pretrained: true
-        activation_cfg:
-          type: torch_swish
-        norm_cfg:
-          type: BN
-          requires_grad: true
-      neck:
-        type: FPN
-        in_channels:
-          - 24
-          - 48
-          - 120
-          - 352
-        out_channels: 80
-        num_outs: 5
-      rpn_head:
-        type: RPNHead
-        in_channels: 80
-        feat_channels: 80
-        anchor_generator:
-          type: AnchorGenerator
-          scales:
-            - 8
-          ratios:
-            - 0.5
-            - 1.0
-            - 2.0
-          strides:
-            - 4
-            - 8
-            - 16
-            - 32
-            - 64
-        bbox_coder:
-          type: DeltaXYWHBBoxCoder
-          target_means:
-            - 0.0
-            - 0.0
-            - 0.0
-            - 0.0
-          target_stds:
-            - 1.0
-            - 1.0
-            - 1.0
-            - 1.0
-        loss_cls:
-          type: CrossEntropyLoss
-          use_sigmoid: true
-          loss_weight: 1.0
-        loss_bbox:
-          type: L1Loss
-          loss_weight: 1.0
-      roi_head:
-        type: StandardRoIHead
-        bbox_roi_extractor:
-          type: SingleRoIExtractor
-          roi_layer:
-            type: RoIAlign
-            output_size: 7
-            sampling_ratio: 0
-          out_channels: 80
-          featmap_strides:
-            - 4
-            - 8
-            - 16
-            - 32
-        bbox_head:
-          type: Shared2FCBBoxHead
-          in_channels: 80
-          fc_out_channels: 1024
-          roi_feat_size: 7
-          num_classes: 80
-          bbox_coder:
-            type: DeltaXYWHBBoxCoder
-            target_means:
-              - 0.0
-              - 0.0
-              - 0.0
-              - 0.0
-            target_stds:
-              - 0.1
-              - 0.1
-              - 0.2
-              - 0.2
-          reg_class_agnostic: false
-          loss_cls:
-            type: CrossEntropyLoss
-            use_sigmoid: false
-            loss_weight: 1.0
-          loss_bbox:
-            type: L1Loss
-            loss_weight: 1.0
-        mask_roi_extractor:
-          type: SingleRoIExtractor
-          roi_layer:
-            type: RoIAlign
-            output_size: 14
-            sampling_ratio: 0
-          out_channels: 80
-          featmap_strides:
-            - 4
-            - 8
-            - 16
-            - 32
-        mask_head:
-          type: FCNMaskHead
-          num_convs: 4
-          in_channels: 80
-          conv_out_channels: 80
-          num_classes: 80
-          loss_mask:
-            type: CrossEntropyLoss
-            use_mask: true
-            loss_weight: 1.0
-      train_cfg:
-        rpn:
-          assigner:
-            type: MaxIoUAssigner
-            pos_iou_thr: 0.7
-            neg_iou_thr: 0.3
-            min_pos_iou: 0.3
-            match_low_quality: true
-            ignore_iof_thr: -1
-            gpu_assign_thr: 300
-          sampler:
-            type: RandomSampler
-            num: 256
-            pos_fraction: 0.5
-            neg_pos_ub: -1
-            add_gt_as_proposals: false
-          allowed_border: -1
-          pos_weight: -1
-          debug: false
-        rpn_proposal:
-          nms_across_levels: false
-          nms_pre: 2000
-          max_per_img: 1000
-          nms:
-            type: nms
-            iou_threshold: 0.8
-          min_bbox_size: 0
-        rcnn:
-          assigner:
-            type: MaxIoUAssigner
-            pos_iou_thr: 0.5
-            neg_iou_thr: 0.5
-            min_pos_iou: 0.5
-            match_low_quality: true
-            ignore_iof_thr: -1
-            gpu_assign_thr: 300
-          sampler:
-            type: RandomSampler
-            num: 256
-            pos_fraction: 0.25
-            neg_pos_ub: -1
-            add_gt_as_proposals: true
-          mask_size: 28
-          pos_weight: -1
-          debug: false
-      test_cfg:
-        rpn:
-          nms_across_levels: false
-          nms_pre: 800
-          max_per_img: 500
-          nms:
-            type: nms
-            iou_threshold: 0.8
-          min_bbox_size: 0
-        rcnn:
-          score_thr: 0.05
-          nms:
-            type: nms
-            iou_threshold: 0.7
-          max_per_img: 500
-          mask_thr_binary: 0.5
+    _target_: otx.algo.instance_segmentation.maskrcnn.MaskRCNN
+    variant: efficientnetb2b
   optimizer:
     lr: 0.007
     weight_decay: 0.001
diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml
index a0b01837d0e..03181a9a483 100644
--- a/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml
+++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml
@@ -65,205 +65,8 @@ data:
 
 model:
   otx_model:
-    config:
-      load_from: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_fpn_mstrain-poly_3x_coco_20210524_201154-21b550bb.pth
-      backbone:
-        depth: 50
-        frozen_stages: 1
-        init_cfg:
-          checkpoint: "torchvision://resnet50"
-          type: "Pretrained"
-        norm_cfg:
-          requires_grad: true
-          type: "BN"
-        norm_eval: true
-        num_stages: 4
-        out_indices:
-          - 0
-          - 1
-          - 2
-          - 3
-        style: "pytorch"
-        type: "ResNet"
-      data_preprocessor:
-        bgr_to_rgb: false
-        mean:
-          - 123.675
-          - 116.28
-          - 103.53
-        pad_mask: true
-        pad_size_divisor: 32
-        std:
-          - 58.395
-          - 57.12
-          - 57.375
-        type: "DetDataPreprocessor"
-      neck:
-        in_channels:
-          - 256
-          - 512
-          - 1024
-          - 2048
-        num_outs: 5
-        out_channels: 256
-        type: "FPN"
-      roi_head:
-        bbox_head:
-          bbox_coder:
-            target_means:
-              - 0.0
-              - 0.0
-              - 0.0
-              - 0.0
-            target_stds:
-              - 0.1
-              - 0.1
-              - 0.2
-              - 0.2
-            type: "DeltaXYWHBBoxCoder"
-          fc_out_channels: 1024
-          in_channels: 256
-          loss_bbox:
-            loss_weight: 1.0
-            type: "L1Loss"
-          loss_cls:
-            loss_weight: 1.0
-            type: "CrossEntropyLoss"
-            use_sigmoid: false
-          num_classes: 5
-          reg_class_agnostic: false
-          roi_feat_size: 7
-          type: "Shared2FCBBoxHead"
-        bbox_roi_extractor:
-          featmap_strides:
-            - 4
-            - 8
-            - 16
-            - 32
-          out_channels: 256
-          roi_layer:
-            output_size: 7
-            sampling_ratio: 0
-            type: "RoIAlign"
-          type: "SingleRoIExtractor"
-        mask_head:
-          conv_out_channels: 256
-          in_channels: 256
-          loss_mask:
-            loss_weight: 1.0
-            type: "CrossEntropyLoss"
-            use_mask: true
-          num_classes: 5
-          num_convs: 4
-          type: "FCNMaskHead"
-        mask_roi_extractor:
-          featmap_strides:
-            - 4
-            - 8
-            - 16
-            - 32
-          out_channels: 256
-          roi_layer:
-            output_size: 14
-            sampling_ratio: 0
-            type: "RoIAlign"
-          type: "SingleRoIExtractor"
-        type: "StandardRoIHead"
-      rpn_head:
-        anchor_generator:
-          ratios:
-            - 0.5
-            - 1.0
-            - 2.0
-          scales:
-            - 8
-          strides:
-            - 4
-            - 8
-            - 16
-            - 32
-            - 64
-          type: "AnchorGenerator"
-        bbox_coder:
-          target_means:
-            - 0.0
-            - 0.0
-            - 0.0
-            - 0.0
-          target_stds:
-            - 1.0
-            - 1.0
-            - 1.0
-            - 1.0
-          type: "DeltaXYWHBBoxCoder"
-        feat_channels: 256
-        in_channels: 256
-        loss_bbox:
-          loss_weight: 1.0
-          type: "L1Loss"
-        loss_cls:
-          loss_weight: 1.0
-          type: "CrossEntropyLoss"
-          use_sigmoid: true
-        type: "RPNHead"
-      test_cfg:
-        rcnn:
-          mask_thr_binary: 0.5
-          max_per_img: 100
-          nms:
-            iou_threshold: 0.5
-            type: "nms"
-          score_thr: 0.05
-        rpn:
-          max_per_img: 1000
-          min_bbox_size: 0
-          nms:
-            iou_threshold: 0.7
-            type: "nms"
-          nms_pre: 1000
-      train_cfg:
-        rcnn:
-          assigner:
-            ignore_iof_thr: -1
-            match_low_quality: true
-            min_pos_iou: 0.5
-            neg_iou_thr: 0.5
-            pos_iou_thr: 0.5
-            type: "MaxIoUAssigner"
-          debug: false
-          mask_size: 28
-          pos_weight: -1
-          sampler:
-            add_gt_as_proposals: true
-            neg_pos_ub: -1
-            num: 512
-            pos_fraction: 0.25
-            type: "RandomSampler"
-        rpn:
-          allowed_border: -1
-          assigner:
-            ignore_iof_thr: -1
-            match_low_quality: true
-            min_pos_iou: 0.3
-            neg_iou_thr: 0.3
-            pos_iou_thr: 0.7
-            type: "MaxIoUAssigner"
-          debug: false
-          pos_weight: -1
-          sampler:
-            add_gt_as_proposals: false
-            neg_pos_ub: -1
-            num: 256
-            pos_fraction: 0.5
-            type: "RandomSampler"
-        rpn_proposal:
-          max_per_img: 1000
-          min_bbox_size: 0
-          nms:
-            iou_threshold: 0.7
-            type: "nms"
-          nms_pre: 2000
-      type: "MaskRCNN"
+    _target_: otx.algo.instance_segmentation.maskrcnn.MaskRCNN
+    variant: r50
   optimizer:
     lr: 0.007
     weight_decay: 0.001
diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml
index b6fa877887d..f933b053765 100644
--- a/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml
+++ b/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml
@@ -65,218 +65,8 @@ data:
 
 model:
   otx_model:
-    config:
-      load_from: https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco_20210908_165006-90a4008c.pth
-      backbone:
-        attn_drop_rate: 0.0
-        convert_weights: true
-        depths:
-          - 2
-          - 2
-          - 6
-          - 2
-        drop_path_rate: 0.2
-        drop_rate: 0.0
-        embed_dims: 96
-        init_cfg:
-          checkpoint: https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth
-          type: Pretrained
-        mlp_ratio: 4
-        num_heads:
-          - 3
-          - 6
-          - 12
-          - 24
-        out_indices:
-          - 0
-          - 1
-          - 2
-          - 3
-        patch_norm: true
-        qk_scale: null
-        qkv_bias: true
-        type: SwinTransformer
-        window_size: 7
-        with_cp: false
-      data_preprocessor:
-        bgr_to_rgb: false
-        mean:
-          - 123.675
-          - 116.28
-          - 103.53
-        pad_mask: true
-        pad_size_divisor: 32
-        std:
-          - 58.395
-          - 57.12
-          - 57.375
-        type: DetDataPreprocessor
-      neck:
-        in_channels:
-          - 96
-          - 192
-          - 384
-          - 768
-        num_outs: 5
-        out_channels: 256
-        type: FPN
-      roi_head:
-        bbox_head:
-          bbox_coder:
-            target_means:
-              - 0.0
-              - 0.0
-              - 0.0
-              - 0.0
-            target_stds:
-              - 0.1
-              - 0.1
-              - 0.2
-              - 0.2
-            type: DeltaXYWHBBoxCoder
-          fc_out_channels: 1024
-          in_channels: 256
-          loss_bbox:
-            loss_weight: 1.0
-            type: L1Loss
-          loss_cls:
-            loss_weight: 1.0
-            type: CrossEntropyLoss
-            use_sigmoid: false
-          num_classes: 80
-          reg_class_agnostic: false
-          roi_feat_size: 7
-          type: Shared2FCBBoxHead
-        bbox_roi_extractor:
-          featmap_strides:
-            - 4
-            - 8
-            - 16
-            - 32
-          out_channels: 256
-          roi_layer:
-            output_size: 7
-            sampling_ratio: 0
-            type: RoIAlign
-          type: SingleRoIExtractor
-        mask_head:
-          conv_out_channels: 256
-          in_channels: 256
-          loss_mask:
-            loss_weight: 1.0
-            type: CrossEntropyLoss
-            use_mask: true
-          num_classes: 80
-          num_convs: 4
-          type: FCNMaskHead
-        mask_roi_extractor:
-          featmap_strides:
-            - 4
-            - 8
-            - 16
-            - 32
-          out_channels: 256
-          roi_layer:
-            output_size: 14
-            sampling_ratio: 0
-            type: RoIAlign
-          type: SingleRoIExtractor
-        type: StandardRoIHead
-      rpn_head:
-        anchor_generator:
-          ratios:
-            - 0.5
-            - 1.0
-            - 2.0
-          scales:
-            - 8
-          strides:
-            - 4
-            - 8
-            - 16
-            - 32
-            - 64
-          type: AnchorGenerator
-        bbox_coder:
-          target_means:
-            - 0.0
-            - 0.0
-            - 0.0
-            - 0.0
-          target_stds:
-            - 1.0
-            - 1.0
-            - 1.0
-            - 1.0
-          type: DeltaXYWHBBoxCoder
-        feat_channels: 256
-        in_channels: 256
-        loss_bbox:
-          loss_weight: 1.0
-          type: L1Loss
-        loss_cls:
-          loss_weight: 1.0
-          type: CrossEntropyLoss
-          use_sigmoid: true
-        type: RPNHead
-      test_cfg:
-        rcnn:
-          mask_thr_binary: 0.5
-          max_per_img: 100
-          nms:
-            iou_threshold: 0.5
-            type: nms
-          score_thr: 0.05
-        rpn:
-          max_per_img: 1000
-          min_bbox_size: 0
-          nms:
-            iou_threshold: 0.7
-            type: nms
-          nms_pre: 1000
-      train_cfg:
-        rcnn:
-          assigner:
-            ignore_iof_thr: -1
-            match_low_quality: true
-            min_pos_iou: 0.5
-            neg_iou_thr: 0.5
-            pos_iou_thr: 0.5
-            type: MaxIoUAssigner
-          debug: false
-          mask_size: 28
-          pos_weight: -1
-          sampler:
-            add_gt_as_proposals: true
-            neg_pos_ub: -1
-            num: 512
-            pos_fraction: 0.25
-            type: RandomSampler
-        rpn:
-          allowed_border: -1
-          assigner:
-            ignore_iof_thr: -1
-            match_low_quality: true
-            min_pos_iou: 0.3
-            neg_iou_thr: 0.3
-            pos_iou_thr: 0.7
-            type: MaxIoUAssigner
-          debug: false
-          pos_weight: -1
-          sampler:
-            add_gt_as_proposals: false
-            neg_pos_ub: -1
-            num: 256
-            pos_fraction: 0.5
-            type: RandomSampler
-        rpn_proposal:
-          max_per_img: 1000
-          min_bbox_size: 0
-          nms:
-            iou_threshold: 0.7
-            type: nms
-          nms_pre: 2000
-      type: MaskRCNN
+    _target_: otx.algo.instance_segmentation.maskrcnn.MaskRCNN
+    variant: swint
   optimizer:
     _target_: torch.optim.AdamW
     _partial_: true