From 9d5818a6178fbbf0d1c88bf495997fae49919ff9 Mon Sep 17 00:00:00 2001 From: "Kim, Vinnam" Date: Thu, 11 Jan 2024 13:40:51 +0900 Subject: [PATCH 1/8] Add export() and register_explain_hook() to OTXModel Signed-off-by: Kim, Vinnam --- src/otx/core/model/entity/base.py | 48 +++++++++++++++++++++++++++++++ src/otx/core/model/module/base.py | 12 ++++++++ src/otx/core/types/export.py | 16 +++++++++++ 3 files changed, 76 insertions(+) create mode 100644 src/otx/core/types/export.py diff --git a/src/otx/core/model/entity/base.py b/src/otx/core/model/entity/base.py index 19a8cff1a31..dea02ecb65e 100644 --- a/src/otx/core/model/entity/base.py +++ b/src/otx/core/model/entity/base.py @@ -15,8 +15,11 @@ T_OTXBatchDataEntity, T_OTXBatchPredEntity, ) +from otx.core.types.export import OTXExportFormat if TYPE_CHECKING: + from pathlib import Path + import torch @@ -116,3 +119,48 @@ def map_class_names(src_classes: list[str], dst_classes: list[str]) -> list[int] else: src2dst.append(-1) return src2dst + + def export(self, output_dir: Path, export_format: OTXExportFormat) -> None: + """Export this model to the specified output directory. + + Args: + output_dir: Directory path to save exported binary files. + export_format: Format in which this `OTXModel` is exported. + """ + if export_format == OTXExportFormat.OPENVINO: + self._export_to_openvino(output_dir) + if export_format == OTXExportFormat.ONNX: + self._export_to_onnx() + if export_format == OTXExportFormat.EXPORTABLE_CODE: + self._export_to_exportable_code() + + def _export_to_openvino(self, output_dir: Path) -> None: + """Export to OpenVINO Intermediate Representation format. + + Args: + output_dir: Directory path to save exported binary files + """ + raise NotImplementedError + + def _export_to_onnx(self) -> None: + """Export to ONNX format. 
+ + Args: + output_dir: Directory path to save exported binary files + """ + raise NotImplementedError + + def _export_to_exportable_code(self) -> None: + """Export to exportable code format. + + Args: + output_dir: Directory path to save exported binary files + """ + raise NotImplementedError + + def register_explain_hook(self) -> None: + """Register explain hook. + + TBD + """ + raise NotImplementedError diff --git a/src/otx/core/model/module/base.py b/src/otx/core/model/module/base.py index 2305ed2715a..f1a85943705 100644 --- a/src/otx/core/model/module/base.py +++ b/src/otx/core/model/module/base.py @@ -14,8 +14,11 @@ from otx.core.data.entity.base import OTXBatchDataEntity from otx.core.model.entity.base import OTXModel +from otx.core.types.export import OTXExportFormat if TYPE_CHECKING: + from pathlib import Path + from otx.core.data.dataset.base import DataMetaInfo @@ -172,3 +175,12 @@ def meta_info(self) -> DataMetaInfo: @meta_info.setter def meta_info(self, meta_info: DataMetaInfo) -> None: self._meta_info = meta_info + + def export(self, output_dir: Path, export_format: OTXExportFormat) -> None: + """Export the member `OTXModel` of this module to the specified output directory. + + Args: + output_dir: Directory path to save exported binary files. + export_format: Format in which this `OTXModel` is exported. 
+ """ + self.model.export(output_dir, export_format) diff --git a/src/otx/core/types/export.py b/src/otx/core/types/export.py new file mode 100644 index 00000000000..923c36601c1 --- /dev/null +++ b/src/otx/core/types/export.py @@ -0,0 +1,16 @@ +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +"""OTX export type definition.""" + +from __future__ import annotations + +from enum import Enum + + +class OTXExportFormat(str, Enum): + """OTX export type definition.""" + + OPENVINO = "OPENVINO" + ONNX = "ONNX" + EXPORTABLE_CODE = "EXPORTABLE_CODE" From bc6e7fcb47f4c2af54de4b3c1a103d6a0f633fab Mon Sep 17 00:00:00 2001 From: "Kim, Vinnam" Date: Thu, 11 Jan 2024 15:48:19 +0900 Subject: [PATCH 2/8] Add num_classes to OTXModel and revisit DataMetaInfo Signed-off-by: Kim, Vinnam --- src/otx/algo/classification/otx_dino_v2.py | 4 +- src/otx/config/model/default.yaml | 3 ++ src/otx/config/model/mmdet_inst_seg.yaml | 3 ++ src/otx/config/model/mmseg.yaml | 1 + src/otx/core/data/dataset/base.py | 26 ++++++++--- src/otx/core/data/dataset/classification.py | 6 +-- src/otx/core/data/module.py | 6 +-- .../model/entity/action_classification.py | 6 ++- src/otx/core/model/entity/action_detection.py | 6 ++- src/otx/core/model/entity/base.py | 46 ++++++++++++++++++- src/otx/core/model/entity/classification.py | 21 +++++---- src/otx/core/model/entity/detection.py | 10 ++-- .../model/entity/instance_segmentation.py | 11 +++-- src/otx/core/model/entity/segmentation.py | 11 +++-- src/otx/core/model/module/base.py | 24 +++++----- src/otx/core/model/module/classification.py | 8 ++-- src/otx/core/utils/build.py | 2 +- src/otx/core/utils/config.py | 35 ++++++++++++-- src/otx/recipe/detection/openvino_model.yaml | 1 + .../instance_segmentation/openvino_model.yaml | 1 + .../openvino_model.yaml | 1 + .../recipe/segmentation/openvino_model.yaml | 1 + tests/integration/cli/test_cli.py | 17 ++++--- tests/regression/test_regression.py | 7 +-- 
tests/unit/core/model/entity/test_base.py | 4 +- .../core/model/entity/test_segmentation.py | 4 +- 26 files changed, 183 insertions(+), 82 deletions(-) diff --git a/src/otx/algo/classification/otx_dino_v2.py b/src/otx/algo/classification/otx_dino_v2.py index be00231560a..e6c37c43844 100644 --- a/src/otx/algo/classification/otx_dino_v2.py +++ b/src/otx/algo/classification/otx_dino_v2.py @@ -65,9 +65,9 @@ def forward(self, imgs: torch.Tensor, labels: torch.Tensor = None) -> torch.Tens class DINOv2RegisterClassifier(OTXMulticlassClsModel): """DINO-v2 Classification Model with register.""" - def __init__(self, config: DictConfig) -> None: + def __init__(self, num_classes: int, config: DictConfig) -> None: self.config = config - super().__init__() # create the model + super().__init__(num_classes=num_classes) # create the model def _create_model(self) -> nn.Module: """Create the model.""" diff --git a/src/otx/config/model/default.yaml b/src/otx/config/model/default.yaml index 6c54cd1e680..c938721f066 100644 --- a/src/otx/config/model/default.yaml +++ b/src/otx/config/model/default.yaml @@ -10,3 +10,6 @@ scheduler: mode: min factor: 0.1 patience: 10 + +otx_model: + num_classes: ??? diff --git a/src/otx/config/model/mmdet_inst_seg.yaml b/src/otx/config/model/mmdet_inst_seg.yaml index a4a1c9fd4d2..716e1555e39 100644 --- a/src/otx/config/model/mmdet_inst_seg.yaml +++ b/src/otx/config/model/mmdet_inst_seg.yaml @@ -1,3 +1,6 @@ +defaults: + - default + _target_: otx.core.model.module.instance_segmentation.OTXInstanceSegLitModule optimizer: diff --git a/src/otx/config/model/mmseg.yaml b/src/otx/config/model/mmseg.yaml index 72a859130f0..7072fee469e 100644 --- a/src/otx/config/model/mmseg.yaml +++ b/src/otx/config/model/mmseg.yaml @@ -14,6 +14,7 @@ scheduler: otx_model: _target_: otx.core.model.entity.segmentation.MMSegCompatibleModel config: ??? + num_classes: ??? 
# compile model for faster training with pytorch 2.0 torch_compile: false diff --git a/src/otx/core/data/dataset/base.py b/src/otx/core/data/dataset/base.py index caa4e24b1ba..21981e5d305 100644 --- a/src/otx/core/data/dataset/base.py +++ b/src/otx/core/data/dataset/base.py @@ -30,15 +30,27 @@ @dataclass -class DataMetaInfo: - """Meta information of each subset datasets.""" +class LabelInfo: + """Object to represent label information.""" - class_names: list[str] + label_names: list[str] @property def num_classes(self) -> int: - """Return number of classes.""" - return len(self.class_names) + """Return number of labels.""" + return len(self.label_names) + + @classmethod + def from_num_classes(cls, num_classes: int) -> LabelInfo: + """Create this object from the number of classes. + + Args: + num_classes: Number of classes + + Returns: + LabelInfo(label_names=["label_0", ...]) + """ + return LabelInfo(label_names=[f"label_{idx}" for idx in range(num_classes)]) class OTXDataset(Dataset, Generic[T_OTXDataEntity]): @@ -59,8 +71,8 @@ def __init__( self.mem_cache_img_max_size = mem_cache_img_max_size self.max_refetch = max_refetch - self.meta_info = DataMetaInfo( - class_names=[category.name for category in self.dm_subset.categories()[AnnotationType.label]], + self.meta_info = LabelInfo( + label_names=[category.name for category in self.dm_subset.categories()[AnnotationType.label]], ) def __len__(self) -> int: diff --git a/src/otx/core/data/dataset/classification.py b/src/otx/core/data/dataset/classification.py index 5b996b36c72..0e6d98d3168 100644 --- a/src/otx/core/data/dataset/classification.py +++ b/src/otx/core/data/dataset/classification.py @@ -13,7 +13,7 @@ from datumaro.components.annotation import AnnotationType from torch.nn import functional -from otx.core.data.dataset.base import DataMetaInfo, OTXDataset +from otx.core.data.dataset.base import LabelInfo, OTXDataset from otx.core.data.entity.base import ImageInfo from otx.core.data.entity.classification import 
( HlabelClsBatchDataEntity, @@ -27,7 +27,7 @@ @dataclass -class HLabelMetaInfo(DataMetaInfo): +class HLabelMetaInfo(LabelInfo): """Meta information of hlabel classification.""" hlabel_info: HLabelInfo @@ -110,7 +110,7 @@ def __init__(self, **kwargs) -> None: # Hlabel classification used HLabelMetaInfo to insert the HLabelInfo. self.meta_info = HLabelMetaInfo( - class_names=[category.name for category in self.dm_categories], + label_names=[category.name for category in self.dm_categories], hlabel_info=HLabelInfo.from_dm_label_groups(self.dm_categories), ) diff --git a/src/otx/core/data/module.py b/src/otx/core/data/module.py index 98b599f826d..69b24771d7a 100644 --- a/src/otx/core/data/module.py +++ b/src/otx/core/data/module.py @@ -12,7 +12,7 @@ from omegaconf import DictConfig, OmegaConf from torch.utils.data import DataLoader -from otx.core.data.dataset.base import DataMetaInfo +from otx.core.data.dataset.base import LabelInfo from otx.core.data.factory import OTXDatasetFactory from otx.core.data.mem_cache import ( MemCacheHandlerSingleton, @@ -68,7 +68,7 @@ def __init__( mem_size=mem_size, ) - meta_infos: list[DataMetaInfo] = [] + meta_infos: list[LabelInfo] = [] for name, dm_subset in dataset.subsets().items(): if name not in config_mapping: log.warning(f"{name} is not available. 
Skip it") @@ -91,7 +91,7 @@ def __init__( self.meta_info = next(iter(meta_infos)) - def _is_meta_info_valid(self, meta_infos: list[DataMetaInfo]) -> bool: + def _is_meta_info_valid(self, meta_infos: list[LabelInfo]) -> bool: """Check whether there are mismatches in the metainfo for the all subsets.""" if all(meta_info == meta_infos[0] for meta_info in meta_infos): return True diff --git a/src/otx/core/model/entity/action_classification.py b/src/otx/core/model/entity/action_classification.py index 7ac5ac016bb..048c690ea7c 100644 --- a/src/otx/core/model/entity/action_classification.py +++ b/src/otx/core/model/entity/action_classification.py @@ -14,6 +14,7 @@ from otx.core.data.entity.base import OTXBatchLossEntity from otx.core.model.entity.base import OTXModel from otx.core.utils.build import build_mm_model, get_classification_layers +from otx.core.utils.config import inplace_num_classes if TYPE_CHECKING: from omegaconf import DictConfig @@ -32,10 +33,11 @@ class MMActionCompatibleModel(OTXActionClsModel): compatible for OTX pipelines. 
""" - def __init__(self, config: DictConfig) -> None: + def __init__(self, num_classes: int, config: DictConfig) -> None: + config = inplace_num_classes(cfg=config, num_classes=num_classes) self.config = config self.load_from = config.pop("load_from", None) - super().__init__() + super().__init__(num_classes=num_classes) def _create_model(self) -> nn.Module: from mmaction.models.data_preprocessors import ( diff --git a/src/otx/core/model/entity/action_detection.py b/src/otx/core/model/entity/action_detection.py index 67cb7272e22..f980b0dfd56 100644 --- a/src/otx/core/model/entity/action_detection.py +++ b/src/otx/core/model/entity/action_detection.py @@ -13,6 +13,7 @@ from otx.core.data.entity.base import OTXBatchLossEntity from otx.core.model.entity.base import OTXModel from otx.core.utils.build import build_mm_model, get_classification_layers +from otx.core.utils.config import inplace_num_classes if TYPE_CHECKING: from omegaconf import DictConfig @@ -31,10 +32,11 @@ class MMActionCompatibleModel(OTXActionDetModel): compatible for OTX pipelines. 
""" - def __init__(self, config: DictConfig) -> None: + def __init__(self, num_classes: int, config: DictConfig) -> None: + config = inplace_num_classes(cfg=config, num_classes=num_classes) self.config = config self.load_from = config.pop("load_from", None) - super().__init__() + super().__init__(num_classes=num_classes) def _create_model(self) -> nn.Module: from mmaction.models.data_preprocessors import ( diff --git a/src/otx/core/model/entity/base.py b/src/otx/core/model/entity/base.py index dea02ecb65e..05a855f22e7 100644 --- a/src/otx/core/model/entity/base.py +++ b/src/otx/core/model/entity/base.py @@ -5,11 +5,13 @@ from __future__ import annotations +import warnings from abc import abstractmethod from typing import TYPE_CHECKING, Any, Generic from torch import nn +from otx.core.data.dataset.base import LabelInfo from otx.core.data.entity.base import ( OTXBatchLossEntity, T_OTXBatchDataEntity, @@ -24,13 +26,45 @@ class OTXModel(nn.Module, Generic[T_OTXBatchDataEntity, T_OTXBatchPredEntity]): - """Base class for the models used in OTX.""" + """Base class for the models used in OTX. - def __init__(self) -> None: + Args: + num_classes: Number of classes this model can predict. 
+ """ + + def __init__(self, num_classes: int) -> None: super().__init__() + + self._label_info = LabelInfo.from_num_classes(num_classes) self.classification_layers: dict[str, dict[str, Any]] = {} self.model = self._create_model() + @property + def label_info(self) -> LabelInfo: + """Get this model label information.""" + return self._label_info + + @label_info.setter + def label_info(self, label_info: LabelInfo | list[str]) -> None: + """Set this model label information.""" + if isinstance(label_info, list): + label_info = LabelInfo(label_names=label_info) + + old_num_classes = self._label_info.num_classes + new_num_classes = label_info.num_classes + + if old_num_classes != new_num_classes: + msg = ( + f"Given LabelInfo has the different number of classes " + f"({old_num_classes}!={new_num_classes}). " + "The model prediction layer is reset to the new number of classes " + f"(={new_num_classes})." + ) + warnings.warn(msg, stacklevel=0) + self._reset_prediction_layer(num_classes=label_info.num_classes) + + self._label_info = label_info + @abstractmethod def _create_model(self) -> nn.Module: """Create a PyTorch model for this class.""" @@ -164,3 +198,11 @@ def register_explain_hook(self) -> None: TBD """ raise NotImplementedError + + def _reset_prediction_layer(self, num_classes: int) -> None: + """Reset its prediction layer with a given number of classes. 
+ + Args: + num_classes: Number of classes + """ + raise NotImplementedError diff --git a/src/otx/core/model/entity/classification.py b/src/otx/core/model/entity/classification.py index 86448f3d2b9..8a4cac402e5 100644 --- a/src/otx/core/model/entity/classification.py +++ b/src/otx/core/model/entity/classification.py @@ -21,6 +21,7 @@ ) from otx.core.model.entity.base import OTXModel from otx.core.utils.build import build_mm_model, get_classification_layers +from otx.core.utils.config import inplace_num_classes if TYPE_CHECKING: from mmpretrain.models.utils import ClsDataPreprocessor @@ -63,10 +64,11 @@ class MMPretrainMulticlassClsModel(OTXMulticlassClsModel): compatible for OTX pipelines. """ - def __init__(self, config: DictConfig) -> None: + def __init__(self, num_classes: int, config: DictConfig) -> None: + config = inplace_num_classes(cfg=config, num_classes=num_classes) self.config = config self.load_from = config.pop("load_from", None) - super().__init__() + super().__init__(num_classes=num_classes) def _create_model(self) -> nn.Module: model, classification_layers = _create_mmpretrain_model(self.config, self.load_from) @@ -155,10 +157,11 @@ class MMPretrainMultilabelClsModel(OTXMultilabelClsModel): compatible for OTX pipelines. """ - def __init__(self, config: DictConfig) -> None: + def __init__(self, num_classes: int, config: DictConfig) -> None: + config = inplace_num_classes(cfg=config, num_classes=num_classes) self.config = config self.load_from = config.pop("load_from", None) - super().__init__() + super().__init__(num_classes=num_classes) def _create_model(self) -> nn.Module: model, classification_layers = _create_mmpretrain_model(self.config, self.load_from) @@ -241,10 +244,11 @@ class MMPretrainHlabelClsModel(OTXHlabelClsModel): compatible for OTX pipelines. 
""" - def __init__(self, config: DictConfig) -> None: + def __init__(self, num_classes: int, config: DictConfig) -> None: + config = inplace_num_classes(cfg=config, num_classes=num_classes) self.config = config self.load_from = config.pop("load_from", None) - super().__init__() + super().__init__(num_classes=num_classes) def _create_model(self) -> nn.Module: model, classification_layers = _create_mmpretrain_model(self.config, self.load_from) @@ -322,10 +326,11 @@ class OVClassificationCompatibleModel(OTXMulticlassClsModel): and create the OTX classification model compatible for OTX testing pipeline. """ - def __init__(self, config: DictConfig) -> None: + def __init__(self, num_classes: int, config: DictConfig) -> None: self.model_name = config.pop("model_name") + config = inplace_num_classes(cfg=config, num_classes=num_classes) self.config = config - super().__init__() + super().__init__(num_classes=num_classes) def _create_model(self) -> nn.Module: from openvino.model_api.models import ClassificationModel diff --git a/src/otx/core/model/entity/detection.py b/src/otx/core/model/entity/detection.py index 4b51af8eb07..64857355208 100644 --- a/src/otx/core/model/entity/detection.py +++ b/src/otx/core/model/entity/detection.py @@ -16,6 +16,7 @@ from otx.core.data.entity.detection import DetBatchDataEntity, DetBatchPredEntity from otx.core.model.entity.base import OTXModel from otx.core.utils.build import build_mm_model, get_classification_layers +from otx.core.utils.config import inplace_num_classes if TYPE_CHECKING: from mmdet.models.data_preprocessors import DetDataPreprocessor @@ -35,10 +36,11 @@ class MMDetCompatibleModel(OTXDetectionModel): compatible for OTX pipelines. 
""" - def __init__(self, config: DictConfig) -> None: + def __init__(self, num_classes: int, config: DictConfig) -> None: + config = inplace_num_classes(cfg=config, num_classes=num_classes) self.config = config self.load_from = config.pop("load_from", None) - super().__init__() + super().__init__(num_classes=num_classes) def _create_model(self) -> nn.Module: from mmdet.models.data_preprocessors import ( @@ -154,10 +156,10 @@ class OVDetectionCompatibleModel(OTXDetectionModel): and create the OTX detection model compatible for OTX testing pipeline. """ - def __init__(self, config: DictConfig) -> None: + def __init__(self, num_classes: int, config: DictConfig) -> None: self.model_name = config.pop("model_name") self.config = config - super().__init__() + super().__init__(num_classes=num_classes) def _create_model(self) -> nn.Module: from openvino.model_api.models import DetectionModel diff --git a/src/otx/core/model/entity/instance_segmentation.py b/src/otx/core/model/entity/instance_segmentation.py index 10fcc7f69a9..e51513dddf0 100644 --- a/src/otx/core/model/entity/instance_segmentation.py +++ b/src/otx/core/model/entity/instance_segmentation.py @@ -18,6 +18,7 @@ ) from otx.core.model.entity.base import OTXModel from otx.core.utils.build import build_mm_model, get_classification_layers +from otx.core.utils.config import inplace_num_classes if TYPE_CHECKING: from mmdet.models.data_preprocessors import DetDataPreprocessor @@ -34,10 +35,11 @@ class OTXInstanceSegModel( class MMDetInstanceSegCompatibleModel(OTXInstanceSegModel): """Instance Segmentation model compatible for MMDet.""" - def __init__(self, config: DictConfig) -> None: + def __init__(self, num_classes: int, config: DictConfig) -> None: + config = inplace_num_classes(cfg=config, num_classes=num_classes) self.config = config self.load_from = self.config.pop("load_from", None) - super().__init__() + super().__init__(num_classes=num_classes) def _create_model(self) -> nn.Module: from 
mmdet.models.data_preprocessors import ( @@ -176,11 +178,12 @@ class OVInstanceSegCompatibleModel(OTXInstanceSegModel): and create the OTX detection model compatible for OTX testing pipeline. """ - def __init__(self, config: DictConfig) -> None: + def __init__(self, num_classes: int, config: DictConfig) -> None: self.model_name = config.pop("model_name") self.model_type = config.pop("model_type") + config = inplace_num_classes(cfg=config, num_classes=num_classes) self.config = config - super().__init__() + super().__init__(num_classes=num_classes) def _create_model(self) -> nn.Module: from openvino.model_api.models import Model diff --git a/src/otx/core/model/entity/segmentation.py b/src/otx/core/model/entity/segmentation.py index 467cc3bde66..bbef77170d5 100644 --- a/src/otx/core/model/entity/segmentation.py +++ b/src/otx/core/model/entity/segmentation.py @@ -14,6 +14,7 @@ from otx.core.data.entity.segmentation import SegBatchDataEntity, SegBatchPredEntity from otx.core.model.entity.base import OTXModel from otx.core.utils.build import build_mm_model, get_classification_layers +from otx.core.utils.config import inplace_num_classes if TYPE_CHECKING: from mmseg.models.data_preprocessor import SegDataPreProcessor @@ -33,10 +34,11 @@ class MMSegCompatibleModel(OTXSegmentationModel): compatible for OTX pipelines. """ - def __init__(self, config: DictConfig) -> None: + def __init__(self, num_classes: int, config: DictConfig) -> None: + config = inplace_num_classes(cfg=config, num_classes=num_classes) self.config = config self.load_from = self.config.pop("load_from", None) - super().__init__() + super().__init__(num_classes=num_classes) def _create_model(self) -> nn.Module: from mmengine.registry import MODELS as MMENGINE_MODELS @@ -130,10 +132,11 @@ class OVSegmentationCompatibleModel(OTXSegmentationModel): and create the OTX segmentation model compatible for OTX testing pipeline. 
""" - def __init__(self, config: DictConfig) -> None: + def __init__(self, num_classes: int, config: DictConfig) -> None: self.model_name = config.pop("model_name") + config = inplace_num_classes(cfg=config, num_classes=num_classes) self.config = config - super().__init__() + super().__init__(num_classes=num_classes) def _create_model(self) -> nn.Module: from openvino.model_api.models import SegmentationModel diff --git a/src/otx/core/model/module/base.py b/src/otx/core/model/module/base.py index f1a85943705..cde6566f065 100644 --- a/src/otx/core/model/module/base.py +++ b/src/otx/core/model/module/base.py @@ -19,7 +19,7 @@ if TYPE_CHECKING: from pathlib import Path - from otx.core.data.dataset.base import DataMetaInfo + from otx.core.data.dataset.base import LabelInfo class OTXLitModule(LightningModule): @@ -151,10 +151,10 @@ def load_state_dict(self, state_dict: dict[str, Any], *args, **kwargs) -> None: logger = logging.getLogger() logger.info( f"Data classes from checkpoint: {ckpt_meta_info.class_names} -> " - f"Data classes from training data: {self.meta_info.class_names}", + f"Data classes from training data: {self.meta_info.label_names}", ) self.register_load_state_dict_pre_hook( - self.meta_info.class_names, + self.meta_info.label_names, ckpt_meta_info.class_names, ) return super().load_state_dict(state_dict, *args, **kwargs) @@ -165,16 +165,14 @@ def lr_scheduler_monitor_key(self) -> str: return "val/loss" @property - def meta_info(self) -> DataMetaInfo: - """Meta information of OTXLitModule.""" - if self._meta_info is None: - err_msg = "meta_info is referenced before assignment" - raise ValueError(err_msg) - return self._meta_info - - @meta_info.setter - def meta_info(self, meta_info: DataMetaInfo) -> None: - self._meta_info = meta_info + def label_info(self) -> LabelInfo: + """Get the member `OTXModel` label information.""" + return self.model.label_info + + @label_info.setter + def label_info(self, label_info: LabelInfo | list[str]) -> None: + """Set the 
member `OTXModel` label information.""" + self.model.label_info = label_info # type: ignore[assignment] def export(self, output_dir: Path, export_format: OTXExportFormat) -> None: """Export the member `OTXModel` of this module to the specified output directory. diff --git a/src/otx/core/model/module/classification.py b/src/otx/core/model/module/classification.py index b444654ee92..97a22cf5189 100644 --- a/src/otx/core/model/module/classification.py +++ b/src/otx/core/model/module/classification.py @@ -25,7 +25,7 @@ from otx.core.model.module.base import OTXLitModule if TYPE_CHECKING: - from otx.core.data.dataset.base import DataMetaInfo + from otx.core.data.dataset.base import LabelInfo class OTXMulticlassClsLitModule(OTXLitModule): @@ -220,7 +220,7 @@ def _set_hlabel_setup(self) -> None: self.model.model.head.set_hlabel_info(self.hlabel_info) # Set the OTXHlabelClsLitModule params. - self.num_labels = len(self.meta_info.class_names) + self.num_labels = len(self.meta_info.label_names) self.num_multiclass_heads = self.hlabel_info.num_multiclass_heads self.num_multilabel_classes = self.hlabel_info.num_multilabel_classes self.num_singlelabel_classes = self.num_labels - self.num_multilabel_classes @@ -310,7 +310,7 @@ def lr_scheduler_monitor_key(self) -> str: return "train/loss" @property - def meta_info(self) -> DataMetaInfo: + def meta_info(self) -> LabelInfo: """Meta information of OTXLitModule.""" if self._meta_info is None: err_msg = "meta_info is referenced before assignment" @@ -318,6 +318,6 @@ def meta_info(self) -> DataMetaInfo: return self._meta_info @meta_info.setter - def meta_info(self, meta_info: DataMetaInfo) -> None: + def meta_info(self, meta_info: LabelInfo) -> None: self._meta_info = meta_info self._set_hlabel_setup() diff --git a/src/otx/core/utils/build.py b/src/otx/core/utils/build.py index 3dcba680693..49d8af4cb16 100644 --- a/src/otx/core/utils/build.py +++ b/src/otx/core/utils/build.py @@ -8,7 +8,6 @@ from copy import deepcopy from typing 
import TYPE_CHECKING -from mmengine.logging import MMLogger from omegaconf import DictConfig from otx.core.utils.config import convert_conf_to_mmconfig_dict @@ -20,6 +19,7 @@ def build_mm_model(config: DictConfig, model_registry: Registry, load_from: str | None = None) -> nn.Module: """Build a model by using the registry.""" + from mmengine.logging import MMLogger from mmengine.runner import load_checkpoint from otx import algo # noqa: F401 diff --git a/src/otx/core/utils/config.py b/src/otx/core/utils/config.py index 9cf44b95115..18a64e0caa3 100644 --- a/src/otx/core/utils/config.py +++ b/src/otx/core/utils/config.py @@ -6,13 +6,12 @@ from __future__ import annotations from numbers import Number -from typing import TYPE_CHECKING, Literal +from typing import TYPE_CHECKING, Any, Literal -from mmengine.config import Config as MMConfig -from omegaconf import OmegaConf +from omegaconf import DictConfig, ListConfig, OmegaConf if TYPE_CHECKING: - from omegaconf import DictConfig + from mmengine.config import Config as MMConfig def to_tuple(dict_: dict) -> dict: @@ -48,6 +47,8 @@ def convert_conf_to_mmconfig_dict( to: Literal["tuple", "list"] = "tuple", ) -> MMConfig: """Convert OTX format config object to MMEngine config object.""" + from mmengine.config import Config as MMConfig + dict_cfg = OmegaConf.to_container(cfg) if to == "tuple": @@ -66,3 +67,29 @@ def mmconfig_dict_to_dict(obj: MMConfig | list[MMConfig]) -> list | dict: return {k: mmconfig_dict_to_dict(v) for k, v in obj.to_dict().items()} return obj + + +def inplace_num_classes( + cfg: DictConfig | ListConfig | Any, # noqa: ANN401 + num_classes: int, +) -> DictConfig | ListConfig | Any: # noqa: ANN401 + """Inplace the number of classes values in a given config object. 
+ + Args: + cfg: Config object to inplace the number of classes values + num_classes: Number of classes to inplace + Returns: + Inplaced config object + """ + if isinstance(cfg, DictConfig): + for key in cfg: + if key == "num_classes" and isinstance(cfg[key], int): + cfg[key] = num_classes + else: + cfg[key] = inplace_num_classes(cfg[key], num_classes) + + if isinstance(cfg, ListConfig): + for idx in range(len(cfg)): + cfg[idx] = inplace_num_classes(cfg[idx], num_classes) + + return cfg diff --git a/src/otx/recipe/detection/openvino_model.yaml b/src/otx/recipe/detection/openvino_model.yaml index c55b0c2bb4e..72b79ad0984 100644 --- a/src/otx/recipe/detection/openvino_model.yaml +++ b/src/otx/recipe/detection/openvino_model.yaml @@ -13,3 +13,4 @@ model: _target_: otx.core.model.entity.detection.OVDetectionCompatibleModel config: model_name: ssd300 + num_classes: 20 diff --git a/src/otx/recipe/instance_segmentation/openvino_model.yaml b/src/otx/recipe/instance_segmentation/openvino_model.yaml index 5925b7c19b3..d766a4437a3 100644 --- a/src/otx/recipe/instance_segmentation/openvino_model.yaml +++ b/src/otx/recipe/instance_segmentation/openvino_model.yaml @@ -14,3 +14,4 @@ model: config: model_name: yolact-resnet50-fpn-pytorch model_type: YOLACT + num_classes: 80 diff --git a/src/otx/recipe/multiclass_classification/openvino_model.yaml b/src/otx/recipe/multiclass_classification/openvino_model.yaml index 27e0862f214..727ef9e3f17 100644 --- a/src/otx/recipe/multiclass_classification/openvino_model.yaml +++ b/src/otx/recipe/multiclass_classification/openvino_model.yaml @@ -15,3 +15,4 @@ model: model_name: efficientnet-b0-pytorch head: num_classes: 1000 + num_classes: 1000 diff --git a/src/otx/recipe/segmentation/openvino_model.yaml b/src/otx/recipe/segmentation/openvino_model.yaml index d031a396761..5df6f080cd4 100644 --- a/src/otx/recipe/segmentation/openvino_model.yaml +++ b/src/otx/recipe/segmentation/openvino_model.yaml @@ -15,3 +15,4 @@ model: model_name: drn-d-38 
decode_head: num_classes: 19 + num_classes: 19 diff --git a/tests/integration/cli/test_cli.py b/tests/integration/cli/test_cli.py index 4d347138327..85167aae76c 100644 --- a/tests/integration/cli/test_cli.py +++ b/tests/integration/cli/test_cli.py @@ -22,46 +22,45 @@ "multiclass_classification": { "data_dir": "tests/assets/classification_dataset", "overrides": [ - "model.otx_model.config.head.num_classes=2", + "model.otx_model.num_classes=2", ], }, "multilabel_classification": { "data_dir": "tests/assets/multilabel_classification", "overrides": [ - "model.otx_model.config.head.num_classes=2", + "model.otx_model.num_classes=2", ], }, "hlabel_classification": { "data_dir": "tests/assets/hlabel_classification", "overrides": [ - "model.otx_model.config.head.num_classes=7", + "model.otx_model.num_classes=7", "model.otx_model.config.head.num_multiclass_heads=2", "model.otx_model.config.head.num_multilabel_classes=3", ], }, "detection": { "data_dir": "tests/assets/car_tree_bug", - "overrides": ["model.otx_model.config.bbox_head.num_classes=3"], + "overrides": ["model.otx_model.num_classes=3"], }, "instance_segmentation": { "data_dir": "tests/assets/car_tree_bug", "overrides": [ - "model.otx_model.config.roi_head.bbox_head.num_classes=3", - "model.otx_model.config.roi_head.mask_head.num_classes=3", + "model.otx_model.num_classes=3", ], }, "segmentation": { "data_dir": "tests/assets/common_semantic_segmentation_dataset/supervised", - "overrides": ["model.otx_model.config.decode_head.num_classes=2"], + "overrides": ["model.otx_model.num_classes=2"], }, "action_classification": { "data_dir": "tests/assets/action_classification_dataset/", - "overrides": ["model.otx_model.config.cls_head.num_classes=2"], + "overrides": ["model.otx_model.num_classes=2"], }, "action_detection": { "data_dir": "tests/assets/action_detection_dataset/", "overrides": [ - "model.otx_model.config.roi_head.bbox_head.num_classes=5", + "model.otx_model.num_classes=5", 
"+model.otx_model.config.roi_head.bbox_head.topk=3", ], }, diff --git a/tests/regression/test_regression.py b/tests/regression/test_regression.py index 020d5fbd8f5..ebcd234e05d 100644 --- a/tests/regression/test_regression.py +++ b/tests/regression/test_regression.py @@ -45,7 +45,6 @@ def _test_regression( fxt_num_repeat: int, fxt_accelerator: str, tmpdir: pytest.TempdirFactory, - head_name: str, ) -> None: for seed in range(fxt_num_repeat): test_case = RegressionTestCase( @@ -70,7 +69,7 @@ def _test_regression( with mlflow.start_run(tags=tags, run_name=run_name): overrides = [ f"+recipe={test_case.model.task}/{test_case.model.name}", - f"model.otx_model.config.{head_name}.num_classes={test_case.dataset.num_classes}", + f"model.otx_model.num_classes={test_case.dataset.num_classes}", f"data.data_root={data_root}", f"data.data_format={test_case.dataset.data_format}", f"base.output_dir={test_case.output_dir}", @@ -152,7 +151,6 @@ def test_regression( fxt_num_repeat=fxt_num_repeat, fxt_accelerator=fxt_accelerator, tmpdir=tmpdir, - head_name="head", ) @@ -219,7 +217,6 @@ def test_regression( fxt_num_repeat=fxt_num_repeat, fxt_accelerator=fxt_accelerator, tmpdir=tmpdir, - head_name="head", ) @@ -271,7 +268,6 @@ def test_regression( fxt_num_repeat=fxt_num_repeat, fxt_accelerator=fxt_accelerator, tmpdir=tmpdir, - head_name="head", ) class TestObjectDetection(BaseTest): @@ -338,5 +334,4 @@ def test_regression( fxt_tags=fxt_tags, fxt_num_repeat=fxt_num_repeat, tmpdir=tmpdir, - head_name="bbox_head", ) diff --git a/tests/unit/core/model/entity/test_base.py b/tests/unit/core/model/entity/test_base.py index 4b126f9250c..85641ba629e 100644 --- a/tests/unit/core/model/entity/test_base.py +++ b/tests/unit/core/model/entity/test_base.py @@ -12,10 +12,10 @@ def __init__(self, num_classes): class TestOTXModel: def test_smart_weight_loading(self, mocker) -> None: mocker.patch.object(OTXModel, "_create_model", return_value=MockNNModule(2)) - prev_model = OTXModel() + prev_model = 
OTXModel(num_classes=2) mocker.patch.object(OTXModel, "_create_model", return_value=MockNNModule(3)) - current_model = OTXModel() + current_model = OTXModel(num_classes=3) current_model.classification_layers = ["model.head.weight", "model.head.bias"] current_model.classification_layers = { "model.head.weight": {"stride": 1, "num_extra_classes": 0}, diff --git a/tests/unit/core/model/entity/test_segmentation.py b/tests/unit/core/model/entity/test_segmentation.py index cacfd0ec7b2..41293669c01 100644 --- a/tests/unit/core/model/entity/test_segmentation.py +++ b/tests/unit/core/model/entity/test_segmentation.py @@ -19,11 +19,11 @@ class TestOTXSegmentationModel: @pytest.fixture() def config(self) -> DictConfig: - return OmegaConf.load("src/otx/recipe/segmentation/segnext_s.yaml") + return OmegaConf.load("src/otx/recipe/segmentation/segnext_s.yaml").model.otx_model.config @pytest.fixture() def model(self, config) -> MMSegCompatibleModel: - return MMSegCompatibleModel(config.model.otx_model.config) + return MMSegCompatibleModel(num_classes=1, config=config) def test_create_model(self, model) -> None: mmseg_model = model._create_model() From 3e4e7df18be1ffc8e6089028f4b53506de92ea42 Mon Sep 17 00:00:00 2001 From: "Kim, Vinnam" Date: Thu, 11 Jan 2024 15:56:34 +0900 Subject: [PATCH 3/8] Fix test error Signed-off-by: Kim, Vinnam --- tests/integration/detection/test_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/detection/test_model.py b/tests/integration/detection/test_model.py index 60776f45a45..cd450d595ae 100644 --- a/tests/integration/detection/test_model.py +++ b/tests/integration/detection/test_model.py @@ -15,7 +15,7 @@ def fxt_rtmdet_tiny_model_config(self, fxt_rtmdet_tiny_config) -> DictConfig: @pytest.fixture() def fxt_model(self, fxt_rtmdet_tiny_model_config) -> MMDetCompatibleModel: - return MMDetCompatibleModel(config=fxt_rtmdet_tiny_model_config) + return MMDetCompatibleModel(num_classes=3, 
config=fxt_rtmdet_tiny_model_config) def test_forward_train( self, From 1afea455fb92b16844a4b90c1d949856dd6a6ffc Mon Sep 17 00:00:00 2001 From: "Kim, Vinnam" Date: Thu, 11 Jan 2024 17:56:25 +0900 Subject: [PATCH 4/8] Change action cls and det Signed-off-by: Kim, Vinnam --- .../algo/action_classification/__init__.py | 4 ++ .../action_classification/mmconfigs/x3d.yaml | 28 ++++++++++ src/otx/algo/action_classification/x3d.py | 15 +++++ src/otx/algo/action_detection/__init__.py | 4 ++ .../mmconfigs/x3d_fastrcnn.yaml | 51 +++++++++++++++++ src/otx/algo/action_detection/template.py | 16 ++++++ src/otx/algo/action_detection/x3d_fastrcnn.py | 17 ++++++ src/otx/algo/utils/mmconfig.py | 31 +++++++++++ .../config/model/mmaction_classification.yaml | 2 +- src/otx/config/model/mmaction_detection.yaml | 2 +- src/otx/recipe/action_classification/x3d.yaml | 30 +--------- .../recipe/action_detection/x3d_fastrcnn.yaml | 55 +------------------ tests/integration/cli/test_cli.py | 2 +- 13 files changed, 173 insertions(+), 84 deletions(-) create mode 100644 src/otx/algo/action_classification/__init__.py create mode 100644 src/otx/algo/action_classification/mmconfigs/x3d.yaml create mode 100644 src/otx/algo/action_classification/x3d.py create mode 100644 src/otx/algo/action_detection/__init__.py create mode 100644 src/otx/algo/action_detection/mmconfigs/x3d_fastrcnn.yaml create mode 100644 src/otx/algo/action_detection/template.py create mode 100644 src/otx/algo/action_detection/x3d_fastrcnn.py create mode 100644 src/otx/algo/utils/mmconfig.py diff --git a/src/otx/algo/action_classification/__init__.py b/src/otx/algo/action_classification/__init__.py new file mode 100644 index 00000000000..2f4d6dc99d5 --- /dev/null +++ b/src/otx/algo/action_classification/__init__.py @@ -0,0 +1,4 @@ +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +"""Module for OTX action classification models, hooks, utils, etc.""" diff --git 
a/src/otx/algo/action_classification/mmconfigs/x3d.yaml b/src/otx/algo/action_classification/mmconfigs/x3d.yaml new file mode 100644 index 00000000000..aad0dcf8f8c --- /dev/null +++ b/src/otx/algo/action_classification/mmconfigs/x3d.yaml @@ -0,0 +1,28 @@ +load_from: https://download.openmmlab.com/mmaction/recognition/x3d/facebook/x3d_m_facebook_16x5x1_kinetics400_rgb_20201027-3f42382a.pth +backbone: + gamma_b: 2.25 + gamma_d: 2.2 + gamma_w: 1 + type: X3D +cls_head: + average_clips: prob + dropout_ratio: 0.5 + fc1_bias: false + in_channels: 432 + num_classes: 400 + spatial_type: avg + type: X3DHead +data_preprocessor: + format_shape: NCTHW + mean: + - 114.75 + - 114.75 + - 114.75 + std: + - 57.38 + - 57.38 + - 57.38 + type: ActionDataPreprocessor +test_cfg: null +train_cfg: null +type: Recognizer3D diff --git a/src/otx/algo/action_classification/x3d.py b/src/otx/algo/action_classification/x3d.py new file mode 100644 index 00000000000..d7fbddc94ad --- /dev/null +++ b/src/otx/algo/action_classification/x3d.py @@ -0,0 +1,15 @@ +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +"""X3D model implementation.""" + +from otx.algo.utils.mmconfig import read_mmconfig +from otx.core.model.entity.action_classification import MMActionCompatibleModel + + +class X3D(MMActionCompatibleModel): + """X3D Model.""" + + def __init__(self, num_classes: int) -> None: + config = read_mmconfig("x3d") + super().__init__(num_classes=num_classes, config=config) diff --git a/src/otx/algo/action_detection/__init__.py b/src/otx/algo/action_detection/__init__.py new file mode 100644 index 00000000000..4b58f3a3c7b --- /dev/null +++ b/src/otx/algo/action_detection/__init__.py @@ -0,0 +1,4 @@ +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +"""Module for OTX action detection models, hooks, utils, etc.""" diff --git a/src/otx/algo/action_detection/mmconfigs/x3d_fastrcnn.yaml b/src/otx/algo/action_detection/mmconfigs/x3d_fastrcnn.yaml 
new file mode 100644 index 00000000000..a9e49c244c1 --- /dev/null +++ b/src/otx/algo/action_detection/mmconfigs/x3d_fastrcnn.yaml @@ -0,0 +1,51 @@ +type: FastRCNN +_scope_: mmdet +init_cfg: + type: Pretrained + checkpoint: https://download.openmmlab.com/mmaction/recognition/x3d/facebook/x3d_m_facebook_16x5x1_kinetics400_rgb_20201027-3f42382a.pth +backbone: + type: mmaction.X3D + gamma_b: 2.25 + gamma_d: 2.2 + gamma_w: 1 +roi_head: + type: AVARoIHead + bbox_roi_extractor: + type: SingleRoIExtractor3D + roi_layer_type: RoIAlign + output_size: 8 + with_temporal_pool: true + bbox_head: + type: BBoxHeadAVA + background_class: true + in_channels: 432 + num_classes: 81 + multilabel: false + dropout_ratio: 0.5 +data_preprocessor: + type: ActionDataPreprocessor + mean: + - 123.675 + - 116.28 + - 103.53 + std: + - 58.395 + - 57.12 + - 57.375 + format_shape: NCTHW +train_cfg: + rcnn: + assigner: + type: MaxIoUAssignerAVA + pos_iou_thr: 0.9 + neg_iou_thr: 0.9 + min_pos_iou: 0.9 + sampler: + type: RandomSampler + num: 32 + pos_fraction: 1 + neg_pos_ub: -1 + add_gt_as_proposals: true + pos_weight: 1.0 +test_cfg: + rcnn: null diff --git a/src/otx/algo/action_detection/template.py b/src/otx/algo/action_detection/template.py new file mode 100644 index 00000000000..374800094d8 --- /dev/null +++ b/src/otx/algo/action_detection/template.py @@ -0,0 +1,16 @@ +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +"""X3DFastRCNN model implementation.""" + +from otx.core.model.entity.action_detection import MMActionCompatibleModel + +_MM_CONFIG = """ + +""" + + +class X3DFastRCNN(MMActionCompatibleModel): + """X3D Model.""" + + MM_CONFIG = _MM_CONFIG diff --git a/src/otx/algo/action_detection/x3d_fastrcnn.py b/src/otx/algo/action_detection/x3d_fastrcnn.py new file mode 100644 index 00000000000..af0f3402f78 --- /dev/null +++ b/src/otx/algo/action_detection/x3d_fastrcnn.py @@ -0,0 +1,17 @@ +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: 
Apache-2.0 +# +"""X3DFastRCNN model implementation.""" +from __future__ import annotations + +from otx.algo.utils.mmconfig import read_mmconfig +from otx.core.model.entity.action_detection import MMActionCompatibleModel + + +class X3DFastRCNN(MMActionCompatibleModel): + """X3D Model.""" + + def __init__(self, num_classes: int, topk: int | tuple[int]) -> None: + config = read_mmconfig("x3d_fastrcnn") + config.roi_head.bbox_head.topk = topk + super().__init__(num_classes=num_classes, config=config) diff --git a/src/otx/algo/utils/mmconfig.py b/src/otx/algo/utils/mmconfig.py new file mode 100644 index 00000000000..4860950f1f7 --- /dev/null +++ b/src/otx/algo/utils/mmconfig.py @@ -0,0 +1,31 @@ +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +"""Utils used for MMConfigs.""" + +import inspect +from pathlib import Path + +from omegaconf import DictConfig, OmegaConf + + +def read_mmconfig(model_name: str) -> DictConfig: + """Read MMConfig. + + Load the YAML file `{caller_dir}/mmconfigs/{model_name}.yaml`, where `caller_dir` is the + directory of the module that called this function (resolved from the call stack). + """ + frame = inspect.stack()[1] + module = inspect.getmodule(frame[0]) + + if module is None or module.__file__ is None: + msg = "Cannot get valid module from stack" + raise RuntimeError(msg) + + root_dir = Path(module.__file__).parent / "mmconfigs" + fpath = root_dir / f"{model_name}.yaml" + + if not fpath.exists(): + raise FileNotFoundError(fpath) + + return OmegaConf.load(fpath) diff --git a/src/otx/config/model/mmaction_classification.yaml b/src/otx/config/model/mmaction_classification.yaml index a5c1ca5a4e4..17b41f7784a 100644 --- a/src/otx/config/model/mmaction_classification.yaml +++ b/src/otx/config/model/mmaction_classification.yaml @@ -5,7 +5,7 @@ _target_: otx.core.model.module.action_classification.OTXActionClsLitModule otx_model: _target_: otx.core.model.entity.action_classification.MMActionCompatibleModel - config: ??? + num_classes: ???
# compile model for faster training with pytorch 2.0 torch_compile: false diff --git a/src/otx/config/model/mmaction_detection.yaml b/src/otx/config/model/mmaction_detection.yaml index 217c56f5032..6a8fe289b78 100644 --- a/src/otx/config/model/mmaction_detection.yaml +++ b/src/otx/config/model/mmaction_detection.yaml @@ -5,7 +5,7 @@ _target_: otx.core.model.module.action_detection.OTXActionDetLitModule otx_model: _target_: otx.core.model.entity.action_detection.MMActionCompatibleModel - config: ??? + num_classes: ??? # compile model for faster training with pytorch 2.0 torch_compile: false diff --git a/src/otx/recipe/action_classification/x3d.yaml b/src/otx/recipe/action_classification/x3d.yaml index a7e4b02e00e..46f575e12d7 100644 --- a/src/otx/recipe/action_classification/x3d.yaml +++ b/src/otx/recipe/action_classification/x3d.yaml @@ -78,35 +78,7 @@ data: - scale_factor model: otx_model: - config: - load_from: https://download.openmmlab.com/mmaction/recognition/x3d/facebook/x3d_m_facebook_16x5x1_kinetics400_rgb_20201027-3f42382a.pth - backbone: - gamma_b: 2.25 - gamma_d: 2.2 - gamma_w: 1 - type: X3D - cls_head: - average_clips: prob - dropout_ratio: 0.5 - fc1_bias: false - in_channels: 432 - num_classes: 400 - spatial_type: avg - type: X3DHead - data_preprocessor: - format_shape: NCTHW - mean: - - 114.75 - - 114.75 - - 114.75 - std: - - 57.38 - - 57.38 - - 57.38 - type: ActionDataPreprocessor - test_cfg: null - train_cfg: null - type: Recognizer3D + _target_: otx.algo.action_classification.x3d.X3D optimizer: _target_: torch.optim.AdamW lr: 0.001 diff --git a/src/otx/recipe/action_detection/x3d_fastrcnn.yaml b/src/otx/recipe/action_detection/x3d_fastrcnn.yaml index 328d2a657a7..b602798ea7e 100644 --- a/src/otx/recipe/action_detection/x3d_fastrcnn.yaml +++ b/src/otx/recipe/action_detection/x3d_fastrcnn.yaml @@ -67,58 +67,9 @@ data: - type: PackActionInputs model: otx_model: - config: - type: FastRCNN - _scope_: mmdet - init_cfg: - type: Pretrained - checkpoint: 
https://download.openmmlab.com/mmaction/recognition/x3d/facebook/x3d_m_facebook_16x5x1_kinetics400_rgb_20201027-3f42382a.pth - backbone: - type: mmaction.X3D - gamma_b: 2.25 - gamma_d: 2.2 - gamma_w: 1 - roi_head: - type: AVARoIHead - bbox_roi_extractor: - type: SingleRoIExtractor3D - roi_layer_type: RoIAlign - output_size: 8 - with_temporal_pool: true - bbox_head: - type: BBoxHeadAVA - background_class: true - in_channels: 432 - num_classes: 81 - multilabel: false - dropout_ratio: 0.5 - data_preprocessor: - type: ActionDataPreprocessor - mean: - - 123.675 - - 116.28 - - 103.53 - std: - - 58.395 - - 57.12 - - 57.375 - format_shape: NCTHW - train_cfg: - rcnn: - assigner: - type: MaxIoUAssignerAVA - pos_iou_thr: 0.9 - neg_iou_thr: 0.9 - min_pos_iou: 0.9 - sampler: - type: RandomSampler - num: 32 - pos_fraction: 1 - neg_pos_ub: -1 - add_gt_as_proposals: true - pos_weight: 1.0 - test_cfg: - rcnn: null + _target_: otx.algo.action_detection.x3d_fastrcnn.X3DFastRCNN + topk: ??? + optimizer: _target_: torch.optim.SGD lr: 0.005 diff --git a/tests/integration/cli/test_cli.py b/tests/integration/cli/test_cli.py index 85167aae76c..fa1c126678a 100644 --- a/tests/integration/cli/test_cli.py +++ b/tests/integration/cli/test_cli.py @@ -61,7 +61,7 @@ "data_dir": "tests/assets/action_detection_dataset/", "overrides": [ "model.otx_model.num_classes=5", - "+model.otx_model.config.roi_head.bbox_head.topk=3", + "model.otx_model.topk=3", ], }, } From 03345ba2ac2a2ad7081a366e56dc5370b525793a Mon Sep 17 00:00:00 2001 From: "Kim, Vinnam" Date: Thu, 11 Jan 2024 19:30:58 +0900 Subject: [PATCH 5/8] Change classification models Signed-off-by: Kim, Vinnam --- src/otx/algo/action_detection/template.py | 16 ------- src/otx/algo/classification/deit_tiny.py | 37 ++++++++++++++++ .../algo/classification/efficientnet_b0.py | 38 +++++++++++++++++ .../algo/classification/efficientnet_v2.py | 38 +++++++++++++++++ .../hlabel_classification/deit_tiny.yaml | 40 ++++++++++++++++++ 
.../efficientnet_b0_light.yaml | 32 ++++++++++++++ .../efficientnet_v2_light.yaml | 33 +++++++++++++++ .../mobilenet_v3_large_light.yaml | 30 +++++++++++++ .../multiclass_classification/deit_tiny.yaml | 33 +++++++++++++++ .../efficientnet_b0.yaml | 39 +++++++++++++++++ .../efficientnet_b0_light.yaml | 28 +++++++++++++ .../efficientnet_v2.yaml | 38 +++++++++++++++++ .../efficientnet_v2_light.yaml | 27 ++++++++++++ .../mobilenet_v3_large.yaml | 37 ++++++++++++++++ .../mobilenet_v3_large_light.yaml | 26 ++++++++++++ .../multilabel_classification/deit_tiny.yaml | 32 ++++++++++++++ .../efficientnet_b0_light.yaml | 29 +++++++++++++ .../efficientnet_v2_light.yaml | 28 +++++++++++++ .../mobilenet_v3_large_light.yaml | 30 +++++++++++++ .../algo/classification/mobilenet_v3_large.py | 38 +++++++++++++++++ src/otx/algo/utils/mmconfig.py | 14 +++---- src/otx/config/model/hlabel_mmpretrain.yaml | 4 +- .../config/model/multiclass_mmpretrain.yaml | 2 +- .../config/model/multilabel_mmpretrain.yaml | 2 +- .../efficientnet_b0_light.yaml | 34 +-------------- .../efficientnet_v2_light.yaml | 35 +--------------- .../mobilenet_v3_large_light.yaml | 32 +------------- .../hlabel_classification/otx_deit_tiny.yaml | 42 +------------------ .../efficientnet_b0_light.yaml | 31 +------------- .../efficientnet_v2_light.yaml | 30 +------------ .../mobilenet_v3_large_light.yaml | 29 +------------ .../otx_deit_tiny.yaml | 35 +--------------- .../otx_efficientnet_b0.yaml | 41 +----------------- .../otx_efficientnet_v2.yaml | 40 +----------------- .../otx_mobilenet_v3_large.yaml | 39 +---------------- .../efficientnet_b0_light.yaml | 31 +------------- .../efficientnet_v2_light.yaml | 30 +------------ .../mobilenet_v3_large_light.yaml | 32 +------------- .../otx_deit_tiny.yaml | 34 +-------------- tests/integration/cli/test_cli.py | 4 +- 40 files changed, 665 insertions(+), 525 deletions(-) delete mode 100644 src/otx/algo/action_detection/template.py create mode 100644 
src/otx/algo/classification/deit_tiny.py create mode 100644 src/otx/algo/classification/efficientnet_b0.py create mode 100644 src/otx/algo/classification/efficientnet_v2.py create mode 100644 src/otx/algo/classification/mmconfigs/hlabel_classification/deit_tiny.yaml create mode 100644 src/otx/algo/classification/mmconfigs/hlabel_classification/efficientnet_b0_light.yaml create mode 100644 src/otx/algo/classification/mmconfigs/hlabel_classification/efficientnet_v2_light.yaml create mode 100644 src/otx/algo/classification/mmconfigs/hlabel_classification/mobilenet_v3_large_light.yaml create mode 100644 src/otx/algo/classification/mmconfigs/multiclass_classification/deit_tiny.yaml create mode 100644 src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_b0.yaml create mode 100644 src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_b0_light.yaml create mode 100644 src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_v2.yaml create mode 100644 src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_v2_light.yaml create mode 100644 src/otx/algo/classification/mmconfigs/multiclass_classification/mobilenet_v3_large.yaml create mode 100644 src/otx/algo/classification/mmconfigs/multiclass_classification/mobilenet_v3_large_light.yaml create mode 100644 src/otx/algo/classification/mmconfigs/multilabel_classification/deit_tiny.yaml create mode 100644 src/otx/algo/classification/mmconfigs/multilabel_classification/efficientnet_b0_light.yaml create mode 100644 src/otx/algo/classification/mmconfigs/multilabel_classification/efficientnet_v2_light.yaml create mode 100644 src/otx/algo/classification/mmconfigs/multilabel_classification/mobilenet_v3_large_light.yaml create mode 100644 src/otx/algo/classification/mobilenet_v3_large.py diff --git a/src/otx/algo/action_detection/template.py b/src/otx/algo/action_detection/template.py deleted file mode 100644 index 374800094d8..00000000000 --- 
a/src/otx/algo/action_detection/template.py +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (C) 2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# -"""X3DFastRCNN model implementation.""" - -from otx.core.model.entity.action_detection import MMActionCompatibleModel - -_MM_CONFIG = """ - -""" - - -class X3DFastRCNN(MMActionCompatibleModel): - """X3D Model.""" - - MM_CONFIG = _MM_CONFIG diff --git a/src/otx/algo/classification/deit_tiny.py b/src/otx/algo/classification/deit_tiny.py new file mode 100644 index 00000000000..7107b99e232 --- /dev/null +++ b/src/otx/algo/classification/deit_tiny.py @@ -0,0 +1,37 @@ +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +"""DeitTiny model implementation.""" + +from otx.algo.utils.mmconfig import read_mmconfig +from otx.core.model.entity.classification import ( + MMPretrainHlabelClsModel, + MMPretrainMulticlassClsModel, + MMPretrainMultilabelClsModel, +) + + +class DeitTinyForHLabelCls(MMPretrainHlabelClsModel): + """DeitTiny Model for hierarchical label classification task.""" + + def __init__(self, num_classes: int, num_multiclass_heads: int, num_multilabel_classes: int) -> None: + config = read_mmconfig(model_name="deit_tiny", subdir_name="hlabel_classification") + config.head.num_multiclass_heads = num_multiclass_heads + config.head.num_multilabel_classes = num_multilabel_classes + super().__init__(num_classes=num_classes, config=config) + + +class DeitTinyForMulticlassCls(MMPretrainMulticlassClsModel): + """DeitTiny Model for multi-class classification task.""" + + def __init__(self, num_classes: int) -> None: + config = read_mmconfig("deit_tiny", subdir_name="multiclass_classification") + super().__init__(num_classes=num_classes, config=config) + + +class DeitTinyForMultilabelCls(MMPretrainMultilabelClsModel): + """DeitTiny Model for multi-label classification task.""" + + def __init__(self, num_classes: int) -> None: + config = read_mmconfig("deit_tiny",
subdir_name="multilabel_classification") + super().__init__(num_classes=num_classes, config=config) diff --git a/src/otx/algo/classification/efficientnet_b0.py b/src/otx/algo/classification/efficientnet_b0.py new file mode 100644 index 00000000000..3259d1e0921 --- /dev/null +++ b/src/otx/algo/classification/efficientnet_b0.py @@ -0,0 +1,38 @@ +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +"""EfficientNetB0 model implementation.""" + +from otx.algo.utils.mmconfig import read_mmconfig +from otx.core.model.entity.classification import ( + MMPretrainHlabelClsModel, + MMPretrainMulticlassClsModel, + MMPretrainMultilabelClsModel, +) + + +class EfficientNetB0ForHLabelCls(MMPretrainHlabelClsModel): + """EfficientNetB0 Model for hierarchical label classification task.""" + + def __init__(self, num_classes: int, num_multiclass_heads: int, num_multilabel_classes: int) -> None: + config = read_mmconfig(model_name="efficientnet_b0_light", subdir_name="hlabel_classification") + config.head.num_multiclass_heads = num_multiclass_heads + config.head.num_multilabel_classes = num_multilabel_classes + super().__init__(num_classes=num_classes, config=config) + + +class EfficientNetB0ForMulticlassCls(MMPretrainMulticlassClsModel): + """EfficientNetB0 Model for multi-class classification task.""" + + def __init__(self, num_classes: int, light: bool = True) -> None: + model_name = "efficientnet_b0_light" if light else "efficientnet_b0" + config = read_mmconfig(model_name=model_name, subdir_name="multiclass_classification") + super().__init__(num_classes=num_classes, config=config) + + +class EfficientNetB0ForMultilabelCls(MMPretrainMultilabelClsModel): + """EfficientNetB0 Model for multi-label classification task.""" + + def __init__(self, num_classes: int) -> None: + config = read_mmconfig(model_name="efficientnet_b0_light", subdir_name="multilabel_classification") + super().__init__(num_classes=num_classes, config=config) diff --git
a/src/otx/algo/classification/efficientnet_v2.py b/src/otx/algo/classification/efficientnet_v2.py new file mode 100644 index 00000000000..861278ebba3 --- /dev/null +++ b/src/otx/algo/classification/efficientnet_v2.py @@ -0,0 +1,38 @@ +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +"""EfficientNetV2 model implementation.""" + +from otx.algo.utils.mmconfig import read_mmconfig +from otx.core.model.entity.classification import ( + MMPretrainHlabelClsModel, + MMPretrainMulticlassClsModel, + MMPretrainMultilabelClsModel, +) + + +class EfficientNetV2ForHLabelCls(MMPretrainHlabelClsModel): + """EfficientNetV2 Model for hierarchical label classification task.""" + + def __init__(self, num_classes: int, num_multiclass_heads: int, num_multilabel_classes: int) -> None: + config = read_mmconfig("efficientnet_v2_light", subdir_name="hlabel_classification") + config.head.num_multiclass_heads = num_multiclass_heads + config.head.num_multilabel_classes = num_multilabel_classes + super().__init__(num_classes=num_classes, config=config) + + +class EfficientNetV2ForMulticlassCls(MMPretrainMulticlassClsModel): + """EfficientNetV2 Model for multi-class classification task.""" + + def __init__(self, num_classes: int, light: bool = True) -> None: + model_name = "efficientnet_v2_light" if light else "efficientnet_v2" + config = read_mmconfig(model_name=model_name, subdir_name="multiclass_classification") + super().__init__(num_classes=num_classes, config=config) + + +class EfficientNetV2ForMultilabelCls(MMPretrainMultilabelClsModel): + """EfficientNetV2 Model for multi-label classification task.""" + + def __init__(self, num_classes: int) -> None: + config = read_mmconfig("efficientnet_v2_light", subdir_name="multilabel_classification") + super().__init__(num_classes=num_classes, config=config) diff --git a/src/otx/algo/classification/mmconfigs/hlabel_classification/deit_tiny.yaml
b/src/otx/algo/classification/mmconfigs/hlabel_classification/deit_tiny.yaml new file mode 100644 index 00000000000..5e2585fc4ca --- /dev/null +++ b/src/otx/algo/classification/mmconfigs/hlabel_classification/deit_tiny.yaml @@ -0,0 +1,40 @@ +load_from: https://download.openmmlab.com/mmclassification/v0/deit/deit-tiny_pt-4xb256_in1k_20220218-13b382a0.pth +backbone: + arch: deit-tiny + type: VisionTransformer + img_size: 224 + patch_size: 16 +head: + num_multiclass_heads: 0 + num_multilabel_classes: 0 + in_channels: 192 + num_classes: 1000 + multiclass_loss_cfg: + loss_weight: 1.0 + type: CrossEntropyLoss + multilabel_loss_cfg: + reduction: sum + gamma_neg: 1.0 + gamma_pos: 0.0 + type: AsymmetricAngularLossWithIgnore + type: CustomHierarchicalClsHead +data_preprocessor: + mean: + - 123.675 + - 116.28 + - 103.53 + std: + - 58.395 + - 57.12 + - 57.375 + to_rgb: False + type: ClsDataPreprocessor +init_cfg: + - std: 0.2 + layer: Linear + type: TruncNormal + - bias: 0. + val: 1. + layer: LayerNorm + type: Constant +type: ImageClassifier diff --git a/src/otx/algo/classification/mmconfigs/hlabel_classification/efficientnet_b0_light.yaml b/src/otx/algo/classification/mmconfigs/hlabel_classification/efficientnet_b0_light.yaml new file mode 100644 index 00000000000..1f9e6b14ec8 --- /dev/null +++ b/src/otx/algo/classification/mmconfigs/hlabel_classification/efficientnet_b0_light.yaml @@ -0,0 +1,32 @@ +backbone: + version: b0 + pretrained: true + type: OTXEfficientNet +head: + num_multiclass_heads: 0 + num_multilabel_classes: 0 + in_channels: 1280 + num_classes: 1000 + multiclass_loss_cfg: + loss_weight: 1.0 + type: CrossEntropyLoss + multilabel_loss_cfg: + reduction: sum + gamma_neg: 1.0 + gamma_pos: 0.0 + type: AsymmetricAngularLossWithIgnore + type: CustomHierarchicalClsHead +neck: + type: GlobalAveragePooling +data_preprocessor: + mean: + - 123.675 + - 116.28 + - 103.53 + std: + - 58.395 + - 57.12 + - 57.375 + to_rgb: False + type: ClsDataPreprocessor +type: ImageClassifier 
diff --git a/src/otx/algo/classification/mmconfigs/hlabel_classification/efficientnet_v2_light.yaml b/src/otx/algo/classification/mmconfigs/hlabel_classification/efficientnet_v2_light.yaml new file mode 100644 index 00000000000..57e9ef6dd29 --- /dev/null +++ b/src/otx/algo/classification/mmconfigs/hlabel_classification/efficientnet_v2_light.yaml @@ -0,0 +1,33 @@ +backbone: + pretrained: true + type: OTXEfficientNetV2 +head: + num_multiclass_heads: 0 + num_multilabel_classes: 0 + in_channels: 1280 + num_classes: 1000 + multiclass_loss_cfg: + loss_weight: 1.0 + type: CrossEntropyLoss + multilabel_loss_cfg: + reduction: sum + gamma_neg: 1.0 + gamma_pos: 0.0 + type: AsymmetricAngularLossWithIgnore + normalized: true + scale: 7.0 + type: CustomHierarchicalClsHead +neck: + type: GlobalAveragePooling +data_preprocessor: + mean: + - 123.675 + - 116.28 + - 103.53 + std: + - 58.395 + - 57.12 + - 57.375 + to_rgb: False + type: ClsDataPreprocessor +type: ImageClassifier diff --git a/src/otx/algo/classification/mmconfigs/hlabel_classification/mobilenet_v3_large_light.yaml b/src/otx/algo/classification/mmconfigs/hlabel_classification/mobilenet_v3_large_light.yaml new file mode 100644 index 00000000000..edac4f3c49c --- /dev/null +++ b/src/otx/algo/classification/mmconfigs/hlabel_classification/mobilenet_v3_large_light.yaml @@ -0,0 +1,30 @@ +backbone: + type: OTXMobileNetV3 +head: + num_multiclass_heads: 0 + num_multilabel_classes: 0 + in_channels: 960 + num_classes: 1000 + multiclass_loss_cfg: + loss_weight: 1.0 + type: CrossEntropyLoss + multilabel_loss_cfg: + reduction: sum + gamma_neg: 1.0 + gamma_pos: 0.0 + type: AsymmetricAngularLossWithIgnore + type: CustomHierarchicalClsHead +neck: + type: GlobalAveragePooling +data_preprocessor: + mean: + - 123.675 + - 116.28 + - 103.53 + std: + - 58.395 + - 57.12 + - 57.375 + to_rgb: False + type: ClsDataPreprocessor +type: ImageClassifier diff --git a/src/otx/algo/classification/mmconfigs/multiclass_classification/deit_tiny.yaml 
b/src/otx/algo/classification/mmconfigs/multiclass_classification/deit_tiny.yaml new file mode 100644 index 00000000000..3832334c0d0 --- /dev/null +++ b/src/otx/algo/classification/mmconfigs/multiclass_classification/deit_tiny.yaml @@ -0,0 +1,33 @@ +load_from: https://download.openmmlab.com/mmclassification/v0/deit/deit-tiny_pt-4xb256_in1k_20220218-13b382a0.pth +backbone: + arch: deit-tiny + type: VisionTransformer + img_size: 224 + patch_size: 16 +head: + loss: + loss_weight: 1.0 + type: CrossEntropyLoss + in_channels: 192 + num_classes: 1000 + type: VisionTransformerClsHead +data_preprocessor: + mean: + - 123.675 + - 116.28 + - 103.53 + std: + - 58.395 + - 57.12 + - 57.375 + to_rgb: False + type: ClsDataPreprocessor +init_cfg: + - std: 0.2 + layer: Linear + type: TruncNormal + - bias: 0. + val: 1. + layer: LayerNorm + type: Constant +type: ImageClassifier diff --git a/src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_b0.yaml b/src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_b0.yaml new file mode 100644 index 00000000000..4a6496424a7 --- /dev/null +++ b/src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_b0.yaml @@ -0,0 +1,39 @@ +backbone: + version: b0 + pretrained: true + type: OTXEfficientNet +head: + act_cfg: + type: HSwish + dropout_rate: 0.2 + in_channels: 1280 + init_cfg: + bias: 0.0 + layer: Linear + mean: 0.0 + std: 0.01 + type: Normal + loss: + loss_weight: 1.0 + type: CrossEntropyLoss + mid_channels: + - 1280 + num_classes: 1000 + topk: + - 1 + - 5 + type: StackedLinearClsHead +neck: + type: GlobalAveragePooling +data_preprocessor: + mean: + - 123.675 + - 116.28 + - 103.53 + std: + - 58.395 + - 57.12 + - 57.375 + to_rgb: False + type: ClsDataPreprocessor +type: ImageClassifier diff --git a/src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_b0_light.yaml b/src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_b0_light.yaml new 
file mode 100644 index 00000000000..38b5e8c373b --- /dev/null +++ b/src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_b0_light.yaml @@ -0,0 +1,28 @@ +backbone: + version: b0 + pretrained: true + type: OTXEfficientNet +head: + in_channels: 1280 + loss: + loss_weight: 1.0 + type: CrossEntropyLoss + num_classes: 1000 + topk: + - 1 + - 5 + type: LinearClsHead +neck: + type: GlobalAveragePooling +data_preprocessor: + mean: + - 123.675 + - 116.28 + - 103.53 + std: + - 58.395 + - 57.12 + - 57.375 + to_rgb: False + type: ClsDataPreprocessor +type: ImageClassifier diff --git a/src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_v2.yaml b/src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_v2.yaml new file mode 100644 index 00000000000..077d00d3a6c --- /dev/null +++ b/src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_v2.yaml @@ -0,0 +1,38 @@ +backbone: + pretrained: true + type: OTXEfficientNetV2 +head: + act_cfg: + type: HSwish + dropout_rate: 0.2 + in_channels: 1280 + init_cfg: + bias: 0.0 + layer: Linear + mean: 0.0 + std: 0.01 + type: Normal + loss: + loss_weight: 1.0 + type: CrossEntropyLoss + mid_channels: + - 1280 + num_classes: 1000 + topk: + - 1 + - 5 + type: StackedLinearClsHead +neck: + type: GlobalAveragePooling +data_preprocessor: + mean: + - 123.675 + - 116.28 + - 103.53 + std: + - 58.395 + - 57.12 + - 57.375 + to_rgb: False + type: ClsDataPreprocessor +type: ImageClassifier diff --git a/src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_v2_light.yaml b/src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_v2_light.yaml new file mode 100644 index 00000000000..c2599fa9605 --- /dev/null +++ b/src/otx/algo/classification/mmconfigs/multiclass_classification/efficientnet_v2_light.yaml @@ -0,0 +1,27 @@ +backbone: + pretrained: true + type: OTXEfficientNetV2 +head: + in_channels: 1280 + loss: + loss_weight: 1.0 + 
type: CrossEntropyLoss + num_classes: 1000 + topk: + - 1 + - 5 + type: LinearClsHead +neck: + type: GlobalAveragePooling +data_preprocessor: + mean: + - 123.675 + - 116.28 + - 103.53 + std: + - 58.395 + - 57.12 + - 57.375 + to_rgb: False + type: ClsDataPreprocessor +type: ImageClassifier diff --git a/src/otx/algo/classification/mmconfigs/multiclass_classification/mobilenet_v3_large.yaml b/src/otx/algo/classification/mmconfigs/multiclass_classification/mobilenet_v3_large.yaml new file mode 100644 index 00000000000..73e539b12fa --- /dev/null +++ b/src/otx/algo/classification/mmconfigs/multiclass_classification/mobilenet_v3_large.yaml @@ -0,0 +1,37 @@ +backbone: + type: OTXMobileNetV3 +head: + act_cfg: + type: HSwish + dropout_rate: 0.2 + in_channels: 960 + init_cfg: + bias: 0.0 + layer: Linear + mean: 0.0 + std: 0.01 + type: Normal + loss: + loss_weight: 1.0 + type: CrossEntropyLoss + mid_channels: + - 1280 + num_classes: 1000 + topk: + - 1 + - 5 + type: StackedLinearClsHead +neck: + type: GlobalAveragePooling +data_preprocessor: + mean: + - 123.675 + - 116.28 + - 103.53 + std: + - 58.395 + - 57.12 + - 57.375 + to_rgb: False + type: ClsDataPreprocessor +type: ImageClassifier diff --git a/src/otx/algo/classification/mmconfigs/multiclass_classification/mobilenet_v3_large_light.yaml b/src/otx/algo/classification/mmconfigs/multiclass_classification/mobilenet_v3_large_light.yaml new file mode 100644 index 00000000000..5da80351d46 --- /dev/null +++ b/src/otx/algo/classification/mmconfigs/multiclass_classification/mobilenet_v3_large_light.yaml @@ -0,0 +1,26 @@ +backbone: + type: OTXMobileNetV3 +head: + in_channels: 960 + loss: + loss_weight: 1.0 + type: CrossEntropyLoss + num_classes: 1000 + topk: + - 1 + - 5 + type: LinearClsHead +neck: + type: GlobalAveragePooling +data_preprocessor: + mean: + - 123.675 + - 116.28 + - 103.53 + std: + - 58.395 + - 57.12 + - 57.375 + to_rgb: False + type: ClsDataPreprocessor +type: ImageClassifier diff --git 
a/src/otx/algo/classification/mmconfigs/multilabel_classification/deit_tiny.yaml b/src/otx/algo/classification/mmconfigs/multilabel_classification/deit_tiny.yaml new file mode 100644 index 00000000000..7074faf4632 --- /dev/null +++ b/src/otx/algo/classification/mmconfigs/multilabel_classification/deit_tiny.yaml @@ -0,0 +1,32 @@ +load_from: https://download.openmmlab.com/mmclassification/v0/deit/deit-tiny_pt-4xb256_in1k_20220218-13b382a0.pth +backbone: + arch: deit-tiny + type: VisionTransformer + img_size: 224 + patch_size: 16 +head: + in_channels: 192 + num_classes: 1000 + loss: + type: AsymmetricAngularLossWithIgnore + type: CustomMultiLabelLinearClsHead +data_preprocessor: + mean: + - 123.675 + - 116.28 + - 103.53 + std: + - 58.395 + - 57.12 + - 57.375 + to_rgb: False + type: ClsDataPreprocessor +init_cfg: + - std: 0.2 + layer: Linear + type: TruncNormal + - bias: 0. + val: 1. + layer: LayerNorm + type: Constant +type: ImageClassifier diff --git a/src/otx/algo/classification/mmconfigs/multilabel_classification/efficientnet_b0_light.yaml b/src/otx/algo/classification/mmconfigs/multilabel_classification/efficientnet_b0_light.yaml new file mode 100644 index 00000000000..8e4cc2af946 --- /dev/null +++ b/src/otx/algo/classification/mmconfigs/multilabel_classification/efficientnet_b0_light.yaml @@ -0,0 +1,29 @@ +backbone: + version: b0 + pretrained: true + type: OTXEfficientNet +head: + num_classes: 1000 + in_channels: 1280 + loss: + reduction: sum + gamma_neg: 1.0 + gamma_pos: 0.0 + type: AsymmetricAngularLossWithIgnore + normalized: true + scale: 7.0 + type: CustomMultiLabelLinearClsHead +neck: + type: GlobalAveragePooling +data_preprocessor: + mean: + - 123.675 + - 116.28 + - 103.53 + std: + - 58.395 + - 57.12 + - 57.375 + to_rgb: False + type: ClsDataPreprocessor +type: ImageClassifier diff --git a/src/otx/algo/classification/mmconfigs/multilabel_classification/efficientnet_v2_light.yaml 
b/src/otx/algo/classification/mmconfigs/multilabel_classification/efficientnet_v2_light.yaml new file mode 100644 index 00000000000..29eb048563c --- /dev/null +++ b/src/otx/algo/classification/mmconfigs/multilabel_classification/efficientnet_v2_light.yaml @@ -0,0 +1,28 @@ +backbone: + pretrained: true + type: OTXEfficientNetV2 +head: + in_channels: 1280 + num_classes: 1000 + loss: + reduction: sum + gamma_neg: 1.0 + gamma_pos: 0.0 + type: AsymmetricAngularLossWithIgnore + normalized: true + scale: 7.0 + type: CustomMultiLabelLinearClsHead +neck: + type: GlobalAveragePooling +data_preprocessor: + mean: + - 123.675 + - 116.28 + - 103.53 + std: + - 58.395 + - 57.12 + - 57.375 + to_rgb: False + type: ClsDataPreprocessor +type: ImageClassifier diff --git a/src/otx/algo/classification/mmconfigs/multilabel_classification/mobilenet_v3_large_light.yaml b/src/otx/algo/classification/mmconfigs/multilabel_classification/mobilenet_v3_large_light.yaml new file mode 100644 index 00000000000..b183d39773c --- /dev/null +++ b/src/otx/algo/classification/mmconfigs/multilabel_classification/mobilenet_v3_large_light.yaml @@ -0,0 +1,30 @@ +backbone: + type: OTXMobileNetV3 +head: + num_classes: 1000 + in_channels: 960 + hid_channels: 1280 + loss: + reduction: sum + gamma_neg: 1.0 + gamma_pos: 0.0 + type: AsymmetricAngularLossWithIgnore + normalized: true + scale: 7.0 + act_cfg: + type: PReLU + type: CustomMultiLabelNonLinearClsHead +neck: + type: GlobalAveragePooling +data_preprocessor: + mean: + - 123.675 + - 116.28 + - 103.53 + std: + - 58.395 + - 57.12 + - 57.375 + to_rgb: False + type: ClsDataPreprocessor +type: ImageClassifier diff --git a/src/otx/algo/classification/mobilenet_v3_large.py b/src/otx/algo/classification/mobilenet_v3_large.py new file mode 100644 index 00000000000..126ccb00d44 --- /dev/null +++ b/src/otx/algo/classification/mobilenet_v3_large.py @@ -0,0 +1,38 @@ +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +"""MobileNetV3 model 
implementation.""" + +from otx.algo.utils.mmconfig import read_mmconfig +from otx.core.model.entity.classification import ( + MMPretrainHlabelClsModel, + MMPretrainMulticlassClsModel, + MMPretrainMultilabelClsModel, +) + + +class MobileNetV3ForHLabelCls(MMPretrainHlabelClsModel): + """MobileNetV3 Model for hierarchical label classification task.""" + + def __init__(self, num_classes: int, num_multiclass_heads: int, num_multilabel_classes: int) -> None: + config = read_mmconfig(model_name="mobilenet_v3_large_light", subdir_name="hlabel_classification") + config.head.num_multiclass_heads = num_multiclass_heads + config.head.num_multilabel_classes = num_multilabel_classes + super().__init__(num_classes=num_classes, config=config) + + +class MobileNetV3ForMulticlassCls(MMPretrainMulticlassClsModel): + """MobileNetV3 Model for multi-label classification task.""" + + def __init__(self, num_classes: int, light: bool = True) -> None: + model_name = "mobilenet_v3_large_light" if light else "otx_mobilenet_v3_large" + config = read_mmconfig(model_name=model_name, subdir_name="multiclass_classification") + super().__init__(num_classes=num_classes, config=config) + + +class MobileNetV3ForMultilabelCls(MMPretrainMultilabelClsModel): + """MobileNetV3 Model for multi-class classification task.""" + + def __init__(self, num_classes: int) -> None: + config = read_mmconfig("mobilenet_v3_large_light", subdir_name="multilabel_classification") + super().__init__(num_classes=num_classes, config=config) diff --git a/src/otx/algo/utils/mmconfig.py b/src/otx/algo/utils/mmconfig.py index 4860950f1f7..501bbeb5242 100644 --- a/src/otx/algo/utils/mmconfig.py +++ b/src/otx/algo/utils/mmconfig.py @@ -9,23 +9,23 @@ from omegaconf import DictConfig, OmegaConf -def read_mmconfig(model_name: str) -> DictConfig: +def read_mmconfig(model_name: str, subdir_name: str = ".") -> DictConfig: """Read MMConfig. 
It try to read MMConfig from the yaml file which exists in - `/mmconfigs/.yaml` + `/mmconfigs//.yaml` """ frame = inspect.stack()[1] module = inspect.getmodule(frame[0]) - if module is None or module.__file__ is None: + if module is None or (mod_fpath := module.__file__) is None: msg = "Cannot get valid model from stack" raise RuntimeError(msg) - root_dir = Path().parent / "mmconfigs" - fpath = root_dir / f"{model_name}.yaml" + root_dir = Path(mod_fpath).parent / "mmconfigs" / subdir_name + yaml_fpath = root_dir / f"{model_name}.yaml" - if not fpath.exists(): + if not yaml_fpath.exists(): raise FileNotFoundError - return OmegaConf.load(fpath) + return OmegaConf.load(yaml_fpath) diff --git a/src/otx/config/model/hlabel_mmpretrain.yaml b/src/otx/config/model/hlabel_mmpretrain.yaml index aa4cbca9c7f..ba9ba3d6dd5 100644 --- a/src/otx/config/model/hlabel_mmpretrain.yaml +++ b/src/otx/config/model/hlabel_mmpretrain.yaml @@ -5,7 +5,9 @@ _target_: otx.core.model.module.classification.OTXHlabelClsLitModule otx_model: _target_: otx.core.model.entity.classification.MMPretrainHlabelClsModel - config: ??? + num_classes: ??? + num_multiclass_heads: ??? + num_multilabel_classes: ??? # compile model for faster training with pytorch 2.0 torch_compile: false diff --git a/src/otx/config/model/multiclass_mmpretrain.yaml b/src/otx/config/model/multiclass_mmpretrain.yaml index 6688c5a1650..b51b2832179 100644 --- a/src/otx/config/model/multiclass_mmpretrain.yaml +++ b/src/otx/config/model/multiclass_mmpretrain.yaml @@ -5,7 +5,7 @@ _target_: otx.core.model.module.classification.OTXMulticlassClsLitModule otx_model: _target_: otx.core.model.entity.classification.MMPretrainMulticlassClsModel - config: ??? + num_classes: ??? 
# compile model for faster training with pytorch 2.0 torch_compile: false diff --git a/src/otx/config/model/multilabel_mmpretrain.yaml b/src/otx/config/model/multilabel_mmpretrain.yaml index f86b8bf081e..320007faa95 100644 --- a/src/otx/config/model/multilabel_mmpretrain.yaml +++ b/src/otx/config/model/multilabel_mmpretrain.yaml @@ -5,7 +5,7 @@ _target_: otx.core.model.module.classification.OTXMultilabelClsLitModule otx_model: _target_: otx.core.model.entity.classification.MMPretrainMultilabelClsModel - config: ??? + num_classes: ??? # compile model for faster training with pytorch 2.0 torch_compile: false diff --git a/src/otx/recipe/hlabel_classification/efficientnet_b0_light.yaml b/src/otx/recipe/hlabel_classification/efficientnet_b0_light.yaml index 227c5224ba4..2f8b3d1e64d 100644 --- a/src/otx/recipe/hlabel_classification/efficientnet_b0_light.yaml +++ b/src/otx/recipe/hlabel_classification/efficientnet_b0_light.yaml @@ -37,39 +37,7 @@ data: - type: PackInputs model: otx_model: - config: - backbone: - version: b0 - pretrained: true - type: OTXEfficientNet - head: - num_multiclass_heads: 0 - num_multilabel_classes: 0 - in_channels: 1280 - num_classes: 1000 - multiclass_loss_cfg: - loss_weight: 1.0 - type: CrossEntropyLoss - multilabel_loss_cfg: - reduction: sum - gamma_neg: 1.0 - gamma_pos: 0.0 - type: AsymmetricAngularLossWithIgnore - type: CustomHierarchicalClsHead - neck: - type: GlobalAveragePooling - data_preprocessor: - mean: - - 123.675 - - 116.28 - - 103.53 - std: - - 58.395 - - 57.12 - - 57.375 - to_rgb: False - type: ClsDataPreprocessor - type: ImageClassifier + _target_: otx.algo.classification.efficientnet_b0.EfficientNetB0ForHLabelCls optimizer: _target_: torch.optim.SGD lr: 0.0049 diff --git a/src/otx/recipe/hlabel_classification/efficientnet_v2_light.yaml b/src/otx/recipe/hlabel_classification/efficientnet_v2_light.yaml index 0872a998655..38545bdaa9c 100644 --- a/src/otx/recipe/hlabel_classification/efficientnet_v2_light.yaml +++ 
b/src/otx/recipe/hlabel_classification/efficientnet_v2_light.yaml @@ -40,40 +40,7 @@ data: - type: PackInputs model: otx_model: - config: - backbone: - pretrained: true - type: OTXEfficientNetV2 - head: - num_multiclass_heads: 0 - num_multilabel_classes: 0 - in_channels: 1280 - num_classes: 1000 - multiclass_loss_cfg: - loss_weight: 1.0 - type: CrossEntropyLoss - multilabel_loss_cfg: - reduction: sum - gamma_neg: 1.0 - gamma_pos: 0.0 - type: AsymmetricAngularLossWithIgnore - normalized: true - scale: 7.0 - type: CustomHierarchicalClsHead - neck: - type: GlobalAveragePooling - data_preprocessor: - mean: - - 123.675 - - 116.28 - - 103.53 - std: - - 58.395 - - 57.12 - - 57.375 - to_rgb: False - type: ClsDataPreprocessor - type: ImageClassifier + _target_: otx.algo.classification.efficientnet_v2.EfficientNetV2ForHLabelCls optimizer: _target_: torch.optim.SGD _partial_: true diff --git a/src/otx/recipe/hlabel_classification/mobilenet_v3_large_light.yaml b/src/otx/recipe/hlabel_classification/mobilenet_v3_large_light.yaml index 20e63b6a33a..52c5089ac72 100644 --- a/src/otx/recipe/hlabel_classification/mobilenet_v3_large_light.yaml +++ b/src/otx/recipe/hlabel_classification/mobilenet_v3_large_light.yaml @@ -40,37 +40,7 @@ data: - type: PackInputs model: otx_model: - config: - backbone: - type: OTXMobileNetV3 - head: - num_multiclass_heads: 0 - num_multilabel_classes: 0 - in_channels: 960 - num_classes: 1000 - multiclass_loss_cfg: - loss_weight: 1.0 - type: CrossEntropyLoss - multilabel_loss_cfg: - reduction: sum - gamma_neg: 1.0 - gamma_pos: 0.0 - type: AsymmetricAngularLossWithIgnore - type: CustomHierarchicalClsHead - neck: - type: GlobalAveragePooling - data_preprocessor: - mean: - - 123.675 - - 116.28 - - 103.53 - std: - - 58.395 - - 57.12 - - 57.375 - to_rgb: False - type: ClsDataPreprocessor - type: ImageClassifier + _target_: otx.algo.classification.mobilenet_v3_large.MobileNetV3ForHLabelCls optimizer: _target_: torch.optim.SGD _partial_: true diff --git 
a/src/otx/recipe/hlabel_classification/otx_deit_tiny.yaml b/src/otx/recipe/hlabel_classification/otx_deit_tiny.yaml index 6a8749ecfba..ec37e0d30c2 100644 --- a/src/otx/recipe/hlabel_classification/otx_deit_tiny.yaml +++ b/src/otx/recipe/hlabel_classification/otx_deit_tiny.yaml @@ -37,47 +37,7 @@ data: - type: PackInputs model: otx_model: - config: - load_from: https://download.openmmlab.com/mmclassification/v0/deit/deit-tiny_pt-4xb256_in1k_20220218-13b382a0.pth - backbone: - arch: deit-tiny - type: VisionTransformer - img_size: 224 - patch_size: 16 - head: - num_multiclass_heads: 0 - num_multilabel_classes: 0 - in_channels: 192 - num_classes: 1000 - multiclass_loss_cfg: - loss_weight: 1.0 - type: CrossEntropyLoss - multilabel_loss_cfg: - reduction: sum - gamma_neg: 1.0 - gamma_pos: 0.0 - type: AsymmetricAngularLossWithIgnore - type: CustomHierarchicalClsHead - data_preprocessor: - mean: - - 123.675 - - 116.28 - - 103.53 - std: - - 58.395 - - 57.12 - - 57.375 - to_rgb: False - type: ClsDataPreprocessor - init_cfg: - - std: 0.2 - layer: Linear - type: TruncNormal - - bias: 0. - val: 1. 
- layer: LayerNorm - type: Constant - type: ImageClassifier + _target_: otx.algo.classification.deit_tiny.DeitTinyForHLabelCls optimizer: _target_: torch.optim.SGD _partial_: true diff --git a/src/otx/recipe/multiclass_classification/efficientnet_b0_light.yaml b/src/otx/recipe/multiclass_classification/efficientnet_b0_light.yaml index 4af293e853c..6a53ceb5403 100644 --- a/src/otx/recipe/multiclass_classification/efficientnet_b0_light.yaml +++ b/src/otx/recipe/multiclass_classification/efficientnet_b0_light.yaml @@ -37,35 +37,8 @@ data: - type: PackInputs model: otx_model: - config: - backbone: - version: b0 - pretrained: true - type: OTXEfficientNet - head: - in_channels: 1280 - loss: - loss_weight: 1.0 - type: CrossEntropyLoss - num_classes: 1000 - topk: - - 1 - - 5 - type: LinearClsHead - neck: - type: GlobalAveragePooling - data_preprocessor: - mean: - - 123.675 - - 116.28 - - 103.53 - std: - - 58.395 - - 57.12 - - 57.375 - to_rgb: False - type: ClsDataPreprocessor - type: ImageClassifier + _target_: otx.algo.classification.efficientnet_b0.EfficientNetB0ForMulticlassCls + light: True optimizer: _target_: torch.optim.SGD _partial_: true diff --git a/src/otx/recipe/multiclass_classification/efficientnet_v2_light.yaml b/src/otx/recipe/multiclass_classification/efficientnet_v2_light.yaml index bcba891ed3f..2490ac766d2 100644 --- a/src/otx/recipe/multiclass_classification/efficientnet_v2_light.yaml +++ b/src/otx/recipe/multiclass_classification/efficientnet_v2_light.yaml @@ -40,34 +40,8 @@ data: - type: PackInputs model: otx_model: - config: - backbone: - pretrained: true - type: OTXEfficientNetV2 - head: - in_channels: 1280 - loss: - loss_weight: 1.0 - type: CrossEntropyLoss - num_classes: 1000 - topk: - - 1 - - 5 - type: LinearClsHead - neck: - type: GlobalAveragePooling - data_preprocessor: - mean: - - 123.675 - - 116.28 - - 103.53 - std: - - 58.395 - - 57.12 - - 57.375 - to_rgb: False - type: ClsDataPreprocessor - type: ImageClassifier + _target_: 
otx.algo.classification.efficientnet_v2.EfficientNetV2ForMulticlassCls + light: True optimizer: _target_: torch.optim.SGD _partial_: true diff --git a/src/otx/recipe/multiclass_classification/mobilenet_v3_large_light.yaml b/src/otx/recipe/multiclass_classification/mobilenet_v3_large_light.yaml index 3b5d70f9cd1..4bdcc22c2b6 100644 --- a/src/otx/recipe/multiclass_classification/mobilenet_v3_large_light.yaml +++ b/src/otx/recipe/multiclass_classification/mobilenet_v3_large_light.yaml @@ -40,33 +40,8 @@ data: - type: PackInputs model: otx_model: - config: - backbone: - type: OTXMobileNetV3 - head: - in_channels: 960 - loss: - loss_weight: 1.0 - type: CrossEntropyLoss - num_classes: 1000 - topk: - - 1 - - 5 - type: LinearClsHead - neck: - type: GlobalAveragePooling - data_preprocessor: - mean: - - 123.675 - - 116.28 - - 103.53 - std: - - 58.395 - - 57.12 - - 57.375 - to_rgb: False - type: ClsDataPreprocessor - type: ImageClassifier + _target_: otx.algo.classification.mobilenet_v3_large.MobileNetV3ForMulticlassCls + light: True optimizer: _target_: torch.optim.SGD _partial_: true diff --git a/src/otx/recipe/multiclass_classification/otx_deit_tiny.yaml b/src/otx/recipe/multiclass_classification/otx_deit_tiny.yaml index a3670eccb1d..ff11b2f3c6f 100644 --- a/src/otx/recipe/multiclass_classification/otx_deit_tiny.yaml +++ b/src/otx/recipe/multiclass_classification/otx_deit_tiny.yaml @@ -37,40 +37,7 @@ data: - type: PackInputs model: otx_model: - config: - load_from: https://download.openmmlab.com/mmclassification/v0/deit/deit-tiny_pt-4xb256_in1k_20220218-13b382a0.pth - backbone: - arch: deit-tiny - type: VisionTransformer - img_size: 224 - patch_size: 16 - head: - loss: - loss_weight: 1.0 - type: CrossEntropyLoss - in_channels: 192 - num_classes: 1000 - type: VisionTransformerClsHead - data_preprocessor: - mean: - - 123.675 - - 116.28 - - 103.53 - std: - - 58.395 - - 57.12 - - 57.375 - to_rgb: False - type: ClsDataPreprocessor - init_cfg: - - std: 0.2 - layer: Linear - 
type: TruncNormal - - bias: 0. - val: 1. - layer: LayerNorm - type: Constant - type: ImageClassifier + _target_: otx.algo.classification.deit_tiny.DeitTinyForMulticlassCls optimizer: _target_: torch.optim.SGD _partial_: true diff --git a/src/otx/recipe/multiclass_classification/otx_efficientnet_b0.yaml b/src/otx/recipe/multiclass_classification/otx_efficientnet_b0.yaml index 813490b79b4..dd7a80dc6fd 100644 --- a/src/otx/recipe/multiclass_classification/otx_efficientnet_b0.yaml +++ b/src/otx/recipe/multiclass_classification/otx_efficientnet_b0.yaml @@ -37,46 +37,7 @@ data: - type: PackInputs model: otx_model: - config: - backbone: - version: b0 - pretrained: true - type: OTXEfficientNet - head: - act_cfg: - type: HSwish - dropout_rate: 0.2 - in_channels: 1280 - init_cfg: - bias: 0.0 - layer: Linear - mean: 0.0 - std: 0.01 - type: Normal - loss: - loss_weight: 1.0 - type: CrossEntropyLoss - mid_channels: - - 1280 - num_classes: 1000 - topk: - - 1 - - 5 - type: StackedLinearClsHead - neck: - type: GlobalAveragePooling - data_preprocessor: - mean: - - 123.675 - - 116.28 - - 103.53 - std: - - 58.395 - - 57.12 - - 57.375 - to_rgb: False - type: ClsDataPreprocessor - type: ImageClassifier + _target_: otx.algo.classification.efficientnet_b0.EfficientNetB0ForMulticlassCls optimizer: _target_: torch.optim.SGD _partial_: true diff --git a/src/otx/recipe/multiclass_classification/otx_efficientnet_v2.yaml b/src/otx/recipe/multiclass_classification/otx_efficientnet_v2.yaml index a8f8a1f72e9..31c1d603967 100644 --- a/src/otx/recipe/multiclass_classification/otx_efficientnet_v2.yaml +++ b/src/otx/recipe/multiclass_classification/otx_efficientnet_v2.yaml @@ -40,45 +40,7 @@ data: - type: PackInputs model: otx_model: - config: - backbone: - pretrained: true - type: OTXEfficientNetV2 - head: - act_cfg: - type: HSwish - dropout_rate: 0.2 - in_channels: 1280 - init_cfg: - bias: 0.0 - layer: Linear - mean: 0.0 - std: 0.01 - type: Normal - loss: - loss_weight: 1.0 - type: CrossEntropyLoss 
- mid_channels: - - 1280 - num_classes: 1000 - topk: - - 1 - - 5 - type: StackedLinearClsHead - neck: - type: GlobalAveragePooling - data_preprocessor: - mean: - - 123.675 - - 116.28 - - 103.53 - std: - - 58.395 - - 57.12 - - 57.375 - to_rgb: False - type: ClsDataPreprocessor - type: ImageClassifier + _target_: otx.algo.classification.efficientnet_v2.EfficientNetV2ForMulticlassCls optimizer: _target_: torch.optim.SGD _partial_: true diff --git a/src/otx/recipe/multiclass_classification/otx_mobilenet_v3_large.yaml b/src/otx/recipe/multiclass_classification/otx_mobilenet_v3_large.yaml index d102ad1017e..09ceb9e555b 100644 --- a/src/otx/recipe/multiclass_classification/otx_mobilenet_v3_large.yaml +++ b/src/otx/recipe/multiclass_classification/otx_mobilenet_v3_large.yaml @@ -40,44 +40,7 @@ data: - type: PackInputs model: otx_model: - config: - backbone: - type: OTXMobileNetV3 - head: - act_cfg: - type: HSwish - dropout_rate: 0.2 - in_channels: 960 - init_cfg: - bias: 0.0 - layer: Linear - mean: 0.0 - std: 0.01 - type: Normal - loss: - loss_weight: 1.0 - type: CrossEntropyLoss - mid_channels: - - 1280 - num_classes: 1000 - topk: - - 1 - - 5 - type: StackedLinearClsHead - neck: - type: GlobalAveragePooling - data_preprocessor: - mean: - - 123.675 - - 116.28 - - 103.53 - std: - - 58.395 - - 57.12 - - 57.375 - to_rgb: False - type: ClsDataPreprocessor - type: ImageClassifier + _target_: otx.algo.classification.mobilenet_v3_large.MobileNetV3ForMulticlassCls optimizer: _target_: torch.optim.SGD _partial_: true diff --git a/src/otx/recipe/multilabel_classification/efficientnet_b0_light.yaml b/src/otx/recipe/multilabel_classification/efficientnet_b0_light.yaml index 60c8f196fcd..a1e6cadc31c 100644 --- a/src/otx/recipe/multilabel_classification/efficientnet_b0_light.yaml +++ b/src/otx/recipe/multilabel_classification/efficientnet_b0_light.yaml @@ -37,36 +37,7 @@ data: - type: PackInputs model: otx_model: - config: - backbone: - version: b0 - pretrained: true - type: 
OTXEfficientNet - head: - num_classes: 1000 - in_channels: 1280 - loss: - reduction: sum - gamma_neg: 1.0 - gamma_pos: 0.0 - type: AsymmetricAngularLossWithIgnore - normalized: true - scale: 7.0 - type: CustomMultiLabelLinearClsHead - neck: - type: GlobalAveragePooling - data_preprocessor: - mean: - - 123.675 - - 116.28 - - 103.53 - std: - - 58.395 - - 57.12 - - 57.375 - to_rgb: False - type: ClsDataPreprocessor - type: ImageClassifier + _target_: otx.algo.classification.efficientnet_b0.EfficientNetB0ForMultilabelCls optimizer: _target_: torch.optim.SGD lr: 0.0049 diff --git a/src/otx/recipe/multilabel_classification/efficientnet_v2_light.yaml b/src/otx/recipe/multilabel_classification/efficientnet_v2_light.yaml index 4ab9ba2e2d1..b7e8e238dae 100644 --- a/src/otx/recipe/multilabel_classification/efficientnet_v2_light.yaml +++ b/src/otx/recipe/multilabel_classification/efficientnet_v2_light.yaml @@ -40,35 +40,7 @@ data: - type: PackInputs model: otx_model: - config: - backbone: - pretrained: true - type: OTXEfficientNetV2 - head: - in_channels: 1280 - num_classes: 1000 - loss: - reduction: sum - gamma_neg: 1.0 - gamma_pos: 0.0 - type: AsymmetricAngularLossWithIgnore - normalized: true - scale: 7.0 - type: CustomMultiLabelLinearClsHead - neck: - type: GlobalAveragePooling - data_preprocessor: - mean: - - 123.675 - - 116.28 - - 103.53 - std: - - 58.395 - - 57.12 - - 57.375 - to_rgb: False - type: ClsDataPreprocessor - type: ImageClassifier + _target_: otx.algo.classification.efficientnet_v2.EfficientNetV2ForMultilabelCls optimizer: _target_: torch.optim.SGD _partial_: true diff --git a/src/otx/recipe/multilabel_classification/mobilenet_v3_large_light.yaml b/src/otx/recipe/multilabel_classification/mobilenet_v3_large_light.yaml index 204edbdc6c5..53695f65cb0 100644 --- a/src/otx/recipe/multilabel_classification/mobilenet_v3_large_light.yaml +++ b/src/otx/recipe/multilabel_classification/mobilenet_v3_large_light.yaml @@ -40,37 +40,7 @@ data: - type: PackInputs model: 
otx_model: - config: - backbone: - type: OTXMobileNetV3 - head: - num_classes: 1000 - in_channels: 960 - hid_channels: 1280 - loss: - reduction: sum - gamma_neg: 1.0 - gamma_pos: 0.0 - type: AsymmetricAngularLossWithIgnore - normalized: true - scale: 7.0 - act_cfg: - type: PReLU - type: CustomMultiLabelNonLinearClsHead - neck: - type: GlobalAveragePooling - data_preprocessor: - mean: - - 123.675 - - 116.28 - - 103.53 - std: - - 58.395 - - 57.12 - - 57.375 - to_rgb: False - type: ClsDataPreprocessor - type: ImageClassifier + _target_: otx.algo.classification.mobilenet_v3_large.MobileNetV3ForMultilabelCls optimizer: _target_: torch.optim.SGD _partial_: true diff --git a/src/otx/recipe/multilabel_classification/otx_deit_tiny.yaml b/src/otx/recipe/multilabel_classification/otx_deit_tiny.yaml index e39d06ccb40..67f02b66077 100644 --- a/src/otx/recipe/multilabel_classification/otx_deit_tiny.yaml +++ b/src/otx/recipe/multilabel_classification/otx_deit_tiny.yaml @@ -37,39 +37,7 @@ data: - type: PackInputs model: otx_model: - config: - load_from: https://download.openmmlab.com/mmclassification/v0/deit/deit-tiny_pt-4xb256_in1k_20220218-13b382a0.pth - backbone: - arch: deit-tiny - type: VisionTransformer - img_size: 224 - patch_size: 16 - head: - in_channels: 192 - num_classes: 1000 - loss: - type: AsymmetricAngularLossWithIgnore - type: CustomMultiLabelLinearClsHead - data_preprocessor: - mean: - - 123.675 - - 116.28 - - 103.53 - std: - - 58.395 - - 57.12 - - 57.375 - to_rgb: False - type: ClsDataPreprocessor - init_cfg: - - std: 0.2 - layer: Linear - type: TruncNormal - - bias: 0. - val: 1. 
- layer: LayerNorm - type: Constant - type: ImageClassifier + _target_: otx.algo.classification.deit_tiny.DeitTinyForMultilabelCls optimizer: _target_: torch.optim.SGD _partial_: true diff --git a/tests/integration/cli/test_cli.py b/tests/integration/cli/test_cli.py index fa1c126678a..852f1776def 100644 --- a/tests/integration/cli/test_cli.py +++ b/tests/integration/cli/test_cli.py @@ -35,8 +35,8 @@ "data_dir": "tests/assets/hlabel_classification", "overrides": [ "model.otx_model.num_classes=7", - "model.otx_model.config.head.num_multiclass_heads=2", - "model.otx_model.config.head.num_multilabel_classes=3", + "model.otx_model.num_multiclass_heads=2", + "model.otx_model.num_multilabel_classes=3", ], }, "detection": { From 9dfaa941e2a1c5dd4159a2c1f5ddfc5dc079c32b Mon Sep 17 00:00:00 2001 From: "Kim, Vinnam" Date: Thu, 11 Jan 2024 19:57:59 +0900 Subject: [PATCH 6/8] Fix light=False as default Signed-off-by: Kim, Vinnam --- src/otx/algo/classification/efficientnet_b0.py | 4 ++-- src/otx/algo/classification/efficientnet_v2.py | 4 ++-- src/otx/algo/classification/mobilenet_v3_large.py | 4 ++-- .../recipe/multiclass_classification/otx_efficientnet_b0.yaml | 1 + .../recipe/multiclass_classification/otx_efficientnet_v2.yaml | 1 + .../multiclass_classification/otx_mobilenet_v3_large.yaml | 1 + 6 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/otx/algo/classification/efficientnet_b0.py b/src/otx/algo/classification/efficientnet_b0.py index 3259d1e0921..a7c92b4433d 100644 --- a/src/otx/algo/classification/efficientnet_b0.py +++ b/src/otx/algo/classification/efficientnet_b0.py @@ -24,8 +24,8 @@ def __init__(self, num_classes: int, num_multiclass_heads: int, num_multilabel_c class EfficientNetB0ForMulticlassCls(MMPretrainMulticlassClsModel): """EfficientNetB0 Model for multi-label classification task.""" - def __init__(self, num_classes: int, light: bool = True) -> None: - model_name = "efficientnet_b0_light" if light else "otx_efficientnet_b0" + def 
__init__(self, num_classes: int, light: bool = False) -> None: + model_name = "efficientnet_b0_light" if light else "efficientnet_b0" config = read_mmconfig(model_name=model_name, subdir_name="multiclass_classification") super().__init__(num_classes=num_classes, config=config) diff --git a/src/otx/algo/classification/efficientnet_v2.py b/src/otx/algo/classification/efficientnet_v2.py index 861278ebba3..56829c2a1a3 100644 --- a/src/otx/algo/classification/efficientnet_v2.py +++ b/src/otx/algo/classification/efficientnet_v2.py @@ -24,8 +24,8 @@ def __init__(self, num_classes: int, num_multiclass_heads: int, num_multilabel_c class EfficientNetV2ForMulticlassCls(MMPretrainMulticlassClsModel): """EfficientNetV2 Model for multi-label classification task.""" - def __init__(self, num_classes: int, light: bool = True) -> None: - model_name = "efficientnet_v2_light" if light else "otx_efficientnet_v2" + def __init__(self, num_classes: int, light: bool = False) -> None: + model_name = "efficientnet_v2_light" if light else "efficientnet_v2" config = read_mmconfig(model_name=model_name, subdir_name="multiclass_classification") super().__init__(num_classes=num_classes, config=config) diff --git a/src/otx/algo/classification/mobilenet_v3_large.py b/src/otx/algo/classification/mobilenet_v3_large.py index 126ccb00d44..049b9bf238f 100644 --- a/src/otx/algo/classification/mobilenet_v3_large.py +++ b/src/otx/algo/classification/mobilenet_v3_large.py @@ -24,8 +24,8 @@ def __init__(self, num_classes: int, num_multiclass_heads: int, num_multilabel_c class MobileNetV3ForMulticlassCls(MMPretrainMulticlassClsModel): """MobileNetV3 Model for multi-label classification task.""" - def __init__(self, num_classes: int, light: bool = True) -> None: - model_name = "mobilenet_v3_large_light" if light else "otx_mobilenet_v3_large" + def __init__(self, num_classes: int, light: bool = False) -> None: + model_name = "mobilenet_v3_large_light" if light else "mobilenet_v3_large" config = 
read_mmconfig(model_name=model_name, subdir_name="multiclass_classification") super().__init__(num_classes=num_classes, config=config) diff --git a/src/otx/recipe/multiclass_classification/otx_efficientnet_b0.yaml b/src/otx/recipe/multiclass_classification/otx_efficientnet_b0.yaml index dd7a80dc6fd..7498be0bfb0 100644 --- a/src/otx/recipe/multiclass_classification/otx_efficientnet_b0.yaml +++ b/src/otx/recipe/multiclass_classification/otx_efficientnet_b0.yaml @@ -38,6 +38,7 @@ data: model: otx_model: _target_: otx.algo.classification.efficientnet_b0.EfficientNetB0ForMulticlassCls + light: false optimizer: _target_: torch.optim.SGD _partial_: true diff --git a/src/otx/recipe/multiclass_classification/otx_efficientnet_v2.yaml b/src/otx/recipe/multiclass_classification/otx_efficientnet_v2.yaml index 31c1d603967..d5c6d454171 100644 --- a/src/otx/recipe/multiclass_classification/otx_efficientnet_v2.yaml +++ b/src/otx/recipe/multiclass_classification/otx_efficientnet_v2.yaml @@ -41,6 +41,7 @@ data: model: otx_model: _target_: otx.algo.classification.efficientnet_v2.EfficientNetV2ForMulticlassCls + light: false optimizer: _target_: torch.optim.SGD _partial_: true diff --git a/src/otx/recipe/multiclass_classification/otx_mobilenet_v3_large.yaml b/src/otx/recipe/multiclass_classification/otx_mobilenet_v3_large.yaml index 09ceb9e555b..e6b749832ac 100644 --- a/src/otx/recipe/multiclass_classification/otx_mobilenet_v3_large.yaml +++ b/src/otx/recipe/multiclass_classification/otx_mobilenet_v3_large.yaml @@ -41,6 +41,7 @@ data: model: otx_model: _target_: otx.algo.classification.mobilenet_v3_large.MobileNetV3ForMulticlassCls + light: false optimizer: _target_: torch.optim.SGD _partial_: true From 669d935209ac14f857b52b25ce3957fc9ce03f15 Mon Sep 17 00:00:00 2001 From: "Kim, Vinnam" Date: Fri, 12 Jan 2024 10:31:18 +0900 Subject: [PATCH 7/8] Make read_mmconfig() more descriptive Signed-off-by: Kim, Vinnam --- src/otx/algo/action_classification/__init__.py | 2 +- 
src/otx/algo/action_detection/__init__.py | 2 +- src/otx/algo/utils/mmconfig.py | 15 +++++++++++---- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/otx/algo/action_classification/__init__.py b/src/otx/algo/action_classification/__init__.py index 2f4d6dc99d5..231852979c0 100644 --- a/src/otx/algo/action_classification/__init__.py +++ b/src/otx/algo/action_classification/__init__.py @@ -1,4 +1,4 @@ # Copyright (C) 2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # -"""Module for OTX action classification models, hooks, utils, etc.""" +"""Module for OTX action classification models.""" diff --git a/src/otx/algo/action_detection/__init__.py b/src/otx/algo/action_detection/__init__.py index 4b58f3a3c7b..0510329eab3 100644 --- a/src/otx/algo/action_detection/__init__.py +++ b/src/otx/algo/action_detection/__init__.py @@ -1,4 +1,4 @@ # Copyright (C) 2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # -"""Module for OTX action detection models, hooks, utils, etc.""" +"""Module for OTX action detection models.""" diff --git a/src/otx/algo/utils/mmconfig.py b/src/otx/algo/utils/mmconfig.py index 501bbeb5242..55dc5cda5a9 100644 --- a/src/otx/algo/utils/mmconfig.py +++ b/src/otx/algo/utils/mmconfig.py @@ -14,18 +14,25 @@ def read_mmconfig(model_name: str, subdir_name: str = ".") -> DictConfig: It try to read MMConfig from the yaml file which exists in `/mmconfigs//.yaml` + + For example, if this function is called in `otx/algo/action_classification/x3d.py`, + `otx/algo/action_classification/mmconfigs/x3d.yaml` will be read. """ frame = inspect.stack()[1] module = inspect.getmodule(frame[0]) - if module is None or (mod_fpath := module.__file__) is None: - msg = "Cannot get valid model from stack" + if module is None or (module_file_path := module.__file__) is None: + msg = ( + "Cannot get a valid module from Python function stack. " + "Please refer to this function docstring to see how to use correctly." 
+ ) raise RuntimeError(msg) - root_dir = Path(mod_fpath).parent / "mmconfigs" / subdir_name + root_dir = Path(module_file_path).parent / "mmconfigs" / subdir_name yaml_fpath = root_dir / f"{model_name}.yaml" if not yaml_fpath.exists(): - raise FileNotFoundError + msg = f"mmconfig file for {model_name} is not found in {yaml_fpath}" + raise FileNotFoundError(msg) return OmegaConf.load(yaml_fpath) From 360decc81b41328cccb3cb266350baa559d0b254 Mon Sep 17 00:00:00 2001 From: "Kim, Vinnam" Date: Fri, 12 Jan 2024 11:57:32 +0900 Subject: [PATCH 8/8] Change inst seg task Signed-off-by: Kim, Vinnam --- .../algo/instance_segmentation/__init__.py | 4 + .../algo/instance_segmentation/maskrcnn.py | 18 ++ .../mmconfigs/maskrcnn_efficientnetb2b.yaml | 198 ++++++++++++++++ .../mmconfigs/maskrcnn_r50.yaml | 198 ++++++++++++++++ .../mmconfigs/maskrcnn_swint.yaml | 211 +++++++++++++++++ src/otx/config/model/mmdet_inst_seg.yaml | 1 - src/otx/config/model/mmseg.yaml | 1 - .../maskrcnn_efficientnetb2b.yaml | 201 +--------------- .../instance_segmentation/maskrcnn_r50.yaml | 201 +--------------- .../instance_segmentation/maskrcnn_swint.yaml | 214 +----------------- 10 files changed, 635 insertions(+), 612 deletions(-) create mode 100644 src/otx/algo/instance_segmentation/__init__.py create mode 100644 src/otx/algo/instance_segmentation/maskrcnn.py create mode 100644 src/otx/algo/instance_segmentation/mmconfigs/maskrcnn_efficientnetb2b.yaml create mode 100644 src/otx/algo/instance_segmentation/mmconfigs/maskrcnn_r50.yaml create mode 100644 src/otx/algo/instance_segmentation/mmconfigs/maskrcnn_swint.yaml diff --git a/src/otx/algo/instance_segmentation/__init__.py b/src/otx/algo/instance_segmentation/__init__.py new file mode 100644 index 00000000000..61cbaeff261 --- /dev/null +++ b/src/otx/algo/instance_segmentation/__init__.py @@ -0,0 +1,4 @@ +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +"""Module for OTX instance segmentation models.""" diff --git 
a/src/otx/algo/instance_segmentation/maskrcnn.py b/src/otx/algo/instance_segmentation/maskrcnn.py new file mode 100644 index 00000000000..7e590f255db --- /dev/null +++ b/src/otx/algo/instance_segmentation/maskrcnn.py @@ -0,0 +1,18 @@ +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +"""MaskRCNN model implementations.""" + +from typing import Literal + +from otx.algo.utils.mmconfig import read_mmconfig +from otx.core.model.entity.instance_segmentation import MMDetInstanceSegCompatibleModel + + +class MaskRCNN(MMDetInstanceSegCompatibleModel): + """MaskRCNN Model.""" + + def __init__(self, num_classes: int, variant: Literal["efficientnetb2b", "r50", "swint"]) -> None: + model_name = f"maskrcnn_{variant}" + config = read_mmconfig(model_name=model_name) + super().__init__(num_classes=num_classes, config=config) diff --git a/src/otx/algo/instance_segmentation/mmconfigs/maskrcnn_efficientnetb2b.yaml b/src/otx/algo/instance_segmentation/mmconfigs/maskrcnn_efficientnetb2b.yaml new file mode 100644 index 00000000000..ae1e8a885ef --- /dev/null +++ b/src/otx/algo/instance_segmentation/mmconfigs/maskrcnn_efficientnetb2b.yaml @@ -0,0 +1,198 @@ +load_from: https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/models/instance_segmentation/v2/efficientnet_b2b-mask_rcnn-576x576.pth +data_preprocessor: + type: "DetDataPreprocessor" + bgr_to_rgb: false + mean: + - 123.675 + - 116.28 + - 103.53 + pad_mask: true + pad_size_divisor: 32 + std: + - 1.0 + - 1.0 + - 1.0 +type: MaskRCNN +backbone: + type: efficientnet_b2b + out_indices: + - 2 + - 3 + - 4 + - 5 + frozen_stages: -1 + pretrained: true + activation_cfg: + type: torch_swish + norm_cfg: + type: BN + requires_grad: true +neck: + type: FPN + in_channels: + - 24 + - 48 + - 120 + - 352 + out_channels: 80 + num_outs: 5 +rpn_head: + type: RPNHead + in_channels: 80 + feat_channels: 80 + anchor_generator: + type: AnchorGenerator + scales: + - 8 + ratios: + - 0.5 + - 1.0 + - 2.0 + 
strides: + - 4 + - 8 + - 16 + - 32 + - 64 + bbox_coder: + type: DeltaXYWHBBoxCoder + target_means: + - 0.0 + - 0.0 + - 0.0 + - 0.0 + target_stds: + - 1.0 + - 1.0 + - 1.0 + - 1.0 + loss_cls: + type: CrossEntropyLoss + use_sigmoid: true + loss_weight: 1.0 + loss_bbox: + type: L1Loss + loss_weight: 1.0 +roi_head: + type: StandardRoIHead + bbox_roi_extractor: + type: SingleRoIExtractor + roi_layer: + type: RoIAlign + output_size: 7 + sampling_ratio: 0 + out_channels: 80 + featmap_strides: + - 4 + - 8 + - 16 + - 32 + bbox_head: + type: Shared2FCBBoxHead + in_channels: 80 + fc_out_channels: 1024 + roi_feat_size: 7 + num_classes: 80 + bbox_coder: + type: DeltaXYWHBBoxCoder + target_means: + - 0.0 + - 0.0 + - 0.0 + - 0.0 + target_stds: + - 0.1 + - 0.1 + - 0.2 + - 0.2 + reg_class_agnostic: false + loss_cls: + type: CrossEntropyLoss + use_sigmoid: false + loss_weight: 1.0 + loss_bbox: + type: L1Loss + loss_weight: 1.0 + mask_roi_extractor: + type: SingleRoIExtractor + roi_layer: + type: RoIAlign + output_size: 14 + sampling_ratio: 0 + out_channels: 80 + featmap_strides: + - 4 + - 8 + - 16 + - 32 + mask_head: + type: FCNMaskHead + num_convs: 4 + in_channels: 80 + conv_out_channels: 80 + num_classes: 80 + loss_mask: + type: CrossEntropyLoss + use_mask: true + loss_weight: 1.0 +train_cfg: + rpn: + assigner: + type: MaxIoUAssigner + pos_iou_thr: 0.7 + neg_iou_thr: 0.3 + min_pos_iou: 0.3 + match_low_quality: true + ignore_iof_thr: -1 + gpu_assign_thr: 300 + sampler: + type: RandomSampler + num: 256 + pos_fraction: 0.5 + neg_pos_ub: -1 + add_gt_as_proposals: false + allowed_border: -1 + pos_weight: -1 + debug: false + rpn_proposal: + nms_across_levels: false + nms_pre: 2000 + max_per_img: 1000 + nms: + type: nms + iou_threshold: 0.8 + min_bbox_size: 0 + rcnn: + assigner: + type: MaxIoUAssigner + pos_iou_thr: 0.5 + neg_iou_thr: 0.5 + min_pos_iou: 0.5 + match_low_quality: true + ignore_iof_thr: -1 + gpu_assign_thr: 300 + sampler: + type: RandomSampler + num: 256 + pos_fraction: 0.25 
+ neg_pos_ub: -1 + add_gt_as_proposals: true + mask_size: 28 + pos_weight: -1 + debug: false +test_cfg: + rpn: + nms_across_levels: false + nms_pre: 800 + max_per_img: 500 + nms: + type: nms + iou_threshold: 0.8 + min_bbox_size: 0 + rcnn: + score_thr: 0.05 + nms: + type: nms + iou_threshold: 0.7 + max_per_img: 500 + mask_thr_binary: 0.5 diff --git a/src/otx/algo/instance_segmentation/mmconfigs/maskrcnn_r50.yaml b/src/otx/algo/instance_segmentation/mmconfigs/maskrcnn_r50.yaml new file mode 100644 index 00000000000..7d9534e14f1 --- /dev/null +++ b/src/otx/algo/instance_segmentation/mmconfigs/maskrcnn_r50.yaml @@ -0,0 +1,198 @@ +load_from: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_fpn_mstrain-poly_3x_coco_20210524_201154-21b550bb.pth +backbone: + depth: 50 + frozen_stages: 1 + init_cfg: + checkpoint: "torchvision://resnet50" + type: "Pretrained" + norm_cfg: + requires_grad: true + type: "BN" + norm_eval: true + num_stages: 4 + out_indices: + - 0 + - 1 + - 2 + - 3 + style: "pytorch" + type: "ResNet" +data_preprocessor: + bgr_to_rgb: false + mean: + - 123.675 + - 116.28 + - 103.53 + pad_mask: true + pad_size_divisor: 32 + std: + - 58.395 + - 57.12 + - 57.375 + type: "DetDataPreprocessor" +neck: + in_channels: + - 256 + - 512 + - 1024 + - 2048 + num_outs: 5 + out_channels: 256 + type: "FPN" +roi_head: + bbox_head: + bbox_coder: + target_means: + - 0.0 + - 0.0 + - 0.0 + - 0.0 + target_stds: + - 0.1 + - 0.1 + - 0.2 + - 0.2 + type: "DeltaXYWHBBoxCoder" + fc_out_channels: 1024 + in_channels: 256 + loss_bbox: + loss_weight: 1.0 + type: "L1Loss" + loss_cls: + loss_weight: 1.0 + type: "CrossEntropyLoss" + use_sigmoid: false + num_classes: 5 + reg_class_agnostic: false + roi_feat_size: 7 + type: "Shared2FCBBoxHead" + bbox_roi_extractor: + featmap_strides: + - 4 + - 8 + - 16 + - 32 + out_channels: 256 + roi_layer: + output_size: 7 + sampling_ratio: 0 + type: "RoIAlign" + type: "SingleRoIExtractor" + mask_head: + 
conv_out_channels: 256 + in_channels: 256 + loss_mask: + loss_weight: 1.0 + type: "CrossEntropyLoss" + use_mask: true + num_classes: 5 + num_convs: 4 + type: "FCNMaskHead" + mask_roi_extractor: + featmap_strides: + - 4 + - 8 + - 16 + - 32 + out_channels: 256 + roi_layer: + output_size: 14 + sampling_ratio: 0 + type: "RoIAlign" + type: "SingleRoIExtractor" + type: "StandardRoIHead" +rpn_head: + anchor_generator: + ratios: + - 0.5 + - 1.0 + - 2.0 + scales: + - 8 + strides: + - 4 + - 8 + - 16 + - 32 + - 64 + type: "AnchorGenerator" + bbox_coder: + target_means: + - 0.0 + - 0.0 + - 0.0 + - 0.0 + target_stds: + - 1.0 + - 1.0 + - 1.0 + - 1.0 + type: "DeltaXYWHBBoxCoder" + feat_channels: 256 + in_channels: 256 + loss_bbox: + loss_weight: 1.0 + type: "L1Loss" + loss_cls: + loss_weight: 1.0 + type: "CrossEntropyLoss" + use_sigmoid: true + type: "RPNHead" +test_cfg: + rcnn: + mask_thr_binary: 0.5 + max_per_img: 100 + nms: + iou_threshold: 0.5 + type: "nms" + score_thr: 0.05 + rpn: + max_per_img: 1000 + min_bbox_size: 0 + nms: + iou_threshold: 0.7 + type: "nms" + nms_pre: 1000 +train_cfg: + rcnn: + assigner: + ignore_iof_thr: -1 + match_low_quality: true + min_pos_iou: 0.5 + neg_iou_thr: 0.5 + pos_iou_thr: 0.5 + type: "MaxIoUAssigner" + debug: false + mask_size: 28 + pos_weight: -1 + sampler: + add_gt_as_proposals: true + neg_pos_ub: -1 + num: 512 + pos_fraction: 0.25 + type: "RandomSampler" + rpn: + allowed_border: -1 + assigner: + ignore_iof_thr: -1 + match_low_quality: true + min_pos_iou: 0.3 + neg_iou_thr: 0.3 + pos_iou_thr: 0.7 + type: "MaxIoUAssigner" + debug: false + pos_weight: -1 + sampler: + add_gt_as_proposals: false + neg_pos_ub: -1 + num: 256 + pos_fraction: 0.5 + type: "RandomSampler" + rpn_proposal: + max_per_img: 1000 + min_bbox_size: 0 + nms: + iou_threshold: 0.7 + type: "nms" + nms_pre: 2000 +type: "MaskRCNN" diff --git a/src/otx/algo/instance_segmentation/mmconfigs/maskrcnn_swint.yaml b/src/otx/algo/instance_segmentation/mmconfigs/maskrcnn_swint.yaml new 
file mode 100644 index 00000000000..a6a8459cc97 --- /dev/null +++ b/src/otx/algo/instance_segmentation/mmconfigs/maskrcnn_swint.yaml @@ -0,0 +1,211 @@ +load_from: https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco_20210908_165006-90a4008c.pth +backbone: + attn_drop_rate: 0.0 + convert_weights: true + depths: + - 2 + - 2 + - 6 + - 2 + drop_path_rate: 0.2 + drop_rate: 0.0 + embed_dims: 96 + init_cfg: + checkpoint: https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth + type: Pretrained + mlp_ratio: 4 + num_heads: + - 3 + - 6 + - 12 + - 24 + out_indices: + - 0 + - 1 + - 2 + - 3 + patch_norm: true + qk_scale: null + qkv_bias: true + type: SwinTransformer + window_size: 7 + with_cp: false +data_preprocessor: + bgr_to_rgb: false + mean: + - 123.675 + - 116.28 + - 103.53 + pad_mask: true + pad_size_divisor: 32 + std: + - 58.395 + - 57.12 + - 57.375 + type: DetDataPreprocessor +neck: + in_channels: + - 96 + - 192 + - 384 + - 768 + num_outs: 5 + out_channels: 256 + type: FPN +roi_head: + bbox_head: + bbox_coder: + target_means: + - 0.0 + - 0.0 + - 0.0 + - 0.0 + target_stds: + - 0.1 + - 0.1 + - 0.2 + - 0.2 + type: DeltaXYWHBBoxCoder + fc_out_channels: 1024 + in_channels: 256 + loss_bbox: + loss_weight: 1.0 + type: L1Loss + loss_cls: + loss_weight: 1.0 + type: CrossEntropyLoss + use_sigmoid: false + num_classes: 80 + reg_class_agnostic: false + roi_feat_size: 7 + type: Shared2FCBBoxHead + bbox_roi_extractor: + featmap_strides: + - 4 + - 8 + - 16 + - 32 + out_channels: 256 + roi_layer: + output_size: 7 + sampling_ratio: 0 + type: RoIAlign + type: SingleRoIExtractor + mask_head: + conv_out_channels: 256 + in_channels: 256 + loss_mask: + loss_weight: 1.0 + type: CrossEntropyLoss + use_mask: true + num_classes: 80 + num_convs: 4 + type: FCNMaskHead + mask_roi_extractor: + featmap_strides: + - 4 + - 8 + - 16 + - 32 + out_channels: 256 + 
roi_layer: + output_size: 14 + sampling_ratio: 0 + type: RoIAlign + type: SingleRoIExtractor + type: StandardRoIHead +rpn_head: + anchor_generator: + ratios: + - 0.5 + - 1.0 + - 2.0 + scales: + - 8 + strides: + - 4 + - 8 + - 16 + - 32 + - 64 + type: AnchorGenerator + bbox_coder: + target_means: + - 0.0 + - 0.0 + - 0.0 + - 0.0 + target_stds: + - 1.0 + - 1.0 + - 1.0 + - 1.0 + type: DeltaXYWHBBoxCoder + feat_channels: 256 + in_channels: 256 + loss_bbox: + loss_weight: 1.0 + type: L1Loss + loss_cls: + loss_weight: 1.0 + type: CrossEntropyLoss + use_sigmoid: true + type: RPNHead +test_cfg: + rcnn: + mask_thr_binary: 0.5 + max_per_img: 100 + nms: + iou_threshold: 0.5 + type: nms + score_thr: 0.05 + rpn: + max_per_img: 1000 + min_bbox_size: 0 + nms: + iou_threshold: 0.7 + type: nms + nms_pre: 1000 +train_cfg: + rcnn: + assigner: + ignore_iof_thr: -1 + match_low_quality: true + min_pos_iou: 0.5 + neg_iou_thr: 0.5 + pos_iou_thr: 0.5 + type: MaxIoUAssigner + debug: false + mask_size: 28 + pos_weight: -1 + sampler: + add_gt_as_proposals: true + neg_pos_ub: -1 + num: 512 + pos_fraction: 0.25 + type: RandomSampler + rpn: + allowed_border: -1 + assigner: + ignore_iof_thr: -1 + match_low_quality: true + min_pos_iou: 0.3 + neg_iou_thr: 0.3 + pos_iou_thr: 0.7 + type: MaxIoUAssigner + debug: false + pos_weight: -1 + sampler: + add_gt_as_proposals: false + neg_pos_ub: -1 + num: 256 + pos_fraction: 0.5 + type: RandomSampler + rpn_proposal: + max_per_img: 1000 + min_bbox_size: 0 + nms: + iou_threshold: 0.7 + type: nms + nms_pre: 2000 +type: MaskRCNN diff --git a/src/otx/config/model/mmdet_inst_seg.yaml b/src/otx/config/model/mmdet_inst_seg.yaml index 716e1555e39..8a1bc66deb2 100644 --- a/src/otx/config/model/mmdet_inst_seg.yaml +++ b/src/otx/config/model/mmdet_inst_seg.yaml @@ -17,7 +17,6 @@ scheduler: otx_model: _target_: otx.core.model.entity.instance_segmentation.MMDetInstanceSegCompatibleModel - config: ??? 
# compile model for faster training with pytorch 2.0 torch_compile: false diff --git a/src/otx/config/model/mmseg.yaml b/src/otx/config/model/mmseg.yaml index 7072fee469e..0c7f6b93b70 100644 --- a/src/otx/config/model/mmseg.yaml +++ b/src/otx/config/model/mmseg.yaml @@ -13,7 +13,6 @@ scheduler: otx_model: _target_: otx.core.model.entity.segmentation.MMSegCompatibleModel - config: ??? num_classes: ??? # compile model for faster training with pytorch 2.0 diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml index e6088dfe1c6..446996a4a07 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml @@ -65,205 +65,8 @@ data: model: otx_model: - config: - load_from: https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/models/instance_segmentation/v2/efficientnet_b2b-mask_rcnn-576x576.pth - data_preprocessor: - type: "DetDataPreprocessor" - bgr_to_rgb: false - mean: - - 123.675 - - 116.28 - - 103.53 - pad_mask: true - pad_size_divisor: 32 - std: - - 1.0 - - 1.0 - - 1.0 - type: MaskRCNN - backbone: - type: efficientnet_b2b - out_indices: - - 2 - - 3 - - 4 - - 5 - frozen_stages: -1 - pretrained: true - activation_cfg: - type: torch_swish - norm_cfg: - type: BN - requires_grad: true - neck: - type: FPN - in_channels: - - 24 - - 48 - - 120 - - 352 - out_channels: 80 - num_outs: 5 - rpn_head: - type: RPNHead - in_channels: 80 - feat_channels: 80 - anchor_generator: - type: AnchorGenerator - scales: - - 8 - ratios: - - 0.5 - - 1.0 - - 2.0 - strides: - - 4 - - 8 - - 16 - - 32 - - 64 - bbox_coder: - type: DeltaXYWHBBoxCoder - target_means: - - 0.0 - - 0.0 - - 0.0 - - 0.0 - target_stds: - - 1.0 - - 1.0 - - 1.0 - - 1.0 - loss_cls: - type: CrossEntropyLoss - use_sigmoid: true - loss_weight: 1.0 - loss_bbox: - type: L1Loss - loss_weight: 1.0 - roi_head: - type: 
StandardRoIHead - bbox_roi_extractor: - type: SingleRoIExtractor - roi_layer: - type: RoIAlign - output_size: 7 - sampling_ratio: 0 - out_channels: 80 - featmap_strides: - - 4 - - 8 - - 16 - - 32 - bbox_head: - type: Shared2FCBBoxHead - in_channels: 80 - fc_out_channels: 1024 - roi_feat_size: 7 - num_classes: 80 - bbox_coder: - type: DeltaXYWHBBoxCoder - target_means: - - 0.0 - - 0.0 - - 0.0 - - 0.0 - target_stds: - - 0.1 - - 0.1 - - 0.2 - - 0.2 - reg_class_agnostic: false - loss_cls: - type: CrossEntropyLoss - use_sigmoid: false - loss_weight: 1.0 - loss_bbox: - type: L1Loss - loss_weight: 1.0 - mask_roi_extractor: - type: SingleRoIExtractor - roi_layer: - type: RoIAlign - output_size: 14 - sampling_ratio: 0 - out_channels: 80 - featmap_strides: - - 4 - - 8 - - 16 - - 32 - mask_head: - type: FCNMaskHead - num_convs: 4 - in_channels: 80 - conv_out_channels: 80 - num_classes: 80 - loss_mask: - type: CrossEntropyLoss - use_mask: true - loss_weight: 1.0 - train_cfg: - rpn: - assigner: - type: MaxIoUAssigner - pos_iou_thr: 0.7 - neg_iou_thr: 0.3 - min_pos_iou: 0.3 - match_low_quality: true - ignore_iof_thr: -1 - gpu_assign_thr: 300 - sampler: - type: RandomSampler - num: 256 - pos_fraction: 0.5 - neg_pos_ub: -1 - add_gt_as_proposals: false - allowed_border: -1 - pos_weight: -1 - debug: false - rpn_proposal: - nms_across_levels: false - nms_pre: 2000 - max_per_img: 1000 - nms: - type: nms - iou_threshold: 0.8 - min_bbox_size: 0 - rcnn: - assigner: - type: MaxIoUAssigner - pos_iou_thr: 0.5 - neg_iou_thr: 0.5 - min_pos_iou: 0.5 - match_low_quality: true - ignore_iof_thr: -1 - gpu_assign_thr: 300 - sampler: - type: RandomSampler - num: 256 - pos_fraction: 0.25 - neg_pos_ub: -1 - add_gt_as_proposals: true - mask_size: 28 - pos_weight: -1 - debug: false - test_cfg: - rpn: - nms_across_levels: false - nms_pre: 800 - max_per_img: 500 - nms: - type: nms - iou_threshold: 0.8 - min_bbox_size: 0 - rcnn: - score_thr: 0.05 - nms: - type: nms - iou_threshold: 0.7 - max_per_img: 500 - 
mask_thr_binary: 0.5 + _target_: otx.algo.instance_segmentation.maskrcnn.MaskRCNN + variant: efficientnetb2b optimizer: lr: 0.007 weight_decay: 0.001 diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml index a0b01837d0e..03181a9a483 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml @@ -65,205 +65,8 @@ data: model: otx_model: - config: - load_from: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_fpn_mstrain-poly_3x_coco_20210524_201154-21b550bb.pth - backbone: - depth: 50 - frozen_stages: 1 - init_cfg: - checkpoint: "torchvision://resnet50" - type: "Pretrained" - norm_cfg: - requires_grad: true - type: "BN" - norm_eval: true - num_stages: 4 - out_indices: - - 0 - - 1 - - 2 - - 3 - style: "pytorch" - type: "ResNet" - data_preprocessor: - bgr_to_rgb: false - mean: - - 123.675 - - 116.28 - - 103.53 - pad_mask: true - pad_size_divisor: 32 - std: - - 58.395 - - 57.12 - - 57.375 - type: "DetDataPreprocessor" - neck: - in_channels: - - 256 - - 512 - - 1024 - - 2048 - num_outs: 5 - out_channels: 256 - type: "FPN" - roi_head: - bbox_head: - bbox_coder: - target_means: - - 0.0 - - 0.0 - - 0.0 - - 0.0 - target_stds: - - 0.1 - - 0.1 - - 0.2 - - 0.2 - type: "DeltaXYWHBBoxCoder" - fc_out_channels: 1024 - in_channels: 256 - loss_bbox: - loss_weight: 1.0 - type: "L1Loss" - loss_cls: - loss_weight: 1.0 - type: "CrossEntropyLoss" - use_sigmoid: false - num_classes: 5 - reg_class_agnostic: false - roi_feat_size: 7 - type: "Shared2FCBBoxHead" - bbox_roi_extractor: - featmap_strides: - - 4 - - 8 - - 16 - - 32 - out_channels: 256 - roi_layer: - output_size: 7 - sampling_ratio: 0 - type: "RoIAlign" - type: "SingleRoIExtractor" - mask_head: - conv_out_channels: 256 - in_channels: 256 - loss_mask: - loss_weight: 1.0 - type: "CrossEntropyLoss" - use_mask: true - num_classes: 5 - 
num_convs: 4 - type: "FCNMaskHead" - mask_roi_extractor: - featmap_strides: - - 4 - - 8 - - 16 - - 32 - out_channels: 256 - roi_layer: - output_size: 14 - sampling_ratio: 0 - type: "RoIAlign" - type: "SingleRoIExtractor" - type: "StandardRoIHead" - rpn_head: - anchor_generator: - ratios: - - 0.5 - - 1.0 - - 2.0 - scales: - - 8 - strides: - - 4 - - 8 - - 16 - - 32 - - 64 - type: "AnchorGenerator" - bbox_coder: - target_means: - - 0.0 - - 0.0 - - 0.0 - - 0.0 - target_stds: - - 1.0 - - 1.0 - - 1.0 - - 1.0 - type: "DeltaXYWHBBoxCoder" - feat_channels: 256 - in_channels: 256 - loss_bbox: - loss_weight: 1.0 - type: "L1Loss" - loss_cls: - loss_weight: 1.0 - type: "CrossEntropyLoss" - use_sigmoid: true - type: "RPNHead" - test_cfg: - rcnn: - mask_thr_binary: 0.5 - max_per_img: 100 - nms: - iou_threshold: 0.5 - type: "nms" - score_thr: 0.05 - rpn: - max_per_img: 1000 - min_bbox_size: 0 - nms: - iou_threshold: 0.7 - type: "nms" - nms_pre: 1000 - train_cfg: - rcnn: - assigner: - ignore_iof_thr: -1 - match_low_quality: true - min_pos_iou: 0.5 - neg_iou_thr: 0.5 - pos_iou_thr: 0.5 - type: "MaxIoUAssigner" - debug: false - mask_size: 28 - pos_weight: -1 - sampler: - add_gt_as_proposals: true - neg_pos_ub: -1 - num: 512 - pos_fraction: 0.25 - type: "RandomSampler" - rpn: - allowed_border: -1 - assigner: - ignore_iof_thr: -1 - match_low_quality: true - min_pos_iou: 0.3 - neg_iou_thr: 0.3 - pos_iou_thr: 0.7 - type: "MaxIoUAssigner" - debug: false - pos_weight: -1 - sampler: - add_gt_as_proposals: false - neg_pos_ub: -1 - num: 256 - pos_fraction: 0.5 - type: "RandomSampler" - rpn_proposal: - max_per_img: 1000 - min_bbox_size: 0 - nms: - iou_threshold: 0.7 - type: "nms" - nms_pre: 2000 - type: "MaskRCNN" + _target_: otx.algo.instance_segmentation.maskrcnn.MaskRCNN + variant: r50 optimizer: lr: 0.007 weight_decay: 0.001 diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml index b6fa877887d..f933b053765 100644 
--- a/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml @@ -65,218 +65,8 @@ data: model: otx_model: - config: - load_from: https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco_20210908_165006-90a4008c.pth - backbone: - attn_drop_rate: 0.0 - convert_weights: true - depths: - - 2 - - 2 - - 6 - - 2 - drop_path_rate: 0.2 - drop_rate: 0.0 - embed_dims: 96 - init_cfg: - checkpoint: https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth - type: Pretrained - mlp_ratio: 4 - num_heads: - - 3 - - 6 - - 12 - - 24 - out_indices: - - 0 - - 1 - - 2 - - 3 - patch_norm: true - qk_scale: null - qkv_bias: true - type: SwinTransformer - window_size: 7 - with_cp: false - data_preprocessor: - bgr_to_rgb: false - mean: - - 123.675 - - 116.28 - - 103.53 - pad_mask: true - pad_size_divisor: 32 - std: - - 58.395 - - 57.12 - - 57.375 - type: DetDataPreprocessor - neck: - in_channels: - - 96 - - 192 - - 384 - - 768 - num_outs: 5 - out_channels: 256 - type: FPN - roi_head: - bbox_head: - bbox_coder: - target_means: - - 0.0 - - 0.0 - - 0.0 - - 0.0 - target_stds: - - 0.1 - - 0.1 - - 0.2 - - 0.2 - type: DeltaXYWHBBoxCoder - fc_out_channels: 1024 - in_channels: 256 - loss_bbox: - loss_weight: 1.0 - type: L1Loss - loss_cls: - loss_weight: 1.0 - type: CrossEntropyLoss - use_sigmoid: false - num_classes: 80 - reg_class_agnostic: false - roi_feat_size: 7 - type: Shared2FCBBoxHead - bbox_roi_extractor: - featmap_strides: - - 4 - - 8 - - 16 - - 32 - out_channels: 256 - roi_layer: - output_size: 7 - sampling_ratio: 0 - type: RoIAlign - type: SingleRoIExtractor - mask_head: - conv_out_channels: 256 - in_channels: 256 - loss_mask: - loss_weight: 1.0 - type: CrossEntropyLoss - use_mask: true - num_classes: 80 - num_convs: 4 - type: FCNMaskHead - mask_roi_extractor: - featmap_strides: - - 4 - - 8 - - 
16 - - 32 - out_channels: 256 - roi_layer: - output_size: 14 - sampling_ratio: 0 - type: RoIAlign - type: SingleRoIExtractor - type: StandardRoIHead - rpn_head: - anchor_generator: - ratios: - - 0.5 - - 1.0 - - 2.0 - scales: - - 8 - strides: - - 4 - - 8 - - 16 - - 32 - - 64 - type: AnchorGenerator - bbox_coder: - target_means: - - 0.0 - - 0.0 - - 0.0 - - 0.0 - target_stds: - - 1.0 - - 1.0 - - 1.0 - - 1.0 - type: DeltaXYWHBBoxCoder - feat_channels: 256 - in_channels: 256 - loss_bbox: - loss_weight: 1.0 - type: L1Loss - loss_cls: - loss_weight: 1.0 - type: CrossEntropyLoss - use_sigmoid: true - type: RPNHead - test_cfg: - rcnn: - mask_thr_binary: 0.5 - max_per_img: 100 - nms: - iou_threshold: 0.5 - type: nms - score_thr: 0.05 - rpn: - max_per_img: 1000 - min_bbox_size: 0 - nms: - iou_threshold: 0.7 - type: nms - nms_pre: 1000 - train_cfg: - rcnn: - assigner: - ignore_iof_thr: -1 - match_low_quality: true - min_pos_iou: 0.5 - neg_iou_thr: 0.5 - pos_iou_thr: 0.5 - type: MaxIoUAssigner - debug: false - mask_size: 28 - pos_weight: -1 - sampler: - add_gt_as_proposals: true - neg_pos_ub: -1 - num: 512 - pos_fraction: 0.25 - type: RandomSampler - rpn: - allowed_border: -1 - assigner: - ignore_iof_thr: -1 - match_low_quality: true - min_pos_iou: 0.3 - neg_iou_thr: 0.3 - pos_iou_thr: 0.7 - type: MaxIoUAssigner - debug: false - pos_weight: -1 - sampler: - add_gt_as_proposals: false - neg_pos_ub: -1 - num: 256 - pos_fraction: 0.5 - type: RandomSampler - rpn_proposal: - max_per_img: 1000 - min_bbox_size: 0 - nms: - iou_threshold: 0.7 - type: nms - nms_pre: 2000 - type: MaskRCNN + _target_: otx.algo.instance_segmentation.maskrcnn.MaskRCNN + variant: swint optimizer: _target_: torch.optim.AdamW _partial_: true