Refactoring instance segmentation modules (#3696)
* Remove mmdet and torchvision directories & refactor duplicated functions
* Update docstring
* Remove DictConfig
sungchul2 authored Jul 2, 2024
1 parent 7cdb154 commit 6281089
Showing 60 changed files with 1,037 additions and 1,587 deletions.
2 changes: 1 addition & 1 deletion src/otx/algo/common/backbones/cspnext.py
@@ -11,7 +11,7 @@
import math
from typing import ClassVar

from otx.algo.detection.backbones.csp_darknet import SPPBottleneck # TODO (sungchul): move csp_darknet to common?
from otx.algo.common.layers import SPPBottleneck
from otx.algo.detection.layers import CSPLayer
from otx.algo.modules.base_module import BaseModule
from otx.algo.modules.conv_module import ConvModule
3 changes: 2 additions & 1 deletion src/otx/algo/common/layers/__init__.py
@@ -4,5 +4,6 @@
"""Custom layer implementations."""

from .res_layer import ResLayer
from .spp_layer import SPPBottleneck

__all__ = ["ResLayer"]
__all__ = ["ResLayer", "SPPBottleneck"]
7 changes: 1 addition & 6 deletions src/otx/algo/common/layers/res_layer.py
@@ -8,16 +8,11 @@

from __future__ import annotations

from typing import TYPE_CHECKING

from otx.algo.modules.base_module import BaseModule, Sequential
from otx.algo.modules.conv import build_conv_layer
from otx.algo.modules.norm import build_norm_layer
from torch import nn

if TYPE_CHECKING:
from omegaconf import DictConfig


class ResLayer(Sequential):
"""ResLayer to build ResNet style backbone.
@@ -47,7 +42,7 @@ def __init__(
norm_cfg: dict,
stride: int = 1,
avg_down: bool = False,
conv_cfg: DictConfig | dict | None = None,
conv_cfg: dict | None = None,
downsample_first: bool = True,
**kwargs,
) -> None:
68 changes: 68 additions & 0 deletions src/otx/algo/common/layers/spp_layer.py
@@ -0,0 +1,68 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) OpenMMLab. All rights reserved.
"""Implementation modified from mmdet.models.backbones.csp_darknet.py.
Reference : https://github.com/open-mmlab/mmdetection/blob/v3.2.0/mmdet/models/backbones/csp_darknet.py
"""

from __future__ import annotations

import torch
from otx.algo.modules.base_module import BaseModule
from otx.algo.modules.conv_module import ConvModule
from torch import Tensor, nn


class SPPBottleneck(BaseModule):
"""Spatial pyramid pooling layer used in YOLOv3-SPP.
Args:
in_channels (int): The input channels of this Module.
out_channels (int): The output channels of this Module.
kernel_sizes (tuple[int]): Sequential of kernel sizes of pooling
layers. Default: (5, 9, 13).
conv_cfg (dict): Config dict for convolution layer. Default: None,
which means using conv2d.
norm_cfg (dict): Config dict for normalization layer.
Default: dict(type='BN').
act_cfg (dict): Config dict for activation layer.
Default: dict(type='Swish').
init_cfg (dict, list[dict], optional): Initialization config dict.
Default: None.
"""

def __init__(
self,
in_channels: int,
out_channels: int,
kernel_sizes: tuple[int, ...] = (5, 9, 13),
conv_cfg: dict | None = None,
norm_cfg: dict | None = None,
act_cfg: dict | None = None,
init_cfg: dict | list[dict] | None = None,
):
super().__init__(init_cfg=init_cfg)
norm_cfg = norm_cfg or {"type": "BN", "momentum": 0.03, "eps": 0.001}
act_cfg = act_cfg or {"type": "Swish"}

mid_channels = in_channels // 2
self.conv1 = ConvModule(
in_channels,
mid_channels,
1,
stride=1,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg,
)
self.poolings = nn.ModuleList([nn.MaxPool2d(kernel_size=ks, stride=1, padding=ks // 2) for ks in kernel_sizes])
conv2_channels = mid_channels * (len(kernel_sizes) + 1)
self.conv2 = ConvModule(conv2_channels, out_channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)

def forward(self, x: Tensor) -> Tensor:
"""Forward."""
x = self.conv1(x)
with torch.cuda.amp.autocast(enabled=False):
x = torch.cat([x] + [pooling(x) for pooling in self.poolings], dim=1)
return self.conv2(x)
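
For reference, a minimal usage sketch of the relocated SPPBottleneck, based on the signature and forward pass shown above; the tensor sizes are illustrative assumptions, not values from the commit:

import torch

from otx.algo.common.layers import SPPBottleneck

# Default kernel sizes are (5, 9, 13); each max-pool uses stride 1 and padding ks // 2,
# so spatial dimensions are preserved and only the channel count changes.
spp = SPPBottleneck(in_channels=64, out_channels=128)

x = torch.rand(1, 64, 32, 32)
out = spp(x)
print(out.shape)  # torch.Size([1, 128, 32, 32])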
55 changes: 1 addition & 54 deletions src/otx/algo/detection/backbones/csp_darknet.py
@@ -15,6 +15,7 @@
from torch import Tensor, nn
from torch.nn.modules.batchnorm import _BatchNorm

from otx.algo.common.layers import SPPBottleneck
from otx.algo.detection.layers import CSPLayer
from otx.algo.modules.base_module import BaseModule
from otx.algo.modules.conv_module import ConvModule
@@ -93,60 +94,6 @@ def export(self, x: Tensor) -> Tensor:
return self.conv(x)


class SPPBottleneck(BaseModule):
"""Spatial pyramid pooling layer used in YOLOv3-SPP.
Args:
in_channels (int): The input channels of this Module.
out_channels (int): The output channels of this Module.
kernel_sizes (tuple[int]): Sequential of kernel sizes of pooling
layers. Default: (5, 9, 13).
conv_cfg (dict): Config dict for convolution layer. Default: None,
which means using conv2d.
norm_cfg (dict): Config dict for normalization layer.
Default: dict(type='BN').
act_cfg (dict): Config dict for activation layer.
Default: dict(type='Swish').
init_cfg (dict, list[dict], optional): Initialization config dict.
Default: None.
"""

def __init__(
self,
in_channels: int,
out_channels: int,
kernel_sizes: tuple[int, ...] = (5, 9, 13),
conv_cfg: dict | None = None,
norm_cfg: dict | None = None,
act_cfg: dict | None = None,
init_cfg: dict | list[dict] | None = None,
):
super().__init__(init_cfg=init_cfg)
norm_cfg = norm_cfg or {"type": "BN", "momentum": 0.03, "eps": 0.001}
act_cfg = act_cfg or {"type": "Swish"}

mid_channels = in_channels // 2
self.conv1 = ConvModule(
in_channels,
mid_channels,
1,
stride=1,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg,
)
self.poolings = nn.ModuleList([nn.MaxPool2d(kernel_size=ks, stride=1, padding=ks // 2) for ks in kernel_sizes])
conv2_channels = mid_channels * (len(kernel_sizes) + 1)
self.conv2 = ConvModule(conv2_channels, out_channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)

def forward(self, x: Tensor) -> Tensor:
"""Forward."""
x = self.conv1(x)
with torch.cuda.amp.autocast(enabled=False):
x = torch.cat([x] + [pooling(x) for pooling in self.poolings], dim=1)
return self.conv2(x)


class CSPDarknet(BaseModule):
"""CSP-Darknet backbone used in YOLOv5 and YOLOX.
2 changes: 1 addition & 1 deletion src/otx/algo/detection/heads/__init__.py
@@ -1,4 +1,4 @@
# Copyright (C) 2023 Intel Corporation
# Copyright (C) 2023-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
"""Custom head implementations for detection task."""

7 changes: 1 addition & 6 deletions src/otx/algo/detection/layers/channel_attention_layer.py
@@ -5,16 +5,11 @@

from __future__ import annotations

from typing import TYPE_CHECKING

import torch
from torch import Tensor, nn

from otx.algo.modules.base_module import BaseModule

if TYPE_CHECKING:
from omegaconf import DictConfig


class ChannelAttention(BaseModule):
"""Channel attention Module.
@@ -28,7 +23,7 @@ class ChannelAttention(BaseModule):
def __init__(
self,
channels: int,
init_cfg: DictConfig | dict | list[DictConfig] | list[dict] | None = None,
init_cfg: dict | list[dict] | None = None,
) -> None:
super().__init__(init_cfg=init_cfg)

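
A minimal sketch of the updated ChannelAttention signature; the forward behaviour (re-weighting the input and returning a tensor of the same shape) is assumed from the usual channel-attention pattern, since it is not part of this diff:

import torch

from otx.algo.detection.layers.channel_attention_layer import ChannelAttention

# init_cfg now takes a plain dict (or list of dicts, or None) instead of DictConfig.
attn = ChannelAttention(channels=96)

x = torch.rand(1, 96, 20, 20)
out = attn(x)
print(out.shape)  # expected: torch.Size([1, 96, 20, 20])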
35 changes: 14 additions & 21 deletions src/otx/algo/detection/layers/csp_layer.py
@@ -5,8 +5,6 @@

from __future__ import annotations

from typing import TYPE_CHECKING

import torch
from torch import Tensor, nn

@@ -15,9 +13,6 @@
from otx.algo.modules.conv_module import ConvModule
from otx.algo.modules.depthwise_separable_conv_module import DepthwiseSeparableConvModule

if TYPE_CHECKING:
from omegaconf import DictConfig


class DarknetBottleneck(BaseModule):
"""The basic bottleneck block used in Darknet.
@@ -51,10 +46,10 @@ def __init__(
expansion: float = 0.5,
add_identity: bool = True,
use_depthwise: bool = False,
conv_cfg: DictConfig | dict | None = None,
norm_cfg: DictConfig | dict | None = None,
act_cfg: DictConfig | dict | None = None,
init_cfg: DictConfig | dict | list[DictConfig] | list[dict] | None = None,
conv_cfg: dict | None = None,
norm_cfg: dict | None = None,
act_cfg: dict | None = None,
init_cfg: dict | list[dict] | None = None,
) -> None:
if norm_cfg is None:
norm_cfg = {"type": "BN", "momentum": 0.03, "eps": 0.001}
@@ -109,8 +104,7 @@ class CSPNeXtBlock(BaseModule):
Defaults to dict(type='BN', momentum=0.03, eps=0.001).
act_cfg (dict): Config dict for activation layer.
Defaults to dict(type='SiLU').
init_cfg (:obj:`DictConfig` or dict or list[dict] or
list[:obj:`DictConfig`], optional): Initialization config dict.
init_cfg (dict or list[dict], optional): Initialization config dict.
Defaults to None.
"""

@@ -122,10 +116,10 @@ def __init__(
add_identity: bool = True,
use_depthwise: bool = False,
kernel_size: int = 5,
conv_cfg: DictConfig | dict | None = None,
norm_cfg: DictConfig | dict | None = None,
act_cfg: DictConfig | dict | None = None,
init_cfg: DictConfig | dict | list[DictConfig] | list[dict] | None = None,
conv_cfg: dict | None = None,
norm_cfg: dict | None = None,
act_cfg: dict | None = None,
init_cfg: dict | list[dict] | None = None,
) -> None:
if norm_cfg is None:
norm_cfg = {"type": "BN", "momentum": 0.03, "eps": 0.001}
@@ -184,8 +178,7 @@ class CSPLayer(BaseModule):
Defaults to dict(type='BN')
act_cfg (dict): Config dict for activation layer.
Defaults to dict(type='Swish')
init_cfg (:obj:`DictConfig` or dict or list[dict] or
list[:obj:`DictConfig`], optional): Initialization config dict.
init_cfg (dict or list[dict], optional): Initialization config dict.
Defaults to None.
"""

@@ -199,10 +192,10 @@ def __init__(
use_depthwise: bool = False,
use_cspnext_block: bool = False,
channel_attention: bool = False,
conv_cfg: DictConfig | dict | None = None,
norm_cfg: DictConfig | dict | None = None,
act_cfg: DictConfig | dict | None = None,
init_cfg: DictConfig | dict | list[DictConfig] | list[dict] | None = None,
conv_cfg: dict | None = None,
norm_cfg: dict | None = None,
act_cfg: dict | None = None,
init_cfg: dict | list[dict] | None = None,
) -> None:
if norm_cfg is None:
norm_cfg = {"type": "BN", "momentum": 0.03, "eps": 0.001}
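
A minimal sketch of constructing CSPLayer with the plain-dict configs; treating in_channels and out_channels as the first two positional arguments follows the upstream mmdet layer and is an assumption here, since that part of the signature is collapsed in this diff:

import torch

from otx.algo.detection.layers import CSPLayer

# The *_cfg arguments are now plain dicts (or None for the defaults) rather than DictConfig.
layer = CSPLayer(
    64,   # in_channels (assumed positional, as in mmdet's CSPLayer)
    128,  # out_channels (assumed positional, as in mmdet's CSPLayer)
    use_cspnext_block=True,
    channel_attention=True,
)

x = torch.rand(1, 64, 40, 40)
out = layer(x)
print(out.shape)  # expected: torch.Size([1, 128, 40, 40])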
12 changes: 7 additions & 5 deletions src/otx/algo/detection/ssd.py
@@ -1,7 +1,11 @@
# Copyright (C) 2023-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
"""SSD object detector for the OTX detection."""
# Copyright (c) OpenMMLab. All rights reserved.
"""SSD object detector for the OTX detection.
Implementation modified from mmdet.models.detectors.single_stage.
Reference : https://github.com/open-mmlab/mmdetection/blob/v3.2.0/mmdet/models/detectors/single_stage.py
"""

from __future__ import annotations

@@ -34,10 +38,8 @@
logger = logging.getLogger()


# This class and its supporting functions below lightly adapted from the mmdet SingleStageDetector available at:
# https://github.com/open-mmlab/mmdetection/blob/cfd5d3a985b0249de009b67d04f37263e11cdf3d/mmdet/models/detectors/single_stage.py
class SingleStageDetector(BaseModule):
"""Single stage detector implementation from mmdet."""
"""Single stage detector implementation."""

def __init__(
self,