Refactoring instance segmentation modules (#3696)
* Remove mmdet and torchvision directories & refactor duplicated functions
* Update docstring
* Remove DictConfig
sungchul2 authored Jul 2, 2024
1 parent 7cdb154 commit 6281089
Showing 60 changed files with 1,037 additions and 1,587 deletions.
2 changes: 1 addition & 1 deletion src/otx/algo/common/backbones/cspnext.py
@@ -11,7 +11,7 @@
import math
from typing import ClassVar

from otx.algo.detection.backbones.csp_darknet import SPPBottleneck # TODO (sungchul): move csp_darknet to common?
from otx.algo.common.layers import SPPBottleneck
from otx.algo.detection.layers import CSPLayer
from otx.algo.modules.base_module import BaseModule
from otx.algo.modules.conv_module import ConvModule
3 changes: 2 additions & 1 deletion src/otx/algo/common/layers/__init__.py
@@ -4,5 +4,6 @@
"""Custom layer implementations."""

from .res_layer import ResLayer
from .spp_layer import SPPBottleneck

__all__ = ["ResLayer"]
__all__ = ["ResLayer", "SPPBottleneck"]
7 changes: 1 addition & 6 deletions src/otx/algo/common/layers/res_layer.py
@@ -8,16 +8,11 @@

from __future__ import annotations

from typing import TYPE_CHECKING

from otx.algo.modules.base_module import BaseModule, Sequential
from otx.algo.modules.conv import build_conv_layer
from otx.algo.modules.norm import build_norm_layer
from torch import nn

if TYPE_CHECKING:
from omegaconf import DictConfig


class ResLayer(Sequential):
"""ResLayer to build ResNet style backbone.
@@ -47,7 +42,7 @@ def __init__(
norm_cfg: dict,
stride: int = 1,
avg_down: bool = False,
conv_cfg: DictConfig | dict | None = None,
conv_cfg: dict | None = None,
downsample_first: bool = True,
**kwargs,
) -> None:
68 changes: 68 additions & 0 deletions src/otx/algo/common/layers/spp_layer.py
@@ -0,0 +1,68 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) OpenMMLab. All rights reserved.
"""Implementation modified from mmdet.models.backbones.csp_darknet.py.
Reference : https://github.com/open-mmlab/mmdetection/blob/v3.2.0/mmdet/models/backbones/csp_darknet.py
"""

from __future__ import annotations

import torch
from otx.algo.modules.base_module import BaseModule
from otx.algo.modules.conv_module import ConvModule
from torch import Tensor, nn


class SPPBottleneck(BaseModule):
"""Spatial pyramid pooling layer used in YOLOv3-SPP.
Args:
in_channels (int): The input channels of this Module.
out_channels (int): The output channels of this Module.
kernel_sizes (tuple[int]): Sequential of kernel sizes of pooling
layers. Default: (5, 9, 13).
conv_cfg (dict): Config dict for convolution layer. Default: None,
which means using conv2d.
norm_cfg (dict): Config dict for normalization layer.
Default: dict(type='BN').
act_cfg (dict): Config dict for activation layer.
Default: dict(type='Swish').
init_cfg (dict, list[dict], optional): Initialization config dict.
Default: None.
"""

def __init__(
self,
in_channels: int,
out_channels: int,
kernel_sizes: tuple[int, ...] = (5, 9, 13),
conv_cfg: dict | None = None,
norm_cfg: dict | None = None,
act_cfg: dict | None = None,
init_cfg: dict | list[dict] | None = None,
):
super().__init__(init_cfg=init_cfg)
norm_cfg = norm_cfg or {"type": "BN", "momentum": 0.03, "eps": 0.001}
act_cfg = act_cfg or {"type": "Swish"}

mid_channels = in_channels // 2
self.conv1 = ConvModule(
in_channels,
mid_channels,
1,
stride=1,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg,
)
self.poolings = nn.ModuleList([nn.MaxPool2d(kernel_size=ks, stride=1, padding=ks // 2) for ks in kernel_sizes])
conv2_channels = mid_channels * (len(kernel_sizes) + 1)
self.conv2 = ConvModule(conv2_channels, out_channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)

def forward(self, x: Tensor) -> Tensor:
"""Forward."""
x = self.conv1(x)
with torch.cuda.amp.autocast(enabled=False):
x = torch.cat([x] + [pooling(x) for pooling in self.poolings], dim=1)
return self.conv2(x)
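
For reference, a minimal usage sketch of the relocated SPPBottleneck, based on the signature and forward pass shown above; the tensor sizes are illustrative assumptions, not values from the commit:

import torch

from otx.algo.common.layers import SPPBottleneck

# Default kernel sizes are (5, 9, 13); each max-pool uses stride 1 and padding ks // 2,
# so spatial dimensions are preserved and only the channel count changes.
spp = SPPBottleneck(in_channels=64, out_channels=128)

x = torch.rand(1, 64, 32, 32)
out = spp(x)
print(out.shape)  # torch.Size([1, 128, 32, 32])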
55 changes: 1 addition & 54 deletions src/otx/algo/detection/backbones/csp_darknet.py
@@ -15,6 +15,7 @@
from torch import Tensor, nn
from torch.nn.modules.batchnorm import _BatchNorm

from otx.algo.common.layers import SPPBottleneck
from otx.algo.detection.layers import CSPLayer
from otx.algo.modules.base_module import BaseModule
from otx.algo.modules.conv_module import ConvModule
@@ -93,60 +94,6 @@ def export(self, x: Tensor) -> Tensor:
return self.conv(x)


class SPPBottleneck(BaseModule):
"""Spatial pyramid pooling layer used in YOLOv3-SPP.
Args:
in_channels (int): The input channels of this Module.
out_channels (int): The output channels of this Module.
kernel_sizes (tuple[int]): Sequential of kernel sizes of pooling
layers. Default: (5, 9, 13).
conv_cfg (dict): Config dict for convolution layer. Default: None,
which means using conv2d.
norm_cfg (dict): Config dict for normalization layer.
Default: dict(type='BN').
act_cfg (dict): Config dict for activation layer.
Default: dict(type='Swish').
init_cfg (dict, list[dict], optional): Initialization config dict.
Default: None.
"""

def __init__(
self,
in_channels: int,
out_channels: int,
kernel_sizes: tuple[int, ...] = (5, 9, 13),
conv_cfg: dict | None = None,
norm_cfg: dict | None = None,
act_cfg: dict | None = None,
init_cfg: dict | list[dict] | None = None,
):
super().__init__(init_cfg=init_cfg)
norm_cfg = norm_cfg or {"type": "BN", "momentum": 0.03, "eps": 0.001}
act_cfg = act_cfg or {"type": "Swish"}

mid_channels = in_channels // 2
self.conv1 = ConvModule(
in_channels,
mid_channels,
1,
stride=1,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg,
)
self.poolings = nn.ModuleList([nn.MaxPool2d(kernel_size=ks, stride=1, padding=ks // 2) for ks in kernel_sizes])
conv2_channels = mid_channels * (len(kernel_sizes) + 1)
self.conv2 = ConvModule(conv2_channels, out_channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)

def forward(self, x: Tensor) -> Tensor:
"""Forward."""
x = self.conv1(x)
with torch.cuda.amp.autocast(enabled=False):
x = torch.cat([x] + [pooling(x) for pooling in self.poolings], dim=1)
return self.conv2(x)


class CSPDarknet(BaseModule):
"""CSP-Darknet backbone used in YOLOv5 and YOLOX.
2 changes: 1 addition & 1 deletion src/otx/algo/detection/heads/__init__.py
@@ -1,4 +1,4 @@
# Copyright (C) 2023 Intel Corporation
# Copyright (C) 2023-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
"""Custom head implementations for detection task."""

7 changes: 1 addition & 6 deletions src/otx/algo/detection/layers/channel_attention_layer.py
@@ -5,16 +5,11 @@

from __future__ import annotations

from typing import TYPE_CHECKING

import torch
from torch import Tensor, nn

from otx.algo.modules.base_module import BaseModule

if TYPE_CHECKING:
from omegaconf import DictConfig


class ChannelAttention(BaseModule):
"""Channel attention Module.
@@ -28,7 +23,7 @@ class ChannelAttention(BaseModule):
def __init__(
self,
channels: int,
init_cfg: DictConfig | dict | list[DictConfig] | list[dict] | None = None,
init_cfg: dict | list[dict] | None = None,
) -> None:
super().__init__(init_cfg=init_cfg)

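
A minimal sketch of the updated ChannelAttention signature; the forward behaviour (re-weighting the input and returning a tensor of the same shape) is assumed from the usual channel-attention pattern, since it is not part of this diff:

import torch

from otx.algo.detection.layers.channel_attention_layer import ChannelAttention

# init_cfg now takes a plain dict (or list of dicts, or None) instead of DictConfig.
attn = ChannelAttention(channels=96)

x = torch.rand(1, 96, 20, 20)
out = attn(x)
print(out.shape)  # expected: torch.Size([1, 96, 20, 20])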
35 changes: 14 additions & 21 deletions src/otx/algo/detection/layers/csp_layer.py
@@ -5,8 +5,6 @@

from __future__ import annotations

from typing import TYPE_CHECKING

import torch
from torch import Tensor, nn

@@ -15,9 +13,6 @@
from otx.algo.modules.conv_module import ConvModule
from otx.algo.modules.depthwise_separable_conv_module import DepthwiseSeparableConvModule

if TYPE_CHECKING:
from omegaconf import DictConfig


class DarknetBottleneck(BaseModule):
"""The basic bottleneck block used in Darknet.
@@ -51,10 +46,10 @@ def __init__(
expansion: float = 0.5,
add_identity: bool = True,
use_depthwise: bool = False,
conv_cfg: DictConfig | dict | None = None,
norm_cfg: DictConfig | dict | None = None,
act_cfg: DictConfig | dict | None = None,
init_cfg: DictConfig | dict | list[DictConfig] | list[dict] | None = None,
conv_cfg: dict | None = None,
norm_cfg: dict | None = None,
act_cfg: dict | None = None,
init_cfg: dict | list[dict] | None = None,
) -> None:
if norm_cfg is None:
norm_cfg = {"type": "BN", "momentum": 0.03, "eps": 0.001}
@@ -109,8 +104,7 @@ class CSPNeXtBlock(BaseModule):
Defaults to dict(type='BN', momentum=0.03, eps=0.001).
act_cfg (dict): Config dict for activation layer.
Defaults to dict(type='SiLU').
init_cfg (:obj:`DictConfig` or dict or list[dict] or
list[:obj:`DictConfig`], optional): Initialization config dict.
init_cfg (dict or list[dict], optional): Initialization config dict.
Defaults to None.
"""

@@ -122,10 +116,10 @@ def __init__(
add_identity: bool = True,
use_depthwise: bool = False,
kernel_size: int = 5,
conv_cfg: DictConfig | dict | None = None,
norm_cfg: DictConfig | dict | None = None,
act_cfg: DictConfig | dict | None = None,
init_cfg: DictConfig | dict | list[DictConfig] | list[dict] | None = None,
conv_cfg: dict | None = None,
norm_cfg: dict | None = None,
act_cfg: dict | None = None,
init_cfg: dict | list[dict] | None = None,
) -> None:
if norm_cfg is None:
norm_cfg = {"type": "BN", "momentum": 0.03, "eps": 0.001}
@@ -184,8 +178,7 @@ class CSPLayer(BaseModule):
Defaults to dict(type='BN')
act_cfg (dict): Config dict for activation layer.
Defaults to dict(type='Swish')
init_cfg (:obj:`DictConfig` or dict or list[dict] or
list[:obj:`DictConfig`], optional): Initialization config dict.
init_cfg (dict or list[dict], optional): Initialization config dict.
Defaults to None.
"""

@@ -199,10 +192,10 @@ def __init__(
use_depthwise: bool = False,
use_cspnext_block: bool = False,
channel_attention: bool = False,
conv_cfg: DictConfig | dict | None = None,
norm_cfg: DictConfig | dict | None = None,
act_cfg: DictConfig | dict | None = None,
init_cfg: DictConfig | dict | list[DictConfig] | list[dict] | None = None,
conv_cfg: dict | None = None,
norm_cfg: dict | None = None,
act_cfg: dict | None = None,
init_cfg: dict | list[dict] | None = None,
) -> None:
if norm_cfg is None:
norm_cfg = {"type": "BN", "momentum": 0.03, "eps": 0.001}
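
A minimal sketch of constructing CSPLayer with the plain-dict configs; treating in_channels and out_channels as the first two positional arguments follows the upstream mmdet layer and is an assumption here, since that part of the signature is collapsed in this diff:

import torch

from otx.algo.detection.layers import CSPLayer

# The *_cfg arguments are now plain dicts (or None for the defaults) rather than DictConfig.
layer = CSPLayer(
    64,   # in_channels (assumed positional, as in mmdet's CSPLayer)
    128,  # out_channels (assumed positional, as in mmdet's CSPLayer)
    use_cspnext_block=True,
    channel_attention=True,
)

x = torch.rand(1, 64, 40, 40)
out = layer(x)
print(out.shape)  # expected: torch.Size([1, 128, 40, 40])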
12 changes: 7 additions & 5 deletions src/otx/algo/detection/ssd.py
@@ -1,7 +1,11 @@
# Copyright (C) 2023-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
"""SSD object detector for the OTX detection."""
# Copyright (c) OpenMMLab. All rights reserved.
"""SSD object detector for the OTX detection.
Implementation modified from mmdet.models.detectors.single_stage.
Reference : https://github.com/open-mmlab/mmdetection/blob/v3.2.0/mmdet/models/detectors/single_stage.py
"""

from __future__ import annotations

@@ -34,10 +38,8 @@
logger = logging.getLogger()


# This class and its supporting functions below lightly adapted from the mmdet SingleStageDetector available at:
# https://github.com/open-mmlab/mmdetection/blob/cfd5d3a985b0249de009b67d04f37263e11cdf3d/mmdet/models/detectors/single_stage.py
class SingleStageDetector(BaseModule):
"""Single stage detector implementation from mmdet."""
"""Single stage detector implementation."""

def __init__(
self,