From 7f7f299198329afede0924583b9b4c84221f138f Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Wed, 7 Aug 2024 09:20:27 +0900 Subject: [PATCH] Refactoring base module (`ConvModule`) (#3783) * Split `ConvModule` to per dimension Module * Remove `conv_cfg` * Remove `build_conv_layer` * Move `DepthwiseSeparableConvModule` into `conv_module` * precommit * Remove `build_conv_layer` vestige * Remove assertion errors * Remove unused `efficient_conv_bn_eval` * Fix unit test * Remove `order` --- .../action_classification/backbones/x3d.py | 37 +-- src/otx/algo/action_classification/x3d.py | 2 +- .../classification/backbones/efficientnet.py | 21 +- src/otx/algo/common/backbones/cspnext.py | 17 +- src/otx/algo/common/backbones/resnet.py | 16 +- src/otx/algo/common/backbones/resnext.py | 10 +- src/otx/algo/common/layers/res_layer.py | 10 +- src/otx/algo/common/layers/spp_layer.py | 16 +- .../algo/detection/backbones/csp_darknet.py | 17 +- src/otx/algo/detection/backbones/presnet.py | 26 +- src/otx/algo/detection/heads/atss_head.py | 12 +- src/otx/algo/detection/heads/rtmdet_head.py | 13 +- src/otx/algo/detection/heads/yolox_head.py | 22 +- src/otx/algo/detection/layers/csp_layer.py | 64 ++-- src/otx/algo/detection/necks/cspnext_pafpn.py | 15 +- src/otx/algo/detection/necks/fpn.py | 14 +- .../algo/detection/necks/hybrid_encoder.py | 6 +- src/otx/algo/detection/necks/yolox_pafpn.py | 16 +- .../instance_segmentation/backbones/swin.py | 1 - .../heads/convfc_bbox_head.py | 2 - .../heads/fcn_mask_head.py | 11 +- .../instance_segmentation/heads/rpn_head.py | 4 +- .../heads/rtmdet_ins_head.py | 16 +- .../layers/transformer.py | 9 +- .../algo/instance_segmentation/necks/fpn.py | 11 +- src/otx/algo/modules/__init__.py | 8 +- src/otx/algo/modules/conv.py | 54 ---- src/otx/algo/modules/conv_module.py | 288 ++++++++---------- .../depthwise_separable_conv_module.py | 110 ------- src/otx/algo/modules/transformer.py | 10 +- .../algo/segmentation/backbones/litehrnet.py | 182 +++-------- .../algo/segmentation/heads/base_segm_head.py | 4 - src/otx/algo/segmentation/heads/fcn_head.py | 12 +- src/otx/algo/segmentation/heads/ham_head.py | 12 +- .../algo/segmentation/modules/aggregators.py | 17 +- src/otx/algo/segmentation/modules/blocks.py | 25 +- .../algo/detection/heads/test_yolox_head.py | 5 +- .../algo/detection/layers/test_csp_layer.py | 5 +- .../algo/detection/necks/test_yolox_pafpn.py | 2 +- tests/unit/algo/modules/test_conv.py | 33 -- tests/unit/algo/modules/test_conv_module.py | 194 ++++++------ .../test_depthwise_separable_conv_module.py | 104 ------- tests/unit/algo/modules/test_norm.py | 2 +- .../algo/segmentation/modules/test_blokcs.py | 4 - 44 files changed, 461 insertions(+), 998 deletions(-) delete mode 100644 src/otx/algo/modules/conv.py delete mode 100644 src/otx/algo/modules/depthwise_separable_conv_module.py delete mode 100644 tests/unit/algo/modules/test_conv.py delete mode 100644 tests/unit/algo/modules/test_depthwise_separable_conv_module.py diff --git a/src/otx/algo/action_classification/backbones/x3d.py b/src/otx/algo/action_classification/backbones/x3d.py index 11805b52679..7deef62a9f6 100644 --- a/src/otx/algo/action_classification/backbones/x3d.py +++ b/src/otx/algo/action_classification/backbones/x3d.py @@ -3,6 +3,7 @@ # Copyright (c) OpenMMLab. All rights reserved. 
"""X3D backbone implementation.""" + from __future__ import annotations import math @@ -12,7 +13,7 @@ from torch.nn.modules.batchnorm import _BatchNorm from otx.algo.modules.activation import Swish, build_activation_layer -from otx.algo.modules.conv_module import ConvModule +from otx.algo.modules.conv_module import Conv3dModule from otx.algo.utils.mmengine_utils import load_checkpoint from otx.algo.utils.weight_init import constant_init, kaiming_init @@ -70,8 +71,6 @@ class BlockX3D(nn.Module): unit. If set as None, it means not using SE unit. Default: None. use_swish (bool): Whether to use swish as the activation function before and after the 3x3x3 conv. Default: True. - conv_cfg (dict): Config dict for convolution layer. - Default: ``dict(type='Conv3d')``. norm_cfg (dict): Config for norm layers. required keys are ``type``, Default: ``dict(type='BN3d')``. act_cfg (dict): Config dict for activation layer. @@ -89,7 +88,6 @@ def __init__( downsample: nn.Module | None = None, se_ratio: float | None = None, use_swish: bool = True, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, act_cfg: dict | None = None, with_cp: bool = False, @@ -103,25 +101,23 @@ def __init__( self.downsample = downsample self.se_ratio = se_ratio self.use_swish = use_swish - self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.act_cfg = act_cfg self.act_cfg_swish = Swish() self.with_cp = with_cp - self.conv1 = ConvModule( + self.conv1 = Conv3dModule( in_channels=inplanes, out_channels=planes, kernel_size=1, stride=1, padding=0, bias=False, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, ) # Here we use the channel-wise conv - self.conv2 = ConvModule( + self.conv2 = Conv3dModule( in_channels=planes, out_channels=planes, kernel_size=3, @@ -129,21 +125,19 @@ def __init__( padding=1, groups=planes, bias=False, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=None, ) self.swish = Swish() - self.conv3 = ConvModule( + self.conv3 = Conv3dModule( in_channels=planes, out_channels=outplanes, kernel_size=1, stride=1, padding=0, bias=False, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=None, ) @@ -201,8 +195,6 @@ class X3DBackbone(nn.Module): unit. If set as None, it means not using SE unit. Default: 1 / 16. use_swish (bool): Whether to use swish as the activation function before and after the 3x3x3 conv. Default: True. - conv_cfg (dict): Config for conv layers. required keys are ``type`` - Default: ``dict(type='Conv3d')``. norm_cfg (dict): Config for norm layers. required keys are ``type`` and ``requires_grad``. Default: ``dict(type='BN3d', requires_grad=True)``. 
@@ -231,7 +223,6 @@ def __init__( se_style: str = "half", se_ratio: float = 1 / 16, use_swish: bool = True, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, act_cfg: dict | None = None, norm_eval: bool = False, @@ -275,7 +266,6 @@ def __init__( raise ValueError(msg) self.use_swish = use_swish - self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.act_cfg = act_cfg self.norm_eval = norm_eval @@ -304,7 +294,6 @@ def __init__( se_ratio=self.se_ratio, use_swish=self.use_swish, norm_cfg=self.norm_cfg, - conv_cfg=self.conv_cfg, act_cfg=self.act_cfg, with_cp=with_cp, **kwargs, @@ -315,14 +304,13 @@ def __init__( self.res_layers.append(layer_name) self.feat_dim = self.base_channels * 2 ** (len(self.stage_blocks) - 1) - self.conv5 = ConvModule( + self.conv5 = Conv3dModule( self.feat_dim, int(self.feat_dim * self.gamma_b), kernel_size=1, stride=1, padding=0, bias=False, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, ) @@ -363,7 +351,6 @@ def make_res_layer( use_swish: bool = True, norm_cfg: dict | None = None, act_cfg: dict | None = None, - conv_cfg: dict | None = None, with_cp: bool = False, **kwargs, ) -> nn.Module: @@ -388,7 +375,6 @@ def make_res_layer( Default: None. use_swish (bool): Whether to use swish as the activation function before and after the 3x3x3 conv. Default: True. - conv_cfg (dict | None): Config for norm layers. Default: None. norm_cfg (dict | None): Config for norm layers. Default: None. act_cfg (dict | None): Config for activate layers. Default: None. with_cp (bool | None): Use checkpoint or not. Using checkpoint @@ -400,14 +386,13 @@ def make_res_layer( """ downsample = None if spatial_stride != 1 or layer_inplanes != inplanes: - downsample = ConvModule( + downsample = Conv3dModule( layer_inplanes, inplanes, kernel_size=1, stride=(1, spatial_stride, spatial_stride), padding=0, bias=False, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None, ) @@ -431,7 +416,6 @@ def make_res_layer( se_ratio=se_ratio if use_se[0] else None, use_swish=use_swish, norm_cfg=norm_cfg, - conv_cfg=conv_cfg, act_cfg=act_cfg, with_cp=with_cp, **kwargs, @@ -448,7 +432,6 @@ def make_res_layer( se_ratio=se_ratio if use_se[i] else None, use_swish=use_swish, norm_cfg=norm_cfg, - conv_cfg=conv_cfg, act_cfg=act_cfg, with_cp=with_cp, **kwargs, @@ -459,18 +442,17 @@ def make_res_layer( def _make_stem_layer(self) -> None: """Construct the stem layers consists of a conv+norm+act module and a pooling layer.""" - self.conv1_s = ConvModule( + self.conv1_s = Conv3dModule( self.in_channels, self.base_channels, kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1), bias=False, - conv_cfg=self.conv_cfg, norm_cfg=None, act_cfg=None, ) - self.conv1_t = ConvModule( + self.conv1_t = Conv3dModule( self.base_channels, self.base_channels, kernel_size=(5, 1, 1), @@ -478,7 +460,6 @@ def _make_stem_layer(self) -> None: padding=(2, 0, 0), groups=self.base_channels, bias=False, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, ) diff --git a/src/otx/algo/action_classification/x3d.py b/src/otx/algo/action_classification/x3d.py index dbb6cb0f490..7f503dadfd4 100644 --- a/src/otx/algo/action_classification/x3d.py +++ b/src/otx/algo/action_classification/x3d.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 # """X3D model implementation.""" + from __future__ import annotations from typing import TYPE_CHECKING @@ -63,7 +64,6 @@ def _build_model(self, num_classes: int) -> nn.Module: gamma_b=2.25, gamma_d=2.2, gamma_w=1, - conv_cfg={"type": "Conv3d"}, 
norm_cfg={"type": "BN3d", "requires_grad": True}, act_cfg={"type": "ReLU", "inplace": True}, ), diff --git a/src/otx/algo/classification/backbones/efficientnet.py b/src/otx/algo/classification/backbones/efficientnet.py index 9682dda3ce4..55646d434bd 100644 --- a/src/otx/algo/classification/backbones/efficientnet.py +++ b/src/otx/algo/classification/backbones/efficientnet.py @@ -1,7 +1,8 @@ -# Copyright (C) 2023 Intel Corporation +# Copyright (C) 2023-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # """EfficientNet Module.""" + from __future__ import annotations import math @@ -14,7 +15,7 @@ from torch.nn import functional, init from otx.algo.modules.activation import build_activation_layer -from otx.algo.modules.conv_module import ConvModule +from otx.algo.modules.conv_module import Conv2dModule from otx.algo.utils.mmengine_utils import load_checkpoint_to_model PRETRAINED_ROOT = "https://github.com/osmr/imgclsmob/releases/download/v0.0.364/" @@ -33,9 +34,9 @@ def conv1x1_block( use_bn: bool = True, bn_eps: float = 1e-5, activation: str | None = "ReLU", -) -> ConvModule: +) -> Conv2dModule: """Conv block.""" - return ConvModule( + return Conv2dModule( in_channels=in_channels, out_channels=out_channels, kernel_size=1, @@ -59,9 +60,9 @@ def conv3x3_block( use_bn: bool = True, bn_eps: float = 1e-5, activation: str | None = "ReLU", -) -> ConvModule: +) -> Conv2dModule: """Conv block.""" - return ConvModule( + return Conv2dModule( in_channels=in_channels, out_channels=out_channels, kernel_size=3, @@ -85,9 +86,9 @@ def dwconv3x3_block( use_bn: bool = True, bn_eps: float = 1e-5, activation: str | None = "ReLU", -) -> ConvModule: +) -> Conv2dModule: """Conv block.""" - return ConvModule( + return Conv2dModule( in_channels=in_channels, out_channels=out_channels, kernel_size=3, @@ -111,9 +112,9 @@ def dwconv5x5_block( use_bn: bool = True, bn_eps: float = 1e-5, activation: str | None = "ReLU", -) -> ConvModule: +) -> Conv2dModule: """Conv block.""" - return ConvModule( + return Conv2dModule( in_channels=in_channels, out_channels=out_channels, kernel_size=5, diff --git a/src/otx/algo/common/backbones/cspnext.py b/src/otx/algo/common/backbones/cspnext.py index c6347e61554..2b98783c6f7 100644 --- a/src/otx/algo/common/backbones/cspnext.py +++ b/src/otx/algo/common/backbones/cspnext.py @@ -14,8 +14,7 @@ from otx.algo.common.layers import SPPBottleneck from otx.algo.detection.layers import CSPLayer from otx.algo.modules.base_module import BaseModule -from otx.algo.modules.conv_module import ConvModule -from otx.algo.modules.depthwise_separable_conv_module import DepthwiseSeparableConvModule +from otx.algo.modules.conv_module import Conv2dModule, DepthwiseSeparableConvModule from torch import Tensor, nn from torch.nn.modules.batchnorm import _BatchNorm @@ -44,8 +43,6 @@ class CSPNeXt(BaseModule): layers. Defaults to (5, 9, 13). channel_attention (bool): Whether to add channel attention in each stage. Defaults to True. - conv_cfg (dict, optional): Config dict for - convolution layer. Defaults to None. norm_cfg (dict): Dictionary to construct and config norm layer. Defaults to dict(type='BN', requires_grad=True). act_cfg (dict): Config dict for activation layer. 
@@ -86,7 +83,6 @@ def __init__( arch_ovewrite: dict | None = None, spp_kernel_sizes: tuple[int, int, int] = (5, 9, 13), channel_attention: bool = True, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, act_cfg: dict | None = None, norm_eval: bool = False, @@ -121,9 +117,9 @@ def __init__( self.frozen_stages = frozen_stages self.use_depthwise = use_depthwise self.norm_eval = norm_eval - conv = DepthwiseSeparableConvModule if use_depthwise else ConvModule + conv = DepthwiseSeparableConvModule if use_depthwise else Conv2dModule self.stem = nn.Sequential( - ConvModule( + Conv2dModule( 3, int(arch_setting[0][0] * widen_factor // 2), 3, @@ -132,7 +128,7 @@ def __init__( norm_cfg=norm_cfg, act_cfg=act_cfg, ), - ConvModule( + Conv2dModule( int(arch_setting[0][0] * widen_factor // 2), int(arch_setting[0][0] * widen_factor // 2), 3, @@ -141,7 +137,7 @@ def __init__( norm_cfg=norm_cfg, act_cfg=act_cfg, ), - ConvModule( + Conv2dModule( int(arch_setting[0][0] * widen_factor // 2), int(arch_setting[0][0] * widen_factor), 3, @@ -164,7 +160,6 @@ def __init__( 3, stride=2, padding=1, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, ) @@ -174,7 +169,6 @@ def __init__( out_channels, out_channels, kernel_sizes=spp_kernel_sizes, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, ) @@ -188,7 +182,6 @@ def __init__( use_cspnext_block=True, expand_ratio=expand_ratio, channel_attention=channel_attention, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, ) diff --git a/src/otx/algo/common/backbones/resnet.py b/src/otx/algo/common/backbones/resnet.py index eb6d20c18f0..6e993a71b0c 100644 --- a/src/otx/algo/common/backbones/resnet.py +++ b/src/otx/algo/common/backbones/resnet.py @@ -15,7 +15,6 @@ import torch.utils.checkpoint as cp from otx.algo.common.layers import ResLayer from otx.algo.modules.base_module import BaseModule -from otx.algo.modules.conv import build_conv_layer from otx.algo.modules.norm import build_norm_layer from torch import nn from torch.nn.modules.batchnorm import _BatchNorm @@ -35,7 +34,6 @@ def __init__( dilation: int = 1, downsample: nn.Module | None = None, with_cp: bool = False, - conv_cfg: dict | None = None, init_cfg: dict | None = None, ): """Bottleneck block for ResNet. 
@@ -50,7 +48,6 @@ def __init__( self.stride = stride self.dilation = dilation self.with_cp = with_cp - self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.conv1_stride = 1 @@ -60,11 +57,10 @@ def __init__( self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2) self.norm3_name, norm3 = build_norm_layer(norm_cfg, planes * self.expansion, postfix=3) - self.conv1 = build_conv_layer(conv_cfg, inplanes, planes, kernel_size=1, stride=self.conv1_stride, bias=False) + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=self.conv1_stride, bias=False) self.add_module(self.norm1_name, norm1) - self.conv2 = build_conv_layer( - conv_cfg, + self.conv2 = nn.Conv2d( planes, planes, kernel_size=3, @@ -75,7 +71,7 @@ def __init__( ) self.add_module(self.norm2_name, norm2) - self.conv3 = build_conv_layer(conv_cfg, planes, planes * self.expansion, kernel_size=1, bias=False) + self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False) self.add_module(self.norm3_name, norm3) self.relu = nn.ReLU(inplace=True) @@ -183,7 +179,6 @@ def __init__( out_indices: tuple[int, int, int, int] = (0, 1, 2, 3), avg_down: bool = False, frozen_stages: int = -1, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, norm_eval: bool = True, with_cp: bool = False, @@ -240,7 +235,6 @@ def __init__( raise ValueError(msg) self.avg_down = avg_down self.frozen_stages = frozen_stages - self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.with_cp = with_cp self.norm_eval = norm_eval @@ -264,7 +258,6 @@ def __init__( dilation=dilation, avg_down=self.avg_down, with_cp=with_cp, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, init_cfg=block_init_cfg, ) @@ -287,8 +280,7 @@ def norm1(self) -> nn.Module: return getattr(self, self.norm1_name) def _make_stem_layer(self, in_channels: int, stem_channels: int) -> None: - self.conv1 = build_conv_layer( - self.conv_cfg, + self.conv1 = nn.Conv2d( in_channels, stem_channels, kernel_size=7, diff --git a/src/otx/algo/common/backbones/resnext.py b/src/otx/algo/common/backbones/resnext.py index 339e00bed78..2ec6dfc20c5 100644 --- a/src/otx/algo/common/backbones/resnext.py +++ b/src/otx/algo/common/backbones/resnext.py @@ -12,8 +12,8 @@ from typing import ClassVar from otx.algo.common.layers import ResLayer -from otx.algo.modules.conv import build_conv_layer from otx.algo.modules.norm import build_norm_layer +from torch import nn from .resnet import Bottleneck as _Bottleneck from .resnet import ResNet @@ -46,8 +46,7 @@ def __init__( self.norm2_name, norm2 = build_norm_layer(self.norm_cfg, width, postfix=2) self.norm3_name, norm3 = build_norm_layer(self.norm_cfg, self.planes * self.expansion, postfix=3) - self.conv1 = build_conv_layer( - self.conv_cfg, + self.conv1 = nn.Conv2d( self.inplanes, width, kernel_size=1, @@ -55,8 +54,7 @@ def __init__( bias=False, ) self.add_module(self.norm1_name, norm1) - self.conv2 = build_conv_layer( - self.conv_cfg, + self.conv2 = nn.Conv2d( width, width, kernel_size=3, @@ -67,7 +65,7 @@ def __init__( bias=False, ) self.add_module(self.norm2_name, norm2) - self.conv3 = build_conv_layer(self.conv_cfg, width, self.planes * self.expansion, kernel_size=1, bias=False) + self.conv3 = nn.Conv2d(width, self.planes * self.expansion, kernel_size=1, bias=False) self.add_module(self.norm3_name, norm3) def _del_block_plugins(self, plugin_names: list[str]) -> None: diff --git a/src/otx/algo/common/layers/res_layer.py b/src/otx/algo/common/layers/res_layer.py index f37ed1ee439..aeae090b304 100644 --- a/src/otx/algo/common/layers/res_layer.py 
+++ b/src/otx/algo/common/layers/res_layer.py @@ -9,7 +9,6 @@ from __future__ import annotations from otx.algo.modules.base_module import BaseModule, Sequential -from otx.algo.modules.conv import build_conv_layer from otx.algo.modules.norm import build_norm_layer from torch import nn @@ -25,8 +24,6 @@ class ResLayer(Sequential): stride (int): stride of the first block. Defaults to 1 avg_down (bool): Use AvgPool instead of stride conv when downsampling in the bottleneck. Defaults to False - conv_cfg (dict): dictionary to construct and config conv layer. - Defaults to None norm_cfg (dict): dictionary to construct and config norm layer. Defaults to dict(type='BN') downsample_first (bool): Downsample at the first block or last block. @@ -42,7 +39,6 @@ def __init__( norm_cfg: dict, stride: int = 1, avg_down: bool = False, - conv_cfg: dict | None = None, downsample_first: bool = True, **kwargs, ) -> None: @@ -64,8 +60,7 @@ def __init__( ) downsample.extend( [ - build_conv_layer( - conv_cfg, + nn.Conv2d( inplanes, planes * block.expansion, kernel_size=1, @@ -85,7 +80,6 @@ def __init__( planes=planes, stride=stride, downsample=downsample, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, **kwargs, ), @@ -93,7 +87,7 @@ def __init__( inplanes = planes * block.expansion layers.extend( [ - block(inplanes=inplanes, planes=planes, stride=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, **kwargs) + block(inplanes=inplanes, planes=planes, stride=1, norm_cfg=norm_cfg, **kwargs) for _ in range(1, num_blocks) ], ) diff --git a/src/otx/algo/common/layers/spp_layer.py b/src/otx/algo/common/layers/spp_layer.py index 0ceb253ec9d..d314bacea9d 100644 --- a/src/otx/algo/common/layers/spp_layer.py +++ b/src/otx/algo/common/layers/spp_layer.py @@ -10,7 +10,7 @@ import torch from otx.algo.modules.base_module import BaseModule -from otx.algo.modules.conv_module import ConvModule +from otx.algo.modules.conv_module import Conv2dModule from torch import Tensor, nn @@ -22,8 +22,6 @@ class SPPBottleneck(BaseModule): out_channels (int): The output channels of this Module. kernel_sizes (tuple[int]): Sequential of kernel sizes of pooling layers. Default: (5, 9, 13). - conv_cfg (dict): Config dict for convolution layer. Default: None, - which means using conv2d. norm_cfg (dict): Config dict for normalization layer. Default: dict(type='BN'). act_cfg (dict): Config dict for activation layer. @@ -37,7 +35,6 @@ def __init__( in_channels: int, out_channels: int, kernel_sizes: tuple[int, ...] 
= (5, 9, 13), - conv_cfg: dict | None = None, norm_cfg: dict | None = None, act_cfg: dict | None = None, init_cfg: dict | list[dict] | None = None, @@ -47,18 +44,23 @@ def __init__( act_cfg = act_cfg or {"type": "Swish"} mid_channels = in_channels // 2 - self.conv1 = ConvModule( + self.conv1 = Conv2dModule( in_channels, mid_channels, 1, stride=1, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, ) self.poolings = nn.ModuleList([nn.MaxPool2d(kernel_size=ks, stride=1, padding=ks // 2) for ks in kernel_sizes]) conv2_channels = mid_channels * (len(kernel_sizes) + 1) - self.conv2 = ConvModule(conv2_channels, out_channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg) + self.conv2 = Conv2dModule( + conv2_channels, + out_channels, + 1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + ) def forward(self, x: Tensor) -> Tensor: """Forward.""" diff --git a/src/otx/algo/detection/backbones/csp_darknet.py b/src/otx/algo/detection/backbones/csp_darknet.py index 1ca142aec08..6e92b995b06 100644 --- a/src/otx/algo/detection/backbones/csp_darknet.py +++ b/src/otx/algo/detection/backbones/csp_darknet.py @@ -18,8 +18,7 @@ from otx.algo.common.layers import SPPBottleneck from otx.algo.detection.layers import CSPLayer from otx.algo.modules.base_module import BaseModule -from otx.algo.modules.conv_module import ConvModule -from otx.algo.modules.depthwise_separable_conv_module import DepthwiseSeparableConvModule +from otx.algo.modules.conv_module import Conv2dModule, DepthwiseSeparableConvModule class Focus(nn.Module): @@ -30,8 +29,6 @@ class Focus(nn.Module): out_channels (int): The output channels of this Module. kernel_size (int): The kernel size of the convolution. Default: 1 stride (int): The stride of the convolution. Default: 1 - conv_cfg (dict): Config dict for convolution layer. Default: None, - which means using conv2d. norm_cfg (dict): Config dict for normalization layer. Default: dict(type='BN', momentum=0.03, eps=0.001). act_cfg (dict): Config dict for activation layer. @@ -44,20 +41,18 @@ def __init__( out_channels: int, kernel_size: int = 1, stride: int = 1, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, act_cfg: dict | None = None, ): super().__init__() norm_cfg = norm_cfg or {"type": "BN", "momentum": 0.03, "eps": 0.001} act_cfg = act_cfg or {"type": "Swish"} - self.conv = ConvModule( + self.conv = Conv2dModule( in_channels * 4, out_channels, kernel_size, stride, padding=(kernel_size - 1) // 2, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, ) @@ -113,7 +108,6 @@ class CSPDarknet(BaseModule): arch_ovewrite(list): Overwrite default arch settings. Default: None. spp_kernal_sizes: (tuple[int]): Sequential of kernel sizes of SPP layers. Default: (5, 9, 13). - conv_cfg (dict): Config dict for convolution layer. Default: None. norm_cfg (dict): Dictionary to construct and config norm layer. Default: dict(type='BN', requires_grad=True). act_cfg (dict): Config dict for activation layer. @@ -153,7 +147,6 @@ def __init__( use_depthwise: bool = False, arch_ovewrite: list | None = None, spp_kernal_sizes: tuple[int, ...] 
= (5, 9, 13), - conv_cfg: dict | None = None, norm_cfg: dict | None = None, act_cfg: dict | None = None, norm_eval: bool = False, @@ -183,13 +176,12 @@ def __init__( self.frozen_stages = frozen_stages self.use_depthwise = use_depthwise self.norm_eval = norm_eval - conv = DepthwiseSeparableConvModule if use_depthwise else ConvModule + conv = DepthwiseSeparableConvModule if use_depthwise else Conv2dModule self.stem = Focus( 3, int(arch_setting[0][0] * widen_factor), kernel_size=3, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, ) @@ -206,7 +198,6 @@ def __init__( 3, stride=2, padding=1, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, ) @@ -216,7 +207,6 @@ def __init__( out_channels, out_channels, kernel_sizes=spp_kernal_sizes, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, ) @@ -227,7 +217,6 @@ def __init__( num_blocks=num_blocks, add_identity=add_identity, use_depthwise=use_depthwise, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, ) diff --git a/src/otx/algo/detection/backbones/presnet.py b/src/otx/algo/detection/backbones/presnet.py index e3c96d43d63..b31f7f95c3a 100644 --- a/src/otx/algo/detection/backbones/presnet.py +++ b/src/otx/algo/detection/backbones/presnet.py @@ -11,8 +11,9 @@ import torch from torch import nn -from otx.algo.modules import ConvModule, build_activation_layer +from otx.algo.modules import build_activation_layer from otx.algo.modules.base_module import BaseModule +from otx.algo.modules.conv_module import Conv2dModule __all__ = ["PResNet"] @@ -42,15 +43,15 @@ def __init__( OrderedDict( [ ("pool", nn.AvgPool2d(2, 2, 0, ceil_mode=True)), - ("conv", ConvModule(ch_in, ch_out, 1, 1, act_cfg=None, norm_cfg=norm_cfg)), + ("conv", Conv2dModule(ch_in, ch_out, 1, 1, act_cfg=None, norm_cfg=norm_cfg)), ], ), ) else: - self.short = ConvModule(ch_in, ch_out, 1, stride, act_cfg=None, norm_cfg=norm_cfg) + self.short = Conv2dModule(ch_in, ch_out, 1, stride, act_cfg=None, norm_cfg=norm_cfg) - self.branch2a = ConvModule(ch_in, ch_out, 3, stride, padding=1, act_cfg=act_cfg, norm_cfg=norm_cfg) - self.branch2b = ConvModule(ch_out, ch_out, 3, 1, padding=1, act_cfg=None, norm_cfg=norm_cfg) + self.branch2a = Conv2dModule(ch_in, ch_out, 3, stride, padding=1, act_cfg=act_cfg, norm_cfg=norm_cfg) + self.branch2b = Conv2dModule(ch_out, ch_out, 3, 1, padding=1, act_cfg=None, norm_cfg=norm_cfg) self.act = nn.Identity() if act_cfg is None else build_activation_layer(act_cfg) def forward(self, x: torch.Tensor) -> torch.Tensor: @@ -88,9 +89,9 @@ def __init__( width = ch_out - self.branch2a = ConvModule(ch_in, width, 1, stride1, act_cfg=act_cfg, norm_cfg=norm_cfg) - self.branch2b = ConvModule(width, width, 3, stride2, padding=1, act_cfg=act_cfg, norm_cfg=norm_cfg) - self.branch2c = ConvModule(width, ch_out * self.expansion, 1, 1, act_cfg=None, norm_cfg=norm_cfg) + self.branch2a = Conv2dModule(ch_in, width, 1, stride1, act_cfg=act_cfg, norm_cfg=norm_cfg) + self.branch2b = Conv2dModule(width, width, 3, stride2, padding=1, act_cfg=act_cfg, norm_cfg=norm_cfg) + self.branch2c = Conv2dModule(width, ch_out * self.expansion, 1, 1, act_cfg=None, norm_cfg=norm_cfg) self.shortcut = shortcut if not shortcut: @@ -99,12 +100,15 @@ def __init__( OrderedDict( [ ("pool", nn.AvgPool2d(2, 2, 0, ceil_mode=True)), - ("conv", ConvModule(ch_in, ch_out * self.expansion, 1, 1, act_cfg=None, norm_cfg=norm_cfg)), + ( + "conv", + Conv2dModule(ch_in, ch_out * self.expansion, 1, 1, act_cfg=None, norm_cfg=norm_cfg), + ), ], ), ) else: - self.short = ConvModule(ch_in, ch_out * self.expansion, 1, 
stride, act_cfg=None, norm_cfg=norm_cfg) + self.short = Conv2dModule(ch_in, ch_out * self.expansion, 1, stride, act_cfg=None, norm_cfg=norm_cfg) self.act = nn.Identity() if act_cfg is None else build_activation_layer(act_cfg) @@ -216,7 +220,7 @@ def __init__( self.conv1 = nn.Sequential( OrderedDict( [ - (_name, ConvModule(c_in, c_out, k, s, padding=(k - 1) // 2, act_cfg=act_cfg, norm_cfg=norm_cfg)) + (_name, Conv2dModule(c_in, c_out, k, s, padding=(k - 1) // 2, act_cfg=act_cfg, norm_cfg=norm_cfg)) for c_in, c_out, k, s, _name in conv_def ], ), diff --git a/src/otx/algo/detection/heads/atss_head.py b/src/otx/algo/detection/heads/atss_head.py index 20d8ebd7de0..9d85dbf0b77 100644 --- a/src/otx/algo/detection/heads/atss_head.py +++ b/src/otx/algo/detection/heads/atss_head.py @@ -20,7 +20,7 @@ ) from otx.algo.detection.utils.prior_generators.utils import anchor_inside_flags from otx.algo.detection.utils.utils import unmap -from otx.algo.modules.conv_module import ConvModule +from otx.algo.modules.conv_module import Conv2dModule from otx.algo.modules.scale import Scale from otx.algo.utils.mmengine_utils import InstanceData @@ -39,8 +39,6 @@ class ATSSHead(ClassIncrementalMixin, AnchorHead): in_channels (int): Number of channels in the input feature map. pred_kernel_size (int): Kernel size of ``nn.Conv2d``. Defaults to 3. stacked_convs (int): Number of stacking convs of the head. Defaults to 4. - conv_cfg (dict, optional): Config dict for convolution layer. - Defaults to None. norm_cfg (dict): Config dict for normalization layer. Defaults to ``dict(type='GN', num_groups=32, requires_grad=True)``. reg_decoded_bbox (bool): If true, the regression loss would be @@ -58,7 +56,6 @@ def __init__( in_channels: int, pred_kernel_size: int = 3, stacked_convs: int = 4, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, reg_decoded_bbox: bool = True, loss_centerness: nn.Module | None = None, @@ -70,7 +67,6 @@ def __init__( ) -> None: self.pred_kernel_size = pred_kernel_size self.stacked_convs = stacked_convs - self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg or {"type": "GN", "num_groups": 32, "requires_grad": True} init_cfg = init_cfg or { "type": "Normal", @@ -111,24 +107,22 @@ def _init_layers(self) -> None: for i in range(self.stacked_convs): chn = self.in_channels if i == 0 else self.feat_channels self.cls_convs.append( - ConvModule( + Conv2dModule( chn, self.feat_channels, 3, stride=1, padding=1, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, ), ) self.reg_convs.append( - ConvModule( + Conv2dModule( chn, self.feat_channels, 3, stride=1, padding=1, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, ), ) diff --git a/src/otx/algo/detection/heads/rtmdet_head.py b/src/otx/algo/detection/heads/rtmdet_head.py index 12573eebd96..429c03cbe05 100644 --- a/src/otx/algo/detection/heads/rtmdet_head.py +++ b/src/otx/algo/detection/heads/rtmdet_head.py @@ -20,8 +20,7 @@ sigmoid_geometric_mean, unmap, ) -from otx.algo.modules.conv_module import ConvModule -from otx.algo.modules.depthwise_separable_conv_module import DepthwiseSeparableConvModule +from otx.algo.modules.conv_module import Conv2dModule, DepthwiseSeparableConvModule from otx.algo.modules.norm import is_norm from otx.algo.modules.scale import Scale from otx.algo.utils.mmengine_utils import InstanceData @@ -61,25 +60,23 @@ def _init_layers(self) -> None: for i in range(self.stacked_convs): chn = self.in_channels if i == 0 else self.feat_channels self.cls_convs.append( - ConvModule( + Conv2dModule( chn, self.feat_channels, 3, stride=1, 
padding=1, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, ), ) self.reg_convs.append( - ConvModule( + Conv2dModule( chn, self.feat_channels, 3, stride=1, padding=1, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, ), @@ -680,7 +677,7 @@ def __init__( def _init_layers(self) -> None: """Initialize layers of the head.""" - conv = DepthwiseSeparableConvModule if self.use_depthwise else ConvModule + conv = DepthwiseSeparableConvModule if self.use_depthwise else Conv2dModule self.cls_convs = nn.ModuleList() self.reg_convs = nn.ModuleList() @@ -700,7 +697,6 @@ def _init_layers(self) -> None: 3, stride=1, padding=1, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, ), @@ -712,7 +708,6 @@ def _init_layers(self) -> None: 3, stride=1, padding=1, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, ), diff --git a/src/otx/algo/detection/heads/yolox_head.py b/src/otx/algo/detection/heads/yolox_head.py index 1e4c6ab0015..7f8a12fbef2 100644 --- a/src/otx/algo/detection/heads/yolox_head.py +++ b/src/otx/algo/detection/heads/yolox_head.py @@ -8,6 +8,7 @@ from __future__ import annotations +import logging import math from typing import Sequence @@ -23,10 +24,11 @@ from otx.algo.common.utils.utils import multi_apply, reduce_mean from otx.algo.detection.heads.base_head import BaseDenseHead from otx.algo.detection.losses import IoULoss -from otx.algo.modules.conv_module import ConvModule -from otx.algo.modules.depthwise_separable_conv_module import DepthwiseSeparableConvModule +from otx.algo.modules.conv_module import Conv2dModule, DepthwiseSeparableConvModule from otx.algo.utils.mmengine_utils import InstanceData +logger = logging.getLogger() + class YOLOXHead(BaseDenseHead): """YOLOXHead head used in `YOLOX `_. @@ -47,8 +49,6 @@ class YOLOXHead(BaseDenseHead): conv_bias (bool or str): If specified as `auto`, it will be decided by the norm_cfg. Bias of conv will be set as True if `norm_cfg` is None, otherwise False. Defaults to "auto". - conv_cfg (dict, optional): Config dict for convolution layer. - Defaults to None. norm_cfg (dict): Config dict for normalization layer. Defaults to dict(type='BN', momentum=0.03, eps=0.001). act_cfg (dict): Config dict for activation layer. 
@@ -75,7 +75,6 @@ def __init__( use_depthwise: bool = False, dcn_on_last_conv: bool = False, conv_bias: bool | str = "auto", - conv_cfg: dict | None = None, norm_cfg: dict | None = None, act_cfg: dict | None = None, loss_cls: nn.Module | None = None, @@ -118,7 +117,6 @@ def __init__( self.conv_bias = conv_bias self.use_sigmoid_cls = True - self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.act_cfg = act_cfg @@ -158,11 +156,18 @@ def _init_layers(self) -> None: def _build_stacked_convs(self) -> nn.Sequential: """Initialize conv layers of a single level head.""" - conv = DepthwiseSeparableConvModule if self.use_depthwise else ConvModule + conv = DepthwiseSeparableConvModule if self.use_depthwise else Conv2dModule stacked_convs = [] for i in range(self.stacked_convs): chn = self.in_channels if i == 0 else self.feat_channels - conv_cfg = {"type": "DCNv2"} if self.dcn_on_last_conv and i == self.stacked_convs - 1 else self.conv_cfg + # TODO (sungchul): enable deformable convolution implemented in mmcv + # conv_cfg = {"type": "DCNv2"} if self.dcn_on_last_conv and i == self.stacked_convs - 1 else self.conv_cfg + if self.dcn_on_last_conv and i == self.stacked_convs - 1: + logger.warning( + f"stacked convs[{i}] : Deformable convolution is not supported in YOLOXHead, " + "use normal convolution instead.", + ) + stacked_convs.append( conv( chn, @@ -170,7 +175,6 @@ def _build_stacked_convs(self) -> nn.Sequential: 3, stride=1, padding=1, - conv_cfg=conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, bias=self.conv_bias, diff --git a/src/otx/algo/detection/layers/csp_layer.py b/src/otx/algo/detection/layers/csp_layer.py index 175584d6074..4cb0d10b57a 100644 --- a/src/otx/algo/detection/layers/csp_layer.py +++ b/src/otx/algo/detection/layers/csp_layer.py @@ -11,8 +11,7 @@ from otx.algo.detection.layers import ChannelAttention from otx.algo.modules import build_activation_layer from otx.algo.modules.base_module import BaseModule -from otx.algo.modules.conv_module import ConvModule -from otx.algo.modules.depthwise_separable_conv_module import DepthwiseSeparableConvModule +from otx.algo.modules.conv_module import Conv2dModule, DepthwiseSeparableConvModule class DarknetBottleneck(BaseModule): @@ -32,8 +31,6 @@ class DarknetBottleneck(BaseModule): Defaults to True. use_depthwise (bool): Whether to use depthwise separable convolution. Defaults to False. - conv_cfg (dict): Config dict for convolution layer. Defaults to None, - which means using conv2d. norm_cfg (dict): Config dict for normalization layer. Defaults to dict(type='BN'). act_cfg (dict): Config dict for activation layer. @@ -47,7 +44,6 @@ def __init__( expansion: float = 0.5, add_identity: bool = True, use_depthwise: bool = False, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, act_cfg: dict | None = None, init_cfg: dict | list[dict] | None = None, @@ -61,15 +57,20 @@ def __init__( super().__init__(init_cfg=init_cfg) hidden_channels = int(out_channels * expansion) - conv = DepthwiseSeparableConvModule if use_depthwise else ConvModule - self.conv1 = ConvModule(in_channels, hidden_channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg) + conv = DepthwiseSeparableConvModule if use_depthwise else Conv2dModule + self.conv1 = Conv2dModule( + in_channels, + hidden_channels, + 1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + ) self.conv2 = conv( hidden_channels, out_channels, 3, stride=1, padding=1, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, ) @@ -99,8 +100,6 @@ class CSPNeXtBlock(BaseModule): Defaults to False. 
kernel_size (int): The kernel size of the second convolution layer. Defaults to 5. - conv_cfg (dict): Config dict for convolution layer. Defaults to None, - which means using conv2d. norm_cfg (dict): Config dict for normalization layer. Defaults to dict(type='BN', momentum=0.03, eps=0.001). act_cfg (dict): Config dict for activation layer. @@ -117,7 +116,6 @@ def __init__( add_identity: bool = True, use_depthwise: bool = False, kernel_size: int = 5, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, act_cfg: dict | None = None, init_cfg: dict | list[dict] | None = None, @@ -131,7 +129,7 @@ def __init__( super().__init__(init_cfg=init_cfg) hidden_channels = int(out_channels * expansion) - conv = DepthwiseSeparableConvModule if use_depthwise else ConvModule + conv = DepthwiseSeparableConvModule if use_depthwise else Conv2dModule self.conv1 = conv(in_channels, hidden_channels, 3, stride=1, padding=1, norm_cfg=norm_cfg, act_cfg=act_cfg) self.conv2 = DepthwiseSeparableConvModule( hidden_channels, @@ -139,7 +137,6 @@ def __init__( kernel_size, stride=1, padding=kernel_size // 2, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, ) @@ -177,8 +174,8 @@ def __init__( super().__init__() self.ch_in = ch_in self.ch_out = ch_out - self.conv1 = ConvModule(ch_in, ch_out, 3, 1, padding=1, act_cfg=None, norm_cfg=norm_cfg) - self.conv2 = ConvModule(ch_in, ch_out, 1, 1, act_cfg=None, norm_cfg=norm_cfg) + self.conv1 = Conv2dModule(ch_in, ch_out, 3, 1, padding=1, act_cfg=None, norm_cfg=norm_cfg) + self.conv2 = Conv2dModule(ch_in, ch_out, 1, 1, act_cfg=None, norm_cfg=norm_cfg) self.act = nn.Identity() if act_cfg is None else build_activation_layer(act_cfg) def forward(self, x: Tensor) -> Tensor: @@ -199,7 +196,7 @@ def _pad_1x1_to_3x3_tensor(self, kernel1x1: Tensor | None) -> Tensor: return 0 return nn.functional.pad(kernel1x1, [1, 1, 1, 1]) - def _fuse_bn_tensor(self, branch: ConvModule) -> tuple[float, float]: + def _fuse_bn_tensor(self, branch: Conv2dModule) -> tuple[float, float]: """Fuse the BN layer to the convolution layer.""" if branch is None or branch.norm_layer is None: return 0, 0 @@ -231,8 +228,6 @@ class CSPLayer(BaseModule): blocks. Defaults to False. channel_attention (bool): Whether to add channel attention in each stage. Defaults to True. - conv_cfg (dict, optional): Config dict for convolution layer. - Defaults to None, which means using conv2d. norm_cfg (dict): Config dict for normalization layer. Defaults to dict(type='BN') act_cfg (dict): Config dict for activation layer. 
@@ -251,7 +246,6 @@ def __init__( use_depthwise: bool = False, use_cspnext_block: bool = False, channel_attention: bool = False, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, act_cfg: dict | None = None, init_cfg: dict | list[dict] | None = None, @@ -267,20 +261,24 @@ def __init__( block = CSPNeXtBlock if use_cspnext_block else DarknetBottleneck mid_channels = int(out_channels * expand_ratio) self.channel_attention = channel_attention - self.main_conv = ConvModule(in_channels, mid_channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg) - self.short_conv = ConvModule( + self.main_conv = Conv2dModule( in_channels, mid_channels, 1, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, ) - self.final_conv = ConvModule( + self.short_conv = Conv2dModule( + in_channels, + mid_channels, + 1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + ) + self.final_conv = Conv2dModule( 2 * mid_channels, out_channels, 1, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, ) @@ -293,7 +291,6 @@ def __init__( 1.0, add_identity, use_depthwise, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, ) @@ -329,8 +326,7 @@ class CSPRepLayer(nn.Module): bias (bool): Whether to use bias in the convolution layer. Defaults to False. act_cfg (dict[str, str] | None): Config dict for activation layer. - Defaults to None, which means using the activation config in - conv_cfg. + Defaults to None. norm_cfg (dict[str, str] | None): Config dict for normalization layer. Defaults to None. """ @@ -348,8 +344,8 @@ def __init__( """Initialize CSPRepLayer.""" super().__init__() hidden_channels = int(out_channels * expansion) - self.conv1 = ConvModule(in_channels, hidden_channels, 1, 1, bias=bias, act_cfg=act_cfg, norm_cfg=norm_cfg) - self.conv2 = ConvModule(in_channels, hidden_channels, 1, 1, bias=bias, act_cfg=act_cfg, norm_cfg=norm_cfg) + self.conv1 = Conv2dModule(in_channels, hidden_channels, 1, 1, bias=bias, act_cfg=act_cfg, norm_cfg=norm_cfg) + self.conv2 = Conv2dModule(in_channels, hidden_channels, 1, 1, bias=bias, act_cfg=act_cfg, norm_cfg=norm_cfg) self.bottlenecks = nn.Sequential( *[ RepVggBlock(hidden_channels, hidden_channels, act_cfg=act_cfg, norm_cfg=norm_cfg) @@ -357,7 +353,15 @@ def __init__( ], ) if hidden_channels != out_channels: - self.conv3 = ConvModule(hidden_channels, out_channels, 1, 1, bias=bias, act_cfg=act_cfg, norm_cfg=norm_cfg) + self.conv3 = Conv2dModule( + hidden_channels, + out_channels, + 1, + 1, + bias=bias, + act_cfg=act_cfg, + norm_cfg=norm_cfg, + ) else: self.conv3 = nn.Identity() diff --git a/src/otx/algo/detection/necks/cspnext_pafpn.py b/src/otx/algo/detection/necks/cspnext_pafpn.py index 43d9dce233c..4b10101557d 100644 --- a/src/otx/algo/detection/necks/cspnext_pafpn.py +++ b/src/otx/algo/detection/necks/cspnext_pafpn.py @@ -17,8 +17,7 @@ from otx.algo.detection.layers import CSPLayer from otx.algo.modules.base_module import BaseModule -from otx.algo.modules.conv_module import ConvModule -from otx.algo.modules.depthwise_separable_conv_module import DepthwiseSeparableConvModule +from otx.algo.modules.conv_module import Conv2dModule, DepthwiseSeparableConvModule class CSPNeXtPAFPN(BaseModule): @@ -31,7 +30,6 @@ class CSPNeXtPAFPN(BaseModule): use_depthwise (bool): Whether to use depthwise separable convolution in blocks. Defaults to False. expand_ratio (float): Ratio to adjust the number of channels of the hidden layer. Default: 0.5 upsample_cfg (dict): Config dict for interpolate layer. 
Default: `dict(scale_factor=2, mode='nearest')` - conv_cfg (dict, optional): Config dict for convolution layer. Default: None, which means using conv2d. norm_cfg (dict): Config dict for normalization layer. Default: dict(type='BN') act_cfg (dict): Config dict for activation layer. Default: dict(type='Swish') init_cfg (dict or list[dict], optional): Initialization config dict. Default: None. @@ -45,7 +43,6 @@ def __init__( use_depthwise: bool = False, expand_ratio: float = 0.5, upsample_cfg: dict | None = None, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, act_cfg: dict | None = None, init_cfg: dict | None = None, @@ -66,7 +63,7 @@ def __init__( self.in_channels = in_channels self.out_channels = out_channels - conv = DepthwiseSeparableConvModule if use_depthwise else ConvModule + conv = DepthwiseSeparableConvModule if use_depthwise else Conv2dModule # build top-down blocks self.upsample = nn.Upsample(**upsample_cfg) @@ -74,11 +71,10 @@ def __init__( self.top_down_blocks = nn.ModuleList() for idx in range(len(in_channels) - 1, 0, -1): self.reduce_layers.append( - ConvModule( + Conv2dModule( in_channels[idx], in_channels[idx - 1], 1, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, ), @@ -92,7 +88,6 @@ def __init__( use_depthwise=use_depthwise, use_cspnext_block=True, expand_ratio=expand_ratio, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, ), @@ -109,7 +104,6 @@ def __init__( 3, stride=2, padding=1, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, ), @@ -123,7 +117,6 @@ def __init__( use_depthwise=use_depthwise, use_cspnext_block=True, expand_ratio=expand_ratio, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, ), @@ -132,7 +125,7 @@ def __init__( self.out_convs = nn.ModuleList() for i in range(len(in_channels)): self.out_convs.append( - conv(in_channels[i], out_channels, 3, padding=1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg), + conv(in_channels[i], out_channels, 3, padding=1, norm_cfg=norm_cfg, act_cfg=act_cfg), ) def forward(self, inputs: tuple[Tensor, ...]) -> tuple[Tensor, ...]: diff --git a/src/otx/algo/detection/necks/fpn.py b/src/otx/algo/detection/necks/fpn.py index 8ff11c939e0..1d6c32355e0 100644 --- a/src/otx/algo/detection/necks/fpn.py +++ b/src/otx/algo/detection/necks/fpn.py @@ -13,7 +13,7 @@ from torch import Tensor, nn from otx.algo.modules.base_module import BaseModule -from otx.algo.modules.conv_module import ConvModule +from otx.algo.modules.conv_module import Conv2dModule class FPN(BaseModule): @@ -43,8 +43,6 @@ class FPN(BaseModule): conv. Defaults to False. no_norm_on_lateral (bool): Whether to apply norm on lateral. Defaults to False. - conv_cfg (dict, optional): Config dict for - convolution layer. Defaults to None. norm_cfg (dict, optional): Config dict for normalization layer. Defaults to None. 
act_cfg (dict, optional): Config dict for @@ -64,7 +62,6 @@ def __init__( add_extra_convs: bool | str = False, relu_before_extra_convs: bool = False, no_norm_on_lateral: bool = False, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, act_cfg: dict | None = None, upsample_cfg: dict | None = None, @@ -101,21 +98,19 @@ def __init__( self.fpn_convs = nn.ModuleList() for i in range(self.start_level, self.backbone_end_level): - l_conv = ConvModule( + l_conv = Conv2dModule( in_channels[i], out_channels, 1, - conv_cfg=conv_cfg, norm_cfg=norm_cfg if not self.no_norm_on_lateral else None, act_cfg=act_cfg, inplace=False, ) - fpn_conv = ConvModule( + fpn_conv = Conv2dModule( out_channels, out_channels, 3, padding=1, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, inplace=False, @@ -132,13 +127,12 @@ def __init__( conv_in_channels = self.in_channels[self.backbone_end_level - 1] else: conv_in_channels = out_channels - extra_fpn_conv = ConvModule( + extra_fpn_conv = Conv2dModule( conv_in_channels, out_channels, 3, stride=2, padding=1, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, inplace=False, diff --git a/src/otx/algo/detection/necks/hybrid_encoder.py b/src/otx/algo/detection/necks/hybrid_encoder.py index e219ab3898f..dc8879ada33 100644 --- a/src/otx/algo/detection/necks/hybrid_encoder.py +++ b/src/otx/algo/detection/necks/hybrid_encoder.py @@ -11,7 +11,7 @@ from torch import nn from otx.algo.detection.layers import CSPRepLayer -from otx.algo.modules import ConvModule, build_activation_layer +from otx.algo.modules import Conv2dModule, build_activation_layer from otx.algo.modules.base_module import BaseModule __all__ = ["HybridEncoder"] @@ -191,7 +191,7 @@ def __init__( self.lateral_convs = nn.ModuleList() self.fpn_blocks = nn.ModuleList() for _ in range(len(in_channels) - 1, 0, -1): - self.lateral_convs.append(ConvModule(hidden_dim, hidden_dim, 1, 1, act_cfg=act_cfg, norm_cfg=norm_cfg)) + self.lateral_convs.append(Conv2dModule(hidden_dim, hidden_dim, 1, 1, act_cfg=act_cfg, norm_cfg=norm_cfg)) self.fpn_blocks.append( CSPRepLayer( hidden_dim * 2, @@ -208,7 +208,7 @@ def __init__( self.pan_blocks = nn.ModuleList() for _ in range(len(in_channels) - 1): self.downsample_convs.append( - ConvModule(hidden_dim, hidden_dim, 3, 2, padding=1, act_cfg=act_cfg, norm_cfg=norm_cfg), + Conv2dModule(hidden_dim, hidden_dim, 3, 2, padding=1, act_cfg=act_cfg, norm_cfg=norm_cfg), ) self.pan_blocks.append( CSPRepLayer( diff --git a/src/otx/algo/detection/necks/yolox_pafpn.py b/src/otx/algo/detection/necks/yolox_pafpn.py index d8789201299..762d6c36852 100644 --- a/src/otx/algo/detection/necks/yolox_pafpn.py +++ b/src/otx/algo/detection/necks/yolox_pafpn.py @@ -16,8 +16,7 @@ from otx.algo.detection.layers import CSPLayer from otx.algo.modules.base_module import BaseModule -from otx.algo.modules.conv_module import ConvModule -from otx.algo.modules.depthwise_separable_conv_module import DepthwiseSeparableConvModule +from otx.algo.modules.conv_module import Conv2dModule, DepthwiseSeparableConvModule class YOLOXPAFPN(BaseModule): @@ -31,8 +30,6 @@ class YOLOXPAFPN(BaseModule): blocks. Default: False upsample_cfg (dict): Config dict for interpolate layer. Default: `dict(scale_factor=2, mode='nearest')` - conv_cfg (dict, optional): Config dict for convolution layer. - Default: None, which means using conv2d. norm_cfg (dict): Config dict for normalization layer. Default: dict(type='BN') act_cfg (dict): Config dict for activation layer. 
@@ -48,7 +45,6 @@ def __init__( num_csp_blocks: int = 3, use_depthwise: bool = False, upsample_cfg: dict | None = None, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, act_cfg: dict | None = None, init_cfg: dict | list[dict] | None = None, @@ -70,7 +66,7 @@ def __init__( self.in_channels = in_channels self.out_channels = out_channels - conv = DepthwiseSeparableConvModule if use_depthwise else ConvModule + conv = DepthwiseSeparableConvModule if use_depthwise else Conv2dModule # build top-down blocks self.upsample = nn.Upsample(**upsample_cfg) @@ -78,11 +74,10 @@ def __init__( self.top_down_blocks = nn.ModuleList() for idx in range(len(in_channels) - 1, 0, -1): self.reduce_layers.append( - ConvModule( + Conv2dModule( in_channels[idx], in_channels[idx - 1], 1, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, ), @@ -94,7 +89,6 @@ def __init__( num_blocks=num_csp_blocks, add_identity=False, use_depthwise=use_depthwise, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, ), @@ -111,7 +105,6 @@ def __init__( 3, stride=2, padding=1, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, ), @@ -123,7 +116,6 @@ def __init__( num_blocks=num_csp_blocks, add_identity=False, use_depthwise=use_depthwise, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, ), @@ -132,7 +124,7 @@ def __init__( self.out_convs = nn.ModuleList() for i in range(len(in_channels)): self.out_convs.append( - ConvModule(in_channels[i], out_channels, 1, conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg), + Conv2dModule(in_channels[i], out_channels, 1, norm_cfg=norm_cfg, act_cfg=act_cfg), ) def forward(self, inputs: tuple[Tensor]) -> tuple[Any, ...]: diff --git a/src/otx/algo/instance_segmentation/backbones/swin.py b/src/otx/algo/instance_segmentation/backbones/swin.py index 06ea9067424..4eb85af6362 100644 --- a/src/otx/algo/instance_segmentation/backbones/swin.py +++ b/src/otx/algo/instance_segmentation/backbones/swin.py @@ -612,7 +612,6 @@ def __init__( self.patch_embed = PatchEmbed( in_channels=in_channels, embed_dims=embed_dims, - conv_type="Conv2d", kernel_size=patch_size, stride=strides[0], norm_cfg=norm_cfg if patch_norm else None, diff --git a/src/otx/algo/instance_segmentation/heads/convfc_bbox_head.py b/src/otx/algo/instance_segmentation/heads/convfc_bbox_head.py index 8331f08c31d..112b49847e8 100644 --- a/src/otx/algo/instance_segmentation/heads/convfc_bbox_head.py +++ b/src/otx/algo/instance_segmentation/heads/convfc_bbox_head.py @@ -33,7 +33,6 @@ def __init__( num_reg_fcs: int = 0, conv_out_channels: int = 256, fc_out_channels: int = 1024, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, init_cfg: dict | None = None, *args, @@ -63,7 +62,6 @@ def __init__( self.num_reg_fcs = num_reg_fcs self.conv_out_channels = conv_out_channels self.fc_out_channels = fc_out_channels - self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg # add shared convs and fcs diff --git a/src/otx/algo/instance_segmentation/heads/fcn_mask_head.py b/src/otx/algo/instance_segmentation/heads/fcn_mask_head.py index cda59b4c349..06ad192cd4a 100644 --- a/src/otx/algo/instance_segmentation/heads/fcn_mask_head.py +++ b/src/otx/algo/instance_segmentation/heads/fcn_mask_head.py @@ -21,8 +21,7 @@ from otx.algo.instance_segmentation.utils.structures.mask import mask_target from otx.algo.instance_segmentation.utils.utils import empty_instances from otx.algo.modules.base_module import BaseModule, ModuleList -from otx.algo.modules.conv import build_conv_layer -from otx.algo.modules.conv_module import ConvModule +from 
otx.algo.modules.conv_module import Conv2dModule BYTES_PER_FLOAT = 4 # determine it based on available resources. @@ -46,7 +45,6 @@ def __init__( conv_out_channels: int = 256, num_classes: int = 80, class_agnostic: int = False, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, init_cfg: dict | list[dict] | None = None, ) -> None: @@ -63,9 +61,7 @@ def __init__( self.conv_out_channels = conv_out_channels self.num_classes = num_classes self.class_agnostic = class_agnostic - self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg - self.predictor_cfg = {"type": "Conv"} self.loss_mask = loss_mask @@ -74,12 +70,11 @@ def __init__( in_channels = self.in_channels if i == 0 else self.conv_out_channels padding = (self.conv_kernel_size - 1) // 2 self.convs.append( - ConvModule( + Conv2dModule( in_channels, self.conv_out_channels, self.conv_kernel_size, padding=padding, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, ), ) @@ -95,7 +90,7 @@ def __init__( self.upsample = nn.ConvTranspose2d(**upsample_cfg) out_channels = 1 if self.class_agnostic else self.num_classes logits_in_channel = self.conv_out_channels - self.conv_logits = build_conv_layer(self.predictor_cfg, logits_in_channel, out_channels, 1) + self.conv_logits = nn.Conv2d(logits_in_channel, out_channels, 1) self.relu = nn.ReLU(inplace=True) self.debug_imgs = None diff --git a/src/otx/algo/instance_segmentation/heads/rpn_head.py b/src/otx/algo/instance_segmentation/heads/rpn_head.py index ab21d6e02c8..a785371fa88 100644 --- a/src/otx/algo/instance_segmentation/heads/rpn_head.py +++ b/src/otx/algo/instance_segmentation/heads/rpn_head.py @@ -20,7 +20,7 @@ from otx.algo.detection.heads.anchor_head import AnchorHead from otx.algo.instance_segmentation.utils.structures.bbox import empty_box_as, get_box_wh from otx.algo.instance_segmentation.utils.utils import unpack_inst_seg_entity -from otx.algo.modules.conv_module import ConvModule +from otx.algo.modules.conv_module import Conv2dModule from otx.algo.utils.mmengine_utils import InstanceData from otx.core.data.entity.base import OTXBatchDataEntity from otx.core.data.entity.instance_segmentation import InstanceSegBatchDataEntity @@ -71,7 +71,7 @@ def _init_layers(self) -> None: # use ``inplace=False`` to avoid error: one of the variables # needed for gradient computation has been modified by an # inplace operation. 
- rpn_convs.append(ConvModule(in_channels, self.feat_channels, 3, padding=1, inplace=False)) + rpn_convs.append(Conv2dModule(in_channels, self.feat_channels, 3, padding=1, inplace=False)) self.rpn_conv = nn.Sequential(*rpn_convs) else: self.rpn_conv = nn.Conv2d(self.in_channels, self.feat_channels, 3, padding=1) diff --git a/src/otx/algo/instance_segmentation/heads/rtmdet_ins_head.py b/src/otx/algo/instance_segmentation/heads/rtmdet_ins_head.py index 84f65adee35..9d46627a43b 100644 --- a/src/otx/algo/instance_segmentation/heads/rtmdet_ins_head.py +++ b/src/otx/algo/instance_segmentation/heads/rtmdet_ins_head.py @@ -31,7 +31,7 @@ from otx.algo.instance_segmentation.utils.structures.bbox.transforms import get_box_wh, scale_boxes from otx.algo.instance_segmentation.utils.utils import unpack_inst_seg_entity from otx.algo.modules.base_module import BaseModule -from otx.algo.modules.conv_module import ConvModule +from otx.algo.modules.conv_module import Conv2dModule from otx.algo.modules.norm import is_norm from otx.algo.utils.mmengine_utils import InstanceData from otx.algo.utils.weight_init import bias_init_with_prob, constant_init, normal_init @@ -102,13 +102,12 @@ def _init_layers(self) -> None: for i in range(self.stacked_convs): chn = self.in_channels if i == 0 else self.feat_channels self.kernel_convs.append( - ConvModule( + Conv2dModule( chn, self.feat_channels, 3, stride=1, padding=1, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, ), @@ -739,7 +738,7 @@ def __init__( convs = [] for i in range(stacked_convs): in_c = in_channels if i == 0 else feat_channels - convs.append(ConvModule(in_c, feat_channels, 3, padding=1, act_cfg=act_cfg, norm_cfg=norm_cfg)) + convs.append(Conv2dModule(in_c, feat_channels, 3, padding=1, act_cfg=act_cfg, norm_cfg=norm_cfg)) self.stacked_convs = nn.Sequential(*convs) self.projection = nn.Conv2d(feat_channels, num_prototypes, kernel_size=1) @@ -843,37 +842,34 @@ def _init_layers(self) -> None: for i in range(self.stacked_convs): chn = self.in_channels if i == 0 else self.feat_channels cls_convs.append( - ConvModule( + Conv2dModule( chn, self.feat_channels, 3, stride=1, padding=1, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, ), ) reg_convs.append( - ConvModule( + Conv2dModule( chn, self.feat_channels, 3, stride=1, padding=1, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, ), ) kernel_convs.append( - ConvModule( + Conv2dModule( chn, self.feat_channels, 3, stride=1, padding=1, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, ), diff --git a/src/otx/algo/instance_segmentation/layers/transformer.py b/src/otx/algo/instance_segmentation/layers/transformer.py index b619d1055de..51e62bba734 100644 --- a/src/otx/algo/instance_segmentation/layers/transformer.py +++ b/src/otx/algo/instance_segmentation/layers/transformer.py @@ -17,7 +17,6 @@ from torch import nn from otx.algo.modules.base_module import BaseModule -from otx.algo.modules.conv import build_conv_layer from otx.algo.modules.norm import build_norm_layer @@ -91,11 +90,11 @@ class PatchEmbed(BaseModule): We use a conv layer to implement PatchEmbed. + TODO (sungchul): it is duplicated with otx.algo.modules.transformer.PatchEmbed + Args: in_channels (int): The num of input channels. Default: 3 embed_dims (int): The dimensions of embedding. Default: 768 - conv_type (str): The config dict for embedding - conv layer type selection. Default: "Conv2d. kernel_size (int): The kernel_size of embedding conv. Default: 16. 
stride (int): The slide stride of embedding conv. Default: None (Would be set as `kernel_size`). @@ -115,7 +114,6 @@ def __init__( self, in_channels: int = 3, embed_dims: int = 768, - conv_type: str = "Conv2d", kernel_size: int = 16, stride: int = 16, padding: int | tuple | str = "corner", @@ -148,8 +146,7 @@ def __init__( self.adap_padding = None padding = to_2tuple(padding) - self.projection = build_conv_layer( - {"type": conv_type}, + self.projection = nn.Conv2d( in_channels=in_channels, out_channels=embed_dims, kernel_size=kernel_size, diff --git a/src/otx/algo/instance_segmentation/necks/fpn.py b/src/otx/algo/instance_segmentation/necks/fpn.py index 0c0d2b21dcb..67286814f89 100644 --- a/src/otx/algo/instance_segmentation/necks/fpn.py +++ b/src/otx/algo/instance_segmentation/necks/fpn.py @@ -12,7 +12,7 @@ from torch import Tensor, nn from otx.algo.modules.base_module import BaseModule -from otx.algo.modules.conv_module import ConvModule +from otx.algo.modules.conv_module import Conv2dModule class FPN(BaseModule): @@ -34,8 +34,6 @@ class FPN(BaseModule): conv. Defaults to False. no_norm_on_lateral (bool): Whether to apply norm on lateral. Defaults to False. - conv_cfg (dict, optional): Config dict for - convolution layer. Defaults to None. norm_cfg (dict, optional): Config dict for normalization layer. Defaults to None. act_cfg (dict, optional): Config dict for @@ -54,7 +52,6 @@ def __init__( end_level: int = -1, relu_before_extra_convs: bool = False, no_norm_on_lateral: bool = False, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, act_cfg: dict | None = None, upsample_cfg: dict | None = None, @@ -95,21 +92,19 @@ def __init__( self.fpn_convs = nn.ModuleList() for i in range(self.start_level, self.backbone_end_level): - l_conv = ConvModule( + l_conv = Conv2dModule( in_channels[i], out_channels, 1, - conv_cfg=conv_cfg, norm_cfg=norm_cfg if not self.no_norm_on_lateral else None, act_cfg=act_cfg, inplace=False, ) - fpn_conv = ConvModule( + fpn_conv = Conv2dModule( out_channels, out_channels, 3, padding=1, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, inplace=False, diff --git a/src/otx/algo/modules/__init__.py b/src/otx/algo/modules/__init__.py index c7de18df440..605f47c67e0 100644 --- a/src/otx/algo/modules/__init__.py +++ b/src/otx/algo/modules/__init__.py @@ -5,18 +5,16 @@ """This module implementation is a code implementation copied or replaced from mmcv.cnn.bricks.""" from .activation import build_activation_layer -from .conv import build_conv_layer -from .conv_module import ConvModule -from .depthwise_separable_conv_module import DepthwiseSeparableConvModule +from .conv_module import Conv2dModule, Conv3dModule, DepthwiseSeparableConvModule from .norm import FrozenBatchNorm2d, build_norm_layer from .padding import build_padding_layer __all__ = [ "build_activation_layer", - "build_conv_layer", "build_padding_layer", "build_norm_layer", - "ConvModule", + "Conv2dModule", + "Conv3dModule", "DepthwiseSeparableConvModule", "FrozenBatchNorm2d", ] diff --git a/src/otx/algo/modules/conv.py b/src/otx/algo/modules/conv.py deleted file mode 100644 index a696686bb64..00000000000 --- a/src/otx/algo/modules/conv.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) OpenMMLab. All rights reserved. 
- -"""This implementation replaces the functionality of mmcv.cnn.bricks.conv.build_conv_layer.""" - -from __future__ import annotations - -import inspect - -from torch import nn - -CONV_DICT = { - "Conv1d": nn.Conv1d, - "Conv2d": nn.Conv2d, - "Conv3d": nn.Conv3d, - "Conv": nn.Conv2d, -} - - -def build_conv_layer(cfg: dict | None, *args, **kwargs) -> nn.Module: - """Build convolution layer. - - Args: - cfg (None or dict): The conv layer config, which should contain: - - type (str): Layer type. - - layer args: Args needed to instantiate an conv layer. - args (argument list): Arguments passed to the `__init__` - method of the corresponding conv layer. - kwargs (keyword arguments): Keyword arguments passed to the `__init__` - method of the corresponding conv layer. - - Returns: - nn.Module: Created conv layer. - """ - if cfg is None: - cfg_ = {"type": "Conv2d"} - else: - if not isinstance(cfg, dict): - msg = "cfg must be a dict" - raise TypeError(msg) - if "type" not in cfg: - msg = 'the cfg dict must contain the key "type"' - raise KeyError(msg) - cfg_ = cfg.copy() - - layer_type = cfg_.pop("type") - if inspect.isclass(layer_type): - return layer_type(*args, **kwargs, **cfg_) - conv_layer = CONV_DICT.get(layer_type) - if conv_layer is None: - msg = f"Cannot find {conv_layer} in {CONV_DICT.keys()}" - raise KeyError(msg) - return conv_layer(*args, **kwargs, **cfg_) diff --git a/src/otx/algo/modules/conv_module.py b/src/otx/algo/modules/conv_module.py index ae8de22a545..8fa9d6764d7 100644 --- a/src/otx/algo/modules/conv_module.py +++ b/src/otx/algo/modules/conv_module.py @@ -3,23 +3,21 @@ # Copyright (c) OpenMMLab. All rights reserved. """This implementation copied ConvModule of mmcv.cnn.bricks.ConvModule.""" + # TODO(someone): Revisit mypy errors after deprecation of mmlab -# mypy: ignore-errors + from __future__ import annotations import warnings -from functools import partial from typing import TYPE_CHECKING -import torch -from torch import nn +from torch import Tensor, nn from torch.nn.modules.batchnorm import _BatchNorm as BatchNorm from torch.nn.modules.instancenorm import _InstanceNorm as InstanceNorm from otx.algo.utils.weight_init import constant_init, kaiming_init from .activation import build_activation_layer -from .conv import build_conv_layer from .norm import build_norm_layer from .padding import build_padding_layer @@ -27,49 +25,12 @@ from torch.nn.modules.conv import _ConvNd as ConvNd -def efficient_conv_bn_eval_forward(bn: BatchNorm, conv: ConvNd, x: torch.Tensor) -> torch.Tensor: - """Implementation based on https://arxiv.org/abs/2305.11624. - - "Tune-Mode ConvBN Blocks For Efficient Transfer Learning" - It leverages the associative law between convolution and affine transform, - i.e., normalize (weight conv feature) = (normalize weight) conv feature. - It works for Eval mode of ConvBN blocks during validation, and can be used - for training as well. It reduces memory and computation cost. - - Args: - bn (_BatchNorm): a BatchNorm module. - conv (nn._ConvNd): a conv module - x (torch.Tensor): Input feature map. 
- """ - # These lines of code are designed to deal with various cases - # like bn without affine transform, and conv without bias - weight_on_the_fly = conv.weight - bias_on_the_fly = conv.bias if conv.bias is not None else torch.zeros_like(bn.running_var) - - bn_weight = bn.weight if bn.weight is not None else torch.ones_like(bn.running_var) - - bn_bias = bn.bias if bn.bias is not None else torch.zeros_like(bn.running_var) - - # shape of [C_out, 1, 1, 1] in Conv2d - weight_coeff = torch.rsqrt(bn.running_var + bn.eps).reshape([-1] + [1] * (len(conv.weight.shape) - 1)) - # shape of [C_out, 1, 1, 1] in Conv2d - coefff_on_the_fly = bn_weight.view_as(weight_coeff) * weight_coeff - - # shape of [C_out, C_in, k, k] in Conv2d - weight_on_the_fly = weight_on_the_fly * coefff_on_the_fly - # shape of [C_out] in Conv2d - bias_on_the_fly = bn_bias + coefff_on_the_fly.flatten() * (bias_on_the_fly - bn.running_mean) - - return conv._conv_forward(x, weight_on_the_fly, bias_on_the_fly) # noqa: SLF001 - - class ConvModule(nn.Module): """A conv block that bundles conv/norm/activation layers. This block simplifies the usage of convolution layers, which are commonly used with a norm layer (e.g., BatchNorm) and activation layer (e.g., ReLU). - It is based upon three build methods: `build_conv_layer()`, - `build_norm_layer()` and `build_activation_layer()`. + It is based upon two build methods: `build_norm_layer()` and `build_activation_layer()`. Besides, we add some additional features in this module. 1. Automatically set `bias` of the conv layer. @@ -95,8 +56,6 @@ class ConvModule(nn.Module): bias (bool | str): If specified as `auto`, it will be decided by the norm_cfg. Bias will be set as True if `norm_cfg` is None, otherwise False. Default: "auto". - conv_cfg (dict): Config dict for convolution layer. Default: None, - which means using conv2d. norm_cfg (dict): Config dict for normalization layer. Default: None. act_cfg (dict): Config dict for activation layer. Default: dict(type='ReLU'). @@ -109,16 +68,10 @@ class ConvModule(nn.Module): instead. Currently, we support ['zeros', 'circular'] with official implementation and ['reflect'] with our own implementation. Default: 'zeros'. - order (tuple[str]): The order of conv/norm/activation layers. It is a - sequence of "conv", "norm" and "act". Common examples are - ("conv", "norm", "act") and ("act", "conv", "norm"). - Default: ('conv', 'norm', 'act'). - efficient_conv_bn_eval (bool): Whether use efficient conv when the - consecutive bn is in eval mode (either training or testing), as - proposed in https://arxiv.org/abs/2305.11624 . Default: `False`. 
""" _abbr_ = "conv_block" + _conv_nd: ConvNd def __init__( self, @@ -130,29 +83,20 @@ def __init__( dilation: int | tuple[int, int] = 1, groups: int = 1, bias: bool | str = "auto", - conv_cfg: dict | None = None, norm_cfg: dict | None = None, act_cfg: dict | None = {"type": "ReLU"}, # noqa: B006 inplace: bool = True, with_spectral_norm: bool = False, padding_mode: str = "zeros", - order: tuple = ("conv", "norm", "act"), - efficient_conv_bn_eval: bool = False, ): super().__init__() - assert conv_cfg is None or isinstance(conv_cfg, dict) # noqa: S101 assert norm_cfg is None or isinstance(norm_cfg, dict) # noqa: S101 official_padding_mode = ["zeros", "circular"] - self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.act_cfg = act_cfg self.inplace = inplace self.with_spectral_norm = with_spectral_norm self.with_explicit_padding = padding_mode not in official_padding_mode - self.order = order - assert isinstance(self.order, tuple) # noqa: S101 - assert len(self.order) == 3 # noqa: S101 - assert set(order) == {"conv", "norm", "act"} # noqa: S101 self.with_norm = norm_cfg is not None self.with_activation = act_cfg is not None @@ -168,8 +112,7 @@ def __init__( # reset padding to 0 for conv module conv_padding = 0 if self.with_explicit_padding else padding # build convolution layer - self.conv = build_conv_layer( - conv_cfg, + self.conv = self._conv_nd( in_channels, out_channels, kernel_size, @@ -196,19 +139,17 @@ def __init__( # build normalization layers if self.with_norm: # norm layer is after conv layer - norm_channels = out_channels if order.index("norm") > order.index("conv") else in_channels - self.norm_name, norm = build_norm_layer(norm_cfg, norm_channels) + norm_channels = out_channels + self.norm_name, norm = build_norm_layer(norm_cfg, norm_channels) # type: ignore[arg-type] self.add_module(self.norm_name, norm) if self.with_bias and isinstance(norm, (BatchNorm, InstanceNorm)): warnings.warn("Unnecessary conv bias before batch/instance norm", stacklevel=1) else: - self.norm_name = None - - self.turn_on_efficient_conv_bn_eval(efficient_conv_bn_eval) + self.norm_name = None # type: ignore[assignment] # build activation layer if self.with_activation: - act_cfg_ = act_cfg.copy() + act_cfg_ = act_cfg.copy() # type: ignore[union-attr] # nn.Tanh has no 'inplace' argument if act_cfg_["type"] not in [ "Tanh", @@ -248,9 +189,9 @@ def init_weights(self) -> None: # Note: For PyTorch's conv layers, they will be overwritten by our # initialization implementation using default ``kaiming_init``. if not hasattr(self.conv, "init_weights"): - if self.with_activation and self.act_cfg["type"] == "LeakyReLU": + if self.with_activation and self.act_cfg["type"] == "LeakyReLU": # type: ignore[index] nonlinearity = "leaky_relu" - a = self.act_cfg.get("negative_slope", 0.01) + a = self.act_cfg.get("negative_slope", 0.01) # type: ignore[union-attr] else: nonlinearity = "relu" a = 0 @@ -258,106 +199,135 @@ def init_weights(self) -> None: if self.with_norm: constant_init(self.norm_layer, 1, bias=0) - def forward(self, x: torch.Tensor, activate: bool = True, norm: bool = True) -> torch.Tensor: + def forward(self, x: Tensor, activate: bool = True, norm: bool = True) -> Tensor: """Forward pass of the ConvModule. Args: - x (torch.Tensor): Input tensor. + x (Tensor): Input tensor. activate (bool, optional): Whether to apply activation. Defaults to True. norm (bool, optional): Whether to apply normalization. Defaults to True. Returns: - torch.Tensor: Output tensor. + Tensor: Output tensor. 
""" - layer_index = 0 - while layer_index < len(self.order): - layer = self.order[layer_index] - if layer == "conv": - if self.with_explicit_padding: - x = self.padding_layer(x) - # if the next operation is norm and we have a norm layer in - # eval mode and we have enabled `efficient_conv_bn_eval` for - # the conv operator, then activate the optimized forward and - # skip the next norm operator since it has been fused - if ( - layer_index + 1 < len(self.order) - and self.order[layer_index + 1] == "norm" - and norm - and self.with_norm - and not self.norm_layer.training - and self.efficient_conv_bn_eval_forward is not None - ): - self.conv.forward = partial(self.efficient_conv_bn_eval_forward, self.norm_layer, self.conv) - layer_index += 1 - x = self.conv(x) - del self.conv.forward - else: - x = self.conv(x) - elif layer == "norm" and norm and self.with_norm: - x = self.norm_layer(x) - elif layer == "act" and activate and self.with_activation: - x = self.activate(x) - layer_index += 1 + if self.with_explicit_padding: + x = self.padding_layer(x) + x = self.conv(x) + if norm and self.with_norm: + x = self.norm_layer(x) # type: ignore[misc] + if activate and self.with_activation: + x = self.activate(x) return x - def turn_on_efficient_conv_bn_eval(self, efficient_conv_bn_eval: bool = True) -> None: - """Turn on the efficient convolution batch normalization evaluation. - Args: - efficient_conv_bn_eval (bool, optional): Whether to enable efficient convolution - batch normalization evaluation. Defaults to True. - """ - # efficient_conv_bn_eval works for conv + bn - # with `track_running_stats` option - if ( - efficient_conv_bn_eval - and self.norm_layer - and isinstance(self.norm_layer, BatchNorm) - and self.norm_layer.track_running_stats - ): - self.efficient_conv_bn_eval_forward = efficient_conv_bn_eval_forward - else: - self.efficient_conv_bn_eval_forward = None - - @staticmethod - def create_from_conv_bn( - conv: ConvNd, - bn: BatchNorm, - efficient_conv_bn_eval: bool = True, - ) -> ConvModule: - """Create a ConvModule from a conv and a bn module.""" - self = ConvModule.__new__(ConvModule) - super(ConvModule, self).__init__() - - self.conv_cfg = None - self.norm_cfg = None - self.act_cfg = None - self.inplace = False - self.with_spectral_norm = False - self.with_explicit_padding = False - self.order = ("conv", "norm", "act") - - self.with_norm = True - self.with_activation = False - self.with_bias = conv.bias is not None +class DepthwiseSeparableConvModule(nn.Module): + """Depthwise separable convolution module. - # build convolution layer - self.conv = conv - # export the attributes of self.conv to a higher level for convenience - self.in_channels = self.conv.in_channels - self.out_channels = self.conv.out_channels - self.kernel_size = self.conv.kernel_size - self.stride = self.conv.stride - self.padding = self.conv.padding - self.dilation = self.conv.dilation - self.transposed = self.conv.transposed - self.output_padding = self.conv.output_padding - self.groups = self.conv.groups + See https://arxiv.org/pdf/1704.04861.pdf for details. + + This module can replace a ConvModule with the conv block replaced by two + conv block: depthwise conv block and pointwise conv block. The depthwise + conv block contains depthwise-conv/norm/activation layers. The pointwise + conv block contains pointwise-conv/norm/activation layers. It should be + noted that there will be norm/activation layer in the depthwise conv block + if `norm_cfg` and `act_cfg` are specified. 
+ + Args: + in_channels (int): Number of channels in the input feature map. + Same as that in ``nn._ConvNd``. + out_channels (int): Number of channels produced by the convolution. + Same as that in ``nn._ConvNd``. + kernel_size (int | tuple[int]): Size of the convolving kernel. + Same as that in ``nn._ConvNd``. + stride (int | tuple[int]): Stride of the convolution. + Same as that in ``nn._ConvNd``. Default: 1. + padding (int | tuple[int]): Zero-padding added to both sides of + the input. Same as that in ``nn._ConvNd``. Default: 0. + dilation (int | tuple[int]): Spacing between kernel elements. + Same as that in ``nn._ConvNd``. Default: 1. + norm_cfg (dict): Default norm config for both depthwise ConvModule and + pointwise ConvModule. Default: None. + act_cfg (dict): Default activation config for both depthwise ConvModule + and pointwise ConvModule. Default: dict(type='ReLU'). + dw_norm_cfg (dict): Norm config of depthwise ConvModule. If it is + None, it will be the same as `norm_cfg`. Default: None. + dw_act_cfg (dict): Activation config of depthwise ConvModule. If it is + None, it will be the same as `act_cfg`. Default: None. + pw_norm_cfg (dict): Norm config of pointwise ConvModule. If it is + None, it will be the same as `norm_cfg`. Default: None. + pw_act_cfg (dict): Activation config of pointwise ConvModule. If it is + None, it will be the same as `act_cfg`. Default: None. + kwargs (optional): Other shared arguments for depthwise and pointwise + ConvModule. See ConvModule for ref. + """ + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int | tuple[int, int], + stride: int | tuple[int, int] = 1, + padding: int | tuple[int, int] = 0, + dilation: int | tuple[int, int] = 1, + norm_cfg: dict | None = None, + act_cfg: dict | None = None, + dw_norm_cfg: dict | None = None, + dw_act_cfg: dict | None = None, + pw_norm_cfg: dict | None = None, + pw_act_cfg: dict | None = None, + **kwargs, + ): + if act_cfg is None: + act_cfg = {"type": "ReLU"} + + super().__init__() + if "groups" in kwargs: + msg = "groups should not be specified in DepthwiseSeparableConvModule." + raise ValueError(msg) + + # if norm/activation config of depthwise/pointwise Conv2dModule is not + # specified, use default config. 
+ dw_norm_cfg = dw_norm_cfg or norm_cfg + dw_act_cfg = dw_act_cfg or act_cfg + pw_norm_cfg = pw_norm_cfg or norm_cfg + pw_act_cfg = pw_act_cfg or act_cfg + + # depthwise convolution + self.depthwise_conv = Conv2dModule( + in_channels, + in_channels, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=in_channels, + norm_cfg=dw_norm_cfg, + act_cfg=dw_act_cfg, + **kwargs, + ) + + self.pointwise_conv = Conv2dModule( + in_channels, + out_channels, + 1, + norm_cfg=pw_norm_cfg, + act_cfg=pw_act_cfg, + **kwargs, + ) + + def forward(self, x: Tensor) -> Tensor: + """Forward.""" + x = self.depthwise_conv(x) + return self.pointwise_conv(x) + + +class Conv2dModule(ConvModule): + """A conv2d block that bundles conv/norm/activation layers.""" + + _conv_nd = nn.Conv2d - # build normalization layers - self.norm_name, norm = "bn", bn - self.add_module(self.norm_name, norm) - self.turn_on_efficient_conv_bn_eval(efficient_conv_bn_eval) +class Conv3dModule(ConvModule): + """A conv3d block that bundles conv/norm/activation layers.""" - return self + _conv_nd = nn.Conv3d diff --git a/src/otx/algo/modules/depthwise_separable_conv_module.py b/src/otx/algo/modules/depthwise_separable_conv_module.py deleted file mode 100644 index f04db1cdf09..00000000000 --- a/src/otx/algo/modules/depthwise_separable_conv_module.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) OpenMMLab. All rights reserved. - -"""This implementation of DepthwiseSeparableConvModule copied from mmcv.cnn.bricks.depthwise_separable_conv_module.""" - -from __future__ import annotations - -from torch import Tensor, nn - -from .conv_module import ConvModule - - -class DepthwiseSeparableConvModule(nn.Module): - """Depthwise separable convolution module. - - See https://arxiv.org/pdf/1704.04861.pdf for details. - - This module can replace a ConvModule with the conv block replaced by two - conv block: depthwise conv block and pointwise conv block. The depthwise - conv block contains depthwise-conv/norm/activation layers. The pointwise - conv block contains pointwise-conv/norm/activation layers. It should be - noted that there will be norm/activation layer in the depthwise conv block - if `norm_cfg` and `act_cfg` are specified. - - Args: - in_channels (int): Number of channels in the input feature map. - Same as that in ``nn._ConvNd``. - out_channels (int): Number of channels produced by the convolution. - Same as that in ``nn._ConvNd``. - kernel_size (int | tuple[int]): Size of the convolving kernel. - Same as that in ``nn._ConvNd``. - stride (int | tuple[int]): Stride of the convolution. - Same as that in ``nn._ConvNd``. Default: 1. - padding (int | tuple[int]): Zero-padding added to both sides of - the input. Same as that in ``nn._ConvNd``. Default: 0. - dilation (int | tuple[int]): Spacing between kernel elements. - Same as that in ``nn._ConvNd``. Default: 1. - norm_cfg (dict): Default norm config for both depthwise ConvModule and - pointwise ConvModule. Default: None. - act_cfg (dict): Default activation config for both depthwise ConvModule - and pointwise ConvModule. Default: dict(type='ReLU'). - dw_norm_cfg (dict): Norm config of depthwise ConvModule. If it is - None, it will be the same as `norm_cfg`. Default: None. - dw_act_cfg (dict): Activation config of depthwise ConvModule. If it is - None, it will be the same as `act_cfg`. Default: None. - pw_norm_cfg (dict): Norm config of pointwise ConvModule. 
If it is - None, it will be the same as `norm_cfg`. Default: None. - pw_act_cfg (dict): Activation config of pointwise ConvModule. If it is - None, it will be the same as `act_cfg`. Default: None. - kwargs (optional): Other shared arguments for depthwise and pointwise - ConvModule. See ConvModule for ref. - """ - - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: int | tuple[int, int], - stride: int | tuple[int, int] = 1, - padding: int | tuple[int, int] = 0, - dilation: int | tuple[int, int] = 1, - norm_cfg: dict | None = None, - act_cfg: dict | None = None, - dw_norm_cfg: dict | None = None, - dw_act_cfg: dict | None = None, - pw_norm_cfg: dict | None = None, - pw_act_cfg: dict | None = None, - **kwargs, - ): - if act_cfg is None: - act_cfg = {"type": "ReLU"} - - super().__init__() - assert "groups" not in kwargs, "groups should not be specified" # noqa: S101 - - # if norm/activation config of depthwise/pointwise ConvModule is not - # specified, use default config. - dw_norm_cfg = dw_norm_cfg or norm_cfg - dw_act_cfg = dw_act_cfg or act_cfg - pw_norm_cfg = pw_norm_cfg or norm_cfg - pw_act_cfg = pw_act_cfg or act_cfg - - # depthwise convolution - self.depthwise_conv = ConvModule( - in_channels, - in_channels, - kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - groups=in_channels, - norm_cfg=dw_norm_cfg, - act_cfg=dw_act_cfg, - **kwargs, - ) - - self.pointwise_conv = ConvModule( - in_channels, - out_channels, - 1, - norm_cfg=pw_norm_cfg, - act_cfg=pw_act_cfg, - **kwargs, - ) - - def forward(self, x: Tensor) -> Tensor: - """Forward.""" - x = self.depthwise_conv(x) - return self.pointwise_conv(x) diff --git a/src/otx/algo/modules/transformer.py b/src/otx/algo/modules/transformer.py index e37e5e45c1e..46cdd96a943 100644 --- a/src/otx/algo/modules/transformer.py +++ b/src/otx/algo/modules/transformer.py @@ -3,6 +3,7 @@ # Copyright (c) OpenMMLab. All rights reserved. """This implementation replaces the functionality of mmcv.cnn.bricks.transformer.""" + from __future__ import annotations import math @@ -13,7 +14,6 @@ from otx.algo.modules.base_module import BaseModule, Sequential from .activation import build_activation_layer -from .conv import build_conv_layer from .drop import build_dropout from .norm import build_norm_layer @@ -122,11 +122,11 @@ class PatchEmbed(BaseModule): We use a conv layer to implement PatchEmbed. + TODO (sungchul): it is duplicated with otx.algo.instance_segmentation.layers.transformer.PatchEmbed + Args: in_channels (int): The num of input channels. Default: 3 embed_dims (int): The dimensions of embedding. Default: 768 - conv_type (str): The type of convolution - to generate patch embedding. Default: "Conv2d". kernel_size (int): The kernel_size of embedding conv. Default: 16. stride (int): The slide stride of embedding conv. Default: 16. 
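A rough usage sketch of the two per-dimension wrappers introduced in conv_module.py, assuming `build_norm_layer` accepts the `BN`/`BN3d` types used elsewhere in this patch: with a norm configured, `bias="auto"` resolves to False, and the forward path is now fixed to conv -> norm -> act.

import torch
from otx.algo.modules.conv_module import Conv2dModule, Conv3dModule

block2d = Conv2dModule(3, 8, 3, padding=1, norm_cfg={"type": "BN"})
assert block2d.conv.bias is None           # bias="auto" disables the conv bias when a norm follows
y = block2d(torch.rand(1, 3, 32, 32))      # conv -> norm -> act, shape (1, 8, 32, 32)

# Conv3dModule only swaps the underlying layer via `_conv_nd = nn.Conv3d`.
block3d = Conv3dModule(3, 8, 3, padding=1, norm_cfg={"type": "BN3d"})
z = block3d(torch.rand(1, 3, 4, 32, 32))   # shape (1, 8, 4, 32, 32)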
@@ -149,7 +149,6 @@ def __init__( self, in_channels: int = 3, embed_dims: int = 768, - conv_type: str = "Conv2d", kernel_size: int | tuple[int, int] = 16, stride: int | tuple[int, int] = 16, padding: str | int | tuple[int, int] = "corner", @@ -183,8 +182,7 @@ def __init__( self.adaptive_padding = None padding = padding if isinstance(padding, tuple) else (padding, padding) - self.projection = build_conv_layer( - {"type": conv_type}, + self.projection = nn.Conv2d( in_channels=in_channels, out_channels=embed_dims, kernel_size=kernel_size, diff --git a/src/otx/algo/segmentation/backbones/litehrnet.py b/src/otx/algo/segmentation/backbones/litehrnet.py index 7b8ffd450e1..48e359862bd 100644 --- a/src/otx/algo/segmentation/backbones/litehrnet.py +++ b/src/otx/algo/segmentation/backbones/litehrnet.py @@ -1,4 +1,4 @@ -# Copyright (C) 2023 Intel Corporation +# Copyright (C) 2023-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # """HRNet network modules for base backbone. @@ -7,7 +7,6 @@ - https://github.com/HRNet/Lite-HRNet """ - from __future__ import annotations from pathlib import Path @@ -17,7 +16,7 @@ from torch import nn from torch.nn import functional -from otx.algo.modules import ConvModule, build_conv_layer, build_norm_layer +from otx.algo.modules import Conv2dModule, build_norm_layer from otx.algo.modules.base_module import BaseModule from otx.algo.segmentation.modules import ( AsymmetricPositionAttentionModule, @@ -37,7 +36,6 @@ def __init__( kernel_size: int = 3, key_ratio: int = 8, value_ratio: int = 8, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, ) -> None: """Neighbour support module. @@ -47,7 +45,6 @@ def __init__( kernel_size (int): Kernel size for convolutional layers. Default is 3. key_ratio (int): Ratio of input channels to key channels. Default is 8. value_ratio (int): Ratio of input channels to value channels. Default is 8. - conv_cfg (dict | None): Config for convolutional layers. Default is None. norm_cfg (dict | None): Config for normalization layers. Default is None. 
""" super().__init__() @@ -58,54 +55,49 @@ def __init__( self.kernel_size = kernel_size self.key = nn.Sequential( - ConvModule( + Conv2dModule( in_channels=self.in_channels, out_channels=self.key_channels, kernel_size=1, stride=1, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg={"type": "ReLU"}, ), - ConvModule( + Conv2dModule( self.key_channels, self.key_channels, kernel_size=self.kernel_size, stride=1, padding=(self.kernel_size - 1) // 2, groups=self.key_channels, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None, ), - ConvModule( + Conv2dModule( in_channels=self.key_channels, out_channels=self.kernel_size * self.kernel_size, kernel_size=1, stride=1, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None, ), ) self.value = nn.Sequential( - ConvModule( + Conv2dModule( in_channels=self.in_channels, out_channels=self.value_channels, kernel_size=1, stride=1, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None, ), nn.Unfold(kernel_size=self.kernel_size, stride=1, padding=1), ) - self.out_conv = ConvModule( + self.out_conv = Conv2dModule( in_channels=self.value_channels, out_channels=self.in_channels, kernel_size=1, stride=1, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None, ) @@ -131,7 +123,6 @@ def __init__( self, channels: list[int], ratio: int = 16, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, act_cfg: dict | tuple[dict, dict] = ({"type": "ReLU"}, {"type": "Sigmoid"}), ) -> None: @@ -140,7 +131,6 @@ def __init__( Args: channels (list[int]): Number of channels for each stage. ratio (int): Reduction ratio of the bottleneck block. - conv_cfg (dict | None): Config dict for convolution layer. Default: None norm_cfg (dict | None): Config dict for normalization layer. Default: None act_cfg (dict | tuple[dict, dict]): Config dict or a tuple of config dicts for activation layer(s). Default: ({"type": "ReLU"}, {"type": "Sigmoid"}). @@ -156,21 +146,19 @@ def __init__( self.channels = channels total_channel = sum(channels) - self.conv1 = ConvModule( + self.conv1 = Conv2dModule( in_channels=total_channel, out_channels=int(total_channel / ratio), kernel_size=1, stride=1, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg[0], ) - self.conv2 = ConvModule( + self.conv2 = Conv2dModule( in_channels=int(total_channel / ratio), out_channels=total_channel, kernel_size=1, stride=1, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg[1], ) @@ -195,7 +183,6 @@ def __init__( self, channels: int, ratio: int = 16, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, act_cfg: dict | tuple[dict, dict] = ({"type": "ReLU"}, {"type": "Sigmoid"}), enable_norm: bool = False, @@ -205,8 +192,6 @@ def __init__( Args: channels (int): Number of input channels. ratio (int): Reduction ratio for the bottleneck block. Default: 16. - conv_cfg (dict | None): Configuration dict for convolutional layers. - Default: None. act_cfg (dict | tuple[dict]): Configuration dict or tuple of dicts for activation layers. If a single dict is provided, it will be used for both activation layers. Default: ({"type": "ReLU"}, {"type": "Sigmoid"}). 
@@ -224,20 +209,18 @@ def __init__( raise ValueError(msg) self.global_avgpool = nn.AdaptiveAvgPool2d(1) - self.conv1 = ConvModule( + self.conv1 = Conv2dModule( in_channels=channels, out_channels=int(channels / ratio), kernel_size=1, stride=1, - conv_cfg=conv_cfg, act_cfg=act_cfg[0], ) - self.conv2 = ConvModule( + self.conv2 = Conv2dModule( in_channels=int(channels / ratio), out_channels=channels, kernel_size=1, stride=1, - conv_cfg=conv_cfg, act_cfg=act_cfg[1], ) @@ -257,7 +240,6 @@ def __init__( self, channels: int, ratio: int = 16, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, enable_norm: bool = False, ) -> None: @@ -266,7 +248,6 @@ def __init__( Args: channels (int): Number of input channels. ratio (int): Reduction ratio of internal channels. - conv_cfg (dict | None): Config dict for convolution layer. norm_cfg (dict | None): Config dict for normalization layer. enable_norm (bool): Whether to enable normalization layers. """ @@ -276,54 +257,49 @@ def __init__( self.internal_channels = int(channels / ratio) # channel-only branch - self.v_channel = ConvModule( + self.v_channel = Conv2dModule( in_channels=self.in_channels, out_channels=self.internal_channels, kernel_size=1, stride=1, bias=False, - conv_cfg=conv_cfg, norm_cfg=norm_cfg if enable_norm else None, act_cfg=None, ) - self.q_channel = ConvModule( + self.q_channel = Conv2dModule( in_channels=self.in_channels, out_channels=1, kernel_size=1, stride=1, bias=False, - conv_cfg=conv_cfg, norm_cfg=norm_cfg if enable_norm else None, act_cfg=None, ) - self.out_channel = ConvModule( + self.out_channel = Conv2dModule( in_channels=self.internal_channels, out_channels=self.in_channels, kernel_size=1, stride=1, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg={"type": "Sigmoid"}, ) # spatial-only branch - self.v_spatial = ConvModule( + self.v_spatial = Conv2dModule( in_channels=self.in_channels, out_channels=self.internal_channels, kernel_size=1, stride=1, bias=False, - conv_cfg=conv_cfg, norm_cfg=norm_cfg if enable_norm else None, act_cfg=None, ) - self.q_spatial = ConvModule( + self.q_spatial = Conv2dModule( in_channels=self.in_channels, out_channels=self.internal_channels, kernel_size=1, stride=1, bias=False, - conv_cfg=conv_cfg, norm_cfg=norm_cfg if enable_norm else None, act_cfg=None, ) @@ -392,7 +368,6 @@ def __init__( in_channels: list[int], stride: int, reduce_ratio: int, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, with_cp: bool = False, dropout: float | None = None, @@ -406,7 +381,6 @@ def __init__( in_channels (list[int]): Number of input channels for each input feature map. stride (int): Stride used in the first convolutional layer. reduce_ratio (int): Reduction ratio used in the cross-resolution weighting module. - conv_cfg (dict | None): Dictionary to construct and configure the convolutional layers. norm_cfg (dict | None): Dictionary to construct and configure the normalization layers. with_cp (bool): Whether to use checkpointing to save memory. dropout (float | None): Dropout probability used in the depthwise convolutional layers. 
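The weighting blocks follow a squeeze-and-excite pattern: pool to 1x1, reduce channels with a ReLU conv, expand back with a Sigmoid conv, then re-weight the input. A self-contained sketch of that pattern; names and sizes are illustrative, not the module's actual API:

import torch
from torch import nn
from otx.algo.modules.conv_module import Conv2dModule

channels, ratio = 64, 16
pool = nn.AdaptiveAvgPool2d(1)
squeeze = Conv2dModule(channels, channels // ratio, 1, act_cfg={"type": "ReLU"})
excite = Conv2dModule(channels // ratio, channels, 1, act_cfg={"type": "Sigmoid"})

x = torch.rand(2, channels, 28, 28)
w = excite(squeeze(pool(x)))   # (2, 64, 1, 1) channel weights in [0, 1]
out = x * w                    # same shape as x, channels re-weighted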
@@ -434,19 +408,17 @@ def __init__( self.cross_resolution_weighting = CrossResolutionWeighting( branch_channels, ratio=reduce_ratio, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, ) self.depthwise_convs = nn.ModuleList( [ - ConvModule( + Conv2dModule( channel, channel, kernel_size=dw_ksize, stride=self.stride, padding=dw_ksize // 2, groups=channel, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None, ) @@ -458,7 +430,6 @@ def __init__( spatial_weighting_module( channels=channel, ratio=4, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, enable_norm=True, ) @@ -475,7 +446,6 @@ def __init__( kernel_size=3, key_ratio=8, value_ratio=4, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, ) for channel in branch_channels @@ -528,7 +498,6 @@ def __init__( stem_channels: int, out_channels: int, expand_ratio: int, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, with_cp: bool = False, strides: tuple[int, int] = (2, 2), @@ -542,7 +511,6 @@ def __init__( stem_channels (int): Number of output channels of the stem layer. out_channels (int): Number of output channels of the backbone network. expand_ratio (int): Expansion ratio of the internal channels. - conv_cfg (dict | None): Dictionary to construct and configure convolution layers. norm_cfg (dict | None): Dictionary to construct and configure normalization layers. with_cp (bool): Use checkpointing to save memory during forward pass. num_stages (int): Number of stages in the backbone network. @@ -568,7 +536,6 @@ def __init__( self.in_channels = in_channels self.out_channels = out_channels - self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.with_cp = with_cp @@ -576,26 +543,24 @@ def __init__( if input_norm: self.input_norm = nn.InstanceNorm2d(in_channels) - self.conv1 = ConvModule( + self.conv1 = Conv2dModule( in_channels=in_channels, out_channels=stem_channels, kernel_size=3, stride=strides[0], padding=1, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg={"type": "ReLU"}, ) self.conv2 = None if extra_stride: - self.conv2 = ConvModule( + self.conv2 = Conv2dModule( in_channels=stem_channels, out_channels=stem_channels, kernel_size=3, stride=2, padding=1, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg={"type": "ReLU"}, ) @@ -608,57 +573,52 @@ def __init__( inc_channels = self.out_channels - stem_channels self.branch1 = nn.Sequential( - ConvModule( + Conv2dModule( branch_channels, branch_channels, kernel_size=3, stride=strides[1], padding=1, groups=branch_channels, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None, ), - ConvModule( + Conv2dModule( branch_channels, inc_channels, kernel_size=1, stride=1, padding=0, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg={"type": "ReLU"}, ), ) - self.expand_conv = ConvModule( + self.expand_conv = Conv2dModule( branch_channels, mid_channels, kernel_size=1, stride=1, padding=0, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg={"type": "ReLU"}, ) - self.depthwise_conv = ConvModule( + self.depthwise_conv = Conv2dModule( mid_channels, mid_channels, kernel_size=3, stride=strides[1], padding=1, groups=mid_channels, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None, ) - self.linear_conv = ConvModule( + self.linear_conv = Conv2dModule( mid_channels, branch_channels if stem_channels == self.out_channels else stem_channels, kernel_size=1, stride=1, padding=0, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg={"type": "ReLU"}, ) @@ -705,7 +665,6 @@ def __init__( stem_channels: int, out_channels: int, expand_ratio: int, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, with_cp: bool = False, num_stages: 
int = 1, @@ -720,7 +679,6 @@ def __init__( stem_channels (int): Number of output channels of the stem layer. out_channels (int): Number of output channels of the backbone network. expand_ratio (int): Expansion ratio of the internal channels. - conv_cfg (dict | None): Dictionary to construct and configure convolution layers. norm_cfg (dict | None): Dictionary to construct and configure normalization layers. with_cp (bool): Use checkpointing to save memory during forward pass. num_stages (int): Number of stages in the backbone network. @@ -750,7 +708,6 @@ def __init__( self.in_channels = in_channels self.out_channels = out_channels - self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.with_cp = with_cp self.num_stages = num_stages @@ -759,26 +716,24 @@ def __init__( if input_norm: self.input_norm = nn.InstanceNorm2d(in_channels) - self.conv1 = ConvModule( + self.conv1 = Conv2dModule( in_channels=in_channels, out_channels=stem_channels, kernel_size=3, stride=strides[0], padding=1, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg={"type": "ReLU"}, ) self.conv2 = None if extra_stride: - self.conv2 = ConvModule( + self.conv2 = Conv2dModule( in_channels=stem_channels, out_channels=stem_channels, kernel_size=3, stride=2, padding=1, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg={"type": "ReLU"}, ) @@ -791,24 +746,22 @@ def __init__( for stage in range(1, num_stages + 1): self.branch1.append( nn.Sequential( - ConvModule( + Conv2dModule( internal_branch_channels, internal_branch_channels, kernel_size=3, stride=strides[stage], padding=1, groups=internal_branch_channels, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None, ), - ConvModule( + Conv2dModule( internal_branch_channels, out_branch_channels if stage == num_stages else internal_branch_channels, kernel_size=1, stride=1, padding=0, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg={"type": "ReLU"}, ), @@ -817,34 +770,31 @@ def __init__( self.branch2.append( nn.Sequential( - ConvModule( + Conv2dModule( internal_branch_channels, mid_channels, kernel_size=1, stride=1, padding=0, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg={"type": "ReLU"}, ), - ConvModule( + Conv2dModule( mid_channels, mid_channels, kernel_size=3, stride=strides[stage], padding=1, groups=mid_channels, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None, ), - ConvModule( + Conv2dModule( mid_channels, out_branch_channels if stage == num_stages else internal_branch_channels, kernel_size=1, stride=1, padding=0, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg={"type": "ReLU"}, ), @@ -893,7 +843,6 @@ def __init__( in_channels: int, out_channels: int, stride: int = 1, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, act_cfg: dict | None = None, with_cp: bool = False, @@ -904,8 +853,6 @@ def __init__( in_channels (int): The input channels of the block. out_channels (int): The output channels of the block. stride (int): Stride of the 3x3 convolution layer. Default: 1 - conv_cfg (dict): Config dict for convolution layer. - Default: None, which means using conv2d. norm_cfg (dict): Config dict for normalization layer. Default: dict(type='BN'). act_cfg (dict): Config dict for activation layer. 
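The stem's second branch chains three Conv2dModules into a ShuffleNetV2-style unit: a 1x1 expansion, a stride-2 depthwise 3x3, and a 1x1 projection back. A rough sketch with illustrative channel sizes (the real values come from the stem config):

import torch
from torch import nn
from otx.algo.modules.conv_module import Conv2dModule

branch = nn.Sequential(
    Conv2dModule(32, 64, 1, norm_cfg={"type": "BN"}, act_cfg={"type": "ReLU"}),                     # expand
    Conv2dModule(64, 64, 3, stride=2, padding=1, groups=64, norm_cfg={"type": "BN"}, act_cfg=None),  # depthwise
    Conv2dModule(64, 32, 1, norm_cfg={"type": "BN"}, act_cfg={"type": "ReLU"}),                      # project
)
y = branch(torch.rand(1, 32, 56, 56))   # (1, 32, 28, 28): spatial halved by the stride-2 depthwise conv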
@@ -935,58 +882,53 @@ def __init__( if self.stride > 1: self.branch1 = nn.Sequential( - ConvModule( + Conv2dModule( in_channels, in_channels, kernel_size=3, stride=self.stride, padding=1, groups=in_channels, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None, ), - ConvModule( + Conv2dModule( in_channels, branch_features, kernel_size=1, stride=1, padding=0, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, ), ) self.branch2 = nn.Sequential( - ConvModule( + Conv2dModule( in_channels if (self.stride > 1) else branch_features, branch_features, kernel_size=1, stride=1, padding=0, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, ), - ConvModule( + Conv2dModule( branch_features, branch_features, kernel_size=3, stride=self.stride, padding=1, groups=branch_features, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None, ), - ConvModule( + Conv2dModule( branch_features, branch_features, kernel_size=1, stride=1, padding=0, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg, ), @@ -1019,7 +961,6 @@ def __init__( module_type: str, multiscale_output: bool = False, with_fuse: bool = True, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, with_cp: bool = False, dropout: float | None = None, @@ -1036,7 +977,6 @@ def __init__( module_type (str): Type of module to use for the network. Can be "LITE" or "NAIVE". multiscale_output (bool, optional): Whether to output features from all branches. Defaults to False. with_fuse (bool, optional): Whether to use the fuse layer. Defaults to True. - conv_cfg (dict, optional): Configuration for the convolutional layers. Defaults to None. norm_cfg (dict, optional): Configuration for the normalization layers. Defaults to None. with_cp (bool, optional): Whether to use checkpointing. Defaults to False. dropout (float, optional): Dropout rate. Defaults to None. 
@@ -1056,7 +996,6 @@ def __init__( self.multiscale_output = multiscale_output self.with_fuse = with_fuse self.norm_cfg = norm_cfg - self.conv_cfg = conv_cfg self.with_cp = with_cp self.weighting_module_version = weighting_module_version self.neighbour_weighting = neighbour_weighting @@ -1089,7 +1028,6 @@ def _make_weighting_blocks( self.in_channels, stride=stride, reduce_ratio=reduce_ratio, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, with_cp=self.with_cp, dropout=dropout, @@ -1108,7 +1046,6 @@ def _make_one_branch(self, branch_index: int, num_blocks: int, stride: int = 1) self.in_channels[branch_index], self.in_channels[branch_index], stride=stride, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg={"type": "ReLU"}, with_cp=self.with_cp, @@ -1118,7 +1055,6 @@ def _make_one_branch(self, branch_index: int, num_blocks: int, stride: int = 1) self.in_channels[branch_index], self.in_channels[branch_index], stride=1, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg={"type": "ReLU"}, with_cp=self.with_cp, @@ -1149,8 +1085,7 @@ def _make_fuse_layers(self) -> nn.ModuleList: if j > i: fuse_layer.append( nn.Sequential( - build_conv_layer( - self.conv_cfg, + nn.Conv2d( in_channels[j], in_channels[i], kernel_size=1, @@ -1169,8 +1104,7 @@ def _make_fuse_layers(self) -> nn.ModuleList: if k == i - j - 1: conv_downsamples.append( nn.Sequential( - build_conv_layer( - self.conv_cfg, + nn.Conv2d( in_channels[j], in_channels[j], kernel_size=3, @@ -1180,8 +1114,7 @@ def _make_fuse_layers(self) -> nn.ModuleList: bias=False, ), build_norm_layer(self.norm_cfg, in_channels[j])[1], - build_conv_layer( - self.conv_cfg, + nn.Conv2d( in_channels[j], in_channels[i], kernel_size=1, @@ -1195,8 +1128,7 @@ def _make_fuse_layers(self) -> nn.ModuleList: else: conv_downsamples.append( nn.Sequential( - build_conv_layer( - self.conv_cfg, + nn.Conv2d( in_channels[j], in_channels[j], kernel_size=3, @@ -1206,8 +1138,7 @@ def _make_fuse_layers(self) -> nn.ModuleList: bias=False, ), build_norm_layer(self.norm_cfg, in_channels[j])[1], - build_conv_layer( - self.conv_cfg, + nn.Conv2d( in_channels[j], in_channels[j], kernel_size=1, @@ -1265,7 +1196,6 @@ class LiteHRNet(BaseModule): Args: extra (dict): detailed configuration for each stage of HRNet. in_channels (int): Number of input image channels. Default: 3. - conv_cfg (dict): dictionary to construct and config conv layer. norm_cfg (dict): dictionary to construct and config norm layer. norm_eval (bool): Whether to set norm layers to eval mode, namely, freeze running stats (mean and var). 
Note: Effect on Batch Norm @@ -1280,7 +1210,6 @@ def __init__( self, extra: dict, in_channels: int = 3, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, norm_eval: bool = False, with_cp: bool = False, @@ -1294,11 +1223,8 @@ def __init__( if norm_cfg is None: norm_cfg = {"type": "BN"} - if conv_cfg is None: - conv_cfg = {"type": "Conv2d"} self.extra = extra - self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.norm_eval = norm_eval self.with_cp = with_cp @@ -1311,7 +1237,6 @@ def __init__( expand_ratio=self.extra["stem"]["expand_ratio"], strides=self.extra["stem"]["strides"], extra_stride=self.extra["stem"]["extra_stride"], - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, ) @@ -1351,13 +1276,12 @@ def __init__( if self.extra["out_modules"]["conv"]["enable"]: out_modules_channels = self.extra["out_modules"]["conv"]["channels"] out_modules.append( - ConvModule( + Conv2dModule( in_channels=in_modules_channels, out_channels=out_modules_channels, kernel_size=1, stride=1, padding=0, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg={"type": "ReLU"}, ), @@ -1370,7 +1294,6 @@ def __init__( key_channels=self.extra["out_modules"]["position_att"]["key_channels"], value_channels=self.extra["out_modules"]["position_att"]["value_channels"], psp_size=self.extra["out_modules"]["position_att"]["psp_size"], - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, ), ) @@ -1378,7 +1301,6 @@ def __init__( out_modules.append( LocalAttentionModule( num_channels=in_modules_channels, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, ), ) @@ -1390,24 +1312,22 @@ def __init__( self.add_stem_features = self.extra.get("add_stem_features", False) if self.add_stem_features: self.stem_transition = nn.Sequential( - ConvModule( + Conv2dModule( self.stem.out_channels, self.stem.out_channels, kernel_size=3, stride=1, padding=1, groups=self.stem.out_channels, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=None, ), - ConvModule( + Conv2dModule( self.stem.out_channels, num_channels_last[0], kernel_size=1, stride=1, padding=0, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg={"type": "ReLU"}, ), @@ -1420,7 +1340,6 @@ def __init__( self.aggregator = IterativeAggregator( in_channels=num_channels_last, min_channels=self.extra["out_aggregator"].get("min_channels", None), - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, ) @@ -1442,8 +1361,7 @@ def _make_transition_layer( if num_channels_cur_layer[i] != num_channels_pre_layer[i]: transition_layers.append( nn.Sequential( - build_conv_layer( - self.conv_cfg, + nn.Conv2d( num_channels_pre_layer[i], num_channels_pre_layer[i], kernel_size=3, @@ -1453,8 +1371,7 @@ def _make_transition_layer( bias=False, ), build_norm_layer(self.norm_cfg, num_channels_pre_layer[i])[1], - build_conv_layer( - self.conv_cfg, + nn.Conv2d( num_channels_pre_layer[i], num_channels_cur_layer[i], kernel_size=1, @@ -1475,8 +1392,7 @@ def _make_transition_layer( out_channels = num_channels_cur_layer[i] if j == i - num_branches_pre else in_channels conv_downsamples.append( nn.Sequential( - build_conv_layer( - self.conv_cfg, + nn.Conv2d( in_channels, in_channels, kernel_size=3, @@ -1486,8 +1402,7 @@ def _make_transition_layer( bias=False, ), build_norm_layer(self.norm_cfg, in_channels)[1], - build_conv_layer( - self.conv_cfg, + nn.Conv2d( in_channels, out_channels, kernel_size=1, @@ -1546,7 +1461,6 @@ def _make_stage( module_type, multiscale_output=reset_multiscale_output, with_fuse=with_fuse, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, with_cp=self.with_cp, dropout=dropout, diff --git 
a/src/otx/algo/segmentation/heads/base_segm_head.py b/src/otx/algo/segmentation/heads/base_segm_head.py index e642a7529d0..8547b0233dc 100644 --- a/src/otx/algo/segmentation/heads/base_segm_head.py +++ b/src/otx/algo/segmentation/heads/base_segm_head.py @@ -24,7 +24,6 @@ def __init__( channels: int, num_classes: int, dropout_ratio: float = 0.1, - conv_cfg: dict[str, str] | None = None, norm_cfg: dict[str, str] | None = None, act_cfg: dict[str, str] | None = None, in_index: int | list[int] = -1, @@ -40,8 +39,6 @@ def __init__( channels (int): Number of channels in the feature map. num_classes (int): Number of classes for segmentation. dropout_ratio (float, optional): The dropout ratio. Defaults to 0.1. - conv_cfg (Optional[ConfigType], optional): Config for convolution layer. - Defaults to None. norm_cfg (Optional[ConfigType], optional): Config for normalization layer. Defaults to None. act_cfg (Dict[str, Union[str, Dict]], optional): Activation config. @@ -59,7 +56,6 @@ def __init__( self.num_classes = num_classes self.input_transform = input_transform self.dropout_ratio = dropout_ratio - self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg self.act_cfg = act_cfg if self.input_transform is not None and not isinstance(in_index, list): diff --git a/src/otx/algo/segmentation/heads/fcn_head.py b/src/otx/algo/segmentation/heads/fcn_head.py index 42b8ee2ab85..c6d8316c59f 100644 --- a/src/otx/algo/segmentation/heads/fcn_head.py +++ b/src/otx/algo/segmentation/heads/fcn_head.py @@ -10,7 +10,7 @@ import torch from torch import Tensor, nn -from otx.algo.modules import ConvModule +from otx.algo.modules import Conv2dModule from otx.algo.segmentation.modules import IterativeAggregator from .base_segm_head import BaseSegmHead @@ -34,7 +34,6 @@ def __init__( in_channels: list[int] | int, in_index: list[int] | int, norm_cfg: dict[str, Any] | None = None, - conv_cfg: dict[str, Any] | None = None, input_transform: str | None = None, num_convs: int = 2, kernel_size: int = 3, @@ -73,7 +72,6 @@ def __init__( aggregator = IterativeAggregator( in_channels=in_channels, min_channels=aggregator_min_channels, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, merge_norm=aggregator_merge_norm, use_concat=aggregator_use_concat, @@ -91,7 +89,6 @@ def __init__( super().__init__( in_index=in_index, norm_cfg=norm_cfg, - conv_cfg=conv_cfg, input_transform=input_transform, in_channels=in_channels, **kwargs, @@ -105,7 +102,7 @@ def __init__( conv_padding = (kernel_size // 2) * dilation convs = [ - ConvModule( + Conv2dModule( self.in_channels, self.channels, kernel_size=kernel_size, @@ -117,7 +114,7 @@ def __init__( ] convs.extend( [ - ConvModule( + Conv2dModule( self.channels, self.channels, kernel_size=kernel_size, @@ -134,12 +131,11 @@ def __init__( else: self.convs = nn.Sequential(*convs) if self.concat_input: - self.conv_cat = ConvModule( + self.conv_cat = Conv2dModule( self.in_channels + self.channels, self.channels, kernel_size=kernel_size, padding=kernel_size // 2, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, ) diff --git a/src/otx/algo/segmentation/heads/ham_head.py b/src/otx/algo/segmentation/heads/ham_head.py index 68c7b006cba..52f789808b0 100644 --- a/src/otx/algo/segmentation/heads/ham_head.py +++ b/src/otx/algo/segmentation/heads/ham_head.py @@ -11,7 +11,7 @@ import torch.nn.functional as f from torch import nn -from otx.algo.modules import ConvModule +from otx.algo.modules import Conv2dModule from otx.algo.segmentation.modules import resize from .base_segm_head import BaseSegmHead @@ -45,11 +45,11 
@@ def __init__( """ super().__init__() - self.ham_in = ConvModule(ham_channels, ham_channels, 1, norm_cfg=None, act_cfg=None) + self.ham_in = Conv2dModule(ham_channels, ham_channels, 1, norm_cfg=None, act_cfg=None) self.ham = NMF2D(ham_channels=ham_channels, **ham_kwargs) - self.ham_out = ConvModule(ham_channels, ham_channels, 1, norm_cfg=norm_cfg, act_cfg=None) + self.ham_out = Conv2dModule(ham_channels, ham_channels, 1, norm_cfg=norm_cfg, act_cfg=None) def forward(self, x: torch.Tensor) -> torch.Tensor: """Forward.""" @@ -97,22 +97,20 @@ def __init__( self.ham_channels: int = ham_channels self.ham_kwargs: dict[str, Any] = ham_kwargs if ham_kwargs is not None else {} - self.squeeze = ConvModule( + self.squeeze = Conv2dModule( sum(self.in_channels), self.ham_channels, 1, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, ) self.hamburger = Hamburger(self.ham_channels, ham_kwargs=self.ham_kwargs, **kwargs) - self.align = ConvModule( + self.align = Conv2dModule( self.ham_channels, self.channels, 1, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=self.act_cfg, ) diff --git a/src/otx/algo/segmentation/modules/aggregators.py b/src/otx/algo/segmentation/modules/aggregators.py index b5143255c66..bff23694b50 100644 --- a/src/otx/algo/segmentation/modules/aggregators.py +++ b/src/otx/algo/segmentation/modules/aggregators.py @@ -9,7 +9,7 @@ from torch import nn from torch.nn import functional as f -from otx.algo.modules import ConvModule, DepthwiseSeparableConvModule +from otx.algo.modules import Conv2dModule, DepthwiseSeparableConvModule from .utils import normalize @@ -24,7 +24,6 @@ def __init__( self, in_channels: list[int], min_channels: int | None = None, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, merge_norm: str | None = None, use_concat: bool = False, @@ -34,7 +33,6 @@ def __init__( Args: in_channels (list[int]): List of input channels for each branch. min_channels (int | None): Minimum number of channels. Defaults to None. - conv_cfg (dict | None): Config for convolution layers. Defaults to None. norm_cfg (dict | None): Config for normalization layers. Defaults to None. merge_norm (str | None): Whether to merge normalization layers. Defaults to None. use_concat (bool): Whether to use concatenation. Defaults to False. 
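IterativeAggregator builds its projection layers from DepthwiseSeparableConvModule, which now lives next to Conv2dModule in conv_module.py. A small sketch, with illustrative sizes, of the factorization it performs: a per-channel 3x3 depthwise conv followed by a 1x1 pointwise conv, matching a plain 3x3 conv's output shape with far fewer weights.

import torch
from otx.algo.modules import DepthwiseSeparableConvModule

m = DepthwiseSeparableConvModule(64, 128, 3, padding=1, norm_cfg={"type": "BN"})
y = m(torch.rand(1, 64, 56, 56))           # (1, 128, 56, 56), same as a plain 3x3 conv
print(m.depthwise_conv.conv.weight.shape)  # torch.Size([64, 1, 3, 3])
print(m.pointwise_conv.conv.weight.shape)  # torch.Size([128, 64, 1, 1])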
@@ -44,8 +42,6 @@ def __init__( """ if norm_cfg is None: norm_cfg = {"type": "BN"} - if conv_cfg is None: - conv_cfg = {"type": "Conv2d"} super().__init__() @@ -57,8 +53,8 @@ def __init__( min_channels = min_channels if min_channels is not None else 0 projects: list[DepthwiseSeparableConvModule | None] = [] - expanders: list[ConvModule | None] = [] - fuse_layers: list[ConvModule | None] = [] + expanders: list[Conv2dModule | None] = [] + fuse_layers: list[Conv2dModule | None] = [] for i in range(num_branches): if not self.use_concat or i == 0: @@ -66,12 +62,11 @@ def __init__( else: out_channels = self.in_channels[i + 1] fuse_layers.append( - ConvModule( + Conv2dModule( in_channels=2 * out_channels, out_channels=out_channels, kernel_size=1, stride=1, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg={"type": "ReLU"}, ), @@ -89,7 +84,6 @@ def __init__( kernel_size=3, stride=1, padding=1, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg={"type": "ReLU"}, dw_act_cfg=None, @@ -99,12 +93,11 @@ def __init__( if self.in_channels[i] < min_channels: expanders.append( - ConvModule( + Conv2dModule( in_channels=self.in_channels[i], out_channels=min_channels, kernel_size=1, stride=1, - conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg={"type": "ReLU"}, ), diff --git a/src/otx/algo/segmentation/modules/blocks.py b/src/otx/algo/segmentation/modules/blocks.py index c6dddd0ad90..86d049b1da4 100644 --- a/src/otx/algo/segmentation/modules/blocks.py +++ b/src/otx/algo/segmentation/modules/blocks.py @@ -12,7 +12,7 @@ from torch import nn from torch.nn import AdaptiveAvgPool2d, AdaptiveMaxPool2d -from otx.algo.modules import ConvModule +from otx.algo.modules import Conv2dModule class PSPModule(nn.Module): @@ -54,7 +54,6 @@ def __init__( key_channels: int, value_channels: int | None = None, psp_size: tuple | None = None, - conv_cfg: dict | None = None, norm_cfg: dict | None = None, ): super().__init__() @@ -62,43 +61,39 @@ def __init__( self.in_channels = in_channels self.key_channels = key_channels self.value_channels = value_channels if value_channels is not None else in_channels - self.conv_cfg = conv_cfg if norm_cfg is None: norm_cfg = {"type": "BN"} if psp_size is None: psp_size = (1, 3, 6, 8) self.norm_cfg = norm_cfg - self.query_key = ConvModule( + self.query_key = Conv2dModule( in_channels=self.in_channels, out_channels=self.key_channels, kernel_size=1, stride=1, padding=0, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg={"type": "ReLU"}, ) self.key_psp = PSPModule(psp_size, method="max") - self.value = ConvModule( + self.value = Conv2dModule( in_channels=self.in_channels, out_channels=self.value_channels, kernel_size=1, stride=1, padding=0, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg={"type": "ReLU"}, ) self.value_psp = PSPModule(psp_size, method="max") - self.out_conv = ConvModule( + self.out_conv = Conv2dModule( in_channels=self.value_channels, out_channels=self.in_channels, kernel_size=1, stride=1, padding=0, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg=None, ) @@ -160,45 +155,41 @@ class LocalAttentionModule(nn.Module): Reference: https://github.com/lxtGH/GALD-DGCNet. 
""" - def __init__(self, num_channels: int, conv_cfg: dict | None = None, norm_cfg: dict | None = None): + def __init__(self, num_channels: int, norm_cfg: dict | None = None): if norm_cfg is None: norm_cfg = {"type": "BN"} super().__init__() self.num_channels = num_channels - self.conv_cfg = conv_cfg self.norm_cfg = norm_cfg - self.dwconv1 = ConvModule( + self.dwconv1 = Conv2dModule( in_channels=self.num_channels, out_channels=self.num_channels, kernel_size=3, stride=2, padding=1, groups=self.num_channels, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg={"type": "ReLU"}, ) - self.dwconv2 = ConvModule( + self.dwconv2 = Conv2dModule( in_channels=self.num_channels, out_channels=self.num_channels, kernel_size=3, stride=2, padding=1, groups=self.num_channels, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg={"type": "ReLU"}, ) - self.dwconv3 = ConvModule( + self.dwconv3 = Conv2dModule( in_channels=self.num_channels, out_channels=self.num_channels, kernel_size=3, stride=2, padding=1, groups=self.num_channels, - conv_cfg=self.conv_cfg, norm_cfg=self.norm_cfg, act_cfg={"type": "ReLU"}, ) diff --git a/tests/unit/algo/detection/heads/test_yolox_head.py b/tests/unit/algo/detection/heads/test_yolox_head.py index 62ef5a4e6e8..36d190fb74c 100644 --- a/tests/unit/algo/detection/heads/test_yolox_head.py +++ b/tests/unit/algo/detection/heads/test_yolox_head.py @@ -10,8 +10,7 @@ from omegaconf import DictConfig from otx.algo.detection.heads import YOLOXHead from otx.algo.detection.utils.assigners import SimOTAAssigner -from otx.algo.modules.conv_module import ConvModule -from otx.algo.modules.depthwise_separable_conv_module import DepthwiseSeparableConvModule +from otx.algo.modules.conv_module import Conv2dModule, DepthwiseSeparableConvModule from otx.algo.utils.mmengine_utils import InstanceData @@ -52,7 +51,7 @@ def test_loss_by_feat(self): } head = YOLOXHead(num_classes=4, in_channels=1, stacked_convs=1, use_depthwise=False, train_cfg=train_cfg) assert not head.use_l1 - assert isinstance(head.multi_level_cls_convs[0][0], ConvModule) + assert isinstance(head.multi_level_cls_convs[0][0], Conv2dModule) feat = [torch.rand(1, 1, s // feat_size, s // feat_size) for feat_size in [4, 8, 16]] cls_scores, bbox_preds, objectnesses = head.forward(feat) diff --git a/tests/unit/algo/detection/layers/test_csp_layer.py b/tests/unit/algo/detection/layers/test_csp_layer.py index 22e1aa1730c..5e3fe06bf0f 100644 --- a/tests/unit/algo/detection/layers/test_csp_layer.py +++ b/tests/unit/algo/detection/layers/test_csp_layer.py @@ -6,8 +6,7 @@ from otx.algo.detection.layers import ChannelAttention from otx.algo.detection.layers.csp_layer import CSPLayer, CSPNeXtBlock, DarknetBottleneck from otx.algo.modules.activation import Swish -from otx.algo.modules.conv_module import ConvModule -from otx.algo.modules.depthwise_separable_conv_module import DepthwiseSeparableConvModule +from otx.algo.modules.conv_module import Conv2dModule, DepthwiseSeparableConvModule from torch.nn import BatchNorm2d, Conv2d @@ -17,7 +16,7 @@ def test_init(self) -> None: csp_layer = CSPLayer(3, 5) assert isinstance(csp_layer.blocks[0], DarknetBottleneck) - assert isinstance(csp_layer.blocks[0].conv2, ConvModule) + assert isinstance(csp_layer.blocks[0].conv2, Conv2dModule) assert isinstance(csp_layer.blocks[0].conv1.conv, Conv2d) assert isinstance(csp_layer.blocks[0].conv1.bn, BatchNorm2d) assert isinstance(csp_layer.blocks[0].conv1.activate, Swish) diff --git a/tests/unit/algo/detection/necks/test_yolox_pafpn.py 
b/tests/unit/algo/detection/necks/test_yolox_pafpn.py index 29b0cb17554..fb4fd4886a9 100644 --- a/tests/unit/algo/detection/necks/test_yolox_pafpn.py +++ b/tests/unit/algo/detection/necks/test_yolox_pafpn.py @@ -8,7 +8,7 @@ import torch from otx.algo.detection.necks.yolox_pafpn import YOLOXPAFPN -from otx.algo.modules.depthwise_separable_conv_module import DepthwiseSeparableConvModule +from otx.algo.modules.conv_module import DepthwiseSeparableConvModule class TestYOLOXPAFPN: diff --git a/tests/unit/algo/modules/test_conv.py b/tests/unit/algo/modules/test_conv.py deleted file mode 100644 index 04339449a5d..00000000000 --- a/tests/unit/algo/modules/test_conv.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import pytest -from otx.algo.modules.conv import build_conv_layer -from torch import nn - - -def test_build_conv_layer(): - cfg = {"type": "Conv1d"} - conv = build_conv_layer(cfg, in_channels=1, out_channels=1, kernel_size=1) - assert isinstance(conv, nn.Conv1d) - - cfg = {"type": "Conv2d"} - conv = build_conv_layer(cfg, in_channels=1, out_channels=1, kernel_size=1) - assert isinstance(conv, nn.Conv2d) - - cfg = {"type": "Conv3d"} - conv = build_conv_layer(cfg, in_channels=1, out_channels=1, kernel_size=1) - assert isinstance(conv, nn.Conv3d) - - cfg = {"type": "Conv"} - conv = build_conv_layer(cfg, in_channels=1, out_channels=1, kernel_size=1) - assert isinstance(conv, nn.Conv2d) - - with pytest.raises(TypeError): - build_conv_layer(None) - - with pytest.raises(KeyError, match='the cfg dict must contain the key "type"'): - build_conv_layer({"cfg": 1}) - - with pytest.raises(KeyError, match="Cannot find"): - build_conv_layer({"type": "None"}) diff --git a/tests/unit/algo/modules/test_conv_module.py b/tests/unit/algo/modules/test_conv_module.py index 26dc595a914..be0f8e34463 100644 --- a/tests/unit/algo/modules/test_conv_module.py +++ b/tests/unit/algo/modules/test_conv_module.py @@ -2,32 +2,26 @@ # SPDX-License-Identifier: Apache-2.0 # Copyright (c) OpenMMLab. All rights reserved. 
# https://github.com/open-mmlab/mmcv/blob/main/tests/test_cnn/test_conv_module.py -from unittest.mock import patch import pytest import torch -from otx.algo.modules.conv_module import ConvModule +from otx.algo.modules.conv_module import Conv2dModule, DepthwiseSeparableConvModule from torch import nn def test_conv_module(): - conv_cfg = "conv" - with pytest.raises(AssertionError): - # conv_cfg must be a dict or None - ConvModule(3, 8, 2, conv_cfg=conv_cfg) - norm_cfg = "norm" with pytest.raises(AssertionError): # norm_cfg must be a dict or None - ConvModule(3, 8, 2, norm_cfg=norm_cfg) + Conv2dModule(3, 8, 2, norm_cfg=norm_cfg) act_cfg = {"type": "softmax"} with pytest.raises(KeyError): # softmax is not supported - ConvModule(3, 8, 2, act_cfg=act_cfg) + Conv2dModule(3, 8, 2, act_cfg=act_cfg) # conv + norm + act - conv = ConvModule(3, 8, 2, norm_cfg={"type": "BN"}) + conv = Conv2dModule(3, 8, 2, norm_cfg={"type": "BN"}) assert conv.with_activation assert hasattr(conv, "activate") assert conv.with_norm @@ -36,26 +30,8 @@ def test_conv_module(): output = conv(x) assert output.shape == (1, 8, 255, 255) - # conv + norm with efficient mode - efficient_conv = ConvModule(3, 8, 2, norm_cfg={"type": "BN"}, efficient_conv_bn_eval=True).eval() - plain_conv = ConvModule(3, 8, 2, norm_cfg={"type": "BN"}, efficient_conv_bn_eval=False).eval() - for efficient_param, plain_param in zip(efficient_conv.state_dict().values(), plain_conv.state_dict().values()): - plain_param.copy_(efficient_param) - - efficient_mode_output = efficient_conv(x) - plain_mode_output = plain_conv(x) - assert torch.allclose(efficient_mode_output, plain_mode_output, atol=1e-5) - - # `conv` attribute can be dynamically modified in efficient mode - efficient_conv = ConvModule(3, 8, 2, norm_cfg={"type": "BN"}, efficient_conv_bn_eval=True).eval() - new_conv = nn.Conv2d(3, 8, 2).eval() - efficient_conv.conv = new_conv - efficient_mode_output = efficient_conv(x) - plain_mode_output = efficient_conv.activate(efficient_conv.norm_layer(new_conv(x))) - assert torch.allclose(efficient_mode_output, plain_mode_output, atol=1e-5) - # conv + act - conv = ConvModule(3, 8, 2) + conv = Conv2dModule(3, 8, 2) assert conv.with_activation assert hasattr(conv, "activate") assert not conv.with_norm @@ -65,7 +41,7 @@ def test_conv_module(): assert output.shape == (1, 8, 255, 255) # conv - conv = ConvModule(3, 8, 2, act_cfg=None) + conv = Conv2dModule(3, 8, 2, act_cfg=None) assert not conv.with_norm assert conv.norm_layer is None assert not conv.with_activation @@ -74,46 +50,46 @@ def test_conv_module(): output = conv(x) assert output.shape == (1, 8, 255, 255) - conv = ConvModule(3, 8, 3, padding=1, with_spectral_norm=True) + conv = Conv2dModule(3, 8, 3, padding=1, with_spectral_norm=True) assert hasattr(conv.conv, "weight_orig") output = conv(x) assert output.shape == (1, 8, 256, 256) - conv = ConvModule(3, 8, 3, padding=1, padding_mode="reflect") + conv = Conv2dModule(3, 8, 3, padding=1, padding_mode="reflect") assert isinstance(conv.padding_layer, nn.ReflectionPad2d) output = conv(x) assert output.shape == (1, 8, 256, 256) # non-existing padding mode with pytest.raises(KeyError): - conv = ConvModule(3, 8, 3, padding=1, padding_mode="non_exists") + conv = Conv2dModule(3, 8, 3, padding=1, padding_mode="non_exists") # leaky relu - conv = ConvModule(3, 8, 3, padding=1, act_cfg={"type": "LeakyReLU"}) + conv = Conv2dModule(3, 8, 3, padding=1, act_cfg={"type": "LeakyReLU"}) assert isinstance(conv.activate, nn.LeakyReLU) output = conv(x) assert output.shape == 
(1, 8, 256, 256) # tanh - conv = ConvModule(3, 8, 3, padding=1, act_cfg={"type": "Tanh"}) + conv = Conv2dModule(3, 8, 3, padding=1, act_cfg={"type": "Tanh"}) assert isinstance(conv.activate, nn.Tanh) output = conv(x) assert output.shape == (1, 8, 256, 256) # Sigmoid - conv = ConvModule(3, 8, 3, padding=1, act_cfg={"type": "Sigmoid"}) + conv = Conv2dModule(3, 8, 3, padding=1, act_cfg={"type": "Sigmoid"}) assert isinstance(conv.activate, nn.Sigmoid) output = conv(x) assert output.shape == (1, 8, 256, 256) # PReLU - conv = ConvModule(3, 8, 3, padding=1, act_cfg={"type": "PReLU"}) + conv = Conv2dModule(3, 8, 3, padding=1, act_cfg={"type": "PReLU"}) assert isinstance(conv.activate, nn.PReLU) output = conv(x) assert output.shape == (1, 8, 256, 256) # Test norm layer with name - conv = ConvModule(3, 8, 2, norm_cfg={"type": "BN", "name": "some_norm_layer"}) + conv = Conv2dModule(3, 8, 2, norm_cfg={"type": "BN", "name": "some_norm_layer"}) assert conv.norm_layer.__class__.__name__ == "BatchNorm2d" assert conv.norm_name == "some_norm_layer" assert hasattr(conv, "norm_layer") @@ -124,78 +100,108 @@ def test_conv_module(): def test_bias(): # bias: auto, without norm - conv = ConvModule(3, 8, 2) + conv = Conv2dModule(3, 8, 2) assert conv.conv.bias is not None # bias: auto, with norm - conv = ConvModule(3, 8, 2, norm_cfg={"type": "BN"}) + conv = Conv2dModule(3, 8, 2, norm_cfg={"type": "BN"}) assert conv.conv.bias is None # bias: False, without norm - conv = ConvModule(3, 8, 2, bias=False) + conv = Conv2dModule(3, 8, 2, bias=False) assert conv.conv.bias is None # bias: True, with batch norm with pytest.warns(UserWarning) as record: - ConvModule(3, 8, 2, bias=True, norm_cfg={"type": "BN"}) + Conv2dModule(3, 8, 2, bias=True, norm_cfg={"type": "BN"}) assert len(record) == 1 assert record[0].message.args[0] == "Unnecessary conv bias before batch/instance norm" # bias: True, with instance norm with pytest.warns(UserWarning) as record: - ConvModule(3, 8, 2, bias=True, norm_cfg={"type": "IN"}) + Conv2dModule(3, 8, 2, bias=True, norm_cfg={"type": "IN"}) assert len(record) == 1 assert record[0].message.args[0] == "Unnecessary conv bias before batch/instance norm" -def conv_forward(self, x): - return x + "_conv" - - -def bn_forward(self, x): - return x + "_bn" - - -def relu_forward(self, x): - return x + "_relu" - - -@patch("torch.nn.ReLU.forward", relu_forward) -@patch("torch.nn.BatchNorm2d.forward", bn_forward) -@patch("torch.nn.Conv2d.forward", conv_forward) -def test_order(): - order = ["conv", "norm", "act"] - with pytest.raises(AssertionError): - # order must be a tuple - ConvModule(3, 8, 2, order=order) - - order = ("conv", "norm") - with pytest.raises(AssertionError): - # length of order must be 3 - ConvModule(3, 8, 2, order=order) - - order = ("conv", "norm", "norm") - with pytest.raises(AssertionError): - # order must be an order of 'conv', 'norm', 'act' - ConvModule(3, 8, 2, order=order) - - order = ("conv", "norm", "something") - with pytest.raises(AssertionError): - # order must be an order of 'conv', 'norm', 'act' - ConvModule(3, 8, 2, order=order) - - conv = ConvModule(3, 8, 2, norm_cfg={"type": "BN"}) - out = conv("input") - assert out == "input_conv_bn_relu" - - conv = ConvModule(3, 8, 2, norm_cfg={"type": "BN"}, order=("norm", "conv", "act")) - out = conv("input") - assert out == "input_bn_conv_relu" - - conv = ConvModule(3, 8, 2, norm_cfg={"type": "BN"}) - out = conv("input", activate=False) - assert out == "input_conv_bn" - - conv = ConvModule(3, 8, 2, norm_cfg={"type": "BN"}) - out = 
conv("input", norm=False) - assert out == "input_conv_relu" +class TestDepthwiseSeparableConvModule: + def test_forward_with_default_config(self) -> None: + # test default config + conv = DepthwiseSeparableConvModule(3, 8, 2) + assert conv.depthwise_conv.conv.groups == 3 + assert conv.pointwise_conv.conv.kernel_size == (1, 1) + assert not conv.depthwise_conv.with_norm + assert not conv.pointwise_conv.with_norm + assert conv.depthwise_conv.activate.__class__.__name__ == "ReLU" + assert conv.pointwise_conv.activate.__class__.__name__ == "ReLU" + x = torch.rand(1, 3, 256, 256) + output = conv(x) + assert output.shape == (1, 8, 255, 255) + + def test_forward_with_dw_norm_cfg(self) -> None: + # test dw_norm_cfg + conv = DepthwiseSeparableConvModule(3, 8, 2, dw_norm_cfg={"type": "BN"}) + assert conv.depthwise_conv.norm_name == "bn" + assert not conv.pointwise_conv.with_norm + x = torch.rand(1, 3, 256, 256) + output = conv(x) + assert output.shape == (1, 8, 255, 255) + + def test_forward_with_pw_norm_cfg(self) -> None: + # test pw_norm_cfg + conv = DepthwiseSeparableConvModule(3, 8, 2, pw_norm_cfg={"type": "BN"}) + assert not conv.depthwise_conv.with_norm + assert conv.pointwise_conv.norm_name == "bn" + x = torch.rand(1, 3, 256, 256) + output = conv(x) + assert output.shape == (1, 8, 255, 255) + + def test_forward_with_norm_cfg(self) -> None: + # test norm_cfg + conv = DepthwiseSeparableConvModule(3, 8, 2, norm_cfg={"type": "BN"}) + assert conv.depthwise_conv.norm_name == "bn" + assert conv.pointwise_conv.norm_name == "bn" + x = torch.rand(1, 3, 256, 256) + output = conv(x) + assert output.shape == (1, 8, 255, 255) + + def test_forward_with_spectral_norm_padding_mode(self) -> None: + x = torch.rand(1, 3, 256, 256) + + conv = DepthwiseSeparableConvModule(3, 8, 3, padding=1, with_spectral_norm=True) + assert hasattr(conv.depthwise_conv.conv, "weight_orig") + assert hasattr(conv.pointwise_conv.conv, "weight_orig") + output = conv(x) + assert output.shape == (1, 8, 256, 256) + + conv = DepthwiseSeparableConvModule(3, 8, 3, padding=1, padding_mode="reflect") + assert isinstance(conv.depthwise_conv.padding_layer, nn.ReflectionPad2d) + output = conv(x) + assert output.shape == (1, 8, 256, 256) + + def test_forward_with_dw_act_cfg(self) -> None: + # test dw_act_cfg + conv = DepthwiseSeparableConvModule(3, 8, 3, padding=1, dw_act_cfg={"type": "LeakyReLU"}) + x = torch.rand(1, 3, 256, 256) + assert conv.depthwise_conv.activate.__class__.__name__ == "LeakyReLU" + assert conv.pointwise_conv.activate.__class__.__name__ == "ReLU" + output = conv(x) + assert output.shape == (1, 8, 256, 256) + + def test_forward_with_pw_act_cfg(self) -> None: + # test pw_act_cfg + conv = DepthwiseSeparableConvModule(3, 8, 3, padding=1, pw_act_cfg={"type": "LeakyReLU"}) + x = torch.rand(1, 3, 256, 256) + assert conv.depthwise_conv.activate.__class__.__name__ == "ReLU" + assert conv.pointwise_conv.activate.__class__.__name__ == "LeakyReLU" + output = conv(x) + assert output.shape == (1, 8, 256, 256) + + def test_forward_with_act_cfg(self) -> None: + # test act_cfg + conv = DepthwiseSeparableConvModule(3, 8, 3, padding=1, act_cfg={"type": "LeakyReLU"}) + x = torch.rand(1, 3, 256, 256) + assert conv.depthwise_conv.activate.__class__.__name__ == "LeakyReLU" + assert conv.pointwise_conv.activate.__class__.__name__ == "LeakyReLU" + output = conv(x) + assert output.shape == (1, 8, 256, 256) diff --git a/tests/unit/algo/modules/test_depthwise_separable_conv_module.py b/tests/unit/algo/modules/test_depthwise_separable_conv_module.py 
deleted file mode 100644 index b8265dea94d..00000000000 --- a/tests/unit/algo/modules/test_depthwise_separable_conv_module.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# Copyright (c) OpenMMLab. All rights reserved. -"""Test of DepthwiseSeparableConvModule. - -Reference: https://github.com/open-mmlab/mmcv/blob/main/tests/test_cnn/test_depthwise_seperable_conv_module.py -""" - -import pytest -import torch -from otx.algo.modules.depthwise_separable_conv_module import DepthwiseSeparableConvModule -from torch import nn - - -class TestDepthwiseSeparableConvModule: - def test_init_with_non_dict_conv_cfg(self) -> None: - # conv_cfg must be a dict or None - with pytest.raises(AssertionError): - DepthwiseSeparableConvModule(4, 8, 2, groups=2) - - def test_forward_with_default_config(self) -> None: - # test default config - conv = DepthwiseSeparableConvModule(3, 8, 2) - assert conv.depthwise_conv.conv.groups == 3 - assert conv.pointwise_conv.conv.kernel_size == (1, 1) - assert not conv.depthwise_conv.with_norm - assert not conv.pointwise_conv.with_norm - assert conv.depthwise_conv.activate.__class__.__name__ == "ReLU" - assert conv.pointwise_conv.activate.__class__.__name__ == "ReLU" - x = torch.rand(1, 3, 256, 256) - output = conv(x) - assert output.shape == (1, 8, 255, 255) - - def test_forward_with_dw_norm_cfg(self) -> None: - # test dw_norm_cfg - conv = DepthwiseSeparableConvModule(3, 8, 2, dw_norm_cfg={"type": "BN"}) - assert conv.depthwise_conv.norm_name == "bn" - assert not conv.pointwise_conv.with_norm - x = torch.rand(1, 3, 256, 256) - output = conv(x) - assert output.shape == (1, 8, 255, 255) - - def test_forward_with_pw_norm_cfg(self) -> None: - # test pw_norm_cfg - conv = DepthwiseSeparableConvModule(3, 8, 2, pw_norm_cfg={"type": "BN"}) - assert not conv.depthwise_conv.with_norm - assert conv.pointwise_conv.norm_name == "bn" - x = torch.rand(1, 3, 256, 256) - output = conv(x) - assert output.shape == (1, 8, 255, 255) - - def test_forward_with_norm_cfg(self) -> None: - # test norm_cfg - conv = DepthwiseSeparableConvModule(3, 8, 2, norm_cfg={"type": "BN"}) - assert conv.depthwise_conv.norm_name == "bn" - assert conv.pointwise_conv.norm_name == "bn" - x = torch.rand(1, 3, 256, 256) - output = conv(x) - assert output.shape == (1, 8, 255, 255) - - def test_forward_for_order_with_norm_conv_act(self) -> None: - # add test for ['norm', 'conv', 'act'] - conv = DepthwiseSeparableConvModule(3, 8, 2, order=("norm", "conv", "act")) - x = torch.rand(1, 3, 256, 256) - output = conv(x) - assert output.shape == (1, 8, 255, 255) - - conv = DepthwiseSeparableConvModule(3, 8, 3, padding=1, with_spectral_norm=True) - assert hasattr(conv.depthwise_conv.conv, "weight_orig") - assert hasattr(conv.pointwise_conv.conv, "weight_orig") - output = conv(x) - assert output.shape == (1, 8, 256, 256) - - conv = DepthwiseSeparableConvModule(3, 8, 3, padding=1, padding_mode="reflect") - assert isinstance(conv.depthwise_conv.padding_layer, nn.ReflectionPad2d) - output = conv(x) - assert output.shape == (1, 8, 256, 256) - - def test_forward_with_dw_act_cfg(self) -> None: - # test dw_act_cfg - conv = DepthwiseSeparableConvModule(3, 8, 3, padding=1, dw_act_cfg={"type": "LeakyReLU"}) - x = torch.rand(1, 3, 256, 256) - assert conv.depthwise_conv.activate.__class__.__name__ == "LeakyReLU" - assert conv.pointwise_conv.activate.__class__.__name__ == "ReLU" - output = conv(x) - assert output.shape == (1, 8, 256, 256) - - def 
test_forward_with_pw_act_cfg(self) -> None: - # test pw_act_cfg - conv = DepthwiseSeparableConvModule(3, 8, 3, padding=1, pw_act_cfg={"type": "LeakyReLU"}) - x = torch.rand(1, 3, 256, 256) - assert conv.depthwise_conv.activate.__class__.__name__ == "ReLU" - assert conv.pointwise_conv.activate.__class__.__name__ == "LeakyReLU" - output = conv(x) - assert output.shape == (1, 8, 256, 256) - - def test_forward_with_act_cfg(self) -> None: - # test act_cfg - conv = DepthwiseSeparableConvModule(3, 8, 3, padding=1, act_cfg={"type": "LeakyReLU"}) - x = torch.rand(1, 3, 256, 256) - assert conv.depthwise_conv.activate.__class__.__name__ == "LeakyReLU" - assert conv.pointwise_conv.activate.__class__.__name__ == "LeakyReLU" - output = conv(x) - assert output.shape == (1, 8, 256, 256) diff --git a/tests/unit/algo/modules/test_norm.py b/tests/unit/algo/modules/test_norm.py index f95dfbd8814..8debc2d9a07 100644 --- a/tests/unit/algo/modules/test_norm.py +++ b/tests/unit/algo/modules/test_norm.py @@ -6,7 +6,7 @@ from torch import nn -def test_build_conv_layer(): +def test_build_norm_layer(): cfg = {"type": "BN"} name, norm = build_norm_layer(cfg, num_features=1) assert isinstance(norm, nn.BatchNorm2d) diff --git a/tests/unit/algo/segmentation/modules/test_blokcs.py b/tests/unit/algo/segmentation/modules/test_blokcs.py index 1c6d517b8b2..728d85169a0 100644 --- a/tests/unit/algo/segmentation/modules/test_blokcs.py +++ b/tests/unit/algo/segmentation/modules/test_blokcs.py @@ -15,7 +15,6 @@ def init_cfg(self) -> dict[str, Any]: "key_channels": 128, "value_channels": 320, "psp_size": [1, 3, 6, 8], - "conv_cfg": {"type": "Conv2d"}, "norm_cfg": {"type": "BN"}, } @@ -25,7 +24,6 @@ def test_init(self, init_cfg): assert module.in_channels == init_cfg["in_channels"] assert module.key_channels == init_cfg["key_channels"] assert module.value_channels == init_cfg["value_channels"] - assert module.conv_cfg == init_cfg["conv_cfg"] assert module.norm_cfg == init_cfg["norm_cfg"] @pytest.fixture() @@ -44,7 +42,6 @@ class TestLocalAttentionModule: def init_cfg(self) -> dict[str, Any]: return { "num_channels": 320, - "conv_cfg": {"type": "Conv2d"}, "norm_cfg": {"type": "BN"}, } @@ -52,7 +49,6 @@ def test_init(self, init_cfg): module = LocalAttentionModule(**init_cfg) assert module.num_channels == init_cfg["num_channels"] - assert module.conv_cfg == init_cfg["conv_cfg"] assert module.norm_cfg == init_cfg["norm_cfg"] @pytest.fixture()