Refactoring base module (ConvModule) (#3783)
* Split `ConvModule` to per dimension Module

* Remove `conv_cfg`

* Remove `build_conv_layer`

* Move `DepthwiseSeparableConvModule` into `conv_module`

* Apply `pre-commit` fixes

* Remove `build_conv_layer` vestige

* Remove assertion errors

* Remove unused `efficient_conv_bn_eval`

* Fix unit test

* Remove `order`
sungchul2 authored Aug 7, 2024
1 parent f618a37 commit 7f7f299
Showing 44 changed files with 461 additions and 998 deletions.
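
The bullets above summarize the API change; in practice, the convolution dimensionality moves out of the `conv_cfg` dict and into the class name. Below is a minimal before/after sketch, assuming the keyword signatures shown in the hunks that follow (channel counts are illustrative):

import torch

from otx.algo.modules.conv_module import Conv2dModule, Conv3dModule

# Before: ConvModule(16, 32, 3, conv_cfg={"type": "Conv3d"}, ...)
# After: the dimension is encoded in the class name, `conv_cfg` is gone,
# and each module applies conv -> norm -> act (the `order` option was removed).
conv2d = Conv2dModule(
    in_channels=16,
    out_channels=32,
    kernel_size=3,
    padding=1,
    norm_cfg={"type": "BN", "requires_grad": True},
    act_cfg={"type": "ReLU", "inplace": True},
)
conv3d = Conv3dModule(
    in_channels=16,
    out_channels=32,
    kernel_size=3,
    padding=1,
    norm_cfg={"type": "BN3d", "requires_grad": True},
    act_cfg={"type": "ReLU", "inplace": True},
)

y2 = conv2d(torch.randn(1, 16, 64, 64))     # (N, C, H, W)
y3 = conv3d(torch.randn(1, 16, 8, 32, 32))  # (N, C, T, H, W)
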
37 changes: 9 additions & 28 deletions src/otx/algo/action_classification/backbones/x3d.py
@@ -3,6 +3,7 @@
# Copyright (c) OpenMMLab. All rights reserved.

"""X3D backbone implementation."""

from __future__ import annotations

import math
@@ -12,7 +13,7 @@
from torch.nn.modules.batchnorm import _BatchNorm

from otx.algo.modules.activation import Swish, build_activation_layer
from otx.algo.modules.conv_module import ConvModule
from otx.algo.modules.conv_module import Conv3dModule
from otx.algo.utils.mmengine_utils import load_checkpoint
from otx.algo.utils.weight_init import constant_init, kaiming_init

@@ -70,8 +71,6 @@ class BlockX3D(nn.Module):
unit. If set as None, it means not using SE unit. Default: None.
use_swish (bool): Whether to use swish as the activation function
before and after the 3x3x3 conv. Default: True.
conv_cfg (dict): Config dict for convolution layer.
Default: ``dict(type='Conv3d')``.
norm_cfg (dict): Config for norm layers. required keys are ``type``,
Default: ``dict(type='BN3d')``.
act_cfg (dict): Config dict for activation layer.
@@ -89,7 +88,6 @@ def __init__(
downsample: nn.Module | None = None,
se_ratio: float | None = None,
use_swish: bool = True,
conv_cfg: dict | None = None,
norm_cfg: dict | None = None,
act_cfg: dict | None = None,
with_cp: bool = False,
@@ -103,47 +101,43 @@ def __init__(
self.downsample = downsample
self.se_ratio = se_ratio
self.use_swish = use_swish
self.conv_cfg = conv_cfg
self.norm_cfg = norm_cfg
self.act_cfg = act_cfg
self.act_cfg_swish = Swish()
self.with_cp = with_cp

self.conv1 = ConvModule(
self.conv1 = Conv3dModule(
in_channels=inplanes,
out_channels=planes,
kernel_size=1,
stride=1,
padding=0,
bias=False,
conv_cfg=self.conv_cfg,
norm_cfg=self.norm_cfg,
act_cfg=self.act_cfg,
)
# Here we use the channel-wise conv
self.conv2 = ConvModule(
self.conv2 = Conv3dModule(
in_channels=planes,
out_channels=planes,
kernel_size=3,
stride=(1, self.spatial_stride, self.spatial_stride),
padding=1,
groups=planes,
bias=False,
conv_cfg=self.conv_cfg,
norm_cfg=self.norm_cfg,
act_cfg=None,
)

self.swish = Swish()

self.conv3 = ConvModule(
self.conv3 = Conv3dModule(
in_channels=planes,
out_channels=outplanes,
kernel_size=1,
stride=1,
padding=0,
bias=False,
conv_cfg=self.conv_cfg,
norm_cfg=self.norm_cfg,
act_cfg=None,
)
@@ -201,8 +195,6 @@ class X3DBackbone(nn.Module):
unit. If set as None, it means not using SE unit. Default: 1 / 16.
use_swish (bool): Whether to use swish as the activation function
before and after the 3x3x3 conv. Default: True.
conv_cfg (dict): Config for conv layers. required keys are ``type``
Default: ``dict(type='Conv3d')``.
norm_cfg (dict): Config for norm layers. required keys are ``type`` and
``requires_grad``.
Default: ``dict(type='BN3d', requires_grad=True)``.
@@ -231,7 +223,6 @@ def __init__(
se_style: str = "half",
se_ratio: float = 1 / 16,
use_swish: bool = True,
conv_cfg: dict | None = None,
norm_cfg: dict | None = None,
act_cfg: dict | None = None,
norm_eval: bool = False,
@@ -275,7 +266,6 @@ def __init__(
raise ValueError(msg)
self.use_swish = use_swish

self.conv_cfg = conv_cfg
self.norm_cfg = norm_cfg
self.act_cfg = act_cfg
self.norm_eval = norm_eval
@@ -304,7 +294,6 @@ def __init__(
se_ratio=self.se_ratio,
use_swish=self.use_swish,
norm_cfg=self.norm_cfg,
conv_cfg=self.conv_cfg,
act_cfg=self.act_cfg,
with_cp=with_cp,
**kwargs,
@@ -315,14 +304,13 @@ def __init__(
self.res_layers.append(layer_name)

self.feat_dim = self.base_channels * 2 ** (len(self.stage_blocks) - 1)
self.conv5 = ConvModule(
self.conv5 = Conv3dModule(
self.feat_dim,
int(self.feat_dim * self.gamma_b),
kernel_size=1,
stride=1,
padding=0,
bias=False,
conv_cfg=self.conv_cfg,
norm_cfg=self.norm_cfg,
act_cfg=self.act_cfg,
)
@@ -363,7 +351,6 @@ def make_res_layer(
use_swish: bool = True,
norm_cfg: dict | None = None,
act_cfg: dict | None = None,
conv_cfg: dict | None = None,
with_cp: bool = False,
**kwargs,
) -> nn.Module:
@@ -388,7 +375,6 @@
Default: None.
use_swish (bool): Whether to use swish as the activation function
before and after the 3x3x3 conv. Default: True.
conv_cfg (dict | None): Config for norm layers. Default: None.
norm_cfg (dict | None): Config for norm layers. Default: None.
act_cfg (dict | None): Config for activate layers. Default: None.
with_cp (bool | None): Use checkpoint or not. Using checkpoint
@@ -400,14 +386,13 @@
"""
downsample = None
if spatial_stride != 1 or layer_inplanes != inplanes:
downsample = ConvModule(
downsample = Conv3dModule(
layer_inplanes,
inplanes,
kernel_size=1,
stride=(1, spatial_stride, spatial_stride),
padding=0,
bias=False,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=None,
)
@@ -431,7 +416,6 @@
se_ratio=se_ratio if use_se[0] else None,
use_swish=use_swish,
norm_cfg=norm_cfg,
conv_cfg=conv_cfg,
act_cfg=act_cfg,
with_cp=with_cp,
**kwargs,
@@ -448,7 +432,6 @@
se_ratio=se_ratio if use_se[i] else None,
use_swish=use_swish,
norm_cfg=norm_cfg,
conv_cfg=conv_cfg,
act_cfg=act_cfg,
with_cp=with_cp,
**kwargs,
@@ -459,26 +442,24 @@

def _make_stem_layer(self) -> None:
"""Construct the stem layers consists of a conv+norm+act module and a pooling layer."""
self.conv1_s = ConvModule(
self.conv1_s = Conv3dModule(
self.in_channels,
self.base_channels,
kernel_size=(1, 3, 3),
stride=(1, 2, 2),
padding=(0, 1, 1),
bias=False,
conv_cfg=self.conv_cfg,
norm_cfg=None,
act_cfg=None,
)
self.conv1_t = ConvModule(
self.conv1_t = Conv3dModule(
self.base_channels,
self.base_channels,
kernel_size=(5, 1, 1),
stride=(1, 1, 1),
padding=(2, 0, 0),
groups=self.base_channels,
bias=False,
conv_cfg=self.conv_cfg,
norm_cfg=self.norm_cfg,
act_cfg=self.act_cfg,
)
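
After this change, a residual block is built with no `conv_cfg` at all. A hypothetical sketch (channel counts are illustrative; `inplanes`, `planes`, `outplanes`, and `spatial_stride` are inferred from the block body above, and the remaining constructor arguments are assumed to keep their defaults):

import torch

from otx.algo.action_classification.backbones.x3d import BlockX3D

# stride 1 with matching in/out channels, so the identity shortcut applies
block = BlockX3D(
    inplanes=24,
    planes=54,
    outplanes=24,
    spatial_stride=1,
    norm_cfg={"type": "BN3d", "requires_grad": True},
    act_cfg={"type": "ReLU", "inplace": True},
)
out = block(torch.randn(1, 24, 4, 16, 16))  # (N, C, T, H, W)
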
2 changes: 1 addition & 1 deletion src/otx/algo/action_classification/x3d.py
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: Apache-2.0
#
"""X3D model implementation."""

from __future__ import annotations

from typing import TYPE_CHECKING
@@ -63,7 +64,6 @@ def _build_model(self, num_classes: int) -> nn.Module:
gamma_b=2.25,
gamma_d=2.2,
gamma_w=1,
conv_cfg={"type": "Conv3d"},
norm_cfg={"type": "BN3d", "requires_grad": True},
act_cfg={"type": "ReLU", "inplace": True},
),
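
On the model side, the only change is dropping the `conv_cfg` entry. A sketch of the updated backbone construction, mirroring the keywords in the hunk above and assuming the other `X3DBackbone` arguments keep their defaults:

from otx.algo.action_classification.backbones.x3d import X3DBackbone

backbone = X3DBackbone(
    gamma_b=2.25,
    gamma_d=2.2,
    gamma_w=1,
    # conv_cfg={"type": "Conv3d"} is no longer passed; every conv is a Conv3dModule.
    norm_cfg={"type": "BN3d", "requires_grad": True},
    act_cfg={"type": "ReLU", "inplace": True},
)
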
21 changes: 11 additions & 10 deletions src/otx/algo/classification/backbones/efficientnet.py
@@ -1,7 +1,8 @@
# Copyright (C) 2023 Intel Corporation
# Copyright (C) 2023-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
#
"""EfficientNet Module."""

from __future__ import annotations

import math
@@ -14,7 +15,7 @@
from torch.nn import functional, init

from otx.algo.modules.activation import build_activation_layer
from otx.algo.modules.conv_module import ConvModule
from otx.algo.modules.conv_module import Conv2dModule
from otx.algo.utils.mmengine_utils import load_checkpoint_to_model

PRETRAINED_ROOT = "https://github.com/osmr/imgclsmob/releases/download/v0.0.364/"
@@ -33,9 +34,9 @@ def conv1x1_block(
use_bn: bool = True,
bn_eps: float = 1e-5,
activation: str | None = "ReLU",
) -> ConvModule:
) -> Conv2dModule:
"""Conv block."""
return ConvModule(
return Conv2dModule(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=1,
@@ -59,9 +60,9 @@ def conv3x3_block(
use_bn: bool = True,
bn_eps: float = 1e-5,
activation: str | None = "ReLU",
) -> ConvModule:
) -> Conv2dModule:
"""Conv block."""
return ConvModule(
return Conv2dModule(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=3,
@@ -85,9 +86,9 @@ def dwconv3x3_block(
use_bn: bool = True,
bn_eps: float = 1e-5,
activation: str | None = "ReLU",
) -> ConvModule:
) -> Conv2dModule:
"""Conv block."""
return ConvModule(
return Conv2dModule(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=3,
@@ -111,9 +112,9 @@ def dwconv5x5_block(
use_bn: bool = True,
bn_eps: float = 1e-5,
activation: str | None = "ReLU",
) -> ConvModule:
) -> Conv2dModule:
"""Conv block."""
return ConvModule(
return Conv2dModule(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=5,
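
The block helpers keep their signatures; only the return type changes to `Conv2dModule`. A hypothetical usage sketch, using just the parameters visible in the hunks above (the hidden ones are assumed to default sensibly):

import torch

from otx.algo.classification.backbones.efficientnet import dwconv3x3_block

# Builds a depthwise 3x3 Conv2dModule with BN and ReLU by default.
block = dwconv3x3_block(in_channels=32, out_channels=32)
y = block(torch.randn(1, 32, 56, 56))
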
17 changes: 5 additions & 12 deletions src/otx/algo/common/backbones/cspnext.py
@@ -14,8 +14,7 @@
from otx.algo.common.layers import SPPBottleneck
from otx.algo.detection.layers import CSPLayer
from otx.algo.modules.base_module import BaseModule
from otx.algo.modules.conv_module import ConvModule
from otx.algo.modules.depthwise_separable_conv_module import DepthwiseSeparableConvModule
from otx.algo.modules.conv_module import Conv2dModule, DepthwiseSeparableConvModule
from torch import Tensor, nn
from torch.nn.modules.batchnorm import _BatchNorm

@@ -44,8 +43,6 @@ class CSPNeXt(BaseModule):
layers. Defaults to (5, 9, 13).
channel_attention (bool): Whether to add channel attention in each
stage. Defaults to True.
conv_cfg (dict, optional): Config dict for
convolution layer. Defaults to None.
norm_cfg (dict): Dictionary to construct and
config norm layer. Defaults to dict(type='BN', requires_grad=True).
act_cfg (dict): Config dict for activation layer.
@@ -86,7 +83,6 @@ def __init__(
arch_ovewrite: dict | None = None,
spp_kernel_sizes: tuple[int, int, int] = (5, 9, 13),
channel_attention: bool = True,
conv_cfg: dict | None = None,
norm_cfg: dict | None = None,
act_cfg: dict | None = None,
norm_eval: bool = False,
@@ -121,9 +117,9 @@ def __init__(
self.frozen_stages = frozen_stages
self.use_depthwise = use_depthwise
self.norm_eval = norm_eval
conv = DepthwiseSeparableConvModule if use_depthwise else ConvModule
conv = DepthwiseSeparableConvModule if use_depthwise else Conv2dModule
self.stem = nn.Sequential(
ConvModule(
Conv2dModule(
3,
int(arch_setting[0][0] * widen_factor // 2),
3,
@@ -132,7 +128,7 @@
norm_cfg=norm_cfg,
act_cfg=act_cfg,
),
ConvModule(
Conv2dModule(
int(arch_setting[0][0] * widen_factor // 2),
int(arch_setting[0][0] * widen_factor // 2),
3,
@@ -141,7 +137,7 @@
norm_cfg=norm_cfg,
act_cfg=act_cfg,
),
ConvModule(
Conv2dModule(
int(arch_setting[0][0] * widen_factor // 2),
int(arch_setting[0][0] * widen_factor),
3,
@@ -164,7 +160,6 @@ def __init__(
3,
stride=2,
padding=1,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg,
)
@@ -174,7 +169,6 @@ def __init__(
out_channels,
out_channels,
kernel_sizes=spp_kernel_sizes,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg,
)
@@ -188,7 +182,6 @@ def __init__(
use_cspnext_block=True,
expand_ratio=expand_ratio,
channel_attention=channel_attention,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg,
)
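
With `DepthwiseSeparableConvModule` folded into `conv_module`, the depthwise/plain switch needs only one import. A sketch mirroring the stem logic above (channel counts and the flag are illustrative):

from otx.algo.modules.conv_module import Conv2dModule, DepthwiseSeparableConvModule

use_depthwise = True  # hypothetical flag, as in CSPNeXt.__init__
conv = DepthwiseSeparableConvModule if use_depthwise else Conv2dModule
stage_conv = conv(
    64,
    128,
    3,
    stride=2,
    padding=1,
    norm_cfg={"type": "BN", "requires_grad": True},
    act_cfg={"type": "ReLU", "inplace": True},
)
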