diff --git a/mmaction/models/backbones/c3d.py b/mmaction/models/backbones/c3d.py
index 847ff576d9..a51549a5a4 100644
--- a/mmaction/models/backbones/c3d.py
+++ b/mmaction/models/backbones/c3d.py
@@ -1,14 +1,12 @@
 import torch.nn as nn
-from mmcv.cnn import ConvModule, constant_init, kaiming_init, normal_init
-from mmcv.runner import load_checkpoint
-from mmcv.utils import _BatchNorm
+from mmcv.cnn import ConvModule
+from mmcv.runner import BaseModule
 
-from ...utils import get_root_logger
 from ..registry import BACKBONES
 
 
 @BACKBONES.register_module()
-class C3D(nn.Module):
+class C3D(BaseModule):
     """C3D backbone.
 
     Args:
@@ -35,19 +33,39 @@ def __init__(self,
                  norm_cfg=None,
                  act_cfg=None,
                  dropout_ratio=0.5,
-                 init_std=0.005):
-        super().__init__()
+                 init_std=0.005,
+                 init_cfg=None):
+        super().__init__(init_cfg)
         if conv_cfg is None:
             conv_cfg = dict(type='Conv3d')
         if act_cfg is None:
             act_cfg = dict(type='ReLU')
         self.pretrained = pretrained
+        self.init_std = init_std
+
+        assert not (init_cfg
+                    and pretrained), ('init_cfg and pretrained cannot '
+                                      'be set at the same time')
+        if isinstance(pretrained, str):
+            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
+        elif pretrained is None:
+            if init_cfg is None:
+                self.init_cfg = [
+                    dict(type='Kaiming', layer='Conv3d'),
+                    dict(type='Normal', std=init_std, layer='Linear'),
+                    dict(
+                        type='Constant',
+                        val=1,
+                        layer=['_BatchNorm', 'GroupNorm'])
+                ]
+        else:
+            raise TypeError('pretrained must be a str or None')
+
         self.style = style
         self.conv_cfg = conv_cfg
         self.norm_cfg = norm_cfg
         self.act_cfg = act_cfg
         self.dropout_ratio = dropout_ratio
-        self.init_std = init_std
 
         c3d_conv_param = dict(
             kernel_size=(3, 3, 3),
@@ -81,27 +99,6 @@ def __init__(self,
         self.relu = nn.ReLU()
         self.dropout = nn.Dropout(p=self.dropout_ratio)
 
-    def init_weights(self):
-        """Initiate the parameters either from existing checkpoint or from
-        scratch."""
-        if isinstance(self.pretrained, str):
-            logger = get_root_logger()
-            logger.info(f'load model from: {self.pretrained}')
-
-            load_checkpoint(self, self.pretrained, strict=False, logger=logger)
-
-        elif self.pretrained is None:
-            for m in self.modules():
-                if isinstance(m, nn.Conv3d):
-                    kaiming_init(m)
-                elif isinstance(m, nn.Linear):
-                    normal_init(m, std=self.init_std)
-                elif isinstance(m, _BatchNorm):
-                    constant_init(m, 1)
-
-        else:
-            raise TypeError('pretrained must be a str or None')
-
     def forward(self, x):
         """Defines the computation performed at every call.
 
diff --git a/mmaction/models/backbones/mobilenet_v2.py b/mmaction/models/backbones/mobilenet_v2.py
index 5a093fa1fa..be76d8cb1f 100644
--- a/mmaction/models/backbones/mobilenet_v2.py
+++ b/mmaction/models/backbones/mobilenet_v2.py
@@ -1,10 +1,9 @@
 import torch.nn as nn
 import torch.utils.checkpoint as cp
-from mmcv.cnn import ConvModule, constant_init, kaiming_init
-from mmcv.runner import load_checkpoint
+from mmcv.cnn import ConvModule
+from mmcv.runner import BaseModule
 from torch.nn.modules.batchnorm import _BatchNorm
 
-from ...utils import get_root_logger
 from ..builder import BACKBONES
 
 
@@ -33,7 +32,7 @@ def make_divisible(value, divisor, min_value=None, min_ratio=0.9):
     return new_value
 
 
-class InvertedResidual(nn.Module):
+class InvertedResidual(BaseModule):
     """InvertedResidual block for MobileNetV2.
 
     Args:
@@ -61,9 +60,10 @@ def __init__(self,
                  expand_ratio,
                  conv_cfg=None,
                  norm_cfg=dict(type='BN'),
                  act_cfg=dict(type='ReLU6'),
-                 with_cp=False):
-        super(InvertedResidual, self).__init__()
+                 with_cp=False,
+                 init_cfg=None):
+        super().__init__(init_cfg)
         self.stride = stride
         assert stride in [1, 2], f'stride must in [1, 2]. ' \
             f'But received {stride}.'
@@ -119,7 +119,7 @@ def _inner_forward(x):
 
 
 @BACKBONES.register_module()
-class MobileNetV2(nn.Module):
+class MobileNetV2(BaseModule):
     """MobileNetV2 backbone.
 
     Args:
@@ -158,9 +158,28 @@ def __init__(self,
                  norm_cfg=dict(type='BN2d', requires_grad=True),
                  act_cfg=dict(type='ReLU6', inplace=True),
                  norm_eval=False,
-                 with_cp=False):
-        super().__init__()
+                 with_cp=False,
+                 init_cfg=None):
+        super().__init__(init_cfg)
         self.pretrained = pretrained
+
+        assert not (init_cfg
+                    and pretrained), ('init_cfg and pretrained cannot '
+                                      'be set at the same time')
+        if isinstance(pretrained, str):
+            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
+        elif pretrained is None:
+            if init_cfg is None:
+                self.init_cfg = [
+                    dict(type='Kaiming', layer='Conv2d'),
+                    dict(
+                        type='Constant',
+                        val=1,
+                        layer=['_BatchNorm', 'GroupNorm'])
+                ]
+        else:
+            raise TypeError('pretrained must be a str or None')
+
         self.widen_factor = widen_factor
         self.out_indices = out_indices
         for index in out_indices:
@@ -250,19 +269,6 @@ def make_layer(self, out_channels, num_blocks, stride, expand_ratio):
 
         return nn.Sequential(*layers)
 
-    def init_weights(self):
-        if isinstance(self.pretrained, str):
-            logger = get_root_logger()
-            load_checkpoint(self, self.pretrained, strict=False, logger=logger)
-        elif self.pretrained is None:
-            for m in self.modules():
-                if isinstance(m, nn.Conv2d):
-                    kaiming_init(m)
-                elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
-                    constant_init(m, 1)
-        else:
-            raise TypeError('pretrained must be a str or None')
-
     def forward(self, x):
         x = self.conv1(x)
 
diff --git a/mmaction/models/backbones/resnet3d.py b/mmaction/models/backbones/resnet3d.py
index 9073aa0848..3493ba5eac 100644
--- a/mmaction/models/backbones/resnet3d.py
+++ b/mmaction/models/backbones/resnet3d.py
@@ -802,6 +802,7 @@ def _freeze_stages(self):
     def init_weights(self, pretrained=None):
         if pretrained:
+            self.pretrained = pretrained
             self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
         if not self.pretrained2d:
             super().init_weights()
diff --git a/mmaction/models/backbones/resnet3d_csn.py b/mmaction/models/backbones/resnet3d_csn.py
index 97c3e420aa..54a01bb7e1 100644
--- a/mmaction/models/backbones/resnet3d_csn.py
+++ b/mmaction/models/backbones/resnet3d_csn.py
@@ -37,8 +37,7 @@ def __init__(self,
                  *args,
                  bottleneck_mode='ir',
                  **kwargs):
-        super(CSNBottleneck3d, self).__init__(inplanes, planes, *args,
-                                              **kwargs)
+        super().__init__(inplanes, planes, *args, **kwargs)
         self.bottleneck_mode = bottleneck_mode
         conv2 = []
         if self.bottleneck_mode == 'ip':
@@ -124,7 +123,7 @@ def __init__(self,
         if bottleneck_mode not in ['ip', 'ir']:
             raise ValueError(f'Bottleneck mode must be "ip" or "ir",'
                              f'but got {bottleneck_mode}.')
-        super(ResNet3dCSN, self).__init__(
+        super().__init__(
             depth,
             pretrained,
             temporal_strides=temporal_strides,
diff --git a/mmaction/models/backbones/resnet3d_slowfast.py b/mmaction/models/backbones/resnet3d_slowfast.py
index 3794ab092e..658aa61b92 100644
--- a/mmaction/models/backbones/resnet3d_slowfast.py
+++ b/mmaction/models/backbones/resnet3d_slowfast.py
@@ -1,10 +1,8 @@
 import torch
 import torch.nn as nn
 from mmcv.cnn import ConvModule, kaiming_init
-from mmcv.runner import _load_checkpoint, load_checkpoint
-from mmcv.utils import print_log
+from mmcv.runner import BaseModule, _load_checkpoint
 
-from ...utils import get_root_logger
 from ..registry import BACKBONES
 from .resnet3d import ResNet3d
 
@@ -329,11 +327,8 @@ def _freeze_stages(self):
     def init_weights(self, pretrained=None):
         """Initiate the parameters either from existing checkpoint or from
         scratch."""
-        if pretrained:
-            self.pretrained = pretrained
-
         # Override the init_weights of i3d
-        super().init_weights(pretrained=self.pretrained)
+        super().init_weights(pretrained=pretrained)
         for module_name in self.lateral_connections:
             layer = getattr(self, module_name)
             for m in layer.modules():
@@ -372,7 +367,7 @@ def build_pathway(cfg, *args, **kwargs):
 
 
 @BACKBONES.register_module()
-class ResNet3dSlowFast(nn.Module):
+class ResNet3dSlowFast(BaseModule):
     """Slowfast backbone.
 
     This module is proposed in `SlowFast Networks for Video Recognition
@@ -430,7 +425,8 @@ def __init__(self,
                      dilations=(1, 1, 1, 1),
                      conv1_stride_t=1,
                      pool1_stride_t=1,
-                     inflate=(0, 0, 1, 1)),
+                     inflate=(0, 0, 1, 1),
+                     init_cfg=None),
                  fast_pathway=dict(
                      type='resnet3d',
                      depth=50,
@@ -439,9 +435,20 @@ def __init__(self,
                      base_channels=8,
                      conv1_kernel=(5, 7, 7),
                      conv1_stride_t=1,
-                     pool1_stride_t=1)):
-        super().__init__()
+                     pool1_stride_t=1,
+                     init_cfg=None),
+                 init_cfg=None):
+        super().__init__(init_cfg)
         self.pretrained = pretrained
+
+        assert not (init_cfg
+                    and pretrained), ('init_cfg and pretrained cannot '
+                                      'be set at the same time')
+        if isinstance(pretrained, str):
+            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
+        elif pretrained is not None:
+            raise TypeError('pretrained must be a str or None')
+
         self.resample_rate = resample_rate
         self.speed_ratio = speed_ratio
         self.channel_ratio = channel_ratio
@@ -458,13 +465,10 @@ def init_weights(self, pretrained=None):
         scratch."""
         if pretrained:
             self.pretrained = pretrained
-
+            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
         if isinstance(self.pretrained, str):
-            logger = get_root_logger()
-            msg = f'load model from: {self.pretrained}'
-            print_log(msg, logger=logger)
-            # Directly load 3D model.
-            load_checkpoint(self, self.pretrained, strict=True, logger=logger)
+            super().init_weights()
+
         elif self.pretrained is None:
             # Init two branch seperately.
             self.fast_path.init_weights()
diff --git a/mmaction/models/backbones/tanet.py b/mmaction/models/backbones/tanet.py
index d66233931d..c118123f6c 100644
--- a/mmaction/models/backbones/tanet.py
+++ b/mmaction/models/backbones/tanet.py
@@ -1,6 +1,7 @@
 from copy import deepcopy
 
 import torch.nn as nn
+from mmcv.runner import BaseModule
 from torch.utils import checkpoint as cp
 
 from ..common import TAM
@@ -8,7 +9,7 @@
 from .resnet import Bottleneck, ResNet
 
 
-class TABlock(nn.Module):
+class TABlock(BaseModule):
     """Temporal Adaptive Block (TA-Block) for TANet.
 
     This block is proposed in `TAM: TEMPORAL ADAPTIVE MODULE FOR VIDEO
@@ -25,8 +26,8 @@ class TABlock(nn.Module):
             Default: dict().
""" - def __init__(self, block, num_segments, tam_cfg=dict()): - super().__init__() + def __init__(self, block, num_segments, tam_cfg=dict(), init_cfg=None): + super().__init__(init_cfg) self.tam_cfg = deepcopy(tam_cfg) self.block = block self.num_segments = num_segments diff --git a/mmaction/models/backbones/x3d.py b/mmaction/models/backbones/x3d.py index 4d6b85cff3..74f6ea3b41 100644 --- a/mmaction/models/backbones/x3d.py +++ b/mmaction/models/backbones/x3d.py @@ -2,19 +2,17 @@ import torch.nn as nn import torch.utils.checkpoint as cp -from mmcv.cnn import (ConvModule, Swish, build_activation_layer, constant_init, - kaiming_init) -from mmcv.runner import load_checkpoint +from mmcv.cnn import ConvModule, Swish, build_activation_layer +from mmcv.runner import BaseModule from mmcv.utils import _BatchNorm -from ...utils import get_root_logger from ..registry import BACKBONES -class SEModule(nn.Module): +class SEModule(BaseModule): - def __init__(self, channels, reduction): - super().__init__() + def __init__(self, channels, reduction, init_cfg=None): + super().__init__(init_cfg) self.avg_pool = nn.AdaptiveAvgPool3d(1) self.bottleneck = self._round_width(channels, reduction) self.fc1 = nn.Conv3d( @@ -44,7 +42,7 @@ def forward(self, x): return module_input * x -class BlockX3D(nn.Module): +class BlockX3D(BaseModule): """BlockX3D 3d building block for X3D. Args: @@ -78,8 +76,9 @@ def __init__(self, conv_cfg=dict(type='Conv3d'), norm_cfg=dict(type='BN3d'), act_cfg=dict(type='ReLU'), - with_cp=False): - super().__init__() + with_cp=False, + init_cfg=None): + super().__init__(init_cfg) self.inplanes = inplanes self.planes = planes @@ -167,7 +166,7 @@ def _inner_forward(x): # We do not support initialize with 2D pretrain weight for X3D @BACKBONES.register_module() -class X3D(nn.Module): +class X3D(BaseModule): """X3D backbone. https://arxiv.org/pdf/2004.04730.pdf. 
 
     Args:
@@ -224,13 +223,35 @@ def __init__(self,
                  norm_eval=False,
                  with_cp=False,
                  zero_init_residual=True,
+                 init_cfg=None,
                  **kwargs):
-        super().__init__()
+        super().__init__(init_cfg)
         self.gamma_w = gamma_w
         self.gamma_b = gamma_b
         self.gamma_d = gamma_d
-
         self.pretrained = pretrained
+
+        block_init_cfg = None
+        assert not (init_cfg
+                    and pretrained), ('init_cfg and pretrained cannot '
+                                      'be set at the same time')
+        if isinstance(pretrained, str):
+            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
+        elif pretrained is None:
+            if init_cfg is None:
+                self.init_cfg = [
+                    dict(type='Kaiming', layer='Conv3d'),
+                    dict(
+                        type='Constant',
+                        val=1,
+                        layer=['_BatchNorm', 'GroupNorm'])
+                ]
+                if zero_init_residual:
+                    block_init_cfg = dict(
+                        type='Constant', val=0, override=dict(name='norm3'))
+        else:
+            raise TypeError('pretrained must be a str or None')
+
         self.in_channels = in_channels
         # Hard coded, can be changed by gamma_w
         self.base_channels = 24
@@ -288,6 +309,7 @@ def __init__(self,
                 conv_cfg=self.conv_cfg,
                 act_cfg=self.act_cfg,
                 with_cp=with_cp,
+                init_cfg=block_init_cfg,
                 **kwargs)
             self.layer_inplanes = inplanes
             layer_name = f'layer{i + 1}'
@@ -472,29 +494,6 @@ def _freeze_stages(self):
                 for param in m.parameters():
                     param.requires_grad = False
 
-    def init_weights(self):
-        """Initiate the parameters either from existing checkpoint or from
-        scratch."""
-        if isinstance(self.pretrained, str):
-            logger = get_root_logger()
-            logger.info(f'load model from: {self.pretrained}')
-
-            load_checkpoint(self, self.pretrained, strict=False, logger=logger)
-
-        elif self.pretrained is None:
-            for m in self.modules():
-                if isinstance(m, nn.Conv3d):
-                    kaiming_init(m)
-                elif isinstance(m, _BatchNorm):
-                    constant_init(m, 1)
-
-            if self.zero_init_residual:
-                for m in self.modules():
-                    if isinstance(m, BlockX3D):
-                        constant_init(m.conv3.bn, 0)
-        else:
-            raise TypeError('pretrained must be a str or None')
-
     def forward(self, x):
         """Defines the computation performed at every call.
 
diff --git a/mmaction/models/common/conv2plus1d.py b/mmaction/models/common/conv2plus1d.py
index 675b0e2261..804acba6f8 100644
--- a/mmaction/models/common/conv2plus1d.py
+++ b/mmaction/models/common/conv2plus1d.py
@@ -1,10 +1,11 @@
 import torch.nn as nn
-from mmcv.cnn import CONV_LAYERS, build_norm_layer, constant_init, kaiming_init
+from mmcv.cnn import CONV_LAYERS, build_norm_layer
+from mmcv.runner import BaseModule
 from torch.nn.modules.utils import _triple
 
 
 @CONV_LAYERS.register_module()
-class Conv2plus1d(nn.Module):
+class Conv2plus1d(BaseModule):
     """(2+1)d Conv module for R(2+1)d backbone.
 
     https://arxiv.org/pdf/1711.11248.pdf.
@@ -31,8 +32,16 @@ def __init__(self,
                  dilation=1,
                  groups=1,
                  bias=True,
-                 norm_cfg=dict(type='BN3d')):
-        super().__init__()
+                 norm_cfg=dict(type='BN3d'),
+                 init_cfg=None):
+        super().__init__(init_cfg)
+
+        if init_cfg is None:
+            self.init_cfg = [
+                dict(type='Kaiming', layer='Conv3d'),
+                dict(
+                    type='Constant', val=1, layer=['_BatchNorm', 'GroupNorm'])
+            ]
 
         kernel_size = _triple(kernel_size)
         stride = _triple(stride)
@@ -96,9 +105,3 @@ def forward(self, x):
         x = self.relu(x)
         x = self.conv_t(x)
         return x
-
-    def init_weights(self):
-        """Initiate the parameters from scratch."""
-        kaiming_init(self.conv_s)
-        kaiming_init(self.conv_t)
-        constant_init(self.bn_s, 1, bias=0)
diff --git a/mmaction/models/common/conv_audio.py b/mmaction/models/common/conv_audio.py
index 3b8a606753..ef13c28dac 100644
--- a/mmaction/models/common/conv_audio.py
+++ b/mmaction/models/common/conv_audio.py
@@ -1,11 +1,11 @@
 import torch
-import torch.nn as nn
-from mmcv.cnn import CONV_LAYERS, ConvModule, constant_init, kaiming_init
+from mmcv.cnn import CONV_LAYERS, ConvModule
+from mmcv.runner import BaseModule
 from torch.nn.modules.utils import _pair
 
 
 @CONV_LAYERS.register_module()
-class ConvAudio(nn.Module):
+class ConvAudio(BaseModule):
     """Conv2d module for AudioResNet backbone.
 
     `_.
@@ -35,8 +35,16 @@ def __init__(self,
                  padding=0,
                  dilation=1,
                  groups=1,
-                 bias=False):
-        super().__init__()
+                 bias=False,
+                 init_cfg=None):
+        super().__init__(init_cfg)
+
+        if init_cfg is None:
+            self.init_cfg = [
+                dict(type='Kaiming', layer='Conv2d'),
+                dict(
+                    type='Constant', val=1, layer=['_BatchNorm', 'GroupNorm'])
+            ]
 
         kernel_size = _pair(kernel_size)
         stride = _pair(stride)
@@ -95,10 +103,3 @@ def forward(self, x):
         else:
             out = x_1 + x_2
         return out
-
-    def init_weights(self):
-        """Initiate the parameters from scratch."""
-        kaiming_init(self.conv_1.conv)
-        kaiming_init(self.conv_2.conv)
-        constant_init(self.conv_1.bn, 1, bias=0)
-        constant_init(self.conv_2.bn, 1, bias=0)
diff --git a/mmaction/models/common/lfb.py b/mmaction/models/common/lfb.py
index e942dd165e..f54ae36e31 100644
--- a/mmaction/models/common/lfb.py
+++ b/mmaction/models/common/lfb.py
@@ -14,7 +14,7 @@
 lmdb_imported = False
 
 
-class LFB(object):
+class LFB:
     """Long-Term Feature Bank (LFB).
 
     LFB is proposed in `Long-Term Feature Banks for Detailed Video
diff --git a/mmaction/models/common/tam.py b/mmaction/models/common/tam.py
index 301c69faea..113048f345 100644
--- a/mmaction/models/common/tam.py
+++ b/mmaction/models/common/tam.py
@@ -1,9 +1,9 @@
 import torch.nn as nn
 import torch.nn.functional as F
-from mmcv.cnn import constant_init, kaiming_init, normal_init
+from mmcv.runner import BaseModule
 
 
-class TAM(nn.Module):
+class TAM(BaseModule):
     """Temporal Adaptive Module(TAM) for TANet.
 
     This module is proposed in `TAM: TEMPORAL ADAPTIVE MODULE FOR VIDEO
@@ -40,8 +40,17 @@ def __init__(self,
                  conv1d_kernel_size=3,
                  adaptive_convolution_stride=1,
                  adaptive_convolution_padding=1,
-                 init_std=0.001):
-        super().__init__()
+                 init_std=0.001,
+                 init_cfg=None):
+        super().__init__(init_cfg)
+
+        if init_cfg is None:
+            self.init_cfg = [
+                dict(type='Kaiming', layer='Conv1d'),
+                dict(
+                    type='Constant', val=1, layer=['_BatchNorm', 'GroupNorm']),
+                dict(type='Normal', std=init_std, layer='Linear'),
+            ]
 
         assert beta > 0 and alpha > 0
         self.in_channels = in_channels
@@ -74,16 +83,6 @@ def __init__(self,
 
         self.init_weights()
 
-    def init_weights(self):
-        """Initiate the parameters from scratch."""
-        for m in self.modules():
-            if isinstance(m, nn.Conv1d):
-                kaiming_init(m)
-            elif isinstance(m, nn.BatchNorm1d):
-                constant_init(m, 1)
-            elif isinstance(m, nn.Linear):
-                normal_init(m, std=self.init_std)
-
     def forward(self, x):
         """Defines the computation performed at every call.
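
Reviewer note: below is a minimal usage sketch of the initialization flow this patch introduces. It is illustrative only: the checkpoint path is a placeholder, and the only API assumed is mmcv's BaseModule.init_weights(), which consumes the init_cfg that each __init__ now populates.

# Sketch of the new init_cfg flow (assumes mmcv >= 1.3, which provides BaseModule).
from mmaction.models import C3D

# Training from scratch: __init__ fills self.init_cfg with the default
# Kaiming/Normal/Constant rules, and init_weights() applies them to the
# Conv3d/Linear/BatchNorm layers, replacing the removed per-class loops.
model = C3D()
model.init_weights()

# Fine-tuning: a string `pretrained` is now translated into
# init_cfg=dict(type='Pretrained', checkpoint=...), so the same
# init_weights() call loads the checkpoint instead of random init.
model = C3D(pretrained='path/to/c3d_checkpoint.pth')  # placeholder path
model.init_weights()

# Passing both is rejected up front:
#   C3D(pretrained='a.pth', init_cfg=dict(type='Pretrained', checkpoint='b.pth'))
#   -> AssertionError: init_cfg and pretrained cannot be set at the same time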