From 5acaa706d919caf94e07ef6814a80c931a0a1f27 Mon Sep 17 00:00:00 2001 From: Tao Gong Date: Fri, 6 Aug 2021 15:22:53 +0800 Subject: [PATCH 01/10] unify model initialization for vid --- configs/_base_/models/faster_rcnn_r50_dc5.py | 5 +- ...dff_faster_rcnn_r101_dc5_1x_imagenetvid.py | 38 ++----------- .../dff_faster_rcnn_r50_dc5_1x_imagenetvid.py | 13 +++-- ...gfa_faster_rcnn_r101_dc5_1x_imagenetvid.py | 34 ++---------- ...fgfa_faster_rcnn_r50_dc5_1x_imagenetvid.py | 13 +++-- ...lsa_faster_rcnn_r101_dc5_1x_imagenetvid.py | 53 ++----------------- ...elsa_faster_rcnn_r50_dc5_1x_imagenetvid.py | 1 - .../models/aggregators/embed_aggregator.py | 8 +-- .../models/aggregators/selsa_aggregator.py | 7 +-- mmtrack/models/motion/flownet_simple.py | 13 ++--- mmtrack/models/vid/base.py | 27 ++-------- mmtrack/models/vid/dff.py | 35 ++++++------ mmtrack/models/vid/fgfa.py | 35 ++++++------ mmtrack/models/vid/selsa.py | 29 +++++----- tools/train.py | 9 +--- 15 files changed, 104 insertions(+), 216 deletions(-) diff --git a/configs/_base_/models/faster_rcnn_r50_dc5.py b/configs/_base_/models/faster_rcnn_r50_dc5.py index 0d77a29e8..ef50daabb 100644 --- a/configs/_base_/models/faster_rcnn_r50_dc5.py +++ b/configs/_base_/models/faster_rcnn_r50_dc5.py @@ -1,7 +1,6 @@ model = dict( detector=dict( type='FasterRCNN', - pretrained='torchvision://resnet50', backbone=dict( type='ResNet', depth=50, @@ -12,7 +11,9 @@ frozen_stages=1, norm_cfg=dict(type='BN', requires_grad=True), norm_eval=True, - style='pytorch'), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='torchvision://resnet50')), neck=dict( type='ChannelMapper', in_channels=[2048], diff --git a/configs/vid/dff/dff_faster_rcnn_r101_dc5_1x_imagenetvid.py b/configs/vid/dff/dff_faster_rcnn_r101_dc5_1x_imagenetvid.py index 544061eda..c603ed414 100644 --- a/configs/vid/dff/dff_faster_rcnn_r101_dc5_1x_imagenetvid.py +++ b/configs/vid/dff/dff_faster_rcnn_r101_dc5_1x_imagenetvid.py @@ -1,35 +1,7 @@ -_base_ = [ - '../../_base_/models/faster_rcnn_r50_dc5.py', - '../../_base_/datasets/imagenet_vid_dff_style.py', - '../../_base_/default_runtime.py' -] +_base_ = ['./dff_faster_rcnn_r50_dc5_1x_imagenetvid.py'] model = dict( - type='DFF', - pretrains=dict( - motion= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/pretrained_weights/flownet_simple.pth' # noqa: E501 - ), detector=dict( - pretrained='torchvision://resnet101', - backbone=dict(depth=101), - train_cfg=dict( - rpn_proposal=dict(max_per_img=1000), - rcnn=dict(sampler=dict(num=512)))), - motion=dict(type='FlowNetSimple', img_scale_factor=0.5), - train_cfg=None, - test_cfg=dict(key_frame_interval=10)) - -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict( - _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[2, 5]) -# runtime settings -total_epochs = 7 -evaluation = dict(metric=['bbox'], interval=7) + backbone=dict( + depth=101, + init_cfg=dict( + type='Pretrained', checkpoint='torchvision://resnet101')))) diff --git a/configs/vid/dff/dff_faster_rcnn_r50_dc5_1x_imagenetvid.py b/configs/vid/dff/dff_faster_rcnn_r50_dc5_1x_imagenetvid.py index 9dedf1b4a..d32f6142e 100644 --- a/configs/vid/dff/dff_faster_rcnn_r50_dc5_1x_imagenetvid.py +++ b/configs/vid/dff/dff_faster_rcnn_r50_dc5_1x_imagenetvid.py @@ -5,15 +5,18 @@ ] model = dict( type='DFF', - pretrains=dict( - motion= # noqa: E251 - 
'https://download.openmmlab.com/mmtracking/pretrained_weights/flownet_simple.pth' # noqa: E501 - ), detector=dict( train_cfg=dict( rpn_proposal=dict(max_per_img=1000), rcnn=dict(sampler=dict(num=512)))), - motion=dict(type='FlowNetSimple', img_scale_factor=0.5), + motion=dict( + type='FlowNetSimple', + img_scale_factor=0.5, + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/pretrained_weights/flownet_simple.pth' # noqa: E501 + )), train_cfg=None, test_cfg=dict(key_frame_interval=10)) diff --git a/configs/vid/fgfa/fgfa_faster_rcnn_r101_dc5_1x_imagenetvid.py b/configs/vid/fgfa/fgfa_faster_rcnn_r101_dc5_1x_imagenetvid.py index 62ca1441a..060b01478 100644 --- a/configs/vid/fgfa/fgfa_faster_rcnn_r101_dc5_1x_imagenetvid.py +++ b/configs/vid/fgfa/fgfa_faster_rcnn_r101_dc5_1x_imagenetvid.py @@ -1,31 +1,7 @@ -_base_ = [ - '../../_base_/models/faster_rcnn_r50_dc5.py', - '../../_base_/datasets/imagenet_vid_fgfa_style.py', - '../../_base_/default_runtime.py' -] +_base_ = ['./fgfa_faster_rcnn_r50_dc5_1x_imagenetvid.py'] model = dict( - type='FGFA', - pretrains=dict( - motion= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/pretrained_weights/flownet_simple.pth' # noqa: E501 - ), detector=dict( - pretrained='torchvision://resnet101', backbone=dict(depth=101)), - motion=dict(type='FlowNetSimple', img_scale_factor=0.5), - aggregator=dict( - type='EmbedAggregator', num_convs=1, channels=512, kernel_size=3)) - -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict( - _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[2, 5]) -# runtime settings -total_epochs = 7 -evaluation = dict(metric=['bbox'], interval=7) + backbone=dict( + depth=101, + init_cfg=dict( + type='Pretrained', checkpoint='torchvision://resnet101')))) diff --git a/configs/vid/fgfa/fgfa_faster_rcnn_r50_dc5_1x_imagenetvid.py b/configs/vid/fgfa/fgfa_faster_rcnn_r50_dc5_1x_imagenetvid.py index 78f9cf469..0fcdac968 100644 --- a/configs/vid/fgfa/fgfa_faster_rcnn_r50_dc5_1x_imagenetvid.py +++ b/configs/vid/fgfa/fgfa_faster_rcnn_r50_dc5_1x_imagenetvid.py @@ -5,11 +5,14 @@ ] model = dict( type='FGFA', - pretrains=dict( - motion= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/pretrained_weights/flownet_simple.pth' # noqa: E501 - ), - motion=dict(type='FlowNetSimple', img_scale_factor=0.5), + motion=dict( + type='FlowNetSimple', + img_scale_factor=0.5, + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/pretrained_weights/flownet_simple.pth' # noqa: E501 + )), aggregator=dict( type='EmbedAggregator', num_convs=1, channels=512, kernel_size=3)) diff --git a/configs/vid/selsa/selsa_faster_rcnn_r101_dc5_1x_imagenetvid.py b/configs/vid/selsa/selsa_faster_rcnn_r101_dc5_1x_imagenetvid.py index 03769d85b..1ef18eead 100644 --- a/configs/vid/selsa/selsa_faster_rcnn_r101_dc5_1x_imagenetvid.py +++ b/configs/vid/selsa/selsa_faster_rcnn_r101_dc5_1x_imagenetvid.py @@ -1,50 +1,7 @@ -_base_ = [ - '../../_base_/models/faster_rcnn_r50_dc5.py', - '../../_base_/datasets/imagenet_vid_fgfa_style.py', - '../../_base_/default_runtime.py' -] +_base_ = ['./selsa_faster_rcnn_r50_dc5_1x_imagenetvid.py'] model = dict( - type='SELSA', - pretrains=None, detector=dict( - pretrained='torchvision://resnet101', - backbone=dict(depth=101), - roi_head=dict( - 
type='SelsaRoIHead', - bbox_head=dict( - type='SelsaBBoxHead', - num_shared_fcs=2, - aggregator=dict( - type='SelsaAggregator', - in_channels=1024, - num_attention_blocks=16))))) - -# dataset settings -data = dict( - val=dict( - ref_img_sampler=dict( - _delete_=True, - num_ref_imgs=14, - frame_range=[-7, 7], - method='test_with_adaptive_stride')), - test=dict( - ref_img_sampler=dict( - _delete_=True, - num_ref_imgs=14, - frame_range=[-7, 7], - method='test_with_adaptive_stride'))) - -# optimizer -optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001) -optimizer_config = dict( - _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) -# learning policy -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=500, - warmup_ratio=1.0 / 3, - step=[2, 5]) -# runtime settings -total_epochs = 7 -evaluation = dict(metric=['bbox'], interval=7) + backbone=dict( + depth=101, + init_cfg=dict( + type='Pretrained', checkpoint='torchvision://resnet101')))) diff --git a/configs/vid/selsa/selsa_faster_rcnn_r50_dc5_1x_imagenetvid.py b/configs/vid/selsa/selsa_faster_rcnn_r50_dc5_1x_imagenetvid.py index 2ee8536ad..7995ad8bf 100644 --- a/configs/vid/selsa/selsa_faster_rcnn_r50_dc5_1x_imagenetvid.py +++ b/configs/vid/selsa/selsa_faster_rcnn_r50_dc5_1x_imagenetvid.py @@ -5,7 +5,6 @@ ] model = dict( type='SELSA', - pretrains=None, detector=dict( roi_head=dict( type='SelsaRoIHead', diff --git a/mmtrack/models/aggregators/embed_aggregator.py b/mmtrack/models/aggregators/embed_aggregator.py index 20f7a6266..778d168ed 100644 --- a/mmtrack/models/aggregators/embed_aggregator.py +++ b/mmtrack/models/aggregators/embed_aggregator.py @@ -1,12 +1,13 @@ import torch import torch.nn as nn from mmcv.cnn.bricks import ConvModule +from mmcv.runner import BaseModule from ..builder import AGGREGATORS @AGGREGATORS.register_module() -class EmbedAggregator(nn.Module): +class EmbedAggregator(BaseModule): """Embedding convs to aggregate multi feature maps. This module is proposed in "Flow-Guided Feature Aggregation for Video @@ -27,8 +28,9 @@ def __init__(self, channels=256, kernel_size=3, norm_cfg=None, - act_cfg=dict(type='ReLU')): - super(EmbedAggregator, self).__init__() + act_cfg=dict(type='ReLU'), + init_cfg=None): + super(EmbedAggregator, self).__init__(init_cfg) assert num_convs > 0, 'The number of convs must be bigger than 1.' self.embed_convs = nn.ModuleList() for i in range(num_convs): diff --git a/mmtrack/models/aggregators/selsa_aggregator.py b/mmtrack/models/aggregators/selsa_aggregator.py index 975eca936..82e6be1cc 100644 --- a/mmtrack/models/aggregators/selsa_aggregator.py +++ b/mmtrack/models/aggregators/selsa_aggregator.py @@ -1,11 +1,12 @@ import torch import torch.nn as nn +from mmcv.runner import BaseModule from ..builder import AGGREGATORS @AGGREGATORS.register_module() -class SelsaAggregator(nn.Module): +class SelsaAggregator(BaseModule): """Selsa aggregator module. This module is proposed in "Sequence Level Semantics Aggregation for Video @@ -18,8 +19,8 @@ class SelsaAggregator(nn.Module): selsa aggregator module. Defaults to 16. 
""" - def __init__(self, in_channels, num_attention_blocks=16): - super(SelsaAggregator, self).__init__() + def __init__(self, in_channels, num_attention_blocks=16, init_cfg=None): + super(SelsaAggregator, self).__init__(init_cfg) self.fc_embed = nn.Linear(in_channels, in_channels) self.ref_fc_embed = nn.Linear(in_channels, in_channels) self.fc = nn.Linear(in_channels, in_channels) diff --git a/mmtrack/models/motion/flownet_simple.py b/mmtrack/models/motion/flownet_simple.py index 23a35101e..59e1f64f7 100644 --- a/mmtrack/models/motion/flownet_simple.py +++ b/mmtrack/models/motion/flownet_simple.py @@ -1,12 +1,13 @@ import torch import torch.nn as nn from mmcv.cnn.bricks import ConvModule +from mmcv.runner import BaseModule from ..builder import MOTION @MOTION.register_module() -class FlowNetSimple(nn.Module): +class FlowNetSimple(BaseModule): """The simple version of FlowNet. This FlowNetSimple is the implementation of `FlowNetSimple @@ -40,8 +41,9 @@ def __init__(self, out_indices=[2, 3, 4, 5, 6], flow_scale_factor=5.0, flow_img_norm_std=[255.0, 255.0, 255.0], - flow_img_norm_mean=[0.411, 0.432, 0.450]): - super(FlowNetSimple, self).__init__() + flow_img_norm_mean=[0.411, 0.432, 0.450], + init_cfg=None): + super(FlowNetSimple, self).__init__(init_cfg) self.img_scale_factor = img_scale_factor self.out_indices = out_indices self.flow_scale_factor = flow_scale_factor @@ -141,11 +143,6 @@ def __init__(self, conv_cfg=dict(type='Conv'), act_cfg=None) - def init_weights(self): - """Initialize the weight FlowNetSimple.""" - # using the default initialization in ConvModule. - pass - def prepare_imgs(self, imgs, img_metas): """Preprocess images pairs for computing flow. diff --git a/mmtrack/models/vid/base.py b/mmtrack/models/vid/base.py index a279467f2..05635719d 100644 --- a/mmtrack/models/vid/base.py +++ b/mmtrack/models/vid/base.py @@ -5,38 +5,19 @@ import numpy as np import torch import torch.distributed as dist -import torch.nn as nn -from mmcv.runner import auto_fp16, load_checkpoint -from mmcv.utils import print_log +from mmcv.runner import BaseModule, auto_fp16 from mmtrack.utils import get_root_logger -class BaseVideoDetector(nn.Module, metaclass=ABCMeta): +class BaseVideoDetector(BaseModule, metaclass=ABCMeta): """Base class for video object detector.""" - def __init__(self): - super(BaseVideoDetector, self).__init__() + def __init__(self, init_cfg): + super(BaseVideoDetector, self).__init__(init_cfg) self.logger = get_root_logger() self.fp16_enabled = False - def init_module(self, module, pretrain=None): - """Initialize the weights of modules in video detector. - - Args: - pretrained (str, optional): Path to pre-trained weights. - Defaults to None. 
- """ - if pretrain is not None: - print_log(f'load {module} from: {pretrain}', logger=self.logger) - load_checkpoint( - getattr(self, module), - pretrain, - strict=False, - logger=self.logger) - else: - getattr(self, module).init_weights() - def freeze_module(self, module): """Freeze module during training.""" if isinstance(module, str): diff --git a/mmtrack/models/vid/dff.py b/mmtrack/models/vid/dff.py index d47f83ded..e5c58d1e4 100644 --- a/mmtrack/models/vid/dff.py +++ b/mmtrack/models/vid/dff.py @@ -1,3 +1,5 @@ +import warnings + import torch from addict import Dict from mmdet.core import bbox2result @@ -20,33 +22,34 @@ def __init__(self, detector, motion, pretrains=None, + init_cfg=None, frozen_modules=None, train_cfg=None, test_cfg=None): - super(DFF, self).__init__() + super(DFF, self).__init__(init_cfg) + if isinstance(pretrains, dict): + warnings.warn('DeprecationWarning: pretrains is deprecated, ' + 'please use "init_cfg" instead') + motion_pretrain = pretrains.get('motion', None) + if motion_pretrain: + motion.init_cfg = dict( + type='Pretrained', checkpoint=motion_pretrain) + else: + motion.init_cfg = None + detector_pretrain = pretrains.get('detector', None) + if detector_pretrain: + detector.init_cfg = dict( + type='Pretrained', checkpoint=detector_pretrain) + else: + detector.init_cfg = None self.detector = build_detector(detector) self.motion = build_motion(motion) self.train_cfg = train_cfg self.test_cfg = test_cfg - self.init_weights(pretrains) if frozen_modules is not None: self.freeze_module(frozen_modules) - def init_weights(self, pretrain): - """Initialize the weights of modules in video object detector. - - Args: - pretrained (dict): Path to pre-trained weights. - """ - if pretrain is None: - pretrain = dict() - assert isinstance(pretrain, dict), '`pretrain` must be a dict.' - if self.with_detector and pretrain.get('detector', False): - self.init_module('detector', pretrain['detector']) - if self.with_motion: - self.init_module('motion', pretrain.get('motion', None)) - def forward_train(self, img, img_metas, diff --git a/mmtrack/models/vid/fgfa.py b/mmtrack/models/vid/fgfa.py index 06ad7d8df..f09c2a5a2 100644 --- a/mmtrack/models/vid/fgfa.py +++ b/mmtrack/models/vid/fgfa.py @@ -1,3 +1,5 @@ +import warnings + import torch from addict import Dict from mmdet.core import bbox2result @@ -21,34 +23,35 @@ def __init__(self, motion, aggregator, pretrains=None, + init_cfg=None, frozen_modules=None, train_cfg=None, test_cfg=None): - super(FGFA, self).__init__() + super(FGFA, self).__init__(init_cfg) + if isinstance(pretrains, dict): + warnings.warn('DeprecationWarning: pretrains is deprecated, ' + 'please use "init_cfg" instead') + motion_pretrain = pretrains.get('motion', None) + if motion_pretrain: + motion.init_cfg = dict( + type='Pretrained', checkpoint=motion_pretrain) + else: + motion.init_cfg = None + detector_pretrain = pretrains.get('detector', None) + if detector_pretrain: + detector.init_cfg = dict( + type='Pretrained', checkpoint=detector_pretrain) + else: + detector.init_cfg = None self.detector = build_detector(detector) self.motion = build_motion(motion) self.aggregator = build_aggregator(aggregator) self.train_cfg = train_cfg self.test_cfg = test_cfg - self.init_weights(pretrains) if frozen_modules is not None: self.freeze_module(frozen_modules) - def init_weights(self, pretrain): - """Initialize the weights of modules in video object detector. - - Args: - pretrained (dict): Path to pre-trained weights. 
- """ - if pretrain is None: - pretrain = dict() - assert isinstance(pretrain, dict), '`pretrain` must be a dict.' - if self.with_detector and pretrain.get('detector', False): - self.init_module('detector', pretrain['detector']) - if self.with_motion: - self.init_module('motion', pretrain.get('motion', None)) - def forward_train(self, img, img_metas, diff --git a/mmtrack/models/vid/selsa.py b/mmtrack/models/vid/selsa.py index 0771feeec..494aa60f9 100644 --- a/mmtrack/models/vid/selsa.py +++ b/mmtrack/models/vid/selsa.py @@ -1,3 +1,5 @@ +import warnings + import torch from addict import Dict from mmdet.models import build_detector @@ -17,34 +19,29 @@ class SELSA(BaseVideoDetector): def __init__(self, detector, pretrains=None, + init_cfg=None, frozen_modules=None, train_cfg=None, test_cfg=None): - super(SELSA, self).__init__() + super(SELSA, self).__init__(init_cfg) + if isinstance(pretrains, dict): + warnings.warn('DeprecationWarning: pretrains is deprecated, ' + 'please use "init_cfg" instead') + detector_pretrain = pretrains.get('detector', None) + if detector_pretrain: + detector.init_cfg = dict( + type='Pretrained', checkpoint=detector_pretrain) + else: + detector.init_cfg = None self.detector = build_detector(detector) assert hasattr(self.detector, 'roi_head'), \ 'selsa video detector only supports two stage detector' self.train_cfg = train_cfg self.test_cfg = test_cfg - self.init_weights(pretrains) if frozen_modules is not None: self.freeze_module(frozen_modules) - def init_weights(self, pretrain): - """Initialize the weights of modules in video object detector. - - Args: - pretrained (dict): Path to pre-trained weights. - """ - if pretrain is None: - pretrain = dict() - assert isinstance(pretrain, dict), '`pretrain` must be a dict.' - if self.with_detector and pretrain.get('detector', False): - self.init_module('detector', pretrain['detector']) - if self.with_motion: - self.init_module('motion', pretrain.get('motion', None)) - def forward_train(self, img, img_metas, diff --git a/tools/train.py b/tools/train.py index 607091fd4..fb3c22eac 100644 --- a/tools/train.py +++ b/tools/train.py @@ -66,13 +66,11 @@ def main(): cfg = Config.fromfile(args.config) - need_init_detector = False if cfg.get('USE_MMDET', False): from mmdet.apis import train_detector as train_model from mmdet.models import build_detector as build_model if 'detector' in cfg.model: cfg.model = cfg.model.detector - need_init_detector = True elif cfg.get('USE_MMCLS', False): from mmtrack.apis import train_model from mmtrack.models import build_reid as build_model @@ -146,12 +144,7 @@ def main(): cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg) else: model = build_model(cfg.model) - if 'detector' in cfg.model: - model.detector.init_weights() - # if True, the model denotes a detector based on Line #75. 
Therefore, we - # need model.init_weights() rather than model.detector.init_weights() - if need_init_detector: - model.init_weights() + model.init_weights() datasets = [build_dataset(cfg.data.train)] if len(cfg.workflow) == 2: From a575aacaf4f16d05306d7fe7f9244af8f58bf998 Mon Sep 17 00:00:00 2001 From: Tao Gong Date: Fri, 6 Aug 2021 17:12:52 +0800 Subject: [PATCH 02/10] unify model initialization for sot --- .../siamese_rpn/siamese_rpn_r50_1x_lasot.py | 11 ++++--- mmtrack/models/sot/base.py | 33 +++++-------------- mmtrack/models/sot/siamrpn.py | 33 +++++++++++-------- .../models/track_heads/siamese_rpn_head.py | 17 +++++++--- 4 files changed, 47 insertions(+), 47 deletions(-) diff --git a/configs/sot/siamese_rpn/siamese_rpn_r50_1x_lasot.py b/configs/sot/siamese_rpn/siamese_rpn_r50_1x_lasot.py index c37b893fd..45c8e2934 100644 --- a/configs/sot/siamese_rpn/siamese_rpn_r50_1x_lasot.py +++ b/configs/sot/siamese_rpn/siamese_rpn_r50_1x_lasot.py @@ -6,10 +6,6 @@ # model settings model = dict( type='SiamRPN', - pretrains=dict( - backbone= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/pretrained_weights/sot_resnet50.model' # noqa: E501 - ), backbone=dict( type='SOTResNet', depth=50, @@ -17,7 +13,12 @@ frozen_stages=4, strides=(1, 2, 1, 1), dilations=(1, 1, 2, 4), - norm_eval=True), + norm_eval=True, + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/pretrained_weights/sot_resnet50.model' # noqa: E501 + )), neck=dict( type='ChannelMapper', in_channels=[512, 1024, 2048], diff --git a/mmtrack/models/sot/base.py b/mmtrack/models/sot/base.py index cf194c893..610e46214 100644 --- a/mmtrack/models/sot/base.py +++ b/mmtrack/models/sot/base.py @@ -5,38 +5,23 @@ import numpy as np import torch import torch.distributed as dist -import torch.nn as nn -from mmcv.runner import auto_fp16, load_checkpoint -from mmcv.utils import print_log +from mmcv.runner import BaseModule, auto_fp16 from mmtrack.utils import get_root_logger -class BaseSingleObjectTracker(nn.Module, metaclass=ABCMeta): - """Base class for single object tracker.""" +class BaseSingleObjectTracker(BaseModule, metaclass=ABCMeta): + """Base class for single object tracker. - def __init__(self): - super(BaseSingleObjectTracker, self).__init__() + Args: + init_cfg (dict or list[dict]): Initialization config dict. + """ + + def __init__(self, init_cfg): + super(BaseSingleObjectTracker, self).__init__(init_cfg) self.logger = get_root_logger() self.fp16_enabled = False - def init_module(self, module, pretrain=None): - """Initialize the weights in video detector. - - Args: - pretrained (str, optional): Path to pre-trained weights. - Defaults to None. 
- """ - if pretrain is not None: - print_log(f'load {module} from: {pretrain}', logger=self.logger) - load_checkpoint( - getattr(self, module), - pretrain, - strict=False, - logger=self.logger) - else: - getattr(self, module).init_weights() - def freeze_module(self, module): """Freeze module during training.""" if isinstance(module, str): diff --git a/mmtrack/models/sot/siamrpn.py b/mmtrack/models/sot/siamrpn.py index 60a7f9052..04c5ee8e1 100644 --- a/mmtrack/models/sot/siamrpn.py +++ b/mmtrack/models/sot/siamrpn.py @@ -1,3 +1,5 @@ +import warnings + import numpy as np import torch from addict import Dict @@ -20,13 +22,23 @@ class SiamRPN(BaseSingleObjectTracker): def __init__(self, pretrains=None, + init_cfg=None, backbone=None, neck=None, head=None, frozen_modules=None, train_cfg=None, test_cfg=None): - super(SiamRPN, self).__init__() + super(SiamRPN, self).__init__(init_cfg) + if isinstance(pretrains, dict): + warnings.warn('DeprecationWarning: pretrains is deprecated, ' + 'please use "init_cfg" instead') + backbone_pretrain = pretrains.get('backbone', None) + if backbone_pretrain: + backbone.init_cfg = dict( + type='Pretrained', checkpoint=backbone_pretrain) + else: + backbone.init_cfg = None self.backbone = build_backbone(backbone) if neck is not None: self.neck = build_neck(neck) @@ -37,21 +49,16 @@ def __init__(self, self.test_cfg = test_cfg self.train_cfg = train_cfg - self.init_weights(pretrains) if frozen_modules is not None: self.freeze_module(frozen_modules) - def init_weights(self, pretrain): - """Initialize the weights of modules in single object tracker. - - Args: - pretrained (dict): Path to pre-trained weights. - """ - if pretrain is None: - pretrain = dict() - assert isinstance(pretrain, dict), '`pretrain` must be a dict.' - if self.with_backbone and pretrain.get('backbone', False): - self.init_module('backbone', pretrain['backbone']) + def init_weights(self): + """Initialize the weights of modules in single object tracker.""" + # We don't use the `init_weights()` function in BaseModule, since it + # doesn't support the initialization method from `reset_parameters()` + # in Pytorch. + if self.with_backbone: + self.backbone.init_weights() if self.with_neck: for m in self.neck.modules(): diff --git a/mmtrack/models/track_heads/siamese_rpn_head.py b/mmtrack/models/track_heads/siamese_rpn_head.py index 1380edf29..946551877 100644 --- a/mmtrack/models/track_heads/siamese_rpn_head.py +++ b/mmtrack/models/track_heads/siamese_rpn_head.py @@ -1,7 +1,7 @@ import torch import torch.nn as nn from mmcv.cnn.bricks import ConvModule -from mmcv.runner import auto_fp16, force_fp32 +from mmcv.runner import BaseModule, auto_fp16, force_fp32 from mmdet.core import build_assigner, build_bbox_coder, build_sampler from mmdet.core.anchor import build_anchor_generator from mmdet.core.bbox.transforms import bbox_cxcywh_to_xyxy, bbox_xyxy_to_cxcywh @@ -11,7 +11,7 @@ @HEADS.register_module() -class CorrelationHead(nn.Module): +class CorrelationHead(BaseModule): """Correlation head module. This module is proposed in @@ -27,6 +27,8 @@ class CorrelationHead(nn.Module): Defaults to dict(type='BN'). act_cfg (dict): Configuration of activation method after each conv. Defaults to dict(type='ReLU'). + init_cfg (dict or list[dict], optional): Initialization config dict. + Defaults to None. 
""" def __init__(self, @@ -36,8 +38,9 @@ def __init__(self, kernel_size=3, norm_cfg=dict(type='BN'), act_cfg=dict(type='ReLU'), + init_cfg=None, **kwargs): - super(CorrelationHead, self).__init__() + super(CorrelationHead, self).__init__(init_cfg) self.kernel_convs = ConvModule( in_channels=in_channels, out_channels=mid_channels, @@ -74,7 +77,7 @@ def forward(self, kernel, search): @HEADS.register_module() -class SiameseRPNHead(nn.Module): +class SiameseRPNHead(BaseModule): """Siamese RPN head. This module is proposed in @@ -109,6 +112,9 @@ class SiameseRPNHead(nn.Module): train_cfg (Dict): Training setting. Defaults to None. test_cfg (Dict): Testing setting. Defaults to None. + + init_cfg (dict or list[dict], optional): Initialization config dict. + Defaults to None. """ def __init__(self, @@ -128,9 +134,10 @@ def __init__(self, type='L1Loss', reduction='sum', loss_weight=1.2), train_cfg=None, test_cfg=None, + init_cfg=None, *args, **kwargs): - super(SiameseRPNHead, self).__init__(*args, **kwargs) + super(SiameseRPNHead, self).__init__(init_cfg) self.anchor_generator = build_anchor_generator(anchor_generator) self.bbox_coder = build_bbox_coder(bbox_coder) self.train_cfg = train_cfg From c9fc34972c287da0ab08f4123d465241d69faead Mon Sep 17 00:00:00 2001 From: Tao Gong Date: Fri, 6 Aug 2021 17:17:29 +0800 Subject: [PATCH 03/10] modify docstring when unifying model initialization for vid --- mmtrack/models/aggregators/embed_aggregator.py | 2 ++ mmtrack/models/aggregators/selsa_aggregator.py | 2 ++ mmtrack/models/motion/flownet_simple.py | 2 ++ mmtrack/models/vid/base.py | 6 +++++- 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/mmtrack/models/aggregators/embed_aggregator.py b/mmtrack/models/aggregators/embed_aggregator.py index 778d168ed..2531af1a0 100644 --- a/mmtrack/models/aggregators/embed_aggregator.py +++ b/mmtrack/models/aggregators/embed_aggregator.py @@ -21,6 +21,8 @@ class EmbedAggregator(BaseModule): conv. Defaults to None. act_cfg (dict): Configuration of activation method after each conv. Defaults to dict(type='ReLU'). + init_cfg (dict or list[dict], optional): Initialization config dict. + Defaults to None. """ def __init__(self, diff --git a/mmtrack/models/aggregators/selsa_aggregator.py b/mmtrack/models/aggregators/selsa_aggregator.py index 82e6be1cc..28357b351 100644 --- a/mmtrack/models/aggregators/selsa_aggregator.py +++ b/mmtrack/models/aggregators/selsa_aggregator.py @@ -17,6 +17,8 @@ class SelsaAggregator(BaseModule): proposal. num_attention_blocks (int): The number of attention blocks used in selsa aggregator module. Defaults to 16. + init_cfg (dict or list[dict], optional): Initialization config dict. + Defaults to None. """ def __init__(self, in_channels, num_attention_blocks=16, init_cfg=None): diff --git a/mmtrack/models/motion/flownet_simple.py b/mmtrack/models/motion/flownet_simple.py index 59e1f64f7..ff7b2334e 100644 --- a/mmtrack/models/motion/flownet_simple.py +++ b/mmtrack/models/motion/flownet_simple.py @@ -23,6 +23,8 @@ class FlowNetSimple(BaseModule): Defaults to [255.0, 255.0, 255.0]. flow_img_norm_mean (list): Used to center the values of image. Defaults to [0.411, 0.432, 0.450]. + init_cfg (dict or list[dict], optional): Initialization config dict. + Defaults to None. 
""" arch_setting = { diff --git a/mmtrack/models/vid/base.py b/mmtrack/models/vid/base.py index 05635719d..0408704b5 100644 --- a/mmtrack/models/vid/base.py +++ b/mmtrack/models/vid/base.py @@ -11,7 +11,11 @@ class BaseVideoDetector(BaseModule, metaclass=ABCMeta): - """Base class for video object detector.""" + """Base class for video object detector. + + Args: + init_cfg (dict or list[dict], optional): Initialization config dict. + """ def __init__(self, init_cfg): super(BaseVideoDetector, self).__init__(init_cfg) From ab8366725c5d7fb623e9c7d73a065bdd76672f1f Mon Sep 17 00:00:00 2001 From: Tao Gong Date: Sun, 8 Aug 2021 20:21:39 +0800 Subject: [PATCH 04/10] unify model initialization for mot_det training --- configs/_base_/models/faster_rcnn_r50_fpn.py | 5 +++-- configs/det/faster-rcnn_r101_fpn_4e_mot17-half.py | 13 +++++++++---- configs/det/faster-rcnn_r50_fpn_4e_mot17-half.py | 11 ++++++----- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/configs/_base_/models/faster_rcnn_r50_fpn.py b/configs/_base_/models/faster_rcnn_r50_fpn.py index 688dd0f6e..98310030b 100644 --- a/configs/_base_/models/faster_rcnn_r50_fpn.py +++ b/configs/_base_/models/faster_rcnn_r50_fpn.py @@ -1,7 +1,6 @@ model = dict( detector=dict( type='FasterRCNN', - pretrained='torchvision://resnet50', backbone=dict( type='ResNet', depth=50, @@ -10,7 +9,9 @@ frozen_stages=1, norm_cfg=dict(type='BN', requires_grad=True), norm_eval=True, - style='pytorch'), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='torchvision://resnet50')), neck=dict( type='FPN', in_channels=[256, 512, 1024, 2048], diff --git a/configs/det/faster-rcnn_r101_fpn_4e_mot17-half.py b/configs/det/faster-rcnn_r101_fpn_4e_mot17-half.py index 64c60304b..ab3de8801 100644 --- a/configs/det/faster-rcnn_r101_fpn_4e_mot17-half.py +++ b/configs/det/faster-rcnn_r101_fpn_4e_mot17-half.py @@ -2,7 +2,12 @@ _base_ = ['./faster-rcnn_r50_fpn_4e_mot17-half.py'] model = dict( detector=dict( - pretrained='torchvision://resnet101', backbone=dict(depth=101))) -load_from = ('http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/' - 'faster_rcnn_r101_fpn_2x_coco/faster_rcnn_r101_fpn_2x_coco_bbox_' - 'mAP-0.398_20200504_210455-1d2dac9c.pth') + backbone=dict( + depth=101, + init_cfg=dict( + type='Pretrained', checkpoint='torchvision://resnet101')), + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_2x_coco/faster_rcnn_r101_fpn_2x_coco_bbox_mAP-0.398_20200504_210455-1d2dac9c.pth' # noqa: E501 + ))) diff --git a/configs/det/faster-rcnn_r50_fpn_4e_mot17-half.py b/configs/det/faster-rcnn_r50_fpn_4e_mot17-half.py index 644ee28e2..a89479096 100644 --- a/configs/det/faster-rcnn_r50_fpn_4e_mot17-half.py +++ b/configs/det/faster-rcnn_r50_fpn_4e_mot17-half.py @@ -7,8 +7,12 @@ detector=dict( rpn_head=dict(bbox_coder=dict(clip_border=False)), roi_head=dict( - bbox_head=dict(bbox_coder=dict( - clip_border=False), num_classes=1)))) + bbox_head=dict(bbox_coder=dict(clip_border=False), num_classes=1)), + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth' # noqa: E501 + ))) # learning policy lr_config = dict( policy='step', @@ -18,6 +22,3 @@ step=[3]) # runtime settings total_epochs = 4 -load_from = ('http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/' - 
'faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_' - 'bbox_mAP-0.384_20200504_210434-a5d8aa15.pth') From f2dc9650e6a3fda48904dc07b249f4a08ea11de9 Mon Sep 17 00:00:00 2001 From: Tao Gong Date: Sun, 8 Aug 2021 21:42:50 +0800 Subject: [PATCH 05/10] unify model initialization for mot_reid training --- configs/reid/resnet50_b32x8_MOT17.py | 10 ++++++---- mmtrack/models/reid/base_reid.py | 3 --- mmtrack/models/reid/fc_module.py | 22 ++++++++-------------- mmtrack/models/reid/linear_reid_head.py | 21 +++++++++++---------- requirements/runtime.txt | 2 +- 5 files changed, 26 insertions(+), 32 deletions(-) diff --git a/configs/reid/resnet50_b32x8_MOT17.py b/configs/reid/resnet50_b32x8_MOT17.py index 8b25419d3..5788a1286 100644 --- a/configs/reid/resnet50_b32x8_MOT17.py +++ b/configs/reid/resnet50_b32x8_MOT17.py @@ -23,7 +23,12 @@ loss_pairwise=dict( type='TripletLoss', margin=0.3, loss_weight=1.0), norm_cfg=dict(type='BN1d'), - act_cfg=dict(type='ReLU')))) + act_cfg=dict(type='ReLU')), + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_batch256_imagenet_20200708-cfb998bf.pth' # noqa: E501 + ))) # optimizer optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) optimizer_config = dict(grad_clip=None) @@ -35,6 +40,3 @@ warmup_ratio=1.0 / 1000, step=[5]) total_epochs = 6 - -load_from = 'https://download.openmmlab.com/mmclassification/v0/resnet/' \ - 'resnet50_batch256_imagenet_20200708-cfb998bf.pth' diff --git a/mmtrack/models/reid/base_reid.py b/mmtrack/models/reid/base_reid.py index 8798770b0..161fa3e9c 100644 --- a/mmtrack/models/reid/base_reid.py +++ b/mmtrack/models/reid/base_reid.py @@ -8,9 +8,6 @@ class BaseReID(ImageClassifier): """Base class for re-identification.""" - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - def forward_train(self, img, gt_label, **kwargs): """"Training forward function.""" if img.ndim == 5: diff --git a/mmtrack/models/reid/fc_module.py b/mmtrack/models/reid/fc_module.py index 3701aa0fb..f30f97bd0 100644 --- a/mmtrack/models/reid/fc_module.py +++ b/mmtrack/models/reid/fc_module.py @@ -1,9 +1,9 @@ import torch.nn as nn -from mmcv.cnn import (build_activation_layer, build_norm_layer, constant_init, - kaiming_init) +from mmcv.cnn import build_activation_layer, build_norm_layer +from mmcv.runner import BaseModule -class FcModule(nn.Module): +class FcModule(BaseModule): """Fully-connected layer module. Args: @@ -14,6 +14,8 @@ class FcModule(nn.Module): act_cfg (dict, optional): Configuration of activation method after fc. Defaults to dict(type='ReLU'). inplace (bool, optional): Whether inplace the activatation module. + init_cfg (dict or list[dict], optional): Initialization config dict. + Defaults to dict(type='Kaiming', layer='fc'). 
""" def __init__(self, @@ -21,8 +23,9 @@ def __init__(self, out_channels, norm_cfg=None, act_cfg=dict(type='ReLU'), - inplace=True): - super(FcModule, self).__init__() + inplace=True, + init_cfg=dict(type='Kaiming', layer='fc')): + super(FcModule, self).__init__(init_cfg) assert norm_cfg is None or isinstance(norm_cfg, dict) assert act_cfg is None or isinstance(act_cfg, dict) self.norm_cfg = norm_cfg @@ -48,20 +51,11 @@ def __init__(self, act_cfg_.setdefault('inplace', inplace) self.activate = build_activation_layer(act_cfg_) - # Use msra init by default - self.init_weights() - @property def norm(self): """Normalization.""" return getattr(self, self.norm_name) - def init_weights(self): - """Initialize weights.""" - kaiming_init(self.fc) - if self.with_norm: - constant_init(self.norm, 1, bias=0) - def forward(self, x, activate=True, norm=True): """Model forward.""" x = self.fc(x) diff --git a/mmtrack/models/reid/linear_reid_head.py b/mmtrack/models/reid/linear_reid_head.py index 5aae09d77..61d4458fc 100644 --- a/mmtrack/models/reid/linear_reid_head.py +++ b/mmtrack/models/reid/linear_reid_head.py @@ -4,7 +4,6 @@ from mmcls.models.builder import HEADS from mmcls.models.heads.base_head import BaseHead from mmcls.models.losses import Accuracy -from mmcv.cnn import constant_init, normal_init from mmcv.runner import auto_fp16, force_fp32 from mmdet.models.builder import build_loss @@ -30,6 +29,9 @@ class LinearReIDHead(BaseHead): loss_pairwise (dict, optional): Triplet loss to train the re-identificaiton module. topk (int, optional): Calculate topk accuracy. Default to False. + init_cfg (dict or list[dict], optional): Initialization config dict. + Defaults to dict(type='Normal',layer=['fc_out', 'classifier'], + mean=0, std=0.01, bias=0). """ def __init__(self, @@ -42,8 +44,14 @@ def __init__(self, num_classes=None, loss=None, loss_pairwise=None, - topk=(1, )): - super(LinearReIDHead, self).__init__() + topk=(1, ), + init_cfg=dict( + type='Normal', + layer=['fc_out', 'classifier'], + mean=0, + std=0.01, + bias=0)): + super(LinearReIDHead, self).__init__(init_cfg) assert isinstance(topk, (int, tuple)) if isinstance(topk, int): topk = (topk, ) @@ -92,13 +100,6 @@ def _init_layers(self): self.bn = nn.BatchNorm1d(self.out_channels) self.classifier = nn.Linear(self.out_channels, self.num_classes) - def init_weights(self): - """Initalize model weights.""" - normal_init(self.fc_out, mean=0, std=0.01, bias=0) - if self.loss_cls: - constant_init(self.bn, 1, bias=0) - normal_init(self.classifier, mean=0, std=0.01, bias=0) - @auto_fp16() def forward_train(self, x): """Model forward.""" diff --git a/requirements/runtime.txt b/requirements/runtime.txt index b29771695..73aae33f7 100644 --- a/requirements/runtime.txt +++ b/requirements/runtime.txt @@ -1,6 +1,6 @@ dotty_dict matplotlib -mmcls==0.12.0 +mmcls==0.14.0 mmpycocotools motmetrics numpy From 1907e93ec39e3db93fcf2b5f6927bbdf641e0ce3 Mon Sep 17 00:00:00 2001 From: Tao Gong Date: Sun, 8 Aug 2021 22:59:12 +0800 Subject: [PATCH 06/10] unify model initialization for mot testing --- ...t_faster-rcnn_fpn_4e_mot17-private-half.py | 21 ++++++---- ...t_faster-rcnn_fpn_4e_mot17-private-half.py | 12 +++--- .../sort_faster-rcnn_fpn_4e_mot17-private.py | 10 +++-- .../sort_faster-rcnn_fpn_4e_mot17-public.py | 10 +++-- ...ster-rcnn_r50_fpn_4e_mot15-private-half.py | 21 ++++++---- ...aster-rcnn_r50_fpn_4e_mot15-public-half.py | 21 ++++++---- ...ster-rcnn_r50_fpn_4e_mot16-private-half.py | 21 ++++++---- ...aster-rcnn_r50_fpn_4e_mot16-public-half.py | 21 ++++++---- 
...ster-rcnn_r50_fpn_4e_mot17-private-half.py | 22 ++++++---- ...or_faster-rcnn_r50_fpn_4e_mot17-private.py | 11 +++-- ...tor_faster-rcnn_r50_fpn_4e_mot17-public.py | 11 +++-- ...ster-rcnn_r50_fpn_8e_mot20-private-half.py | 22 ++++++---- ...aster-rcnn_r50_fpn_8e_mot20-public-half.py | 22 ++++++---- ...tor_faster-rcnn_r50_fpn_8e_mot20-public.py | 22 ++++++---- mmtrack/apis/inference.py | 2 + mmtrack/models/mot/base.py | 29 ++----------- mmtrack/models/mot/deep_sort.py | 41 +++++++++++-------- mmtrack/models/mot/tracktor.py | 40 ++++++++++-------- tools/test.py | 2 + 19 files changed, 208 insertions(+), 153 deletions(-) diff --git a/configs/mot/deepsort/deepsort_faster-rcnn_fpn_4e_mot17-private-half.py b/configs/mot/deepsort/deepsort_faster-rcnn_fpn_4e_mot17-private-half.py index 4d858e2fa..0a7c42894 100644 --- a/configs/mot/deepsort/deepsort_faster-rcnn_fpn_4e_mot17-private-half.py +++ b/configs/mot/deepsort/deepsort_faster-rcnn_fpn_4e_mot17-private-half.py @@ -4,17 +4,15 @@ ] model = dict( type='DeepSORT', - pretrains=dict( - detector= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_4e_mot17-half-64ee2ed4.pth', # noqa: E501 - reid= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/mot/reid/tracktor_reid_r50_iter25245-a452f51f.pth' # noqa: E501 - ), detector=dict( rpn_head=dict(bbox_coder=dict(clip_border=False)), roi_head=dict( - bbox_head=dict(bbox_coder=dict( - clip_border=False), num_classes=1))), + bbox_head=dict(bbox_coder=dict(clip_border=False), num_classes=1)), + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_4e_mot17-half-64ee2ed4.pth' # noqa: E501 + )), motion=dict(type='KalmanFilter', center_only=False), reid=dict( type='BaseReID', @@ -36,7 +34,12 @@ loss_pairwise=dict( type='TripletLoss', margin=0.3, loss_weight=1.0), norm_cfg=dict(type='BN1d'), - act_cfg=dict(type='ReLU'))), + act_cfg=dict(type='ReLU')), + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/mot/reid/tracktor_reid_r50_iter25245-a452f51f.pth' # noqa: E501 + )), tracker=dict( type='SortTracker', obj_score_thr=0.5, diff --git a/configs/mot/deepsort/sort_faster-rcnn_fpn_4e_mot17-private-half.py b/configs/mot/deepsort/sort_faster-rcnn_fpn_4e_mot17-private-half.py index e48726149..7573565eb 100644 --- a/configs/mot/deepsort/sort_faster-rcnn_fpn_4e_mot17-private-half.py +++ b/configs/mot/deepsort/sort_faster-rcnn_fpn_4e_mot17-private-half.py @@ -4,15 +4,15 @@ ] model = dict( type='DeepSORT', - pretrains=dict( - detector= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_4e_mot17-half-64ee2ed4.pth' # noqa: E501 - ), detector=dict( rpn_head=dict(bbox_coder=dict(clip_border=False)), roi_head=dict( - bbox_head=dict(bbox_coder=dict( - clip_border=False), num_classes=1))), + bbox_head=dict(bbox_coder=dict(clip_border=False), num_classes=1)), + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_4e_mot17-half-64ee2ed4.pth' # noqa: E501 + )), motion=dict(type='KalmanFilter', center_only=False), tracker=dict( type='SortTracker', obj_score_thr=0.5, match_iou_thr=0.5, reid=None)) diff --git a/configs/mot/deepsort/sort_faster-rcnn_fpn_4e_mot17-private.py b/configs/mot/deepsort/sort_faster-rcnn_fpn_4e_mot17-private.py index 6d316bcd8..30ecde712 100644 --- 
a/configs/mot/deepsort/sort_faster-rcnn_fpn_4e_mot17-private.py +++ b/configs/mot/deepsort/sort_faster-rcnn_fpn_4e_mot17-private.py @@ -1,9 +1,11 @@ _base_ = ['./sort_faster-rcnn_fpn_4e_mot17-private-half.py'] model = dict( - pretrains=dict( - detector= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_4e_mot17-ffa52ae7.pth' # noqa: E501 - )) + detector=dict( + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_4e_mot17-ffa52ae7.pth' # noqa: E501 + ))) data_root = 'data/MOT17/' test_set = 'train' data = dict( diff --git a/configs/mot/deepsort/sort_faster-rcnn_fpn_4e_mot17-public.py b/configs/mot/deepsort/sort_faster-rcnn_fpn_4e_mot17-public.py index fb015ba1d..c856664dd 100644 --- a/configs/mot/deepsort/sort_faster-rcnn_fpn_4e_mot17-public.py +++ b/configs/mot/deepsort/sort_faster-rcnn_fpn_4e_mot17-public.py @@ -1,9 +1,11 @@ _base_ = ['./sort_faster-rcnn_fpn_4e_mot17-public-half.py'] model = dict( - pretrains=dict( - detector= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_4e_mot17-ffa52ae7.pth' # noqa: E501 - )) + detector=dict( + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_4e_mot17-ffa52ae7.pth' # noqa: E501 + ))) data_root = 'data/MOT17/' test_set = 'train' data = dict( diff --git a/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot15-private-half.py b/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot15-private-half.py index 1b9cf5279..47deab62b 100644 --- a/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot15-private-half.py +++ b/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot15-private-half.py @@ -1,12 +1,19 @@ _base_ = ['./tracktor_faster-rcnn_r50_fpn_4e_mot17-private-half.py'] + model = dict( - pretrains=dict( - detector= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_4e_mot15-half_20210804_001040-ae733d0c.pth', # noqa: E501 - reid= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/mot/reid/reid_r50_6e_mot15_20210803_192157-65b5e2d7.pth' # noqa: E501 - ), - reid=dict(head=dict(num_classes=375))) + detector=dict( + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_4e_mot15-half_20210804_001040-ae733d0c.pth' # noqa: E501 + )), + reid=dict( + head=dict(num_classes=375), + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/mot/reid/reid_r50_6e_mot15_20210803_192157-65b5e2d7.pth' # noqa: E501 + ))) # data data_root = 'data/MOT15/' data = dict( diff --git a/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot15-public-half.py b/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot15-public-half.py index 8b63bd8e0..87d65e2ff 100644 --- a/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot15-public-half.py +++ b/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot15-public-half.py @@ -1,12 +1,19 @@ _base_ = ['./tracktor_faster-rcnn_r50_fpn_4e_mot17-private-half.py'] + model = dict( - pretrains=dict( - detector= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_4e_mot15-half_20210804_001040-ae733d0c.pth', # noqa: E501 - reid= # noqa: E251 - 
'https://download.openmmlab.com/mmtracking/mot/reid/reid_r50_6e_mot15_20210803_192157-65b5e2d7.pth' # noqa: E501 - ), - reid=dict(head=dict(num_classes=375))) + detector=dict( + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_4e_mot15-half_20210804_001040-ae733d0c.pth' # noqa: E501 + )), + reid=dict( + head=dict(num_classes=375), + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/mot/reid/reid_r50_6e_mot15_20210803_192157-65b5e2d7.pth' # noqa: E501 + ))) # data data_root = 'data/MOT15/' img_norm_cfg = dict( diff --git a/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot16-private-half.py b/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot16-private-half.py index f312320ca..3cdff65e3 100644 --- a/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot16-private-half.py +++ b/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot16-private-half.py @@ -1,12 +1,19 @@ _base_ = ['./tracktor_faster-rcnn_r50_fpn_4e_mot17-private-half.py'] + model = dict( - pretrains=dict( - detector= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_4e_mot16-half_20210804_001054-73477869.pth', # noqa: E501 - reid= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/mot/reid/reid_r50_6e_mot16_20210803_204826-1b3e3cfd.pth' # noqa: E501 - ), - reid=dict(head=dict(num_classes=375))) + detector=dict( + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_4e_mot16-half_20210804_001054-73477869.pth' # noqa: E501 + )), + reid=dict( + head=dict(num_classes=375), + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/mot/reid/reid_r50_6e_mot16_20210803_204826-1b3e3cfd.pth' # noqa: E501 + ))) # data data_root = 'data/MOT16/' data = dict( diff --git a/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot16-public-half.py b/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot16-public-half.py index 2fbe7b59b..0de3c1aef 100644 --- a/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot16-public-half.py +++ b/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot16-public-half.py @@ -1,12 +1,19 @@ _base_ = ['./tracktor_faster-rcnn_r50_fpn_4e_mot17-private-half.py'] + model = dict( - pretrains=dict( - detector= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_4e_mot16-half_20210804_001054-73477869.pth', # noqa: E501 - reid= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/mot/reid/reid_r50_6e_mot16_20210803_204826-1b3e3cfd.pth' # noqa: E501 - ), - reid=dict(head=dict(num_classes=375))) + detector=dict( + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_4e_mot16-half_20210804_001054-73477869.pth' # noqa: E501 + )), + reid=dict( + head=dict(num_classes=375), + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/mot/reid/reid_r50_6e_mot16_20210803_204826-1b3e3cfd.pth' # noqa: E501 + ))) # data data_root = 'data/MOT16/' img_norm_cfg = dict( diff --git a/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot17-private-half.py b/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot17-private-half.py index ff2c78859..acbba69b4 100644 --- 
a/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot17-private-half.py +++ b/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot17-private-half.py @@ -2,19 +2,18 @@ '../../_base_/models/faster_rcnn_r50_fpn.py', '../../_base_/datasets/mot_challenge.py', '../../_base_/default_runtime.py' ] + model = dict( type='Tracktor', - pretrains=dict( - detector= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_4e_mot17-half-64ee2ed4.pth', # noqa: E501 - reid= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/mot/reid/reid_r50_6e_mot17-4bf6b63d.pth' # noqa: E501 - ), detector=dict( rpn_head=dict(bbox_coder=dict(clip_border=False)), roi_head=dict( - bbox_head=dict(bbox_coder=dict( - clip_border=False), num_classes=1))), + bbox_head=dict(bbox_coder=dict(clip_border=False), num_classes=1)), + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_4e_mot17-half-64ee2ed4.pth' # noqa: E501 + )), reid=dict( type='BaseReID', backbone=dict( @@ -35,7 +34,12 @@ loss_pairwise=dict( type='TripletLoss', margin=0.3, loss_weight=1.0), norm_cfg=dict(type='BN1d'), - act_cfg=dict(type='ReLU'))), + act_cfg=dict(type='ReLU')), + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/mot/reid/reid_r50_6e_mot17-4bf6b63d.pth' # noqa: E501 + )), motion=dict( type='CameraMotionCompensation', warp_mode='cv2.MOTION_EUCLIDEAN', diff --git a/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot17-private.py b/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot17-private.py index bdae5efad..faf6d6f45 100644 --- a/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot17-private.py +++ b/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot17-private.py @@ -1,9 +1,12 @@ _base_ = ['./tracktor_faster-rcnn_r50_fpn_4e_mot17-private-half.py'] + model = dict( - pretrains=dict( - detector= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_4e_mot17-ffa52ae7.pth' # noqa: E501 - )) + detector=dict( + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_4e_mot17-ffa52ae7.pth' # noqa: E501 + ))) # data data_root = 'data/MOT17/' test_set = 'train' diff --git a/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot17-public.py b/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot17-public.py index 6a023de3e..a0e425c52 100644 --- a/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot17-public.py +++ b/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot17-public.py @@ -1,9 +1,12 @@ _base_ = ['./tracktor_faster-rcnn_r50_fpn_4e_mot17-private-half.py'] + model = dict( - pretrains=dict( - detector= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_4e_mot17-ffa52ae7.pth' # noqa: E501 - )) + detector=dict( + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_4e_mot17-ffa52ae7.pth' # noqa: E501 + ))) # data data_root = 'data/MOT17/' test_set = 'test' diff --git a/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_8e_mot20-private-half.py b/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_8e_mot20-private-half.py index e48ca6ae4..61e3a5cb1 100644 --- a/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_8e_mot20-private-half.py +++ 
b/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_8e_mot20-private-half.py @@ -1,16 +1,22 @@ _base_ = ['./tracktor_faster-rcnn_r50_fpn_4e_mot17-private-half.py'] + model = dict( - pretrains=dict( - detector= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_8e_mot20-half_20210805_001244-2c323fd1.pth', # noqa: E501 - reid= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/mot/reid/reid_r50_6e_mot20_20210803_212426-c83b1c01.pth' # noqa: E501 - ), detector=dict( rpn_head=dict(bbox_coder=dict(clip_border=True)), roi_head=dict( - bbox_head=dict(bbox_coder=dict(clip_border=True), num_classes=1))), - reid=dict(head=dict(num_classes=1705))) + bbox_head=dict(bbox_coder=dict(clip_border=True), num_classes=1)), + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_8e_mot20-half_20210805_001244-2c323fd1.pth' # noqa: E501 + )), + reid=dict( + head=dict(num_classes=1705), + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/mot/reid/reid_r50_6e_mot20_20210803_212426-c83b1c01.pth' # noqa: E501 + ))) # data data_root = 'data/MOT20/' data = dict( diff --git a/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_8e_mot20-public-half.py b/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_8e_mot20-public-half.py index 516c4b133..a5b0afc8a 100644 --- a/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_8e_mot20-public-half.py +++ b/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_8e_mot20-public-half.py @@ -1,16 +1,22 @@ _base_ = ['./tracktor_faster-rcnn_r50_fpn_4e_mot17-private-half.py'] + model = dict( - pretrains=dict( - detector= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_8e_mot20-half_20210805_001244-2c323fd1.pth', # noqa: E501 - reid= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/mot/reid/reid_r50_6e_mot20_20210803_212426-c83b1c01.pth' # noqa: E501 - ), detector=dict( rpn_head=dict(bbox_coder=dict(clip_border=True)), roi_head=dict( - bbox_head=dict(bbox_coder=dict(clip_border=True), num_classes=1))), - reid=dict(head=dict(num_classes=1705))) + bbox_head=dict(bbox_coder=dict(clip_border=True), num_classes=1)), + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_8e_mot20-half_20210805_001244-2c323fd1.pth' # noqa: E501 + )), + reid=dict( + head=dict(num_classes=1705), + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/mot/reid/reid_r50_6e_mot20_20210803_212426-c83b1c01.pth' # noqa: E501 + ))) data_root = 'data/MOT20/' img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) diff --git a/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_8e_mot20-public.py b/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_8e_mot20-public.py index d11746ca9..14aabcf52 100644 --- a/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_8e_mot20-public.py +++ b/configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_8e_mot20-public.py @@ -1,16 +1,22 @@ _base_ = ['./tracktor_faster-rcnn_r50_fpn_4e_mot17-private-half.py'] + model = dict( - pretrains=dict( - detector= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_8e_mot20_20210804_162232-7fde5e8d.pth', # noqa: E501 - reid= # noqa: E251 - 
'https://download.openmmlab.com/mmtracking/mot/reid/reid_r50_6e_mot20_20210803_212426-c83b1c01.pth' # noqa: E501 - ), detector=dict( rpn_head=dict(bbox_coder=dict(clip_border=True)), roi_head=dict( - bbox_head=dict(bbox_coder=dict(clip_border=True), num_classes=1))), - reid=dict(head=dict(num_classes=1705))) + bbox_head=dict(bbox_coder=dict(clip_border=True), num_classes=1)), + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/mot/faster_rcnn/faster-rcnn_r50_fpn_8e_mot20_20210804_162232-7fde5e8d.pth' # noqa: E501 + )), + reid=dict( + head=dict(num_classes=1705), + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/mot/reid/reid_r50_6e_mot20_20210803_212426-c83b1c01.pth' # noqa: E501 + ))) data_root = 'data/MOT20/' test_set = 'test' data = dict( diff --git a/mmtrack/apis/inference.py b/mmtrack/apis/inference.py index 9f81beff5..627023cf7 100644 --- a/mmtrack/apis/inference.py +++ b/mmtrack/apis/inference.py @@ -32,6 +32,8 @@ def init_model(config, checkpoint=None, device='cuda:0', cfg_options=None): if 'detector' in config.model: config.model.detector.pretrained = None model = build_model(config.model) + # We need call `init_weights()` to load pretained weights in MOT task. + model.init_weights() if checkpoint is not None: map_loc = 'cpu' if device == 'cpu' else None checkpoint = load_checkpoint(model, checkpoint, map_location=map_loc) diff --git a/mmtrack/models/mot/base.py b/mmtrack/models/mot/base.py index c796c83ae..541961be9 100644 --- a/mmtrack/models/mot/base.py +++ b/mmtrack/models/mot/base.py @@ -3,41 +3,20 @@ import torch import torch.distributed as dist -import torch.nn as nn -from mmcv.runner import auto_fp16, load_checkpoint -from mmcv.utils import print_log +from mmcv.runner import BaseModule, auto_fp16 from mmtrack.core import imshow_tracks, restore_result from mmtrack.utils import get_root_logger -class BaseMultiObjectTracker(nn.Module, metaclass=ABCMeta): +class BaseMultiObjectTracker(BaseModule, metaclass=ABCMeta): """Base class for multiple object tracking.""" - def __init__(self): - super(BaseMultiObjectTracker, self).__init__() + def __init__(self, init_cfg): + super(BaseMultiObjectTracker, self).__init__(init_cfg) self.logger = get_root_logger() self.fp16_enabled = False - def init_module(self, module_name, pretrain=None): - """Initialize the weights of a sub-module. - - Args: - module (nn.Module): A sub-module of the model. - pretrained (str, optional): Path to pre-trained weights. - Defaults to None. 
- """ - module = getattr(self, module_name) - if pretrain is not None: - print_log( - f'load {module_name} from: {pretrain}', logger=self.logger) - checkpoint = load_checkpoint( - module, pretrain, strict=False, logger=self.logger) - if 'meta' in checkpoint and 'CLASSES' in checkpoint['meta']: - module.CLASSES = checkpoint['meta']['CLASSES'] - else: - module.init_weights() - def freeze_module(self, module): """Freeze module during training.""" if isinstance(module, str): diff --git a/mmtrack/models/mot/deep_sort.py b/mmtrack/models/mot/deep_sort.py index 09b1240d5..50dfccf78 100644 --- a/mmtrack/models/mot/deep_sort.py +++ b/mmtrack/models/mot/deep_sort.py @@ -1,3 +1,5 @@ +import warnings + from mmdet.core import bbox2result from mmdet.models import build_detector @@ -18,8 +20,27 @@ def __init__(self, reid=None, tracker=None, motion=None, - pretrains=None): - super().__init__() + pretrains=None, + init_cfg=None): + super().__init__(init_cfg) + if isinstance(pretrains, dict): + warnings.warn('DeprecationWarning: pretrains is deprecated, ' + 'please use "init_cfg" instead') + if detector: + detector_pretrain = pretrains.get('detector', None) + if detector_pretrain: + detector.init_cfg = dict( + type='Pretrained', checkpoint=detector_pretrain) + else: + detector.init_cfg = None + if reid: + reid_pretrain = pretrains.get('reid', None) + if reid_pretrain: + reid.init_cfg = dict( + type='Pretrained', checkpoint=reid_pretrain) + else: + reid.init_cfg = None + if detector is not None: self.detector = build_detector(detector) @@ -32,22 +53,6 @@ def __init__(self, if tracker is not None: self.tracker = build_tracker(tracker) - self.init_weights(pretrains) - - def init_weights(self, pretrain): - """Initialize the weights of the modules. - - Args: - pretrained (dict): Path to pre-trained weights. - """ - if pretrain is None: - pretrain = dict() - assert isinstance(pretrain, dict), '`pretrain` must be a dict.' - if self.with_detector and pretrain.get('detector', False): - self.init_module('detector', pretrain['detector']) - if self.with_reid and pretrain.get('reid', False): - self.init_module('reid', pretrain['reid']) - def forward_train(self, *args, **kwargs): """Forward function during training.""" raise NotImplementedError( diff --git a/mmtrack/models/mot/tracktor.py b/mmtrack/models/mot/tracktor.py index a894b7769..7efffaf89 100644 --- a/mmtrack/models/mot/tracktor.py +++ b/mmtrack/models/mot/tracktor.py @@ -1,3 +1,5 @@ +import warnings + from mmdet.core import bbox2result from mmdet.models import build_detector @@ -19,8 +21,26 @@ def __init__(self, reid=None, tracker=None, motion=None, - pretrains=None): - super().__init__() + pretrains=None, + init_cfg=None): + super().__init__(init_cfg) + if isinstance(pretrains, dict): + warnings.warn('DeprecationWarning: pretrains is deprecated, ' + 'please use "init_cfg" instead') + if detector: + detector_pretrain = pretrains.get('detector', None) + if detector_pretrain: + detector.init_cfg = dict( + type='Pretrained', checkpoint=detector_pretrain) + else: + detector.init_cfg = None + if reid: + reid_pretrain = pretrains.get('reid', None) + if reid_pretrain: + reid.init_cfg = dict( + type='Pretrained', checkpoint=reid_pretrain) + else: + reid.init_cfg = None if detector is not None: self.detector = build_detector(detector) @@ -40,22 +60,6 @@ def __init__(self, if tracker is not None: self.tracker = build_tracker(tracker) - self.init_weights(pretrains) - - def init_weights(self, pretrain): - """Initialize the weights of the modules. 
- - Args: - pretrained (dict): Path to pre-trained weights. - """ - if pretrain is None: - pretrain = dict() - assert isinstance(pretrain, dict), '`pretrain` must be a dict.' - if self.with_detector and pretrain.get('detector', False): - self.init_module('detector', pretrain['detector']) - if self.with_reid and pretrain.get('reid', False): - self.init_module('reid', pretrain['reid']) - @property def with_cmc(self): """bool: whether the framework has a camera model compensation diff --git a/tools/test.py b/tools/test.py index a447f4046..2dc155a67 100644 --- a/tools/test.py +++ b/tools/test.py @@ -132,6 +132,8 @@ def main(): cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg) else: model = build_model(cfg.model) + # We need to call `init_weights()` to load pretrained weights in the MOT task. + model.init_weights() fp16_cfg = cfg.get('fp16', None) if fp16_cfg is not None: wrap_fp16_model(model) From b0ac04c5d4a2bb5012b1d7085fa3e96750eb3a70 Mon Sep 17 00:00:00 2001 From: Tao Gong Date: Sun, 8 Aug 2021 22:59:52 +0800 Subject: [PATCH 07/10] tiny change of mmtrack/models/sot/siamrpn.py --- mmtrack/models/sot/siamrpn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mmtrack/models/sot/siamrpn.py b/mmtrack/models/sot/siamrpn.py index 04c5ee8e1..77531683c 100644 --- a/mmtrack/models/sot/siamrpn.py +++ b/mmtrack/models/sot/siamrpn.py @@ -21,9 +21,9 @@ class SiamRPN(BaseSingleObjectTracker): """ def __init__(self, + backbone, pretrains=None, init_cfg=None, - backbone=None, neck=None, head=None, frozen_modules=None, From 4f84fc6b54b7da10aaf8ab2695567d7420de5ade Mon Sep 17 00:00:00 2001 From: Tao Gong Date: Sun, 8 Aug 2021 23:19:53 +0800 Subject: [PATCH 08/10] unify model initialization for the rest code --- .../models/cascade_mask_rcnn_r50_fpn.py | 5 +++-- configs/_base_/models/cascade_rcnn_r50_fpn.py | 5 +++-- .../_base_/models/faster_rcnn_r50_caffe_c4.py | 6 ++++-- configs/_base_/models/retinanet_r50_fpn.py | 5 +++-- ...rcnn_r50_fpn_fp16_4e_mot17-private-half.py | 19 +++++++++++++------ mmtrack/models/mot/trackers/base_tracker.py | 10 ++++++---- mmtrack/models/mot/trackers/sort_tracker.py | 5 ++++- .../models/mot/trackers/tracktor_tracker.py | 5 ++++- tools/benchmark.py | 2 ++ tools/mot_param_search.py | 2 ++ 10 files changed, 44 insertions(+), 20 deletions(-) diff --git a/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py b/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py index 49ebfb2dc..51c185b49 100644 --- a/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py +++ b/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py @@ -2,7 +2,6 @@ model = dict( detector=dict( type='CascadeRCNN', - pretrained='torchvision://resnet50', backbone=dict( type='ResNet', depth=50, @@ -11,7 +10,9 @@ frozen_stages=1, norm_cfg=dict(type='BN', requires_grad=True), norm_eval=True, - style='pytorch'), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='torchvision://resnet50')), neck=dict( type='FPN', in_channels=[256, 512, 1024, 2048], diff --git a/configs/_base_/models/cascade_rcnn_r50_fpn.py b/configs/_base_/models/cascade_rcnn_r50_fpn.py index 4dfaf28e4..60c234f1f 100644 --- a/configs/_base_/models/cascade_rcnn_r50_fpn.py +++ b/configs/_base_/models/cascade_rcnn_r50_fpn.py @@ -2,7 +2,6 @@ model = dict( detector=dict( type='CascadeRCNN', - pretrained='torchvision://resnet50', backbone=dict( type='ResNet', depth=50, @@ -11,7 +10,9 @@ frozen_stages=1, norm_cfg=dict(type='BN', requires_grad=True), norm_eval=True, - style='pytorch'), + style='pytorch', + init_cfg=dict( + type='Pretrained',
checkpoint='torchvision://resnet50')), neck=dict( type='FPN', in_channels=[256, 512, 1024, 2048], diff --git a/configs/_base_/models/faster_rcnn_r50_caffe_c4.py b/configs/_base_/models/faster_rcnn_r50_caffe_c4.py index 82e19417d..c931541d8 100644 --- a/configs/_base_/models/faster_rcnn_r50_caffe_c4.py +++ b/configs/_base_/models/faster_rcnn_r50_caffe_c4.py @@ -3,7 +3,6 @@ model = dict( detector=dict( type='FasterRCNN', - pretrained='open-mmlab://detectron2/resnet50_caffe', backbone=dict( type='ResNet', depth=50, @@ -14,7 +13,10 @@ frozen_stages=1, norm_cfg=norm_cfg, norm_eval=True, - style='caffe'), + style='caffe', + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://detectron2/resnet50_caffe')), rpn_head=dict( type='RPNHead', in_channels=1024, diff --git a/configs/_base_/models/retinanet_r50_fpn.py b/configs/_base_/models/retinanet_r50_fpn.py index b164c6dde..72fadfe06 100644 --- a/configs/_base_/models/retinanet_r50_fpn.py +++ b/configs/_base_/models/retinanet_r50_fpn.py @@ -2,7 +2,6 @@ model = dict( detector=dict( type='RetinaNet', - pretrained='torchvision://resnet50', backbone=dict( type='ResNet', depth=50, @@ -11,7 +10,9 @@ frozen_stages=1, norm_cfg=dict(type='BN', requires_grad=True), norm_eval=True, - style='pytorch'), + style='pytorch', + init_cfg=dict( + type='Pretrained', checkpoint='torchvision://resnet50')), neck=dict( type='FPN', in_channels=[256, 512, 1024, 2048], diff --git a/configs/fp16/tracktor_faster-rcnn_r50_fpn_fp16_4e_mot17-private-half.py b/configs/fp16/tracktor_faster-rcnn_r50_fpn_fp16_4e_mot17-private-half.py index 32ed90eb4..deea807e0 100644 --- a/configs/fp16/tracktor_faster-rcnn_r50_fpn_fp16_4e_mot17-private-half.py +++ b/configs/fp16/tracktor_faster-rcnn_r50_fpn_fp16_4e_mot17-private-half.py @@ -1,11 +1,18 @@ _base_ = [ '../mot/tracktor/tracktor_faster-rcnn_r50_fpn_4e_mot17-private-half.py' ] + model = dict( - pretrains=dict( - detector= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/fp16/faster-rcnn_r50_fpn_fp16_4e_mot17-half_20210730_002436-f4ba7d61.pth', # noqa: E501 - reid= # noqa: E251 - 'https://download.openmmlab.com/mmtracking/fp16/reid_r50_fp16_8x32_6e_mot17_20210731_033055-4747ee95.pth' # noqa: E501 - )) + detector=dict( + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/fp16/faster-rcnn_r50_fpn_fp16_4e_mot17-half_20210730_002436-f4ba7d61.pth' # noqa: E501 + )), + reid=dict( + init_cfg=dict( + type='Pretrained', + checkpoint= # noqa: E251 + 'https://download.openmmlab.com/mmtracking/fp16/reid_r50_fp16_8x32_6e_mot17_20210731_033055-4747ee95.pth' # noqa: E501 + ))) fp16 = dict(loss_scale=512.) diff --git a/mmtrack/models/mot/trackers/base_tracker.py b/mmtrack/models/mot/trackers/base_tracker.py index 3eb39c0b5..9277bfe6c 100644 --- a/mmtrack/models/mot/trackers/base_tracker.py +++ b/mmtrack/models/mot/trackers/base_tracker.py @@ -1,15 +1,15 @@ from abc import ABCMeta, abstractmethod import torch -import torch.nn as nn import torch.nn.functional as F from addict import Dict +from mmcv.runner import BaseModule from mmtrack.models import TRACKERS @TRACKERS.register_module() -class BaseTracker(nn.Module, metaclass=ABCMeta): +class BaseTracker(BaseModule, metaclass=ABCMeta): """Base tracker model. Args: @@ -18,10 +18,12 @@ class BaseTracker(nn.Module, metaclass=ABCMeta): indicates the momentum. Default to None. num_frames_retain (int, optional). If a track is disappeared more than `num_frames_retain` frames, it will be deleted in the memo. 
+ init_cfg (dict or list[dict], optional): Initialization config dict. + Defaults to None. """ - def __init__(self, momentums=None, num_frames_retain=10): - super().__init__() + def __init__(self, momentums=None, num_frames_retain=10, init_cfg=None): + super().__init__(init_cfg) if momentums is not None: assert isinstance(momentums, dict), 'momentums must be a dict' self.momentums = momentums diff --git a/mmtrack/models/mot/trackers/sort_tracker.py b/mmtrack/models/mot/trackers/sort_tracker.py index ca6153d27..d788b786c 100644 --- a/mmtrack/models/mot/trackers/sort_tracker.py +++ b/mmtrack/models/mot/trackers/sort_tracker.py @@ -30,6 +30,8 @@ class SortTracker(BaseTracker): Defaults to 0.7. num_tentatives (int, optional): Number of continuous frames to confirm a track. Defaults to 3. + init_cfg (dict or list[dict], optional): Initialization config dict. + Defaults to None. """ def __init__(self, @@ -41,8 +43,9 @@ def __init__(self, match_score_thr=2.0), match_iou_thr=0.7, num_tentatives=3, + init_cfg=None, **kwargs): - super().__init__(**kwargs) + super().__init__(init_cfg=init_cfg, **kwargs) self.obj_score_thr = obj_score_thr self.reid = reid self.match_iou_thr = match_iou_thr diff --git a/mmtrack/models/mot/trackers/tracktor_tracker.py b/mmtrack/models/mot/trackers/tracktor_tracker.py index 01a55f2f2..5d4d071a4 100644 --- a/mmtrack/models/mot/trackers/tracktor_tracker.py +++ b/mmtrack/models/mot/trackers/tracktor_tracker.py @@ -35,6 +35,8 @@ class TracktorTracker(BaseTracker): matching process. Default to 2.0. - match_iou_thr (float, optional): Minimum IoU when matching objects with embedding similarity. Default to 0.2. + init_cfg (dict or list[dict], optional): Initialization config dict. + Defaults to None. """ def __init__(self, @@ -49,8 +51,9 @@ def __init__(self, img_norm_cfg=None, match_score_thr=2.0, match_iou_thr=0.2), + init_cfg=None, **kwargs): - super().__init__(**kwargs) + super().__init__(init_cfg=init_cfg, **kwargs) self.obj_score_thr = obj_score_thr self.regression = regression self.reid = reid diff --git a/tools/benchmark.py b/tools/benchmark.py index 9c8116173..9c15eb405 100644 --- a/tools/benchmark.py +++ b/tools/benchmark.py @@ -57,6 +57,8 @@ def main(): # build the model and load checkpoint model = build_model(cfg.model) + # We need to call `init_weights()` to load pretrained weights in the MOT task. + model.init_weights() fp16_cfg = cfg.get('fp16', None) if fp16_cfg is not None: wrap_fp16_model(model) diff --git a/tools/mot_param_search.py b/tools/mot_param_search.py index 6960ee9e3..2b686ac2c 100644 --- a/tools/mot_param_search.py +++ b/tools/mot_param_search.py @@ -156,6 +156,8 @@ def main(): cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg) else: model = build_model(cfg.model) + # We need to call `init_weights()` to load pretrained weights in the MOT task.
+ model.init_weights() fp16_cfg = cfg.get('fp16', None) if fp16_cfg is not None: wrap_fp16_model(model) From 893655021355c9135d2ea3ab51119ff018254466 Mon Sep 17 00:00:00 2001 From: Tao Gong Date: Sun, 8 Aug 2021 23:35:25 +0800 Subject: [PATCH 09/10] fix a typo in tools/slurm_search.sh and small change of mmtrack/models/sot/siamrpn.py --- mmtrack/models/sot/siamrpn.py | 4 ++-- tools/slurm_search.sh | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mmtrack/models/sot/siamrpn.py b/mmtrack/models/sot/siamrpn.py index 77531683c..9bf06a34d 100644 --- a/mmtrack/models/sot/siamrpn.py +++ b/mmtrack/models/sot/siamrpn.py @@ -22,10 +22,10 @@ class SiamRPN(BaseSingleObjectTracker): def __init__(self, backbone, - pretrains=None, - init_cfg=None, neck=None, head=None, + pretrains=None, + init_cfg=None, frozen_modules=None, train_cfg=None, test_cfg=None): diff --git a/tools/slurm_search.sh b/tools/slurm_search.sh index 22cad51b7..879039eb2 100755 --- a/tools/slurm_search.sh +++ b/tools/slurm_search.sh @@ -20,4 +20,4 @@ srun -p ${PARTITION} \ --cpus-per-task=${CPUS_PER_TASK} \ --kill-on-bad-exit=1 \ ${SRUN_ARGS} \ - python -u tools/search.py ${CONFIG} --launcher="slurm" ${PY_ARGS} + python -u tools/mot_param_search.py ${CONFIG} --launcher="slurm" ${PY_ARGS} From 4d8920099af07f8cd9242c83dbfbc72f73595c84 Mon Sep 17 00:00:00 2001 From: Tao Gong Date: Mon, 9 Aug 2021 16:24:43 +0800 Subject: [PATCH 10/10] fix a bug of mot_reid init --- mmtrack/models/reid/fc_module.py | 4 ++-- mmtrack/models/reid/linear_reid_head.py | 10 +++------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/mmtrack/models/reid/fc_module.py b/mmtrack/models/reid/fc_module.py index f30f97bd0..863e0eae3 100644 --- a/mmtrack/models/reid/fc_module.py +++ b/mmtrack/models/reid/fc_module.py @@ -15,7 +15,7 @@ class FcModule(BaseModule): Defaults to dict(type='ReLU'). inplace (bool, optional): Whether inplace the activatation module. init_cfg (dict or list[dict], optional): Initialization config dict. - Defaults to dict(type='Kaiming', layer='fc'). + Defaults to dict(type='Kaiming', layer='Linear'). """ def __init__(self, @@ -24,7 +24,7 @@ def __init__(self, norm_cfg=None, act_cfg=dict(type='ReLU'), inplace=True, - init_cfg=dict(type='Kaiming', layer='fc')): + init_cfg=dict(type='Kaiming', layer='Linear')): super(FcModule, self).__init__(init_cfg) assert norm_cfg is None or isinstance(norm_cfg, dict) assert act_cfg is None or isinstance(act_cfg, dict) diff --git a/mmtrack/models/reid/linear_reid_head.py b/mmtrack/models/reid/linear_reid_head.py index 61d4458fc..446d1d062 100644 --- a/mmtrack/models/reid/linear_reid_head.py +++ b/mmtrack/models/reid/linear_reid_head.py @@ -30,8 +30,8 @@ class LinearReIDHead(BaseHead): re-identificaiton module. topk (int, optional): Calculate topk accuracy. Default to False. init_cfg (dict or list[dict], optional): Initialization config dict. - Defaults to dict(type='Normal',layer=['fc_out', 'classifier'], - mean=0, std=0.01, bias=0). + Defaults to dict(type='Normal',layer='Linear', mean=0, std=0.01, + bias=0). """ def __init__(self, @@ -46,11 +46,7 @@ def __init__(self, loss_pairwise=None, topk=(1, ), init_cfg=dict( - type='Normal', - layer=['fc_out', 'classifier'], - mean=0, - std=0.01, - bias=0)): + type='Normal', layer='Linear', mean=0, std=0.01, bias=0)): super(LinearReIDHead, self).__init__(init_cfg) assert isinstance(topk, (int, tuple)) if isinstance(topk, int):
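
Taken together, this series moves pretrained checkpoints out of the ad-hoc `pretrains`/`pretrained` fields and into per-module `init_cfg=dict(type='Pretrained', checkpoint=...)` entries, with the weights only materialized when `init_weights()` is called. A minimal usage sketch of the resulting flow (assuming `build_model` is imported from `mmtrack.models` and `Config` from `mmcv`, as in the tools scripts; the config path is one of the files updated above):

    from mmcv import Config
    from mmtrack.models import build_model

    cfg = Config.fromfile(
        'configs/mot/tracktor/tracktor_faster-rcnn_r50_fpn_8e_mot20-public.py')
    model = build_model(cfg.model)
    # Checkpoints declared via `init_cfg` are only loaded when
    # `init_weights()` is called explicitly, which is why tools/test.py,
    # tools/benchmark.py and tools/mot_param_search.py now call it right
    # after build_model().
    model.init_weights()

Older configs that still pass `pretrains=dict(detector=..., reid=...)` keep working: the `DeepSORT` and `Tracktor` constructors warn about the deprecation and rewrite each entry into the equivalent sub-module `init_cfg` before the detector and reid models are built.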