[Feature]: MAE official (#221)
* [Feature]: MAE single image pre-training

* [Fix]: Fix config

* [Fix]: Fix dataset link

* [Feature]: Add run

* [Refactor]: Delete spot

* [Feature]: ignore nohup output file

* [Feature]: Add auto script to generate run cmd

* [Refactor]: Refactor mae config file

* [Feature]: sz20 settings

* [Feature]: Add auto resume

* [Fix]: Fix lint

* [Feature]: Make git ignore txt

* [Refactor]: Delete gpus in script

* [Fix]: Make generate_cmd to add --async

* [Feature]: Initial version of Vit fine-tune

* [Fix]: Add 1424 specific settings

* [Fix]: Fix missing file client bug for 1424

* [Feature]: 1424 customized settings

* [Fix]: Make drop in eval to False

* [Feature]: Change the finetune and pre-training settings

* [Feature]: Add debug setting

* [Refactor]: Refactor the model

* [Feature]: Customized settings

* [Feature]: Add A100 settings

* [Fix]: Change mae to imagenet

* [Feature]: Change mae pretrain num workers to 32

* [Feature]: Change num workers to 16

* [Feature]: Add A100 setting for pre_release ft version

* [Feature]: Add img_norm_cfg

* [Fix]: Fix mae cls test missing logits bug

* [Fix]: Fix mae cls head bias initialize to zero

* [Feature]: Rename mae config name

* [Feature]: Add MAE README.md

* [Fix]: Fix lint

* [Feature]: Fix typo

* [Fix]: Fix typo

* [Feature]: Fix invalid link

* [Fix]: Fix finetune config file name

* [Feature]: Official pretrain v1

* [Feature]: Change log interval to 100

* [Feature]: pretrain 1600 epochs

* [Fix]: Change encoder num head to 12

* [Feature]: Mix precision

* [Feature]: Add default value to random masking

* [Feature]: Official MAE finetune

* [Feature]: Finetune img per gpu 32

* [Feature]: Add multi machine training for lincls

* [Fix]: Fix lincls master port master addr

* [Feature]: Change img per gpu to 128

* [Feature]: Add linear eval and Refactor

* [Fix]: Fix debug mode

* [Fix]: Delete MAE dataset in __init__.py

* [Feature]: normalize pixel for mae

* [Fix]: Fix lint

* [Feature]: LARS for linear eval

* [Feature]: Add lars for mae linear eval

* [Feature]: Change mae linear lars num workers to 32

* [Feature]: Change mae linear lars num workers to 8

* [Feature]: log every 25 iter for mae linear eval lars

* [Feature]: Add 1600 epoch and 800 epoch pretraining

* [Fix]: Change linear eval to 902

* [Fix]: Add random flip to linear eval

* [Fix]: delete fp16 in mae

* [Refactor]: Change backbone to mmcls

* [Fix]: Align finetune settings

* [Fix]: replace timm trunc_normal with mmcv trunc_normal

* [Fix]: Change finetune layer_decay to 0.65

* [Fix]: Delete pretrain last norm when global_pooling

* [Fix]: set requires_grad of norm1 to False

* [Fix]: delete norm1

* [Fix]: Fix docstring bug

* [Fix]: Fix lint

* [Fix]: Add external link

* [Fix]: Delete auto_resume and reformat config readme.

* [Fix]: Fix pytest bug

* [Fix]: Fix lint

* [Refactor]: Rename filename

* [Feature]: Add docstring

* [Fix]: Rename config file name

* [Fix]: Fix name inconsistency bug

* [Fix]: Change the default value of persistent_worker in builder to True

* [Fix]: Change the default value of CPUS_PER_TASK to 5

* [Fix]: Add a blank line to line136 in tools/train.py

* [Fix]: Fix MAE algorithm docstring format and add paper name and url

* [Feature]: Add MAE paper name and link, and store mae teaser on github

* [Refactor]: Delete mae.png

* [Fix]: Fix config file name

* [Fix]: Fix name bug

* [Refactor]: Change default GPUS to 8

* [Fix]: Abandon change to drop_last

* [Fix]: Fix docstring in mae algorithm

* [Fix]: Fix lint

* [Fix]: Fix lint

* [Fix]: Fix mae finetune algo type bug

* [Feature]: Add unit test for algorithm

* [Feature]: Add unit test for remaining parts

* [Fix]: Fix lint

* [Fix]: Fix typo

* [Fix]: Delete some unnecessary modification in gitignore

* [Feature]: Change finetune setting in mae algo to mixup setting

* [Fix]: Change norm_pix_loss to norm_pix in pretrain head

* [Fix]: Delete modification in dist_train_linear.sh

* [Refactor]: Delete global pool in mae_cls_vit.py

* [Fix]: Change finetune param to mixup in test_mae_classification

* [Fix]: Change norm_pix_loss to norm_pix of mae_pretrain_head in unit test

* [Fix]: Change norm_pix_loss to norm_pix in unit test

* [Refactor]: Create init_weights for mae_finetune_head and mae_linprobe_head

* [Refactor]: Construct 2d sin-cosine position embedding using torch

* [Refactor]: Using classification and using mixup from mmcls

* [Fix]: Fix lint

* [Fix]: Add False to finetune mae linprobe

* [Fix]: Set drop_last to False

* [Fix]: Fix MAE finetune layerwise lr bug

* [Refactor]: Delete redundant MAE when registering MAE

* [Refactor]: Split initialize_weights in MAE to submodules

* [Fix]: Change the min_lr of mae pretrain to 0.0

* [Refactor]: Delete unused _init_weights in mae_cls_vit

* [Refactor]: Change MAE cls vit to a more general name

* [Feature]: Add Epoch Fix cosine annealing lr updater

* [Fix]: Fix lint

* [Feature]: Add layer wise lr decay in optimizer constructor

* [Fix]: Fix lint

* [Fix]: Fix set layer wise lr decay bug

* [Fix]: Fix UT for MAE

* [Fix]: Fix lint

* [Fix]: Update algorithm readme format for MAE

* [Fix]: Fix isort

* [Fix]: Add Returns in mae_pretrain_vit

* [Fix]: Change bgr to rgb

* [Fix]: Change norm pix to True

* [Fix]: Use cls_token to linear prob

* [Fix]: Delete mixup.py

* [Fix]: Fix MAE readme

* [Feature]: Delete linprobe

* [Refactor]: Merge MAE head into one file

* [Fix]: Fix lint

* [Fix]: rename mae_pretrain_head to mae_head

* [Fix]: Fix import error in __init__.py

* [Feature]: skip MAE algo UT when running on windows

* [Fix]: Fix UT bug

* [Feature]: Update model_zoo

* [Fix]: Rename MAE pretrain model name

* [Fix]: Delete mae ft prefix

* [Feature]: Change b to base

* [Refactor]: Change b in MAE pt config to base

* [Fix]: Fix typo in docstring

* [Fix]: Fix name bug

* [Feature]: Add new constructor for MAE finetune

* [Fix]: Fix model_zoo link

* [Fix]: Skip UT for MAE

* [Fix]: Change fixed channel order to param

Co-authored-by: LIU Yuan <liuyuuan@pjlab.org.cn>
Co-authored-by: liu yuan <liuyuan@pjlab.org.cn>
3 people authored Mar 3, 2022
1 parent dea4647 commit adc980c
Showing 42 changed files with 1,226 additions and 18 deletions.
17 changes: 17 additions & 0 deletions configs/benchmarks/classification/_base_/models/vit-base-p16_ft.py
@@ -0,0 +1,17 @@
model = dict(
    type='Classification',
    backbone=dict(
        type='MIMVisionTransformer',
        arch='b',
        patch_size=16,
        drop_path_rate=0.1,
        final_norm=False),
    head=dict(
        type='MAEFinetuneHead',
        num_classes=1000,
        embed_dim=768,
        label_smooth_val=0.1),
    train_cfg=dict(augments=[
        dict(type='BatchMixup', alpha=0.8, num_classes=1000, prob=0.5),
        dict(type='BatchCutMix', alpha=1.0, num_classes=1000, prob=0.5)
    ]))
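The `train_cfg` augments mirror the batch-level mixup and CutMix transforms provided by mmcls. As a rough sketch of the mixup idea only (a hypothetical helper, not mmcls's actual `BatchMixup` implementation):

```python
import torch
import torch.nn.functional as F

def mixup_batch(imgs, labels, alpha=0.8, num_classes=1000):
    """Blend each sample with a randomly chosen partner (minimal sketch)."""
    lam = torch.distributions.Beta(alpha, alpha).sample().item()
    one_hot = F.one_hot(labels, num_classes).float()
    perm = torch.randperm(imgs.size(0))
    mixed_imgs = lam * imgs + (1 - lam) * imgs[perm]
    mixed_labels = lam * one_hot + (1 - lam) * one_hot[perm]
    return mixed_imgs, mixed_labels
```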
@@ -0,0 +1,9 @@
model = dict(
    type='Classification',
    backbone=dict(
        type='MIMVisionTransformer',
        arch='b',
        patch_size=16,
        final_norm=True,
        finetune=False),
    head=dict(type='MAELinprobeHead', num_classes=1000, embed_dim=768))
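Here `finetune=False` keeps the pre-trained backbone frozen, so only the linear `MAELinprobeHead` is trained. A minimal sketch of the underlying idea, assuming a standard `torch.nn.Module` backbone (hypothetical helper, not this repo's code):

```python
import torch.nn as nn

def freeze_backbone(backbone: nn.Module) -> None:
    """Linear probing: keep the pre-trained features fixed, train only the head."""
    backbone.eval()  # disable dropout and similar train-time behavior
    for param in backbone.parameters():
        param.requires_grad = False
```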
@@ -0,0 +1,14 @@
# optimizer
optimizer = dict(type='AdamW', lr=1e-3, betas=(0.9, 0.999), weight_decay=0.05)

# learning policy
lr_config = dict(
    policy='CosineAnnealing',
    min_lr=0.,
    warmup='linear',
    warmup_iters=5,
    warmup_ratio=1e-4,  # cannot be 0
    warmup_by_epoch=True)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=100)
@@ -0,0 +1,67 @@
_base_ = [
    '../_base_/models/vit-base-p16_ft.py',
    '../_base_/datasets/imagenet.py',
    '../_base_/schedules/adamw_coslr-100e_in1k.py',
    '../_base_/default_runtime.py',
]

# dataset
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_pipeline = [
    dict(
        type='RandomAug',
        input_size=224,
        color_jitter=None,
        auto_augment='rand-m9-mstd0.5-inc1',
        interpolation='bicubic',
        re_prob=0.25,
        re_mode='pixel',
        re_count=1,
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225))
]
test_pipeline = [
    dict(type='Resize', size=256, interpolation=3),
    dict(type='CenterCrop', size=224),
    dict(type='ToTensor'),
    dict(type='Normalize', **img_norm_cfg)
]
data = dict(
    samples_per_gpu=128,
    drop_last=False,
    workers_per_gpu=32,
    train=dict(pipeline=train_pipeline),
    val=dict(pipeline=test_pipeline))

# model
model = dict(backbone=dict(init_cfg=dict()))

# optimizer
optimizer = dict(
    lr=1e-3 * 1024 / 256,
    paramwise_options={
        'norm': dict(weight_decay=0.),
        'bias': dict(weight_decay=0.),
        'pos_embed': dict(weight_decay=0.),
        'cls_token': dict(weight_decay=0.)
    },
    constructor='MAEFtOptimizerConstructor',
    layer_decay=0.65)

# learning policy
lr_config = dict(
    policy='StepFixCosineAnnealing',
    min_lr=1e-6,
    warmup='linear',
    warmup_iters=5,
    warmup_ratio=1e-4,
    warmup_by_epoch=True,
    by_epoch=False)

# runtime
checkpoint_config = dict(interval=1, max_keep_ckpts=3, out_dir='')
persistent_workers = True
log_config = dict(
    interval=100, hooks=[
        dict(type='TextLoggerHook'),
    ])
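`constructor='MAEFtOptimizerConstructor'` with `layer_decay=0.65` applies layer-wise learning-rate decay: the deeper a block, the closer its learning rate sits to the base value, while early layers barely move. A minimal sketch of the scaling rule (illustrative only; the real constructor also handles the `paramwise_options` grouping):

```python
def layer_scales(num_layers=12, layer_decay=0.65):
    """LR scale per layer id: 0 = patch embed, num_layers + 1 = head."""
    return {
        layer_id: layer_decay**(num_layers + 1 - layer_id)
        for layer_id in range(num_layers + 2)
    }

# With layer_decay=0.65 and 12 blocks, the patch embedding trains at
# 0.65**13 ≈ 0.4% of the head's learning rate, the last block at 65%.
```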
30 changes: 30 additions & 0 deletions configs/selfsup/_base_/datasets/imagenet_mae.py
@@ -0,0 +1,30 @@
# dataset settings
data_source = 'ImageNet'
dataset_type = 'SingleViewDataset'
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
train_pipeline = [
    dict(
        type='RandomResizedCrop', size=224, scale=(0.2, 1.0), interpolation=3),
    dict(type='RandomHorizontalFlip')
]

# prefetch
prefetch = False
if not prefetch:
    train_pipeline.extend(
        [dict(type='ToTensor'),
         dict(type='Normalize', **img_norm_cfg)])

# dataset summary
data = dict(
    imgs_per_gpu=128,
    workers_per_gpu=8,
    train=dict(
        type=dataset_type,
        data_source=dict(
            type=data_source,
            data_prefix='data/imagenet/train',
            ann_file='data/imagenet/meta/train.txt',
        ),
        pipeline=train_pipeline,
        prefetch=prefetch))
15 changes: 15 additions & 0 deletions configs/selfsup/_base_/models/mae_vit-base-p16.py
@@ -0,0 +1,15 @@
# model settings
model = dict(
    type='MAE',
    backbone=dict(type='MAEViT', arch='b', patch_size=16, mask_ratio=0.75),
    neck=dict(
        type='MAEPretrainDecoder',
        patch_size=16,
        in_chans=3,
        embed_dim=768,
        decoder_embed_dim=512,
        decoder_depth=8,
        decoder_num_heads=16,
        mlp_ratio=4.,
    ),
    head=dict(type='MAEPretrainHead', norm_pix=True, patch_size=16))
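Since every component is registered, the whole algorithm can be instantiated from this file. A minimal sketch, assuming mmselfsup's `build_algorithm` and `mmcv.Config` (the path is illustrative):

```python
import mmcv
from mmselfsup.models import build_algorithm

cfg = mmcv.Config.fromfile('configs/selfsup/_base_/models/mae_vit-base-p16.py')
model = build_algorithm(cfg.model)  # MAE: MAEViT backbone + decoder neck + head
```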
15 changes: 15 additions & 0 deletions configs/selfsup/_base_/schedules/adamw_coslr-200e_in1k.py
@@ -0,0 +1,15 @@
# optimizer
optimizer = dict(type='AdamW', lr=1.5e-4, betas=(0.9, 0.95), weight_decay=0.05)
optimizer_config = dict()  # grad_clip, coalesce, bucket_size_mb

# learning policy
lr_config = dict(
    policy='CosineAnnealing',
    min_lr=0.,
    warmup='linear',
    warmup_iters=40,
    warmup_ratio=1e-4,  # cannot be 0
    warmup_by_epoch=True)

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=300)
54 changes: 54 additions & 0 deletions configs/selfsup/mae/README.md
@@ -0,0 +1,54 @@
# MAE

> [Masked Autoencoders Are Scalable Vision Learners](https://arxiv.org/abs/2111.06377)
<!-- [ALGORITHM] -->

## Abstract

This paper shows that masked autoencoders (MAE) are
scalable self-supervised learners for computer vision. Our
MAE approach is simple: we mask random patches of the
input image and reconstruct the missing pixels. It is based
on two core designs. First, we develop an asymmetric
encoder-decoder architecture, with an encoder that operates only on the
visible subset of patches (without mask tokens), along with a lightweight
decoder that reconstructs the original image from the latent representation
and mask tokens. Second, we find that masking a high proportion
of the input image, e.g., 75%, yields a nontrivial and
meaningful self-supervisory task. Coupling these two designs enables us to
train large models efficiently and effectively: we accelerate
training (by 3× or more) and improve accuracy. Our scalable approach allows
for learning high-capacity models that generalize well: e.g., a vanilla
ViT-Huge model achieves the best accuracy (87.8%) among
methods that use only ImageNet-1K data. Transfer performance in downstream tasks outperforms supervised pretraining and shows promising scaling behavior.

<div align="center">
<img src="https://user-images.githubusercontent.com/30762564/150733959-2959852a-c7bd-4d3f-911f-3e8d8839fe67.png" width="40%"/>
</div>
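The random patch masking at the heart of pre-training can be sketched as follows: a simplified version of the per-sample shuffle described in the paper, not this repo's `MAEViT` code.

```python
import torch

def random_masking(x, mask_ratio=0.75):
    """x: (N, L, D) patch tokens. Returns the visible subset and the mask."""
    N, L, D = x.shape
    len_keep = int(L * (1 - mask_ratio))
    noise = torch.rand(N, L)            # one uniform score per patch
    ids_shuffle = noise.argsort(dim=1)  # random permutation of patch indices
    ids_keep = ids_shuffle[:, :len_keep]
    x_visible = torch.gather(x, 1, ids_keep.unsqueeze(-1).expand(-1, -1, D))
    mask = torch.ones(N, L)
    mask.scatter_(1, ids_keep, 0.)      # 0 = kept (visible), 1 = masked
    return x_visible, mask
```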


## Models and Benchmarks

Here, we report the results of the model pre-trained on ImageNet-1K for 400 epochs; the details are below:



| Backbone | Pre-train epoch | Fine-tuning Top-1 | Pre-train Config | Fine-tuning Config | Download |
| :------: | :-------------: | :---------------: | :-------------------------------------------------: | :---------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| ViT-B/16 | 400 | 83.1 | [config](./mae_vit-base-p16_8xb512-coslr-400e_in1k.py) | [config](../../benchmarks/classification/imagenet/vit-b-p16_ft-8xb128-coslr-100e_in1k.py) | [model](https://download.openmmlab.com/mmselfsup/mae/mae_vit-base-p16_8xb512-coslr-400e_in1k-224_20220223-85be947b.pth) &#124; [log](https://download.openmmlab.com/mmselfsup/mae/mae_vit-base-p16_8xb512-coslr-300e_in1k-224_20220210_140925.log.json) |


## Citation

```bibtex
@article{He2021MaskedAA,
  title={Masked Autoencoders Are Scalable Vision Learners},
  author={Kaiming He and Xinlei Chen and Saining Xie and Yanghao Li and
          Piotr Doll{\'a}r and Ross B. Girshick},
  journal={ArXiv},
  year={2021},
  volume={abs/2111.06377}
}
```
@@ -0,0 +1,4 @@
_base_ = 'mae_vit-base-p16_8xb512-coslr-400e_in1k.py'

# schedule
runner = dict(max_epochs=1600)
42 changes: 42 additions & 0 deletions configs/selfsup/mae/mae_vit-base-p16_8xb512-coslr-400e_in1k.py
@@ -0,0 +1,42 @@
_base_ = [
    '../_base_/models/mae_vit-base-p16.py',
    '../_base_/datasets/imagenet_mae.py',
    '../_base_/schedules/adamw_coslr-200e_in1k.py',
    '../_base_/default_runtime.py',
]

# dataset
data = dict(samples_per_gpu=512, workers_per_gpu=32)

# optimizer
optimizer = dict(
    lr=1.5e-4 * 4096 / 256,
    paramwise_options={
        'norm': dict(weight_decay=0.),
        'bias': dict(weight_decay=0.),
        'pos_embed': dict(weight_decay=0.),
        'mask_token': dict(weight_decay=0.),
        'cls_token': dict(weight_decay=0.)
    })
optimizer_config = dict()

# learning policy
lr_config = dict(
    policy='StepFixCosineAnnealing',
    min_lr=0.0,
    warmup='linear',
    warmup_iters=40,
    warmup_ratio=1e-4,
    warmup_by_epoch=True,
    by_epoch=False)

# schedule
runner = dict(max_epochs=400)

# runtime
checkpoint_config = dict(interval=1, max_keep_ckpts=3, out_dir='')
persistent_workers = True
log_config = dict(
    interval=100, hooks=[
        dict(type='TextLoggerHook'),
    ])
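The base learning rate follows the linear scaling rule: the paper's 1.5e-4 is defined for a total batch size of 256 and scaled up for the actual batch. A quick check of the arithmetic:

```python
base_lr = 1.5e-4
total_batch = 8 * 512             # 8 GPUs x 512 samples_per_gpu = 4096
lr = base_lr * total_batch / 256  # = 2.4e-3, the effective pre-train LR
```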
@@ -0,0 +1,4 @@
_base_ = 'mae_vit-base-p16_8xb512-coslr-400e_in1k.py'

# schedule
runner = dict(max_epochs=800)
7 changes: 7 additions & 0 deletions docs/en/model_zoo.md
@@ -20,6 +20,7 @@ All models and part of the benchmark results are recorded below.
| | [simsiam_resnet50_8xb32-coslr-200e_in1k](https://github.com/open-mmlab/mmselfsup/blob/master/configs/selfsup/simsiam/simsiam_resnet50_8xb32-coslr-200e_in1k.py) | [model](https://download.openmmlab.com/mmselfsup/simsiam/simsiam_resnet50_8xb32-coslr-200e_in1k_20220225-2f488143.pth) &#124; [log](https://download.openmmlab.com/mmselfsup/simsiam/simsiam_resnet50_8xb32-coslr-200e_in1k_20220210_195402.log.json) |
| [SwAV](https://github.com/open-mmlab/mmselfsup/blob/master/configs/selfsup/swav/README.md) | [swav_resnet50_8xb32-mcrop-2-6-coslr-200e_in1k-224-96](https://github.com/open-mmlab/mmselfsup/blob/master/configs/selfsup/swav/swav_resnet50_8xb32-mcrop-2-6-coslr-200e_in1k-224-96.py) | [model](https://download.openmmlab.com/mmselfsup/swav/swav_resnet50_8xb32-mcrop-2-6-coslr-200e_in1k-224-96_20220225-0497dd5d.pth) &#124; [log](https://download.openmmlab.com/mmselfsup/swav/swav_resnet50_8xb32-mcrop-2-6-coslr-200e_in1k-224-96_20220211_061131.log.json) |
| [MoCo v3](https://github.com/open-mmlab/mmselfsup/blob/master/configs/selfsup/mocov3/README.md) | [mocov3_vit-small-p16_32xb128-fp16-coslr-300e_in1k-224](https://github.com/open-mmlab/mmselfsup/blob/master/configs/selfsup/mocov3/mocov3_vit-small-p16_32xb128-fp16-coslr-300e_in1k-224.py) | [model](https://download.openmmlab.com/mmselfsup/moco/mocov3_vit-small-p16_32xb128-fp16-coslr-300e_in1k-224_20220225-e31238dd.pth) &#124; [log](https://download.openmmlab.com/mmselfsup/moco/mocov3_vit-small-p16_32xb128-fp16-coslr-300e_in1k-224_20220222_160222.log.json) |
| [MAE](https://github.com/open-mmlab/mmselfsup/blob/master/configs/selfsup/mae/README.md) | [mae_vit-base-p16_8xb512-coslr-400e_in1k](https://github.com/open-mmlab/mmselfsup/blob/master/configs/selfsup/mae/mae_vit-base-p16_8xb512-coslr-400e_in1k.py) | [model](https://download.openmmlab.com/mmselfsup/mae/mae_vit-base-p16_8xb512-coslr-400e_in1k-224_20220223-85be947b.pth) &#124; [log](https://download.openmmlab.com/mmselfsup/mae/mae_vit-base-p16_8xb512-coslr-300e_in1k-224_20220210_140925.log.json) |

Remarks:

@@ -52,6 +53,12 @@ If not specified, we use the linear evaluation setting from [MoCo](http://openaccess.thecvf.com/content_CVPR_2020/papers/He_Momentum_Contrast_for_Unsupervised_Visual_Representation_Learning_CVPR_2020_paper.pdf).
| SwAV | [swav_resnet50_8xb32-mcrop-2-6-coslr-200e_in1k-224-96](https://github.com/open-mmlab/mmselfsup/blob/master/configs/selfsup/swav/swav_resnet50_8xb32-mcrop-2-6-coslr-200e_in1k-224-96.py) | SwAV paper setting | 70.47 |
| MoCo v3 | [mocov3_vit-small-p16_32xb128-fp16-coslr-300e_in1k-224](https://github.com/open-mmlab/mmselfsup/blob/master/configs/selfsup/mocov3/mocov3_vit-small-p16_32xb128-fp16-coslr-300e_in1k-224.py) | MoCo v3 paper setting | 73.19 |


### ImageNet Fine-tuning
| Algorithm | Config | Remarks | Top-1 (%) |
| --------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | --------- |
| MAE | [mae_vit-base-p16_8xb512-coslr-400e_in1k](https://github.com/open-mmlab/mmselfsup/blob/master/configs/selfsup/mae/mae_vit-base-p16_8xb512-coslr-400e_in1k.py) | | 83.1 |

### COCO17 Object Detection

In the COCO17 object detection task, we choose the evaluation protocol from [MoCo](http://openaccess.thecvf.com/content_CVPR_2020/papers/He_Momentum_Contrast_for_Unsupervised_Visual_Representation_Learning_CVPR_2020_paper.pdf), with the Mask R-CNN architecture; the results below are trained with the same [config](https://github.com/open-mmlab/mmselfsup/blob/master/configs/benchmarks/mmdetection/coco/mask_rcnn_r50_fpn_mstrain_1x_coco.py).
6 changes: 6 additions & 0 deletions docs/zh_cn/model_zoo.md
@@ -20,6 +20,7 @@
| | [simsiam_resnet50_8xb32-coslr-200e_in1k](https://github.com/open-mmlab/mmselfsup/blob/master/configs/selfsup/simsiam/simsiam_resnet50_8xb32-coslr-200e_in1k.py) | [model](https://download.openmmlab.com/mmselfsup/simsiam/simsiam_resnet50_8xb32-coslr-200e_in1k_20220225-2f488143.pth) &#124; [log](https://download.openmmlab.com/mmselfsup/simsiam/simsiam_resnet50_8xb32-coslr-200e_in1k_20220210_195402.log.json) |
| [SwAV](https://github.com/open-mmlab/mmselfsup/blob/master/configs/selfsup/swav/README.md) | [swav_resnet50_8xb32-mcrop-2-6-coslr-200e_in1k-224-96](https://github.com/open-mmlab/mmselfsup/blob/master/configs/selfsup/swav/swav_resnet50_8xb32-mcrop-2-6-coslr-200e_in1k-224-96.py) | [model](https://download.openmmlab.com/mmselfsup/swav/swav_resnet50_8xb32-mcrop-2-6-coslr-200e_in1k-224-96_20220225-0497dd5d.pth) &#124; [log](https://download.openmmlab.com/mmselfsup/swav/swav_resnet50_8xb32-mcrop-2-6-coslr-200e_in1k-224-96_20220211_061131.log.json) |
| [MoCo v3](https://github.com/open-mmlab/mmselfsup/blob/master/configs/selfsup/mocov3/README.md) | [mocov3_vit-small-p16_32xb128-fp16-coslr-300e_in1k-224](https://github.com/open-mmlab/mmselfsup/blob/master/configs/selfsup/mocov3/mocov3_vit-small-p16_32xb128-fp16-coslr-300e_in1k-224.py) | [model](https://download.openmmlab.com/mmselfsup/moco/mocov3_vit-small-p16_32xb128-fp16-coslr-300e_in1k-224_20220225-e31238dd.pth) &#124; [log](https://download.openmmlab.com/mmselfsup/moco/mocov3_vit-small-p16_32xb128-fp16-coslr-300e_in1k-224_20220222_160222.log.json) |
| [MAE](https://github.com/open-mmlab/mmselfsup/blob/master/configs/selfsup/mae/README.md) | [mae_vit-base-p16_8xb512-coslr-400e_in1k](https://github.com/open-mmlab/mmselfsup/blob/master/configs/selfsup/mae/mae_vit-base-p16_8xb512-coslr-400e_in1k.py) | [model](https://download.openmmlab.com/mmselfsup/mae/mae_vit-base-p16_8xb512-coslr-400e_in1k-224_20220223-85be947b.pth) &#124; [log](https://download.openmmlab.com/mmselfsup/mae/mae_vit-base-p16_8xb512-coslr-300e_in1k-224_20220210_140925.log.json) |

Remarks:

@@ -52,6 +53,11 @@
| SwAV | [swav_resnet50_8xb32-mcrop-2-6-coslr-200e_in1k-224-96](https://github.com/open-mmlab/mmselfsup/blob/master/configs/selfsup/swav/swav_resnet50_8xb32-mcrop-2-6-coslr-200e_in1k-224-96.py) | SwAV paper setting | 70.47 |
| MoCo v3 | [mocov3_vit-small-p16_32xb128-fp16-coslr-300e_in1k-224](https://github.com/open-mmlab/mmselfsup/blob/master/configs/selfsup/mocov3/mocov3_vit-small-p16_32xb128-fp16-coslr-300e_in1k-224.py) | MoCo v3 paper setting | 73.19 |


### ImageNet Fine-tuning
| Algorithm | Config | Remarks | Top-1 (%) |
| --------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | --------- |
| MAE | [mae_vit-base-p16_8xb512-coslr-400e_in1k](https://github.com/open-mmlab/mmselfsup/blob/master/configs/selfsup/mae/mae_vit-base-p16_8xb512-coslr-400e_in1k.py) | | 83.1 |

### COCO17 Object Detection

In the COCO17 object detection task, we adopt the evaluation setting from [MoCo](http://openaccess.thecvf.com/content_CVPR_2020/papers/He_Momentum_Contrast_for_Unsupervised_Visual_Representation_Learning_CVPR_2020_paper.pdf), based on the Mask R-CNN architecture; the results below are trained with the same [config](https://github.com/open-mmlab/mmselfsup/blob/master/configs/benchmarks/mmdetection/coco/mask_rcnn_r50_fpn_mstrain_1x_coco.py).
3 changes: 2 additions & 1 deletion mmselfsup/core/hooks/__init__.py
@@ -1,4 +1,5 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .cosineAnnealing_hook import StepFixCosineAnnealingLrUpdaterHook
from .deepcluster_hook import DeepClusterHook
from .densecl_hook import DenseCLHook
from .momentum_update_hook import MomentumUpdateHook
Expand All @@ -10,5 +11,5 @@
__all__ = [
'MomentumUpdateHook', 'DeepClusterHook', 'DenseCLHook', 'ODCHook',
'DistOptimizerHook', 'GradAccumFp16OptimizerHook', 'SimSiamHook',
'SwAVHook'
'SwAVHook', 'StepFixCosineAnnealingLrUpdaterHook'
]
35 changes: 35 additions & 0 deletions mmselfsup/core/hooks/cosineAnnealing_hook.py
@@ -0,0 +1,35 @@
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.runner import HOOKS
from mmcv.runner.hooks.lr_updater import (CosineAnnealingLrUpdaterHook,
                                          annealing_cos)


@HOOKS.register_module()
class StepFixCosineAnnealingLrUpdaterHook(CosineAnnealingLrUpdaterHook):
    """Cosine annealing LR updater that excludes the warmup period, so the
    cosine decay spans only the post-warmup epochs or iterations."""

    def get_lr(self, runner, base_lr):
        if self.by_epoch:
            progress = runner.epoch
            max_progress = runner.max_epochs

            # Exclude warmup epochs from the cosine schedule
            if self.warmup is not None:
                progress = progress - self.warmup_iters // len(
                    runner.data_loader)
                max_progress = max_progress - self.warmup_iters // len(
                    runner.data_loader)
        else:
            progress = runner.iter
            max_progress = runner.max_iters

            # Exclude warmup iters from the cosine schedule
            if self.warmup is not None:
                progress = progress - self.warmup_iters
                max_progress = max_progress - self.warmup_iters

        if self.min_lr_ratio is not None:
            target_lr = base_lr * self.min_lr_ratio
        else:
            target_lr = self.min_lr

        return annealing_cos(base_lr, target_lr, progress / max_progress)
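The subtraction above makes `progress / max_progress` start at 0 exactly when warmup ends, so the cosine curve begins from the full base LR instead of having already decayed during warmup. A numeric sketch (iteration counts are illustrative; `annealing_cos` here mirrors mmcv's helper with `weight=1`):

```python
import math

def annealing_cos(start, end, factor):
    # cosine interpolation from `start` to `end`, factor in [0, 1]
    return end + 0.5 * (start - end) * (math.cos(math.pi * factor) + 1)

iters_per_epoch = 313  # ~1.28M images / 4096 batch, illustrative
warmup = 40 * iters_per_epoch
max_iters = 400 * iters_per_epoch
base_lr, min_lr = 2.4e-3, 0.0

for it in (warmup, (warmup + max_iters) // 2, max_iters):
    factor = (it - warmup) / (max_iters - warmup)
    print(it, annealing_cos(base_lr, min_lr, factor))
# factor is 0 right after warmup, 0.5 mid-training, 1.0 at the last iteration
```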
6 changes: 5 additions & 1 deletion mmselfsup/core/optimizer/__init__.py
@@ -1,6 +1,10 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from .builder import build_optimizer
 from .constructor import DefaultOptimizerConstructor
+from .mae_finetune_constructor import MAEFtOptimizerConstructor
 from .optimizers import LARS

-__all__ = ['LARS', 'build_optimizer', 'DefaultOptimizerConstructor']
+__all__ = [
+    'LARS', 'build_optimizer', 'DefaultOptimizerConstructor',
+    'MAEFtOptimizerConstructor'
+]
