From b63150f02a55bac2c1ea8d2971d92c7ddd7e47e3 Mon Sep 17 00:00:00 2001 From: hanhuiyu1996 Date: Fri, 15 Dec 2023 10:20:24 +0800 Subject: [PATCH 1/3] Replace filter_bias_and_bn by weight_decay_filter to fix and be compatible with previous bugs. --- config.py | 10 ++++- configs/bit/bit_resnet101_ascend.yaml | 2 +- configs/bit/bit_resnet50_ascend.yaml | 2 +- configs/bit/bit_resnet50x3_ascend.yaml | 2 +- configs/cmt/cmt_small_ascend.yaml | 1 - configs/coat/coat_lite_mini_ascend.yaml | 1 - configs/coat/coat_lite_tiny_ascend.yaml | 1 - configs/coat/coat_mini_ascend.yaml | 1 - configs/coat/coat_tiny_ascend.yaml | 1 - configs/convit/convit_base_ascend.yaml | 1 - configs/convit/convit_base_plus_ascend.yaml | 1 - configs/convit/convit_small_ascend.yaml | 1 - configs/convit/convit_small_plus_ascend.yaml | 1 - configs/convit/convit_tiny_ascend.yaml | 1 - configs/convit/convit_tiny_gpu.yaml | 1 - configs/convit/convit_tiny_plus_ascend.yaml | 1 - configs/convnext/convnext_base_ascend.yaml | 1 - configs/convnext/convnext_small_ascend.yaml | 1 - configs/convnext/convnext_tiny_ascend.yaml | 1 - .../convnextv2/convnextv2_tiny_ascend.yaml | 1 - configs/crossvit/crossvit_15_ascend.yaml | 1 - configs/crossvit/crossvit_18_ascend.yaml | 1 - configs/crossvit/crossvit_9_ascend.yaml | 1 - configs/densenet/densenet_121_ascend.yaml | 1 - configs/densenet/densenet_121_gpu.yaml | 1 - configs/densenet/densenet_161_ascend.yaml | 1 - configs/densenet/densenet_161_gpu.yaml | 1 - configs/densenet/densenet_169_ascend.yaml | 1 - configs/densenet/densenet_169_gpu.yaml | 1 - configs/densenet/densenet_201_ascend.yaml | 1 - configs/densenet/densenet_201_gpu.yaml | 1 - configs/dpn/dpn107_ascend.yaml | 1 - configs/dpn/dpn131_ascend.yaml | 1 - configs/dpn/dpn92_ascend.yaml | 1 - configs/dpn/dpn98_ascend.yaml | 1 - configs/edgenext/edgenext_base_ascend.yaml | 1 - configs/edgenext/edgenext_small_ascend.yaml | 1 - configs/edgenext/edgenext_x_small_ascend.yaml | 1 - .../edgenext/edgenext_xx_small_ascend.yaml | 1 
- .../efficientnet/efficientnet_b0_ascend.yaml | 1 - .../efficientnet/efficientnet_b1_ascend.yaml | 1 - configs/ghostnet/ghostnet_050_ascend.yaml | 1 - configs/ghostnet/ghostnet_100_ascend.yaml | 1 - configs/ghostnet/ghostnet_130_ascend.yaml | 1 - configs/googlenet/googlenet_ascend.yaml | 1 - configs/halonet/halonet_50t_ascend.yaml | 1 - configs/hrnet/hrnet_w32_ascend.yaml | 1 - configs/hrnet/hrnet_w48_ascend.yaml | 1 - configs/inceptionv3/inception_v3_ascend.yaml | 1 - configs/inceptionv4/inception_v4_ascend.yaml | 1 - configs/mixnet/mixnet_l_ascend.yaml | 1 - configs/mixnet/mixnet_m_ascend.yaml | 1 - configs/mixnet/mixnet_s_ascend.yaml | 1 - configs/mnasnet/mnasnet_0.5_ascend.yaml | 1 - configs/mnasnet/mnasnet_0.75_ascend.yaml | 1 - configs/mnasnet/mnasnet_0.75_gpu.yaml | 1 - configs/mnasnet/mnasnet_1.0_ascend.yaml | 1 - configs/mnasnet/mnasnet_1.0_gpu.yaml | 1 - configs/mnasnet/mnasnet_1.3_ascend.yaml | 1 - configs/mnasnet/mnasnet_1.4_ascend.yaml | 1 - configs/mnasnet/mnasnet_1.4_gpu.yaml | 1 - .../mobilenetv1/mobilenet_v1_0.25_ascend.yaml | 1 - .../mobilenetv1/mobilenet_v1_0.25_gpu.yaml | 1 - .../mobilenetv1/mobilenet_v1_0.5_ascend.yaml | 1 - configs/mobilenetv1/mobilenet_v1_0.5_gpu.yaml | 1 - .../mobilenetv1/mobilenet_v1_0.75_ascend.yaml | 1 - .../mobilenetv1/mobilenet_v1_0.75_gpu.yaml | 1 - .../mobilenetv1/mobilenet_v1_1.0_ascend.yaml | 1 - configs/mobilenetv1/mobilenet_v1_1.0_gpu.yaml | 1 - .../mobilenetv2/mobilenet_v2_0.75_ascend.yaml | 1 - .../mobilenetv2/mobilenet_v2_1.0_ascend.yaml | 1 - .../mobilenetv2/mobilenet_v2_1.4_ascend.yaml | 2 +- .../mobilenet_v3_large_ascend.yaml | 2 +- .../mobilenet_v3_small_ascend.yaml | 2 +- configs/mobilevit/mobilevit_small_ascend.yaml | 1 - .../mobilevit/mobilevit_x_small_ascend.yaml | 1 - .../mobilevit/mobilevit_xx_small_ascend.yaml | 1 - configs/nasnet/nasnet_a_4x1056_ascend.yaml | 1 - configs/pit/pit_b_ascend.yaml | 1 - configs/pit/pit_s_ascend.yaml | 1 - configs/pit/pit_ti_ascend.yaml | 1 - 
configs/pit/pit_xs_ascend.yaml | 1 - configs/poolformer/poolformer_s12_ascend.yaml | 1 - configs/pvt/pvt_large_ascend.yaml | 1 - configs/pvt/pvt_medium_ascend.yaml | 1 - configs/pvt/pvt_small_ascend.yaml | 1 - configs/pvt/pvt_tiny_ascend.yaml | 1 - configs/pvtv2/pvt_v2_b0_ascend.yaml | 1 - configs/pvtv2/pvt_v2_b1_ascend.yaml | 1 - configs/pvtv2/pvt_v2_b2_ascend.yaml | 1 - configs/pvtv2/pvt_v2_b3_ascend.yaml | 1 - configs/pvtv2/pvt_v2_b4_ascend.yaml | 1 - configs/regnet/regnet_x_200mf_ascend.yaml | 1 - configs/regnet/regnet_x_400mf_ascend.yaml | 1 - configs/regnet/regnet_x_600mf_ascend.yaml | 1 - configs/regnet/regnet_x_800mf_ascend.yaml | 1 - configs/regnet/regnet_y_16gf_ascend.yaml | 1 - configs/regnet/regnet_y_200mf_ascend.yaml | 1 - configs/regnet/regnet_y_400mf_ascend.yaml | 1 - configs/regnet/regnet_y_600mf_ascend.yaml | 1 - configs/regnet/regnet_y_800mf_ascend.yaml | 1 - configs/repmlp/repmlp_t224_ascend.yaml | 1 - configs/repvgg/repvgg_a0_ascend.yaml | 1 - configs/repvgg/repvgg_a1_ascend.yaml | 1 - configs/repvgg/repvgg_a2_ascend.yaml | 1 - configs/repvgg/repvgg_b0_ascend.yaml | 1 - configs/repvgg/repvgg_b1_ascend.yaml | 1 - configs/repvgg/repvgg_b1g2_ascend.yaml | 1 - configs/repvgg/repvgg_b1g4_ascend.yaml | 1 - configs/repvgg/repvgg_b2_ascend.yaml | 1 - configs/repvgg/repvgg_b2g4_ascend.yaml | 1 - configs/repvgg/repvgg_b3_ascend.yaml | 1 - configs/res2net/res2net_101_ascend.yaml | 1 - configs/res2net/res2net_101_gpu.yaml | 1 - configs/res2net/res2net_101_v1b_ascend.yaml | 1 - configs/res2net/res2net_101_v1b_gpu.yaml | 1 - configs/res2net/res2net_50_ascend.yaml | 1 - configs/res2net/res2net_50_gpu.yaml | 1 - configs/res2net/res2net_50_v1b_ascend.yaml | 1 - configs/res2net/res2net_50_v1b_gpu.yaml | 1 - configs/resnest/resnest101_ascend.yaml | 1 - configs/resnest/resnest50_ascend.yaml | 1 - configs/resnet/resnet_101_ascend.yaml | 1 - configs/resnet/resnet_101_gpu.yaml | 1 - configs/resnet/resnet_152_ascend.yaml | 1 - configs/resnet/resnet_152_gpu.yaml | 1 - 
configs/resnet/resnet_18_ascend.yaml | 1 - configs/resnet/resnet_18_gpu.yaml | 1 - configs/resnet/resnet_34_ascend.yaml | 1 - configs/resnet/resnet_34_gpu.yaml | 1 - configs/resnet/resnet_50_ascend.yaml | 1 - configs/resnet/resnet_50_gpu.yaml | 1 - configs/resnetv2/resnetv2_101_ascend.yaml | 1 - configs/resnetv2/resnetv2_50_ascend.yaml | 1 - configs/resnext/resnext101_32x4d_ascend.yaml | 1 - configs/resnext/resnext101_64x4d_ascend.yaml | 1 - configs/resnext/resnext152_64x4d_ascend.yaml | 1 - configs/resnext/resnext50_32x4d_ascend.yaml | 1 - configs/rexnet/rexnet_x09_ascend.yaml | 1 - configs/rexnet/rexnet_x10_ascend.yaml | 1 - configs/rexnet/rexnet_x13_ascend.yaml | 1 - configs/rexnet/rexnet_x15_ascend.yaml | 1 - configs/rexnet/rexnet_x20_ascend.yaml | 1 - configs/senet/seresnet18_ascend.yaml | 1 - configs/senet/seresnet34_ascend.yaml | 1 - configs/senet/seresnet50_ascend.yaml | 1 - configs/senet/seresnext26_32x4d_ascend.yaml | 1 - configs/senet/seresnext50_32x4d_ascend.yaml | 1 - .../shufflenet_v1_0.5_ascend.yaml | 2 +- .../shufflenet_v1_1.0_ascend.yaml | 2 +- .../shufflenet_v2_0.5_ascend.yaml | 2 +- .../shufflenet_v2_1.0_ascend.yaml | 2 +- .../shufflenet_v2_1.5_ascend.yaml | 2 +- .../shufflenet_v2_2.0_ascend.yaml | 2 +- configs/sknet/skresnet18_ascend.yaml | 1 - configs/sknet/skresnet34_ascend.yaml | 1 - configs/sknet/skresnext50_32x4d_ascend.yaml | 1 - configs/squeezenet/squeezenet_1.0_ascend.yaml | 1 - configs/squeezenet/squeezenet_1.0_gpu.yaml | 1 - configs/squeezenet/squeezenet_1.1_ascend.yaml | 1 - configs/squeezenet/squeezenet_1.1_gpu.yaml | 1 - configs/swintransformer/swin_tiny_ascend.yaml | 1 - .../swinv2_tiny_window8_ascend.yaml | 1 - configs/vgg/vgg11_ascend.yaml | 1 - configs/vgg/vgg13_ascend.yaml | 1 - configs/vgg/vgg16_ascend.yaml | 1 - configs/vgg/vgg19_ascend.yaml | 1 - configs/vit/vit_b32_224_ascend.yaml | 1 - configs/vit/vit_l16_224_ascend.yaml | 1 - configs/vit/vit_l32_224_ascend.yaml | 1 - configs/volo/volo_d1_ascend.yaml | 1 - 
configs/volo/volo_d2_ascend.yaml | 1 - configs/volo/volo_d3_ascend.yaml | 1 - configs/volo/volo_d4_ascend.yaml | 1 - configs/xception/xception_ascend.yaml | 1 - configs/xcit/xcit_tiny_12_p16_ascend.yaml | 1 - docs/en/tutorials/configuration.md | 10 ++--- docs/zh/tutorials/configuration.md | 10 ++--- examples/finetune/finetune.py | 2 +- .../deeplabv3_s16_dilated_resnet101.yaml | 2 +- .../deeplabv3_s8_dilated_resnet101.yaml | 2 +- .../deeplabv3plus_s16_dilated_resnet101.yaml | 2 +- .../deeplabv3plus_s8_dilated_resnet101.yaml | 2 +- examples/seg/deeplabv3/train.py | 2 +- mindcv/optim/optim_factory.py | 39 ++++++++++++++----- tests/modules/parallel/test_parallel_optim.py | 28 ++++++------- tests/modules/test_optim.py | 28 ++++++------- train.py | 2 +- train_with_func.py | 2 +- 189 files changed, 96 insertions(+), 232 deletions(-) diff --git a/config.py b/config.py index 60971e420..0b4028afe 100644 --- a/config.py +++ b/config.py @@ -182,8 +182,14 @@ def create_parser(): help='Weight decay (default=1e-6)') group.add_argument('--use_nesterov', type=str2bool, nargs='?', const=True, default=False, help='Enables the Nesterov momentum (default=False)') - group.add_argument('--filter_bias_and_bn', type=str2bool, nargs='?', const=True, default=True, - help='Filter Bias and BatchNorm (default=True)') + group.add_argument('--weight_decay_filter', type=str, default="disable", + choices=['disable', 'auto', 'norm_and_bias'], + help='filter parameters from weight_decay. ' + 'choice: "disable" - No parameters to filter from weight_decay; "auto" - In this case, ' + 'we do not apply weight decay filtering to any parameters. However, MindSpore currently ' + 'automatically filters norm parameters from weight decay. 
It is unclear whether there ' + 'will be any changes in future versions of MindSpore, so it is recommended to stay updated; ' + '"norm_and_bias" - Filter the parameters of Norm layer and Bias from weight decay') group.add_argument('--eps', type=float, default=1e-10, help='Term Added to the Denominator to Improve Numerical Stability (default=1e-10)') diff --git a/configs/bit/bit_resnet101_ascend.yaml b/configs/bit/bit_resnet101_ascend.yaml index 314113366..a0911c818 100644 --- a/configs/bit/bit_resnet101_ascend.yaml +++ b/configs/bit/bit_resnet101_ascend.yaml @@ -41,7 +41,7 @@ multi_step_decay_milestones: [30, 40, 50, 60, 70, 80, 85] # optimizer opt: 'sgd' -filter_bias_and_bn: False +weight_decay_filter: 'auto' momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/bit/bit_resnet50_ascend.yaml b/configs/bit/bit_resnet50_ascend.yaml index 148c08e55..956ef6b45 100644 --- a/configs/bit/bit_resnet50_ascend.yaml +++ b/configs/bit/bit_resnet50_ascend.yaml @@ -40,7 +40,7 @@ multi_step_decay_milestones: [30, 40, 50, 60, 70, 80, 85] # optimizer opt: 'sgd' -filter_bias_and_bn: False +weight_decay_filter: 'auto' momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/bit/bit_resnet50x3_ascend.yaml b/configs/bit/bit_resnet50x3_ascend.yaml index baf959281..22c96aebb 100644 --- a/configs/bit/bit_resnet50x3_ascend.yaml +++ b/configs/bit/bit_resnet50x3_ascend.yaml @@ -43,7 +43,7 @@ multi_step_decay_milestones: [30, 40, 50, 60, 70, 80, 85] # optimizer opt: 'sgd' -filter_bias_and_bn: False +weight_decay_filter: 'auto' momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/cmt/cmt_small_ascend.yaml b/configs/cmt/cmt_small_ascend.yaml index 94d6965d2..16bb34b05 100644 --- a/configs/cmt/cmt_small_ascend.yaml +++ b/configs/cmt/cmt_small_ascend.yaml @@ -52,7 +52,6 @@ warmup_epochs: 5 # optimizer opt: "adamw" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.05 loss_scale_type: 'dynamic' diff --git 
a/configs/coat/coat_lite_mini_ascend.yaml b/configs/coat/coat_lite_mini_ascend.yaml index d3eda21e2..3849d8a64 100644 --- a/configs/coat/coat_lite_mini_ascend.yaml +++ b/configs/coat/coat_lite_mini_ascend.yaml @@ -54,6 +54,5 @@ cycle_decay: 1.0 # optimizer opt: 'adamw' weight_decay: 0.025 -filter_bias_and_bn: True loss_scale: 1024 use_nesterov: False diff --git a/configs/coat/coat_lite_tiny_ascend.yaml b/configs/coat/coat_lite_tiny_ascend.yaml index 0ea55dbc4..8d4252677 100644 --- a/configs/coat/coat_lite_tiny_ascend.yaml +++ b/configs/coat/coat_lite_tiny_ascend.yaml @@ -54,6 +54,5 @@ cycle_decay: 1.0 # optimizer opt: 'adamw' weight_decay: 0.025 -filter_bias_and_bn: True loss_scale: 1024 use_nesterov: False diff --git a/configs/coat/coat_mini_ascend.yaml b/configs/coat/coat_mini_ascend.yaml index e209778ba..c3db85de7 100644 --- a/configs/coat/coat_mini_ascend.yaml +++ b/configs/coat/coat_mini_ascend.yaml @@ -55,7 +55,6 @@ epoch_size: 300 # optimizer opt: 'lion' weight_decay: 0.15 -filter_bias_and_bn: True loss_scale: 4096 use_nesterov: False loss_scale_type: dynamic diff --git a/configs/coat/coat_tiny_ascend.yaml b/configs/coat/coat_tiny_ascend.yaml index 927eaa0dd..6a2711c95 100644 --- a/configs/coat/coat_tiny_ascend.yaml +++ b/configs/coat/coat_tiny_ascend.yaml @@ -57,7 +57,6 @@ epoch_size: 300 # optimizer opt: 'lion' weight_decay: 0.15 -filter_bias_and_bn: True loss_scale: 4096 use_nesterov: False loss_scale_type: dynamic diff --git a/configs/convit/convit_base_ascend.yaml b/configs/convit/convit_base_ascend.yaml index 818903210..996c8b8a3 100644 --- a/configs/convit/convit_base_ascend.yaml +++ b/configs/convit/convit_base_ascend.yaml @@ -51,5 +51,4 @@ decay_epochs: 260 opt: 'adamw' weight_decay: 0.1 loss_scale: 1024 -filter_bias_and_bn: True use_nesterov: False diff --git a/configs/convit/convit_base_plus_ascend.yaml b/configs/convit/convit_base_plus_ascend.yaml index 58f8a38b5..4130852a5 100644 --- a/configs/convit/convit_base_plus_ascend.yaml +++ 
b/configs/convit/convit_base_plus_ascend.yaml @@ -51,5 +51,4 @@ decay_epochs: 260 opt: 'adamw' weight_decay: 0.1 loss_scale: 1024 -filter_bias_and_bn: True use_nesterov: False diff --git a/configs/convit/convit_small_ascend.yaml b/configs/convit/convit_small_ascend.yaml index f644f4a78..e940dcae8 100644 --- a/configs/convit/convit_small_ascend.yaml +++ b/configs/convit/convit_small_ascend.yaml @@ -51,5 +51,4 @@ decay_epochs: 260 opt: 'adamw' weight_decay: 0.05 loss_scale: 1024 -filter_bias_and_bn: True use_nesterov: False diff --git a/configs/convit/convit_small_plus_ascend.yaml b/configs/convit/convit_small_plus_ascend.yaml index 2725711cd..4a935dd63 100644 --- a/configs/convit/convit_small_plus_ascend.yaml +++ b/configs/convit/convit_small_plus_ascend.yaml @@ -51,5 +51,4 @@ decay_epochs: 260 opt: 'adamw' weight_decay: 0.05 loss_scale: 1024 -filter_bias_and_bn: True use_nesterov: False diff --git a/configs/convit/convit_tiny_ascend.yaml b/configs/convit/convit_tiny_ascend.yaml index 4777777e9..9fe422ddc 100644 --- a/configs/convit/convit_tiny_ascend.yaml +++ b/configs/convit/convit_tiny_ascend.yaml @@ -50,5 +50,4 @@ decay_epochs: 295 opt: 'adamw' weight_decay: 0.0001 loss_scale: 1024 -filter_bias_and_bn: True use_nesterov: False diff --git a/configs/convit/convit_tiny_gpu.yaml b/configs/convit/convit_tiny_gpu.yaml index 8393f32a9..8e2690adb 100644 --- a/configs/convit/convit_tiny_gpu.yaml +++ b/configs/convit/convit_tiny_gpu.yaml @@ -47,5 +47,4 @@ decay_epochs: 200 # optimizer opt: 'adamw' weight_decay: 0.025 -filter_bias_and_bn: True use_nesterov: False diff --git a/configs/convit/convit_tiny_plus_ascend.yaml b/configs/convit/convit_tiny_plus_ascend.yaml index d21330d3d..ef2295d20 100644 --- a/configs/convit/convit_tiny_plus_ascend.yaml +++ b/configs/convit/convit_tiny_plus_ascend.yaml @@ -50,5 +50,4 @@ decay_epochs: 260 opt: 'adamw' weight_decay: 0.0001 loss_scale: 1024 -filter_bias_and_bn: True use_nesterov: False diff --git 
a/configs/convnext/convnext_base_ascend.yaml b/configs/convnext/convnext_base_ascend.yaml index b643ea174..1f083f08d 100644 --- a/configs/convnext/convnext_base_ascend.yaml +++ b/configs/convnext/convnext_base_ascend.yaml @@ -51,7 +51,6 @@ warmup_epochs: 20 # optimizer opt: 'adamw' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.05 loss_scale_type: 'auto' diff --git a/configs/convnext/convnext_small_ascend.yaml b/configs/convnext/convnext_small_ascend.yaml index fb5abc131..5972bb0d7 100644 --- a/configs/convnext/convnext_small_ascend.yaml +++ b/configs/convnext/convnext_small_ascend.yaml @@ -51,7 +51,6 @@ warmup_epochs: 20 # optimizer opt: 'adamw' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.05 loss_scale_type: 'auto' diff --git a/configs/convnext/convnext_tiny_ascend.yaml b/configs/convnext/convnext_tiny_ascend.yaml index b786b2c42..ffec32c8a 100644 --- a/configs/convnext/convnext_tiny_ascend.yaml +++ b/configs/convnext/convnext_tiny_ascend.yaml @@ -51,7 +51,6 @@ warmup_epochs: 20 # optimizer opt: 'adamw' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.05 loss_scale_type: 'dynamic' diff --git a/configs/convnextv2/convnextv2_tiny_ascend.yaml b/configs/convnextv2/convnextv2_tiny_ascend.yaml index 605a0acb3..697790e53 100644 --- a/configs/convnextv2/convnextv2_tiny_ascend.yaml +++ b/configs/convnextv2/convnextv2_tiny_ascend.yaml @@ -52,7 +52,6 @@ warmup_epochs: 20 # optimizer opt: 'adamw' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.049 loss_scale_type: 'auto' diff --git a/configs/crossvit/crossvit_15_ascend.yaml b/configs/crossvit/crossvit_15_ascend.yaml index 74c403e73..7d6d24b21 100644 --- a/configs/crossvit/crossvit_15_ascend.yaml +++ b/configs/crossvit/crossvit_15_ascend.yaml @@ -57,7 +57,6 @@ cycle_decay: 1 # optimizer opt: 'adamw' weight_decay: 0.05 -filter_bias_and_bn: True loss_scale: 512 use_nesterov: False eps: 1e-8 diff --git a/configs/crossvit/crossvit_18_ascend.yaml b/configs/crossvit/crossvit_18_ascend.yaml index 
a9564a2fb..07bf783a7 100644 --- a/configs/crossvit/crossvit_18_ascend.yaml +++ b/configs/crossvit/crossvit_18_ascend.yaml @@ -55,7 +55,6 @@ decay_rate: 0.1 # optimizer opt: 'adamw' weight_decay: 0.05 -filter_bias_and_bn: True loss_scale: 1024 drop_overflow_update: True loss_scale_type: 'dynamic' diff --git a/configs/crossvit/crossvit_9_ascend.yaml b/configs/crossvit/crossvit_9_ascend.yaml index f6abb6a57..e6a03e7d8 100644 --- a/configs/crossvit/crossvit_9_ascend.yaml +++ b/configs/crossvit/crossvit_9_ascend.yaml @@ -54,7 +54,6 @@ decay_rate: 0.1 # optimizer opt: 'adamw' weight_decay: 0.05 -filter_bias_and_bn: True loss_scale_type: 'dynamic' drop_overflow_update: True use_nesterov: False diff --git a/configs/densenet/densenet_121_ascend.yaml b/configs/densenet/densenet_121_ascend.yaml index 70bf38878..fdcf44e9f 100644 --- a/configs/densenet/densenet_121_ascend.yaml +++ b/configs/densenet/densenet_121_ascend.yaml @@ -44,7 +44,6 @@ decay_epochs: 120 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/densenet/densenet_121_gpu.yaml b/configs/densenet/densenet_121_gpu.yaml index 82e5e2688..d571ac970 100644 --- a/configs/densenet/densenet_121_gpu.yaml +++ b/configs/densenet/densenet_121_gpu.yaml @@ -43,7 +43,6 @@ decay_epochs: 120 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/densenet/densenet_161_ascend.yaml b/configs/densenet/densenet_161_ascend.yaml index 9447dd0c4..6e3442fa0 100644 --- a/configs/densenet/densenet_161_ascend.yaml +++ b/configs/densenet/densenet_161_ascend.yaml @@ -44,7 +44,6 @@ decay_epochs: 120 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/densenet/densenet_161_gpu.yaml b/configs/densenet/densenet_161_gpu.yaml index 9447dd0c4..6e3442fa0 100644 --- a/configs/densenet/densenet_161_gpu.yaml +++ 
b/configs/densenet/densenet_161_gpu.yaml @@ -44,7 +44,6 @@ decay_epochs: 120 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/densenet/densenet_169_ascend.yaml b/configs/densenet/densenet_169_ascend.yaml index 53c27502d..0068fcd75 100644 --- a/configs/densenet/densenet_169_ascend.yaml +++ b/configs/densenet/densenet_169_ascend.yaml @@ -44,7 +44,6 @@ decay_epochs: 120 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/densenet/densenet_169_gpu.yaml b/configs/densenet/densenet_169_gpu.yaml index 53c27502d..0068fcd75 100644 --- a/configs/densenet/densenet_169_gpu.yaml +++ b/configs/densenet/densenet_169_gpu.yaml @@ -44,7 +44,6 @@ decay_epochs: 120 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/densenet/densenet_201_ascend.yaml b/configs/densenet/densenet_201_ascend.yaml index 4de9c1c34..124fd0620 100644 --- a/configs/densenet/densenet_201_ascend.yaml +++ b/configs/densenet/densenet_201_ascend.yaml @@ -44,7 +44,6 @@ decay_epochs: 120 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/densenet/densenet_201_gpu.yaml b/configs/densenet/densenet_201_gpu.yaml index 4de9c1c34..124fd0620 100644 --- a/configs/densenet/densenet_201_gpu.yaml +++ b/configs/densenet/densenet_201_gpu.yaml @@ -44,7 +44,6 @@ decay_epochs: 120 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/dpn/dpn107_ascend.yaml b/configs/dpn/dpn107_ascend.yaml index 9b7366ec6..c9228e1c4 100644 --- a/configs/dpn/dpn107_ascend.yaml +++ b/configs/dpn/dpn107_ascend.yaml @@ -45,7 +45,6 @@ decay_epochs: 200 # optimizer opt: 'SGD' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git 
a/configs/dpn/dpn131_ascend.yaml b/configs/dpn/dpn131_ascend.yaml index d9a737a6b..4d2e5ea03 100644 --- a/configs/dpn/dpn131_ascend.yaml +++ b/configs/dpn/dpn131_ascend.yaml @@ -45,7 +45,6 @@ decay_epochs: 200 # optimizer opt: 'SGD' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/dpn/dpn92_ascend.yaml b/configs/dpn/dpn92_ascend.yaml index 9da4fc83f..0dd2b6246 100644 --- a/configs/dpn/dpn92_ascend.yaml +++ b/configs/dpn/dpn92_ascend.yaml @@ -45,7 +45,6 @@ decay_epochs: 200 # optimizer opt: 'SGD' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/dpn/dpn98_ascend.yaml b/configs/dpn/dpn98_ascend.yaml index f9d9439de..4aa855c80 100644 --- a/configs/dpn/dpn98_ascend.yaml +++ b/configs/dpn/dpn98_ascend.yaml @@ -45,7 +45,6 @@ decay_epochs: 200 # optimizer opt: 'SGD' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/edgenext/edgenext_base_ascend.yaml b/configs/edgenext/edgenext_base_ascend.yaml index 9c7511121..35a02c599 100644 --- a/configs/edgenext/edgenext_base_ascend.yaml +++ b/configs/edgenext/edgenext_base_ascend.yaml @@ -58,7 +58,6 @@ decay_epochs: 330 # optimizer opt: 'adamw' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.05 loss_scale: 1024 diff --git a/configs/edgenext/edgenext_small_ascend.yaml b/configs/edgenext/edgenext_small_ascend.yaml index 1f4ad74f2..a9fb549ef 100644 --- a/configs/edgenext/edgenext_small_ascend.yaml +++ b/configs/edgenext/edgenext_small_ascend.yaml @@ -57,7 +57,6 @@ decay_epochs: 330 # optimizer opt: 'adamw' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.05 loss_scale: 1024 diff --git a/configs/edgenext/edgenext_x_small_ascend.yaml b/configs/edgenext/edgenext_x_small_ascend.yaml index 71345491f..13e06d051 100644 --- a/configs/edgenext/edgenext_x_small_ascend.yaml +++ b/configs/edgenext/edgenext_x_small_ascend.yaml @@ -57,7 +57,6 @@ decay_epochs: 330 # optimizer opt: 'adamw' 
-filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.05 loss_scale: 1024 diff --git a/configs/edgenext/edgenext_xx_small_ascend.yaml b/configs/edgenext/edgenext_xx_small_ascend.yaml index f3075d643..fc521837e 100644 --- a/configs/edgenext/edgenext_xx_small_ascend.yaml +++ b/configs/edgenext/edgenext_xx_small_ascend.yaml @@ -56,7 +56,6 @@ decay_epochs: 330 # optimizer opt: 'adamw' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.05 loss_scale: 1024 diff --git a/configs/efficientnet/efficientnet_b0_ascend.yaml b/configs/efficientnet/efficientnet_b0_ascend.yaml index 38a538a49..01bae8652 100644 --- a/configs/efficientnet/efficientnet_b0_ascend.yaml +++ b/configs/efficientnet/efficientnet_b0_ascend.yaml @@ -46,7 +46,6 @@ decay_epochs: 445 # optimizer opt: 'rmsprop' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 1e-5 loss_scale_type: 'dynamic' diff --git a/configs/efficientnet/efficientnet_b1_ascend.yaml b/configs/efficientnet/efficientnet_b1_ascend.yaml index 7d34b1d04..c11dc0027 100644 --- a/configs/efficientnet/efficientnet_b1_ascend.yaml +++ b/configs/efficientnet/efficientnet_b1_ascend.yaml @@ -46,7 +46,6 @@ decay_epochs: 430 # optimizer opt: 'rmsprop' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 1e-5 loss_scale_type: 'dynamic' diff --git a/configs/ghostnet/ghostnet_050_ascend.yaml b/configs/ghostnet/ghostnet_050_ascend.yaml index 0629c74c1..5947a29cb 100644 --- a/configs/ghostnet/ghostnet_050_ascend.yaml +++ b/configs/ghostnet/ghostnet_050_ascend.yaml @@ -46,7 +46,6 @@ decay_epochs: 580 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00002 loss_scale_type: "dynamic" diff --git a/configs/ghostnet/ghostnet_100_ascend.yaml b/configs/ghostnet/ghostnet_100_ascend.yaml index d307e5342..2428ce128 100644 --- a/configs/ghostnet/ghostnet_100_ascend.yaml +++ b/configs/ghostnet/ghostnet_100_ascend.yaml @@ -46,7 +46,6 @@ decay_epochs: 580 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 
weight_decay: 0.00002 loss_scale_type: "dynamic" diff --git a/configs/ghostnet/ghostnet_130_ascend.yaml b/configs/ghostnet/ghostnet_130_ascend.yaml index 904bf98d0..395dbd566 100644 --- a/configs/ghostnet/ghostnet_130_ascend.yaml +++ b/configs/ghostnet/ghostnet_130_ascend.yaml @@ -47,7 +47,6 @@ decay_epochs: 580 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00002 loss_scale_type: "dynamic" diff --git a/configs/googlenet/googlenet_ascend.yaml b/configs/googlenet/googlenet_ascend.yaml index c0268aeaa..c08d83924 100644 --- a/configs/googlenet/googlenet_ascend.yaml +++ b/configs/googlenet/googlenet_ascend.yaml @@ -44,7 +44,6 @@ warmup_epochs: 5 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00004 loss_scale: 1024 diff --git a/configs/halonet/halonet_50t_ascend.yaml b/configs/halonet/halonet_50t_ascend.yaml index 50693bdf8..603dcffa9 100644 --- a/configs/halonet/halonet_50t_ascend.yaml +++ b/configs/halonet/halonet_50t_ascend.yaml @@ -43,7 +43,6 @@ val_amp_level: 'O2' # optimizer opt: 'adamw' -filter_bias_and_bn: True weight_decay: 0.04 loss_scale: 1024 use_nesterov: False diff --git a/configs/hrnet/hrnet_w32_ascend.yaml b/configs/hrnet/hrnet_w32_ascend.yaml index 43375be8b..d1e78af89 100644 --- a/configs/hrnet/hrnet_w32_ascend.yaml +++ b/configs/hrnet/hrnet_w32_ascend.yaml @@ -52,4 +52,3 @@ decay_epochs: 280 opt: 'adamw' weight_decay: 0.05 loss_scale: 1024 -filter_bias_and_bn: True diff --git a/configs/hrnet/hrnet_w48_ascend.yaml b/configs/hrnet/hrnet_w48_ascend.yaml index 8d1c06032..6e2818f0b 100644 --- a/configs/hrnet/hrnet_w48_ascend.yaml +++ b/configs/hrnet/hrnet_w48_ascend.yaml @@ -52,4 +52,3 @@ decay_epochs: 280 opt: 'adamw' weight_decay: 0.05 loss_scale: 1024 -filter_bias_and_bn: True diff --git a/configs/inceptionv3/inception_v3_ascend.yaml b/configs/inceptionv3/inception_v3_ascend.yaml index 4bdfbe246..56313ae8d 100644 --- a/configs/inceptionv3/inception_v3_ascend.yaml +++ 
b/configs/inceptionv3/inception_v3_ascend.yaml @@ -47,7 +47,6 @@ warmup_epochs: 5 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00004 loss_scale: 1024 diff --git a/configs/inceptionv4/inception_v4_ascend.yaml b/configs/inceptionv4/inception_v4_ascend.yaml index 22ad03d40..7c1cf6342 100644 --- a/configs/inceptionv4/inception_v4_ascend.yaml +++ b/configs/inceptionv4/inception_v4_ascend.yaml @@ -46,7 +46,6 @@ warmup_epochs: 5 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00004 loss_scale: 1024 diff --git a/configs/mixnet/mixnet_l_ascend.yaml b/configs/mixnet/mixnet_l_ascend.yaml index c72755f12..7b6ec097a 100644 --- a/configs/mixnet/mixnet_l_ascend.yaml +++ b/configs/mixnet/mixnet_l_ascend.yaml @@ -48,7 +48,6 @@ warmup_epochs: 20 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00002 loss_scale_type: "dynamic" diff --git a/configs/mixnet/mixnet_m_ascend.yaml b/configs/mixnet/mixnet_m_ascend.yaml index a655a2c7e..50f430c7f 100644 --- a/configs/mixnet/mixnet_m_ascend.yaml +++ b/configs/mixnet/mixnet_m_ascend.yaml @@ -50,7 +50,6 @@ warmup_epochs: 15 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00002 loss_scale: 1024 diff --git a/configs/mixnet/mixnet_s_ascend.yaml b/configs/mixnet/mixnet_s_ascend.yaml index eaad3961f..e362e8a2d 100644 --- a/configs/mixnet/mixnet_s_ascend.yaml +++ b/configs/mixnet/mixnet_s_ascend.yaml @@ -49,7 +49,6 @@ warmup_epochs: 15 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00002 loss_scale: 256 diff --git a/configs/mnasnet/mnasnet_0.5_ascend.yaml b/configs/mnasnet/mnasnet_0.5_ascend.yaml index bbce1a8bf..791a70ccc 100644 --- a/configs/mnasnet/mnasnet_0.5_ascend.yaml +++ b/configs/mnasnet/mnasnet_0.5_ascend.yaml @@ -44,7 +44,6 @@ decay_epochs: 395 # optimizer opt: 'rmsprop' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 1e-5 loss_scale: 256 diff --git 
a/configs/mnasnet/mnasnet_0.75_ascend.yaml b/configs/mnasnet/mnasnet_0.75_ascend.yaml index 814c35807..f527eb810 100644 --- a/configs/mnasnet/mnasnet_0.75_ascend.yaml +++ b/configs/mnasnet/mnasnet_0.75_ascend.yaml @@ -43,7 +43,6 @@ decay_epochs: 345 # optimizer opt: 'rmsprop' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 1e-5 loss_scale: 256 diff --git a/configs/mnasnet/mnasnet_0.75_gpu.yaml b/configs/mnasnet/mnasnet_0.75_gpu.yaml index 7bbbadd9a..a19d44a01 100644 --- a/configs/mnasnet/mnasnet_0.75_gpu.yaml +++ b/configs/mnasnet/mnasnet_0.75_gpu.yaml @@ -43,7 +43,6 @@ decay_epochs: 345 # optimizer opt: 'rmsprop' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 1e-5 loss_scale: 256 diff --git a/configs/mnasnet/mnasnet_1.0_ascend.yaml b/configs/mnasnet/mnasnet_1.0_ascend.yaml index 771fdda34..3fb5e2017 100644 --- a/configs/mnasnet/mnasnet_1.0_ascend.yaml +++ b/configs/mnasnet/mnasnet_1.0_ascend.yaml @@ -43,7 +43,6 @@ decay_epochs: 445 # optimizer opt: 'rmsprop' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 1e-5 loss_scale: 256 diff --git a/configs/mnasnet/mnasnet_1.0_gpu.yaml b/configs/mnasnet/mnasnet_1.0_gpu.yaml index 4efd8240b..aedb770c7 100644 --- a/configs/mnasnet/mnasnet_1.0_gpu.yaml +++ b/configs/mnasnet/mnasnet_1.0_gpu.yaml @@ -43,7 +43,6 @@ decay_epochs: 445 # optimizer opt: 'rmsprop' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 1e-5 loss_scale: 256 diff --git a/configs/mnasnet/mnasnet_1.3_ascend.yaml b/configs/mnasnet/mnasnet_1.3_ascend.yaml index 5dcc2624f..95e0c7710 100644 --- a/configs/mnasnet/mnasnet_1.3_ascend.yaml +++ b/configs/mnasnet/mnasnet_1.3_ascend.yaml @@ -44,7 +44,6 @@ decay_epochs: 395 # optimizer opt: 'rmsprop' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 1e-5 loss_scale: 256 diff --git a/configs/mnasnet/mnasnet_1.4_ascend.yaml b/configs/mnasnet/mnasnet_1.4_ascend.yaml index dc84463ab..71f26a5d0 100644 --- a/configs/mnasnet/mnasnet_1.4_ascend.yaml +++ b/configs/mnasnet/mnasnet_1.4_ascend.yaml @@ -43,7 
+43,6 @@ decay_epochs: 395 # optimizer opt: 'rmsprop' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 1e-5 loss_scale: 256 diff --git a/configs/mnasnet/mnasnet_1.4_gpu.yaml b/configs/mnasnet/mnasnet_1.4_gpu.yaml index 7d30e3568..92790cfb3 100644 --- a/configs/mnasnet/mnasnet_1.4_gpu.yaml +++ b/configs/mnasnet/mnasnet_1.4_gpu.yaml @@ -43,7 +43,6 @@ decay_epochs: 395 # optimizer opt: 'rmsprop' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 1e-5 loss_scale: 256 diff --git a/configs/mobilenetv1/mobilenet_v1_0.25_ascend.yaml b/configs/mobilenetv1/mobilenet_v1_0.25_ascend.yaml index a089bb6b3..28058873c 100644 --- a/configs/mobilenetv1/mobilenet_v1_0.25_ascend.yaml +++ b/configs/mobilenetv1/mobilenet_v1_0.25_ascend.yaml @@ -43,7 +43,6 @@ decay_epochs: 198 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00003 loss_scale: 1024 diff --git a/configs/mobilenetv1/mobilenet_v1_0.25_gpu.yaml b/configs/mobilenetv1/mobilenet_v1_0.25_gpu.yaml index a089bb6b3..28058873c 100644 --- a/configs/mobilenetv1/mobilenet_v1_0.25_gpu.yaml +++ b/configs/mobilenetv1/mobilenet_v1_0.25_gpu.yaml @@ -43,7 +43,6 @@ decay_epochs: 198 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00003 loss_scale: 1024 diff --git a/configs/mobilenetv1/mobilenet_v1_0.5_ascend.yaml b/configs/mobilenetv1/mobilenet_v1_0.5_ascend.yaml index e9c0445cb..58beff2c9 100644 --- a/configs/mobilenetv1/mobilenet_v1_0.5_ascend.yaml +++ b/configs/mobilenetv1/mobilenet_v1_0.5_ascend.yaml @@ -43,7 +43,6 @@ decay_epochs: 198 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00003 loss_scale: 1024 diff --git a/configs/mobilenetv1/mobilenet_v1_0.5_gpu.yaml b/configs/mobilenetv1/mobilenet_v1_0.5_gpu.yaml index e9c0445cb..58beff2c9 100644 --- a/configs/mobilenetv1/mobilenet_v1_0.5_gpu.yaml +++ b/configs/mobilenetv1/mobilenet_v1_0.5_gpu.yaml @@ -43,7 +43,6 @@ decay_epochs: 198 # optimizer opt: 'momentum' 
-filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00003 loss_scale: 1024 diff --git a/configs/mobilenetv1/mobilenet_v1_0.75_ascend.yaml b/configs/mobilenetv1/mobilenet_v1_0.75_ascend.yaml index 8a37f2394..c9b8f35e3 100644 --- a/configs/mobilenetv1/mobilenet_v1_0.75_ascend.yaml +++ b/configs/mobilenetv1/mobilenet_v1_0.75_ascend.yaml @@ -43,7 +43,6 @@ decay_epochs: 198 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00003 loss_scale: 1024 diff --git a/configs/mobilenetv1/mobilenet_v1_0.75_gpu.yaml b/configs/mobilenetv1/mobilenet_v1_0.75_gpu.yaml index 8a37f2394..c9b8f35e3 100644 --- a/configs/mobilenetv1/mobilenet_v1_0.75_gpu.yaml +++ b/configs/mobilenetv1/mobilenet_v1_0.75_gpu.yaml @@ -43,7 +43,6 @@ decay_epochs: 198 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00003 loss_scale: 1024 diff --git a/configs/mobilenetv1/mobilenet_v1_1.0_ascend.yaml b/configs/mobilenetv1/mobilenet_v1_1.0_ascend.yaml index 543e65832..2c6ad6f35 100644 --- a/configs/mobilenetv1/mobilenet_v1_1.0_ascend.yaml +++ b/configs/mobilenetv1/mobilenet_v1_1.0_ascend.yaml @@ -43,7 +43,6 @@ decay_epochs: 198 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00003 loss_scale: 1024 diff --git a/configs/mobilenetv1/mobilenet_v1_1.0_gpu.yaml b/configs/mobilenetv1/mobilenet_v1_1.0_gpu.yaml index 4ef48e61c..05967c6ec 100644 --- a/configs/mobilenetv1/mobilenet_v1_1.0_gpu.yaml +++ b/configs/mobilenetv1/mobilenet_v1_1.0_gpu.yaml @@ -44,7 +44,6 @@ decay_epochs: 198 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00003 loss_scale: 1024 diff --git a/configs/mobilenetv2/mobilenet_v2_0.75_ascend.yaml b/configs/mobilenetv2/mobilenet_v2_0.75_ascend.yaml index 82fb12395..bc9ad4f33 100644 --- a/configs/mobilenetv2/mobilenet_v2_0.75_ascend.yaml +++ b/configs/mobilenetv2/mobilenet_v2_0.75_ascend.yaml @@ -45,7 +45,6 @@ decay_epochs: 396 # optimizer opt: 'momentum' 
-filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00003 loss_scale: 1024 diff --git a/configs/mobilenetv2/mobilenet_v2_1.0_ascend.yaml b/configs/mobilenetv2/mobilenet_v2_1.0_ascend.yaml index 5d271c1f7..0192d7804 100644 --- a/configs/mobilenetv2/mobilenet_v2_1.0_ascend.yaml +++ b/configs/mobilenetv2/mobilenet_v2_1.0_ascend.yaml @@ -45,7 +45,6 @@ decay_epochs: 316 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00004 loss_scale: 1024 diff --git a/configs/mobilenetv2/mobilenet_v2_1.4_ascend.yaml b/configs/mobilenetv2/mobilenet_v2_1.4_ascend.yaml index 56397195a..7201f01cc 100644 --- a/configs/mobilenetv2/mobilenet_v2_1.4_ascend.yaml +++ b/configs/mobilenetv2/mobilenet_v2_1.4_ascend.yaml @@ -45,7 +45,7 @@ decay_epochs: 296 # optimizer opt: 'momentum' -filter_bias_and_bn: False +weight_decay_filter: 'auto' momentum: 0.9 weight_decay: 0.00004 loss_scale: 1024 diff --git a/configs/mobilenetv3/mobilenet_v3_large_ascend.yaml b/configs/mobilenetv3/mobilenet_v3_large_ascend.yaml index 19eb0120f..8abe7c2c6 100644 --- a/configs/mobilenetv3/mobilenet_v3_large_ascend.yaml +++ b/configs/mobilenetv3/mobilenet_v3_large_ascend.yaml @@ -45,7 +45,7 @@ decay_epochs: 416 # optimizer opt: 'momentum' -filter_bias_and_bn: False +weight_decay_filter: 'auto' momentum: 0.9 weight_decay: 0.00002 loss_scale: 1024 diff --git a/configs/mobilenetv3/mobilenet_v3_small_ascend.yaml b/configs/mobilenetv3/mobilenet_v3_small_ascend.yaml index 5c75d4e0e..fe19448e4 100644 --- a/configs/mobilenetv3/mobilenet_v3_small_ascend.yaml +++ b/configs/mobilenetv3/mobilenet_v3_small_ascend.yaml @@ -46,7 +46,7 @@ decay_epochs: 466 # optimizer opt: 'momentum' -filter_bias_and_bn: False +weight_decay_filter: 'auto' momentum: 0.9 weight_decay: 0.00002 loss_scale: 1024 diff --git a/configs/mobilevit/mobilevit_small_ascend.yaml b/configs/mobilevit/mobilevit_small_ascend.yaml index 4650cadbe..5be05d8d7 100644 --- a/configs/mobilevit/mobilevit_small_ascend.yaml +++ 
b/configs/mobilevit/mobilevit_small_ascend.yaml @@ -46,7 +46,6 @@ decay_epochs: 430 # optimizer opt: 'adamw' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.01 use_nesterov: False diff --git a/configs/mobilevit/mobilevit_x_small_ascend.yaml b/configs/mobilevit/mobilevit_x_small_ascend.yaml index 994a7c60b..a8f2fa92b 100644 --- a/configs/mobilevit/mobilevit_x_small_ascend.yaml +++ b/configs/mobilevit/mobilevit_x_small_ascend.yaml @@ -46,7 +46,6 @@ decay_epochs: 430 # optimizer opt: 'adamw' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.01 use_nesterov: False diff --git a/configs/mobilevit/mobilevit_xx_small_ascend.yaml b/configs/mobilevit/mobilevit_xx_small_ascend.yaml index 127c5a9f7..583cbda27 100644 --- a/configs/mobilevit/mobilevit_xx_small_ascend.yaml +++ b/configs/mobilevit/mobilevit_xx_small_ascend.yaml @@ -46,7 +46,6 @@ decay_epochs: 410 # optimizer opt: 'adamw' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.01 use_nesterov: False diff --git a/configs/nasnet/nasnet_a_4x1056_ascend.yaml b/configs/nasnet/nasnet_a_4x1056_ascend.yaml index 07ea9b141..f996d16e2 100644 --- a/configs/nasnet/nasnet_a_4x1056_ascend.yaml +++ b/configs/nasnet/nasnet_a_4x1056_ascend.yaml @@ -44,7 +44,6 @@ decay_epochs: 445 # optimizer opt: 'rmsprop' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 1e-5 loss_scale_type: 'dynamic' diff --git a/configs/pit/pit_b_ascend.yaml b/configs/pit/pit_b_ascend.yaml index 5ca3c629a..47dc116e5 100644 --- a/configs/pit/pit_b_ascend.yaml +++ b/configs/pit/pit_b_ascend.yaml @@ -53,7 +53,6 @@ warmup_factor: 0.002 # optimizer opt: "adamw" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.05 loss_scale_type: "auto" diff --git a/configs/pit/pit_s_ascend.yaml b/configs/pit/pit_s_ascend.yaml index ef13fb3d8..0f2b79beb 100644 --- a/configs/pit/pit_s_ascend.yaml +++ b/configs/pit/pit_s_ascend.yaml @@ -53,7 +53,6 @@ warmup_factor: 0.002 # optimizer opt: "adamw" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.05 
loss_scale_type: "dynamic" diff --git a/configs/pit/pit_ti_ascend.yaml b/configs/pit/pit_ti_ascend.yaml index 0c8a47b51..7a8f73d03 100644 --- a/configs/pit/pit_ti_ascend.yaml +++ b/configs/pit/pit_ti_ascend.yaml @@ -50,7 +50,6 @@ warmup_epochs: 10 # optimizer opt: "adamw" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.05 loss_scale_type: "auto" diff --git a/configs/pit/pit_xs_ascend.yaml b/configs/pit/pit_xs_ascend.yaml index 9b8fc5fdf..c789d6658 100644 --- a/configs/pit/pit_xs_ascend.yaml +++ b/configs/pit/pit_xs_ascend.yaml @@ -51,7 +51,6 @@ warmup_epochs: 10 # optimizer opt: "adamw" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.05 loss_scale_type: "dynamic" diff --git a/configs/poolformer/poolformer_s12_ascend.yaml b/configs/poolformer/poolformer_s12_ascend.yaml index 535a5fe64..0e42aa94f 100644 --- a/configs/poolformer/poolformer_s12_ascend.yaml +++ b/configs/poolformer/poolformer_s12_ascend.yaml @@ -54,7 +54,6 @@ decay_rate: 0.1 # optimizer opt: 'AdamW' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.05 loss_scale: 1024 diff --git a/configs/pvt/pvt_large_ascend.yaml b/configs/pvt/pvt_large_ascend.yaml index 76a12370f..2078a499e 100644 --- a/configs/pvt/pvt_large_ascend.yaml +++ b/configs/pvt/pvt_large_ascend.yaml @@ -52,5 +52,4 @@ decay_epochs: 390 opt: 'adamw' weight_decay: 0.05 loss_scale: 300 -filter_bias_and_bn: True use_nesterov: False diff --git a/configs/pvt/pvt_medium_ascend.yaml b/configs/pvt/pvt_medium_ascend.yaml index 21560b614..d663f2c0f 100644 --- a/configs/pvt/pvt_medium_ascend.yaml +++ b/configs/pvt/pvt_medium_ascend.yaml @@ -52,5 +52,4 @@ decay_epochs: 390 opt: 'adamw' weight_decay: 0.05 loss_scale: 1024 -filter_bias_and_bn: True use_nesterov: False diff --git a/configs/pvt/pvt_small_ascend.yaml b/configs/pvt/pvt_small_ascend.yaml index 55f0ca9be..4b773e791 100644 --- a/configs/pvt/pvt_small_ascend.yaml +++ b/configs/pvt/pvt_small_ascend.yaml @@ -52,5 +52,4 @@ decay_epochs: 390 opt: 'adamw' weight_decay: 0.05 
loss_scale: 1024 -filter_bias_and_bn: True use_nesterov: False diff --git a/configs/pvt/pvt_tiny_ascend.yaml b/configs/pvt/pvt_tiny_ascend.yaml index 5610ea5ed..368922728 100644 --- a/configs/pvt/pvt_tiny_ascend.yaml +++ b/configs/pvt/pvt_tiny_ascend.yaml @@ -52,5 +52,4 @@ decay_epochs: 440 opt: 'adamw' weight_decay: 0.05 loss_scale: 1024 -filter_bias_and_bn: True use_nesterov: False diff --git a/configs/pvtv2/pvt_v2_b0_ascend.yaml b/configs/pvtv2/pvt_v2_b0_ascend.yaml index 8cd627636..518d4b23f 100644 --- a/configs/pvtv2/pvt_v2_b0_ascend.yaml +++ b/configs/pvtv2/pvt_v2_b0_ascend.yaml @@ -51,7 +51,6 @@ warmup_epochs: 10 # optimizer opt: "adamw" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.05 loss_scale_type: "dynamic" diff --git a/configs/pvtv2/pvt_v2_b1_ascend.yaml b/configs/pvtv2/pvt_v2_b1_ascend.yaml index 0e556eb70..9639adfe8 100644 --- a/configs/pvtv2/pvt_v2_b1_ascend.yaml +++ b/configs/pvtv2/pvt_v2_b1_ascend.yaml @@ -51,7 +51,6 @@ warmup_epochs: 10 # optimizer opt: "adamw" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.05 loss_scale_type: "dynamic" diff --git a/configs/pvtv2/pvt_v2_b2_ascend.yaml b/configs/pvtv2/pvt_v2_b2_ascend.yaml index 1f2ff2a44..a35addd9c 100644 --- a/configs/pvtv2/pvt_v2_b2_ascend.yaml +++ b/configs/pvtv2/pvt_v2_b2_ascend.yaml @@ -50,7 +50,6 @@ warmup_epochs: 20 # optimizer opt: "adamw" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.05 loss_scale_type: "dynamic" diff --git a/configs/pvtv2/pvt_v2_b3_ascend.yaml b/configs/pvtv2/pvt_v2_b3_ascend.yaml index e8037caf4..c2f51da6f 100644 --- a/configs/pvtv2/pvt_v2_b3_ascend.yaml +++ b/configs/pvtv2/pvt_v2_b3_ascend.yaml @@ -50,7 +50,6 @@ warmup_epochs: 20 # optimizer opt: "adamw" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.05 loss_scale_type: "auto" diff --git a/configs/pvtv2/pvt_v2_b4_ascend.yaml b/configs/pvtv2/pvt_v2_b4_ascend.yaml index 91bffc05b..1a247e124 100644 --- a/configs/pvtv2/pvt_v2_b4_ascend.yaml +++ b/configs/pvtv2/pvt_v2_b4_ascend.yaml @@ 
-50,7 +50,6 @@ warmup_epochs: 20 # optimizer opt: "adamw" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.05 loss_scale_type: "auto" diff --git a/configs/regnet/regnet_x_200mf_ascend.yaml b/configs/regnet/regnet_x_200mf_ascend.yaml index 1293f7431..272b45526 100644 --- a/configs/regnet/regnet_x_200mf_ascend.yaml +++ b/configs/regnet/regnet_x_200mf_ascend.yaml @@ -49,7 +49,6 @@ opt: 'momentum' momentum: 0.9 weight_decay: 5e-5 use_nesterov: False -filter_bias_and_bn: True # amp amp_level: 'O2' diff --git a/configs/regnet/regnet_x_400mf_ascend.yaml b/configs/regnet/regnet_x_400mf_ascend.yaml index 3073cfc3e..b93c62dea 100644 --- a/configs/regnet/regnet_x_400mf_ascend.yaml +++ b/configs/regnet/regnet_x_400mf_ascend.yaml @@ -49,7 +49,6 @@ opt: 'momentum' momentum: 0.9 weight_decay: 5e-5 use_nesterov: False -filter_bias_and_bn: True # amp amp_level: 'O2' diff --git a/configs/regnet/regnet_x_600mf_ascend.yaml b/configs/regnet/regnet_x_600mf_ascend.yaml index b45d7484e..6d7e9cb7f 100644 --- a/configs/regnet/regnet_x_600mf_ascend.yaml +++ b/configs/regnet/regnet_x_600mf_ascend.yaml @@ -49,7 +49,6 @@ opt: 'momentum' momentum: 0.9 weight_decay: 5e-5 use_nesterov: False -filter_bias_and_bn: True # amp amp_level: 'O2' diff --git a/configs/regnet/regnet_x_800mf_ascend.yaml b/configs/regnet/regnet_x_800mf_ascend.yaml index 9902be17b..ee2892371 100644 --- a/configs/regnet/regnet_x_800mf_ascend.yaml +++ b/configs/regnet/regnet_x_800mf_ascend.yaml @@ -52,4 +52,3 @@ momentum: 0.9 weight_decay: 0.0001 loss_scale: 128 use_nesterov: False -filter_bias_and_bn: True diff --git a/configs/regnet/regnet_y_16gf_ascend.yaml b/configs/regnet/regnet_y_16gf_ascend.yaml index c5588c231..29cd15886 100644 --- a/configs/regnet/regnet_y_16gf_ascend.yaml +++ b/configs/regnet/regnet_y_16gf_ascend.yaml @@ -55,7 +55,6 @@ opt: 'momentum' momentum: 0.9 weight_decay: 4e-5 use_nesterov: False -filter_bias_and_bn: True # amp amp_level: 'O2' diff --git a/configs/regnet/regnet_y_200mf_ascend.yaml 
b/configs/regnet/regnet_y_200mf_ascend.yaml index 2fecef323..aea21aaeb 100644 --- a/configs/regnet/regnet_y_200mf_ascend.yaml +++ b/configs/regnet/regnet_y_200mf_ascend.yaml @@ -49,7 +49,6 @@ opt: 'momentum' momentum: 0.9 weight_decay: 5e-5 use_nesterov: False -filter_bias_and_bn: True # amp amp_level: 'O2' diff --git a/configs/regnet/regnet_y_400mf_ascend.yaml b/configs/regnet/regnet_y_400mf_ascend.yaml index b8b90be87..e88aa1211 100644 --- a/configs/regnet/regnet_y_400mf_ascend.yaml +++ b/configs/regnet/regnet_y_400mf_ascend.yaml @@ -49,7 +49,6 @@ opt: 'momentum' momentum: 0.9 weight_decay: 5e-5 use_nesterov: False -filter_bias_and_bn: True # amp amp_level: 'O2' diff --git a/configs/regnet/regnet_y_600mf_ascend.yaml b/configs/regnet/regnet_y_600mf_ascend.yaml index b2687b424..b233e7e31 100644 --- a/configs/regnet/regnet_y_600mf_ascend.yaml +++ b/configs/regnet/regnet_y_600mf_ascend.yaml @@ -49,7 +49,6 @@ opt: 'momentum' momentum: 0.9 weight_decay: 5e-5 use_nesterov: False -filter_bias_and_bn: True # amp amp_level: 'O2' diff --git a/configs/regnet/regnet_y_800mf_ascend.yaml b/configs/regnet/regnet_y_800mf_ascend.yaml index 9471a0a49..ec72f0612 100644 --- a/configs/regnet/regnet_y_800mf_ascend.yaml +++ b/configs/regnet/regnet_y_800mf_ascend.yaml @@ -49,7 +49,6 @@ opt: 'momentum' momentum: 0.9 weight_decay: 5e-5 use_nesterov: False -filter_bias_and_bn: True # amp amp_level: 'O2' diff --git a/configs/repmlp/repmlp_t224_ascend.yaml b/configs/repmlp/repmlp_t224_ascend.yaml index c47e2777f..4c4fb4d2b 100644 --- a/configs/repmlp/repmlp_t224_ascend.yaml +++ b/configs/repmlp/repmlp_t224_ascend.yaml @@ -55,7 +55,6 @@ decay_rate: 0.01 # optimizer opt: 'adamw' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 2e-05 loss_scale: 1024 diff --git a/configs/repvgg/repvgg_a0_ascend.yaml b/configs/repvgg/repvgg_a0_ascend.yaml index 05919cb33..534d9d1a7 100644 --- a/configs/repvgg/repvgg_a0_ascend.yaml +++ b/configs/repvgg/repvgg_a0_ascend.yaml @@ -43,7 +43,6 @@ decay_epochs: 390 
# optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale_type: "dynamic" diff --git a/configs/repvgg/repvgg_a1_ascend.yaml b/configs/repvgg/repvgg_a1_ascend.yaml index 8b09349f5..3144967f0 100644 --- a/configs/repvgg/repvgg_a1_ascend.yaml +++ b/configs/repvgg/repvgg_a1_ascend.yaml @@ -43,7 +43,6 @@ decay_epochs: 235 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale_type: "dynamic" diff --git a/configs/repvgg/repvgg_a2_ascend.yaml b/configs/repvgg/repvgg_a2_ascend.yaml index 36ee7269a..554f43acf 100644 --- a/configs/repvgg/repvgg_a2_ascend.yaml +++ b/configs/repvgg/repvgg_a2_ascend.yaml @@ -43,7 +43,6 @@ decay_epochs: 235 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale_type: "dynamic" diff --git a/configs/repvgg/repvgg_b0_ascend.yaml b/configs/repvgg/repvgg_b0_ascend.yaml index e5ab6bbb1..92c38c723 100644 --- a/configs/repvgg/repvgg_b0_ascend.yaml +++ b/configs/repvgg/repvgg_b0_ascend.yaml @@ -43,7 +43,6 @@ decay_epochs: 235 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale_type: "dynamic" diff --git a/configs/repvgg/repvgg_b1_ascend.yaml b/configs/repvgg/repvgg_b1_ascend.yaml index b33462022..8329ac589 100644 --- a/configs/repvgg/repvgg_b1_ascend.yaml +++ b/configs/repvgg/repvgg_b1_ascend.yaml @@ -43,7 +43,6 @@ decay_epochs: 235 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale_type: "dynamic" diff --git a/configs/repvgg/repvgg_b1g2_ascend.yaml b/configs/repvgg/repvgg_b1g2_ascend.yaml index 28b5bd8c5..c44cbc0ed 100644 --- a/configs/repvgg/repvgg_b1g2_ascend.yaml +++ b/configs/repvgg/repvgg_b1g2_ascend.yaml @@ -43,7 +43,6 @@ decay_epochs: 235 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale_type: "dynamic" diff --git a/configs/repvgg/repvgg_b1g4_ascend.yaml 
b/configs/repvgg/repvgg_b1g4_ascend.yaml index 05e336b03..0c6af8863 100644 --- a/configs/repvgg/repvgg_b1g4_ascend.yaml +++ b/configs/repvgg/repvgg_b1g4_ascend.yaml @@ -43,7 +43,6 @@ decay_epochs: 235 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale_type: "dynamic" diff --git a/configs/repvgg/repvgg_b2_ascend.yaml b/configs/repvgg/repvgg_b2_ascend.yaml index e98e3df1d..c0d72b6fa 100644 --- a/configs/repvgg/repvgg_b2_ascend.yaml +++ b/configs/repvgg/repvgg_b2_ascend.yaml @@ -43,7 +43,6 @@ decay_epochs: 235 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale_type: "dynamic" diff --git a/configs/repvgg/repvgg_b2g4_ascend.yaml b/configs/repvgg/repvgg_b2g4_ascend.yaml index 324a45fa7..819ba81e6 100644 --- a/configs/repvgg/repvgg_b2g4_ascend.yaml +++ b/configs/repvgg/repvgg_b2g4_ascend.yaml @@ -43,7 +43,6 @@ decay_epochs: 235 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale_type: "dynamic" diff --git a/configs/repvgg/repvgg_b3_ascend.yaml b/configs/repvgg/repvgg_b3_ascend.yaml index d870f9c1b..e6dc995fe 100644 --- a/configs/repvgg/repvgg_b3_ascend.yaml +++ b/configs/repvgg/repvgg_b3_ascend.yaml @@ -46,7 +46,6 @@ decay_epochs: 235 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale_type: "dynamic" diff --git a/configs/res2net/res2net_101_ascend.yaml b/configs/res2net/res2net_101_ascend.yaml index a645151a0..ab9487df2 100644 --- a/configs/res2net/res2net_101_ascend.yaml +++ b/configs/res2net/res2net_101_ascend.yaml @@ -49,7 +49,6 @@ decay_epochs: 30 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/res2net/res2net_101_gpu.yaml b/configs/res2net/res2net_101_gpu.yaml index 632d7c29e..d1336cb3f 100644 --- a/configs/res2net/res2net_101_gpu.yaml +++ b/configs/res2net/res2net_101_gpu.yaml @@ -49,7 +49,6 @@ 
decay_epochs: 295 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/res2net/res2net_101_v1b_ascend.yaml b/configs/res2net/res2net_101_v1b_ascend.yaml index 5fdbe6fab..65f97ce98 100644 --- a/configs/res2net/res2net_101_v1b_ascend.yaml +++ b/configs/res2net/res2net_101_v1b_ascend.yaml @@ -48,7 +48,6 @@ decay_epochs: 196 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/res2net/res2net_101_v1b_gpu.yaml b/configs/res2net/res2net_101_v1b_gpu.yaml index 929678bd6..2282b5fb0 100644 --- a/configs/res2net/res2net_101_v1b_gpu.yaml +++ b/configs/res2net/res2net_101_v1b_gpu.yaml @@ -49,7 +49,6 @@ decay_epochs: 295 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/res2net/res2net_50_ascend.yaml b/configs/res2net/res2net_50_ascend.yaml index a9339b79d..8891f61b1 100644 --- a/configs/res2net/res2net_50_ascend.yaml +++ b/configs/res2net/res2net_50_ascend.yaml @@ -48,7 +48,6 @@ decay_epochs: 196 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/res2net/res2net_50_gpu.yaml b/configs/res2net/res2net_50_gpu.yaml index e283e00d6..74637a924 100644 --- a/configs/res2net/res2net_50_gpu.yaml +++ b/configs/res2net/res2net_50_gpu.yaml @@ -49,7 +49,6 @@ decay_epochs: 295 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/res2net/res2net_50_v1b_ascend.yaml b/configs/res2net/res2net_50_v1b_ascend.yaml index e75ce6b81..4f14e7035 100644 --- a/configs/res2net/res2net_50_v1b_ascend.yaml +++ b/configs/res2net/res2net_50_v1b_ascend.yaml @@ -49,7 +49,6 @@ decay_epochs: 295 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git 
a/configs/res2net/res2net_50_v1b_gpu.yaml b/configs/res2net/res2net_50_v1b_gpu.yaml index f404a027c..acd7ff522 100644 --- a/configs/res2net/res2net_50_v1b_gpu.yaml +++ b/configs/res2net/res2net_50_v1b_gpu.yaml @@ -49,7 +49,6 @@ decay_epochs: 295 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/resnest/resnest101_ascend.yaml b/configs/resnest/resnest101_ascend.yaml index a26319f3a..393536781 100644 --- a/configs/resnest/resnest101_ascend.yaml +++ b/configs/resnest/resnest101_ascend.yaml @@ -50,7 +50,6 @@ decay_epochs: 345 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00009 loss_scale_type: "auto" diff --git a/configs/resnest/resnest50_ascend.yaml b/configs/resnest/resnest50_ascend.yaml index c558f5c82..3d37a2267 100644 --- a/configs/resnest/resnest50_ascend.yaml +++ b/configs/resnest/resnest50_ascend.yaml @@ -49,7 +49,6 @@ decay_epochs: 345 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale_type: "dynamic" diff --git a/configs/resnet/resnet_101_ascend.yaml b/configs/resnet/resnet_101_ascend.yaml index 9022c0246..cdcffef17 100644 --- a/configs/resnet/resnet_101_ascend.yaml +++ b/configs/resnet/resnet_101_ascend.yaml @@ -43,7 +43,6 @@ decay_epochs: 120 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/resnet/resnet_101_gpu.yaml b/configs/resnet/resnet_101_gpu.yaml index 9022c0246..cdcffef17 100644 --- a/configs/resnet/resnet_101_gpu.yaml +++ b/configs/resnet/resnet_101_gpu.yaml @@ -43,7 +43,6 @@ decay_epochs: 120 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/resnet/resnet_152_ascend.yaml b/configs/resnet/resnet_152_ascend.yaml index ee01e33ad..7eb2b7b82 100644 --- a/configs/resnet/resnet_152_ascend.yaml +++ b/configs/resnet/resnet_152_ascend.yaml @@ 
-43,7 +43,6 @@ decay_epochs: 120 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/resnet/resnet_152_gpu.yaml b/configs/resnet/resnet_152_gpu.yaml index ee01e33ad..7eb2b7b82 100644 --- a/configs/resnet/resnet_152_gpu.yaml +++ b/configs/resnet/resnet_152_gpu.yaml @@ -43,7 +43,6 @@ decay_epochs: 120 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/resnet/resnet_18_ascend.yaml b/configs/resnet/resnet_18_ascend.yaml index 33b014631..33e08ac32 100644 --- a/configs/resnet/resnet_18_ascend.yaml +++ b/configs/resnet/resnet_18_ascend.yaml @@ -43,7 +43,6 @@ decay_epochs: 120 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/resnet/resnet_18_gpu.yaml b/configs/resnet/resnet_18_gpu.yaml index 33b014631..33e08ac32 100644 --- a/configs/resnet/resnet_18_gpu.yaml +++ b/configs/resnet/resnet_18_gpu.yaml @@ -43,7 +43,6 @@ decay_epochs: 120 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/resnet/resnet_34_ascend.yaml b/configs/resnet/resnet_34_ascend.yaml index 8e130cd3f..a9ecc5bae 100644 --- a/configs/resnet/resnet_34_ascend.yaml +++ b/configs/resnet/resnet_34_ascend.yaml @@ -43,7 +43,6 @@ decay_epochs: 120 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/resnet/resnet_34_gpu.yaml b/configs/resnet/resnet_34_gpu.yaml index 8e130cd3f..a9ecc5bae 100644 --- a/configs/resnet/resnet_34_gpu.yaml +++ b/configs/resnet/resnet_34_gpu.yaml @@ -43,7 +43,6 @@ decay_epochs: 120 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/resnet/resnet_50_ascend.yaml b/configs/resnet/resnet_50_ascend.yaml index 76f0be6de..2a2d7bbde 100644 --- 
a/configs/resnet/resnet_50_ascend.yaml +++ b/configs/resnet/resnet_50_ascend.yaml @@ -43,7 +43,6 @@ decay_epochs: 120 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/resnet/resnet_50_gpu.yaml b/configs/resnet/resnet_50_gpu.yaml index 76f0be6de..2a2d7bbde 100644 --- a/configs/resnet/resnet_50_gpu.yaml +++ b/configs/resnet/resnet_50_gpu.yaml @@ -43,7 +43,6 @@ decay_epochs: 120 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/resnetv2/resnetv2_101_ascend.yaml b/configs/resnetv2/resnetv2_101_ascend.yaml index 3f72b2e1b..35044d3fd 100644 --- a/configs/resnetv2/resnetv2_101_ascend.yaml +++ b/configs/resnetv2/resnetv2_101_ascend.yaml @@ -44,7 +44,6 @@ decay_epochs: 120 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/resnetv2/resnetv2_50_ascend.yaml b/configs/resnetv2/resnetv2_50_ascend.yaml index d299e424e..4ecbf9f4a 100644 --- a/configs/resnetv2/resnetv2_50_ascend.yaml +++ b/configs/resnetv2/resnetv2_50_ascend.yaml @@ -44,7 +44,6 @@ decay_epochs: 120 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/resnext/resnext101_32x4d_ascend.yaml b/configs/resnext/resnext101_32x4d_ascend.yaml index 247af9042..276d3fa7c 100644 --- a/configs/resnext/resnext101_32x4d_ascend.yaml +++ b/configs/resnext/resnext101_32x4d_ascend.yaml @@ -44,7 +44,6 @@ decay_epochs: 150 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/resnext/resnext101_64x4d_ascend.yaml b/configs/resnext/resnext101_64x4d_ascend.yaml index 25c9b5cb7..651e62ef1 100644 --- a/configs/resnext/resnext101_64x4d_ascend.yaml +++ b/configs/resnext/resnext101_64x4d_ascend.yaml @@ -44,7 +44,6 @@ decay_epochs: 150 # optimizer opt: "momentum" 
-filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/resnext/resnext152_64x4d_ascend.yaml b/configs/resnext/resnext152_64x4d_ascend.yaml index 9864bc906..12defbff0 100644 --- a/configs/resnext/resnext152_64x4d_ascend.yaml +++ b/configs/resnext/resnext152_64x4d_ascend.yaml @@ -44,7 +44,6 @@ decay_epochs: 150 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/resnext/resnext50_32x4d_ascend.yaml b/configs/resnext/resnext50_32x4d_ascend.yaml index 80d461d87..d3eee9284 100644 --- a/configs/resnext/resnext50_32x4d_ascend.yaml +++ b/configs/resnext/resnext50_32x4d_ascend.yaml @@ -44,7 +44,6 @@ decay_epochs: 150 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/rexnet/rexnet_x09_ascend.yaml b/configs/rexnet/rexnet_x09_ascend.yaml index 615b41a41..d3ed9a0c0 100644 --- a/configs/rexnet/rexnet_x09_ascend.yaml +++ b/configs/rexnet/rexnet_x09_ascend.yaml @@ -45,7 +45,6 @@ decay_epochs: 395 # optimizer opt: "sgd" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 1.0e-5 loss_scale: 1024 diff --git a/configs/rexnet/rexnet_x10_ascend.yaml b/configs/rexnet/rexnet_x10_ascend.yaml index 1439df063..f2637fc38 100644 --- a/configs/rexnet/rexnet_x10_ascend.yaml +++ b/configs/rexnet/rexnet_x10_ascend.yaml @@ -45,7 +45,6 @@ decay_epochs: 395 # optimizer opt: "sgd" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 1.0e-5 loss_scale: 1024 diff --git a/configs/rexnet/rexnet_x13_ascend.yaml b/configs/rexnet/rexnet_x13_ascend.yaml index 38d07d352..ec57a82ff 100644 --- a/configs/rexnet/rexnet_x13_ascend.yaml +++ b/configs/rexnet/rexnet_x13_ascend.yaml @@ -45,7 +45,6 @@ decay_epochs: 395 # optimizer opt: "sgd" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 1.0e-5 loss_scale: 1024 diff --git a/configs/rexnet/rexnet_x15_ascend.yaml b/configs/rexnet/rexnet_x15_ascend.yaml index 
2165d8c3f..d5845ad26 100644 --- a/configs/rexnet/rexnet_x15_ascend.yaml +++ b/configs/rexnet/rexnet_x15_ascend.yaml @@ -46,7 +46,6 @@ decay_epochs: 370 # optimizer opt: "sgd" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 1.0e-5 loss_scale: 1024 diff --git a/configs/rexnet/rexnet_x20_ascend.yaml b/configs/rexnet/rexnet_x20_ascend.yaml index b1729e176..023646973 100644 --- a/configs/rexnet/rexnet_x20_ascend.yaml +++ b/configs/rexnet/rexnet_x20_ascend.yaml @@ -46,7 +46,6 @@ decay_epochs: 370 # optimizer opt: "sgd" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 1.0e-5 loss_scale: 1024 diff --git a/configs/senet/seresnet18_ascend.yaml b/configs/senet/seresnet18_ascend.yaml index b437edba6..111f51a9d 100644 --- a/configs/senet/seresnet18_ascend.yaml +++ b/configs/senet/seresnet18_ascend.yaml @@ -44,7 +44,6 @@ decay_epochs: 150 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/senet/seresnet34_ascend.yaml b/configs/senet/seresnet34_ascend.yaml index 34420bc62..ac49133c6 100644 --- a/configs/senet/seresnet34_ascend.yaml +++ b/configs/senet/seresnet34_ascend.yaml @@ -44,7 +44,6 @@ decay_epochs: 150 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/senet/seresnet50_ascend.yaml b/configs/senet/seresnet50_ascend.yaml index f1a1849c6..f9b5aa21d 100644 --- a/configs/senet/seresnet50_ascend.yaml +++ b/configs/senet/seresnet50_ascend.yaml @@ -44,7 +44,6 @@ decay_epochs: 150 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/senet/seresnext26_32x4d_ascend.yaml b/configs/senet/seresnext26_32x4d_ascend.yaml index 18b519c8d..c424f9c46 100644 --- a/configs/senet/seresnext26_32x4d_ascend.yaml +++ b/configs/senet/seresnext26_32x4d_ascend.yaml @@ -44,7 +44,6 @@ decay_epochs: 150 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 
weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/senet/seresnext50_32x4d_ascend.yaml b/configs/senet/seresnext50_32x4d_ascend.yaml index 8c1ec6e89..88d3781ab 100644 --- a/configs/senet/seresnext50_32x4d_ascend.yaml +++ b/configs/senet/seresnext50_32x4d_ascend.yaml @@ -44,7 +44,6 @@ decay_epochs: 150 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/shufflenetv1/shufflenet_v1_0.5_ascend.yaml b/configs/shufflenetv1/shufflenet_v1_0.5_ascend.yaml index adccfdb9f..56b2dead3 100644 --- a/configs/shufflenetv1/shufflenet_v1_0.5_ascend.yaml +++ b/configs/shufflenetv1/shufflenet_v1_0.5_ascend.yaml @@ -42,7 +42,7 @@ decay_epochs: 246 # optimizer opt: "momentum" -filter_bias_and_bn: False +weight_decay_filter: 'auto' momentum: 0.9 weight_decay: 0.00004 loss_scale: 1024 diff --git a/configs/shufflenetv1/shufflenet_v1_1.0_ascend.yaml b/configs/shufflenetv1/shufflenet_v1_1.0_ascend.yaml index a48a0d28b..85cff636d 100644 --- a/configs/shufflenetv1/shufflenet_v1_1.0_ascend.yaml +++ b/configs/shufflenetv1/shufflenet_v1_1.0_ascend.yaml @@ -42,7 +42,7 @@ decay_epochs: 246 # optimizer opt: "momentum" -filter_bias_and_bn: False +weight_decay_filter: 'auto' momentum: 0.9 weight_decay: 0.00004 loss_scale: 1024 diff --git a/configs/shufflenetv2/shufflenet_v2_0.5_ascend.yaml b/configs/shufflenetv2/shufflenet_v2_0.5_ascend.yaml index 23f6e42fd..29d2752da 100644 --- a/configs/shufflenetv2/shufflenet_v2_0.5_ascend.yaml +++ b/configs/shufflenetv2/shufflenet_v2_0.5_ascend.yaml @@ -44,7 +44,7 @@ decay_epochs: 246 # optimizer opt: 'momentum' -filter_bias_and_bn: False +weight_decay_filter: 'auto' momentum: 0.9 weight_decay: 0.00004 loss_scale: 1024 diff --git a/configs/shufflenetv2/shufflenet_v2_1.0_ascend.yaml b/configs/shufflenetv2/shufflenet_v2_1.0_ascend.yaml index d21403653..76f557221 100644 --- a/configs/shufflenetv2/shufflenet_v2_1.0_ascend.yaml +++ b/configs/shufflenetv2/shufflenet_v2_1.0_ascend.yaml @@ 
-44,7 +44,7 @@ decay_epochs: 295 # optimizer opt: 'momentum' -filter_bias_and_bn: False +weight_decay_filter: 'auto' momentum: 0.9 weight_decay: 0.00004 loss_scale: 1024 diff --git a/configs/shufflenetv2/shufflenet_v2_1.5_ascend.yaml b/configs/shufflenetv2/shufflenet_v2_1.5_ascend.yaml index a6871768a..4801974e8 100644 --- a/configs/shufflenetv2/shufflenet_v2_1.5_ascend.yaml +++ b/configs/shufflenetv2/shufflenet_v2_1.5_ascend.yaml @@ -44,7 +44,7 @@ decay_epochs: 295 # optimizer opt: 'momentum' -filter_bias_and_bn: False +weight_decay_filter: 'auto' momentum: 0.9 weight_decay: 0.00004 loss_scale: 1024 diff --git a/configs/shufflenetv2/shufflenet_v2_2.0_ascend.yaml b/configs/shufflenetv2/shufflenet_v2_2.0_ascend.yaml index 974149fbe..a8531236b 100644 --- a/configs/shufflenetv2/shufflenet_v2_2.0_ascend.yaml +++ b/configs/shufflenetv2/shufflenet_v2_2.0_ascend.yaml @@ -44,7 +44,7 @@ decay_epochs: 295 # optimizer opt: 'momentum' -filter_bias_and_bn: False +weight_decay_filter: 'auto' momentum: 0.9 weight_decay: 0.00004 loss_scale: 1024 diff --git a/configs/sknet/skresnet18_ascend.yaml b/configs/sknet/skresnet18_ascend.yaml index c8f9fa2a2..97ebfca39 100644 --- a/configs/sknet/skresnet18_ascend.yaml +++ b/configs/sknet/skresnet18_ascend.yaml @@ -44,7 +44,6 @@ decay_epochs: 195 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00004 loss_scale: 1024 diff --git a/configs/sknet/skresnet34_ascend.yaml b/configs/sknet/skresnet34_ascend.yaml index 22275fc5b..87e62b8a9 100644 --- a/configs/sknet/skresnet34_ascend.yaml +++ b/configs/sknet/skresnet34_ascend.yaml @@ -47,7 +47,6 @@ warmup_factor: 0.01 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00004 loss_scale: 128 diff --git a/configs/sknet/skresnext50_32x4d_ascend.yaml b/configs/sknet/skresnext50_32x4d_ascend.yaml index 7da59b34a..9e557e558 100644 --- a/configs/sknet/skresnext50_32x4d_ascend.yaml +++ b/configs/sknet/skresnext50_32x4d_ascend.yaml @@ -44,7 
+44,6 @@ decay_epochs: 195 # optimizer opt: "momentum" -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/squeezenet/squeezenet_1.0_ascend.yaml b/configs/squeezenet/squeezenet_1.0_ascend.yaml index 0179d00cb..cb2df5061 100644 --- a/configs/squeezenet/squeezenet_1.0_ascend.yaml +++ b/configs/squeezenet/squeezenet_1.0_ascend.yaml @@ -45,7 +45,6 @@ decay_epochs: 200 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0001 loss_scale: 1024 diff --git a/configs/squeezenet/squeezenet_1.0_gpu.yaml b/configs/squeezenet/squeezenet_1.0_gpu.yaml index 73eab2961..932ec107b 100644 --- a/configs/squeezenet/squeezenet_1.0_gpu.yaml +++ b/configs/squeezenet/squeezenet_1.0_gpu.yaml @@ -45,7 +45,6 @@ decay_epochs: 200 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00007 loss_scale: 1024 diff --git a/configs/squeezenet/squeezenet_1.1_ascend.yaml b/configs/squeezenet/squeezenet_1.1_ascend.yaml index d5a6ee90d..790dded67 100644 --- a/configs/squeezenet/squeezenet_1.1_ascend.yaml +++ b/configs/squeezenet/squeezenet_1.1_ascend.yaml @@ -45,7 +45,6 @@ decay_epochs: 200 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.0002 loss_scale: 1024 diff --git a/configs/squeezenet/squeezenet_1.1_gpu.yaml b/configs/squeezenet/squeezenet_1.1_gpu.yaml index 806783fd8..ee9ffa1a3 100644 --- a/configs/squeezenet/squeezenet_1.1_gpu.yaml +++ b/configs/squeezenet/squeezenet_1.1_gpu.yaml @@ -45,7 +45,6 @@ decay_epochs: 200 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00007 loss_scale: 1024 diff --git a/configs/swintransformer/swin_tiny_ascend.yaml b/configs/swintransformer/swin_tiny_ascend.yaml index 86ad0d396..1717a0dac 100644 --- a/configs/swintransformer/swin_tiny_ascend.yaml +++ b/configs/swintransformer/swin_tiny_ascend.yaml @@ -55,5 +55,4 @@ lr_epoch_stair: False # optimizer opt: "adamw" weight_decay: 0.025 
-filter_bias_and_bn: True use_nesterov: False diff --git a/configs/swintransformerv2/swinv2_tiny_window8_ascend.yaml b/configs/swintransformerv2/swinv2_tiny_window8_ascend.yaml index bca9c0c00..b381f6679 100644 --- a/configs/swintransformerv2/swinv2_tiny_window8_ascend.yaml +++ b/configs/swintransformerv2/swinv2_tiny_window8_ascend.yaml @@ -49,7 +49,6 @@ warmup_epochs: 20 # optimizer opt: 'adamw' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.05 loss_scale: 1024 diff --git a/configs/vgg/vgg11_ascend.yaml b/configs/vgg/vgg11_ascend.yaml index 26c2744a1..effa5a6d2 100644 --- a/configs/vgg/vgg11_ascend.yaml +++ b/configs/vgg/vgg11_ascend.yaml @@ -44,7 +44,6 @@ warmup_epochs: 2 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00004 loss_scale: 1024 diff --git a/configs/vgg/vgg13_ascend.yaml b/configs/vgg/vgg13_ascend.yaml index 2f04aed11..419e49b57 100644 --- a/configs/vgg/vgg13_ascend.yaml +++ b/configs/vgg/vgg13_ascend.yaml @@ -44,7 +44,6 @@ warmup_epochs: 2 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00004 loss_scale: 1024 diff --git a/configs/vgg/vgg16_ascend.yaml b/configs/vgg/vgg16_ascend.yaml index 07f12f31d..5ddfd2826 100644 --- a/configs/vgg/vgg16_ascend.yaml +++ b/configs/vgg/vgg16_ascend.yaml @@ -44,7 +44,6 @@ warmup_epochs: 2 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00004 loss_scale: 1024 diff --git a/configs/vgg/vgg19_ascend.yaml b/configs/vgg/vgg19_ascend.yaml index 0a6aa2a97..c01d97e40 100644 --- a/configs/vgg/vgg19_ascend.yaml +++ b/configs/vgg/vgg19_ascend.yaml @@ -44,7 +44,6 @@ warmup_epochs: 2 # optimizer opt: 'momentum' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00004 loss_scale: 1024 diff --git a/configs/vit/vit_b32_224_ascend.yaml b/configs/vit/vit_b32_224_ascend.yaml index 06d03914b..e7b17e53f 100644 --- a/configs/vit/vit_b32_224_ascend.yaml +++ b/configs/vit/vit_b32_224_ascend.yaml @@ -57,5 +57,4 @@ 
lr_epoch_stair: False # optimizer opt: "adamw" weight_decay: 0.025 -filter_bias_and_bn: True use_nesterov: False diff --git a/configs/vit/vit_l16_224_ascend.yaml b/configs/vit/vit_l16_224_ascend.yaml index d336788a4..fd1c66ccc 100644 --- a/configs/vit/vit_l16_224_ascend.yaml +++ b/configs/vit/vit_l16_224_ascend.yaml @@ -57,5 +57,4 @@ lr_epoch_stair: False # optimizer opt: "adamw" weight_decay: 0.05 -filter_bias_and_bn: True use_nesterov: False diff --git a/configs/vit/vit_l32_224_ascend.yaml b/configs/vit/vit_l32_224_ascend.yaml index 82084c4c3..511e7f4e7 100644 --- a/configs/vit/vit_l32_224_ascend.yaml +++ b/configs/vit/vit_l32_224_ascend.yaml @@ -57,5 +57,4 @@ lr_epoch_stair: False # optimizer opt: "adamw" weight_decay: 0.025 -filter_bias_and_bn: True use_nesterov: False diff --git a/configs/volo/volo_d1_ascend.yaml b/configs/volo/volo_d1_ascend.yaml index 247817aa6..d7fffd19b 100644 --- a/configs/volo/volo_d1_ascend.yaml +++ b/configs/volo/volo_d1_ascend.yaml @@ -60,7 +60,6 @@ decay_rate: 0.1 opt: 'adamw' weight_decay: 0.05 momentum: 0.9 -filter_bias_and_bn: True loss_scale_type: 'dynamic' loss_scale: 1024 use_nesterov: False diff --git a/configs/volo/volo_d2_ascend.yaml b/configs/volo/volo_d2_ascend.yaml index a62083ffe..5a92ef8bd 100644 --- a/configs/volo/volo_d2_ascend.yaml +++ b/configs/volo/volo_d2_ascend.yaml @@ -60,7 +60,6 @@ decay_rate: 0.1 opt: 'adamw' weight_decay: 0.05 momentum: 0.9 -filter_bias_and_bn: True loss_scale_type: 'dynamic' loss_scale: 2048 use_nesterov: False diff --git a/configs/volo/volo_d3_ascend.yaml b/configs/volo/volo_d3_ascend.yaml index 60e67e68d..ce15f75b1 100644 --- a/configs/volo/volo_d3_ascend.yaml +++ b/configs/volo/volo_d3_ascend.yaml @@ -61,7 +61,6 @@ decay_rate: 0.1 opt: 'adamw' weight_decay: 0.05 momentum: 0.9 -filter_bias_and_bn: True loss_scale_type: 'dynamic' loss_scale: 1024 use_nesterov: False diff --git a/configs/volo/volo_d4_ascend.yaml b/configs/volo/volo_d4_ascend.yaml index 72e70023b..a2c300da3 100644 --- 
a/configs/volo/volo_d4_ascend.yaml +++ b/configs/volo/volo_d4_ascend.yaml @@ -60,7 +60,6 @@ decay_rate: 0.1 opt: 'adamw' weight_decay: 0.05 momentum: 0.9 -filter_bias_and_bn: True loss_scale_type: 'dynamic' loss_scale: 1024 use_nesterov: False diff --git a/configs/xception/xception_ascend.yaml b/configs/xception/xception_ascend.yaml index 03db59d47..907d5809e 100644 --- a/configs/xception/xception_ascend.yaml +++ b/configs/xception/xception_ascend.yaml @@ -46,7 +46,6 @@ warmup_epochs: 5 # optimizer opt: 'sgd' -filter_bias_and_bn: True momentum: 0.9 weight_decay: 0.00001 loss_scale: 1024 diff --git a/configs/xcit/xcit_tiny_12_p16_ascend.yaml b/configs/xcit/xcit_tiny_12_p16_ascend.yaml index 4307ef3ea..898f86573 100644 --- a/configs/xcit/xcit_tiny_12_p16_ascend.yaml +++ b/configs/xcit/xcit_tiny_12_p16_ascend.yaml @@ -52,7 +52,6 @@ decay_rate: 0.1 # optimizer opt: 'adamw' -filter_bias_and_bn: True weight_decay: 0.05 loss_scale: 1024 use_nesterov: False diff --git a/docs/en/tutorials/configuration.md b/docs/en/tutorials/configuration.md index d91325a41..661eb059b 100644 --- a/docs/en/tutorials/configuration.md +++ b/docs/en/tutorials/configuration.md @@ -354,7 +354,7 @@ Let's use squeezenet_1.0 model as an example to explain how to configure the cor - opt: name of optimizer. - - filter_bias_and_bn: filter Bias and BatchNorm. + - weight_decay_filter: weight decay filter (filter parameters from weight decay). - momentum: Hyperparameter of type float, means momentum for the moving average. @@ -368,7 +368,7 @@ Let's use squeezenet_1.0 model as an example to explain how to configure the cor ```yaml opt: 'momentum' - filter_bias_and_bn: True + weight_decay_filter: 'norm_and_bias' momentum: 0.9 weight_decay: 0.00007 loss_scale: 1024 @@ -379,7 +379,7 @@ Let's use squeezenet_1.0 model as an example to explain how to configure the cor 3. Parse parameter setting ```shell - python train.py ... --opt momentum --filter_bias_and_bn True --weight_decay 0.00007 \ + python train.py ... 
--opt momentum --weight_decay_filter 'norm_and_bias' --weight_decay 0.00007 \ --loss_scale 1024 --use_nesterov False ... ``` @@ -395,7 +395,7 @@ Let's use squeezenet_1.0 model as an example to explain how to configure the cor weight_decay=args.weight_decay, momentum=args.momentum, nesterov=args.use_nesterov, - filter_bias_and_bn=args.filter_bias_and_bn, + weight_decay_filter=args.weight_decay_filter, loss_scale=args.loss_scale, checkpoint_path=opt_ckpt_path, eps=args.eps @@ -407,7 +407,7 @@ Let's use squeezenet_1.0 model as an example to explain how to configure the cor weight_decay=args.weight_decay, momentum=args.momentum, nesterov=args.use_nesterov, - filter_bias_and_bn=args.filter_bias_and_bn, + weight_decay_filter=args.weight_decay_filter, checkpoint_path=opt_ckpt_path, eps=args.eps ) diff --git a/docs/zh/tutorials/configuration.md b/docs/zh/tutorials/configuration.md index c8f2ea28a..e24242f44 100644 --- a/docs/zh/tutorials/configuration.md +++ b/docs/zh/tutorials/configuration.md @@ -354,7 +354,7 @@ - opt:优化器名称。 - - filter_bias_and_bn:参数中是否包含bias,gamma或者beta。 + - weight_decay_filter:权重衰减过滤器 (过滤一些参数, 使其在更新时不做权重衰减)。 - momentum:移动平均的动量。 @@ -368,7 +368,7 @@ ```yaml opt: 'momentum' - filter_bias_and_bn: True + weight_decay_filter: 'norm_and_bias' momentum: 0.9 weight_decay: 0.00007 loss_scale: 1024 @@ -379,7 +379,7 @@ 3. parse参数设置 ```shell - python train.py ... --opt momentum --filter_bias_and_bn True --weight_decay 0.00007 \ + python train.py ... --opt momentum --weight_decay_filter 'norm_and_bias' --weight_decay 0.00007 \ --loss_scale 1024 --use_nesterov False ... 
``` @@ -395,7 +395,7 @@ weight_decay=args.weight_decay, momentum=args.momentum, nesterov=args.use_nesterov, - filter_bias_and_bn=args.filter_bias_and_bn, + weight_decay_filter=args.weight_decay_filter, loss_scale=args.loss_scale, checkpoint_path=opt_ckpt_path, eps=args.eps @@ -407,7 +407,7 @@ weight_decay=args.weight_decay, momentum=args.momentum, nesterov=args.use_nesterov, - filter_bias_and_bn=args.filter_bias_and_bn, + weight_decay_filter=args.weight_decay_filter, checkpoint_path=opt_ckpt_path, eps=args.eps ) diff --git a/examples/finetune/finetune.py b/examples/finetune/finetune.py index d3af1d87a..966651108 100644 --- a/examples/finetune/finetune.py +++ b/examples/finetune/finetune.py @@ -283,7 +283,7 @@ def finetune_train(args): weight_decay=args.weight_decay, momentum=args.momentum, nesterov=args.use_nesterov, - filter_bias_and_bn=args.filter_bias_and_bn, + weight_decay_filter=args.weight_decay_filter, loss_scale=optimizer_loss_scale, checkpoint_path=opt_ckpt_path, eps=args.eps, diff --git a/examples/seg/deeplabv3/config/deeplabv3_s16_dilated_resnet101.yaml b/examples/seg/deeplabv3/config/deeplabv3_s16_dilated_resnet101.yaml index 8ddd0b13b..186175c48 100644 --- a/examples/seg/deeplabv3/config/deeplabv3_s16_dilated_resnet101.yaml +++ b/examples/seg/deeplabv3/config/deeplabv3_s16_dilated_resnet101.yaml @@ -51,7 +51,7 @@ drop_overflow_update: False loss_scale: 3072.0 momentum: 0.9 weight_decay: 0.0001 -filter_bias_and_bn: False +weight_decay_filter: 'auto' gradient_accumulation_steps: 1 # callbacks diff --git a/examples/seg/deeplabv3/config/deeplabv3_s8_dilated_resnet101.yaml b/examples/seg/deeplabv3/config/deeplabv3_s8_dilated_resnet101.yaml index 626774f79..be44da9cf 100644 --- a/examples/seg/deeplabv3/config/deeplabv3_s8_dilated_resnet101.yaml +++ b/examples/seg/deeplabv3/config/deeplabv3_s8_dilated_resnet101.yaml @@ -51,7 +51,7 @@ drop_overflow_update: False loss_scale: 2048.0 momentum: 0.9 weight_decay: 0.0001 -filter_bias_and_bn: False 
+weight_decay_filter: 'auto' gradient_accumulation_steps: 1 # callbacks diff --git a/examples/seg/deeplabv3/config/deeplabv3plus_s16_dilated_resnet101.yaml b/examples/seg/deeplabv3/config/deeplabv3plus_s16_dilated_resnet101.yaml index 9fec9af09..3dfcf277c 100644 --- a/examples/seg/deeplabv3/config/deeplabv3plus_s16_dilated_resnet101.yaml +++ b/examples/seg/deeplabv3/config/deeplabv3plus_s16_dilated_resnet101.yaml @@ -51,7 +51,7 @@ drop_overflow_update: False loss_scale: 3072.0 momentum: 0.9 weight_decay: 0.0001 -filter_bias_and_bn: False +weight_decay_filter: 'auto' gradient_accumulation_steps: 1 # callbacks diff --git a/examples/seg/deeplabv3/config/deeplabv3plus_s8_dilated_resnet101.yaml b/examples/seg/deeplabv3/config/deeplabv3plus_s8_dilated_resnet101.yaml index 7e97d7910..73f0e8881 100644 --- a/examples/seg/deeplabv3/config/deeplabv3plus_s8_dilated_resnet101.yaml +++ b/examples/seg/deeplabv3/config/deeplabv3plus_s8_dilated_resnet101.yaml @@ -51,7 +51,7 @@ drop_overflow_update: False loss_scale: 2048.0 momentum: 0.9 weight_decay: 0.0001 -filter_bias_and_bn: False +weight_decay_filter: 'auto' gradient_accumulation_steps: 1 # callbacks diff --git a/examples/seg/deeplabv3/train.py b/examples/seg/deeplabv3/train.py index 9b5ff1179..d0c021c01 100644 --- a/examples/seg/deeplabv3/train.py +++ b/examples/seg/deeplabv3/train.py @@ -118,7 +118,7 @@ def train(args): lr=lr_scheduler, weight_decay=args.weight_decay, momentum=args.momentum, - filter_bias_and_bn=args.filter_bias_and_bn, + weight_decay_filter=args.weight_decay_filter, loss_scale=optimizer_loss_scale, ) diff --git a/mindcv/optim/optim_factory.py b/mindcv/optim/optim_factory.py index 7fe6bf282..9d5e54ccc 100644 --- a/mindcv/optim/optim_factory.py +++ b/mindcv/optim/optim_factory.py @@ -1,4 +1,5 @@ """ optim factory """ +import logging import os from typing import Optional @@ -11,11 +12,18 @@ __all__ = ["create_optimizer"] +_logger = logging.getLogger(__name__) + + +def init_group_params(params, weight_decay, 
weight_decay_filter): + if weight_decay_filter == "disable": + return [ + {"params": params, "weight_decay": weight_decay}, + {"order_params": params}, + ] -def init_group_params(params, weight_decay): decay_params = [] no_decay_params = [] - for param in params: if "beta" not in param.name and "gamma" not in param.name and "bias" not in param.name: decay_params.append(param) @@ -23,7 +31,7 @@ def init_group_params(params, weight_decay): no_decay_params.append(param) return [ {"params": decay_params, "weight_decay": weight_decay}, - {"params": no_decay_params}, + {"params": no_decay_params, "weight_decay": 0.0}, {"order_params": params}, ] @@ -35,7 +43,7 @@ def create_optimizer( weight_decay: float = 0, momentum: float = 0.9, nesterov: bool = False, - filter_bias_and_bn: bool = True, + weight_decay_filter: str = "disable", loss_scale: float = 1.0, schedule_decay: float = 4e-3, checkpoint_path: str = "", @@ -59,8 +67,11 @@ def create_optimizer( of current step. Default: 0. momentum: momentum if the optimizer supports. Default: 0.9. nesterov: Whether to use Nesterov Accelerated Gradient (NAG) algorithm to update the gradients. Default: False. - filter_bias_and_bn: whether to filter batch norm parameters and bias from weight decay. - If True, weight decay will not apply on BN parameters and bias in Conv or Dense layers. Default: True. + weight_decay_filter: filters to filter parameters from weight_decay. + - "disable": No parameters to filter. + - "auto": We do not apply weight decay filtering to any parameters. However, MindSpore currently + automatically filters the parameters of Norm layer from weight decay. + - "norm_and_bias": Filter the paramters of Norm layer and Bias from weight decay. loss_scale: A floating point value for the loss scale, which must be larger than 0.0. Default: 1.0. 
Returns: @@ -68,9 +79,19 @@ def create_optimizer( """ opt = opt.lower() - - if weight_decay and filter_bias_and_bn: - params = init_group_params(params, weight_decay) + if weight_decay_filter == "auto": + _logger.warning( + "You are using AUTO weight decay filter, which means the weight decay filter isn't explicitly pass in " + "when creating an mindspore.nn.Optimizer instance. " + "NOTE: mindspore.nn.Optimizer will filter Norm parmas from weight decay. " + ) + elif weight_decay_filter == "disable" or "norm_and_bias": + params = init_group_params(params, weight_decay, weight_decay_filter) + weight_decay = 0.0 + else: + raise ValueError( + f"weight decay filter only support ['disable', 'auto', 'norm_and_bias'], but got{weight_decay_filter}." + ) opt_args = dict(**kwargs) # if lr is not None: diff --git a/tests/modules/parallel/test_parallel_optim.py b/tests/modules/parallel/test_parallel_optim.py index 651f36c4d..9a2bf6616 100644 --- a/tests/modules/parallel/test_parallel_optim.py +++ b/tests/modules/parallel/test_parallel_optim.py @@ -44,8 +44,8 @@ def construct(self, x): @pytest.mark.parametrize("opt", ["sgd", "momentum"]) @pytest.mark.parametrize("nesterov", [True, False]) -@pytest.mark.parametrize("filter_bias_and_bn", [True, False]) -def test_sgd_optimizer(opt, nesterov, filter_bias_and_bn): +@pytest.mark.parametrize("weight_decay_filter", ["auto", "disable", "norm_and_bias"]) +def test_sgd_optimizer(opt, nesterov, weight_decay_filter): init("nccl") device_num = get_group_size() rank_id = get_rank() # noqa: F841 @@ -64,7 +64,7 @@ def test_sgd_optimizer(opt, nesterov, filter_bias_and_bn): weight_decay=1e-5, momentum=0.9, nesterov=nesterov, - filter_bias_and_bn=filter_bias_and_bn, + weight_decay_filter=weight_decay_filter, ) bs = 8 @@ -227,7 +227,7 @@ def test_param_lr_001_filter_bias_and_bn_optimizer(): weight_decay=1e-5, momentum=0.9, nesterov=False, - filter_bias_and_bn=False, + weight_decay_filter="auto", ) bs = 8 @@ -273,7 +273,7 @@ def 
test_param_lr_0001_filter_bias_and_bn_optimizer(): weight_decay=1e-5, momentum=0.9, nesterov=False, - filter_bias_and_bn=False, + weight_decay_filter="auto", ) bs = 8 @@ -315,7 +315,7 @@ def test_wrong_momentum_optimizer(momentum): momentum=momentum, loss_scale=1.0, nesterov=False, - filter_bias_and_bn=True, + weight_decay_filter="disable", ) bs = 8 @@ -357,7 +357,7 @@ def test_wrong_loss_scale_optimizer(loss_scale): momentum=0.9, loss_scale=loss_scale, nesterov=False, - filter_bias_and_bn=True, + weight_decay_filter="disable", ) bs = 8 @@ -400,7 +400,7 @@ def test_wrong_weight_decay_optimizer(weight_decay): momentum=0.9, loss_scale=1.0, nesterov=False, - filter_bias_and_bn=True, + weight_decay_filter="disable", ) bs = 8 @@ -442,7 +442,7 @@ def test_wrong_lr_optimizer(lr): momentum=0.9, loss_scale=1.0, nesterov=False, - filter_bias_and_bn=True, + weight_decay_filter="disable", ) bs = 8 @@ -488,7 +488,7 @@ def test_param_lr_01_filter_bias_and_bn_optimizer(): weight_decay=1e-5, momentum=0.9, nesterov=False, - filter_bias_and_bn=False, + weight_decay_filter="auto", ) bs = 8 @@ -530,7 +530,7 @@ def test_wrong_opt_optimizer(opt): momentum=0.9, loss_scale=1.0, nesterov=False, - filter_bias_and_bn=True, + weight_decay_filter="disable", ) bs = 8 @@ -579,7 +579,7 @@ def test_wrong_params_more_optimizer(): momentum=0.9, loss_scale=1.0, nesterov=False, - filter_bias_and_bn=False, + weight_decay_filter="auto", ) bs = 8 @@ -627,7 +627,7 @@ def test_wrong_params_input_optimizer(): momentum=0.9, loss_scale=1.0, nesterov=False, - filter_bias_and_bn=False, + weight_decay_filter="auto", ) bs = 8 @@ -681,7 +681,7 @@ def test_mode_mult_single_optimizer(mode): weight_decay=1e-5, momentum=0.9, nesterov=False, - filter_bias_and_bn=False, + weight_decay_filter="auto", ) bs = 8 diff --git a/tests/modules/test_optim.py b/tests/modules/test_optim.py index 007cbb87f..56bc61244 100644 --- a/tests/modules/test_optim.py +++ b/tests/modules/test_optim.py @@ -43,8 +43,8 @@ def construct(self, x): 
@pytest.mark.parametrize("opt", ["sgd", "momentum"]) @pytest.mark.parametrize("nesterov", [True, False]) -@pytest.mark.parametrize("filter_bias_and_bn", [True, False]) -def test_sgd_optimizer(opt, nesterov, filter_bias_and_bn): +@pytest.mark.parametrize("weight_decay_filter", ["disable", "auto", "norm_and_bias"]) +def test_sgd_optimizer(opt, nesterov, weight_decay_filter): network = SimpleCNN(in_channels=1, num_classes=10) net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") @@ -55,7 +55,7 @@ def test_sgd_optimizer(opt, nesterov, filter_bias_and_bn): weight_decay=1e-5, momentum=0.9, nesterov=nesterov, - filter_bias_and_bn=filter_bias_and_bn, + weight_decay_filter=weight_decay_filter, ) bs = 8 @@ -171,7 +171,7 @@ def test_param_lr_001_filter_bias_and_bn_optimizer(): ] net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") net_opt = create_optimizer( - group_params, "adamW", lr=0.01, weight_decay=1e-5, momentum=0.9, nesterov=False, filter_bias_and_bn=False + group_params, "adamW", lr=0.01, weight_decay=1e-5, momentum=0.9, nesterov=False, weight_decay_filter="auto" ) bs = 8 @@ -203,7 +203,7 @@ def test_param_lr_0001_filter_bias_and_bn_optimizer(): ] net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") net_opt = create_optimizer( - group_params, "adamW", lr=0.01, weight_decay=1e-5, momentum=0.9, nesterov=False, filter_bias_and_bn=False + group_params, "adamW", lr=0.01, weight_decay=1e-5, momentum=0.9, nesterov=False, weight_decay_filter="auto" ) bs = 8 @@ -237,7 +237,7 @@ def test_wrong_momentum_optimizer(momentum): momentum=momentum, loss_scale=1.0, nesterov=False, - filter_bias_and_bn=True, + weight_decay_filter="disable", ) bs = 8 @@ -271,7 +271,7 @@ def test_wrong_loss_scale_optimizer(loss_scale): momentum=0.9, loss_scale=loss_scale, nesterov=False, - filter_bias_and_bn=True, + weight_decay_filter="disable", ) bs = 8 @@ -306,7 +306,7 @@ def test_wrong_weight_decay_optimizer(weight_decay): momentum=0.9, 
loss_scale=1.0, nesterov=False, - filter_bias_and_bn=True, + weight_decay_filter="disable", ) bs = 8 @@ -340,7 +340,7 @@ def test_wrong_lr_optimizer(lr): momentum=0.9, loss_scale=1.0, nesterov=False, - filter_bias_and_bn=True, + weight_decay_filter="disable", ) bs = 8 @@ -372,7 +372,7 @@ def test_param_lr_01_filter_bias_and_bn_optimizer(): ] net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") net_opt = create_optimizer( - group_params, "momentum", lr=0.01, weight_decay=1e-5, momentum=0.9, nesterov=False, filter_bias_and_bn=False + group_params, "momentum", lr=0.01, weight_decay=1e-5, momentum=0.9, nesterov=False, weight_decay_filter="auto" ) bs = 8 @@ -406,7 +406,7 @@ def test_wrong_opt_optimizer(opt): momentum=0.9, loss_scale=1.0, nesterov=False, - filter_bias_and_bn=True, + weight_decay_filter="disable", ) bs = 8 @@ -447,7 +447,7 @@ def test_wrong_params_more_optimizer(): momentum=0.9, loss_scale=1.0, nesterov=False, - filter_bias_and_bn=False, + weight_decay_filter="auto", ) bs = 8 @@ -487,7 +487,7 @@ def test_wrong_params_input_optimizer(): momentum=0.9, loss_scale=1.0, nesterov=False, - filter_bias_and_bn=False, + weight_decay_filter="auto", ) bs = 8 @@ -527,7 +527,7 @@ def test_mode_mult_single_optimizer(mode): ] net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") net_opt = create_optimizer( - group_params, "momentum", lr=0.01, weight_decay=1e-5, momentum=0.9, nesterov=False, filter_bias_and_bn=False + group_params, "momentum", lr=0.01, weight_decay=1e-5, momentum=0.9, nesterov=False, weight_decay_filter="auto" ) bs = 8 diff --git a/train.py b/train.py index 16d943de1..0406d90a1 100644 --- a/train.py +++ b/train.py @@ -216,7 +216,7 @@ def main(): weight_decay=args.weight_decay, momentum=args.momentum, nesterov=args.use_nesterov, - filter_bias_and_bn=args.filter_bias_and_bn, + weight_decay_filter=args.weight_decay_filter, loss_scale=optimizer_loss_scale, checkpoint_path=opt_ckpt_path, eps=args.eps, diff --git 
a/train_with_func.py b/train_with_func.py index 4b64e9345..f9040c3e0 100644 --- a/train_with_func.py +++ b/train_with_func.py @@ -227,7 +227,7 @@ def main(): weight_decay=args.weight_decay, momentum=args.momentum, nesterov=args.use_nesterov, - filter_bias_and_bn=args.filter_bias_and_bn, + weight_decay_filter=args.weight_decay_filter, loss_scale=1.0, eps=args.eps, ) From a086e4e881c0085bf7056f6ea4b7858a9aad9b1d Mon Sep 17 00:00:00 2001 From: hanhuiyu1996 Date: Thu, 4 Jan 2024 20:42:29 +0800 Subject: [PATCH 2/3] add get no_weight_decay layer form model when filter layers from weight decay in optim_factory --- mindcv/optim/optim_factory.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/mindcv/optim/optim_factory.py b/mindcv/optim/optim_factory.py index 9d5e54ccc..5d8c14504 100644 --- a/mindcv/optim/optim_factory.py +++ b/mindcv/optim/optim_factory.py @@ -15,7 +15,7 @@ _logger = logging.getLogger(__name__) -def init_group_params(params, weight_decay, weight_decay_filter): +def init_group_params(params, weight_decay, weight_decay_filter, no_weight_decay): if weight_decay_filter == "disable": return [ {"params": params, "weight_decay": weight_decay}, @@ -24,11 +24,12 @@ def init_group_params(params, weight_decay, weight_decay_filter): decay_params = [] no_decay_params = [] + no_weight_decay = set(no_weight_decay) for param in params: - if "beta" not in param.name and "gamma" not in param.name and "bias" not in param.name: - decay_params.append(param) - else: + if "beta" in param.name or "gamma" in param.name or "bias" in param.name or param.name in no_weight_decay: no_decay_params.append(param) + else: + decay_params.append(param) return [ {"params": decay_params, "weight_decay": weight_decay}, {"params": no_decay_params, "weight_decay": 0.0}, @@ -37,7 +38,7 @@ def init_group_params(params, weight_decay, weight_decay_filter): def create_optimizer( - params, + model_or_params, opt: str = "adam", lr: Optional[float] = 1e-3, 
weight_decay: float = 0, @@ -78,7 +79,16 @@ def create_optimizer( Optimizer object """ - opt = opt.lower() + no_weight_decay = {} + if isinstance(model_or_params, nn.Cell): + # a model was passed in, extract parameters and add weight decays to appropriate layers + if hasattr(model_or_params, "no_weight_decay"): + no_weight_decay = model_or_params.no_weight_decay() + params = model_or_params.trainable_params() + + else: + params = model_or_params + if weight_decay_filter == "auto": _logger.warning( "You are using AUTO weight decay filter, which means the weight decay filter isn't explicitly pass in " @@ -86,13 +96,14 @@ def create_optimizer( "NOTE: mindspore.nn.Optimizer will filter Norm parmas from weight decay. " ) elif weight_decay_filter == "disable" or "norm_and_bias": - params = init_group_params(params, weight_decay, weight_decay_filter) + params = init_group_params(params, weight_decay, weight_decay_filter, no_weight_decay) weight_decay = 0.0 else: raise ValueError( f"weight decay filter only support ['disable', 'auto', 'norm_and_bias'], but got{weight_decay_filter}." 
) + opt = opt.lower() opt_args = dict(**kwargs) # if lr is not None: # opt_args.setdefault('lr', lr) From 4b82c0bbdf92e535dbc23495df021519c5428fa4 Mon Sep 17 00:00:00 2001 From: hanhuiyu1996 Date: Thu, 4 Jan 2024 20:44:14 +0800 Subject: [PATCH 3/3] support layer_decay in optim_factory --- config.py | 2 + mindcv/optim/optim_factory.py | 169 +++++++++++++++++++++++++++++++++- train.py | 3 +- 3 files changed, 169 insertions(+), 5 deletions(-) diff --git a/config.py b/config.py index 0b4028afe..06bbabef0 100644 --- a/config.py +++ b/config.py @@ -167,6 +167,8 @@ def create_parser(): help='Whether use clip grad (default=False)') group.add_argument('--clip_value', type=float, default=15.0, help='Clip value (default=15.0)') + group.add_argument('--layer_decay', type=float, default=None, + help='layer-wise learning rate decay (default: None)') group.add_argument('--gradient_accumulation_steps', type=int, default=1, help="Accumulate the gradients of n batches before update.") diff --git a/mindcv/optim/optim_factory.py b/mindcv/optim/optim_factory.py index 5d8c14504..01724f5da 100644 --- a/mindcv/optim/optim_factory.py +++ b/mindcv/optim/optim_factory.py @@ -1,7 +1,11 @@ """ optim factory """ +import collections import logging import os -from typing import Optional +import re +from collections import defaultdict +from itertools import chain, islice +from typing import Any, Callable, Dict, Iterator, Optional, Tuple, Union from mindspore import load_checkpoint, load_param_into_net, nn @@ -37,6 +41,152 @@ def init_group_params(params, weight_decay, weight_decay_filter, no_weight_decay ] +def param_groups_layer_decay( + model: nn.Cell, + lr: Optional[float] = 1e-3, + weight_decay: float = 0.05, + no_weight_decay_list: Tuple[str] = (), + layer_decay: float = 0.75, +): + """ + Parameter groups for layer-wise lr decay & weight decay + """ + no_weight_decay_list = set(no_weight_decay_list) + param_group_names = {} # NOTE for debugging + param_groups = {} + if hasattr(model, 
"group_matcher"): + layer_map = group_with_matcher(model.trainable_params(), model.group_matcher(coarse=False), reverse=True) + else: + layer_map = _layer_map(model) + + num_layers = max(layer_map.values()) + 1 + layer_max = num_layers - 1 + layer_scales = list(layer_decay ** (layer_max - i) for i in range(num_layers)) + + for name, param in model.parameters_and_names(): + if not param.requires_grad: + continue + + # no decay: all 1D parameters and model specific ones + if param.ndim == 1 or name in no_weight_decay_list: + g_decay = "no_decay" + this_decay = 0.0 + else: + g_decay = "decay" + this_decay = weight_decay + + layer_id = layer_map.get(name, layer_max) + group_name = "layer_%d_%s" % (layer_id, g_decay) + + if group_name not in param_groups: + this_scale = layer_scales[layer_id] + param_group_names[group_name] = { + "lr": [learning_rate * this_scale for learning_rate in lr], + "weight_decay": this_decay, + "param_names": [], + } + param_groups[group_name] = { + "lr": [learning_rate * this_scale for learning_rate in lr], + "weight_decay": this_decay, + "params": [], + } + + param_group_names[group_name]["param_names"].append(name) + param_groups[group_name]["params"].append(param) + + return list(param_groups.values()) + + +MATCH_PREV_GROUP = (99999,) + + +def group_with_matcher( + named_objects: Iterator[Tuple[str, Any]], group_matcher: Union[Dict, Callable], reverse: bool = False +): + if isinstance(group_matcher, dict): + # dictionary matcher contains a dict of raw-string regex expr that must be compiled + compiled = [] + for group_ordinal, (_, mspec) in enumerate(group_matcher.items()): + if mspec is None: + continue + # map all matching specifications into 3-tuple (compiled re, prefix, suffix) + if isinstance(mspec, (tuple, list)): + # multi-entry match specifications require each sub-spec to be a 2-tuple (re, suffix) + for sspec in mspec: + compiled += [(re.compile(sspec[0]), (group_ordinal,), sspec[1])] + else: + compiled += [(re.compile(mspec), 
(group_ordinal,), None)] + group_matcher = compiled + + def _get_grouping(name): + if isinstance(group_matcher, (list, tuple)): + for match_fn, prefix, suffix in group_matcher: + r = match_fn.match(name) + if r: + parts = (prefix, r.groups(), suffix) + # map all tuple elem to int for numeric sort, filter out None entries + return tuple(map(float, chain.from_iterable(filter(None, parts)))) + return (float("inf"),) # un-matched layers (neck, head) mapped to largest ordinal + else: + ord = group_matcher(name) + if not isinstance(ord, collections.abc.Iterable): + return (ord,) + return tuple(ord) + + grouping = defaultdict(list) + for param in named_objects: + grouping[_get_grouping(param.name)].append(param.name) + # remap to integers + layer_id_to_param = defaultdict(list) + lid = -1 + for k in sorted(filter(lambda x: x is not None, grouping.keys())): + if lid < 0 or k[-1] != MATCH_PREV_GROUP[0]: + lid += 1 + layer_id_to_param[lid].extend(grouping[k]) + + if reverse: + # output reverse mapping + param_to_layer_id = {} + for lid, lm in layer_id_to_param.items(): + for n in lm: + param_to_layer_id[n] = lid + return param_to_layer_id + + return layer_id_to_param + + +def _group(it, size): + it = iter(it) + return iter(lambda: tuple(islice(it, size)), ()) + + +def _layer_map(model, layers_per_group=12, num_groups=None): + def _in_head(n, hp): + if not hp: + return True + elif isinstance(hp, (tuple, list)): + return any([n.startswith(hpi) for hpi in hp]) + else: + return n.startswith(hp) + + # attention: need to add pretrained_cfg attr to model + head_prefix = getattr(model, "pretrained_cfg", {}).get("classifier", None) + names_trunk = [] + names_head = [] + for n, _ in model.parameters_and_names(): + names_head.append(n) if _in_head(n, head_prefix) else names_trunk.append(n) + + # group non-head layers + num_trunk_layers = len(names_trunk) + if num_groups is not None: + layers_per_group = -(num_trunk_layers // -num_groups) + names_trunk = list(_group(names_trunk, 
layers_per_group)) + num_trunk_groups = len(names_trunk) + layer_map = {n: i for i, l in enumerate(names_trunk) for n in l} + layer_map.update({n: num_trunk_groups for n in names_head}) + return layer_map + + def create_optimizer( model_or_params, opt: str = "adam", @@ -45,6 +195,7 @@ def create_optimizer( momentum: float = 0.9, nesterov: bool = False, weight_decay_filter: str = "disable", + layer_decay: Optional[float] = None, loss_scale: float = 1.0, schedule_decay: float = 4e-3, checkpoint_path: str = "", @@ -54,9 +205,9 @@ def create_optimizer( r"""Creates optimizer by name. Args: - params: network parameters. Union[list[Parameter],list[dict]], which must be the list of parameters - or list of dicts. When the list element is a dictionary, the key of the dictionary can be - "params", "lr", "weight_decay","grad_centralization" and "order_params". + model_or_params: network or network parameters. Union[list[Parameter],list[dict], nn.Cell], which must be + the list of parameters or list of dicts or nn.Cell. When the list element is a dictionary, the key of + the dictionary can be "params", "lr", "weight_decay","grad_centralization" and "order_params". opt: wrapped optimizer. You could choose like 'sgd', 'nesterov', 'momentum', 'adam', 'adamw', 'lion', 'rmsprop', 'adagrad', 'lamb'. 'adam' is the default choose for convolution-based networks. 'adamw' is recommended for ViT-based networks. Default: 'adam'. @@ -73,6 +224,7 @@ def create_optimizer( - "auto": We do not apply weight decay filtering to any parameters. However, MindSpore currently automatically filters the parameters of Norm layer from weight decay. - "norm_and_bias": Filter the paramters of Norm layer and Bias from weight decay. + layer_decay: decay factor for layer-wise learning rate decay. Default: None. loss_scale: A floating point value for the loss scale, which must be larger than 0.0. Default: 1.0. Returns: @@ -95,6 +247,15 @@ def create_optimizer( "when creating an mindspore.nn.Optimizer instance.
" "NOTE: mindspore.nn.Optimizer will filter Norm parmas from weight decay. " ) + elif layer_decay is not None and isinstance(model_or_params, nn.Cell): + params = param_groups_layer_decay( + model_or_params, + lr=lr, + weight_decay=weight_decay, + layer_decay=layer_decay, + no_weight_decay_list=no_weight_decay, + ) + weight_decay = 0.0 elif weight_decay_filter == "disable" or "norm_and_bias": params = init_group_params(params, weight_decay, weight_decay_filter, no_weight_decay) weight_decay = 0.0 diff --git a/train.py b/train.py index 0406d90a1..62b637863 100644 --- a/train.py +++ b/train.py @@ -210,13 +210,14 @@ def main(): else: optimizer_loss_scale = 1.0 optimizer = create_optimizer( - network.trainable_params(), + network, opt=args.opt, lr=lr_scheduler, weight_decay=args.weight_decay, momentum=args.momentum, nesterov=args.use_nesterov, weight_decay_filter=args.weight_decay_filter, + layer_decay=args.layer_decay, loss_scale=optimizer_loss_scale, checkpoint_path=opt_ckpt_path, eps=args.eps,