mindspore-lab · geniuspatrick · Jan 17, 2024 · Dec 15, 2023 · Jan 4, 2024 · Jan 4, 2024
diff --git a/config.py b/config.py
@@ -167,6 +167,8 @@ def create_parser():
                        help='Whether use clip grad (default=False)')
     group.add_argument('--clip_value', type=float, default=15.0,
                        help='Clip value (default=15.0)')
+    group.add_argument('--layer_decay', type=float, default=None,
+                       help='layer-wise learning rate decay (default: None)')
     group.add_argument('--gradient_accumulation_steps', type=int, default=1,
                        help="Accumulate the gradients of n batches before update.")
 
@@ -182,8 +184,14 @@ def create_parser():
                        help='Weight decay (default=1e-6)')
     group.add_argument('--use_nesterov', type=str2bool, nargs='?', const=True, default=False,
                        help='Enables the Nesterov momentum (default=False)')
-    group.add_argument('--filter_bias_and_bn', type=str2bool, nargs='?', const=True, default=True,
-                       help='Filter Bias and BatchNorm (default=True)')
+    group.add_argument('--weight_decay_filter', type=str, default="disable",
+                       choices=['disable', 'auto', 'norm_and_bias'],
+                       help='filter parameters from weight_decay. '
+                            'choice: "disable" - No parameters to filter from weight_decay; "auto" - In this case, '
+                            'we do not apply weight decay filtering to any parameters. However, MindSpore currently '
+                            'automatically filters norm parameters from weight decay. It is unclear whether there '
+                            'will be any changes in future versions of MindSpore, so it is recommended to stay updated; '
+                            '"norm_and_bias" - Filter the parameters of Norm layer and Bias from weight decay')
     group.add_argument('--eps', type=float, default=1e-10,
                        help='Term Added to the Denominator to Improve Numerical Stability (default=1e-10)')
 

diff --git a/configs/bit/bit_resnet101_ascend.yaml b/configs/bit/bit_resnet101_ascend.yaml
@@ -41,7 +41,7 @@ multi_step_decay_milestones: [30, 40, 50, 60, 70, 80, 85]
 
 # optimizer
 opt: 'sgd'
-filter_bias_and_bn: False
+weight_decay_filter: 'auto'
 momentum: 0.9
 weight_decay: 0.0001
 loss_scale: 1024
diff --git a/configs/bit/bit_resnet50_ascend.yaml b/configs/bit/bit_resnet50_ascend.yaml
@@ -40,7 +40,7 @@ multi_step_decay_milestones: [30, 40, 50, 60, 70, 80, 85]
 
 # optimizer
 opt: 'sgd'
-filter_bias_and_bn: False
+weight_decay_filter: 'auto'
 momentum: 0.9
 weight_decay: 0.0001
 loss_scale: 1024
diff --git a/configs/bit/bit_resnet50x3_ascend.yaml b/configs/bit/bit_resnet50x3_ascend.yaml
@@ -43,7 +43,7 @@ multi_step_decay_milestones: [30, 40, 50, 60, 70, 80, 85]
 
 # optimizer
 opt: 'sgd'
-filter_bias_and_bn: False
+weight_decay_filter: 'auto'
 momentum: 0.9
 weight_decay: 0.0001
 loss_scale: 1024
diff --git a/configs/cmt/cmt_small_ascend.yaml b/configs/cmt/cmt_small_ascend.yaml
@@ -52,7 +52,6 @@ warmup_epochs: 5
 
 # optimizer
 opt: "adamw"
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.05
 loss_scale_type: 'dynamic'

diff --git a/configs/coat/coat_lite_mini_ascend.yaml b/configs/coat/coat_lite_mini_ascend.yaml
@@ -54,6 +54,5 @@ cycle_decay: 1.0
 # optimizer
 opt: 'adamw'
 weight_decay: 0.025
-filter_bias_and_bn: True
 loss_scale: 1024
 use_nesterov: False
diff --git a/configs/coat/coat_lite_tiny_ascend.yaml b/configs/coat/coat_lite_tiny_ascend.yaml
@@ -54,6 +54,5 @@ cycle_decay: 1.0
 # optimizer
 opt: 'adamw'
 weight_decay: 0.025
-filter_bias_and_bn: True
 loss_scale: 1024
 use_nesterov: False
diff --git a/configs/coat/coat_mini_ascend.yaml b/configs/coat/coat_mini_ascend.yaml
@@ -55,7 +55,6 @@ epoch_size: 300
 # optimizer
 opt: 'lion'
 weight_decay: 0.15
-filter_bias_and_bn: True
 loss_scale: 4096
 use_nesterov: False
 loss_scale_type: dynamic

diff --git a/configs/coat/coat_tiny_ascend.yaml b/configs/coat/coat_tiny_ascend.yaml
@@ -57,7 +57,6 @@ epoch_size: 300
 # optimizer
 opt: 'lion'
 weight_decay: 0.15
-filter_bias_and_bn: True
 loss_scale: 4096
 use_nesterov: False
 loss_scale_type: dynamic

diff --git a/configs/convit/convit_base_ascend.yaml b/configs/convit/convit_base_ascend.yaml
@@ -51,5 +51,4 @@ decay_epochs: 260
 opt: 'adamw'
 weight_decay: 0.1
 loss_scale: 1024
-filter_bias_and_bn: True
 use_nesterov: False
diff --git a/configs/convit/convit_base_plus_ascend.yaml b/configs/convit/convit_base_plus_ascend.yaml
@@ -51,5 +51,4 @@ decay_epochs: 260
 opt: 'adamw'
 weight_decay: 0.1
 loss_scale: 1024
-filter_bias_and_bn: True
 use_nesterov: False
diff --git a/configs/convit/convit_small_ascend.yaml b/configs/convit/convit_small_ascend.yaml
@@ -51,5 +51,4 @@ decay_epochs: 260
 opt: 'adamw'
 weight_decay: 0.05
 loss_scale: 1024
-filter_bias_and_bn: True
 use_nesterov: False
diff --git a/configs/convit/convit_small_plus_ascend.yaml b/configs/convit/convit_small_plus_ascend.yaml
@@ -51,5 +51,4 @@ decay_epochs: 260
 opt: 'adamw'
 weight_decay: 0.05
 loss_scale: 1024
-filter_bias_and_bn: True
 use_nesterov: False
diff --git a/configs/convit/convit_tiny_ascend.yaml b/configs/convit/convit_tiny_ascend.yaml
@@ -50,5 +50,4 @@ decay_epochs: 295
 opt: 'adamw'
 weight_decay: 0.0001
 loss_scale: 1024
-filter_bias_and_bn: True
 use_nesterov: False
diff --git a/configs/convit/convit_tiny_gpu.yaml b/configs/convit/convit_tiny_gpu.yaml
@@ -47,5 +47,4 @@ decay_epochs: 200
 # optimizer
 opt: 'adamw'
 weight_decay: 0.025
-filter_bias_and_bn: True
 use_nesterov: False
diff --git a/configs/convit/convit_tiny_plus_ascend.yaml b/configs/convit/convit_tiny_plus_ascend.yaml
@@ -50,5 +50,4 @@ decay_epochs: 260
 opt: 'adamw'
 weight_decay: 0.0001
 loss_scale: 1024
-filter_bias_and_bn: True
 use_nesterov: False
diff --git a/configs/convnext/convnext_base_ascend.yaml b/configs/convnext/convnext_base_ascend.yaml
@@ -51,7 +51,6 @@ warmup_epochs: 20
 
 # optimizer
 opt: 'adamw'
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.05
 loss_scale_type: 'auto'

diff --git a/configs/convnext/convnext_small_ascend.yaml b/configs/convnext/convnext_small_ascend.yaml
@@ -51,7 +51,6 @@ warmup_epochs: 20
 
 # optimizer
 opt: 'adamw'
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.05
 loss_scale_type: 'auto'

diff --git a/configs/convnext/convnext_tiny_ascend.yaml b/configs/convnext/convnext_tiny_ascend.yaml
@@ -51,7 +51,6 @@ warmup_epochs: 20
 
 # optimizer
 opt: 'adamw'
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.05
 loss_scale_type: 'dynamic'

diff --git a/configs/convnextv2/convnextv2_tiny_ascend.yaml b/configs/convnextv2/convnextv2_tiny_ascend.yaml
@@ -52,7 +52,6 @@ warmup_epochs: 20
 
 # optimizer
 opt: 'adamw'
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.049
 loss_scale_type: 'auto'

diff --git a/configs/crossvit/crossvit_15_ascend.yaml b/configs/crossvit/crossvit_15_ascend.yaml
@@ -57,7 +57,6 @@ cycle_decay: 1
 # optimizer
 opt: 'adamw'
 weight_decay: 0.05
-filter_bias_and_bn: True
 loss_scale: 512
 use_nesterov: False
 eps: 1e-8

diff --git a/configs/crossvit/crossvit_18_ascend.yaml b/configs/crossvit/crossvit_18_ascend.yaml
@@ -55,7 +55,6 @@ decay_rate: 0.1
 # optimizer
 opt: 'adamw'
 weight_decay: 0.05
-filter_bias_and_bn: True
 loss_scale: 1024
 drop_overflow_update: True
 loss_scale_type: 'dynamic'

diff --git a/configs/crossvit/crossvit_9_ascend.yaml b/configs/crossvit/crossvit_9_ascend.yaml
@@ -54,7 +54,6 @@ decay_rate: 0.1
 # optimizer
 opt: 'adamw'
 weight_decay: 0.05
-filter_bias_and_bn: True
 loss_scale_type: 'dynamic'
 drop_overflow_update: True
 use_nesterov: False

diff --git a/configs/densenet/densenet_121_ascend.yaml b/configs/densenet/densenet_121_ascend.yaml
@@ -44,7 +44,6 @@ decay_epochs: 120
 
 # optimizer
 opt: 'momentum'
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.0001
 loss_scale: 1024

diff --git a/configs/densenet/densenet_121_gpu.yaml b/configs/densenet/densenet_121_gpu.yaml
@@ -43,7 +43,6 @@ decay_epochs: 120
 
 # optimizer
 opt: 'momentum'
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.0001
 loss_scale: 1024

diff --git a/configs/densenet/densenet_161_ascend.yaml b/configs/densenet/densenet_161_ascend.yaml
@@ -44,7 +44,6 @@ decay_epochs: 120
 
 # optimizer
 opt: 'momentum'
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.0001
 loss_scale: 1024

diff --git a/configs/densenet/densenet_161_gpu.yaml b/configs/densenet/densenet_161_gpu.yaml
@@ -44,7 +44,6 @@ decay_epochs: 120
 
 # optimizer
 opt: 'momentum'
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.0001
 loss_scale: 1024

diff --git a/configs/densenet/densenet_169_ascend.yaml b/configs/densenet/densenet_169_ascend.yaml
@@ -44,7 +44,6 @@ decay_epochs: 120
 
 # optimizer
 opt: 'momentum'
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.0001
 loss_scale: 1024

diff --git a/configs/densenet/densenet_169_gpu.yaml b/configs/densenet/densenet_169_gpu.yaml
@@ -44,7 +44,6 @@ decay_epochs: 120
 
 # optimizer
 opt: 'momentum'
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.0001
 loss_scale: 1024

diff --git a/configs/densenet/densenet_201_ascend.yaml b/configs/densenet/densenet_201_ascend.yaml
@@ -44,7 +44,6 @@ decay_epochs: 120
 
 # optimizer
 opt: 'momentum'
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.0001
 loss_scale: 1024

diff --git a/configs/densenet/densenet_201_gpu.yaml b/configs/densenet/densenet_201_gpu.yaml
@@ -44,7 +44,6 @@ decay_epochs: 120
 
 # optimizer
 opt: 'momentum'
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.0001
 loss_scale: 1024

diff --git a/configs/dpn/dpn107_ascend.yaml b/configs/dpn/dpn107_ascend.yaml
@@ -45,7 +45,6 @@ decay_epochs: 200
 
 # optimizer
 opt: 'SGD'
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.0001
 loss_scale: 1024

diff --git a/configs/dpn/dpn131_ascend.yaml b/configs/dpn/dpn131_ascend.yaml
@@ -45,7 +45,6 @@ decay_epochs: 200
 
 # optimizer
 opt: 'SGD'
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.0001
 loss_scale: 1024

diff --git a/configs/dpn/dpn92_ascend.yaml b/configs/dpn/dpn92_ascend.yaml
@@ -45,7 +45,6 @@ decay_epochs: 200
 
 # optimizer
 opt: 'SGD'
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.0001
 loss_scale: 1024

diff --git a/configs/dpn/dpn98_ascend.yaml b/configs/dpn/dpn98_ascend.yaml
@@ -45,7 +45,6 @@ decay_epochs: 200
 
 # optimizer
 opt: 'SGD'
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.0001
 loss_scale: 1024

diff --git a/configs/edgenext/edgenext_base_ascend.yaml b/configs/edgenext/edgenext_base_ascend.yaml
@@ -58,7 +58,6 @@ decay_epochs: 330
 
 # optimizer
 opt: 'adamw'
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.05
 loss_scale: 1024

diff --git a/configs/edgenext/edgenext_small_ascend.yaml b/configs/edgenext/edgenext_small_ascend.yaml
@@ -57,7 +57,6 @@ decay_epochs: 330
 
 # optimizer
 opt: 'adamw'
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.05
 loss_scale: 1024

diff --git a/configs/edgenext/edgenext_x_small_ascend.yaml b/configs/edgenext/edgenext_x_small_ascend.yaml
@@ -57,7 +57,6 @@ decay_epochs: 330
 
 # optimizer
 opt: 'adamw'
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.05
 loss_scale: 1024

diff --git a/configs/edgenext/edgenext_xx_small_ascend.yaml b/configs/edgenext/edgenext_xx_small_ascend.yaml
@@ -56,7 +56,6 @@ decay_epochs: 330
 
 # optimizer
 opt: 'adamw'
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.05
 loss_scale: 1024

diff --git a/configs/efficientnet/efficientnet_b0_ascend.yaml b/configs/efficientnet/efficientnet_b0_ascend.yaml
@@ -46,7 +46,6 @@ decay_epochs: 445
 
 # optimizer
 opt: 'rmsprop'
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 1e-5
 loss_scale_type: 'dynamic'

diff --git a/configs/efficientnet/efficientnet_b1_ascend.yaml b/configs/efficientnet/efficientnet_b1_ascend.yaml
@@ -46,7 +46,6 @@ decay_epochs: 430
 
 # optimizer
 opt: 'rmsprop'
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 1e-5
 loss_scale_type: 'dynamic'

diff --git a/configs/ghostnet/ghostnet_050_ascend.yaml b/configs/ghostnet/ghostnet_050_ascend.yaml
@@ -46,7 +46,6 @@ decay_epochs: 580
 
 # optimizer
 opt: "momentum"
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.00002
 loss_scale_type: "dynamic"

diff --git a/configs/ghostnet/ghostnet_100_ascend.yaml b/configs/ghostnet/ghostnet_100_ascend.yaml
@@ -46,7 +46,6 @@ decay_epochs: 580
 
 # optimizer
 opt: "momentum"
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.00002
 loss_scale_type: "dynamic"

diff --git a/configs/ghostnet/ghostnet_130_ascend.yaml b/configs/ghostnet/ghostnet_130_ascend.yaml
@@ -47,7 +47,6 @@ decay_epochs: 580
 
 # optimizer
 opt: "momentum"
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.00002
 loss_scale_type: "dynamic"

diff --git a/configs/googlenet/googlenet_ascend.yaml b/configs/googlenet/googlenet_ascend.yaml
@@ -44,7 +44,6 @@ warmup_epochs: 5
 
 # optimizer
 opt: 'momentum'
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.00004
 loss_scale: 1024

diff --git a/configs/halonet/halonet_50t_ascend.yaml b/configs/halonet/halonet_50t_ascend.yaml
@@ -43,7 +43,6 @@ val_amp_level: 'O2'
 
 # optimizer
 opt: 'adamw'
-filter_bias_and_bn: True
 weight_decay: 0.04
 loss_scale: 1024
 use_nesterov: False

diff --git a/configs/hrnet/hrnet_w32_ascend.yaml b/configs/hrnet/hrnet_w32_ascend.yaml
@@ -52,4 +52,3 @@ decay_epochs: 280
 opt: 'adamw'
 weight_decay: 0.05
 loss_scale: 1024
-filter_bias_and_bn: True
diff --git a/configs/hrnet/hrnet_w48_ascend.yaml b/configs/hrnet/hrnet_w48_ascend.yaml
@@ -52,4 +52,3 @@ decay_epochs: 280
 opt: 'adamw'
 weight_decay: 0.05
 loss_scale: 1024
-filter_bias_and_bn: True
diff --git a/configs/inceptionv3/inception_v3_ascend.yaml b/configs/inceptionv3/inception_v3_ascend.yaml
@@ -47,7 +47,6 @@ warmup_epochs: 5
 
 # optimizer
 opt: 'momentum'
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.00004
 loss_scale: 1024

diff --git a/configs/inceptionv4/inception_v4_ascend.yaml b/configs/inceptionv4/inception_v4_ascend.yaml
@@ -46,7 +46,6 @@ warmup_epochs: 5
 
 # optimizer
 opt: 'momentum'
-filter_bias_and_bn: True
 momentum: 0.9
 weight_decay: 0.00004
 loss_scale: 1024