From fd51de376a5b7f18417a6a66c9ecb777d2c6ac31 Mon Sep 17 00:00:00 2001
From: jaegukhyun <jaeguk.hyun@intel.com>
Date: Fri, 12 Jan 2024 16:19:57 +0900
Subject: [PATCH] Add gradient clipping and sync hyper parameters with OTX1.x

---
 src/otx/config/trainer/default.yaml           |  2 ++
 src/otx/core/config/trainer.py                |  6 ++++-
 .../recipe/detection/atss_mobilenetv2.yaml    |  9 ++++---
 src/otx/recipe/detection/atss_resnext101.yaml | 11 +++++---
 src/otx/recipe/detection/ssd_mobilenetv2.yaml |  9 ++++---
 src/otx/recipe/detection/yolox_l.yaml         | 27 ++++++++++++-------
 src/otx/recipe/detection/yolox_s.yaml         | 27 ++++++++++++-------
 src/otx/recipe/detection/yolox_tiny.yaml      | 21 ++++++++++-----
 src/otx/recipe/detection/yolox_x.yaml         | 27 ++++++++++++-------
 9 files changed, 95 insertions(+), 44 deletions(-)

diff --git a/src/otx/config/trainer/default.yaml b/src/otx/config/trainer/default.yaml
index 67299d42202..42bda50e16f 100644
--- a/src/otx/config/trainer/default.yaml
+++ b/src/otx/config/trainer/default.yaml
@@ -17,3 +17,5 @@ check_val_every_n_epoch: 1
 # set True to to ensure deterministic results
 # makes training slower but gives more reproducibility than just setting seeds
 deterministic: False
+
+gradient_clip_val: null
diff --git a/src/otx/core/config/trainer.py b/src/otx/core/config/trainer.py
index 12df587d40c..4c23e45934a 100644
--- a/src/otx/core/config/trainer.py
+++ b/src/otx/core/config/trainer.py
@@ -3,8 +3,11 @@
 #
 """Config data type objects for trainer."""
 
+from __future__ import annotations
+
 from dataclasses import dataclass
-from pathlib import Path
+from pathlib import Path  # noqa: TCH003
+from typing import Optional
 
 from omegaconf import DictConfig
 
@@ -21,5 +24,6 @@ class TrainerConfig(DictConfig):
     devices: int
     check_val_every_n_epoch: int
     deterministic: bool
+    gradient_clip_val: Optional[float]
 
     _target_: str = "lightning.pytorch.trainer.Trainer"
diff --git a/src/otx/recipe/detection/atss_mobilenetv2.yaml b/src/otx/recipe/detection/atss_mobilenetv2.yaml
index 558cf3ef0a6..2551c3d38f4 100644
--- a/src/otx/recipe/detection/atss_mobilenetv2.yaml
+++ b/src/otx/recipe/detection/atss_mobilenetv2.yaml
@@ -1,5 +1,6 @@
 # @package _global_
 defaults:
+  - override /trainer: default
   - override /base: detection
   - override /callbacks: detection
   - override /data: mmdet
@@ -39,7 +40,7 @@ data:
           - img_shape
           - pad_shape
   val_subset:
-    batch_size: 1
+    batch_size: 8
     transforms:
       - type: LoadImageFromFile
       - type: Resize
@@ -58,7 +59,7 @@ data:
           - img_shape
           - pad_shape
   test_subset:
-    batch_size: 1
+    batch_size: 8
     transforms:
       - type: LoadImageFromFile
       - type: Resize
@@ -177,5 +178,7 @@ model:
     _target_: torch.optim.SGD
     _partial_: true
     lr: 0.004
-    weight_decay: 0.001
+    weight_decay: 0.0001
     momentum: 0.9
+trainer:
+  gradient_clip_val: 35.0
diff --git a/src/otx/recipe/detection/atss_resnext101.yaml b/src/otx/recipe/detection/atss_resnext101.yaml
index f5f270835be..00ef598b7ef 100644
--- a/src/otx/recipe/detection/atss_resnext101.yaml
+++ b/src/otx/recipe/detection/atss_resnext101.yaml
@@ -1,5 +1,6 @@
 # @package _global_
 defaults:
+  - override /trainer: default
   - override /base: detection
   - override /callbacks: detection
   - override /data: mmdet
@@ -39,7 +40,7 @@ data:
           - img_shape
           - pad_shape
   val_subset:
-    batch_size: 1
+    batch_size: 4
     transforms:
       - type: LoadImageFromFile
       - type: Resize
@@ -57,7 +58,7 @@ data:
           - img_shape
           - pad_shape
   test_subset:
-    batch_size: 1
+    batch_size: 4
     transforms:
       - type: LoadImageFromFile
       - type: Resize
@@ -183,6 +184,8 @@ model:
   optimizer:
     _target_: torch.optim.SGD
     _partial_: true
-    lr: 0.001
-    weight_decay: 0.001
+    lr: 0.004
+    weight_decay: 0.0001
     momentum: 0.9
+trainer:
+  gradient_clip_val: 35.0
diff --git a/src/otx/recipe/detection/ssd_mobilenetv2.yaml b/src/otx/recipe/detection/ssd_mobilenetv2.yaml
index 7da26339c38..98f4972cb02 100644
--- a/src/otx/recipe/detection/ssd_mobilenetv2.yaml
+++ b/src/otx/recipe/detection/ssd_mobilenetv2.yaml
@@ -1,5 +1,6 @@
 # @package _global_
 defaults:
+  - override /trainer: default
   - override /base: detection
   - override /callbacks: detection
   - override /data: mmdet
@@ -45,7 +46,7 @@ data:
           - img_shape
           - pad_shape
   val_subset:
-    batch_size: 1
+    batch_size: 8
     transforms:
       - type: LoadImageFromFile
       - type: Resize
@@ -64,7 +65,7 @@ data:
           - img_shape
           - pad_shape
   test_subset:
-    batch_size: 1
+    batch_size: 8
     transforms:
       - type: LoadImageFromFile
       - type: Resize
@@ -186,5 +187,7 @@ model:
     _target_: torch.optim.SGD
     _partial_: true
     lr: 0.01
-    weight_decay: 0.001
+    weight_decay: 0.0001
     momentum: 0.9
+trainer:
+  gradient_clip_val: 35.0
diff --git a/src/otx/recipe/detection/yolox_l.yaml b/src/otx/recipe/detection/yolox_l.yaml
index ba0232466a8..cc8fe7f42a0 100644
--- a/src/otx/recipe/detection/yolox_l.yaml
+++ b/src/otx/recipe/detection/yolox_l.yaml
@@ -1,5 +1,6 @@
 # @package _global_
 defaults:
+  - override /trainer: default
   - override /base: detection
   - override /callbacks: detection
   - override /data: mmdet
@@ -20,8 +21,8 @@ data:
           - 640
       - type: RandomAffine
         scaling_ratio_range:
-          - 0.5
-          - 1.5
+          - 0.1
+          - 2.0
         border:
           - -320
           - -320
@@ -44,7 +45,7 @@ data:
         scale:
           - 640
           - 640
-        keep_ratio: false
+        keep_ratio: true
       - type: RandomFlip
         prob: 0.5
       - type: Pad
@@ -63,14 +64,17 @@ data:
           - img_shape
           - pad_shape
   val_subset:
-    batch_size: 1
+    batch_size: 8
     transforms:
       - type: LoadImageFromFile
       - type: Resize
         scale:
           - 640
           - 640
-        keep_ratio: false
+        keep_ratio: True
+      - type: Pad
+        pad_to_square: true
+        pad_val: 114
       - type: LoadAnnotations
         with_bbox: true
       - type: PackDetInputs
@@ -82,14 +86,17 @@ data:
           - img_shape
           - pad_shape
   test_subset:
-    batch_size: 1
+    batch_size: 8
     transforms:
       - type: LoadImageFromFile
       - type: Resize
         scale:
           - 640
           - 640
-        keep_ratio: false
+        keep_ratio: True
+      - type: Pad
+        pad_to_square: true
+        pad_val: 114
       - type: LoadAnnotations
         with_bbox: true
       - type: PackDetInputs
@@ -151,6 +158,8 @@ model:
   optimizer:
     _target_: torch.optim.SGD
     _partial_: true
-    lr: 0.0005
-    weight_decay: 0.001
+    lr: 0.001
+    weight_decay: 0.0001
     momentum: 0.9
+trainer:
+  gradient_clip_val: 35.0
diff --git a/src/otx/recipe/detection/yolox_s.yaml b/src/otx/recipe/detection/yolox_s.yaml
index f3d80b4d27a..3a455447e3b 100644
--- a/src/otx/recipe/detection/yolox_s.yaml
+++ b/src/otx/recipe/detection/yolox_s.yaml
@@ -1,5 +1,6 @@
 # @package _global_
 defaults:
+  - override /trainer: default
   - override /base: detection
   - override /callbacks: detection
   - override /data: mmdet
@@ -20,8 +21,8 @@ data:
           - 640
       - type: RandomAffine
         scaling_ratio_range:
-          - 0.5
-          - 1.5
+          - 0.1
+          - 2.0
         border:
           - -320
           - -320
@@ -44,7 +45,7 @@ data:
         scale:
           - 640
           - 640
-        keep_ratio: false
+        keep_ratio: True
       - type: RandomFlip
         prob: 0.5
       - type: Pad
@@ -63,14 +64,17 @@ data:
           - img_shape
           - pad_shape
   val_subset:
-    batch_size: 1
+    batch_size: 8
     transforms:
       - type: LoadImageFromFile
       - type: Resize
         scale:
           - 640
           - 640
-        keep_ratio: false
+        keep_ratio: True
+      - type: Pad
+        pad_to_square: true
+        pad_val: 114
       - type: LoadAnnotations
         with_bbox: true
       - type: PackDetInputs
@@ -82,14 +86,17 @@ data:
           - img_shape
           - pad_shape
   test_subset:
-    batch_size: 1
+    batch_size: 8
     transforms:
       - type: LoadImageFromFile
       - type: Resize
         scale:
           - 640
           - 640
-        keep_ratio: false
+        keep_ratio: True
+      - type: Pad
+        pad_to_square: true
+        pad_val: 114
       - type: LoadAnnotations
         with_bbox: true
       - type: PackDetInputs
@@ -151,6 +158,8 @@ model:
   optimizer:
     _target_: torch.optim.SGD
     _partial_: true
-    lr: 0.0005
-    weight_decay: 0.001
+    lr: 0.001
+    weight_decay: 0.0001
     momentum: 0.9
+trainer:
+  gradient_clip_val: 35.0
diff --git a/src/otx/recipe/detection/yolox_tiny.yaml b/src/otx/recipe/detection/yolox_tiny.yaml
index 7b826a552b0..510d1087cea 100644
--- a/src/otx/recipe/detection/yolox_tiny.yaml
+++ b/src/otx/recipe/detection/yolox_tiny.yaml
@@ -1,5 +1,6 @@
 # @package _global_
 defaults:
+  - override /trainer: default
   - override /base: detection
   - override /callbacks: detection
   - override /data: mmdet
@@ -38,7 +39,7 @@ data:
         scale:
           - 640
           - 640
-        keep_ratio: false
+        keep_ratio: True
       - type: RandomFlip
         prob: 0.5
       - type: Pad
@@ -57,14 +58,17 @@ data:
           - img_shape
           - pad_shape
   val_subset:
-    batch_size: 1
+    batch_size: 8
     transforms:
       - type: LoadImageFromFile
       - type: Resize
         scale:
           - 416
           - 416
-        keep_ratio: false
+        keep_ratio: True
+      - type: Pad
+        pad_to_square: true
+        pad_val: 114
       - type: LoadAnnotations
         with_bbox: true
       - type: PackDetInputs
@@ -76,14 +80,17 @@ data:
           - img_shape
           - pad_shape
   test_subset:
-    batch_size: 1
+    batch_size: 8
     transforms:
       - type: LoadImageFromFile
       - type: Resize
         scale:
           - 416
           - 416
-        keep_ratio: false
+        keep_ratio: True
+      - type: Pad
+        pad_to_square: true
+        pad_val: 114
       - type: LoadAnnotations
         with_bbox: true
       - type: PackDetInputs
@@ -146,5 +153,7 @@ model:
     _target_: torch.optim.SGD
     _partial_: true
     lr: 0.0002
-    weight_decay: 0.001
+    weight_decay: 0.0001
     momentum: 0.9
+trainer:
+  gradient_clip_val: 35.0
diff --git a/src/otx/recipe/detection/yolox_x.yaml b/src/otx/recipe/detection/yolox_x.yaml
index 5fcc5c2538b..486c93ac42a 100644
--- a/src/otx/recipe/detection/yolox_x.yaml
+++ b/src/otx/recipe/detection/yolox_x.yaml
@@ -1,5 +1,6 @@
 # @package _global_
 defaults:
+  - override /trainer: default
   - override /base: detection
   - override /callbacks: detection
   - override /data: mmdet
@@ -20,8 +21,8 @@ data:
           - 640
       - type: RandomAffine
         scaling_ratio_range:
-          - 0.5
-          - 1.5
+          - 0.1
+          - 2.0
         border:
           - -320
           - -320
@@ -44,7 +45,7 @@ data:
         scale:
           - 640
           - 640
-        keep_ratio: false
+        keep_ratio: true
       - type: RandomFlip
         prob: 0.5
       - type: Pad
@@ -63,14 +64,17 @@ data:
           - img_shape
           - pad_shape
   val_subset:
-    batch_size: 1
+    batch_size: 4
     transforms:
       - type: LoadImageFromFile
       - type: Resize
         scale:
           - 640
           - 640
-        keep_ratio: false
+        keep_ratio: True
+      - type: Pad
+        pad_to_square: true
+        pad_val: 114
       - type: LoadAnnotations
         with_bbox: true
       - type: PackDetInputs
@@ -82,14 +86,17 @@ data:
           - img_shape
           - pad_shape
   test_subset:
-    batch_size: 1
+    batch_size: 4
     transforms:
       - type: LoadImageFromFile
       - type: Resize
         scale:
           - 640
           - 640
-        keep_ratio: false
+        keep_ratio: True
+      - type: Pad
+        pad_to_square: true
+        pad_val: 114
       - type: LoadAnnotations
         with_bbox: true
       - type: PackDetInputs
@@ -151,6 +158,8 @@ model:
   optimizer:
     _target_: torch.optim.SGD
     _partial_: true
-    lr: 0.0001
-    weight_decay: 0.001
+    lr: 0.001
+    weight_decay: 0.0001
     momentum: 0.9
+trainer:
+  gradient_clip_val: 35.0