openvinotoolkit · jaegukhyun · Jan 15, 2024 · Jan 12, 2024
diff --git a/src/otx/config/trainer/default.yaml b/src/otx/config/trainer/default.yaml
@@ -17,3 +17,5 @@ check_val_every_n_epoch: 1
 # set True to to ensure deterministic results
 # makes training slower but gives more reproducibility than just setting seeds
 deterministic: False
+
+gradient_clip_val: null
diff --git a/src/otx/core/config/trainer.py b/src/otx/core/config/trainer.py
@@ -3,8 +3,11 @@
 #
 """Config data type objects for trainer."""
 
+from __future__ import annotations
+
 from dataclasses import dataclass
-from pathlib import Path
+from pathlib import Path  # noqa: TCH003
+from typing import Optional
 
 from omegaconf import DictConfig
 
@@ -21,5 +24,6 @@ class TrainerConfig(DictConfig):
     devices: int
     check_val_every_n_epoch: int
     deterministic: bool
+    gradient_clip_val: Optional[float]
 
     _target_: str = "lightning.pytorch.trainer.Trainer"
diff --git a/src/otx/recipe/detection/atss_mobilenetv2.yaml b/src/otx/recipe/detection/atss_mobilenetv2.yaml
@@ -1,5 +1,6 @@
 # @package _global_
 defaults:
+  - override /trainer: default
   - override /base: detection
   - override /callbacks: detection
   - override /data: mmdet
@@ -39,7 +40,7 @@ data:
           - img_shape
           - pad_shape
   val_subset:
-    batch_size: 1
+    batch_size: 8
     transforms:
       - type: LoadImageFromFile
       - type: Resize
@@ -58,7 +59,7 @@ data:
           - img_shape
           - pad_shape
   test_subset:
-    batch_size: 1
+    batch_size: 8
     transforms:
       - type: LoadImageFromFile
       - type: Resize
@@ -177,5 +178,7 @@ model:
     _target_: torch.optim.SGD
     _partial_: true
     lr: 0.004
-    weight_decay: 0.001
+    weight_decay: 0.0001
     momentum: 0.9
+trainer:
+  gradient_clip_val: 35.0
diff --git a/src/otx/recipe/detection/atss_resnext101.yaml b/src/otx/recipe/detection/atss_resnext101.yaml
@@ -1,5 +1,6 @@
 # @package _global_
 defaults:
+  - override /trainer: default
   - override /base: detection
   - override /callbacks: detection
   - override /data: mmdet
@@ -39,7 +40,7 @@ data:
           - img_shape
           - pad_shape
   val_subset:
-    batch_size: 1
+    batch_size: 4
     transforms:
       - type: LoadImageFromFile
       - type: Resize
@@ -57,7 +58,7 @@ data:
           - img_shape
           - pad_shape
   test_subset:
-    batch_size: 1
+    batch_size: 4
     transforms:
       - type: LoadImageFromFile
       - type: Resize
@@ -183,6 +184,8 @@ model:
   optimizer:
     _target_: torch.optim.SGD
     _partial_: true
-    lr: 0.001
-    weight_decay: 0.001
+    lr: 0.004
+    weight_decay: 0.0001
     momentum: 0.9
+trainer:
+  gradient_clip_val: 35.0
diff --git a/src/otx/recipe/detection/ssd_mobilenetv2.yaml b/src/otx/recipe/detection/ssd_mobilenetv2.yaml
@@ -1,5 +1,6 @@
 # @package _global_
 defaults:
+  - override /trainer: default
   - override /base: detection
   - override /callbacks: detection
   - override /data: mmdet
@@ -45,7 +46,7 @@ data:
           - img_shape
           - pad_shape
   val_subset:
-    batch_size: 1
+    batch_size: 8
     transforms:
       - type: LoadImageFromFile
       - type: Resize
@@ -64,7 +65,7 @@ data:
           - img_shape
           - pad_shape
   test_subset:
-    batch_size: 1
+    batch_size: 8
     transforms:
       - type: LoadImageFromFile
       - type: Resize
@@ -186,5 +187,7 @@ model:
     _target_: torch.optim.SGD
     _partial_: true
     lr: 0.01
-    weight_decay: 0.001
+    weight_decay: 0.0001
     momentum: 0.9
+trainer:
+  gradient_clip_val: 35.0
diff --git a/src/otx/recipe/detection/yolox_l.yaml b/src/otx/recipe/detection/yolox_l.yaml
@@ -1,5 +1,6 @@
 # @package _global_
 defaults:
+  - override /trainer: default
   - override /base: detection
   - override /callbacks: detection
   - override /data: mmdet
@@ -20,8 +21,8 @@ data:
           - 640
       - type: RandomAffine
         scaling_ratio_range:
-          - 0.5
-          - 1.5
+          - 0.1
+          - 2.0
         border:
           - -320
           - -320
@@ -44,7 +45,7 @@ data:
         scale:
           - 640
           - 640
-        keep_ratio: false
+        keep_ratio: true
       - type: RandomFlip
         prob: 0.5
       - type: Pad
@@ -63,14 +64,17 @@ data:
           - img_shape
           - pad_shape
   val_subset:
-    batch_size: 1
+    batch_size: 8
     transforms:
       - type: LoadImageFromFile
       - type: Resize
         scale:
           - 640
           - 640
-        keep_ratio: false
+        keep_ratio: true
+      - type: Pad
+        pad_to_square: true
+        pad_val: 114
       - type: LoadAnnotations
         with_bbox: true
       - type: PackDetInputs
@@ -82,14 +86,17 @@ data:
           - img_shape
           - pad_shape
   test_subset:
-    batch_size: 1
+    batch_size: 8
     transforms:
       - type: LoadImageFromFile
       - type: Resize
         scale:
           - 640
           - 640
-        keep_ratio: false
+        keep_ratio: true
+      - type: Pad
+        pad_to_square: true
+        pad_val: 114
       - type: LoadAnnotations
         with_bbox: true
       - type: PackDetInputs
@@ -151,6 +158,8 @@ model:
   optimizer:
     _target_: torch.optim.SGD
     _partial_: true
-    lr: 0.0005
-    weight_decay: 0.001
+    lr: 0.001
+    weight_decay: 0.0001
     momentum: 0.9
+trainer:
+  gradient_clip_val: 35.0
diff --git a/src/otx/recipe/detection/yolox_s.yaml b/src/otx/recipe/detection/yolox_s.yaml
@@ -1,5 +1,6 @@
 # @package _global_
 defaults:
+  - override /trainer: default
   - override /base: detection
   - override /callbacks: detection
   - override /data: mmdet
@@ -20,8 +21,8 @@ data:
           - 640
       - type: RandomAffine
         scaling_ratio_range:
-          - 0.5
-          - 1.5
+          - 0.1
+          - 2.0
         border:
           - -320
           - -320
@@ -44,7 +45,7 @@ data:
         scale:
           - 640
           - 640
-        keep_ratio: false
+        keep_ratio: true
       - type: RandomFlip
         prob: 0.5
       - type: Pad
@@ -63,14 +64,17 @@ data:
           - img_shape
           - pad_shape
   val_subset:
-    batch_size: 1
+    batch_size: 8
     transforms:
       - type: LoadImageFromFile
       - type: Resize
         scale:
           - 640
           - 640
-        keep_ratio: false
+        keep_ratio: true
+      - type: Pad
+        pad_to_square: true
+        pad_val: 114
       - type: LoadAnnotations
         with_bbox: true
       - type: PackDetInputs
@@ -82,14 +86,17 @@ data:
           - img_shape
           - pad_shape
   test_subset:
-    batch_size: 1
+    batch_size: 8
     transforms:
       - type: LoadImageFromFile
       - type: Resize
         scale:
           - 640
           - 640
-        keep_ratio: false
+        keep_ratio: true
+      - type: Pad
+        pad_to_square: true
+        pad_val: 114
       - type: LoadAnnotations
         with_bbox: true
       - type: PackDetInputs
@@ -151,6 +158,8 @@ model:
   optimizer:
     _target_: torch.optim.SGD
     _partial_: true
-    lr: 0.0005
-    weight_decay: 0.001
+    lr: 0.001
+    weight_decay: 0.0001
     momentum: 0.9
+trainer:
+  gradient_clip_val: 35.0
diff --git a/src/otx/recipe/detection/yolox_tiny.yaml b/src/otx/recipe/detection/yolox_tiny.yaml
@@ -1,5 +1,6 @@
 # @package _global_
 defaults:
+  - override /trainer: default
   - override /base: detection
   - override /callbacks: detection
   - override /data: mmdet
@@ -38,7 +39,7 @@ data:
         scale:
           - 640
           - 640
-        keep_ratio: false
+        keep_ratio: true
       - type: RandomFlip
         prob: 0.5
       - type: Pad
@@ -57,14 +58,17 @@ data:
           - img_shape
           - pad_shape
   val_subset:
-    batch_size: 1
+    batch_size: 8
     transforms:
       - type: LoadImageFromFile
       - type: Resize
         scale:
           - 416
           - 416
-        keep_ratio: false
+        keep_ratio: true
+      - type: Pad
+        pad_to_square: true
+        pad_val: 114
       - type: LoadAnnotations
         with_bbox: true
       - type: PackDetInputs
@@ -76,14 +80,17 @@ data:
           - img_shape
           - pad_shape
   test_subset:
-    batch_size: 1
+    batch_size: 8
     transforms:
       - type: LoadImageFromFile
       - type: Resize
         scale:
           - 416
           - 416
-        keep_ratio: false
+        keep_ratio: true
+      - type: Pad
+        pad_to_square: true
+        pad_val: 114
       - type: LoadAnnotations
         with_bbox: true
       - type: PackDetInputs
@@ -146,5 +153,7 @@ model:
     _target_: torch.optim.SGD
     _partial_: true
     lr: 0.0002
-    weight_decay: 0.001
+    weight_decay: 0.0001
     momentum: 0.9
+trainer:
+  gradient_clip_val: 35.0