Merge pull request #3776 from harimkang/harimkan/refactor-recipe-cls
Refine recipes for classification
harimkang authored Jul 31, 2024
2 parents b33e964 + 23b9150 commit f2f0cdc
Showing 23 changed files with 100 additions and 1,267 deletions.
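
In effect (a minimal sketch in the recipes' own YAML, not a verbatim excerpt; the authoritative keys are in the diffs below): the shared data pipeline moves into src/otx/recipe/_base_/data/classification.yaml, and each classification recipe now points its data entry at that base and overrides only the fields that differ, for example:

  data: ../../_base_/data/classification.yaml
  overrides:
    data:
      task: H_LABEL_CLS
      data_format: datumaro
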
78 changes: 78 additions & 0 deletions src/otx/recipe/_base_/data/classification.yaml
@@ -0,0 +1,78 @@
task: MULTI_CLASS_CLS
input_size: 224
mem_cache_size: 1GB
mem_cache_img_max_size:
- 500
- 500
image_color_channel: RGB
stack_images: true
data_format: imagenet_with_subset_dirs
unannotated_items_ratio: 0.0
train_subset:
subset_name: train
transform_lib_type: TORCHVISION
batch_size: 64
num_workers: 2
to_tv_image: false
transforms:
- class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop
init_args:
scale: $(input_size)
- class_path: otx.core.data.transform_libs.torchvision.RandomFlip
init_args:
prob: 0.5
is_numpy_to_tvtensor: true
- class_path: torchvision.transforms.v2.ToDtype
init_args:
dtype: ${as_torch_dtype:torch.float32}
scale: false
- class_path: torchvision.transforms.v2.Normalize
init_args:
mean: [123.675, 116.28, 103.53]
std: [58.395, 57.12, 57.375]
sampler:
class_path: otx.algo.samplers.balanced_sampler.BalancedSampler

val_subset:
subset_name: val
transform_lib_type: TORCHVISION
batch_size: 64
num_workers: 2
to_tv_image: false
transforms:
- class_path: otx.core.data.transform_libs.torchvision.Resize
init_args:
scale: $(input_size)
is_numpy_to_tvtensor: true
- class_path: torchvision.transforms.v2.ToDtype
init_args:
dtype: ${as_torch_dtype:torch.float32}
scale: false
- class_path: torchvision.transforms.v2.Normalize
init_args:
mean: [123.675, 116.28, 103.53]
std: [58.395, 57.12, 57.375]
sampler:
class_path: torch.utils.data.RandomSampler

test_subset:
subset_name: test
transform_lib_type: TORCHVISION
batch_size: 64
num_workers: 2
to_tv_image: false
transforms:
- class_path: otx.core.data.transform_libs.torchvision.Resize
init_args:
scale: $(input_size)
is_numpy_to_tvtensor: true
- class_path: torchvision.transforms.v2.ToDtype
init_args:
dtype: ${as_torch_dtype:torch.float32}
scale: false
- class_path: torchvision.transforms.v2.Normalize
init_args:
mean: [123.675, 116.28, 103.53]
std: [58.395, 57.12, 57.375]
sampler:
class_path: torch.utils.data.RandomSampler
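
Because every resize and crop transform in this base resolves its scale from $(input_size), a recipe that needs a different input resolution should only have to override that one key instead of redeclaring the transform list. A minimal sketch, assuming the override merging behaves as in the recipes below (the value 384 is purely illustrative):

  overrides:
    data:
      input_size: 384
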
67 changes: 1 addition & 66 deletions src/otx/recipe/classification/h_label_cls/deit_tiny.yaml
@@ -23,13 +23,8 @@ engine:

callback_monitor: val/accuracy

data: ../../_base_/data/torchvision_base.yaml
data: ../../_base_/data/classification.yaml
overrides:
reset:
- data.train_subset.transforms
- data.val_subset.transforms
- data.test_subset.transforms

max_epochs: 90
callbacks:
- class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
@@ -38,64 +33,4 @@ overrides:

data:
task: H_LABEL_CLS
input_size: 224
mem_cache_img_max_size:
- 500
- 500
stack_images: true
data_format: datumaro
train_subset:
batch_size: 64
to_tv_image: false
transforms:
- class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop
init_args:
scale: $(input_size)
- class_path: otx.core.data.transform_libs.torchvision.RandomFlip
init_args:
prob: 0.5
is_numpy_to_tvtensor: true
- class_path: torchvision.transforms.v2.ToDtype
init_args:
dtype: ${as_torch_dtype:torch.float32}
scale: false
- class_path: torchvision.transforms.v2.Normalize
init_args:
mean: [123.675, 116.28, 103.53]
std: [58.395, 57.12, 57.375]
sampler:
class_path: otx.algo.samplers.balanced_sampler.BalancedSampler

val_subset:
batch_size: 64
to_tv_image: false
transforms:
- class_path: otx.core.data.transform_libs.torchvision.Resize
init_args:
scale: $(input_size)
is_numpy_to_tvtensor: true
- class_path: torchvision.transforms.v2.ToDtype
init_args:
dtype: ${as_torch_dtype:torch.float32}
scale: false
- class_path: torchvision.transforms.v2.Normalize
init_args:
mean: [123.675, 116.28, 103.53]
std: [58.395, 57.12, 57.375]

test_subset:
batch_size: 64
to_tv_image: false
transforms:
- class_path: otx.core.data.transform_libs.torchvision.Resize
init_args:
scale: $(input_size)
is_numpy_to_tvtensor: true
- class_path: torchvision.transforms.v2.ToDtype
init_args:
dtype: ${as_torch_dtype:torch.float32}
scale: false
- class_path: torchvision.transforms.v2.Normalize
init_args:
mean: [123.675, 116.28, 103.53]
std: [58.395, 57.12, 57.375]
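
The base also fixes the samplers: otx.algo.samplers.balanced_sampler.BalancedSampler for training and torch.utils.data.RandomSampler for val/test. A recipe that prefers plain random sampling during training could, as a sketch of the same override mechanism (not a change made in this commit), replace just the sampler entry:

  overrides:
    data:
      train_subset:
        sampler:
          class_path: torch.utils.data.RandomSampler
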
47 changes: 1 addition & 46 deletions src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml
@@ -22,12 +22,10 @@ engine:

callback_monitor: val/accuracy

data: ../../_base_/data/torchvision_base.yaml
data: ../../_base_/data/classification.yaml
overrides:
reset:
- data.train_subset.transforms
- data.val_subset.transforms
- data.test_subset.transforms

max_epochs: 90
callbacks:
@@ -37,15 +35,8 @@ overrides:

data:
task: H_LABEL_CLS
input_size: 224
mem_cache_img_max_size:
- 500
- 500
stack_images: true
data_format: datumaro
train_subset:
batch_size: 64
to_tv_image: false
transforms:
- class_path: otx.core.data.transform_libs.torchvision.EfficientNetRandomCrop
init_args:
@@ -62,39 +53,3 @@ overrides:
init_args:
mean: [123.675, 116.28, 103.53]
std: [58.395, 57.12, 57.375]
sampler:
class_path: otx.algo.samplers.balanced_sampler.BalancedSampler

val_subset:
batch_size: 64
to_tv_image: false
transforms:
- class_path: otx.core.data.transform_libs.torchvision.Resize
init_args:
scale: $(input_size)
is_numpy_to_tvtensor: true
- class_path: torchvision.transforms.v2.ToDtype
init_args:
dtype: ${as_torch_dtype:torch.float32}
scale: false
- class_path: torchvision.transforms.v2.Normalize
init_args:
mean: [123.675, 116.28, 103.53]
std: [58.395, 57.12, 57.375]

test_subset:
batch_size: 64
to_tv_image: false
transforms:
- class_path: otx.core.data.transform_libs.torchvision.Resize
init_args:
scale: $(input_size)
is_numpy_to_tvtensor: true
- class_path: torchvision.transforms.v2.ToDtype
init_args:
dtype: ${as_torch_dtype:torch.float32}
scale: false
- class_path: torchvision.transforms.v2.Normalize
init_args:
mean: [123.675, 116.28, 103.53]
std: [58.395, 57.12, 57.375]
47 changes: 1 addition & 46 deletions src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml
@@ -22,12 +22,10 @@ engine:

callback_monitor: val/accuracy

data: ../../_base_/data/torchvision_base.yaml
data: ../../_base_/data/classification.yaml
overrides:
reset:
- data.train_subset.transforms
- data.val_subset.transforms
- data.test_subset.transforms

max_epochs: 90
callbacks:
@@ -37,15 +35,8 @@ overrides:

data:
task: H_LABEL_CLS
input_size: 224
mem_cache_img_max_size:
- 500
- 500
stack_images: true
data_format: datumaro
train_subset:
batch_size: 64
to_tv_image: false
transforms:
- class_path: otx.core.data.transform_libs.torchvision.EfficientNetRandomCrop
init_args:
@@ -62,39 +53,3 @@ overrides:
init_args:
mean: [123.675, 116.28, 103.53]
std: [58.395, 57.12, 57.375]
sampler:
class_path: otx.algo.samplers.balanced_sampler.BalancedSampler

val_subset:
batch_size: 64
to_tv_image: false
transforms:
- class_path: otx.core.data.transform_libs.torchvision.Resize
init_args:
scale: $(input_size)
is_numpy_to_tvtensor: true
- class_path: torchvision.transforms.v2.ToDtype
init_args:
dtype: ${as_torch_dtype:torch.float32}
scale: false
- class_path: torchvision.transforms.v2.Normalize
init_args:
mean: [123.675, 116.28, 103.53]
std: [58.395, 57.12, 57.375]

test_subset:
batch_size: 64
to_tv_image: false
transforms:
- class_path: otx.core.data.transform_libs.torchvision.Resize
init_args:
scale: $(input_size)
is_numpy_to_tvtensor: true
- class_path: torchvision.transforms.v2.ToDtype
init_args:
dtype: ${as_torch_dtype:torch.float32}
scale: false
- class_path: torchvision.transforms.v2.Normalize
init_args:
mean: [123.675, 116.28, 103.53]
std: [58.395, 57.12, 57.375]
[Next changed file — file path not captured in this copy]
@@ -28,13 +28,8 @@ engine:

callback_monitor: val/accuracy

data: ../../_base_/data/torchvision_base.yaml
data: ../../_base_/data/classification.yaml
overrides:
reset:
- data.train_subset.transforms
- data.val_subset.transforms
- data.test_subset.transforms

max_epochs: 90
callbacks:
- class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup
@@ -43,64 +38,4 @@ overrides:

data:
task: H_LABEL_CLS
input_size: 224
mem_cache_img_max_size:
- 500
- 500
stack_images: true
data_format: datumaro
train_subset:
batch_size: 64
to_tv_image: false
transforms:
- class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop
init_args:
scale: $(input_size)
- class_path: otx.core.data.transform_libs.torchvision.RandomFlip
init_args:
prob: 0.5
is_numpy_to_tvtensor: true
- class_path: torchvision.transforms.v2.ToDtype
init_args:
dtype: ${as_torch_dtype:torch.float32}
scale: false
- class_path: torchvision.transforms.v2.Normalize
init_args:
mean: [123.675, 116.28, 103.53]
std: [58.395, 57.12, 57.375]
sampler:
class_path: otx.algo.samplers.balanced_sampler.BalancedSampler

val_subset:
batch_size: 64
to_tv_image: false
transforms:
- class_path: otx.core.data.transform_libs.torchvision.Resize
init_args:
scale: $(input_size)
is_numpy_to_tvtensor: true
- class_path: torchvision.transforms.v2.ToDtype
init_args:
dtype: ${as_torch_dtype:torch.float32}
scale: false
- class_path: torchvision.transforms.v2.Normalize
init_args:
mean: [123.675, 116.28, 103.53]
std: [58.395, 57.12, 57.375]

test_subset:
batch_size: 64
to_tv_image: false
transforms:
- class_path: otx.core.data.transform_libs.torchvision.Resize
init_args:
scale: $(input_size)
is_numpy_to_tvtensor: true
- class_path: torchvision.transforms.v2.ToDtype
init_args:
dtype: ${as_torch_dtype:torch.float32}
scale: false
- class_path: torchvision.transforms.v2.Normalize
init_args:
mean: [123.675, 116.28, 103.53]
std: [58.395, 57.12, 57.375]
[Diffs for the remaining changed files were not loaded.]