Fix data pipeline (#3418)

- Fix #3379
- Remove numpy -> tensor conversion after transform
- Add `NumpytoTVTensorMixin`
openvinotoolkit · May 2, 2024 · 2fc7777
1 parent 151a94e
commit 2fc7777
Show file tree

Hide file tree

Showing 31 changed files with 237 additions and 131 deletions.
diff --git a/src/otx/core/data/transform_libs/torchvision.py b/src/otx/core/data/transform_libs/torchvision.py
diff --git a/src/otx/recipe/_base_/data/mmseg_base.yaml b/src/otx/recipe/_base_/data/mmseg_base.yaml
@@ -27,6 +27,8 @@ config:
             - 2.0
           antialias: True
       - class_path: otx.core.data.transform_libs.torchvision.PhotoMetricDistortion
+        init_args:
+          is_numpy_to_tvtensor: true
       - class_path: torchvision.transforms.v2.RandomHorizontalFlip
         init_args:
           p: 0.5

diff --git a/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml b/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml
@@ -56,6 +56,7 @@ overrides:
             init_args:
               scale: 224
               backend: cv2
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -73,6 +74,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
               scale: 224
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -88,7 +90,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
               scale: 224
-              transform_bbox: false
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}

diff --git a/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml b/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml
@@ -55,6 +55,7 @@ overrides:
             init_args:
               scale: 224
               backend: cv2
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -73,6 +74,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
               scale: 224
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -88,7 +90,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
               scale: 224
-              transform_bbox: false
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}

diff --git a/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml b/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml
@@ -58,6 +58,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.RandomFlip
             init_args:
               prob: 0.5
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -75,7 +76,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
               scale: 224
-              transform_bbox: false
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -91,7 +92,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
               scale: 224
-              transform_bbox: false
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}

diff --git a/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml b/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml
@@ -63,6 +63,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.RandomFlip
             init_args:
               prob: 0.5
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -80,7 +81,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
               scale: 224
-              transform_bbox: false
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -96,7 +97,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
               scale: 224
-              transform_bbox: false
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}

diff --git a/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml b/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml
@@ -50,6 +50,7 @@ overrides:
             init_args:
               scale: 224
               backend: cv2
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -67,7 +68,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
               scale: 224
-              transform_bbox: false
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -83,7 +84,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
               scale: 224
-              transform_bbox: false
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}

diff --git a/src/otx/recipe/classification/multi_class_cls/dino_v2.yaml b/src/otx/recipe/classification/multi_class_cls/dino_v2.yaml
@@ -52,6 +52,7 @@ overrides:
             init_args:
               scale: 224
               backend: cv2
+              is_numpy_to_tvtensor: true
         sampler:
           class_path: otx.algo.samplers.balanced_sampler.BalancedSampler
       val_subset:
@@ -68,7 +69,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
               scale: 224
-              transform_bbox: false
+              is_numpy_to_tvtensor: true
       test_subset:
         batch_size: 64
         transforms:
@@ -83,4 +84,4 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
               scale: 224
-              transform_bbox: false
+              is_numpy_to_tvtensor: true
diff --git a/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml b/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml
@@ -49,6 +49,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop
             init_args:
               scale: 224
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -67,6 +68,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
               scale: 224
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -82,7 +84,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
               scale: 224
-              transform_bbox: False
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}

diff --git a/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml b/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml
@@ -52,6 +52,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.RandomFlip
             init_args:
               prob: 0.5
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -69,7 +70,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
               scale: 224
-              transform_bbox: false
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -85,7 +86,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
               scale: 224
-              transform_bbox: false
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}

diff --git a/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml b/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml
@@ -57,6 +57,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.RandomFlip
             init_args:
               prob: 0.5
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -74,7 +75,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
               scale: 224
-              transform_bbox: false
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -90,7 +91,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
               scale: 224
-              transform_bbox: false
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}

diff --git a/src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml b/src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml
@@ -54,6 +54,7 @@ overrides:
             init_args:
               scale: 224
               backend: cv2
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -71,6 +72,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
               scale: 224
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -86,7 +88,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
               scale: 224
-              transform_bbox: false
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}

diff --git a/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml b/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml
@@ -53,6 +53,7 @@ overrides:
             init_args:
               scale: 224
               backend: cv2
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -71,6 +72,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
               scale: 224
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -86,7 +88,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
               scale: 224
-              transform_bbox: false
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}

diff --git a/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml b/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml
@@ -56,6 +56,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.RandomFlip
             init_args:
               prob: 0.5
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -73,7 +74,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
               scale: 224
-              transform_bbox: false
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -89,7 +90,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
               scale: 224
-              transform_bbox: false
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}

diff --git a/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large.yaml b/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large.yaml
@@ -61,6 +61,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.RandomFlip
             init_args:
               prob: 0.5
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -78,7 +79,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
               scale: 224
-              transform_bbox: false
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -94,7 +95,7 @@ overrides:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
               scale: 224
-              transform_bbox: false
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}

diff --git a/src/otx/recipe/detection/atss_mobilenetv2.yaml b/src/otx/recipe/detection/atss_mobilenetv2.yaml
@@ -55,9 +55,11 @@ overrides:
                 - 992
                 - 736
               keep_ratio: false
+              transform_bbox: true
           - class_path: otx.core.data.transform_libs.torchvision.RandomFlip
             init_args:
               prob: 0.5
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -79,6 +81,7 @@ overrides:
                 - 736
               keep_ratio: false
               transform_bbox: false
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -98,6 +101,7 @@ overrides:
                 - 736
               keep_ratio: false
               transform_bbox: false
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}

diff --git a/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml b/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml
@@ -58,9 +58,11 @@ overrides:
                 - 992
                 - 736
               keep_ratio: false
+              transform_bbox: true
           - class_path: otx.core.data.transform_libs.torchvision.RandomFlip
             init_args:
               prob: 0.5
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -82,6 +84,7 @@ overrides:
                 - 736
               keep_ratio: false
               transform_bbox: false
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}
@@ -101,6 +104,7 @@ overrides:
                 - 736
               keep_ratio: false
               transform_bbox: false
+              is_numpy_to_tvtensor: true
           - class_path: torchvision.transforms.v2.ToDtype
             init_args:
               dtype: ${as_torch_dtype:torch.float32}