Make NP TV conversion configurable (#3429)

openvinotoolkit · Apr 30, 2024
commit 0f0f943 (1 parent: 88db52e)

Showing 34 changed files with 95 additions and 1 deletion.
diff --git a/src/otx/core/config/data.py b/src/otx/core/config/data.py
@@ -61,6 +61,7 @@ class SubsetConfig:
     transform_lib_type: TransformLibType = TransformLibType.TORCHVISION
     num_workers: int = 2
     sampler: SamplerConfig = field(default_factory=lambda: SamplerConfig())
+    to_tv_image: bool = True
 
 
 @dataclass

diff --git a/src/otx/core/data/dataset/anomaly.py b/src/otx/core/data/dataset/anomaly.py
@@ -43,6 +43,7 @@ def __init__(
         max_refetch: int = 1000,
         image_color_channel: ImageColorChannel = ImageColorChannel.RGB,
         stack_images: bool = True,
+        to_tv_image: bool = True,
     ) -> None:
         self.task_type = task_type
         super().__init__(
@@ -53,6 +54,7 @@ def __init__(
             max_refetch,
             image_color_channel,
             stack_images,
+            to_tv_image,
         )
         self.label_info = AnomalyLabelInfo()
 

diff --git a/src/otx/core/data/dataset/base.py b/src/otx/core/data/dataset/base.py
@@ -75,6 +75,7 @@ def __init__(
         max_refetch: int = 1000,
         image_color_channel: ImageColorChannel = ImageColorChannel.RGB,
         stack_images: bool = True,
+        to_tv_image: bool = True,
     ) -> None:
         self.dm_subset = dm_subset
         self.transforms = transforms
@@ -83,6 +84,7 @@ def __init__(
         self.max_refetch = max_refetch
         self.image_color_channel = image_color_channel
         self.stack_images = stack_images
+        self.to_tv_image = to_tv_image
         self.label_info = LabelInfo.from_dm_label_groups(self.dm_subset.categories()[AnnotationType.label])
 
     def __len__(self) -> int:
@@ -93,7 +95,8 @@ def _sample_another_idx(self) -> int:
 
     def _apply_transforms(self, entity: T_OTXDataEntity) -> T_OTXDataEntity | None:
         if isinstance(self.transforms, Compose):
-            entity = entity.to_tv_image()
+            if self.to_tv_image:
+                entity = entity.to_tv_image()
             return self.transforms(entity)
         if isinstance(self.transforms, Iterable):
             return self._iterable_transforms(entity)

diff --git a/src/otx/core/data/dataset/segmentation.py b/src/otx/core/data/dataset/segmentation.py
@@ -143,6 +143,7 @@ def __init__(
         max_refetch: int = 1000,
         image_color_channel: ImageColorChannel = ImageColorChannel.RGB,
         stack_images: bool = True,
+        to_tv_image: bool = True,
         ignore_index: int = 255,
     ) -> None:
         super().__init__(
@@ -153,6 +154,7 @@ def __init__(
             max_refetch,
             image_color_channel,
             stack_images,
+            to_tv_image,
         )
         self.label_info = SegLabelInfo(
             label_names=self.label_info.label_names,

diff --git a/src/otx/core/data/factory.py b/src/otx/core/data/factory.py
@@ -82,6 +82,7 @@ def create(  # noqa: PLR0911 # ignore too many return statements
             "mem_cache_img_max_size": cfg_data_module.mem_cache_img_max_size,
             "image_color_channel": cfg_data_module.image_color_channel,
             "stack_images": cfg_data_module.stack_images,
+            "to_tv_image": cfg_subset.to_tv_image,
         }
 
         if task in (

diff --git a/src/otx/engine/utils/auto_configurator.py b/src/otx/engine/utils/auto_configurator.py
@@ -383,6 +383,7 @@ def update_ov_subset_pipeline(self, datamodule: OTXDataModule, subset: str = "te
         subset_config.batch_size = ov_test_config["batch_size"]
         subset_config.transform_lib_type = ov_test_config["transform_lib_type"]
         subset_config.transforms = ov_test_config["transforms"]
+        subset_config.to_tv_image = ov_test_config["to_tv_image"]
         data_configuration.tile_config.enable_tiler = False
         msg = (
             f"For OpenVINO IR models, Update the following {subset} \n"

diff --git a/src/otx/recipe/_base_/data/mmaction_base.yaml b/src/otx/recipe/_base_/data/mmaction_base.yaml
@@ -11,6 +11,7 @@ config:
   train_subset:
     subset_name: train
     transform_lib_type: MMACTION
+    to_tv_image: False
     batch_size: 8
     num_workers: 2
     transforms:
@@ -46,6 +47,7 @@ config:
   val_subset:
     subset_name: val
     transform_lib_type: MMACTION
+    to_tv_image: False
     batch_size: 8
     num_workers: 2
     transforms:
@@ -76,6 +78,7 @@ config:
   test_subset:
     subset_name: test
     transform_lib_type: MMACTION
+    to_tv_image: False
     batch_size: 8
     num_workers: 2
     transforms:

diff --git a/src/otx/recipe/_base_/data/mmdet_base.yaml b/src/otx/recipe/_base_/data/mmdet_base.yaml
@@ -11,6 +11,7 @@ config:
     batch_size: 8
     num_workers: 2
     transform_lib_type: MMDET
+    to_tv_image: False
     transforms:
       - backend_args: null
         type: LoadImageFromFile
@@ -31,6 +32,7 @@ config:
     num_workers: 2
     batch_size: 1
     transform_lib_type: MMDET
+    to_tv_image: False
     transforms:
       - backend_args: null
         type: LoadImageFromFile
@@ -55,6 +57,7 @@ config:
     num_workers: 2
     batch_size: 1
     transform_lib_type: MMDET
+    to_tv_image: False
     transforms:
       - backend_args: null
         type: LoadImageFromFile

diff --git a/src/otx/recipe/_base_/data/mmpretrain_base.yaml b/src/otx/recipe/_base_/data/mmpretrain_base.yaml
@@ -13,6 +13,7 @@ config:
     num_workers: 2
     batch_size: 64
     transform_lib_type: MMPRETRAIN
+    to_tv_image: False
     transforms:
       - type: LoadImageFromFile
       - backend: cv2
@@ -26,6 +27,7 @@ config:
     num_workers: 2
     batch_size: 64
     transform_lib_type: MMPRETRAIN
+    to_tv_image: False
     transforms:
       - type: LoadImageFromFile
       - backend: cv2
@@ -42,6 +44,7 @@ config:
     num_workers: 2
     batch_size: 64
     transform_lib_type: MMPRETRAIN
+    to_tv_image: False
     transforms:
       - type: LoadImageFromFile
       - backend: cv2

diff --git a/src/otx/recipe/_base_/data/mmseg_base.yaml b/src/otx/recipe/_base_/data/mmseg_base.yaml
@@ -12,6 +12,7 @@ config:
     batch_size: 8
     num_workers: 4
     transform_lib_type: TORCHVISION
+    to_tv_image: True
     transforms:
       - class_path: torchvision.transforms.v2.RandomResizedCrop
         init_args:
@@ -44,6 +45,7 @@ config:
     batch_size: 8
     num_workers: 4
     transform_lib_type: TORCHVISION
+    to_tv_image: True
     transforms:
       - class_path: torchvision.transforms.v2.Resize
         init_args:
@@ -65,6 +67,7 @@ config:
     num_workers: 4
     batch_size: 8
     transform_lib_type: TORCHVISION
+    to_tv_image: True
     transforms:
       - class_path: torchvision.transforms.v2.Resize
         init_args:

diff --git a/src/otx/recipe/_base_/data/torchvision_base.yaml b/src/otx/recipe/_base_/data/torchvision_base.yaml
@@ -9,6 +9,7 @@ config:
   train_subset:
     subset_name: train
     transform_lib_type: TORCHVISION
+    to_tv_image: True
     transforms:
       - class_path: torchvision.transforms.v2.ToImage
     batch_size: 1
@@ -18,6 +19,7 @@ config:
   val_subset:
     subset_name: val
     transform_lib_type: TORCHVISION
+    to_tv_image: True
     transforms:
       - class_path: torchvision.transforms.v2.ToImage
     batch_size: 1
@@ -27,6 +29,7 @@ config:
   test_subset:
     subset_name: test
     transform_lib_type: TORCHVISION
+    to_tv_image: True
     transforms:
       - class_path: torchvision.transforms.v2.ToImage
     batch_size: 1

diff --git a/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml b/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml
@@ -50,6 +50,7 @@ overrides:
       data_format: datumaro
       train_subset:
         batch_size: 64
+        to_tv_image: False
         transforms:
           - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop
             init_args:
@@ -67,6 +68,7 @@ overrides:
           class_path: otx.algo.samplers.balanced_sampler.BalancedSampler
       val_subset:
         batch_size: 64
+        to_tv_image: False
         transforms:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
@@ -81,6 +83,7 @@ overrides:
               std: [58.395, 57.12, 57.375]
       test_subset:
         batch_size: 64
+        to_tv_image: False
         transforms:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:

diff --git a/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml b/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml
@@ -49,6 +49,7 @@ overrides:
       data_format: datumaro
       train_subset:
         batch_size: 64
+        to_tv_image: False
         transforms:
           - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop
             init_args:
@@ -66,6 +67,7 @@ overrides:
           class_path: otx.algo.samplers.balanced_sampler.BalancedSampler
       val_subset:
         batch_size: 64
+        to_tv_image: False
         transforms:
           # TODO(harimkang): Need to revisit validation pipeline
           - class_path: otx.core.data.transform_libs.torchvision.Resize
@@ -81,6 +83,7 @@ overrides:
               std: [58.395, 57.12, 57.375]
       test_subset:
         batch_size: 64
+        to_tv_image: False
         transforms:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:

diff --git a/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml b/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml
@@ -49,6 +49,7 @@ overrides:
       data_format: datumaro
       train_subset:
         batch_size: 64
+        to_tv_image: False
         transforms:
           - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop
             init_args:
@@ -69,6 +70,7 @@ overrides:
           class_path: otx.algo.samplers.balanced_sampler.BalancedSampler
       val_subset:
         batch_size: 64
+        to_tv_image: False
         transforms:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
@@ -84,6 +86,7 @@ overrides:
               std: [58.395, 57.12, 57.375]
       test_subset:
         batch_size: 64
+        to_tv_image: False
         transforms:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:

diff --git a/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml b/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml
@@ -54,6 +54,7 @@ overrides:
       data_format: datumaro
       train_subset:
         batch_size: 64
+        to_tv_image: False
         transforms:
           - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop
             init_args:
@@ -74,6 +75,7 @@ overrides:
           class_path: otx.algo.samplers.balanced_sampler.BalancedSampler
       val_subset:
         batch_size: 64
+        to_tv_image: False
         transforms:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
@@ -89,6 +91,7 @@ overrides:
               std: [58.395, 57.12, 57.375]
       test_subset:
         batch_size: 64
+        to_tv_image: False
         transforms:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:

diff --git a/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml b/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml
@@ -44,6 +44,7 @@ overrides:
       stack_images: True
       train_subset:
         batch_size: 64
+        to_tv_image: False
         transforms:
           - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop
             init_args:
@@ -61,6 +62,7 @@ overrides:
           class_path: otx.algo.samplers.balanced_sampler.BalancedSampler
       val_subset:
         batch_size: 64
+        to_tv_image: False
         transforms:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
@@ -76,6 +78,7 @@ overrides:
               std: [58.395, 57.12, 57.375]
       test_subset:
         batch_size: 64
+        to_tv_image: False
         transforms:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:

diff --git a/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml b/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml
@@ -44,6 +44,7 @@ overrides:
       stack_images: True
       train_subset:
         batch_size: 64
+        to_tv_image: False
         transforms:
           - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop
             init_args:
@@ -60,6 +61,7 @@ overrides:
           class_path: otx.algo.samplers.balanced_sampler.BalancedSampler
       val_subset:
         batch_size: 64
+        to_tv_image: False
         transforms:
           # TODO(harimkang): Need to revisit validation pipeline
           - class_path: otx.core.data.transform_libs.torchvision.Resize
@@ -75,6 +77,7 @@ overrides:
               std: [58.395, 57.12, 57.375]
       test_subset:
         batch_size: 64
+        to_tv_image: False
         transforms:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:

diff --git a/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml b/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml
@@ -43,6 +43,7 @@ overrides:
       stack_images: True
       train_subset:
         batch_size: 64
+        to_tv_image: False
         transforms:
           - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop
             init_args:
@@ -63,6 +64,7 @@ overrides:
           class_path: otx.algo.samplers.balanced_sampler.BalancedSampler
       val_subset:
         batch_size: 64
+        to_tv_image: False
         transforms:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
@@ -78,6 +80,7 @@ overrides:
               std: [58.395, 57.12, 57.375]
       test_subset:
         batch_size: 64
+        to_tv_image: False
         transforms:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:

diff --git a/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml b/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml
@@ -48,6 +48,7 @@ overrides:
       stack_images: True
       train_subset:
         batch_size: 64
+        to_tv_image: False
         transforms:
           - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop
             init_args:
@@ -68,6 +69,7 @@ overrides:
           class_path: otx.algo.samplers.balanced_sampler.BalancedSampler
       val_subset:
         batch_size: 64
+        to_tv_image: False
         transforms:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args:
@@ -83,6 +85,7 @@ overrides:
               std: [58.395, 57.12, 57.375]
       test_subset:
         batch_size: 64
+        to_tv_image: False
         transforms:
           - class_path: otx.core.data.transform_libs.torchvision.Resize
             init_args: