diff --git a/otx/core/data/adapter/detection_dataset_adapter.py b/otx/core/data/adapter/detection_dataset_adapter.py index 3425a543a99..76e424716d9 100644 --- a/otx/core/data/adapter/detection_dataset_adapter.py +++ b/otx/core/data/adapter/detection_dataset_adapter.py @@ -13,6 +13,7 @@ from otx.api.entities.datasets import DatasetEntity from otx.api.entities.image import Image from otx.api.entities.model_template import TaskType +from otx.api.entities.subset import Subset from otx.core.data.adapter.base_dataset_adapter import BaseDatasetAdapter @@ -48,8 +49,13 @@ def get_otx_dataset(self) -> DatasetEntity: if ann.label not in used_labels: used_labels.append(ann.label) - dataset_item = DatasetItemEntity(image, self._get_ann_scene_entity(shapes), subset=subset) - dataset_items.append(dataset_item) + if ( + len(shapes) > 0 + or subset == Subset.UNLABELED + or (subset != Subset.TRAINING and len(datumaro_item.annotations) == 0) + ): + dataset_item = DatasetItemEntity(image, self._get_ann_scene_entity(shapes), subset=subset) + dataset_items.append(dataset_item) self.remove_unused_label_entities(used_labels) return DatasetEntity(items=dataset_items) diff --git a/otx/core/data/adapter/segmentation_dataset_adapter.py b/otx/core/data/adapter/segmentation_dataset_adapter.py index b24b2ab85d5..ab02c01d3c6 100644 --- a/otx/core/data/adapter/segmentation_dataset_adapter.py +++ b/otx/core/data/adapter/segmentation_dataset_adapter.py @@ -92,6 +92,7 @@ def get_otx_dataset(self) -> DatasetEntity: shapes.append(self._get_polygon_entity(d_polygon, image.width, image.height)) if d_polygon.label not in used_labels: used_labels.append(d_polygon.label) + if len(shapes) > 0 or subset == Subset.UNLABELED: dataset_item = DatasetItemEntity(image, self._get_ann_scene_entity(shapes), subset=subset) dataset_items.append(dataset_item) diff --git a/tests/assets/car_tree_bug/annotations/instances_val.json b/tests/assets/car_tree_bug/annotations/instances_val.json index c99563c75f1..3231021483d 
100644 --- a/tests/assets/car_tree_bug/annotations/instances_val.json +++ b/tests/assets/car_tree_bug/annotations/instances_val.json @@ -28,7 +28,17 @@ "id": 8, "width": 1280, "height": 720, - "file_name": "Slide20.PNG", + "file_name": "Slide4.PNG", + "license": 0, + "flickr_url": "", + "coco_url": "", + "date_captured": 0 + }, + { + "id": 9, + "width": 1280, + "height": 720, + "file_name": "Slide5.PNG", "license": 0, "flickr_url": "", "coco_url": "", diff --git a/tests/assets/car_tree_bug/images/val/Slide20.PNG b/tests/assets/car_tree_bug/images/val/Slide4.PNG similarity index 100% rename from tests/assets/car_tree_bug/images/val/Slide20.PNG rename to tests/assets/car_tree_bug/images/val/Slide4.PNG diff --git a/tests/assets/car_tree_bug/images/val/Slide5.PNG b/tests/assets/car_tree_bug/images/val/Slide5.PNG new file mode 100644 index 00000000000..2f1d6099444 Binary files /dev/null and b/tests/assets/car_tree_bug/images/val/Slide5.PNG differ diff --git a/tests/unit/core/data/adapter/test_detection_adapter.py b/tests/unit/core/data/adapter/test_detection_adapter.py index 698aa51a656..51856cd05a4 100644 --- a/tests/unit/core/data/adapter/test_detection_adapter.py +++ b/tests/unit/core/data/adapter/test_detection_adapter.py @@ -3,8 +3,8 @@ # SPDX-License-Identifier: Apache-2.0 # import os -from typing import Optional +from otx.api.entities.annotation import NullAnnotationSceneEntity from otx.api.entities.datasets import DatasetEntity from otx.api.entities.label_schema import LabelSchemaEntity from otx.api.entities.model_template import TaskType @@ -20,42 +20,48 @@ class TestOTXDetectionDatasetAdapter: def setup_method(self): self.root_path = os.getcwd() + + @e2e_pytest_unit + def test_detection(self): task = "detection" - self.task_type: TaskType = TASK_NAME_TO_TASK_TYPE[task] + task_type: TaskType = TASK_NAME_TO_TASK_TYPE[task] data_root_dict: dict = TASK_NAME_TO_DATA_ROOT[task] - self.train_data_roots: str = os.path.join(self.root_path, data_root_dict["train"]) - 
self.val_data_roots: str = os.path.join(self.root_path, data_root_dict["val"]) - self.test_data_roots: str = os.path.join(self.root_path, data_root_dict["test"]) - self.unlabeled_data_roots: Optional[str] = None - if "unlabeled" in data_root_dict: - self.unlabeled_data_roots = os.path.join(self.root_path, data_root_dict["unlabeled"]) - - self.train_dataset_adapter = DetectionDatasetAdapter( - task_type=self.task_type, - train_data_roots=self.train_data_roots, - val_data_roots=self.val_data_roots, - unlabeled_data_roots=self.unlabeled_data_roots, - ) + train_data_roots: str = os.path.join(self.root_path, data_root_dict["train"]) + val_data_roots: str = os.path.join(self.root_path, data_root_dict["val"]) + test_data_roots: str = os.path.join(self.root_path, data_root_dict["test"]) - self.test_dataset_adapter = DetectionDatasetAdapter( - task_type=self.task_type, - test_data_roots=self.test_data_roots, + det_train_dataset_adapter = DetectionDatasetAdapter( + task_type=task_type, + train_data_roots=train_data_roots, + val_data_roots=val_data_roots, ) - @e2e_pytest_unit - def test_init(self): - assert Subset.TRAINING in self.train_dataset_adapter.dataset - assert Subset.VALIDATION in self.train_dataset_adapter.dataset - assert Subset.TESTING in self.test_dataset_adapter.dataset - if self.unlabeled_data_roots is not None: - assert Subset.UNLABELED in self.train_dataset_adapter.dataset + assert Subset.TRAINING in det_train_dataset_adapter.dataset + assert Subset.VALIDATION in det_train_dataset_adapter.dataset - @e2e_pytest_unit - def test_get_otx_dataset(self): - assert isinstance(self.train_dataset_adapter.get_otx_dataset(), DatasetEntity) - assert isinstance(self.test_dataset_adapter.get_otx_dataset(), DatasetEntity) + det_train_dataset = det_train_dataset_adapter.get_otx_dataset() + det_train_label_schema = det_train_dataset_adapter.get_label_schema() + assert isinstance(det_train_dataset, DatasetEntity) + assert isinstance(det_train_label_schema, LabelSchemaEntity) + 
+ # In the test data, there is an empty_label image. + # So, has_empty_label should be True + has_empty_label = False + for train_data in det_train_dataset: + if isinstance(train_data.annotation_scene, NullAnnotationSceneEntity): + has_empty_label = True + assert has_empty_label is True + + det_test_dataset_adapter = DetectionDatasetAdapter( + task_type=task_type, + test_data_roots=test_data_roots, + ) + + assert Subset.TESTING in det_test_dataset_adapter.dataset + assert isinstance(det_test_dataset_adapter.get_otx_dataset(), DatasetEntity) + assert isinstance(det_test_dataset_adapter.get_label_schema(), LabelSchemaEntity) @e2e_pytest_unit def test_instance_segmentation(self): @@ -77,8 +83,18 @@ def test_instance_segmentation(self): assert Subset.TRAINING in instance_seg_train_dataset_adapter.dataset assert Subset.VALIDATION in instance_seg_train_dataset_adapter.dataset - assert isinstance(instance_seg_train_dataset_adapter.get_otx_dataset(), DatasetEntity) - assert isinstance(instance_seg_train_dataset_adapter.get_label_schema(), LabelSchemaEntity) + instance_seg_otx_train_data = instance_seg_train_dataset_adapter.get_otx_dataset() + instance_seg_otx_train_label_schema = instance_seg_train_dataset_adapter.get_label_schema() + assert isinstance(instance_seg_otx_train_data, DatasetEntity) + assert isinstance(instance_seg_otx_train_label_schema, LabelSchemaEntity) + + # In the test data, there is an empty_label image. + # So, has_empty_label should be True + has_empty_label = False + for train_data in instance_seg_otx_train_data: + if isinstance(train_data.annotation_scene, NullAnnotationSceneEntity): + has_empty_label = True + assert has_empty_label is True instance_seg_test_dataset_adapter = DetectionDatasetAdapter( task_type=task_type,