From 711d6b5499be8dec1da5f6de80af231f96a63c5a Mon Sep 17 00:00:00 2001 From: Wonju Lee Date: Fri, 26 Apr 2024 15:23:20 +0900 Subject: [PATCH] Support DOTA data format for oriented object detection task (#1475) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Summary ### How to test ### Checklist - [x] I have added unit tests to cover my changes.​ - [ ] I have added integration tests to cover my changes.​ - [x] I have added the description of my changes into [CHANGELOG](https://github.com/openvinotoolkit/datumaro/blob/develop/CHANGELOG.md).​ - [x] I have updated the [documentation](https://github.com/openvinotoolkit/datumaro/tree/develop/docs) accordingly ### License - [x] I submit _my code changes_ under the same [MIT License](https://github.com/openvinotoolkit/datumaro/blob/develop/LICENSE) that covers the project. Feel free to contact the maintainers if that's a concern. - [x] I have updated the license header for each file (see an example below). 
```python # Copyright (C) 2024 Intel Corporation # # SPDX-License-Identifier: MIT ``` --- CHANGELOG.md | 2 + docs/source/docs/data-formats/formats/dota.md | 47 ++++ .../docs/data-formats/formats/index.rst | 5 + src/datumaro/plugins/data_formats/dota.py | 260 ++++++++++++++++++ .../plugins/data_formats/roboflow/importer.py | 16 ++ src/datumaro/plugins/specs.json | 20 ++ .../dota_dataset/train/images/train_001.jpg | Bin 0 -> 631 bytes .../dota_dataset/train/images/train_002.jpg | Bin 0 -> 631 bytes .../dota_dataset/train/labelTxt/train_001.txt | 1 + .../dota_dataset/train/labelTxt/train_002.txt | 1 + .../dota_dataset/val/images/val_001.jpg | Bin 0 -> 631 bytes .../dota_dataset/val/labelTxt/val_001.txt | 2 + tests/unit/data_formats/test_dota.py | 153 +++++++++++ tests/unit/data_formats/test_roboflow.py | 6 +- 14 files changed, 510 insertions(+), 3 deletions(-) create mode 100644 docs/source/docs/data-formats/formats/dota.md create mode 100644 src/datumaro/plugins/data_formats/dota.py create mode 100644 tests/assets/dota_dataset/train/images/train_001.jpg create mode 100644 tests/assets/dota_dataset/train/images/train_002.jpg create mode 100644 tests/assets/dota_dataset/train/labelTxt/train_001.txt create mode 100644 tests/assets/dota_dataset/train/labelTxt/train_002.txt create mode 100644 tests/assets/dota_dataset/val/images/val_001.jpg create mode 100644 tests/assets/dota_dataset/val/labelTxt/val_001.txt create mode 100644 tests/unit/data_formats/test_dota.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e08b90b7a..ed01d4907a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 () - Add AnnotationType.rotated_bbox for oriented object detection () +- Add DOTA data format for oriented object detection task + () ### Enhancements - Fix ambiguous COCO format detector diff --git a/docs/source/docs/data-formats/formats/dota.md b/docs/source/docs/data-formats/formats/dota.md new file 
mode 100644 index 0000000000..6e05234c0c --- /dev/null +++ b/docs/source/docs/data-formats/formats/dota.md @@ -0,0 +1,47 @@ +# DOTA +## Format specification +[DOTA](https://captain-whu.github.io/DOTA/dataset.html) is a collection of 2K aerial images for a rotated object detection task. +Each object is annotated with the 4 corner points (8 coordinate values) of its oriented bounding box, a label among 15 classes (baseball-diamond, basketball-court, bridge, ground-track-field, harbor, helicopter, large-vehicle, plane, roundabout, ship, small-vehicle, soccer-ball-field, storage-tank, swimming-pool, tennis-court), and a difficulty. + +## Import DOTA dataset +A Datumaro project with a DOTA source can be created in the following way: + +```bash +datum project create +datum project import --format dota <path/to/dataset> +``` + +Or, using the Python API: + +```python +import datumaro as dm + +dataset = dm.Dataset.import_from('<path/to/dataset>', 'dota') +``` + +## Directory structure + +``` +dota/ +├── train # Subset directory +│ ├── images +│ │ ├── img1.jpg # Image file +│ │ ├── img2.jpg # Image file +│ │ └── ... +│ ├── labelTxt +│ │ ├── img1.txt # Annotation file +│ │ ├── img2.txt # Annotation file +│ │ └── ... +├── val # Subset directory +│ ├── images +│ │ ├── img3.jpg # Image file +│ │ ├── img4.jpg # Image file +│ │ └── ... +│ ├── labelTxt +│ │ ├── img3.txt # Annotation file +│ │ ├── img4.txt # Annotation file +│ │ └── ... +└── ... +``` +## Annotation Txt file +An example of a `.txt` annotation file is given in the [DOTA annotation format](https://captain-whu.github.io/DOTA/dataset.html). 
diff --git a/docs/source/docs/data-formats/formats/index.rst b/docs/source/docs/data-formats/formats/index.rst index d8f4b4514c..cfe81aa568 100644 --- a/docs/source/docs/data-formats/formats/index.rst +++ b/docs/source/docs/data-formats/formats/index.rst @@ -21,6 +21,7 @@ Supported Data Formats cvat datumaro_binary datumaro + dota icdar image_zip imagenet @@ -104,6 +105,10 @@ Supported Data Formats * `Format specification `_ * `Dataset example `_ * `Format documentation `_ +* DOTA (``detection_rotated``) + * `Format specification `_ + * `Dataset example `_ + * `Format documentation `_ * ICDAR13/15 (``word recognition``, ``text localization``, ``text segmentation``) * `Format specification `_ * `Dataset example `_ diff --git a/src/datumaro/plugins/data_formats/dota.py b/src/datumaro/plugins/data_formats/dota.py new file mode 100644 index 0000000000..d499791937 --- /dev/null +++ b/src/datumaro/plugins/data_formats/dota.py @@ -0,0 +1,260 @@ +# Copyright (C) 2024 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from __future__ import annotations + +import logging as log +import os +import os.path as osp +from typing import Any, Dict, List, Optional, Type, TypeVar + +from datumaro.components.annotation import Annotation, AnnotationType, LabelCategories, RotatedBbox +from datumaro.components.dataset_base import DEFAULT_SUBSET_NAME, DatasetItem, SubsetBase +from datumaro.components.errors import ( + DatasetExportError, + DatasetImportError, + InvalidAnnotationError, + MediaTypeError, +) +from datumaro.components.exporter import Exporter +from datumaro.components.format_detection import FormatDetectionConfidence, FormatDetectionContext +from datumaro.components.importer import ImportContext, Importer +from datumaro.components.media import Image +from datumaro.components.task import TaskAnnotationMapping +from datumaro.util.image import IMAGE_EXTENSIONS +from datumaro.util.os_util import find_files + +T = TypeVar("T") + + +class DotaFormat: + ANNOTATION_DIR = 
"labelTxt" + IMAGE_DIR = "images" + + +class DotaBase(SubsetBase): + def __init__( + self, + path: Optional[List[str]] = None, + *, + img_path: Optional[str] = None, + subset: Optional[str] = None, + ctx: Optional[ImportContext] = None, + ) -> None: + super().__init__(subset=subset, ctx=ctx) + + if not osp.isdir(path): + raise DatasetImportError(f"Can't find annotation directory {path}") + + self._path = path + + super().__init__(subset=subset, ctx=ctx) + + self._img_files = self._load_img_files(img_path) + self._label_categories = self._load_categories(path) + self._categories = {AnnotationType.label: self._label_categories} + + self._items = self._load_items(path) + self._task_type = TaskAnnotationMapping().get_task(self._ann_types) + + def _load_img_files(self, rootpath: str) -> Dict[str, str]: + return { + self._get_fname(img_file): img_file + for img_file in find_files(rootpath, IMAGE_EXTENSIONS, recursive=True, max_depth=2) + } + + def _load_categories(self, path): + label_names = [] + for ann_file in os.listdir(path): + label_names.extend( + self._parse_annotations( + ann_file=osp.join(self._path, ann_file), only_label_names=True + ) + ) + + label_categories = LabelCategories() + for label_name in sorted(set(label_names)): + label_categories.add(label_name) + + return label_categories + + def _load_items(self, path): + items = [] + for ann_file in os.listdir(path): + fname = osp.splitext(ann_file)[0] + img = Image.from_file(path=self._img_files[fname]) + anns = self._parse_annotations( + ann_file=osp.join(self._path, ann_file), only_label_names=False + ) + items.append(DatasetItem(id=fname, subset=self._subset, media=img, annotations=anns)) + return items + + def _get_fname(self, fpath: str) -> str: + return osp.splitext(osp.basename(fpath))[0] + + def _parse_annotations( + self, + ann_file: str, + only_label_names: bool, + ) -> List[Annotation]: + lines = [] + with open(ann_file, "r", encoding="utf-8") as f: + for line in f: + line = line.strip() + if line: 
+ lines.append(line) + + annotations = [] + for line in lines: + parts = line.split() + if len(parts) != 10: + log.debug( + f"Unexpected field count {len(parts)} in the bbox description. " + "Expected 10 fields (8 coordinates for rectangle, category, and difficulty)." + ) + continue + + label_name = self._parse_field(parts[-2], str, "label_name") + + if only_label_names: + annotations.append(label_name) + continue + + label_id, _ = self._label_categories.find(label_name) + coords = [ + ( + self._parse_field(parts[i], float, "coords"), + self._parse_field(parts[i + 1], float, "coords"), + ) + for i in range(0, 8, 2) + ] + difficulty = self._parse_field(parts[-1], int, "difficulty") + + annotations.append( + RotatedBbox.from_rectangle( + coords, label=label_id, attributes={"difficulty": difficulty} + ) + ) + self._ann_types.add(AnnotationType.rotated_bbox) + + return annotations + + @staticmethod + def _parse_field(value: str, desired_type: Type[T], field_name: str) -> T: + try: + return desired_type(value) + except Exception as e: + raise InvalidAnnotationError( + f"Can't parse {field_name} from '{value}'. 
Expected {desired_type}" + ) from e + + +class DotaImporter(Importer): + _ANNO_EXT = ".txt" + + @classmethod + def detect(cls, context: FormatDetectionContext) -> FormatDetectionConfidence: + context.require_file("**/" + DotaFormat.ANNOTATION_DIR + "/*" + cls._ANNO_EXT) + return FormatDetectionConfidence.MEDIUM + + @classmethod + def find_sources(cls, path: str) -> List[Dict[str, Any]]: + sources = cls._find_sources_recursive( + path=path, + ext=cls._ANNO_EXT, + dirname=DotaFormat.ANNOTATION_DIR, + extractor_name="dota", + ) + + data_paths = set() + for source in sources: + url = osp.dirname(source["url"]) + subset_name = osp.relpath(source["url"], path).split(osp.sep)[0] + data_paths.add((subset_name, url)) + + return [ + { + "url": ann_dir, + "format": "dota", + "options": { + "subset": subset, + "img_path": osp.join(path, subset, DotaFormat.IMAGE_DIR), + }, + } + for subset, ann_dir in data_paths + ] + + @classmethod + def get_file_extensions(cls) -> List[str]: + return [cls._ANNO_EXT] + + +class DotaExporter(Exporter): + DEFAULT_IMAGE_EXT = ".png" + + def _apply_impl(self): + extractor = self._extractor + save_dir = self._save_dir + + if self._extractor.media_type() and not issubclass(self._extractor.media_type(), Image): + raise MediaTypeError("Media type is not an image") + + os.makedirs(save_dir, exist_ok=True) + + label_categories = extractor.categories()[AnnotationType.label] + + subsets = self._extractor.subsets() + for subset_name, subset in subsets.items(): + if not subset_name or subset_name == DEFAULT_SUBSET_NAME: + subset_name = DEFAULT_SUBSET_NAME + + subset_dir = osp.join(save_dir, subset_name) + os.makedirs(subset_dir, exist_ok=True) + + for item in subset: + try: + self._export_media(item, subset_dir) + self._export_item_annotation(item, subset_dir, label_categories) + + except Exception as e: + self._ctx.error_policy.report_item_error(e, item_id=(item.id, item.subset)) + + def _export_media(self, item: DatasetItem, subset_dir: str) -> str: + 
try: + if not item.media or not (item.media.has_data or item.media.has_size): + raise DatasetExportError( + "Failed to export item '%s': " "item has no image info" % item.id + ) + + image_name = self._make_image_filename(item) + image_fpath = osp.join(subset_dir, DotaFormat.IMAGE_DIR, image_name) + + if self._save_media: + self._save_image(item, image_fpath) + + except Exception as e: + self._ctx.error_policy.report_item_error(e, item_id=(item.id, item.subset)) + + def _export_item_annotation( + self, item: DatasetItem, subset_dir: str, label_categories: LabelCategories + ) -> None: + try: + annotations = "" + for bbox in item.annotations: + if not isinstance(bbox, RotatedBbox) or bbox.label is None: + continue + coords = bbox.as_polygon() + coords = " ".join("%.2f %.2f" % (x, y) for x, y in coords) + label_name = label_categories[bbox.label].name + difficulty = bbox.attributes.get("difficulty", 0) + annotations += "%s %s %s\n" % (coords, label_name, difficulty) + + annotation_path = osp.join(subset_dir, DotaFormat.ANNOTATION_DIR, "%s.txt" % item.id) + os.makedirs(osp.dirname(annotation_path), exist_ok=True) + + with open(annotation_path, "w", encoding="utf-8") as f: + f.write(annotations) + + except Exception as e: + self._ctx.error_policy.report_item_error(e, item_id=(item.id, item.subset)) diff --git a/src/datumaro/plugins/data_formats/roboflow/importer.py b/src/datumaro/plugins/data_formats/roboflow/importer.py index 0a1a5efe2f..e091a16d04 100644 --- a/src/datumaro/plugins/data_formats/roboflow/importer.py +++ b/src/datumaro/plugins/data_formats/roboflow/importer.py @@ -4,6 +4,7 @@ import os import os.path as osp +import warnings from collections import defaultdict from glob import glob from io import TextIOWrapper @@ -194,6 +195,21 @@ class RoboflowYoloObbImporter(RoboflowYoloImporter): FORMAT_EXT = ".txt" ANN_DIR_NAME = "labelTxt/" + @classmethod + def detect(cls, context: FormatDetectionContext) -> FormatDetectionConfidence: + warnings.warn( + 
f"FormatDetectionConfidence of '{cls.FORMAT}' is lowered because of 'dota' format support. " + f"It will be deprecated in datumaro==1.8.0.", + DeprecationWarning, + ) + with context.require_any(): + with context.alternative(): + cls._check_ann_file( + context.require_file("**/" + cls.ANN_DIR_NAME + "*" + cls.FORMAT_EXT), context + ) + + return FormatDetectionConfidence.LOW + @classmethod def _check_ann_file_impl(cls, fp: TextIOWrapper) -> bool: for line in fp: diff --git a/src/datumaro/plugins/specs.json b/src/datumaro/plugins/specs.json index ce2b1b7d3c..4c9afb5426 100644 --- a/src/datumaro/plugins/specs.json +++ b/src/datumaro/plugins/specs.json @@ -459,6 +459,26 @@ ] } }, + { + "import_path": "datumaro.plugins.data_formats.dota.DotaBase", + "plugin_name": "dota", + "plugin_type": "DatasetBase" + }, + { + "import_path": "datumaro.plugins.data_formats.dota.DotaExporter", + "plugin_name": "dota", + "plugin_type": "Exporter" + }, + { + "import_path": "datumaro.plugins.data_formats.dota.DotaImporter", + "plugin_name": "dota", + "plugin_type": "Importer", + "metadata": { + "file_extensions": [ + ".txt" + ] + } + }, { "import_path": "datumaro.plugins.data_formats.icdar.base.IcdarTextLocalizationBase", "plugin_name": "icdar_text_localization", diff --git a/tests/assets/dota_dataset/train/images/train_001.jpg b/tests/assets/dota_dataset/train/images/train_001.jpg new file mode 100644 index 0000000000000000000000000000000000000000..222682d80bf9740d8eb672035ae34a240f949592 GIT binary patch literal 631 zcmex=^(PF6}rMnOeST|r4lSw=>~TvNxu(8R<c1}I=;VrF4wW9Q)H;sz?% zD!{d!pzFb!U9xX3zTPI5o8roG<0MW4oqZMDikqloVbuf*=gfJ(V&YTRE(2~ znmD<{#3dx9RMpfqG__1j&CD$#!v`*nMGf}^(PF6}rMnOeST|r4lSw=>~TvNxu(8R<c1}I=;VrF4wW9Q)H;sz?% zD!{d!pzFb!U9xX3zTPI5o8roG<0MW4oqZMDikqloVbuf*=gfJ(V&YTRE(2~ znmD<{#3dx9RMpfqG__1j&CD$#!v`*nMGf}^(PF6}rMnOeST|r4lSw=>~TvNxu(8R<c1}I=;VrF4wW9Q)H;sz?% zD!{d!pzFb!U9xX3zTPI5o8roG<0MW4oqZMDikqloVbuf*=gfJ(V&YTRE(2~ znmD<{#3dx9RMpfqG__1j&CD$#!v`*nMGf}