Support for Market-1501 dataset format (#2869)

* Add support for Market-1501 dataset format * fix data access * Update Datumaro version * Add transforms * Update Changelog
cvat-ai · Mar 22, 2021 · ce1666f · ce1666f
1 parent d62e176
commit ce1666f
Show file tree

Hide file tree

Showing 7 changed files with 387 additions and 219 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -25,6 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [VGGFace2](https://github.com/ox-vgg/vgg_face2) format support (<https://github.com/openvinotoolkit/cvat/pull/2865>)
 - [Backup/Restore guide](cvat/apps/documentation/backup_guide.md) (<https://github.com/openvinotoolkit/cvat/pull/2964>)
 - Label deletion from tasks and projects (<https://github.com/openvinotoolkit/cvat/pull/2881>)
+- [Market-1501](https://www.aitribune.com/dataset/2018051063) format support (<https://github.com/openvinotoolkit/cvat/pull/2869>)
 
 ### Changed
 

diff --git a/README.md b/README.md
@@ -64,6 +64,7 @@ For more information about supported formats look at the
 | [CamVid](http://mi.eng.cam.ac.uk/research/projects/VideoRec/CamVid/)          | X      | X      |
 | [WIDER Face](http://shuoyang1213.me/WIDERFACE/)                               | X      | X      |
 | [VGGFace2](https://github.com/ox-vgg/vgg_face2)                               | X      | X      |
+| [Market-1501](https://www.aitribune.com/dataset/2018051063)                   | X      | X      |
 
 ## Deep learning serverless functions for automatic labeling
 

diff --git a/cvat/apps/dataset_manager/formats/README.md b/cvat/apps/dataset_manager/formats/README.md
@@ -22,6 +22,7 @@
   - [CamVid](#camvid)
   - [WIDER Face](#widerface)
   - [VGGFace2](#vggface2)
+  - [Market-1501](#market1501)
 
 ## How to add a new annotation format support<a id="how-to-add"></a>
 
@@ -937,3 +938,39 @@ label1 <class1>
 Uploaded file: a zip archive of the structure above
 
 - supported annotations: Rectangles, Points (landmarks - groups of 5 points)
+
+### [Market-1501](https://www.aitribune.com/dataset/2018051063)<a id="market1501" />
+
+#### Market-1501 Dumper
+
+Downloaded file: a zip archive of the following structure:
+
+```bash
+taskname.zip/
+├── bounding_box_<any_subset_name>/
+│   └── image_name_1.jpg
+└── query
+    ├── image_name_2.jpg
+    └── image_name_3.jpg
+# if we keep only annotation:
+taskname.zip/
+└── images_<any_subset_name>.txt
+# images_<any_subset_name>.txt
+query/image_name_1.jpg
+bounding_box_<any_subset_name>/image_name_2.jpg
+bounding_box_<any_subset_name>/image_name_3.jpg
+# image_name = 0001_c1s1_000015_00.jpg
+0001 - person id
+c1 - camera id (there are 6 cameras in total)
+s1 - sequence
+000015 - frame number in sequence
+00 - index of the bounding box within the frame (00 is the first of several)
+```
+
+- supported annotations: Label `market-1501` with attributes (`query`, `person_id`, `camera_id`)
+
+#### Market-1501 Loader
+
+Uploaded file: a zip archive of the structure above
+
+- supported annotations: Label `market-1501` with attributes (`query`, `person_id`, `camera_id`)
diff --git a/cvat/apps/dataset_manager/formats/market1501.py b/cvat/apps/dataset_manager/formats/market1501.py
@@ -0,0 +1,77 @@
+# Copyright (C) 2021 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import zipfile
+from tempfile import TemporaryDirectory
+
+from datumaro.components.dataset import Dataset
+from datumaro.components.extractor import (AnnotationType, Label,
+    LabelCategories, Transform)
+
+from cvat.apps.dataset_manager.bindings import (CvatTaskDataExtractor,
+    import_dm_annotations)
+from cvat.apps.dataset_manager.util import make_zip_archive
+
+from .registry import dm_env, exporter, importer
+
+class AttrToLabelAttr(Transform):
+    """Move item-level attributes onto a dedicated ``Label`` annotation.
+
+    The label name given at construction is added to the label categories
+    taken from the wrapped extractor (or to a fresh ``LabelCategories`` when
+    the extractor has none). Every item that carries attributes gets one
+    extra ``Label`` annotation holding them, and the returned item has empty
+    item-level attributes.
+    """
+
+    def __init__(self, extractor, label):
+        """
+        Args:
+            extractor: source extractor whose items are transformed.
+            label: name (``str``) of the label that will hold the attributes.
+        """
+        super().__init__(extractor)
+
+        assert isinstance(label, str)
+        self._categories = {}
+        label_cat = self._extractor.categories().get(AnnotationType.label)
+        if not label_cat:
+            label_cat = LabelCategories()
+        # Whatever `add` returns is used below as the `Label` id.
+        self._label = label_cat.add(label)
+        self._categories[AnnotationType.label] = label_cat
+
+    def categories(self):
+        """Return the categories, including the label added in ``__init__``."""
+        return self._categories
+
+    def transform_item(self, item):
+        """Return a copy of ``item`` with its attributes moved to a ``Label``.
+
+        The source item is left untouched: its annotation list is copied
+        before appending and its attributes are not reset in place.
+        """
+        annotations = list(item.annotations)
+        if not item.attributes:
+            return item.wrap(annotations=annotations)
+
+        annotations.append(Label(self._label, attributes=item.attributes))
+        return item.wrap(annotations=annotations, attributes={})
+
+class LabelAttrToAttr(Transform):
+    """Move the attributes of a specific ``Label`` annotation onto the item.
+
+    For every item that has exactly one ``Label`` annotation with the
+    configured label, the annotation's attributes are merged into the item
+    attributes and the annotation is removed. Other items pass through
+    with their annotations and attributes unchanged.
+    """
+
+    def __init__(self, extractor, label):
+        """
+        Args:
+            extractor: source extractor whose items are transformed.
+            label: name (``str``) of the label whose attributes are lifted.
+        """
+        super().__init__(extractor)
+
+        assert isinstance(label, str)
+        label_cat = self._extractor.categories().get(AnnotationType.label)
+        # Without label categories there is nothing to look up; keep the
+        # transform a no-op instead of failing on `None.find`.
+        self._label = label_cat.find(label)[0] \
+            if label_cat is not None else None
+
+    def transform_item(self, item):
+        """Return a copy of ``item`` with the label attributes lifted.
+
+        Works on copies of the annotation list and attribute dict so the
+        source item is not modified.
+        """
+        annotations = list(item.annotations)
+        attributes = dict(item.attributes)
+        if self._label is not None:
+            labels = [ann for ann in annotations
+                if ann.type == AnnotationType.label \
+                    and ann.label == self._label]
+            # Only an unambiguous single match is converted.
+            if len(labels) == 1:
+                attributes.update(labels[0].attributes)
+                annotations.remove(labels[0])
+        return item.wrap(annotations=annotations, attributes=attributes)
+
+
+@exporter(name='Market-1501', ext='ZIP', version='1.0')
+def _export(dst_file, task_data, save_images=False):
+    """Export task annotations as a Market-1501 ZIP archive.
+
+    Args:
+        dst_file: destination passed to ``make_zip_archive``.
+        task_data: task data wrapped by ``CvatTaskDataExtractor``.
+        save_images: forwarded both to the extractor (``include_images``)
+            and to the dataset export.
+    """
+    extractor = CvatTaskDataExtractor(task_data, include_images=save_images)
+    dataset = Dataset.from_extractors(extractor, env=dm_env)
+    with TemporaryDirectory() as export_dir:
+        # Lift the `market-1501` label attributes to item level before
+        # handing the dataset to the `market1501` exporter.
+        dataset.transform(LabelAttrToAttr, 'market-1501')
+        dataset.export(export_dir, 'market1501', save_images=save_images)
+        make_zip_archive(export_dir, dst_file)
+
+@importer(name='Market-1501', ext='ZIP', version='1.0')
+def _import(src_file, task_data):
+    """Import annotations from a Market-1501 ZIP archive into a task.
+
+    Args:
+        src_file: archive (path or file object) accepted by
+            ``zipfile.ZipFile``.
+        task_data: target passed to ``import_dm_annotations``.
+    """
+    with TemporaryDirectory() as tmp_dir:
+        # Use a context manager so the archive handle is always closed,
+        # even if extraction fails.
+        with zipfile.ZipFile(src_file) as archive:
+            archive.extractall(tmp_dir)
+
+        dataset = Dataset.import_from(tmp_dir, 'market1501', env=dm_env)
+        # Pack item-level attributes into a `market-1501` label annotation.
+        dataset.transform(AttrToLabelAttr, 'market-1501')
+        import_dm_annotations(dataset, task_data)
diff --git a/cvat/apps/dataset_manager/formats/registry.py b/cvat/apps/dataset_manager/formats/registry.py
@@ -97,3 +97,4 @@ def make_exporter(name):
 import cvat.apps.dataset_manager.formats.camvid
 import cvat.apps.dataset_manager.formats.widerface
 import cvat.apps.dataset_manager.formats.vggface2
+import cvat.apps.dataset_manager.formats.market1501
diff --git a/cvat/apps/dataset_manager/tests/test_formats.py b/cvat/apps/dataset_manager/tests/test_formats.py
@@ -284,6 +284,7 @@ def test_export_formats_query(self):
             'CamVid 1.0',
             'WiderFace 1.0',
             'VGGFace2 1.0',
+            'Market-1501 1.0',
         })
 
     def test_import_formats_query(self):
@@ -304,6 +305,7 @@ def test_import_formats_query(self):
             'CamVid 1.0',
             'WiderFace 1.0',
             'VGGFace2 1.0',
+            'Market-1501 1.0',
         })
 
     def test_exports(self):
@@ -346,6 +348,7 @@ def test_empty_images_are_exported(self):
             ('CamVid 1.0', 'camvid'),
             ('WiderFace 1.0', 'wider_face'),
             ('VGGFace2 1.0', 'vgg_face2'),
+            ('Market-1501 1.0', 'market1501'),
         ]:
             with self.subTest(format=format_name):
                 if not dm.formats.registry.EXPORT_FORMATS[format_name].ENABLED: