cvat-ai · nmanovic · Mar 26, 2021 · Feb 25, 2021 · Mar 1, 2021 · Mar 2, 2021
diff --git a/README.md b/README.md
@@ -62,6 +62,7 @@ For more information about supported formats look at the
 | [LabelMe 3.0](http://labelme.csail.mit.edu/Release3.0)                        | X      | X      |
 | [ImageNet](http://www.image-net.org)                                          | X      | X      |
 | [CamVid](http://mi.eng.cam.ac.uk/research/projects/VideoRec/CamVid/)          | X      | X      |
+| [ICDAR13/15](https://rrc.cvc.uab.es/?ch=2)                                    | X      | X      |
 
 ## Deep learning serverless functions for automatic labeling
 

@@ -20,6 +20,7 @@
   - [TF detection API](#tfrecord)
   - [ImageNet](#imagenet)
   - [CamVid](#camvid)
+  - [ICDAR13/15](#icdar)
 
 ## How to add a new annotation format support<a id="how-to-add"></a>
 
@@ -814,17 +815,17 @@ Downloaded file: a zip archive of the following structure:
 ```bash
 # if we save images:
 taskname.zip/
-└── label1/
-    ├── label1_image1.jpg
-    └── label1_image2.jpg
+├── label1/
+|   ├── label1_image1.jpg
+|   └── label1_image2.jpg
 └── label2/
     ├── label2_image1.jpg
     ├── label2_image3.jpg
     └── label2_image4.jpg
 
 # if we keep only annotation:
 taskname.zip/
-└── <any_subset_name>.txt
+├── <any_subset_name>.txt
 └── synsets.txt
 
 ```
@@ -846,12 +847,12 @@ Downloaded file: a zip archive of the following structure:
 ```bash
 taskname.zip/
 ├── labelmap.txt # optional, required for non-CamVid labels
-└── <any_subset_name>/
-    ├── image1.png
-    └── image2.png
-└── <any_subset_name>annot/
-    ├── image1.png
-    └── image2.png
+├── <any_subset_name>/
+|   ├── image1.png
+|   └── image2.png
+├── <any_subset_name>annot/
+|   ├── image1.png
+|   └── image2.png
 └── <any_subset_name>.txt
 
 # labelmap.txt
@@ -874,3 +875,68 @@ has own color which corresponds to a label.
 Uploaded file: a zip archive of the structure above
 
 - supported annotations: Polygons
+
+### [ICDAR13/15](https://rrc.cvc.uab.es/?ch=2)<a id="icdar" />
+
+#### ICDAR13/15 Dumper
+
+Downloaded file: a zip archive of the following structure:
+
+```bash
+# word recognition task
+taskname.zip/
+└── word_recognition/
+    └── <any_subset_name>/
+        ├── images
+        |   ├── word1.png
+        |   └── word2.png
+        └── gt.txt
+
+# text localization task
+taskname.zip/
+└── text_localization/
+    └── <any_subset_name>/
+        ├── images
+        |   ├── img_1.png
+        |   └── img_2.png
+        ├── gt_img_1.txt
+        └── gt_img_2.txt
+
+# text segmentation task
+taskname.zip/
+└── text_segmentation/
+    └── <any_subset_name>/
+        ├── images
+        |   ├── 1.png
+        |   └── 2.png
+        ├── 1_GT.bmp
+        ├── 1_GT.txt
+        ├── 2_GT.bmp
+        └── 2_GT.txt
+```
+
+**Word recognition task**:
+- supported annotations: Label `icdar` with attribute `caption`
+
+**Text localization task**:
+- supported annotations: Rectangles and Polygons with label `icdar`
+  and attribute `text`
+
+**Text segmentation task**:
+- supported annotations: Rectangles and Polygons with label `icdar`
+  and attributes `index`, `text`, `color`, `center`
+
+#### ICDAR13/15 Loader
+
+Uploaded file: a zip archive of the structure above
+
+**Word recognition task**:
+- supported annotations: Label `icdar` with attribute `caption`
+
+**Text localization task**:
+- supported annotations: Rectangles and Polygons with label `icdar`
+  and attribute `text`
+
+**Text segmentation task**:
+- supported annotations: Rectangles and Polygons with label `icdar`
+  and attributes `index`, `text`, `color`, `center`
@@ -0,0 +1,104 @@
+# Copyright (C) 2021 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import os.path as osp
+import zipfile
+from tempfile import TemporaryDirectory
+
+from datumaro.components.dataset import Dataset
+from datumaro.components.extractor import (AnnotationType, Caption, Label,
+    LabelCategories)
+
+from cvat.apps.dataset_manager.bindings import CvatTaskDataExtractor, \
+    import_dm_annotations
+from cvat.apps.dataset_manager.util import make_zip_archive
+
+from .registry import dm_env, exporter, importer
+
+
+@exporter(name='ICDAR', ext='ZIP', version='1.0')
+def _export(dst_file, task_data, save_images=False):
+    """Export task annotations as a ZIP archive in an ICDAR format.
+
+    The Datumaro format is picked from the attribute names declared in the
+    label categories: ``caption`` selects word recognition, ``color`` or
+    ``center`` selects text segmentation, and text localization is used
+    otherwise.
+
+    Args:
+        dst_file: destination of the resulting ZIP archive.
+        task_data: task annotations, wrapped in a CvatTaskDataExtractor.
+        save_images: whether images are included in the export.
+    """
+    dataset = Dataset.from_extractors(CvatTaskDataExtractor(
+        task_data, include_images=save_images), env=dm_env)
+    with TemporaryDirectory() as temp_dir:
+        icdar_format = 'icdar_text_localization'
+        # NOTE(review): this reads Datumaro private fields (_data._source);
+        # re-check when the Datumaro dependency is upgraded.
+        categories = dataset._data._source._categories[AnnotationType.label]
+        # The last matching attribute wins, so when both kinds of attributes
+        # are declared the result depends on the iteration order.
+        for attr in categories.attributes:
+            if attr in ('color', 'center'):
+                icdar_format = 'icdar_text_segmentation'
+            elif attr == 'caption':
+                icdar_format = 'icdar_word_recognition'
+        if icdar_format == 'icdar_word_recognition':
+            # Mirror every `caption` attribute as a Caption annotation.
+            for item in dataset._data._source:
+                anns = [p for p in item.annotations
+                    if 'caption' in p.attributes]
+                for ann in anns:
+                    item.annotations.append(Caption(ann.attributes['caption']))
+        elif icdar_format == 'icdar_text_segmentation':
+            # Turn polygons and boxes into masks, then merge the segments.
+            dataset.transform('polygons_to_masks')
+            dataset.transform('boxes_to_masks')
+            dataset.transform('merge_instance_segments')
+        dataset.export(temp_dir, icdar_format, save_images=save_images)
+        make_zip_archive(temp_dir, dst_file)
+
+@importer(name='ICDAR', ext='ZIP', version='1.0')
+def _import(src_file, task_data):
+    """Import annotations from an ICDAR ZIP archive into ``task_data``.
+
+    The archive is unpacked into a temporary directory and read with the
+    Datumaro ``icdar`` format. When a ``word_recognition`` directory is
+    present, captions are replaced by Label annotations carrying a
+    ``caption`` attribute; otherwise boxes, polygons and masks are assigned
+    label index 0. The label categories are replaced by the single label
+    ``icdar`` and masks are converted to polygons before the import.
+
+    Args:
+        src_file: ZIP archive to read (anything zipfile.ZipFile accepts).
+        task_data: destination passed to import_dm_annotations.
+    """
+    with TemporaryDirectory() as tmp_dir:
+        # Close the archive deterministically instead of leaving the handle
+        # open until garbage collection.
+        with zipfile.ZipFile(src_file) as archive:
+            archive.extractall(tmp_dir)
+
+        dataset = Dataset.import_from(tmp_dir, 'icdar', env=dm_env)
+        if osp.isdir(osp.join(tmp_dir, 'word_recognition')):
+            for item in dataset._data._source:
+                # Snapshot the captions first: the list is modified below.
+                anns = [p for p in item.annotations
+                    if p.type == AnnotationType.caption]
+                for ann in anns:
+                    item.annotations.append(Label(label=0,
+                        attributes={'caption': ann.caption}))
+                    item.annotations.remove(ann)
+        else:
+            for item in dataset._data._source:
+                anns = [p for p in item.annotations
+                    if p.type in [AnnotationType.bbox, AnnotationType.polygon, AnnotationType.mask]]
+                for ann in anns:
+                    ann.label = 0
+        label_cat = LabelCategories()
+        label_cat.add('icdar')
+        dataset._data._source._categories[AnnotationType.label] = label_cat
+        dataset.transform('masks_to_polygons')
+        import_dm_annotations(dataset, task_data)
@@ -95,3 +95,4 @@ def make_exporter(name):
 import cvat.apps.dataset_manager.formats.yolo
 import cvat.apps.dataset_manager.formats.imagenet
 import cvat.apps.dataset_manager.formats.camvid
+import cvat.apps.dataset_manager.formats.icdar
@@ -271,6 +271,7 @@ def test_export_formats_query(self):
             'YOLO 1.1',
             'ImageNet 1.0',
             'CamVid 1.0',
+            'ICDAR 1.0',
         })
 
     def test_import_formats_query(self):
@@ -289,6 +290,7 @@ def test_import_formats_query(self):
             'YOLO 1.1',
             'ImageNet 1.0',
             'CamVid 1.0',
+            'ICDAR 1.0',
         })
 
     def test_exports(self):
@@ -326,6 +328,7 @@ def test_empty_images_are_exported(self):
             ('YOLO 1.1', 'yolo'),
             ('ImageNet 1.0', 'imagenet_txt'),
             ('CamVid 1.0', 'camvid'),
+            ('ICDAR 1.0', 'icdar'),
         ]:
             with self.subTest(format=format_name):
                 if not dm.formats.registry.EXPORT_FORMATS[format_name].ENABLED: