Allow double quotes for ICDAR Word Recognition (#375)

* Allow double quotes in captions
* Add test
* Update the changelog
openvinotoolkit · Jul 22, 2021 · 551fa11
1 parent 422de44
commit 551fa11
Show file tree

Hide file tree

Showing 3 changed files with 21 additions and 9 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -26,6 +26,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Improved Cityscapes export performance (<https://github.com/openvinotoolkit/datumaro/pull/367>)
 - Incorrect format of `*_labelIds.png` in Cityscapes export (<https://github.com/openvinotoolkit/datumaro/issues/325>, <https://github.com/openvinotoolkit/datumaro/issues/342>)
 - Item id in ImageNet format (<https://github.com/openvinotoolkit/datumaro/pull/371>)
+- Fix double quotes for ICDAR Word Recognition (<https://github.com/openvinotoolkit/datumaro/pull/375>)
 
 ### Security
 - TBD

diff --git a/datumaro/plugins/icdar_format/extractor.py b/datumaro/plugins/icdar_format/extractor.py
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: MIT
 
 from glob import iglob
+import logging as log
 import os.path as osp
 
 import numpy as np
@@ -59,16 +60,13 @@ def _load_recognition_items(self):
                 objects = line.split(', ')
                 if len(objects) == 2:
                     image = objects[0]
-                    objects = objects[1].split('\"')
-                    if 1 < len(objects):
-                        if len(objects) % 2:
-                            captions = [objects[2 * i + 1]
-                                for i in range(int(len(objects) / 2))]
+                    captions = []
+                    for caption in objects[1:]:
+                        if caption[0] != '\"' or caption[-1] != '\"':
+                            log.warning("Line %s: unexpected number "
+                                "of quotes" % line)
                         else:
-                            raise Exception("Line %s: unexpected number "
-                                "of quotes in filename" % line)
-                    else:
-                        captions = objects[0].split()
+                            captions.append(caption.replace('\\', '')[1:-1])
                 else:
                     image = objects[0][:-1]
                     captions = []

diff --git a/tests/test_icdar_format.py b/tests/test_icdar_format.py
@@ -255,3 +255,16 @@ def test_can_save_and_load_image_with_arbitrary_extension(self):
                 self._test_save_and_load(expected,
                     partial(converter.convert, save_images=True),
                     test_dir, importer, require_images=True)
+
+    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
+    def test_can_save_and_load_captions_with_quotes(self):
+        expected_dataset = Dataset.from_iterable([
+            DatasetItem(id='1', image=np.ones((5, 5, 3)),
+                annotations=[Caption('caption\"')]
+            )
+        ])
+
+        with TestDir() as test_dir:
+            self._test_save_and_load(expected_dataset,
+                partial(IcdarWordRecognitionConverter.convert, save_images=True),
+                test_dir, 'icdar_word_recognition')