Skip to content

Commit

Permalink
Allow double quotes for ICDAR Word Recognition (#375)
Browse files Browse the repository at this point in the history
* Allow double quotes in captions

* Add test

* Update the changelog
  • Loading branch information
Kirill Sizov authored Jul 22, 2021
1 parent 422de44 commit 551fa11
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 9 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Improved Cityscapes export performance (<https://github.com/openvinotoolkit/datumaro/pull/367>)
- Incorrect format of `*_labelIds.png` in Cityscapes export (<https://github.com/openvinotoolkit/datumaro/issues/325>, <https://github.com/openvinotoolkit/datumaro/issues/342>)
- Item id in ImageNet format (<https://github.com/openvinotoolkit/datumaro/pull/371>)
- Fix handling of double quotes in captions for ICDAR Word Recognition (<https://github.com/openvinotoolkit/datumaro/pull/375>)

### Security
- TBD
Expand Down
16 changes: 7 additions & 9 deletions datumaro/plugins/icdar_format/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# SPDX-License-Identifier: MIT

from glob import iglob
import logging as log
import os.path as osp

import numpy as np
Expand Down Expand Up @@ -59,16 +60,13 @@ def _load_recognition_items(self):
objects = line.split(', ')
if len(objects) == 2:
image = objects[0]
objects = objects[1].split('\"')
if 1 < len(objects):
if len(objects) % 2:
captions = [objects[2 * i + 1]
for i in range(int(len(objects) / 2))]
captions = []
for caption in objects[1:]:
if caption[0] != '\"' or caption[-1] != '\"':
log.warning("Line %s: unexpected number "
"of quotes" % line)
else:
raise Exception("Line %s: unexpected number "
"of quotes in filename" % line)
else:
captions = objects[0].split()
captions.append(caption.replace('\\', '')[1:-1])
else:
image = objects[0][:-1]
captions = []
Expand Down
13 changes: 13 additions & 0 deletions tests/test_icdar_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,3 +255,16 @@ def test_can_save_and_load_image_with_arbitrary_extension(self):
self._test_save_and_load(expected,
partial(converter.convert, save_images=True),
test_dir, importer, require_images=True)

@mark_requirement(Requirements.DATUM_GENERAL_REQ)
def test_can_save_and_load_captions_with_quotes(self):
    """Round-trip a caption containing a double quote.

    Builds a one-item dataset whose caption text ends with a literal
    double-quote character, then hands it to ``_test_save_and_load``
    with the ICDAR word recognition converter (images saved) and the
    'icdar_word_recognition' importer name.
    """
    # The caption text is `caption"` -- the trailing quote is the
    # character under test.
    quoted_caption = Caption('caption\"')
    item = DatasetItem(id='1', image=np.ones((5, 5, 3)),
        annotations=[quoted_caption])
    source_dataset = Dataset.from_iterable([item])

    save_with_images = partial(IcdarWordRecognitionConverter.convert,
        save_images=True)

    with TestDir() as test_dir:
        self._test_save_and_load(source_dataset, save_with_images,
            test_dir, 'icdar_word_recognition')

0 comments on commit 551fa11

Please sign in to comment.