Skip to content

Commit

Permalink
Fix imagenet importer (#371)
Browse files Browse the repository at this point in the history
* update tests

* Keep the label directory and the label-name file prefix in the item id (e.g. `label_0/label_0_1`)

* Update changelog
  • Loading branch information
Kirill Sizov authored Jul 22, 2021
1 parent 3a6af62 commit 422de44
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 35 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Unsafe unpickling in CIFAR import (<https://github.com/openvinotoolkit/datumaro/pull/362>)
- Improved Cityscapes export performance (<https://github.com/openvinotoolkit/datumaro/pull/367>)
- Incorrect format of `*_labelIds.png` in Cityscapes export (<https://github.com/openvinotoolkit/datumaro/issues/325>, <https://github.com/openvinotoolkit/datumaro/issues/342>)
- Incorrect item id in ImageNet format: ids now include the label directory (<https://github.com/openvinotoolkit/datumaro/pull/371>)

### Security
- TBD
Expand Down
27 changes: 16 additions & 11 deletions datumaro/plugins/imagenet_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,13 @@ def _load_items(self, path):
for image_path in find_images(path, recursive=True, max_depth=1):
label = osp.basename(osp.dirname(image_path))
image_name = osp.splitext(osp.basename(image_path))[0]
if image_name.startswith(label + '_'):
image_name = image_name[len(label) + 1:]

item = items.get(image_name)
item_id = osp.join(label, image_name)
item = items.get(item_id)
if item is None:
item = DatasetItem(id=image_name, subset=self._subset,
item = DatasetItem(id=item_id, subset=self._subset,
image=image_path)
items[image_name] = item
items[item_id] = item
annotations = item.annotations

if label != ImagenetPath.IMAGE_DIR_NO_LABEL:
Expand All @@ -68,6 +67,13 @@ class ImagenetConverter(Converter):
DEFAULT_IMAGE_EXT = '.jpg'

def apply(self):

def _get_dir_name(id_parts, label_name):
if 1 < len(id_parts) and id_parts[0] == label_name:
return ''
else:
return label_name

if 1 < len(self._extractor.subsets()):
log.warning("ImageNet format only supports exporting a single "
"subset, subset information will not be used.")
Expand All @@ -76,16 +82,15 @@ def apply(self):
extractor = self._extractor
labels = {}
for item in self._extractor:
id_parts = item.id.split('/')
labels = set(p.label for p in item.annotations
if p.type == AnnotationType.label)

for label in labels:
label_name = extractor.categories()[AnnotationType.label][label].name
self._save_image(item, osp.join(subset_dir, label_name,
'%s_%s' % (label_name, self._make_image_filename(item))))
self._save_image(item, subdir=osp.join(subset_dir,
_get_dir_name(id_parts, label_name)))

if not labels:
self._save_image(item, osp.join(subset_dir,
ImagenetPath.IMAGE_DIR_NO_LABEL,
ImagenetPath.IMAGE_DIR_NO_LABEL + '_' + \
self._make_image_filename(item)))
self._save_image(item, subdir=osp.join(subset_dir,
_get_dir_name(id_parts, ImagenetPath.IMAGE_DIR_NO_LABEL)))
17 changes: 9 additions & 8 deletions tests/cli/test_voc_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,15 +213,16 @@ def test_convert_from_voc_format(self):
3. Verify that resulting dataset is equal to the expected dataset.
"""

labels = sorted([l.name for l in VOC.VocLabel if l.value % 2 == 1])

expected_dataset = Dataset.from_iterable([
DatasetItem(id='2007_000001', subset='default',
image=np.ones((10, 20, 3)),
annotations=[Label(i) for i in range(11)]
),
DatasetItem(id='2007_000002', subset='default',
image=np.ones((10, 20, 3))
)
], categories=sorted([l.name for l in VOC.VocLabel if l.value % 2 == 1]))
DatasetItem(id='/'.join([label, '2007_000001']),
subset='default', annotations=[Label(i)])
for i, label in enumerate(labels)
] + [DatasetItem(id='no_label/2007_000002', subset='default',
image=np.ones((10, 20, 3)))
], categories=labels
)

voc_dir = osp.join(DUMMY_DATASETS_DIR, 'voc_dataset1')
with TestDir() as test_dir:
Expand Down
47 changes: 31 additions & 16 deletions tests/test_imagenet_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@ class ImagenetFormatTest(TestCase):
@mark_requirement(Requirements.DATUM_GENERAL_REQ)
def test_can_save_and_load(self):
source_dataset = Dataset.from_iterable([
DatasetItem(id='1',
DatasetItem(id='label_0/1',
image=np.ones((8, 8, 3)),
annotations=[Label(0)]
),
DatasetItem(id='2',
DatasetItem(id='label_1/2',
image=np.ones((10, 10, 3)),
annotations=[Label(1)]
),
Expand All @@ -44,35 +44,46 @@ def test_can_save_and_load_with_multiple_labels(self):
source_dataset = Dataset.from_iterable([
DatasetItem(id='1',
image=np.ones((8, 8, 3)),
annotations=[Label(0), Label(1), Label(2)]
annotations=[Label(0), Label(1)]
),
DatasetItem(id='2',
image=np.ones((8, 8, 3))
),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(label) for label in range(3)),
'label_' + str(label) for label in range(2)),
})

excepted_dataset = Dataset.from_iterable([
DatasetItem(id='label_0/1',
image=np.ones((8, 8, 3)),
annotations=[Label(0)]
),
DatasetItem(id='label_1/1',
image=np.ones((8, 8, 3)),
annotations=[Label(1)]
),
DatasetItem(id='no_label/2',
image=np.ones((8, 8, 3))
),
], categories=['label_0', 'label_1'])

with TestDir() as test_dir:
ImagenetConverter.convert(source_dataset, test_dir, save_images=True)

parsed_dataset = Dataset.import_from(test_dir, 'imagenet')

compare_datasets(self, source_dataset, parsed_dataset,
compare_datasets(self, excepted_dataset, parsed_dataset,
require_images=True)

@mark_requirement(Requirements.DATUM_GENERAL_REQ)
def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self):
source_dataset = Dataset.from_iterable([
DatasetItem(id="кириллица с пробелом",
DatasetItem(id="label_0/кириллица с пробелом",
image=np.ones((8, 8, 3)),
annotations=[Label(0), Label(1)]
annotations=[Label(0)]
),
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(label) for label in range(2)),
})
], categories=['label_0'])

with TestDir() as test_dir:
ImagenetConverter.convert(source_dataset, test_dir, save_images=True)
Expand All @@ -85,9 +96,9 @@ def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self):
@mark_requirement(Requirements.DATUM_GENERAL_REQ)
def test_can_save_and_load_image_with_arbitrary_extension(self):
dataset = Dataset.from_iterable([
DatasetItem(id='a', image=Image(path='a.JPEG',
DatasetItem(id='no_label/a', image=Image(path='a.JPEG',
data=np.zeros((4, 3, 3)))),
DatasetItem(id='b', image=Image(path='b.bmp',
DatasetItem(id='no_label/b', image=Image(path='b.bmp',
data=np.zeros((3, 4, 3)))),
], categories=[])

Expand All @@ -105,14 +116,18 @@ class ImagenetImporterTest(TestCase):
@mark_requirement(Requirements.DATUM_GENERAL_REQ)
def test_can_import(self):
expected_dataset = Dataset.from_iterable([
DatasetItem(id='1',
DatasetItem(id='label_0/label_0_1',
image=np.ones((8, 8, 3)),
annotations=[Label(0), Label(1)]
annotations=[Label(0)]
),
DatasetItem(id='2',
DatasetItem(id='label_0/label_0_2',
image=np.ones((10, 10, 3)),
annotations=[Label(0)]
),
DatasetItem(id='label_1/label_1_1',
image=np.ones((8, 8, 3)),
annotations=[Label(1)]
)
], categories={
AnnotationType.label: LabelCategories.from_iterable(
'label_' + str(label) for label in range(2)),
Expand Down

0 comments on commit 422de44

Please sign in to comment.