Skip to content

Commit

Permalink
Update YOLO format with image info
Browse files Browse the repository at this point in the history
  • Loading branch information
zhiltsov-max committed Feb 19, 2020
1 parent 8109ed3 commit 482433c
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 23 deletions.
21 changes: 12 additions & 9 deletions datumaro/datumaro/plugins/yolo_format/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,21 +75,24 @@ def __call__(self, extractor, save_dir):
image_paths = OrderedDict()

for item in subset:
image_name = '%s.jpg' % item.id
image_paths[item.id] = osp.join('data',
osp.basename(subset_dir), image_name)
if not item.has_image:
raise Exception("Failed to export item '%s': "
"item has no image info" % item.id)
height, width = item.image.size

image_name = item.image.filename
item_name = osp.splitext(item.image.filename)[0]
if self._save_images:
if item.has_image and item.image.has_data:
if not item_name:
item_name = item.id
image_name = item_name + '.jpg'
save_image(osp.join(subset_dir, image_name),
item.image.data)
else:
log.warning("Item '%s' has no image" % item.id)

if not item.has_image:
raise Exception("Failed to export item '%s': "
"item has no image info" % item.id)
height, width = item.image.size
image_paths[item.id] = osp.join('data',
osp.basename(subset_dir), image_name)

yolo_annotation = ''
for bbox in item.annotations:
Expand All @@ -102,7 +105,7 @@ def __call__(self, extractor, save_dir):
yolo_bb = ' '.join('%.6f' % p for p in yolo_bb)
yolo_annotation += '%s %s\n' % (bbox.label, yolo_bb)

annotation_path = osp.join(subset_dir, '%s.txt' % item.id)
annotation_path = osp.join(subset_dir, '%s.txt' % item_name)
with open(annotation_path, 'w') as f:
f.write(yolo_annotation)

Expand Down
33 changes: 25 additions & 8 deletions datumaro/datumaro/plugins/yolo_format/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from datumaro.components.extractor import (SourceExtractor, Extractor,
DatasetItem, AnnotationType, Bbox, LabelCategories
)
from datumaro.util.image import lazy_image
from datumaro.util.image import Image

from .format import YoloPath

Expand All @@ -33,16 +33,31 @@ def __len__(self):
def categories(self):
return self._parent.categories()

def __init__(self, config_path):
def __init__(self, config_path, image_info=None):
super().__init__()

if not osp.isfile(config_path):
raise Exception("Can't read dataset descriptor file '%s'" % \
raise Exception("Can't read dataset descriptor file '%s'" %
config_path)

rootpath = osp.dirname(config_path)
self._path = rootpath

assert image_info is None or isinstance(image_info, (str, dict))
if image_info is None:
image_info = osp.join(rootpath, YoloPath.IMAGE_META_FILE)
if not osp.isfile(image_info):
image_info = {}
if isinstance(image_info, str):
if not osp.isfile(image_info):
raise Exception("Can't read image meta file '%s'" % image_info)
with open(image_info) as f:
image_info = {}
for line in f:
image_name, h, w = line.strip().split()
image_info[image_name] = (int(h), int(w))
self._image_info = image_info

with open(config_path, 'r') as f:
config_lines = f.readlines()

Expand Down Expand Up @@ -77,10 +92,10 @@ def __init__(self, config_path):
subset.items = OrderedDict(
(osp.splitext(osp.basename(p))[0], p.strip()) for p in f)

for image_path in subset.items.values():
for item_id, image_path in subset.items.items():
image_path = self._make_local_path(image_path)
if not osp.isfile(image_path):
raise Exception("Can't find image '%s'" % image_path)
if not osp.isfile(image_path) and item_id not in image_info:
raise Exception("Can't find image '%s'" % item_id)

subsets[subset_name] = subset

Expand All @@ -103,8 +118,10 @@ def _get(self, item_id, subset_name):

if isinstance(item, str):
image_path = self._make_local_path(item)
image = lazy_image(image_path)
h, w, _ = image().shape
image_size = self._image_info.get(item_id)
image = Image(path=image_path, size=image_size)
h, w = image.size

anno_path = osp.splitext(image_path)[0] + '.txt'
annotations = self._parse_annotations(anno_path, w, h)

Expand Down
4 changes: 3 additions & 1 deletion datumaro/datumaro/plugins/yolo_format/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@

class YoloPath:
    # Constants shared by the YOLO format plugin modules.

    # Subset name used as the default (presumably when a dataset does not
    # name one explicitly -- confirm against the converter/extractor).
    DEFAULT_SUBSET_NAME = 'train'

    # Subset names listed for the YOLO format.
    SUBSET_NAMES = ['train', 'valid']

    # File name of the optional image meta file. The extractor looks for it
    # next to the dataset descriptor and reads one "<image name> <height>
    # <width>" entry per line from it.
    IMAGE_META_FILE = 'images.meta'
10 changes: 6 additions & 4 deletions datumaro/datumaro/plugins/yolo_format/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,20 @@ def __call__(self, path, **extra_params):
from datumaro.components.project import Project # cyclic import
project = Project()

if not osp.exists(path):
raise Exception("Failed to find 'yolo' dataset at '%s'" % path)

if path.endswith('.data') and osp.isfile(path):
config_paths = [path]
else:
config_paths = glob(osp.join(path, '*.data'))

if not osp.exists(path) or not config_paths:
raise Exception("Failed to find 'yolo' dataset at '%s'" % path)

for config_path in config_paths:
log.info("Found a dataset at '%s'" % config_path)

source_name = osp.splitext(osp.basename(config_path))[0]
source_name = '%s_%s' % (
osp.basename(osp.dirname(config_path)),
osp.splitext(osp.basename(config_path))[0])
project.add_source(source_name, {
'url': config_path,
'format': 'yolo',
Expand Down
32 changes: 31 additions & 1 deletion datumaro/tests/test_yolo_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class TestExtractor(Extractor):
def __iter__(self):
return iter([
DatasetItem(id=1, subset='train',
image=Image(size=(10, 15)),
image=Image(path='1.jpg', size=(10, 15)),
annotations=[
Bbox(0, 2, 4, 2, label=2),
Bbox(3, 3, 2, 3, label=4),
Expand All @@ -84,3 +84,33 @@ def categories(self):
parsed_dataset = YoloImporter()(test_dir).make_dataset()

compare_datasets(self, source_dataset, parsed_dataset)

def test_can_load_dataset_with_exact_image_info(self):
    # Round trip: export a one-item dataset with the YOLO converter, then
    # import it back while passing the image sizes explicitly through the
    # `image_info` argument, and check that both datasets match.
    class TestExtractor(Extractor):
        def __iter__(self):
            boxes = [
                Bbox(0, 2, 4, 2, label=2),
                Bbox(3, 3, 2, 3, label=4),
            ]
            items = [
                DatasetItem(id=1, subset='train',
                    image=Image(path='1.jpg', size=(10, 15)),
                    annotations=boxes),
            ]
            return iter(items)

        def categories(self):
            labels = LabelCategories()
            for index in range(10):
                labels.add('label_' + str(index))
            return {AnnotationType.label: labels}

    with TestDir() as test_dir:
        source_dataset = TestExtractor()

        export = YoloConverter()
        export(source_dataset, test_dir)

        # The size is given per item id, so the importer gets it from the
        # caller rather than from an image meta file.
        known_sizes = {'1': (10, 15)}
        project = YoloImporter()(test_dir, image_info=known_sizes)
        parsed_dataset = project.make_dataset()

        compare_datasets(self, source_dataset, parsed_dataset)

0 comments on commit 482433c

Please sign in to comment.