Skip to content

Commit

Permalink
[Datumaro] Fix voc colormap (#945)
Browse files Browse the repository at this point in the history
* Add polygon merging option to coco converter

* Add test, refactor coco, add support for cli args

* Drop colormap application in datumaro format

* Add cli support in voc converter

* Add cli support in yolo converter

* Add converter cli options in project cli

* Add image data type conversion in image saving

* Add image data type conversion in image saving

* Update mask support in voc

* Replace null with quotes in coco export

* Improve cli

* Enable Datumaro IntelliSense in VS Code

* Adjust fields in voc detection export
  • Loading branch information
zhiltsov-max authored and nmanovic committed Dec 13, 2019
1 parent 944d853 commit 3225453
Show file tree
Hide file tree
Showing 12 changed files with 121 additions and 54 deletions.
1 change: 1 addition & 0 deletions .vscode/python.env
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
PYTHONPATH="datumaro/:$PYTHONPATH"
2 changes: 1 addition & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,5 @@
"changeProcessCWD": true
}
],
"python.linting.pylintEnabled": true
"python.envFile": "${workspaceFolder}/.vscode/python.env"
}
6 changes: 5 additions & 1 deletion datumaro/datumaro/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,11 @@ def main(args=None):
set_up_logger(general_args)

command = get_command(command_name, general_args)
return command(command_args)
try:
return command(command_args)
except Exception as e:
log.error(e)
raise


if __name__ == '__main__':
Expand Down
22 changes: 19 additions & 3 deletions datumaro/datumaro/cli/project/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,16 @@ def build_create_parser(parser):
def create_command(args):
project_dir = osp.abspath(args.dst_dir)
project_path = make_project_path(project_dir)

if not args.overwrite and osp.isdir(project_dir) and os.listdir(project_dir):
log.error("Directory '%s' already exists "
"(pass --overwrite to force creation)" % project_dir)
return 1
os.makedirs(project_dir, exist_ok=args.overwrite)

if not args.overwrite and osp.isfile(project_path):
log.error("Project file '%s' already exists" % (project_path))
log.error("Project file '%s' already exists "
"(pass --overwrite to force creation)" % project_path)
return 1

project_name = args.name
Expand Down Expand Up @@ -59,16 +67,24 @@ def build_import_parser(parser):
parser.add_argument('--overwrite', action='store_true',
help="Overwrite existing files in the save directory")
parser.add_argument('--copy', action='store_true',
help="Make a deep copy instead of saving source links")
help="Copy the dataset instead of saving source links")
# parser.add_argument('extra_args', nargs=argparse.REMAINDER,
# help="Additional arguments for importer (pass '-- -h' for help)")
return parser

def import_command(args):
project_dir = osp.abspath(args.dst_dir)
project_path = make_project_path(project_dir)

if not args.overwrite and osp.isdir(project_dir) and os.listdir(project_dir):
log.error("Directory '%s' already exists "
"(pass --overwrite to force creation)" % project_dir)
return 1
os.makedirs(project_dir, exist_ok=args.overwrite)

if not args.overwrite and osp.isfile(project_path):
log.error("Project file '%s' already exists" % (project_path))
log.error("Project file '%s' already exists "
"(pass --overwrite to force creation)" % project_path)
return 1

project_name = args.name
Expand Down
19 changes: 15 additions & 4 deletions datumaro/datumaro/cli/source/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def create_command(args):
name = args.name

if project.env.git.has_submodule(name):
log.fatal("Source '%s' already exists" % (name))
log.fatal("Submodule '%s' already exists" % (name))
return 1

try:
Expand Down Expand Up @@ -171,12 +171,21 @@ def remove_command(args):
def build_export_parser(parser):
parser.add_argument('-n', '--name', required=True,
help="Source dataset to be extracted")
parser.add_argument('-e', '--filter', default=None,
help="Filter expression for dataset items. Examples: "
"extract images with width < height: "
"'/item[image/width < image/height]'; "
"extract images with large-area bboxes: "
"'/item[annotation/type=\"bbox\" and annotation/area>2000]'"
)
parser.add_argument('-d', '--dest', dest='dst_dir', required=True,
help="Directory to save output")
parser.add_argument('-f', '--output-format', required=True,
help="Output format (default: %(default)s)")
help="Output format")
parser.add_argument('-p', '--project', dest='project_dir', default='.',
help="Directory of the project to operate on (default: current dir)")
parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None,
help="Additional arguments for converter (pass '-- -h' for help)")
return parser

def export_command(args):
Expand All @@ -187,8 +196,10 @@ def export_command(args):

source_project = project.make_source_project(args.name)
source_project.make_dataset().export(
save_dir=args.dst_dir,
output_format=args.output_format)
save_dir=dst_dir,
output_format=args.output_format,
filter_expr=args.filter,
cmdline_args=args.extra_args)
log.info("Source '%s' exported to '%s' as '%s'" % \
(args.name, dst_dir, args.output_format))

Expand Down
14 changes: 7 additions & 7 deletions datumaro/datumaro/components/converters/ms_coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def save_image_info(self, item, filename):
'id': _cast(item.id, int, 0),
'width': int(w),
'height': int(h),
'file_name': filename,
'file_name': _cast(filename, str, ''),
'license': 0,
'flickr_url': '',
'coco_url': '',
Expand Down Expand Up @@ -117,8 +117,8 @@ def save_categories(self, dataset):
for idx, cat in enumerate(label_categories.items):
self.categories.append({
'id': 1 + idx,
'name': cat.name,
'supercategory': cat.parent,
'name': _cast(cat.name, str, ''),
'supercategory': _cast(cat.parent, str, ''),
})

def save_annotations(self, item):
Expand Down Expand Up @@ -282,8 +282,8 @@ def save_categories(self, dataset):

cat = {
'id': 1 + idx,
'name': label_cat.name,
'supercategory': label_cat.parent,
'name': _cast(label_cat.name, str, ''),
'supercategory': _cast(label_cat.parent, str, ''),
'keypoints': [str(l) for l in kp_cat.labels],
'skeleton': [int(i) for i in kp_cat.adjacent],
}
Expand Down Expand Up @@ -339,8 +339,8 @@ def save_categories(self, dataset):
for idx, cat in enumerate(label_categories.items):
self.categories.append({
'id': 1 + idx,
'name': cat.name,
'supercategory': cat.parent,
'name': _cast(cat.name, str, ''),
'supercategory': _cast(cat.parent, str, ''),
})

def save_annotations(self, item):
Expand Down
21 changes: 17 additions & 4 deletions datumaro/datumaro/components/converters/voc.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
# SPDX-License-Identifier: MIT

from collections import OrderedDict, defaultdict
import logging as log
from lxml import etree as ET
import os
import os.path as osp
from lxml import etree as ET

from datumaro.components.converter import Converter
from datumaro.components.extractor import DEFAULT_SUBSET_NAME, AnnotationType
Expand Down Expand Up @@ -143,6 +144,11 @@ def save_subsets(self):
ET.SubElement(root_elem, 'filename').text = \
item_id + VocPath.IMAGE_EXT

source_elem = ET.SubElement(root_elem, 'source')
ET.SubElement(source_elem, 'database').text = 'Unknown'
ET.SubElement(source_elem, 'annotation').text = 'Unknown'
ET.SubElement(source_elem, 'image').text = 'Unknown'

if item.has_image:
h, w, c = item.image.shape
size_elem = ET.SubElement(root_elem, 'size')
Expand All @@ -151,8 +157,8 @@ def save_subsets(self):
ET.SubElement(size_elem, 'depth').text = str(c)

item_segmented = 0 < len(masks)
if item_segmented:
ET.SubElement(root_elem, 'segmented').text = '1'
ET.SubElement(root_elem, 'segmented').text = \
str(int(item_segmented))

objects_with_parts = []
objects_with_actions = defaultdict(dict)
Expand Down Expand Up @@ -296,14 +302,21 @@ def save_class_lists(self, subset_name, class_lists):
if len(class_lists) == 0:
return

label_cat = self._extractor.categories().get(AnnotationType.label, None)
if not label_cat:
log.warn("Unable to save classification task lists "
"as source does not provide class labels. Skipped.")
return

for label in VocLabel:
ann_file = osp.join(self._cls_subsets_dir,
'%s_%s.txt' % (label.name, subset_name))
with open(ann_file, 'w') as f:
for item, item_labels in class_lists.items():
if not item_labels:
continue
presented = label.value in item_labels
item_labels = [label_cat.items[l].name for l in item_labels]
presented = label.name in item_labels
f.write('%s % d\n' % \
(item, 1 if presented else -1))

Expand Down
13 changes: 9 additions & 4 deletions datumaro/datumaro/components/extractors/voc.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
AnnotationType, LabelObject, MaskObject, BboxObject,
LabelCategories, MaskCategories
)
from datumaro.components.formats.voc import VocLabel, VocAction, \
VocBodyPart, VocTask, VocPath, VocColormap, VocInstColormap
from datumaro.components.formats.voc import (VocLabel, VocAction,
VocBodyPart, VocTask, VocPath, VocColormap, VocInstColormap,
VocIgnoredLabel
)
from datumaro.util import dir_items
from datumaro.util.image import lazy_image
from datumaro.util.mask_tools import lazy_mask, invert_colormap
Expand All @@ -32,13 +34,16 @@ def _make_voc_categories():
categories[AnnotationType.label] = label_categories

def label_id(class_index):
if class_index in [0, VocIgnoredLabel]:
return class_index

class_label = VocLabel(class_index).name
label_id, _ = label_categories.find(class_label)
return label_id
return label_id + 1
colormap = { label_id(idx): tuple(color) \
for idx, color in VocColormap.items() }
mask_categories = MaskCategories(colormap)
mask_categories.inverse_colormap # init inverse colormap
mask_categories.inverse_colormap # force init
categories[AnnotationType.mask] = mask_categories

return categories
Expand Down
52 changes: 28 additions & 24 deletions datumaro/datumaro/components/formats/voc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#
# SPDX-License-Identifier: MIT

from collections import OrderedDict
from enum import Enum
import numpy as np

Expand All @@ -16,28 +17,30 @@
])

VocLabel = Enum('VocLabel', [
('aeroplane', 0),
('bicycle', 1),
('bird', 2),
('boat', 3),
('bottle', 4),
('bus', 5),
('car', 6),
('cat', 7),
('chair', 8),
('cow', 9),
('diningtable', 10),
('dog', 11),
('horse', 12),
('motorbike', 13),
('person', 14),
('pottedplant', 15),
('sheep', 16),
('sofa', 17),
('train', 18),
('tvmonitor', 19),
('aeroplane', 1),
('bicycle', 2),
('bird', 3),
('boat', 4),
('bottle', 5),
('bus', 6),
('car', 7),
('cat', 8),
('chair', 9),
('cow', 10),
('diningtable', 11),
('dog', 12),
('horse', 13),
('motorbike', 14),
('person', 15),
('pottedplant', 16),
('sheep', 17),
('sofa', 18),
('train', 19),
('tvmonitor', 20),
])

VocIgnoredLabel = 255

VocPose = Enum('VocPose', [
'Unspecified',
'Left',
Expand Down Expand Up @@ -78,11 +81,12 @@ def get_bit(number, index):
colormap[:, c] |= get_bit(indices, c) << j
indices >>= 3

return {
id: tuple(color) for id, color in enumerate(colormap)
}
return OrderedDict(
(id, tuple(color)) for id, color in enumerate(colormap)
)

VocColormap = generate_colormap(len(VocLabel))
VocColormap = {id: color for id, color in generate_colormap(256).items()
if id in [l.value for l in VocLabel] + [0, VocIgnoredLabel]}
VocInstColormap = generate_colormap(256)

class VocPath:
Expand Down
10 changes: 8 additions & 2 deletions datumaro/datumaro/components/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,7 +652,10 @@ def remove_source(self, name):
self.env.sources.unregister(name)

def get_source(self, name):
return self.config.sources[name]
try:
return self.config.sources[name]
except KeyError:
raise KeyError("Source '%s' is not found" % name)

def get_subsets(self):
return self.config.subsets
Expand All @@ -669,7 +672,10 @@ def add_model(self, name, value=Model()):
self.env.register_model(name, value)

def get_model(self, name):
return self.env.models.get(name)
try:
return self.env.models.get(name)
except KeyError:
raise KeyError("Model '%s' is not found" % name)

def remove_model(self, name):
self.env.unregister_model(name)
Expand Down
7 changes: 6 additions & 1 deletion datumaro/datumaro/util/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ def load_image(path):
def save_image(path, image, params=None):
if _IMAGE_BACKEND == _IMAGE_BACKENDS.cv2:
import cv2
ext = path[-4:]
if ext.upper() == '.JPG':
params = [ int(cv2.IMWRITE_JPEG_QUALITY), 75 ]

image = image.astype(np.uint8)
cv2.imwrite(path, image, params=params)
elif _IMAGE_BACKEND == _IMAGE_BACKENDS.PIL:
Expand All @@ -73,6 +77,7 @@ def encode_image(image, ext, params=None):
if ext.upper() == '.JPG':
params = [ int(cv2.IMWRITE_JPEG_QUALITY), 75 ]

image = image.astype(np.uint8)
success, result = cv2.imencode(ext, image, params=params)
if not success:
raise Exception("Failed to encode image to '%s' format" % (ext))
Expand Down Expand Up @@ -149,4 +154,4 @@ def _get_cache(self):
cache = _ImageCache.get_instance()
elif cache == False:
return None
return cache
return cache
8 changes: 5 additions & 3 deletions datumaro/tests/test_voc_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

class VocTest(TestCase):
def test_colormap_generator(self):
reference = [
reference = np.array([
[ 0, 0, 0],
[128, 0, 0],
[ 0, 128, 0],
Expand All @@ -55,7 +55,9 @@ def test_colormap_generator(self):
[128, 64, 0],
[ 0, 192, 0],
[128, 192, 0],
]
[ 0, 64, 128],
[224, 224, 192], # ignored
])

self.assertTrue(np.array_equal(reference, list(VOC.VocColormap.values())))

Expand Down Expand Up @@ -192,7 +194,7 @@ def test_can_load_voc_cls(self):
count += 1
ann = find(item.annotations,
lambda x: x.type == AnnotationType.label and \
x.label == label.value)
get_label(extractor, x.label) == label.name)
self.assertFalse(ann is None)
self.assertEqual(count, len(item.annotations))

Expand Down

0 comments on commit 3225453

Please sign in to comment.