diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000000..f9a1afc17f
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,75 @@
+## Table of Contents
+
+- [Installation](#installation)
+- [Usage](#usage)
+- [Testing](#testing)
+- [Design](#design-and-code-structure)
+
+## Installation
+
+### Prerequisites
+
+- Python (3.5+)
+- OpenVINO (optional)
+
+``` bash
+git clone https://github.com/opencv/cvat
+```
+
+Optionally, create a virtual environment:
+
+``` bash
+python -m pip install virtualenv
+python -m virtualenv venv
+. venv/bin/activate
+```
+
+Then install all dependencies:
+
+``` bash
+while read -r p; do pip install $p; done < requirements.txt
+```
+
+If you're working inside the CVAT environment:
+
+``` bash
+. .env/bin/activate
+while read -r p; do pip install $p; done < datumaro/requirements.txt
+```
+
+## Usage
+
+> The directory containing Datumaro should be in the `PYTHONPATH`
+> environment variable, or `cvat/datumaro/` should be the current directory.
+
+``` bash
+datum --help
+python -m datumaro --help
+python datumaro/ --help
+python datum.py --help
+```
+
+``` python
+import datumaro
+```
+
+## Testing
+
+It is expected that all Datumaro functionality is covered and checked by
+unit tests. Tests are placed in the `tests/` directory.
+
+To run tests, use:
+
+``` bash
+python -m unittest discover -s tests
+```
+
+If you're working inside the CVAT environment, you can also use:
+
+``` bash
+python manage.py test datumaro/
+```
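+
+A new test can be as small as the sketch below (the module and test names
+are hypothetical; real tests follow the structure of the existing files
+in `tests/`):
+
+``` python
+import unittest
+
+from datumaro.components.extractor import DatasetItem
+
+
+class ExampleTest(unittest.TestCase):
+    def test_dataset_item_keeps_id(self):
+        # DatasetItem is the basic element extractors produce
+        item = DatasetItem(id='img_001')
+        self.assertEqual('img_001', item.id)
+```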
+
+## Design and code structure
+
+- [Design document](docs/design.md)
+- [Developer guide](docs/developer_guide.md)
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000000..ae9cf7104e
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,22 @@
+MIT License
+
+Copyright (C) 2019-2020 Intel Corporation
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+the rights to use, copy, modify, merge, publish, distribute, sublicense,
+and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
+OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+OR OTHER DEALINGS IN THE SOFTWARE.
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000..2d83cc4df6
--- /dev/null
+++ b/README.md
@@ -0,0 +1,205 @@
+# Dataset Management Framework (Datumaro)
+
+A framework to build, transform, and analyze datasets.
+
+```
+CVAT annotations --                       ---> Annotation tool
+                   \                     /
+COCO-like dataset -----> Datumaro ---> dataset ------> Model training
+                   /                     \
+VOC-like dataset --                       ---> Publication etc.
+```
+
+## Contents
+
+- [Documentation](#documentation)
+- [Features](#features)
+- [Installation](#installation)
+- [Usage](#usage)
+- [Examples](#examples)
+- [Contributing](#contributing)
+
+## Documentation
+
+- [User manual](docs/user_manual.md)
+- [Design document](docs/design.md)
+- [Contributing](CONTRIBUTING.md)
+
+## Features
+
+- Dataset format conversions:
+  - COCO (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`*)
+    - [Format specification](http://cocodataset.org/#format-data)
+    - [Dataset example](tests/assets/coco_dataset)
+    - `labels` are our extension - like `instances` with only `category_id`
+  - PASCAL VOC (`classification`, `detection`, `segmentation` (class, instances), `action_classification`, `person_layout`)
+    - [Format specification](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/index.html)
+    - [Dataset example](tests/assets/voc_dataset)
+  - YOLO (`bboxes`)
+    - [Format specification](https://github.com/AlexeyAB/darknet#how-to-train-pascal-voc-data)
+    - [Dataset example](tests/assets/yolo_dataset)
+  - TF Detection API (`bboxes`, `masks`)
+    - Format specifications: [bboxes](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md), [masks](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/instance_segmentation.md)
+    - [Dataset example](tests/assets/tf_detection_api_dataset)
+  - MOT sequences
+    - [Format specification](https://arxiv.org/pdf/1906.04567.pdf)
+    - [Dataset example](tests/assets/mot_dataset)
+  - CVAT
+    - [Format specification](https://github.com/opencv/cvat/blob/develop/cvat/apps/documentation/xml_format.md)
+    - [Dataset example](tests/assets/cvat_dataset)
+  - LabelMe
+    - [Format specification](http://labelme.csail.mit.edu/Release3.0)
+    - [Dataset example](tests/assets/labelme_dataset)
+- Dataset building operations:
+  - Merging multiple datasets into one
+  - Dataset filtering with custom conditions (see the sketch after this list), for instance:
+    - remove polygons of a certain class
+    - remove images without a specific class
+    - remove `occluded` annotations from images
+    - keep only vertically-oriented images
+    - remove small-area bounding boxes from annotations
+  - Annotation conversions, for instance:
+    - polygons to instance masks and vice versa
+    - apply a custom colormap for mask annotations
+    - rename or remove dataset labels
+- Dataset comparison
+- Model integration:
+  - Inference (OpenVINO and custom models)
+  - Explainable AI ([RISE algorithm](https://arxiv.org/abs/1806.07421))
+
+> Check the [design document](docs/design.md) for a full list of features
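+
+For example, a couple of these operations scripted with the Python API
+(a minimal sketch; `my_project/` is a placeholder for an existing
+Datumaro project, and the `filter` call mirrors the `extract` command):
+
+``` python
+from datumaro.components.project import Project
+
+project = Project.load('my_project')
+dataset = project.make_dataset()
+
+# Keep only non-occluded annotations; drop items left without annotations
+filtered = dataset.filter(expr='/item/annotation[occluded="False"]',
+    filter_annotations=True, remove_empty=True)
+```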
+
+## Installation
+
+Optionally, create a virtual environment:
+
+``` bash
+python -m pip install virtualenv
+python -m virtualenv venv
+. venv/bin/activate
+```
+
+Install the Datumaro package:
+
+``` bash
+pip install 'git+https://github.com/opencv/cvat#egg=datumaro&subdirectory=datumaro'
+```
+
+## Usage
+
+There are several options available:
+- [A standalone command-line tool](#standalone-tool)
+- [A python module](#python-module)
+
+### Standalone tool
+
+```
+        User
+          |
+          v
++------------------+
+|       CVAT       |
++--------v---------+       +------------------+       +--------------+
+| Datumaro module  | ----> | Datumaro project | <---> | Datumaro CLI | <--- User
++------------------+       +------------------+       +--------------+
+```
+
+``` bash
+datum --help
+python -m datumaro --help
+```
+
+### Python module
+
+Datumaro can be used in custom scripts as a library in the following way
+(a longer scripted example follows the CLI examples below):
+
+``` python
+from datumaro.components.project import Project # project-related things
+import datumaro.components.extractor # annotations and high-level interfaces
+# etc.
+project = Project.load('directory')
+```
+
+## Examples
+
+- Convert [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html#data) to COCO, keep only images where the `cat` class is present:
+  ```bash
+  # Download VOC dataset:
+  # http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
+  datum convert --input-format voc --input-path <path/to/voc> \
+    --output-format coco --filter '/item[annotation/label="cat"]'
+  ```
+
+- Convert only non-occluded annotations from a CVAT-annotated project to TFRecord:
+  ```bash
+  # export Datumaro dataset in CVAT UI, extract somewhere, go to the project dir
+  datum project extract --filter '/item/annotation[occluded="False"]' \
+    --mode i+a --output-dir not_occluded
+  datum project export --project not_occluded \
+    --format tf_detection_api -- --save-images
+  ```
+
+- Annotate COCO, extract an image subset, re-annotate it in CVAT, update the old dataset:
+  ```bash
+  # Download COCO dataset http://cocodataset.org/#download
+  # Put images to coco/images/ and annotations to coco/annotations/
+  datum project import --format coco --input-path <path/to/coco>
+  datum project export --filter '/image[images_I_dont_like]' --format cvat \
+    --output-dir reannotation
+  # import dataset and images to CVAT, re-annotate
+  # export Datumaro project, extract to 'reannotation-upd'
+  datum project merge reannotation-upd
+  datum project export --format coco
+  ```
+
+- Annotate instance polygons in CVAT, export as masks in COCO:
+  ```bash
+  datum convert --input-format cvat --input-path <path/to/cvat> \
+    --output-format coco -- --segmentation-mode masks
+  ```
+
+- Apply an OpenVINO detection model to some COCO-like dataset,
+  then compare annotations with ground truth and visualize in TensorBoard:
+  ```bash
+  datum project import --format coco --input-path <path/to/coco>
+  # create a model results interpretation script
+  datum model add mymodel openvino \
+    --weights model.bin --description model.xml \
+    --interpretation-script parse_results.py
+  datum model run --model mymodel --output-dir mymodel_inference/
+  datum project diff mymodel_inference/ -v tensorboard --output-dir diff
+  ```
+
+- Change colors in PASCAL VOC-like `.png` masks:
+  ```bash
+  datum project import --format voc --input-path <path/to/voc>
+
+  # Create a color map file with desired colors:
+  #
+  # label : color_rgb : parts : actions
+  # cat:0,0,255::
+  # dog:255,0,0::
+  #
+  # Save as mycolormap.txt
+
+  datum project export --format voc_segmentation -- --label-map mycolormap.txt
+  # add "--apply-colormap=0" to save grayscale (indexed) masks
+  # check "--help" option for more info
+  # use "datum --loglevel debug" for extra conversion info
+  ```
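+
+The CLI pipelines above can also be scripted through the Python module.
+A minimal sketch of the first example (paths are placeholders; the calls
+mirror what the `export` command does internally):
+
+``` python
+from datumaro.components.project import Project
+
+project = Project.load('voc_project') # created with 'datum project import'
+dataset = project.make_dataset()
+
+# Export as COCO, keeping only items annotated with 'cat'
+converter = project.env.converters.get('coco')
+dataset.export_project(
+    save_dir='coco_export',
+    converter=lambda extractor, save_dir: converter.convert(extractor, save_dir),
+    filter_expr='/item[annotation/label="cat"]')
+```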
+
+## Contributing
+
+Feel free to [open an Issue](https://github.com/opencv/cvat/issues/new) if you
+think something needs to be changed. You are welcome to participate in
+development; development instructions are available in our
+[developer manual](CONTRIBUTING.md).
diff --git a/datum.py b/datum.py
new file mode 100755
index 0000000000..12c150bd16
--- /dev/null
+++ b/datum.py
@@ -0,0 +1,8 @@
+#!/usr/bin/env python
+import sys
+
+from datumaro.cli.__main__ import main
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/datumaro/__init__.py b/datumaro/__init__.py
new file mode 100644
index 0000000000..eb864e52b5
--- /dev/null
+++ b/datumaro/__init__.py
@@ -0,0 +1,4 @@
+
+# Copyright (C) 2019-2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
diff --git a/datumaro/__main__.py b/datumaro/__main__.py
new file mode 100644
index 0000000000..be1cb09298
--- /dev/null
+++ b/datumaro/__main__.py
@@ -0,0 +1,12 @@
+
+# Copyright (C) 2019-2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import sys
+
+from datumaro.cli.__main__ import main
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/datumaro/cli/__init__.py b/datumaro/cli/__init__.py
new file mode 100644
index 0000000000..eb864e52b5
--- /dev/null
+++ b/datumaro/cli/__init__.py
@@ -0,0 +1,4 @@
+
+# Copyright (C) 2019-2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
diff --git a/datumaro/cli/__main__.py b/datumaro/cli/__main__.py
new file mode 100644
index 0000000000..80a8805f56
--- /dev/null
+++ b/datumaro/cli/__main__.py
@@ -0,0 +1,125 @@
+
+# Copyright (C) 2019-2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import argparse
+import logging as log
+import sys
+
+from . import contexts, commands
+from .util import CliException, add_subparser
+from ..version import VERSION
+
+
+_log_levels = {
+    'debug': log.DEBUG,
+    'info': log.INFO,
+    'warning': log.WARNING,
+    'error': log.ERROR,
+    'critical': log.CRITICAL
+}
+
+def loglevel(name):
+    return _log_levels[name]
+
+class _LogManager:
+    @classmethod
+    def init_logger(cls, args=None):
+        # Define a minimalistic parser only to obtain the loglevel
+        parser = argparse.ArgumentParser(add_help=False)
+        cls._define_loglevel_option(parser)
+        args, _ = parser.parse_known_args(args)
+
+        log.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
+            level=args.loglevel)
+
+    @staticmethod
+    def _define_loglevel_option(parser):
+        parser.add_argument('--loglevel', type=loglevel, default='info',
+            help="Logging level (options: %s; default: %s)" % \
+                (', '.join(_log_levels.keys()), "%(default)s"))
+        return parser
+
+
+def _make_subcommands_help(commands, help_line_start=0):
+    desc = ""
+    for command_name, _, command_help in commands:
+        desc += ("  %-" + str(max(0, help_line_start - 2 - 1)) + "s%s\n") % \
+            (command_name, command_help)
+    return desc
+
+def make_parser():
+    parser = argparse.ArgumentParser(prog="datumaro",
+        description="Dataset Framework",
+        formatter_class=argparse.RawDescriptionHelpFormatter)
+
+    parser.add_argument('--version', action='version', version=VERSION)
+    _LogManager._define_loglevel_option(parser)
+
+    known_contexts = [
+        ('project', contexts.project, "Actions on projects (datasets)"),
+        ('source', contexts.source, "Actions on data sources"),
+        ('model', contexts.model, "Actions on models"),
+    ]
+    known_commands = [
+        ('create', commands.create, "Create project"),
+        ('add', commands.add, "Add source to project"),
+        ('remove', commands.remove, "Remove source from project"),
+        ('export', commands.export, "Export project"),
+        ('explain', commands.explain, "Run Explainable AI algorithm for model"),
+        ('merge', commands.merge, "Merge datasets"),
+        ('convert', commands.convert, "Convert dataset"),
+    ]
+
+    # Argparse doesn't support subparser groups:
+    # https://stackoverflow.com/questions/32017020/grouping-argparse-subparser-arguments
+    help_line_start = max((len(e[0]) for e in known_contexts + known_commands),
+        default=0)
+    help_line_start = max((2 + help_line_start) // 4 + 1, 6) * 4 # align to tabs
+    subcommands_desc = ""
+    if known_contexts:
+        subcommands_desc += "Contexts:\n"
+        subcommands_desc += _make_subcommands_help(known_contexts,
+            help_line_start)
+    if known_commands:
+        if subcommands_desc:
+            subcommands_desc += "\n"
+        subcommands_desc += "Commands:\n"
+        subcommands_desc += _make_subcommands_help(known_commands,
+            help_line_start)
+    if subcommands_desc:
+        subcommands_desc += \
+            "\nRun '%s COMMAND --help' for more information on a command." % \
+                parser.prog
+
+    subcommands = parser.add_subparsers(title=subcommands_desc,
+        description="", help=argparse.SUPPRESS)
+    for command_name, command, _ in known_contexts + known_commands:
+        add_subparser(subcommands, command_name, command.build_parser)
+
+    return parser
+
+
+def main(args=None):
+    _LogManager.init_logger(args)
+
+    parser = make_parser()
+    args = parser.parse_args(args)
+
+    if 'command' not in args:
+        parser.print_help()
+        return 1
+
+    try:
+        return args.command(args)
+    except CliException as e:
+        log.error(e)
+        return 1
+    except Exception as e:
+        log.error(e)
+        raise
+
+
+if __name__ == '__main__':
+    sys.exit(main())
\ No newline at end of file
diff --git a/datumaro/cli/commands/__init__.py b/datumaro/cli/commands/__init__.py
new file mode 100644
index 0000000000..fe74bc2b09
--- /dev/null
+++ b/datumaro/cli/commands/__init__.py
@@ -0,0 +1,6 @@
+
+# Copyright (C) 2019-2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+from . import add, create, explain, export, remove, merge, convert
diff --git a/datumaro/cli/commands/add.py b/datumaro/cli/commands/add.py
new file mode 100644
index 0000000000..288d7c047c
--- /dev/null
+++ b/datumaro/cli/commands/add.py
@@ -0,0 +1,8 @@
+
+# Copyright (C) 2019-2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+# pylint: disable=unused-import
+
+from ..contexts.source import build_add_parser as build_parser
diff --git a/datumaro/cli/commands/convert.py b/datumaro/cli/commands/convert.py
new file mode 100644
index 0000000000..6398bac73b
--- /dev/null
+++ b/datumaro/cli/commands/convert.py
@@ -0,0 +1,137 @@
+
+# Copyright (C) 2019-2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import argparse
+import logging as log
+import os
+import os.path as osp
+
+from datumaro.components.project import Environment
+
+from ..contexts.project import FilterModes
+from ..util import CliException, MultilineFormatter, make_file_name
+from ..util.project import generate_next_file_name
+
+
+def build_parser(parser_ctor=argparse.ArgumentParser):
+    builtin_importers = sorted(Environment().importers.items)
+    builtin_converters = sorted(Environment().converters.items)
+
+    parser = parser_ctor(help="Convert an existing dataset to another format",
+        description="""
+        Converts a dataset from one format to another.
+        You can add your own formats using a project.|n
+        |n
+        Supported input formats: %s|n
+        |n
+        Supported output formats: %s|n
+        |n
+        Examples:|n
+        - Export a dataset as a PASCAL VOC dataset, include images:|n
+        |s|sconvert -i src/path -f voc -- --save-images|n
+        |n
+        - Export a dataset as a COCO dataset to a specific directory:|n
+        |s|sconvert -i src/path -f coco -o path/I/like/
+        """ % (', '.join(builtin_importers), ', '.join(builtin_converters)),
+        formatter_class=MultilineFormatter)
+
+    parser.add_argument('-i', '--input-path', default='.', dest='source',
+        help="Path to look for a dataset")
+    parser.add_argument('-if', '--input-format',
+        help="Input dataset format (default: detect automatically)")
+    parser.add_argument('-f', '--output-format', required=True,
+        help="Output format")
+    parser.add_argument('-o', '--output-dir', dest='dst_dir',
+        help="Directory to save output (default: a subdir in the current one)")
+    parser.add_argument('--overwrite', action='store_true',
+        help="Overwrite existing files in the save directory")
+    parser.add_argument('-e', '--filter',
+        help="Filter expression for dataset items")
+    parser.add_argument('--filter-mode', default=FilterModes.i.name,
+        type=FilterModes.parse,
+        help="Filter mode (options: %s; default: %s)" % \
+            (', '.join(FilterModes.list_options()), '%(default)s'))
+    parser.add_argument('extra_args', nargs=argparse.REMAINDER,
+        help="Additional arguments for output format (pass '-- -h' for help)")
+    parser.set_defaults(command=convert_command)
+
+    return parser
+
+def convert_command(args):
+    env = Environment()
+
+    try:
+        converter = env.converters.get(args.output_format)
+    except KeyError:
+        raise CliException("Converter for format '%s' is not found" % \
+            args.output_format)
+    extra_args = converter.from_cmdline(args.extra_args)
+    def converter_proxy(extractor, save_dir):
+        return converter.convert(extractor, save_dir, **extra_args)
+
+    filter_args = FilterModes.make_filter_args(args.filter_mode)
+
+    # Format auto-detection below: an importer matches if its detect()
+    # returns a truthy value for the source path; importers that don't
+    # implement auto-detection raise NotImplementedError and are skipped.
+    importer_args = {}
+    if not args.input_format:
+        matches = []
+        for format_name in env.importers.items:
+            log.debug("Checking '%s' format...", format_name)
+            importer = env.make_importer(format_name)
+            try:
+                match = importer.detect(args.source)
+                if match:
+                    log.debug("format matched")
+                    matches.append((format_name, importer))
+            except NotImplementedError:
+                log.debug("Format '%s' does not support auto detection.",
+                    format_name)
+
+        if len(matches) == 0:
+            log.error("Failed to detect dataset format. "
+                "Try to specify format with the '-if/--input-format' parameter.")
+            return 1
+        elif len(matches) != 1:
+            log.error("Multiple formats match the dataset: %s. "
+                "Try to specify format with the '-if/--input-format' parameter.",
+                ', '.join(m[0] for m in matches))
+            return 2
+
+        format_name, importer = matches[0]
+        args.input_format = format_name
+        log.info("Source dataset format detected as '%s'", args.input_format)
+    else:
+        try:
+            importer = env.make_importer(args.input_format)
+            if hasattr(importer, 'from_cmdline'):
+                # use a separate variable so the converter options captured
+                # by converter_proxy above are not clobbered
                importer_args = importer.from_cmdline()
+        except KeyError:
+            raise CliException("Importer for format '%s' is not found" % \
+                args.input_format)
+
+    source = osp.abspath(args.source)
+
+    dst_dir = args.dst_dir
+    if dst_dir:
+        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
+            raise CliException("Directory '%s' already exists "
+                "(pass --overwrite to overwrite)" % dst_dir)
+    else:
+        dst_dir = generate_next_file_name('%s-%s' % \
+            (osp.basename(source), make_file_name(args.output_format)))
+    dst_dir = osp.abspath(dst_dir)
+
+    project = importer(source, **importer_args)
+    dataset = project.make_dataset()
+
+    log.info("Exporting the dataset")
+    dataset.export_project(
+        save_dir=dst_dir,
+        converter=converter_proxy,
+        filter_expr=args.filter,
+        **filter_args)
+
+    log.info("Dataset exported to '%s' as '%s'" % \
+        (dst_dir, args.output_format))
+
+    return 0
diff --git a/datumaro/cli/commands/create.py b/datumaro/cli/commands/create.py
new file mode 100644
index 0000000000..97e3c9b4cf
--- /dev/null
+++ b/datumaro/cli/commands/create.py
@@ -0,0 +1,8 @@
+
+# Copyright (C) 2019-2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+# pylint: disable=unused-import
+
+from ..contexts.project import build_create_parser as build_parser
\ No newline at end of file
diff --git a/datumaro/cli/commands/explain.py b/datumaro/cli/commands/explain.py
new file mode 100644
index 0000000000..4d5d16b2af
--- /dev/null
+++ b/datumaro/cli/commands/explain.py
@@ -0,0 +1,183 @@
+
+# Copyright (C) 2019-2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import argparse
+import logging as log
+import os
+import os.path as osp
+
+from datumaro.components.project import Project
+from datumaro.util.command_targets import (TargetKinds, target_selector,
+    ProjectTarget, SourceTarget, ImageTarget, is_project_path)
+from datumaro.util.image import load_image, save_image
+from ..util import MultilineFormatter
+from ..util.project import load_project
+
+
+def build_parser(parser_ctor=argparse.ArgumentParser):
+    parser = parser_ctor(help="Run Explainable AI algorithm",
+        description="Runs an explainable AI algorithm for a model.")
+
+    parser.add_argument('-m', '--model', required=True,
+        help="Model to use for inference")
+    parser.add_argument('-t', '--target', default=None,
+        help="Inference target - image, source, project "
+            "(default: current dir)")
+    parser.add_argument('-o', '--output-dir', dest='save_dir', default=None,
+        help="Directory to save output (default: display only)")
+
+    method_sp = parser.add_subparsers(dest='algorithm')
+
+    rise_parser = method_sp.add_parser('rise',
+        description="""
+        RISE: Randomized Input Sampling for
+        Explanation of Black-box Models algorithm|n
+        |n
+        See explanations at: https://arxiv.org/pdf/1806.07421.pdf
+        """,
+        formatter_class=MultilineFormatter)
+    rise_parser.add_argument('-s', '--max-samples', default=None, type=int,
+        help="Number of algorithm iterations (default: mask size ^ 2)")
+    rise_parser.add_argument('--mw', '--mask-width',
+        dest='mask_width', default=7, type=int,
+        help="Mask width (default: %(default)s)")
+    rise_parser.add_argument('--mh', '--mask-height',
+        dest='mask_height', default=7, type=int,
+        help="Mask height (default: %(default)s)")
+    rise_parser.add_argument('--prob', default=0.5, type=float,
+        help="Mask pixel inclusion probability (default: %(default)s)")
+    rise_parser.add_argument('--iou', '--iou-thresh',
+        dest='iou_thresh', default=0.9, type=float,
+        help="IoU match threshold for detections (default: %(default)s)")
+    rise_parser.add_argument('--nms', '--nms-iou-thresh',
+        dest='nms_iou_thresh', default=0.0, type=float,
+        help="IoU match threshold in non-maximum suppression (default: no NMS)")
+    rise_parser.add_argument('--conf', '--det-conf-thresh',
+        dest='det_conf_thresh', default=0.0, type=float,
+        help="Confidence threshold for detections (default: include all)")
+    rise_parser.add_argument('-b', '--batch-size', default=1, type=int,
+        help="Inference batch size (default: %(default)s)")
+    rise_parser.add_argument('--display', action='store_true',
+        help="Visualize results during computation")
+
+    parser.add_argument('-p', '--project', dest='project_dir', default='.',
+        help="Directory of the project to operate on (default: current dir)")
+    parser.set_defaults(command=explain_command)
+
+    return parser
+
+def explain_command(args):
+    project_path = args.project_dir
+    if is_project_path(project_path):
+        project = Project.load(project_path)
+    else:
+        project = None
+    args.target = target_selector(
+        ProjectTarget(is_default=True, project=project),
+        SourceTarget(project=project),
+        ImageTarget()
+    )(args.target)
+    if args.target[0] == TargetKinds.project:
+        if is_project_path(args.target[1]):
+            args.project_dir = osp.dirname(osp.abspath(args.target[1]))
+
+
+    import cv2
+    from matplotlib import cm
+
+    project = load_project(args.project_dir)
+
+    model = project.make_executable_model(args.model)
+
+    if str(args.algorithm).lower() != 'rise':
+        raise NotImplementedError()
+
+    from datumaro.components.algorithms.rise import RISE
+    rise = RISE(model,
+        max_samples=args.max_samples,
+        mask_width=args.mask_width,
+        mask_height=args.mask_height,
+        prob=args.prob,
+        iou_thresh=args.iou_thresh,
+        nms_thresh=args.nms_iou_thresh,
+        det_conf_thresh=args.det_conf_thresh,
+        batch_size=args.batch_size)
+
+    if args.target[0] == TargetKinds.image:
+        image_path = args.target[1]
+        image = load_image(image_path)
+
+        log.info("Running inference explanation for '%s'" % image_path)
+        # progressive=True makes RISE yield intermediate heatmaps for display
+        heatmap_iter = rise.apply(image, progressive=args.display)
+
+        image = image / 255.0
+        file_name = osp.splitext(osp.basename(image_path))[0]
+        if args.display:
+            for i, heatmaps in enumerate(heatmap_iter):
+                for j, heatmap in enumerate(heatmaps):
+                    hm_painted = cm.jet(heatmap)[:, :, 2::-1]
+                    disp = (image + hm_painted) / 2
+                    cv2.imshow('heatmap-%s' % j, hm_painted)
+                    cv2.imshow(file_name + '-heatmap-%s' % j, disp)
+                cv2.waitKey(10)
+                print("Iter", i, "of", args.max_samples, end='\r')
+        else:
+            heatmaps = next(heatmap_iter)
+
+        if args.save_dir is not None:
+            log.info("Saving inference heatmaps to '%s'" % args.save_dir)
+            os.makedirs(args.save_dir, exist_ok=True)
+
+            for j, heatmap in enumerate(heatmaps):
+                save_path = osp.join(args.save_dir,
+                    file_name + '-heatmap-%s.png' % j)
+                save_image(save_path, heatmap * 255.0)
+        else:
+            for j, heatmap in enumerate(heatmaps):
+                disp = (image + cm.jet(heatmap)[:, :, 2::-1]) / 2
+                cv2.imshow(file_name + '-heatmap-%s' % j, disp)
+            cv2.waitKey(0)
+    elif args.target[0] == TargetKinds.source or \
+         args.target[0] == TargetKinds.project:
+        if args.target[0] == TargetKinds.source:
+            source_name = args.target[1]
+            dataset = project.make_source_project(source_name).make_dataset()
+            log.info("Running inference explanation for '%s'" % source_name)
+        else:
+            project_name = project.config.project_name
+            dataset = project.make_dataset()
+            log.info("Running inference explanation for '%s'" % project_name)
+
+        for item in dataset:
+            image = item.image.data
+            if image is None:
+                log.warning(
+                    "Dataset item %s does not have image data. Skipping." % \
+                    (item.id))
+                continue
+
+            heatmap_iter = rise.apply(image)
+
+            image = image / 255.0
+            heatmaps = next(heatmap_iter)
+
+            if args.save_dir is not None:
+                log.info("Saving inference heatmaps to '%s'" % args.save_dir)
+                os.makedirs(args.save_dir, exist_ok=True)
+
+                for j, heatmap in enumerate(heatmaps):
+                    save_image(osp.join(args.save_dir,
+                            item.id + '-heatmap-%s.png' % j),
+                        heatmap * 255.0, create_dir=True)
+
+            if not args.save_dir or args.display:
+                for j, heatmap in enumerate(heatmaps):
+                    disp = (image + cm.jet(heatmap)[:, :, 2::-1]) / 2
+                    cv2.imshow(item.id + '-heatmap-%s' % j, disp)
+                cv2.waitKey(0)
+    else:
+        raise NotImplementedError()
+
+    return 0
diff --git a/datumaro/cli/commands/export.py b/datumaro/cli/commands/export.py
new file mode 100644
index 0000000000..be47245d6b
--- /dev/null
+++ b/datumaro/cli/commands/export.py
@@ -0,0 +1,8 @@
+
+# Copyright (C) 2019-2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+# pylint: disable=unused-import
+
+from ..contexts.project import build_export_parser as build_parser
\ No newline at end of file
diff --git a/datumaro/cli/commands/merge.py b/datumaro/cli/commands/merge.py
new file mode 100644
index 0000000000..2583cd8641
--- /dev/null
+++ b/datumaro/cli/commands/merge.py
@@ -0,0 +1,124 @@
+
+# Copyright (C) 2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import argparse
+import json
+import logging as log
+import os
+import os.path as osp
+from collections import OrderedDict
+
+from datumaro.components.project import Project
+from datumaro.components.operations import (IntersectMerge,
+    QualityError, MergeError)
+
+from ..util import at_least, MultilineFormatter, CliException
+from ..util.project import generate_next_file_name, load_project
+
+
+def build_parser(parser_ctor=argparse.ArgumentParser):
+    parser = parser_ctor(help="Merge multiple projects",
+        description="""
+        Merges multiple datasets into one. This can be useful if you
+        have several annotation sets for the same data and wish to merge them,
+        taking into consideration potential overlaps and conflicts.
+        This command can try to find a common ground by voting or
+        return a list of conflicts.|n
+        |n
+        Examples:|n
+        - Merge annotations from 3 (or more) annotators:|n
+        |s|smerge project1/ project2/ project3/|n
+        - Check groups of the merged dataset for consistency:|n
+        |s|s|slook for groups consisting of 'person', 'hand', 'head', 'foot'|n
+        |s|smerge project1/ project2/ -g 'person,hand?,head,foot?'
+        """,
+        formatter_class=MultilineFormatter)
+
+    def _group(s):
+        return s.split(',')
+
+    parser.add_argument('project', nargs='+', action=at_least(2),
+        help="Path to a project (repeatable)")
+    parser.add_argument('-iou', '--iou-thresh', default=0.25, type=float,
+        help="IoU match threshold for segments (default: %(default)s)")
+    parser.add_argument('-oconf', '--output-conf-thresh',
+        default=0.0, type=float,
+        help="Confidence threshold for output "
+            "annotations (default: %(default)s)")
+    parser.add_argument('--quorum', default=0, type=int,
+        help="Minimum count of votes for a label or attribute "
+            "to be accepted (default: %(default)s)")
+    parser.add_argument('-g', '--groups', action='append', type=_group,
+        default=[],
+        help="A comma-separated list of labels in "
+            "annotation groups to check. The '?' postfix can be added to a label to "
+            "make it optional in the group (repeatable)")
+    parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None,
+        help="Output directory (default: current project's dir)")
+    parser.add_argument('--overwrite', action='store_true',
+        help="Overwrite existing files in the save directory")
+    parser.set_defaults(command=merge_command)
+
+    return parser
+
+def merge_command(args):
+    source_projects = [load_project(p) for p in args.project]
+
+    dst_dir = args.dst_dir
+    if dst_dir:
+        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
+            raise CliException("Directory '%s' already exists "
+                "(pass --overwrite to overwrite)" % dst_dir)
+    else:
+        dst_dir = generate_next_file_name('merged')
+
+    source_datasets = []
+    for p in source_projects:
+        log.debug("Loading project '%s' dataset", p.config.project_name)
+        source_datasets.append(p.make_dataset())
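+
+    # The merge is vote-based (see --quorum and --groups above): e.g. with
+    # three input projects and --quorum 2, a label or attribute value is
+    # kept only when at least two sources agree on it.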
+    merger = IntersectMerge(conf=IntersectMerge.Conf(
+        pairwise_dist=args.iou_thresh, groups=args.groups,
+        output_conf_thresh=args.output_conf_thresh, quorum=args.quorum
+    ))
+    merged_dataset = merger(source_datasets)
+
+    merged_project = Project()
+    output_dataset = merged_project.make_dataset()
+    output_dataset.define_categories(merged_dataset.categories())
+    merged_dataset = output_dataset.update(merged_dataset)
+    merged_dataset.save(save_dir=dst_dir)
+
+    report_path = osp.join(dst_dir, 'merge_report.json')
+    save_merge_report(merger, report_path)
+
+    dst_dir = osp.abspath(dst_dir)
+    log.info("Merge results have been saved to '%s'" % dst_dir)
+    log.info("Report has been saved to '%s'" % report_path)
+
+    return 0
+
+def save_merge_report(merger, path):
+    item_errors = OrderedDict()
+    source_errors = OrderedDict()
+    all_errors = []
+
+    for e in merger.errors:
+        if isinstance(e, QualityError):
+            item_errors[str(e.item_id)] = item_errors.get(str(e.item_id), 0) + 1
+        elif isinstance(e, MergeError):
+            for s in e.sources:
+                source_errors[s] = source_errors.get(s, 0) + 1
+            item_errors[str(e.item_id)] = item_errors.get(str(e.item_id), 0) + 1
+
+        all_errors.append(str(e))
+
+    errors = OrderedDict([
+        ('Item errors', item_errors),
+        ('Source errors', source_errors),
+        ('All errors', all_errors),
+    ])
+
+    with open(path, 'w') as f:
+        json.dump(errors, f, indent=4)
\ No newline at end of file
diff --git a/datumaro/cli/commands/remove.py b/datumaro/cli/commands/remove.py
new file mode 100644
index 0000000000..7b9c0d3a2f
--- /dev/null
+++ b/datumaro/cli/commands/remove.py
@@ -0,0 +1,8 @@
+
+# Copyright (C) 2019-2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+# pylint: disable=unused-import
+
+from ..contexts.source import build_remove_parser as build_parser
\ No newline at end of file
diff --git a/datumaro/cli/contexts/__init__.py b/datumaro/cli/contexts/__init__.py
new file mode 100644
index 0000000000..433efe9b86
--- /dev/null
+++ b/datumaro/cli/contexts/__init__.py
@@ -0,0 +1,6 @@
+
+# Copyright (C) 2019-2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+from . import project, source, model, item
\ No newline at end of file
diff --git a/datumaro/cli/contexts/item/__init__.py b/datumaro/cli/contexts/item/__init__.py
new file mode 100644
index 0000000000..8f74826d90
--- /dev/null
+++ b/datumaro/cli/contexts/item/__init__.py
@@ -0,0 +1,36 @@
+
+# Copyright (C) 2019-2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import argparse
+
+from ...util import add_subparser
+
+
+def build_export_parser(parser_ctor=argparse.ArgumentParser):
+    parser = parser_ctor()
+    return parser
+
+def build_stats_parser(parser_ctor=argparse.ArgumentParser):
+    parser = parser_ctor()
+    return parser
+
+def build_diff_parser(parser_ctor=argparse.ArgumentParser):
+    parser = parser_ctor()
+    return parser
+
+def build_edit_parser(parser_ctor=argparse.ArgumentParser):
+    parser = parser_ctor()
+    return parser
+
+def build_parser(parser_ctor=argparse.ArgumentParser):
+    parser = parser_ctor()
+
+    subparsers = parser.add_subparsers()
+    add_subparser(subparsers, 'export', build_export_parser)
+    add_subparser(subparsers, 'stats', build_stats_parser)
+    add_subparser(subparsers, 'diff', build_diff_parser)
+    add_subparser(subparsers, 'edit', build_edit_parser)
+
+    return parser
diff --git a/datumaro/cli/contexts/model/__init__.py b/datumaro/cli/contexts/model/__init__.py
new file mode 100644
index 0000000000..69b7da1eae
--- /dev/null
+++ b/datumaro/cli/contexts/model/__init__.py
@@ -0,0 +1,183 @@
+
+# Copyright (C) 2019-2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import argparse
+import logging as log
+import os
+import os.path as osp
+import re
+
+from datumaro.components.config import DEFAULT_FORMAT
+from datumaro.components.project import Environment
+
+from ...util import CliException, MultilineFormatter, add_subparser
+from ...util.project import load_project, \
+    generate_next_name, generate_next_file_name
+
+
+def build_add_parser(parser_ctor=argparse.ArgumentParser):
+    builtins = sorted(Environment().launchers.items)
+
+    parser = parser_ctor(help="Add model to project",
+        description="""
+        Registers an executable model in a project. A model requires
+        a launcher to be executed. Each launcher has its own options, which
+        are passed after the '--' separator; pass '-- -h' for more info.|n
+        |n
+        List of builtin launchers: %s
+        """ % ', '.join(builtins),
+        formatter_class=MultilineFormatter)
+
+    parser.add_argument('-l', '--launcher', required=True,
+        help="Model launcher")
+    parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None,
+        help="Additional arguments for launcher (pass '-- -h' for help)")
+    parser.add_argument('--copy', action='store_true',
+        help="Copy the model to the project")
+    parser.add_argument('-n', '--name', default=None,
+        help="Name of the model to be added (default: generate automatically)")
+    parser.add_argument('--overwrite', action='store_true',
+        help="Overwrite if exists")
+    parser.add_argument('-p', '--project', dest='project_dir', default='.',
+        help="Directory of the project to operate on (default: current dir)")
+    parser.set_defaults(command=add_command)
+
+    return parser
+
+def add_command(args):
+    project = load_project(args.project_dir)
+
+    if args.name:
+        if not args.overwrite and args.name in project.config.models:
+            raise CliException("Model '%s' already exists "
+                "(pass --overwrite to overwrite)" % args.name)
+    else:
+        args.name = generate_next_name(
+            project.config.models, 'model', '-', default=0)
+        assert args.name not in project.config.models, args.name
+
+    try:
+        launcher = project.env.launchers.get(args.launcher)
+    except KeyError:
+        raise CliException("Launcher '%s' is not found" % args.launcher)
+
+    cli_plugin = getattr(launcher, 'cli_plugin', launcher)
+    model_args = cli_plugin.from_cmdline(args.extra_args)
+
+    if args.copy:
+        log.info("Copying model data")
+
+        model_dir = project.local_model_dir(args.name)
+        os.makedirs(model_dir, exist_ok=False)
+
+        try:
+            cli_plugin.copy_model(model_dir, model_args)
+        except (AttributeError, NotImplementedError):
+            log.error("Can't copy: copying is not available for '%s' models" % \
+                args.launcher)
+
+    log.info("Checking the model")
+    project.add_model(args.name, {
+        'launcher': args.launcher,
+        'options': model_args,
+    })
+    project.make_executable_model(args.name)
+
+    project.save()
+
+    log.info("Model '%s' with launcher '%s' has been added to project '%s'" % \
+        (args.name, args.launcher, project.config.project_name))
+
+    return 0
+
+def build_remove_parser(parser_ctor=argparse.ArgumentParser):
+    parser = parser_ctor()
+
+    parser.add_argument('name',
+        help="Name of the model to be removed")
+    parser.add_argument('-p', '--project', dest='project_dir', default='.',
+        help="Directory of the project to operate on (default: current dir)")
+    parser.set_defaults(command=remove_command)
+
+    return parser
+
+def remove_command(args):
+    project = load_project(args.project_dir)
+
+    project.remove_model(args.name)
+    project.save()
+
+    return 0
+
+def build_run_parser(parser_ctor=argparse.ArgumentParser):
+    parser = parser_ctor()
+
+    parser.add_argument('-o', '--output-dir', dest='dst_dir',
+        help="Directory to save output")
+    parser.add_argument('-m', '--model', dest='model_name', required=True,
+        help="Model to apply to the project")
+    parser.add_argument('-p', '--project', dest='project_dir', default='.',
+        help="Directory of the project to operate on (default: current dir)")
+    parser.add_argument('--overwrite', action='store_true',
+        help="Overwrite if exists")
+    parser.set_defaults(command=run_command)
+
+    return parser
+
+def run_command(args):
+    project = load_project(args.project_dir)
+
+    dst_dir = args.dst_dir
+    if dst_dir:
+        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
+            raise CliException("Directory '%s' already exists "
+                "(pass --overwrite to overwrite)" % dst_dir)
+    else:
+        dst_dir = generate_next_file_name('%s-inference' % \
+            project.config.project_name)
+
+    project.make_dataset().apply_model(
+        save_dir=osp.abspath(dst_dir),
+        model=args.model_name)
+
+    log.info("Inference results have been saved to '%s'" % dst_dir)
+
+    return 0
+
+def build_info_parser(parser_ctor=argparse.ArgumentParser):
+    parser = parser_ctor()
+
+    parser.add_argument('-n', '--name',
+        help="Model name")
+    parser.add_argument('-v', '--verbose', action='store_true',
+        help="Show details")
+    parser.add_argument('-p', '--project', dest='project_dir', default='.',
+        help="Directory of the project to operate on (default: current dir)")
+    parser.set_defaults(command=info_command)
+
+    return parser
+
+def info_command(args):
+    project = load_project(args.project_dir)
+
+    if args.name:
+        model = project.get_model(args.name)
+        print(model)
+    else:
+        for name, conf in project.config.models.items():
+            print(name)
+            if args.verbose:
+                print(dict(conf))
+
+def build_parser(parser_ctor=argparse.ArgumentParser):
+    parser = parser_ctor()
+
+    subparsers = parser.add_subparsers()
+    add_subparser(subparsers, 'add', build_add_parser)
+    add_subparser(subparsers, 'remove', build_remove_parser)
+    add_subparser(subparsers, 'run', build_run_parser)
+    add_subparser(subparsers, 'info', build_info_parser)
+
+    return parser
diff --git a/datumaro/cli/contexts/project/__init__.py b/datumaro/cli/contexts/project/__init__.py
new file mode 100644
index 0000000000..bab5da6fb5
--- /dev/null
+++ b/datumaro/cli/contexts/project/__init__.py
@@ -0,0 +1,826 @@
+
+# Copyright (C) 2019-2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import argparse
+import json
+import logging as log
+import os
+import os.path as osp
+import shutil
+from enum import Enum
+
+from datumaro.components.cli_plugin import CliPlugin
+from datumaro.components.dataset_filter import DatasetItemEncoder
+from datumaro.components.extractor import AnnotationType
+from datumaro.components.operations import (DistanceComparator,
+    ExactComparator, compute_ann_statistics, compute_image_statistics, mean_std)
+from datumaro.components.project import \
+    PROJECT_DEFAULT_CONFIG as DEFAULT_CONFIG
+from datumaro.components.project import Environment, Project
+
+from ...util import (CliException, MultilineFormatter, add_subparser,
+    make_file_name)
+from ...util.project import generate_next_file_name, load_project
+from .diff import DiffVisualizer
+
+
+def build_create_parser(parser_ctor=argparse.ArgumentParser):
+    parser = parser_ctor(help="Create empty project",
+        description="""
+        Create a new empty project.|n
+        |n
+        Examples:|n
+        - Create a project in the current directory:|n
+        |s|screate -n myproject|n
+        |n
+        - Create a project in another directory:|n
+        |s|screate -o path/I/like/
+        """,
+        formatter_class=MultilineFormatter)
+
+    parser.add_argument('-o', '--output-dir', default='.', dest='dst_dir',
+        help="Save directory for the new project (default: current dir)")
+    parser.add_argument('-n', '--name', default=None,
+        help="Name of the new project (default: same as project dir)")
+    parser.add_argument('--overwrite', action='store_true',
+        help="Overwrite existing files in the save directory")
+    parser.set_defaults(command=create_command)
+
+    return parser
+
+def create_command(args):
+    project_dir = osp.abspath(args.dst_dir)
+
+    project_env_dir = osp.join(project_dir, DEFAULT_CONFIG.env_dir)
+    if osp.isdir(project_env_dir) and os.listdir(project_env_dir):
+        if not args.overwrite:
+            raise CliException("Directory '%s' already exists "
+                "(pass --overwrite to overwrite)" % project_env_dir)
+        else:
+            shutil.rmtree(project_env_dir, ignore_errors=True)
+
+    own_dataset_dir = osp.join(project_dir, DEFAULT_CONFIG.dataset_dir)
+    if osp.isdir(own_dataset_dir) and os.listdir(own_dataset_dir):
+        if not args.overwrite:
+            raise CliException("Directory '%s' already exists "
+                "(pass --overwrite to overwrite)" % own_dataset_dir)
+        else:
+            # NOTE: remove the dir to avoid using data from previous project
+            shutil.rmtree(own_dataset_dir)
+
+    project_name = args.name
+    if project_name is None:
+        project_name = osp.basename(project_dir)
+
+    log.info("Creating project at '%s'" % project_dir)
+
+    Project.generate(project_dir, {
+        'project_name': project_name,
+    })
+
+    log.info("Project has been created at '%s'" % project_dir)
+
+    return 0
+
+def build_import_parser(parser_ctor=argparse.ArgumentParser):
+    builtins = sorted(Environment().importers.items)
+
+    parser = parser_ctor(help="Create project from existing dataset",
+        description="""
+        Creates a project from an existing dataset. The source can be:|n
+        - a dataset in a supported format (check 'formats' section below)|n
+        - a Datumaro project|n
+        |n
+        Formats:|n
+        Datasets come in a wide variety of formats. Each dataset
+        format defines its own data structure and rules on how to
+        interpret the data. For example, the following data structure
+        is used in COCO format:|n
+        /dataset/|n
+        - /images/<image_name>.jpg|n
+        - /annotations/|n
+        |n
+        In Datumaro, dataset formats are supported by
+        Extractor-s and Importer-s.
+        An Extractor produces a list of dataset items corresponding
+        to the dataset. An Importer creates a project from the
+        data source location.
+        It is possible to add a custom Extractor and Importer.
+        To do this, you need to put Extractor and
+        Importer implementation scripts to
+        <project_dir>/.datumaro/extractors
+        and <project_dir>/.datumaro/importers.|n
+        |n
+        List of builtin dataset formats: %s|n
+        |n
+        Examples:|n
+        - Create a project from VOC dataset in the current directory:|n
+        |s|simport -f voc -i path/to/voc|n
+        |n
+        - Create a project from COCO dataset in another directory:|n
+        |s|simport -f coco -i path/to/coco -o path/I/like/
+        """ % ', '.join(builtins),
+        formatter_class=MultilineFormatter)
+
+    parser.add_argument('-o', '--output-dir', default='.', dest='dst_dir',
+        help="Directory to save the new project to (default: current dir)")
+    parser.add_argument('-n', '--name', default=None,
+        help="Name of the new project (default: same as project dir)")
+    parser.add_argument('--copy', action='store_true',
+        help="Copy the dataset instead of saving source links")
+    parser.add_argument('--skip-check', action='store_true',
+        help="Skip source checking")
+    parser.add_argument('--overwrite', action='store_true',
+        help="Overwrite existing files in the save directory")
+    parser.add_argument('-i', '--input-path', required=True, dest='source',
+        help="Path to import project from")
+    parser.add_argument('-f', '--format',
+        help="Source project format (default: detect automatically)")
+    parser.add_argument('extra_args', nargs=argparse.REMAINDER,
+        help="Additional arguments for importer (pass '-- -h' for help)")
+    parser.set_defaults(command=import_command)
+
+    return parser
+
+def import_command(args):
+    project_dir = osp.abspath(args.dst_dir)
+
+    project_env_dir = osp.join(project_dir, DEFAULT_CONFIG.env_dir)
+    if osp.isdir(project_env_dir) and os.listdir(project_env_dir):
+        if not args.overwrite:
+            raise CliException("Directory '%s' already exists "
+                "(pass --overwrite to overwrite)" % project_env_dir)
+        else:
+            shutil.rmtree(project_env_dir, ignore_errors=True)
+
+    own_dataset_dir = osp.join(project_dir, DEFAULT_CONFIG.dataset_dir)
+    if osp.isdir(own_dataset_dir) and os.listdir(own_dataset_dir):
+        if not args.overwrite:
+            raise CliException("Directory '%s' already exists "
+                "(pass --overwrite to overwrite)" % own_dataset_dir)
+        else:
+            # NOTE: remove the dir to avoid using data from previous project
+            shutil.rmtree(own_dataset_dir)
+
+    project_name = args.name
+    if project_name is None:
+        project_name = osp.basename(project_dir)
+
+    env = Environment()
+    log.info("Importing project from '%s'" % args.source)
+
+    extra_args = {}
+    if not args.format:
+        if args.extra_args:
+            raise CliException("Extra args cannot be used without a format")
+
+        log.info("Trying to detect dataset format...")
+
+        matches = []
+        for format_name in env.importers.items:
+            log.debug("Checking '%s' format...", format_name)
+            importer = env.make_importer(format_name)
+            try:
+                match = importer.detect(args.source)
+                if match:
+                    log.debug("format matched")
+                    matches.append((format_name, importer))
+            except NotImplementedError:
+                log.debug("Format '%s' does not support auto detection.",
+                    format_name)
+
+        if len(matches) == 0:
+            log.error("Failed to detect dataset format automatically. "
+                "Try to specify format with the '-f/--format' parameter.")
+            return 1
+        elif len(matches) != 1:
+            log.error("Multiple formats match the dataset: %s. "
+                "Try to specify format with the '-f/--format' parameter.",
+                ', '.join(m[0] for m in matches))
+            return 2
+
+        format_name, importer = matches[0]
+        args.format = format_name
+    else:
+        try:
+            importer = env.make_importer(args.format)
+            if hasattr(importer, 'from_cmdline'):
+                extra_args = importer.from_cmdline(args.extra_args)
+        except KeyError:
+            raise CliException("Importer for format '%s' is not found" % \
+                args.format)
+
+    log.info("Importing project as '%s'" % args.format)
+
+    source = osp.abspath(args.source)
+    project = importer(source, **extra_args)
+    project.config.project_name = project_name
+    project.config.project_dir = project_dir
+
+    if not args.skip_check or args.copy:
+        log.info("Checking the dataset...")
+        dataset = project.make_dataset()
+    if args.copy:
+        log.info("Cloning data...")
+        dataset.save(merge=True, save_images=True)
+    else:
+        project.save()
+
+    log.info("Project has been created at '%s'" % project_dir)
+
+    return 0
+
+
+class FilterModes(Enum):
+    # primary
+    items = 1
+    annotations = 2
+    items_annotations = 3
+
+    # shortcuts
+    i = 1
+    a = 2
+    i_a = 3
+    a_i = 3
+    annotations_items = 3
+
+    @staticmethod
+    def parse(s):
+        s = s.lower()
+        s = s.replace('+', '_')
+        return FilterModes[s]
+
+    @classmethod
+    def make_filter_args(cls, mode):
+        if mode == cls.items:
+            return {}
+        elif mode == cls.annotations:
+            return {
+                'filter_annotations': True
+            }
+        elif mode == cls.items_annotations:
+            return {
+                'filter_annotations': True,
+                'remove_empty': True,
+            }
+        else:
+            raise NotImplementedError()
+
+    @classmethod
+    def list_options(cls):
+        return [m.name.replace('_', '+') for m in cls]
+
+def build_export_parser(parser_ctor=argparse.ArgumentParser):
+    builtins = sorted(Environment().converters.items)
+
+    parser = parser_ctor(help="Export project",
+        description="""
+        Exports the project dataset in some format. Optionally, a filter
+        can be passed; check the 'filter' command description for more info.
+        Each dataset format has its own options, which
+        are passed after the '--' separator (see examples), pass '-- -h'
+        for more info. If not stated otherwise, by default
+        only annotations are exported; to include images pass
+        the '--save-images' parameter.|n
+        |n
+        Formats:|n
+        In Datumaro, dataset formats are supported by Converter-s.
+        A Converter produces a dataset of a specific format
+        from dataset items. It is possible to add a custom Converter.
+        To do this, you need to put a Converter
+        definition script to <project_dir>/.datumaro/converters.|n
+        |n
+        List of builtin dataset formats: %s|n
+        |n
+        Examples:|n
+        - Export project as a VOC-like dataset, include images:|n
+        |s|sexport -f voc -- --save-images|n
+        |n
+        - Export project as a COCO-like dataset in another directory:|n
+        |s|sexport -f coco -o path/I/like/
+        """ % ', '.join(builtins),
+        formatter_class=MultilineFormatter)
+
+    parser.add_argument('-e', '--filter', default=None,
+        help="Filter expression for dataset items")
+    parser.add_argument('--filter-mode', default=FilterModes.i.name,
+        type=FilterModes.parse,
+        help="Filter mode (options: %s; default: %s)" % \
+            (', '.join(FilterModes.list_options()), '%(default)s'))
+    parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None,
+        help="Directory to save output (default: a subdir in the current one)")
+    parser.add_argument('--overwrite', action='store_true',
+        help="Overwrite existing files in the save directory")
+    parser.add_argument('-p', '--project', dest='project_dir', default='.',
+        help="Directory of the project to operate on (default: current dir)")
+    parser.add_argument('-f', '--format', required=True,
+        help="Output format")
+    parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None,
+        help="Additional arguments for converter (pass '-- -h' for help)")
+    parser.set_defaults(command=export_command)
+
+    return parser
+
+def export_command(args):
+    project = load_project(args.project_dir)
+
+    dst_dir = args.dst_dir
+    if dst_dir:
+        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
+            raise CliException("Directory '%s' already exists "
+                "(pass --overwrite to overwrite)" % dst_dir)
+    else:
+        dst_dir = generate_next_file_name('%s-%s' % \
+            (project.config.project_name, make_file_name(args.format)))
+    dst_dir = osp.abspath(dst_dir)
+
+    try:
+        converter = project.env.converters.get(args.format)
+    except KeyError:
+        raise CliException("Converter for format '%s' is not found" % \
+            args.format)
+
+    extra_args = converter.from_cmdline(args.extra_args)
+    def converter_proxy(extractor, save_dir):
+        return converter.convert(extractor, save_dir, **extra_args)
+
+    filter_args = FilterModes.make_filter_args(args.filter_mode)
+
+    log.info("Loading the project...")
+    dataset = project.make_dataset()
+
+    log.info("Exporting the project...")
+    dataset.export_project(
+        save_dir=dst_dir,
+        converter=converter_proxy,
+        filter_expr=args.filter,
+        **filter_args)
+    log.info("Project exported to '%s' as '%s'" % \
+        (dst_dir, args.format))
+
+    return 0
+
+def build_filter_parser(parser_ctor=argparse.ArgumentParser):
+    parser = parser_ctor(help="Extract subproject",
+        description="""
+        Extracts a subproject that contains only items matching the filter.
+        A filter is an XPath expression, which is applied to the XML
+        representation of a dataset item. Check the '--dry-run' parameter
+        to see XML representations of the dataset items.|n
+        |n
+        To filter annotations use the mode ('-m') parameter.|n
+        Supported modes:|n
+        - 'i', 'items'|n
+        - 'a', 'annotations'|n
+        - 'i+a', 'a+i', 'items+annotations', 'annotations+items'|n
+        When filtering annotations, use the 'items+annotations'
+        mode to indicate that annotation-less dataset items should be
+        removed. To select an annotation, write an XPath that
+        returns 'annotation' elements (see examples).|n
+        |n
+        Examples:|n
+        - Filter images with width < height:|n
+        |s|sextract -e '/item[image/width < image/height]'|n
+        |n
+        - Filter images with large-area bboxes:|n
+        |s|sextract -e '/item[annotation/type="bbox" and
+            annotation/area>2000]'|n
+        |n
+        - Filter out all irrelevant annotations from items:|n
+        |s|sextract -m a -e '/item/annotation[label = "person"]'|n
+        |n
+        - Filter out all irrelevant annotations from items:|n
+        |s|sextract -m a -e '/item/annotation[label="cat" and
+            area > 99.5]'|n
+        |n
+        - Filter occluded annotations and items, if no annotations left:|n
+        |s|sextract -m i+a -e '/item/annotation[occluded="True"]'
+        """,
+        formatter_class=MultilineFormatter)
+
+    parser.add_argument('-e', '--filter', default=None,
+        help="XML XPath filter expression for dataset items")
+    parser.add_argument('-m', '--mode', default=FilterModes.i.name,
+        type=FilterModes.parse,
+        help="Filter mode (options: %s; default: %s)" % \
+            (', '.join(FilterModes.list_options()), '%(default)s'))
+    parser.add_argument('--dry-run', action='store_true',
+        help="Print XML representations to be filtered and exit")
+    parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None,
+        help="Output directory (default: update current project)")
+    parser.add_argument('--overwrite', action='store_true',
+        help="Overwrite existing files in the save directory")
+    parser.add_argument('-p', '--project', dest='project_dir', default='.',
+        help="Directory of the project to operate on (default: current dir)")
+    parser.set_defaults(command=filter_command)
+
+    return parser
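+
+# A sketch of the XML representation that filter XPaths are applied to
+# (approximate; use --dry-run to print the real representation):
+#
+# <item>
+#   <id>290768</id>
+#   <image>
+#     <width>640</width>
+#     <height>480</height>
+#   </image>
+#   <annotation>
+#     <type>bbox</type>
+#     <label>cat</label>
+#     <area>2150.0</area>
+#     <occluded>False</occluded>
+#   </annotation>
+# </item>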
To select an annotation, write an XPath that + returns 'annotation' elements (see examples).|n + |n + Examples:|n + - Filter images with width < height:|n + |s|sextract -e '/item[image/width < image/height]'|n + |n + - Filter images with large-area bboxes:|n + |s|sextract -e '/item[annotation/type="bbox" and + annotation/area>2000]'|n + |n + - Filter out all irrelevant annotations from items:|n + |s|sextract -m a -e '/item/annotation[label = "person"]'|n + |n + - Filter out all irrelevant annotations from items:|n + |s|sextract -m a -e '/item/annotation[label="cat" and + area > 99.5]'|n + |n + - Filter occluded annotations and items, if no annotations left:|n + |s|sextract -m i+a -e '/item/annotation[occluded="True"]' + """, + formatter_class=MultilineFormatter) + + parser.add_argument('-e', '--filter', default=None, + help="XML XPath filter expression for dataset items") + parser.add_argument('-m', '--mode', default=FilterModes.i.name, + type=FilterModes.parse, + help="Filter mode (options: %s; default: %s)" % \ + (', '.join(FilterModes.list_options()) , '%(default)s')) + parser.add_argument('--dry-run', action='store_true', + help="Print XML representations to be filtered and exit") + parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None, + help="Output directory (default: update current project)") + parser.add_argument('--overwrite', action='store_true', + help="Overwrite existing files in the save directory") + parser.add_argument('-p', '--project', dest='project_dir', default='.', + help="Directory of the project to operate on (default: current dir)") + parser.set_defaults(command=filter_command) + + return parser + +def filter_command(args): + project = load_project(args.project_dir) + + if not args.dry_run: + dst_dir = args.dst_dir + if dst_dir: + if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir): + raise CliException("Directory '%s' already exists " + "(pass --overwrite to overwrite)" % dst_dir) + else: + dst_dir = generate_next_file_name('%s-filter' % \ + project.config.project_name) + dst_dir = osp.abspath(dst_dir) + + dataset = project.make_dataset() + + filter_args = FilterModes.make_filter_args(args.mode) + + if args.dry_run: + dataset = dataset.filter(expr=args.filter, **filter_args) + for item in dataset: + encoded_item = DatasetItemEncoder.encode(item, dataset.categories()) + xml_item = DatasetItemEncoder.to_string(encoded_item) + print(xml_item) + return 0 + + if not args.filter: + raise CliException("Expected a filter expression ('-e' argument)") + + dataset.filter_project(save_dir=dst_dir, expr=args.filter, **filter_args) + + log.info("Subproject has been extracted to '%s'" % dst_dir) + + return 0 + +def build_merge_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor(help="Merge two projects", + description=""" + Updates items of the current project with items + from other project.|n + |n + Examples:|n + - Update a project with items from other project:|n + |s|smerge -p path/to/first/project path/to/other/project + """, + formatter_class=MultilineFormatter) + + parser.add_argument('other_project_dir', + help="Path to a project") + parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None, + help="Output directory (default: current project's dir)") + parser.add_argument('--overwrite', action='store_true', + help="Overwrite existing files in the save directory") + parser.add_argument('-p', '--project', dest='project_dir', default='.', + help="Directory of the project to operate on (default: current dir)") + 
parser.set_defaults(command=merge_command) + + return parser + +def merge_command(args): + first_project = load_project(args.project_dir) + second_project = load_project(args.other_project_dir) + + dst_dir = args.dst_dir + if dst_dir: + if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir): + raise CliException("Directory '%s' already exists " + "(pass --overwrite to overwrite)" % dst_dir) + + first_dataset = first_project.make_dataset() + second_dataset = second_project.make_dataset() + + first_dataset.update(second_dataset) + first_dataset.save(save_dir=dst_dir) + + if dst_dir is None: + dst_dir = first_project.config.project_dir + dst_dir = osp.abspath(dst_dir) + log.info("Merge results have been saved to '%s'" % dst_dir) + + return 0 + +def build_diff_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor(help="Compare projects", + description=""" + Compares two projects, match annotations by distance.|n + |n + Examples:|n + - Compare two projects, match boxes if IoU > 0.7,|n + |s|s|s|sprint results to Tensorboard: + |s|sdiff path/to/other/project -o diff/ -v tensorboard --iou-thresh 0.7 + """, + formatter_class=MultilineFormatter) + + parser.add_argument('other_project_dir', + help="Directory of the second project to be compared") + parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None, + help="Directory to save comparison results (default: do not save)") + parser.add_argument('-v', '--visualizer', + default=DiffVisualizer.DEFAULT_FORMAT, + choices=[f.name for f in DiffVisualizer.Format], + help="Output format (default: %(default)s)") + parser.add_argument('--iou-thresh', default=0.5, type=float, + help="IoU match threshold for detections (default: %(default)s)") + parser.add_argument('--conf-thresh', default=0.5, type=float, + help="Confidence threshold for detections (default: %(default)s)") + parser.add_argument('--overwrite', action='store_true', + help="Overwrite existing files in the save directory") + parser.add_argument('-p', '--project', dest='project_dir', default='.', + help="Directory of the first project to be compared (default: current dir)") + parser.set_defaults(command=diff_command) + + return parser + +def diff_command(args): + first_project = load_project(args.project_dir) + second_project = load_project(args.other_project_dir) + + comparator = DistanceComparator(iou_threshold=args.iou_thresh) + + dst_dir = args.dst_dir + if dst_dir: + if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir): + raise CliException("Directory '%s' already exists " + "(pass --overwrite to overwrite)" % dst_dir) + else: + dst_dir = generate_next_file_name('%s-%s-diff' % ( + first_project.config.project_name, + second_project.config.project_name) + ) + dst_dir = osp.abspath(dst_dir) + log.info("Saving diff to '%s'" % dst_dir) + + dst_dir_existed = osp.exists(dst_dir) + try: + visualizer = DiffVisualizer(save_dir=dst_dir, comparator=comparator, + output_format=args.visualizer) + visualizer.save_dataset_diff( + first_project.make_dataset(), + second_project.make_dataset()) + except BaseException: + if not dst_dir_existed and osp.isdir(dst_dir): + shutil.rmtree(dst_dir, ignore_errors=True) + raise + + return 0 + +def build_ediff_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor(help="Compare projects for equality", + description=""" + Compares two projects for equality.|n + |n + Examples:|n + - Compare two projects, exclude annotation group |n + |s|s|sand the 'is_crowd' attribute from comparison:|n + |s|sediff 
other/project/ -if group -ia is_crowd + """, + formatter_class=MultilineFormatter) + + parser.add_argument('other_project_dir', + help="Directory of the second project to be compared") + parser.add_argument('-iia', '--ignore-item-attr', action='append', + help="Ignore item attribute (repeatable)") + parser.add_argument('-ia', '--ignore-attr', action='append', + help="Ignore annotation attribute (repeatable)") + parser.add_argument('-if', '--ignore-field', + action='append', default=['id', 'group'], + help="Ignore annotation field (repeatable, default: %(default)s)") + parser.add_argument('--match-images', action='store_true', + help='Match dataset items by images instead of ids') + parser.add_argument('--all', action='store_true', + help="Include matches in the output") + parser.add_argument('-p', '--project', dest='project_dir', default='.', + help="Directory of the first project to be compared (default: current dir)") + parser.set_defaults(command=ediff_command) + + return parser + +def ediff_command(args): + first_project = load_project(args.project_dir) + second_project = load_project(args.other_project_dir) + + comparator = ExactComparator( + match_images=args.match_images, + ignored_fields=args.ignore_field, + ignored_attrs=args.ignore_attr, + ignored_item_attrs=args.ignore_item_attr) + matches, mismatches, a_extra, b_extra, errors = \ + comparator.compare_datasets( + first_project.make_dataset(), second_project.make_dataset()) + output = { + "mismatches": mismatches, + "a_extra_items": sorted(a_extra), + "b_extra_items": sorted(b_extra), + "errors": errors, + } + if args.all: + output["matches"] = matches + + output_file = generate_next_file_name('diff', ext='.json') + with open(output_file, 'w') as f: + json.dump(output, f, indent=4, sort_keys=True) + + print("Found:") + print("The first project has %s unmatched items" % len(a_extra)) + print("The second project has %s unmatched items" % len(b_extra)) + print("%s item conflicts" % len(errors)) + print("%s matching annotations" % len(matches)) + print("%s mismatching annotations" % len(mismatches)) + + log.info("Output has been saved to '%s'" % output_file) + + return 0 + +def build_transform_parser(parser_ctor=argparse.ArgumentParser): + builtins = sorted(Environment().transforms.items) + + parser = parser_ctor(help="Transform project", + description=""" + Applies some operation to dataset items in the project + and produces a new project.|n + |n + Builtin transforms: %s|n + |n + Examples:|n + - Convert instance polygons to masks:|n + |s|stransform -t polygons_to_masks + """ % ', '.join(builtins), + formatter_class=MultilineFormatter) + + parser.add_argument('-t', '--transform', required=True, + help="Transform to apply to the project") + parser.add_argument('-o', '--output-dir', dest='dst_dir', default=None, + help="Directory to save output (default: current dir)") + parser.add_argument('--overwrite', action='store_true', + help="Overwrite existing files in the save directory") + parser.add_argument('-p', '--project', dest='project_dir', default='.', + help="Directory of the project to operate on (default: current dir)") + parser.add_argument('extra_args', nargs=argparse.REMAINDER, default=None, + help="Additional arguments for transformation (pass '-- -h' for help)") + parser.set_defaults(command=transform_command) + + return parser + +def transform_command(args): + project = load_project(args.project_dir) + + dst_dir = args.dst_dir + if dst_dir: + if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir): + raise 
CliException("Directory '%s' already exists " + "(pass --overwrite to overwrite)" % dst_dir) + else: + dst_dir = generate_next_file_name('%s-%s' % \ + (project.config.project_name, make_file_name(args.transform))) + dst_dir = osp.abspath(dst_dir) + + try: + transform = project.env.transforms.get(args.transform) + except KeyError: + raise CliException("Transform '%s' is not found" % args.transform) + + extra_args = {} + if hasattr(transform, 'from_cmdline'): + extra_args = transform.from_cmdline(args.extra_args) + + log.info("Loading the project...") + dataset = project.make_dataset() + + log.info("Transforming the project...") + dataset.transform_project( + method=transform, + save_dir=dst_dir, + **extra_args + ) + + log.info("Transform results have been saved to '%s'" % dst_dir) + + return 0 + +def build_stats_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor(help="Get project statistics", + description=""" + Outputs various project statistics like image mean and std, + annotations count etc. + """, + formatter_class=MultilineFormatter) + + parser.add_argument('-p', '--project', dest='project_dir', default='.', + help="Directory of the project to operate on (default: current dir)") + parser.set_defaults(command=stats_command) + + return parser + +def stats_command(args): + project = load_project(args.project_dir) + + dataset = project.make_dataset() + stats = {} + stats.update(compute_image_statistics(dataset)) + stats.update(compute_ann_statistics(dataset)) + + dst_file = generate_next_file_name('statistics', ext='.json') + log.info("Writing project statistics to '%s'" % dst_file) + with open(dst_file, 'w') as f: + json.dump(stats, f, indent=4, sort_keys=True) + +def build_info_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor(help="Get project info", + description=""" + Outputs project info. 
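The `stats` command above reduces to two library calls. A minimal sketch of the same computation; the import path of the statistics helpers is an assumption here, since only their names appear in this file:

``` python
import json

from datumaro.cli.util.project import load_project
# Assumed location of the helpers used by stats_command above.
from datumaro.components.operations import (compute_ann_statistics,
    compute_image_statistics)

project = load_project('path/to/project')  # illustrative location
dataset = project.make_dataset()

stats = {}
stats.update(compute_image_statistics(dataset))  # image mean, std, etc.
stats.update(compute_ann_statistics(dataset))    # annotation counts, etc.

print(json.dumps(stats, indent=4, sort_keys=True))
```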
+ """, + formatter_class=MultilineFormatter) + + parser.add_argument('--all', action='store_true', + help="Print all information") + parser.add_argument('-p', '--project', dest='project_dir', default='.', + help="Directory of the project to operate on (default: current dir)") + parser.set_defaults(command=info_command) + + return parser + +def info_command(args): + project = load_project(args.project_dir) + config = project.config + env = project.env + dataset = project.make_dataset() + + print("Project:") + print(" name:", config.project_name) + print(" location:", config.project_dir) + print("Plugins:") + print(" importers:", ', '.join(env.importers.items)) + print(" extractors:", ', '.join(env.extractors.items)) + print(" converters:", ', '.join(env.converters.items)) + print(" launchers:", ', '.join(env.launchers.items)) + + print("Sources:") + for source_name, source in config.sources.items(): + print(" source '%s':" % source_name) + print(" format:", source.format) + print(" url:", source.url) + print(" location:", project.local_source_dir(source_name)) + + def print_extractor_info(extractor, indent=''): + print("%slength:" % indent, len(extractor)) + + categories = extractor.categories() + print("%scategories:" % indent, ', '.join(c.name for c in categories)) + + for cat_type, cat in categories.items(): + print("%s %s:" % (indent, cat_type.name)) + if cat_type == AnnotationType.label: + print("%s count:" % indent, len(cat.items)) + + count_threshold = 10 + if args.all: + count_threshold = len(cat.items) + labels = ', '.join(c.name for c in cat.items[:count_threshold]) + if count_threshold < len(cat.items): + labels += " (and %s more)" % ( + len(cat.items) - count_threshold) + print("%s labels:" % indent, labels) + + print("Dataset:") + print_extractor_info(dataset, indent=" ") + + subsets = dataset.subsets() + print(" subsets:", ', '.join(subsets)) + for subset_name in subsets: + subset = dataset.get_subset(subset_name) + print(" subset '%s':" % subset_name) + print_extractor_info(subset, indent=" ") + + print("Models:") + for model_name, model in config.models.items(): + print(" model '%s':" % model_name) + print(" type:", model.launcher) + + return 0 + + +def build_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor( + description=""" + Manipulate projects.|n + |n + By default, the project to be operated on is searched for + in the current directory. An additional '-p' argument can be + passed to specify project location. 
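All of these subcommands share one argparse wiring scheme: each builder registers its handler via `set_defaults(command=...)`, and an entry point can then invoke `args.command(args)`. A self-contained sketch of the pattern (the subcommand and handler names are illustrative):

``` python
import argparse

def hello_command(args):
    print("Hello, %s!" % args.name)
    return 0

parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers()
hello_parser = subparsers.add_parser('hello')
hello_parser.add_argument('name')
hello_parser.set_defaults(command=hello_command)

args = parser.parse_args(['hello', 'world'])
args.command(args)  # dispatches to hello_command
```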
+ """, + formatter_class=MultilineFormatter) + + subparsers = parser.add_subparsers() + add_subparser(subparsers, 'create', build_create_parser) + add_subparser(subparsers, 'import', build_import_parser) + add_subparser(subparsers, 'export', build_export_parser) + add_subparser(subparsers, 'filter', build_filter_parser) + add_subparser(subparsers, 'merge', build_merge_parser) + add_subparser(subparsers, 'diff', build_diff_parser) + add_subparser(subparsers, 'ediff', build_ediff_parser) + add_subparser(subparsers, 'transform', build_transform_parser) + add_subparser(subparsers, 'info', build_info_parser) + add_subparser(subparsers, 'stats', build_stats_parser) + + return parser diff --git a/datumaro/cli/contexts/project/diff.py b/datumaro/cli/contexts/project/diff.py new file mode 100644 index 0000000000..358f386057 --- /dev/null +++ b/datumaro/cli/contexts/project/diff.py @@ -0,0 +1,290 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from collections import Counter +from enum import Enum +import numpy as np +import os +import os.path as osp + +_formats = ['simple'] + +import warnings +with warnings.catch_warnings(): + warnings.simplefilter("ignore") + import tensorboardX as tb + _formats.append('tensorboard') + +from datumaro.components.extractor import AnnotationType +from datumaro.util.image import save_image + + +Format = Enum('Formats', _formats) + +class DiffVisualizer: + Format = Format + DEFAULT_FORMAT = Format.simple + + _UNMATCHED_LABEL = -1 + + + def __init__(self, comparator, save_dir, output_format=DEFAULT_FORMAT): + self.comparator = comparator + + if isinstance(output_format, str): + output_format = Format[output_format] + assert output_format in Format + self.output_format = output_format + + self.save_dir = save_dir + if output_format is Format.tensorboard: + logdir = osp.join(self.save_dir, 'logs', 'diff') + self.file_writer = tb.SummaryWriter(logdir) + if output_format is Format.simple: + self.label_diff_writer = None + + self.categories = {} + + self.label_confusion_matrix = Counter() + self.bbox_confusion_matrix = Counter() + + def save_dataset_diff(self, extractor_a, extractor_b): + if self.save_dir: + os.makedirs(self.save_dir, exist_ok=True) + + if len(extractor_a) != len(extractor_b): + print("Datasets have different lengths: %s vs %s" % \ + (len(extractor_a), len(extractor_b))) + + self.categories = {} + + label_mismatch = self.comparator. 
\
+            compare_dataset_labels(extractor_a, extractor_b)
+        if label_mismatch is None:
+            print("Datasets have no label information")
+        elif len(label_mismatch) != 0:
+            print("Datasets have mismatching labels:")
+            for a_label, b_label in label_mismatch:
+                if a_label is None:
+                    print(" > %s" % b_label.name)
+                elif b_label is None:
+                    print(" < %s" % a_label.name)
+                else:
+                    print(" %s != %s" % (a_label.name, b_label.name))
+        else:
+            self.categories.update(extractor_a.categories())
+            self.categories.update(extractor_b.categories())
+
+        self.label_confusion_matrix = Counter()
+        self.bbox_confusion_matrix = Counter()
+
+        if self.output_format is Format.tensorboard:
+            self.file_writer.reopen()
+
+        ids_a = set((item.id, item.subset) for item in extractor_a)
+        ids_b = set((item.id, item.subset) for item in extractor_b)
+        ids = ids_a & ids_b
+
+        if len(ids) != len(ids_a):
+            print("Unmatched items in the first dataset: ")
+            print(ids_a - ids)
+        if len(ids) != len(ids_b):
+            print("Unmatched items in the second dataset: ")
+            print(ids_b - ids)
+
+        for item_id, item_subset in ids:
+            item_a = extractor_a.get(item_id, item_subset)
+            item_b = extractor_b.get(item_id, item_subset)
+
+            label_diff = self.comparator.compare_item_labels(item_a, item_b)
+            self.update_label_confusion(label_diff)
+
+            bbox_diff = self.comparator.compare_item_bboxes(item_a, item_b)
+            self.update_bbox_confusion(bbox_diff)
+
+            self.save_item_label_diff(item_a, item_b, label_diff)
+            self.save_item_bbox_diff(item_a, item_b, bbox_diff)
+
+        if len(self.label_confusion_matrix) != 0:
+            self.save_conf_matrix(self.label_confusion_matrix,
+                'labels_confusion.png')
+        if len(self.bbox_confusion_matrix) != 0:
+            self.save_conf_matrix(self.bbox_confusion_matrix,
+                'bbox_confusion.png')
+
+        if self.output_format is Format.tensorboard:
+            self.file_writer.flush()
+            self.file_writer.close()
+        elif self.output_format is Format.simple:
+            if self.label_diff_writer:
+                self.label_diff_writer.flush()
+                self.label_diff_writer.close()
+
+    def update_label_confusion(self, label_diff):
+        matches, a_unmatched, b_unmatched = label_diff
+        for label in matches:
+            self.label_confusion_matrix[(label, label)] += 1
+        for a_label in a_unmatched:
+            self.label_confusion_matrix[(a_label, self._UNMATCHED_LABEL)] += 1
+        for b_label in b_unmatched:
+            self.label_confusion_matrix[(self._UNMATCHED_LABEL, b_label)] += 1
+
+    def update_bbox_confusion(self, bbox_diff):
+        matches, mispred, a_unmatched, b_unmatched = bbox_diff
+        for a_bbox, b_bbox in matches:
+            self.bbox_confusion_matrix[(a_bbox.label, b_bbox.label)] += 1
+        for a_bbox, b_bbox in mispred:
+            self.bbox_confusion_matrix[(a_bbox.label, b_bbox.label)] += 1
+        for a_bbox in a_unmatched:
+            self.bbox_confusion_matrix[(a_bbox.label, self._UNMATCHED_LABEL)] += 1
+        for b_bbox in b_unmatched:
+            self.bbox_confusion_matrix[(self._UNMATCHED_LABEL, b_bbox.label)] += 1
+
+    @classmethod
+    def draw_text_with_background(cls, frame, text, origin,
+            font=None, scale=1.0,
+            color=(0, 0, 0), thickness=1, bgcolor=(1, 1, 1)):
+        import cv2
+
+        if not font:
+            font = cv2.FONT_HERSHEY_SIMPLEX
+
+        text_size, baseline = cv2.getTextSize(text, font, scale, thickness)
+        cv2.rectangle(frame,
+            tuple((origin + (0, baseline)).astype(int)),
+            tuple((origin + (text_size[0], -text_size[1])).astype(int)),
+            bgcolor, cv2.FILLED)
+        cv2.putText(frame, text,
+            tuple(origin.astype(int)),
+            font, scale, color, thickness)
+        return text_size, baseline
+
+    def draw_detection_roi(self, frame, x, y, w, h, label, conf, color):
+        import cv2
+
+        cv2.rectangle(frame, (x, y), (x + 
w, y + h), color, 2) + + text = '%s %.2f%%' % (label, 100.0 * conf) + text_scale = 0.5 + font = cv2.FONT_HERSHEY_SIMPLEX + text_size = cv2.getTextSize(text, font, text_scale, 1) + line_height = np.array([0, text_size[0][1]]) + self.draw_text_with_background(frame, text, + np.array([x, y]) - line_height * 0.5, + font, scale=text_scale, color=[255 - c for c in color]) + + def get_label(self, label_id): + cat = self.categories.get(AnnotationType.label) + if cat is None: + return str(label_id) + return cat.items[label_id].name + + def draw_bbox(self, img, shape, color): + x, y, w, h = shape.get_bbox() + self.draw_detection_roi(img, int(x), int(y), int(w), int(h), + self.get_label(shape.label), shape.attributes.get('score', 1), + color) + + def get_label_diff_file(self): + if self.label_diff_writer is None: + self.label_diff_writer = \ + open(osp.join(self.save_dir, 'label_diff.txt'), 'w') + return self.label_diff_writer + + def save_item_label_diff(self, item_a, item_b, diff): + _, a_unmatched, b_unmatched = diff + + if 0 < len(a_unmatched) + len(b_unmatched): + if self.output_format is Format.simple: + f = self.get_label_diff_file() + f.write(item_a.id + '\n') + for a_label in a_unmatched: + f.write(' >%s\n' % self.get_label(a_label)) + for b_label in b_unmatched: + f.write(' <%s\n' % self.get_label(b_label)) + elif self.output_format is Format.tensorboard: + tag = item_a.id + for a_label in a_unmatched: + self.file_writer.add_text(tag, + '>%s\n' % self.get_label(a_label)) + for b_label in b_unmatched: + self.file_writer.add_text(tag, + '<%s\n' % self.get_label(b_label)) + + def save_item_bbox_diff(self, item_a, item_b, diff): + _, mispred, a_unmatched, b_unmatched = diff + + if 0 < len(a_unmatched) + len(b_unmatched) + len(mispred): + img_a = item_a.image.data.copy() + img_b = img_a.copy() + for a_bbox, b_bbox in mispred: + self.draw_bbox(img_a, a_bbox, (0, 255, 0)) + self.draw_bbox(img_b, b_bbox, (0, 0, 255)) + for a_bbox in a_unmatched: + self.draw_bbox(img_a, a_bbox, (255, 255, 0)) + for b_bbox in b_unmatched: + self.draw_bbox(img_b, b_bbox, (255, 255, 0)) + + img = np.hstack([img_a, img_b]) + + path = osp.join(self.save_dir, item_a.id) + + if self.output_format is Format.simple: + save_image(path + '.png', img, create_dir=True) + elif self.output_format is Format.tensorboard: + self.save_as_tensorboard(img, path) + + def save_as_tensorboard(self, img, name): + img = img[:, :, ::-1] # to RGB + img = np.transpose(img, (2, 0, 1)) # to (C, H, W) + img = img.astype(dtype=np.uint8) + self.file_writer.add_image(name, img) + + def save_conf_matrix(self, conf_matrix, filename): + import matplotlib.pyplot as plt + + classes = None + label_categories = self.categories.get(AnnotationType.label) + if label_categories is not None: + classes = { id: c.name for id, c in enumerate(label_categories.items) } + if classes is None: + classes = { c: 'label_%s' % c for c, _ in conf_matrix } + classes[self._UNMATCHED_LABEL] = 'unmatched' + + class_idx = { id: i for i, id in enumerate(classes.keys()) } + matrix = np.zeros((len(classes), len(classes)), dtype=int) + for idx_pair in conf_matrix: + index = (class_idx[idx_pair[0]], class_idx[idx_pair[1]]) + matrix[index] = conf_matrix[idx_pair] + + labels = [label for id, label in classes.items()] + + fig = plt.figure() + fig.add_subplot(111) + table = plt.table( + cellText=matrix, + colLabels=labels, + rowLabels=labels, + loc ='center') + table.auto_set_font_size(False) + table.set_fontsize(8) + table.scale(3, 3) + # Removing ticks and spines enables you to get the 
figure with only the table.
+        plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
+        plt.tick_params(axis='y', which='both', right=False, left=False, labelleft=False)
+        for pos in ['right', 'top', 'bottom', 'left']:
+            plt.gca().spines[pos].set_visible(False)
+
+        for idx_pair in conf_matrix:
+            i = class_idx[idx_pair[0]]
+            j = class_idx[idx_pair[1]]
+            if conf_matrix[idx_pair] != 0:
+                if i != j:
+                    table._cells[(i + 1, j)].set_facecolor('#FF0000')
+                else:
+                    table._cells[(i + 1, j)].set_facecolor('#00FF00')
+
+        plt.savefig(osp.join(self.save_dir, filename),
+            bbox_inches='tight', pad_inches=0.05)
diff --git a/datumaro/cli/contexts/source/__init__.py b/datumaro/cli/contexts/source/__init__.py
new file mode 100644
index 0000000000..45dbdb1b52
--- /dev/null
+++ b/datumaro/cli/contexts/source/__init__.py
@@ -0,0 +1,273 @@
+
+# Copyright (C) 2019-2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import argparse
+import logging as log
+import os
+import os.path as osp
+import shutil
+
+from datumaro.components.project import Environment
+from ...util import add_subparser, CliException, MultilineFormatter
+from ...util.project import load_project
+
+
+def build_add_parser(parser_ctor=argparse.ArgumentParser):
+    builtins = sorted(Environment().extractors.items)
+
+    base_parser = argparse.ArgumentParser(add_help=False)
+    base_parser.add_argument('-n', '--name', default=None,
+        help="Name of the new source")
+    base_parser.add_argument('-f', '--format', required=True,
+        help="Source dataset format")
+    base_parser.add_argument('--skip-check', action='store_true',
+        help="Skip source checking")
+    base_parser.add_argument('-p', '--project', dest='project_dir', default='.',
+        help="Directory of the project to operate on (default: current dir)")
+
+    parser = parser_ctor(help="Add data source to project",
+        description="""
+        Adds a data source to a project. The source can be:|n
+        - a dataset in a supported format (check 'formats' section below)|n
+        - a Datumaro project|n
+        |n
+        The source can be either a local directory or a remote
+        git repository. Each source type has its own parameters, which can
+        be checked by:|n
+        '%s'.|n
+        |n
+        Formats:|n
+        Datasets come in a wide variety of formats. Each dataset
+        format defines its own data structure and rules on how to
+        interpret the data. For example, the following data structure
+        is used in COCO format:|n
+        /dataset/|n
+        - /images/<id>.jpg|n
+        - /annotations/|n
+        |n
+        In Datumaro, dataset formats are supported by Extractor-s.
+        An Extractor produces a list of dataset items corresponding
+        to the dataset. It is possible to add a custom Extractor.
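For illustration, a custom Extractor could look roughly like this (a hypothetical sketch; the base classes are defined in `datumaro/components/extractor.py` later in this diff, and the exact required interface may differ):

``` python
from datumaro.components.extractor import DatasetItem, Extractor, Label

class MyFormatExtractor(Extractor):
    def __iter__(self):
        # A real extractor would parse the source files here and
        # yield one DatasetItem per image or frame.
        yield DatasetItem(id='0001', annotations=[Label(label=0)])
```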
+ To do this, you need to put an Extractor + definition script to /.datumaro/extractors.|n + |n + List of builtin source formats: %s|n + |n + Examples:|n + - Add a local directory with VOC-like dataset:|n + |s|sadd path path/to/voc -f voc_detection|n + - Add a local file with CVAT annotations, call it 'mysource'|n + |s|s|s|sto the project somewhere else:|n + |s|sadd path path/to/cvat.xml -f cvat -n mysource -p somewhere/else/ + """ % ('%(prog)s SOURCE_TYPE --help', ', '.join(builtins)), + formatter_class=MultilineFormatter, + add_help=False) + parser.set_defaults(command=add_command) + + sp = parser.add_subparsers(dest='source_type', metavar='SOURCE_TYPE', + help="The type of the data source " + "(call '%s SOURCE_TYPE --help' for more info)" % parser.prog) + + dir_parser = sp.add_parser('path', help="Add local path as source", + parents=[base_parser]) + dir_parser.add_argument('url', + help="Path to the source") + dir_parser.add_argument('--copy', action='store_true', + help="Copy the dataset instead of saving source links") + + repo_parser = sp.add_parser('git', help="Add git repository as source", + parents=[base_parser]) + repo_parser.add_argument('url', + help="URL of the source git repository") + repo_parser.add_argument('-b', '--branch', default='master', + help="Branch of the source repository (default: %(default)s)") + repo_parser.add_argument('--checkout', action='store_true', + help="Do branch checkout") + + # NOTE: add common parameters to the parent help output + # the other way could be to use parse_known_args() + display_parser = argparse.ArgumentParser( + parents=[base_parser, parser], + prog=parser.prog, usage="%(prog)s [-h] SOURCE_TYPE ...", + description=parser.description, formatter_class=MultilineFormatter) + class HelpAction(argparse._HelpAction): + def __call__(self, parser, namespace, values, option_string=None): + display_parser.print_help() + parser.exit() + + parser.add_argument('-h', '--help', action=HelpAction, + help='show this help message and exit') + + # TODO: needed distinction on how to add an extractor or a remote source + + return parser + +def add_command(args): + project = load_project(args.project_dir) + + if args.source_type == 'git': + name = args.name + if name is None: + name = osp.splitext(osp.basename(args.url))[0] + + if project.env.git.has_submodule(name): + raise CliException("Git submodule '%s' already exists" % name) + + try: + project.get_source(name) + raise CliException("Source '%s' already exists" % name) + except KeyError: + pass + + rel_local_dir = project.local_source_dir(name) + local_dir = osp.join(project.config.project_dir, rel_local_dir) + url = args.url + project.env.git.create_submodule(name, local_dir, + url=url, branch=args.branch, no_checkout=not args.checkout) + elif args.source_type == 'path': + url = osp.abspath(args.url) + if not osp.exists(url): + raise CliException("Source path '%s' does not exist" % url) + + name = args.name + if name is None: + name = osp.splitext(osp.basename(url))[0] + + if project.env.git.has_submodule(name): + raise CliException("Git submodule '%s' already exists" % name) + + try: + project.get_source(name) + raise CliException("Source '%s' already exists" % name) + except KeyError: + pass + + rel_local_dir = project.local_source_dir(name) + local_dir = osp.join(project.config.project_dir, rel_local_dir) + + if args.copy: + log.info("Copying from '%s' to '%s'" % (url, local_dir)) + if osp.isdir(url): + # copytree requires destination dir not to exist + shutil.copytree(url, local_dir) + url = 
rel_local_dir + elif osp.isfile(url): + os.makedirs(local_dir) + shutil.copy2(url, local_dir) + url = osp.join(rel_local_dir, osp.basename(url)) + else: + raise Exception("Expected file or directory") + else: + os.makedirs(local_dir) + + project.add_source(name, { 'url': url, 'format': args.format }) + + if not args.skip_check: + log.info("Checking the source...") + try: + project.make_source_project(name).make_dataset() + except Exception: + shutil.rmtree(local_dir, ignore_errors=True) + raise + + project.save() + + log.info("Source '%s' has been added to the project, location: '%s'" \ + % (name, rel_local_dir)) + + return 0 + +def build_remove_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor(help="Remove source from project", + description="Remove a source from a project.") + + parser.add_argument('-n', '--name', required=True, + help="Name of the source to be removed") + parser.add_argument('--force', action='store_true', + help="Ignore possible errors during removal") + parser.add_argument('--keep-data', action='store_true', + help="Do not remove source data") + parser.add_argument('-p', '--project', dest='project_dir', default='.', + help="Directory of the project to operate on (default: current dir)") + parser.set_defaults(command=remove_command) + + return parser + +def remove_command(args): + project = load_project(args.project_dir) + + name = args.name + if not name: + raise CliException("Expected source name") + try: + project.get_source(name) + except KeyError: + if not args.force: + raise CliException("Source '%s' does not exist" % name) + + if project.env.git.has_submodule(name): + if args.force: + log.warning("Forcefully removing the '%s' source..." % name) + + project.env.git.remove_submodule(name, force=args.force) + + source_dir = osp.join(project.config.project_dir, + project.local_source_dir(name)) + project.remove_source(name) + project.save() + + if not args.keep_data: + shutil.rmtree(source_dir, ignore_errors=True) + + log.info("Source '%s' has been removed from the project" % name) + + return 0 + +def build_info_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor() + + parser.add_argument('-n', '--name', + help="Source name") + parser.add_argument('-v', '--verbose', action='store_true', + help="Show details") + parser.add_argument('-p', '--project', dest='project_dir', default='.', + help="Directory of the project to operate on (default: current dir)") + parser.set_defaults(command=info_command) + + return parser + +def info_command(args): + project = load_project(args.project_dir) + + if args.name: + source = project.get_source(args.name) + print(source) + else: + for name, conf in project.config.sources.items(): + print(name) + if args.verbose: + print(dict(conf)) + +def build_parser(parser_ctor=argparse.ArgumentParser): + parser = parser_ctor(description=""" + Manipulate data sources inside of a project.|n + |n + A data source is a source of data for a project. + The project combines multiple data sources into one dataset. + The role of a data source is to provide dataset items - images + and/or annotations.|n + |n + By default, the project to be operated on is searched for + in the current directory. An additional '-p' argument can be + passed to specify project location. 
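The `add` and `remove` commands above are thin wrappers around a small project-level API. A minimal sketch of the same flow (the project location, source name, path, and format are illustrative):

``` python
from datumaro.cli.util.project import load_project

project = load_project('path/to/project')  # illustrative location

# Register a source, then run the same validity check 'add' performs
# unless --skip-check is passed.
project.add_source('my-source', { 'url': 'path/to/cvat.xml', 'format': 'cvat' })
project.make_source_project('my-source').make_dataset()
project.save()
```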
+ """, + formatter_class=MultilineFormatter) + + subparsers = parser.add_subparsers() + add_subparser(subparsers, 'add', build_add_parser) + add_subparser(subparsers, 'remove', build_remove_parser) + add_subparser(subparsers, 'info', build_info_parser) + + return parser diff --git a/datumaro/cli/util/__init__.py b/datumaro/cli/util/__init__.py new file mode 100644 index 0000000000..4ee0b72b07 --- /dev/null +++ b/datumaro/cli/util/__init__.py @@ -0,0 +1,74 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import argparse +import textwrap + + +class CliException(Exception): pass + +def add_subparser(subparsers, name, builder): + return builder(lambda **kwargs: subparsers.add_parser(name, **kwargs)) + +class MultilineFormatter(argparse.HelpFormatter): + """ + Keeps line breaks introduced with '|n' separator + and spaces introduced with '|s'. + """ + + def __init__(self, keep_natural=False, **kwargs): + super().__init__(**kwargs) + self._keep_natural = keep_natural + + def _fill_text(self, text, width, indent): + text = self._whitespace_matcher.sub(' ', text).strip() + text = text.replace('|s', ' ') + + paragraphs = text.split('|n ') + if self._keep_natural: + paragraphs = sum((p.split('\n ') for p in paragraphs), []) + + multiline_text = '' + for paragraph in paragraphs: + formatted_paragraph = textwrap.fill(paragraph, width, + initial_indent=indent, subsequent_indent=indent) + '\n' + multiline_text += formatted_paragraph + return multiline_text + +def required_count(nmin=0, nmax=0): + assert 0 <= nmin and 0 <= nmax and nmin or nmax + + class RequiredCount(argparse.Action): + def __call__(self, parser, args, values, option_string=None): + k = len(values) + if not ((nmin and (nmin <= k) or not nmin) and \ + (nmax and (k <= nmax) or not nmax)): + msg = "Argument '%s' requires" % self.dest + if nmin and nmax: + msg += " from %s to %s arguments" % (nmin, nmax) + elif nmin: + msg += " at least %s arguments" % nmin + else: + msg += " no more %s arguments" % nmax + raise argparse.ArgumentTypeError(msg) + setattr(args, self.dest, values) + return RequiredCount + +def at_least(n): + return required_count(n, 0) + +def make_file_name(s): + # adapted from + # https://docs.djangoproject.com/en/2.1/_modules/django/utils/text/#slugify + """ + Normalizes string, converts to lowercase, removes non-alpha characters, + and converts spaces to hyphens. + """ + import unicodedata, re + s = unicodedata.normalize('NFKD', s).encode('ascii', 'ignore') + s = s.decode() + s = re.sub(r'[^\w\s-]', '', s).strip().lower() + s = re.sub(r'[-\s]+', '-', s) + return s \ No newline at end of file diff --git a/datumaro/cli/util/project.py b/datumaro/cli/util/project.py new file mode 100644 index 0000000000..56590a4d1d --- /dev/null +++ b/datumaro/cli/util/project.py @@ -0,0 +1,39 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import os +import re + +from datumaro.components.project import Project +from datumaro.util import cast + + +def load_project(project_dir): + return Project.load(project_dir) + +def generate_next_file_name(basename, basedir='.', sep='.', ext=''): + """ + If basedir does not contain basename, returns basename, + otherwise generates a name by appending sep to the basename + and the number, next to the last used number in the basedir for + files with basename prefix. Optionally, appends ext. 
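+
+    For example, with basename 'diff' and ext='.json', the generated names
+    are 'diff.json' at first, then 'diff.1.json', 'diff.2.json', and so on,
+    as matching files accumulate in basedir.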
+ """ + + return generate_next_name(os.listdir(basedir), basename, sep, ext) + +def generate_next_name(names, basename, sep='.', suffix='', default=None): + pattern = re.compile(r'%s(?:%s(\d+))?%s' % \ + tuple(map(re.escape, [basename, sep, suffix]))) + matches = [match for match in (pattern.match(n) for n in names) if match] + + max_idx = max([cast(match[1], int, 0) for match in matches], default=None) + if max_idx is None: + if default is not None: + idx = sep + str(default) + else: + idx = '' + else: + idx = sep + str(max_idx + 1) + return basename + idx + suffix \ No newline at end of file diff --git a/datumaro/components/__init__.py b/datumaro/components/__init__.py new file mode 100644 index 0000000000..5a1ec10f3a --- /dev/null +++ b/datumaro/components/__init__.py @@ -0,0 +1,5 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + diff --git a/datumaro/components/algorithms/__init__.py b/datumaro/components/algorithms/__init__.py new file mode 100644 index 0000000000..5a1ec10f3a --- /dev/null +++ b/datumaro/components/algorithms/__init__.py @@ -0,0 +1,5 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + diff --git a/datumaro/components/algorithms/rise.py b/datumaro/components/algorithms/rise.py new file mode 100644 index 0000000000..3fb9a895c1 --- /dev/null +++ b/datumaro/components/algorithms/rise.py @@ -0,0 +1,203 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +# pylint: disable=unused-variable + +import numpy as np +from math import ceil + +from datumaro.components.extractor import AnnotationType +from datumaro.util.annotation_util import nms + + +def flatmatvec(mat): + return np.reshape(mat, (len(mat), -1)) + +def expand(array, axis=None): + if axis is None: + axis = len(array.shape) + return np.expand_dims(array, axis=axis) + +class RISE: + """ + Implements RISE: Randomized Input Sampling for + Explanation of Black-box Models algorithm + See explanations at: https://arxiv.org/pdf/1806.07421.pdf + """ + + def __init__(self, model, + max_samples=None, mask_width=7, mask_height=7, prob=0.5, + iou_thresh=0.9, nms_thresh=0.0, det_conf_thresh=0.0, + batch_size=1): + self.model = model + self.max_samples = max_samples + self.mask_height = mask_height + self.mask_width = mask_width + self.prob = prob + self.iou_thresh = iou_thresh + self.nms_thresh = nms_thresh + self.det_conf_thresh = det_conf_thresh + self.batch_size = batch_size + + @staticmethod + def split_outputs(annotations): + labels = [] + bboxes = [] + for r in annotations: + if r.type is AnnotationType.label: + labels.append(r) + elif r.type is AnnotationType.bbox: + bboxes.append(r) + return labels, bboxes + + def normalize_hmaps(self, heatmaps, counts): + eps = np.finfo(heatmaps.dtype).eps + mhmaps = flatmatvec(heatmaps) + mhmaps /= expand(counts * self.prob + eps) + mhmaps -= expand(np.min(mhmaps, axis=1)) + mhmaps /= expand(np.max(mhmaps, axis=1) + eps) + return np.reshape(mhmaps, heatmaps.shape) + + def apply(self, image, progressive=False): + import cv2 + + assert len(image.shape) in [2, 3], \ + "Expected an input image in (H, W, C) format" + if len(image.shape) == 3: + assert image.shape[2] in [3, 4], "Expected BGR or BGRA input" + image = image[:, :, :3].astype(np.float32) + + model = self.model + iou_thresh = self.iou_thresh + + image_size = np.array((image.shape[:2])) + mask_size = np.array((self.mask_height, self.mask_width)) + cell_size = np.ceil(image_size / mask_size) + upsampled_size = np.ceil((mask_size 
+ 1) * cell_size) + + rng = lambda shape=None: np.random.rand(*shape) + samples = np.prod(image_size) + if self.max_samples is not None: + samples = min(self.max_samples, samples) + batch_size = self.batch_size + + result = next(iter(model.launch(expand(image, 0)))) + result_labels, result_bboxes = self.split_outputs(result) + if 0 < self.det_conf_thresh: + result_bboxes = [b for b in result_bboxes \ + if self.det_conf_thresh <= b.attributes['score']] + if 0 < self.nms_thresh: + result_bboxes = nms(result_bboxes, self.nms_thresh) + + predicted_labels = set() + if len(result_labels) != 0: + predicted_label = max(result_labels, + key=lambda r: r.attributes['score']).label + predicted_labels.add(predicted_label) + if len(result_bboxes) != 0: + for bbox in result_bboxes: + predicted_labels.add(bbox.label) + predicted_labels = { label: idx \ + for idx, label in enumerate(predicted_labels) } + + predicted_bboxes = result_bboxes + + heatmaps_count = len(predicted_labels) + len(predicted_bboxes) + heatmaps = np.zeros((heatmaps_count, *image_size), dtype=np.float32) + total_counts = np.zeros(heatmaps_count, dtype=np.int32) + confs = np.zeros(heatmaps_count, dtype=np.float32) + + heatmap_id = 0 + + label_heatmaps = None + label_total_counts = None + label_confs = None + if len(predicted_labels) != 0: + step = len(predicted_labels) + label_heatmaps = heatmaps[heatmap_id : heatmap_id + step] + label_total_counts = total_counts[heatmap_id : heatmap_id + step] + label_confs = confs[heatmap_id : heatmap_id + step] + heatmap_id += step + + bbox_heatmaps = None + bbox_total_counts = None + bbox_confs = None + if len(predicted_bboxes) != 0: + step = len(predicted_bboxes) + bbox_heatmaps = heatmaps[heatmap_id : heatmap_id + step] + bbox_total_counts = total_counts[heatmap_id : heatmap_id + step] + bbox_confs = confs[heatmap_id : heatmap_id + step] + heatmap_id += step + + ups_mask = np.empty(upsampled_size.astype(int), dtype=np.float32) + masks = np.empty((batch_size, *image_size), dtype=np.float32) + + full_batch_inputs = np.empty((batch_size, *image.shape), dtype=np.float32) + current_heatmaps = np.empty_like(heatmaps) + for b in range(ceil(samples / batch_size)): + batch_pos = b * batch_size + current_batch_size = min(samples - batch_pos, batch_size) + + batch_masks = masks[: current_batch_size] + for i in range(current_batch_size): + mask = (rng(mask_size) < self.prob).astype(np.float32) + cv2.resize(mask, (int(upsampled_size[1]), int(upsampled_size[0])), + ups_mask) + + offsets = np.round(rng((2,)) * cell_size) + mask = ups_mask[ + int(offsets[0]):int(image_size[0] + offsets[0]), + int(offsets[1]):int(image_size[1] + offsets[1]) ] + batch_masks[i] = mask + + batch_inputs = full_batch_inputs[:current_batch_size] + np.multiply(expand(batch_masks), expand(image, 0), out=batch_inputs) + + results = model.launch(batch_inputs) + for mask, result in zip(batch_masks, results): + result_labels, result_bboxes = self.split_outputs(result) + + confs.fill(0) + if len(predicted_labels) != 0: + for r in result_labels: + idx = predicted_labels.get(r.label, None) + if idx is not None: + label_total_counts[idx] += 1 + label_confs[idx] += r.attributes['score'] + for r in result_bboxes: + idx = predicted_labels.get(r.label, None) + if idx is not None: + label_total_counts[idx] += 1 + label_confs[idx] += r.attributes['score'] + + if len(predicted_bboxes) != 0 and len(result_bboxes) != 0: + if 0 < self.det_conf_thresh: + result_bboxes = [b for b in result_bboxes \ + if self.det_conf_thresh <= b.attributes['score']] + if 0 < 
self.nms_thresh: + result_bboxes = nms(result_bboxes, self.nms_thresh) + + for detection in result_bboxes: + for pred_idx, pred in enumerate(predicted_bboxes): + if pred.label != detection.label: + continue + + iou = pred.iou(detection) + assert iou == -1 or 0 <= iou and iou <= 1 + if iou < iou_thresh: + continue + + bbox_total_counts[pred_idx] += 1 + + conf = detection.attributes['score'] + bbox_confs[pred_idx] += conf + + np.multiply.outer(confs, mask, out=current_heatmaps) + heatmaps += current_heatmaps + + if progressive: + yield self.normalize_hmaps(heatmaps.copy(), total_counts) + + yield self.normalize_hmaps(heatmaps, total_counts) \ No newline at end of file diff --git a/datumaro/components/cli_plugin.py b/datumaro/components/cli_plugin.py new file mode 100644 index 0000000000..e85f5c4f30 --- /dev/null +++ b/datumaro/components/cli_plugin.py @@ -0,0 +1,44 @@ + +# Copyright (C) 2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import argparse + +from datumaro.cli.util import MultilineFormatter +from datumaro.util import to_snake_case + + +class CliPlugin: + @staticmethod + def _get_name(cls): + return getattr(cls, 'NAME', + remove_plugin_type(to_snake_case(cls.__name__))) + + @staticmethod + def _get_doc(cls): + return getattr(cls, '__doc__', "") + + @classmethod + def build_cmdline_parser(cls, **kwargs): + args = { + 'prog': cls._get_name(cls), + 'description': cls._get_doc(cls), + 'formatter_class': MultilineFormatter, + } + args.update(kwargs) + + return argparse.ArgumentParser(**args) + + @classmethod + def from_cmdline(cls, args=None): + if args and args[0] == '--': + args = args[1:] + parser = cls.build_cmdline_parser() + args = parser.parse_args(args) + return vars(args) + +def remove_plugin_type(s): + for t in {'transform', 'extractor', 'converter', 'launcher', 'importer'}: + s = s.replace('_' + t, '') + return s diff --git a/datumaro/components/config.py b/datumaro/components/config.py new file mode 100644 index 0000000000..a79cda151b --- /dev/null +++ b/datumaro/components/config.py @@ -0,0 +1,237 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import yaml + + +class Schema: + class Item: + def __init__(self, ctor, internal=False): + self.ctor = ctor + self.internal = internal + + def __call__(self, *args, **kwargs): + return self.ctor(*args, **kwargs) + + def __init__(self, items=None, fallback=None): + self._items = {} + if items is not None: + self._items.update(items) + self._fallback = fallback + + def _get_items(self, allow_fallback=True): + all_items = {} + + if allow_fallback and self._fallback is not None: + all_items.update(self._fallback) + all_items.update(self._items) + + return all_items + + def items(self, allow_fallback=True): + return self._get_items(allow_fallback=allow_fallback).items() + + def keys(self, allow_fallback=True): + return self._get_items(allow_fallback=allow_fallback).keys() + + def values(self, allow_fallback=True): + return self._get_items(allow_fallback=allow_fallback).values() + + def __contains__(self, key): + return key in self.keys() + + def __len__(self): + return len(self._get_items()) + + def __iter__(self): + return iter(self._get_items()) + + def __getitem__(self, key): + default = object() + value = self.get(key, default=default) + if value is default: + raise KeyError('Key "%s" does not exist' % (key)) + return value + + def get(self, key, default=None): + found = self._items.get(key, default) + if found is not default: + return found + + if self._fallback is not None: + 
return self._fallback.get(key, default) + +class SchemaBuilder: + def __init__(self): + self._items = {} + + def add(self, name, ctor=str, internal=False): + if name in self._items: + raise KeyError('Key "%s" already exists' % (name)) + + self._items[name] = Schema.Item(ctor, internal=internal) + return self + + def build(self): + return Schema(self._items) + +class Config: + def __init__(self, config=None, fallback=None, schema=None, mutable=True): + # schema should be established first + self.__dict__['_schema'] = schema + self.__dict__['_mutable'] = True + + self.__dict__['_config'] = {} + if fallback is not None: + for k, v in fallback.items(allow_fallback=False): + self.set(k, v) + if config is not None: + self.update(config) + + self.__dict__['_mutable'] = mutable + + def _items(self, allow_fallback=True, allow_internal=True): + all_config = {} + if allow_fallback and self._schema is not None: + for key, item in self._schema.items(): + all_config[key] = item() + all_config.update(self._config) + + if not allow_internal and self._schema is not None: + for key, item in self._schema.items(): + if item.internal: + all_config.pop(key) + return all_config + + def items(self, allow_fallback=True, allow_internal=True): + return self._items( + allow_fallback=allow_fallback, + allow_internal=allow_internal + ).items() + + def keys(self, allow_fallback=True, allow_internal=True): + return self._items( + allow_fallback=allow_fallback, + allow_internal=allow_internal + ).keys() + + def values(self, allow_fallback=True, allow_internal=True): + return self._items( + allow_fallback=allow_fallback, + allow_internal=allow_internal + ).values() + + def __contains__(self, key): + return key in self.keys() + + def __len__(self): + return len(self.items()) + + def __iter__(self): + return iter(self.keys()) + + def __getitem__(self, key): + default = object() + value = self.get(key, default=default) + if value is default: + raise KeyError('Key "%s" does not exist' % (key)) + return value + + def __setitem__(self, key, value): + return self.set(key, value) + + def __getattr__(self, key): + return self.get(key) + + def __setattr__(self, key, value): + return self.set(key, value) + + def __eq__(self, other): + try: + for k, my_v in self.items(allow_internal=False): + other_v = other[k] + if my_v != other_v: + return False + return True + except Exception: + return False + + def update(self, other): + for k, v in other.items(): + self.set(k, v) + + def remove(self, key): + if not self._mutable: + raise Exception("Cannot set value of immutable object") + + self._config.pop(key, None) + + def get(self, key, default=None): + found = self._config.get(key, default) + if found is not default: + return found + + if self._schema is not None: + found = self._schema.get(key, default) + if found is not default: + # ignore mutability + found = found() + self._config[key] = found + return found + + return found + + def set(self, key, value): + if not self._mutable: + raise Exception("Cannot set value of immutable object") + + if self._schema is not None: + if key not in self._schema: + raise Exception("Can not set key '%s' - schema mismatch" % (key)) + + schema_entry = self._schema[key] + schema_entry_instance = schema_entry() + if not isinstance(value, type(schema_entry_instance)): + if isinstance(value, dict) and \ + isinstance(schema_entry_instance, Config): + schema_entry_instance.update(value) + value = schema_entry_instance + else: + raise Exception("Can not set key '%s' - schema mismatch" % (key)) + + 
self._config[key] = value + return value + + @staticmethod + def parse(path): + with open(path, 'r') as f: + return Config(yaml.safe_load(f)) + + @staticmethod + def yaml_representer(dumper, value): + return dumper.represent_data( + value._items(allow_internal=False, allow_fallback=False)) + + def dump(self, path): + with open(path, 'w+') as f: + yaml.dump(self, f) + +yaml.add_multi_representer(Config, Config.yaml_representer) + + +class DefaultConfig(Config): + def __init__(self, default=None): + super().__init__() + self.__dict__['_default'] = default + + def set(self, key, value): + if key not in self.keys(allow_fallback=False): + value = self._default(value) + return super().set(key, value) + else: + return super().set(key, value) + + +DEFAULT_FORMAT = 'datumaro' \ No newline at end of file diff --git a/datumaro/components/config_model.py b/datumaro/components/config_model.py new file mode 100644 index 0000000000..c6f65179a6 --- /dev/null +++ b/datumaro/components/config_model.py @@ -0,0 +1,63 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from datumaro.components.config import Config, \ + DefaultConfig as _DefaultConfig, \ + SchemaBuilder as _SchemaBuilder + + +SOURCE_SCHEMA = _SchemaBuilder() \ + .add('url', str) \ + .add('format', str) \ + .add('options', dict) \ + .build() + +class Source(Config): + def __init__(self, config=None): + super().__init__(config, schema=SOURCE_SCHEMA) + + +MODEL_SCHEMA = _SchemaBuilder() \ + .add('launcher', str) \ + .add('options', dict) \ + .build() + +class Model(Config): + def __init__(self, config=None): + super().__init__(config, schema=MODEL_SCHEMA) + + +PROJECT_SCHEMA = _SchemaBuilder() \ + .add('project_name', str) \ + .add('format_version', int) \ + \ + .add('subsets', list) \ + .add('sources', lambda: _DefaultConfig( + lambda v=None: Source(v))) \ + .add('models', lambda: _DefaultConfig( + lambda v=None: Model(v))) \ + \ + .add('models_dir', str, internal=True) \ + .add('plugins_dir', str, internal=True) \ + .add('sources_dir', str, internal=True) \ + .add('dataset_dir', str, internal=True) \ + .add('project_filename', str, internal=True) \ + .add('project_dir', str, internal=True) \ + .add('env_dir', str, internal=True) \ + .build() + +PROJECT_DEFAULT_CONFIG = Config({ + 'project_name': 'undefined', + 'format_version': 1, + + 'sources_dir': 'sources', + 'dataset_dir': 'dataset', + 'models_dir': 'models', + 'plugins_dir': 'plugins', + + 'project_filename': 'config.yaml', + 'project_dir': '', + 'env_dir': '.datumaro', +}, mutable=False, schema=PROJECT_SCHEMA) diff --git a/datumaro/components/converter.py b/datumaro/components/converter.py new file mode 100644 index 0000000000..05dedb48ac --- /dev/null +++ b/datumaro/components/converter.py @@ -0,0 +1,79 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import logging as log +import os +import os.path as osp +import shutil + +from datumaro.components.cli_plugin import CliPlugin +from datumaro.util.image import save_image + + +class IConverter: + @classmethod + def convert(cls, extractor, save_dir, **options): + raise NotImplementedError("Should be implemented in a subclass") + +class Converter(IConverter, CliPlugin): + DEFAULT_IMAGE_EXT = None + + @classmethod + def build_cmdline_parser(cls, **kwargs): + parser = super().build_cmdline_parser(**kwargs) + parser.add_argument('--save-images', action='store_true', + help="Save images (default: %(default)s)") + parser.add_argument('--image-ext', default=None, + 
help="Image extension (default: keep or use format default%s)" % \ + (' ' + cls.DEFAULT_IMAGE_EXT if cls.DEFAULT_IMAGE_EXT else '')) + + return parser + + @classmethod + def convert(cls, extractor, save_dir, **options): + converter = cls(extractor, save_dir, **options) + return converter.apply() + + def apply(self): + raise NotImplementedError("Should be implemented in a subclass") + + def __init__(self, extractor, save_dir, save_images=False, + image_ext=None, default_image_ext=None): + default_image_ext = default_image_ext or self.DEFAULT_IMAGE_EXT + assert default_image_ext + self._default_image_ext = default_image_ext + + self._save_images = save_images + self._image_ext = image_ext + + self._extractor = extractor + self._save_dir = save_dir + + def _find_image_ext(self, item): + src_ext = None + if item.has_image: + src_ext = osp.splitext(osp.basename(item.image.path))[1] + + return self._image_ext or src_ext or self._default_image_ext + + def _make_image_filename(self, item): + return item.id + self._find_image_ext(item) + + def _save_image(self, item, path=None): + image = item.image.data + if image is None: + log.warning("Item '%s' has no image", item.id) + return item.image.path + + path = path or self._make_image_filename(item) + + src_ext = osp.splitext(osp.basename(item.image.path))[1] + dst_ext = osp.splitext(osp.basename(path))[1] + + os.makedirs(osp.dirname(path), exist_ok=True) + if src_ext == dst_ext and osp.isfile(item.image.path): + shutil.copyfile(item.image.path, path) + else: + save_image(path, image) diff --git a/datumaro/components/dataset_filter.py b/datumaro/components/dataset_filter.py new file mode 100644 index 0000000000..2fe1443d51 --- /dev/null +++ b/datumaro/components/dataset_filter.py @@ -0,0 +1,261 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import logging as log +from lxml import etree as ET # lxml has proper XPath implementation +from datumaro.components.extractor import (Transform, + Annotation, AnnotationType, + Label, Mask, Points, Polygon, PolyLine, Bbox, Caption, +) + + +class DatasetItemEncoder: + @classmethod + def encode(cls, item, categories=None): + item_elem = ET.Element('item') + ET.SubElement(item_elem, 'id').text = str(item.id) + ET.SubElement(item_elem, 'subset').text = str(item.subset) + ET.SubElement(item_elem, 'path').text = str('/'.join(item.path)) + + image = item.image + if image is not None: + item_elem.append(cls.encode_image(image)) + + for ann in item.annotations: + item_elem.append(cls.encode_annotation(ann, categories)) + + return item_elem + + @classmethod + def encode_image(cls, image): + image_elem = ET.Element('image') + + size = image.size + if size is not None: + h, w = size + else: + h = 'unknown' + w = h + ET.SubElement(image_elem, 'width').text = str(w) + ET.SubElement(image_elem, 'height').text = str(h) + + ET.SubElement(image_elem, 'has_data').text = '%d' % int(image.has_data) + ET.SubElement(image_elem, 'path').text = image.path + + return image_elem + + @classmethod + def encode_annotation_base(cls, annotation): + assert isinstance(annotation, Annotation) + ann_elem = ET.Element('annotation') + ET.SubElement(ann_elem, 'id').text = str(annotation.id) + ET.SubElement(ann_elem, 'type').text = str(annotation.type.name) + + for k, v in annotation.attributes.items(): + ET.SubElement(ann_elem, k.replace(' ', '-')).text = str(v) + + ET.SubElement(ann_elem, 'group').text = str(annotation.group) + + return ann_elem + + @staticmethod + def _get_label(label_id, categories): + label 
= '' + if label_id is None: + return '' + if categories is not None: + label_cat = categories.get(AnnotationType.label) + if label_cat is not None: + label = label_cat.items[label_id].name + return label + + @classmethod + def encode_label_object(cls, obj, categories): + ann_elem = cls.encode_annotation_base(obj) + + ET.SubElement(ann_elem, 'label').text = \ + str(cls._get_label(obj.label, categories)) + ET.SubElement(ann_elem, 'label_id').text = str(obj.label) + + return ann_elem + + @classmethod + def encode_mask_object(cls, obj, categories): + ann_elem = cls.encode_annotation_base(obj) + + ET.SubElement(ann_elem, 'label').text = \ + str(cls._get_label(obj.label, categories)) + ET.SubElement(ann_elem, 'label_id').text = str(obj.label) + + return ann_elem + + @classmethod + def encode_bbox_object(cls, obj, categories): + ann_elem = cls.encode_annotation_base(obj) + + ET.SubElement(ann_elem, 'label').text = \ + str(cls._get_label(obj.label, categories)) + ET.SubElement(ann_elem, 'label_id').text = str(obj.label) + ET.SubElement(ann_elem, 'x').text = str(obj.x) + ET.SubElement(ann_elem, 'y').text = str(obj.y) + ET.SubElement(ann_elem, 'w').text = str(obj.w) + ET.SubElement(ann_elem, 'h').text = str(obj.h) + ET.SubElement(ann_elem, 'area').text = str(obj.get_area()) + + return ann_elem + + @classmethod + def encode_points_object(cls, obj, categories): + ann_elem = cls.encode_annotation_base(obj) + + ET.SubElement(ann_elem, 'label').text = \ + str(cls._get_label(obj.label, categories)) + ET.SubElement(ann_elem, 'label_id').text = str(obj.label) + + x, y, w, h = obj.get_bbox() + area = w * h + bbox_elem = ET.SubElement(ann_elem, 'bbox') + ET.SubElement(bbox_elem, 'x').text = str(x) + ET.SubElement(bbox_elem, 'y').text = str(y) + ET.SubElement(bbox_elem, 'w').text = str(w) + ET.SubElement(bbox_elem, 'h').text = str(h) + ET.SubElement(bbox_elem, 'area').text = str(area) + + points = obj.points + for i in range(0, len(points), 2): + point_elem = ET.SubElement(ann_elem, 'point') + ET.SubElement(point_elem, 'x').text = str(points[i]) + ET.SubElement(point_elem, 'y').text = str(points[i + 1]) + ET.SubElement(point_elem, 'visible').text = \ + str(obj.visibility[i // 2].name) + + return ann_elem + + @classmethod + def encode_polygon_object(cls, obj, categories): + ann_elem = cls.encode_annotation_base(obj) + + ET.SubElement(ann_elem, 'label').text = \ + str(cls._get_label(obj.label, categories)) + ET.SubElement(ann_elem, 'label_id').text = str(obj.label) + + x, y, w, h = obj.get_bbox() + area = w * h + bbox_elem = ET.SubElement(ann_elem, 'bbox') + ET.SubElement(bbox_elem, 'x').text = str(x) + ET.SubElement(bbox_elem, 'y').text = str(y) + ET.SubElement(bbox_elem, 'w').text = str(w) + ET.SubElement(bbox_elem, 'h').text = str(h) + ET.SubElement(bbox_elem, 'area').text = str(area) + + points = obj.points + for i in range(0, len(points), 2): + point_elem = ET.SubElement(ann_elem, 'point') + ET.SubElement(point_elem, 'x').text = str(points[i]) + ET.SubElement(point_elem, 'y').text = str(points[i + 1]) + + return ann_elem + + @classmethod + def encode_polyline_object(cls, obj, categories): + ann_elem = cls.encode_annotation_base(obj) + + ET.SubElement(ann_elem, 'label').text = \ + str(cls._get_label(obj.label, categories)) + ET.SubElement(ann_elem, 'label_id').text = str(obj.label) + + x, y, w, h = obj.get_bbox() + area = w * h + bbox_elem = ET.SubElement(ann_elem, 'bbox') + ET.SubElement(bbox_elem, 'x').text = str(x) + ET.SubElement(bbox_elem, 'y').text = str(y) + ET.SubElement(bbox_elem, 'w').text = str(w) 
+ ET.SubElement(bbox_elem, 'h').text = str(h) + ET.SubElement(bbox_elem, 'area').text = str(area) + + points = obj.points + for i in range(0, len(points), 2): + point_elem = ET.SubElement(ann_elem, 'point') + ET.SubElement(point_elem, 'x').text = str(points[i]) + ET.SubElement(point_elem, 'y').text = str(points[i + 1]) + + return ann_elem + + @classmethod + def encode_caption_object(cls, obj): + ann_elem = cls.encode_annotation_base(obj) + + ET.SubElement(ann_elem, 'caption').text = str(obj.caption) + + return ann_elem + + @classmethod + def encode_annotation(cls, o, categories=None): + if isinstance(o, Label): + return cls.encode_label_object(o, categories) + if isinstance(o, Mask): + return cls.encode_mask_object(o, categories) + if isinstance(o, Bbox): + return cls.encode_bbox_object(o, categories) + if isinstance(o, Points): + return cls.encode_points_object(o, categories) + if isinstance(o, PolyLine): + return cls.encode_polyline_object(o, categories) + if isinstance(o, Polygon): + return cls.encode_polygon_object(o, categories) + if isinstance(o, Caption): + return cls.encode_caption_object(o) + raise NotImplementedError("Unexpected annotation object passed: %s" % o) + + @staticmethod + def to_string(encoded_item): + return ET.tostring(encoded_item, encoding='unicode', pretty_print=True) + +def XPathDatasetFilter(extractor, xpath=None): + if xpath is None: + return extractor + try: + xpath = ET.XPath(xpath) + except Exception: + log.error("Failed to create XPath from expression '%s'", xpath) + raise + f = lambda item: bool(xpath( + DatasetItemEncoder.encode(item, extractor.categories()))) + return extractor.select(f) + +class XPathAnnotationsFilter(Transform): + def __init__(self, extractor, xpath=None, remove_empty=False): + super().__init__(extractor) + + if xpath is not None: + try: + xpath = ET.XPath(xpath) + except Exception: + log.error("Failed to create XPath from expression '%s'", xpath) + raise + self._filter = xpath + + self._remove_empty = remove_empty + + def __iter__(self): + for item in self._extractor: + item = self.transform_item(item) + if item is not None: + yield item + + def transform_item(self, item): + if self._filter is None: + return item + + encoded = DatasetItemEncoder.encode(item, self._extractor.categories()) + filtered = self._filter(encoded) + filtered = [elem for elem in filtered if elem.tag == 'annotation'] + + encoded = encoded.findall('annotation') + annotations = [item.annotations[encoded.index(e)] for e in filtered] + + if self._remove_empty and len(annotations) == 0: + return None + return self.wrap_item(item, annotations=annotations) \ No newline at end of file diff --git a/datumaro/components/extractor.py b/datumaro/components/extractor.py new file mode 100644 index 0000000000..dcb7b036c0 --- /dev/null +++ b/datumaro/components/extractor.py @@ -0,0 +1,621 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from collections import namedtuple +from enum import Enum +import numpy as np + +import attr +from attr import attrs, attrib + +from datumaro.util.image import Image +from datumaro.util.attrs_util import not_empty, default_if_none + + +AnnotationType = Enum('AnnotationType', + [ + 'label', + 'mask', + 'points', + 'polygon', + 'polyline', + 'bbox', + 'caption', + ]) + +_COORDINATE_ROUNDING_DIGITS = 2 + +@attrs(kw_only=True) +class Annotation: + id = attrib(default=0, validator=default_if_none(int)) + attributes = attrib(factory=dict, validator=default_if_none(dict)) + group = attrib(default=0, 
validator=default_if_none(int))
+
+    def __attrs_post_init__(self):
+        assert isinstance(self.type, AnnotationType)
+
+    @property
+    def type(self) -> AnnotationType:
+        return self._type # must be set in subclasses
+
+    def wrap(self, **kwargs):
+        return attr.evolve(self, **kwargs)
+
+@attrs(kw_only=True)
+class Categories:
+    attributes = attrib(factory=set, validator=default_if_none(set), eq=False)
+
+@attrs
+class LabelCategories(Categories):
+    @attrs(repr_ns='LabelCategories')
+    class Category:
+        name = attrib(converter=str, validator=not_empty)
+        parent = attrib(default='', validator=default_if_none(str))
+        attributes = attrib(factory=set, validator=default_if_none(set))
+
+    items = attrib(factory=list, validator=default_if_none(list))
+    _indices = attrib(factory=dict, init=False, eq=False)
+
+    @classmethod
+    def from_iterable(cls, iterable):
+        """Creates a LabelCategories object from an iterable.
+
+        Args:
+            iterable: can be a single str (produces one Category with
+                that name), a list of str (interpreted as a list of
+                Category names), or a list of positional arguments
+                (each entry produces one Category from those arguments)
+
+        Returns:
+            LabelCategories: a LabelCategories object
+        """
+        temp_categories = cls()
+
+        if isinstance(iterable, str):
+            iterable = [[iterable]]
+
+        for category in iterable:
+            if isinstance(category, str):
+                category = [category]
+            temp_categories.add(*category)
+
+        return temp_categories
+
+    def __attrs_post_init__(self):
+        self._reindex()
+
+    def _reindex(self):
+        indices = {}
+        for index, item in enumerate(self.items):
+            assert item.name not in indices # item names must be unique
+            indices[item.name] = index
+        self._indices = indices
+
+    def add(self, name: str, parent: str = None, attributes: dict = None):
+        assert name not in self._indices, name
+
+        index = len(self.items)
+        self.items.append(self.Category(name, parent, attributes))
+        self._indices[name] = index
+        return index
+
+    def find(self, name: str):
+        index = self._indices.get(name)
+        if index is not None:
+            return index, self.items[index]
+        return index, None
+
+@attrs
+class Label(Annotation):
+    _type = AnnotationType.label
+    label = attrib(converter=int)
+
+@attrs(eq=False)
+class MaskCategories(Categories):
+    colormap = attrib(factory=dict, validator=default_if_none(dict))
+    _inverse_colormap = attrib(default=None,
+        validator=attr.validators.optional(dict))
+
+    @property
+    def inverse_colormap(self):
+        from datumaro.util.mask_tools import invert_colormap
+        if self._inverse_colormap is None:
+            if self.colormap is not None:
+                self._inverse_colormap = invert_colormap(self.colormap)
+        return self._inverse_colormap
+
+    def __eq__(self, other):
+        if not super().__eq__(other):
+            return False
+        if not isinstance(other, __class__):
+            return False
+        for label_id, my_color in self.colormap.items():
+            other_color = other.colormap.get(label_id)
+            if not np.array_equal(my_color, other_color):
+                return False
+        return True
+
+@attrs(eq=False)
+class Mask(Annotation):
+    _type = AnnotationType.mask
+    _image = attrib()
+    label = attrib(converter=attr.converters.optional(int),
+        default=None, kw_only=True)
+    z_order = attrib(default=0, validator=default_if_none(int), kw_only=True)
+
+    @property
+    def image(self):
+        if callable(self._image):
+            return self._image()
+        return self._image
+
+    def as_class_mask(self, label_id=None):
+        if label_id is None:
+            label_id = self.label
+        return self.image * label_id
+
+    def as_instance_mask(self, instance_id):
+        return self.image * instance_id
+
+    def get_area(self):
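+        # a binary mask's area is the number of its non-zero (foreground) pixels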
return np.count_nonzero(self.image) + + def get_bbox(self): + from datumaro.util.mask_tools import find_mask_bbox + return find_mask_bbox(self.image) + + def paint(self, colormap): + from datumaro.util.mask_tools import paint_mask + return paint_mask(self.as_class_mask(), colormap) + + def __eq__(self, other): + if not super().__eq__(other): + return False + if not isinstance(other, __class__): + return False + return \ + (self.label == other.label) and \ + (self.z_order == other.z_order) and \ + (np.array_equal(self.image, other.image)) + +@attrs(eq=False) +class RleMask(Mask): + rle = attrib() + _image = attrib(default=attr.Factory( + lambda self: self._lazy_decode(self.rle), + takes_self=True), init=False) + + @staticmethod + def _lazy_decode(rle): + from pycocotools import mask as mask_utils + return lambda: mask_utils.decode(rle).astype(np.bool) + + def get_area(self): + from pycocotools import mask as mask_utils + return mask_utils.area(self.rle) + + def get_bbox(self): + from pycocotools import mask as mask_utils + return mask_utils.toBbox(self.rle) + + def __eq__(self, other): + if not isinstance(other, __class__): + return super().__eq__(other) + return self.rle == other.rle + +class CompiledMask: + @staticmethod + def from_instance_masks(instance_masks, + instance_ids=None, instance_labels=None): + from datumaro.util.mask_tools import merge_masks + + if instance_ids is not None: + assert len(instance_ids) == len(instance_masks) + else: + instance_ids = [None] * len(instance_masks) + + if instance_labels is not None: + assert len(instance_labels) == len(instance_masks) + else: + instance_labels = [None] * len(instance_masks) + + instance_masks = sorted( + zip(instance_masks, instance_ids, instance_labels), + key=lambda m: m[0].z_order) + + instance_mask = [m.as_instance_mask(id if id is not None else 1 + idx) + for idx, (m, id, _) in enumerate(instance_masks)] + instance_mask = merge_masks(instance_mask) + + cls_mask = [m.as_class_mask(c) for m, _, c in instance_masks] + cls_mask = merge_masks(cls_mask) + return __class__(class_mask=cls_mask, instance_mask=instance_mask) + + def __init__(self, class_mask=None, instance_mask=None): + self._class_mask = class_mask + self._instance_mask = instance_mask + + @staticmethod + def _get_image(image): + if callable(image): + return image() + return image + + @property + def class_mask(self): + return self._get_image(self._class_mask) + + @property + def instance_mask(self): + return self._get_image(self._instance_mask) + + @property + def instance_count(self): + return int(self.instance_mask.max()) + + def get_instance_labels(self): + class_shift = 16 + m = (self.class_mask.astype(np.uint32) << class_shift) \ + + self.instance_mask.astype(np.uint32) + keys = np.unique(m) + instance_labels = {k & ((1 << class_shift) - 1): k >> class_shift + for k in keys if k & ((1 << class_shift) - 1) != 0 + } + return instance_labels + + def extract(self, instance_id): + return self.instance_mask == instance_id + + def lazy_extract(self, instance_id): + return lambda: self.extract(instance_id) + +@attrs +class _Shape(Annotation): + points = attrib(converter=lambda x: + [round(p, _COORDINATE_ROUNDING_DIGITS) for p in x]) + label = attrib(converter=attr.converters.optional(int), + default=None, kw_only=True) + z_order = attrib(default=0, validator=default_if_none(int), kw_only=True) + + def get_area(self): + raise NotImplementedError() + + def get_bbox(self): + points = self.points + if not points: + return None + + xs = [p for p in points[0::2]] + ys = [p 
for p in points[1::2]] + x0 = min(xs) + x1 = max(xs) + y0 = min(ys) + y1 = max(ys) + return [x0, y0, x1 - x0, y1 - y0] + +@attrs +class PolyLine(_Shape): + _type = AnnotationType.polyline + + def as_polygon(self): + return self.points[:] + + def get_area(self): + return 0 + +@attrs +class Polygon(_Shape): + _type = AnnotationType.polygon + + def __attrs_post_init__(self): + super().__attrs_post_init__() + # keep the message on a single line to produce informative output + assert len(self.points) % 2 == 0 and 3 <= len(self.points) // 2, "Wrong polygon points: %s" % self.points + + def get_area(self): + import pycocotools.mask as mask_utils + + x, y, w, h = self.get_bbox() + rle = mask_utils.frPyObjects([self.points], y + h, x + w) + area = mask_utils.area(rle)[0] + return area + +@attrs +class Bbox(_Shape): + _type = AnnotationType.bbox + + # will be overridden by attrs, then will be overridden again by us + # attrs' method will be renamed to __attrs_init__ + def __init__(self, x, y, w, h, *args, **kwargs): + kwargs.pop('points', None) # comes from wrap() + self.__attrs_init__([x, y, x + w, y + h], *args, **kwargs) + __actual_init__ = __init__ # save pointer + + @property + def x(self): + return self.points[0] + + @property + def y(self): + return self.points[1] + + @property + def w(self): + return self.points[2] - self.points[0] + + @property + def h(self): + return self.points[3] - self.points[1] + + def get_area(self): + return self.w * self.h + + def get_bbox(self): + return [self.x, self.y, self.w, self.h] + + def as_polygon(self): + x, y, w, h = self.get_bbox() + return [ + x, y, + x + w, y, + x + w, y + h, + x, y + h + ] + + def iou(self, other): + from datumaro.util.annotation_util import bbox_iou + return bbox_iou(self.get_bbox(), other.get_bbox()) + + def wrap(item, **kwargs): + d = {'x': item.x, 'y': item.y, 'w': item.w, 'h': item.h} + d.update(kwargs) + return attr.evolve(item, **d) + +assert not hasattr(Bbox, '__attrs_init__') # hopefully, it will be supported +setattr(Bbox, '__attrs_init__', Bbox.__init__) +setattr(Bbox, '__init__', Bbox.__actual_init__) + +@attrs +class PointsCategories(Categories): + @attrs(repr_ns="PointsCategories") + class Category: + labels = attrib(factory=list, validator=default_if_none(list)) + joints = attrib(factory=set, validator=default_if_none(set)) + + items = attrib(factory=dict, validator=default_if_none(dict)) + + @classmethod + def from_iterable(cls, iterable): + """Generation of PointsCategories from iterable object + + Args: + iterable ([type]): This iterable object can be: + 1) list of positional argumetns - will generate Categories + with these arguments + + Returns: + PointsCategories: PointsCategories object + """ + temp_categories = cls() + + for category in iterable: + temp_categories.add(*category) + return temp_categories + + def add(self, label_id, labels=None, joints=None): + if joints is None: + joints = [] + joints = set(map(tuple, joints)) + self.items[label_id] = self.Category(labels, joints) + +@attrs +class Points(_Shape): + Visibility = Enum('Visibility', [ + ('absent', 0), + ('hidden', 1), + ('visible', 2), + ]) + _type = AnnotationType.points + + visibility = attrib(type=list, default=None) + @visibility.validator + def _visibility_validator(self, attribute, visibility): + if visibility is None: + visibility = [self.Visibility.visible] * (len(self.points) // 2) + else: + for i, v in enumerate(visibility): + if not isinstance(v, self.Visibility): + visibility[i] = self.Visibility(v) + assert len(visibility) == 
len(self.points) // 2
+        self.visibility = visibility
+
+    def __attrs_post_init__(self):
+        super().__attrs_post_init__()
+        assert len(self.points) % 2 == 0, self.points
+
+    def get_area(self):
+        return 0
+
+    def get_bbox(self):
+        xs = [p for p, v in zip(self.points[0::2], self.visibility)
+            if v != __class__.Visibility.absent]
+        ys = [p for p, v in zip(self.points[1::2], self.visibility)
+            if v != __class__.Visibility.absent]
+        x0 = min(xs, default=0)
+        x1 = max(xs, default=0)
+        y0 = min(ys, default=0)
+        y1 = max(ys, default=0)
+        return [x0, y0, x1 - x0, y1 - y0]
+
+@attrs
+class Caption(Annotation):
+    _type = AnnotationType.caption
+    caption = attrib(converter=str)
+
+@attrs
+class DatasetItem:
+    id = attrib(converter=lambda x: str(x).replace('\\', '/'),
+        type=str, validator=not_empty)
+    annotations = attrib(factory=list, validator=default_if_none(list))
+    subset = attrib(default='', validator=default_if_none(str))
+    path = attrib(factory=list, validator=default_if_none(list))
+
+    image = attrib(type=Image, default=None)
+    @image.validator
+    def _image_validator(self, attribute, image):
+        if callable(image) or isinstance(image, np.ndarray):
+            image = Image(data=image)
+        elif isinstance(image, str):
+            image = Image(path=image)
+        assert image is None or isinstance(image, Image)
+        self.image = image
+
+    attributes = attrib(factory=dict, validator=default_if_none(dict))
+
+    @property
+    def has_image(self):
+        return self.image is not None
+
+    def wrap(item, **kwargs):
+        return attr.evolve(item, **kwargs)
+
+class IExtractor:
+    def __iter__(self):
+        raise NotImplementedError()
+
+    def __len__(self):
+        raise NotImplementedError()
+
+    def subsets(self):
+        raise NotImplementedError()
+
+    def get_subset(self, name):
+        raise NotImplementedError()
+
+    def categories(self):
+        raise NotImplementedError()
+
+    def select(self, pred):
+        raise NotImplementedError()
+
+class _DatasetFilter:
+    def __init__(self, iterable, predicate):
+        self.iterable = iterable
+        self.predicate = predicate
+
+    def __iter__(self):
+        return filter(self.predicate, self.iterable)
+
+class _ExtractorBase(IExtractor):
+    def __init__(self, length=None, subsets=None):
+        self._length = length
+        self._subsets = subsets
+
+    def _init_cache(self):
+        subsets = set()
+        length = -1
+        for length, item in enumerate(self):
+            subsets.add(item.subset)
+        length += 1
+
+        if self._length is None:
+            self._length = length
+        if self._subsets is None:
+            self._subsets = subsets
+
+    def __len__(self):
+        if self._length is None:
+            self._init_cache()
+        return self._length
+
+    def subsets(self):
+        if self._subsets is None:
+            self._init_cache()
+        return list(self._subsets)
+
+    def get_subset(self, name):
+        if name in self.subsets():
+            return self.select(lambda item: item.subset == name)
+        else:
+            raise Exception("Unknown subset '%s' requested" % name)
+
+    def transform(self, method, *args, **kwargs):
+        return method(self, *args, **kwargs)
+
+class DatasetIteratorWrapper(_ExtractorBase):
+    def __init__(self, iterable, categories, subsets=None):
+        super().__init__(length=None, subsets=subsets)
+        self._iterable = iterable
+        self._categories = categories
+
+    def __iter__(self):
+        return iter(self._iterable)
+
+    def categories(self):
+        return self._categories
+
+    def select(self, pred):
+        return DatasetIteratorWrapper(
+            _DatasetFilter(self, pred), self.categories(), self.subsets())
+
+class Extractor(_ExtractorBase):
+    def __init__(self, length=None):
+        super().__init__(length=length)
+
+    def categories(self):
+        return {}
+
+    def select(self, pred):
+        return DatasetIteratorWrapper(
+            _DatasetFilter(self, pred), self.categories(), self.subsets())
+
+DEFAULT_SUBSET_NAME = 'default'
+
+
+class SourceExtractor(Extractor):
+    def __init__(self, length=None, subset=None):
+        super().__init__(length=length)
+
+        if subset == DEFAULT_SUBSET_NAME:
+            subset = None
+        self._subset = subset
+
+    def subsets(self):
+        return [self._subset]
+
+    def get_subset(self, name):
+        if name != self._subset:
+            raise Exception("Unknown subset '%s' requested" % name)
+        return self
+
+class Importer:
+    @classmethod
+    def detect(cls, path):
+        raise NotImplementedError()
+
+    def __call__(self, path, **extra_params):
+        raise NotImplementedError()
+
+class Transform(Extractor):
+    @staticmethod
+    def wrap_item(item, **kwargs):
+        return item.wrap(**kwargs)
+
+    def __init__(self, extractor):
+        super().__init__()
+
+        self._extractor = extractor
+
+    def __iter__(self):
+        for item in self._extractor:
+            yield self.transform_item(item)
+
+    def categories(self):
+        return self._extractor.categories()
+
+    def transform_item(self, item: DatasetItem) -> DatasetItem:
+        raise NotImplementedError()
diff --git a/datumaro/components/launcher.py b/datumaro/components/launcher.py
new file mode 100644
index 0000000000..adc31fb575
--- /dev/null
+++ b/datumaro/components/launcher.py
@@ -0,0 +1,67 @@
+
+# Copyright (C) 2019-2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import numpy as np
+
+from datumaro.components.extractor import (Transform, LabelCategories,
+    AnnotationType)
+from datumaro.util import take_by
+
+
+# pylint: disable=no-self-use
+class Launcher:
+    def __init__(self, model_dir=None):
+        pass
+
+    def launch(self, inputs):
+        raise NotImplementedError()
+
+    def categories(self):
+        return None
+# pylint: enable=no-self-use
+
+class ModelTransform(Transform):
+    def __init__(self, extractor, launcher, batch_size=1):
+        super().__init__(extractor)
+        self._launcher = launcher
+        self._batch_size = batch_size
+
+    def __iter__(self):
+        for batch in take_by(self._extractor, self._batch_size):
+            inputs = np.array([item.image.data for item in batch])
+            inference = self._launcher.launch(inputs)
+
+            for item, annotations in zip(batch, inference):
+                self._check_annotations(annotations)
+                yield self.wrap_item(item, annotations=annotations)
+
+    def get_subset(self, name):
+        subset = self._extractor.get_subset(name)
+        return __class__(subset, self._launcher, self._batch_size)
+
+    def categories(self):
+        launcher_override = self._launcher.categories()
+        if launcher_override is not None:
+            return launcher_override
+        return self._extractor.categories()
+
+    def transform_item(self, item):
+        inputs = np.expand_dims(item.image.data, axis=0)
+        annotations = self._launcher.launch(inputs)[0]
+        return self.wrap_item(item, annotations=annotations)
+
+    def _check_annotations(self, annotations):
+        labels_count = len(self.categories().get(
+            AnnotationType.label, LabelCategories()).items)
+
+        for ann in annotations:
+            label = getattr(ann, 'label', None) # not all annotations have a label
+            if label is None:
+                continue
+
+            if label not in range(labels_count):
+                raise Exception("Annotation has unexpected label id %s, "
+                    "while there are only %s defined labels."
% \ + (label, labels_count)) \ No newline at end of file diff --git a/datumaro/components/operations.py b/datumaro/components/operations.py new file mode 100644 index 0000000000..d887add955 --- /dev/null +++ b/datumaro/components/operations.py @@ -0,0 +1,1504 @@ + +# Copyright (C) 2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from collections import OrderedDict +from copy import deepcopy +import hashlib +import logging as log + +import attr +import cv2 +import numpy as np +from attr import attrib, attrs +from unittest import TestCase + +from datumaro.components.cli_plugin import CliPlugin +from datumaro.components.extractor import (AnnotationType, Bbox, Label, + LabelCategories, PointsCategories, MaskCategories) +from datumaro.components.project import Dataset +from datumaro.util import find, filter_dict +from datumaro.util.attrs_util import ensure_cls, default_if_none +from datumaro.util.annotation_util import (segment_iou, bbox_iou, + mean_bbox, OKS, find_instances, max_bbox, smooth_line) + +def get_ann_type(anns, t): + return [a for a in anns if a.type == t] + +def match_annotations_equal(a, b): + matches = [] + a_unmatched = a[:] + b_unmatched = b[:] + for a_ann in a: + for b_ann in b_unmatched: + if a_ann != b_ann: + continue + + matches.append((a_ann, b_ann)) + a_unmatched.remove(a_ann) + b_unmatched.remove(b_ann) + break + + return matches, a_unmatched, b_unmatched + +def merge_annotations_equal(a, b): + matches, a_unmatched, b_unmatched = match_annotations_equal(a, b) + return [ann_a for (ann_a, _) in matches] + a_unmatched + b_unmatched + +def merge_categories(sources): + categories = {} + for source in sources: + categories.update(source) + for source in sources: + for cat_type, source_cat in source.items(): + if not categories[cat_type] == source_cat: + raise NotImplementedError( + "Merging of datasets with different categories is " + "only allowed in 'merge' command.") + return categories + +class MergingStrategy(CliPlugin): + @classmethod + def merge(cls, sources, **options): + instance = cls(**options) + return instance(sources) + + def __init__(self, **options): + super().__init__(**options) + self.__dict__['_sources'] = None + + def __call__(self, sources): + raise NotImplementedError() + + +@attrs +class DatasetError: + item_id = attrib() + +@attrs +class QualityError(DatasetError): + pass + +@attrs +class TooCloseError(QualityError): + a = attrib() + b = attrib() + distance = attrib() + + def __str__(self): + return "Item %s: annotations are too close: %s, %s, distance = %s" % \ + (self.item_id, self.a, self.b, self.distance) + +@attrs +class WrongGroupError(QualityError): + found = attrib(converter=set) + expected = attrib(converter=set) + group = attrib(converter=list) + + def __str__(self): + return "Item %s: annotation group has wrong labels: " \ + "found %s, expected %s, group %s" % \ + (self.item_id, self.found, self.expected, self.group) + +@attrs +class MergeError(DatasetError): + sources = attrib(converter=set) + +@attrs +class NoMatchingAnnError(MergeError): + ann = attrib() + + def __str__(self): + return "Item %s: can't find matching annotation " \ + "in sources %s, annotation is %s" % \ + (self.item_id, self.sources, self.ann) + +@attrs +class NoMatchingItemError(MergeError): + def __str__(self): + return "Item %s: can't find matching item in sources %s" % \ + (self.item_id, self.sources) + +@attrs +class FailedLabelVotingError(MergeError): + votes = attrib() + ann = attrib(default=None) + + def __str__(self): + return "Item %s: label voting 
failed%s, votes %s, sources %s" % \
+            (self.item_id, ' for ann %s' % self.ann if self.ann else '',
+                self.votes, self.sources)
+
+@attrs
+class FailedAttrVotingError(MergeError):
+    attr = attrib()
+    votes = attrib()
+    ann = attrib()
+
+    def __str__(self):
+        return "Item %s: attribute voting failed " \
+            "for ann %s, votes %s, sources %s" % \
+            (self.item_id, self.ann, self.votes, self.sources)
+
+@attrs
+class IntersectMerge(MergingStrategy):
+    @attrs(repr_ns='IntersectMerge', kw_only=True)
+    class Conf:
+        pairwise_dist = attrib(converter=float, default=0.5)
+        sigma = attrib(converter=list, factory=list)
+
+        output_conf_thresh = attrib(converter=float, default=0)
+        quorum = attrib(converter=int, default=0)
+        ignored_attributes = attrib(converter=set, factory=set)
+
+        def _groups_converter(value):
+            # a trailing '?' marks a label as optional in its group
+            result = []
+            for group in value:
+                rg = set()
+                for label in group:
+                    optional = label.endswith('?')
+                    name = label if not optional else label[:-1]
+                    rg.add((name, optional))
+                result.append(rg)
+            return result
+        groups = attrib(converter=_groups_converter, factory=list)
+        close_distance = attrib(converter=float, default=0.75)
+    conf = attrib(converter=ensure_cls(Conf), factory=Conf)
+
+    # Error trackers:
+    errors = attrib(factory=list, init=False)
+    def add_item_error(self, error, *args, **kwargs):
+        self.errors.append(error(self._item_id, *args, **kwargs))
+
+    # Indexes:
+    _dataset_map = attrib(init=False) # id(dataset) -> (dataset, index)
+    _item_map = attrib(init=False) # id(item) -> (item, id(dataset))
+    _ann_map = attrib(init=False) # id(ann) -> (ann, id(item))
+    _item_id = attrib(init=False)
+    _item = attrib(init=False)
+
+    # Misc.
+    _categories = attrib(init=False) # merged categories
+
+    def __call__(self, datasets):
+        self._categories = self._merge_categories(
+            [d.categories() for d in datasets])
+        merged = Dataset(categories=self._categories)
+
+        self._check_groups_definition()
+
+        item_matches, item_map = self.match_items(datasets)
+        self._item_map = item_map
+        self._dataset_map = { id(d): (d, i) for i, d in enumerate(datasets) }
+
+        for item_id, items in item_matches.items():
+            self._item_id = item_id
+
+            if len(items) < len(datasets):
+                missing_sources = set(id(s) for s in datasets) - set(items)
+                missing_sources = [self._dataset_map[s][1]
+                    for s in missing_sources]
+                self.add_item_error(NoMatchingItemError, missing_sources)
+            merged.put(self.merge_items(items))
+
+        return merged
+
+    def get_ann_source(self, ann_id):
+        return self._item_map[self._ann_map[ann_id][1]][1]
+
+    def merge_items(self, items):
+        self._item = next(iter(items.values()))
+
+        self._ann_map = {}
+        sources = []
+        for item in items.values():
+            self._ann_map.update({ id(a): (a, id(item))
+                for a in item.annotations })
+            sources.append(item.annotations)
+        log.debug("Merging item %s: source annotations %s" % \
+            (self._item_id, list(map(len, sources))))
+
+        annotations = self.merge_annotations(sources)
+
+        annotations = [a for a in annotations
+            if self.conf.output_conf_thresh <= a.attributes.get('score', 1)]
+
+        return self._item.wrap(annotations=annotations)
+
+    def merge_annotations(self, sources):
+        self._make_mergers(sources)
+
+        clusters = self._match_annotations(sources)
+
+        joined_clusters = sum(clusters.values(), [])
+        group_map = self._find_cluster_groups(joined_clusters)
+
+        annotations = []
+        for t, clusters in clusters.items():
+            for cluster in clusters:
+                self._check_cluster_sources(cluster)
+
+            merged_clusters = self._merge_clusters(t, clusters)
+
+            for merged_ann, cluster in zip(merged_clusters,
clusters): + attributes = self._find_cluster_attrs(cluster, merged_ann) + attributes = { k: v for k, v in attributes.items() + if k not in self.conf.ignored_attributes } + attributes.update(merged_ann.attributes) + merged_ann.attributes = attributes + + new_group_id = find(enumerate(group_map), + lambda e: id(cluster) in e[1][0]) + if new_group_id is None: + new_group_id = 0 + else: + new_group_id = new_group_id[0] + 1 + merged_ann.group = new_group_id + + if self.conf.close_distance: + self._check_annotation_distance(t, merged_clusters) + + annotations += merged_clusters + + if self.conf.groups: + self._check_groups(annotations) + + return annotations + + @staticmethod + def match_items(datasets): + item_ids = set((item.id, item.subset) for d in datasets for item in d) + + item_map = {} # id(item) -> (item, id(dataset)) + + matches = OrderedDict() + for (item_id, item_subset) in sorted(item_ids, key=lambda e: e[0]): + items = {} + for d in datasets: + try: + item = d.get(item_id, subset=item_subset) + items[id(d)] = item + item_map[id(item)] = (item, id(d)) + except KeyError: + pass + matches[(item_id, item_subset)] = items + + return matches, item_map + + def _merge_label_categories(self, sources): + same = True + common = None + for src_categories in sources: + src_cat = src_categories.get(AnnotationType.label) + if common is None: + common = src_cat + elif common != src_cat: + same = False + break + + if same: + return common + + dst_cat = LabelCategories() + for src_id, src_categories in enumerate(sources): + src_cat = src_categories.get(AnnotationType.label) + if src_cat is None: + continue + + for src_label in src_cat.items: + dst_label = dst_cat.find(src_label.name)[1] + if dst_label is not None: + if dst_label != src_label: + if src_label.parent and dst_label.parent and \ + src_label.parent != dst_label.parent: + raise ValueError("Can't merge label category " + "%s (from #%s): " + "parent label conflict: %s vs. %s" % \ + (src_label.name, src_id, + src_label.parent, dst_label.parent) + ) + dst_label.parent = dst_label.parent or src_label.parent + dst_label.attributes |= src_label.attributes + else: + pass + else: + dst_cat.add(src_label.name, + src_label.parent, src_label.attributes) + + return dst_cat + + def _merge_point_categories(self, sources, label_cat): + dst_point_cat = PointsCategories() + + for src_id, src_categories in enumerate(sources): + src_label_cat = src_categories.get(AnnotationType.label) + src_point_cat = src_categories.get(AnnotationType.points) + if src_label_cat is None or src_point_cat is None: + continue + + for src_label_id, src_cat in src_point_cat.items.items(): + src_label = src_label_cat.items[src_label_id].name + dst_label_id = label_cat.find(src_label)[0] + dst_cat = dst_point_cat.items.get(dst_label_id) + if dst_cat is not None: + if dst_cat != src_cat: + raise ValueError("Can't merge point category for label " + "%s (from #%s): %s vs. 
%s" % \ + (src_label, src_id, src_cat, dst_cat) + ) + else: + pass + else: + dst_point_cat.add(dst_label_id, + src_cat.labels, src_cat.joints) + + if len(dst_point_cat.items) == 0: + return None + + return dst_point_cat + + def _merge_mask_categories(self, sources, label_cat): + dst_mask_cat = MaskCategories() + + for src_id, src_categories in enumerate(sources): + src_label_cat = src_categories.get(AnnotationType.label) + src_mask_cat = src_categories.get(AnnotationType.mask) + if src_label_cat is None or src_mask_cat is None: + continue + + for src_label_id, src_cat in src_mask_cat.colormap.items(): + src_label = src_label_cat.items[src_label_id].name + dst_label_id = label_cat.find(src_label)[0] + dst_cat = dst_mask_cat.colormap.get(dst_label_id) + if dst_cat is not None: + if dst_cat != src_cat: + raise ValueError("Can't merge mask category for label " + "%s (from #%s): %s vs. %s" % \ + (src_label, src_id, src_cat, dst_cat) + ) + else: + pass + else: + dst_mask_cat.colormap[dst_label_id] = src_cat + + if len(dst_mask_cat.colormap) == 0: + return None + + return dst_mask_cat + + def _merge_categories(self, sources): + dst_categories = {} + + label_cat = self._merge_label_categories(sources) + if label_cat is None: + return dst_categories + + dst_categories[AnnotationType.label] = label_cat + + points_cat = self._merge_point_categories(sources, label_cat) + if points_cat is not None: + dst_categories[AnnotationType.points] = points_cat + + mask_cat = self._merge_mask_categories(sources, label_cat) + if mask_cat is not None: + dst_categories[AnnotationType.mask] = mask_cat + + return dst_categories + + def _match_annotations(self, sources): + all_by_type = {} + for s in sources: + src_by_type = {} + for a in s: + src_by_type.setdefault(a.type, []).append(a) + for k, v in src_by_type.items(): + all_by_type.setdefault(k, []).append(v) + + clusters = {} + for k, v in all_by_type.items(): + clusters.setdefault(k, []).extend(self._match_ann_type(k, v)) + + return clusters + + def _make_mergers(self, sources): + def _make(c, **kwargs): + kwargs.update(attr.asdict(self.conf)) + fields = attr.fields_dict(c) + return c(**{ k: v for k, v in kwargs.items() if k in fields }, + context=self) + + def _for_type(t, **kwargs): + if t is AnnotationType.label: + return _make(LabelMerger, **kwargs) + elif t is AnnotationType.bbox: + return _make(BboxMerger, **kwargs) + elif t is AnnotationType.mask: + return _make(MaskMerger, **kwargs) + elif t is AnnotationType.polygon: + return _make(PolygonMerger, **kwargs) + elif t is AnnotationType.polyline: + return _make(LineMerger, **kwargs) + elif t is AnnotationType.points: + return _make(PointsMerger, **kwargs) + elif t is AnnotationType.caption: + return _make(CaptionsMerger, **kwargs) + else: + raise NotImplementedError("Type %s is not supported" % t) + + instance_map = {} + for s in sources: + s_instances = find_instances(s) + for inst in s_instances: + inst_bbox = max_bbox([a for a in inst if a.type in + {AnnotationType.polygon, + AnnotationType.mask, AnnotationType.bbox} + ]) + for ann in inst: + instance_map[id(ann)] = [inst, inst_bbox] + + self._mergers = { t: _for_type(t, instance_map=instance_map) + for t in AnnotationType } + + def _match_ann_type(self, t, sources): + return self._mergers[t].match_annotations(sources) + + def _merge_clusters(self, t, clusters): + return self._mergers[t].merge_clusters(clusters) + + @staticmethod + def _find_cluster_groups(clusters): + cluster_groups = [] + visited = set() + for a_idx, cluster_a in enumerate(clusters): + 
if a_idx in visited:
+                continue
+            visited.add(a_idx)
+
+            cluster_group = { id(cluster_a) }
+
+            # find segment groups in the cluster group
+            a_groups = set(ann.group for ann in cluster_a)
+            for cluster_b in clusters[a_idx+1 :]:
+                b_groups = set(ann.group for ann in cluster_b)
+                if a_groups & b_groups:
+                    a_groups |= b_groups
+
+            # now we know all the segment groups in this cluster group
+            # so we can find adjacent clusters
+            for b_idx, cluster_b in enumerate(clusters[a_idx+1 :]):
+                b_idx = a_idx + 1 + b_idx
+                b_groups = set(ann.group for ann in cluster_b)
+                if a_groups & b_groups:
+                    cluster_group.add( id(cluster_b) )
+                    visited.add(b_idx)
+
+            if a_groups == {0}:
+                continue # skip annotations without a group
+            cluster_groups.append( (cluster_group, a_groups) )
+        return cluster_groups
+
+    def _find_cluster_attrs(self, cluster, ann):
+        quorum = self.conf.quorum or 0
+
+        # TODO: when attribute types are implemented, add linear
+        # interpolation for contiguous values
+
+        attr_votes = {} # name -> { value: score , ... }
+        for s in cluster:
+            for name, value in s.attributes.items():
+                votes = attr_votes.get(name, {})
+                votes[value] = 1 + votes.get(value, 0)
+                attr_votes[name] = votes
+
+        attributes = {}
+        for name, votes in attr_votes.items():
+            winner, count = max(votes.items(), key=lambda e: e[1])
+            if count < quorum:
+                if sum(votes.values()) < quorum:
+                    # blame provokers
+                    missing_sources = set(
+                        self.get_ann_source(id(a)) for a in cluster
+                        if a.attributes.get(name) == winner)
+                else:
+                    # blame outliers
+                    missing_sources = set(
+                        self.get_ann_source(id(a)) for a in cluster
+                        if a.attributes.get(name) != winner)
+                missing_sources = [self._dataset_map[s][1]
+                    for s in missing_sources]
+                self.add_item_error(FailedAttrVotingError,
+                    missing_sources, name, votes, ann)
+                continue
+            attributes[name] = winner
+
+        return attributes
+
+    def _check_cluster_sources(self, cluster):
+        if len(cluster) == len(self._dataset_map):
+            return
+
+        def _has_item(s):
+            try:
+                item = self._dataset_map[s][0].get(*self._item_id)
+                if len(item.annotations) == 0:
+                    return False
+                return True
+            except KeyError:
+                return False
+
+        missing_sources = set(self._dataset_map) - \
+            set(self.get_ann_source(id(a)) for a in cluster)
+        missing_sources = [self._dataset_map[s][1] for s in missing_sources
+            if _has_item(s)]
+        if missing_sources:
+            self.add_item_error(NoMatchingAnnError, missing_sources, cluster[0])
+
+    def _check_annotation_distance(self, t, annotations):
+        for a_idx, a_ann in enumerate(annotations):
+            for b_ann in annotations[a_idx+1:]:
+                d = self._mergers[t].distance(a_ann, b_ann)
+                if self.conf.close_distance < d:
+                    self.add_item_error(TooCloseError, a_ann, b_ann, d)
+
+    def _check_groups(self, annotations):
+        check_groups = []
+        for check_group_raw in self.conf.groups:
+            check_group = set(l[0] for l in check_group_raw)
+            optional = set(l[0] for l in check_group_raw if l[1])
+            check_groups.append((check_group, optional))
+
+        def _check_group(group_labels, group):
+            for check_group, optional in check_groups:
+                common = check_group & group_labels
+                real_miss = check_group - common - optional
+                extra = group_labels - check_group
+                if common and (extra or real_miss):
+                    self.add_item_error(WrongGroupError, group_labels,
+                        check_group, group)
+                    break
+
+        groups = find_instances(annotations)
+        for group in groups:
+            group_labels = set()
+            for ann in group:
+                if not hasattr(ann, 'label'):
+                    continue
+                label = self._get_label_name(ann.label)
+
+                if ann.group:
+                    group_labels.add(label)
+                else:
+                    _check_group({label}, [ann])
+
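+            # ungrouped annotations were checked one by one above;
+            # now validate the labels of the grouped ones as a whole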
if not group_labels: + continue + _check_group(group_labels, group) + + def _get_label_name(self, label_id): + if label_id is None: + return None + return self._categories[AnnotationType.label].items[label_id].name + + def _get_label_id(self, label): + return self._categories[AnnotationType.label].find(label)[0] + + def _get_src_label_name(self, ann, label_id): + if label_id is None: + return None + item_id = self._ann_map[id(ann)][1] + dataset_id = self._item_map[item_id][1] + return self._dataset_map[dataset_id][0] \ + .categories()[AnnotationType.label].items[label_id].name + + def _get_any_label_name(self, ann, label_id): + if label_id is None: + return None + try: + return self._get_src_label_name(ann, label_id) + except KeyError: + return self._get_label_name(label_id) + + def _check_groups_definition(self): + for group in self.conf.groups: + for label, _ in group: + _, entry = self._categories[AnnotationType.label].find(label) + if entry is None: + raise ValueError("Datasets do not contain " + "label '%s', available labels %s" % \ + (label, [i.name for i in + self._categories[AnnotationType.label].items]) + ) + +@attrs(kw_only=True) +class AnnotationMatcher: + _context = attrib(type=IntersectMerge, default=None) + + def match_annotations(self, sources): + raise NotImplementedError() + +@attrs +class LabelMatcher(AnnotationMatcher): + def distance(self, a, b): + a_label = self._context._get_any_label_name(a, a.label) + b_label = self._context._get_any_label_name(b, b.label) + return a_label == b_label + + def match_annotations(self, sources): + return [sum(sources, [])] + +@attrs(kw_only=True) +class _ShapeMatcher(AnnotationMatcher): + pairwise_dist = attrib(converter=float, default=0.9) + cluster_dist = attrib(converter=float, default=-1.0) + + def match_annotations(self, sources): + distance = self.distance + label_matcher = self.label_matcher + pairwise_dist = self.pairwise_dist + cluster_dist = self.cluster_dist + + if cluster_dist < 0: cluster_dist = pairwise_dist + + id_segm = { id(a): (a, id(s)) for s in sources for a in s } + + def _is_close_enough(cluster, extra_id): + # check if whole cluster IoU will not be broken + # when this segment is added + b = id_segm[extra_id][0] + for a_id in cluster: + a = id_segm[a_id][0] + if distance(a, b) < cluster_dist: + return False + return True + + def _has_same_source(cluster, extra_id): + b = id_segm[extra_id][1] + for a_id in cluster: + a = id_segm[a_id][1] + if a == b: + return True + return False + + # match segments in sources, pairwise + adjacent = { i: [] for i in id_segm } # id(sgm) -> [id(adj_sgm1), ...] 
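+        # match segments pairwise between sources; each match becomes an edge
+        # in the adjacency graph, and clusters are its connected components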
+        for a_idx, src_a in enumerate(sources):
+            for src_b in sources[a_idx+1 :]:
+                matches, _, _, _ = match_segments(src_a, src_b,
+                    dist_thresh=pairwise_dist,
+                    distance=distance, label_matcher=label_matcher)
+                for a, b in matches:
+                    adjacent[id(a)].append(id(b))
+
+        # join all segments into matching clusters
+        clusters = []
+        visited = set()
+        for cluster_idx in adjacent:
+            if cluster_idx in visited:
+                continue
+
+            cluster = set()
+            to_visit = { cluster_idx }
+            while to_visit:
+                c = to_visit.pop()
+                cluster.add(c)
+                visited.add(c)
+
+                for i in adjacent[c]:
+                    if i in visited:
+                        continue
+                    if 0 < cluster_dist and not _is_close_enough(cluster, i):
+                        continue
+                    if _has_same_source(cluster, i):
+                        continue
+
+                    to_visit.add(i)
+
+            clusters.append([id_segm[i][0] for i in cluster])
+
+        return clusters
+
+    @staticmethod
+    def distance(a, b):
+        return segment_iou(a, b)
+
+    def label_matcher(self, a, b):
+        a_label = self._context._get_any_label_name(a, a.label)
+        b_label = self._context._get_any_label_name(b, b.label)
+        return a_label == b_label
+
+@attrs
+class BboxMatcher(_ShapeMatcher):
+    pass
+
+@attrs
+class PolygonMatcher(_ShapeMatcher):
+    pass
+
+@attrs
+class MaskMatcher(_ShapeMatcher):
+    pass
+
+@attrs(kw_only=True)
+class PointsMatcher(_ShapeMatcher):
+    sigma = attrib(type=list, default=None)
+    instance_map = attrib(converter=dict)
+
+    def distance(self, a, b):
+        a_bbox = self.instance_map[id(a)][1]
+        b_bbox = self.instance_map[id(b)][1]
+        if bbox_iou(a_bbox, b_bbox) <= 0:
+            return 0
+        bbox = mean_bbox([a_bbox, b_bbox])
+        return OKS(a, b, sigma=self.sigma, bbox=bbox)
+
+@attrs
+class LineMatcher(_ShapeMatcher):
+    @staticmethod
+    def distance(a, b):
+        a_bbox = a.get_bbox()
+        b_bbox = b.get_bbox()
+        bbox = max_bbox([a_bbox, b_bbox])
+        area = bbox[2] * bbox[3]
+        if not area:
+            return 1
+
+        # compute inter-line area, normalize by common bbox
+        point_count = max(max(len(a.points) // 2, len(b.points) // 2), 5)
+        a, sa = smooth_line(a.points, point_count)
+        b, sb = smooth_line(b.points, point_count)
+        dists = np.linalg.norm(a - b, axis=1)
+        dists = (dists[:-1] + dists[1:]) * 0.5
+        s = np.sum(dists) * 0.5 * (sa + sb) / area
+        return abs(1 - s)
+
+@attrs
+class CaptionsMatcher(AnnotationMatcher):
+    def match_annotations(self, sources):
+        raise NotImplementedError()
+
+
+@attrs(kw_only=True)
+class AnnotationMerger:
+    def merge_clusters(self, clusters):
+        raise NotImplementedError()
+
+@attrs(kw_only=True)
+class LabelMerger(AnnotationMerger, LabelMatcher):
+    quorum = attrib(converter=int, default=0)
+
+    def merge_clusters(self, clusters):
+        assert len(clusters) <= 1
+        if len(clusters) == 0:
+            return []
+
+        votes = {} # label -> score
+        for ann in clusters[0]:
+            label = self._context._get_src_label_name(ann, ann.label)
+            votes[label] = 1 + votes.get(label, 0)
+
+        merged = []
+        for label, count in votes.items():
+            if count < self.quorum:
+                # blame the sources which did not vote for this label
+                sources = set(self._context.get_ann_source(id(a))
+                    for a in clusters[0]
+                    if label != self._context._get_src_label_name(a, a.label))
+                sources = [self._context._dataset_map[s][1] for s in sources]
+                self._context.add_item_error(FailedLabelVotingError,
+                    sources, votes)
+                continue
+
+            merged.append(Label(self._context._get_label_id(label), attributes={
+                'score': count / len(self._context._dataset_map)
+            }))
+
+        return merged
+
+@attrs(kw_only=True)
+class _ShapeMerger(AnnotationMerger, _ShapeMatcher):
+    quorum = attrib(converter=int, default=0)
+
+    def merge_clusters(self, clusters):
+        merged = []
+        for cluster in clusters:
+            label, label_score =
self.find_cluster_label(cluster) + shape, shape_score = self.merge_cluster_shape(cluster) + + shape.z_order = max(cluster, key=lambda a: a.z_order).z_order + shape.label = label + shape.attributes['score'] = label_score * shape_score \ + if label is not None else shape_score + + merged.append(shape) + + return merged + + def find_cluster_label(self, cluster): + votes = {} + for s in cluster: + label = self._context._get_src_label_name(s, s.label) + state = votes.setdefault(label, [0, 0]) + state[0] += s.attributes.get('score', 1.0) + state[1] += 1 + + label, (score, count) = max(votes.items(), key=lambda e: e[1][0]) + if count < self.quorum: + self._context.add_item_error(FailedLabelVotingError, votes) + label = None + score = score / len(self._context._dataset_map) + label = self._context._get_label_id(label) + return label, score + + @staticmethod + def _merge_cluster_shape_mean_box_nearest(cluster): + mbbox = Bbox(*mean_bbox(cluster)) + dist = (segment_iou(mbbox, s) for s in cluster) + nearest_pos, _ = max(enumerate(dist), key=lambda e: e[1]) + return cluster[nearest_pos] + + def merge_cluster_shape(self, cluster): + shape = self._merge_cluster_shape_mean_box_nearest(cluster) + shape_score = sum(max(0, self.distance(shape, s)) + for s in cluster) / len(cluster) + return shape, shape_score + +@attrs +class BboxMerger(_ShapeMerger, BboxMatcher): + pass + +@attrs +class PolygonMerger(_ShapeMerger, PolygonMatcher): + pass + +@attrs +class MaskMerger(_ShapeMerger, MaskMatcher): + pass + +@attrs +class PointsMerger(_ShapeMerger, PointsMatcher): + pass + +@attrs +class LineMerger(_ShapeMerger, LineMatcher): + pass + +@attrs +class CaptionsMerger(AnnotationMerger, CaptionsMatcher): + pass + +def match_segments(a_segms, b_segms, distance=segment_iou, dist_thresh=1.0, + label_matcher=lambda a, b: a.label == b.label): + assert callable(distance), distance + assert callable(label_matcher), label_matcher + + a_segms.sort(key=lambda ann: 1 - ann.attributes.get('score', 1)) + b_segms.sort(key=lambda ann: 1 - ann.attributes.get('score', 1)) + + # a_matches: indices of b_segms matched to a bboxes + # b_matches: indices of a_segms matched to b bboxes + a_matches = -np.ones(len(a_segms), dtype=int) + b_matches = -np.ones(len(b_segms), dtype=int) + + distances = np.array([[distance(a, b) for b in b_segms] for a in a_segms]) + + # matches: boxes we succeeded to match completely + # mispred: boxes we succeeded to match, having label mismatch + matches = [] + mispred = [] + + for a_idx, a_segm in enumerate(a_segms): + if len(b_segms) == 0: + break + matched_b = -1 + max_dist = -1 + b_indices = np.argsort([not label_matcher(a_segm, b_segm) + for b_segm in b_segms], + kind='stable') # prioritize those with same label, keep score order + for b_idx in b_indices: + if 0 <= b_matches[b_idx]: # assign a_segm with max conf + continue + d = distances[a_idx, b_idx] + if d < dist_thresh or d <= max_dist: + continue + max_dist = d + matched_b = b_idx + + if matched_b < 0: + continue + a_matches[a_idx] = matched_b + b_matches[matched_b] = a_idx + + b_segm = b_segms[matched_b] + + if label_matcher(a_segm, b_segm): + matches.append( (a_segm, b_segm) ) + else: + mispred.append( (a_segm, b_segm) ) + + # *_umatched: boxes of (*) we failed to match + a_unmatched = [a_segms[i] for i, m in enumerate(a_matches) if m < 0] + b_unmatched = [b_segms[i] for i, m in enumerate(b_matches) if m < 0] + + return matches, mispred, a_unmatched, b_unmatched + +def mean_std(dataset): + """ + Computes unbiased mean and std. dev. 
for dataset images, channel-wise.
+    """
+    # Use an online algorithm to:
+    #  - handle different image sizes
+    #  - avoid cancellation problem
+    if len(dataset) == 0:
+        return [0, 0, 0], [0, 0, 0]
+
+    stats = np.empty((len(dataset), 2, 3), dtype=np.double)
+    counts = np.empty(len(dataset), dtype=np.uint32)
+
+    mean = lambda i, s: s[i][0]
+    var = lambda i, s: s[i][1]
+
+    for i, item in enumerate(dataset):
+        counts[i] = np.prod(item.image.size)
+
+        image = item.image.data
+        if len(image.shape) == 2:
+            image = image[:, :, np.newaxis]
+        else:
+            image = image[:, :, :3]
+        # opencv is much faster than numpy here
+        cv2.meanStdDev(image.astype(np.double) / 255,
+            mean=mean(i, stats), stddev=var(i, stats))
+
+    # make variance unbiased
+    np.multiply(np.square(stats[:, 1]),
+        (counts / (counts - 1))[:, np.newaxis],
+        out=stats[:, 1])
+
+    _, mean, var = StatsCounter().compute_stats(stats, counts, mean, var)
+    return mean * 255, np.sqrt(var) * 255
+
+class StatsCounter:
+    # Implements online parallel computation of sample variance
+    # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
+
+    # Needed to avoid catastrophic cancellation in floating point computations
+    @staticmethod
+    def pairwise_stats(count_a, mean_a, var_a, count_b, mean_b, var_b):
+        delta = mean_b - mean_a
+        m_a = var_a * (count_a - 1)
+        m_b = var_b * (count_b - 1)
+        M2 = m_a + m_b + delta ** 2 * count_a * count_b / (count_a + count_b)
+        return (
+            count_a + count_b,
+            # the combined mean is weighted by the sample counts
+            (count_a * mean_a + count_b * mean_b) / (count_a + count_b),
+            M2 / (count_a + count_b - 1)
+        )
+
+    # stats = float array of shape N, 2 * d, d = dimensions of values
+    # count = integer array of shape N
+    # mean_accessor = function(idx, stats) to retrieve element mean
+    # variance_accessor = function(idx, stats) to retrieve element variance
+    # Recursively computes total count, mean and variance, does O(log(N)) calls
+    @staticmethod
+    def compute_stats(stats, counts, mean_accessor, variance_accessor):
+        m = mean_accessor
+        v = variance_accessor
+        n = len(stats)
+        if n == 1:
+            return counts[0], m(0, stats), v(0, stats)
+        if n == 2:
+            return __class__.pairwise_stats(
+                counts[0], m(0, stats), v(0, stats),
+                counts[1], m(1, stats), v(1, stats)
+            )
+        h = n // 2
+        return __class__.pairwise_stats(
+            *__class__.compute_stats(stats[:h], counts[:h], m, v),
+            *__class__.compute_stats(stats[h:], counts[h:], m, v)
+        )
+
+def compute_image_statistics(dataset):
+    stats = {
+        'dataset': {},
+        'subsets': {}
+    }
+
+    def _extractor_stats(extractor):
+        available = True
+        for item in extractor:
+            if not (item.has_image and item.image.has_data):
+                available = False
+                log.warning("Item %s has no image.
Image stats won't be computed", + item.id) + break + + stats = { + 'images count': len(extractor), + } + + if available: + mean, std = mean_std(extractor) + stats.update({ + 'image mean': [float(n) for n in mean[::-1]], + 'image std': [float(n) for n in std[::-1]], + }) + else: + stats.update({ + 'image mean': 'n/a', + 'image std': 'n/a', + }) + return stats + + stats['dataset'].update(_extractor_stats(dataset)) + + subsets = dataset.subsets() or [None] + if subsets and 0 < len([s for s in subsets if s]): + for subset_name in subsets: + stats['subsets'][subset_name] = _extractor_stats( + dataset.get_subset(subset_name)) + + return stats + +def compute_ann_statistics(dataset): + labels = dataset.categories().get(AnnotationType.label) + def get_label(ann): + return labels.items[ann.label].name if ann.label is not None else None + + stats = { + 'images count': len(dataset), + 'annotations count': 0, + 'unannotated images count': 0, + 'unannotated images': [], + 'annotations by type': { t.name: { + 'count': 0, + } for t in AnnotationType }, + 'annotations': {}, + } + by_type = stats['annotations by type'] + + attr_template = { + 'count': 0, + 'values count': 0, + 'values present': set(), + 'distribution': {}, # value -> (count, total%) + } + label_stat = { + 'count': 0, + 'distribution': { l.name: [0, 0] for l in labels.items + }, # label -> (count, total%) + + 'attributes': {}, + } + stats['annotations']['labels'] = label_stat + segm_stat = { + 'avg. area': 0, + 'area distribution': [], # a histogram with 10 bins + # (min, min+10%), ..., (min+90%, max) -> (count, total%) + + 'pixel distribution': { l.name: [0, 0] for l in labels.items + }, # label -> (count, total%) + } + stats['annotations']['segments'] = segm_stat + segm_areas = [] + pixel_dist = segm_stat['pixel distribution'] + total_pixels = 0 + + for item in dataset: + if len(item.annotations) == 0: + stats['unannotated images'].append(item.id) + continue + + for ann in item.annotations: + by_type[ann.type.name]['count'] += 1 + + if not hasattr(ann, 'label') or ann.label is None: + continue + + if ann.type in {AnnotationType.mask, + AnnotationType.polygon, AnnotationType.bbox}: + area = ann.get_area() + segm_areas.append(area) + pixel_dist[get_label(ann)][0] += int(area) + + label_stat['count'] += 1 + label_stat['distribution'][get_label(ann)][0] += 1 + + for name, value in ann.attributes.items(): + if name.lower() in { 'occluded', 'visibility', 'score', + 'id', 'track_id' }: + continue + attrs_stat = label_stat['attributes'].setdefault(name, + deepcopy(attr_template)) + attrs_stat['count'] += 1 + attrs_stat['values present'].add(str(value)) + attrs_stat['distribution'] \ + .setdefault(str(value), [0, 0])[0] += 1 + + stats['annotations count'] = sum(t['count'] for t in + stats['annotations by type'].values()) + stats['unannotated images count'] = len(stats['unannotated images']) + + for label_info in label_stat['distribution'].values(): + label_info[1] = label_info[0] / label_stat['count'] + + for label_attr in label_stat['attributes'].values(): + label_attr['values count'] = len(label_attr['values present']) + label_attr['values present'] = sorted(label_attr['values present']) + for attr_info in label_attr['distribution'].values(): + attr_info[1] = attr_info[0] / label_attr['count'] + + # numpy.sum might be faster, but could overflow with large datasets. + # Python's int can transparently mutate to be of indefinite precision (long) + total_pixels = sum(int(a) for a in segm_areas) + + segm_stat['avg. 
area'] = total_pixels / (len(segm_areas) or 1.0)
+
+    for label_info in segm_stat['pixel distribution'].values():
+        label_info[1] = label_info[0] / total_pixels
+
+    if len(segm_areas) != 0:
+        hist, bins = np.histogram(segm_areas)
+        segm_stat['area distribution'] = [{
+            'min': float(bin_min), 'max': float(bin_max),
+            'count': int(c), 'percent': int(c) / len(segm_areas)
+        } for c, (bin_min, bin_max) in zip(hist, zip(bins[:-1], bins[1:]))]
+
+    return stats
+
+@attrs
+class DistanceComparator:
+    iou_threshold = attrib(converter=float, default=0.5)
+
+    @staticmethod
+    def match_datasets(a, b):
+        a_items = set((item.id, item.subset) for item in a)
+        b_items = set((item.id, item.subset) for item in b)
+
+        matches = a_items & b_items
+        a_unmatched = a_items - b_items
+        b_unmatched = b_items - a_items
+        return matches, a_unmatched, b_unmatched
+
+    @staticmethod
+    def match_classes(a, b):
+        a_label_cat = a.categories().get(AnnotationType.label, LabelCategories())
+        b_label_cat = b.categories().get(AnnotationType.label, LabelCategories())
+
+        a_labels = set(c.name for c in a_label_cat.items)
+        b_labels = set(c.name for c in b_label_cat.items)
+
+        matches = a_labels & b_labels
+        a_unmatched = a_labels - b_labels
+        b_unmatched = b_labels - a_labels
+        return matches, a_unmatched, b_unmatched
+
+    def match_annotations(self, item_a, item_b):
+        return { t: self._match_ann_type(t, item_a, item_b)
+            for t in AnnotationType }
+
+    def _match_ann_type(self, t, *args):
+        # pylint: disable=no-value-for-parameter
+        if t == AnnotationType.label:
+            return self.match_labels(*args)
+        elif t == AnnotationType.bbox:
+            return self.match_boxes(*args)
+        elif t == AnnotationType.polygon:
+            return self.match_polygons(*args)
+        elif t == AnnotationType.mask:
+            return self.match_masks(*args)
+        elif t == AnnotationType.points:
+            return self.match_points(*args)
+        elif t == AnnotationType.polyline:
+            return self.match_lines(*args)
+        # pylint: enable=no-value-for-parameter
+        else:
+            raise NotImplementedError("Unexpected annotation type %s" % t)
+
+    @staticmethod
+    def _get_ann_type(t, item):
+        return get_ann_type(item.annotations, t)
+
+    def match_labels(self, item_a, item_b):
+        a_labels = set(a.label for a in
+            self._get_ann_type(AnnotationType.label, item_a))
+        b_labels = set(a.label for a in
+            self._get_ann_type(AnnotationType.label, item_b))
+
+        matches = a_labels & b_labels
+        a_unmatched = a_labels - b_labels
+        b_unmatched = b_labels - a_labels
+        return matches, a_unmatched, b_unmatched
+
+    def _match_segments(self, t, item_a, item_b):
+        a_boxes = self._get_ann_type(t, item_a)
+        b_boxes = self._get_ann_type(t, item_b)
+        return match_segments(a_boxes, b_boxes, dist_thresh=self.iou_threshold)
+
+    def match_polygons(self, item_a, item_b):
+        return self._match_segments(AnnotationType.polygon, item_a, item_b)
+
+    def match_masks(self, item_a, item_b):
+        return self._match_segments(AnnotationType.mask, item_a, item_b)
+
+    def match_boxes(self, item_a, item_b):
+        return self._match_segments(AnnotationType.bbox, item_a, item_b)
+
+    def match_points(self, item_a, item_b):
+        a_points = self._get_ann_type(AnnotationType.points, item_a)
+        b_points = self._get_ann_type(AnnotationType.points, item_b)
+
+        instance_map = {}
+        for s in [item_a.annotations, item_b.annotations]:
+            s_instances = find_instances(s)
+            for inst in s_instances:
+                inst_bbox = max_bbox(inst)
+                for ann in inst:
+                    instance_map[id(ann)] = [inst, inst_bbox]
+        matcher = PointsMatcher(instance_map=instance_map)
+
+        return match_segments(a_points, b_points,
+            dist_thresh=self.iou_threshold,
distance=matcher.distance) + + def match_lines(self, item_a, item_b): + a_lines = self._get_ann_type(AnnotationType.polyline, item_a) + b_lines = self._get_ann_type(AnnotationType.polyline, item_b) + + matcher = LineMatcher() + + return match_segments(a_lines, b_lines, + dist_thresh=self.iou_threshold, distance=matcher.distance) + +def match_items_by_id(a, b): + a_items = set((item.id, item.subset) for item in a) + b_items = set((item.id, item.subset) for item in b) + + matches = a_items & b_items + matches = [([m], [m]) for m in matches] + a_unmatched = a_items - b_items + b_unmatched = b_items - a_items + return matches, a_unmatched, b_unmatched + +def match_items_by_image_hash(a, b): + def _hash(item): + if not item.image.has_data: + log.warning("Image (%s, %s) has no image " + "data, counted as unmatched", item.id, item.subset) + return None + return hashlib.md5(item.image.data.tobytes()).hexdigest() + + def _build_hashmap(source): + d = {} + for item in source: + h = _hash(item) + if h is None: + h = str(id(item)) # anything unique + d.setdefault(h, []).append((item.id, item.subset)) + return d + + a_hash = _build_hashmap(a) + b_hash = _build_hashmap(b) + + a_items = set(a_hash) + b_items = set(b_hash) + + matches = a_items & b_items + a_unmatched = a_items - b_items + b_unmatched = b_items - a_items + + matches = [(a_hash[h], b_hash[h]) for h in matches] + a_unmatched = set(i for h in a_unmatched for i in a_hash[h]) + b_unmatched = set(i for h in b_unmatched for i in b_hash[h]) + + return matches, a_unmatched, b_unmatched + +@attrs +class ExactComparator: + match_images = attrib(kw_only=True, type=bool, default=False) + ignored_fields = attrib(kw_only=True, + factory=set, validator=default_if_none(set)) + ignored_attrs = attrib(kw_only=True, + factory=set, validator=default_if_none(set)) + ignored_item_attrs = attrib(kw_only=True, + factory=set, validator=default_if_none(set)) + + _test = attrib(init=False, type=TestCase) + errors = attrib(init=False, type=list) + + def __attrs_post_init__(self): + self._test = TestCase() + self._test.maxDiff = None + + + def _match_items(self, a, b): + if self.match_images: + return match_items_by_image_hash(a, b) + else: + return match_items_by_id(a, b) + + def _compare_categories(self, a, b): + test = self._test + errors = self.errors + + try: + test.assertEqual( + sorted(a, key=lambda t: t.value), + sorted(b, key=lambda t: t.value) + ) + except AssertionError as e: + errors.append({'type': 'categories', 'message': str(e)}) + + if AnnotationType.label in a: + try: + test.assertEqual( + a[AnnotationType.label].items, + b[AnnotationType.label].items, + ) + except AssertionError as e: + errors.append({'type': 'labels', 'message': str(e)}) + if AnnotationType.mask in a: + try: + test.assertEqual( + a[AnnotationType.mask].colormap, + b[AnnotationType.mask].colormap, + ) + except AssertionError as e: + errors.append({'type': 'colormap', 'message': str(e)}) + if AnnotationType.points in a: + try: + test.assertEqual( + a[AnnotationType.points].items, + b[AnnotationType.points].items, + ) + except AssertionError as e: + errors.append({'type': 'points', 'message': str(e)}) + + def _compare_annotations(self, a, b): + ignored_fields = self.ignored_fields + ignored_attrs = self.ignored_attrs + + a_fields = { k: None for k in vars(a) if k in ignored_fields } + b_fields = { k: None for k in vars(b) if k in ignored_fields } + if 'attributes' not in ignored_fields: + a_fields['attributes'] = filter_dict(a.attributes, ignored_attrs) + b_fields['attributes'] = 
filter_dict(b.attributes, ignored_attrs) + + result = a.wrap(**a_fields) == b.wrap(**b_fields) + + return result + + def _compare_items(self, item_a, item_b): + test = self._test + + a_id = (item_a.id, item_a.subset) + b_id = (item_b.id, item_b.subset) + + matched = [] + unmatched = [] + errors = [] + + try: + test.assertEqual( + filter_dict(item_a.attributes, self.ignored_item_attrs), + filter_dict(item_b.attributes, self.ignored_item_attrs) + ) + except AssertionError as e: + errors.append({'type': 'item_attr', + 'a_item': a_id, 'b_item': b_id, 'message': str(e)}) + + b_annotations = item_b.annotations[:] + for ann_a in item_a.annotations: + ann_b_candidates = [x for x in item_b.annotations + if x.type == ann_a.type] + + ann_b = find(enumerate(self._compare_annotations(ann_a, x) + for x in ann_b_candidates), lambda x: x[1]) + if ann_b is None: + unmatched.append({ + 'item': a_id, 'source': 'a', 'ann': str(ann_a), + }) + continue + else: + ann_b = ann_b_candidates[ann_b[0]] + + b_annotations.remove(ann_b) # avoid repeats + matched.append({'a_item': a_id, 'b_item': b_id, + 'a': str(ann_a), 'b': str(ann_b)}) + + for ann_b in b_annotations: + unmatched.append({'item': b_id, 'source': 'b', 'ann': str(ann_b)}) + + return matched, unmatched, errors + + def compare_datasets(self, a, b): + self.errors = [] + errors = self.errors + + self._compare_categories(a.categories(), b.categories()) + + matched = [] + unmatched = [] + + matches, a_unmatched, b_unmatched = self._match_items(a, b) + + if a.categories().get(AnnotationType.label) != \ + b.categories().get(AnnotationType.label): + return matched, unmatched, a_unmatched, b_unmatched, errors + + _dist = lambda s: len(s[1]) + len(s[2]) + for a_ids, b_ids in matches: + # build distance matrix + match_status = {} # (a_id, b_id): [matched, unmatched, errors] + a_matches = { a_id: None for a_id in a_ids } + b_matches = { b_id: None for b_id in b_ids } + + for a_id in a_ids: + item_a = a.get(*a_id) + candidates = {} + + for b_id in b_ids: + item_b = b.get(*b_id) + + i_m, i_um, i_err = self._compare_items(item_a, item_b) + candidates[b_id] = [i_m, i_um, i_err] + + if len(i_um) == 0: + a_matches[a_id] = b_id + b_matches[b_id] = a_id + matched.extend(i_m) + errors.extend(i_err) + break + + match_status[a_id] = candidates + + # assign + for a_id in a_ids: + if len(b_ids) == 0: + break + + # find the closest, ignore already assigned + matched_b = a_matches[a_id] + if matched_b is not None: + continue + min_dist = -1 + for b_id in b_ids: + if b_matches[b_id] is not None: + continue + d = _dist(match_status[a_id][b_id]) + if d < min_dist and 0 <= min_dist: + continue + min_dist = d + matched_b = b_id + + if matched_b is None: + continue + a_matches[a_id] = matched_b + b_matches[matched_b] = a_id + + m = match_status[a_id][matched_b] + matched.extend(m[0]) + unmatched.extend(m[1]) + errors.extend(m[2]) + + a_unmatched |= set(a_id for a_id, m in a_matches.items() if not m) + b_unmatched |= set(b_id for b_id, m in b_matches.items() if not m) + + return matched, unmatched, a_unmatched, b_unmatched, errors \ No newline at end of file diff --git a/datumaro/components/project.py b/datumaro/components/project.py new file mode 100644 index 0000000000..07f8f01920 --- /dev/null +++ b/datumaro/components/project.py @@ -0,0 +1,850 @@ +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from collections import OrderedDict, defaultdict +from functools import reduce +from glob import glob +from typing import Iterable, Union, Dict, List +import 
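For orientation, a hypothetical end-to-end use of `ExactComparator`. The comparator's import path is an assumption (the relative `.operations` imports later in this patch suggest `datumaro.components.operations`); everything else mirrors the definitions above:

``` python
from datumaro.components.extractor import DatasetItem, Label
from datumaro.components.operations import ExactComparator  # assumed path
from datumaro.components.project import Dataset

a = Dataset.from_iterable(
    [DatasetItem(id='1', annotations=[Label(0)])], categories=['cat'])
b = Dataset.from_iterable(
    [DatasetItem(id='1', annotations=[Label(0, attributes={'score': 0.9})])],
    categories=['cat'])

comparator = ExactComparator(ignored_attrs={'score'})
matched, unmatched, a_extra, b_extra, errors = \
    comparator.compare_datasets(a, b)
assert len(matched) == 1 and not unmatched  # 'score' differences are ignored
```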
git +import importlib +import inspect +import logging as log +import os +import os.path as osp +import shutil +import sys + +from datumaro.components.config import Config, DEFAULT_FORMAT +from datumaro.components.config_model import (Model, Source, + PROJECT_DEFAULT_CONFIG, PROJECT_SCHEMA) +from datumaro.components.extractor import Extractor, LabelCategories,\ + AnnotationType, DatasetItem +from datumaro.components.launcher import ModelTransform +from datumaro.components.dataset_filter import \ + XPathDatasetFilter, XPathAnnotationsFilter + + +def import_foreign_module(name, path, package=None): + module = None + default_path = sys.path.copy() + try: + sys.path = [ osp.abspath(path), ] + default_path + sys.modules.pop(name, None) # remove from cache + module = importlib.import_module(name, package=package) + sys.modules.pop(name) # remove from cache + except Exception: + raise + finally: + sys.path = default_path + return module + + +class Registry: + def __init__(self, config=None, item_type=None): + self.item_type = item_type + + self.items = {} + + if config is not None: + self.load(config) + + def load(self, config): + pass + + def register(self, name, value): + if self.item_type: + value = self.item_type(value) + self.items[name] = value + return value + + def unregister(self, name): + return self.items.pop(name, None) + + def get(self, key): + return self.items[key] # returns a class / ctor + + +class ModelRegistry(Registry): + def __init__(self, config=None): + super().__init__(config, item_type=Model) + + def load(self, config): + # TODO: list default dir, insert values + if 'models' in config: + for name, model in config.models.items(): + self.register(name, model) + + +class SourceRegistry(Registry): + def __init__(self, config=None): + super().__init__(config, item_type=Source) + + def load(self, config): + # TODO: list default dir, insert values + if 'sources' in config: + for name, source in config.sources.items(): + self.register(name, source) + +class PluginRegistry(Registry): + def __init__(self, config=None, builtin=None, local=None): + super().__init__(config) + + from datumaro.components.cli_plugin import CliPlugin + + if builtin is not None: + for v in builtin: + k = CliPlugin._get_name(v) + self.register(k, v) + if local is not None: + for v in local: + k = CliPlugin._get_name(v) + self.register(k, v) + +class GitWrapper: + def __init__(self, config=None): + self.repo = None + + if config is not None and config.project_dir: + self.init(config.project_dir) + + @staticmethod + def _git_dir(base_path): + return osp.join(base_path, '.git') + + @classmethod + def spawn(cls, path): + spawn = not osp.isdir(cls._git_dir(path)) + repo = git.Repo.init(path=path) + if spawn: + repo.config_writer().set_value("user", "name", "User") \ + .set_value("user", "email", "user@nowhere.com") \ + .release() + # gitpython does not support init, use git directly + repo.git.init() + repo.git.commit('-m', 'Initial commit', '--allow-empty') + return repo + + def init(self, path): + self.repo = self.spawn(path) + return self.repo + + def is_initialized(self): + return self.repo is not None + + def create_submodule(self, name, dst_dir, **kwargs): + self.repo.create_submodule(name, dst_dir, **kwargs) + + def has_submodule(self, name): + return name in [submodule.name for submodule in self.repo.submodules] + + def remove_submodule(self, name, **kwargs): + return self.repo.submodule(name).remove(**kwargs) + +def load_project_as_dataset(url): + # symbol forward declaration + raise 
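`import_foreign_module` confines the import lookup to the plugin's own directory and defeats Python's module cache, so two plugins carrying the same module name do not shadow each other. A condensed sketch of the same pattern:

``` python
import importlib
import os.path as osp
import sys

def import_from_path(name, path):
    # Temporarily replace sys.path so the lookup is confined to `path`,
    # and drop the module from sys.modules so repeated imports are not
    # served from the cache.
    saved_path = sys.path.copy()
    try:
        sys.path = [osp.abspath(path)] + saved_path
        sys.modules.pop(name, None)
        return importlib.import_module(name)
    finally:
        sys.path = saved_path
```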
NotImplementedError() + +class Environment: + _builtin_plugins = None + PROJECT_EXTRACTOR_NAME = 'datumaro_project' + + def __init__(self, config=None): + config = Config(config, + fallback=PROJECT_DEFAULT_CONFIG, schema=PROJECT_SCHEMA) + + self.models = ModelRegistry(config) + self.sources = SourceRegistry(config) + + self.git = GitWrapper(config) + + env_dir = osp.join(config.project_dir, config.env_dir) + builtin = self._load_builtin_plugins() + custom = self._load_plugins2(osp.join(env_dir, config.plugins_dir)) + select = lambda seq, t: [e for e in seq if issubclass(e, t)] + from datumaro.components.extractor import Transform + from datumaro.components.extractor import SourceExtractor + from datumaro.components.extractor import Importer + from datumaro.components.converter import Converter + from datumaro.components.launcher import Launcher + self.extractors = PluginRegistry( + builtin=select(builtin, SourceExtractor), + local=select(custom, SourceExtractor) + ) + self.extractors.register(self.PROJECT_EXTRACTOR_NAME, + load_project_as_dataset) + + self.importers = PluginRegistry( + builtin=select(builtin, Importer), + local=select(custom, Importer) + ) + self.launchers = PluginRegistry( + builtin=select(builtin, Launcher), + local=select(custom, Launcher) + ) + self.converters = PluginRegistry( + builtin=select(builtin, Converter), + local=select(custom, Converter) + ) + self.transforms = PluginRegistry( + builtin=select(builtin, Transform), + local=select(custom, Transform) + ) + + @staticmethod + def _find_plugins(plugins_dir): + plugins = [] + if not osp.exists(plugins_dir): + return plugins + + for plugin_name in os.listdir(plugins_dir): + p = osp.join(plugins_dir, plugin_name) + if osp.isfile(p) and p.endswith('.py'): + plugins.append((plugins_dir, plugin_name, None)) + elif osp.isdir(p): + plugins += [(plugins_dir, + osp.splitext(plugin_name)[0] + '.' 
+ osp.basename(p), + osp.splitext(plugin_name)[0] + ) + for p in glob(osp.join(p, '*.py'))] + return plugins + + @classmethod + def _import_module(cls, module_dir, module_name, types, package=None): + module = import_foreign_module(osp.splitext(module_name)[0], module_dir, + package=package) + + exports = [] + if hasattr(module, 'exports'): + exports = module.exports + else: + for symbol in dir(module): + if symbol.startswith('_'): + continue + exports.append(getattr(module, symbol)) + + exports = [s for s in exports + if inspect.isclass(s) and issubclass(s, types) and not s in types] + + return exports + + @classmethod + def _load_plugins(cls, plugins_dir, types): + types = tuple(types) + + plugins = cls._find_plugins(plugins_dir) + + all_exports = [] + for module_dir, module_name, package in plugins: + try: + exports = cls._import_module(module_dir, module_name, types, + package) + except Exception as e: + module_search_error = ImportError + try: + module_search_error = ModuleNotFoundError # python 3.6+ + except NameError: + pass + + message = ["Failed to import module '%s': %s", module_name, e] + if isinstance(e, module_search_error): + log.debug(*message) + else: + log.warning(*message) + continue + + log.debug("Imported the following symbols from %s: %s" % \ + ( + module_name, + ', '.join(s.__name__ for s in exports) + ) + ) + all_exports.extend(exports) + + return all_exports + + @classmethod + def _load_builtin_plugins(cls): + if not cls._builtin_plugins: + plugins_dir = osp.join( + __file__[: __file__.rfind(osp.join('datumaro', 'components'))], + osp.join('datumaro', 'plugins') + ) + assert osp.isdir(plugins_dir), plugins_dir + cls._builtin_plugins = cls._load_plugins2(plugins_dir) + return cls._builtin_plugins + + @classmethod + def _load_plugins2(cls, plugins_dir): + from datumaro.components.extractor import Transform + from datumaro.components.extractor import SourceExtractor + from datumaro.components.extractor import Importer + from datumaro.components.converter import Converter + from datumaro.components.launcher import Launcher + types = [SourceExtractor, Converter, Importer, Launcher, Transform] + + return cls._load_plugins(plugins_dir, types) + + def make_extractor(self, name, *args, **kwargs): + return self.extractors.get(name)(*args, **kwargs) + + def make_importer(self, name, *args, **kwargs): + return self.importers.get(name)(*args, **kwargs) + + def make_launcher(self, name, *args, **kwargs): + return self.launchers.get(name)(*args, **kwargs) + + def make_converter(self, name, *args, **kwargs): + return self.converters.get(name)(*args, **kwargs) + + def register_model(self, name, model): + self.models.register(name, model) + + def unregister_model(self, name): + self.models.unregister(name) + + +class Dataset(Extractor): + class Subset(Extractor): + def __init__(self, parent): + self.parent = parent + self.items = OrderedDict() + + def __iter__(self): + yield from self.items.values() + + def __len__(self): + return len(self.items) + + def categories(self): + return self.parent.categories() + + @classmethod + def from_iterable(cls, iterable: Iterable[DatasetItem], + categories: Union[Dict, List[str]] = None): + if isinstance(categories, list): + categories = { AnnotationType.label: + LabelCategories.from_iterable(categories) + } + + if not categories: + categories = {} + + class _extractor(Extractor): + def __iter__(self): + return iter(iterable) + + def categories(self): + return categories + + return cls.from_extractors(_extractor()) + + @classmethod + def 
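Plugin modules are harvested either from an explicit `exports` list or by scanning public symbols for classes derived from the known plugin base types. A small, self-contained sketch of that discovery rule:

``` python
import inspect
import types

def collect_exports(module, base_types):
    # Prefer an explicit `exports` list; otherwise take every public
    # class that subclasses a plugin base type (but is not a base type).
    symbols = getattr(module, 'exports', None)
    if symbols is None:
        symbols = [getattr(module, name) for name in dir(module)
            if not name.startswith('_')]
    return [s for s in symbols
        if inspect.isclass(s) and issubclass(s, tuple(base_types))
            and s not in base_types]

mod = types.ModuleType('fake_plugin')
class Base: pass
class MyPlugin(Base): pass
mod.Base, mod.MyPlugin = Base, MyPlugin
assert collect_exports(mod, (Base,)) == [MyPlugin]
```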
from_extractors(cls, *sources): + categories = cls._merge_categories(s.categories() for s in sources) + dataset = Dataset(categories=categories) + + # merge items + subsets = defaultdict(lambda: cls.Subset(dataset)) + for source in sources: + for item in source: + existing_item = subsets[item.subset].items.get(item.id) + if existing_item is not None: + path = existing_item.path + if item.path != path: + path = None + item = cls._merge_items(existing_item, item, path=path) + + subsets[item.subset].items[item.id] = item + + dataset._subsets = dict(subsets) + return dataset + + def __init__(self, categories=None): + super().__init__() + + self._subsets = {} + + if not categories: + categories = {} + self._categories = categories + + def __iter__(self): + for subset in self._subsets.values(): + for item in subset: + yield item + + def __len__(self): + if self._length is None: + self._length = reduce(lambda s, x: s + len(x), + self._subsets.values(), 0) + return self._length + + def get_subset(self, name): + return self._subsets[name] + + def subsets(self): + return list(self._subsets) + + def categories(self): + return self._categories + + def get(self, item_id, subset=None, path=None): + if path: + raise KeyError("Requested dataset item path is not found") + item_id = str(item_id) + subset = subset or '' + subset = self._subsets[subset] + return subset.items[item_id] + + def put(self, item, item_id=None, subset=None, path=None): + if path: + raise KeyError("Requested dataset item path is not found") + + if item_id is None: + item_id = item.id + if subset is None: + subset = item.subset + + item = item.wrap(id=item_id, subset=subset, path=None) + if subset not in self._subsets: + self._subsets[subset] = self.Subset(self) + self._subsets[subset].items[item_id] = item + self._length = None + + return item + + def filter(self, expr, filter_annotations=False, remove_empty=False): + if filter_annotations: + return self.transform(XPathAnnotationsFilter, expr, remove_empty) + else: + return self.transform(XPathDatasetFilter, expr) + + def update(self, items): + for item in items: + self.put(item) + return self + + def define_categories(self, categories): + assert not self._categories + self._categories = categories + + @staticmethod + def _lazy_image(item): + # NOTE: avoid https://docs.python.org/3/faq/programming.html#why-do-lambdas-defined-in-a-loop-with-different-values-all-return-the-same-result + return lambda: item.image + + @classmethod + def _merge_items(cls, existing_item, current_item, path=None): + return existing_item.wrap(path=path, + image=cls._merge_images(existing_item, current_item), + annotations=cls._merge_anno( + existing_item.annotations, current_item.annotations)) + + @staticmethod + def _merge_images(existing_item, current_item): + image = None + if existing_item.has_image and current_item.has_image: + if existing_item.image.has_data: + image = existing_item.image + else: + image = current_item.image + + if existing_item.image.path != current_item.image.path: + if not existing_item.image.path: + image._path = current_item.image.path + + if all([existing_item.image._size, current_item.image._size]): + assert existing_item.image._size == current_item.image._size, "Image info differs for item '%s'" % existing_item.id + elif existing_item.image._size: + image._size = existing_item.image._size + else: + image._size = current_item.image._size + elif existing_item.has_image: + image = existing_item.image + else: + image = current_item.image + + return image + + @staticmethod + def 
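`Dataset.from_iterable` is the quickest way to build an in-memory dataset for tests or scripts; a plain list of names is promoted to `LabelCategories` automatically. A hypothetical example (item ids and class names are illustrative):

``` python
from datumaro.components.extractor import Bbox, DatasetItem
from datumaro.components.project import Dataset

dataset = Dataset.from_iterable([
        DatasetItem(id='img1', subset='train', annotations=[
            Bbox(0, 0, 10, 10, label=0),  # label indexes into `categories`
        ]),
        DatasetItem(id='img2', subset='val'),
    ], categories=['cat', 'dog'])

assert sorted(dataset.subsets()) == ['train', 'val']
assert len(dataset.get_subset('train')) == 1
```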
_merge_anno(a, b): + # TODO: implement properly with merging and annotations remapping + from .operations import merge_annotations_equal + return merge_annotations_equal(a, b) + + @staticmethod + def _merge_categories(sources): + # TODO: implement properly with merging and annotations remapping + from .operations import merge_categories + return merge_categories(sources) + +class ProjectDataset(Dataset): + def __init__(self, project): + super().__init__() + + self._project = project + config = self.config + env = self.env + + sources = {} + for s_name, source in config.sources.items(): + s_format = source.format or env.PROJECT_EXTRACTOR_NAME + options = {} + options.update(source.options) + + url = source.url + if not source.url: + url = osp.join(config.project_dir, config.sources_dir, s_name) + sources[s_name] = env.make_extractor(s_format, url, **options) + self._sources = sources + + own_source = None + own_source_dir = osp.join(config.project_dir, config.dataset_dir) + if config.project_dir and osp.isdir(own_source_dir): + log.disable(log.INFO) + own_source = env.make_importer(DEFAULT_FORMAT)(own_source_dir) \ + .make_dataset() + log.disable(log.NOTSET) + + # merge categories + # TODO: implement properly with merging and annotations remapping + categories = self._merge_categories(s.categories() + for s in self._sources.values()) + # ovewrite with own categories + if own_source is not None and (not categories or len(own_source) != 0): + categories.update(own_source.categories()) + self._categories = categories + + # merge items + subsets = defaultdict(lambda: self.Subset(self)) + for source_name, source in self._sources.items(): + log.debug("Loading '%s' source contents..." % source_name) + for item in source: + existing_item = subsets[item.subset].items.get(item.id) + if existing_item is not None: + path = existing_item.path + if item.path != path: + path = None # NOTE: move to our own dataset + item = self._merge_items(existing_item, item, path=path) + else: + s_config = config.sources[source_name] + if s_config and \ + s_config.format != env.PROJECT_EXTRACTOR_NAME: + # NOTE: consider imported sources as our own dataset + path = None + else: + path = [source_name] + (item.path or []) + item = item.wrap(path=path) + + subsets[item.subset].items[item.id] = item + + # override with our items, fallback to existing images + if own_source is not None: + log.debug("Loading own dataset...") + for item in own_source: + existing_item = subsets[item.subset].items.get(item.id) + if existing_item is not None: + item = item.wrap(path=None, + image=self._merge_images(existing_item, item)) + + subsets[item.subset].items[item.id] = item + + # TODO: implement subset remapping when needed + subsets_filter = config.subsets + if len(subsets_filter) != 0: + subsets = { k: v for k, v in subsets.items() if k in subsets_filter} + self._subsets = dict(subsets) + + self._length = None + + def iterate_own(self): + return self.select(lambda item: not item.path) + + def get(self, item_id, subset=None, path=None): + if path: + source = path[0] + rest_path = path[1:] + return self._sources[source].get( + item_id=item_id, subset=subset, path=rest_path) + return super().get(item_id, subset) + + def put(self, item, item_id=None, subset=None, path=None): + if path is None: + path = item.path + + if path: + source = path[0] + rest_path = path[1:] + # TODO: reverse remapping + self._sources[source].put(item, + item_id=item_id, subset=subset, path=rest_path) + + if item_id is None: + item_id = item.id + if subset is None: 
+ subset = item.subset + + item = item.wrap(path=path) + if subset not in self._subsets: + self._subsets[subset] = self.Subset(self) + self._subsets[subset].items[item_id] = item + self._length = None + + return item + + def save(self, save_dir=None, merge=False, recursive=True, + save_images=False): + if save_dir is None: + assert self.config.project_dir + save_dir = self.config.project_dir + project = self._project + else: + merge = True + + if merge: + project = Project(Config(self.config)) + project.config.remove('sources') + + save_dir = osp.abspath(save_dir) + dataset_save_dir = osp.join(save_dir, project.config.dataset_dir) + + converter_kwargs = { + 'save_images': save_images, + } + + save_dir_existed = osp.exists(save_dir) + try: + os.makedirs(save_dir, exist_ok=True) + os.makedirs(dataset_save_dir, exist_ok=True) + + if merge: + # merge and save the resulting dataset + self.env.converters.get(DEFAULT_FORMAT).convert( + self, dataset_save_dir, **converter_kwargs) + else: + if recursive: + # children items should already be updated + # so we just save them recursively + for source in self._sources.values(): + if isinstance(source, ProjectDataset): + source.save(**converter_kwargs) + + self.env.converters.get(DEFAULT_FORMAT).convert( + self.iterate_own(), dataset_save_dir, **converter_kwargs) + + project.save(save_dir) + except BaseException: + if not save_dir_existed and osp.isdir(save_dir): + shutil.rmtree(save_dir, ignore_errors=True) + raise + + @property + def env(self): + return self._project.env + + @property + def config(self): + return self._project.config + + @property + def sources(self): + return self._sources + + def _save_branch_project(self, extractor, save_dir=None): + extractor = Dataset.from_extractors(extractor) # apply lazy transforms + + # NOTE: probably this function should be in the ViewModel layer + save_dir = osp.abspath(save_dir) + if save_dir: + dst_project = Project() + else: + if not self.config.project_dir: + raise Exception("Either a save directory or a project " + "directory should be specified") + save_dir = self.config.project_dir + + dst_project = Project(Config(self.config)) + dst_project.config.remove('project_dir') + dst_project.config.remove('sources') + dst_project.config.project_name = osp.basename(save_dir) + + dst_dataset = dst_project.make_dataset() + dst_dataset.define_categories(extractor.categories()) + dst_dataset.update(extractor) + + dst_dataset.save(save_dir=save_dir, merge=True) + + def transform_project(self, method, save_dir=None, **method_kwargs): + # NOTE: probably this function should be in the ViewModel layer + if isinstance(method, str): + method = self.env.make_transform(method) + + transformed = self.transform(method, **method_kwargs) + self._save_branch_project(transformed, save_dir=save_dir) + + def apply_model(self, model, save_dir=None, batch_size=1): + # NOTE: probably this function should be in the ViewModel layer + if isinstance(model, str): + launcher = self._project.make_executable_model(model) + + self.transform_project(ModelTransform, launcher=launcher, + save_dir=save_dir, batch_size=batch_size) + + def export_project(self, save_dir, converter, + filter_expr=None, filter_annotations=False, remove_empty=False): + # NOTE: probably this function should be in the ViewModel layer + dataset = self + if filter_expr: + dataset = dataset.filter(filter_expr, + filter_annotations=filter_annotations, + remove_empty=remove_empty) + + save_dir = osp.abspath(save_dir) + save_dir_existed = osp.exists(save_dir) + try: + 
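`save()` and `export_project()` share a defensive pattern: remember whether the target directory existed, and remove it on failure only if this call created it, so a partially written export never masquerades as a valid one. The pattern in isolation:

``` python
import os
import os.path as osp
import shutil

def write_with_rollback(save_dir, do_write):
    # Only wipe the directory on failure when this call created it.
    existed = osp.exists(save_dir)
    try:
        os.makedirs(save_dir, exist_ok=True)
        do_write(save_dir)
    except BaseException:
        if not existed and osp.isdir(save_dir):
            shutil.rmtree(save_dir, ignore_errors=True)
        raise
```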
os.makedirs(save_dir, exist_ok=True) + converter(dataset, save_dir) + except BaseException: + if not save_dir_existed: + shutil.rmtree(save_dir) + raise + + def filter_project(self, filter_expr, filter_annotations=False, + save_dir=None, remove_empty=False): + # NOTE: probably this function should be in the ViewModel layer + dataset = self + if filter_expr: + dataset = dataset.filter(filter_expr, + filter_annotations=filter_annotations, + remove_empty=remove_empty) + self._save_branch_project(dataset, save_dir=save_dir) + +class Project: + @classmethod + def load(cls, path): + path = osp.abspath(path) + config_path = osp.join(path, PROJECT_DEFAULT_CONFIG.env_dir, + PROJECT_DEFAULT_CONFIG.project_filename) + config = Config.parse(config_path) + config.project_dir = path + config.project_filename = osp.basename(config_path) + return Project(config) + + def save(self, save_dir=None): + config = self.config + + if save_dir is None: + assert config.project_dir + project_dir = config.project_dir + else: + project_dir = save_dir + + env_dir = osp.join(project_dir, config.env_dir) + save_dir = osp.abspath(env_dir) + + project_dir_existed = osp.exists(project_dir) + env_dir_existed = osp.exists(env_dir) + try: + os.makedirs(save_dir, exist_ok=True) + + config_path = osp.join(save_dir, config.project_filename) + config.dump(config_path) + except BaseException: + if not env_dir_existed: + shutil.rmtree(save_dir, ignore_errors=True) + if not project_dir_existed: + shutil.rmtree(project_dir, ignore_errors=True) + raise + + @staticmethod + def generate(save_dir, config=None): + config = Config(config) + config.project_dir = save_dir + project = Project(config) + project.save(save_dir) + return project + + @staticmethod + def import_from(path, dataset_format, env=None, **kwargs): + if env is None: + env = Environment() + importer = env.make_importer(dataset_format) + return importer(path, **kwargs) + + def __init__(self, config=None): + self.config = Config(config, + fallback=PROJECT_DEFAULT_CONFIG, schema=PROJECT_SCHEMA) + self.env = Environment(self.config) + + def make_dataset(self): + return ProjectDataset(self) + + def add_source(self, name, value=None): + if value is None or isinstance(value, (dict, Config)): + value = Source(value) + self.config.sources[name] = value + self.env.sources.register(name, value) + + def remove_source(self, name): + self.config.sources.remove(name) + self.env.sources.unregister(name) + + def get_source(self, name): + try: + return self.config.sources[name] + except KeyError: + raise KeyError("Source '%s' is not found" % name) + + def get_subsets(self): + return self.config.subsets + + def set_subsets(self, value): + if not value: + self.config.remove('subsets') + else: + self.config.subsets = value + + def add_model(self, name, value=None): + if value is None or isinstance(value, (dict, Config)): + value = Model(value) + self.env.register_model(name, value) + self.config.models[name] = value + + def get_model(self, name): + try: + return self.env.models.get(name) + except KeyError: + raise KeyError("Model '%s' is not found" % name) + + def remove_model(self, name): + self.config.models.remove(name) + self.env.unregister_model(name) + + def make_executable_model(self, name): + model = self.get_model(name) + return self.env.make_launcher(model.launcher, + **model.options, model_dir=self.local_model_dir(name)) + + def make_source_project(self, name): + source = self.get_source(name) + + config = Config(self.config) + config.remove('sources') + config.remove('subsets') + 
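Putting the `Project` API together, a hypothetical session that generates a project, attaches a COCO source, and reloads it from disk. The source URL is a placeholder; `coco_instances` is the extractor name registered by the COCO plugin below:

``` python
import tempfile

from datumaro.components.project import Project

with tempfile.TemporaryDirectory() as tmp_dir:
    project = Project.generate(tmp_dir)  # writes the project env and config
    project.add_source('my-coco', {
        'url': '/path/to/annotations/instances_train.json',  # placeholder
        'format': 'coco_instances',
    })
    project.save()

    reloaded = Project.load(tmp_dir)
    assert 'my-coco' in reloaded.config.sources
```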
project = Project(config) + project.add_source(name, source) + return project + + def local_model_dir(self, model_name): + return osp.join( + self.config.env_dir, self.config.models_dir, model_name) + + def local_source_dir(self, source_name): + return osp.join(self.config.sources_dir, source_name) + +# pylint: disable=function-redefined +def load_project_as_dataset(url): + # implement the function declared above + return Project.load(url).make_dataset() +# pylint: enable=function-redefined diff --git a/datumaro/plugins/__init__.py b/datumaro/plugins/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/datumaro/plugins/accuracy_checker_plugin/__init__.py b/datumaro/plugins/accuracy_checker_plugin/__init__.py new file mode 100644 index 0000000000..fdd6d29179 --- /dev/null +++ b/datumaro/plugins/accuracy_checker_plugin/__init__.py @@ -0,0 +1,4 @@ +# Copyright (C) 2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + diff --git a/datumaro/plugins/accuracy_checker_plugin/details/ac.py b/datumaro/plugins/accuracy_checker_plugin/details/ac.py new file mode 100644 index 0000000000..4fc2ffb5c6 --- /dev/null +++ b/datumaro/plugins/accuracy_checker_plugin/details/ac.py @@ -0,0 +1,116 @@ + +# Copyright (C) 2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from datumaro.util.tf_util import import_tf +import_tf() # prevent TF loading and potential interpeter crash + +from itertools import groupby + +from accuracy_checker.adapters import create_adapter +from accuracy_checker.data_readers import DataRepresentation +from accuracy_checker.launcher import InputFeeder, create_launcher +from accuracy_checker.postprocessor import PostprocessingExecutor +from accuracy_checker.preprocessor import PreprocessingExecutor +from accuracy_checker.utils import extract_image_representations + +from datumaro.components.extractor import AnnotationType, LabelCategories + +from .representation import import_predictions + + +class _FakeDataset: + def __init__(self, metadata=None): + self.metadata = metadata or {} + +class GenericAcLauncher: + @staticmethod + def from_config(config): + launcher_config = config['launcher'] + launcher = create_launcher(launcher_config) + + dataset = _FakeDataset() + adapter_config = config.get('adapter') or launcher_config.get('adapter') + label_config = adapter_config.get('labels') \ + if isinstance(adapter_config, dict) else None + if label_config: + assert isinstance(label_config, (list, dict)) + if isinstance(label_config, list): + label_config = dict(enumerate(label_config)) + + dataset.metadata = {'label_map': { + int(key): label for key, label in label_config.items() + }} + adapter = create_adapter(adapter_config, launcher, dataset) + + preproc_config = config.get('preprocessing') + preproc = None + if preproc_config: + preproc = PreprocessingExecutor(preproc_config, + dataset_meta=dataset.metadata, + input_shapes=launcher.inputs_info_for_meta() + ) + + postproc_config = config.get('postprocessing') + postproc = None + if postproc_config: + postproc = PostprocessingExecutor(postproc_config, + dataset_meta=dataset.metadata, + ) + + return __class__(launcher, + adapter=adapter, preproc=preproc, postproc=postproc) + + def __init__(self, launcher, adapter=None, + preproc=None, postproc=None, input_feeder=None): + self._launcher = launcher + self._input_feeder = input_feeder or InputFeeder( + launcher.config.get('inputs', []), launcher.inputs, + launcher.fit_to_input, launcher.default_layout + ) + self._adapter = adapter + self._preproc = preproc + 
self._postproc = postproc + + self._categories = self._init_categories() + + def launch_raw(self, inputs): + ids = range(len(inputs)) + inputs = [DataRepresentation(inp, identifier=id) + for id, inp in zip(ids, inputs)] + _, batch_meta = extract_image_representations(inputs) + + if self._preproc: + inputs = self._preproc.process(inputs) + + inputs = self._input_feeder.fill_inputs(inputs) + outputs = self._launcher.predict(inputs, batch_meta) + + if self._adapter: + outputs = self._adapter.process(outputs, ids, batch_meta) + + if self._postproc: + outputs = self._postproc.process(outputs) + + return outputs + + def launch(self, inputs): + outputs = self.launch_raw(inputs) + return [import_predictions(g) for _, g in + groupby(outputs, key=lambda o: o.identifier)] + + def categories(self): + return self._categories + + def _init_categories(self): + if self._adapter is None or self._adapter.label_map is None: + return None + + label_map = sorted(self._adapter.label_map.items(), key=lambda e: e[0]) + + label_cat = LabelCategories() + for _, label in label_map: + label_cat.add(label) + + return { AnnotationType.label: label_cat } diff --git a/datumaro/plugins/accuracy_checker_plugin/details/representation.py b/datumaro/plugins/accuracy_checker_plugin/details/representation.py new file mode 100644 index 0000000000..d7007806bf --- /dev/null +++ b/datumaro/plugins/accuracy_checker_plugin/details/representation.py @@ -0,0 +1,62 @@ + +# Copyright (C) 2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from datumaro.util.tf_util import import_tf +import_tf() # prevent TF loading and potential interpeter crash + +import accuracy_checker.representation as ac + +import datumaro.components.extractor as dm +from datumaro.util.annotation_util import softmax + +def import_predictions(predictions): + # Convert Accuracy checker predictions to Datumaro annotations + + anns = [] + + for pred in predictions: + anns.extend(import_prediction(pred)) + + return anns + +def import_prediction(pred): + if isinstance(pred, ac.ClassificationPrediction): + scores = softmax(pred.scores) + return (dm.Label(label_id, attributes={'score': float(score)}) + for label_id, score in enumerate(scores)) + elif isinstance(pred, ac.ArgMaxClassificationPrediction): + return (dm.Label(int(pred.label)), ) + elif isinstance(pred, ac.CharacterRecognitionPrediction): + return (dm.Label(int(pred.label)), ) + elif isinstance(pred, (ac.DetectionPrediction, ac.ActionDetectionPrediction)): + return (dm.Bbox(x0, y0, x1 - x0, y1 - y0, int(label_id), + attributes={'score': float(score)}) + for label, score, x0, y0, x1, y1 in zip(pred.labels, pred.scores, + pred.x_mins, pred.y_mins, pred.x_maxs, pred.y_maxs) + ) + elif isinstance(pred, ac.DepthEstimationPrediction): + return (dm.Mask(pred.depth_map), ) # 2d floating point mask + # elif isinstance(pred, ac.HitRatioPrediction): + # - + elif isinstance(pred, ac.ImageInpaintingPrediction): + return (dm.Mask(pred.value), ) # an image + # elif isinstance(pred, ac.MultiLabelRecognitionPrediction): + # - + # elif isinstance(pred, ac.MachineTranslationPrediction): + # - + # elif isinstance(pred, ac.QuestionAnsweringPrediction): + # - + # elif isinstance(pred, ac.PoseEstimation3dPrediction): + # - + # elif isinstance(pred, ac.PoseEstimationPrediction): + # - + # elif isinstance(pred, ac.RegressionPrediction): + # - + else: + raise NotImplementedError("Can't convert %s" % type(pred)) + + + + diff --git a/datumaro/plugins/accuracy_checker_plugin/launcher.py 
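`import_prediction` converts raw classification scores into per-label `score` attributes through a softmax. A presumed equivalent of `datumaro.util.annotation_util.softmax` (the max-shift is the usual trick for numerical stability):

``` python
import numpy as np

def softmax(scores):
    # Shift by the max for numerical stability, then normalize.
    exps = np.exp(scores - np.max(scores))
    return exps / exps.sum()

probs = softmax(np.array([1.0, 2.0, 3.0]))
assert abs(probs.sum() - 1.0) < 1e-9
assert probs.argmax() == 2  # highest raw score keeps the highest probability
```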
b/datumaro/plugins/accuracy_checker_plugin/launcher.py new file mode 100644 index 0000000000..1525110830 --- /dev/null +++ b/datumaro/plugins/accuracy_checker_plugin/launcher.py @@ -0,0 +1,37 @@ + +# Copyright (C) 2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import os.path as osp +import yaml + +from datumaro.components.cli_plugin import CliPlugin +from datumaro.components.launcher import Launcher + +from .details.ac import GenericAcLauncher as _GenericAcLauncher + + +class AcLauncher(Launcher, CliPlugin): + """ + Generic model launcher with Accuracy Checker backend. + """ + + @classmethod + def build_cmdline_parser(cls, **kwargs): + parser = super().build_cmdline_parser(**kwargs) + parser.add_argument('-c', '--config', type=osp.abspath, required=True, + help="Path to the launcher configuration file (.yml)") + return parser + + def __init__(self, config, model_dir=None): + model_dir = model_dir or '' + with open(osp.join(model_dir, config), 'r') as f: + config = yaml.safe_load(f) + self._launcher = _GenericAcLauncher.from_config(config) + + def launch(self, inputs): + return self._launcher.launch(inputs) + + def categories(self): + return self._launcher.categories() diff --git a/datumaro/plugins/coco_format/__init__.py b/datumaro/plugins/coco_format/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/datumaro/plugins/coco_format/converter.py b/datumaro/plugins/coco_format/converter.py new file mode 100644 index 0000000000..27cdd08754 --- /dev/null +++ b/datumaro/plugins/coco_format/converter.py @@ -0,0 +1,596 @@ + +# Copyright (C) 2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import json +import logging as log +import os +import os.path as osp +from enum import Enum +from itertools import groupby + +import pycocotools.mask as mask_utils + +import datumaro.util.annotation_util as anno_tools +import datumaro.util.mask_tools as mask_tools +from datumaro.components.converter import Converter +from datumaro.components.extractor import (_COORDINATE_ROUNDING_DIGITS, + DEFAULT_SUBSET_NAME, AnnotationType, Points) +from datumaro.util import cast, find, str_to_bool + +from .format import CocoPath, CocoTask + +SegmentationMode = Enum('SegmentationMode', ['guess', 'polygons', 'mask']) + +class _TaskConverter: + def __init__(self, context): + self._min_ann_id = 1 + self._context = context + + data = { + 'licenses': [], + 'info': {}, + 'categories': [], + 'images': [], + 'annotations': [] + } + + data['licenses'].append({ + 'name': '', + 'id': 0, + 'url': '' + }) + + data['info'] = { + 'contributor': '', + 'date_created': '', + 'description': '', + 'url': '', + 'version': '', + 'year': '' + } + self._data = data + + def is_empty(self): + return len(self._data['annotations']) == 0 + + def _get_image_id(self, item): + return self._context._get_image_id(item) + + def save_image_info(self, item, filename): + if item.has_image: + h, w = item.image.size + else: + h = 0 + w = 0 + + self._data['images'].append({ + 'id': self._get_image_id(item), + 'width': int(w), + 'height': int(h), + 'file_name': cast(filename, str, ''), + 'license': 0, + 'flickr_url': '', + 'coco_url': '', + 'date_captured': 0, + }) + + def save_categories(self, dataset): + raise NotImplementedError() + + def save_annotations(self, item): + raise NotImplementedError() + + def write(self, path): + next_id = self._min_ann_id + for ann in self.annotations: + if ann['id'] is None: + ann['id'] = next_id + next_id += 1 + + with open(path, 'w') as outfile: + json.dump(self._data, outfile) + + 
@property + def annotations(self): + return self._data['annotations'] + + @property + def categories(self): + return self._data['categories'] + + def _get_ann_id(self, annotation): + ann_id = annotation.id + if ann_id: + self._min_ann_id = max(ann_id, self._min_ann_id) + return ann_id + + @staticmethod + def _convert_attributes(ann): + return { k: v for k, v in ann.attributes.items() + if k not in {'is_crowd', 'score'} + } + +class _ImageInfoConverter(_TaskConverter): + def is_empty(self): + return len(self._data['images']) == 0 + + def save_categories(self, dataset): + pass + + def save_annotations(self, item): + pass + +class _CaptionsConverter(_TaskConverter): + def save_categories(self, dataset): + pass + + def save_annotations(self, item): + for ann_idx, ann in enumerate(item.annotations): + if ann.type != AnnotationType.caption: + continue + + elem = { + 'id': self._get_ann_id(ann), + 'image_id': self._get_image_id(item), + 'category_id': 0, # NOTE: workaround for a bug in cocoapi + 'caption': ann.caption, + } + if 'score' in ann.attributes: + try: + elem['score'] = float(ann.attributes['score']) + except Exception as e: + log.warning("Item '%s', ann #%s: failed to convert " + "attribute 'score': %e" % (item.id, ann_idx, e)) + if self._context._allow_attributes: + elem['attributes'] = self._convert_attributes(ann) + + self.annotations.append(elem) + +class _InstancesConverter(_TaskConverter): + def save_categories(self, dataset): + label_categories = dataset.categories().get(AnnotationType.label) + if label_categories is None: + return + + for idx, cat in enumerate(label_categories.items): + self.categories.append({ + 'id': 1 + idx, + 'name': cast(cat.name, str, ''), + 'supercategory': cast(cat.parent, str, ''), + }) + + @classmethod + def crop_segments(cls, instances, img_width, img_height): + instances = sorted(instances, key=lambda x: x[0].z_order) + + segment_map = [] + segments = [] + for inst_idx, (_, polygons, mask, _) in enumerate(instances): + if polygons: + segment_map.extend(inst_idx for p in polygons) + segments.extend(polygons) + elif mask is not None: + segment_map.append(inst_idx) + segments.append(mask) + + segments = mask_tools.crop_covered_segments( + segments, img_width, img_height) + + for inst_idx, inst in enumerate(instances): + new_segments = [s for si_id, s in zip(segment_map, segments) + if si_id == inst_idx] + + if not new_segments: + inst[1] = [] + inst[2] = None + continue + + if inst[1]: + inst[1] = sum(new_segments, []) + else: + mask = mask_tools.merge_masks(new_segments) + inst[2] = mask_tools.mask_to_rle(mask) + + return instances + + def find_instance_parts(self, group, img_width, img_height): + boxes = [a for a in group if a.type == AnnotationType.bbox] + polygons = [a for a in group if a.type == AnnotationType.polygon] + masks = [a for a in group if a.type == AnnotationType.mask] + + anns = boxes + polygons + masks + leader = anno_tools.find_group_leader(anns) + bbox = anno_tools.max_bbox(anns) + mask = None + polygons = [p.points for p in polygons] + + if self._context._segmentation_mode == SegmentationMode.guess: + use_masks = True == leader.attributes.get('is_crowd', + find(masks, lambda x: x.label == leader.label) is not None) + elif self._context._segmentation_mode == SegmentationMode.polygons: + use_masks = False + elif self._context._segmentation_mode == SegmentationMode.mask: + use_masks = True + else: + raise NotImplementedError("Unexpected segmentation mode '%s'" % \ + self._context._segmentation_mode) + + if use_masks: + if polygons: + 
mask = mask_tools.rles_to_mask(polygons, img_width, img_height) + + if masks: + if mask is not None: + masks += [mask] + mask = mask_tools.merge_masks([m.image for m in masks]) + + if mask is not None: + mask = mask_tools.mask_to_rle(mask) + polygons = [] + else: + if masks: + mask = mask_tools.merge_masks([m.image for m in masks]) + polygons += mask_tools.mask_to_polygons(mask) + mask = None + + return [leader, polygons, mask, bbox] + + @staticmethod + def find_instance_anns(annotations): + return [a for a in annotations + if a.type in { AnnotationType.bbox, + AnnotationType.polygon, AnnotationType.mask } + ] + + @classmethod + def find_instances(cls, annotations): + return anno_tools.find_instances(cls.find_instance_anns(annotations)) + + def save_annotations(self, item): + instances = self.find_instances(item.annotations) + if not instances: + return + + if not item.has_image: + log.warn("Item '%s': skipping writing instances " + "since no image info available" % item.id) + return + h, w = item.image.size + instances = [self.find_instance_parts(i, w, h) for i in instances] + + if self._context._crop_covered: + instances = self.crop_segments(instances, w, h) + + for instance in instances: + elem = self.convert_instance(instance, item) + if elem: + self.annotations.append(elem) + + def convert_instance(self, instance, item): + ann, polygons, mask, bbox = instance + + is_crowd = mask is not None + if is_crowd: + segmentation = { + 'counts': list(int(c) for c in mask['counts']), + 'size': list(int(c) for c in mask['size']) + } + else: + segmentation = [list(map(float, p)) for p in polygons] + + area = 0 + if segmentation: + if item.has_image: + h, w = item.image.size + else: + # NOTE: here we can guess the image size as + # it is only needed for the area computation + w = bbox[0] + bbox[2] + h = bbox[1] + bbox[3] + + rles = mask_utils.frPyObjects(segmentation, h, w) + if is_crowd: + rles = [rles] + else: + rles = mask_utils.merge(rles) + area = mask_utils.area(rles) + else: + _, _, w, h = bbox + segmentation = [] + area = w * h + + elem = { + 'id': self._get_ann_id(ann), + 'image_id': self._get_image_id(item), + 'category_id': cast(ann.label, int, -1) + 1, + 'segmentation': segmentation, + 'area': float(area), + 'bbox': [round(float(n), _COORDINATE_ROUNDING_DIGITS) for n in bbox], + 'iscrowd': int(is_crowd), + } + if 'score' in ann.attributes: + try: + elem['score'] = float(ann.attributes['score']) + except Exception as e: + log.warning("Item '%s': failed to convert attribute " + "'score': %e" % (item.id, e)) + if self._context._allow_attributes: + elem['attributes'] = self._convert_attributes(ann) + + return elem + +class _KeypointsConverter(_InstancesConverter): + def save_categories(self, dataset): + label_categories = dataset.categories().get(AnnotationType.label) + if label_categories is None: + return + point_categories = dataset.categories().get(AnnotationType.points) + + for idx, label_cat in enumerate(label_categories.items): + cat = { + 'id': 1 + idx, + 'name': cast(label_cat.name, str, ''), + 'supercategory': cast(label_cat.parent, str, ''), + 'keypoints': [], + 'skeleton': [], + } + + if point_categories is not None: + kp_cat = point_categories.items.get(idx) + if kp_cat is not None: + cat.update({ + 'keypoints': [str(l) for l in kp_cat.labels], + 'skeleton': [list(map(int, j)) for j in kp_cat.joints], + }) + self.categories.append(cat) + + def save_annotations(self, item): + point_annotations = [a for a in item.annotations + if a.type == AnnotationType.points] + if not 
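`convert_instance` computes the `area` field the way cocoapi expects: polygon parts are rasterized to RLE for the known image size, merged, and the foreground pixels counted. The same steps with `pycocotools` directly:

``` python
import pycocotools.mask as mask_utils

h, w = 100, 100
square = [[10, 10, 60, 10, 60, 60, 10, 60]]  # one 50x50 polygon part

rles = mask_utils.frPyObjects(square, h, w)  # polygon -> per-part RLE
rle = mask_utils.merge(rles)                 # union of all parts
print(float(mask_utils.area(rle)))           # ~2500.0
```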
point_annotations: + return + + # Create annotations for solitary keypoints annotations + for points in self.find_solitary_points(item.annotations): + instance = [points, [], None, points.get_bbox()] + elem = super().convert_instance(instance, item) + elem.update(self.convert_points_object(points)) + self.annotations.append(elem) + + # Create annotations for complete instance + keypoints annotations + super().save_annotations(item) + + @classmethod + def find_solitary_points(cls, annotations): + annotations = sorted(annotations, key=lambda a: a.group) + solitary_points = [] + + for g_id, group in groupby(annotations, lambda a: a.group): + if not g_id or g_id and not cls.find_instance_anns(group): + group = [a for a in group if a.type == AnnotationType.points] + solitary_points.extend(group) + + return solitary_points + + @staticmethod + def convert_points_object(ann): + keypoints = [] + points = ann.points + visibility = ann.visibility + for index in range(0, len(points), 2): + kp = points[index : index + 2] + state = visibility[index // 2].value + keypoints.extend([*kp, state]) + + num_annotated = len([v for v in visibility \ + if v != Points.Visibility.absent]) + + return { + 'keypoints': keypoints, + 'num_keypoints': num_annotated, + } + + def convert_instance(self, instance, item): + points_ann = find(item.annotations, lambda x: \ + x.type == AnnotationType.points and \ + instance[0].group and x.group == instance[0].group) + if not points_ann: + return None + + elem = super().convert_instance(instance, item) + elem.update(self.convert_points_object(points_ann)) + + return elem + +class _LabelsConverter(_TaskConverter): + def save_categories(self, dataset): + label_categories = dataset.categories().get(AnnotationType.label) + if label_categories is None: + return + + for idx, cat in enumerate(label_categories.items): + self.categories.append({ + 'id': 1 + idx, + 'name': cast(cat.name, str, ''), + 'supercategory': cast(cat.parent, str, ''), + }) + + def save_annotations(self, item): + for ann in item.annotations: + if ann.type != AnnotationType.label: + continue + + elem = { + 'id': self._get_ann_id(ann), + 'image_id': self._get_image_id(item), + 'category_id': int(ann.label) + 1, + } + if 'score' in ann.attributes: + try: + elem['score'] = float(ann.attributes['score']) + except Exception as e: + log.warning("Item '%s': failed to convert attribute " + "'score': %e" % (item.id, e)) + if self._context._allow_attributes: + elem['attributes'] = self._convert_attributes(ann) + + self.annotations.append(elem) + +class CocoConverter(Converter): + @staticmethod + def _split_tasks_string(s): + return [CocoTask[i.strip()] for i in s.split(',')] + + @classmethod + def build_cmdline_parser(cls, **kwargs): + parser = super().build_cmdline_parser(**kwargs) + parser.add_argument('--segmentation-mode', + choices=[m.name for m in SegmentationMode], + default=SegmentationMode.guess.name, + help=""" + Save mode for instance segmentation:|n + - '{sm.guess.name}': guess the mode for each instance,|n + |s|suse 'is_crowd' attribute as hint|n + - '{sm.polygons.name}': save polygons,|n + |s|smerge and convert masks, prefer polygons|n + - '{sm.mask.name}': save masks,|n + |s|smerge and convert polygons, prefer masks|n + Default: %(default)s. 
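`convert_points_object` flattens Datumaro points into COCO's `[x1, y1, v1, x2, y2, v2, ...]` layout, where `v` follows COCO's visibility convention (0 absent, 1 labeled but hidden, 2 visible). The same transform, with plain ints standing in for the visibility enum:

``` python
def to_coco_keypoints(points, visibility):
    # points: flat [x1, y1, x2, y2, ...]; visibility: one value per point
    keypoints = []
    for i in range(0, len(points), 2):
        keypoints.extend([points[i], points[i + 1], visibility[i // 2]])
    return keypoints

assert to_coco_keypoints([10, 20, 30, 40], [2, 0]) == [10, 20, 2, 30, 40, 0]
```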
+ """.format(sm=SegmentationMode)) + parser.add_argument('--crop-covered', action='store_true', + help="Crop covered segments so that background objects' " + "segmentation was more accurate (default: %(default)s)") + parser.add_argument('--allow-attributes', + type=str_to_bool, default=True, + help="Allow export of attributes (default: %(default)s)") + parser.add_argument('--tasks', type=cls._split_tasks_string, + help="COCO task filter, comma-separated list of {%s} " + "(default: all)" % ', '.join(t.name for t in CocoTask)) + return parser + + DEFAULT_IMAGE_EXT = CocoPath.IMAGE_EXT + + _TASK_CONVERTER = { + CocoTask.image_info: _ImageInfoConverter, + CocoTask.instances: _InstancesConverter, + CocoTask.person_keypoints: _KeypointsConverter, + CocoTask.captions: _CaptionsConverter, + CocoTask.labels: _LabelsConverter, + } + + def __init__(self, extractor, save_dir, + tasks=None, segmentation_mode=None, crop_covered=False, + allow_attributes=True, **kwargs): + super().__init__(extractor, save_dir, **kwargs) + + assert tasks is None or isinstance(tasks, (CocoTask, list, str)) + if isinstance(tasks, CocoTask): + tasks = [tasks] + elif isinstance(tasks, str): + tasks = [CocoTask[tasks]] + elif tasks: + for i, t in enumerate(tasks): + if isinstance(t, str): + tasks[i] = CocoTask[t] + else: + assert t in CocoTask, t + self._tasks = tasks + + assert segmentation_mode is None or \ + isinstance(segmentation_mode, str) or \ + segmentation_mode in SegmentationMode + if segmentation_mode is None: + segmentation_mode = SegmentationMode.guess + if isinstance(segmentation_mode, str): + segmentation_mode = SegmentationMode[segmentation_mode] + self._segmentation_mode = segmentation_mode + + self._crop_covered = crop_covered + self._allow_attributes = allow_attributes + + self._image_ids = {} + + def _make_dirs(self): + self._images_dir = osp.join(self._save_dir, CocoPath.IMAGES_DIR) + os.makedirs(self._images_dir, exist_ok=True) + + self._ann_dir = osp.join(self._save_dir, CocoPath.ANNOTATIONS_DIR) + os.makedirs(self._ann_dir, exist_ok=True) + + def _make_task_converter(self, task): + if task not in self._TASK_CONVERTER: + raise NotImplementedError() + return self._TASK_CONVERTER[task](self) + + def _make_task_converters(self): + return { task: self._make_task_converter(task) + for task in (self._tasks or self._TASK_CONVERTER) } + + def _get_image_id(self, item): + image_id = self._image_ids.get(item.id) + if image_id is None: + image_id = cast(item.attributes.get('id'), int, + len(self._image_ids) + 1) + self._image_ids[item.id] = image_id + return image_id + + def _save_image(self, item, path=None): + super()._save_image(item, + osp.join(self._images_dir, self._make_image_filename(item))) + + def apply(self): + self._make_dirs() + + for subset_name in self._extractor.subsets() or [None]: + if subset_name: + subset = self._extractor.get_subset(subset_name) + else: + subset_name = DEFAULT_SUBSET_NAME + subset = self._extractor + + task_converters = self._make_task_converters() + for task_conv in task_converters.values(): + task_conv.save_categories(subset) + for item in subset: + if self._save_images: + if item.has_image: + self._save_image(item) + else: + log.debug("Item '%s' has no image info", item.id) + for task_conv in task_converters.values(): + task_conv.save_image_info(item, + self._make_image_filename(item)) + task_conv.save_annotations(item) + + for task, task_conv in task_converters.items(): + if task_conv.is_empty() and not self._tasks: + continue + task_conv.write(osp.join(self._ann_dir, + 
'%s_%s.json' % (task.name, subset_name))) + +class CocoInstancesConverter(CocoConverter): + def __init__(self, *args, **kwargs): + kwargs['tasks'] = CocoTask.instances + super().__init__(*args, **kwargs) + +class CocoImageInfoConverter(CocoConverter): + def __init__(self, *args, **kwargs): + kwargs['tasks'] = CocoTask.image_info + super().__init__(*args, **kwargs) + +class CocoPersonKeypointsConverter(CocoConverter): + def __init__(self, *args, **kwargs): + kwargs['tasks'] = CocoTask.person_keypoints + super().__init__(*args, **kwargs) + +class CocoCaptionsConverter(CocoConverter): + def __init__(self, *args, **kwargs): + kwargs['tasks'] = CocoTask.captions + super().__init__(*args, **kwargs) + +class CocoLabelsConverter(CocoConverter): + def __init__(self, *args, **kwargs): + kwargs['tasks'] = CocoTask.labels + super().__init__(*args, **kwargs) diff --git a/datumaro/plugins/coco_format/extractor.py b/datumaro/plugins/coco_format/extractor.py new file mode 100644 index 0000000000..73e7882036 --- /dev/null +++ b/datumaro/plugins/coco_format/extractor.py @@ -0,0 +1,261 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from collections import OrderedDict +import logging as log +import os.path as osp + +from pycocotools.coco import COCO +import pycocotools.mask as mask_utils + +from datumaro.components.extractor import (SourceExtractor, + DEFAULT_SUBSET_NAME, DatasetItem, + AnnotationType, Label, RleMask, Points, Polygon, Bbox, Caption, + LabelCategories, PointsCategories +) +from datumaro.util.image import Image + +from .format import CocoTask, CocoPath + + +class _CocoExtractor(SourceExtractor): + def __init__(self, path, task, merge_instance_polygons=False): + assert osp.isfile(path), path + + subset = osp.splitext(osp.basename(path))[0].rsplit('_', maxsplit=1) + subset = subset[1] if len(subset) == 2 else None + super().__init__(subset=subset) + + rootpath = '' + if path.endswith(osp.join(CocoPath.ANNOTATIONS_DIR, osp.basename(path))): + rootpath = path.rsplit(CocoPath.ANNOTATIONS_DIR, maxsplit=1)[0] + images_dir = '' + if rootpath and osp.isdir(osp.join(rootpath, CocoPath.IMAGES_DIR)): + images_dir = osp.join(rootpath, CocoPath.IMAGES_DIR) + if osp.isdir(osp.join(images_dir, subset or DEFAULT_SUBSET_NAME)): + images_dir = osp.join(images_dir, subset or DEFAULT_SUBSET_NAME) + self._images_dir = images_dir + self._task = task + + self._merge_instance_polygons = merge_instance_polygons + + loader = self._make_subset_loader(path) + self._load_categories(loader) + self._items = self._load_items(loader) + + def categories(self): + return self._categories + + def __iter__(self): + for item in self._items.values(): + yield item + + def __len__(self): + return len(self._items) + + @staticmethod + def _make_subset_loader(path): + # COCO API has an 'unclosed file' warning + coco_api = COCO() + with open(path, 'r') as f: + import json + dataset = json.load(f) + + coco_api.dataset = dataset + coco_api.createIndex() + return coco_api + + def _load_categories(self, loader): + self._categories = {} + + if self._task in [CocoTask.instances, CocoTask.labels, + CocoTask.person_keypoints, + # TODO: Task.stuff, CocoTask.panoptic + ]: + label_categories, label_map = self._load_label_categories(loader) + self._categories[AnnotationType.label] = label_categories + self._label_map = label_map + + if self._task == CocoTask.person_keypoints: + person_kp_categories = self._load_person_kp_categories(loader) + self._categories[AnnotationType.points] = person_kp_categories + + # 
pylint: disable=no-self-use + def _load_label_categories(self, loader): + catIds = loader.getCatIds() + cats = loader.loadCats(catIds) + + categories = LabelCategories() + label_map = {} + for idx, cat in enumerate(cats): + label_map[cat['id']] = idx + categories.add(name=cat['name'], parent=cat['supercategory']) + + return categories, label_map + # pylint: enable=no-self-use + + def _load_person_kp_categories(self, loader): + catIds = loader.getCatIds() + cats = loader.loadCats(catIds) + + categories = PointsCategories() + for cat in cats: + label_id = self._label_map[cat['id']] + categories.add(label_id=label_id, + labels=cat['keypoints'], joints=cat['skeleton'] + ) + + return categories + + def _load_items(self, loader): + items = OrderedDict() + + for img_id in loader.getImgIds(): + image_info = loader.loadImgs(img_id)[0] + image_path = osp.join(self._images_dir, image_info['file_name']) + image_size = (image_info.get('height'), image_info.get('width')) + if all(image_size): + image_size = (int(image_size[0]), int(image_size[1])) + else: + image_size = None + image = Image(path=image_path, size=image_size) + + anns = loader.getAnnIds(imgIds=img_id) + anns = loader.loadAnns(anns) + anns = sum((self._load_annotations(a, image_info) for a in anns), []) + + items[img_id] = DatasetItem( + id=osp.splitext(image_info['file_name'])[0], + subset=self._subset, image=image, annotations=anns, + attributes={'id': img_id}) + + return items + + def _get_label_id(self, ann): + cat_id = ann.get('category_id') + if cat_id in [0, None]: + return None + return self._label_map[cat_id] + + def _load_annotations(self, ann, image_info=None): + parsed_annotations = [] + + ann_id = ann.get('id') + + attributes = {} + if 'attributes' in ann: + try: + attributes.update(ann['attributes']) + except Exception as e: + log.debug("item #%s: failed to read annotation attributes: %s", + image_info['id'], e) + if 'score' in ann: + attributes['score'] = ann['score'] + + group = ann_id # make sure all tasks' annotations are merged + + if self._task in [CocoTask.instances, CocoTask.person_keypoints]: + x, y, w, h = ann['bbox'] + label_id = self._get_label_id(ann) + + is_crowd = bool(ann['iscrowd']) + attributes['is_crowd'] = is_crowd + + if self._task is CocoTask.person_keypoints: + keypoints = ann['keypoints'] + points = [p for i, p in enumerate(keypoints) if i % 3 != 2] + visibility = keypoints[2::3] + parsed_annotations.append( + Points(points, visibility, label=label_id, + id=ann_id, attributes=attributes, group=group) + ) + + segmentation = ann.get('segmentation') + if segmentation and segmentation != [[]]: + rle = None + + if isinstance(segmentation, list): + if not self._merge_instance_polygons: + # polygon - a single object can consist of multiple parts + for polygon_points in segmentation: + parsed_annotations.append(Polygon( + points=polygon_points, label=label_id, + id=ann_id, attributes=attributes, group=group + )) + else: + # merge all parts into a single mask RLE + img_h = image_info['height'] + img_w = image_info['width'] + rles = mask_utils.frPyObjects(segmentation, img_h, img_w) + rle = mask_utils.merge(rles) + elif isinstance(segmentation['counts'], list): + # uncompressed RLE + img_h = image_info['height'] + img_w = image_info['width'] + mask_h, mask_w = segmentation['size'] + if img_h == mask_h and img_w == mask_w: + rle = mask_utils.frPyObjects( + [segmentation], mask_h, mask_w)[0] + else: + log.warning("item #%s: mask #%s " + "does not match image size: %s vs. %s. 
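COCO category ids may be sparse and 1-based, so `_load_label_categories` builds a `label_map` from the original ids to dense 0-based Datumaro label indices; every later `category_id` lookup goes through it:

``` python
def build_label_map(coco_categories):
    # sparse COCO ids (e.g. 1, 3, 7) -> dense 0-based label indices
    return {cat['id']: idx for idx, cat in enumerate(coco_categories)}

cats = [{'id': 1, 'name': 'person'}, {'id': 3, 'name': 'car'}]
assert build_label_map(cats) == {1: 0, 3: 1}
```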
" + "Skipping this annotation.", + image_info['id'], ann_id, + (mask_h, mask_w), (img_h, img_w) + ) + else: + # compressed RLE + rle = segmentation + + if rle is not None: + parsed_annotations.append(RleMask(rle=rle, label=label_id, + id=ann_id, attributes=attributes, group=group + )) + else: + parsed_annotations.append( + Bbox(x, y, w, h, label=label_id, + id=ann_id, attributes=attributes, group=group) + ) + elif self._task is CocoTask.labels: + label_id = self._get_label_id(ann) + parsed_annotations.append( + Label(label=label_id, + id=ann_id, attributes=attributes, group=group) + ) + elif self._task is CocoTask.captions: + caption = ann['caption'] + parsed_annotations.append( + Caption(caption, + id=ann_id, attributes=attributes, group=group) + ) + else: + raise NotImplementedError() + + return parsed_annotations + +class CocoImageInfoExtractor(_CocoExtractor): + def __init__(self, path, **kwargs): + kwargs['task'] = CocoTask.image_info + super().__init__(path, **kwargs) + +class CocoCaptionsExtractor(_CocoExtractor): + def __init__(self, path, **kwargs): + kwargs['task'] = CocoTask.captions + super().__init__(path, **kwargs) + +class CocoInstancesExtractor(_CocoExtractor): + def __init__(self, path, **kwargs): + kwargs['task'] = CocoTask.instances + super().__init__(path, **kwargs) + +class CocoPersonKeypointsExtractor(_CocoExtractor): + def __init__(self, path, **kwargs): + kwargs['task'] = CocoTask.person_keypoints + super().__init__(path, **kwargs) + +class CocoLabelsExtractor(_CocoExtractor): + def __init__(self, path, **kwargs): + kwargs['task'] = CocoTask.labels + super().__init__(path, **kwargs) diff --git a/datumaro/plugins/coco_format/format.py b/datumaro/plugins/coco_format/format.py new file mode 100644 index 0000000000..5129d49d9a --- /dev/null +++ b/datumaro/plugins/coco_format/format.py @@ -0,0 +1,23 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from enum import Enum + + +CocoTask = Enum('CocoTask', [ + 'instances', + 'person_keypoints', + 'captions', + 'labels', # extension, does not exist in the original COCO format + 'image_info', + # 'panoptic', + # 'stuff', +]) + +class CocoPath: + IMAGES_DIR = 'images' + ANNOTATIONS_DIR = 'annotations' + + IMAGE_EXT = '.jpg' diff --git a/datumaro/plugins/coco_format/importer.py b/datumaro/plugins/coco_format/importer.py new file mode 100644 index 0000000000..3896b725d1 --- /dev/null +++ b/datumaro/plugins/coco_format/importer.py @@ -0,0 +1,95 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from collections import defaultdict +from glob import glob +import logging as log +import os.path as osp + +from datumaro.components.extractor import Importer +from datumaro.util.log_utils import logging_disabled + +from .format import CocoTask + + +class CocoImporter(Importer): + _COCO_EXTRACTORS = { + CocoTask.instances: 'coco_instances', + CocoTask.person_keypoints: 'coco_person_keypoints', + CocoTask.captions: 'coco_captions', + CocoTask.labels: 'coco_labels', + CocoTask.image_info: 'coco_image_info', + } + + @classmethod + def detect(cls, path): + with logging_disabled(log.WARN): + return len(cls.find_subsets(path)) != 0 + + def __call__(self, path, **extra_params): + from datumaro.components.project import Project # cyclic import + project = Project() + + subsets = self.find_subsets(path) + + if len(subsets) == 0: + raise Exception("Failed to find 'coco' dataset at '%s'" % path) + + # TODO: should be removed when proper label merging is implemented + 
conflicting_types = {CocoTask.instances, + CocoTask.person_keypoints, CocoTask.labels} + ann_types = set(t for s in subsets.values() for t in s) \ + & conflicting_types + if 1 <= len(ann_types): + selected_ann_type = sorted(ann_types, key=lambda x: x.name)[0] + if 1 < len(ann_types): + log.warning("Not implemented: " + "Found potentially conflicting source types with labels: %s. " + "Only one type will be used: %s" \ + % (", ".join(t.name for t in ann_types), selected_ann_type.name)) + + for ann_files in subsets.values(): + for ann_type, ann_file in ann_files.items(): + if ann_type in conflicting_types: + if ann_type is not selected_ann_type: + log.warning("Not implemented: " + "conflicting source '%s' is skipped." % ann_file) + continue + log.info("Found a dataset at '%s'" % ann_file) + + source_name = osp.splitext(osp.basename(ann_file))[0] + project.add_source(source_name, { + 'url': ann_file, + 'format': self._COCO_EXTRACTORS[ann_type], + 'options': dict(extra_params), + }) + + return project + + @staticmethod + def find_subsets(path): + if path.endswith('.json') and osp.isfile(path): + subset_paths = [path] + else: + subset_paths = glob(osp.join(path, '**', '*_*.json'), + recursive=True) + + subsets = defaultdict(dict) + for subset_path in subset_paths: + name_parts = osp.splitext(osp.basename(subset_path))[0] \ + .rsplit('_', maxsplit=1) + + ann_type = name_parts[0] + try: + ann_type = CocoTask[ann_type] + except KeyError: + log.warn("Skipping '%s': unknown subset " + "type '%s', the only known are: %s" % \ + (subset_path, ann_type, + ', '.join([e.name for e in CocoTask]))) + continue + subset_name = name_parts[1] + subsets[subset_name][ann_type] = subset_path + return dict(subsets) diff --git a/datumaro/plugins/cvat_format/__init__.py b/datumaro/plugins/cvat_format/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/datumaro/plugins/cvat_format/converter.py b/datumaro/plugins/cvat_format/converter.py new file mode 100644 index 0000000000..4849619b76 --- /dev/null +++ b/datumaro/plugins/cvat_format/converter.py @@ -0,0 +1,331 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import logging as log +import os +import os.path as osp +from collections import OrderedDict +from xml.sax.saxutils import XMLGenerator + +from datumaro.components.converter import Converter +from datumaro.components.extractor import DEFAULT_SUBSET_NAME, AnnotationType +from datumaro.util import cast, pairs + +from .format import CvatPath + + +class XmlAnnotationWriter: + VERSION = '1.1' + + def __init__(self, f): + self.xmlgen = XMLGenerator(f, 'utf-8') + self._level = 0 + + def _indent(self, newline = True): + if newline: + self.xmlgen.ignorableWhitespace('\n') + self.xmlgen.ignorableWhitespace(' ' * self._level) + + def _add_version(self): + self._indent() + self.xmlgen.startElement('version', {}) + self.xmlgen.characters(self.VERSION) + self.xmlgen.endElement('version') + + def open_root(self): + self.xmlgen.startDocument() + self.xmlgen.startElement('annotations', {}) + self._level += 1 + self._add_version() + + def _add_meta(self, meta): + self._level += 1 + for k, v in meta.items(): + if isinstance(v, OrderedDict): + self._indent() + self.xmlgen.startElement(k, {}) + self._add_meta(v) + self._indent() + self.xmlgen.endElement(k) + elif isinstance(v, list): + self._indent() + self.xmlgen.startElement(k, {}) + for tup in v: + self._add_meta(OrderedDict([tup])) + self._indent() + self.xmlgen.endElement(k) + else: + self._indent() + 
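As a quick illustration of the meta serialization implemented here (a hedged sketch; feeding the writer `sys.stdout` is only for demonstration):

``` python
import sys
from collections import OrderedDict

from datumaro.plugins.cvat_format.converter import XmlAnnotationWriter

w = XmlAnnotationWriter(sys.stdout)
w.open_root()
# OrderedDict values become nested elements, lists become repeated
# children (one per (tag, value) pair), everything else becomes text:
w.write_meta(OrderedDict([
    ('task', OrderedDict([
        ('name', 'demo'),
        ('labels', [('label', OrderedDict([('name', 'person')]))]),
    ])),
]))
w.close_root()
```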
self.xmlgen.startElement(k, {}) + self.xmlgen.characters(v) + self.xmlgen.endElement(k) + self._level -= 1 + + def write_meta(self, meta): + self._indent() + self.xmlgen.startElement('meta', {}) + self._add_meta(meta) + self._indent() + self.xmlgen.endElement('meta') + + def open_track(self, track): + self._indent() + self.xmlgen.startElement('track', track) + self._level += 1 + + def open_image(self, image): + self._indent() + self.xmlgen.startElement('image', image) + self._level += 1 + + def open_box(self, box): + self._indent() + self.xmlgen.startElement('box', box) + self._level += 1 + + def open_polygon(self, polygon): + self._indent() + self.xmlgen.startElement('polygon', polygon) + self._level += 1 + + def open_polyline(self, polyline): + self._indent() + self.xmlgen.startElement('polyline', polyline) + self._level += 1 + + def open_points(self, points): + self._indent() + self.xmlgen.startElement('points', points) + self._level += 1 + + def open_tag(self, tag): + self._indent() + self.xmlgen.startElement("tag", tag) + self._level += 1 + + def add_attribute(self, attribute): + self._indent() + self.xmlgen.startElement('attribute', {'name': attribute['name']}) + self.xmlgen.characters(attribute['value']) + self.xmlgen.endElement('attribute') + + def _close_element(self, element): + self._level -= 1 + self._indent() + self.xmlgen.endElement(element) + + def close_box(self): + self._close_element('box') + + def close_polygon(self): + self._close_element('polygon') + + def close_polyline(self): + self._close_element('polyline') + + def close_points(self): + self._close_element('points') + + def close_tag(self): + self._close_element('tag') + + def close_image(self): + self._close_element('image') + + def close_track(self): + self._close_element('track') + + def close_root(self): + self._close_element('annotations') + self.xmlgen.endDocument() + +class _SubsetWriter: + def __init__(self, file, name, extractor, context): + self._writer = XmlAnnotationWriter(file) + self._name = name + self._extractor = extractor + self._context = context + + def write(self): + self._writer.open_root() + self._write_meta() + + for index, item in enumerate(self._extractor): + self._write_item(item, index) + + self._writer.close_root() + + def _write_item(self, item, index): + image_info = OrderedDict([ + ("id", str(cast(item.attributes.get('frame'), int, index))), + ]) + filename = item.id + CvatPath.IMAGE_EXT + image_info["name"] = filename + if item.has_image: + size = item.image.size + if size: + h, w = size + image_info["width"] = str(w) + image_info["height"] = str(h) + + if self._context._save_images: + self._context._save_image(item, + osp.join(self._context._images_dir, filename)) + else: + log.debug("Item '%s' has no image info", item.id) + self._writer.open_image(image_info) + + for ann in item.annotations: + if ann.type in {AnnotationType.points, AnnotationType.polyline, + AnnotationType.polygon, AnnotationType.bbox}: + self._write_shape(ann) + elif ann.type == AnnotationType.label: + self._write_tag(ann) + else: + continue + + self._writer.close_image() + + def _write_meta(self): + label_cat = self._extractor.categories()[AnnotationType.label] + meta = OrderedDict([ + ("task", OrderedDict([ + ("id", ""), + ("name", self._name), + ("size", str(len(self._extractor))), + ("mode", "annotation"), + ("overlap", ""), + ("start_frame", "0"), + ("stop_frame", str(len(self._extractor))), + ("frame_filter", ""), + ("z_order", "True"), + + ("labels", [ + ("label", OrderedDict([ + ("name", label.name), + 
("attributes", [ + ("attribute", OrderedDict([ + ("name", attr), + ("mutable", "True"), + ("input_type", "text"), + ("default_value", ""), + ("values", ""), + ])) for attr in label.attributes + ]) + ])) for label in label_cat.items + ]), + ])), + ]) + self._writer.write_meta(meta) + + def _get_label(self, label_id): + label_cat = self._extractor.categories()[AnnotationType.label] + return label_cat.items[label_id] + + def _write_shape(self, shape): + if shape.label is None: + return + + shape_data = OrderedDict([ + ("label", self._get_label(shape.label).name), + ("occluded", str(int(shape.attributes.get('occluded', False)))), + ]) + + if shape.type == AnnotationType.bbox: + shape_data.update(OrderedDict([ + ("xtl", "{:.2f}".format(shape.points[0])), + ("ytl", "{:.2f}".format(shape.points[1])), + ("xbr", "{:.2f}".format(shape.points[2])), + ("ybr", "{:.2f}".format(shape.points[3])) + ])) + else: + shape_data.update(OrderedDict([ + ("points", ';'.join(( + ','.join(( + "{:.2f}".format(x), + "{:.2f}".format(y) + )) for x, y in pairs(shape.points)) + )), + ])) + + shape_data['z_order'] = str(int(shape.z_order)) + if shape.group: + shape_data['group_id'] = str(shape.group) + + if shape.type == AnnotationType.bbox: + self._writer.open_box(shape_data) + elif shape.type == AnnotationType.polygon: + self._writer.open_polygon(shape_data) + elif shape.type == AnnotationType.polyline: + self._writer.open_polyline(shape_data) + elif shape.type == AnnotationType.points: + self._writer.open_points(shape_data) + else: + raise NotImplementedError("unknown shape type") + + for attr_name, attr_value in shape.attributes.items(): + if isinstance(attr_value, bool): + attr_value = 'true' if attr_value else 'false' + if attr_name in self._get_label(shape.label).attributes: + self._writer.add_attribute(OrderedDict([ + ("name", str(attr_name)), + ("value", str(attr_value)), + ])) + + if shape.type == AnnotationType.bbox: + self._writer.close_box() + elif shape.type == AnnotationType.polygon: + self._writer.close_polygon() + elif shape.type == AnnotationType.polyline: + self._writer.close_polyline() + elif shape.type == AnnotationType.points: + self._writer.close_points() + else: + raise NotImplementedError("unknown shape type") + + def _write_tag(self, label): + if label.label is None: + return + + tag_data = OrderedDict([ + ('label', self._get_label(label.label).name), + ]) + if label.group: + tag_data['group_id'] = str(label.group) + self._writer.open_tag(tag_data) + + for attr_name, attr_value in label.attributes.items(): + if isinstance(attr_value, bool): + attr_value = 'true' if attr_value else 'false' + if attr_name in self._get_label(label.label).attributes: + self._writer.add_attribute(OrderedDict([ + ("name", str(attr_name)), + ("value", str(attr_value)), + ])) + + self._writer.close_tag() + +class CvatConverter(Converter): + DEFAULT_IMAGE_EXT = CvatPath.IMAGE_EXT + + def apply(self): + images_dir = osp.join(self._save_dir, CvatPath.IMAGES_DIR) + os.makedirs(images_dir, exist_ok=True) + self._images_dir = images_dir + + subsets = self._extractor.subsets() + if len(subsets) == 0: + subsets = [ None ] + + for subset_name in subsets: + if subset_name: + subset = self._extractor.get_subset(subset_name) + else: + subset_name = DEFAULT_SUBSET_NAME + subset = self._extractor + + with open(osp.join(self._save_dir, '%s.xml' % subset_name), 'w') as f: + writer = _SubsetWriter(f, subset_name, subset, self) + writer.write() diff --git a/datumaro/plugins/cvat_format/extractor.py 
b/datumaro/plugins/cvat_format/extractor.py new file mode 100644 index 0000000000..7e37c2dd74 --- /dev/null +++ b/datumaro/plugins/cvat_format/extractor.py @@ -0,0 +1,316 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from collections import OrderedDict +import os.path as osp +from defusedxml import ElementTree + +from datumaro.components.extractor import (SourceExtractor, DatasetItem, + AnnotationType, Points, Polygon, PolyLine, Bbox, Label, + LabelCategories +) +from datumaro.util.image import Image + +from .format import CvatPath + + +class CvatExtractor(SourceExtractor): + _SUPPORTED_SHAPES = ('box', 'polygon', 'polyline', 'points') + + def __init__(self, path): + assert osp.isfile(path), path + rootpath = osp.dirname(path) + images_dir = '' + if osp.isdir(osp.join(rootpath, CvatPath.IMAGES_DIR)): + images_dir = osp.join(rootpath, CvatPath.IMAGES_DIR) + self._images_dir = images_dir + self._path = path + + super().__init__(subset=osp.splitext(osp.basename(path))[0]) + + items, categories = self._parse(path) + self._items = self._load_items(items) + self._categories = categories + + def categories(self): + return self._categories + + def __iter__(self): + for item in self._items.values(): + yield item + + def __len__(self): + return len(self._items) + + @classmethod + def _parse(cls, path): + context = ElementTree.iterparse(path, events=("start", "end")) + context = iter(context) + + categories, frame_size = cls._parse_meta(context) + + items = OrderedDict() + + track = None + shape = None + tag = None + attributes = None + image = None + for ev, el in context: + if ev == 'start': + if el.tag == 'track': + track = { + 'id': el.attrib['id'], + 'label': el.attrib.get('label'), + 'group': int(el.attrib.get('group_id', 0)), + 'height': frame_size[0], + 'width': frame_size[1], + } + elif el.tag == 'image': + image = { + 'name': el.attrib.get('name'), + 'frame': el.attrib['id'], + 'width': el.attrib.get('width'), + 'height': el.attrib.get('height'), + } + elif el.tag in cls._SUPPORTED_SHAPES and (track or image): + attributes = {} + shape = { + 'type': None, + 'attributes': attributes, + } + if track: + shape.update(track) + shape['track_id'] = int(track['id']) + if image: + shape.update(image) + elif el.tag == 'tag' and image: + attributes = {} + tag = { + 'frame': image['frame'], + 'attributes': attributes, + 'group': int(el.attrib.get('group_id', 0)), + 'label': el.attrib['label'], + } + elif ev == 'end': + if el.tag == 'attribute' and attributes is not None: + attr_value = el.text + if el.text in ['true', 'false']: + attr_value = attr_value == 'true' + else: + try: + attr_value = float(attr_value) + except ValueError: + pass + attributes[el.attrib['name']] = attr_value + elif el.tag in cls._SUPPORTED_SHAPES: + if track is not None: + shape['frame'] = el.attrib['frame'] + shape['outside'] = (el.attrib.get('outside') == '1') + shape['keyframe'] = (el.attrib.get('keyframe') == '1') + if image is not None: + shape['label'] = el.attrib.get('label') + shape['group'] = int(el.attrib.get('group_id', 0)) + + shape['type'] = el.tag + shape['occluded'] = (el.attrib.get('occluded') == '1') + shape['z_order'] = int(el.attrib.get('z_order', 0)) + + if el.tag == 'box': + shape['points'] = list(map(float, [ + el.attrib['xtl'], el.attrib['ytl'], + el.attrib['xbr'], el.attrib['ybr'], + ])) + else: + shape['points'] = [] + for pair in el.attrib['points'].split(';'): + shape['points'].extend(map(float, pair.split(','))) + + frame_desc = items.get(shape['frame'], 
{'annotations': []}) + frame_desc['annotations'].append( + cls._parse_shape_ann(shape, categories)) + items[shape['frame']] = frame_desc + shape = None + + elif el.tag == 'tag': + frame_desc = items.get(tag['frame'], {'annotations': []}) + frame_desc['annotations'].append( + cls._parse_tag_ann(tag, categories)) + items[tag['frame']] = frame_desc + tag = None + elif el.tag == 'track': + track = None + elif el.tag == 'image': + frame_desc = items.get(image['frame'], {'annotations': []}) + frame_desc.update({ + 'name': image.get('name'), + 'height': image.get('height'), + 'width': image.get('width'), + }) + items[image['frame']] = frame_desc + image = None + el.clear() + + return items, categories + + @staticmethod + def _parse_meta(context): + ev, el = next(context) + if not (ev == 'start' and el.tag == 'annotations'): + raise Exception("Unexpected token ") + + categories = {} + + frame_size = None + mode = None + labels = OrderedDict() + label = None + + # Recursive descent parser + el = None + states = ['annotations'] + def accepted(expected_state, tag, next_state=None): + state = states[-1] + if state == expected_state and el is not None and el.tag == tag: + if not next_state: + next_state = tag + states.append(next_state) + return True + return False + def consumed(expected_state, tag): + state = states[-1] + if state == expected_state and el is not None and el.tag == tag: + states.pop() + return True + return False + + for ev, el in context: + if ev == 'start': + if accepted('annotations', 'meta'): pass + elif accepted('meta', 'task'): pass + elif accepted('task', 'mode'): pass + elif accepted('task', 'original_size'): + frame_size = [None, None] + elif accepted('original_size', 'height', next_state='frame_height'): pass + elif accepted('original_size', 'width', next_state='frame_width'): pass + elif accepted('task', 'labels'): pass + elif accepted('labels', 'label'): + label = { 'name': None, 'attributes': set() } + elif accepted('label', 'name', next_state='label_name'): pass + elif accepted('label', 'attributes'): pass + elif accepted('attributes', 'attribute'): pass + elif accepted('attribute', 'name', next_state='attr_name'): pass + elif accepted('annotations', 'image') or \ + accepted('annotations', 'track') or \ + accepted('annotations', 'tag'): + break + else: + pass + elif ev == 'end': + if consumed('meta', 'meta'): + break + elif consumed('task', 'task'): pass + elif consumed('mode', 'mode'): + mode = el.text + elif consumed('original_size', 'original_size'): pass + elif consumed('frame_height', 'height'): + frame_size[0] = int(el.text) + elif consumed('frame_width', 'width'): + frame_size[1] = int(el.text) + elif consumed('label_name', 'name'): + label['name'] = el.text + elif consumed('attr_name', 'name'): + label['attributes'].add(el.text) + elif consumed('attribute', 'attribute'): pass + elif consumed('attributes', 'attributes'): pass + elif consumed('label', 'label'): + labels[label['name']] = label['attributes'] + label = None + elif consumed('labels', 'labels'): pass + else: + pass + + assert len(states) == 1 and states[0] == 'annotations', \ + "Expected 'meta' section in the annotation file, path: %s" % states + + common_attrs = ['occluded'] + if mode == 'interpolation': + common_attrs.append('keyframe') + common_attrs.append('outside') + common_attrs.append('track_id') + + label_cat = LabelCategories(attributes=common_attrs) + for label, attrs in labels.items(): + label_cat.add(label, attributes=attrs) + + categories[AnnotationType.label] = label_cat + + return 
categories, frame_size + + @classmethod + def _parse_shape_ann(cls, ann, categories): + ann_id = ann.get('id', 0) + ann_type = ann['type'] + + attributes = ann.get('attributes') or {} + if 'occluded' in categories[AnnotationType.label].attributes: + attributes['occluded'] = ann.get('occluded', False) + if 'outside' in ann: + attributes['outside'] = ann['outside'] + if 'keyframe' in ann: + attributes['keyframe'] = ann['keyframe'] + if 'track_id' in ann: + attributes['track_id'] = ann['track_id'] + + group = ann.get('group') + + label = ann.get('label') + label_id = categories[AnnotationType.label].find(label)[0] + + z_order = ann.get('z_order', 0) + points = ann.get('points', []) + + if ann_type == 'polyline': + return PolyLine(points, label=label_id, z_order=z_order, + id=ann_id, attributes=attributes, group=group) + + elif ann_type == 'polygon': + return Polygon(points, label=label_id, z_order=z_order, + id=ann_id, attributes=attributes, group=group) + + elif ann_type == 'points': + return Points(points, label=label_id, z_order=z_order, + id=ann_id, attributes=attributes, group=group) + + elif ann_type == 'box': + x, y = points[0], points[1] + w, h = points[2] - x, points[3] - y + return Bbox(x, y, w, h, label=label_id, z_order=z_order, + id=ann_id, attributes=attributes, group=group) + + else: + raise NotImplementedError("Unknown annotation type '%s'" % ann_type) + + @classmethod + def _parse_tag_ann(cls, ann, categories): + label = ann.get('label') + label_id = categories[AnnotationType.label].find(label)[0] + group = ann.get('group') + attributes = ann.get('attributes') + return Label(label_id, attributes=attributes, group=group) + + def _load_items(self, parsed): + for frame_id, item_desc in parsed.items(): + name = item_desc.get('name', 'frame_%06d.png' % int(frame_id)) + image = osp.join(self._images_dir, name) + image_size = (item_desc.get('height'), item_desc.get('width')) + if all(image_size): + image = Image(path=image, size=tuple(map(int, image_size))) + + parsed[frame_id] = DatasetItem(id=osp.splitext(name)[0], + subset=self._subset, image=image, + annotations=item_desc.get('annotations'), + attributes={'frame': int(frame_id)}) + return parsed diff --git a/datumaro/plugins/cvat_format/format.py b/datumaro/plugins/cvat_format/format.py new file mode 100644 index 0000000000..e5572a89be --- /dev/null +++ b/datumaro/plugins/cvat_format/format.py @@ -0,0 +1,9 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +class CvatPath: + IMAGES_DIR = 'images' + + IMAGE_EXT = '.jpg' diff --git a/datumaro/plugins/cvat_format/importer.py b/datumaro/plugins/cvat_format/importer.py new file mode 100644 index 0000000000..a3a83757ee --- /dev/null +++ b/datumaro/plugins/cvat_format/importer.py @@ -0,0 +1,51 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from glob import glob +import logging as log +import os.path as osp + +from datumaro.components.extractor import Importer + + +class CvatImporter(Importer): + EXTRACTOR_NAME = 'cvat' + + @classmethod + def detect(cls, path): + return len(cls.find_subsets(path)) != 0 + + def __call__(self, path, **extra_params): + from datumaro.components.project import Project # cyclic import + project = Project() + + subset_paths = self.find_subsets(path) + + if len(subset_paths) == 0: + raise Exception("Failed to find 'cvat' dataset at '%s'" % path) + + for subset_path in subset_paths: + if not osp.isfile(subset_path): + continue + + log.info("Found a dataset at '%s'" % subset_path) + + 
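A usage sketch for this importer (the directory path is assumed): every `*.xml` found under the given path becomes a separate project source keyed by its file name, as the loop below shows.

``` python
# Hedged sketch, assuming 'cvat_dataset' contains e.g. train.xml and val.xml,
# which become sources named 'train' and 'val'.
from datumaro.plugins.cvat_format.importer import CvatImporter

project = CvatImporter()('cvat_dataset')
dataset = project.make_dataset()  # assumes the standard Project API
```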
subset_name = osp.splitext(osp.basename(subset_path))[0] + + project.add_source(subset_name, { + 'url': subset_path, + 'format': self.EXTRACTOR_NAME, + 'options': dict(extra_params), + }) + + return project + + @staticmethod + def find_subsets(path): + if path.endswith('.xml') and osp.isfile(path): + subset_paths = [path] + else: + subset_paths = glob(osp.join(path, '**', '*.xml'), recursive=True) + return subset_paths \ No newline at end of file diff --git a/datumaro/plugins/datumaro_format/__init__.py b/datumaro/plugins/datumaro_format/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/datumaro/plugins/datumaro_format/converter.py b/datumaro/plugins/datumaro_format/converter.py new file mode 100644 index 0000000000..2d86209461 --- /dev/null +++ b/datumaro/plugins/datumaro_format/converter.py @@ -0,0 +1,261 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +# pylint: disable=no-self-use + +import json +import numpy as np +import os +import os.path as osp + +from datumaro.components.converter import Converter +from datumaro.components.extractor import ( + DEFAULT_SUBSET_NAME, Annotation, _Shape, + Label, Mask, RleMask, Points, Polygon, PolyLine, Bbox, Caption, + LabelCategories, MaskCategories, PointsCategories +) +from datumaro.util import cast +import pycocotools.mask as mask_utils + +from .format import DatumaroPath + + +class _SubsetWriter: + def __init__(self, name, context): + self._name = name + self._context = context + + self._data = { + 'info': {}, + 'categories': {}, + 'items': [], + } + + @property + def categories(self): + return self._data['categories'] + + @property + def items(self): + return self._data['items'] + + def write_item(self, item): + annotations = [] + item_desc = { + 'id': item.id, + 'annotations': annotations, + } + if item.attributes: + item_desc['attr'] = item.attributes + if item.path: + item_desc['path'] = item.path + if item.has_image: + path = item.image.path + if self._context._save_images: + path = self._context._make_image_filename(item) + self._context._save_image(item, path) + + item_desc['image'] = { + 'size': item.image.size, + 'path': path, + } + self.items.append(item_desc) + + for ann in item.annotations: + if isinstance(ann, Label): + converted_ann = self._convert_label_object(ann) + elif isinstance(ann, Mask): + converted_ann = self._convert_mask_object(ann) + elif isinstance(ann, Points): + converted_ann = self._convert_points_object(ann) + elif isinstance(ann, PolyLine): + converted_ann = self._convert_polyline_object(ann) + elif isinstance(ann, Polygon): + converted_ann = self._convert_polygon_object(ann) + elif isinstance(ann, Bbox): + converted_ann = self._convert_bbox_object(ann) + elif isinstance(ann, Caption): + converted_ann = self._convert_caption_object(ann) + else: + raise NotImplementedError() + annotations.append(converted_ann) + + def write_categories(self, categories): + for ann_type, desc in categories.items(): + if isinstance(desc, LabelCategories): + converted_desc = self._convert_label_categories(desc) + elif isinstance(desc, MaskCategories): + converted_desc = self._convert_mask_categories(desc) + elif isinstance(desc, PointsCategories): + converted_desc = self._convert_points_categories(desc) + else: + raise NotImplementedError() + self.categories[ann_type.name] = converted_desc + + def write(self, save_dir): + with open(osp.join(save_dir, '%s.json' % (self._name)), 'w') as f: + json.dump(self._data, f) + + def _convert_annotation(self, obj): + assert 
isinstance(obj, Annotation) + + ann_json = { + 'id': cast(obj.id, int), + 'type': cast(obj.type.name, str), + 'attributes': obj.attributes, + 'group': cast(obj.group, int, 0), + } + return ann_json + + def _convert_label_object(self, obj): + converted = self._convert_annotation(obj) + + converted.update({ + 'label_id': cast(obj.label, int), + }) + return converted + + def _convert_mask_object(self, obj): + converted = self._convert_annotation(obj) + + if isinstance(obj, RleMask): + rle = obj.rle + else: + rle = mask_utils.encode( + np.require(obj.image, dtype=np.uint8, requirements='F')) + + converted.update({ + 'label_id': cast(obj.label, int), + 'rle': { + # serialize as compressed COCO mask + 'counts': rle['counts'].decode('ascii'), + 'size': list(int(c) for c in rle['size']), + }, + 'z_order': obj.z_order, + }) + return converted + + def _convert_shape_object(self, obj): + assert isinstance(obj, _Shape) + converted = self._convert_annotation(obj) + + converted.update({ + 'label_id': cast(obj.label, int), + 'points': [float(p) for p in obj.points], + 'z_order': obj.z_order, + }) + return converted + + def _convert_polyline_object(self, obj): + return self._convert_shape_object(obj) + + def _convert_polygon_object(self, obj): + return self._convert_shape_object(obj) + + def _convert_bbox_object(self, obj): + converted = self._convert_shape_object(obj) + converted.pop('points', None) + converted['bbox'] = [float(p) for p in obj.get_bbox()] + return converted + + def _convert_points_object(self, obj): + converted = self._convert_shape_object(obj) + + converted.update({ + 'visibility': [int(v.value) for v in obj.visibility], + }) + return converted + + def _convert_caption_object(self, obj): + converted = self._convert_annotation(obj) + + converted.update({ + 'caption': cast(obj.caption, str), + }) + return converted + + def _convert_label_categories(self, obj): + converted = { + 'labels': [], + } + for label in obj.items: + converted['labels'].append({ + 'name': cast(label.name, str), + 'parent': cast(label.parent, str), + }) + return converted + + def _convert_mask_categories(self, obj): + converted = { + 'colormap': [], + } + for label_id, color in obj.colormap.items(): + converted['colormap'].append({ + 'label_id': int(label_id), + 'r': int(color[0]), + 'g': int(color[1]), + 'b': int(color[2]), + }) + return converted + + def _convert_points_categories(self, obj): + converted = { + 'items': [], + } + for label_id, item in obj.items.items(): + converted['items'].append({ + 'label_id': int(label_id), + 'labels': [cast(label, str) for label in item.labels], + 'joints': [list(map(int, j)) for j in item.joints], + }) + return converted + +class DatumaroConverter(Converter): + DEFAULT_IMAGE_EXT = DatumaroPath.IMAGE_EXT + + def apply(self): + os.makedirs(self._save_dir, exist_ok=True) + + images_dir = osp.join(self._save_dir, DatumaroPath.IMAGES_DIR) + os.makedirs(images_dir, exist_ok=True) + self._images_dir = images_dir + + annotations_dir = osp.join(self._save_dir, DatumaroPath.ANNOTATIONS_DIR) + os.makedirs(annotations_dir, exist_ok=True) + self._annotations_dir = annotations_dir + + subsets = self._extractor.subsets() or [None] + subsets = [n or DEFAULT_SUBSET_NAME for n in subsets] + subsets = { name: _SubsetWriter(name, self) for name in subsets } + + for subset, writer in subsets.items(): + writer.write_categories(self._extractor.categories()) + + for item in self._extractor: + subset = item.subset or DEFAULT_SUBSET_NAME + writer = subsets[subset] + + writer.write_item(item) + + for 
subset, writer in subsets.items(): + writer.write(annotations_dir) + + def _save_image(self, item, path=None): + super()._save_image(item, + osp.join(self._images_dir, self._make_image_filename(item))) + +class DatumaroProjectConverter(Converter): + @classmethod + def convert(cls, extractor, save_dir, **kwargs): + os.makedirs(save_dir, exist_ok=True) + + from datumaro.components.project import Project + project = Project.generate(save_dir, + config=kwargs.pop('project_config', None)) + + DatumaroConverter.convert(extractor, + save_dir=osp.join( + project.config.project_dir, project.config.dataset_dir), + **kwargs) \ No newline at end of file diff --git a/datumaro/plugins/datumaro_format/extractor.py b/datumaro/plugins/datumaro_format/extractor.py new file mode 100644 index 0000000000..c1ae40d48a --- /dev/null +++ b/datumaro/plugins/datumaro_format/extractor.py @@ -0,0 +1,157 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import json +import os.path as osp + +from datumaro.components.extractor import (SourceExtractor, DatasetItem, + AnnotationType, Label, RleMask, Points, Polygon, PolyLine, Bbox, Caption, + LabelCategories, MaskCategories, PointsCategories +) +from datumaro.util.image import Image + +from .format import DatumaroPath + + +class DatumaroExtractor(SourceExtractor): + def __init__(self, path): + assert osp.isfile(path), path + rootpath = '' + if path.endswith(osp.join(DatumaroPath.ANNOTATIONS_DIR, osp.basename(path))): + rootpath = path.rsplit(DatumaroPath.ANNOTATIONS_DIR, maxsplit=1)[0] + images_dir = '' + if rootpath and osp.isdir(osp.join(rootpath, DatumaroPath.IMAGES_DIR)): + images_dir = osp.join(rootpath, DatumaroPath.IMAGES_DIR) + self._images_dir = images_dir + + super().__init__(subset=osp.splitext(osp.basename(path))[0]) + + with open(path, 'r') as f: + parsed_anns = json.load(f) + self._categories = self._load_categories(parsed_anns) + self._items = self._load_items(parsed_anns) + + def categories(self): + return self._categories + + def __iter__(self): + for item in self._items: + yield item + + def __len__(self): + return len(self._items) + + @staticmethod + def _load_categories(parsed): + categories = {} + + parsed_label_cat = parsed['categories'].get(AnnotationType.label.name) + if parsed_label_cat: + label_categories = LabelCategories() + for item in parsed_label_cat['labels']: + label_categories.add(item['name'], parent=item['parent']) + + categories[AnnotationType.label] = label_categories + + parsed_mask_cat = parsed['categories'].get(AnnotationType.mask.name) + if parsed_mask_cat: + colormap = {} + for item in parsed_mask_cat['colormap']: + colormap[int(item['label_id'])] = \ + (item['r'], item['g'], item['b']) + + mask_categories = MaskCategories(colormap=colormap) + categories[AnnotationType.mask] = mask_categories + + parsed_points_cat = parsed['categories'].get(AnnotationType.points.name) + if parsed_points_cat: + point_categories = PointsCategories() + for item in parsed_points_cat['items']: + point_categories.add(int(item['label_id']), + item['labels'], joints=item['joints']) + + categories[AnnotationType.points] = point_categories + + return categories + + def _load_items(self, parsed): + items = [] + for item_desc in parsed['items']: + item_id = item_desc['id'] + + image = None + image_info = item_desc.get('image') + if image_info: + image_path = image_info.get('path') or \ + item_id + DatumaroPath.IMAGE_EXT + image_path = osp.join(self._images_dir, image_path) + image = Image(path=image_path, 
size=image_info.get('size')) + + annotations = self._load_annotations(item_desc) + + item = DatasetItem(id=item_id, subset=self._subset, + annotations=annotations, image=image, + attributes=item_desc.get('attr')) + + items.append(item) + + return items + + @staticmethod + def _load_annotations(item): + parsed = item['annotations'] + loaded = [] + + for ann in parsed: + ann_id = ann.get('id') + ann_type = AnnotationType[ann['type']] + attributes = ann.get('attributes') + group = ann.get('group') + + label_id = ann.get('label_id') + z_order = ann.get('z_order') + points = ann.get('points') + + if ann_type == AnnotationType.label: + loaded.append(Label(label=label_id, + id=ann_id, attributes=attributes, group=group)) + + elif ann_type == AnnotationType.mask: + rle = ann['rle'] + rle['counts'] = rle['counts'].encode('ascii') + loaded.append(RleMask(rle=rle, label=label_id, + id=ann_id, attributes=attributes, group=group, + z_order=z_order)) + + elif ann_type == AnnotationType.polyline: + loaded.append(PolyLine(points, label=label_id, + id=ann_id, attributes=attributes, group=group, + z_order=z_order)) + + elif ann_type == AnnotationType.polygon: + loaded.append(Polygon(points, label=label_id, + id=ann_id, attributes=attributes, group=group, + z_order=z_order)) + + elif ann_type == AnnotationType.bbox: + x, y, w, h = ann['bbox'] + loaded.append(Bbox(x, y, w, h, label=label_id, + id=ann_id, attributes=attributes, group=group, + z_order=z_order)) + + elif ann_type == AnnotationType.points: + loaded.append(Points(points, label=label_id, + id=ann_id, attributes=attributes, group=group, + z_order=z_order)) + + elif ann_type == AnnotationType.caption: + caption = ann.get('caption') + loaded.append(Caption(caption, + id=ann_id, attributes=attributes, group=group)) + + else: + raise NotImplementedError() + + return loaded diff --git a/datumaro/plugins/datumaro_format/format.py b/datumaro/plugins/datumaro_format/format.py new file mode 100644 index 0000000000..501c100b09 --- /dev/null +++ b/datumaro/plugins/datumaro_format/format.py @@ -0,0 +1,12 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +class DatumaroPath: + IMAGES_DIR = 'images' + ANNOTATIONS_DIR = 'annotations' + MASKS_DIR = 'masks' + + IMAGE_EXT = '.jpg' + MASK_EXT = '.png' diff --git a/datumaro/plugins/datumaro_format/importer.py b/datumaro/plugins/datumaro_format/importer.py new file mode 100644 index 0000000000..dbb90f8612 --- /dev/null +++ b/datumaro/plugins/datumaro_format/importer.py @@ -0,0 +1,56 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from glob import glob +import logging as log +import os.path as osp + +from datumaro.components.extractor import Importer + +from .format import DatumaroPath + + +class DatumaroImporter(Importer): + EXTRACTOR_NAME = 'datumaro' + + @classmethod + def detect(cls, path): + return len(cls.find_subsets(path)) != 0 + + def __call__(self, path, **extra_params): + from datumaro.components.project import Project # cyclic import + project = Project() + + subset_paths = self.find_subsets(path) + if len(subset_paths) == 0: + raise Exception("Failed to find 'datumaro' dataset at '%s'" % path) + + for subset_path in subset_paths: + if not osp.isfile(subset_path): + continue + + log.info("Found a dataset at '%s'" % subset_path) + + subset_name = osp.splitext(osp.basename(subset_path))[0] + + project.add_source(subset_name, { + 'url': subset_path, + 'format': self.EXTRACTOR_NAME, + 'options': dict(extra_params), + }) + + return 
project + + @staticmethod + def find_subsets(path): + if path.endswith('.json') and osp.isfile(path): + subset_paths = [path] + else: + subset_paths = glob(osp.join(path, '*.json')) + + if osp.basename(osp.normpath(path)) != DatumaroPath.ANNOTATIONS_DIR: + path = osp.join(path, DatumaroPath.ANNOTATIONS_DIR) + subset_paths += glob(osp.join(path, '*.json')) + return subset_paths \ No newline at end of file diff --git a/datumaro/plugins/image_dir.py b/datumaro/plugins/image_dir.py new file mode 100644 index 0000000000..062387e10c --- /dev/null +++ b/datumaro/plugins/image_dir.py @@ -0,0 +1,76 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import logging as log +import os +import os.path as osp + +from datumaro.components.extractor import DatasetItem, SourceExtractor, Importer +from datumaro.components.converter import Converter +from datumaro.util.image import Image + + +class ImageDirImporter(Importer): + EXTRACTOR_NAME = 'image_dir' + + def __call__(self, path, **extra_params): + from datumaro.components.project import Project # cyclic import + project = Project() + + if not osp.isdir(path): + raise Exception("Can't find a directory at '%s'" % path) + + source_name = osp.basename(osp.normpath(path)) + project.add_source(source_name, { + 'url': source_name, + 'format': self.EXTRACTOR_NAME, + 'options': dict(extra_params), + }) + + return project + + +class ImageDirExtractor(SourceExtractor): + def __init__(self, url): + super().__init__() + + assert osp.isdir(url), url + + items = [] + for dirpath, _, filenames in os.walk(url): + for name in filenames: + path = osp.join(dirpath, name) + try: + image = Image(path) + # force loading + image.data # pylint: disable=pointless-statement + except Exception: + continue + + item_id = osp.relpath(osp.splitext(path)[0], url) + items.append(DatasetItem(id=item_id, image=image)) + + self._items = items + + def __iter__(self): + for item in self._items: + yield item + + def __len__(self): + return len(self._items) + + +class ImageDirConverter(Converter): + DEFAULT_IMAGE_EXT = '.jpg' + + def apply(self): + os.makedirs(self._save_dir, exist_ok=True) + + for item in self._extractor: + if item.has_image: + self._save_image(item, + osp.join(self._save_dir, self._make_image_filename(item))) + else: + log.debug("Item '%s' has no image info", item.id) \ No newline at end of file diff --git a/datumaro/plugins/labelme_format.py b/datumaro/plugins/labelme_format.py new file mode 100644 index 0000000000..e037afbae9 --- /dev/null +++ b/datumaro/plugins/labelme_format.py @@ -0,0 +1,437 @@ +# Copyright (C) 2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from collections import defaultdict +from defusedxml import ElementTree +import logging as log +import numpy as np +import os +import os.path as osp + +from datumaro.components.extractor import (SourceExtractor, DEFAULT_SUBSET_NAME, + DatasetItem, AnnotationType, Mask, Bbox, Polygon, LabelCategories +) +from datumaro.components.extractor import Importer +from datumaro.components.converter import Converter +from datumaro.util.image import Image, save_image +from datumaro.util.mask_tools import load_mask, find_mask_bbox + + +class LabelMePath: + MASKS_DIR = 'Masks' + IMAGE_EXT = '.jpg' + +class LabelMeExtractor(SourceExtractor): + def __init__(self, path, subset_name=None): + assert osp.isdir(path), path + super().__init__(subset=subset_name) + + items, categories = self._parse(path) + self._categories = categories + self._items = items + + def categories(self): + 
return self._categories + + def __iter__(self): + for item in self._items: + yield item + + def __len__(self): + return len(self._items) + + def _parse(self, path): + categories = { + AnnotationType.label: LabelCategories(attributes={ + 'occluded', 'username' + }) + } + + items = [] + for p in sorted(p for p in os.listdir(path) if p.endswith('.xml')): + root = ElementTree.parse(osp.join(path, p)) + + image_path = osp.join(path, root.find('filename').text) + image_size = None + imagesize_elem = root.find('imagesize') + if imagesize_elem is not None: + width_elem = imagesize_elem.find('ncols') + height_elem = imagesize_elem.find('nrows') + image_size = (int(height_elem.text), int(width_elem.text)) + image = Image(path=image_path, size=image_size) + + annotations = self._parse_annotations(root, path, categories) + + items.append(DatasetItem(id=osp.splitext(p)[0], + subset=self._subset, image=image, annotations=annotations)) + return items, categories + + @classmethod + def _parse_annotations(cls, xml_root, dataset_root, categories): + def parse_attributes(attr_str): + parsed = [] + if not attr_str: + return parsed + + for attr in [a.strip() for a in attr_str.split(',') if a.strip()]: + if '=' in attr: + name, value = attr.split('=', maxsplit=1) + if value.lower() in {'true', 'false'}: + value = value.lower() == 'true' + else: + try: + value = float(value) + except ValueError: + pass + parsed.append((name, value)) + else: + parsed.append((attr, True)) + + return parsed + + label_cat = categories[AnnotationType.label] + def get_label_id(label): + if not label: + return None + idx, _ = label_cat.find(label) + if idx is None: + idx = label_cat.add(label) + return idx + + image_annotations = [] + + parsed_annotations = dict() + group_assignments = dict() + root_annotations = set() + for obj_elem in xml_root.iter('object'): + obj_id = int(obj_elem.find('id').text) + + ann_items = [] + + label = get_label_id(obj_elem.find('name').text) + + attributes = [] + attributes_elem = obj_elem.find('attributes') + if attributes_elem is not None and attributes_elem.text: + attributes = parse_attributes(attributes_elem.text) + + occluded = False + occluded_elem = obj_elem.find('occluded') + if occluded_elem is not None and occluded_elem.text: + occluded = (occluded_elem.text == 'yes') + attributes.append(('occluded', occluded)) + + deleted = False + deleted_elem = obj_elem.find('deleted') + if deleted_elem is not None and deleted_elem.text: + deleted = bool(int(deleted_elem.text)) + + user = '' + + poly_elem = obj_elem.find('polygon') + segm_elem = obj_elem.find('segm') + type_elem = obj_elem.find('type') # the only value is 'bounding_box' + if poly_elem is not None: + user_elem = poly_elem.find('username') + if user_elem is not None and user_elem.text: + user = user_elem.text + attributes.append(('username', user)) + + points = [] + for point_elem in poly_elem.iter('pt'): + x = float(point_elem.find('x').text) + y = float(point_elem.find('y').text) + points.append(x) + points.append(y) + + if type_elem is not None and type_elem.text == 'bounding_box': + xmin = min(points[::2]) + xmax = max(points[::2]) + ymin = min(points[1::2]) + ymax = max(points[1::2]) + ann_items.append(Bbox(xmin, ymin, xmax - xmin, ymax - ymin, + label=label, attributes=attributes, id=obj_id, + )) + else: + ann_items.append(Polygon(points, + label=label, attributes=attributes, id=obj_id, + )) + elif segm_elem is not None: + user_elem = segm_elem.find('username') + if user_elem is not None and user_elem.text: + user = user_elem.text + 
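An illustration of the attribute string syntax accepted by `parse_attributes` above (the input value is assumed): bare names become boolean flags, while boolean and numeric values are coerced from text.

``` python
# Illustration only; parse_attributes is a local helper defined above.
attr_str = 'occluded=true, score=0.8, truncated'
# parse_attributes(attr_str) ->
#   [('occluded', True), ('score', 0.8), ('truncated', True)]
```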
attributes.append(('username', user)) + + mask_path = osp.join(dataset_root, LabelMePath.MASKS_DIR, + segm_elem.find('mask').text) + if not osp.isfile(mask_path): + raise Exception("Can't find mask at '%s'" % mask_path) + mask = load_mask(mask_path) + mask = np.any(mask, axis=2) + ann_items.append(Mask(image=mask, label=label, id=obj_id, + attributes=attributes)) + + if not deleted: + parsed_annotations[obj_id] = ann_items + + # Find parents and children + parts_elem = obj_elem.find('parts') + if parts_elem is not None: + children_ids = [] + hasparts_elem = parts_elem.find('hasparts') + if hasparts_elem is not None and hasparts_elem.text: + children_ids = [int(c) for c in hasparts_elem.text.split(',')] + + parent_ids = [] + ispartof_elem = parts_elem.find('ispartof') + if ispartof_elem is not None and ispartof_elem.text: + parent_ids = [int(c) for c in ispartof_elem.text.split(',')] + + if children_ids and not parent_ids and hasparts_elem.text: + root_annotations.add(obj_id) + group_assignments[obj_id] = [None, children_ids] + + # assign single group to all grouped annotations + current_group_id = 0 + annotations_to_visit = list(root_annotations) + while annotations_to_visit: + ann_id = annotations_to_visit.pop() + ann_assignment = group_assignments[ann_id] + group_id, children_ids = ann_assignment + if group_id: + continue + + if ann_id in root_annotations: + current_group_id += 1 # start a new group + + group_id = current_group_id + ann_assignment[0] = group_id + + # continue with children + annotations_to_visit.extend(children_ids) + + assert current_group_id == len(root_annotations) + + for ann_id, ann_items in parsed_annotations.items(): + group_id = 0 + if ann_id in group_assignments: + ann_assignment = group_assignments[ann_id] + group_id = ann_assignment[0] + + for ann_item in ann_items: + if group_id: + ann_item.group = group_id + + image_annotations.append(ann_item) + + return image_annotations + + +class LabelMeImporter(Importer): + _EXTRACTOR_NAME = 'label_me' + + @classmethod + def detect(cls, path): + if not osp.isdir(path): + return False + return len(cls.find_subsets(path)) != 0 + + def __call__(self, path, **extra_params): + from datumaro.components.project import Project # cyclic import + project = Project() + + subset_paths = self.find_subsets(path) + if len(subset_paths) == 0: + raise Exception("Failed to find 'label_me' dataset at '%s'" % path) + + for subset_path, subset_name in subset_paths: + params = {} + if subset_name: + params['subset_name'] = subset_name + params.update(extra_params) + + source_name = osp.splitext(osp.basename(subset_path))[0] + project.add_source(source_name, { + 'url': subset_path, + 'format': self._EXTRACTOR_NAME, + 'options': params, + }) + + return project + + @staticmethod + def find_subsets(path): + subset_paths = [] + if not osp.isdir(path): + raise Exception("Expected directory path, got '%s'" % path) + + path = osp.normpath(path) + + def has_annotations(d): + return len([p for p in os.listdir(d) if p.endswith('.xml')]) != 0 + + if has_annotations(path): + subset_paths = [(path, None)] + else: + for d in os.listdir(path): + subset = d + d = osp.join(path, d) + if osp.isdir(d) and has_annotations(d): + subset_paths.append((d, subset)) + return subset_paths + + +class LabelMeConverter(Converter): + DEFAULT_IMAGE_EXT = LabelMePath.IMAGE_EXT + + def apply(self): + for subset_name in self._extractor.subsets() or [None]: + if subset_name: + subset = self._extractor.get_subset(subset_name) + else: + subset_name = DEFAULT_SUBSET_NAME + subset 
= self._extractor + + subset_dir = osp.join(self._save_dir, subset_name) + os.makedirs(subset_dir, exist_ok=True) + os.makedirs(osp.join(subset_dir, LabelMePath.MASKS_DIR), + exist_ok=True) + + for item in subset: + self._save_item(item, subset_dir) + + def _get_label(self, label_id): + if label_id is None: + return '' + return self._extractor.categories()[AnnotationType.label] \ + .items[label_id].name + + def _save_item(self, item, subset_dir): + from lxml import etree as ET + + log.debug("Converting item '%s'", item.id) + + if '/' in item.id: + raise Exception("Can't export item '%s': " + "LabelMe format only supports flat image layout" % item.id) + + image_filename = self._make_image_filename(item) + if self._save_images: + if item.has_image and item.image.has_data: + self._save_image(item, osp.join(subset_dir, image_filename)) + else: + log.debug("Item '%s' has no image", item.id) + + root_elem = ET.Element('annotation') + ET.SubElement(root_elem, 'filename').text = image_filename + ET.SubElement(root_elem, 'folder').text = '' + + source_elem = ET.SubElement(root_elem, 'source') + ET.SubElement(source_elem, 'sourceImage').text = '' + ET.SubElement(source_elem, 'sourceAnnotation').text = 'Datumaro' + + if item.has_image: + image_elem = ET.SubElement(root_elem, 'imagesize') + image_size = item.image.size + ET.SubElement(image_elem, 'nrows').text = str(image_size[0]) + ET.SubElement(image_elem, 'ncols').text = str(image_size[1]) + + groups = defaultdict(list) + + obj_id = 0 + for ann in item.annotations: + if not ann.type in { AnnotationType.polygon, + AnnotationType.bbox, AnnotationType.mask }: + continue + + obj_elem = ET.SubElement(root_elem, 'object') + ET.SubElement(obj_elem, 'name').text = self._get_label(ann.label) + ET.SubElement(obj_elem, 'deleted').text = '0' + ET.SubElement(obj_elem, 'verified').text = '0' + ET.SubElement(obj_elem, 'occluded').text = \ + 'yes' if ann.attributes.pop('occluded', '') == True else 'no' + ET.SubElement(obj_elem, 'date').text = '' + ET.SubElement(obj_elem, 'id').text = str(obj_id) + + parts_elem = ET.SubElement(obj_elem, 'parts') + if ann.group: + groups[ann.group].append((obj_id, parts_elem)) + else: + ET.SubElement(parts_elem, 'hasparts').text = '' + ET.SubElement(parts_elem, 'ispartof').text = '' + + if ann.type == AnnotationType.bbox: + ET.SubElement(obj_elem, 'type').text = 'bounding_box' + + poly_elem = ET.SubElement(obj_elem, 'polygon') + x0, y0, x1, y1 = ann.points + points = [ (x0, y0), (x1, y0), (x1, y1), (x0, y1) ] + for x, y in points: + point_elem = ET.SubElement(poly_elem, 'pt') + ET.SubElement(point_elem, 'x').text = '%.2f' % x + ET.SubElement(point_elem, 'y').text = '%.2f' % y + + ET.SubElement(poly_elem, 'username').text = \ + str(ann.attributes.pop('username', '')) + elif ann.type == AnnotationType.polygon: + poly_elem = ET.SubElement(obj_elem, 'polygon') + for x, y in zip(ann.points[::2], ann.points[1::2]): + point_elem = ET.SubElement(poly_elem, 'pt') + ET.SubElement(point_elem, 'x').text = '%.2f' % x + ET.SubElement(point_elem, 'y').text = '%.2f' % y + + ET.SubElement(poly_elem, 'username').text = \ + str(ann.attributes.pop('username', '')) + elif ann.type == AnnotationType.mask: + mask_filename = '%s_mask_%s.png' % (item.id, obj_id) + save_image(osp.join(subset_dir, LabelMePath.MASKS_DIR, + mask_filename), + self._paint_mask(ann.image)) + + segm_elem = ET.SubElement(obj_elem, 'segm') + ET.SubElement(segm_elem, 'mask').text = mask_filename + + bbox = find_mask_bbox(ann.image) + box_elem = ET.SubElement(segm_elem, 'box') + 
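For reference, a sketch of the palette lookup that `_paint_mask` (defined at the end of this file) performs when masks are exported; the toy mask here is assumed.

``` python
import numpy as np

# A 2-entry RGBA palette is indexed by the binary mask: background is
# transparent, foreground gets a fixed LabelMe-like color.
mask = np.array([[0, 1],
                 [1, 0]], dtype=np.uint8)
palette = np.array([[0, 0, 0, 0], [255, 203, 0, 153]], dtype=np.uint8)
painted = palette[mask]  # -> (2, 2, 4) RGBA image array
```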
ET.SubElement(box_elem, 'xmin').text = '%.2f' % bbox[0] + ET.SubElement(box_elem, 'ymin').text = '%.2f' % bbox[1] + ET.SubElement(box_elem, 'xmax').text = \ + '%.2f' % (bbox[0] + bbox[2]) + ET.SubElement(box_elem, 'ymax').text = \ + '%.2f' % (bbox[1] + bbox[3]) + + ET.SubElement(segm_elem, 'username').text = \ + str(ann.attributes.pop('username', '')) + else: + raise NotImplementedError("Unknown shape type '%s'" % ann.type) + + attrs = [] + for k, v in ann.attributes.items(): + attrs.append('%s=%s' % (k, v)) + ET.SubElement(obj_elem, 'attributes').text = ', '.join(attrs) + + obj_id += 1 + + for _, group in groups.items(): + leader_id, leader_parts_elem = group[0] + leader_parts = [str(o_id) for o_id, _ in group[1:]] + ET.SubElement(leader_parts_elem, 'hasparts').text = \ + ','.join(leader_parts) + ET.SubElement(leader_parts_elem, 'ispartof').text = '' + + for obj_id, parts_elem in group[1:]: + ET.SubElement(parts_elem, 'hasparts').text = '' + ET.SubElement(parts_elem, 'ispartof').text = str(leader_id) + + xml_path = osp.join(subset_dir, '%s.xml' % item.id) + with open(xml_path, 'w', encoding='utf-8') as f: + xml_data = ET.tostring(root_elem, encoding='unicode', + pretty_print=True) + f.write(xml_data) + + @staticmethod + def _paint_mask(mask): + # TODO: check if mask colors are random + return np.array([[0, 0, 0, 0], [255, 203, 0, 153]], + dtype=np.uint8)[mask.astype(np.uint8)] \ No newline at end of file diff --git a/datumaro/plugins/mot_format.py b/datumaro/plugins/mot_format.py new file mode 100644 index 0000000000..12d3d07c6c --- /dev/null +++ b/datumaro/plugins/mot_format.py @@ -0,0 +1,314 @@ +# Copyright (C) 2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +# The Multiple Object Tracking Benchmark challenge format support +# Format description: https://arxiv.org/pdf/1906.04567.pdf +# Another description: https://motchallenge.net/instructions + +from collections import OrderedDict +import csv +from enum import Enum +import logging as log +import os +import os.path as osp + +from datumaro.components.extractor import (SourceExtractor, + DatasetItem, AnnotationType, Bbox, LabelCategories +) +from datumaro.components.extractor import Importer +from datumaro.components.converter import Converter +from datumaro.util import cast +from datumaro.util.image import Image + + +MotLabel = Enum('MotLabel', [ + ('pedestrian', 1), + ('person on vehicle', 2), + ('car', 3), + ('bicycle', 4), + ('motorbike', 5), + ('non motorized vehicle', 6), + ('static person', 7), + ('distractor', 8), + ('occluder', 9), + ('occluder on the ground', 10), + ('occluder full', 11), + ('reflection', 12), +]) + +class MotPath: + IMAGE_DIR = 'img1' + SEQINFO_FILE = 'seqinfo.ini' + LABELS_FILE = 'labels.txt' + GT_FILENAME = 'gt.txt' + DET_FILENAME = 'det.txt' + + IMAGE_EXT = '.jpg' + + FIELDS = [ + 'frame_id', + 'track_id', + 'x', + 'y', + 'w', + 'h', + 'confidence', # or 'not ignored' flag for GT anns + 'class_id', + 'visibility' + ] + + +class MotSeqExtractor(SourceExtractor): + def __init__(self, path, labels=None, occlusion_threshold=0, is_gt=None): + super().__init__() + + assert osp.isfile(path) + seq_root = osp.dirname(osp.dirname(path)) + self._image_dir = '' + if osp.isdir(osp.join(seq_root, MotPath.IMAGE_DIR)): + self._image_dir = osp.join(seq_root, MotPath.IMAGE_DIR) + + seq_info = osp.join(seq_root, MotPath.SEQINFO_FILE) + if osp.isfile(seq_info): + seq_info = self._parse_seq_info(seq_info) + self._image_dir = osp.join(seq_root, seq_info['imdir']) + else: + seq_info = None + self._seq_info = seq_info + 
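A hedged sketch of the `seqinfo.ini` layout this extractor expects (field values are assumed) and how it is parsed into typed fields; note the parser lowercases whole lines, keys and values alike.

``` python
# Illustration only: _parse_seq_info is an internal classmethod.
seqinfo = '''\
[Sequence]
name=MOT16-05
imDir=img1
frameRate=14
seqLength=837
imWidth=640
imHeight=480
imExt=.jpg
'''
with open('seqinfo.ini', 'w', encoding='utf-8') as f:
    f.write(seqinfo)

from datumaro.plugins.mot_format import MotSeqExtractor

info = MotSeqExtractor._parse_seq_info('seqinfo.ini')
# -> {'name': 'mot16-05', 'imdir': 'img1', 'framerate': 14,
#     'seqlength': 837, 'imwidth': 640, 'imheight': 480, 'imext': '.jpg'}
```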
+ self._occlusion_threshold = float(occlusion_threshold) + + assert is_gt in {None, True, False} + if is_gt is None: + if osp.basename(path) == MotPath.DET_FILENAME: + is_gt = False + else: + is_gt = True + self._is_gt = is_gt + + if labels is None: + labels = osp.join(osp.dirname(path), MotPath.LABELS_FILE) + if not osp.isfile(labels): + labels = [lbl.name for lbl in MotLabel] + if isinstance(labels, str): + labels = self._parse_labels(labels) + elif isinstance(labels, list): + assert all(isinstance(lbl, str) for lbl in labels), labels + else: + raise TypeError("Unexpected type of 'labels' argument: %s" % labels) + self._categories = self._load_categories(labels) + self._items = self._load_items(path) + + def categories(self): + return self._categories + + def __iter__(self): + for item in self._items.values(): + yield item + + def __len__(self): + return len(self._items) + + @staticmethod + def _parse_labels(path): + with open(path, encoding='utf-8') as labels_file: + return [s.strip() for s in labels_file] + + def _load_categories(self, labels): + attributes = ['track_id'] + if self._is_gt: + attributes += ['occluded', 'visibility', 'ignored'] + else: + attributes += ['score'] + label_cat = LabelCategories(attributes=attributes) + for label in labels: + label_cat.add(label) + + return { AnnotationType.label: label_cat } + + def _load_items(self, path): + labels_count = len(self._categories[AnnotationType.label].items) + items = OrderedDict() + + if self._seq_info: + for frame_id in range(self._seq_info['seqlength']): + items[frame_id] = DatasetItem( + id=frame_id, + subset=self._subset, + image=Image( + path=osp.join(self._image_dir, + '%06d%s' % (frame_id, self._seq_info['imext'])), + size=(self._seq_info['imheight'], self._seq_info['imwidth']) + ) + ) + elif osp.isdir(self._image_dir): + for p in os.listdir(self._image_dir): + if p.endswith(MotPath.IMAGE_EXT): + frame_id = int(osp.splitext(p)[0]) + items[frame_id] = DatasetItem( + id=frame_id, + subset=self._subset, + image=osp.join(self._image_dir, p), + ) + + with open(path, newline='', encoding='utf-8') as csv_file: + # NOTE: Different MOT files have different count of fields + # (7, 9 or 10). 
This is handled by reader: + # - all extra fields go to a separate field + # - all unmet fields have None values + for row in csv.DictReader(csv_file, fieldnames=MotPath.FIELDS): + frame_id = int(row['frame_id']) + item = items.get(frame_id) + if item is None: + item = DatasetItem(id=frame_id, subset=self._subset) + annotations = item.annotations + + x, y = float(row['x']), float(row['y']) + w, h = float(row['w']), float(row['h']) + label_id = row.get('class_id') + if label_id and label_id != '-1': + label_id = int(label_id) - 1 + assert label_id < labels_count, label_id + else: + label_id = None + + attributes = {} + + # Annotations for detection task are not related to any track + track_id = int(row['track_id']) + if 0 < track_id: + attributes['track_id'] = track_id + + confidence = cast(row.get('confidence'), float, 1) + visibility = cast(row.get('visibility'), float, 1) + if self._is_gt: + attributes['visibility'] = visibility + attributes['occluded'] = \ + visibility <= self._occlusion_threshold + attributes['ignored'] = confidence == 0 + else: + attributes['score'] = float(confidence) + + annotations.append(Bbox(x, y, w, h, label=label_id, + attributes=attributes)) + + items[frame_id] = item + return items + + @classmethod + def _parse_seq_info(cls, path): + fields = {} + with open(path, encoding='utf-8') as f: + for line in f: + entry = line.lower().strip().split('=', maxsplit=1) + if len(entry) == 2: + fields[entry[0]] = entry[1] + cls._check_seq_info(fields) + for k in { 'framerate', 'seqlength', 'imwidth', 'imheight' }: + fields[k] = int(fields[k]) + return fields + + @staticmethod + def _check_seq_info(seq_info): + assert set(seq_info) == {'name', 'imdir', 'framerate', 'seqlength', 'imwidth', 'imheight', 'imext'}, seq_info + +class MotSeqImporter(Importer): + _EXTRACTOR_NAME = 'mot_seq' + + @classmethod + def detect(cls, path): + return len(cls.find_subsets(path)) != 0 + + def __call__(self, path, **extra_params): + from datumaro.components.project import Project # cyclic import + project = Project() + + subsets = self.find_subsets(path) + if len(subsets) == 0: + raise Exception("Failed to find 'mot' dataset at '%s'" % path) + + for ann_file in subsets: + log.info("Found a dataset at '%s'" % ann_file) + + source_name = osp.splitext(osp.basename(ann_file))[0] + project.add_source(source_name, { + 'url': ann_file, + 'format': self._EXTRACTOR_NAME, + 'options': extra_params, + }) + + return project + + @staticmethod + def find_subsets(path): + subsets = [] + if path.endswith('.txt') and osp.isfile(path): + subsets = [path] + elif osp.isdir(path): + p = osp.join(path, 'gt', MotPath.GT_FILENAME) + if osp.isfile(p): + subsets.append(p) + return subsets + +class MotSeqGtConverter(Converter): + DEFAULT_IMAGE_EXT = MotPath.IMAGE_EXT + + def apply(self): + extractor = self._extractor + + images_dir = osp.join(self._save_dir, MotPath.IMAGE_DIR) + os.makedirs(images_dir, exist_ok=True) + self._images_dir = images_dir + + anno_dir = osp.join(self._save_dir, 'gt') + os.makedirs(anno_dir, exist_ok=True) + anno_file = osp.join(anno_dir, MotPath.GT_FILENAME) + with open(anno_file, 'w', encoding="utf-8") as csv_file: + writer = csv.DictWriter(csv_file, fieldnames=MotPath.FIELDS) + + track_id_mapping = {-1: -1} + for idx, item in enumerate(extractor): + log.debug("Converting item '%s'", item.id) + + frame_id = cast(item.id, int, 1 + idx) + + for anno in item.annotations: + if anno.type != AnnotationType.bbox: + continue + + track_id = int(anno.attributes.get('track_id', -1)) + if track_id not in 
track_id_mapping: + track_id_mapping[track_id] = len(track_id_mapping) + track_id = track_id_mapping[track_id] + + writer.writerow({ + 'frame_id': frame_id, + 'track_id': track_id, + 'x': anno.x, + 'y': anno.y, + 'w': anno.w, + 'h': anno.h, + 'confidence': int(anno.attributes.get('ignored') != True), + 'class_id': 1 + cast(anno.label, int, -2), + 'visibility': float( + anno.attributes.get('visibility', + 1 - float( + anno.attributes.get('occluded', False) + ) + ) + ) + }) + + if self._save_images: + if item.has_image and item.image.has_data: + self._save_image(item, osp.join(self._images_dir, + '%06d%s' % (frame_id, self._find_image_ext(item)))) + else: + log.debug("Item '%s' has no image", item.id) + + labels_file = osp.join(anno_dir, MotPath.LABELS_FILE) + with open(labels_file, 'w', encoding='utf-8') as f: + f.write('\n'.join(l.name + for l in extractor.categories()[AnnotationType.label].items) + ) diff --git a/datumaro/plugins/openvino_launcher.py b/datumaro/plugins/openvino_launcher.py new file mode 100644 index 0000000000..abdaa0fcae --- /dev/null +++ b/datumaro/plugins/openvino_launcher.py @@ -0,0 +1,188 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +# pylint: disable=exec-used + +import cv2 +import logging as log +import numpy as np +import os.path as osp +import shutil + +from openvino.inference_engine import IECore + +from datumaro.components.cli_plugin import CliPlugin +from datumaro.components.launcher import Launcher + + +class OpenVinoImporter(CliPlugin): + @classmethod + def build_cmdline_parser(cls, **kwargs): + parser = super().build_cmdline_parser(**kwargs) + parser.add_argument('-d', '--description', required=True, + help="Path to the model description file (.xml)") + parser.add_argument('-w', '--weights', required=True, + help="Path to the model weights file (.bin)") + parser.add_argument('-i', '--interpreter', required=True, + help="Path to the network output interprter script (.py)") + parser.add_argument('--device', default='CPU', + help="Target device (default: %(default)s)") + return parser + + @staticmethod + def copy_model(model_dir, model): + shutil.copy(model['description'], + osp.join(model_dir, osp.basename(model['description']))) + model['description'] = osp.basename(model['description']) + + shutil.copy(model['weights'], + osp.join(model_dir, osp.basename(model['weights']))) + model['weights'] = osp.basename(model['weights']) + + shutil.copy(model['interpreter'], + osp.join(model_dir, osp.basename(model['interpreter']))) + model['interpreter'] = osp.basename(model['interpreter']) + + +class InterpreterScript: + def __init__(self, path): + with open(path, 'r') as f: + script = f.read() + + context = {} + exec(script, context, context) + + process_outputs = context.get('process_outputs') + if not callable(process_outputs): + raise Exception("Can't find 'process_outputs' function in " + "the interpreter script") + self.__dict__['process_outputs'] = process_outputs + + get_categories = context.get('get_categories') + assert get_categories is None or callable(get_categories) + if get_categories: + self.__dict__['get_categories'] = get_categories + + @staticmethod + def get_categories(): + return None + + @staticmethod + def process_outputs(inputs, outputs): + raise NotImplementedError( + "Function should be implemented in the interpreter script") + + +class OpenVinoLauncher(Launcher): + cli_plugin = OpenVinoImporter + + def __init__(self, description, weights, interpreter, + plugins_path=None, device=None, 
model_dir=None): + model_dir = model_dir or '' + if not osp.isfile(description): + description = osp.join(model_dir, description) + if not osp.isfile(description): + raise Exception('Failed to open model description file "%s"' % \ + (description)) + + if not osp.isfile(weights): + weights = osp.join(model_dir, weights) + if not osp.isfile(weights): + raise Exception('Failed to open model weights file "%s"' % \ + (weights)) + + if not osp.isfile(interpreter): + interpreter = osp.join(model_dir, interpreter) + if not osp.isfile(interpreter): + raise Exception('Failed to open model interpreter script file "%s"' % \ + (interpreter)) + + self._interpreter = InterpreterScript(interpreter) + + self._device = device or 'CPU' + + self._ie = IECore() + if hasattr(self._ie, 'read_network'): + self._network = self._ie.read_network(description, weights) + else: # backward compatibility + from openvino.inference_engine import IENetwork + self._network = IENetwork.from_ir(description, weights) + self._check_model_support(self._network, self._device) + self._load_executable_net() + + def _check_model_support(self, net, device): + supported_layers = set(self._ie.query_network(net, device)) + not_supported_layers = set(net.layers) - supported_layers + if len(not_supported_layers) != 0: + log.error("The following layers are not supported " \ + "by the plugin for device '%s': %s." % \ + (device, ', '.join(not_supported_layers))) + raise NotImplementedError( + "Some layers are not supported on the device") + + def _load_executable_net(self, batch_size=1): + network = self._network + + iter_inputs = iter(network.inputs) + self._input_blob_name = next(iter_inputs) + self._output_blob_name = next(iter(network.outputs)) + + # NOTE: handling for the inclusion of `image_info` in OpenVino2019 + self._require_image_info = 'image_info' in network.inputs + if self._input_blob_name == 'image_info': + self._input_blob_name = next(iter_inputs) + + input_type = network.inputs[self._input_blob_name] + self._input_layout = input_type if isinstance(input_type, list) else input_type.shape + + self._input_layout[0] = batch_size + network.reshape({self._input_blob_name: self._input_layout}) + self._batch_size = batch_size + + self._net = self._ie.load_network(network=network, num_requests=1, + device_name=self._device) + + def infer(self, inputs): + assert len(inputs.shape) == 4, \ + "Expected an input image in (N, H, W, C) format, got %s" % \ + (inputs.shape) + assert inputs.shape[3] == 3, "Expected BGR input, got %s" % inputs.shape + + n, c, h, w = self._input_layout + if inputs.shape[1:3] != (h, w): + resized_inputs = np.empty((n, h, w, c), dtype=inputs.dtype) + for inp, resized_input in zip(inputs, resized_inputs): + cv2.resize(inp, (w, h), resized_input) + inputs = resized_inputs + inputs = inputs.transpose((0, 3, 1, 2)) # NHWC to NCHW + inputs = {self._input_blob_name: inputs} + if self._require_image_info: + info = np.zeros([1, 3]) + info[0, 0] = h + info[0, 1] = w + info[0, 2] = 1.0 # scale + inputs['image_info'] = info + + results = self._net.infer(inputs) + if len(results) == 1: + return results[self._output_blob_name] + else: + return results + + def launch(self, inputs): + batch_size = len(inputs) + if self._batch_size < batch_size: + self._load_executable_net(batch_size) + + outputs = self.infer(inputs) + results = self.process_outputs(inputs, outputs) + return results + + def categories(self): + return self._interpreter.get_categories() + + def process_outputs(self, inputs, outputs): + return 
self._interpreter.process_outputs(inputs, outputs) + diff --git a/datumaro/plugins/tf_detection_api_format/__init__.py b/datumaro/plugins/tf_detection_api_format/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/datumaro/plugins/tf_detection_api_format/converter.py b/datumaro/plugins/tf_detection_api_format/converter.py new file mode 100644 index 0000000000..a178bdba40 --- /dev/null +++ b/datumaro/plugins/tf_detection_api_format/converter.py @@ -0,0 +1,217 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import codecs +from collections import OrderedDict +import hashlib +import logging as log +import os +import os.path as osp +import string + +from datumaro.components.extractor import (AnnotationType, DEFAULT_SUBSET_NAME, + LabelCategories +) +from datumaro.components.converter import Converter +from datumaro.util.image import encode_image +from datumaro.util.annotation_util import (max_bbox, + find_group_leader, find_instances) +from datumaro.util.mask_tools import merge_masks +from datumaro.util.tf_util import import_tf as _import_tf + +from .format import DetectionApiPath +tf = _import_tf() + + +# filter out non-ASCII characters, otherwise training will crash +_printable = set(string.printable) +def _make_printable(s): + return ''.join(filter(lambda x: x in _printable, s)) + +def int64_feature(value): + return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) + +def int64_list_feature(value): + return tf.train.Feature(int64_list=tf.train.Int64List(value=value)) + +def bytes_feature(value): + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) + +def bytes_list_feature(value): + return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) + +def float_list_feature(value): + return tf.train.Feature(float_list=tf.train.FloatList(value=value)) + +class TfDetectionApiConverter(Converter): + DEFAULT_IMAGE_EXT = DetectionApiPath.DEFAULT_IMAGE_EXT + + @classmethod + def build_cmdline_parser(cls, **kwargs): + parser = super().build_cmdline_parser(**kwargs) + parser.add_argument('--save-masks', action='store_true', + help="Include instance masks (default: %(default)s)") + return parser + + def __init__(self, extractor, save_dir, save_masks=False, **kwargs): + super().__init__(extractor, save_dir, **kwargs) + + self._save_masks = save_masks + + def apply(self): + os.makedirs(self._save_dir, exist_ok=True) + + label_categories = self._extractor.categories().get(AnnotationType.label, + LabelCategories()) + get_label = lambda label_id: label_categories.items[label_id].name \ + if label_id is not None else '' + label_ids = OrderedDict((label.name, 1 + idx) + for idx, label in enumerate(label_categories.items)) + map_label_id = lambda label_id: label_ids.get(get_label(label_id), 0) + self._get_label = get_label + self._get_label_id = map_label_id + + subsets = self._extractor.subsets() + if len(subsets) == 0: + subsets = [ None ] + + for subset_name in subsets: + if subset_name: + subset = self._extractor.get_subset(subset_name) + else: + subset_name = DEFAULT_SUBSET_NAME + subset = self._extractor + + labelmap_path = osp.join(self._save_dir, DetectionApiPath.LABELMAP_FILE) + with codecs.open(labelmap_path, 'w', encoding='utf8') as f: + for label, idx in label_ids.items(): + f.write( + 'item {\n' + + ('\tid: %s\n' % (idx)) + + ("\tname: '%s'\n" % (label)) + + '}\n\n' + ) + + anno_path = osp.join(self._save_dir, '%s.tfrecord' % (subset_name)) + with tf.io.TFRecordWriter(anno_path) as writer: + for item 
in subset: + tf_example = self._make_tf_example(item) + writer.write(tf_example.SerializeToString()) + + @staticmethod + def _find_instances(annotations): + return find_instances(a for a in annotations + if a.type in { AnnotationType.bbox, AnnotationType.mask }) + + def _find_instance_parts(self, group, img_width, img_height): + boxes = [a for a in group if a.type == AnnotationType.bbox] + masks = [a for a in group if a.type == AnnotationType.mask] + + anns = boxes + masks + leader = find_group_leader(anns) + bbox = max_bbox(anns) + + mask = None + if self._save_masks: + mask = merge_masks([m.image for m in masks]) + + return [leader, mask, bbox] + + def _export_instances(self, instances, width, height): + xmins = [] # List of normalized left x coordinates of bounding boxes (1 per box) + xmaxs = [] # List of normalized right x coordinates of bounding boxes (1 per box) + ymins = [] # List of normalized top y coordinates of bounding boxes (1 per box) + ymaxs = [] # List of normalized bottom y coordinates of bounding boxes (1 per box) + classes_text = [] # List of class names of bounding boxes (1 per box) + classes = [] # List of class ids of bounding boxes (1 per box) + masks = [] # List of PNG-encoded instance masks (1 per box) + + for leader, mask, box in instances: + label = _make_printable(self._get_label(leader.label)) + classes_text.append(label.encode('utf-8')) + classes.append(self._get_label_id(leader.label)) + + xmins.append(box[0] / width) + xmaxs.append((box[0] + box[2]) / width) + ymins.append(box[1] / height) + ymaxs.append((box[1] + box[3]) / height) + + if self._save_masks: + if mask is not None: + mask = encode_image(mask, '.png') + else: + mask = b'' + masks.append(mask) + + result = {} + if classes: + result = { + 'image/object/bbox/xmin': float_list_feature(xmins), + 'image/object/bbox/xmax': float_list_feature(xmaxs), + 'image/object/bbox/ymin': float_list_feature(ymins), + 'image/object/bbox/ymax': float_list_feature(ymaxs), + 'image/object/class/text': bytes_list_feature(classes_text), + 'image/object/class/label': int64_list_feature(classes), + } + if masks: + result['image/object/mask'] = bytes_list_feature(masks) + return result + + def _make_tf_example(self, item): + features = { + 'image/source_id': bytes_feature( + str(item.attributes.get('source_id') or '').encode('utf-8') + ), + } + + filename = self._make_image_filename(item) + features['image/filename'] = bytes_feature(filename.encode('utf-8')) + + if not item.has_image: + raise Exception("Failed to export dataset item '%s': " + "item has no image info" % item.id) + height, width = item.image.size + + features.update({ + 'image/height': int64_feature(height), + 'image/width': int64_feature(width), + }) + + features.update({ + 'image/encoded': bytes_feature(b''), + 'image/format': bytes_feature(b''), + 'image/key/sha256': bytes_feature(b''), + }) + if self._save_images: + if item.has_image and item.image.has_data: + buffer, fmt = self._save_image(item, filename) + key = hashlib.sha256(buffer).hexdigest() + + features.update({ + 'image/encoded': bytes_feature(buffer), + 'image/format': bytes_feature(fmt.encode('utf-8')), + 'image/key/sha256': bytes_feature(key.encode('utf8')), + }) + else: + log.warning("Item '%s' has no image" % item.id) + + instances = self._find_instances(item.annotations) + instances = [self._find_instance_parts(i, width, height) for i in instances] + features.update(self._export_instances(instances, width, height)) + + tf_example = tf.train.Example( + 
features=tf.train.Features(feature=features)) + + return tf_example + + def _save_image(self, item, path=None): + dst_ext = osp.splitext(osp.basename(path))[1] + fmt = DetectionApiPath.IMAGE_EXT_FORMAT.get(dst_ext) + if not fmt: + log.warning("Item '%s': can't find format string for the '%s' " + "image extension, the corresponding field will be empty." % \ + (item.id, dst_ext)) + buffer = encode_image(item.image.data, dst_ext) + return buffer, fmt \ No newline at end of file diff --git a/datumaro/plugins/tf_detection_api_format/extractor.py b/datumaro/plugins/tf_detection_api_format/extractor.py new file mode 100644 index 0000000000..6962d3c0e6 --- /dev/null +++ b/datumaro/plugins/tf_detection_api_format/extractor.py @@ -0,0 +1,195 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from collections import OrderedDict +import numpy as np +import os.path as osp +import re + +from datumaro.components.extractor import (SourceExtractor, DatasetItem, + AnnotationType, Bbox, Mask, LabelCategories +) +from datumaro.util.image import Image, decode_image, lazy_image +from datumaro.util.tf_util import import_tf as _import_tf + +from .format import DetectionApiPath +tf = _import_tf() + + +def clamp(value, _min, _max): + return max(min(_max, value), _min) + +class TfDetectionApiExtractor(SourceExtractor): + def __init__(self, path): + assert osp.isfile(path), path + images_dir = '' + root_dir = osp.dirname(osp.abspath(path)) + if osp.basename(root_dir) == DetectionApiPath.ANNOTATIONS_DIR: + root_dir = osp.dirname(root_dir) + images_dir = osp.join(root_dir, DetectionApiPath.IMAGES_DIR) + if not osp.isdir(images_dir): + images_dir = '' + + super().__init__(subset=osp.splitext(osp.basename(path))[0]) + + items, labels = self._parse_tfrecord_file(path, self._subset, images_dir) + self._items = items + self._categories = self._load_categories(labels) + + def categories(self): + return self._categories + + def __iter__(self): + for item in self._items: + yield item + + def __len__(self): + return len(self._items) + + @staticmethod + def _load_categories(labels): + label_categories = LabelCategories() + labels = sorted(labels.items(), key=lambda item: item[1]) + for label, _ in labels: + label_categories.add(label) + return { + AnnotationType.label: label_categories + } + + @classmethod + def _parse_labelmap(cls, text): + id_pattern = r'(?:id\s*:\s*(?P\d+))' + name_pattern = r'(?:name\s*:\s*[\'\"](?P.*?)[\'\"])' + entry_pattern = r'(\{(?:[\s\n]*(?:%(id)s|%(name)s)[\s\n]*){2}\})+' % \ + {'id': id_pattern, 'name': name_pattern} + matches = re.finditer(entry_pattern, text) + + labelmap = {} + for match in matches: + label_id = match.group('id') + label_name = match.group('name') + if label_id is not None and label_name is not None: + labelmap[label_name] = int(label_id) + + return labelmap + + @classmethod + def _parse_tfrecord_file(cls, filepath, subset, images_dir): + dataset = tf.data.TFRecordDataset(filepath) + features = { + 'image/filename': tf.io.FixedLenFeature([], tf.string), + 'image/source_id': tf.io.FixedLenFeature([], tf.string), + 'image/height': tf.io.FixedLenFeature([], tf.int64), + 'image/width': tf.io.FixedLenFeature([], tf.int64), + 'image/encoded': tf.io.FixedLenFeature([], tf.string), + 'image/format': tf.io.FixedLenFeature([], tf.string), + + # use varlen to avoid errors when this field is missing + 'image/key/sha256': tf.io.VarLenFeature(tf.string), + + # Object boxes and classes. 
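+ # NOTE: box coordinates in this format are stored normalized to the
+ # [0; 1] range relative to the image size; they are scaled back to
+ # pixel values later (see the clamp() calls below).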
+ 'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32), + 'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32), + 'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32), + 'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32), + 'image/object/class/label': tf.io.VarLenFeature(tf.int64), + 'image/object/class/text': tf.io.VarLenFeature(tf.string), + 'image/object/mask': tf.io.VarLenFeature(tf.string), + } + + dataset_labels = OrderedDict() + labelmap_path = osp.join(osp.dirname(filepath), + DetectionApiPath.LABELMAP_FILE) + if osp.exists(labelmap_path): + with open(labelmap_path, 'r', encoding='utf-8') as f: + labelmap_text = f.read() + dataset_labels.update({ label: id - 1 + for label, id in cls._parse_labelmap(labelmap_text).items() + }) + + dataset_items = [] + + for record in dataset: + parsed_record = tf.io.parse_single_example(record, features) + frame_id = parsed_record['image/source_id'].numpy().decode('utf-8') + frame_filename = \ + parsed_record['image/filename'].numpy().decode('utf-8') + frame_height = tf.cast( + parsed_record['image/height'], tf.int64).numpy().item() + frame_width = tf.cast( + parsed_record['image/width'], tf.int64).numpy().item() + frame_image = parsed_record['image/encoded'].numpy() + xmins = tf.sparse.to_dense( + parsed_record['image/object/bbox/xmin']).numpy() + ymins = tf.sparse.to_dense( + parsed_record['image/object/bbox/ymin']).numpy() + xmaxs = tf.sparse.to_dense( + parsed_record['image/object/bbox/xmax']).numpy() + ymaxs = tf.sparse.to_dense( + parsed_record['image/object/bbox/ymax']).numpy() + label_ids = tf.sparse.to_dense( + parsed_record['image/object/class/label']).numpy() + labels = tf.sparse.to_dense( + parsed_record['image/object/class/text'], + default_value=b'').numpy() + masks = tf.sparse.to_dense( + parsed_record['image/object/mask'], + default_value=b'').numpy() + + for label, label_id in zip(labels, label_ids): + label = label.decode('utf-8') + if not label: + continue + if label_id <= 0: + continue + if label in dataset_labels: + continue + dataset_labels[label] = label_id - 1 + + item_id = osp.splitext(frame_filename)[0] + + annotations = [] + for shape_id, shape in enumerate( + np.dstack((labels, xmins, ymins, xmaxs, ymaxs))[0]): + label = shape[0].decode('utf-8') + + mask = None + if len(masks) != 0: + mask = masks[shape_id] + + if mask is not None: + if isinstance(mask, bytes): + mask = lazy_image(mask, decode_image) + annotations.append(Mask(image=mask, + label=dataset_labels.get(label) + )) + else: + x = clamp(shape[1] * frame_width, 0, frame_width) + y = clamp(shape[2] * frame_height, 0, frame_height) + w = clamp(shape[3] * frame_width, 0, frame_width) - x + h = clamp(shape[4] * frame_height, 0, frame_height) - y + annotations.append(Bbox(x, y, w, h, + label=dataset_labels.get(label) + )) + + image_size = None + if frame_height and frame_width: + image_size = (frame_height, frame_width) + + image_params = {} + if frame_image: + image_params['data'] = lazy_image(frame_image, decode_image) + if frame_filename: + image_params['path'] = osp.join(images_dir, frame_filename) + + image = None + if image_params: + image = Image(**image_params, size=image_size) + + dataset_items.append(DatasetItem(id=item_id, subset=subset, + image=image, annotations=annotations, + attributes={'source_id': frame_id})) + + return dataset_items, dataset_labels diff --git a/datumaro/plugins/tf_detection_api_format/format.py b/datumaro/plugins/tf_detection_api_format/format.py new file mode 100644 index 0000000000..f4a879a692 --- /dev/null +++ 
b/datumaro/plugins/tf_detection_api_format/format.py @@ -0,0 +1,13 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +class DetectionApiPath: + IMAGES_DIR = 'images' + ANNOTATIONS_DIR = 'annotations' + + DEFAULT_IMAGE_EXT = '.jpg' + IMAGE_EXT_FORMAT = {'.jpg': 'jpeg', '.png': 'png'} + + LABELMAP_FILE = 'label_map.pbtxt' \ No newline at end of file diff --git a/datumaro/plugins/tf_detection_api_format/importer.py b/datumaro/plugins/tf_detection_api_format/importer.py new file mode 100644 index 0000000000..b3d8a47d8e --- /dev/null +++ b/datumaro/plugins/tf_detection_api_format/importer.py @@ -0,0 +1,52 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from glob import glob +import logging as log +import os.path as osp + +from datumaro.components.extractor import Importer + + +class TfDetectionApiImporter(Importer): + EXTRACTOR_NAME = 'tf_detection_api' + + @classmethod + def detect(cls, path): + return len(cls.find_subsets(path)) != 0 + + def __call__(self, path, **extra_params): + from datumaro.components.project import Project # cyclic import + project = Project() + + subset_paths = self.find_subsets(path) + if len(subset_paths) == 0: + raise Exception( + "Failed to find 'tf_detection_api' dataset at '%s'" % path) + + for subset_path in subset_paths: + if not osp.isfile(subset_path): + continue + + log.info("Found a dataset at '%s'" % subset_path) + + subset_name = osp.splitext(osp.basename(subset_path))[0] + + project.add_source(subset_name, { + 'url': subset_path, + 'format': self.EXTRACTOR_NAME, + 'options': dict(extra_params), + }) + + return project + + @staticmethod + def find_subsets(path): + if path.endswith('.tfrecord') and osp.isfile(path): + subset_paths = [path] + else: + subset_paths = glob(osp.join(path, '**', '*.tfrecord'), + recursive=True) + return subset_paths \ No newline at end of file diff --git a/datumaro/plugins/transforms.py b/datumaro/plugins/transforms.py new file mode 100644 index 0000000000..7e7cea8bad --- /dev/null +++ b/datumaro/plugins/transforms.py @@ -0,0 +1,524 @@ +# Copyright (C) 2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from enum import Enum +import logging as log +import os.path as osp +import random +import re + +import pycocotools.mask as mask_utils + +from datumaro.components.extractor import (Transform, AnnotationType, + RleMask, Polygon, Bbox, + LabelCategories, MaskCategories, PointsCategories +) +from datumaro.components.cli_plugin import CliPlugin +import datumaro.util.mask_tools as mask_tools +from datumaro.util.annotation_util import find_group_leader, find_instances + + +class CropCoveredSegments(Transform, CliPlugin): + def transform_item(self, item): + annotations = [] + segments = [] + for ann in item.annotations: + if ann.type in {AnnotationType.polygon, AnnotationType.mask}: + segments.append(ann) + else: + annotations.append(ann) + if not segments: + return item + + if not item.has_image: + raise Exception("Image info is required for this transform") + h, w = item.image.size + segments = self.crop_segments(segments, w, h) + + annotations += segments + return self.wrap_item(item, annotations=annotations) + + @classmethod + def crop_segments(cls, segment_anns, img_width, img_height): + segment_anns = sorted(segment_anns, key=lambda x: x.z_order) + + segments = [] + for s in segment_anns: + if s.type == AnnotationType.polygon: + segments.append(s.points) + elif s.type == AnnotationType.mask: + if isinstance(s, RleMask): + rle = s.rle + else: + rle 
= mask_tools.mask_to_rle(s.image) + segments.append(rle) + + segments = mask_tools.crop_covered_segments( + segments, img_width, img_height) + + new_anns = [] + for ann, new_segment in zip(segment_anns, segments): + fields = {'z_order': ann.z_order, 'label': ann.label, + 'id': ann.id, 'group': ann.group, 'attributes': ann.attributes + } + if ann.type == AnnotationType.polygon: + if fields['group'] is None: + fields['group'] = cls._make_group_id( + segment_anns + new_anns, fields['id']) + for polygon in new_segment: + new_anns.append(Polygon(points=polygon, **fields)) + else: + rle = mask_tools.mask_to_rle(new_segment) + rle = mask_utils.frPyObjects(rle, *rle['size']) + new_anns.append(RleMask(rle=rle, **fields)) + + return new_anns + + @staticmethod + def _make_group_id(anns, ann_id): + if ann_id: + return ann_id + max_gid = max(anns, default=0, key=lambda x: x.group) + return max_gid + 1 + +class MergeInstanceSegments(Transform, CliPlugin): + """ + Replaces instance masks and, optionally, polygons with a single mask. + """ + + @classmethod + def build_cmdline_parser(cls, **kwargs): + parser = super().build_cmdline_parser(**kwargs) + parser.add_argument('--include-polygons', action='store_true', + help="Include polygons") + return parser + + def __init__(self, extractor, include_polygons=False): + super().__init__(extractor) + + self._include_polygons = include_polygons + + def transform_item(self, item): + annotations = [] + segments = [] + for ann in item.annotations: + if ann.type in {AnnotationType.polygon, AnnotationType.mask}: + segments.append(ann) + else: + annotations.append(ann) + if not segments: + return item + + if not item.has_image: + raise Exception("Image info is required for this transform") + h, w = item.image.size + instances = self.find_instances(segments) + segments = [self.merge_segments(i, w, h, self._include_polygons) + for i in instances] + segments = sum(segments, []) + + annotations += segments + return self.wrap_item(item, annotations=annotations) + + @classmethod + def merge_segments(cls, instance, img_width, img_height, + include_polygons=False): + polygons = [a for a in instance if a.type == AnnotationType.polygon] + masks = [a for a in instance if a.type == AnnotationType.mask] + if not polygons and not masks: + return [] + + leader = find_group_leader(polygons + masks) + instance = [] + + # Build the resulting mask + mask = None + + if include_polygons and polygons: + polygons = [p.points for p in polygons] + mask = mask_tools.rles_to_mask(polygons, img_width, img_height) + else: + instance += polygons # keep unused polygons + + if masks: + masks = [m.image for m in masks] + if mask is not None: + masks += [mask] + mask = mask_tools.merge_masks(masks) + + if mask is None: + return instance + + mask = mask_tools.mask_to_rle(mask) + mask = mask_utils.frPyObjects(mask, *mask['size']) + instance.append( + RleMask(rle=mask, label=leader.label, z_order=leader.z_order, + id=leader.id, attributes=leader.attributes, group=leader.group + ) + ) + return instance + + @staticmethod + def find_instances(annotations): + return find_instances(a for a in annotations + if a.type in {AnnotationType.polygon, AnnotationType.mask}) + +class PolygonsToMasks(Transform, CliPlugin): + def transform_item(self, item): + annotations = [] + for ann in item.annotations: + if ann.type == AnnotationType.polygon: + if not item.has_image: + raise Exception("Image info is required for this transform") + h, w = item.image.size + annotations.append(self.convert_polygon(ann, h, w)) + else: + 
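# keep non-polygon annotations as they are
+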
annotations.append(ann) + + return self.wrap_item(item, annotations=annotations) + + @staticmethod + def convert_polygon(polygon, img_h, img_w): + rle = mask_utils.frPyObjects([polygon.points], img_h, img_w)[0] + + return RleMask(rle=rle, label=polygon.label, z_order=polygon.z_order, + id=polygon.id, attributes=polygon.attributes, group=polygon.group) + +class BoxesToMasks(Transform, CliPlugin): + def transform_item(self, item): + annotations = [] + for ann in item.annotations: + if ann.type == AnnotationType.bbox: + if not item.has_image: + raise Exception("Image info is required for this transform") + h, w = item.image.size + annotations.append(self.convert_bbox(ann, h, w)) + else: + annotations.append(ann) + + return self.wrap_item(item, annotations=annotations) + + @staticmethod + def convert_bbox(bbox, img_h, img_w): + rle = mask_utils.frPyObjects([bbox.as_polygon()], img_h, img_w)[0] + + return RleMask(rle=rle, label=bbox.label, z_order=bbox.z_order, + id=bbox.id, attributes=bbox.attributes, group=bbox.group) + +class MasksToPolygons(Transform, CliPlugin): + def transform_item(self, item): + annotations = [] + for ann in item.annotations: + if ann.type == AnnotationType.mask: + polygons = self.convert_mask(ann) + if not polygons: + log.debug("[%s]: item %s: " + "Mask conversion to polygons resulted in too " + "small polygons, which were discarded" % \ + (self._get_name(__class__), item.id)) + annotations.extend(polygons) + else: + annotations.append(ann) + + return self.wrap_item(item, annotations=annotations) + + @staticmethod + def convert_mask(mask): + polygons = mask_tools.mask_to_polygons(mask.image) + + return [ + Polygon(points=p, label=mask.label, z_order=mask.z_order, + id=mask.id, attributes=mask.attributes, group=mask.group) + for p in polygons + ] + +class ShapesToBoxes(Transform, CliPlugin): + def transform_item(self, item): + annotations = [] + for ann in item.annotations: + if ann.type in { AnnotationType.mask, AnnotationType.polygon, + AnnotationType.polyline, AnnotationType.points, + }: + annotations.append(self.convert_shape(ann)) + else: + annotations.append(ann) + + return self.wrap_item(item, annotations=annotations) + + @staticmethod + def convert_shape(shape): + bbox = shape.get_bbox() + return Bbox(*bbox, label=shape.label, z_order=shape.z_order, + id=shape.id, attributes=shape.attributes, group=shape.group) + +class Reindex(Transform, CliPlugin): + @classmethod + def build_cmdline_parser(cls, **kwargs): + parser = super().build_cmdline_parser(**kwargs) + parser.add_argument('-s', '--start', type=int, default=1, + help="Start value for item ids") + return parser + + def __init__(self, extractor, start=1): + super().__init__(extractor) + + self._start = start + + def __iter__(self): + for i, item in enumerate(self._extractor): + yield self.wrap_item(item, id=i + self._start) + +class MapSubsets(Transform, CliPlugin): + @staticmethod + def _mapping_arg(s): + parts = s.split(':') + if len(parts) != 2: + import argparse + raise argparse.ArgumentTypeError() + return parts + + @classmethod + def build_cmdline_parser(cls, **kwargs): + parser = super().build_cmdline_parser(**kwargs) + parser.add_argument('-s', '--subset', action='append', + type=cls._mapping_arg, dest='mapping', + help="Subset mapping of the form: 'src:dst' (repeatable)") + return parser + + def __init__(self, extractor, mapping=None): + super().__init__(extractor) + + if mapping is None: + mapping = {} + elif not isinstance(mapping, dict): + mapping = dict(tuple(m) for m in mapping) + self._mapping = 
mapping + + def transform_item(self, item): + return self.wrap_item(item, + subset=self._mapping.get(item.subset, item.subset)) + +class RandomSplit(Transform, CliPlugin): + """ + Joins all subsets into one and splits the result into few parts. + It is expected that item ids are unique and subset ratios sum up to 1.|n + |n + Example:|n + |s|s%(prog)s --subset train:.67 --subset test:.33 + """ + + @staticmethod + def _split_arg(s): + parts = s.split(':') + if len(parts) != 2: + import argparse + raise argparse.ArgumentTypeError() + return (parts[0], float(parts[1])) + + @classmethod + def build_cmdline_parser(cls, **kwargs): + parser = super().build_cmdline_parser(**kwargs) + parser.add_argument('-s', '--subset', action='append', + type=cls._split_arg, dest='splits', + default=[('train', 0.67), ('test', 0.33)], + help="Subsets in the form of: ':' (repeatable)") + parser.add_argument('--seed', type=int, help="Random seed") + return parser + + def __init__(self, extractor, splits, seed=None): + super().__init__(extractor) + + assert 0 < len(splits), "Expected at least one split" + assert all(0.0 <= r and r <= 1.0 for _, r in splits), \ + "Ratios are expected to be in the range [0; 1], but got %s" % splits + + total_ratio = sum(s[1] for s in splits) + if not abs(total_ratio - 1.0) <= 1e-7: + raise Exception( + "Sum of ratios is expected to be 1, got %s, which is %s" % + (splits, total_ratio)) + + dataset_size = len(extractor) + indices = list(range(dataset_size)) + + random.seed(seed) + random.shuffle(indices) + parts = [] + s = 0 + for subset, ratio in splits: + s += ratio + boundary = int(s * dataset_size) + parts.append((boundary, subset)) + + self._parts = parts + + def _find_split(self, index): + for boundary, subset in self._parts: + if index < boundary: + return subset + return subset # all the possible remainder goes to the last split + + def __iter__(self): + for i, item in enumerate(self._extractor): + yield self.wrap_item(item, subset=self._find_split(i)) + +class IdFromImageName(Transform, CliPlugin): + def transform_item(self, item): + if item.has_image and item.image.path: + name = osp.splitext(osp.basename(item.image.path))[0] + return self.wrap_item(item, id=name) + else: + log.debug("Can't change item id for item '%s': " + "item has no image info" % item.id) + return item + +class Rename(Transform, CliPlugin): + """ + Renames items in the dataset. Supports regular expressions. + The first character in the expression is a delimiter for + the pattern and replacement parts. 
Replacement part can also + contain string.format tokens with 'item' object available.|n + |n + Examples:|n + - Replace 'pattern' with 'replacement':|n + |s|srename -e '|pattern|replacement|'|n + - Remove 'frame_' from item ids:|n + |s|srename -e '|frame_(\d+)|\\1|' + """ + + @classmethod + def build_cmdline_parser(cls, **kwargs): + parser = super().build_cmdline_parser(**kwargs) + parser.add_argument('-e', '--regex', + help="Regex for renaming.") + return parser + + def __init__(self, extractor, regex): + super().__init__(extractor) + + assert regex and isinstance(regex, str) + parts = regex.split(regex[0], maxsplit=3) + regex, sub = parts[1:3] + self._re = re.compile(regex) + self._sub = sub + + def transform_item(self, item): + return self.wrap_item(item, id=self._re.sub(self._sub, item.id) \ + .format(item=item)) + +class RemapLabels(Transform, CliPlugin): + """ + Changes labels in the dataset.|n + Examples:|n + - Rename 'person' to 'car' and 'cat' to 'dog', keep 'bus', remove others:|n + |s|sremap_labels -l person:car -l bus:bus -l cat:dog --default delete + """ + + DefaultAction = Enum('DefaultAction', ['keep', 'delete']) + + @staticmethod + def _split_arg(s): + parts = s.split(':') + if len(parts) != 2: + import argparse + raise argparse.ArgumentTypeError() + return (parts[0], parts[1]) + + @classmethod + def build_cmdline_parser(cls, **kwargs): + parser = super().build_cmdline_parser(**kwargs) + parser.add_argument('-l', '--label', action='append', + type=cls._split_arg, dest='mapping', + help="Label in the form of: ':' (repeatable)") + parser.add_argument('--default', + choices=[a.name for a in cls.DefaultAction], + default=cls.DefaultAction.keep.name, + help="Action for unspecified labels (default: %(default)s)") + return parser + + def __init__(self, extractor, mapping, default=None): + super().__init__(extractor) + + assert isinstance(default, (str, self.DefaultAction)) + if isinstance(default, str): + default = self.DefaultAction[default] + + assert isinstance(mapping, (dict, list)) + if isinstance(mapping, list): + mapping = dict(mapping) + + self._categories = {} + + src_label_cat = self._extractor.categories().get(AnnotationType.label) + if src_label_cat is not None: + self._make_label_id_map(src_label_cat, mapping, default) + + src_mask_cat = self._extractor.categories().get(AnnotationType.mask) + if src_mask_cat is not None: + assert src_label_cat is not None + dst_mask_cat = MaskCategories(attributes=src_mask_cat.attributes) + dst_mask_cat.colormap = { + id: src_mask_cat.colormap[id] + for id, _ in enumerate(src_label_cat.items) + if self._map_id(id) or id == 0 + } + self._categories[AnnotationType.mask] = dst_mask_cat + + src_points_cat = self._extractor.categories().get(AnnotationType.points) + if src_points_cat is not None: + assert src_label_cat is not None + dst_points_cat = PointsCategories(attributes=src_points_cat.attributes) + dst_points_cat.items = { + id: src_points_cat.items[id] + for id, item in enumerate(src_label_cat.items) + if self._map_id(id) or id == 0 + } + self._categories[AnnotationType.points] = dst_points_cat + + def _make_label_id_map(self, src_label_cat, label_mapping, default_action): + dst_label_cat = LabelCategories(attributes=src_label_cat.attributes) + id_mapping = {} + for src_index, src_label in enumerate(src_label_cat.items): + dst_label = label_mapping.get(src_label.name) + if not dst_label and default_action == self.DefaultAction.keep: + dst_label = src_label.name # keep unspecified as is + if not dst_label: + continue + + dst_index = 
dst_label_cat.find(dst_label)[0] + if dst_index is None: + dst_index = dst_label_cat.add(dst_label, + src_label.parent, src_label.attributes) + id_mapping[src_index] = dst_index + + if log.getLogger().isEnabledFor(log.DEBUG): + log.debug("Label mapping:") + for src_id, src_label in enumerate(src_label_cat.items): + if id_mapping.get(src_id): + log.debug("#%s '%s' -> #%s '%s'", + src_id, src_label.name, id_mapping[src_id], + dst_label_cat.items[id_mapping[src_id]].name + ) + else: + log.debug("#%s '%s' -> ", src_id, src_label.name) + + self._map_id = lambda src_id: id_mapping.get(src_id, None) + self._categories[AnnotationType.label] = dst_label_cat + + def categories(self): + return self._categories + + def transform_item(self, item): + annotations = [] + for ann in item.annotations: + if ann.type in { AnnotationType.label, AnnotationType.mask, + AnnotationType.points, AnnotationType.polygon, + AnnotationType.polyline, AnnotationType.bbox + } and ann.label is not None: + conv_label = self._map_id(ann.label) + if conv_label is not None: + annotations.append(ann.wrap(label=conv_label)) + else: + annotations.append(ann.wrap()) + return item.wrap(annotations=annotations) \ No newline at end of file diff --git a/datumaro/plugins/voc_format/__init__.py b/datumaro/plugins/voc_format/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/datumaro/plugins/voc_format/converter.py b/datumaro/plugins/voc_format/converter.py new file mode 100644 index 0000000000..65e586d85d --- /dev/null +++ b/datumaro/plugins/voc_format/converter.py @@ -0,0 +1,590 @@ + +# Copyright (C) 2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import logging as log +import os +import os.path as osp +from collections import OrderedDict, defaultdict +from enum import Enum +from itertools import chain + +from lxml import etree as ET + +from datumaro.components.converter import Converter +from datumaro.components.extractor import (DEFAULT_SUBSET_NAME, AnnotationType, + CompiledMask, LabelCategories) +from datumaro.util import find, str_to_bool +from datumaro.util.image import save_image +from datumaro.util.mask_tools import paint_mask, remap_mask + +from .format import (VocTask, VocPath, VocInstColormap, + parse_label_map, make_voc_label_map, make_voc_categories, write_label_map +) + + +def _convert_attr(name, attributes, type_conv, default=None, warn=True): + d = object() + value = attributes.get(name, d) + if value is d: + return default + + try: + return type_conv(value) + except Exception as e: + log.warning("Failed to convert attribute '%s'='%s': %s" % \ + (name, value, e)) + return default + +def _write_xml_bbox(bbox, parent_elem): + x, y, w, h = bbox + bbox_elem = ET.SubElement(parent_elem, 'bndbox') + ET.SubElement(bbox_elem, 'xmin').text = str(x) + ET.SubElement(bbox_elem, 'ymin').text = str(y) + ET.SubElement(bbox_elem, 'xmax').text = str(x + w) + ET.SubElement(bbox_elem, 'ymax').text = str(y + h) + return bbox_elem + + +LabelmapType = Enum('LabelmapType', ['voc', 'source']) + +class VocConverter(Converter): + DEFAULT_IMAGE_EXT = VocPath.IMAGE_EXT + + @staticmethod + def _split_tasks_string(s): + return [VocTask[i.strip()] for i in s.split(',')] + + @staticmethod + def _get_labelmap(s): + if osp.isfile(s): + return s + try: + return LabelmapType[s].name + except KeyError: + import argparse + raise argparse.ArgumentTypeError() + + @classmethod + def build_cmdline_parser(cls, **kwargs): + parser = super().build_cmdline_parser(**kwargs) + + parser.add_argument('--apply-colormap', 
type=str_to_bool, default=True, + help="Use colormap for class and instance masks " + "(default: %(default)s)") + parser.add_argument('--label-map', type=cls._get_labelmap, default=None, + help="Labelmap file path or one of %s" % \ + ', '.join(t.name for t in LabelmapType)) + parser.add_argument('--allow-attributes', + type=str_to_bool, default=True, + help="Allow export of attributes (default: %(default)s)") + parser.add_argument('--tasks', type=cls._split_tasks_string, + help="VOC task filter, comma-separated list of {%s} " + "(default: all)" % ', '.join(t.name for t in VocTask)) + + return parser + + def __init__(self, extractor, save_dir, + tasks=None, apply_colormap=True, label_map=None, + allow_attributes=True, **kwargs): + super().__init__(extractor, save_dir, **kwargs) + + assert tasks is None or isinstance(tasks, (VocTask, list, set)) + if tasks is None: + tasks = set(VocTask) + elif isinstance(tasks, VocTask): + tasks = {tasks} + else: + tasks = set(t if t in VocTask else VocTask[t] for t in tasks) + self._tasks = tasks + + self._apply_colormap = apply_colormap + self._allow_attributes = allow_attributes + + if label_map is None: + label_map = LabelmapType.source + self._load_categories(label_map) + + def apply(self): + self.make_dirs() + self.save_subsets() + self.save_label_map() + + def make_dirs(self): + save_dir = self._save_dir + subsets_dir = osp.join(save_dir, VocPath.SUBSETS_DIR) + cls_subsets_dir = osp.join(subsets_dir, + VocPath.TASK_DIR[VocTask.classification]) + action_subsets_dir = osp.join(subsets_dir, + VocPath.TASK_DIR[VocTask.action_classification]) + layout_subsets_dir = osp.join(subsets_dir, + VocPath.TASK_DIR[VocTask.person_layout]) + segm_subsets_dir = osp.join(subsets_dir, + VocPath.TASK_DIR[VocTask.segmentation]) + ann_dir = osp.join(save_dir, VocPath.ANNOTATIONS_DIR) + img_dir = osp.join(save_dir, VocPath.IMAGES_DIR) + segm_dir = osp.join(save_dir, VocPath.SEGMENTATION_DIR) + inst_dir = osp.join(save_dir, VocPath.INSTANCES_DIR) + images_dir = osp.join(save_dir, VocPath.IMAGES_DIR) + + os.makedirs(subsets_dir, exist_ok=True) + os.makedirs(ann_dir, exist_ok=True) + os.makedirs(img_dir, exist_ok=True) + os.makedirs(segm_dir, exist_ok=True) + os.makedirs(inst_dir, exist_ok=True) + os.makedirs(images_dir, exist_ok=True) + + self._subsets_dir = subsets_dir + self._cls_subsets_dir = cls_subsets_dir + self._action_subsets_dir = action_subsets_dir + self._layout_subsets_dir = layout_subsets_dir + self._segm_subsets_dir = segm_subsets_dir + self._ann_dir = ann_dir + self._img_dir = img_dir + self._segm_dir = segm_dir + self._inst_dir = inst_dir + self._images_dir = images_dir + + def get_label(self, label_id): + return self._extractor. 
\ + categories()[AnnotationType.label].items[label_id].name + + def save_subsets(self): + for subset_name in self._extractor.subsets() or [None]: + if subset_name: + subset = self._extractor.get_subset(subset_name) + else: + subset_name = DEFAULT_SUBSET_NAME + subset = self._extractor + + class_lists = OrderedDict() + clsdet_list = OrderedDict() + action_list = OrderedDict() + layout_list = OrderedDict() + segm_list = OrderedDict() + + for item in subset: + log.debug("Converting item '%s'", item.id) + + image_filename = self._make_image_filename(item) + if self._save_images: + if item.has_image and item.image.has_data: + self._save_image(item, + osp.join(self._images_dir, image_filename)) + else: + log.debug("Item '%s' has no image", item.id) + + labels = [] + bboxes = [] + masks = [] + for a in item.annotations: + if a.type == AnnotationType.label: + labels.append(a) + elif a.type == AnnotationType.bbox: + bboxes.append(a) + elif a.type == AnnotationType.mask: + masks.append(a) + + if self._tasks is None and bboxes or \ + self._tasks & {VocTask.detection, VocTask.person_layout, + VocTask.action_classification}: + root_elem = ET.Element('annotation') + if '_' in item.id: + folder = item.id[ : item.id.find('_')] + else: + folder = '' + ET.SubElement(root_elem, 'folder').text = folder + ET.SubElement(root_elem, 'filename').text = image_filename + + source_elem = ET.SubElement(root_elem, 'source') + ET.SubElement(source_elem, 'database').text = 'Unknown' + ET.SubElement(source_elem, 'annotation').text = 'Unknown' + ET.SubElement(source_elem, 'image').text = 'Unknown' + + if item.has_image: + h, w = item.image.size + if item.image.has_data: + image_shape = item.image.data.shape + c = 1 if len(image_shape) == 2 else image_shape[2] + else: + c = 3 + size_elem = ET.SubElement(root_elem, 'size') + ET.SubElement(size_elem, 'width').text = str(w) + ET.SubElement(size_elem, 'height').text = str(h) + ET.SubElement(size_elem, 'depth').text = str(c) + + item_segmented = 0 < len(masks) + ET.SubElement(root_elem, 'segmented').text = \ + str(int(item_segmented)) + + objects_with_parts = [] + objects_with_actions = defaultdict(dict) + + main_bboxes = [] + layout_bboxes = [] + for bbox in bboxes: + label = self.get_label(bbox.label) + if self._is_part(label): + layout_bboxes.append(bbox) + elif self._is_label(label): + main_bboxes.append(bbox) + + for new_obj_id, obj in enumerate(main_bboxes): + attr = obj.attributes + + obj_elem = ET.SubElement(root_elem, 'object') + + obj_label = self.get_label(obj.label) + ET.SubElement(obj_elem, 'name').text = obj_label + + if 'pose' in attr: + ET.SubElement(obj_elem, 'pose').text = \ + str(attr['pose']) + + if 'truncated' in attr: + truncated = _convert_attr('truncated', attr, int, 0) + ET.SubElement(obj_elem, 'truncated').text = \ + '%d' % truncated + + if 'difficult' in attr: + difficult = _convert_attr('difficult', attr, int, 0) + ET.SubElement(obj_elem, 'difficult').text = \ + '%d' % difficult + + if 'occluded' in attr: + occluded = _convert_attr('occluded', attr, int, 0) + ET.SubElement(obj_elem, 'occluded').text = \ + '%d' % occluded + + bbox = obj.get_bbox() + if bbox is not None: + _write_xml_bbox(bbox, obj_elem) + + for part_bbox in filter( + lambda x: obj.group and obj.group == x.group, + layout_bboxes): + part_elem = ET.SubElement(obj_elem, 'part') + ET.SubElement(part_elem, 'name').text = \ + self.get_label(part_bbox.label) + _write_xml_bbox(part_bbox.get_bbox(), part_elem) + + objects_with_parts.append(new_obj_id) + + label_actions = 
self._get_actions(obj_label) + actions_elem = ET.Element('actions') + for action in label_actions: + present = 0 + if action in attr: + present = _convert_attr(action, attr, + lambda v: int(v == True), 0) + ET.SubElement(actions_elem, action).text = \ + '%d' % present + + objects_with_actions[new_obj_id][action] = present + if len(actions_elem) != 0: + obj_elem.append(actions_elem) + + if self._allow_attributes: + native_attrs = {'difficult', 'pose', + 'truncated', 'occluded' } + native_attrs.update(label_actions) + + attrs_elem = ET.Element('attributes') + for k, v in attr.items(): + if k in native_attrs: + continue + attr_elem = ET.SubElement(attrs_elem, 'attribute') + ET.SubElement(attr_elem, 'name').text = str(k) + ET.SubElement(attr_elem, 'value').text = str(v) + if len(attrs_elem): + obj_elem.append(attrs_elem) + + if self._tasks & {VocTask.detection, VocTask.person_layout, + VocTask.action_classification}: + ann_path = osp.join(self._ann_dir, item.id + '.xml') + os.makedirs(osp.dirname(ann_path), exist_ok=True) + with open(ann_path, 'w') as f: + f.write(ET.tostring(root_elem, + encoding='unicode', pretty_print=True)) + + clsdet_list[item.id] = True + layout_list[item.id] = objects_with_parts + action_list[item.id] = objects_with_actions + + for label_ann in labels: + label = self.get_label(label_ann.label) + if not self._is_label(label): + continue + class_list = class_lists.get(item.id, set()) + class_list.add(label_ann.label) + class_lists[item.id] = class_list + + clsdet_list[item.id] = True + + if masks: + compiled_mask = CompiledMask.from_instance_masks(masks, + instance_labels=[self._label_id_mapping(m.label) + for m in masks]) + + self.save_segm( + osp.join(self._segm_dir, item.id + VocPath.SEGM_EXT), + compiled_mask.class_mask) + self.save_segm( + osp.join(self._inst_dir, item.id + VocPath.SEGM_EXT), + compiled_mask.instance_mask, + colormap=VocInstColormap) + + segm_list[item.id] = True + + if len(item.annotations) == 0: + clsdet_list[item.id] = None + layout_list[item.id] = None + action_list[item.id] = None + segm_list[item.id] = None + + if self._tasks & {VocTask.classification, VocTask.detection, + VocTask.action_classification, VocTask.person_layout}: + self.save_clsdet_lists(subset_name, clsdet_list) + if self._tasks & {VocTask.classification}: + self.save_class_lists(subset_name, class_lists) + if self._tasks & {VocTask.action_classification}: + self.save_action_lists(subset_name, action_list) + if self._tasks & {VocTask.person_layout}: + self.save_layout_lists(subset_name, layout_list) + if self._tasks & {VocTask.segmentation}: + self.save_segm_lists(subset_name, segm_list) + + def save_action_lists(self, subset_name, action_list): + if not action_list: + return + + os.makedirs(self._action_subsets_dir, exist_ok=True) + + ann_file = osp.join(self._action_subsets_dir, subset_name + '.txt') + with open(ann_file, 'w') as f: + for item in action_list: + f.write('%s\n' % item) + + if len(action_list) == 0: + return + + all_actions = set(chain(*(self._get_actions(l) + for l in self._label_map))) + for action in all_actions: + ann_file = osp.join(self._action_subsets_dir, + '%s_%s.txt' % (action, subset_name)) + with open(ann_file, 'w') as f: + for item, objs in action_list.items(): + if not objs: + continue + for obj_id, obj_actions in objs.items(): + presented = obj_actions[action] + f.write('%s %s % d\n' % \ + (item, 1 + obj_id, 1 if presented else -1)) + + def save_class_lists(self, subset_name, class_lists): + if not class_lists: + return + + 
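# writes one '<label>_<subset>.txt' file per label in the VOC ImageSets
+ # convention: '<item>  1' when the label is present on the item,
+ # '<item> -1' when it is absent
+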
os.makedirs(self._cls_subsets_dir, exist_ok=True) + + for label in self._label_map: + ann_file = osp.join(self._cls_subsets_dir, + '%s_%s.txt' % (label, subset_name)) + with open(ann_file, 'w') as f: + for item, item_labels in class_lists.items(): + if not item_labels: + continue + item_labels = [self.get_label(l) for l in item_labels] + presented = label in item_labels + f.write('%s % d\n' % (item, 1 if presented else -1)) + + def save_clsdet_lists(self, subset_name, clsdet_list): + if not clsdet_list: + return + + os.makedirs(self._cls_subsets_dir, exist_ok=True) + + ann_file = osp.join(self._cls_subsets_dir, subset_name + '.txt') + with open(ann_file, 'w') as f: + for item in clsdet_list: + f.write('%s\n' % item) + + def save_segm_lists(self, subset_name, segm_list): + if not segm_list: + return + + os.makedirs(self._segm_subsets_dir, exist_ok=True) + + ann_file = osp.join(self._segm_subsets_dir, subset_name + '.txt') + with open(ann_file, 'w') as f: + for item in segm_list: + f.write('%s\n' % item) + + def save_layout_lists(self, subset_name, layout_list): + if not layout_list: + return + + os.makedirs(self._layout_subsets_dir, exist_ok=True) + + ann_file = osp.join(self._layout_subsets_dir, subset_name + '.txt') + with open(ann_file, 'w') as f: + for item, item_layouts in layout_list.items(): + if item_layouts: + for obj_id in item_layouts: + f.write('%s % d\n' % (item, 1 + obj_id)) + else: + f.write('%s\n' % (item)) + + def save_segm(self, path, mask, colormap=None): + if self._apply_colormap: + if colormap is None: + colormap = self._categories[AnnotationType.mask].colormap + mask = paint_mask(mask, colormap) + save_image(path, mask, create_dir=True) + + def save_label_map(self): + path = osp.join(self._save_dir, VocPath.LABELMAP_FILE) + write_label_map(path, self._label_map) + + def _load_categories(self, label_map_source): + if label_map_source == LabelmapType.voc.name: + # use the default VOC colormap + label_map = make_voc_label_map() + + elif label_map_source == LabelmapType.source.name and \ + AnnotationType.mask not in self._extractor.categories(): + # generate colormap for input labels + labels = self._extractor.categories() \ + .get(AnnotationType.label, LabelCategories()) + label_map = OrderedDict((item.name, [None, [], []]) + for item in labels.items) + + elif label_map_source == LabelmapType.source.name and \ + AnnotationType.mask in self._extractor.categories(): + # use source colormap + labels = self._extractor.categories()[AnnotationType.label] + colors = self._extractor.categories()[AnnotationType.mask] + label_map = OrderedDict() + for idx, item in enumerate(labels.items): + color = colors.colormap.get(idx) + if color is not None: + label_map[item.name] = [color, [], []] + + elif isinstance(label_map_source, dict): + label_map = OrderedDict( + sorted(label_map_source.items(), key=lambda e: e[0])) + + elif isinstance(label_map_source, str) and osp.isfile(label_map_source): + label_map = parse_label_map(label_map_source) + + else: + raise Exception("Wrong labelmap specified, " + "expected one of %s or a file path" % \ + ', '.join(t.name for t in LabelmapType)) + + # There must always be a label with color (0, 0, 0) at index 0 + bg_label = find(label_map.items(), lambda x: x[1][0] == (0, 0, 0)) + if bg_label is not None: + bg_label = bg_label[0] + else: + bg_label = 'background' + if bg_label not in label_map: + has_colors = any(v[0] is not None for v in label_map.values()) + color = (0, 0, 0) if has_colors else None + label_map[bg_label] = [color, [], []] + 
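# move the background label to the front so that it gets index 0
+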
label_map.move_to_end(bg_label, last=False) + + self._categories = make_voc_categories(label_map) + + # Update colors with assigned values + colormap = self._categories[AnnotationType.mask].colormap + for label_id, color in colormap.items(): + label_desc = label_map[ + self._categories[AnnotationType.label].items[label_id].name] + label_desc[0] = color + + self._label_map = label_map + self._label_id_mapping = self._make_label_id_map() + + def _is_label(self, s): + return self._label_map.get(s) is not None + + def _is_part(self, s): + for label_desc in self._label_map.values(): + if s in label_desc[1]: + return True + return False + + def _is_action(self, label, s): + return s in self._get_actions(label) + + def _get_actions(self, label): + label_desc = self._label_map.get(label) + if not label_desc: + return [] + return label_desc[2] + + def _make_label_id_map(self): + source_labels = { + id: label.name for id, label in + enumerate(self._extractor.categories().get( + AnnotationType.label, LabelCategories()).items) + } + target_labels = { + label.name: id for id, label in + enumerate(self._categories[AnnotationType.label].items) + } + id_mapping = { + src_id: target_labels.get(src_label, 0) + for src_id, src_label in source_labels.items() + } + + void_labels = [src_label for src_id, src_label in source_labels.items() + if src_label not in target_labels] + if void_labels: + log.warning("The following labels are remapped to background: %s" % + ', '.join(void_labels)) + log.debug("Saving segmentations with the following label mapping: \n%s" % + '\n'.join(["#%s '%s' -> #%s '%s'" % + ( + src_id, src_label, id_mapping[src_id], + self._categories[AnnotationType.label] \ + .items[id_mapping[src_id]].name + ) + for src_id, src_label in source_labels.items() + ]) + ) + + def map_id(src_id): + return id_mapping.get(src_id, 0) + return map_id + + def _remap_mask(self, mask): + return remap_mask(mask, self._label_id_mapping) + +class VocClassificationConverter(VocConverter): + def __init__(self, *args, **kwargs): + kwargs['tasks'] = VocTask.classification + super().__init__(*args, **kwargs) + +class VocDetectionConverter(VocConverter): + def __init__(self, *args, **kwargs): + kwargs['tasks'] = VocTask.detection + super().__init__(*args, **kwargs) + +class VocLayoutConverter(VocConverter): + def __init__(self, *args, **kwargs): + kwargs['tasks'] = VocTask.person_layout + super().__init__(*args, **kwargs) + +class VocActionConverter(VocConverter): + def __init__(self, *args, **kwargs): + kwargs['tasks'] = VocTask.action_classification + super().__init__(*args, **kwargs) + +class VocSegmentationConverter(VocConverter): + def __init__(self, *args, **kwargs): + kwargs['tasks'] = VocTask.segmentation + super().__init__(*args, **kwargs) diff --git a/datumaro/plugins/voc_format/extractor.py b/datumaro/plugins/voc_format/extractor.py new file mode 100644 index 0000000000..0fe667d347 --- /dev/null +++ b/datumaro/plugins/voc_format/extractor.py @@ -0,0 +1,302 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from collections import defaultdict +import logging as log +import numpy as np +import os.path as osp +from defusedxml import ElementTree + +from datumaro.components.extractor import (SourceExtractor, DatasetItem, + AnnotationType, Label, Mask, Bbox, CompiledMask +) +from datumaro.util import dir_items +from datumaro.util.image import Image +from datumaro.util.mask_tools import lazy_mask, invert_colormap + +from .format import ( + VocTask, VocPath, VocInstColormap, 
parse_label_map, make_voc_categories +) + + +_inverse_inst_colormap = invert_colormap(VocInstColormap) + +class _VocExtractor(SourceExtractor): + def __init__(self, path): + assert osp.isfile(path), path + self._path = path + self._dataset_dir = osp.dirname(osp.dirname(osp.dirname(path))) + + super().__init__(subset=osp.splitext(osp.basename(path))[0]) + + self._categories = self._load_categories(self._dataset_dir) + + label_color = lambda label_idx: \ + self._categories[AnnotationType.mask].colormap.get(label_idx, None) + log.debug("Loaded labels: %s" % ', '.join( + "'%s' %s" % (l.name, ('(%s, %s, %s)' % c) if c else '') + for i, l, c in ((i, l, label_color(i)) for i, l in enumerate( + self._categories[AnnotationType.label].items + )) + )) + self._items = self._load_subset_list(path) + + def categories(self): + return self._categories + + def __len__(self): + return len(self._items) + + def _get_label_id(self, label): + label_id, _ = self._categories[AnnotationType.label].find(label) + assert label_id is not None, label + return label_id + + @staticmethod + def _load_categories(dataset_path): + label_map = None + label_map_path = osp.join(dataset_path, VocPath.LABELMAP_FILE) + if osp.isfile(label_map_path): + label_map = parse_label_map(label_map_path) + return make_voc_categories(label_map) + + @staticmethod + def _load_subset_list(subset_path): + with open(subset_path) as f: + return [line.split()[0] for line in f] + +class VocClassificationExtractor(_VocExtractor): + def __iter__(self): + raw_anns = self._load_annotations() + for item_id in self._items: + log.debug("Reading item '%s'" % item_id) + image = osp.join(self._dataset_dir, VocPath.IMAGES_DIR, + item_id + VocPath.IMAGE_EXT) + anns = self._parse_annotations(raw_anns, item_id) + yield DatasetItem(id=item_id, subset=self._subset, + image=image, annotations=anns) + + def _load_annotations(self): + annotations = defaultdict(list) + task_dir = osp.dirname(self._path) + anno_files = [s for s in dir_items(task_dir, '.txt') + if s.endswith('_' + osp.basename(self._path))] + for ann_filename in anno_files: + with open(osp.join(task_dir, ann_filename)) as f: + label = ann_filename[:ann_filename.rfind('_')] + label_id = self._get_label_id(label) + for line in f: + item, present = line.split() + if present == '1': + annotations[item].append(label_id) + + return dict(annotations) + + @staticmethod + def _parse_annotations(raw_anns, item_id): + return [Label(label_id) for label_id in raw_anns.get(item_id, [])] + +class _VocXmlExtractor(_VocExtractor): + def __init__(self, path, task): + super().__init__(path) + self._task = task + + def __iter__(self): + anno_dir = osp.join(self._dataset_dir, VocPath.ANNOTATIONS_DIR) + + for item_id in self._items: + log.debug("Reading item '%s'" % item_id) + image = item_id + VocPath.IMAGE_EXT + height, width = 0, 0 + + anns = [] + ann_file = osp.join(anno_dir, item_id + '.xml') + if osp.isfile(ann_file): + root_elem = ElementTree.parse(ann_file) + height = root_elem.find('size/height') + if height is not None: + height = int(height.text) + width = root_elem.find('size/width') + if width is not None: + width = int(width.text) + filename_elem = root_elem.find('filename') + if filename_elem is not None: + image = filename_elem.text + anns = self._parse_annotations(root_elem) + + image = osp.join(self._dataset_dir, VocPath.IMAGES_DIR, image) + if height and width: + image = Image(path=image, size=(height, width)) + + yield DatasetItem(id=item_id, subset=self._subset, + image=image, annotations=anns) + + def 
_parse_annotations(self, root_elem): + item_annotations = [] + + for obj_id, object_elem in enumerate(root_elem.findall('object')): + obj_id += 1 + attributes = {} + group = obj_id + + obj_label_id = None + label_elem = object_elem.find('name') + if label_elem is not None: + obj_label_id = self._get_label_id(label_elem.text) + + obj_bbox = self._parse_bbox(object_elem) + + if obj_label_id is None or obj_bbox is None: + continue + + difficult_elem = object_elem.find('difficult') + attributes['difficult'] = difficult_elem is not None and \ + difficult_elem.text == '1' + + truncated_elem = object_elem.find('truncated') + attributes['truncated'] = truncated_elem is not None and \ + truncated_elem.text == '1' + + occluded_elem = object_elem.find('occluded') + attributes['occluded'] = occluded_elem is not None and \ + occluded_elem.text == '1' + + pose_elem = object_elem.find('pose') + if pose_elem is not None: + attributes['pose'] = pose_elem.text + + point_elem = object_elem.find('point') + if point_elem is not None: + point_x = point_elem.find('x') + point_y = point_elem.find('y') + point = [float(point_x.text), float(point_y.text)] + attributes['point'] = point + + actions_elem = object_elem.find('actions') + actions = {a: False + for a in self._categories[AnnotationType.label] \ + .items[obj_label_id].attributes} + if actions_elem is not None: + for action_elem in actions_elem: + actions[action_elem.tag] = (action_elem.text == '1') + for action, present in actions.items(): + attributes[action] = present + + has_parts = False + for part_elem in object_elem.findall('part'): + part = part_elem.find('name').text + part_label_id = self._get_label_id(part) + part_bbox = self._parse_bbox(part_elem) + + if self._task is not VocTask.person_layout: + break + if part_bbox is None: + continue + has_parts = True + item_annotations.append(Bbox(*part_bbox, label=part_label_id, + group=group)) + + attributes_elem = object_elem.find('attributes') + if attributes_elem is not None: + for attr_elem in attributes_elem.iter('attribute'): + attributes[attr_elem.find('name').text] = \ + attr_elem.find('value').text + + if self._task is VocTask.person_layout and not has_parts: + continue + if self._task is VocTask.action_classification and not actions: + continue + + item_annotations.append(Bbox(*obj_bbox, label=obj_label_id, + attributes=attributes, id=obj_id, group=group)) + + return item_annotations + + @staticmethod + def _parse_bbox(object_elem): + bbox_elem = object_elem.find('bndbox') + xmin = float(bbox_elem.find('xmin').text) + xmax = float(bbox_elem.find('xmax').text) + ymin = float(bbox_elem.find('ymin').text) + ymax = float(bbox_elem.find('ymax').text) + return [xmin, ymin, xmax - xmin, ymax - ymin] + +class VocDetectionExtractor(_VocXmlExtractor): + def __init__(self, path): + super().__init__(path, task=VocTask.detection) + +class VocLayoutExtractor(_VocXmlExtractor): + def __init__(self, path): + super().__init__(path, task=VocTask.person_layout) + +class VocActionExtractor(_VocXmlExtractor): + def __init__(self, path): + super().__init__(path, task=VocTask.action_classification) + +class VocSegmentationExtractor(_VocExtractor): + def __iter__(self): + for item_id in self._items: + log.debug("Reading item '%s'" % item_id) + image = osp.join(self._dataset_dir, VocPath.IMAGES_DIR, + item_id + VocPath.IMAGE_EXT) + anns = self._load_annotations(item_id) + yield DatasetItem(id=item_id, subset=self._subset, + image=image, annotations=anns) + + @staticmethod + def _lazy_extract_mask(mask, c): + return 
lambda: mask == c + + def _load_annotations(self, item_id): + item_annotations = [] + + class_mask = None + segm_path = osp.join(self._dataset_dir, VocPath.SEGMENTATION_DIR, + item_id + VocPath.SEGM_EXT) + if osp.isfile(segm_path): + inverse_cls_colormap = \ + self._categories[AnnotationType.mask].inverse_colormap + class_mask = lazy_mask(segm_path, inverse_cls_colormap) + + instances_mask = None + inst_path = osp.join(self._dataset_dir, VocPath.INSTANCES_DIR, + item_id + VocPath.SEGM_EXT) + if osp.isfile(inst_path): + instances_mask = lazy_mask(inst_path, _inverse_inst_colormap) + + if instances_mask is not None: + compiled_mask = CompiledMask(class_mask, instances_mask) + + if class_mask is not None: + label_cat = self._categories[AnnotationType.label] + instance_labels = compiled_mask.get_instance_labels() + else: + instance_labels = {i: None + for i in range(compiled_mask.instance_count)} + + for instance_id, label_id in instance_labels.items(): + image = compiled_mask.lazy_extract(instance_id) + + attributes = {} + if label_id is not None: + actions = {a: False + for a in label_cat.items[label_id].attributes + } + attributes.update(actions) + + item_annotations.append(Mask( + image=image, label=label_id, + attributes=attributes, group=instance_id + )) + elif class_mask is not None: + log.warn("item '%s': has only class segmentation, " + "instance masks will not be available" % item_id) + class_mask = class_mask() + classes = np.unique(class_mask) + for label_id in classes: + image = self._lazy_extract_mask(class_mask, label_id) + item_annotations.append(Mask(image=image, label=label_id)) + + return item_annotations diff --git a/datumaro/plugins/voc_format/format.py b/datumaro/plugins/voc_format/format.py new file mode 100644 index 0000000000..a03446d511 --- /dev/null +++ b/datumaro/plugins/voc_format/format.py @@ -0,0 +1,206 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from collections import OrderedDict +from enum import Enum +from itertools import chain +import numpy as np + +from datumaro.components.extractor import (AnnotationType, + LabelCategories, MaskCategories +) + + +VocTask = Enum('VocTask', [ + 'classification', + 'detection', + 'segmentation', + 'action_classification', + 'person_layout', +]) + +VocLabel = Enum('VocLabel', [ + ('background', 0), + ('aeroplane', 1), + ('bicycle', 2), + ('bird', 3), + ('boat', 4), + ('bottle', 5), + ('bus', 6), + ('car', 7), + ('cat', 8), + ('chair', 9), + ('cow', 10), + ('diningtable', 11), + ('dog', 12), + ('horse', 13), + ('motorbike', 14), + ('person', 15), + ('pottedplant', 16), + ('sheep', 17), + ('sofa', 18), + ('train', 19), + ('tvmonitor', 20), + ('ignored', 255), +]) + +VocPose = Enum('VocPose', [ + 'Unspecified', + 'Left', + 'Right', + 'Frontal', + 'Rear', +]) + +VocBodyPart = Enum('VocBodyPart', [ + 'head', + 'hand', + 'foot', +]) + +VocAction = Enum('VocAction', [ + 'other', + 'jumping', + 'phoning', + 'playinginstrument', + 'reading', + 'ridingbike', + 'ridinghorse', + 'running', + 'takingphoto', + 'usingcomputer', + 'walking', +]) + +def generate_colormap(length=256): + def get_bit(number, index): + return (number >> index) & 1 + + colormap = np.zeros((length, 3), dtype=int) + indices = np.arange(length, dtype=int) + + for j in range(7, -1, -1): + for c in range(3): + colormap[:, c] |= get_bit(indices, c) << j + indices >>= 3 + + return OrderedDict( + (id, tuple(color)) for id, color in enumerate(colormap) + ) + +VocColormap = {id: color for id, color in 
generate_colormap(256).items() + if id in [l.value for l in VocLabel]} +VocInstColormap = generate_colormap(256) + +class VocPath: + IMAGES_DIR = 'JPEGImages' + ANNOTATIONS_DIR = 'Annotations' + SEGMENTATION_DIR = 'SegmentationClass' + INSTANCES_DIR = 'SegmentationObject' + SUBSETS_DIR = 'ImageSets' + IMAGE_EXT = '.jpg' + SEGM_EXT = '.png' + LABELMAP_FILE = 'labelmap.txt' + + TASK_DIR = { + VocTask.classification: 'Main', + VocTask.detection: 'Main', + VocTask.segmentation: 'Segmentation', + VocTask.action_classification: 'Action', + VocTask.person_layout: 'Layout', + } + + +def make_voc_label_map(): + labels = sorted(VocLabel, key=lambda l: l.value) + label_map = OrderedDict( + (label.name, [VocColormap[label.value], [], []]) for label in labels) + label_map[VocLabel.person.name][1] = [p.name for p in VocBodyPart] + label_map[VocLabel.person.name][2] = [a.name for a in VocAction] + return label_map + +def parse_label_map(path): + if not path: + return None + + label_map = OrderedDict() + with open(path, 'r') as f: + for line in f: + # skip empty and commented lines + line = line.strip() + if not line or line and line[0] == '#': + continue + + # name, color, parts, actions + label_desc = line.strip().split(':') + name = label_desc[0] + + if name in label_map: + raise ValueError("Label '%s' is already defined" % name) + + if 1 < len(label_desc) and len(label_desc[1]) != 0: + color = label_desc[1].split(',') + assert len(color) == 3, \ + "Label '%s' has wrong color, expected 'r,g,b', got '%s'" % \ + (name, color) + color = tuple([int(c) for c in color]) + else: + color = None + + if 2 < len(label_desc) and len(label_desc[2]) != 0: + parts = label_desc[2].split(',') + else: + parts = [] + + if 3 < len(label_desc) and len(label_desc[3]) != 0: + actions = label_desc[3].split(',') + else: + actions = [] + + label_map[name] = [color, parts, actions] + return label_map + +def write_label_map(path, label_map): + with open(path, 'w') as f: + f.write('# label:color_rgb:parts:actions\n') + for label_name, label_desc in label_map.items(): + if label_desc[0]: + color_rgb = ','.join(str(c) for c in label_desc[0]) + else: + color_rgb = '' + + parts = ','.join(str(p) for p in label_desc[1]) + actions = ','.join(str(a) for a in label_desc[2]) + + f.write('%s\n' % ':'.join([label_name, color_rgb, parts, actions])) + +def make_voc_categories(label_map=None): + if label_map is None: + label_map = make_voc_label_map() + + categories = {} + + label_categories = LabelCategories() + label_categories.attributes.update(['difficult', 'truncated', 'occluded']) + + for label, desc in label_map.items(): + label_categories.add(label, attributes=desc[2]) + for part in OrderedDict((k, None) for k in chain( + *(desc[1] for desc in label_map.values()))): + label_categories.add(part) + categories[AnnotationType.label] = label_categories + + has_colors = any(v[0] is not None for v in label_map.values()) + if not has_colors: # generate new colors + colormap = generate_colormap(len(label_map)) + else: # only copy defined colors + label_id = lambda label: label_categories.find(label)[0] + colormap = { label_id(name): desc[0] + for name, desc in label_map.items() if desc[0] is not None } + mask_categories = MaskCategories(colormap) + mask_categories.inverse_colormap # pylint: disable=pointless-statement + categories[AnnotationType.mask] = mask_categories + + return categories diff --git a/datumaro/plugins/voc_format/importer.py b/datumaro/plugins/voc_format/importer.py new file mode 100644 index 0000000000..e9354e6c24 --- 
/dev/null +++ b/datumaro/plugins/voc_format/importer.py @@ -0,0 +1,56 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from glob import glob +import os.path as osp + +from datumaro.components.extractor import Importer + +from .format import VocTask, VocPath + + +class VocImporter(Importer): + _TASKS = [ + (VocTask.classification, 'voc_classification', 'Main'), + (VocTask.detection, 'voc_detection', 'Main'), + (VocTask.segmentation, 'voc_segmentation', 'Segmentation'), + (VocTask.person_layout, 'voc_layout', 'Layout'), + (VocTask.action_classification, 'voc_action', 'Action'), + ] + + @classmethod + def detect(cls, path): + return len(cls.find_subsets(path)) != 0 + + def __call__(self, path, **extra_params): + from datumaro.components.project import Project # cyclic import + project = Project() + + subset_paths = self.find_subsets(path) + if len(subset_paths) == 0: + raise Exception("Failed to find 'voc' dataset at '%s'" % path) + + for task, extractor_type, subset_path in subset_paths: + project.add_source('%s-%s' % + (task.name, osp.splitext(osp.basename(subset_path))[0]), + { + 'url': subset_path, + 'format': extractor_type, + 'options': dict(extra_params), + }) + + return project + + @staticmethod + def find_subsets(path): + subset_paths = [] + for task, extractor_type, task_dir in __class__._TASKS: + task_dir = osp.join(path, VocPath.SUBSETS_DIR, task_dir) + if not osp.isdir(task_dir): + continue + task_subsets = [p for p in glob(osp.join(task_dir, '*.txt')) + if '_' not in osp.basename(p)] + subset_paths += [(task, extractor_type, p) for p in task_subsets] + return subset_paths diff --git a/datumaro/plugins/yolo_format/__init__.py b/datumaro/plugins/yolo_format/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/datumaro/plugins/yolo_format/converter.py b/datumaro/plugins/yolo_format/converter.py new file mode 100644 index 0000000000..9217c7747b --- /dev/null +++ b/datumaro/plugins/yolo_format/converter.py @@ -0,0 +1,108 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import logging as log +import os +import os.path as osp +from collections import OrderedDict + +from datumaro.components.converter import Converter +from datumaro.components.extractor import AnnotationType + +from .format import YoloPath + + +def _make_yolo_bbox(img_size, box): + # https://github.com/pjreddie/darknet/blob/master/scripts/voc_label.py + # - values relative to width and height of image + # - are center of rectangle + x = (box[0] + box[2]) / 2 / img_size[0] + y = (box[1] + box[3]) / 2 / img_size[1] + w = (box[2] - box[0]) / img_size[0] + h = (box[3] - box[1]) / img_size[1] + return x, y, w, h + +class YoloConverter(Converter): + # https://github.com/AlexeyAB/darknet#how-to-train-to-detect-your-custom-objects + DEFAULT_IMAGE_EXT = '.jpg' + + def apply(self): + extractor = self._extractor + save_dir = self._save_dir + + os.makedirs(save_dir, exist_ok=True) + + label_categories = extractor.categories()[AnnotationType.label] + label_ids = {label.name: idx + for idx, label in enumerate(label_categories.items)} + with open(osp.join(save_dir, 'obj.names'), 'w') as f: + f.writelines('%s\n' % l[0] + for l in sorted(label_ids.items(), key=lambda x: x[1])) + + subset_lists = OrderedDict() + + for subset_name in extractor.subsets() or [None]: + if subset_name and subset_name in YoloPath.SUBSET_NAMES: + subset = extractor.get_subset(subset_name) + elif not subset_name: + subset_name = 
YoloPath.DEFAULT_SUBSET_NAME + subset = extractor + else: + log.warn("Skipping subset export '%s'. " + "If specified, the only valid names are %s" % \ + (subset_name, ', '.join( + "'%s'" % s for s in YoloPath.SUBSET_NAMES))) + continue + + subset_dir = osp.join(save_dir, 'obj_%s_data' % subset_name) + os.makedirs(subset_dir, exist_ok=True) + + image_paths = OrderedDict() + + for item in subset: + if not item.has_image: + raise Exception("Failed to export item '%s': " + "item has no image info" % item.id) + height, width = item.image.size + + image_name = self._make_image_filename(item) + if self._save_images: + if item.has_image and item.image.has_data: + self._save_image(item, osp.join(subset_dir, image_name)) + else: + log.warning("Item '%s' has no image" % item.id) + image_paths[item.id] = osp.join('data', + osp.basename(subset_dir), image_name) + + yolo_annotation = '' + for bbox in item.annotations: + if bbox.type is not AnnotationType.bbox: + continue + if bbox.label is None: + continue + + yolo_bb = _make_yolo_bbox((width, height), bbox.points) + yolo_bb = ' '.join('%.6f' % p for p in yolo_bb) + yolo_annotation += '%s %s\n' % (bbox.label, yolo_bb) + + annotation_path = osp.join(subset_dir, '%s.txt' % item.id) + os.makedirs(osp.dirname(annotation_path), exist_ok=True) + with open(annotation_path, 'w') as f: + f.write(yolo_annotation) + + subset_list_name = '%s.txt' % subset_name + subset_lists[subset_name] = subset_list_name + with open(osp.join(save_dir, subset_list_name), 'w') as f: + f.writelines('%s\n' % s for s in image_paths.values()) + + with open(osp.join(save_dir, 'obj.data'), 'w') as f: + f.write('classes = %s\n' % len(label_ids)) + + for subset_name, subset_list_name in subset_lists.items(): + f.write('%s = %s\n' % (subset_name, + osp.join('data', subset_list_name))) + + f.write('names = %s\n' % osp.join('data', 'obj.names')) + f.write('backup = backup/\n') diff --git a/datumaro/plugins/yolo_format/extractor.py b/datumaro/plugins/yolo_format/extractor.py new file mode 100644 index 0000000000..c8c39c420d --- /dev/null +++ b/datumaro/plugins/yolo_format/extractor.py @@ -0,0 +1,201 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from collections import OrderedDict +import os.path as osp +import re + +from datumaro.components.extractor import (SourceExtractor, Extractor, + DatasetItem, AnnotationType, Bbox, LabelCategories +) +from datumaro.util import split_path +from datumaro.util.image import Image + +from .format import YoloPath + + +class YoloExtractor(SourceExtractor): + class Subset(Extractor): + def __init__(self, name, parent): + super().__init__() + self._name = name + self._parent = parent + self.items = OrderedDict() + + def __iter__(self): + for item_id in self.items: + yield self._parent._get(item_id, self._name) + + def __len__(self): + return len(self.items) + + def categories(self): + return self._parent.categories() + + def __init__(self, config_path, image_info=None): + super().__init__() + + if not osp.isfile(config_path): + raise Exception("Can't read dataset descriptor file '%s'" % + config_path) + + rootpath = osp.dirname(config_path) + self._path = rootpath + + assert image_info is None or isinstance(image_info, (str, dict)) + if image_info is None: + image_info = osp.join(rootpath, YoloPath.IMAGE_META_FILE) + if not osp.isfile(image_info): + image_info = {} + if isinstance(image_info, str): + if not osp.isfile(image_info): + raise Exception("Can't read image meta file '%s'" % image_info) + with open(image_info) 
as f: + image_info = {} + for line in f: + image_name, h, w = line.strip().split() + image_info[image_name] = (int(h), int(w)) + self._image_info = image_info + + with open(config_path, 'r') as f: + config_lines = f.readlines() + + subsets = OrderedDict() + names_path = None + + for line in config_lines: + match = re.match(r'(\w+)\s*=\s*(.+)$', line) + if not match: + continue + + key = match.group(1) + value = match.group(2) + if key == 'names': + names_path = value + elif key in YoloPath.SUBSET_NAMES: + subsets[key] = value + else: + continue + + if not names_path: + raise Exception("Failed to parse labels path from '%s'" % \ + config_path) + + for subset_name, list_path in subsets.items(): + list_path = osp.join(self._path, self.localize_path(list_path)) + if not osp.isfile(list_path): + raise Exception("Not found '%s' subset list file" % subset_name) + + subset = YoloExtractor.Subset(subset_name, self) + with open(list_path, 'r') as f: + subset.items = OrderedDict( + (self.name_from_path(p), self.localize_path(p)) + for p in f + ) + subsets[subset_name] = subset + + self._subsets = subsets + + self._categories = { + AnnotationType.label: + self._load_categories( + osp.join(self._path, self.localize_path(names_path))) + } + + @staticmethod + def localize_path(path): + path = path.strip() + default_base = osp.join('data', '') + if path.startswith(default_base): # default path + path = path[len(default_base) : ] + return path + + @classmethod + def name_from_path(cls, path): + path = cls.localize_path(path) + parts = split_path(path) + if 1 < len(parts) and not osp.isabs(path): + # NOTE: when path is like [data/]/ + # drop everything but + # can be , so no just basename() + path = osp.join(*parts[1:]) + return osp.splitext(path)[0] + + def _get(self, item_id, subset_name): + subset = self._subsets[subset_name] + item = subset.items[item_id] + + if isinstance(item, str): + image_size = self._image_info.get(item_id) + image = Image(path=osp.join(self._path, item), size=image_size) + + anno_path = osp.splitext(image.path)[0] + '.txt' + annotations = self._parse_annotations(anno_path, image) + + item = DatasetItem(id=item_id, subset=subset_name, + image=image, annotations=annotations) + subset.items[item_id] = item + + return item + + @staticmethod + def _parse_annotations(anno_path, image): + lines = [] + with open(anno_path, 'r') as f: + for line in f: + line = line.strip() + if line: + lines.append(line) + + annotations = [] + if lines: + size = image.size # use image info as late as possible + if size is None: + raise Exception("Can't find image info for '%s'" % image.path) + image_height, image_width = size + for line in lines: + label_id, xc, yc, w, h = line.split() + label_id = int(label_id) + w = float(w) + h = float(h) + x = float(xc) - w * 0.5 + y = float(yc) - h * 0.5 + annotations.append(Bbox( + round(x * image_width, 1), round(y * image_height, 1), + round(w * image_width, 1), round(h * image_height, 1), + label=label_id + )) + + return annotations + + @staticmethod + def _load_categories(names_path): + label_categories = LabelCategories() + + with open(names_path, 'r') as f: + for label in f: + label_categories.add(label.strip()) + + return label_categories + + def categories(self): + return self._categories + + def __iter__(self): + for subset in self._subsets.values(): + for item in subset: + yield item + + def __len__(self): + length = 0 + for subset in self._subsets.values(): + length += len(subset) + return length + + def subsets(self): + return list(self._subsets) + + def 
get_subset(self, name): + return self._subsets[name] \ No newline at end of file diff --git a/datumaro/plugins/yolo_format/format.py b/datumaro/plugins/yolo_format/format.py new file mode 100644 index 0000000000..02a07669bb --- /dev/null +++ b/datumaro/plugins/yolo_format/format.py @@ -0,0 +1,11 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + + +class YoloPath: + DEFAULT_SUBSET_NAME = 'train' + SUBSET_NAMES = ['train', 'valid'] + + IMAGE_META_FILE = 'images.meta' \ No newline at end of file diff --git a/datumaro/plugins/yolo_format/importer.py b/datumaro/plugins/yolo_format/importer.py new file mode 100644 index 0000000000..a040ea4ef1 --- /dev/null +++ b/datumaro/plugins/yolo_format/importer.py @@ -0,0 +1,46 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from glob import glob +import logging as log +import os.path as osp + +from datumaro.components.extractor import Importer + + +class YoloImporter(Importer): + @classmethod + def detect(cls, path): + return len(cls.find_configs(path)) != 0 + + def __call__(self, path, **extra_params): + from datumaro.components.project import Project # cyclic import + project = Project() + + config_paths = self.find_configs(path) + if len(config_paths) == 0: + raise Exception("Failed to find 'yolo' dataset at '%s'" % path) + + for config_path in config_paths: + log.info("Found a dataset at '%s'" % config_path) + + source_name = '%s_%s' % ( + osp.basename(osp.dirname(config_path)), + osp.splitext(osp.basename(config_path))[0]) + project.add_source(source_name, { + 'url': config_path, + 'format': 'yolo', + 'options': dict(extra_params), + }) + + return project + + @staticmethod + def find_configs(path): + if path.endswith('.data') and osp.isfile(path): + config_paths = [path] + else: + config_paths = glob(osp.join(path, '**', '*.data'), recursive=True) + return config_paths \ No newline at end of file diff --git a/datumaro/util/__init__.py b/datumaro/util/__init__.py new file mode 100644 index 0000000000..0a75756bd2 --- /dev/null +++ b/datumaro/util/__init__.py @@ -0,0 +1,93 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import os +import os.path as osp +from itertools import islice + + +def find(iterable, pred=lambda x: True, default=None): + return next((x for x in iterable if pred(x)), default) + +def dir_items(path, ext, truncate_ext=False): + items = [] + for f in os.listdir(path): + ext_pos = f.rfind(ext) + if ext_pos != -1: + if truncate_ext: + f = f[:ext_pos] + items.append(f) + return items + +def split_path(path): + path = osp.normpath(path) + parts = [] + + while True: + path, part = osp.split(path) + if part: + parts.append(part) + else: + if path: + parts.append(path) + break + parts.reverse() + + return parts + +def cast(value, type_conv, default=None): + if value is None: + return default + try: + return type_conv(value) + except Exception: + return default + +def to_snake_case(s): + if not s: + return '' + + name = [s[0].lower()] + for idx, char in enumerate(s[1:]): + idx = idx + 1 + if char.isalpha() and char.isupper(): + prev_char = s[idx - 1] + if not (prev_char.isalpha() and prev_char.isupper()): + # avoid "HTML" -> "h_t_m_l" + name.append('_') + name.append(char.lower()) + else: + name.append(char) + return ''.join(name) + +def pairs(iterable): + a = iter(iterable) + return zip(a, a) + +def take_by(iterable, count): + """ + Returns elements from the input iterable by batches of N items. 
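+    The last batch can be incomplete, e.g.: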
+    ('abcdefg', 3) -> ['a', 'b', 'c'], ['d', 'e', 'f'], ['g']
+    """
+
+    it = iter(iterable)
+    while True:
+        batch = list(islice(it, count))
+        if len(batch) == 0:
+            break
+
+        yield batch
+
+def str_to_bool(s):
+    t = s.lower()
+    if t in {'true', '1', 'ok', 'yes', 'y'}:
+        return True
+    elif t in {'false', '0', 'no', 'n'}:
+        return False
+    else:
+        raise ValueError("Can't convert value '%s' to bool" % s)
+
+def filter_dict(d, exclude_keys):
+    return { k: v for k, v in d.items() if k not in exclude_keys }
\ No newline at end of file
diff --git a/datumaro/util/annotation_util.py b/datumaro/util/annotation_util.py
new file mode 100644
index 0000000000..3daa313f3f
--- /dev/null
+++ b/datumaro/util/annotation_util.py
@@ -0,0 +1,212 @@
+# Copyright (C) 2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+from itertools import groupby
+
+import numpy as np
+
+from datumaro.components.extractor import _Shape, Mask, AnnotationType, RleMask
+from datumaro.util.mask_tools import mask_to_rle
+
+
+def find_instances(instance_anns):
+    instance_anns = sorted(instance_anns, key=lambda a: a.group)
+    ann_groups = []
+    for g_id, group in groupby(instance_anns, lambda a: a.group):
+        if not g_id:
+            ann_groups.extend(([a] for a in group))
+        else:
+            ann_groups.append(list(group))
+
+    return ann_groups
+
+def find_group_leader(group):
+    return max(group, key=lambda x: x.get_area())
+
+def _get_bbox(ann):
+    if isinstance(ann, (_Shape, Mask)):
+        return ann.get_bbox()
+    else:
+        return ann
+
+def max_bbox(annotations):
+    boxes = [_get_bbox(ann) for ann in annotations]
+    x0 = min((b[0] for b in boxes), default=0)
+    y0 = min((b[1] for b in boxes), default=0)
+    x1 = max((b[0] + b[2] for b in boxes), default=0)
+    y1 = max((b[1] + b[3] for b in boxes), default=0)
+    return [x0, y0, x1 - x0, y1 - y0]
+
+def mean_bbox(annotations):
+    le = len(annotations)
+    boxes = [_get_bbox(ann) for ann in annotations]
+    mlb = sum(b[0] for b in boxes) / le
+    mtb = sum(b[1] for b in boxes) / le
+    mrb = sum(b[0] + b[2] for b in boxes) / le
+    mbb = sum(b[1] + b[3] for b in boxes) / le
+    return [mlb, mtb, mrb - mlb, mbb - mtb]
+
+def softmax(x):
+    # shift by the maximum for numerical stability; the result is unchanged
+    exps = np.exp(x - np.max(x))
+    return exps / np.sum(exps)
+
+def nms(segments, iou_thresh=0.5):
+    """
+    Non-maximum suppression: greedily keeps the highest-scoring segments,
+    dropping the ones that overlap them with IoU above iou_thresh.
+    """
+
+    indices = np.argsort([b.attributes['score'] for b in segments])
+    # segment_iou (below) handles boxes, polygons and masks uniformly
+    ious = np.array([[segment_iou(a, b) for b in segments] for a in segments])
+
+    predictions = []
+    while len(indices) != 0:
+        i = len(indices) - 1
+        pred_idx = indices[i]
+        to_remove = [i]
+        predictions.append(segments[pred_idx])
+        for j, box_idx in enumerate(indices[:i]):
+            if iou_thresh < ious[pred_idx, box_idx]:
+                to_remove.append(j)
+        indices = np.delete(indices, to_remove)
+
+    return predictions
+
+def bbox_iou(a, b):
+    """
+    IoU computation for simple cases with bounding boxes.
+    Returns -1 if the boxes do not intersect, IoU in (0; 1] otherwise.
+    """
+    bbox_a = _get_bbox(a)
+    bbox_b = _get_bbox(b)
+
+    aX, aY, aW, aH = bbox_a
+    bX, bY, bW, bH = bbox_b
+    in_right = min(aX + aW, bX + bW)
+    in_left = max(aX, bX)
+    in_top = max(aY, bY)
+    in_bottom = min(aY + aH, bY + bH)
+
+    in_w = max(0, in_right - in_left)
+    in_h = max(0, in_bottom - in_top)
+    intersection = in_w * in_h
+    if not intersection:
+        return -1
+
+    a_area = aW * aH
+    b_area = bW * bH
+    union = a_area + b_area - intersection
+    return intersection / union
+
+def segment_iou(a, b):
+    """
+    Generic IoU computation with masks, polygons, and boxes.
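+    If either annotation is a bounding box, both are compared as boxes;
+    otherwise both are converted to COCO RLE masks first.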
+ Returns -1 if no intersection, [0; 1] otherwise + """ + from pycocotools import mask as mask_utils + + a_bbox = a.get_bbox() + b_bbox = b.get_bbox() + + is_bbox = AnnotationType.bbox in [a.type, b.type] + if is_bbox: + a = [a_bbox] + b = [b_bbox] + else: + w = max(a_bbox[0] + a_bbox[2], b_bbox[0] + b_bbox[2]) + h = max(a_bbox[1] + a_bbox[3], b_bbox[1] + b_bbox[3]) + + def _to_rle(ann): + if ann.type == AnnotationType.polygon: + return mask_utils.frPyObjects([ann.points], h, w) + elif isinstance(ann, RleMask): + return [ann.rle] + elif ann.type == AnnotationType.mask: + return mask_utils.frPyObjects([mask_to_rle(ann.image)], h, w) + else: + raise TypeError("Unexpected arguments: %s, %s" % (a, b)) + a = _to_rle(a) + b = _to_rle(b) + return float(mask_utils.iou(a, b, [not is_bbox])) + +def PDJ(a, b, eps=None, ratio=0.05, bbox=None): + """ + Percentage of Detected Joints metric. + Counts the number of matching points. + """ + + assert eps is not None or ratio is not None + + p1 = np.array(a.points).reshape((-1, 2)) + p2 = np.array(b.points).reshape((-1, 2)) + if len(p1) != len(p2): + return 0 + + if not eps: + if bbox is None: + bbox = mean_bbox([a, b]) + + diag = (bbox[2] ** 2 + bbox[3] ** 2) ** 0.5 + eps = ratio * diag + + dists = np.linalg.norm(p1 - p2, axis=1) + return np.sum(dists < eps) / len(p1) + +def OKS(a, b, sigma=None, bbox=None, scale=None): + """ + Object Keypoint Similarity metric. + https://cocodataset.org/#keypoints-eval + """ + + p1 = np.array(a.points).reshape((-1, 2)) + p2 = np.array(b.points).reshape((-1, 2)) + if len(p1) != len(p2): + return 0 + + if not sigma: + sigma = 0.1 + else: + assert len(sigma) == len(p1) + + if not scale: + if bbox is None: + bbox = mean_bbox([a, b]) + scale = bbox[2] * bbox[3] + + dists = np.linalg.norm(p1 - p2, axis=1) + return np.sum(np.exp(-(dists ** 2) / (2 * scale * (2 * sigma) ** 2))) + +def smooth_line(points, segments): + assert 2 <= len(points) // 2 and len(points) % 2 == 0 + + if len(points) // 2 == segments: + return points + + points = list(points) + if len(points) == 2: + points.extend(points) + points = np.array(points).reshape((-1, 2)) + + lengths = np.linalg.norm(points[1:] - points[:-1], axis=1) + dists = [0] + for l in lengths: + dists.append(dists[-1] + l) + + step = dists[-1] / segments + + new_points = np.zeros((segments + 1, 2)) + new_points[0] = points[0] + + old_segment = 0 + for new_segment in range(1, segments + 1): + pos = new_segment * step + while dists[old_segment + 1] < pos and old_segment + 2 < len(dists): + old_segment += 1 + + segment_start = dists[old_segment] + segment_len = lengths[old_segment] + prev_p = points[old_segment] + next_p = points[old_segment + 1] + r = (pos - segment_start) / segment_len + + new_points[new_segment] = prev_p * (1 - r) + next_p * r + + return new_points, step diff --git a/datumaro/util/attrs_util.py b/datumaro/util/attrs_util.py new file mode 100644 index 0000000000..e631f35ad2 --- /dev/null +++ b/datumaro/util/attrs_util.py @@ -0,0 +1,33 @@ +# Copyright (C) 2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import attr + +_NOTSET = object() + +def not_empty(inst, attribute, x): + assert len(x) != 0, x + +def default_if_none(conv): + def validator(inst, attribute, value): + default = attribute.default + if value is None: + if callable(default): + value = default() + elif isinstance(default, attr.Factory): + value = default.factory() + else: + value = default + elif not isinstance(value, attribute.type or conv): + value = conv(value) + setattr(inst, attribute.name, 
value) + return validator + +def ensure_cls(c): + def converter(arg): + if isinstance(arg, c): + return arg + else: + return c(**arg) + return converter \ No newline at end of file diff --git a/datumaro/util/command_targets.py b/datumaro/util/command_targets.py new file mode 100644 index 0000000000..50c854f271 --- /dev/null +++ b/datumaro/util/command_targets.py @@ -0,0 +1,113 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import argparse +from enum import Enum + +from datumaro.components.project import Project +from datumaro.util.image import load_image + + +TargetKinds = Enum('TargetKinds', + ['project', 'source', 'external_dataset', 'inference', 'image']) + +def is_project_name(value, project): + return value == project.config.project_name + +def is_project_path(value): + if value: + try: + Project.load(value) + return True + except Exception: + pass + return False + +def is_project(value, project=None): + if is_project_path(value): + return True + elif project is not None: + return is_project_name(value, project) + + return False + +def is_source(value, project=None): + if project is not None: + try: + project.get_source(value) + return True + except KeyError: + pass + + return False + +def is_external_source(value): + return False + +def is_inference_path(value): + return False + +def is_image_path(value): + try: + return load_image(value) is not None + except Exception: + return False + + +class Target: + def __init__(self, kind, test, is_default=False, name=None): + self.kind = kind + self.test = test + self.is_default = is_default + self.name = name + + def _get_fields(self): + return [self.kind, self.test, self.is_default, self.name] + + def __str__(self): + return self.name or str(self.kind) + + def __len__(self): + return len(self._get_fields()) + + def __iter__(self): + return iter(self._get_fields()) + +def ProjectTarget(kind=TargetKinds.project, test=None, + is_default=False, name='project name or path', + project=None): + if test is None: + test = lambda v: is_project(v, project=project) + return Target(kind, test, is_default, name) + +def SourceTarget(kind=TargetKinds.source, test=None, + is_default=False, name='source name', + project=None): + if test is None: + test = lambda v: is_source(v, project=project) + return Target(kind, test, is_default, name) + +def ExternalDatasetTarget(kind=TargetKinds.external_dataset, + test=is_external_source, + is_default=False, name='external dataset path'): + return Target(kind, test, is_default, name) + +def InferenceTarget(kind=TargetKinds.inference, test=is_inference_path, + is_default=False, name='inference path'): + return Target(kind, test, is_default, name) + +def ImageTarget(kind=TargetKinds.image, test=is_image_path, + is_default=False, name='image path'): + return Target(kind, test, is_default, name) + + +def target_selector(*targets): + def selector(value): + for (kind, test, is_default, _) in targets: + if (is_default and (value == '' or value is None)) or test(value): + return (kind, value) + raise argparse.ArgumentTypeError('Value should be one of: %s' \ + % (', '.join([str(t) for t in targets]))) + return selector diff --git a/datumaro/util/image.py b/datumaro/util/image.py new file mode 100644 index 0000000000..626d849976 --- /dev/null +++ b/datumaro/util/image.py @@ -0,0 +1,246 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +# pylint: disable=unused-import + +from enum import Enum +from io import BytesIO +import numpy as np +import os 
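The `default_if_none` helper above is an `attrs` validator: it substitutes the field's declared default when `None` is passed and runs values of the wrong type through `conv`. A minimal usage sketch (the `Config` class here is hypothetical, not part of this patch):

``` python
import attr

from datumaro.util.attrs_util import default_if_none

@attr.s
class Config:
    # falls back to 0 when None is passed, converts other values with int()
    count = attr.ib(default=0, validator=default_if_none(int))

assert Config(None).count == 0
assert Config('5').count == 5
```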
+import os.path as osp + +_IMAGE_BACKENDS = Enum('_IMAGE_BACKENDS', ['cv2', 'PIL']) +_IMAGE_BACKEND = None +try: + import cv2 + _IMAGE_BACKEND = _IMAGE_BACKENDS.cv2 +except ImportError: + import PIL + _IMAGE_BACKEND = _IMAGE_BACKENDS.PIL + +from datumaro.util.image_cache import ImageCache as _ImageCache + + +def load_image(path): + """ + Reads an image in the HWC Grayscale/BGR(A) float [0; 255] format. + """ + + if _IMAGE_BACKEND == _IMAGE_BACKENDS.cv2: + import cv2 + image = cv2.imread(path, cv2.IMREAD_UNCHANGED) + image = image.astype(np.float32) + elif _IMAGE_BACKEND == _IMAGE_BACKENDS.PIL: + from PIL import Image + image = Image.open(path) + image = np.asarray(image, dtype=np.float32) + if len(image.shape) == 3 and image.shape[2] in {3, 4}: + image[:, :, :3] = image[:, :, 2::-1] # RGB to BGR + else: + raise NotImplementedError() + + assert len(image.shape) in {2, 3} + if len(image.shape) == 3: + assert image.shape[2] in {3, 4} + return image + +def save_image(path, image, create_dir=False, **kwargs): + # NOTE: Check destination path for existence + # OpenCV silently fails if target directory does not exist + dst_dir = osp.dirname(path) + if dst_dir: + if create_dir: + os.makedirs(dst_dir, exist_ok=True) + elif not osp.isdir(dst_dir): + raise FileNotFoundError("Directory does not exist: '%s'" % dst_dir) + + if not kwargs: + kwargs = {} + + if _IMAGE_BACKEND == _IMAGE_BACKENDS.cv2: + import cv2 + + params = [] + + ext = path[-4:] + if ext.upper() == '.JPG': + params = [ + int(cv2.IMWRITE_JPEG_QUALITY), kwargs.get('jpeg_quality', 75) + ] + + image = image.astype(np.uint8) + cv2.imwrite(path, image, params=params) + elif _IMAGE_BACKEND == _IMAGE_BACKENDS.PIL: + from PIL import Image + + params = {} + params['quality'] = kwargs.get('jpeg_quality') + if kwargs.get('jpeg_quality') == 100: + params['subsampling'] = 0 + + image = image.astype(np.uint8) + if len(image.shape) == 3 and image.shape[2] in {3, 4}: + image[:, :, :3] = image[:, :, 2::-1] # BGR to RGB + image = Image.fromarray(image) + image.save(path, **params) + else: + raise NotImplementedError() + +def encode_image(image, ext, **kwargs): + if not kwargs: + kwargs = {} + + if _IMAGE_BACKEND == _IMAGE_BACKENDS.cv2: + import cv2 + + params = [] + + if not ext.startswith('.'): + ext = '.' 
+ ext + + if ext.upper() == '.JPG': + params = [ + int(cv2.IMWRITE_JPEG_QUALITY), kwargs.get('jpeg_quality', 75) + ] + + image = image.astype(np.uint8) + success, result = cv2.imencode(ext, image, params=params) + if not success: + raise Exception("Failed to encode image to '%s' format" % (ext)) + return result.tobytes() + elif _IMAGE_BACKEND == _IMAGE_BACKENDS.PIL: + from PIL import Image + + if ext.startswith('.'): + ext = ext[1:] + + params = {} + params['quality'] = kwargs.get('jpeg_quality') + if kwargs.get('jpeg_quality') == 100: + params['subsampling'] = 0 + + image = image.astype(np.uint8) + if len(image.shape) == 3 and image.shape[2] in {3, 4}: + image[:, :, :3] = image[:, :, 2::-1] # BGR to RGB + image = Image.fromarray(image) + with BytesIO() as buffer: + image.save(buffer, format=ext, **params) + return buffer.getvalue() + else: + raise NotImplementedError() + +def decode_image(image_bytes): + if _IMAGE_BACKEND == _IMAGE_BACKENDS.cv2: + import cv2 + image = np.frombuffer(image_bytes, dtype=np.uint8) + image = cv2.imdecode(image, cv2.IMREAD_UNCHANGED) + image = image.astype(np.float32) + elif _IMAGE_BACKEND == _IMAGE_BACKENDS.PIL: + from PIL import Image + image = Image.open(BytesIO(image_bytes)) + image = np.asarray(image, dtype=np.float32) + if len(image.shape) == 3 and image.shape[2] in {3, 4}: + image[:, :, :3] = image[:, :, 2::-1] # RGB to BGR + else: + raise NotImplementedError() + + assert len(image.shape) in {2, 3} + if len(image.shape) == 3: + assert image.shape[2] in {3, 4} + return image + + +class lazy_image: + def __init__(self, path, loader=None, cache=None): + if loader is None: + loader = load_image + self.path = path + self.loader = loader + + # Cache: + # - False: do not cache + # - None: use the global cache + # - object: an object to be used as cache + assert cache in {None, False} or isinstance(cache, object) + self.cache = cache + + def __call__(self): + image = None + image_id = hash(self) # path is not necessary hashable or a file path + + cache = self._get_cache(self.cache) + if cache is not None: + image = cache.get(image_id) + + if image is None: + image = self.loader(self.path) + if cache is not None: + cache.push(image_id, image) + return image + + @staticmethod + def _get_cache(cache): + if cache is None: + cache = _ImageCache.get_instance() + elif cache == False: + return None + return cache + + def __hash__(self): + return hash((id(self), self.path, self.loader)) + +class Image: + def __init__(self, data=None, path=None, loader=None, cache=None, + size=None): + assert size is None or len(size) == 2 + if size is not None: + assert len(size) == 2 and 0 < size[0] and 0 < size[1], size + size = tuple(size) + self._size = size # (H, W) + + assert path is None or isinstance(path, str) + if path is None: + path = '' + self._path = path + + assert data is not None or path or loader, "Image can not be empty" + if data is None and (path or loader): + if osp.isfile(path) or loader: + data = lazy_image(path, loader=loader, cache=cache) + self._data = data + + @property + def path(self): + return self._path + + @property + def data(self): + if callable(self._data): + return self._data() + return self._data + + @property + def has_data(self): + return self._data is not None + + @property + def size(self): + if self._size is None: + data = self.data + if data is not None: + self._size = data.shape[:2] + return self._size + + def __eq__(self, other): + if isinstance(other, np.ndarray): + return self.has_data and np.array_equal(self.data, other) + + if not 
isinstance(other, __class__): + return False + return \ + (np.array_equal(self.size, other.size)) and \ + (self.has_data == other.has_data) and \ + (self.has_data and np.array_equal(self.data, other.data) or \ + not self.has_data) \ No newline at end of file diff --git a/datumaro/util/image_cache.py b/datumaro/util/image_cache.py new file mode 100644 index 0000000000..08f0258289 --- /dev/null +++ b/datumaro/util/image_cache.py @@ -0,0 +1,42 @@ +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from collections import OrderedDict + + +_instance = None + +DEFAULT_CAPACITY = 2 + +class ImageCache: + @staticmethod + def get_instance(): + global _instance + if _instance is None: + _instance = ImageCache() + return _instance + + def __init__(self, capacity=DEFAULT_CAPACITY): + self.capacity = int(capacity) + self.items = OrderedDict() + + def push(self, item_id, image): + if self.capacity <= len(self.items): + self.items.popitem(last=True) + self.items[item_id] = image + + def get(self, item_id): + default = object() + item = self.items.get(item_id, default) + if item is default: + return None + + self.items.move_to_end(item_id, last=False) # naive splay tree + return item + + def size(self): + return len(self.items) + + def clear(self): + self.items.clear() \ No newline at end of file diff --git a/datumaro/util/log_utils.py b/datumaro/util/log_utils.py new file mode 100644 index 0000000000..6c8d8421e7 --- /dev/null +++ b/datumaro/util/log_utils.py @@ -0,0 +1,16 @@ + +# Copyright (C) 2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from contextlib import contextmanager +import logging + +@contextmanager +def logging_disabled(max_level=logging.CRITICAL): + previous_level = logging.root.manager.disable + logging.disable(max_level) + try: + yield + finally: + logging.disable(previous_level) \ No newline at end of file diff --git a/datumaro/util/mask_tools.py b/datumaro/util/mask_tools.py new file mode 100644 index 0000000000..95c8633a23 --- /dev/null +++ b/datumaro/util/mask_tools.py @@ -0,0 +1,289 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import numpy as np + +from datumaro.util.image import lazy_image, load_image + + +def generate_colormap(length=256): + """ + Generates colors using PASCAL VOC algorithm. + + Returns index -> (R, G, B) mapping. 
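+
+    With the default length this reproduces the standard VOC palette,
+    e.g. 1 -> (128, 0, 0), 2 -> (0, 128, 0), 15 -> (192, 128, 128).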
+    """
+
+    def get_bit(number, index):
+        return (number >> index) & 1
+
+    colormap = np.zeros((length, 3), dtype=int)
+    indices = np.arange(length, dtype=int)
+
+    for j in range(7, -1, -1):
+        for c in range(3):
+            colormap[:, c] |= get_bit(indices, c) << j
+        indices >>= 3
+
+    return {
+        id: tuple(color) for id, color in enumerate(colormap)
+    }
+
+def invert_colormap(colormap):
+    return {
+        tuple(a): index for index, a in colormap.items()
+    }
+
+def check_is_mask(mask):
+    assert len(mask.shape) in {2, 3}
+    if len(mask.shape) == 3:
+        assert mask.shape[2] == 1
+
+_default_colormap = generate_colormap()
+_default_unpaint_colormap = invert_colormap(_default_colormap)
+
+def unpaint_mask(painted_mask, inverse_colormap=None):
+    # Convert color mask to index mask
+
+    # mask: HWC BGR [0; 255]
+    # colormap: (R, G, B) -> index
+    assert len(painted_mask.shape) == 3
+    if inverse_colormap is None:
+        inverse_colormap = _default_unpaint_colormap
+
+    if callable(inverse_colormap):
+        map_fn = lambda a: inverse_colormap(
+            (a >> 16) & 255, (a >> 8) & 255, a & 255
+        )
+    else:
+        map_fn = lambda a: inverse_colormap[(
+            (a >> 16) & 255, (a >> 8) & 255, a & 255
+        )]
+
+    painted_mask = painted_mask.astype(int)
+    painted_mask = painted_mask[:, :, 0] + \
+        (painted_mask[:, :, 1] << 8) + \
+        (painted_mask[:, :, 2] << 16)
+    uvals, unpainted_mask = np.unique(painted_mask, return_inverse=True)
+    palette = np.array([map_fn(v) for v in uvals], dtype=np.float32)
+    unpainted_mask = palette[unpainted_mask].reshape(painted_mask.shape[:2])
+
+    return unpainted_mask
+
+def paint_mask(mask, colormap=None):
+    # Applies colormap to index mask
+
+    # mask: HW(C) [0; max_index] mask
+    # colormap: index -> (R, G, B)
+    check_is_mask(mask)
+
+    if colormap is None:
+        colormap = _default_colormap
+    if callable(colormap):
+        map_fn = colormap
+    else:
+        map_fn = lambda c: colormap.get(c, (-1, -1, -1))
+    palette = np.array([map_fn(c)[::-1] for c in range(256)], dtype=np.float32)
+
+    mask = mask.astype(np.uint8)
+    painted_mask = palette[mask].reshape((*mask.shape[:2], 3))
+    return painted_mask
+
+def remap_mask(mask, map_fn):
+    # Changes mask elements from one colormap to another
+
+    # mask: HW(C) [0; max_index] mask
+    check_is_mask(mask)
+
+    return np.array([map_fn(c) for c in range(256)], dtype=np.uint8)[mask]
+
+def make_index_mask(binary_mask, index):
+    return np.choose(binary_mask, np.array([0, index], dtype=np.uint8))
+
+def make_binary_mask(mask):
+    # NOTE: np.nonzero() here would return indices, not a mask
+    return mask.astype(bool)
+
+
+def load_mask(path, inverse_colormap=None):
+    mask = load_image(path)
+    mask = mask.astype(np.uint8)
+    if inverse_colormap is not None:
+        if len(mask.shape) == 3 and mask.shape[2] != 1:
+            mask = unpaint_mask(mask, inverse_colormap)
+    return mask
+
+def lazy_mask(path, inverse_colormap=None):
+    return lazy_image(path, lambda path: load_mask(path, inverse_colormap))
+
+def mask_to_rle(binary_mask):
+    # walk in column-major (Fortran) order, as the COCO format specifies
+    bounded = binary_mask.ravel(order='F')
+
+    # add sentinel values at the borders of the sequence, then
+    # find boundary positions for sequences and compute their lengths
+    difs = np.diff(bounded, prepend=[1 - bounded[0]], append=[1 - bounded[-1]])
+    counts, = np.where(difs != 0)
+
+    # start RLE encoding from 0 as COCO format specifies
+    if bounded[0] != 0:
+        counts = np.diff(counts, prepend=[0])
+    else:
+        counts = np.diff(counts)
+
+    return {
+        'counts': counts,
+        'size': list(binary_mask.shape)
+    }
+
+def mask_to_polygons(mask, tolerance=1.0, area_threshold=1):
+    """
+    Convert an instance mask to polygons
+
+    Args:
+        mask: a 2d binary mask
+        tolerance: maximum distance from original points of
+            a polygon to the approximated ones
+        area_threshold: minimal area of generated polygons
+
+    Returns:
+        A list of polygons like [[x1,y1, x2,y2 ...], [...]]
+    """
+    from pycocotools import mask as mask_utils
+    from skimage import measure
+
+    polygons = []
+
+    # pad mask with 0 around borders
+    padded_mask = np.pad(mask, pad_width=1, mode='constant', constant_values=0)
+    contours = measure.find_contours(padded_mask, 0.5)
+    # Fix coordinates after padding; contours can have different lengths,
+    # so shift them one by one instead of np.subtract() on the whole list
+    contours = [np.subtract(contour, 1) for contour in contours]
+
+    for contour in contours:
+        if not np.array_equal(contour[0], contour[-1]):
+            contour = np.vstack((contour, contour[0])) # make polygon closed
+
+        contour = measure.approximate_polygon(contour, tolerance)
+        if len(contour) <= 2:
+            continue
+
+        contour = np.flip(contour, axis=1).flatten().clip(0) # [x0, y0, ...]
+
+        # Check if the polygon is big enough
+        rle = mask_utils.frPyObjects([contour], mask.shape[0], mask.shape[1])
+        area = sum(mask_utils.area(rle))
+        if area_threshold <= area:
+            polygons.append(contour)
+    return polygons
+
+def crop_covered_segments(segments, width, height,
+        iou_threshold=0.0, ratio_tolerance=0.001, area_threshold=1,
+        return_masks=False):
+    """
+    Find all segments occluded by others and crop them to the visible part only.
+    Input segments are expected to be sorted from background to foreground.
+
+    Args:
+        segments: 1d list of segment RLEs (in COCO format)
+        width: width of the image
+        height: height of the image
+        iou_threshold: IoU threshold for objects to be counted as intersected.
+            By default it is set to 0 to process any intersected objects
+        ratio_tolerance: an IoU "handicap" value for a situation
+            when an object is (almost) fully covered by another one and we
+            don't want to make a "hole" in the background object
+        area_threshold: minimal area of included segments
+
+    Returns:
+        A list of input segments' parts (in the same order as input):
+        [
+            [[x1,y1, x2,y2 ...], ...], # input segment #0 parts
+            mask1, # input segment #1 mask (if source segment is mask)
+            [], # when source segment is too small
+            ...
+        ]
+    """
+    from pycocotools import mask as mask_utils
+
+    segments = [[s] for s in segments]
+    input_rles = [mask_utils.frPyObjects(s, height, width) for s in segments]
+
+    for i, rle_bottom in enumerate(input_rles):
+        area_bottom = sum(mask_utils.area(rle_bottom))
+        if area_bottom < area_threshold:
+            segments[i] = [] if not return_masks else None
+            continue
+
+        rles_top = []
+        for j in range(i + 1, len(input_rles)):
+            rle_top = input_rles[j]
+            iou = sum(mask_utils.iou(rle_bottom, rle_top, [0, 0]))[0]
+
+            if iou <= iou_threshold:
+                continue
+
+            area_top = sum(mask_utils.area(rle_top))
+            area_ratio = area_top / area_bottom
+
+            # If a segment is fully inside another one, skip this segment
+            if abs(area_ratio - iou) < ratio_tolerance:
+                continue
+
+            # Check if the bottom segment is fully covered by the top one.
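+            # (when the top segment fully covers the bottom one, IoU equals
+            # area_bottom / area_top = 1 / area_ratio, so the difference
+            # below stays within ratio_tolerance)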
+            # If so, the annotation is probably mistaken,
+            # so keep the background segment intact
+            if abs(1 / area_ratio - iou) < ratio_tolerance:
+                rles_top = []
+                break
+
+            rles_top += rle_top
+
+        if not rles_top and not isinstance(segments[i][0], dict) \
+                and not return_masks:
+            continue
+
+        rle_bottom = rle_bottom[0]
+        bottom_mask = mask_utils.decode(rle_bottom).astype(np.uint8)
+
+        if rles_top:
+            rle_top = mask_utils.merge(rles_top)
+            top_mask = mask_utils.decode(rle_top).astype(np.uint8)
+
+            bottom_mask -= top_mask
+            bottom_mask[bottom_mask != 1] = 0
+
+        if not return_masks and not isinstance(segments[i][0], dict):
+            segments[i] = mask_to_polygons(bottom_mask,
+                area_threshold=area_threshold)
+        else:
+            segments[i] = bottom_mask
+
+    return segments
+
+def rles_to_mask(rles, width, height):
+    from pycocotools import mask as mask_utils
+
+    rles = mask_utils.frPyObjects(rles, height, width)
+    rles = mask_utils.merge(rles)
+    mask = mask_utils.decode(rles)
+    return mask
+
+def find_mask_bbox(mask):
+    cols = np.any(mask, axis=0)
+    rows = np.any(mask, axis=1)
+    x0, x1 = np.where(cols)[0][[0, -1]]
+    y0, y1 = np.where(rows)[0][[0, -1]]
+    return [x0, y0, x1 - x0, y1 - y0]
+
+def merge_masks(masks):
+    """
+    Merges masks into one. Mask order determines z-order:
+    masks that come later in the list are drawn on top of earlier ones.
+    """
+    if not masks:
+        return None
+
+    merged_mask = masks[0]
+    for m in masks[1:]:
+        merged_mask = np.where(m != 0, m, merged_mask)
+
+    return merged_mask
\ No newline at end of file
diff --git a/datumaro/util/os_util.py b/datumaro/util/os_util.py
new file mode 100644
index 0000000000..b4d05e376d
--- /dev/null
+++ b/datumaro/util/os_util.py
@@ -0,0 +1,17 @@
+
+# Copyright (C) 2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import subprocess
+
+
+def check_instruction_set(instruction):
+    return instruction == str.strip(
+        # Let's ignore a warning from bandit about using shell=True.
+        # In this case it isn't a security issue, and we need
+        # shell features like pipes.
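+        # Note: this relies on the Linux-only `lscpu` utility,
+        # so the check works only on Linux hosts.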
+        subprocess.check_output(
+            'lscpu | grep -o "%s" | head -1' % instruction,
+            shell=True).decode('utf-8') # nosec
+    )
\ No newline at end of file
diff --git a/datumaro/util/test_utils.py b/datumaro/util/test_utils.py
new file mode 100644
index 0000000000..db2767dbc1
--- /dev/null
+++ b/datumaro/util/test_utils.py
@@ -0,0 +1,121 @@
+
+# Copyright (C) 2019-2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import inspect
+import os
+import os.path as osp
+import shutil
+import tempfile
+
+from datumaro.components.extractor import AnnotationType
+from datumaro.util import find
+
+
+def current_function_name(depth=1):
+    return inspect.getouterframes(inspect.currentframe())[depth].function
+
+class FileRemover:
+    def __init__(self, path, is_dir=False, ignore_errors=False):
+        self.path = path
+        self.is_dir = is_dir
+        self.ignore_errors = ignore_errors
+
+    def __enter__(self):
+        return self.path
+
+    # pylint: disable=redefined-builtin
+    def __exit__(self, type=None, value=None, traceback=None):
+        if self.is_dir:
+            shutil.rmtree(self.path, ignore_errors=self.ignore_errors)
+        else:
+            os.remove(self.path)
+    # pylint: enable=redefined-builtin
+
+class TestDir(FileRemover):
+    def __init__(self, path=None, ignore_errors=False):
+        if path is None:
+            path = osp.abspath('temp_%s-' % current_function_name(2))
+            path = tempfile.mkdtemp(dir=os.getcwd(), prefix=path)
+        else:
+            os.makedirs(path, exist_ok=ignore_errors)
+
+        super().__init__(path, is_dir=True, ignore_errors=ignore_errors)
+
+def compare_categories(test, expected, actual):
+    test.assertEqual(
+        sorted(expected, key=lambda t: t.value),
+        sorted(actual, key=lambda t: t.value)
+    )
+
+    if AnnotationType.label in expected:
+        test.assertEqual(
+            expected[AnnotationType.label].items,
+            actual[AnnotationType.label].items,
+        )
+    if AnnotationType.mask in expected:
+        test.assertEqual(
+            expected[AnnotationType.mask].colormap,
+            actual[AnnotationType.mask].colormap,
+        )
+    if AnnotationType.points in expected:
+        test.assertEqual(
+            expected[AnnotationType.points].items,
+            actual[AnnotationType.points].items,
+        )
+
+def _compare_annotations(expected, actual, ignored_attrs=None):
+    if not ignored_attrs:
+        return expected == actual
+
+    a_attr = expected.attributes
+    b_attr = actual.attributes
+
+    expected.attributes = {k: v for k, v in a_attr.items()
+        if k not in ignored_attrs}
+    actual.attributes = {k: v for k, v in b_attr.items()
+        if k not in ignored_attrs}
+    r = expected == actual
+
+    expected.attributes = a_attr
+    actual.attributes = b_attr
+    return r
+
+def compare_datasets(test, expected, actual, ignored_attrs=None):
+    compare_categories(test, expected.categories(), actual.categories())
+
+    test.assertEqual(sorted(expected.subsets()), sorted(actual.subsets()))
+    test.assertEqual(len(expected), len(actual))
+    for item_a in expected:
+        item_b = find(actual, lambda x: x.id == item_a.id and \
+            x.subset == item_a.subset)
+        test.assertFalse(item_b is None, item_a.id)
+        test.assertEqual(item_a.attributes, item_b.attributes)
+        test.assertEqual(len(item_a.annotations), len(item_b.annotations))
+        for ann_a in item_a.annotations:
+            # We might find a few corresponding items, so check them all
+            ann_b_matches = [x for x in item_b.annotations
+                if x.type == ann_a.type]
+            test.assertFalse(len(ann_b_matches) == 0, 'ann id: %s' % ann_a.id)
+
+            ann_b = find(ann_b_matches, lambda x:
+                _compare_annotations(x, ann_a, ignored_attrs=ignored_attrs))
+            if ann_b is None:
+                test.fail('ann %s, candidates %s' % (ann_a, ann_b_matches))
+            item_b.annotations.remove(ann_b) # avoid repeats
+
+def compare_datasets_strict(test, expected, actual):
+    # Compares datasets for strong equality
+
+    test.assertEqual(expected.categories(), actual.categories())
+
+    test.assertListEqual(sorted(expected.subsets()), sorted(actual.subsets()))
+    test.assertEqual(len(expected), len(actual))
+
+    for subset_name in expected.subsets():
+        e_subset = expected.get_subset(subset_name)
+        a_subset = actual.get_subset(subset_name)
+        test.assertEqual(len(e_subset), len(a_subset))
+        for idx, (item_a, item_b) in enumerate(zip(e_subset, a_subset)):
+            test.assertEqual(item_a, item_b,
+                '%s:\n%s\nvs.\n%s\n' % \
+                (idx, item_a, item_b))
\ No newline at end of file
diff --git a/datumaro/util/tf_util.py b/datumaro/util/tf_util.py
new file mode 100644
index 0000000000..9eda97bab9
--- /dev/null
+++ b/datumaro/util/tf_util.py
@@ -0,0 +1,80 @@
+
+# Copyright (C) 2019-2020 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+
+def check_import():
+    # Workaround to check that the import works:
+    # official TF builds are compiled with AVX instructions, so importing
+    # tensorflow on a CPU without AVX support crashes the whole process.
+    # Try the import in a separate process and raise an exception instead.
+
+    import subprocess
+    import sys
+
+    from .os_util import check_instruction_set
+
+    result = subprocess.run([sys.executable, '-c', 'import tensorflow'],
+        timeout=60,
+        universal_newlines=True, # use text mode for output stream
+        stdout=subprocess.PIPE, stderr=subprocess.PIPE) # capture output
+
+    if result.returncode != 0:
+        message = result.stderr
+        if not message:
+            message = "Can't import tensorflow. " \
+                "Test process exit code: %s." % result.returncode
+        if not check_instruction_set('avx'):
+            # The process has probably crashed due to AVX unavailability
+            message += " This is likely because your CPU does not " \
+                "support AVX instructions, " \
+                "which are required for tensorflow."
+
+        raise ImportError(message)
+
+def import_tf(check=True):
+    import sys
+
+    not_found = object()
+    tf = sys.modules.get('tensorflow', not_found)
+    if tf is None:
+        import tensorflow as tf # emit the default error
+    elif tf is not not_found:
+        return tf
+
+    # Reduce output noise,
+    # https://stackoverflow.com/questions/38073432/how-to-suppress-verbose-tensorflow-logging
+    import os
+    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
+
+    if check:
+        try:
+            check_import()
+        except Exception:
+            sys.modules['tensorflow'] = None # prevent further import
+            raise
+
+    import tensorflow as tf
+
+    try:
+        tf.get_logger().setLevel('WARNING')
+    except AttributeError:
+        pass
+    try:
+        tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.WARN)
+    except AttributeError:
+        pass
+
+    # Enable eager execution in early versions to unlock dataset operations
+    eager_enabled = False
+    try:
+        tf.compat.v1.enable_eager_execution()
+        eager_enabled = True
+    except AttributeError:
+        pass
+    try:
+        if not eager_enabled:
+            tf.enable_eager_execution()
+    except AttributeError:
+        pass
+
+    return tf
diff --git a/datumaro/version.py b/datumaro/version.py
new file mode 100644
index 0000000000..8589c06387
--- /dev/null
+++ b/datumaro/version.py
@@ -0,0 +1 @@
+VERSION = '0.1.0'
\ No newline at end of file
diff --git a/docs/cli_design.mm b/docs/cli_design.mm
new file mode 100644
index 0000000000..0ff17cb299
--- /dev/null
+++ b/docs/cli_design.mm
@@ -0,0 +1,65 @@
+<!-- FreeMind mind map of the CLI design;
+     see docs/images/cli_design.png for a rendered version -->
diff --git a/docs/design.md b/docs/design.md
new file mode 100644
index 0000000000..528b2adf75
--- /dev/null
+++ b/docs/design.md
@@ -0,0 +1,185 @@
+# Datumaro
+
+## Table of contents
+
+- [Concept](#concept)
+- [RC 1 vision](#rc-1-vision)
+
+## Concept
+
+Datumaro is:
+- a tool to build composite datasets and iterate over them
+- a tool to create and maintain datasets
+  - Version control of annotations and images
+  - Publication (with removal of sensitive information)
+  - Editing
+  - Joining and splitting
+  - Exporting, format changing
+  - Image preprocessing
+- a dataset storage
+- a tool to debug datasets
+  - A network can be used to generate
+    informative data subsets (e.g. with false-positives)
+    to be analyzed further
+
+### Requirements
+
+- User interfaces
+  - a library
+  - a console tool with visualization means
+- Targets: single datasets, composite datasets, single images / videos
+- Built-in support for well-known annotation formats and datasets:
+  CVAT, COCO, PASCAL VOC, Cityscapes, ImageNet
+- Extensibility with user-provided components
+- Lightweightness - it should be easy to start working with Datumaro
+  - Minimal dependency on the environment and configuration
+  - It should be easier to use Datumaro than to write your own code
+    for computing statistics or manipulating datasets
+
+### Functionality and ideas
+
+- Blur sensitive areas on dataset images
+- Dataset annotation filters, relabelling etc.
+- Dataset augmentation
+- Calculation of statistics:
+  - Mean & std, custom stats
+- "Edit" command to modify annotations
+- Versioning (for images, annotations, subsets, sources etc., comparison)
+- Documentation generation
+- Provision of iterators for user code
+- Dataset downloading
+- Dataset generation
+- Dataset building (export in a specific format, indexing, statistics,
+  documentation)
+- Dataset exporting to other formats
+- Dataset debugging (run inference, generate dataset slices, compute statistics)
+- "Explainable AI" - highlight network attention areas ([paper](https://arxiv.org/abs/1901.04592))
+  - Black-box approach
+    - Classification, Detection, Segmentation, Captioning
+  - White-box approach
+
+### Research topics
+
+- exploration of network prediction uncertainty (aka the Bayesian approach)
+  Use case: explanation of network "quality", "stability", "certainty"
+- adversarial attacks on networks
+- dataset minification / reduction
+  Use case: removal of redundant information to reach the same network quality
+  with less training time
+- dataset expansion and filtration of additions
+  Use case: add only important data
+- guidance for key frame selection for tracking ([paper](https://arxiv.org/abs/1903.11779))
+  Use case: more effective annotation, better predictions
+
+## RC 1 vision
+
+In the first version Datumaro should be a project manager for CVAT.
+It should only consume data from CVAT. The collected dataset
+can be downloaded by the user to be operated on with the Datumaro CLI.
+
+```
+        User
+          |
+          v
+ +------------------+
+ |       CVAT       |
+ +--------v---------+       +------------------+       +--------------+
+ | Datumaro module  | ----> | Datumaro project | <---> | Datumaro CLI | <--- User
+ +------------------+       +------------------+       +--------------+
+```
+
+### Interfaces
+
+- [x] Python API for user code
+  - [x] Installation as a package
+- [x] A command-line tool for dataset manipulations
+
+### Features
+
+- Dataset format support (reading, writing)
+  - [x] Own format
+  - [x] CVAT
+  - [x] COCO
+  - [x] PASCAL VOC
+  - [x] YOLO
+  - [x] TF Detection API
+  - [ ] Cityscapes
+  - [ ] ImageNet
+
+- Dataset visualization (`show`)
+  - [ ] Ability to visualize a dataset
+    - [ ] with TensorBoard
+
+- Calculation of statistics for datasets
+  - [x] Pixel mean, std
+  - [x] Object counts (detection scenario)
+  - [x] Image-Class distribution (classification scenario)
+  - [x] Pixel-Class distribution (segmentation scenario)
+  - [ ] Image similarity clusters
+  - [ ] Custom statistics
+
+- Dataset building
+  - [x] Composite dataset building
+  - [x] Class remapping
+  - [x] Subset splitting
+  - [x] Dataset filtering (`extract`)
+  - [x] Dataset merging (`merge`)
+  - [ ] Dataset item editing (`edit`)
+
+- Dataset comparison (`diff`)
+  - [x] Annotation-annotation comparison
+  - [x] Annotation-inference comparison
+  - [x] Annotation quality estimation (for CVAT)
+    - Provide a simple method to check
+      annotation quality with a model and generate a summary
+
+- Dataset and model debugging
+  - [x] Inference explanation (`explain`)
+  - [x] Black-box approach ([RISE paper](https://arxiv.org/abs/1806.07421))
+  - [x] Ability to run a model on a dataset and read the results
+
+- CVAT-integration features
+  - [x] Task export
+    - [x] Datumaro project export
+    - [x] Dataset export
+    - [x] Original raw data (images, a video file) can be downloaded (exported)
+      together with annotations, or the dataset can just keep links
+      to the CVAT server (with S3 and other storage support in the future)
+    - [x] Be able to use local files instead of remote links
+      - [ ] Specify cache directory
+  - [x] Use case "annotate for model training"
+    - create a task
+    - annotate
+    - export the task
+    - convert to a training format
+    - train a DL model
+  - [x] Use case "annotate - reannotate problematic images - merge"
+  - [x] Use case "annotate and estimate quality"
+    - create a task
+    - annotate
+    - estimate quality of annotations
+
+### Optional features
+
+- Dataset publishing
+  - [ ] Versioning (for annotations, subsets, sources, etc.)
+  - [ ] Blur sensitive areas on images
+  - [ ] Tracking of legal information
+  - [ ] Documentation generation
+
+- Dataset building
+  - [ ] Dataset minification / Extraction of the most representative subset
+    - Use case: generate a low-precision calibration dataset
+
+- Dataset and model debugging
+  - [ ] Training visualization
+  - [ ] Inference explanation (`explain`)
+    - [ ] White-box approach
+
+### Properties
+
+- Lightweightness
+- Modularity
+- Extensibility
diff --git a/docs/developer_guide.md b/docs/developer_guide.md
new file mode 100644
index 0000000000..e2fd101d15
--- /dev/null
+++ b/docs/developer_guide.md
@@ -0,0 +1,200 @@
+## Basics
+
+The central part of the library is the `Dataset` class, which allows
+iterating over its elements. `DatasetItem`, an element of a dataset,
+represents a single dataset entry with annotations - an image, video
+sequence, audio track etc. It can contain media data and meta information,
+only annotations, or any combination of these.
+
+Basic library usage and data flow:
+
+```lang-none
+Extractors -> Dataset -> Converter
+                 |
+            Filtration
+         Transformations
+            Statistics
+             Merging
+            Inference
+         Quality Checking
+            Comparison
+                ...
+```
+
+1. Data is read (or produced) by one or many `Extractor`s and merged
+  into a `Dataset`
+1. A dataset is processed in some way
+1. A dataset is saved with a `Converter`
+
+Datumaro has a number of dataset and annotation features:
+- iteration over dataset elements
+- filtering of datasets and annotations by custom criteria
+- working with subsets (e.g. `train`, `val`, `test`)
+- computing dataset statistics
+- comparison and merging of datasets
+- various annotation operations
+
+```python
+from datumaro.components.project import Environment
+
+# Import and save a dataset
+env = Environment()
+dataset = env.make_importer('voc')('src/dir').make_dataset()
+env.converters.get('coco').convert(dataset, save_dir='dst/dir')
+```
+
+## Library contents
+
+### Dataset Formats
+
+Dataset reading is supported by `Extractor`s and `Importer`s:
+- An `Extractor` produces a list of `DatasetItem`s corresponding
+  to the dataset.
+- An `Importer` creates a project from the data source location.
+
+It is possible to add custom Extractors and Importers. To do this, you need
+to put `Extractor` and `Importer` implementations into a plugin directory.
+
+Dataset writing is supported by `Converter`s.
+A Converter produces a dataset of a specific format from dataset items.
+It is possible to add custom `Converter`s. To do this, you need to put a
+Converter implementation script into a plugin directory.
+
+### Dataset Conversions ("Transforms")
+
+A `Transform` is a function for altering a dataset and producing a new one.
+It can update dataset items, annotations, classes, and other properties.
+A list of available transforms for dataset conversions can be extended by
+adding a `Transform` implementation script into a plugin directory, as the
+sketch below illustrates.
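+
+For illustration, a minimal transform could look like the following. This is
+only a sketch: the `Transform` base class and the `transform_item` hook are
+the ones shown in the plugin example later in this document, while rebuilding
+the item by calling `DatasetItem` directly with these arguments is an
+assumption rather than a documented API.
+
+```python
+from datumaro.components.extractor import DatasetItem, Transform
+
+class RemoveBboxes(Transform):
+    # Illustrative sketch: drops all bounding box annotations
+    # from every dataset item.
+    def transform_item(self, item):
+        kept = [a for a in item.annotations if a.type.name != 'bbox']
+        # NOTE: assumed constructor arguments; the real DatasetItem
+        # signature may differ
+        return DatasetItem(id=item.id, subset=item.subset,
+            image=item.image, annotations=kept)
+```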
+
+### Model launchers
+
+A list of available launchers for model execution can be extended by
+adding a `Launcher` implementation script into a plugin directory.
+
+## Plugins
+
+Datumaro comes with a number of built-in formats and other tools,
+but it can also be extended with plugins. Plugins are optional components
+whose dependencies are not installed by default.
+In Datumaro there are several types of plugins, which include:
+- `extractor` - produces dataset items from a data source
+- `importer` - recognizes the dataset type and creates a project
+- `converter` - exports a dataset to a specific format
+- `transformation` - modifies dataset items or other properties
+- `launcher` - executes models
+
+A plugin is a regular Python module. It must be present in a plugin directory:
+- `<project_dir>/.datumaro/plugins` for project-specific plugins
+- `<datumaro_dir>/plugins` for global plugins
+
+A plugin can be used either via the `Environment` class instance,
+or by regular module importing:
+
+```python
+from datumaro.components.project import Environment, Project
+from datumaro.plugins.yolo_format.converter import YoloConverter
+
+# Import a dataset
+dataset = Environment().make_importer('voc')(src_dir).make_dataset()
+
+# Load an existing project, save the dataset in some project-specific format
+project = Project.load('project/dir')
+project.env.converters.get('custom_format').convert(dataset, save_dir=dst_dir)
+
+# Save the dataset in some built-in format
+Environment().converters.get('yolo').convert(dataset, save_dir=dst_dir)
+YoloConverter.convert(dataset, save_dir=dst_dir)
+```
+
+### Writing a plugin
+
+A plugin is a Python module with any name, which exports some symbols.
+To export a symbol, inherit it from one of the special classes:
+
+```python
+from datumaro.components.extractor import Importer, SourceExtractor, Transform
+from datumaro.components.launcher import Launcher
+from datumaro.components.converter import Converter
+```
+
+The `exports` list of the module can be used to override the default behaviour:
+```python
+class MyComponent1: ...
+class MyComponent2: ...
+exports = [MyComponent2] # exports only MyComponent2
+```
+
+There is also an additional class to modify plugin appearance in the command
+line:
+
+```python
+from datumaro.components.cli_plugin import CliPlugin
+```
+
+#### Plugin example
+
+```
+datumaro/plugins/
+- my_plugin1/file1.py
+- my_plugin1/file2.py
+- my_plugin2.py
+```
+
+`my_plugin1/file2.py` contents:
+
+```python
+from datumaro.components.cli_plugin import CliPlugin
+from datumaro.components.extractor import Transform
+from .file1 import something, useful
+
+class MyTransform(Transform, CliPlugin):
+    """
+    Some description. The text will be displayed in the command line output.
+    """
+
+    NAME = "custom_name" # could be generated automatically
+
+    @classmethod
+    def build_cmdline_parser(cls, **kwargs):
+        parser = super().build_cmdline_parser(**kwargs)
+        parser.add_argument('-q', help="Very useful parameter")
+        return parser
+
+    def __init__(self, extractor, q):
+        super().__init__(extractor)
+        self.q = q
+
+    def transform_item(self, item):
+        return item
+```
+
+`my_plugin2.py` contents:
+
+```python
+from datumaro.components.extractor import SourceExtractor
+
+class MyFormat: ...
+class MyFormatExtractor(SourceExtractor): ...
+
+exports = [MyFormat] # explicit exports declaration
+# MyFormatExtractor won't be exported
+```
+
+## Command-line
+
+Basically, the interface is divided into contexts and single commands.
+Contexts are semantically grouped commands related to a single topic or target.
+Single commands are handy, shorter alternatives for the most frequently used
+commands, as well as special commands that are hard to fit into any specific
+context. [Docker](https://www.docker.com/) is an example of a similar approach.
+
+![cli-design-image](images/cli_design.png)
+
+- The diagram above was created with [FreeMind](http://freemind.sourceforge.net/wiki/index.php/Main_Page)
+
+The Model-View-ViewModel (MVVM) UI pattern is used.
+
+![mvvm-image](images/mvvm.png)
diff --git a/docs/images/cli_design.png b/docs/images/cli_design.png
new file mode 100644
index 0000000000..f83b1430ec
Binary files /dev/null and b/docs/images/cli_design.png differ
diff --git a/docs/images/mvvm.png b/docs/images/mvvm.png
new file mode 100644
index 0000000000..88257123ac
Binary files /dev/null and b/docs/images/mvvm.png differ
diff --git a/docs/user_manual.md b/docs/user_manual.md
new file mode 100644
index 0000000000..9e68f8f931
--- /dev/null
+++ b/docs/user_manual.md
@@ -0,0 +1,1003 @@
+# User manual
+
+## Contents
+
+- [Installation](#installation)
+- [Interfaces](#interfaces)
+- [Supported dataset formats and annotations](#supported-formats)
+- [Command line workflow](#command-line-workflow)
+  - [Project structure](#project-structure)
+- [Command reference](#command-reference)
+  - [Convert datasets](#convert-datasets)
+  - [Create project](#create-project)
+  - [Add and remove data](#add-and-remove-data)
+  - [Import project](#import-project)
+  - [Filter project](#filter-project)
+  - [Update project (merge)](#update-project)
+  - [Merge projects](#merge-projects)
+  - [Export project](#export-project)
+  - [Compare projects](#compare-projects)
+  - [Obtaining project info](#get-project-info)
+  - [Obtaining project statistics](#get-project-statistics)
+  - [Register model](#register-model)
+  - [Run inference](#run-inference)
+  - [Run inference explanation](#explain-inference)
+  - [Transform project](#transform-project)
+- [Extending](#extending)
+- [Links](#links)
+
+## Installation
+
+### Prerequisites
+
+- Python (3.5+)
+- OpenVINO (optional)
+
+### Installation steps
+
+Optionally, set up a virtual environment:
+
+``` bash
+python -m pip install virtualenv
+python -m virtualenv venv
+. venv/bin/activate
+```
+
+Install:
+``` bash
+pip install 'git+https://github.com/opencv/cvat#egg=datumaro&subdirectory=datumaro'
+```
+
+> You can change the installation branch with `.../cvat@<branch>#egg...`
+> Also note the `--force-reinstall` parameter in this case.
+
+## Interfaces
+
+As a standalone tool:
+
+``` bash
+datum --help
+```
+
+As a python module:
+> The directory containing Datumaro should be in the `PYTHONPATH`
+> environment variable or `cvat/datumaro/` should be the current directory.
+
+``` bash
+python -m datumaro --help
+python datumaro/ --help
+python datum.py --help
+```
+
+As a python library:
+
+``` python
+import datumaro
+```
+
+## Supported Formats
+
+List of supported formats:
+- MS COCO (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`*)
+  - [Format specification](http://cocodataset.org/#format-data)
+  - [Dataset example](../tests/assets/coco_dataset)
+  - `labels` are our extension - like `instances` with only `category_id`
+- PASCAL VOC (`classification`, `detection`, `segmentation` (class, instances), `action_classification`, `person_layout`)
+  - [Format specification](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/index.html)
+  - [Dataset example](../tests/assets/voc_dataset)
+- YOLO (`bboxes`)
+  - [Format specification](https://github.com/AlexeyAB/darknet#how-to-train-pascal-voc-data)
+  - [Dataset example](../tests/assets/yolo_dataset)
+- TF Detection API (`bboxes`, `masks`)
+  - Format specifications: [bboxes](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md), [masks](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/instance_segmentation.md)
+  - [Dataset example](../tests/assets/tf_detection_api_dataset)
+- MOT sequences
+  - [Format specification](https://arxiv.org/pdf/1906.04567.pdf)
+  - [Dataset example](../tests/assets/mot_dataset)
+- CVAT
+  - [Format specification](https://github.com/opencv/cvat/blob/develop/cvat/apps/documentation/xml_format.md)
+  - [Dataset example](../tests/assets/cvat_dataset)
+- LabelMe
+  - [Format specification](http://labelme.csail.mit.edu/Release3.0)
+  - [Dataset example](../tests/assets/labelme_dataset)
+
+List of supported annotation types:
+- Labels
+- Bounding boxes
+- Polygons
+- Polylines
+- (Segmentation) Masks
+- (Key-)Points
+- Captions
+
+## Command line workflow
+
+The key object is a project, so most CLI commands operate on projects.
+However, there are a few commands that operate on datasets directly.
+A project is a combination of the project's own dataset, a number of
+external data sources and an environment.
+An empty Project can be created with the `project create` command;
+an existing dataset can be imported with the `project import` command.
+A typical way to obtain projects is to export tasks in the CVAT UI.
+
+If you want to interact with models, you need to add them to the project
+first.
+
+### Project structure
+
+```
+└── project/
+    ├── .datumaro/
+    |   ├── config.yml
+    │   ├── .git/
+    │   ├── models/
+    │   └── plugins/
+    │       ├── plugin1/
+    │       |   ├── file1.py
+    │       |   └── file2.py
+    │       ├── plugin2.py
+    │       ├── custom_extractor1.py
+    │       └── ...
+    ├── dataset/
+    └── sources/
+        ├── source1
+        └── ...
+```
+
+## Command reference
+
+> **Note**: command invocation syntax is subject to change,
+> **always refer to the command --help output**
+
+Available CLI commands:
+![CLI design doc](images/cli_design.png)
+
+### Convert datasets
+
+This command converts a dataset from one format into another. In fact, it
+is a combination of `project import` and `project export` and just provides
+a simpler way to obtain the same result when no extra options are needed.
+A list of supported formats can be found in the `--help` output of this
+command.
+
+Usage:
+
+``` bash
+datum convert --help
+
+datum convert \
+    -i <src path> \
+    -if <src format> \
+    -o <dst path> \
+    -f <dst format> \
+    -- [extra parameters for output format]
+```
+
+Example: convert a VOC-like dataset to a COCO-like one:
+
+``` bash
+datum convert --input-format voc --input-path <path/to/voc> \
+    --output-format coco
+```
+
+### Import project
+
+This command creates a Project from an existing dataset.
+
+Supported formats are listed in the command help. Check [extending tips](#extending)
+for information on extra format support.
+
+Usage:
+
+``` bash
+datum project import --help
+
+datum project import \
+    -i <dataset path> \
+    -o <project dir> \
+    -f <format>
+```
+
+Example: create a project from a COCO-like dataset
+
+``` bash
+datum project import \
+    -i /home/coco_dir \
+    -o /home/project_dir \
+    -f coco
+```
+
+An _MS COCO_-like dataset should have the following directory structure:
+
+```
+COCO/
+├── annotations/
+│   ├── instances_val2017.json
+│   ├── instances_train2017.json
+├── images/
+│   ├── val2017
+│   ├── train2017
+```
+
+Everything after the last `_` is considered a subset name in the COCO format.
+
+### Create project
+
+The command creates an empty project. Once a Project is created, there are
+a few options to interact with it.
+
+Usage:
+
+``` bash
+datum project create --help
+
+datum project create \
+    -o <project dir>
+```
+
+Example: create an empty project `my_dataset`
+
+``` bash
+datum project create -o my_dataset/
+```
+
+### Add and remove data
+
+A Project can contain a number of external Data Sources. Each Data Source
+describes a way to produce dataset items. A Project combines dataset items
+from all the sources and its own dataset into one composite dataset. You can
+manage project sources with commands in the `source` command line context.
+
+Datasets come in a wide variety of formats. Each dataset
+format defines its own data structure and rules on how to
+interpret the data. For example, the following data structure
+is used in the COCO format:
+
+```
+/dataset/
+- /images/<id>.jpg
+- /annotations/
+```
+
+Supported formats are listed in the command help. Check [extending tips](#extending)
+for information on extra format support.
+
+Usage:
+
+``` bash
+datum source add --help
+datum source remove --help
+
+datum source add \
+    path <path> \
+    -p <project dir> \
+    -n <name>
+
+datum source remove \
+    -p <project dir> \
+    -n <name>
+```
+
+Example: create a project from a bunch of different annotations and images,
+and generate a TFrecord for the TF Detection API for model training
+
+``` bash
+datum project create
+# 'default' is the name of the subset below
+datum source add path <path/to/coco/instances_default.json> -f coco_instances
+datum source add path <path/to/cvat/default.xml> -f cvat
+datum source add path <path/to/voc> -f voc_detection
+datum source add path <path/to/datumaro/default.json> -f datumaro
+datum source add path <path/to/images/dir> -f image_dir
+datum project export -f tf_detection_api
+```
+
+### Filter project
+
+This command allows creating a sub-Project from a Project. The new project
+includes only items satisfying some condition. [XPath](https://devhints.io/xpath)
+is used as the query format.
+
+There are several filtering modes available (the `-m/--mode` parameter).
+Supported modes:
+- `i`, `items`
+- `a`, `annotations`
+- `i+a`, `a+i`, `items+annotations`, `annotations+items`
+
+When filtering annotations, use the `items+annotations`
+mode to indicate that dataset items without annotations should be
+removed. To select an annotation, write an XPath that
+returns `annotation` elements (see the examples).
+
+Usage:
+
+``` bash
+datum project filter --help
+
+datum project filter \
+    -p <project dir> \
+    -e '<xpath filter expression>'
+```
+
+Example: extract a dataset with only images whose `width` < `height`
+
+``` bash
+datum project filter \
+    -p test_project \
+    -e '/item[image/width < image/height]'
+```
+
+Example: extract a dataset with only large annotations of the `cat` class and
+any non-`person` annotations
+
+``` bash
+datum project filter \
+    -p test_project \
+    --mode annotations -e '/item/annotation[(label="cat" and area > 99.5) or label!="person"]'
+```
+
+Example: extract a dataset with only occluded annotations, remove empty images
+
+``` bash
+datum project filter \
+    -p test_project \
+    -m i+a -e '/item/annotation[occluded="True"]'
+```
+
+Item representations are available with the `--dry-run` parameter:
+
+``` xml
+<item>
+  <id>290768</id>
+  <subset>minival2014</subset>
+  <image>
+    <width>612</width>
+    <height>612</height>
+    <depth>3</depth>
+  </image>
+  <annotation>
+    <id>80154</id>
+    <type>bbox</type>
+    <label_id>39</label_id>
+    <x>264.59</x>
+    <y>150.25</y>
+    <w>11.199999999999989</w>
+    <h>42.31</h>
+    <area>473.87199999999956</area>
+  </annotation>
+  <annotation>
+    <id>669839</id>
+    <type>bbox</type>
+    <label_id>41</label_id>
+    <x>163.58</x>
+    <y>191.75</y>
+    <w>76.98999999999998</w>
+    <h>73.63</h>
+    <area>5668.773699999998</area>
+  </annotation>
+  ...
+</item>
+```
+
+### Update project
+
+This command updates items in a project from another one
+(check [Merge Projects](#merge-projects) for complex merging).
+
+Usage:
+
+``` bash
+datum project merge --help
+
+datum project merge \
+    -p <project dir> \
+    -o <output dir> \
+    <other project dir>
+```
+
+Example: update annotations in the `first_project` with annotations
+from the `second_project` and save the result as `merged_project`
+
+``` bash
+datum project merge \
+    -p first_project \
+    -o merged_project \
+    second_project
+```
+
+### Merge projects
+
+This command merges items from two or more projects and checks annotations
+for errors.
+
+Spatial annotations are compared by distance and intersected; labels and
+attributes are selected by voting.
+Merge conflicts, missing items and annotations, and other errors are saved
+into a `.json` file.
+
+Usage:
+
+``` bash
+datum merge --help
+
+datum merge <project dirs>
+```
+
+Example: merge 4 (partially-)intersecting projects,
+- consider voting successful when there are 3+ same votes
+- consider shapes intersecting when IoU >= 0.6
+- check annotation groups to have `person`, `hand`, `head` and `foot`
+  (`?` for optional)
+
+``` bash
+datum merge project1/ project2/ project3/ project4/ \
+    --quorum 3 \
+    -iou 0.6 \
+    --groups 'person,hand?,head,foot?'
+```
+
+### Export project
+
+This command exports a Project as a dataset in some format.
+
+Supported formats are listed in the command help. Check [extending tips](#extending)
+for information on extra format support.
+
+Usage:
+
+``` bash
+datum project export --help
+
+datum project export \
+    -p <project dir> \
+    -o <output dir> \
+    -f <format> \
+    -- [additional format parameters]
+```
+
+Example: save the project as a VOC-like dataset, include images, convert
+images to `PNG`
+
+``` bash
+datum project export \
+    -p test_project \
+    -o test_project-export \
+    -f voc \
+    -- --save-images --image-ext='.png'
+```
+
+### Get project info
+
+This command outputs project status information.
+
+Usage:
+
+``` bash
+datum project info --help
+
+datum project info \
+    -p <project dir>
+```
+
+Example:
+
+``` bash
+datum project info -p /test_project
+
+Project:
+  name: test_project
+  location: /test_project
+Sources:
+  source 'instances_minival2014':
+    format: coco_instances
+    url: /coco_like/annotations/instances_minival2014.json
+Dataset:
+  length: 5000
+  categories: label
+    label:
+      count: 80
+      labels: person, bicycle, car, motorcycle (and 76 more)
+  subsets: minival2014
+    subset 'minival2014':
+      length: 5000
+      categories: label
+        label:
+          count: 80
+          labels: person, bicycle, car, motorcycle (and 76 more)
+```
+
+### Get project statistics
+
+This command computes various project statistics, such as:
+- image mean and std. dev.
+- class and attribute balance
+- mask pixel balance
+- segment area distribution
+
+Usage:
+
+``` bash
+datum project stats --help
+
+datum project stats \
+    -p <project dir>
+```
+
+Example:
+ +``` bash +datum project stats -p /test_project + +{ + "annotations": { + "labels": { + "attributes": { + "gender": { + "count": 358, + "distribution": { + "female": [ + 149, + 0.41620111731843573 + ], + "male": [ + 209, + 0.5837988826815642 + ] + }, + "values count": 2, + "values present": [ + "female", + "male" + ] + }, + "view": { + "count": 340, + "distribution": { + "__undefined__": [ + 4, + 0.011764705882352941 + ], + "front": [ + 54, + 0.1588235294117647 + ], + "left": [ + 14, + 0.041176470588235294 + ], + "rear": [ + 235, + 0.6911764705882353 + ], + "right": [ + 33, + 0.09705882352941177 + ] + }, + "values count": 5, + "values present": [ + "__undefined__", + "front", + "left", + "rear", + "right" + ] + } + }, + "count": 2038, + "distribution": { + "car": [ + 340, + 0.16683022571148184 + ], + "cyclist": [ + 194, + 0.09519136408243375 + ], + "head": [ + 354, + 0.17369970559371933 + ], + "ignore": [ + 100, + 0.04906771344455348 + ], + "left_hand": [ + 238, + 0.11678115799803729 + ], + "person": [ + 358, + 0.17566241413150147 + ], + "right_hand": [ + 77, + 0.037782139352306184 + ], + "road_arrows": [ + 326, + 0.15996074582924436 + ], + "traffic_sign": [ + 51, + 0.025024533856722278 + ] + } + }, + "segments": { + "area distribution": [ + { + "count": 1318, + "max": 11425.1, + "min": 0.0, + "percent": 0.9627465303140978 + }, + { + "count": 1, + "max": 22850.2, + "min": 11425.1, + "percent": 0.0007304601899196494 + }, + { + "count": 0, + "max": 34275.3, + "min": 22850.2, + "percent": 0.0 + }, + { + "count": 0, + "max": 45700.4, + "min": 34275.3, + "percent": 0.0 + }, + { + "count": 0, + "max": 57125.5, + "min": 45700.4, + "percent": 0.0 + }, + { + "count": 0, + "max": 68550.6, + "min": 57125.5, + "percent": 0.0 + }, + { + "count": 0, + "max": 79975.7, + "min": 68550.6, + "percent": 0.0 + }, + { + "count": 0, + "max": 91400.8, + "min": 79975.7, + "percent": 0.0 + }, + { + "count": 0, + "max": 102825.90000000001, + "min": 91400.8, + "percent": 0.0 + }, + { + "count": 50, + "max": 114251.0, + "min": 102825.90000000001, + "percent": 0.036523009495982466 + } + ], + "avg. area": 5411.624543462382, + "pixel distribution": { + "car": [ + 13655, + 0.0018431496518735067 + ], + "cyclist": [ + 939005, + 0.12674674030446592 + ], + "head": [ + 0, + 0.0 + ], + "ignore": [ + 5501200, + 0.7425510702956085 + ], + "left_hand": [ + 0, + 0.0 + ], + "person": [ + 954654, + 0.12885903974805205 + ], + "right_hand": [ + 0, + 0.0 + ], + "road_arrows": [ + 0, + 0.0 + ], + "traffic_sign": [ + 0, + 0.0 + ] + } + } + }, + "annotations by type": { + "bbox": { + "count": 548 + }, + "caption": { + "count": 0 + }, + "label": { + "count": 0 + }, + "mask": { + "count": 0 + }, + "points": { + "count": 669 + }, + "polygon": { + "count": 821 + }, + "polyline": { + "count": 0 + } + }, + "annotations count": 2038, + "dataset": { + "image mean": [ + 107.06903686941979, + 79.12831698580979, + 52.95829558185416 + ], + "image std": [ + 49.40237673503467, + 43.29600731496902, + 35.47373007603151 + ], + "images count": 100 + }, + "images count": 100, + "subsets": {}, + "unannotated images": [ + "img00051", + "img00052", + "img00053", + "img00054", + "img00055", + ], + "unannotated images count": 5 +} +``` + +
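+
+For reference, the `image mean` and `image std` values in this report are
+per-channel statistics computed over the pixels of all dataset images. A
+rough equivalent in plain NumPy - just a sketch of the math, not Datumaro's
+actual implementation - could be:
+
+``` python
+import numpy as np
+
+def dataset_mean_std(images):
+    # images: an iterable of HxWxC arrays with a common channel order
+    pixels = np.concatenate(
+        [img.reshape(-1, img.shape[-1]).astype(np.float64) for img in images])
+    # per-channel mean and standard deviation
+    return pixels.mean(axis=0), pixels.std(axis=0)
+```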
+
+### Register model
+
+Supported models:
+- OpenVINO
+- Custom models via custom `launchers`
+
+Usage:
+
+``` bash
+datum model add --help
+```
+
+Example: register an OpenVINO model
+
+A model consists of a graph description and weights. There is also a script
+used to convert model outputs to internal data structures.
+
+``` bash
+datum project create
+datum model add \
+    -n openvino \
+    -d <path_to_xml> -w <path_to_bin> -i <path_to_interpretation_script>
+```
+
+Interpretation script for an OpenVINO detection model (`convert.py`):
+
+``` python
+from datumaro.components.extractor import *
+
+max_det = 10
+conf_thresh = 0.1
+
+def process_outputs(inputs, outputs):
+    # inputs = model input, an array of images, shape = (N, H, W, C)
+    # outputs = model output, shape = (N, 1, K, 7)
+    # results = conversion result, [ [ Annotation, ... ], ... ]
+    results = []
+    for input, output in zip(inputs, outputs):
+        input_height, input_width = input.shape[:2]
+        detections = output[0]
+        image_results = []
+        for det in detections:
+            label = int(det[1])
+            conf = det[2]
+            if conf <= conf_thresh:
+                continue
+
+            x = max(int(det[3] * input_width), 0)
+            y = max(int(det[4] * input_height), 0)
+            w = min(int(det[5] * input_width - x), input_width)
+            h = min(int(det[6] * input_height - y), input_height)
+            image_results.append(Bbox(x, y, w, h,
+                label=label, attributes={'score': conf}))
+
+        results.append(image_results[:max_det])
+
+    return results
+
+def get_categories():
+    # Optionally, provide output categories - a label map etc.
+    # Example:
+    label_categories = LabelCategories()
+    label_categories.add('person')
+    label_categories.add('car')
+    return { AnnotationType.label: label_categories }
+```
+
+### Run model
+
+This command applies a model to dataset images and produces a new project.
+
+Usage:
+
+``` bash
+datum model run --help
+
+datum model run \
+    -p <project dir> \
+    -m <model name> \
+    -o <save dir>
+```
+
+Example: launch inference on a dataset
+
+``` bash
+datum project import <...>
+datum model add mymodel <...>
+datum model run -m mymodel -o inference
+```
+
+### Compare projects
+
+The command compares two datasets and saves the results in the
+specified directory. The current project is considered to be
+"ground truth".
+
+``` bash
+datum project diff --help
+
+datum project diff <other project dir> -o <save dir>
+```
+
+Example: compare a dataset with model inference
+
+``` bash
+datum project import <...>
+datum model add mymodel <...>
+datum project transform <...> -o inference
+datum project diff inference -o diff
+```
+
+### Explain inference
+
+Usage:
+
+``` bash
+datum explain --help
+
+datum explain \
+    -m <model name> \
+    -o <save dir> \
+    -t <target> \
+    <method> \
+    <method parameters>
+```
+
+Example: run inference explanation on a single image with visualization
+
+``` bash
+datum project create <...>
+datum model add mymodel <...>
+datum explain \
+    -m mymodel \
+    -t 'image.png' \
+    rise \
+    -s 1000 --progressive
+```
+
+### Transform Project
+
+This command allows modifying images or annotations in a project all at once.
+
+Usage:
+
+``` bash
+datum project transform --help
+
+datum project transform \
+    -p <project dir> \
+    -o <output dir> \
+    -t <transform name> \
+    -- [extra transform options]
+```
+
+Example: split a dataset randomly into `train` and `test` subsets with a 2:1
+ratio
+
+``` bash
+datum project transform -t random_split -- --subset train:.67 --subset test:.33
+```
+
+Example: convert polygons to masks, masks to boxes etc.:
+
+``` bash
+datum project transform -t boxes_to_masks
+datum project transform -t masks_to_polygons
+datum project transform -t polygons_to_masks
+datum project transform -t shapes_to_boxes
+```
+
+Example: remap dataset labels, `person` to `car` and `cat` to `dog`,
+keep `bus`, remove others
+
+``` bash
+datum project transform -t remap_labels -- \
+    -l person:car -l bus:bus -l cat:dog \
+    --default delete
+```
+
+Example: rename dataset items by a regular expression
+- Replace `pattern` with `replacement`
+- Remove `frame_` from item ids
+
+``` bash
+datum project transform -t rename -- -e '|pattern|replacement|'
+datum project transform -t rename -- -e '|frame_(\d+)|\\1|'
+```
+
+## Extending
+
+There are a few ways to extend and customize Datumaro behaviour; all of them
+are based on plugins.
+Check [our contribution guide](../CONTRIBUTING.md) for details on plugin
+implementation. In general, a plugin is a Python code file. It must be put
+into a plugin directory:
+- `<project_dir>/.datumaro/plugins` for project-specific plugins
+- `<datumaro_dir>/plugins` for global plugins
+
+### Dataset Formats
+
+Dataset reading is supported by Extractors and Importers.
+An Extractor produces a list of dataset items corresponding
+to the dataset. An Importer creates a project from the data source location.
+It is possible to add custom Extractors and Importers. To do this, you need
+to put Extractor and Importer implementation scripts into a plugin directory.
+
+Dataset writing is supported by Converters.
+A Converter produces a dataset of a specific format from dataset items.
+It is possible to add custom Converters. To do this, you need to put a
+Converter implementation script into a plugin directory.
+
+### Dataset Conversions ("Transforms")
+
+A Transform is a function for altering a dataset and producing a new one.
+It can update dataset items, annotations, classes, and other properties.
+A list of available transforms for dataset conversions can be extended by
+adding a Transform implementation script into a plugin directory.
+
+### Model launchers
+
+A list of available launchers for model execution can be extended by adding
+a Launcher implementation script into a plugin directory, for example as in
+the sketch below.
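+
+A trivial launcher plugin could look like the following. This is only a
+sketch under assumptions: the `Launcher` import path matches the developer
+guide, but the `launch` method signature and the result format (one list of
+annotations per input) are assumed here - check the built-in OpenVINO
+launcher for the actual interface.
+
+``` python
+from datumaro.components.launcher import Launcher
+
+class NullLauncher(Launcher):
+    # Illustrative "model" that returns no annotations for any input.
+    # NOTE: the launch() signature and the expected return value
+    # (a list of annotation lists, one per input) are assumptions.
+    def launch(self, inputs):
+        return [[] for _ in inputs]
+
+exports = [NullLauncher]
+```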
+ +## Links +- [TensorFlow detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md) +- [How to convert model to OpenVINO format](https://docs.openvinotoolkit.org/latest/_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_Object_Detection_API_Models.html) +- [Model conversion script example](https://github.com/opencv/cvat/blob/3e09503ba6c6daa6469a6c4d275a5a8b168dfa2c/components/tf_annotation/install.sh#L23) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000..6bc3c7ee79 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,12 @@ +attrs>=19.3.0 +Cython>=0.27.3 # include before pycocotools +defusedxml>=0.6.0 +GitPython>=3.0.8 +lxml>=4.4.1 +matplotlib>=3.3.1 +opencv-python-headless>=4.1.0.25 +Pillow>=6.1.0 +pycocotools>=2.0.0 +PyYAML>=5.3.1 +scikit-image>=0.15.0 +tensorboardX>=1.8 diff --git a/setup.py b/setup.py new file mode 100644 index 0000000000..cf6d043303 --- /dev/null +++ b/setup.py @@ -0,0 +1,73 @@ + +# Copyright (C) 2019-2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import os.path as osp +import re +import setuptools + + +def find_version(file_path=None): + if not file_path: + file_path = osp.join(osp.dirname(osp.abspath(__file__)), + 'datumaro', 'version.py') + + with open(file_path, 'r') as version_file: + version_text = version_file.read() + + # PEP440: + # https://www.python.org/dev/peps/pep-0440/#appendix-b-parsing-version-strings-with-regular-expressions + pep_regex = r'([1-9]\d*!)?(0|[1-9]\d*)(\.(0|[1-9]\d*))*((a|b|rc)(0|[1-9]\d*))?(\.post(0|[1-9]\d*))?(\.dev(0|[1-9]\d*))?' + version_regex = r'VERSION\s*=\s*.(' + pep_regex + ').' + match = re.match(version_regex, version_text) + if not match: + raise RuntimeError("Failed to find version string in '%s'" % file_path) + + version = version_text[match.start(1) : match.end(1)] + return version + + +with open('README.md', 'r') as fh: + long_description = fh.read() + +setuptools.setup( + name="datumaro", + version=find_version(), + author="Intel", + author_email="maxim.zhiltsov@intel.com", + description="Dataset Management Framework (Datumaro)", + long_description=long_description, + long_description_content_type="text/markdown", + url="https://github.com/opencv/cvat/datumaro", + packages=setuptools.find_packages(exclude=['tests*']), + classifiers=[ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + ], + python_requires='>=3.5', + install_requires=[ + 'attrs', + 'defusedxml', + 'GitPython', + 'lxml', + 'matplotlib', + 'numpy', + 'opencv-python', + 'Pillow', + 'pycocotools', + 'PyYAML', + 'scikit-image', + 'tensorboardX', + ], + extras_require={ + 'tf': ['tensorflow'], + 'tf-gpu': ['tensorflow-gpu'], + }, + entry_points={ + 'console_scripts': [ + 'datum=datumaro.cli.__main__:main', + ], + }, +) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/assets/coco_dataset/annotations/instances_val.json b/tests/assets/coco_dataset/annotations/instances_val.json new file mode 100644 index 0000000000..b5d9bd8697 --- /dev/null +++ b/tests/assets/coco_dataset/annotations/instances_val.json @@ -0,0 +1,59 @@ +{ + "licenses": [ + { + "name": "", + "id": 0, + "url": "" + } + ], + "info": { + "contributor": "", + "date_created": "", + "description": "", + "url": "", + "version": "", + "year": "" + }, + "categories": [ + { + "id": 1, + "name": "TEST", + "supercategory": 
"" + } + ], + "images": [ + { + "id": 1, + "width": 5, + "height": 10, + "file_name": "000000000001.jpg", + "license": 0, + "flickr_url": "", + "coco_url": "", + "date_captured": 0 + } + ], + "annotations": [ + { + "id": 1, + "image_id": 1, + "category_id": 1, + "segmentation": [[0, 0, 1, 0, 1, 2, 0, 2]], + "area": 2, + "bbox": [0, 0, 1, 2], + "iscrowd": 0 + }, + { + "id": 2, + "image_id": 1, + "category_id": 1, + "segmentation": { + "counts": [0, 10, 5, 5, 5, 5, 0, 10, 10, 0], + "size": [10, 5] + }, + "area": 30, + "bbox": [0, 0, 10, 4], + "iscrowd": 1 + } + ] + } diff --git a/tests/assets/coco_dataset/images/val/000000000001.jpg b/tests/assets/coco_dataset/images/val/000000000001.jpg new file mode 100644 index 0000000000..8bce84d3bf Binary files /dev/null and b/tests/assets/coco_dataset/images/val/000000000001.jpg differ diff --git a/tests/assets/cvat_dataset/for_images/images/img0.jpg b/tests/assets/cvat_dataset/for_images/images/img0.jpg new file mode 100644 index 0000000000..9d28e0c15e Binary files /dev/null and b/tests/assets/cvat_dataset/for_images/images/img0.jpg differ diff --git a/tests/assets/cvat_dataset/for_images/images/img1.jpg b/tests/assets/cvat_dataset/for_images/images/img1.jpg new file mode 100644 index 0000000000..ee889d2269 Binary files /dev/null and b/tests/assets/cvat_dataset/for_images/images/img1.jpg differ diff --git a/tests/assets/cvat_dataset/for_images/train.xml b/tests/assets/cvat_dataset/for_images/train.xml new file mode 100644 index 0000000000..023464840d --- /dev/null +++ b/tests/assets/cvat_dataset/for_images/train.xml @@ -0,0 +1,45 @@ + + 1.1 + + + True + annotation + + + + + + + + + true + v3 + + + + + + + + diff --git a/tests/assets/cvat_dataset/for_video/annotations.xml b/tests/assets/cvat_dataset/for_video/annotations.xml new file mode 100644 index 0000000000..5a68f811a2 --- /dev/null +++ b/tests/assets/cvat_dataset/for_video/annotations.xml @@ -0,0 +1,92 @@ + + + 1.1 + + + 5 + v1 + 4 + interpolation + 2 + + 2020-04-23 08:57:24.614217+00:00 + 2020-04-23 09:04:48.168008+00:00 + 10 + 19 + step=3 + True + + + + + + + + 3 + 0 + 3 + http://localhost:7000/?id=3 + + + 4 + 2 + 3 + http://localhost:7000/?id=4 + + + + max + + + + + 25 + 20 + + + 2020-04-23 09:05:02.335612+00:00 + t.mp4 + + + + + + + + + + + + hgkf + + + jk + + + + + + + + + diff --git a/tests/assets/cvat_dataset/for_video/images/frame_000010.png b/tests/assets/cvat_dataset/for_video/images/frame_000010.png new file mode 100644 index 0000000000..14996e0c4f Binary files /dev/null and b/tests/assets/cvat_dataset/for_video/images/frame_000010.png differ diff --git a/tests/assets/cvat_dataset/for_video/images/frame_000013.png b/tests/assets/cvat_dataset/for_video/images/frame_000013.png new file mode 100644 index 0000000000..14996e0c4f Binary files /dev/null and b/tests/assets/cvat_dataset/for_video/images/frame_000013.png differ diff --git a/tests/assets/labelme_dataset/Masks/img1_mask_1.png b/tests/assets/labelme_dataset/Masks/img1_mask_1.png new file mode 100644 index 0000000000..a37c5508f9 Binary files /dev/null and b/tests/assets/labelme_dataset/Masks/img1_mask_1.png differ diff --git a/tests/assets/labelme_dataset/Masks/img1_mask_5.png b/tests/assets/labelme_dataset/Masks/img1_mask_5.png new file mode 100644 index 0000000000..c20e4871ae Binary files /dev/null and b/tests/assets/labelme_dataset/Masks/img1_mask_5.png differ diff --git a/tests/assets/labelme_dataset/Scribbles/img1_scribble_1.png b/tests/assets/labelme_dataset/Scribbles/img1_scribble_1.png new file mode 100644 index 
0000000000..6a582819f3 Binary files /dev/null and b/tests/assets/labelme_dataset/Scribbles/img1_scribble_1.png differ diff --git a/tests/assets/labelme_dataset/Scribbles/img1_scribble_5.png b/tests/assets/labelme_dataset/Scribbles/img1_scribble_5.png new file mode 100644 index 0000000000..415e1f88b2 Binary files /dev/null and b/tests/assets/labelme_dataset/Scribbles/img1_scribble_5.png differ diff --git a/tests/assets/labelme_dataset/img1.png b/tests/assets/labelme_dataset/img1.png new file mode 100644 index 0000000000..26f7b564ab Binary files /dev/null and b/tests/assets/labelme_dataset/img1.png differ diff --git a/tests/assets/labelme_dataset/img1.xml b/tests/assets/labelme_dataset/img1.xml new file mode 100644 index 0000000000..ff8ae1b46e --- /dev/null +++ b/tests/assets/labelme_dataset/img1.xml @@ -0,0 +1 @@ +img1.pngexample_folderThe MIT-CSAIL database of objects and scenesLabelMe Webtoolwindow0025-May-2012 00:09:480admin433445344537433777102license plate00no27-Jul-2014 02:58:501brussell58666268img1_mask_1.png58666268img1_scribble_1.pngo100yesa13,415-Nov-2019 14:38:512anonymous3012422124261522181422122712q100nokj215-Nov-2019 14:39:003anonymous352143224028283131223225b100yeshg215-Nov-2019 14:39:094bounding_boxanonymous1319231923301330m100nod615-Nov-2019 14:39:305bounding_boxanonymous56147023img1_mask_5.png55137023img1_scribble_5.pnghg00nogfd lkj lkj hi515-Nov-2019 14:41:576anonymous642174247232623460276222 \ No newline at end of file diff --git a/tests/assets/mot_dataset/gt/gt.txt b/tests/assets/mot_dataset/gt/gt.txt new file mode 100644 index 0000000000..f4b7c0d469 --- /dev/null +++ b/tests/assets/mot_dataset/gt/gt.txt @@ -0,0 +1 @@ +1,-1,0,4,4,8,1,3,1.0 diff --git a/tests/assets/mot_dataset/gt/labels.txt b/tests/assets/mot_dataset/gt/labels.txt new file mode 100644 index 0000000000..6d9c393d86 --- /dev/null +++ b/tests/assets/mot_dataset/gt/labels.txt @@ -0,0 +1,10 @@ +label_0 +label_1 +label_2 +label_3 +label_4 +label_5 +label_6 +label_7 +label_8 +label_9 \ No newline at end of file diff --git a/tests/assets/mot_dataset/img1/000001.jpg b/tests/assets/mot_dataset/img1/000001.jpg new file mode 100644 index 0000000000..3588867b5a Binary files /dev/null and b/tests/assets/mot_dataset/img1/000001.jpg differ diff --git a/tests/assets/pytorch_launcher/__init__.py b/tests/assets/pytorch_launcher/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/assets/pytorch_launcher/model_config.yml b/tests/assets/pytorch_launcher/model_config.yml new file mode 100644 index 0000000000..a3bef4fabc --- /dev/null +++ b/tests/assets/pytorch_launcher/model_config.yml @@ -0,0 +1,37 @@ +launcher: + framework: pytorch + module: samplenet.SampLeNet + python_path: '.' 
+ checkpoint: 'samplenet.pth' + +# launcher returns raw result, so it should be converted +# to an appropriate representation with adapter +adapter: + type: classification + labels: + - label1 + - label2 + - label3 + - label4 + - label5 + - label6 + - label7 + - label8 + - label9 + - label10 + +# list of preprocessing, applied to each image during validation +# order of entries matters +preprocessing: + # resize input image to topology input size + # you may specify size to which image should be resized + # via dst_width, dst_height fields + - type: resize + size: 32 + # topology is trained on RGB images, but Datumaro reads in BGR + # so it must be converted to RGB + - type: bgr_to_rgb + # dataset mean and standard deviation + - type: normalization + mean: (125.307, 122.961, 113.8575) + std: (51.5865, 50.847, 51.255) \ No newline at end of file diff --git a/tests/assets/pytorch_launcher/samplenet.pth b/tests/assets/pytorch_launcher/samplenet.pth new file mode 100644 index 0000000000..6c70368e09 Binary files /dev/null and b/tests/assets/pytorch_launcher/samplenet.pth differ diff --git a/tests/assets/pytorch_launcher/samplenet.py b/tests/assets/pytorch_launcher/samplenet.py new file mode 100644 index 0000000000..7282e43adf --- /dev/null +++ b/tests/assets/pytorch_launcher/samplenet.py @@ -0,0 +1,38 @@ +""" +Copyright (C) 2019-2020 Intel Corporation + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +import torch.nn as nn +import torch.nn.functional as F + + +class SampLeNet(nn.Module): + def __init__(self): + super(SampLeNet, self).__init__() + self.conv1 = nn.Conv2d(3, 6, 5) + self.pool = nn.MaxPool2d(2, 2) + self.conv2 = nn.Conv2d(6, 16, 5) + self.fc1 = nn.Linear(16 * 5 * 5, 120) + self.fc2 = nn.Linear(120, 84) + self.fc3 = nn.Linear(84, 10) + + def forward(self, x): + x = self.pool(F.relu(self.conv1(x))) + x = self.pool(F.relu(self.conv2(x))) + x = x.view(-1, 16 * 5 * 5) + x = F.relu(self.fc1(x)) + x = F.relu(self.fc2(x)) + x = self.fc3(x) + return x diff --git a/tests/assets/tf_detection_api_dataset/label_map.pbtxt b/tests/assets/tf_detection_api_dataset/label_map.pbtxt new file mode 100644 index 0000000000..dbf2b339b7 --- /dev/null +++ b/tests/assets/tf_detection_api_dataset/label_map.pbtxt @@ -0,0 +1,50 @@ +item { + id: 1 + name: 'label_0' +} + +item { + id: 2 + name: 'label_1' +} + +item { + id: 3 + name: 'label_2' +} + +item { + id: 4 + name: 'label_3' +} + +item { + id: 5 + name: 'label_4' +} + +item { + id: 6 + name: 'label_5' +} + +item { + id: 7 + name: 'label_6' +} + +item { + id: 8 + name: 'label_7' +} + +item { + id: 9 + name: 'label_8' +} + +item { + id: 10 + name: 'label_9' +} + diff --git a/tests/assets/tf_detection_api_dataset/test.tfrecord b/tests/assets/tf_detection_api_dataset/test.tfrecord new file mode 100644 index 0000000000..81dafa705b Binary files /dev/null and b/tests/assets/tf_detection_api_dataset/test.tfrecord differ diff --git a/tests/assets/tf_detection_api_dataset/train.tfrecord b/tests/assets/tf_detection_api_dataset/train.tfrecord new file mode 100644 index 0000000000..3ca3833163 Binary files /dev/null and b/tests/assets/tf_detection_api_dataset/train.tfrecord differ diff --git a/tests/assets/tf_detection_api_dataset/val.tfrecord b/tests/assets/tf_detection_api_dataset/val.tfrecord new file mode 100644 index 0000000000..34fa9ce1cd Binary files /dev/null and b/tests/assets/tf_detection_api_dataset/val.tfrecord differ diff --git a/tests/assets/voc_dataset/Annotations/2007_000001.xml b/tests/assets/voc_dataset/Annotations/2007_000001.xml new file mode 100644 index 0000000000..4f1e25a211 --- /dev/null +++ b/tests/assets/voc_dataset/Annotations/2007_000001.xml @@ -0,0 +1,54 @@ + + + VOC2007 + 2007_000001.jpg + + 10 + 20 + 3 + + 1 + + cat + Unspecified + 1 + 0 + + 1 + 2 + 3 + 4 + + + + person + + 4 + 5 + 6 + 7 + + + head + + 5.5 + 6 + 7.5 + 8 + + + + 1 + 0 + 1 + 0 + 1 + 0 + 1 + 0 + 1 + 0 + 1 + + + diff --git a/tests/assets/voc_dataset/ImageSets/Action/test.txt b/tests/assets/voc_dataset/ImageSets/Action/test.txt new file mode 100644 index 0000000000..c9fdc2510e --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Action/test.txt @@ -0,0 +1 @@ +2007_000002 diff --git a/tests/assets/voc_dataset/ImageSets/Action/train.txt b/tests/assets/voc_dataset/ImageSets/Action/train.txt new file mode 100644 index 0000000000..640b0d53ff --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Action/train.txt @@ -0,0 +1 @@ +2007_000001 diff --git a/tests/assets/voc_dataset/ImageSets/Layout/test.txt b/tests/assets/voc_dataset/ImageSets/Layout/test.txt new file mode 100644 index 0000000000..c9fdc2510e --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Layout/test.txt @@ -0,0 +1 @@ +2007_000002 diff --git a/tests/assets/voc_dataset/ImageSets/Layout/train.txt b/tests/assets/voc_dataset/ImageSets/Layout/train.txt new file mode 100644 index 0000000000..640b0d53ff --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Layout/train.txt @@ -0,0 +1 @@ +2007_000001 diff 
--git a/tests/assets/voc_dataset/ImageSets/Main/aeroplane_train.txt b/tests/assets/voc_dataset/ImageSets/Main/aeroplane_train.txt new file mode 100644 index 0000000000..a3decd42ad --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Main/aeroplane_train.txt @@ -0,0 +1 @@ +2007_000001 1 diff --git a/tests/assets/voc_dataset/ImageSets/Main/background_train.txt b/tests/assets/voc_dataset/ImageSets/Main/background_train.txt new file mode 100644 index 0000000000..d4385b6978 --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Main/background_train.txt @@ -0,0 +1 @@ +2007_000001 -1 diff --git a/tests/assets/voc_dataset/ImageSets/Main/bicycle_train.txt b/tests/assets/voc_dataset/ImageSets/Main/bicycle_train.txt new file mode 100644 index 0000000000..d4385b6978 --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Main/bicycle_train.txt @@ -0,0 +1 @@ +2007_000001 -1 diff --git a/tests/assets/voc_dataset/ImageSets/Main/bird_train.txt b/tests/assets/voc_dataset/ImageSets/Main/bird_train.txt new file mode 100644 index 0000000000..a3decd42ad --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Main/bird_train.txt @@ -0,0 +1 @@ +2007_000001 1 diff --git a/tests/assets/voc_dataset/ImageSets/Main/boat_train.txt b/tests/assets/voc_dataset/ImageSets/Main/boat_train.txt new file mode 100644 index 0000000000..d4385b6978 --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Main/boat_train.txt @@ -0,0 +1 @@ +2007_000001 -1 diff --git a/tests/assets/voc_dataset/ImageSets/Main/bottle_train.txt b/tests/assets/voc_dataset/ImageSets/Main/bottle_train.txt new file mode 100644 index 0000000000..a3decd42ad --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Main/bottle_train.txt @@ -0,0 +1 @@ +2007_000001 1 diff --git a/tests/assets/voc_dataset/ImageSets/Main/bus_train.txt b/tests/assets/voc_dataset/ImageSets/Main/bus_train.txt new file mode 100644 index 0000000000..d4385b6978 --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Main/bus_train.txt @@ -0,0 +1 @@ +2007_000001 -1 diff --git a/tests/assets/voc_dataset/ImageSets/Main/car_train.txt b/tests/assets/voc_dataset/ImageSets/Main/car_train.txt new file mode 100644 index 0000000000..a3decd42ad --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Main/car_train.txt @@ -0,0 +1 @@ +2007_000001 1 diff --git a/tests/assets/voc_dataset/ImageSets/Main/cat_train.txt b/tests/assets/voc_dataset/ImageSets/Main/cat_train.txt new file mode 100644 index 0000000000..d4385b6978 --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Main/cat_train.txt @@ -0,0 +1 @@ +2007_000001 -1 diff --git a/tests/assets/voc_dataset/ImageSets/Main/chair_train.txt b/tests/assets/voc_dataset/ImageSets/Main/chair_train.txt new file mode 100644 index 0000000000..a3decd42ad --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Main/chair_train.txt @@ -0,0 +1 @@ +2007_000001 1 diff --git a/tests/assets/voc_dataset/ImageSets/Main/cow_train.txt b/tests/assets/voc_dataset/ImageSets/Main/cow_train.txt new file mode 100644 index 0000000000..d4385b6978 --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Main/cow_train.txt @@ -0,0 +1 @@ +2007_000001 -1 diff --git a/tests/assets/voc_dataset/ImageSets/Main/diningtable_train.txt b/tests/assets/voc_dataset/ImageSets/Main/diningtable_train.txt new file mode 100644 index 0000000000..a3decd42ad --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Main/diningtable_train.txt @@ -0,0 +1 @@ +2007_000001 1 diff --git a/tests/assets/voc_dataset/ImageSets/Main/dog_train.txt b/tests/assets/voc_dataset/ImageSets/Main/dog_train.txt new file mode 100644 
index 0000000000..d4385b6978 --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Main/dog_train.txt @@ -0,0 +1 @@ +2007_000001 -1 diff --git a/tests/assets/voc_dataset/ImageSets/Main/horse_train.txt b/tests/assets/voc_dataset/ImageSets/Main/horse_train.txt new file mode 100644 index 0000000000..a3decd42ad --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Main/horse_train.txt @@ -0,0 +1 @@ +2007_000001 1 diff --git a/tests/assets/voc_dataset/ImageSets/Main/ignored_train.txt b/tests/assets/voc_dataset/ImageSets/Main/ignored_train.txt new file mode 100644 index 0000000000..a3decd42ad --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Main/ignored_train.txt @@ -0,0 +1 @@ +2007_000001 1 diff --git a/tests/assets/voc_dataset/ImageSets/Main/motorbike_train.txt b/tests/assets/voc_dataset/ImageSets/Main/motorbike_train.txt new file mode 100644 index 0000000000..d4385b6978 --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Main/motorbike_train.txt @@ -0,0 +1 @@ +2007_000001 -1 diff --git a/tests/assets/voc_dataset/ImageSets/Main/person_train.txt b/tests/assets/voc_dataset/ImageSets/Main/person_train.txt new file mode 100644 index 0000000000..a3decd42ad --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Main/person_train.txt @@ -0,0 +1 @@ +2007_000001 1 diff --git a/tests/assets/voc_dataset/ImageSets/Main/pottedplant_train.txt b/tests/assets/voc_dataset/ImageSets/Main/pottedplant_train.txt new file mode 100644 index 0000000000..d4385b6978 --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Main/pottedplant_train.txt @@ -0,0 +1 @@ +2007_000001 -1 diff --git a/tests/assets/voc_dataset/ImageSets/Main/sheep_train.txt b/tests/assets/voc_dataset/ImageSets/Main/sheep_train.txt new file mode 100644 index 0000000000..a3decd42ad --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Main/sheep_train.txt @@ -0,0 +1 @@ +2007_000001 1 diff --git a/tests/assets/voc_dataset/ImageSets/Main/sofa_train.txt b/tests/assets/voc_dataset/ImageSets/Main/sofa_train.txt new file mode 100644 index 0000000000..d4385b6978 --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Main/sofa_train.txt @@ -0,0 +1 @@ +2007_000001 -1 diff --git a/tests/assets/voc_dataset/ImageSets/Main/test.txt b/tests/assets/voc_dataset/ImageSets/Main/test.txt new file mode 100644 index 0000000000..c9fdc2510e --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Main/test.txt @@ -0,0 +1 @@ +2007_000002 diff --git a/tests/assets/voc_dataset/ImageSets/Main/train.txt b/tests/assets/voc_dataset/ImageSets/Main/train.txt new file mode 100644 index 0000000000..640b0d53ff --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Main/train.txt @@ -0,0 +1 @@ +2007_000001 diff --git a/tests/assets/voc_dataset/ImageSets/Main/train_train.txt b/tests/assets/voc_dataset/ImageSets/Main/train_train.txt new file mode 100644 index 0000000000..a3decd42ad --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Main/train_train.txt @@ -0,0 +1 @@ +2007_000001 1 diff --git a/tests/assets/voc_dataset/ImageSets/Main/tvmonitor_train.txt b/tests/assets/voc_dataset/ImageSets/Main/tvmonitor_train.txt new file mode 100644 index 0000000000..d4385b6978 --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Main/tvmonitor_train.txt @@ -0,0 +1 @@ +2007_000001 -1 diff --git a/tests/assets/voc_dataset/ImageSets/Segmentation/test.txt b/tests/assets/voc_dataset/ImageSets/Segmentation/test.txt new file mode 100644 index 0000000000..c9fdc2510e --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Segmentation/test.txt @@ -0,0 +1 @@ +2007_000002 diff --git 
a/tests/assets/voc_dataset/ImageSets/Segmentation/train.txt b/tests/assets/voc_dataset/ImageSets/Segmentation/train.txt new file mode 100644 index 0000000000..640b0d53ff --- /dev/null +++ b/tests/assets/voc_dataset/ImageSets/Segmentation/train.txt @@ -0,0 +1 @@ +2007_000001 diff --git a/tests/assets/voc_dataset/JPEGImages/2007_000002.jpg b/tests/assets/voc_dataset/JPEGImages/2007_000002.jpg new file mode 100644 index 0000000000..3c81296b31 Binary files /dev/null and b/tests/assets/voc_dataset/JPEGImages/2007_000002.jpg differ diff --git a/tests/assets/voc_dataset/SegmentationClass/2007_000001.png b/tests/assets/voc_dataset/SegmentationClass/2007_000001.png new file mode 100644 index 0000000000..0b92051452 Binary files /dev/null and b/tests/assets/voc_dataset/SegmentationClass/2007_000001.png differ diff --git a/tests/assets/voc_dataset/SegmentationObject/2007_000001.png b/tests/assets/voc_dataset/SegmentationObject/2007_000001.png new file mode 100644 index 0000000000..ebbeee61dd Binary files /dev/null and b/tests/assets/voc_dataset/SegmentationObject/2007_000001.png differ diff --git a/tests/assets/yolo_dataset/obj.data b/tests/assets/yolo_dataset/obj.data new file mode 100644 index 0000000000..16ca4090f4 --- /dev/null +++ b/tests/assets/yolo_dataset/obj.data @@ -0,0 +1,4 @@ +classes = 10 +train = data/train.txt +names = data/obj.names +backup = backup/ diff --git a/tests/assets/yolo_dataset/obj.names b/tests/assets/yolo_dataset/obj.names new file mode 100644 index 0000000000..b24c644df6 --- /dev/null +++ b/tests/assets/yolo_dataset/obj.names @@ -0,0 +1,10 @@ +label_0 +label_1 +label_2 +label_3 +label_4 +label_5 +label_6 +label_7 +label_8 +label_9 diff --git a/tests/assets/yolo_dataset/obj_train_data/1.jpg b/tests/assets/yolo_dataset/obj_train_data/1.jpg new file mode 100644 index 0000000000..8689b95631 Binary files /dev/null and b/tests/assets/yolo_dataset/obj_train_data/1.jpg differ diff --git a/tests/assets/yolo_dataset/obj_train_data/1.txt b/tests/assets/yolo_dataset/obj_train_data/1.txt new file mode 100644 index 0000000000..1f507909e2 --- /dev/null +++ b/tests/assets/yolo_dataset/obj_train_data/1.txt @@ -0,0 +1,2 @@ +2 0.133333 0.300000 0.266667 0.200000 +4 0.266667 0.450000 0.133333 0.300000 diff --git a/tests/assets/yolo_dataset/train.txt b/tests/assets/yolo_dataset/train.txt new file mode 100644 index 0000000000..f55beb7362 --- /dev/null +++ b/tests/assets/yolo_dataset/train.txt @@ -0,0 +1 @@ +data/obj_train_data/1.jpg diff --git a/tests/test_RISE.py b/tests/test_RISE.py new file mode 100644 index 0000000000..04772287f4 --- /dev/null +++ b/tests/test_RISE.py @@ -0,0 +1,231 @@ +from collections import namedtuple +import numpy as np + +from unittest import TestCase + +from datumaro.components.extractor import Label, Bbox +from datumaro.components.launcher import Launcher +from datumaro.components.algorithms.rise import RISE + + +class RiseTest(TestCase): + def test_rise_can_be_applied_to_classification_model(self): + class TestLauncher(Launcher): + def __init__(self, class_count, roi, **kwargs): + self.class_count = class_count + self.roi = roi + + def launch(self, inputs): + for inp in inputs: + yield self._process(inp) + + def _process(self, image): + roi = self.roi + roi_area = (roi[1] - roi[0]) * (roi[3] - roi[2]) + if 0.5 * roi_area < np.sum(image[roi[0]:roi[1], roi[2]:roi[3], 0]): + cls = 0 + else: + cls = 1 + + cls_conf = 0.5 + other_conf = (1.0 - cls_conf) / (self.class_count - 1) + + return [ + Label(i, attributes={ + 'score': cls_conf if cls == i else other_conf }) \ + 
for i in range(self.class_count) + ] + + roi = [70, 90, 7, 90] + model = TestLauncher(class_count=3, roi=roi) + + rise = RISE(model, max_samples=(7 * 7) ** 2, mask_width=7, mask_height=7) + + image = np.ones((100, 100, 3)) + heatmaps = next(rise.apply(image)) + + self.assertEqual(1, len(heatmaps)) + + heatmap = heatmaps[0] + self.assertEqual(image.shape[:2], heatmap.shape) + + h_sum = np.sum(heatmap) + h_area = np.prod(heatmap.shape) + roi_sum = np.sum(heatmap[roi[0]:roi[1], roi[2]:roi[3]]) + roi_area = (roi[1] - roi[0]) * (roi[3] - roi[2]) + roi_den = roi_sum / roi_area + hrest_den = (h_sum - roi_sum) / (h_area - roi_area) + self.assertLess(hrest_den, roi_den) + + def test_rise_can_be_applied_to_detection_model(self): + ROI = namedtuple('ROI', + ['threshold', 'x', 'y', 'w', 'h', 'label']) + + class TestLauncher(Launcher): + def __init__(self, rois, class_count, fp_count=4, pixel_jitter=20, **kwargs): + self.rois = rois + self.roi_base_sums = [None, ] * len(rois) + self.class_count = class_count + self.fp_count = fp_count + self.pixel_jitter = pixel_jitter + + @staticmethod + def roi_value(roi, image): + return np.sum( + image[roi.y:roi.y + roi.h, roi.x:roi.x + roi.w, :]) + + def launch(self, inputs): + for inp in inputs: + yield self._process(inp) + + def _process(self, image): + detections = [] + for i, roi in enumerate(self.rois): + roi_sum = self.roi_value(roi, image) + roi_base_sum = self.roi_base_sums[i] + first_run = roi_base_sum is None + if first_run: + roi_base_sum = roi_sum + self.roi_base_sums[i] = roi_base_sum + + cls_conf = roi_sum / roi_base_sum + + if roi.threshold < cls_conf: + cls = roi.label + detections.append( + Bbox(roi.x, roi.y, roi.w, roi.h, + label=cls, attributes={'score': cls_conf}) + ) + + if first_run: + continue + # add a few noisy false-positive detections jittered around the ROI + for j in range(self.fp_count): + if roi.threshold < cls_conf: + cls = roi.label + else: + cls = (i + j) % self.class_count + box = [roi.x, roi.y, roi.w, roi.h] + offset = (np.random.rand(4) - 0.5) * self.pixel_jitter + detections.append( + Bbox(*(box + offset), + label=cls, attributes={'score': cls_conf}) + ) + + return detections + + rois = [ + ROI(0.3, 10, 40, 30, 10, 0), + ROI(0.5, 70, 90, 7, 10, 0), + ROI(0.7, 5, 20, 40, 60, 2), + ROI(0.9, 30, 20, 10, 40, 1), + ] + model = TestLauncher(class_count=3, rois=rois) + + rise = RISE(model, max_samples=(7 * 7) ** 2, mask_width=7, mask_height=7) + + image = np.ones((100, 100, 3)) + heatmaps = next(rise.apply(image)) + heatmaps_class_count = len(set([roi.label for roi in rois])) + self.assertEqual(heatmaps_class_count + len(rois), len(heatmaps)) + + # import cv2 + # roi_image = image.copy() + # for i, roi in enumerate(rois): + # cv2.rectangle(roi_image, (roi.x, roi.y), (roi.x + roi.w, roi.y + roi.h), (32 * i) * 3) + # cv2.imshow('img', roi_image) + + for c in range(heatmaps_class_count): + class_roi = np.zeros(image.shape[:2]) + for i, roi in enumerate(rois): + if roi.label != c: + continue + class_roi[roi.y:roi.y + roi.h, roi.x:roi.x + roi.w] \ + += roi.threshold + + heatmap = heatmaps[c] + + roi_pixels = heatmap[class_roi != 0] + h_sum = np.sum(roi_pixels) + h_area = np.sum(roi_pixels != 0) + h_den = h_sum / h_area + + rest_pixels = heatmap[class_roi == 0] + r_sum = np.sum(rest_pixels) + r_area = np.sum(rest_pixels != 0) + r_den = r_sum / r_area + + # print(r_den, h_den) + # cv2.imshow('class %s' % c, heatmap) + self.assertLess(r_den, h_den) + + for i, roi in enumerate(rois): + heatmap = heatmaps[heatmaps_class_count + i] + h_sum = np.sum(heatmap) + h_area = np.prod(heatmap.shape)
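+            # saliency for each individual detection should concentrate inside its own box: compare mean heatmap density in the ROI with the mean over the rest of the image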
+ roi_sum = np.sum(heatmap[roi.y:roi.y + roi.h, roi.x:roi.x + roi.w]) + roi_area = roi.h * roi.w + roi_den = roi_sum / roi_area + hrest_den = (h_sum - roi_sum) / (h_area - roi_area) + # print(hrest_den, h_den) + # cv2.imshow('roi %s' % i, heatmap) + self.assertLess(hrest_den, roi_den) + # cv2.waitKey(0) + + @staticmethod + def DISABLED_test_roi_nms(): + ROI = namedtuple('ROI', + ['conf', 'x', 'y', 'w', 'h', 'label']) + + class_count = 3 + noisy_count = 3 + rois = [ + ROI(0.3, 10, 40, 30, 10, 0), + ROI(0.5, 70, 90, 7, 10, 0), + ROI(0.7, 5, 20, 40, 60, 2), + ROI(0.9, 30, 20, 10, 40, 1), + ] + pixel_jitter = 10 + + detections = [] + for i, roi in enumerate(rois): + detections.append( + Bbox(roi.x, roi.y, roi.w, roi.h, + label=roi.label, attributes={'score': roi.conf}) + ) + + for j in range(noisy_count): + cls_conf = roi.conf * j / noisy_count + cls = (i + j) % class_count + box = [roi.x, roi.y, roi.w, roi.h] + offset = (np.random.rand(4) - 0.5) * pixel_jitter + detections.append( + Bbox(*(box + offset), + label=cls, attributes={'score': cls_conf}) + ) + + import cv2 + image = np.zeros((100, 100, 3)) + for i, det in enumerate(detections): + roi = ROI(det.attributes['score'], *det.get_bbox(), det.label) + p1 = (int(roi.x), int(roi.y)) + p2 = (int(roi.x + roi.w), int(roi.y + roi.h)) + c = (0, 1 * (i % (1 + noisy_count) == 0), 1) + cv2.rectangle(image, p1, p2, c) + cv2.putText(image, 'd%s-%s-%.2f' % (i, roi.label, roi.conf), + p1, cv2.FONT_HERSHEY_SIMPLEX, 0.25, c) + cv2.imshow('nms_image', image) + cv2.waitKey(0) + + nms_boxes = RISE.nms(detections, iou_thresh=0.25) + print(len(detections), len(nms_boxes)) + + for i, det in enumerate(nms_boxes): + roi = ROI(det.attributes['score'], *det.get_bbox(), det.label) + p1 = (int(roi.x), int(roi.y)) + p2 = (int(roi.x + roi.w), int(roi.y + roi.h)) + c = (0, 1, 0) + cv2.rectangle(image, p1, p2, c) + cv2.putText(image, 'p%s-%s-%.2f' % (i, roi.label, roi.conf), + p1, cv2.FONT_HERSHEY_SIMPLEX, 0.25, c) + cv2.imshow('nms_image', image) + cv2.waitKey(0) \ No newline at end of file diff --git a/tests/test_coco_format.py b/tests/test_coco_format.py new file mode 100644 index 0000000000..131284be52 --- /dev/null +++ b/tests/test_coco_format.py @@ -0,0 +1,479 @@ +from functools import partial +import numpy as np +import os.path as osp + +from unittest import TestCase + +from datumaro.components.project import Project, Dataset +from datumaro.components.extractor import (DatasetItem, + AnnotationType, Label, Mask, Points, Polygon, Bbox, Caption, + LabelCategories, PointsCategories +) +from datumaro.plugins.coco_format.converter import ( + CocoConverter, + CocoImageInfoConverter, + CocoCaptionsConverter, + CocoInstancesConverter, + CocoPersonKeypointsConverter, + CocoLabelsConverter, +) +from datumaro.plugins.coco_format.importer import CocoImporter +from datumaro.util.image import Image +from datumaro.util.test_utils import TestDir, compare_datasets + + +DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'coco_dataset') + +class CocoImporterTest(TestCase): + def test_can_import(self): + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='000000000001', image=np.ones((10, 5, 3)), + subset='val', attributes={'id': 1}, + annotations=[ + Polygon([0, 0, 1, 0, 1, 2, 0, 2], label=0, + id=1, group=1, attributes={'is_crowd': False}), + Mask(np.array( + [[1, 0, 0, 1, 0]] * 5 + + [[1, 1, 1, 1, 0]] * 5 + ), label=0, + id=2, group=2, attributes={'is_crowd': True}), + ] + ), + ], categories=['TEST',]) + + dataset = Project.import_from(DUMMY_DATASET_DIR, 
'coco') \ + .make_dataset() + + compare_datasets(self, expected_dataset, dataset) + + def test_can_detect(self): + self.assertTrue(CocoImporter.detect(DUMMY_DATASET_DIR)) + +class CocoConverterTest(TestCase): + def _test_save_and_load(self, source_dataset, converter, test_dir, + target_dataset=None, importer_args=None): + converter(source_dataset, test_dir) + + if importer_args is None: + importer_args = {} + parsed_dataset = CocoImporter()(test_dir, **importer_args).make_dataset() + + if target_dataset is None: + target_dataset = source_dataset + + compare_datasets(self, expected=target_dataset, actual=parsed_dataset) + + def test_can_save_and_load_captions(self): + expected_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', + annotations=[ + Caption('hello', id=1, group=1), + Caption('world', id=2, group=2), + ], attributes={'id': 1}), + DatasetItem(id=2, subset='train', + annotations=[ + Caption('test', id=3, group=3), + ], attributes={'id': 2}), + + DatasetItem(id=3, subset='val', + annotations=[ + Caption('word', id=1, group=1), + ], attributes={'id': 1}), + ]) + + with TestDir() as test_dir: + self._test_save_and_load(expected_dataset, + CocoCaptionsConverter.convert, test_dir) + + def test_can_save_and_load_instances(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)), + annotations=[ + # Bbox + single polygon + Bbox(0, 1, 2, 2, + label=2, group=1, id=1, + attributes={ 'is_crowd': False }), + Polygon([0, 1, 2, 1, 2, 3, 0, 3], + attributes={ 'is_crowd': False }, + label=2, group=1, id=1), + ], attributes={'id': 1}), + DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)), + annotations=[ + # Mask + bbox + Mask(np.array([ + [0, 1, 0, 0], + [0, 1, 0, 0], + [0, 1, 1, 1], + [0, 0, 0, 0]], + ), + attributes={ 'is_crowd': True }, + label=4, group=3, id=3), + Bbox(1, 0, 2, 2, label=4, group=3, id=3, + attributes={ 'is_crowd': True }), + ], attributes={'id': 2}), + + DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)), + annotations=[ + # Bbox + mask + Bbox(0, 1, 2, 2, label=4, group=3, id=3, + attributes={ 'is_crowd': True }), + Mask(np.array([ + [0, 0, 0, 0], + [1, 1, 1, 0], + [1, 1, 0, 0], + [0, 0, 0, 0]], + ), + attributes={ 'is_crowd': True }, + label=4, group=3, id=3), + ], attributes={'id': 1}), + ], categories=[str(i) for i in range(10)]) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', image=np.ones((4, 4, 3)), + annotations=[ + Polygon([0, 1, 2, 1, 2, 3, 0, 3], + attributes={ 'is_crowd': False }, + label=2, group=1, id=1), + ], attributes={'id': 1}), + DatasetItem(id=2, subset='train', image=np.ones((4, 4, 3)), + annotations=[ + Mask(np.array([ + [0, 1, 0, 0], + [0, 1, 0, 0], + [0, 1, 1, 1], + [0, 0, 0, 0]], + ), + attributes={ 'is_crowd': True }, + label=4, group=3, id=3), + ], attributes={'id': 2}), + + DatasetItem(id=3, subset='val', image=np.ones((4, 4, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 0, 0], + [1, 1, 1, 0], + [1, 1, 0, 0], + [0, 0, 0, 0]], + ), + attributes={ 'is_crowd': True }, + label=4, group=3, id=3), + ], attributes={'id': 1}) + ], categories=[str(i) for i in range(10)]) + + with TestDir() as test_dir: + self._test_save_and_load(source_dataset, + CocoInstancesConverter.convert, test_dir, + target_dataset=target_dataset) + + def test_can_merge_polygons_on_loading(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((6, 10, 3)), + annotations=[ + Polygon([0, 0, 4, 0, 4, 4], + label=3, id=4, group=4), + 
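# the second fragment below shares id and group with the first, so the importer treats both as parts of one instance +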
Polygon([5, 0, 9, 0, 5, 5], + label=3, id=4, group=4), + ] + ), + ], categories=[str(i) for i in range(10)]) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((6, 10, 3)), + annotations=[ + Mask(np.array([ + [0, 1, 1, 1, 0, 1, 1, 1, 1, 0], + [0, 0, 1, 1, 0, 1, 1, 1, 0, 0], + [0, 0, 0, 1, 0, 1, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], + # only internal fragment (without the border), + # but not everywhere... + ), + label=3, id=4, group=4, + attributes={ 'is_crowd': False }), + ], attributes={'id': 1} + ), + ], categories=[str(i) for i in range(10)]) + + with TestDir() as test_dir: + self._test_save_and_load(source_dataset, + CocoInstancesConverter.convert, test_dir, + importer_args={'merge_instance_polygons': True}, + target_dataset=target_dataset) + + def test_can_crop_covered_segments(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 1, 1, 1], + [0, 0, 1, 1, 1], + [1, 1, 0, 1, 1], + [1, 1, 1, 0, 0], + [1, 1, 1, 0, 0]], + ), + label=2, id=1, z_order=0), + Polygon([1, 1, 4, 1, 4, 4, 1, 4], + label=1, id=2, z_order=1), + ] + ), + ], categories=[str(i) for i in range(10)]) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 1, 1, 1], + [0, 0, 0, 0, 1], + [1, 0, 0, 0, 1], + [1, 0, 0, 0, 0], + [1, 1, 1, 0, 0]], + ), + attributes={ 'is_crowd': True }, + label=2, id=1, group=1), + + Polygon([1, 1, 4, 1, 4, 4, 1, 4], + label=1, id=2, group=2, + attributes={ 'is_crowd': False }), + ], attributes={'id': 1} + ), + ], categories=[str(i) for i in range(10)]) + + with TestDir() as test_dir: + self._test_save_and_load(source_dataset, + partial(CocoInstancesConverter.convert, crop_covered=True), + test_dir, target_dataset=target_dataset) + + def test_can_convert_polygons_to_mask(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((6, 10, 3)), + annotations=[ + Polygon([0, 0, 4, 0, 4, 4], + label=3, id=4, group=4), + Polygon([5, 0, 9, 0, 5, 5], + label=3, id=4, group=4), + ] + ), + ], categories=[str(i) for i in range(10)]) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((6, 10, 3)), + annotations=[ + Mask(np.array([ + [0, 1, 1, 1, 0, 1, 1, 1, 1, 0], + [0, 0, 1, 1, 0, 1, 1, 1, 0, 0], + [0, 0, 0, 1, 0, 1, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], + # only internal fragment (without the border), + # but not everywhere... 
+ ), + attributes={ 'is_crowd': True }, + label=3, id=4, group=4), + ], attributes={'id': 1} + ), + ], categories=[str(i) for i in range(10)]) + + with TestDir() as test_dir: + self._test_save_and_load(source_dataset, + partial(CocoInstancesConverter.convert, segmentation_mode='mask'), + test_dir, target_dataset=target_dataset) + + def test_can_convert_masks_to_polygons(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 10, 3)), + annotations=[ + Mask(np.array([ + [0, 1, 1, 1, 0, 1, 1, 1, 1, 0], + [0, 0, 1, 1, 0, 1, 1, 1, 0, 0], + [0, 0, 0, 1, 0, 1, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + ]), + label=3, id=4, group=4), + ] + ), + ], categories=[str(i) for i in range(10)]) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 10, 3)), + annotations=[ + Polygon( + [3.0, 2.5, 1.0, 0.0, 3.5, 0.0, 3.0, 2.5], + label=3, id=4, group=4, + attributes={ 'is_crowd': False }), + Polygon( + [5.0, 3.5, 4.5, 0.0, 8.0, 0.0, 5.0, 3.5], + label=3, id=4, group=4, + attributes={ 'is_crowd': False }), + ], attributes={'id': 1} + ), + ], categories=[str(i) for i in range(10)]) + + with TestDir() as test_dir: + self._test_save_and_load(source_dataset, + partial(CocoInstancesConverter.convert, segmentation_mode='polygons'), + test_dir, + target_dataset=target_dataset) + + def test_can_save_and_load_images(self): + expected_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', attributes={'id': 1}), + DatasetItem(id=2, subset='train', attributes={'id': 2}), + + DatasetItem(id=2, subset='val', attributes={'id': 2}), + DatasetItem(id=3, subset='val', attributes={'id': 3}), + DatasetItem(id=4, subset='val', attributes={'id': 4}), + + DatasetItem(id=5, subset='test', attributes={'id': 1}), + ]) + + with TestDir() as test_dir: + self._test_save_and_load(expected_dataset, + CocoImageInfoConverter.convert, test_dir) + + def test_can_save_and_load_labels(self): + expected_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', + annotations=[ + Label(4, id=1, group=1), + Label(9, id=2, group=2), + ], attributes={'id': 1}), + ], categories=[str(i) for i in range(10)]) + + with TestDir() as test_dir: + self._test_save_and_load(expected_dataset, + CocoLabelsConverter.convert, test_dir) + + def test_can_save_and_load_keypoints(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)), + annotations=[ + # Full instance annotations: polygon + keypoints + Points([0, 0, 0, 2, 4, 1], [0, 1, 2], + label=3, group=1, id=1), + Polygon([0, 0, 4, 0, 4, 4], + label=3, group=1, id=1), + + # Full instance annotations: bbox + keypoints + Points([1, 2, 3, 4, 2, 3], group=2, id=2), + Bbox(1, 2, 2, 2, group=2, id=2), + + # Solitary keypoints + Points([1, 2, 0, 2, 4, 1], label=5, id=3), + + # Some other solitary annotations (bug #1387) + Polygon([0, 0, 4, 0, 4, 4], label=3, id=4), + + # Solitary keypoints with no label + Points([0, 0, 1, 2, 3, 4], [0, 1, 2], id=5), + ]), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + str(i) for i in range(10)), + AnnotationType.points: PointsCategories.from_iterable( + (i, None, [[0, 1], [1, 2]]) for i in range(10) + ), + }) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', image=np.zeros((5, 5, 3)), + annotations=[ + Points([0, 0, 0, 2, 4, 1], [0, 1, 2], + label=3, group=1, id=1, + attributes={'is_crowd': False}), + Polygon([0, 0, 4, 0, 4, 4], + label=3, 
group=1, id=1, + attributes={'is_crowd': False}), + + Points([1, 2, 3, 4, 2, 3], + group=2, id=2, + attributes={'is_crowd': False}), + Bbox(1, 2, 2, 2, + group=2, id=2, + attributes={'is_crowd': False}), + + Points([1, 2, 0, 2, 4, 1], + label=5, group=3, id=3, + attributes={'is_crowd': False}), + Bbox(0, 1, 4, 1, + label=5, group=3, id=3, + attributes={'is_crowd': False}), + + Points([0, 0, 1, 2, 3, 4], [0, 1, 2], + group=5, id=5, + attributes={'is_crowd': False}), + Bbox(1, 2, 2, 2, + group=5, id=5, + attributes={'is_crowd': False}), + ], attributes={'id': 1}), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + str(i) for i in range(10)), + AnnotationType.points: PointsCategories.from_iterable( + (i, None, [[0, 1], [1, 2]]) for i in range(10) + ), + }) + + with TestDir() as test_dir: + self._test_save_and_load(source_dataset, + CocoPersonKeypointsConverter.convert, test_dir, + target_dataset=target_dataset) + + def test_can_save_dataset_with_no_subsets(self): + test_dataset = Dataset.from_iterable([ + DatasetItem(id=1, attributes={'id': 1}), + DatasetItem(id=2, attributes={'id': 2}), + ]) + + with TestDir() as test_dir: + self._test_save_and_load(test_dataset, + CocoConverter.convert, test_dir) + + def test_can_save_dataset_with_image_info(self): + expected_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=Image(path='1.jpg', size=(10, 15)), + attributes={'id': 1}), + ]) + + with TestDir() as test_dir: + self._test_save_and_load(expected_dataset, + CocoImageInfoConverter.convert, test_dir) + + def test_relative_paths(self): + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='1', image=np.ones((4, 2, 3)), + attributes={'id': 1}), + DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)), + attributes={'id': 2}), + DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)), + attributes={'id': 3}), + ]) + + with TestDir() as test_dir: + self._test_save_and_load(expected_dataset, + partial(CocoImageInfoConverter.convert, save_images=True), test_dir) + + def test_preserve_coco_ids(self): + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='some/name1', image=np.ones((4, 2, 3)), + attributes={'id': 40}), + ]) + + with TestDir() as test_dir: + self._test_save_and_load(expected_dataset, + partial(CocoImageInfoConverter.convert, save_images=True), test_dir) + + def test_annotation_attributes(self): + expected_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.ones((4, 2, 3)), annotations=[ + Polygon([0, 0, 4, 0, 4, 4], label=5, group=1, id=1, + attributes={'is_crowd': False, 'x': 5, 'y': 'abc'}), + ], attributes={'id': 1}) + ], categories=[str(i) for i in range(10)]) + + with TestDir() as test_dir: + self._test_save_and_load(expected_dataset, + CocoConverter.convert, test_dir) diff --git a/tests/test_command_targets.py b/tests/test_command_targets.py new file mode 100644 index 0000000000..5b8a69f318 --- /dev/null +++ b/tests/test_command_targets.py @@ -0,0 +1,128 @@ +import numpy as np +import os.path as osp + +from unittest import TestCase + +from datumaro.components.project import Project +from datumaro.util.command_targets import ProjectTarget, \ + ImageTarget, SourceTarget +from datumaro.util.image import save_image +from datumaro.util.test_utils import TestDir + + +class CommandTargetsTest(TestCase): + def test_image_false_when_no_file(self): + target = ImageTarget() + + status = target.test('somepath.jpg') + + self.assertFalse(status) + + def test_image_false_when_false(self): + with TestDir() as test_dir: + path = osp.join(test_dir, 
'test.jpg') + with open(path, 'w+') as f: + f.write('qwerty123') + + target = ImageTarget() + + status = target.test(path) + + self.assertFalse(status) + + def test_image_true_when_true(self): + with TestDir() as test_dir: + path = osp.join(test_dir, 'test.jpg') + save_image(path, np.ones([10, 7, 3])) + + target = ImageTarget() + + status = target.test(path) + + self.assertTrue(status) + + def test_project_false_when_no_file(self): + target = ProjectTarget() + + status = target.test('somepath.jpg') + + self.assertFalse(status) + + def test_project_false_when_no_name(self): + target = ProjectTarget(project=Project()) + + status = target.test('') + + self.assertFalse(status) + + def test_project_true_when_project_file(self): + with TestDir() as test_dir: + path = osp.join(test_dir, 'test.jpg') + Project().save(path) + + target = ProjectTarget() + + status = target.test(path) + + self.assertTrue(status) + + def test_project_true_when_project_name(self): + project_name = 'qwerty' + project = Project({ + 'project_name': project_name + }) + target = ProjectTarget(project=project) + + status = target.test(project_name) + + self.assertTrue(status) + + def test_project_false_when_not_project_name(self): + project_name = 'qwerty' + project = Project({ + 'project_name': project_name + }) + target = ProjectTarget(project=project) + + status = target.test(project_name + '123') + + self.assertFalse(status) + + def test_project_false_when_not_project_file(self): + with TestDir() as test_dir: + path = osp.join(test_dir, 'test.jpg') + with open(path, 'w+') as f: + f.write('wqererw') + + target = ProjectTarget() + + status = target.test(path) + + self.assertFalse(status) + + def test_source_false_when_no_project(self): + target = SourceTarget() + + status = target.test('qwerty123') + + self.assertFalse(status) + + def test_source_true_when_source_exists(self): + source_name = 'qwerty' + project = Project() + project.add_source(source_name) + target = SourceTarget(project=project) + + status = target.test(source_name) + + self.assertTrue(status) + + def test_source_false_when_source_doesnt_exist(self): + source_name = 'qwerty' + project = Project() + project.add_source(source_name) + target = SourceTarget(project=project) + + status = target.test(source_name + '123') + + self.assertFalse(status) \ No newline at end of file diff --git a/tests/test_cvat_format.py b/tests/test_cvat_format.py new file mode 100644 index 0000000000..5c246ff484 --- /dev/null +++ b/tests/test_cvat_format.py @@ -0,0 +1,278 @@ +from functools import partial +import numpy as np +import os.path as osp + +from unittest import TestCase +from datumaro.components.project import Dataset +from datumaro.components.extractor import (Extractor, DatasetItem, + AnnotationType, Points, Polygon, PolyLine, Bbox, Label, + LabelCategories, +) +from datumaro.plugins.cvat_format.importer import CvatImporter +from datumaro.plugins.cvat_format.converter import CvatConverter +from datumaro.util.image import Image +from datumaro.util.test_utils import TestDir, compare_datasets + + +DUMMY_IMAGE_DATASET_DIR = osp.join(osp.dirname(__file__), + 'assets', 'cvat_dataset', 'for_images') + +DUMMY_VIDEO_DATASET_DIR = osp.join(osp.dirname(__file__), + 'assets', 'cvat_dataset', 'for_video') + +class CvatImporterTest(TestCase): + def test_can_detect_image(self): + self.assertTrue(CvatImporter.detect(DUMMY_IMAGE_DATASET_DIR)) + + def test_can_detect_video(self): + self.assertTrue(CvatImporter.detect(DUMMY_VIDEO_DATASET_DIR)) + + def test_can_load_image(self): + 
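# mirrors tests/assets/cvat_dataset/for_images: two frames with a box, polyline, polygon and points, plus label attributes +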
expected_dataset = Dataset.from_iterable([ + DatasetItem(id='img0', subset='train', + image=np.ones((8, 8, 3)), + annotations=[ + Bbox(0, 2, 4, 2, label=0, z_order=1, + attributes={ + 'occluded': True, + 'a1': True, 'a2': 'v3' + }), + PolyLine([1, 2, 3, 4, 5, 6, 7, 8], + attributes={'occluded': False}), + ], attributes={'frame': 0}), + DatasetItem(id='img1', subset='train', + image=np.ones((10, 10, 3)), + annotations=[ + Polygon([1, 2, 3, 4, 6, 5], z_order=1, + attributes={'occluded': False}), + Points([1, 2, 3, 4, 5, 6], label=1, z_order=2, + attributes={'occluded': False}), + ], attributes={'frame': 1}), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable([ + ['label1', '', {'a1', 'a2'}], + ['label2'], + ]) + }) + + parsed_dataset = CvatImporter()(DUMMY_IMAGE_DATASET_DIR).make_dataset() + + compare_datasets(self, expected_dataset, parsed_dataset) + + def test_can_load_video(self): + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='frame_000010', subset='annotations', + image=np.ones((20, 25, 3)), + annotations=[ + Bbox(3, 4, 7, 1, label=2, + id=0, + attributes={ + 'occluded': True, + 'outside': False, 'keyframe': True, + 'track_id': 0 + }), + Points([21.95, 8.00, 2.55, 15.09, 2.23, 3.16], + label=0, + id=1, + attributes={ + 'occluded': False, + 'outside': False, 'keyframe': True, + 'track_id': 1, 'hgl': 'hgkf', + }), + ], attributes={'frame': 10}), + DatasetItem(id='frame_000013', subset='annotations', + image=np.ones((20, 25, 3)), + annotations=[ + Bbox(7, 6, 7, 2, label=2, + id=0, + attributes={ + 'occluded': False, + 'outside': True, 'keyframe': True, + 'track_id': 0 + }), + Points([21.95, 8.00, 9.55, 15.09, 5.23, 1.16], + label=0, + id=1, + attributes={ + 'occluded': False, + 'outside': True, 'keyframe': True, + 'track_id': 1, 'hgl': 'jk', + }), + PolyLine([7.85, 13.88, 3.50, 6.67, 15.90, 2.00, 13.31, 7.21], + label=2, + id=2, + attributes={ + 'occluded': False, + 'outside': False, 'keyframe': True, + 'track_id': 2, + }), + ], attributes={'frame': 13}), + DatasetItem(id='frame_000016', subset='annotations', + image=Image(path='frame_0000016.png', size=(20, 25)), + annotations=[ + Bbox(8, 7, 6, 10, label=2, + id=0, + attributes={ + 'occluded': False, + 'outside': True, 'keyframe': True, + 'track_id': 0 + }), + PolyLine([7.85, 13.88, 3.50, 6.67, 15.90, 2.00, 13.31, 7.21], + label=2, + id=2, + attributes={ + 'occluded': False, + 'outside': True, 'keyframe': True, + 'track_id': 2, + }), + ], attributes={'frame': 16}), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable([ + ['klhg', '', {'hgl'}], + ['z U k'], + ['II'] + ]), + }) + + parsed_dataset = CvatImporter()(DUMMY_VIDEO_DATASET_DIR).make_dataset() + + compare_datasets(self, expected_dataset, parsed_dataset) + +class CvatConverterTest(TestCase): + def _test_save_and_load(self, source_dataset, converter, test_dir, + target_dataset=None, importer_args=None): + converter(source_dataset, test_dir) + + if importer_args is None: + importer_args = {} + parsed_dataset = CvatImporter()(test_dir, **importer_args).make_dataset() + + if target_dataset is None: + target_dataset = source_dataset + + compare_datasets(self, expected=target_dataset, actual=parsed_dataset) + + def test_can_save_and_load(self): + label_categories = LabelCategories() + for i in range(10): + label_categories.add(str(i)) + label_categories.items[2].attributes.update(['a1', 'a2']) + label_categories.attributes.update(['occluded']) + + source_dataset = Dataset.from_iterable([ + DatasetItem(id=0, subset='s1', 
image=np.zeros((5, 10, 3)), + annotations=[ + Polygon([0, 0, 4, 0, 4, 4], + label=1, group=4, + attributes={ 'occluded': True }), + Points([1, 1, 3, 2, 2, 3], + label=2, + attributes={ 'a1': 'x', 'a2': 42, + 'unknown': 'bar' }), + Label(1), + Label(2, attributes={ 'a1': 'y', 'a2': 44 }), + ] + ), + DatasetItem(id=1, subset='s1', + annotations=[ + PolyLine([0, 0, 4, 0, 4, 4], + label=3, id=4, group=4), + Bbox(5, 0, 1, 9, + label=3, id=4, group=4), + ] + ), + + DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)), + annotations=[ + Polygon([0, 0, 4, 0, 4, 4], z_order=1, + label=3, group=4, + attributes={ 'occluded': False }), + PolyLine([5, 0, 9, 0, 5, 5]), # will be skipped as no label + ] + ), + + DatasetItem(id=3, subset='s3', image=Image( + path='3.jpg', size=(2, 4))), + ], categories={ + AnnotationType.label: label_categories, + }) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=0, subset='s1', image=np.zeros((5, 10, 3)), + annotations=[ + Polygon([0, 0, 4, 0, 4, 4], + label=1, group=4, + attributes={ 'occluded': True }), + Points([1, 1, 3, 2, 2, 3], + label=2, + attributes={ 'occluded': False, + 'a1': 'x', 'a2': 42 }), + Label(1), + Label(2, attributes={ 'a1': 'y', 'a2': 44 }), + ], attributes={'frame': 0} + ), + DatasetItem(id=1, subset='s1', + annotations=[ + PolyLine([0, 0, 4, 0, 4, 4], + label=3, group=4, + attributes={ 'occluded': False }), + Bbox(5, 0, 1, 9, + label=3, group=4, + attributes={ 'occluded': False }), + ], attributes={'frame': 1} + ), + + DatasetItem(id=2, subset='s2', image=np.ones((5, 10, 3)), + annotations=[ + Polygon([0, 0, 4, 0, 4, 4], z_order=1, + label=3, group=4, + attributes={ 'occluded': False }), + ], attributes={'frame': 0} + ), + + DatasetItem(id=3, subset='s3', image=Image( + path='3.jpg', size=(2, 4)), + attributes={'frame': 0}), + ], categories={ + AnnotationType.label: label_categories, + }) + + with TestDir() as test_dir: + self._test_save_and_load(source_dataset, + partial(CvatConverter.convert, save_images=True), test_dir, + target_dataset=target_dataset) + + def test_relative_paths(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='1', image=np.ones((4, 2, 3))), + DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))), + DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))), + ], categories={ AnnotationType.label: LabelCategories() }) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id='1', image=np.ones((4, 2, 3)), + attributes={'frame': 0}), + DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)), + attributes={'frame': 1}), + DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)), + attributes={'frame': 2}), + ], categories={ + AnnotationType.label: LabelCategories() + }) + + with TestDir() as test_dir: + self._test_save_and_load(source_dataset, + partial(CvatConverter.convert, save_images=True), test_dir, + target_dataset=target_dataset) + + def test_preserve_frame_ids(self): + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='some/name1', image=np.ones((4, 2, 3)), + attributes={'frame': 40}), + ], categories={ + AnnotationType.label: LabelCategories() + }) + + with TestDir() as test_dir: + self._test_save_and_load(expected_dataset, + CvatConverter.convert, test_dir) diff --git a/tests/test_datumaro_format.py b/tests/test_datumaro_format.py new file mode 100644 index 0000000000..8faf5ef0ee --- /dev/null +++ b/tests/test_datumaro_format.py @@ -0,0 +1,108 @@ +from functools import partial +import numpy as np + +from unittest import TestCase +from datumaro.components.project import Dataset 
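+# Project is used below to re-import converted datasets for round-trip comparison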
+from datumaro.components.project import Project +from datumaro.components.extractor import (Extractor, DatasetItem, + AnnotationType, Label, Mask, Points, Polygon, + PolyLine, Bbox, Caption, + LabelCategories, MaskCategories, PointsCategories +) +from datumaro.plugins.datumaro_format.importer import DatumaroImporter +from datumaro.plugins.datumaro_format.converter import DatumaroConverter +from datumaro.util.mask_tools import generate_colormap +from datumaro.util.image import Image +from datumaro.util.test_utils import TestDir, compare_datasets_strict + + +class DatumaroConverterTest(TestCase): + def _test_save_and_load(self, source_dataset, converter, test_dir, + target_dataset=None, importer_args=None): + converter(source_dataset, test_dir) + + if importer_args is None: + importer_args = {} + parsed_dataset = Project.import_from( + test_dir, 'datumaro', **importer_args).make_dataset() + + if target_dataset is None: + target_dataset = source_dataset + + compare_datasets_strict(self, + expected=target_dataset, actual=parsed_dataset) + + @property + def test_dataset(self): + label_categories = LabelCategories() + for i in range(5): + label_categories.add('cat' + str(i)) + + mask_categories = MaskCategories( + generate_colormap(len(label_categories.items))) + + points_categories = PointsCategories() + for index, _ in enumerate(label_categories.items): + points_categories.add(index, ['cat1', 'cat2'], joints=[[0, 1]]) + + return Dataset.from_iterable([ + DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)), + annotations=[ + Caption('hello', id=1), + Caption('world', id=2, group=5), + Label(2, id=3, attributes={ + 'x': 1, + 'y': '2', + }), + Bbox(1, 2, 3, 4, label=4, id=4, z_order=1, attributes={ + 'score': 1.0, + }), + Bbox(5, 6, 7, 8, id=5, group=5), + Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4), + Mask(label=3, id=5, z_order=2, image=np.ones((2, 3))), + ]), + DatasetItem(id=21, subset='train', + annotations=[ + Caption('test'), + Label(2), + Bbox(1, 2, 3, 4, label=5, id=42, group=42) + ]), + + DatasetItem(id=2, subset='val', + annotations=[ + PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1), + Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4), + ]), + + DatasetItem(id=42, subset='test', + attributes={'a1': 5, 'a2': '42'}), + + DatasetItem(id=42), + DatasetItem(id=43, image=Image(path='1/b/c.qq', size=(2, 4))), + ], categories={ + AnnotationType.label: label_categories, + AnnotationType.mask: mask_categories, + AnnotationType.points: points_categories, + }) + + def test_can_save_and_load(self): + with TestDir() as test_dir: + self._test_save_and_load(self.test_dataset, + partial(DatumaroConverter.convert, save_images=True), test_dir) + + def test_can_detect(self): + with TestDir() as test_dir: + DatumaroConverter.convert(self.test_dataset, save_dir=test_dir) + + self.assertTrue(DatumaroImporter.detect(test_dir)) + + def test_relative_paths(self): + test_dataset = Dataset.from_iterable([ + DatasetItem(id='1', image=np.ones((4, 2, 3))), + DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))), + DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))), + ]) + + with TestDir() as test_dir: + self._test_save_and_load(test_dataset, + partial(DatumaroConverter.convert, save_images=True), test_dir) diff --git a/tests/test_diff.py b/tests/test_diff.py new file mode 100644 index 0000000000..33dd79da0f --- /dev/null +++ b/tests/test_diff.py @@ -0,0 +1,251 @@ +import numpy as np + +from datumaro.components.extractor import (DatasetItem, Label, Bbox, + Caption, Mask, Points) +from 
datumaro.components.project import Dataset +from datumaro.components.operations import DistanceComparator, ExactComparator + +from unittest import TestCase + + +class DistanceComparatorTest(TestCase): + def test_no_bbox_diff_with_same_item(self): + detections = 3 + anns = [ + Bbox(i * 10, 10, 10, 10, label=i) + for i in range(detections) + ] + item = DatasetItem(id=0, annotations=anns) + + iou_thresh = 0.5 + comp = DistanceComparator(iou_threshold=iou_thresh) + + result = comp.match_boxes(item, item) + + matches, mispred, a_greater, b_greater = result + self.assertEqual(0, len(mispred)) + self.assertEqual(0, len(a_greater)) + self.assertEqual(0, len(b_greater)) + self.assertEqual(len(item.annotations), len(matches)) + for a_bbox, b_bbox in matches: + self.assertLess(iou_thresh, a_bbox.iou(b_bbox)) + self.assertEqual(a_bbox.label, b_bbox.label) + + def test_can_find_bbox_with_wrong_label(self): + detections = 3 + class_count = 2 + item1 = DatasetItem(id=1, annotations=[ + Bbox(i * 10, 10, 10, 10, label=i) + for i in range(detections) + ]) + item2 = DatasetItem(id=2, annotations=[ + Bbox(i * 10, 10, 10, 10, label=(i + 1) % class_count) + for i in range(detections) + ]) + + iou_thresh = 0.5 + comp = DistanceComparator(iou_threshold=iou_thresh) + + result = comp.match_boxes(item1, item2) + + matches, mispred, a_greater, b_greater = result + self.assertEqual(len(item1.annotations), len(mispred)) + self.assertEqual(0, len(a_greater)) + self.assertEqual(0, len(b_greater)) + self.assertEqual(0, len(matches)) + for a_bbox, b_bbox in mispred: + self.assertLess(iou_thresh, a_bbox.iou(b_bbox)) + self.assertEqual((a_bbox.label + 1) % class_count, b_bbox.label) + + def test_can_find_missing_boxes(self): + detections = 3 + class_count = 2 + item1 = DatasetItem(id=1, annotations=[ + Bbox(i * 10, 10, 10, 10, label=i) + for i in range(detections) if i % 2 == 0 + ]) + item2 = DatasetItem(id=2, annotations=[ + Bbox(i * 10, 10, 10, 10, label=(i + 1) % class_count) + for i in range(detections) if i % 2 == 1 + ]) + + iou_thresh = 0.5 + comp = DistanceComparator(iou_threshold=iou_thresh) + + result = comp.match_boxes(item1, item2) + + matches, mispred, a_greater, b_greater = result + self.assertEqual(0, len(mispred)) + self.assertEqual(len(item1.annotations), len(a_greater)) + self.assertEqual(len(item2.annotations), len(b_greater)) + self.assertEqual(0, len(matches)) + + def test_no_label_diff_with_same_item(self): + detections = 3 + anns = [ Label(i) for i in range(detections) ] + item = DatasetItem(id=1, annotations=anns) + + result = DistanceComparator().match_labels(item, item) + + matches, a_greater, b_greater = result + self.assertEqual(0, len(a_greater)) + self.assertEqual(0, len(b_greater)) + self.assertEqual(len(item.annotations), len(matches)) + + def test_can_find_wrong_label(self): + item1 = DatasetItem(id=1, annotations=[ + Label(0), + Label(1), + Label(2), + ]) + item2 = DatasetItem(id=2, annotations=[ + Label(2), + Label(3), + Label(4), + ]) + + result = DistanceComparator().match_labels(item1, item2) + + matches, a_greater, b_greater = result + self.assertEqual(2, len(a_greater)) + self.assertEqual(2, len(b_greater)) + self.assertEqual(1, len(matches)) + + def test_can_match_points(self): + item1 = DatasetItem(id=1, annotations=[ + Points([1, 2, 2, 0, 1, 1], label=0), + + Points([3, 5, 5, 7, 5, 3], label=0), + ]) + item2 = DatasetItem(id=2, annotations=[ + Points([1.5, 2, 2, 0.5, 1, 1.5], label=0), + + Points([5, 7, 7, 7, 7, 5], label=0), + ]) + + result = DistanceComparator().match_points(item1, 
item2) + + matches, mismatches, a_greater, b_greater = result + self.assertEqual(1, len(a_greater)) + self.assertEqual(1, len(b_greater)) + self.assertEqual(1, len(matches)) + self.assertEqual(0, len(mismatches)) + +class ExactComparatorTest(TestCase): + def test_class_comparison(self): + a = Dataset.from_iterable([], categories=['a', 'b', 'c']) + b = Dataset.from_iterable([], categories=['b', 'c']) + + comp = ExactComparator() + _, _, _, _, errors = comp.compare_datasets(a, b) + + self.assertEqual(1, len(errors), errors) + + def test_item_comparison(self): + a = Dataset.from_iterable([ + DatasetItem(id=1, subset='train'), + DatasetItem(id=2, subset='test', attributes={'x': 1}), + ], categories=['a', 'b', 'c']) + + b = Dataset.from_iterable([ + DatasetItem(id=2, subset='test'), + DatasetItem(id=3), + ], categories=['a', 'b', 'c']) + + comp = ExactComparator() + _, _, a_extra_items, b_extra_items, errors = comp.compare_datasets(a, b) + + self.assertEqual({('1', 'train')}, a_extra_items) + self.assertEqual({('3', '')}, b_extra_items) + self.assertEqual(1, len(errors), errors) + + def test_annotation_comparison(self): + a = Dataset.from_iterable([ + DatasetItem(id=1, annotations=[ + Caption('hello'), # unmatched + Caption('world', group=5), + Label(2, attributes={ 'x': 1, 'y': '2', }), + Bbox(1, 2, 3, 4, label=4, z_order=1, attributes={ + 'score': 1.0, + }), + Bbox(5, 6, 7, 8, group=5), + Points([1, 2, 2, 0, 1, 1], label=0, z_order=4), + Mask(label=3, z_order=2, image=np.ones((2, 3))), + ]), + ], categories=['a', 'b', 'c', 'd']) + + b = Dataset.from_iterable([ + DatasetItem(id=1, annotations=[ + Caption('world', group=5), + Label(2, attributes={ 'x': 1, 'y': '2', }), + Bbox(1, 2, 3, 4, label=4, z_order=1, attributes={ + 'score': 1.0, + }), + Bbox(5, 6, 7, 8, group=5), + Bbox(5, 6, 7, 8, group=5), # unmatched + Points([1, 2, 2, 0, 1, 1], label=0, z_order=4), + Mask(label=3, z_order=2, image=np.ones((2, 3))), + ]), + ], categories=['a', 'b', 'c', 'd']) + + comp = ExactComparator() + matched, unmatched, _, _, errors = comp.compare_datasets(a, b) + + self.assertEqual(6, len(matched), matched) + self.assertEqual(2, len(unmatched), unmatched) + self.assertEqual(0, len(errors), errors) + + def test_image_comparison(self): + a = Dataset.from_iterable([ + DatasetItem(id=11, image=np.ones((5, 4, 3)), annotations=[ + Bbox(5, 6, 7, 8), + ]), + DatasetItem(id=12, image=np.ones((5, 4, 3)), annotations=[ + Bbox(1, 2, 3, 4), + Bbox(5, 6, 7, 8), + ]), + DatasetItem(id=13, image=np.ones((5, 4, 3)), annotations=[ + Bbox(9, 10, 11, 12), # mismatch + ]), + + DatasetItem(id=14, image=np.zeros((5, 4, 3)), annotations=[ + Bbox(1, 2, 3, 4), + Bbox(5, 6, 7, 8), + ], attributes={ 'a': 1 }), + + DatasetItem(id=15, image=np.zeros((5, 5, 3)), annotations=[ + Bbox(1, 2, 3, 4), + Bbox(5, 6, 7, 8), + ]), + ], categories=['a', 'b', 'c', 'd']) + + b = Dataset.from_iterable([ + DatasetItem(id=21, image=np.ones((5, 4, 3)), annotations=[ + Bbox(5, 6, 7, 8), + ]), + DatasetItem(id=22, image=np.ones((5, 4, 3)), annotations=[ + Bbox(1, 2, 3, 4), + Bbox(5, 6, 7, 8), + ]), + DatasetItem(id=23, image=np.ones((5, 4, 3)), annotations=[ + Bbox(10, 10, 11, 12), # mismatch + ]), + + DatasetItem(id=24, image=np.zeros((5, 4, 3)), annotations=[ + Bbox(6, 6, 7, 8), # 1 ann missing, mismatch + ], attributes={ 'a': 2 }), + + DatasetItem(id=25, image=np.zeros((4, 4, 3)), annotations=[ + Bbox(6, 6, 7, 8), + ]), + ], categories=['a', 'b', 'c', 'd']) + + comp = ExactComparator(match_images=True) + matched_ann, unmatched_ann, a_unmatched, b_unmatched, 
errors = \ + comp.compare_datasets(a, b) + + self.assertEqual(3, len(matched_ann), matched_ann) + self.assertEqual(5, len(unmatched_ann), unmatched_ann) + self.assertEqual(1, len(a_unmatched), a_unmatched) + self.assertEqual(1, len(b_unmatched), b_unmatched) + self.assertEqual(1, len(errors), errors) \ No newline at end of file diff --git a/tests/test_image.py b/tests/test_image.py new file mode 100644 index 0000000000..5f4ef81c4f --- /dev/null +++ b/tests/test_image.py @@ -0,0 +1,64 @@ +from itertools import product +import numpy as np +import os.path as osp + +from unittest import TestCase + +import datumaro.util.image as image_module +from datumaro.util.test_utils import TestDir + + +class ImageOperationsTest(TestCase): + def setUp(self): + self.default_backend = image_module._IMAGE_BACKEND + + def tearDown(self): + image_module._IMAGE_BACKEND = self.default_backend + + def test_save_and_load_backends(self): + backends = image_module._IMAGE_BACKENDS + for save_backend, load_backend, c in product(backends, backends, [1, 3]): + with TestDir() as test_dir: + if c == 1: + src_image = np.random.randint(0, 255 + 1, (2, 4)) + else: + src_image = np.random.randint(0, 255 + 1, (2, 4, c)) + path = osp.join(test_dir, 'img.png') # lossless + + image_module._IMAGE_BACKEND = save_backend + image_module.save_image(path, src_image, jpeg_quality=100) + + image_module._IMAGE_BACKEND = load_backend + dst_image = image_module.load_image(path) + + self.assertTrue(np.array_equal(src_image, dst_image), + 'save: %s, load: %s' % (save_backend, load_backend)) + + def test_encode_and_decode_backends(self): + backends = image_module._IMAGE_BACKENDS + for save_backend, load_backend, c in product(backends, backends, [1, 3]): + if c == 1: + src_image = np.random.randint(0, 255 + 1, (2, 4)) + else: + src_image = np.random.randint(0, 255 + 1, (2, 4, c)) + + image_module._IMAGE_BACKEND = save_backend + buffer = image_module.encode_image(src_image, '.png', + jpeg_quality=100) # lossless + + image_module._IMAGE_BACKEND = load_backend + dst_image = image_module.decode_image(buffer) + + self.assertTrue(np.array_equal(src_image, dst_image), + 'save: %s, load: %s' % (save_backend, load_backend)) + + def test_save_image_to_inexistent_dir_raises_error(self): + with self.assertRaises(FileNotFoundError): + image_module.save_image('some/path.jpg', np.ones((5, 4, 3)), + create_dir=False) + + def test_save_image_can_create_dir(self): + with TestDir() as test_dir: + path = osp.join(test_dir, 'some', 'path.jpg') + image_module.save_image(path, np.ones((5, 4, 3)), create_dir=True) + self.assertTrue(osp.isfile(path)) diff --git a/tests/test_image_dir_format.py b/tests/test_image_dir_format.py new file mode 100644 index 0000000000..b991220ebb --- /dev/null +++ b/tests/test_image_dir_format.py @@ -0,0 +1,48 @@ +import numpy as np + +from unittest import TestCase + +from datumaro.components.project import Project +from datumaro.components.extractor import Extractor, DatasetItem +from datumaro.plugins.image_dir import ImageDirConverter +from datumaro.util.test_utils import TestDir, compare_datasets + + +class ImageDirFormatTest(TestCase): + def test_can_load(self): + class TestExtractor(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id=1, image=np.ones((10, 6, 3))), + DatasetItem(id=2, image=np.ones((5, 4, 3))), + ]) + + with TestDir() as test_dir: + source_dataset = TestExtractor() + + ImageDirConverter.convert(source_dataset, save_dir=test_dir) + + project = Project.import_from(test_dir, 'image_dir') + parsed_dataset = 
project.make_dataset() + + compare_datasets(self, source_dataset, parsed_dataset) + + def test_relative_paths(self): + class TestExtractor(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id='1', image=np.ones((4, 2, 3))), + DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))), + DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))), + ]) + + with TestDir() as test_dir: + source_dataset = TestExtractor() + + ImageDirConverter.convert(source_dataset, save_dir=test_dir) + + project = Project.import_from(test_dir, 'image_dir') + parsed_dataset = project.make_dataset() + + compare_datasets(self, source_dataset, parsed_dataset) + diff --git a/tests/test_images.py b/tests/test_images.py new file mode 100644 index 0000000000..c8ae3274e9 --- /dev/null +++ b/tests/test_images.py @@ -0,0 +1,81 @@ +import numpy as np +import os.path as osp + +from unittest import TestCase + +from datumaro.util.test_utils import TestDir +from datumaro.util.image import lazy_image, load_image, save_image, Image +from datumaro.util.image_cache import ImageCache + + +class LazyImageTest(TestCase): + def test_cache_works(self): + with TestDir() as test_dir: + image = np.ones((100, 100, 3), dtype=np.uint8) + image_path = osp.join(test_dir, 'image.jpg') + save_image(image_path, image) + + caching_loader = lazy_image(image_path, cache=None) + self.assertTrue(caching_loader() is caching_loader()) + + non_caching_loader = lazy_image(image_path, cache=False) + self.assertFalse(non_caching_loader() is non_caching_loader()) + +class ImageCacheTest(TestCase): + def test_cache_fifo_displacement(self): + capacity = 2 + cache = ImageCache(capacity) + + loaders = [lazy_image(None, loader=lambda p: object(), cache=cache) + for _ in range(capacity + 1)] + + first_request = [loader() for loader in loaders[1 : ]] + loaders[0]() # pop something from the cache + + second_request = [loader() for loader in loaders[2 : ]] + second_request.insert(0, loaders[1]()) + + matches = sum([a is b for a, b in zip(first_request, second_request)]) + self.assertEqual(matches, len(first_request) - 1) + + def test_global_cache_is_accessible(self): + loader = lazy_image(None, loader=lambda p: object()) + + ImageCache.get_instance().clear() + self.assertTrue(loader() is loader()) + self.assertEqual(ImageCache.get_instance().size(), 1) + +class ImageTest(TestCase): + def test_lazy_image_shape(self): + data = np.ones((5, 6, 7)) + + image_lazy = Image(data=data, size=(2, 4)) + image_eager = Image(data=data) + + self.assertEqual((2, 4), image_lazy.size) + self.assertEqual((5, 6), image_eager.size) + + def test_ctors(self): + with TestDir() as test_dir: + path = osp.join(test_dir, 'path.png') + image = np.ones([2, 4, 3]) + save_image(path, image) + + for args in [ + { 'data': image }, + { 'data': image, 'path': path }, + { 'data': image, 'path': path, 'size': (2, 4) }, + { 'data': image, 'path': path, 'loader': load_image, 'size': (2, 4) }, + { 'path': path }, + { 'path': path, 'loader': load_image }, + { 'path': 'somepath', 'loader': lambda p: image }, + { 'loader': lambda p: image }, + { 'path': path, 'size': (2, 4) }, + ]: + with self.subTest(**args): + img = Image(**args) + # pylint: disable=pointless-statement + if img.has_data: + img.data + img.size + # pylint: enable=pointless-statement diff --git a/tests/test_labelme_format.py b/tests/test_labelme_format.py new file mode 100644 index 0000000000..d40938bd8a --- /dev/null +++ b/tests/test_labelme_format.py @@ -0,0 +1,206 @@ +from functools import partial +import numpy as np +import os.path as 
osp + +from unittest import TestCase +from datumaro.components.project import Dataset +from datumaro.components.extractor import (Extractor, DatasetItem, + AnnotationType, Bbox, Mask, Polygon, LabelCategories +) +from datumaro.components.project import Project +from datumaro.plugins.labelme_format import LabelMeImporter, \ + LabelMeConverter +from datumaro.util.test_utils import TestDir, compare_datasets + + +class LabelMeConverterTest(TestCase): + def _test_save_and_load(self, source_dataset, converter, test_dir, + target_dataset=None, importer_args=None): + converter(source_dataset, test_dir) + + if importer_args is None: + importer_args = {} + parsed_dataset = LabelMeImporter()(test_dir, **importer_args) \ + .make_dataset() + + if target_dataset is None: + target_dataset = source_dataset + + compare_datasets(self, expected=target_dataset, actual=parsed_dataset) + + def test_can_save_and_load(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', + image=np.ones((16, 16, 3)), + annotations=[ + Bbox(0, 4, 4, 8, label=2, group=2), + Polygon([0, 4, 4, 4, 5, 6], label=3, attributes={ + 'occluded': True, + 'a1': 'qwe', + 'a2': True, + 'a3': 123, + }), + Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2, + attributes={ 'username': 'test' }), + Bbox(1, 2, 3, 4, group=3), + Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=3, + attributes={ 'occluded': True } + ), + ] + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(label) for label in range(10)), + }) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', + image=np.ones((16, 16, 3)), + annotations=[ + Bbox(0, 4, 4, 8, label=0, group=2, id=0, + attributes={ + 'occluded': False, 'username': '', + } + ), + Polygon([0, 4, 4, 4, 5, 6], label=1, id=1, + attributes={ + 'occluded': True, 'username': '', + 'a1': 'qwe', + 'a2': True, + 'a3': 123, + } + ), + Mask(np.array([[0, 1], [1, 0], [1, 1]]), group=2, + id=2, attributes={ + 'occluded': False, 'username': 'test' + } + ), + Bbox(1, 2, 3, 4, group=1, id=3, attributes={ + 'occluded': False, 'username': '', + }), + Mask(np.array([[0, 0], [0, 0], [1, 1]]), group=1, + id=4, attributes={ + 'occluded': True, 'username': '' + } + ), + ] + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable([ + 'label_2', 'label_3']), + }) + + with TestDir() as test_dir: + self._test_save_and_load( + source_dataset, + partial(LabelMeConverter.convert, save_images=True), + test_dir, target_dataset=target_dataset) + + def test_cant_save_dataset_with_relative_paths(self): + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='dir/1', image=np.ones((2, 6, 3))), + ], categories={ + AnnotationType.label: LabelCategories(), + }) + + with self.assertRaisesRegex(Exception, r'only supports flat'): + with TestDir() as test_dir: + self._test_save_and_load(expected_dataset, + LabelMeConverter.convert, test_dir) + + +DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'labelme_dataset') + +class LabelMeImporterTest(TestCase): + def test_can_detect(self): + self.assertTrue(LabelMeImporter.detect(DUMMY_DATASET_DIR)) + + def test_can_import(self): + img1 = np.ones((77, 102, 3)) * 255 + img1[6:32, 7:41] = 0 + + mask1 = np.zeros((77, 102), dtype=int) + mask1[67:69, 58:63] = 1 + + mask2 = np.zeros((77, 102), dtype=int) + mask2[13:25, 54:71] = [ + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0], + 
[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], + [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + ] + + target_dataset = Dataset.from_iterable([ + DatasetItem(id='img1', image=img1, + annotations=[ + Polygon([43, 34, 45, 34, 45, 37, 43, 37], + label=0, id=0, + attributes={ + 'occluded': False, + 'username': 'admin' + } + ), + Mask(mask1, label=1, id=1, + attributes={ + 'occluded': False, + 'username': 'brussell' + } + ), + Polygon([30, 12, 42, 21, 24, 26, 15, 22, 18, 14, 22, 12, 27, 12], + label=2, group=2, id=2, + attributes={ + 'a1': True, + 'occluded': True, + 'username': 'anonymous' + } + ), + Polygon([35, 21, 43, 22, 40, 28, 28, 31, 31, 22, 32, 25], + label=3, group=2, id=3, + attributes={ + 'kj': True, + 'occluded': False, + 'username': 'anonymous' + } + ), + Bbox(13, 19, 10, 11, label=4, group=2, id=4, + attributes={ + 'hg': True, + 'occluded': True, + 'username': 'anonymous' + } + ), + Mask(mask2, label=5, group=1, id=5, + attributes={ + 'd': True, + 'occluded': False, + 'username': 'anonymous' + } + ), + Polygon([64, 21, 74, 24, 72, 32, 62, 34, 60, 27, 62, 22], + label=6, group=1, id=6, + attributes={ + 'gfd lkj lkj hi': True, + 'occluded': False, + 'username': 'anonymous' + } + ), + ] + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable([ + 'window', 'license plate', 'o1', + 'q1', 'b1', 'm1', 'hg', + ]), + }) + + parsed = Project.import_from(DUMMY_DATASET_DIR, 'label_me') \ + .make_dataset() + compare_datasets(self, expected=target_dataset, actual=parsed) \ No newline at end of file diff --git a/tests/test_masks.py b/tests/test_masks.py new file mode 100644 index 0000000000..4396966089 --- /dev/null +++ b/tests/test_masks.py @@ -0,0 +1,197 @@ +import numpy as np + +from unittest import TestCase + +import datumaro.util.mask_tools as mask_tools +from datumaro.components.extractor import CompiledMask + + +class PolygonConversionsTest(TestCase): + def test_mask_can_be_converted_to_polygon(self): + mask = np.array([ + [0, 1, 1, 1, 0, 1, 1, 1, 1, 0], + [0, 0, 1, 1, 0, 1, 0, 1, 0, 0], + [0, 0, 0, 1, 0, 1, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + ]) + expected = [ + [1, 0, 3, 0, 3, 2, 1, 0], + [5, 0, 8, 0, 5, 3], + ] + + computed = mask_tools.mask_to_polygons(mask) + + self.assertEqual(len(expected), len(computed)) + + def test_can_crop_covered_segments(self): + image_size = [7, 7] + initial = [ + [1, 1, 6, 1, 6, 6, 1, 6], # rectangle + mask_tools.mask_to_rle(np.array([ + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 1, 0, 1, 1, 0], + [0, 1, 1, 0, 1, 1, 0], + [0, 0, 0, 0, 0, 1, 0], + [0, 1, 1, 0, 0, 1, 0], + [0, 1, 1, 1, 1, 1, 0], + [0, 0, 0, 0, 0, 0, 0], + ])), + [1, 1, 6, 6, 1, 6], # lower-left triangle + ] + expected = [ + np.array([ + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0], + [0, 0, 0, 0, 1, 0, 0], + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0], + ]), # half-covered + np.array([ + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 1, 0, 1, 1, 0], + [0, 0, 0, 0, 1, 1, 0], + [0, 0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0], + ]), # half-covered + 
mask_tools.rles_to_mask([initial[2]], *image_size), # unchanged + ] + + computed = mask_tools.crop_covered_segments(initial, *image_size, + ratio_tolerance=0, return_masks=True) + + self.assertEqual(len(initial), len(computed)) + for i, (e_mask, c_mask) in enumerate(zip(expected, computed)): + self.assertTrue(np.array_equal(e_mask, c_mask), + '#%s: %s\n%s\n' % (i, e_mask, c_mask)) + + def _test_mask_to_rle(self, source_mask): + rle_uncompressed = mask_tools.mask_to_rle(source_mask) + + from pycocotools import mask as mask_utils + resulting_mask = mask_utils.frPyObjects( + rle_uncompressed, *rle_uncompressed['size']) + resulting_mask = mask_utils.decode(resulting_mask) + + self.assertTrue(np.array_equal(source_mask, resulting_mask), + '%s\n%s\n' % (source_mask, resulting_mask)) + + def test_mask_to_rle_multi(self): + cases = [ + np.array([ + [0, 1, 1, 1, 0, 1, 1, 1, 1, 0], + [0, 0, 1, 1, 0, 1, 0, 1, 0, 0], + [0, 0, 0, 1, 0, 1, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + ]), + + np.array([ + [0] + ]), + np.array([ + [1] + ]), + + np.array([ + [1, 0, 0, 0, 0, 0, 0, 1, 0, 0], + [0, 0, 1, 1, 1, 0, 0, 0, 0, 0], + [1, 0, 1, 0, 1, 1, 1, 0, 0, 0], + [1, 1, 0, 1, 0, 1, 1, 1, 1, 0], + [1, 0, 1, 0, 1, 0, 0, 0, 0, 0], + [1, 0, 0, 1, 0, 0, 0, 1, 0, 1], + [1, 1, 0, 0, 1, 1, 0, 0, 0, 1], + [0, 0, 1, 0, 0, 0, 1, 1, 1, 1], + [1, 1, 0, 0, 0, 0, 0, 1, 0, 0], + [1, 1, 1, 1, 1, 0, 1, 0, 1, 0], + [0, 1, 0, 1, 1, 1, 1, 1, 0, 0], + [0, 1, 0, 0, 0, 1, 0, 0, 1, 0], + [1, 1, 0, 1, 0, 0, 1, 1, 1, 1], + ]) + ] + + for case in cases: + self._test_mask_to_rle(case) + +class ColormapOperationsTest(TestCase): + def test_can_paint_mask(self): + mask = np.zeros((1, 3), dtype=np.uint8) + mask[:, 0] = 0 + mask[:, 1] = 1 + mask[:, 2] = 2 + + colormap = mask_tools.generate_colormap(3) + + expected = np.zeros((*mask.shape, 3), dtype=np.uint8) + expected[:, 0] = colormap[0][::-1] + expected[:, 1] = colormap[1][::-1] + expected[:, 2] = colormap[2][::-1] + + actual = mask_tools.paint_mask(mask, colormap) + + self.assertTrue(np.array_equal(expected, actual), + '%s\nvs.\n%s' % (expected, actual)) + + def test_can_unpaint_mask(self): + colormap = mask_tools.generate_colormap(3) + inverse_colormap = mask_tools.invert_colormap(colormap) + + mask = np.zeros((1, 3, 3), dtype=np.uint8) + mask[:, 0] = colormap[0][::-1] + mask[:, 1] = colormap[1][::-1] + mask[:, 2] = colormap[2][::-1] + + expected = np.zeros((1, 3), dtype=np.uint8) + expected[:, 0] = 0 + expected[:, 1] = 1 + expected[:, 2] = 2 + + actual = mask_tools.unpaint_mask(mask, inverse_colormap) + + self.assertTrue(np.array_equal(expected, actual), + '%s\nvs.\n%s' % (expected, actual)) + + def test_can_remap_mask(self): + class_count = 10 + remap_fn = lambda c: class_count - c + + src = np.empty((class_count, class_count), dtype=np.uint8) + for c in range(class_count): + src[c:, c:] = c + + expected = np.empty_like(src) + for c in range(class_count): + expected[c:, c:] = remap_fn(c) + + actual = mask_tools.remap_mask(src, remap_fn) + + self.assertTrue(np.array_equal(expected, actual), + '%s\nvs.\n%s' % (expected, actual)) + + def test_can_merge_masks(self): + masks = [ + np.array([0, 2, 4, 0, 0, 1]), + np.array([0, 1, 1, 0, 2, 0]), + np.array([0, 0, 2, 3, 0, 0]), + ] + expected = \ + np.array([0, 1, 2, 3, 2, 1]) + + actual = mask_tools.merge_masks(masks) + + self.assertTrue(np.array_equal(expected, actual), + '%s\nvs.\n%s' % (expected, actual)) + + def test_can_decode_compiled_mask(self): + class_idx = 1000 + instance_idx = 10000 + mask = 
np.array([1]) + compiled_mask = CompiledMask(mask * class_idx, mask * instance_idx) + + labels = compiled_mask.get_instance_labels() + + self.assertEqual({instance_idx: class_idx}, labels) \ No newline at end of file diff --git a/tests/test_mot_format.py b/tests/test_mot_format.py new file mode 100644 index 0000000000..4cc2a98b3a --- /dev/null +++ b/tests/test_mot_format.py @@ -0,0 +1,136 @@ +from functools import partial +import numpy as np +import os.path as osp + +from unittest import TestCase +from datumaro.components.project import Dataset +from datumaro.components.extractor import (Extractor, DatasetItem, + AnnotationType, Bbox, LabelCategories +) +from datumaro.components.project import Project +from datumaro.plugins.mot_format import MotSeqGtConverter, MotSeqImporter +from datumaro.util.test_utils import TestDir, compare_datasets + + +class MotConverterTest(TestCase): + def _test_save_and_load(self, source_dataset, converter, test_dir, + target_dataset=None, importer_args=None): + converter(source_dataset, test_dir) + + if importer_args is None: + importer_args = {} + parsed_dataset = MotSeqImporter()(test_dir, **importer_args) \ + .make_dataset() + + if target_dataset is None: + target_dataset = source_dataset + + compare_datasets(self, expected=target_dataset, actual=parsed_dataset) + + def test_can_save_bboxes(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', + image=np.ones((16, 16, 3)), + annotations=[ + Bbox(0, 4, 4, 8, label=2, attributes={ + 'occluded': True, + }), + Bbox(0, 4, 4, 4, label=3, attributes={ + 'visibility': 0.4, + }), + Bbox(2, 4, 4, 4, attributes={ + 'ignored': True + }), + ] + ), + + DatasetItem(id=2, subset='val', + image=np.ones((8, 8, 3)), + annotations=[ + Bbox(1, 2, 4, 2, label=3), + ] + ), + + DatasetItem(id=3, subset='test', + image=np.ones((5, 4, 3)) * 3, + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(label) for label in range(10)), + }) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, + image=np.ones((16, 16, 3)), + annotations=[ + Bbox(0, 4, 4, 8, label=2, attributes={ + 'occluded': True, + 'visibility': 0.0, + 'ignored': False, + }), + Bbox(0, 4, 4, 4, label=3, attributes={ + 'occluded': False, + 'visibility': 0.4, + 'ignored': False, + }), + Bbox(2, 4, 4, 4, attributes={ + 'occluded': False, + 'visibility': 1.0, + 'ignored': True, + }), + ] + ), + + DatasetItem(id=2, + image=np.ones((8, 8, 3)), + annotations=[ + Bbox(1, 2, 4, 2, label=3, attributes={ + 'occluded': False, + 'visibility': 1.0, + 'ignored': False, + }), + ] + ), + + DatasetItem(id=3, + image=np.ones((5, 4, 3)) * 3, + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(label) for label in range(10)), + }) + + with TestDir() as test_dir: + self._test_save_and_load( + source_dataset, + partial(MotSeqGtConverter.convert, save_images=True), + test_dir, target_dataset=target_dataset) + + +DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'mot_dataset') + +class MotImporterTest(TestCase): + def test_can_detect(self): + self.assertTrue(MotSeqImporter.detect(DUMMY_DATASET_DIR)) + + def test_can_import(self): + expected_dataset = Dataset.from_iterable([ + DatasetItem(id=1, + image=np.ones((16, 16, 3)), + annotations=[ + Bbox(0, 4, 4, 8, label=2, attributes={ + 'occluded': False, + 'visibility': 1.0, + 'ignored': False, + }), + ] + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(label) for label in 
range(10)), + }) + + dataset = Project.import_from(DUMMY_DATASET_DIR, 'mot_seq') \ + .make_dataset() + + compare_datasets(self, expected_dataset, dataset) \ No newline at end of file diff --git a/tests/test_ops.py b/tests/test_ops.py new file mode 100644 index 0000000000..5b7355bf79 --- /dev/null +++ b/tests/test_ops.py @@ -0,0 +1,451 @@ +from unittest import TestCase + +import numpy as np + +from datumaro.components.extractor import (Bbox, Caption, DatasetItem, + Extractor, Label, Mask, Points, Polygon, PolyLine, + LabelCategories, PointsCategories, MaskCategories, AnnotationType) +from datumaro.components.operations import (FailedAttrVotingError, + IntersectMerge, NoMatchingAnnError, NoMatchingItemError, WrongGroupError, + compute_ann_statistics, mean_std) +from datumaro.components.project import Dataset +from datumaro.util.test_utils import compare_datasets + + +class TestOperations(TestCase): + def test_mean_std(self): + expected_mean = [100, 50, 150] + expected_std = [20, 50, 10] + + class TestExtractor(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id=1, image=np.random.normal( + expected_mean, expected_std, + size=(w, h, 3)) + ) + for i, (w, h) in enumerate([ + (3000, 100), (800, 600), (400, 200), (700, 300) + ]) + ]) + + actual_mean, actual_std = mean_std(TestExtractor()) + + for em, am in zip(expected_mean, actual_mean): + self.assertAlmostEqual(em, am, places=0) + for estd, astd in zip(expected_std, actual_std): + self.assertAlmostEqual(estd, astd, places=0) + + def test_stats(self): + dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.ones((5, 5, 3)), annotations=[ + Caption('hello'), + Caption('world'), + Label(2, attributes={ 'x': 1, 'y': '2', }), + Bbox(1, 2, 2, 2, label=2, attributes={ 'score': 0.5, }), + Bbox(5, 6, 2, 2, attributes={ + 'x': 1, 'y': '3', 'occluded': True, + }), + Points([1, 2, 2, 0, 1, 1], label=0), + Mask(label=3, image=np.array([ + [0, 0, 1, 1, 1], + [0, 0, 1, 1, 1], + [0, 0, 1, 1, 1], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + ])), + ]), + DatasetItem(id=2, image=np.ones((2, 4, 3)), annotations=[ + Label(2, attributes={ 'x': 2, 'y': '2', }), + Bbox(1, 2, 2, 2, label=3, attributes={ 'score': 0.5, }), + Bbox(5, 6, 2, 2, attributes={ + 'x': 2, 'y': '3', 'occluded': False, + }), + ]), + DatasetItem(id=3), + ], categories=['label_%s' % i for i in range(4)]) + + expected = { + 'images count': 3, + 'annotations count': 10, + 'unannotated images count': 1, + 'unannotated images': ['3'], + 'annotations by type': { + 'label': { 'count': 2, }, + 'polygon': { 'count': 0, }, + 'polyline': { 'count': 0, }, + 'bbox': { 'count': 4, }, + 'mask': { 'count': 1, }, + 'points': { 'count': 1, }, + 'caption': { 'count': 2, }, + }, + 'annotations': { + 'labels': { + 'count': 6, + 'distribution': { + 'label_0': [1, 1/6], + 'label_1': [0, 0.0], + 'label_2': [3, 3/6], + 'label_3': [2, 2/6], + }, + 'attributes': { + 'x': { + 'count': 2, # annotations with no label are skipped + 'values count': 2, + 'values present': ['1', '2'], + 'distribution': { + '1': [1, 1/2], + '2': [1, 1/2], + }, + }, + 'y': { + 'count': 2, # annotations with no label are skipped + 'values count': 1, + 'values present': ['2'], + 'distribution': { + '2': [2, 2/2], + }, + }, + # must not include "special" attributes like "occluded" + } + }, + 'segments': { + 'avg. 
area': (4 * 2 + 9 * 1) / 3, + 'area distribution': [ + {'min': 4.0, 'max': 4.5, 'count': 2, 'percent': 2/3}, + {'min': 4.5, 'max': 5.0, 'count': 0, 'percent': 0.0}, + {'min': 5.0, 'max': 5.5, 'count': 0, 'percent': 0.0}, + {'min': 5.5, 'max': 6.0, 'count': 0, 'percent': 0.0}, + {'min': 6.0, 'max': 6.5, 'count': 0, 'percent': 0.0}, + {'min': 6.5, 'max': 7.0, 'count': 0, 'percent': 0.0}, + {'min': 7.0, 'max': 7.5, 'count': 0, 'percent': 0.0}, + {'min': 7.5, 'max': 8.0, 'count': 0, 'percent': 0.0}, + {'min': 8.0, 'max': 8.5, 'count': 0, 'percent': 0.0}, + {'min': 8.5, 'max': 9.0, 'count': 1, 'percent': 1/3}, + ], + 'pixel distribution': { + 'label_0': [0, 0.0], + 'label_1': [0, 0.0], + 'label_2': [4, 4/17], + 'label_3': [13, 13/17], + }, + } + }, + } + + actual = compute_ann_statistics(dataset) + + self.assertEqual(expected, actual) + +class TestMultimerge(TestCase): + def test_can_match_items(self): + # items 1 and 3 are unique, item 2 is common and should be merged + + source0 = Dataset.from_iterable([ + DatasetItem(1, annotations=[ Label(0), ]), + DatasetItem(2, annotations=[ Label(0), ]), + ], categories=['a', 'b']) + + source1 = Dataset.from_iterable([ + DatasetItem(2, annotations=[ Label(1), ]), + DatasetItem(3, annotations=[ Label(0), ]), + ], categories=['a', 'b']) + + source2 = Dataset.from_iterable([ + DatasetItem(2, annotations=[ Label(0), Bbox(1, 2, 3, 4) ]), + ], categories=['a', 'b']) + + expected = Dataset.from_iterable([ + DatasetItem(1, annotations=[ + Label(0, attributes={'score': 1/3}), + ]), + DatasetItem(2, annotations=[ + Label(0, attributes={'score': 2/3}), + Label(1, attributes={'score': 1/3}), + Bbox(1, 2, 3, 4, attributes={'score': 1.0}), + ]), + DatasetItem(3, annotations=[ + Label(0, attributes={'score': 1/3}), + ]), + ], categories=['a', 'b']) + + merger = IntersectMerge() + merged = merger([source0, source1, source2]) + + compare_datasets(self, expected, merged) + self.assertEqual( + [ + NoMatchingItemError(item_id=('1', ''), sources={1, 2}), + NoMatchingItemError(item_id=('3', ''), sources={0, 2}), + ], + sorted((e for e in merger.errors + if isinstance(e, NoMatchingItemError)), + key=lambda e: e.item_id) + ) + self.assertEqual( + [ + NoMatchingAnnError(item_id=('2', ''), sources={0, 1}, + ann=source2.get('2').annotations[1]), + ], + sorted((e for e in merger.errors + if isinstance(e, NoMatchingAnnError)), + key=lambda e: e.item_id) + ) + + def test_can_match_shapes(self): + source0 = Dataset.from_iterable([ + DatasetItem(1, annotations=[ + # unique + Bbox(1, 2, 3, 4, label=1), + + # common + Mask(label=2, z_order=2, image=np.array([ + [0, 0, 0, 0], + [0, 0, 0, 0], + [1, 1, 1, 0], + [1, 1, 1, 0], + ])), + Polygon([1, 0, 3, 2, 1, 2]), + + # an instance with keypoints + Bbox(4, 5, 2, 4, label=2, z_order=1, group=1), + Points([5, 6], label=0, group=1), + Points([6, 8], label=1, group=1), + + PolyLine([1, 1, 2, 1, 3, 1]), + ]), + ], categories=['a', 'b', 'c']) + + source1 = Dataset.from_iterable([ + DatasetItem(1, annotations=[ + # common + Mask(label=2, image=np.array([ + [0, 0, 0, 0], + [0, 1, 1, 1], + [0, 1, 1, 1], + [0, 1, 1, 1], + ])), + Polygon([0, 2, 2, 0, 2, 1]), + + # an instance with keypoints + Bbox(4, 4, 2, 5, label=2, z_order=1, group=2), + Points([5.5, 6.5], label=0, group=2), + Points([6, 8], label=1, group=2), + + PolyLine([1, 1.5, 2, 1.5]), + ]), + ], categories=['a', 'b', 'c']) + + source2 = Dataset.from_iterable([ + DatasetItem(1, annotations=[ + # common + Mask(label=2, z_order=3, image=np.array([ + [0, 0, 1, 1], + [0, 1, 1, 1], + [1, 1, 1, 1], + 
[1, 1, 1, 0], + ])), + Polygon([3, 1, 2, 2, 0, 1]), + + # an instance with keypoints, one is missing + Bbox(3, 6, 2, 3, label=2, z_order=4, group=3), + Points([4.5, 5.5], label=0, group=3), + + PolyLine([1, 1.25, 3, 1, 4, 2]), + ]), + ], categories=['a', 'b', 'c']) + + expected = Dataset.from_iterable([ + DatasetItem(1, annotations=[ + # unique + Bbox(1, 2, 3, 4, label=1), + + # common + # nearest to mean bbox + Mask(label=2, z_order=3, image=np.array([ + [0, 0, 0, 0], + [0, 1, 1, 1], + [0, 1, 1, 1], + [0, 1, 1, 1], + ])), + Polygon([1, 0, 3, 2, 1, 2]), + + # an instance with keypoints + Bbox(4, 5, 2, 4, label=2, z_order=4, group=1), + Points([5, 6], label=0, group=1), + Points([6, 8], label=1, group=1), + + PolyLine([1, 1.25, 3, 1, 4, 2]), + ]), + ], categories=['a', 'b', 'c']) + + merger = IntersectMerge(conf={'quorum': 1, 'pairwise_dist': 0.1}) + merged = merger([source0, source1, source2]) + + compare_datasets(self, expected, merged, ignored_attrs={'score'}) + self.assertEqual( + [ + NoMatchingAnnError(item_id=('1', ''), sources={2}, + ann=source0.get('1').annotations[5]), + NoMatchingAnnError(item_id=('1', ''), sources={1, 2}, + ann=source0.get('1').annotations[0]), + ], + sorted((e for e in merger.errors + if isinstance(e, NoMatchingAnnError)), + key=lambda e: len(e.sources)) + ) + + def test_attributes(self): + source0 = Dataset.from_iterable([ + DatasetItem(1, annotations=[ + Label(2, attributes={ + 'unique': 1, + 'common_under_quorum': 2, + 'common_over_quorum': 3, + 'ignored': 'q', + }), + ]), + ], categories=['a', 'b', 'c']) + + source1 = Dataset.from_iterable([ + DatasetItem(1, annotations=[ + Label(2, attributes={ + 'common_under_quorum': 2, + 'common_over_quorum': 3, + 'ignored': 'q', + }), + ]), + ], categories=['a', 'b', 'c']) + + source2 = Dataset.from_iterable([ + DatasetItem(1, annotations=[ + Label(2, attributes={ + 'common_over_quorum': 3, + 'ignored': 'q', + }), + ]), + ], categories=['a', 'b', 'c']) + + expected = Dataset.from_iterable([ + DatasetItem(1, annotations=[ + Label(2, attributes={ 'common_over_quorum': 3 }), + ]), + ], categories=['a', 'b', 'c']) + + merger = IntersectMerge(conf={ + 'quorum': 3, 'ignored_attributes': {'ignored'}}) + merged = merger([source0, source1, source2]) + + compare_datasets(self, expected, merged, ignored_attrs={'score'}) + self.assertEqual(2, len([e for e in merger.errors + if isinstance(e, FailedAttrVotingError)]) + ) + + def test_group_checks(self): + dataset = Dataset.from_iterable([ + DatasetItem(1, annotations=[ + Bbox(0, 0, 0, 0, label=0, group=1), # misses an optional label + Bbox(0, 0, 0, 0, label=1, group=1), + + Bbox(0, 0, 0, 0, label=2, group=2), # misses a mandatory label - error + Bbox(0, 0, 0, 0, label=2, group=2), + + Bbox(0, 0, 0, 0, label=4), # misses an optional label + Bbox(0, 0, 0, 0, label=5), # misses a mandatory label - error + Bbox(0, 0, 0, 0, label=0), # misses a mandatory label - error + + Bbox(0, 0, 0, 0, label=3), # not listed - not checked + ]), + ], categories=['a', 'a_g1', 'a_g2_opt', 'b', 'c', 'c_g1_opt']) + + merger = IntersectMerge(conf={'groups': [ + ['a', 'a_g1', 'a_g2_opt?'], ['c', 'c_g1_opt?'] + ]}) + merger([dataset, dataset]) + + self.assertEqual(3, len([e for e in merger.errors + if isinstance(e, WrongGroupError)]), merger.errors + ) + + def test_can_merge_classes(self): + source0 = Dataset.from_iterable([ + DatasetItem(1, annotations=[ + Label(0), + Label(1), + Bbox(0, 0, 1, 1, label=1), + ]), + ], categories=['a', 'b']) + + source1 = Dataset.from_iterable([ + DatasetItem(1, annotations=[ + 
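# label indices here are local to this source's own categories ('b', 'c') +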
Label(0), + Label(1), + Bbox(0, 0, 1, 1, label=0), + Bbox(0, 0, 1, 1, label=1), + ]), + ], categories=['b', 'c']) + + expected = Dataset.from_iterable([ + DatasetItem(1, annotations=[ + Label(0), + Label(1), + Label(2), + Bbox(0, 0, 1, 1, label=1), + Bbox(0, 0, 1, 1, label=2), + ]), + ], categories=['a', 'b', 'c']) + + merger = IntersectMerge() + merged = merger([source0, source1]) + + compare_datasets(self, expected, merged, ignored_attrs={'score'}) + + def test_can_merge_categories(self): + source0 = Dataset.from_iterable([ + DatasetItem(1, annotations=[ Label(0), ]), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable(['a', 'b']), + AnnotationType.points: PointsCategories.from_iterable([ + (0, ['l0', 'l1']), + (1, ['l2', 'l3']), + ]), + AnnotationType.mask: MaskCategories({ + 0: (0, 1, 2), + 1: (1, 2, 3), + }), + }) + + source1 = Dataset.from_iterable([ + DatasetItem(1, annotations=[ Label(0), ]), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable(['c', 'b']), + AnnotationType.points: PointsCategories.from_iterable([ + (0, []), + (1, ['l2', 'l3']), + ]), + AnnotationType.mask: MaskCategories({ + 0: (0, 2, 4), + 1: (1, 2, 3), + }), + }) + + expected = Dataset.from_iterable([ + DatasetItem(1, annotations=[ Label(0), Label(2), ]), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable(['a', 'b', 'c']), + AnnotationType.points: PointsCategories.from_iterable([ + (0, ['l0', 'l1']), + (1, ['l2', 'l3']), + (2, []), + ]), + AnnotationType.mask: MaskCategories({ + 0: (0, 1, 2), + 1: (1, 2, 3), + 2: (0, 2, 4), + }), + }) + + merger = IntersectMerge() + merged = merger([source0, source1]) + + compare_datasets(self, expected, merged, ignored_attrs={'score'}) \ No newline at end of file diff --git a/tests/test_project.py b/tests/test_project.py new file mode 100644 index 0000000000..50d21d3872 --- /dev/null +++ b/tests/test_project.py @@ -0,0 +1,549 @@ +import numpy as np +import os +import os.path as osp + +from unittest import TestCase + +from datumaro.components.project import Project, Environment, Dataset +from datumaro.components.config_model import Source, Model +from datumaro.components.launcher import Launcher, ModelTransform +from datumaro.components.extractor import (Extractor, DatasetItem, + Label, Mask, Points, Polygon, PolyLine, Bbox, Caption, + LabelCategories, AnnotationType +) +from datumaro.util.image import Image +from datumaro.components.config import Config, DefaultConfig, SchemaBuilder +from datumaro.components.dataset_filter import \ + XPathDatasetFilter, XPathAnnotationsFilter, DatasetItemEncoder +from datumaro.util.test_utils import TestDir, compare_datasets + + +class ProjectTest(TestCase): + def test_project_generate(self): + src_config = Config({ + 'project_name': 'test_project', + 'format_version': 1, + }) + + with TestDir() as test_dir: + project_path = test_dir + Project.generate(project_path, src_config) + + self.assertTrue(osp.isdir(project_path)) + + result_config = Project.load(project_path).config + self.assertEqual( + src_config.project_name, result_config.project_name) + self.assertEqual( + src_config.format_version, result_config.format_version) + + @staticmethod + def test_default_ctor_is_ok(): + Project() + + @staticmethod + def test_empty_config_is_ok(): + Project(Config()) + + def test_add_source(self): + source_name = 'source' + origin = Source({ + 'url': 'path', + 'format': 'ext' + }) + project = Project() + + project.add_source(source_name, origin) + + added = project.get_source(source_name) + 
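# the config registered above should be stored and reported back unchanged +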
self.assertIsNotNone(added) + self.assertEqual(added, origin) + + def test_added_source_can_be_saved(self): + source_name = 'source' + origin = Source({ + 'url': 'path', + }) + project = Project() + project.add_source(source_name, origin) + + saved = project.config + + self.assertEqual(origin, saved.sources[source_name]) + + def test_added_source_can_be_dumped(self): + source_name = 'source' + origin = Source({ + 'url': 'path', + }) + project = Project() + project.add_source(source_name, origin) + + with TestDir() as test_dir: + project.save(test_dir) + + loaded = Project.load(test_dir) + loaded = loaded.get_source(source_name) + self.assertEqual(origin, loaded) + + def test_can_import_with_custom_importer(self): + class TestImporter: + def __call__(self, path, subset=None): + return Project({ + 'project_filename': path, + 'subsets': [ subset ] + }) + + path = 'path' + importer_name = 'test_importer' + + env = Environment() + env.importers.register(importer_name, TestImporter) + + project = Project.import_from(path, importer_name, env, + subset='train') + + self.assertEqual(path, project.config.project_filename) + self.assertListEqual(['train'], project.config.subsets) + + def test_can_dump_added_model(self): + model_name = 'model' + + project = Project() + saved = Model({ 'launcher': 'name' }) + project.add_model(model_name, saved) + + with TestDir() as test_dir: + project.save(test_dir) + + loaded = Project.load(test_dir) + loaded = loaded.get_model(model_name) + self.assertEqual(saved, loaded) + + def test_can_have_project_source(self): + with TestDir() as test_dir: + Project.generate(test_dir) + + project2 = Project() + project2.add_source('project1', { + 'url': test_dir, + }) + dataset = project2.make_dataset() + + self.assertTrue('project1' in dataset.sources) + + def test_can_batch_launch_custom_model(self): + dataset = Dataset.from_iterable([ + DatasetItem(id=i, subset='train', image=np.array([i])) + for i in range(5) + ], categories=['label']) + + class TestLauncher(Launcher): + def launch(self, inputs): + for i, inp in enumerate(inputs): + yield [ Label(0, attributes={'idx': i, 'data': inp.item()}) ] + + model_name = 'model' + launcher_name = 'custom_launcher' + + project = Project() + project.env.launchers.register(launcher_name, TestLauncher) + project.add_model(model_name, { 'launcher': launcher_name }) + model = project.make_executable_model(model_name) + + batch_size = 3 + executor = ModelTransform(dataset, model, batch_size=batch_size) + + for item in executor: + self.assertEqual(1, len(item.annotations)) + self.assertEqual(int(item.id) % batch_size, + item.annotations[0].attributes['idx']) + self.assertEqual(int(item.id), + item.annotations[0].attributes['data']) + + def test_can_do_transform_with_custom_model(self): + class TestExtractorSrc(Extractor): + def __iter__(self): + for i in range(2): + yield DatasetItem(id=i, image=np.ones([2, 2, 3]) * i, + annotations=[Label(i)]) + + def categories(self): + label_cat = LabelCategories() + label_cat.add('0') + label_cat.add('1') + return { AnnotationType.label: label_cat } + + class TestLauncher(Launcher): + def launch(self, inputs): + for inp in inputs: + yield [ Label(inp[0, 0, 0]) ] + + class TestExtractorDst(Extractor): + def __init__(self, url): + super().__init__() + self.items = [osp.join(url, p) for p in sorted(os.listdir(url))] + + def __iter__(self): + for path in self.items: + with open(path, 'r') as f: + index = osp.splitext(osp.basename(path))[0] + label = int(f.readline().strip()) + yield DatasetItem(id=index, 
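# the item id comes from the file name, the label from the file contents +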
annotations=[Label(label)]) + + model_name = 'model' + launcher_name = 'custom_launcher' + extractor_name = 'custom_extractor' + + project = Project() + project.env.launchers.register(launcher_name, TestLauncher) + project.env.extractors.register(extractor_name, TestExtractorSrc) + project.add_model(model_name, { 'launcher': launcher_name }) + project.add_source('source', { 'format': extractor_name }) + + with TestDir() as test_dir: + project.make_dataset().apply_model(model=model_name, + save_dir=test_dir) + + result = Project.load(test_dir) + result.env.extractors.register(extractor_name, TestExtractorDst) + it = iter(result.make_dataset()) + item1 = next(it) + item2 = next(it) + self.assertEqual(0, item1.annotations[0].label) + self.assertEqual(1, item2.annotations[0].label) + + def test_source_datasets_can_be_merged(self): + class TestExtractor(Extractor): + def __init__(self, url, n=0, s=0): + super().__init__(length=n) + self.n = n + self.s = s + + def __iter__(self): + for i in range(self.n): + yield DatasetItem(id=self.s + i, subset='train') + + e_name1 = 'e1' + e_name2 = 'e2' + n1 = 2 + n2 = 4 + + project = Project() + project.env.extractors.register(e_name1, lambda p: TestExtractor(p, n=n1)) + project.env.extractors.register(e_name2, lambda p: TestExtractor(p, n=n2, s=n1)) + project.add_source('source1', { 'format': e_name1 }) + project.add_source('source2', { 'format': e_name2 }) + + dataset = project.make_dataset() + + self.assertEqual(n1 + n2, len(dataset)) + + def test_project_filter_can_be_applied(self): + class TestExtractor(Extractor): + def __iter__(self): + for i in range(10): + yield DatasetItem(id=i, subset='train') + + e_type = 'type' + project = Project() + project.env.extractors.register(e_type, TestExtractor) + project.add_source('source', { 'format': e_type }) + + dataset = project.make_dataset().filter('/item[id < 5]') + + self.assertEqual(5, len(dataset)) + + def test_can_save_and_load_own_dataset(self): + with TestDir() as test_dir: + src_project = Project() + src_dataset = src_project.make_dataset() + item = DatasetItem(id=1) + src_dataset.put(item) + src_dataset.save(test_dir) + + loaded_project = Project.load(test_dir) + loaded_dataset = loaded_project.make_dataset() + + self.assertEqual(list(src_dataset), list(loaded_dataset)) + + def test_project_own_dataset_can_be_modified(self): + project = Project() + dataset = project.make_dataset() + + item = DatasetItem(id=1) + dataset.put(item) + + self.assertEqual(item, next(iter(dataset))) + + def test_project_compound_child_can_be_modified_recursively(self): + with TestDir() as test_dir: + child1 = Project({ + 'project_dir': osp.join(test_dir, 'child1'), + }) + child1.save() + + child2 = Project({ + 'project_dir': osp.join(test_dir, 'child2'), + }) + child2.save() + + parent = Project() + parent.add_source('child1', { + 'url': child1.config.project_dir + }) + parent.add_source('child2', { + 'url': child2.config.project_dir + }) + dataset = parent.make_dataset() + + item1 = DatasetItem(id='ch1', path=['child1']) + item2 = DatasetItem(id='ch2', path=['child2']) + dataset.put(item1) + dataset.put(item2) + + self.assertEqual(2, len(dataset)) + self.assertEqual(1, len(dataset.sources['child1'])) + self.assertEqual(1, len(dataset.sources['child2'])) + + def test_project_can_merge_item_annotations(self): + class TestExtractor1(Extractor): + def __iter__(self): + yield DatasetItem(id=1, subset='train', annotations=[ + Label(2, id=3), + Label(3, attributes={ 'x': 1 }), + ]) + + class TestExtractor2(Extractor): + def 
__iter__(self): + yield DatasetItem(id=1, subset='train', annotations=[ + Label(3, attributes={ 'x': 1 }), + Label(4, id=4), + ]) + + project = Project() + project.env.extractors.register('t1', TestExtractor1) + project.env.extractors.register('t2', TestExtractor2) + project.add_source('source1', { 'format': 't1' }) + project.add_source('source2', { 'format': 't2' }) + + merged = project.make_dataset() + + self.assertEqual(1, len(merged)) + + item = next(iter(merged)) + self.assertEqual(3, len(item.annotations)) + +class DatasetFilterTest(TestCase): + @staticmethod + def test_item_representations(): + item = DatasetItem(id=1, subset='subset', path=['a', 'b'], + image=np.ones((5, 4, 3)), + annotations=[ + Label(0, attributes={'a1': 1, 'a2': '2'}, id=1, group=2), + Caption('hello', id=1), + Caption('world', group=5), + Label(2, id=3, attributes={ 'x': 1, 'y': '2' }), + Bbox(1, 2, 3, 4, label=4, id=4, attributes={ 'a': 1.0 }), + Bbox(5, 6, 7, 8, id=5, group=5), + Points([1, 2, 2, 0, 1, 1], label=0, id=5), + Mask(id=5, image=np.ones((3, 2))), + Mask(label=3, id=5, image=np.ones((2, 3))), + PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11), + Polygon([1, 2, 3, 4, 5, 6, 7, 8]), + ] + ) + + encoded = DatasetItemEncoder.encode(item) + DatasetItemEncoder.to_string(encoded) + + def test_item_filter_can_be_applied(self): + class TestExtractor(Extractor): + def __iter__(self): + for i in range(4): + yield DatasetItem(id=i, subset='train') + + extractor = TestExtractor() + + filtered = XPathDatasetFilter(extractor, '/item[id > 1]') + + self.assertEqual(2, len(filtered)) + + def test_annotations_filter_can_be_applied(self): + class SrcExtractor(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id=0), + DatasetItem(id=1, annotations=[ + Label(0), + Label(1), + ]), + DatasetItem(id=2, annotations=[ + Label(0), + Label(2), + ]), + ]) + + class DstExtractor(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id=0), + DatasetItem(id=1, annotations=[ + Label(0), + ]), + DatasetItem(id=2, annotations=[ + Label(0), + ]), + ]) + + extractor = SrcExtractor() + + filtered = XPathAnnotationsFilter(extractor, + '/item/annotation[label_id = 0]') + + self.assertListEqual(list(filtered), list(DstExtractor())) + + def test_annotations_filter_can_remove_empty_items(self): + class SrcExtractor(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id=0), + DatasetItem(id=1, annotations=[ + Label(0), + Label(1), + ]), + DatasetItem(id=2, annotations=[ + Label(0), + Label(2), + ]), + ]) + + class DstExtractor(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id=2, annotations=[ + Label(2), + ]), + ]) + + extractor = SrcExtractor() + + filtered = XPathAnnotationsFilter(extractor, + '/item/annotation[label_id = 2]', remove_empty=True) + + self.assertListEqual(list(filtered), list(DstExtractor())) + +class ConfigTest(TestCase): + def test_can_produce_multilayer_config_from_dict(self): + schema_low = SchemaBuilder() \ + .add('options', dict) \ + .build() + schema_mid = SchemaBuilder() \ + .add('desc', lambda: Config(schema=schema_low)) \ + .build() + schema_top = SchemaBuilder() \ + .add('container', lambda: DefaultConfig( + lambda v: Config(v, schema=schema_mid))) \ + .build() + + value = 1 + source = Config({ + 'container': { + 'elem': { + 'desc': { + 'options': { + 'k': value + } + } + } + } + }, schema=schema_top) + + self.assertEqual(value, source.container['elem'].desc.options['k']) + +class ExtractorTest(TestCase): + def test_custom_extractor_can_be_created(self): + class 
CustomExtractor(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id=0, subset='train'), + DatasetItem(id=1, subset='train'), + DatasetItem(id=2, subset='train'), + + DatasetItem(id=3, subset='test'), + DatasetItem(id=4, subset='test'), + + DatasetItem(id=1), + DatasetItem(id=2), + DatasetItem(id=3), + ]) + + extractor_name = 'ext1' + project = Project() + project.env.extractors.register(extractor_name, CustomExtractor) + project.add_source('src1', { + 'url': 'path', + 'format': extractor_name, + }) + + dataset = project.make_dataset() + + compare_datasets(self, CustomExtractor(), dataset) + +class DatasetTest(TestCase): + def test_create_from_extractors(self): + class SrcExtractor1(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id=1, subset='train', annotations=[ + Bbox(1, 2, 3, 4), + Label(4), + ]), + DatasetItem(id=1, subset='val', annotations=[ + Label(4), + ]), + ]) + + class SrcExtractor2(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id=1, subset='val', annotations=[ + Label(5), + ]), + ]) + + class DstExtractor(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id=1, subset='train', annotations=[ + Bbox(1, 2, 3, 4), + Label(4), + ]), + DatasetItem(id=1, subset='val', annotations=[ + Label(4), + Label(5), + ]), + ]) + + dataset = Dataset.from_extractors(SrcExtractor1(), SrcExtractor2()) + + compare_datasets(self, DstExtractor(), dataset) + + +class DatasetItemTest(TestCase): + def test_ctor_requires_id(self): + with self.assertRaises(Exception): + # pylint: disable=no-value-for-parameter + DatasetItem() + # pylint: enable=no-value-for-parameter + + @staticmethod + def test_ctors_with_image(): + for args in [ + { 'id': 0, 'image': None }, + { 'id': 0, 'image': 'path.jpg' }, + { 'id': 0, 'image': np.array([1, 2, 3]) }, + { 'id': 0, 'image': lambda f: np.array([1, 2, 3]) }, + { 'id': 0, 'image': Image(data=np.array([1, 2, 3])) }, + ]: + DatasetItem(**args) \ No newline at end of file diff --git a/tests/test_tfrecord_format.py b/tests/test_tfrecord_format.py new file mode 100644 index 0000000000..f2dbd160fd --- /dev/null +++ b/tests/test_tfrecord_format.py @@ -0,0 +1,210 @@ +from functools import partial +import numpy as np +import os.path as osp + +from unittest import TestCase, skipIf +from datumaro.components.project import Dataset +from datumaro.components.extractor import (Extractor, DatasetItem, + AnnotationType, Bbox, Mask, LabelCategories +) +from datumaro.components.project import Project +from datumaro.util.image import Image +from datumaro.util.test_utils import TestDir, compare_datasets +from datumaro.util.tf_util import check_import + +try: + from datumaro.plugins.tf_detection_api_format.importer import TfDetectionApiImporter + from datumaro.plugins.tf_detection_api_format.extractor import TfDetectionApiExtractor + from datumaro.plugins.tf_detection_api_format.converter import TfDetectionApiConverter + import_failed = False +except ImportError: + import_failed = True + + import importlib + module_found = importlib.util.find_spec('tensorflow') is not None + + @skipIf(not module_found, "Tensorflow package is not found") + class TfImportTest(TestCase): + def test_raises_when_crashes_on_import(self): + # Should fire if import can't be done for any reason except + # module unavailability and import crash + with self.assertRaisesRegex(ImportError, 'Test process exit code'): + check_import() + +@skipIf(import_failed, "Failed to import tensorflow") +class TfrecordConverterTest(TestCase): + def _test_save_and_load(self, 
source_dataset, converter, test_dir, + target_dataset=None, importer_args=None): + converter(source_dataset, test_dir) + + if importer_args is None: + importer_args = {} + parsed_dataset = TfDetectionApiImporter()(test_dir, **importer_args) \ + .make_dataset() + + if target_dataset is None: + target_dataset = source_dataset + + compare_datasets(self, expected=target_dataset, actual=parsed_dataset) + + def test_can_save_bboxes(self): + test_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', + image=np.ones((16, 16, 3)), + annotations=[ + Bbox(0, 4, 4, 8, label=2), + Bbox(0, 4, 4, 4, label=3), + Bbox(2, 4, 4, 4), + ], attributes={'source_id': ''} + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(label) for label in range(10)), + }) + + with TestDir() as test_dir: + self._test_save_and_load( + test_dataset, + partial(TfDetectionApiConverter.convert, save_images=True), + test_dir) + + def test_can_save_masks(self): + test_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', image=np.ones((4, 5, 3)), + annotations=[ + Mask(image=np.array([ + [1, 0, 0, 1], + [0, 1, 1, 0], + [0, 1, 1, 0], + [1, 0, 0, 1], + ]), label=1), + ], + attributes={'source_id': ''} + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(label) for label in range(10)), + }) + + with TestDir() as test_dir: + self._test_save_and_load( + test_dataset, + partial(TfDetectionApiConverter.convert, save_masks=True), + test_dir) + + def test_can_save_dataset_with_no_subsets(self): + test_dataset = Dataset.from_iterable([ + DatasetItem(id=1, + image=np.ones((16, 16, 3)), + annotations=[ + Bbox(2, 1, 4, 4, label=2), + Bbox(4, 2, 8, 4, label=3), + ], + attributes={'source_id': ''} + ), + + DatasetItem(id=2, + image=np.ones((8, 8, 3)) * 2, + annotations=[ + Bbox(4, 4, 4, 4, label=3), + ], + attributes={'source_id': ''} + ), + + DatasetItem(id=3, + image=np.ones((8, 4, 3)) * 3, + attributes={'source_id': ''} + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(label) for label in range(10)), + }) + + with TestDir() as test_dir: + self._test_save_and_load( + test_dataset, + partial(TfDetectionApiConverter.convert, save_images=True), + test_dir) + + def test_can_save_dataset_with_image_info(self): + test_dataset = Dataset.from_iterable([ + DatasetItem(id='1/q.e', + image=Image(path='1/q.e', size=(10, 15)), + attributes={'source_id': ''} + ) + ], categories={ + AnnotationType.label: LabelCategories(), + }) + + with TestDir() as test_dir: + self._test_save_and_load(test_dataset, + TfDetectionApiConverter.convert, test_dir) + + def test_labelmap_parsing(self): + text = """ + { + id: 4 + name: 'qw1' + } + { + id: 5 name: 'qw2' + } + + { + name: 'qw3' + id: 6 + } + {name:'qw4' id:7} + """ + expected = { + 'qw1': 4, + 'qw2': 5, + 'qw3': 6, + 'qw4': 7, + } + parsed = TfDetectionApiExtractor._parse_labelmap(text) + + self.assertEqual(expected, parsed) + + +DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), + 'assets', 'tf_detection_api_dataset') + +@skipIf(import_failed, "Failed to import tensorflow") +class TfrecordImporterTest(TestCase): + def test_can_detect(self): + self.assertTrue(TfDetectionApiImporter.detect(DUMMY_DATASET_DIR)) + + def test_can_import(self): + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', + image=np.ones((16, 16, 3)), + annotations=[ + Bbox(0, 4, 4, 8, label=2), + Bbox(0, 4, 4, 4, label=3), + Bbox(2, 4, 4, 4), + ], + 
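# the importer is expected to expose the record's source_id as an item attribute +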
attributes={'source_id': '1'} + ), + + DatasetItem(id=2, subset='val', + image=np.ones((8, 8, 3)), + annotations=[ + Bbox(1, 2, 4, 2, label=3), + ], + attributes={'source_id': '2'} + ), + + DatasetItem(id=3, subset='test', + image=np.ones((5, 4, 3)) * 3, + attributes={'source_id': '3'} + ), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(label) for label in range(10)), + }) + + dataset = Project.import_from(DUMMY_DATASET_DIR, 'tf_detection_api') \ + .make_dataset() + + compare_datasets(self, target_dataset, dataset) diff --git a/tests/test_transforms.py b/tests/test_transforms.py new file mode 100644 index 0000000000..ed072a67b4 --- /dev/null +++ b/tests/test_transforms.py @@ -0,0 +1,415 @@ +import logging as log +import numpy as np + +from unittest import TestCase +from datumaro.components.project import Dataset +from datumaro.components.extractor import (Extractor, DatasetItem, + Mask, Polygon, PolyLine, Points, Bbox, Label, + LabelCategories, MaskCategories, AnnotationType +) +import datumaro.util.mask_tools as mask_tools +import datumaro.plugins.transforms as transforms +from datumaro.util.test_utils import compare_datasets + + +class TransformsTest(TestCase): + def test_reindex(self): + class SrcExtractor(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id=10), + DatasetItem(id=10, subset='train'), + DatasetItem(id='a', subset='val'), + ]) + + class DstExtractor(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id=5), + DatasetItem(id=6, subset='train'), + DatasetItem(id=7, subset='val'), + ]) + + actual = transforms.Reindex(SrcExtractor(), start=5) + compare_datasets(self, DstExtractor(), actual) + + def test_mask_to_polygons(self): + class SrcExtractor(Extractor): + def __iter__(self): + items = [ + DatasetItem(id=1, image=np.zeros((5, 10, 3)), + annotations=[ + Mask(np.array([ + [0, 1, 1, 1, 0, 1, 1, 1, 1, 0], + [0, 0, 1, 1, 0, 1, 1, 1, 0, 0], + [0, 0, 0, 1, 0, 1, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + ]), + ), + ] + ), + ] + return iter(items) + + class DstExtractor(Extractor): + def __iter__(self): + return iter([ + DatasetItem(id=1, image=np.zeros((5, 10, 3)), + annotations=[ + Polygon([3.0, 2.5, 1.0, 0.0, 3.5, 0.0, 3.0, 2.5]), + Polygon([5.0, 3.5, 4.5, 0.0, 8.0, 0.0, 5.0, 3.5]), + ] + ), + ]) + + actual = transforms.MasksToPolygons(SrcExtractor()) + compare_datasets(self, DstExtractor(), actual) + + def test_mask_to_polygons_small_polygons_message(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 10, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 0], + [0, 1, 0], + [0, 0, 0], + ]), + ), + ] + ), + ]) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 10, 3))), ]) + + with self.assertLogs(level=log.DEBUG) as logs: + actual = transforms.MasksToPolygons(source_dataset) + + compare_datasets(self, target_dataset, actual) + self.assertRegex('\n'.join(logs.output), 'too small polygons') + + def test_polygons_to_masks(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 10, 3)), + annotations=[ + Polygon([0, 0, 4, 0, 4, 4]), + Polygon([5, 0, 9, 0, 5, 5]), + ] + ), + ]) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 10, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 0, 0, 0, 1, 1, 1, 1, 0], + [0, 0, 0, 0, 0, 1, 1, 1, 0, 0], + [0, 0, 0, 0, 0, 1, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + ]), 
+ ), + Mask(np.array([ + [0, 1, 1, 1, 0, 0, 0, 0, 0, 0], + [0, 0, 1, 1, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + ]), + ), + ] + ), + ]) + + actual = transforms.PolygonsToMasks(source_dataset) + compare_datasets(self, target_dataset, actual) + + def test_crop_covered_segments(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + # The mask is partially covered by the polygon + Mask(np.array([ + [0, 0, 1, 1, 1], + [0, 0, 1, 1, 1], + [1, 1, 1, 1, 1], + [1, 1, 1, 0, 0], + [1, 1, 1, 0, 0]], + ), + z_order=0), + Polygon([1, 1, 4, 1, 4, 4, 1, 4], + z_order=1), + ] + ), + ]) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 1, 1, 1], + [0, 0, 0, 0, 1], + [1, 0, 0, 0, 1], + [1, 0, 0, 0, 0], + [1, 1, 1, 0, 0]], + ), + z_order=0), + Polygon([1, 1, 4, 1, 4, 4, 1, 4], + z_order=1), + ] + ), + ]) + + actual = transforms.CropCoveredSegments(source_dataset) + compare_datasets(self, target_dataset, actual) + + def test_merge_instance_segments(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 1, 1, 1], + [0, 0, 0, 0, 1], + [1, 0, 0, 0, 1], + [1, 0, 0, 0, 0], + [1, 1, 1, 0, 0]], + ), + z_order=0, group=1), + Polygon([1, 1, 4, 1, 4, 4, 1, 4], + z_order=1, group=1), + Polygon([0, 0, 0, 2, 2, 2, 2, 0], + z_order=1), + ] + ), + ]) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 1, 1, 1], + [0, 1, 1, 1, 1], + [1, 1, 1, 1, 1], + [1, 1, 1, 1, 0], + [1, 1, 1, 0, 0]], + ), + z_order=0, group=1), + Mask(np.array([ + [1, 1, 0, 0, 0], + [1, 1, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0]], + ), + z_order=1), + ] + ), + ]) + + actual = transforms.MergeInstanceSegments(source_dataset, + include_polygons=True) + compare_datasets(self, target_dataset, actual) + + def test_map_subsets(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='a'), + DatasetItem(id=2, subset='b'), + DatasetItem(id=3, subset='c'), + ]) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset=''), + DatasetItem(id=2, subset='a'), + DatasetItem(id=3, subset='c'), + ]) + + actual = transforms.MapSubsets(source_dataset, + { 'a': '', 'b': 'a' }) + compare_datasets(self, target_dataset, actual) + + def test_shapes_to_boxes(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Mask(np.array([ + [0, 0, 1, 1, 1], + [0, 0, 0, 0, 1], + [1, 0, 0, 0, 1], + [1, 0, 0, 0, 0], + [1, 1, 1, 0, 0]], + ), id=1), + Polygon([1, 1, 4, 1, 4, 4, 1, 4], id=2), + PolyLine([1, 1, 2, 1, 2, 2, 1, 2], id=3), + Points([2, 2, 4, 2, 4, 4, 2, 4], id=4), + ] + ), + ]) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Bbox(0, 0, 4, 4, id=1), + Bbox(1, 1, 3, 3, id=2), + Bbox(1, 1, 1, 1, id=3), + Bbox(2, 2, 2, 2, id=4), + ] + ), + ]) + + actual = transforms.ShapesToBoxes(source_dataset) + compare_datasets(self, target_dataset, actual) + + def test_id_from_image(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image='path.jpg'), + DatasetItem(id=2), + ]) + target_dataset = Dataset.from_iterable([ + DatasetItem(id='path', image='path.jpg'), + DatasetItem(id=2), + ]) + + actual = 
transforms.IdFromImageName(source_dataset) + compare_datasets(self, target_dataset, actual) + + def test_boxes_to_masks(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Bbox(0, 0, 3, 3, z_order=1), + Bbox(0, 0, 3, 1, z_order=2), + Bbox(0, 2, 3, 1, z_order=3), + ] + ), + ]) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1, image=np.zeros((5, 5, 3)), + annotations=[ + Mask(np.array([ + [1, 1, 1, 0, 0], + [1, 1, 1, 0, 0], + [1, 1, 1, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0]], + ), + z_order=1), + Mask(np.array([ + [1, 1, 1, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0]], + ), + z_order=2), + Mask(np.array([ + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [1, 1, 1, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0]], + ), + z_order=3), + ] + ), + ]) + + actual = transforms.BoxesToMasks(source_dataset) + compare_datasets(self, target_dataset, actual) + + def test_random_split(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset="a"), + DatasetItem(id=2, subset="a"), + DatasetItem(id=3, subset="b"), + DatasetItem(id=4, subset="b"), + DatasetItem(id=5, subset="b"), + DatasetItem(id=6, subset=""), + DatasetItem(id=7, subset=""), + ]) + + actual = transforms.RandomSplit(source_dataset, splits=[ + ('train', 4.0 / 7.0), + ('test', 3.0 / 7.0), + ]) + + self.assertEqual(4, len(actual.get_subset('train'))) + self.assertEqual(3, len(actual.get_subset('test'))) + + def test_random_split_gives_error_on_wrong_ratios(self): + source_dataset = Dataset.from_iterable([DatasetItem(id=1)]) + + with self.assertRaises(Exception): + transforms.RandomSplit(source_dataset, splits=[ + ('train', 0.5), + ('test', 0.7), + ]) + + with self.assertRaises(Exception): + transforms.RandomSplit(source_dataset, splits=[]) + + with self.assertRaises(Exception): + transforms.RandomSplit(source_dataset, splits=[ + ('train', -0.5), + ('test', 1.5), + ]) + + def test_remap_labels(self): + src_dataset = Dataset.from_iterable([ + DatasetItem(id=1, annotations=[ + # Should be remapped + Label(1), + Bbox(1, 2, 3, 4, label=2), + Mask(image=np.array([1]), label=3), + + # Should be kept + Polygon([1, 1, 2, 2, 3, 4], label=4), + PolyLine([1, 3, 4, 2, 5, 6]) + ]) + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label%s' % i for i in range(5)), + AnnotationType.mask: MaskCategories( + colormap=mask_tools.generate_colormap(5)), + }) + + dst_dataset = Dataset.from_iterable([ + DatasetItem(id=1, annotations=[ + Label(1), + Bbox(1, 2, 3, 4, label=0), + Mask(image=np.array([1]), label=1), + + Polygon([1, 1, 2, 2, 3, 4], label=2), + PolyLine([1, 3, 4, 2, 5, 6], label=None) + ]), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + ['label0', 'label9', 'label4']), + AnnotationType.mask: MaskCategories(colormap={ + k: v for k, v in mask_tools.generate_colormap(5).items() + if k in { 0, 1, 3, 4 } + }) + }) + + actual = transforms.RemapLabels(src_dataset, mapping={ + 'label1': 'label9', + 'label2': 'label0', + 'label3': 'label9', + }, default='keep') + + compare_datasets(self, dst_dataset, actual) + + def test_remap_labels_delete_unspecified(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, annotations=[ Label(0) ]) + ], categories=['label0']) + + target_dataset = Dataset.from_iterable([ + DatasetItem(id=1), + ], categories=[]) + + actual = transforms.RemapLabels(source_dataset, + mapping={}, default='delete') + + compare_datasets(self, target_dataset, actual) 
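The transforms exercised above all share one pattern: a transform wraps an extractor and produces modified items lazily, so transforms can be stacked without copying data. Below is a minimal usage sketch of `RemapLabels` outside the test harness; `Dataset.from_iterable` and the `RemapLabels` parameters are the same APIs the tests call, while the label names and the mapping are invented for illustration.

``` python
import datumaro.plugins.transforms as transforms
from datumaro.components.extractor import DatasetItem, Label
from datumaro.components.project import Dataset

# A tiny dataset with invented labels (illustrative data only).
dataset = Dataset.from_iterable([
    DatasetItem(id=1, annotations=[Label(0)]),
    DatasetItem(id=2, annotations=[Label(1)]),
], categories=['cat', 'dog'])

# Rename 'cat' to 'animal' and keep all other labels as-is,
# mirroring test_remap_labels above.
remapped = transforms.RemapLabels(dataset,
    mapping={'cat': 'animal'}, default='keep')

# Items are recomputed on iteration; nothing is materialized up front.
for item in remapped:
    print(item.id, [a.label for a in item.annotations])
```

Because every transform is itself an extractor, the same wrapping style applies to the others tested above (`MasksToPolygons`, `ShapesToBoxes`, and so on), which is why the tests can hand them straight to `compare_datasets`.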
diff --git a/tests/test_voc_format.py b/tests/test_voc_format.py new file mode 100644 index 0000000000..e83a743095 --- /dev/null +++ b/tests/test_voc_format.py @@ -0,0 +1,677 @@ +from collections import OrderedDict +from functools import partial +import numpy as np +import os.path as osp + +from unittest import TestCase + +from datumaro.components.extractor import (Extractor, DatasetItem, + AnnotationType, Label, Bbox, Mask, LabelCategories, +) +import datumaro.plugins.voc_format.format as VOC +from datumaro.plugins.voc_format.converter import ( + VocConverter, + VocClassificationConverter, + VocDetectionConverter, + VocLayoutConverter, + VocActionConverter, + VocSegmentationConverter, +) +from datumaro.plugins.voc_format.importer import VocImporter +from datumaro.components.project import Project +from datumaro.util.image import Image +from datumaro.util.test_utils import TestDir, compare_datasets + + +class VocFormatTest(TestCase): + def test_colormap_generator(self): + reference = np.array([ + [ 0, 0, 0], + [128, 0, 0], + [ 0, 128, 0], + [128, 128, 0], + [ 0, 0, 128], + [128, 0, 128], + [ 0, 128, 128], + [128, 128, 128], + [ 64, 0, 0], + [192, 0, 0], + [ 64, 128, 0], + [192, 128, 0], + [ 64, 0, 128], + [192, 0, 128], + [ 64, 128, 128], + [192, 128, 128], + [ 0, 64, 0], + [128, 64, 0], + [ 0, 192, 0], + [128, 192, 0], + [ 0, 64, 128], + [224, 224, 192], # ignored + ]) + + self.assertTrue(np.array_equal(reference, list(VOC.VocColormap.values()))) + + def test_can_write_and_parse_labelmap(self): + src_label_map = VOC.make_voc_label_map() + src_label_map['qq'] = [None, ['part1', 'part2'], ['act1', 'act2']] + src_label_map['ww'] = [(10, 20, 30), [], ['act3']] + + with TestDir() as test_dir: + file_path = osp.join(test_dir, 'test.txt') + + VOC.write_label_map(file_path, src_label_map) + dst_label_map = VOC.parse_label_map(file_path) + + self.assertEqual(src_label_map, dst_label_map) + +class TestExtractorBase(Extractor): + def _label(self, voc_label): + return self.categories()[AnnotationType.label].find(voc_label)[0] + + def categories(self): + return VOC.make_voc_categories() + + +DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'voc_dataset') + +class VocImportTest(TestCase): + def test_can_import(self): + class DstExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id='2007_000001', subset='train', + image=Image(path='2007_000001.jpg', size=(20, 10)), + annotations=[ + Label(self._label(l.name)) + for l in VOC.VocLabel if l.value % 2 == 1 + ] + [ + Bbox(1, 2, 2, 2, label=self._label('cat'), + attributes={ + 'pose': VOC.VocPose(1).name, + 'truncated': True, + 'difficult': False, + 'occluded': False, + }, + id=1, group=1, + ), + Bbox(4, 5, 2, 2, label=self._label('person'), + attributes={ + 'truncated': False, + 'difficult': False, + 'occluded': False, + **{ + a.name: a.value % 2 == 1 + for a in VOC.VocAction + } + }, + id=2, group=2, + ), + Bbox(5.5, 6, 2, 2, label=self._label( + VOC.VocBodyPart(1).name), + group=2 + ), + Mask(image=np.ones([5, 10]), + label=self._label(VOC.VocLabel(2).name), + group=1, + ), + ] + ), + DatasetItem(id='2007_000002', subset='test', + image=np.zeros((20, 10, 3))), + ]) + + dataset = Project.import_from(DUMMY_DATASET_DIR, 'voc').make_dataset() + + compare_datasets(self, DstExtractor(), dataset) + + def test_can_detect_voc(self): + self.assertTrue(VocImporter.detect(DUMMY_DATASET_DIR)) + +class VocConverterTest(TestCase): + def _test_save_and_load(self, source_dataset, converter, test_dir, + target_dataset=None, 
importer_args=None): + converter(source_dataset, test_dir) + + if importer_args is None: + importer_args = {} + parsed_dataset = VocImporter()(test_dir, **importer_args).make_dataset() + + if target_dataset is None: + target_dataset = source_dataset + + compare_datasets(self, expected=target_dataset, actual=parsed_dataset) + + def test_can_save_voc_cls(self): + class TestExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id='a/0', subset='a', annotations=[ + Label(1), + Label(2), + Label(3), + ]), + + DatasetItem(id=1, subset='b', annotations=[ + Label(4), + ]), + ]) + + with TestDir() as test_dir: + self._test_save_and_load(TestExtractor(), + partial(VocClassificationConverter.convert, label_map='voc'), + test_dir) + + def test_can_save_voc_det(self): + class TestExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id='a/1', subset='a', annotations=[ + Bbox(2, 3, 4, 5, label=2, + attributes={ 'occluded': True } + ), + Bbox(2, 3, 4, 5, label=3, + attributes={ 'truncated': True }, + ), + ]), + + DatasetItem(id=2, subset='b', annotations=[ + Bbox(5, 4, 6, 5, label=3, + attributes={ 'difficult': True }, + ), + ]), + ]) + + class DstExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id='a/1', subset='a', annotations=[ + Bbox(2, 3, 4, 5, label=2, id=1, group=1, + attributes={ + 'truncated': False, + 'difficult': False, + 'occluded': True, + } + ), + Bbox(2, 3, 4, 5, label=3, id=2, group=2, + attributes={ + 'truncated': True, + 'difficult': False, + 'occluded': False, + }, + ), + ]), + + DatasetItem(id=2, subset='b', annotations=[ + Bbox(5, 4, 6, 5, label=3, id=1, group=1, + attributes={ + 'truncated': False, + 'difficult': True, + 'occluded': False, + }, + ), + ]), + ]) + + with TestDir() as test_dir: + self._test_save_and_load(TestExtractor(), + partial(VocDetectionConverter.convert, label_map='voc'), + test_dir, target_dataset=DstExtractor()) + + def test_can_save_voc_segm(self): + class TestExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id='a/b/1', subset='a', annotations=[ + # overlapping masks, the first should be truncated + # the second and third are different instances + Mask(image=np.array([[0, 0, 0, 1, 0]]), label=3, + z_order=3), + Mask(image=np.array([[0, 1, 1, 1, 0]]), label=4, + z_order=1), + Mask(image=np.array([[1, 1, 0, 0, 0]]), label=3, + z_order=2), + ]), + ]) + + class DstExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id='a/b/1', subset='a', annotations=[ + Mask(image=np.array([[0, 0, 1, 0, 0]]), label=4, + group=1), + Mask(image=np.array([[1, 1, 0, 0, 0]]), label=3, + group=2), + Mask(image=np.array([[0, 0, 0, 1, 0]]), label=3, + group=3), + ]), + ]) + + with TestDir() as test_dir: + self._test_save_and_load(TestExtractor(), + partial(VocSegmentationConverter.convert, label_map='voc'), + test_dir, target_dataset=DstExtractor()) + + def test_can_save_voc_segm_unpainted(self): + class TestExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id=1, subset='a', annotations=[ + # overlapping masks, the first should be truncated + # the second and third are different instances + Mask(image=np.array([[0, 0, 0, 1, 0]]), label=3, + z_order=3), + Mask(image=np.array([[0, 1, 1, 1, 0]]), label=4, + z_order=1), + Mask(image=np.array([[1, 1, 0, 0, 0]]), label=3, + z_order=2), + ]), + ]) + + class DstExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id=1, subset='a', 
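+                    # expected masks are the same as in test_can_save_voc_segm, only written without a colormap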
annotations=[ + Mask(image=np.array([[0, 0, 1, 0, 0]]), label=4, + group=1), + Mask(image=np.array([[1, 1, 0, 0, 0]]), label=3, + group=2), + Mask(image=np.array([[0, 0, 0, 1, 0]]), label=3, + group=3), + ]), + ]) + + with TestDir() as test_dir: + self._test_save_and_load(TestExtractor(), + partial(VocSegmentationConverter.convert, + label_map='voc', apply_colormap=False), + test_dir, target_dataset=DstExtractor()) + + def test_can_save_voc_segm_with_many_instances(self): + def bit(x, y, shape): + mask = np.zeros(shape) + mask[y, x] = 1 + return mask + + class TestExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id=1, subset='a', annotations=[ + Mask(image=bit(x, y, shape=[10, 10]), + label=self._label(VOC.VocLabel(3).name), + z_order=10 * y + x + 1 + ) + for y in range(10) for x in range(10) + ]), + ]) + + class DstExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id=1, subset='a', annotations=[ + Mask(image=bit(x, y, shape=[10, 10]), + label=self._label(VOC.VocLabel(3).name), + group=10 * y + x + 1 + ) + for y in range(10) for x in range(10) + ]), + ]) + + with TestDir() as test_dir: + self._test_save_and_load(TestExtractor(), + partial(VocSegmentationConverter.convert, label_map='voc'), + test_dir, target_dataset=DstExtractor()) + + def test_can_save_voc_layout(self): + class TestExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id='a/b/1', subset='a', annotations=[ + Bbox(2, 3, 4, 5, label=2, id=1, group=1, + attributes={ + 'pose': VOC.VocPose(1).name, + 'truncated': True, + 'difficult': False, + 'occluded': False, + } + ), + Bbox(2, 3, 1, 1, label=self._label( + VOC.VocBodyPart(1).name), group=1), + Bbox(5, 4, 3, 2, label=self._label( + VOC.VocBodyPart(2).name), group=1), + ]), + ]) + + with TestDir() as test_dir: + self._test_save_and_load(TestExtractor(), + partial(VocLayoutConverter.convert, label_map='voc'), test_dir) + + def test_can_save_voc_action(self): + class TestExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id='a/b/1', subset='a', annotations=[ + Bbox(2, 3, 4, 5, label=2, + attributes={ + 'truncated': True, + VOC.VocAction(1).name: True, + VOC.VocAction(2).name: True, + } + ), + Bbox(5, 4, 3, 2, label=self._label('person'), + attributes={ + 'truncated': True, + VOC.VocAction(1).name: True, + VOC.VocAction(2).name: True, + } + ), + ]), + ]) + + class DstExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id='a/b/1', subset='a', annotations=[ + Bbox(2, 3, 4, 5, label=2, + id=1, group=1, attributes={ + 'truncated': True, + 'difficult': False, + 'occluded': False, + # no attributes here in the label categories + } + ), + Bbox(5, 4, 3, 2, label=self._label('person'), + id=2, group=2, attributes={ + 'truncated': True, + 'difficult': False, + 'occluded': False, + VOC.VocAction(1).name: True, + VOC.VocAction(2).name: True, + **{ + a.name: False for a in VOC.VocAction + if a.value not in {1, 2} + } + } + ), + ]), + ]) + + with TestDir() as test_dir: + self._test_save_and_load(TestExtractor(), + partial(VocActionConverter.convert, + label_map='voc', allow_attributes=False), test_dir, + target_dataset=DstExtractor()) + + def test_can_save_dataset_with_no_subsets(self): + class TestExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id=1, annotations=[ + Label(2), + Label(3), + ]), + + DatasetItem(id=2, annotations=[ + Label(3), + ]), + ]) + + with TestDir() as test_dir: + 
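+            # items without a subset should survive the save/load round trip unchanged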
self._test_save_and_load(TestExtractor(), + partial(VocConverter.convert, label_map='voc'), test_dir) + + def test_can_save_dataset_with_images(self): + class TestExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id=1, subset='a', image=np.ones([4, 5, 3])), + DatasetItem(id=2, subset='a', image=np.ones([5, 4, 3])), + + DatasetItem(id=3, subset='b', image=np.ones([2, 6, 3])), + ]) + + with TestDir() as test_dir: + self._test_save_and_load(TestExtractor(), + partial(VocConverter.convert, label_map='voc', save_images=True), + test_dir) + + def test_dataset_with_voc_labelmap(self): + class SrcExtractor(TestExtractorBase): + def __iter__(self): + yield DatasetItem(id=1, annotations=[ + Bbox(2, 3, 4, 5, label=self._label('cat'), id=1), + Bbox(1, 2, 3, 4, label=self._label('non_voc_label'), id=2), + ]) + + def categories(self): + label_cat = LabelCategories() + label_cat.add(VOC.VocLabel.cat.name) + label_cat.add('non_voc_label') + return { + AnnotationType.label: label_cat, + } + + class DstExtractor(TestExtractorBase): + def __iter__(self): + yield DatasetItem(id=1, annotations=[ + # the non-VOC label is dropped on export + Bbox(2, 3, 4, 5, label=self._label('cat'), id=1, group=1, + attributes={ + 'truncated': False, + 'difficult': False, + 'occluded': False, + } + ), + ]) + + def categories(self): + return VOC.make_voc_categories() + + with TestDir() as test_dir: + self._test_save_and_load(SrcExtractor(), + partial(VocConverter.convert, label_map='voc'), + test_dir, target_dataset=DstExtractor()) + + def test_dataset_with_source_labelmap_undefined(self): + class SrcExtractor(TestExtractorBase): + def __iter__(self): + yield DatasetItem(id=1, annotations=[ + Bbox(2, 3, 4, 5, label=0, id=1), + Bbox(1, 2, 3, 4, label=1, id=2), + ]) + + def categories(self): + label_cat = LabelCategories() + label_cat.add('Label_1') + label_cat.add('label_2') + return { + AnnotationType.label: label_cat, + } + + class DstExtractor(TestExtractorBase): + def __iter__(self): + yield DatasetItem(id=1, annotations=[ + Bbox(2, 3, 4, 5, label=self._label('Label_1'), + id=1, group=1, attributes={ + 'truncated': False, + 'difficult': False, + 'occluded': False, + } + ), + Bbox(1, 2, 3, 4, label=self._label('label_2'), + id=2, group=2, attributes={ + 'truncated': False, + 'difficult': False, + 'occluded': False, + } + ), + ]) + + def categories(self): + label_map = OrderedDict() + label_map['background'] = [None, [], []] + label_map['Label_1'] = [None, [], []] + label_map['label_2'] = [None, [], []] + return VOC.make_voc_categories(label_map) + + with TestDir() as test_dir: + self._test_save_and_load(SrcExtractor(), + partial(VocConverter.convert, label_map='source'), + test_dir, target_dataset=DstExtractor()) + + def test_dataset_with_source_labelmap_defined(self): + class SrcExtractor(TestExtractorBase): + def __iter__(self): + yield DatasetItem(id=1, annotations=[ + Bbox(2, 3, 4, 5, label=0, id=1), + Bbox(1, 2, 3, 4, label=2, id=2), + ]) + + def categories(self): + label_map = OrderedDict() + label_map['label_1'] = [(1, 2, 3), [], []] + label_map['background'] = [(0, 0, 0), [], []] # background does not have to be the first label + label_map['label_2'] = [(3, 2, 1), [], []] + return VOC.make_voc_categories(label_map) + + class DstExtractor(TestExtractorBase): + def __iter__(self): + yield DatasetItem(id=1, annotations=[ + Bbox(2, 3, 4, 5, label=self._label('label_1'), + id=1, group=1, attributes={ + 'truncated': False, + 'difficult': False, + 'occluded': False, + } + ), + Bbox(1, 2, 3, 4, label=self._label('label_2'), + id=2, group=2,
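+                        # the converter fills in the default VOC attributes on export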
attributes={ + 'truncated': False, + 'difficult': False, + 'occluded': False, + } + ), + ]) + + def categories(self): + label_map = OrderedDict() + label_map['background'] = [(0, 0, 0), [], []] + label_map['label_1'] = [(1, 2, 3), [], []] + label_map['label_2'] = [(3, 2, 1), [], []] + return VOC.make_voc_categories(label_map) + + with TestDir() as test_dir: + self._test_save_and_load(SrcExtractor(), + partial(VocConverter.convert, label_map='source'), + test_dir, target_dataset=DstExtractor()) + + def test_dataset_with_fixed_labelmap(self): + class SrcExtractor(TestExtractorBase): + def __iter__(self): + yield DatasetItem(id=1, annotations=[ + Bbox(2, 3, 4, 5, label=self._label('foreign_label'), id=1), + Bbox(1, 2, 3, 4, label=self._label('label'), id=2, group=2, + attributes={'act1': True}), + Bbox(2, 3, 4, 5, label=self._label('label_part1'), group=2), + Bbox(2, 3, 4, 6, label=self._label('label_part2'), group=2), + ]) + + def categories(self): + label_cat = LabelCategories() + label_cat.add('foreign_label') + label_cat.add('label', attributes=['act1', 'act2']) + label_cat.add('label_part1') + label_cat.add('label_part2') + return { + AnnotationType.label: label_cat, + } + + label_map = OrderedDict([ + ('label', [None, ['label_part1', 'label_part2'], ['act1', 'act2']]) + ]) + + dst_label_map = OrderedDict([ + ('background', [None, [], []]), + ('label', [None, ['label_part1', 'label_part2'], ['act1', 'act2']]) + ]) + + class DstExtractor(TestExtractorBase): + def __iter__(self): + yield DatasetItem(id=1, annotations=[ + Bbox(1, 2, 3, 4, label=self._label('label'), id=1, group=1, + attributes={ + 'act1': True, + 'act2': False, + 'truncated': False, + 'difficult': False, + 'occluded': False, + } + ), + Bbox(2, 3, 4, 5, label=self._label('label_part1'), group=1), + Bbox(2, 3, 4, 6, label=self._label('label_part2'), group=1), + ]) + + def categories(self): + return VOC.make_voc_categories(dst_label_map) + + with TestDir() as test_dir: + self._test_save_and_load(SrcExtractor(), + partial(VocConverter.convert, label_map=label_map), + test_dir, target_dataset=DstExtractor()) + + def test_can_save_dataset_with_image_info(self): + class TestExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id=1, image=Image(path='1.jpg', size=(10, 15))), + ]) + + with TestDir() as test_dir: + self._test_save_and_load(TestExtractor(), + partial(VocConverter.convert, label_map='voc'), test_dir) + + def test_relative_paths(self): + class TestExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id='1', image=np.ones((4, 2, 3))), + DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))), + DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))), + ]) + + with TestDir() as test_dir: + self._test_save_and_load(TestExtractor(), + partial(VocConverter.convert, + label_map='voc', save_images=True), + test_dir) + + def test_can_save_attributes(self): + class TestExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id='a', annotations=[ + Bbox(2, 3, 4, 5, label=2, + attributes={ 'occluded': True, 'x': 1, 'y': '2' } + ), + ]), + ]) + + class DstExtractor(TestExtractorBase): + def __iter__(self): + return iter([ + DatasetItem(id='a', annotations=[ + Bbox(2, 3, 4, 5, label=2, id=1, group=1, + attributes={ + 'truncated': False, + 'difficult': False, + 'occluded': True, + 'x': '1', 'y': '2', # can only read strings + } + ), + ]), + ]) + + with TestDir() as test_dir: + self._test_save_and_load(TestExtractor(), + partial(VocConverter.convert, 
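+                    # VOC XML stores attribute values as text, so non-string values are read back as strings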
label_map='voc'), test_dir, + target_dataset=DstExtractor()) diff --git a/tests/test_yolo_format.py b/tests/test_yolo_format.py new file mode 100644 index 0000000000..1f6425d1bc --- /dev/null +++ b/tests/test_yolo_format.py @@ -0,0 +1,140 @@ +import numpy as np +import os.path as osp + +from unittest import TestCase + +from datumaro.components.extractor import (Extractor, DatasetItem, + AnnotationType, Bbox, LabelCategories, +) +from datumaro.components.project import Project, Dataset +from datumaro.plugins.yolo_format.importer import YoloImporter +from datumaro.plugins.yolo_format.converter import YoloConverter +from datumaro.util.image import Image, save_image +from datumaro.util.test_utils import TestDir, compare_datasets + + +class YoloFormatTest(TestCase): + def test_can_save_and_load(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', image=np.ones((8, 8, 3)), + annotations=[ + Bbox(0, 2, 4, 2, label=2), + Bbox(0, 1, 2, 3, label=4), + ]), + DatasetItem(id=2, subset='train', image=np.ones((10, 10, 3)), + annotations=[ + Bbox(0, 2, 4, 2, label=2), + Bbox(3, 3, 2, 3, label=4), + Bbox(2, 1, 2, 3, label=4), + ]), + + DatasetItem(id=3, subset='valid', image=np.ones((8, 8, 3)), + annotations=[ + Bbox(0, 1, 5, 2, label=2), + Bbox(0, 2, 3, 2, label=5), + Bbox(0, 2, 4, 2, label=6), + Bbox(0, 7, 3, 2, label=7), + ]), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(i) for i in range(10)), + }) + + with TestDir() as test_dir: + + YoloConverter.convert(source_dataset, test_dir, save_images=True) + parsed_dataset = YoloImporter()(test_dir).make_dataset() + + compare_datasets(self, source_dataset, parsed_dataset) + + def test_can_save_dataset_with_image_info(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', + image=Image(path='1.jpg', size=(10, 15)), + annotations=[ + Bbox(0, 2, 4, 2, label=2), + Bbox(3, 3, 2, 3, label=4), + ]), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(i) for i in range(10)), + }) + + with TestDir() as test_dir: + + YoloConverter.convert(source_dataset, test_dir) + + save_image(osp.join(test_dir, 'obj_train_data', '1.jpg'), + np.ones((10, 15, 3))) # supply the image file for the exported dataset + parsed_dataset = YoloImporter()(test_dir).make_dataset() + + compare_datasets(self, source_dataset, parsed_dataset) + + def test_can_load_dataset_with_exact_image_info(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', + image=Image(path='1.jpg', size=(10, 15)), + annotations=[ + Bbox(0, 2, 4, 2, label=2), + Bbox(3, 3, 2, 3, label=4), + ]), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(i) for i in range(10)), + }) + + with TestDir() as test_dir: + + YoloConverter.convert(source_dataset, test_dir) + + parsed_dataset = YoloImporter()(test_dir, + image_info={'1': (10, 15)}).make_dataset() + + compare_datasets(self, source_dataset, parsed_dataset) + + def test_relative_paths(self): + source_dataset = Dataset.from_iterable([ + DatasetItem(id='1', subset='train', + image=np.ones((4, 2, 3))), + DatasetItem(id='subdir1/1', subset='train', + image=np.ones((2, 6, 3))), + DatasetItem(id='subdir2/1', subset='train', + image=np.ones((5, 4, 3))), + ], categories={ + AnnotationType.label: LabelCategories(), + }) + + for save_images in {True, False}: + with self.subTest(save_images=save_images): + with TestDir() as test_dir: + + YoloConverter.convert(source_dataset, test_dir,
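+                        # both modes are exercised via the subTest loop above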
save_images=save_images) + parsed_dataset = YoloImporter()(test_dir).make_dataset() + + compare_datasets(self, source_dataset, parsed_dataset) + + +DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets', 'yolo_dataset') + +class YoloImporterTest(TestCase): + def test_can_detect(self): + self.assertTrue(YoloImporter.detect(DUMMY_DATASET_DIR)) + + def test_can_import(self): + expected_dataset = Dataset.from_iterable([ + DatasetItem(id=1, subset='train', + image=np.ones((10, 15, 3)), + annotations=[ + Bbox(0, 2, 4, 2, label=2), + Bbox(3, 3, 2, 3, label=4), + ]), + ], categories={ + AnnotationType.label: LabelCategories.from_iterable( + 'label_' + str(i) for i in range(10)), + }) + + dataset = Project.import_from(DUMMY_DATASET_DIR, 'yolo') \ + .make_dataset() + + compare_datasets(self, expected_dataset, dataset)