Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Import for SYNTHIA dataset format #532

Merged
merged 27 commits into from
Nov 22, 2021
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
848c471
import for SYNTHIA dataset format (bboxes, cuboids)
yasakova-anastasia Nov 1, 2021
74e6d27
some fixes
yasakova-anastasia Nov 3, 2021
ba195cd
add segmentation task
yasakova-anastasia Nov 8, 2021
25fe97f
fix pylint
yasakova-anastasia Nov 8, 2021
e115fd9
Add SYNTHIA to documentation
yasakova-anastasia Nov 9, 2021
473e318
Merge branch 'develop' into ay/add-synthia-format
yasakova-anastasia Nov 9, 2021
aaebcc2
Update Changelog
yasakova-anastasia Nov 9, 2021
2b5fd27
fix tests
yasakova-anastasia Nov 9, 2021
65c909c
some fixes
yasakova-anastasia Nov 9, 2021
04587c8
fix remark
yasakova-anastasia Nov 9, 2021
e165084
remove the format detection test
yasakova-anastasia Nov 9, 2021
dfcb676
fix documentation
yasakova-anastasia Nov 15, 2021
0774a82
fixes
yasakova-anastasia Nov 15, 2021
eeab585
resolve conflict
yasakova-anastasia Nov 15, 2021
59cc562
remove unused import
yasakova-anastasia Nov 15, 2021
219e6df
add import for semantic segmentation
yasakova-anastasia Nov 18, 2021
75419ac
some fixes
yasakova-anastasia Nov 18, 2021
76a93b4
Merge branch 'develop' into ay/add-synthia-format
yasakova-anastasia Nov 18, 2021
9b7950c
add detector
yasakova-anastasia Nov 18, 2021
4c4cc42
add more tests for detector
yasakova-anastasia Nov 18, 2021
5879ba5
add dynamic object attribute
yasakova-anastasia Nov 18, 2021
6c45b75
remove useless files
yasakova-anastasia Nov 18, 2021
5bc7825
fix documentation
yasakova-anastasia Nov 18, 2021
78059c8
fixes
yasakova-anastasia Nov 22, 2021
903bbea
small fix in documentation
yasakova-anastasia Nov 22, 2021
9aa4a74
small fix
yasakova-anastasia Nov 22, 2021
efd091a
fix extractor
yasakova-anastasia Nov 22, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- Import for CelebA dataset format.
(<https://github.com/openvinotoolkit/datumaro/pull/484>)
- Import for SYNTHIA dataset format.
(<https://github.com/openvinotoolkit/datumaro/pull/532>)

### Changed
- File `people.txt` became optional in LFW
Expand Down
160 changes: 160 additions & 0 deletions datumaro/plugins/synthia_format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
# Copyright (C) 2021 Intel Corporation
#
# SPDX-License-Identifier: MIT

from collections import OrderedDict
import os.path as osp

import numpy as np

from datumaro.components.annotation import (
AnnotationType, LabelCategories, Mask, MaskCategories,
)
from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor
from datumaro.util import find
from datumaro.util.image import find_images, load_image
from datumaro.util.mask_tools import generate_colormap


class SynthiaPath:
    """Well-known subdirectory names inside a SYNTHIA dataset root."""
    # Directory with the RGB training images.
    IMAGES_DIR = 'RGB'
    # Directory with the per-pixel segmentation label images.
    LABELS_SEGM_DIR = 'GT/LABELS'

# Default SYNTHIA label map: class name -> RGB color (the color used in the
# dataset's GT/COLOR representation). Insertion order defines the label ids,
# so 'Void' with color (0, 0, 0) must stay first (see make_categories).
SynthiaLabelMap = OrderedDict([
    ('Void', (0, 0, 0)),
    ('Sky', (128, 128, 128)),
    ('Building', (128, 0, 0)),
    ('Road', (128, 64, 128)),
    ('Sidewalk', (0, 0, 192)),
    ('Fence', (64, 64, 128)),
    ('Vegetation', (128, 128, 0)),
    ('Pole', (192, 192, 128)),
    ('Car', (64, 0, 128)),
    ('Truck', (0, 0, 70)),
    ('TrafficSign', (192, 128, 128)),
    ('Pedestrian', (64, 64, 0)),
    ('Bicycle', (0, 128, 192)),
    ('Lanemarking', (0, 172, 0)),
    ('TrafficLight', (0, 128, 128)),
])

def make_categories(label_map=None):
    """Build Datumaro categories from a label map.

    Args:
        label_map: ``OrderedDict`` mapping a label name to its RGB color
            tuple, or to ``None`` when no color is defined. Defaults to
            ``SynthiaLabelMap``. NOTE: the mapping is modified in place to
            guarantee a background entry at index 0.

    Returns:
        dict: ``{AnnotationType.label: LabelCategories,
        AnnotationType.mask: MaskCategories}``
    """
    if label_map is None:
        label_map = SynthiaLabelMap

    # There must always be a label with color (0, 0, 0) at index 0
    bg_label = find(label_map.items(), lambda x: x[1] == (0, 0, 0))
    if bg_label is not None:
        bg_label = bg_label[0]
    else:
        bg_label = 'background'
        if bg_label not in label_map:
            # Only assign the black color explicitly when at least one
            # other label has a color; otherwise leave colors undefined
            # so a whole colormap is generated below.
            has_colors = any(v is not None for v in label_map.values())
            color = (0, 0, 0) if has_colors else None
            label_map[bg_label] = color
    label_map.move_to_end(bg_label, last=False)

    categories = {}
    label_categories = LabelCategories()
    for label in label_map:
        label_categories.add(label)
    categories[AnnotationType.label] = label_categories

    # Recomputed here because the background entry may have been added above.
    has_colors = any(v is not None for v in label_map.values())
    if not has_colors: # generate new colors
        colormap = generate_colormap(len(label_map))
    else: # only copy defined colors
        label_id = lambda label: label_categories.find(label)[0]
        colormap = { label_id(name): (desc[0], desc[1], desc[2])
            for name, desc in label_map.items() }
    mask_categories = MaskCategories(colormap)
    # Force eager computation of the inverse mapping so later lookups
    # do not pay the cost lazily.
    mask_categories.inverse_colormap # pylint: disable=pointless-statement
    categories[AnnotationType.mask] = mask_categories
    return categories

def parse_label_map(path):
    """Parse a ``labels.txt``-style file into an ordered label map.

    Each non-empty, non-comment line is either ``<r> <g> <b> <name>``
    (a color followed by the label name) or just ``<name>`` (no color).

    Args:
        path: path to the label map file; falsy values yield ``None``.

    Returns:
        ``OrderedDict`` mapping name -> (r, g, b) tuple or ``None``,
        or ``None`` when no path is given.

    Raises:
        ValueError: if a label name occurs more than once.
    """
    if not path:
        return None

    label_map = OrderedDict()
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            # skip empty and commented lines
            line = line.strip()
            if not line or line.startswith('#'):
                continue

            # color, name
            label_desc = line.split()

            if 2 < len(label_desc):
                # The name is the last token; everything before it is
                # the color. (Indexing the last token, rather than a
                # fixed position, avoids an IndexError on short lines.)
                name = label_desc[-1]
                color = tuple(int(c) for c in label_desc[:-1])
            else:
                name = label_desc[0]
                color = None

            if name in label_map:
                raise ValueError("Label '%s' is already defined" % name)

            label_map[name] = color
    return label_map

class SynthiaExtractor(SourceExtractor):
    """Reads SYNTHIA semantic segmentation annotations from a dataset root."""

    def __init__(self, path):
        if not osp.isdir(path):
            raise FileNotFoundError("Can't read dataset directory '%s'" % path)

        super().__init__()

        self._categories = self._load_categories(path)
        self._items = list(self._load_items(path).values())

    def _load_categories(self, path):
        """Load categories from labels.txt in the root, or the defaults."""
        label_map_path = osp.join(path, 'labels.txt')
        if osp.isfile(label_map_path):
            label_map = parse_label_map(label_map_path)
        else:
            label_map = SynthiaLabelMap
        self._labels = [label for label in label_map]
        return make_categories(label_map)

    def _load_items(self, root_dir):
        """Build DatasetItems from the GT label images, attaching RGB images.

        Item ids are label-image paths relative to the GT directory,
        normalized to forward slashes.
        """
        image_dir = osp.join(root_dir, SynthiaPath.IMAGES_DIR)
        if osp.isdir(image_dir):
            images = {
                osp.splitext(osp.relpath(p, image_dir))[0].replace('\\', '/'): p
                for p in find_images(image_dir, recursive=True)
            }
        else:
            images = {}

        items = {}

        gt_dir = osp.join(root_dir, SynthiaPath.LABELS_SEGM_DIR)
        if osp.isdir(gt_dir):
            gt_images = find_images(gt_dir, recursive=True)
            for gt_img in gt_images:
                item_id = osp.splitext(
                    osp.relpath(gt_img, gt_dir))[0].replace('\\', '/')

                anno = []
                # Class ids live at channel index 2 of the loaded label
                # image (NOTE(review): which color channel this is
                # depends on the loader's channel order — confirm
                # against the SYNTHIA format docs).
                instances_mask = load_image(gt_img, dtype=np.uint16)[:, :, 2]
                segm_ids = np.unique(instances_mask)
                for segm_id in segm_ids:
                    anno.append(Mask(
                        image=self._lazy_extract_mask(instances_mask, segm_id),
                        label=segm_id))

                # .get(): an annotation may have no matching RGB image;
                # plain indexing would raise KeyError for such items.
                items[item_id] = DatasetItem(id=item_id,
                    image=images.get(item_id), annotations=anno)
        return items

    @staticmethod
    def _lazy_extract_mask(mask, c):
        # Deferred so per-class masks are materialized only when accessed.
        return lambda: mask == c

class SynthiaImporter(Importer):
    """Detects a directory as a single 'synthia' source."""
    @classmethod
    def find_sources(cls, path):
        # Any path is accepted as one source; no layout validation here.
        return [{'url': path, 'format': 'synthia'}]
112 changes: 112 additions & 0 deletions site/content/en/docs/formats/synthia.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
---
title: 'SYNTHIA'
linkTitle: 'SYNTHIA'
description: ''
weight: 1
---

## Format specification

The original SYNTHIA dataset is available
[here](https://synthia-dataset.net).
IRDonch marked this conversation as resolved.
Show resolved Hide resolved

Supported annotation types:
- `Masks`
IRDonch marked this conversation as resolved.
Show resolved Hide resolved

## Import SYNTHIA dataset

A Datumaro project with a SYNTHIA source can be created in the following way:

```bash
datum create
datum import --format synthia <path/to/dataset>
```

It is also possible to import the dataset using Python API:

```python
from datumaro.components.dataset import Dataset

synthia_dataset = Dataset.import_from('<path/to/dataset>', 'synthia')
```

SYNTHIA dataset directory should have the following structure:

<!--lint disable fenced-code-flag-->
```
dataset/
├── GT/
│   └── LABELS/
│   ├── Stereo_Left/
│   │   ├── Omni_B/
│   │   │   ├── 000000.png
│   │   │   ├── 000001.png
│ | | └── ...
│   │   └── ...
│   └── Stereo_Right/
│   ├── Omni_B/
│   │   ├── 000000.png
│   │   └── 000001.png
│ | └── ...
│      └── ...
└── RGB/
   ├── Stereo_Left/
   │   ├── Omni_B/
   │   │   ├── 000000.png
   │   │   ├── 000001.png
| | └── ...
   │   └── ...
   └── Stereo_Right/
   ├── Omni_B/
   │   ├── 000000.png
   │   └── 000001.png
| └── ...
   └── ...
```

- RGB folder containing standard RGB images used for training.
IRDonch marked this conversation as resolved.
Show resolved Hide resolved
- GT/LABELS folder containing png files (one per image).
IRDonch marked this conversation as resolved.
Show resolved Hide resolved
  Annotations are given in two channels. The first channel contains
  the class of that pixel (see the table below). The second channel
  contains the unique ID of the instance for those objects
  that are dynamic (cars, pedestrians, etc.).

Also present in the original dataset:
IRDonch marked this conversation as resolved.
Show resolved Hide resolved
- GT/COLOR folder containing png files (one per image).
Annotations are given using a color representation.
- Depth folder containing unsigned short images. Depth is encoded
  in any of the 3 channels in centimetres as a ushort.
But this information can be obtained from the instance segmentation.
IRDonch marked this conversation as resolved.
Show resolved Hide resolved


## Export to other formats

Datumaro can convert a SYNTHIA dataset into any other format [Datumaro supports](/docs/user-manual/supported_formats/).
To get the expected result, convert the dataset to a format
that supports segmentation masks.

There are several ways to convert a SYNTHIA dataset to other dataset
formats using CLI:

```bash
datum create
datum import -f synthia <path/to/dataset>
datum export -f voc -o <output/dir> -- --save-images
# or
datum convert -if synthia -i <path/to/dataset> \
-f voc -o <output/dir> -- --save-images
```

Or, using Python API:

```python
from datumaro.components.dataset import Dataset

dataset = Dataset.import_from('<path/to/dataset>', 'synthia')
dataset.export('save_dir', 'voc')
```

## Examples

Examples of using this format from the code can be found in
[the format tests](https://github.com/openvinotoolkit/datumaro/blob/develop/tests/test_synthia_format.py)
4 changes: 4 additions & 0 deletions site/content/en/docs/user-manual/supported_formats.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ List of supported formats:
- [Format specification](https://docs.supervise.ly/data-organization/00_ann_format_navi)
- [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/sly_pointcloud_dataset)
- [Format documentation](/docs/formats/sly_pointcloud)
- SYNTHIA (`segmentation`)
- [Format specification](https://synthia-dataset.net/)
- [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/synthia_dataset)
- [Format documentation](/docs/formats/synthia)
- CVAT
- [Format specification](https://openvinotoolkit.github.io/cvat/docs/manual/advanced/xml_format)
- [Dataset example](https://github.com/openvinotoolkit/datumaro/tree/develop/tests/assets/cvat_dataset)
Expand Down
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions tests/requirements.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class Requirements:
DATUM_283 = "Create cli tests for testing convert command for VOC format"
DATUM_399 = "Implement import for ADE20K dataset"
DATUM_475 = "Support import for CelebA dataset"
DATUM_497 = "Support import for SYNTHIA dataset"

# GitHub issues (bugs)
# https://github.com/openvinotoolkit/datumaro/issues
Expand Down
58 changes: 58 additions & 0 deletions tests/test_synthia_format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@

from unittest import TestCase
import os.path as osp

import numpy as np

from datumaro.components.annotation import Mask
from datumaro.components.dataset import Dataset
from datumaro.components.extractor import DatasetItem
from datumaro.util.test_utils import compare_datasets
import datumaro.plugins.synthia_format as Synthia

from .requirements import Requirements, mark_requirement

# Root of the dummy SYNTHIA dataset asset used by the tests below.
DUMMY_DATASET_DIR = osp.join(osp.dirname(__file__), 'assets',
    'synthia_dataset')

class SynthiaImporterTest(TestCase):
    """Tests importing the dummy SYNTHIA dataset asset."""

    @mark_requirement(Requirements.DATUM_497)
    def test_can_import(self):
        # Expected items mirror the masks encoded in the dummy asset's
        # GT/LABELS images; labels index into the default SYNTHIA label map.
        expected_dataset = Dataset.from_iterable([
            DatasetItem(id='Stereo_Left/Omni_B/000000',
                image=np.ones((1, 5, 3)),
                annotations=[
                    Mask(np.array([[1, 1, 0, 0, 0]]), label=1),
                    Mask(np.array([[0, 0, 1, 1, 1]]), label=10),
                ],
            ),
            DatasetItem(id='Stereo_Left/Omni_B/000001',
                image=np.ones((1, 5, 3)),
                annotations=[
                    Mask(np.array([[1, 0, 0, 0, 0]]), label=8),
                    Mask(np.array([[0, 1, 1, 0, 0]]), label=11),
                    Mask(np.array([[0, 0, 0, 1, 1]]), label=3),
                ],
            ),
            DatasetItem(id='Stereo_Left/Omni_F/000000',
                image=np.ones((1, 5, 3)),
                annotations=[
                    Mask(np.array([[1, 1, 0, 0, 0]]), label=1),
                    Mask(np.array([[0, 0, 1, 1, 0]]), label=2),
                    Mask(np.array([[0, 0, 0, 0, 1]]), label=3),
                ],
            ),
            DatasetItem(id='Stereo_Left/Omni_F/000001',
                image=np.ones((1, 5, 3)),
                annotations=[
                    Mask(np.array([[1, 0, 0, 0, 0]]), label=1),
                    Mask(np.array([[0, 1, 0, 0, 0]]), label=2),
                    Mask(np.array([[0, 0, 1, 1, 0]]), label=15),
                    Mask(np.array([[0, 0, 0, 0, 1]]), label=3),
                ],
            )
        ], categories=Synthia.make_categories())

        dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'synthia')

        compare_datasets(self, expected_dataset, dataset, require_images=True)