openvinotoolkit · zhiltsov-max · May 5, 2021 · Apr 28, 2021 · Apr 29, 2021 · Apr 29, 2021
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Support for escaping in attribiute values in LabelMe format (<https://github.com/openvinotoolkit/datumaro/issues/49>)
 - Support for Segmentation Splitting (<https://github.com/openvinotoolkit/datumaro/pull/223>)
 - Support for CIFAR-10/100 dataset format (<https://github.com/openvinotoolkit/datumaro/pull/225>)
+- Support import COCO panoptic and stuff format (<https://github.com/openvinotoolkit/datumaro/pull/210>)
 
 ### Changed
 - LabelMe format saves dataset items with their relative paths by subsets without changing names (<https://github.com/openvinotoolkit/datumaro/pull/200>)

@@ -124,7 +124,7 @@ CVAT annotations                             ---> Publication, statistics etc.
 [(Back to top)](#table-of-contents)
 
 - Dataset reading, writing, conversion in any direction. [Supported formats](docs/user_manual.md#supported-formats):
-  - [COCO](http://cocodataset.org/#format-data) (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`*)
+  - [COCO](http://cocodataset.org/#format-data) (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`, `panoptic`, `stuff`)
   - [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/index.html) (`classification`, `detection`, `segmentation`, `action_classification`, `person_layout`)
   - [YOLO](https://github.com/AlexeyAB/darknet#how-to-train-pascal-voc-data) (`bboxes`)
   - [TF Detection API](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md) (`bboxes`, `masks`)

@@ -5,6 +5,7 @@
 
 import json
 import logging as log
+import numpy as np
 import os
 import os.path as osp
 from enum import Enum
@@ -19,6 +20,7 @@
     _COORDINATE_ROUNDING_DIGITS, AnnotationType, Points)
 from datumaro.components.dataset import ItemStatus
 from datumaro.util import cast, find, str_to_bool
+from datumaro.util.image import save_image
 
 from .format import CocoPath, CocoTask
 
@@ -451,6 +453,75 @@ def save_annotations(self, item):
 
             self.annotations.append(elem)
 
+class _StuffConverter(_InstancesConverter):
+    pass
+
+class _PanopticConverter(_TaskConverter):
+    def write(self, path):
+        with open(path, 'w') as outfile:
+            json.dump(self._data, outfile)
+
+    def save_categories(self, dataset):
+        label_categories = dataset.categories().get(AnnotationType.label)
+        if label_categories is None:
+            return
+
+        for idx, cat in enumerate(label_categories.items):
+            self.categories.append({
+                'id': 1 + idx,
+                'name': cast(cat.name, str, ''),
+                'supercategory': cast(cat.parent, str, ''),
+            })
+
+    def save_annotations(self, item):
+        if not item.has_image:
+            return
+
+        ann_filename = item.id + CocoPath.PANOPTIC_EXT
+
+        def id2bgr(id_map):
+            id_map_copy = id_map.copy()
+            bgr_shape = tuple(list(id_map.shape) + [3])
+            bgr_map = np.zeros(bgr_shape, dtype=np.uint8)
+            for i in range(3):
+                bgr_map[..., 2-i] = id_map_copy % 256
+                id_map_copy //= 256
+            return bgr_map
+
+        segments_info = list()
+        masks = []
+        next_id = self._min_ann_id
+        for ann in item.annotations:
+            if ann.type != AnnotationType.mask:
+                continue
+
+            if not ann.id:
+                ann.id = next_id
+                next_id += 1
+
+            segment_info = {}
+            segment_info['id'] = ann.id
+            segment_info['category_id'] = cast(ann.label, int, -1) + 1
+            segment_info['area'] = float(ann.get_area())
+            segment_info['bbox'] = [float(p) for p in ann.get_bbox()]
+            segment_info['iscrowd'] = cast(ann.attributes.get("is_crowd"), int, 0)
+            segments_info.append(segment_info)
+            masks.append(ann)
+
+        if masks:
+            pan_format = mask_tools.merge_masks(
+                ((m.image, m.id) for m in masks),
+                start=np.zeros(item.image.size, dtype=np.uint32))
+            save_image(osp.join(self._context._segmentation_dir, ann_filename),
+                id2bgr(pan_format), create_dir=True)
+
+        elem = {
+            'image_id': self._get_image_id(item),
+            'file_name': ann_filename,
+            'segments_info': segments_info
+        }
+        self.annotations.append(elem)
+
 class CocoConverter(Converter):
     @staticmethod
     def _split_tasks_string(s):
@@ -497,6 +568,8 @@ def build_cmdline_parser(cls, **kwargs):
         CocoTask.person_keypoints: _KeypointsConverter,
         CocoTask.captions: _CaptionsConverter,
         CocoTask.labels: _LabelsConverter,
+        CocoTask.panoptic: _PanopticConverter,
+        CocoTask.stuff: _StuffConverter,
     }
 
     def __init__(self, extractor, save_dir,
@@ -541,6 +614,11 @@ def _make_dirs(self):
         self._ann_dir = osp.join(self._save_dir, CocoPath.ANNOTATIONS_DIR)
         os.makedirs(self._ann_dir, exist_ok=True)
 
+    def _make_segmentation_dir(self, subset_name):
+        self._segmentation_dir = osp.join(self._save_dir,
+            CocoPath.ANNOTATIONS_DIR, 'panoptic_'+ subset_name)
+        os.makedirs(self._segmentation_dir, exist_ok=True)
+
     def _make_task_converter(self, task):
         if task not in self._TASK_CONVERTER:
             raise NotImplementedError()
@@ -568,6 +646,8 @@ def apply(self):
             task_converters = self._make_task_converters()
             for task_conv in task_converters.values():
                 task_conv.save_categories(subset)
+            if CocoTask.panoptic in task_converters:
+                self._make_segmentation_dir(subset_name)
 
             for item in subset:
                 if self._save_images:
@@ -637,3 +717,14 @@ class CocoLabelsConverter(CocoConverter):
     def __init__(self, *args, **kwargs):
         kwargs['tasks'] = CocoTask.labels
         super().__init__(*args, **kwargs)
+
+class CocoPanopticConverter(CocoConverter):
+    def __init__(self, *args, **kwargs):
+        kwargs['tasks'] = CocoTask.panoptic
+        super().__init__(*args, **kwargs)
+
+class CocoStuffConverter(CocoConverter):
+    def __init__(self, *args, **kwargs):
+        kwargs['tasks'] = CocoTask.stuff
+        kwargs['segmentation_mode'] = SegmentationMode.mask
+        super().__init__(*args, **kwargs)
@@ -4,7 +4,9 @@
 # SPDX-License-Identifier: MIT
 
 from collections import OrderedDict
+import json
 import logging as log
+import numpy as np
 import os.path as osp
 
 from pycocotools.coco import COCO
@@ -15,7 +17,7 @@
     AnnotationType, Label, RleMask, Points, Polygon, Bbox, Caption,
     LabelCategories, PointsCategories
 )
-from datumaro.util.image import Image
+from datumaro.util.image import Image, load_image
 
 from .format import CocoTask, CocoPath
 
@@ -42,16 +44,24 @@ def __init__(self, path, task, merge_instance_polygons=False, subset=None):
 
         self._merge_instance_polygons = merge_instance_polygons
 
-        loader = self._make_subset_loader(path)
-        self._load_categories(loader)
-        self._items = list(self._load_items(loader).values())
+        if self._task == CocoTask.panoptic:
+            #panoptic is not added to pycocotools
+            panoptic_config = self._load_panoptic_config(path)
+            panoptic_images = osp.splitext(path)[0]
+
+            self._load_panoptic_categories(panoptic_config)
+            self._items = list(self._load_panoptic_items(panoptic_config,
+                panoptic_images).values())
+        else:
+            loader = self._make_subset_loader(path)
+            self._load_categories(loader)
+            self._items = list(self._load_items(loader).values())
 
     @staticmethod
     def _make_subset_loader(path):
         # COCO API has an 'unclosed file' warning
         coco_api = COCO()
         with open(path, 'r') as f:
-            import json
             dataset = json.load(f)
 
         coco_api.dataset = dataset
@@ -62,9 +72,7 @@ def _load_categories(self, loader):
         self._categories = {}
 
         if self._task in [CocoTask.instances, CocoTask.labels,
-                CocoTask.person_keypoints,
-                # TODO: Task.stuff, CocoTask.panoptic
-                ]:
+                CocoTask.person_keypoints, CocoTask.stuff]:
             label_categories, label_map = self._load_label_categories(loader)
             self._categories[AnnotationType.label] = label_categories
             self._label_map = label_map
@@ -100,6 +108,22 @@ def _load_person_kp_categories(self, loader):
 
         return categories
 
+    @staticmethod
+    def _load_panoptic_config(path):
+        with open(path, 'r') as f:
+            return json.load(f)
+
+    def _load_panoptic_categories(self, config):
+        label_categories = LabelCategories()
+        label_map = {}
+        for idx, cat in enumerate(config['categories']):
+            label_map[cat['id']] = idx
+            label_categories.add(name=cat['name'],
+                parent=cat.get('supercategory'))
+
+        self._categories[AnnotationType.label] = label_categories
+        self._label_map = label_map
+
     def _load_items(self, loader):
         items = OrderedDict()
 
@@ -124,6 +148,45 @@ def _load_items(self, loader):
 
         return items
 
+    def _load_panoptic_items(self, config, panoptic_images):
+        items = OrderedDict()
+
+        def bgr2id(color):
+            return color[:, :, 2] \
+                + 256 * color[:, :, 1] \
+                + 256 * 256 * color[:, :, 0]
+
+        imgs_info = {}
+        for img in config['images']:
+            imgs_info[img['id']] = img
+
+        for ann in config['annotations']:
+            img_id = int(ann['image_id'])
+            image_path = osp.join(self._images_dir, imgs_info[img_id]['file_name'])
+            image_size = (imgs_info[img_id].get('height'),
+                imgs_info[img_id].get('width'))
+            image = Image(path=image_path, size=image_size)
+            anns = list()
+
+            panoptic_image_path = osp.join(panoptic_images, ann['file_name'])
+            panoptic_format = load_image(panoptic_image_path, dtype=np.uint32)
+            pan = bgr2id(panoptic_format)
+            for segm_info in ann['segments_info']:
+                cat_id = segm_info['category_id']
+                segm_id = segm_info['id']
+                mask = pan == segm_id
+                rle = mask_utils.encode(np.asfortranarray(mask.astype('uint8')))
+                rle['counts'] = rle['counts'].decode('utf8')
+                attributes = {}
+                attributes['is_crowd'] = bool(segm_info['iscrowd'])
+                anns.append(RleMask(rle=rle, label=cat_id, id=segm_id,
+                    group=segm_id, attributes=attributes))
+
+            items[img_id] = DatasetItem(
+                id=img_id, subset=self._subset, image=image, annotations=anns,
+                attributes={'id': img_id})
+        return items
+
     def _get_label_id(self, ann):
         cat_id = ann.get('category_id')
         if cat_id in [0, None]:
@@ -147,7 +210,8 @@ def _load_annotations(self, ann, image_info=None):
 
         group = ann_id # make sure all tasks' annotations are merged
 
-        if self._task in [CocoTask.instances, CocoTask.person_keypoints]:
+        if self._task in [CocoTask.instances, CocoTask.person_keypoints,
+            CocoTask.stuff]:
             x, y, w, h = ann['bbox']
             label_id = self._get_label_id(ann)
 
@@ -250,3 +314,13 @@ class CocoLabelsExtractor(_CocoExtractor):
     def __init__(self, path, **kwargs):
         kwargs['task'] = CocoTask.labels
         super().__init__(path, **kwargs)
+
+class CocoPanopticExtractor(_CocoExtractor):
+    def __init__(self, path, **kwargs):
+        kwargs['task'] = CocoTask.panoptic
+        super().__init__(path, **kwargs)
+
+class CocoStuffExtractor(_CocoExtractor):
+    def __init__(self, path, **kwargs):
+        kwargs['task'] = CocoTask.stuff
+        super().__init__(path, **kwargs)
@@ -12,12 +12,13 @@
     'captions',
     'labels', # extension, does not exist in the original COCO format
     'image_info',
-    # 'panoptic',
-    # 'stuff',
+    'panoptic',
+    'stuff',
 ])
 
 class CocoPath:
     IMAGES_DIR = 'images'
     ANNOTATIONS_DIR = 'annotations'
 
     IMAGE_EXT = '.jpg'
+    PANOPTIC_EXT = '.png'
@@ -21,6 +21,8 @@ class CocoImporter(Importer):
         CocoTask.captions: 'coco_captions',
         CocoTask.labels: 'coco_labels',
         CocoTask.image_info: 'coco_image_info',
+        CocoTask.panoptic: 'coco_panoptic',
+        CocoTask.stuff: 'coco_stuff',
     }
 
     @classmethod
@@ -39,7 +41,8 @@ def __call__(self, path, **extra_params):
 
         # TODO: should be removed when proper label merging is implemented
         conflicting_types = {CocoTask.instances,
-            CocoTask.person_keypoints, CocoTask.labels}
+            CocoTask.person_keypoints, CocoTask.labels,
+            CocoTask.panoptic, CocoTask.stuff}
         ann_types = set(t for s in subsets.values() for t in s) \
             & conflicting_types
         if 1 <= len(ann_types):

@@ -131,11 +131,16 @@ def _convert_mask_object(self, obj):
             rle = mask_utils.encode(
                 np.require(obj.image, dtype=np.uint8, requirements='F'))
 
+        if isinstance(rle['counts'], str):
+           counts = rle['counts']
+        else:
+           counts = rle['counts'].decode('ascii')
+
         converted.update({
             'label_id': cast(obj.label, int),
             'rle': {
                 # serialize as compressed COCO mask
-                'counts': rle['counts'].decode('ascii'),
+                'counts': counts,
                 'size': list(int(c) for c in rle['size']),
             },
             'z_order': obj.z_order,

@@ -3,6 +3,7 @@
 #
 # SPDX-License-Identifier: MIT
 
+from itertools import chain
 import numpy as np
 
 from datumaro.util.image import lazy_image, load_image
@@ -279,7 +280,7 @@ def find_mask_bbox(mask):
     y0, y1 = np.where(rows)[0][[0, -1]]
     return [x0, y0, x1 - x0, y1 - y0]
 
-def merge_masks(masks):
+def merge_masks(masks, start=None):
     """
         Merges masks into one, mask order is responsible for z order.
         To avoid memory explosion on mask materialization, consider passing
@@ -288,6 +289,9 @@ def merge_masks(masks):
         Inputs: a sequence of index masks or (binary mask, index) pairs
         Outputs: an index mask
     """
+    if start is not None:
+        masks = chain([start], masks)
+
     it = iter(masks)
 
     try:

@@ -84,7 +84,7 @@ import datumaro
 ## Supported Formats
 
 List of supported formats:
-- MS COCO (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`*)
+- MS COCO (`image_info`, `instances`, `person_keypoints`, `captions`, `labels`, `panoptic`, `stuff`)
   - [Format specification](http://cocodataset.org/#format-data)
   - [Dataset example](../tests/assets/coco_dataset)
   - `labels` are our extension - like `instances` with only `category_id`