openvinotoolkit · zhiltsov-max · Dec 28, 2021 · Dec 17, 2021 · Dec 17, 2021 · Dec 17, 2021
@@ -3,137 +3,148 @@
 # SPDX-License-Identifier: MIT
 
 from distutils.util import strtobool
-from itertools import chain
 import os
 import os.path as osp
 import re
 
 from datumaro.components.converter import Converter
-from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor
+from datumaro.components.extractor import DatasetItem, Extractor, Importer
 from datumaro.util.image import find_images
 
 
 class Market1501Path:
+    # Naming conventions shared by the Market-1501 extractor, importer and
+    # converter defined in this file.
+    # QUERY_DIR: name prefix of the query image directory; also used as the
+    # name of the subset loaded from it.
     QUERY_DIR = 'query'
+    # BBOX_DIR: name prefix of a subset image directory; the rest of the
+    # directory name is the subset name.
     BBOX_DIR = 'bounding_box_'
     IMAGE_EXT = '.jpg'
-    PATTERN = re.compile(r'^(-?\d+)_c(\d+)(?:s\d+_\d+_00(.*))?')
+    # Groups, in order: person id (may be negative), camera id, track id,
+    # frame id, bbox id, and an optional trailing custom name.
+    PATTERN = re.compile(r'^(-?\d+)_c(\d+)s(\d+)_(\d+)_(\d+)(.*)')
+    # LIST_PREFIX: name prefix of a subset image list file; the rest of the
+    # name, without the extension, is the subset name.
     LIST_PREFIX = 'images_'
     UNKNOWN_ID = -1
+    # Names of the attributes carried by the first five PATTERN groups.
+    # NOTE(review): no reader of this list is visible in this hunk - confirm
+    # where it is consumed.
+    ATTRIBUTES = ['person_id', 'camera_id', 'track_id', 'frame_id', 'bbox_id']
 
-class Market1501Extractor(SourceExtractor):
-    def __init__(self, path, subset=None):
+class Market1501Extractor(Extractor):
+    """
+    Loads all the subsets found directly under a dataset directory.
+
+    A subset is recognized by one of the following entries:
+    - a directory whose name starts with Market1501Path.BBOX_DIR
+    - a file whose name starts with Market1501Path.LIST_PREFIX
+    - a directory whose name starts with Market1501Path.QUERY_DIR
+      (always loaded as the 'query' subset)
+    """
+
+    def __init__(self, path):
+        """
+        Parameters:
+            path: the dataset root directory
+
+        Raises:
+            NotADirectoryError: if 'path' is not an existing directory
+        """
         if not osp.isdir(path):
             raise NotADirectoryError(
                 "Can't open folder with annotation files '%s'" % path)
 
-        if not subset:
-            subset = ''
-            for p in os.listdir(path):
-                pf = osp.join(path, p)
+        self._path = path
+        super().__init__()
 
-                if p.startswith(Market1501Path.BBOX_DIR) and osp.isdir(pf):
-                    subset = p.replace(Market1501Path.BBOX_DIR, '')
-                    break
+        # Subset name -> the directory or list file it is loaded from.
+        # If several entries map to the same subset name, the one listed
+        # last by os.listdir() replaces the earlier ones.
+        subsets = {}
+        for p in os.listdir(path):
+            pf = osp.join(path, p)
 
-                if p.startswith(Market1501Path.LIST_PREFIX) and osp.isfile(pf):
-                    subset = p.replace(Market1501Path.LIST_PREFIX, '')
-                    subset = osp.splitext(subset)[0]
-                    break
-        super().__init__(subset=subset)
+            if p.startswith(Market1501Path.BBOX_DIR) and osp.isdir(pf):
+                subset = p.replace(Market1501Path.BBOX_DIR, '')
+                subsets[subset] = pf
 
-        self._path = path
-        self._items = list(self._load_items(path).values())
+            if p.startswith(Market1501Path.LIST_PREFIX) and osp.isfile(pf):
+                subset = p.replace(Market1501Path.LIST_PREFIX, '')
+                subset = osp.splitext(subset)[0]
+                subsets[subset] = pf
+
+            if p.startswith(Market1501Path.QUERY_DIR) and osp.isdir(pf):
+                subset = Market1501Path.QUERY_DIR
+                subsets[subset] = pf
+
+        self._items = []
+        for subset, subset_path in subsets.items():
+            self._items.extend(list(
+                self._load_items(subset, subset_path).values()))
+
+    def __iter__(self):
+        yield from self._items
 
-    def _load_items(self, rootdir):
+    def _load_items(self, subset, subset_path):
+        """
+        Builds the items of a single subset.
+
+        Parameters:
+            subset: the subset name assigned to the created items
+            subset_path: either a list file with one image path per line
+                (joined with the dataset root), or a directory that is
+                searched for images recursively
+
+        Returns: a dict mapping item ids to DatasetItem objects. When two
+            images produce the same id, only the first one (in sorted path
+            order) is kept.
+        """
         items = {}
 
         paths = []
-        anno_file = osp.join(rootdir,
-            Market1501Path.LIST_PREFIX + self._subset + '.txt')
-        if osp.isfile(anno_file):
-            with open(anno_file, encoding='utf-8') as f:
+        if osp.isfile(subset_path):
+            with open(subset_path, encoding='utf-8') as f:
                 for line in f:
-                    paths.append(osp.join(rootdir, line.strip()))
+                    paths.append(osp.join(self._path, line.strip()))
         else:
-            paths = list(chain(
-                find_images(osp.join(rootdir,
-                        Market1501Path.QUERY_DIR),
-                    recursive=True),
-                find_images(osp.join(rootdir,
-                        Market1501Path.BBOX_DIR + self._subset),
-                    recursive=True),
-            ))
-
-        for image_path in paths:
+            paths = list(find_images(subset_path, recursive=True))
+
+        for image_path in sorted(paths):
             item_id = osp.splitext(osp.normpath(image_path))[0]
             if osp.isabs(image_path):
-                item_id = osp.relpath(item_id, rootdir)
-            subdir, item_id = item_id.split(os.sep, maxsplit=1)
+                item_id = osp.relpath(item_id, self._path)
+            # Drop the leading path component from the id.
+            # NOTE(review): this presumably strips the subset directory and
+            # assumes the path holds at least one separator - confirm for
+            # relative dataset roots.
+            item_id = item_id.split(osp.sep, maxsplit=1)[1]
 
-            pid = Market1501Path.UNKNOWN_ID
-            camid = Market1501Path.UNKNOWN_ID
+            attributes = {}
             search = Market1501Path.PATTERN.search(osp.basename(item_id))
             if search:
-                pid, camid = map(int, search.groups()[0:2])
-                camid -= 1 # make ids 0-based
-                custom_name = search.groups()[2]
+                attribute_values = search.groups()[0:5]
+                # person_id keeps the matched text as is; the camera id is
+                # stored decremented by one; the other ids are parsed as int.
+                attributes = {
+                    'person_id': attribute_values[0],
+                    'camera_id': int(attribute_values[1]) - 1,
+                    'track_id': int(attribute_values[2]),
+                    'frame_id': int(attribute_values[3]),
+                    'bbox_id': int(attribute_values[4]),
+                    'query': subset == Market1501Path.QUERY_DIR
+                }
+
+                # A non-empty tail after the ids replaces the file name part
+                # of the item id.
+                custom_name = search.groups()[5]
                 if custom_name:
                     item_id = osp.join(osp.dirname(item_id), custom_name)
 
             item = items.get(item_id)
             if item is None:
-                item = DatasetItem(id=item_id, subset=self._subset,
-                    image=image_path)
+                item = DatasetItem(id=item_id, subset=subset, image=image_path,
+                    attributes=attributes)
                 items[item_id] = item
 
-            if pid != Market1501Path.UNKNOWN_ID or \
-                    camid != Market1501Path.UNKNOWN_ID:
-                attributes = item.attributes
-                attributes['query'] = subdir == Market1501Path.QUERY_DIR
-                attributes['person_id'] = pid
-                attributes['camera_id'] = camid
         return items
 
 class Market1501Importer(Importer):
+    """Finds Market-1501 datasets by the names of top-level entries."""
+
     @classmethod
     def find_sources(cls, path):
+        """
+        Returns a single source description for 'path' if it is a directory
+        that contains at least one entry whose name starts with the
+        bounding box directory, query directory or image list prefix.
+        Returns an empty list otherwise, including when 'path' is not
+        a directory.
+        """
         if not osp.isdir(path):
             return []
-        return [{ 'url': path, 'format': Market1501Extractor.NAME }]
+
+        markers = (Market1501Path.BBOX_DIR, Market1501Path.QUERY_DIR,
+            Market1501Path.LIST_PREFIX)
+        if any(name.startswith(markers) for name in os.listdir(path)):
+            return [{'url': path, 'format': Market1501Extractor.NAME}]
+
+        # Always return a list, so that callers can iterate over the result
+        return []
 
 class Market1501Converter(Converter):
     DEFAULT_IMAGE_EXT = Market1501Path.IMAGE_EXT
 
+    def _make_dir_name(self, item):
+        """
+        Chooses the output directory name for an item.
+
+        Items with a truthy 'query' attribute go to the query directory,
+        all the others go to the bounding box directory of their subset.
+        A string 'query' value is interpreted with strtobool().
+        """
+        subset_dir = Market1501Path.BBOX_DIR + item.subset
+
+        is_query = item.attributes.get('query')
+        if isinstance(is_query, str):
+            is_query = strtobool(is_query)
+
+        return Market1501Path.QUERY_DIR if is_query else subset_dir
+
     def apply(self):
         for subset_name, subset in self._extractor.subsets().items():
             annotation = ''
+            used_frames = {}
 
             for item in subset:
+                dirname = self._make_dir_name(item)
+
                 image_name = item.id
-                if Market1501Path.PATTERN.search(image_name) is None:
-                    if 'person_id' in item.attributes and \
-                            'camera_id' in item.attributes:
-                        image_pattern = '{:04d}_c{}s1_000000_00{}'
-                        pid = int(item.attributes['person_id'])
-                        camid = int(item.attributes['camera_id']) + 1
-                        dirname, basename = osp.split(item.id)
-                        image_name = osp.join(dirname,
-                            image_pattern.format(pid, camid, basename))
-
-                dirname = Market1501Path.BBOX_DIR + subset_name
-                if 'query' in item.attributes:
-                    query = item.attributes.get('query')
-                    if isinstance(query, str):
-                        query = strtobool(query)
-                    if query:
-                        dirname = Market1501Path.QUERY_DIR
+                pid = item.attributes.get('person_id')
+                match = Market1501Path.PATTERN.fullmatch(item.id)
+                if not match and pid is not None:
+                    cid = int(item.attributes.get('camera_id', 0)) + 1
+                    tid = int(item.attributes.get('track_id', 1))
+                    bbid = int(item.attributes.get('bbox_id', 0))
+                    fid = int(item.attributes.get('frame_id',
+                        max(used_frames.get((pid, cid, tid), [-1])) + 1))
+                    image_name = f'{pid}_c{cid}s{tid}_{fid:06d}_{bbid:02d}'
 
                 image_path = self._make_image_filename(item,
                     name=image_name, subdir=dirname)
                 if self._save_images and item.has_image:
                     self._save_image(item, osp.join(self._save_dir, image_path))
 
+                attrs = Market1501Path.PATTERN.search(image_name)
+                if attrs:
+                    attrs = attrs.groups()
+                    used_frames.setdefault(attrs[0:2], []).append(int(attrs[3]))
                 annotation += '%s\n' % image_path
 
             annotation_file = osp.join(self._save_dir,

@@ -0,0 +1,115 @@
+# Copyright (C) 2020-2021 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+import fnmatch
+import glob
+import logging as log
+import os
+import os.path as osp
+
+from datumaro.components.annotation import (
+    AnnotationType, Label, LabelCategories,
+)
+from datumaro.components.dataset import DatasetItem
+from datumaro.components.extractor import Extractor, Importer
+from datumaro.components.format_detection import FormatDetectionContext
+from datumaro.util.image import find_images
+
+
+class MarsPath:
+    # fnmatch-style patterns describing the dataset layout:
+    # <subset dir>/<image dir>/<image dir name><image name postfix>
+    SUBSET_DIR_PATTERN = 'bbox_*'
+    IMAGE_DIR_PATTERNS = ['[0-9][0-9][0-9][0-9]', '00-1']
+    IMAGE_NAME_POSTFIX = ''.join((
+        'C', '[0-9]',      # camera id, 1 digit
+        'T', '[0-9]' * 4,  # track id, 4 digits
+        'F', '[0-9]' * 3,  # frame id, 3 digits
+        '.*',              # a dot followed by the file extension
+    ))
+
+class MarsExtractor(Extractor):
+    """
+    Reads a MARS-format dataset directory.
+
+    Every directory directly under the dataset root whose name matches
+    MarsPath.SUBSET_DIR_PATTERN is a subset; the subset name is the part of
+    the directory name after the first underscore. Inside a subset, every
+    directory whose name matches one of MarsPath.IMAGE_DIR_PATTERNS is
+    a label, and the images found in it become the items of this label.
+    """
+
+    def __init__(self, path):
+        """
+        Parameters:
+            path: the dataset root directory
+
+        Raises:
+            NotADirectoryError: if 'path' is not an existing directory
+        """
+        # An explicit check instead of 'assert', which is removed
+        # under 'python -O'
+        if not osp.isdir(path):
+            raise NotADirectoryError(
+                "Can't open dataset directory '%s'" % path)
+        super().__init__()
+
+        self._dataset_dir = path
+        # Subset name -> subset directory path (includes the dataset root)
+        self._subsets = {
+            subset_dir.split('_', maxsplit=1)[1]: osp.join(path, subset_dir)
+            for subset_dir in os.listdir(path)
+            if (osp.isdir(osp.join(path, subset_dir)) and
+                fnmatch.fnmatch(subset_dir, MarsPath.SUBSET_DIR_PATTERN))
+        }
+
+        self._categories = self._load_categories()
+        self._items = []
+        for subset, subset_path in self._subsets.items():
+            self._items.extend(self._load_items(subset, subset_path))
+
+    def __iter__(self):
+        yield from self._items
+
+    def categories(self):
+        return self._categories
+
+    def _load_categories(self):
+        """
+        Collects label names from the image directories of all the subsets.
+
+        Returns: a dict with the label categories, names sorted
+        """
+        # Subset paths already include the dataset root, so they must not be
+        # joined with it again: for a relative root that produces a path
+        # that does not exist, and no labels are found.
+        # A set is used, because the same directory name can be present in
+        # several subsets and must still produce a single label.
+        label_names = {
+            dir_name
+            for subset_path in self._subsets.values()
+            for dir_name in os.listdir(subset_path)
+            if (osp.isdir(osp.join(subset_path, dir_name))
+                and any(fnmatch.fnmatch(dir_name, image_dir)
+                    for image_dir in MarsPath.IMAGE_DIR_PATTERNS))
+        }
+        return {AnnotationType.label:
+            LabelCategories.from_iterable(sorted(label_names))}
+
+    def _load_items(self, subset, path):
+        """
+        Builds the items of a single subset.
+
+        Parameters:
+            subset: the subset name assigned to the created items
+            path: the subset directory
+
+        Returns: a list of DatasetItem objects. Images following the naming
+            convention get a label and the 'person_id', 'camera_id',
+            'track_id' and 'frame_id' attributes; the other images are
+            kept without annotations and attributes.
+        """
+        label_cats = self._categories[AnnotationType.label]
+
+        items = []
+        for label_cat in label_cats:
+            label = label_cat.name
+            label_id = label_cats.find(label)[0]
+            for image_path in find_images(osp.join(path, label)):
+                image_name = osp.basename(image_path)
+                item_id = osp.splitext(image_name)[0]
+
+                if not fnmatch.fnmatch(image_name,
+                        label + MarsPath.IMAGE_NAME_POSTFIX):
+                    # The name does not follow the convention: keep the
+                    # image in its subset, but without label and attributes
+                    items.append(DatasetItem(id=item_id, image=image_path,
+                        subset=subset))
+                    continue
+
+                # NOTE(review): with the current directory patterns, a name
+                # accepted by the check above always starts with the label,
+                # so this branch looks unreachable; kept as a safeguard.
+                pedestrian_id = image_name[0:4]
+                if pedestrian_id != label:
+                    log.warning(f'The image {image_path} will be skipped '
+                        'because pedestrian id for it does not match with '
+                        f'the directory name: {label}')
+                    continue
+
+                # Fixed positions in '<4-char id>C<d>T<dddd>F<ddd>...'
+                items.append(DatasetItem(id=item_id, image=image_path,
+                    subset=subset, annotations=[Label(label=label_id)],
+                    attributes={'person_id': pedestrian_id,
+                        'camera_id': int(image_name[5]),
+                        'track_id': int(image_name[7:11]),
+                        'frame_id': int(image_name[12:15])
+                    })
+                )
+
+        return items
+
+class MarsImporter(Importer):
+    """Detects and locates MARS-format datasets."""
+
+    @classmethod
+    def detect(cls, context: FormatDetectionContext):
+        # One alternative per image directory pattern, each requiring a file
+        # that matches '<subset dir>/<image dir>/<image dir><name postfix>'.
+        # NOTE(review): presumably require_any() passes when at least one
+        # alternative is satisfied - confirm against FormatDetectionContext.
+        with context.require_any():
+            for image_dir in MarsPath.IMAGE_DIR_PATTERNS:
+                with context.alternative():
+                    context.require_file('/'.join([MarsPath.SUBSET_DIR_PATTERN,
+                        image_dir, image_dir + MarsPath.IMAGE_NAME_POSTFIX]
+                    ))
+
+    @classmethod
+    def find_sources(cls, path):
+        """
+        Returns a single source description for 'path' if any subset
+        directory in it contains an image directory with at least one image
+        named by the convention. Returns an empty list otherwise, including
+        when 'path' is not a directory.
+        """
+        if not osp.isdir(path):
+            return []
+
+        subset_dirs = [subset_dir for subset_dir in os.listdir(path)
+            if (osp.isdir(osp.join(path, subset_dir)) and
+                fnmatch.fnmatch(subset_dir, MarsPath.SUBSET_DIR_PATTERN))
+        ]
+
+        for image_dir in MarsPath.IMAGE_DIR_PATTERNS:
+            for subset_dir in subset_dirs:
+                # The root and the subset directory are literal names here,
+                # so glob special characters in them must not be expanded
+                pattern = '/'.join((glob.escape(path),
+                    glob.escape(subset_dir), image_dir,
+                    image_dir + MarsPath.IMAGE_NAME_POSTFIX))
+                if next(glob.iglob(pattern), None) is not None:
+                    return [{'url': path, 'format': 'mars'}]
+
+        # Always return a list, so that callers can iterate over the result
+        return []