Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add MARS (Import only) #585

Merged
merged 24 commits into from
Dec 28, 2021
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 81 additions & 70 deletions datumaro/plugins/market1501_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,137 +3,148 @@
# SPDX-License-Identifier: MIT

from distutils.util import strtobool
from itertools import chain
import os
import os.path as osp
import re

from datumaro.components.converter import Converter
from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor
from datumaro.components.extractor import DatasetItem, Extractor, Importer
from datumaro.util.image import find_images


class Market1501Path:
QUERY_DIR = 'query'
BBOX_DIR = 'bounding_box_'
IMAGE_EXT = '.jpg'
PATTERN = re.compile(r'^(-?\d+)_c(\d+)(?:s\d+_\d+_00(.*))?')
PATTERN = re.compile(r'^(-?\d+)_c(\d+)s(\d+)_(\d+)_(\d+)(.*)')
LIST_PREFIX = 'images_'
UNKNOWN_ID = -1
ATTRIBUTES = ['person_id', 'camera_id', 'track_id', 'frame_id', 'bbox_id']

class Market1501Extractor(SourceExtractor):
def __init__(self, path, subset=None):
class Market1501Extractor(Extractor):
def __init__(self, path):
if not osp.isdir(path):
raise NotADirectoryError(
"Can't open folder with annotation files '%s'" % path)

if not subset:
subset = ''
for p in os.listdir(path):
pf = osp.join(path, p)
self._path = path
super().__init__()

if p.startswith(Market1501Path.BBOX_DIR) and osp.isdir(pf):
subset = p.replace(Market1501Path.BBOX_DIR, '')
break
subsets = {}
for p in os.listdir(path):
pf = osp.join(path, p)

if p.startswith(Market1501Path.LIST_PREFIX) and osp.isfile(pf):
subset = p.replace(Market1501Path.LIST_PREFIX, '')
subset = osp.splitext(subset)[0]
break
super().__init__(subset=subset)
if p.startswith(Market1501Path.BBOX_DIR) and osp.isdir(pf):
subset = p.replace(Market1501Path.BBOX_DIR, '')
subsets[subset] = pf

self._path = path
self._items = list(self._load_items(path).values())
if p.startswith(Market1501Path.LIST_PREFIX) and osp.isfile(pf):
subset = p.replace(Market1501Path.LIST_PREFIX, '')
subset = osp.splitext(subset)[0]
subsets[subset] = pf

if p.startswith(Market1501Path.QUERY_DIR) and osp.isdir(pf):
subset = Market1501Path.QUERY_DIR
subsets[subset] = pf

self._items = []
for subset, subset_path in subsets.items():
self._items.extend(list(
self._load_items(subset, subset_path).values()))

def __iter__(self):
yield from self._items

def _load_items(self, rootdir):
def _load_items(self, subset, subset_path):
items = {}

paths = []
anno_file = osp.join(rootdir,
Market1501Path.LIST_PREFIX + self._subset + '.txt')
if osp.isfile(anno_file):
with open(anno_file, encoding='utf-8') as f:
if osp.isfile(subset_path):
with open(subset_path, encoding='utf-8') as f:
for line in f:
paths.append(osp.join(rootdir, line.strip()))
paths.append(osp.join(self._path, line.strip()))
else:
paths = list(chain(
find_images(osp.join(rootdir,
Market1501Path.QUERY_DIR),
recursive=True),
find_images(osp.join(rootdir,
Market1501Path.BBOX_DIR + self._subset),
recursive=True),
))

for image_path in paths:
paths = list(find_images(subset_path, recursive=True))

for image_path in sorted(paths):
item_id = osp.splitext(osp.normpath(image_path))[0]
if osp.isabs(image_path):
item_id = osp.relpath(item_id, rootdir)
subdir, item_id = item_id.split(os.sep, maxsplit=1)
item_id = osp.relpath(item_id, self._path)
item_id = item_id.split(osp.sep, maxsplit=1)[1]

pid = Market1501Path.UNKNOWN_ID
camid = Market1501Path.UNKNOWN_ID
attributes = {}
search = Market1501Path.PATTERN.search(osp.basename(item_id))
if search:
pid, camid = map(int, search.groups()[0:2])
camid -= 1 # make ids 0-based
custom_name = search.groups()[2]
attribute_values = search.groups()[0:5]
attributes = {
'person_id': attribute_values[0],
'camera_id': int(attribute_values[1]) - 1,
'track_id': int(attribute_values[2]),
'frame_id': int(attribute_values[3]),
'bbox_id': int(attribute_values[4]),
'query': subset == Market1501Path.QUERY_DIR
}

custom_name = search.groups()[5]
if custom_name:
item_id = osp.join(osp.dirname(item_id), custom_name)

item = items.get(item_id)
if item is None:
item = DatasetItem(id=item_id, subset=self._subset,
image=image_path)
item = DatasetItem(id=item_id, subset=subset, image=image_path,
attributes=attributes)
items[item_id] = item

if pid != Market1501Path.UNKNOWN_ID or \
camid != Market1501Path.UNKNOWN_ID:
attributes = item.attributes
attributes['query'] = subdir == Market1501Path.QUERY_DIR
attributes['person_id'] = pid
attributes['camera_id'] = camid
return items

class Market1501Importer(Importer):
@classmethod
def find_sources(cls, path):
if not osp.isdir(path):
return []
return [{ 'url': path, 'format': Market1501Extractor.NAME }]
for dirname in os.listdir(path):
if dirname.startswith((Market1501Path.BBOX_DIR,
Market1501Path.QUERY_DIR, Market1501Path.LIST_PREFIX)):
return [{'url': path, 'format': Market1501Extractor.NAME}]

class Market1501Converter(Converter):
DEFAULT_IMAGE_EXT = Market1501Path.IMAGE_EXT

def _make_dir_name(self, item):
dirname = Market1501Path.BBOX_DIR + item.subset
query = item.attributes.get('query')
if query is not None and isinstance(query, str):
query = strtobool(query)
if query:
dirname = Market1501Path.QUERY_DIR
return dirname

def apply(self):
for subset_name, subset in self._extractor.subsets().items():
annotation = ''
used_frames = {}

for item in subset:
dirname = self._make_dir_name(item)

image_name = item.id
if Market1501Path.PATTERN.search(image_name) is None:
if 'person_id' in item.attributes and \
'camera_id' in item.attributes:
image_pattern = '{:04d}_c{}s1_000000_00{}'
pid = int(item.attributes['person_id'])
camid = int(item.attributes['camera_id']) + 1
dirname, basename = osp.split(item.id)
image_name = osp.join(dirname,
image_pattern.format(pid, camid, basename))

dirname = Market1501Path.BBOX_DIR + subset_name
if 'query' in item.attributes:
query = item.attributes.get('query')
if isinstance(query, str):
query = strtobool(query)
if query:
dirname = Market1501Path.QUERY_DIR
pid = item.attributes.get('person_id')
match = Market1501Path.PATTERN.fullmatch(item.id)
if not match and pid is not None:
zhiltsov-max marked this conversation as resolved.
Show resolved Hide resolved
cid = int(item.attributes.get('camera_id', 0)) + 1
tid = int(item.attributes.get('track_id', 1))
bbid = int(item.attributes.get('bbox_id', 0))
fid = int(item.attributes.get('frame_id',
max(used_frames.get((pid, cid, tid), [-1])) + 1))
image_name = f'{pid}_c{cid}s{tid}_{fid:06d}_{bbid:02d}'

image_path = self._make_image_filename(item,
name=image_name, subdir=dirname)
if self._save_images and item.has_image:
self._save_image(item, osp.join(self._save_dir, image_path))

attrs = Market1501Path.PATTERN.search(image_name)
if attrs:
attrs = attrs.groups()
used_frames.setdefault(attrs[0:2], []).append(int(attrs[3]))
annotation += '%s\n' % image_path

annotation_file = osp.join(self._save_dir,
Expand Down
115 changes: 115 additions & 0 deletions datumaro/plugins/mars_format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# Copyright (C) 2020-2021 Intel Corporation
#
# SPDX-License-Identifier: MIT
import fnmatch
import glob
import logging as log
import os
import os.path as osp

from datumaro.components.annotation import (
AnnotationType, Label, LabelCategories,
)
from datumaro.components.dataset import DatasetItem
from datumaro.components.extractor import Extractor, Importer
from datumaro.components.format_detection import FormatDetectionContext
from datumaro.util.image import find_images


class MarsPath:
    """fnmatch/glob-style name patterns used to recognise the MARS layout."""

    # Subset directories: 'bbox_' followed by the subset name.
    SUBSET_DIR_PATTERN = 'bbox_*'

    # Per-identity image directories: four digits, or the literal '00-1'.
    IMAGE_DIR_PATTERNS = [
        '[0-9][0-9][0-9][0-9]',
        '00-1',
    ]

    # Tail of an image file name, appended to the directory name:
    # 'C' + 1 digit, 'T' + 4 digits, 'F' + 3 digits, then anything
    # (typically the file extension).
    IMAGE_NAME_POSTFIX = (
        'C[0-9]'
        'T[0-9][0-9][0-9][0-9]'
        'F[0-9][0-9][0-9]'
        '.*'
    )

class MarsExtractor(Extractor):
    """Extractor for a MARS dataset directory.

    The expected layout is ``<path>/bbox_<subset>/<label dir>/<image>``,
    where ``<label dir>`` matches one of ``MarsPath.IMAGE_DIR_PATTERNS`` and
    a conforming image name is ``<label dir>`` followed by
    ``MarsPath.IMAGE_NAME_POSTFIX``. Every label directory name becomes a
    label category; conforming images get a ``Label`` annotation plus
    ``person_id``, ``camera_id``, ``track_id`` and ``frame_id`` attributes
    parsed from fixed positions of the file name.

    Raises:
        NotADirectoryError: if ``path`` is not an existing directory.
    """

    def __init__(self, path):
        # Explicit raise instead of ``assert``: assertions are stripped when
        # Python runs with -O, which would turn a bad path into an obscure
        # failure inside os.listdir().
        if not osp.isdir(path):
            raise NotADirectoryError(
                "Can't open MARS dataset directory '%s'" % path)
        super().__init__()

        self._dataset_dir = path
        # Maps subset name (the part after 'bbox_') to the subset directory.
        # os.listdir() order is arbitrary, so sort for a reproducible item
        # order.
        self._subsets = {
            subset_dir.split('_', maxsplit=1)[1]: osp.join(path, subset_dir)
            for subset_dir in sorted(os.listdir(path))
            if (osp.isdir(osp.join(path, subset_dir)) and
                fnmatch.fnmatch(subset_dir, MarsPath.SUBSET_DIR_PATTERN))
        }

        self._categories = self._load_categories()
        self._items = []
        for subset, subset_path in self._subsets.items():
            self._items.extend(self._load_items(subset, subset_path))

    def __iter__(self):
        yield from self._items

    def categories(self):
        return self._categories

    def _load_categories(self):
        """Build label categories from the label directories of all subsets.

        Returns:
            dict mapping ``AnnotationType.label`` to a ``LabelCategories``
            built from the sorted, de-duplicated directory names.
        """
        # ``subset_path`` already includes the dataset directory, so it must
        # not be joined with it again: for a relative dataset path that
        # would double the prefix and no directory would ever be found.
        # A set is used because the same label directory name may occur in
        # more than one subset.
        label_names = {
            dir_name
            for subset_path in self._subsets.values()
            for dir_name in os.listdir(subset_path)
            if (osp.isdir(osp.join(subset_path, dir_name)) and
                any(fnmatch.fnmatch(dir_name, image_dir)
                    for image_dir in MarsPath.IMAGE_DIR_PATTERNS))
        }
        return {
            AnnotationType.label:
                LabelCategories.from_iterable(sorted(label_names)),
        }

    def _load_items(self, subset, path):
        """Collect the dataset items of one subset.

        Args:
            subset: subset name assigned to the created items.
            path: directory of that subset.

        Returns:
            list of ``DatasetItem``.
        """
        items = []
        label_categories = self._categories[AnnotationType.label]
        for label_cat in label_categories:
            label = label_cat.name
            label_id = label_categories.find(label)[0]

            label_dir = osp.join(path, label)
            if not osp.isdir(label_dir):
                # Categories are collected over all subsets; this label has
                # no directory in the current one.
                continue

            for image_path in find_images(label_dir):
                image_name = osp.basename(image_path)
                item_id = osp.splitext(image_name)[0]
                pedestrian_id = image_name[0:4]

                if not fnmatch.fnmatch(image_name,
                        label + MarsPath.IMAGE_NAME_POSTFIX):
                    # The name does not follow the MARS convention: keep the
                    # image as an unannotated item, but still in the subset
                    # it was found in.
                    items.append(DatasetItem(id=item_id, image=image_path,
                        subset=subset))
                    continue

                # Defensive check: the pattern above already starts with the
                # label, so a mismatch is not expected here.
                if pedestrian_id != label:
                    log.warning(
                        'The image %s will be skipped because the '
                        'pedestrian id for it does not match the '
                        'directory name: %s', image_path, label)
                    continue

                # Fixed-position fields of '<4-char id>C<c>T<tttt>F<fff>...'.
                items.append(DatasetItem(id=item_id, image=image_path,
                    subset=subset, annotations=[Label(label=label_id)],
                    attributes={
                        'person_id': pedestrian_id,
                        'camera_id': int(image_name[5]),
                        'track_id': int(image_name[7:11]),
                        'frame_id': int(image_name[12:15]),
                    })
                )

        return items

class MarsImporter(Importer):
    """Importer that recognises MARS datasets by their directory layout."""

    @classmethod
    def detect(cls, context: FormatDetectionContext):
        """Require at least one file matching
        ``bbox_*/<image dir>/<image dir><image name postfix>`` for any of
        the known image directory patterns.
        """
        with context.require_any():
            for image_dir in MarsPath.IMAGE_DIR_PATTERNS:
                with context.alternative():
                    context.require_file('/'.join([
                        MarsPath.SUBSET_DIR_PATTERN,
                        image_dir,
                        image_dir + MarsPath.IMAGE_NAME_POSTFIX,
                    ]))

    @classmethod
    def find_sources(cls, path):
        """Return a single 'mars' source for ``path`` if it looks like MARS.

        Args:
            path: candidate dataset root directory.

        Returns:
            ``[{'url': path, 'format': 'mars'}]`` when some subset directory
            contains at least one conforming image, otherwise an empty list
            (also when ``path`` is not a directory).
        """
        if not osp.isdir(path):
            return []

        subset_dirs = [
            subset_dir for subset_dir in os.listdir(path)
            if (osp.isdir(osp.join(path, subset_dir)) and
                fnmatch.fnmatch(subset_dir, MarsPath.SUBSET_DIR_PATTERN))
        ]

        for image_dir in MarsPath.IMAGE_DIR_PATTERNS:
            for subset_dir in subset_dirs:
                # Only the trailing two components are meant as patterns;
                # escape the real path parts so that glob metacharacters in
                # them are matched literally.
                pattern = '/'.join((
                    glob.escape(path),
                    glob.escape(subset_dir),
                    image_dir,
                    image_dir + MarsPath.IMAGE_NAME_POSTFIX,
                ))
                if next(glob.iglob(pattern), None) is not None:
                    return [{'url': path, 'format': 'mars'}]

        # Always return a list, never an implicit None.
        return []
Loading