diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a2c0d16aa..1d5ccb48e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 () - Extension autodetection in `ByteImage` () +- Add MARS format (import only) + () ### Changed - `smooth_line` from `datumaro.util.annotation_util` - the function diff --git a/datumaro/plugins/market1501_format.py b/datumaro/plugins/market1501_format.py index 4a52329972..c4bd141c36 100644 --- a/datumaro/plugins/market1501_format.py +++ b/datumaro/plugins/market1501_format.py @@ -3,13 +3,12 @@ # SPDX-License-Identifier: MIT from distutils.util import strtobool -from itertools import chain import os import os.path as osp import re from datumaro.components.converter import Converter -from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor +from datumaro.components.extractor import DatasetItem, Extractor, Importer from datumaro.util.image import find_images @@ -17,123 +16,136 @@ class Market1501Path: QUERY_DIR = 'query' BBOX_DIR = 'bounding_box_' IMAGE_EXT = '.jpg' - PATTERN = re.compile(r'^(-?\d+)_c(\d+)(?:s\d+_\d+_00(.*))?') + PATTERN = re.compile(r'^(-?\d+)_c(\d+)s(\d+)_(\d+)_(\d+)(.*)') LIST_PREFIX = 'images_' UNKNOWN_ID = -1 + ATTRIBUTES = ['person_id', 'camera_id', 'track_id', 'frame_id', 'bbox_id'] -class Market1501Extractor(SourceExtractor): - def __init__(self, path, subset=None): +class Market1501Extractor(Extractor): + def __init__(self, path): if not osp.isdir(path): raise NotADirectoryError( "Can't open folder with annotation files '%s'" % path) - if not subset: - subset = '' - for p in os.listdir(path): - pf = osp.join(path, p) + self._path = path + super().__init__() - if p.startswith(Market1501Path.BBOX_DIR) and osp.isdir(pf): - subset = p.replace(Market1501Path.BBOX_DIR, '') - break + subsets = {} + for p in os.listdir(path): + pf = osp.join(path, p) - if p.startswith(Market1501Path.LIST_PREFIX) and osp.isfile(pf): 
- subset = p.replace(Market1501Path.LIST_PREFIX, '') - subset = osp.splitext(subset)[0] - break - super().__init__(subset=subset) + if p.startswith(Market1501Path.BBOX_DIR) and osp.isdir(pf): + subset = p.replace(Market1501Path.BBOX_DIR, '') + subsets[subset] = pf - self._path = path - self._items = list(self._load_items(path).values()) + if p.startswith(Market1501Path.LIST_PREFIX) and osp.isfile(pf): + subset = p.replace(Market1501Path.LIST_PREFIX, '') + subset = osp.splitext(subset)[0] + subsets[subset] = pf + + if p.startswith(Market1501Path.QUERY_DIR) and osp.isdir(pf): + subset = Market1501Path.QUERY_DIR + subsets[subset] = pf + + self._items = [] + for subset, subset_path in subsets.items(): + self._items.extend(list( + self._load_items(subset, subset_path).values())) + + def __iter__(self): + yield from self._items - def _load_items(self, rootdir): + def _load_items(self, subset, subset_path): items = {} paths = [] - anno_file = osp.join(rootdir, - Market1501Path.LIST_PREFIX + self._subset + '.txt') - if osp.isfile(anno_file): - with open(anno_file, encoding='utf-8') as f: + if osp.isfile(subset_path): + with open(subset_path, encoding='utf-8') as f: for line in f: - paths.append(osp.join(rootdir, line.strip())) + paths.append(osp.join(self._path, line.strip())) else: - paths = list(chain( - find_images(osp.join(rootdir, - Market1501Path.QUERY_DIR), - recursive=True), - find_images(osp.join(rootdir, - Market1501Path.BBOX_DIR + self._subset), - recursive=True), - )) - - for image_path in paths: + paths = list(find_images(subset_path, recursive=True)) + + for image_path in sorted(paths): item_id = osp.splitext(osp.normpath(image_path))[0] if osp.isabs(image_path): - item_id = osp.relpath(item_id, rootdir) - subdir, item_id = item_id.split(os.sep, maxsplit=1) + item_id = osp.relpath(item_id, self._path) + item_id = item_id.split(osp.sep, maxsplit=1)[1] - pid = Market1501Path.UNKNOWN_ID - camid = Market1501Path.UNKNOWN_ID + attributes = {} search = 
Market1501Path.PATTERN.search(osp.basename(item_id)) if search: - pid, camid = map(int, search.groups()[0:2]) - camid -= 1 # make ids 0-based - custom_name = search.groups()[2] + attribute_values = search.groups()[0:5] + attributes = { + 'person_id': attribute_values[0], + 'camera_id': int(attribute_values[1]) - 1, + 'track_id': int(attribute_values[2]), + 'frame_id': int(attribute_values[3]), + 'bbox_id': int(attribute_values[4]), + 'query': subset == Market1501Path.QUERY_DIR + } + + custom_name = search.groups()[5] if custom_name: item_id = osp.join(osp.dirname(item_id), custom_name) item = items.get(item_id) if item is None: - item = DatasetItem(id=item_id, subset=self._subset, - image=image_path) + item = DatasetItem(id=item_id, subset=subset, image=image_path, + attributes=attributes) items[item_id] = item - if pid != Market1501Path.UNKNOWN_ID or \ - camid != Market1501Path.UNKNOWN_ID: - attributes = item.attributes - attributes['query'] = subdir == Market1501Path.QUERY_DIR - attributes['person_id'] = pid - attributes['camera_id'] = camid return items class Market1501Importer(Importer): @classmethod def find_sources(cls, path): - if not osp.isdir(path): - return [] - return [{ 'url': path, 'format': Market1501Extractor.NAME }] + for dirname in os.listdir(path): + if dirname.startswith((Market1501Path.BBOX_DIR, + Market1501Path.QUERY_DIR, Market1501Path.LIST_PREFIX)): + return [{'url': path, 'format': Market1501Extractor.NAME}] class Market1501Converter(Converter): DEFAULT_IMAGE_EXT = Market1501Path.IMAGE_EXT + def _make_dir_name(self, item): + dirname = Market1501Path.BBOX_DIR + item.subset + query = item.attributes.get('query') + if query is not None and isinstance(query, str): + query = strtobool(query) + if query: + dirname = Market1501Path.QUERY_DIR + return dirname + def apply(self): for subset_name, subset in self._extractor.subsets().items(): annotation = '' + used_frames = {} for item in subset: + dirname = self._make_dir_name(item) + image_name = 
item.id - if Market1501Path.PATTERN.search(image_name) is None: - if 'person_id' in item.attributes and \ - 'camera_id' in item.attributes: - image_pattern = '{:04d}_c{}s1_000000_00{}' - pid = int(item.attributes['person_id']) - camid = int(item.attributes['camera_id']) + 1 - dirname, basename = osp.split(item.id) - image_name = osp.join(dirname, - image_pattern.format(pid, camid, basename)) - - dirname = Market1501Path.BBOX_DIR + subset_name - if 'query' in item.attributes: - query = item.attributes.get('query') - if isinstance(query, str): - query = strtobool(query) - if query: - dirname = Market1501Path.QUERY_DIR + pid = item.attributes.get('person_id') + match = Market1501Path.PATTERN.fullmatch(item.id) + if not match and pid: + cid = int(item.attributes.get('camera_id', 0)) + 1 + tid = int(item.attributes.get('track_id', 1)) + bbid = int(item.attributes.get('bbox_id', 0)) + fid = int(item.attributes.get('frame_id', + max(used_frames.get((pid, cid, tid), [-1])) + 1)) + image_name = osp.join(osp.dirname(image_name), + f'{pid}_c{cid}s{tid}_{fid:06d}_{bbid:02d}') image_path = self._make_image_filename(item, name=image_name, subdir=dirname) if self._save_images and item.has_image: self._save_image(item, osp.join(self._save_dir, image_path)) + attrs = Market1501Path.PATTERN.search(image_name) + if attrs: + attrs = attrs.groups() + used_frames.setdefault(attrs[0:2], []).append(int(attrs[3])) annotation += '%s\n' % image_path annotation_file = osp.join(self._save_dir, diff --git a/datumaro/plugins/mars_format.py b/datumaro/plugins/mars_format.py new file mode 100644 index 0000000000..1822133808 --- /dev/null +++ b/datumaro/plugins/mars_format.py @@ -0,0 +1,115 @@ +# Copyright (C) 2020-2021 Intel Corporation +# +# SPDX-License-Identifier: MIT +import fnmatch +import glob +import logging as log +import os +import os.path as osp + +from datumaro.components.annotation import ( + AnnotationType, Label, LabelCategories, +) +from datumaro.components.dataset import 
DatasetItem +from datumaro.components.extractor import Extractor, Importer +from datumaro.components.format_detection import FormatDetectionContext +from datumaro.util.image import find_images + + +class MarsPath: + SUBSET_DIR_PATTERN = 'bbox_*' + IMAGE_DIR_PATTERNS = ['[0-9]' * 4, '00-1'] + IMAGE_NAME_POSTFIX = 'C[0-9]' + 'T' + '[0-9]' * 4 \ + + 'F' + '[0-9]' * 3 + '.*' + +class MarsExtractor(Extractor): + def __init__(self, path): + assert osp.isdir(path), path + super().__init__() + + self._dataset_dir = path + self._subsets = { + subset_dir.split('_', maxsplit=1)[1]: osp.join(path, subset_dir) + for subset_dir in os.listdir(path) + if (osp.isdir(osp.join(path, subset_dir)) and + fnmatch.fnmatch(subset_dir, MarsPath.SUBSET_DIR_PATTERN)) + } + + self._categories = self._load_categories() + self._items = [] + for subset, subset_path in self._subsets.items(): + self._items.extend(self._load_items(subset, subset_path)) + + def __iter__(self): + yield from self._items + + def categories(self): + return self._categories + + def _load_categories(self): + dirs = sorted([dir_name for subset_path in self._subsets.values() + for dir_name in os.listdir(subset_path) + if (osp.isdir(osp.join(self._dataset_dir, subset_path, dir_name)) + and any(fnmatch.fnmatch(dir_name, image_dir) + for image_dir in MarsPath.IMAGE_DIR_PATTERNS)) + ]) + return {AnnotationType.label: LabelCategories.from_iterable(dirs)} + + def _load_items(self, subset, path): + items = [] + for label_cat in self._categories[AnnotationType.label]: + label = label_cat.name + label_id = self._categories[AnnotationType.label].find(label)[0] + for image_path in find_images(osp.join(path, label)): + image_name = osp.basename(image_path) + item_id = osp.splitext(image_name)[0] + pedestrian_id = image_name[0:4] + + if not fnmatch.fnmatch(image_name, + label + MarsPath.IMAGE_NAME_POSTFIX): + items.append(DatasetItem(id=item_id, image=image_path)) + continue + + if pedestrian_id != label: + log.warning(f'The image 
{image_path} will be skip because' + 'pedestrian id for it does not match with' + f'the directory name: {label}') + continue + + items.append(DatasetItem(id=item_id, image=image_path, + subset=subset, annotations=[Label(label=label_id)], + attributes={'person_id': pedestrian_id, + 'camera_id': int(image_name[5]), + 'track_id': int(image_name[7:11]), + 'frame_id': int(image_name[12:15]) + }) + ) + + return items + +class MarsImporter(Importer): + @classmethod + def detect(cls, context: FormatDetectionContext): + with context.require_any(): + for image_dir in MarsPath.IMAGE_DIR_PATTERNS: + with context.alternative(): + context.require_file('/'.join([MarsPath.SUBSET_DIR_PATTERN, + image_dir, image_dir + MarsPath.IMAGE_NAME_POSTFIX] + )) + + @classmethod + def find_sources(cls, path): + patterns = ['/'.join((path, subset_dir, image_dir, + image_dir + MarsPath.IMAGE_NAME_POSTFIX)) + for image_dir in MarsPath.IMAGE_DIR_PATTERNS + for subset_dir in os.listdir(path) + if (osp.isdir(osp.join(path, subset_dir)) and + fnmatch.fnmatch(subset_dir, MarsPath.SUBSET_DIR_PATTERN)) + ] + + for pattern in patterns: + try: + next(glob.iglob(pattern)) + return [{'url': path, 'format': 'mars'}] + except StopIteration: + continue diff --git a/site/content/en/docs/formats/market1501.md b/site/content/en/docs/formats/market1501.md new file mode 100644 index 0000000000..f1c3e14c37 --- /dev/null +++ b/site/content/en/docs/formats/market1501.md @@ -0,0 +1,103 @@ +--- +title: 'Market-1501' +linkTitle: 'Market-1501' +description: '' +weight: 14 +--- + +## Format specification + +Market-1501 is a dataset for person re-identification task, link +for downloading this dataset is available +[here](https://zheng-lab.cecs.anu.edu.au/Project/project_reid.html). 
+ +Supported item attributes: +- `person_id` (str): four-digit number that represents the ID of the pedestrian; +- `camera_id` (int): one-digit number that represents the ID of the camera that took + the image (the original dataset has 6 cameras in total); it is stored 0-based, so `c1` in the image name gives `camera_id` 0; +- `track_id` (int): one-digit number that represents the ID of the track with + the particular pedestrian; this attribute corresponds to `sequence_id` + in the original dataset; +- `frame_id` (int): six-digit number that is the number of the + frame within this track. Track numbers are accumulated + for each ID, whereas frame numbers start from "0001" in each track; +- `bbox_id` (int): two-digit number that is the number of the + bounding box that was selected for that image + (see the + [original docs](https://zheng-lab.cecs.anu.edu.au/Project/project_reid.html) + for more info). + +These item attributes are encoded in the image name using the following convention: +``` +0000_c1s1_000000_00.jpg +``` +- the first four digits indicate the `person_id`; +- the digit after `c` indicates the `camera_id`; +- the digit after `s` indicates the `track_id`; +- the six digits after `s1_` indicate the `frame_id`; +- the last two digits before `.jpg` indicate the `bbox_id`. + +## Import Market-1501 dataset + +Importing a Market-1501 dataset into a Datumaro project: +``` +datum create +datum import -f market1501 <path_to_dataset> +``` +See more information about adding datasets to the project in the +[docs](/docs/user-manual/command-reference/sources/#source-add). + +Or you can import a Market-1501 dataset using the Python API: + +```python +from datumaro.components.dataset import Dataset +dataset = Dataset.import_from('<path_to_dataset>', 'market1501') +``` + + +To import the Market-1501 dataset successfully, its directory +should have the following structure: + +``` +market1501_dataset/ +├── query # optional directory with query images +│ ├── 0001_c1s1_001051_00.jpg +│ ├── 0002_c1s1_001051_00.jpg +│ ├── ... 
+├── bounding_box_<subset_name1> +│ ├── 0003_c1s1_001051_00.jpg +│ ├── 0003_c2s1_001054_01.jpg +│ ├── 0004_c1s1_001051_00.jpg +│ ├── ... +├── bounding_box_<subset_name2> +│ ├── 0005_c1s1_001051_00.jpg +│ ├── 0006_c1s1_001051_00.jpg +│ ├── ... +├── ... +``` + +## Export dataset to the Market-1501 format + +With Datumaro you can export a dataset that has the `person_id` item attribute +to the Market-1501 format, for example: + +``` +# Converting a MARS dataset into the Market-1501 format +datum convert -if mars -i ./mars_dataset \ + -f market1501 -o ./output_dir + +# Export a dataset to the Market-1501 format through a Datumaro project: +datum create +datum add -f mars ../mars +datum export -f market1501 -o ./output_dir -- --save-images --image-ext png +``` + +> Note: if your dataset contains only the `person_id` attribute, Datumaro +> will assign default values to the other attributes (`camera_id`, `track_id`, `bbox_id`) +> and increment `frame_id` to avoid name collisions. + +Available extra export options for the Market-1501 dataset format: +- `--save-images` allows exporting the dataset together with its images + (by default `False`) +- `--image-ext IMAGE_EXT` allows specifying the image extension + for the exported dataset (by default, the original extension is kept) diff --git a/site/content/en/docs/formats/mars.md b/site/content/en/docs/formats/mars.md new file mode 100644 index 0000000000..4b0e171edc --- /dev/null +++ b/site/content/en/docs/formats/mars.md @@ -0,0 +1,74 @@ +--- +title: 'MARS' +linkTitle: 'MARS' +description: '' +weight: 14 +--- + +## Format specification + +MARS is a dataset for the motion analysis and person identification task. 
+The MARS dataset is available for downloading +[here](https://zheng-lab.cecs.anu.edu.au/Project/project_mars.html). + +Supported types of annotations: +- `Label` + +Required attributes: +- `person_id` (str): four-digit number that represents the ID of the pedestrian; +- `camera_id` (int): one-digit number that represents the ID of the camera that took + the image (the original dataset has 6 cameras in total); +- `track_id` (int): four-digit number that represents the ID of the track with + the particular pedestrian; +- `frame_id` (int): three-digit number that is the number of the + frame within this track. Track numbers are accumulated + for each ID, whereas frame numbers start from "001" in each track. + + +## Import MARS dataset + +Use these instructions to import a MARS dataset into a Datumaro project: + +```bash +datum create +datum add -f mars ./dataset +``` + +> Note: the directory with the dataset should be a subdirectory of the +> project directory. + +``` +mars_dataset +├── bbox_<subset_name1> +│ ├── 0001 # directory with images of pedestrian with id 0001 +│ │ ├── 0001C1T0001F001.jpg +│ │ ├── 0001C1T0001F002.jpg +│ │ ├── ... +│ ├── 0002 # directory with images of pedestrian with id 0002 +│ │ ├── 0002C1T0001F001.jpg +│ │ ├── 0002C1T0001F002.jpg +│ │ ├── ... +│ ├── 0000 # distractor images, which negatively affect retrieval accuracy +│ │ ├── 0000C1T0001F001.jpg +│ │ ├── 0000C1T0001F002.jpg +│ │ ├── ... +│ ├── 00-1 # junk images, which do not affect retrieval accuracy +│ │ ├── 00-1C1T0001F001.jpg +│ │ ├── 00-1C1T0001F002.jpg +│ │ ├── ... +├── bbox_<subset_name2> +│ ├── ... +├── ... +``` + +All images in the MARS dataset follow a strict naming convention: +``` +xxxxCxTxxxxFxxx.jpg +``` +- the first four digits indicate the pedestrian's number; +- the digit after `C` indicates the camera id; +- the four digits after `T` indicate the track id for this pedestrian; +- the three digits after `F` indicate the frame id within this track. 
+ +> Note: there are two specific pedestrian IDs 0000 and 00-1 +> which indicate distracting images and unwanted images respectively. diff --git a/tests/assets/market1501_dataset/bounding_box_train/0002_c1s3_000151_00.jpg b/tests/assets/market1501_dataset/bounding_box_train/0002_c1s3_000151_00.jpg new file mode 100644 index 0000000000..92e4057cfb Binary files /dev/null and b/tests/assets/market1501_dataset/bounding_box_train/0002_c1s3_000151_00.jpg differ diff --git a/tests/assets/mars_dataset/bbox_test/00-1/00-1C2T0081F201.jpg b/tests/assets/mars_dataset/bbox_test/00-1/00-1C2T0081F201.jpg new file mode 100644 index 0000000000..b99ff40ee4 Binary files /dev/null and b/tests/assets/mars_dataset/bbox_test/00-1/00-1C2T0081F201.jpg differ diff --git a/tests/assets/mars_dataset/bbox_train/0000/0000C6T0101F001.jpg b/tests/assets/mars_dataset/bbox_train/0000/0000C6T0101F001.jpg new file mode 100644 index 0000000000..b99ff40ee4 Binary files /dev/null and b/tests/assets/mars_dataset/bbox_train/0000/0000C6T0101F001.jpg differ diff --git a/tests/assets/mars_dataset/bbox_train/0001/0001C1T0001F001.jpg b/tests/assets/mars_dataset/bbox_train/0001/0001C1T0001F001.jpg new file mode 100644 index 0000000000..b99ff40ee4 Binary files /dev/null and b/tests/assets/mars_dataset/bbox_train/0001/0001C1T0001F001.jpg differ diff --git a/tests/test_market1501_format.py b/tests/test_market1501_format.py index e88ae5e4e3..d2751d54a1 100644 --- a/tests/test_market1501_format.py +++ b/tests/test_market1501_format.py @@ -20,28 +20,19 @@ class Market1501FormatTest(TestCase): def test_can_save_and_load(self): source_dataset = Dataset.from_iterable([ DatasetItem(id='0001_c2s3_000001_00', - subset='test', image=np.ones((2, 5, 3)), - attributes = { - 'camera_id': 1, - 'person_id': 1, - 'query': True - } + subset='query', image=np.ones((2, 5, 3)), + attributes = {'camera_id': 1, 'person_id': '0001', 'track_id': 3, + 'frame_id': 1, 'bbox_id': 0, 'query': True} ), DatasetItem(id='0002_c4s2_000002_00', 
subset='test', image=np.ones((2, 5, 3)), - attributes = { - 'camera_id': 3, - 'person_id': 2, - 'query': False - } + attributes = {'camera_id': 3, 'person_id': '0002', 'track_id': 2, + 'frame_id': 2, 'bbox_id': 0, 'query': False} ), DatasetItem(id='0001_c1s1_000003_00', subset='test', image=np.ones((2, 5, 3)), - attributes = { - 'camera_id': 0, - 'person_id': 1, - 'query': False - } + attributes = {'camera_id': 0, 'person_id': '0001', 'track_id': 1, + 'frame_id': 3, 'bbox_id': 0, 'query': False} ), ]) @@ -56,11 +47,8 @@ def test_can_save_dataset_with_no_subsets(self): source_dataset = Dataset.from_iterable([ DatasetItem(id='0001_c2s3_000001_00', image=np.ones((2, 5, 3)), - attributes = { - 'camera_id': 1, - 'person_id': 1, - 'query': True - } + attributes = {'camera_id': 1, 'person_id': '0001', 'track_id': 3, + 'frame_id': 1, 'bbox_id': 0, 'query': False} ), ]) @@ -75,11 +63,15 @@ def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self): source_dataset = Dataset.from_iterable([ DatasetItem(id='кириллица с пробелом', image=np.ones((2, 5, 3)), - attributes = { - 'camera_id': 1, - 'person_id': 1, - 'query': True - } + attributes = {'camera_id': 0, 'person_id': '0001', 'query': False} + ), + ]) + + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='0001_c1s1_000000_00', + image=np.ones((2, 5, 3)), + attributes = {'camera_id': 0, 'person_id': '0001', 'track_id': 1, + 'frame_id': 0, 'bbox_id': 0, 'query': False} ), ]) @@ -87,27 +79,16 @@ def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self): Market1501Converter.convert(source_dataset, test_dir, save_images=True) parsed_dataset = Dataset.import_from(test_dir, 'market1501') - compare_datasets(self, source_dataset, parsed_dataset, + compare_datasets(self, expected_dataset, parsed_dataset, require_images=True) @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_save_dataset_with_no_save_images(self): source_dataset = Dataset.from_iterable([ DatasetItem(id='0001_c2s3_000001_00', 
- subset='test', image=np.ones((2, 5, 3)), - attributes = { - 'camera_id': 1, - 'person_id': 1, - 'query': True - } - ), - DatasetItem(id='test1', - subset='test', image=np.ones((2, 5, 3)), - attributes = { - 'camera_id': 1, - 'person_id': 2, - 'query': False - } + subset='query', image=np.ones((2, 5, 3)), + attributes = {'camera_id': 1, 'person_id': '0001', 'track_id': 3, + 'frame_id': 1, 'bbox_id': 0, 'query': True} ), ]) @@ -120,20 +101,16 @@ def test_can_save_dataset_with_no_save_images(self): @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_can_save_and_load_image_with_arbitrary_extension(self): expected = Dataset.from_iterable([ - DatasetItem(id='q/1', image=Image( - path='q/1.JPEG', data=np.zeros((4, 3, 3))), - attributes={ - 'camera_id': 1, - 'person_id': 1, - 'query': False - }), - DatasetItem(id='a/b/c/2', image=Image( - path='a/b/c/2.bmp', data=np.zeros((3, 4, 3))), - attributes={ - 'camera_id': 1, - 'person_id': 2, - 'query': True - }), + DatasetItem(id='c/0001_c1s1_000000_00', image=Image( + path='c/0001_c1s1_0000_00.JPEG', data=np.zeros((4, 3, 3))), + attributes={'camera_id': 0, 'person_id': '0001', 'track_id': 1, + 'frame_id': 0, 'bbox_id': 0, 'query': False} + ), + DatasetItem(id='a/b/0002_c2s2_000001_00', image=Image( + path='a/b/0002_c2s2_0001_00.bmp', data=np.zeros((3, 4, 3))), + attributes={'camera_id': 1, 'person_id': '0002', 'track_id': 2, + 'frame_id': 1, 'bbox_id': 0, 'query': False} + ), ]) with TestDir() as test_dir: @@ -149,14 +126,6 @@ def test_can_save_dataset_with_no_attributes(self): DatasetItem(id='test1', subset='test', image=np.ones((2, 5, 3)), ), - DatasetItem(id='test2', - subset='test', image=np.ones((2, 5, 3)), - attributes={ - 'camera_id': 1, - 'person_id': -1, - 'query': True - } - ), ]) with TestDir() as test_dir: @@ -177,23 +146,21 @@ def test_can_detect(self): def test_can_import(self): expected_dataset = Dataset.from_iterable([ DatasetItem(id='0001_c2s3_000111_00', - subset='test', image=np.ones((2, 5, 3)), - 
attributes = { - 'camera_id': 1, - 'person_id': 1, - 'query': True - } + subset='query', image=np.ones((2, 5, 3)), + attributes = {'camera_id': 1, 'person_id': '0001', 'track_id': 3, + 'frame_id': 111, 'bbox_id': 0, 'query': True} ), DatasetItem(id='0001_c1s1_001051_00', subset='test', image=np.ones((2, 5, 3)), - attributes = { - 'camera_id': 0, - 'person_id': 1, - 'query': False - } + attributes = {'camera_id': 0, 'person_id': '0001', 'track_id': 1, + 'frame_id': 1051, 'bbox_id': 0, 'query': False} + ), + DatasetItem(id='0002_c1s3_000151_00', + subset='train', image=np.ones((2, 5, 3)), + attributes = {'camera_id': 0, 'person_id': '0002', 'track_id': 3, + 'frame_id': 151, 'bbox_id': 0, 'query': False} ), ]) - dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'market1501') compare_datasets(self, expected_dataset, dataset) diff --git a/tests/test_mars_format.py b/tests/test_mars_format.py new file mode 100644 index 0000000000..8f04dff843 --- /dev/null +++ b/tests/test_mars_format.py @@ -0,0 +1,48 @@ +# Copyright (C) 2021 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from unittest.case import TestCase +import os.path as osp + +import numpy as np + +from datumaro.components.annotation import Label +from datumaro.components.dataset import Dataset, DatasetItem +from datumaro.components.environment import Environment +from datumaro.plugins.mars_format import MarsImporter +from datumaro.util.test_utils import compare_datasets + +from tests.requirements import Requirements, mark_requirement + +ASSETS_DIR = osp.join(osp.dirname(__file__), 'assets') +DUMMY_MARS_DATASET = osp.join(ASSETS_DIR, 'mars_dataset') + +class MarsImporterTest(TestCase): + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_import(self): + expected_dataset = Dataset.from_iterable([ + DatasetItem(id='0001C1T0001F001', image=np.ones((10, 10, 3)), + subset='train', annotations=[Label(label=2)], + attributes={'person_id': '0001', 'camera_id': 1, 'track_id': 1, + 'frame_id': 1} + ), + 
DatasetItem(id='0000C6T0101F001', image=np.ones((10, 10, 3)), + subset='train', annotations=[Label(label=1)], + attributes={'person_id': '0000', 'camera_id': 6, 'track_id': 101, + 'frame_id': 1} + ), + DatasetItem(id='00-1C2T0081F201', image=np.ones((10, 10, 3)), + subset='test', annotations=[Label(label=0)], + attributes={'person_id': '00-1', 'camera_id': 2, 'track_id': 81, + 'frame_id': 201} + ), + ], categories=['00-1', '0000', '0001']) + + imported_dataset = Dataset.import_from(DUMMY_MARS_DATASET, 'mars') + compare_datasets(self, expected_dataset, imported_dataset, require_images=True) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_detect(self): + detected_formats = Environment().detect_dataset(DUMMY_MARS_DATASET) + self.assertEqual([MarsImporter.NAME], detected_formats)