From f1c52c789c79b5855e6ef07a2854fbdd4c35e9db Mon Sep 17 00:00:00 2001
From: "kirill.sizov" <kirill.sizov@intel.com>
Date: Fri, 17 Dec 2021 17:27:35 +0300
Subject: [PATCH 01/23] Add importing of MARS dataset

---
 datumaro/plugins/mars_format.py | 122 ++++++++++++++++++++++++++++++++
 1 file changed, 122 insertions(+)
 create mode 100644 datumaro/plugins/mars_format.py

diff --git a/datumaro/plugins/mars_format.py b/datumaro/plugins/mars_format.py
new file mode 100644
index 0000000000..c34fd5f68c
--- /dev/null
+++ b/datumaro/plugins/mars_format.py
@@ -0,0 +1,122 @@
+# Copyright (C) 2020-2021 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+import fnmatch
+import glob
+import logging as log
+import os
+import os.path as osp
+
+from datumaro.components.annotation import (
+    AnnotationType, Label, LabelCategories,
+)
+from datumaro.components.dataset import DatasetItem
+from datumaro.components.extractor import Extractor, Importer
+from datumaro.components.format_detection import FormatDetectionContext
+from datumaro.components.media import Image
+from datumaro.util.image import find_images
+
+
+class MarsPath:
+    SUBSET_DIR_PATTERN = 'bbox_*'
+    IMAGE_DIR_PATTERNS = ['[0-9]' * 4, '00-1']
+    IMAGE_NAME_POSTFIX = 'C[0-9]' + 'T' + '[0-9]' * 4 \
+                         + 'F' + '[0-9]' * 3  + '.*'
+
+class MarsExtractor(Extractor):
+    def __init__(self, path):
+        assert osp.isdir(path), path
+        super().__init__()
+
+        self._dataset_dir = path
+        self._subsets = {
+            subset_dir.split('_', maxsplit=1)[1]: osp.join(path, subset_dir)
+            for subset_dir in os.listdir(path)
+            if (osp.isdir(osp.join(path, subset_dir)) and
+                fnmatch.fnmatch(subset_dir, MarsPath.SUBSET_DIR_PATTERN))
+        }
+
+        self._categories = self._load_categories()
+        self._items = []
+        for subset, subset_path in self._subsets.items():
+            self._items.extend(self._load_items(subset, subset_path))
+
+    def __iter__(self):
+        yield from self._items
+
+    def categories(self):
+        return self._categories
+
+    def _load_categories(self):
+        dirs = sorted([dir_name for subset_path in self._subsets.values()
+            for dir_name in os.listdir(subset_path)
+            if (osp.isdir(osp.join(self._dataset_dir, subset_path, dir_name))
+                and any(fnmatch.fnmatch(dir_name, image_dir)
+                    for image_dir in MarsPath.IMAGE_DIR_PATTERNS))
+        ])
+        return {AnnotationType.label: LabelCategories.from_iterable(dirs)}
+
+    def _load_items(self, subset, path):
+        items = []
+        for label_cat in self._categories[AnnotationType.label]:
+            label = label_cat.name
+            label_id = self._categories[AnnotationType.label].find(label)[0]
+            for image_path in find_images(osp.join(path, label)):
+                image_name = osp.basename(image_path)
+                pedestrian_id = image_name[0:4]
+
+                if not fnmatch.fnmatch(image_name,
+                        label + MarsPath.IMAGE_NAME_POSTFIX):
+                    log.warning(f'The image {image_path} will be skip because '
+                        'it has incorrect name. See the docs to get more info '
+                        f'pattern {label + MarsPath.IMAGE_NAME_POSTFIX}')
+                    continue
+
+                if pedestrian_id != label:
+                    log.warning(f'The image {image_path} will be skip because'
+                        'pedestrian id for it does not match with'
+                        f'the directory name: {label}')
+                    continue
+
+                items.append(DatasetItem(id=osp.splitext(image_name)[0],
+                    image=Image(path=osp.join(path, label, image_name)),
+                    annotations=[Label(label=label_id, attributes={
+                            'is_distractors': pedestrian_id == '0000',
+                            'is_junk': pedestrian_id == '00-1',
+                            'pedestrian_id': pedestrian_id,
+                            'camera_id': image_name[5],
+                            'track_id': image_name[7:11],
+                            'frame_id': image_name[12:15]
+                        })
+                    ], subset=subset)
+                )
+
+        return items
+
+class MarsImporter(Importer):
+    @classmethod
+    def detect(cls, context: FormatDetectionContext):
+        with context.require_any():
+            for image_dir in MarsPath.IMAGE_DIR_PATTERNS:
+                with context.alternative():
+                    context.require_file('/'.join([MarsPath.SUBSET_DIR_PATTERN,
+                        image_dir, image_dir + MarsPath.IMAGE_NAME_POSTFIX]
+                    ))
+
+    @classmethod
+    def find_sources(cls, path):
+        patterns = ['/'.join((path, subset_dir, image_dir,
+                image_dir + MarsPath.IMAGE_NAME_POSTFIX))
+            for image_dir in MarsPath.IMAGE_DIR_PATTERNS
+            for subset_dir in os.listdir(path)
+            if (osp.isdir(osp.join(path, subset_dir)) and
+                fnmatch.fnmatch(subset_dir, MarsPath.SUBSET_DIR_PATTERN))
+        ]
+
+        for pattern in patterns:
+            try:
+                next(glob.iglob(pattern))
+                return [{'url': path, 'format': 'mars'}]
+            except StopIteration:
+                continue
+

From c5d79eebaf33a13185334f9554f41affc74b85e6 Mon Sep 17 00:00:00 2001
From: "kirill.sizov" <kirill.sizov@intel.com>
Date: Fri, 17 Dec 2021 17:27:55 +0300
Subject: [PATCH 02/23] Add tests

---
 tests/test_mars_format.py | 60 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 tests/test_mars_format.py

diff --git a/tests/test_mars_format.py b/tests/test_mars_format.py
new file mode 100644
index 0000000000..377792d1ca
--- /dev/null
+++ b/tests/test_mars_format.py
@@ -0,0 +1,60 @@
+# Copyright (C) 2021 Intel Corporation
+#
+# SPDX-License-Identifier: MIT
+
+from unittest.case import TestCase
+import os.path as osp
+
+import numpy as np
+
+from datumaro.components.annotation import Label
+from datumaro.components.dataset import Dataset, DatasetItem
+from datumaro.components.environment import Environment
+from datumaro.plugins.mars_format import MarsImporter
+from datumaro.util.test_utils import compare_datasets
+
+from tests.requirements import Requirements, mark_requirement
+
+ASSETS_DIR = osp.join(osp.dirname(__file__), 'assets')
+DUMMY_MARS_DATASET = osp.join(ASSETS_DIR, 'mars_dataset')
+
+class MarsImporterTest(TestCase):
+    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
+    def test_can_import(self):
+        expected_dataset = Dataset.from_iterable([
+            DatasetItem(id='0001C1T0001F001', image=np.ones((10, 10, 3)),
+                subset='train', annotations=[Label(label=2, attributes={
+                    'is_distractors': False,
+                    'is_junk': False,
+                    'pedestrian_id': '0001',
+                    'camera_id': '1',
+                    'track_id': '0001',
+                    'frame_id': '001'})]
+            ),
+            DatasetItem(id='0000C6T0101F001', image=np.ones((10, 10, 3)),
+                subset='train', annotations=[Label(label=1, attributes={
+                    'is_distractors': True,
+                    'is_junk': False,
+                    'pedestrian_id': '0000',
+                    'camera_id': '6',
+                    'track_id': '0101',
+                    'frame_id': '001'})]
+            ),
+            DatasetItem(id='00-1C2T0081F201', image=np.ones((10, 10, 3)),
+                subset='test', annotations=[Label(label=0, attributes={
+                    'is_distractors': False,
+                    'is_junk': True,
+                    'pedestrian_id': '00-1',
+                    'camera_id': '2',
+                    'track_id': '0081',
+                    'frame_id': '201'})]
+            ),
+        ], categories=['00-1', '0000', '0001'])
+
+        imported_dataset = Dataset.import_from(DUMMY_MARS_DATASET, 'mars')
+        compare_datasets(self, expected_dataset, imported_dataset, require_images=True)
+
+    @mark_requirement(Requirements.DATUM_GENERAL_REQ)
+    def test_can_detect(self):
+        detected_formats = Environment().detect_dataset(DUMMY_MARS_DATASET)
+        self.assertEqual([MarsImporter.NAME], detected_formats)

From b37d4bc9c913ed0c21b3b1d6bb9fc4d4dcb7f942 Mon Sep 17 00:00:00 2001
From: "kirill.sizov" <kirill.sizov@intel.com>
Date: Fri, 17 Dec 2021 17:28:20 +0300
Subject: [PATCH 03/23] Add docs

---
 site/content/en/docs/formats/mars.md | 76 ++++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)
 create mode 100644 site/content/en/docs/formats/mars.md

diff --git a/site/content/en/docs/formats/mars.md b/site/content/en/docs/formats/mars.md
new file mode 100644
index 0000000000..9c5adadf7a
--- /dev/null
+++ b/site/content/en/docs/formats/mars.md
@@ -0,0 +1,76 @@
+---
+title: 'MARS'
+linkTitle: 'MARS'
+description: ''
+weight: 14
+---
+
+## Format specification
+
+MARS is a dataset for and motion analysis and person identification task,
+and this dataset it's extension of Market-1501 dataset format.
+MARS dataset is available for downloading
+[here](http://zheng-lab.cecs.anu.edu.au/Project/project_mars.html)
+
+Supported types of annotations:
+- `Label`
+
+Required attributes:
+- `is_distractors` (bool): True when image with distractors,
+    which negatively affect retrieval accuracy
+- `is_junk`: True for junk image which do not affect retrieval accuracy;
+- `pedestrian_id`: four-digit number in format `%04d`;
+- `camera_id`: one-digit number;
+- `track_id`: four-digit number in format `%04d`;
+- `frame_id`: three-digit number in format `%03d`, that mean number of
+  frame within this track. For the tracks, their names are accumulated
+  for each ID, but for frames, they start from "0001" in each track.
+
+
+## Import MARS dataset
+
+Use these instructions to import MARS dataset into Datumaro project:
+
+```bash
+datum create
+datum add -f mars ./dataset
+```
+
+> Note: the directory with dataset should be subdirectory of the
+> project directory.
+
+```
+mars_dataset
+├── <bbox_subset_name1>
+│   ├── 0001 # directory with images of pedestrian with id 0001
+│   │   ├── 0001C1T0001F001.jpg
+│   │   ├── 0001C1T0001F002.jpg
+│   │   ├── ...
+│   ├── 0002 # directory with images of pedestrian with id 0002
+│   │   ├── 0002C1T0001F001.jpg
+│   │   ├── 0002C1T0001F002.jpg
+│   │   ├── ...
+│   ├── 0000 # distractors images, which negatively affect retrieval accuracy.
+│   │   ├── 0000C1T0001F001.jpg
+│   │   ├── 0000C1T0001F002.jpg
+│   │   ├── ...
+│   ├── 00-1 # junk images which do not affect retrieval accuracy
+│   │   ├── 00-1C1T0001F001.jpg
+│   │   ├── 00-1C1T0001F002.jpg
+│   │   ├── ...
+├── <bbox_subset_name2>
+│   ├── ...
+├── ...
+```
+
+All images in MARS dataset has strict convention of naming:
+```
+xxxxCxTxxxxFxxx.jpg
+```
+- the first four digits indicate the pedestrian's number;
+- digits after `C` indicate the camera id;
+- four digits after `T` indicate the track id for this pedestrian;
+- three digits after `F` indicate the frame id with this track
+
+> Note: there are two specific pedestrian IDs 0000 and 00-1
+> which indicate distracting images and unwanted images respectively

From d3850ce8af99fcb39a0f9468ae41e2d92fbc99a3 Mon Sep 17 00:00:00 2001
From: "kirill.sizov" <kirill.sizov@intel.com>
Date: Fri, 17 Dec 2021 17:39:09 +0300
Subject: [PATCH 04/23] Add assets

---
 .../bbox_test/00-1/00-1C2T0081F201.jpg            | Bin 0 -> 631 bytes
 .../bbox_train/0000/0000C6T0101F001.jpg           | Bin 0 -> 631 bytes
 .../bbox_train/0001/0001C1T0001F001.jpg           | Bin 0 -> 631 bytes
 3 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 tests/assets/mars_dataset/bbox_test/00-1/00-1C2T0081F201.jpg
 create mode 100644 tests/assets/mars_dataset/bbox_train/0000/0000C6T0101F001.jpg
 create mode 100644 tests/assets/mars_dataset/bbox_train/0001/0001C1T0001F001.jpg

diff --git a/tests/assets/mars_dataset/bbox_test/00-1/00-1C2T0081F201.jpg b/tests/assets/mars_dataset/bbox_test/00-1/00-1C2T0081F201.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b99ff40ee40d5590dc54c4c2445a81b5d04b4337
GIT binary patch
literal 631
zcmex=<NpH&0WUXCHwH#V1_nkTWcYuZ!I^=H5d=Ua69}*{gGg2u7G@SURyH;;X6NK!
zW9MXNW8>iF;N$`UAd82aiwDF383NJD#LCRf%Eivc4pu@E@&5pWAO`~%0~a%+5(ASU
zBeNjm|04|YKzFi&od<L*1TZo&v#_$U1D(MQRIpV5=zL~oprcuVE(WTu1<Et92(k((
z8alEG2PU#B6^a-&PF%>L?6mQqXwbzED#l4gO`Kd};u4Zls%q*Qnp!5NX66=_R?aT2
zZtfnQUcn)uVc`*xQOPN(Y3Ui6S;Zx#W#tu>Rn0A}ZS5VMU6UqHnL2IyjG40*Enc#8
z+42=DS8dw7W$U)>J9h3mboj{8W5-XNJay^vm8;jT-?(|};iJb-o<4j2;^nK4pFV&2
z`tAFVpT9u<Vq{>3cne5k^_L*fUreAlU<dh&k*OSrnFU!`6%E;h90S=C3x$=88aYIq
zCNA7~kW<+>=!0ld(M2vX6_bamA3<IN`;0h`HId~rxW^Fwy2Zf5%m|D;W<dsfhCj@U
Kx-^*o-vj_JM85I>

literal 0
HcmV?d00001

diff --git a/tests/assets/mars_dataset/bbox_train/0000/0000C6T0101F001.jpg b/tests/assets/mars_dataset/bbox_train/0000/0000C6T0101F001.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b99ff40ee40d5590dc54c4c2445a81b5d04b4337
GIT binary patch
literal 631
zcmex=<NpH&0WUXCHwH#V1_nkTWcYuZ!I^=H5d=Ua69}*{gGg2u7G@SURyH;;X6NK!
zW9MXNW8>iF;N$`UAd82aiwDF383NJD#LCRf%Eivc4pu@E@&5pWAO`~%0~a%+5(ASU
zBeNjm|04|YKzFi&od<L*1TZo&v#_$U1D(MQRIpV5=zL~oprcuVE(WTu1<Et92(k((
z8alEG2PU#B6^a-&PF%>L?6mQqXwbzED#l4gO`Kd};u4Zls%q*Qnp!5NX66=_R?aT2
zZtfnQUcn)uVc`*xQOPN(Y3Ui6S;Zx#W#tu>Rn0A}ZS5VMU6UqHnL2IyjG40*Enc#8
z+42=DS8dw7W$U)>J9h3mboj{8W5-XNJay^vm8;jT-?(|};iJb-o<4j2;^nK4pFV&2
z`tAFVpT9u<Vq{>3cne5k^_L*fUreAlU<dh&k*OSrnFU!`6%E;h90S=C3x$=88aYIq
zCNA7~kW<+>=!0ld(M2vX6_bamA3<IN`;0h`HId~rxW^Fwy2Zf5%m|D;W<dsfhCj@U
Kx-^*o-vj_JM85I>

literal 0
HcmV?d00001

diff --git a/tests/assets/mars_dataset/bbox_train/0001/0001C1T0001F001.jpg b/tests/assets/mars_dataset/bbox_train/0001/0001C1T0001F001.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b99ff40ee40d5590dc54c4c2445a81b5d04b4337
GIT binary patch
literal 631
zcmex=<NpH&0WUXCHwH#V1_nkTWcYuZ!I^=H5d=Ua69}*{gGg2u7G@SURyH;;X6NK!
zW9MXNW8>iF;N$`UAd82aiwDF383NJD#LCRf%Eivc4pu@E@&5pWAO`~%0~a%+5(ASU
zBeNjm|04|YKzFi&od<L*1TZo&v#_$U1D(MQRIpV5=zL~oprcuVE(WTu1<Et92(k((
z8alEG2PU#B6^a-&PF%>L?6mQqXwbzED#l4gO`Kd};u4Zls%q*Qnp!5NX66=_R?aT2
zZtfnQUcn)uVc`*xQOPN(Y3Ui6S;Zx#W#tu>Rn0A}ZS5VMU6UqHnL2IyjG40*Enc#8
z+42=DS8dw7W$U)>J9h3mboj{8W5-XNJay^vm8;jT-?(|};iJb-o<4j2;^nK4pFV&2
z`tAFVpT9u<Vq{>3cne5k^_L*fUreAlU<dh&k*OSrnFU!`6%E;h90S=C3x$=88aYIq
zCNA7~kW<+>=!0ld(M2vX6_bamA3<IN`;0h`HId~rxW^Fwy2Zf5%m|D;W<dsfhCj@U
Kx-^*o-vj_JM85I>

literal 0
HcmV?d00001


From 144613b1c3858850e5935db4534220d5bc7a9475 Mon Sep 17 00:00:00 2001
From: "kirill.sizov" <kirill.sizov@intel.com>
Date: Fri, 17 Dec 2021 17:41:46 +0300
Subject: [PATCH 05/23] Delete extra line

---
 datumaro/plugins/mars_format.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/datumaro/plugins/mars_format.py b/datumaro/plugins/mars_format.py
index c34fd5f68c..fa4fd3e069 100644
--- a/datumaro/plugins/mars_format.py
+++ b/datumaro/plugins/mars_format.py
@@ -119,4 +119,3 @@ def find_sources(cls, path):
                 return [{'url': path, 'format': 'mars'}]
             except StopIteration:
                 continue
-

From d241a9d7f37a291a8466f86f0b61d9fa3de48533 Mon Sep 17 00:00:00 2001
From: "kirill.sizov" <kirill.sizov@intel.com>
Date: Mon, 20 Dec 2021 16:29:34 +0300
Subject: [PATCH 06/23] Delete redundant attributes

---
 datumaro/plugins/mars_format.py      | 2 --
 site/content/en/docs/formats/mars.md | 3 ---
 tests/test_mars_format.py            | 6 ------
 3 files changed, 11 deletions(-)

diff --git a/datumaro/plugins/mars_format.py b/datumaro/plugins/mars_format.py
index fa4fd3e069..4e00272f71 100644
--- a/datumaro/plugins/mars_format.py
+++ b/datumaro/plugins/mars_format.py
@@ -81,8 +81,6 @@ def _load_items(self, subset, path):
                 items.append(DatasetItem(id=osp.splitext(image_name)[0],
                     image=Image(path=osp.join(path, label, image_name)),
                     annotations=[Label(label=label_id, attributes={
-                            'is_distractors': pedestrian_id == '0000',
-                            'is_junk': pedestrian_id == '00-1',
                             'pedestrian_id': pedestrian_id,
                             'camera_id': image_name[5],
                             'track_id': image_name[7:11],
diff --git a/site/content/en/docs/formats/mars.md b/site/content/en/docs/formats/mars.md
index 9c5adadf7a..748ec0ee51 100644
--- a/site/content/en/docs/formats/mars.md
+++ b/site/content/en/docs/formats/mars.md
@@ -16,9 +16,6 @@ Supported types of annotations:
 - `Label`
 
 Required attributes:
-- `is_distractors` (bool): True when image with distractors,
-    which negatively affect retrieval accuracy
-- `is_junk`: True for junk image which do not affect retrieval accuracy;
 - `pedestrian_id`: four-digit number in format `%04d`;
 - `camera_id`: one-digit number;
 - `track_id`: four-digit number in format `%04d`;
diff --git a/tests/test_mars_format.py b/tests/test_mars_format.py
index 377792d1ca..77ffaacdb0 100644
--- a/tests/test_mars_format.py
+++ b/tests/test_mars_format.py
@@ -24,8 +24,6 @@ def test_can_import(self):
         expected_dataset = Dataset.from_iterable([
             DatasetItem(id='0001C1T0001F001', image=np.ones((10, 10, 3)),
                 subset='train', annotations=[Label(label=2, attributes={
-                    'is_distractors': False,
-                    'is_junk': False,
                     'pedestrian_id': '0001',
                     'camera_id': '1',
                     'track_id': '0001',
@@ -33,8 +31,6 @@ def test_can_import(self):
             ),
             DatasetItem(id='0000C6T0101F001', image=np.ones((10, 10, 3)),
                 subset='train', annotations=[Label(label=1, attributes={
-                    'is_distractors': True,
-                    'is_junk': False,
                     'pedestrian_id': '0000',
                     'camera_id': '6',
                     'track_id': '0101',
@@ -42,8 +38,6 @@ def test_can_import(self):
             ),
             DatasetItem(id='00-1C2T0081F201', image=np.ones((10, 10, 3)),
                 subset='test', annotations=[Label(label=0, attributes={
-                    'is_distractors': False,
-                    'is_junk': True,
                     'pedestrian_id': '00-1',
                     'camera_id': '2',
                     'track_id': '0081',

From 511a500c53f79ea560fab61e94d81965f98e2545 Mon Sep 17 00:00:00 2001
From: "kirill.sizov" <kirill.sizov@intel.com>
Date: Mon, 20 Dec 2021 16:52:38 +0300
Subject: [PATCH 07/23] Fix typo

---
 datumaro/plugins/mars_format.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/datumaro/plugins/mars_format.py b/datumaro/plugins/mars_format.py
index 4e00272f71..4b77c35052 100644
--- a/datumaro/plugins/mars_format.py
+++ b/datumaro/plugins/mars_format.py
@@ -67,9 +67,9 @@ def _load_items(self, subset, path):
 
                 if not fnmatch.fnmatch(image_name,
                         label + MarsPath.IMAGE_NAME_POSTFIX):
-                    log.warning(f'The image {image_path} will be skip because '
-                        'it has incorrect name. See the docs to get more info '
-                        f'pattern {label + MarsPath.IMAGE_NAME_POSTFIX}')
+                    log.warning(f'The image {image_path} will be skipped '
+                        'because it has incorrect name. See the docs to get '
+                        'more information')
                     continue
 
                 if pedestrian_id != label:

From 53f0646b50c3b453fba4cfb8fae833e8cbf54080 Mon Sep 17 00:00:00 2001
From: "kirill.sizov" <kirill.sizov@intel.com>
Date: Tue, 21 Dec 2021 11:15:11 +0300
Subject: [PATCH 08/23] Fix docs

---
 site/content/en/docs/formats/mars.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/site/content/en/docs/formats/mars.md b/site/content/en/docs/formats/mars.md
index 748ec0ee51..20d2d41d4e 100644
--- a/site/content/en/docs/formats/mars.md
+++ b/site/content/en/docs/formats/mars.md
@@ -7,10 +7,10 @@ weight: 14
 
 ## Format specification
 
-MARS is a dataset for and motion analysis and person identification task,
+MARS is a dataset for the motion analysis and person identification task,
 and this dataset it's extension of Market-1501 dataset format.
 MARS dataset is available for downloading
-[here](http://zheng-lab.cecs.anu.edu.au/Project/project_mars.html)
+[here](https://zheng-lab.cecs.anu.edu.au/Project/project_mars.html)
 
 Supported types of annotations:
 - `Label`
@@ -60,12 +60,12 @@ mars_dataset
 ├── ...
 ```
 
-All images in MARS dataset has strict convention of naming:
+All images in MARS dataset follow a strict convention of naming:
 ```
 xxxxCxTxxxxFxxx.jpg
 ```
 - the first four digits indicate the pedestrian's number;
-- digits after `C` indicate the camera id;
+- digit after `C` indicates the camera id;
 - four digits after `T` indicate the track id for this pedestrian;
 - three digits after `F` indicate the frame id with this track
 

From 6fef01d7adb6beb8ea537e390aafa9c5c1d83287 Mon Sep 17 00:00:00 2001
From: "kirill.sizov" <kirill.sizov@intel.com>
Date: Tue, 21 Dec 2021 12:04:51 +0300
Subject: [PATCH 09/23] Commit suggestion changes

---
 datumaro/plugins/mars_format.py      | 18 +++++++++---------
 site/content/en/docs/formats/mars.md | 13 ++++++-------
 tests/test_mars_format.py            | 23 +++++++----------------
 3 files changed, 22 insertions(+), 32 deletions(-)

diff --git a/datumaro/plugins/mars_format.py b/datumaro/plugins/mars_format.py
index 4b77c35052..2fd6aea502 100644
--- a/datumaro/plugins/mars_format.py
+++ b/datumaro/plugins/mars_format.py
@@ -8,7 +8,7 @@
 import os.path as osp
 
 from datumaro.components.annotation import (
-    AnnotationType, Label, LabelCategories,
+    AnnotationType, Bbox, LabelCategories,
 )
 from datumaro.components.dataset import DatasetItem
 from datumaro.components.extractor import Extractor, Importer
@@ -78,15 +78,15 @@ def _load_items(self, subset, path):
                         f'the directory name: {label}')
                     continue
 
+                image = Image(path=image_path)
+                width, height = image.size
                 items.append(DatasetItem(id=osp.splitext(image_name)[0],
-                    image=Image(path=osp.join(path, label, image_name)),
-                    annotations=[Label(label=label_id, attributes={
-                            'pedestrian_id': pedestrian_id,
-                            'camera_id': image_name[5],
-                            'track_id': image_name[7:11],
-                            'frame_id': image_name[12:15]
-                        })
-                    ], subset=subset)
+                    image=image, subset=subset,
+                    annotations=[Bbox(0, 0, width, height, label=label_id)],
+                    attributes={'camera_id': int(image_name[5]),
+                            'track_id': int(image_name[7:11]),
+                            'frame_id': int(image_name[12:15])
+                    })
                 )
 
         return items
diff --git a/site/content/en/docs/formats/mars.md b/site/content/en/docs/formats/mars.md
index 20d2d41d4e..8af724fc6b 100644
--- a/site/content/en/docs/formats/mars.md
+++ b/site/content/en/docs/formats/mars.md
@@ -13,13 +13,12 @@ MARS dataset is available for downloading
 [here](https://zheng-lab.cecs.anu.edu.au/Project/project_mars.html)
 
 Supported types of annotations:
-- `Label`
+- `Bbox`
 
 Required attributes:
-- `pedestrian_id`: four-digit number in format `%04d`;
-- `camera_id`: one-digit number;
-- `track_id`: four-digit number in format `%04d`;
-- `frame_id`: three-digit number in format `%03d`, that mean number of
+- `camera_id` (int): one-digit number;
+- `track_id` (int): four-digit number;
+- `frame_id` (int): three-digit number, that mean number of
   frame within this track. For the tracks, their names are accumulated
   for each ID, but for frames, they start from "0001" in each track.
 
@@ -67,7 +66,7 @@ xxxxCxTxxxxFxxx.jpg
 - the first four digits indicate the pedestrian's number;
 - digit after `C` indicates the camera id;
 - four digits after `T` indicate the track id for this pedestrian;
-- three digits after `F` indicate the frame id with this track
+- three digits after `F` indicate the frame id with this track.
 
 > Note: there are two specific pedestrian IDs 0000 and 00-1
-> which indicate distracting images and unwanted images respectively
+> which indicate distracting images and unwanted images respectively.
diff --git a/tests/test_mars_format.py b/tests/test_mars_format.py
index 77ffaacdb0..7445e0eda4 100644
--- a/tests/test_mars_format.py
+++ b/tests/test_mars_format.py
@@ -7,7 +7,7 @@
 
 import numpy as np
 
-from datumaro.components.annotation import Label
+from datumaro.components.annotation import Bbox
 from datumaro.components.dataset import Dataset, DatasetItem
 from datumaro.components.environment import Environment
 from datumaro.plugins.mars_format import MarsImporter
@@ -23,25 +23,16 @@ class MarsImporterTest(TestCase):
     def test_can_import(self):
         expected_dataset = Dataset.from_iterable([
             DatasetItem(id='0001C1T0001F001', image=np.ones((10, 10, 3)),
-                subset='train', annotations=[Label(label=2, attributes={
-                    'pedestrian_id': '0001',
-                    'camera_id': '1',
-                    'track_id': '0001',
-                    'frame_id': '001'})]
+                subset='train', annotations=[Bbox(0, 0, 10, 10, label=2)],
+                attributes={'camera_id': 1, 'track_id': 1, 'frame_id': 1}
             ),
             DatasetItem(id='0000C6T0101F001', image=np.ones((10, 10, 3)),
-                subset='train', annotations=[Label(label=1, attributes={
-                    'pedestrian_id': '0000',
-                    'camera_id': '6',
-                    'track_id': '0101',
-                    'frame_id': '001'})]
+                subset='train', annotations=[Bbox(0, 0, 10, 10, label=1)],
+                attributes={'camera_id': 6, 'track_id': 101, 'frame_id': 1}
             ),
             DatasetItem(id='00-1C2T0081F201', image=np.ones((10, 10, 3)),
-                subset='test', annotations=[Label(label=0, attributes={
-                    'pedestrian_id': '00-1',
-                    'camera_id': '2',
-                    'track_id': '0081',
-                    'frame_id': '201'})]
+                subset='test', annotations=[Bbox(0, 0, 10, 10, label=0)],
+                attributes={'camera_id': 2, 'track_id': 81, 'frame_id': 201}
             ),
         ], categories=['00-1', '0000', '0001'])
 

From 74b723b9533f13d59d9f3bfd62b6587904ab8281 Mon Sep 17 00:00:00 2001
From: "kirill.sizov" <kirill.sizov@intel.com>
Date: Tue, 21 Dec 2021 12:06:48 +0300
Subject: [PATCH 10/23] Sort imports

---
 datumaro/plugins/mars_format.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/datumaro/plugins/mars_format.py b/datumaro/plugins/mars_format.py
index 2fd6aea502..6029c0f108 100644
--- a/datumaro/plugins/mars_format.py
+++ b/datumaro/plugins/mars_format.py
@@ -7,9 +7,7 @@
 import os
 import os.path as osp
 
-from datumaro.components.annotation import (
-    AnnotationType, Bbox, LabelCategories,
-)
+from datumaro.components.annotation import AnnotationType, Bbox, LabelCategories
 from datumaro.components.dataset import DatasetItem
 from datumaro.components.extractor import Extractor, Importer
 from datumaro.components.format_detection import FormatDetectionContext

From 9124bbf1cd610dfc254a49a8c8bd3f6081a54d3e Mon Sep 17 00:00:00 2001
From: "kirill.sizov" <kirill.sizov@intel.com>
Date: Tue, 21 Dec 2021 12:14:21 +0300
Subject: [PATCH 11/23] Add more information about attributes

---
 site/content/en/docs/formats/mars.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/site/content/en/docs/formats/mars.md b/site/content/en/docs/formats/mars.md
index 8af724fc6b..e94830f4b0 100644
--- a/site/content/en/docs/formats/mars.md
+++ b/site/content/en/docs/formats/mars.md
@@ -16,8 +16,10 @@ Supported types of annotations:
 - `Bbox`
 
 Required attributes:
-- `camera_id` (int): one-digit number;
-- `track_id` (int): four-digit number;
+- `camera_id` (int): one-digit number that represent ID of camera that took
+  the image (original dataset has totally 6 cameras);
+- `track_id` (int): four-digit number that represent ID of the track with
+  the particular pedestrian;
 - `frame_id` (int): three-digit number, that mean number of
   frame within this track. For the tracks, their names are accumulated
   for each ID, but for frames, they start from "0001" in each track.

From 460927393bba92d3b099d6830bcae6230e113340 Mon Sep 17 00:00:00 2001
From: "kirill.sizov" <kirill.sizov@intel.com>
Date: Mon, 27 Dec 2021 13:15:50 +0300
Subject: [PATCH 12/23] Align formats Market-1501 and Mars

---
 datumaro/plugins/market1501_format.py | 151 ++++++++++++++------------
 datumaro/plugins/mars_format.py       |  21 ++--
 2 files changed, 90 insertions(+), 82 deletions(-)

diff --git a/datumaro/plugins/market1501_format.py b/datumaro/plugins/market1501_format.py
index 4a52329972..aba1b01ba8 100644
--- a/datumaro/plugins/market1501_format.py
+++ b/datumaro/plugins/market1501_format.py
@@ -3,13 +3,12 @@
 # SPDX-License-Identifier: MIT
 
 from distutils.util import strtobool
-from itertools import chain
 import os
 import os.path as osp
 import re
 
 from datumaro.components.converter import Converter
-from datumaro.components.extractor import DatasetItem, Importer, SourceExtractor
+from datumaro.components.extractor import DatasetItem, Importer, Extractor
 from datumaro.util.image import find_images
 
 
@@ -17,123 +16,135 @@ class Market1501Path:
     QUERY_DIR = 'query'
     BBOX_DIR = 'bounding_box_'
     IMAGE_EXT = '.jpg'
-    PATTERN = re.compile(r'^(-?\d+)_c(\d+)(?:s\d+_\d+_00(.*))?')
+    PATTERN = re.compile(r'^(-?\d+)_c(\d+)s(\d+)_(\d+)_(\d+)(.*)')
     LIST_PREFIX = 'images_'
     UNKNOWN_ID = -1
+    ATTRIBUTES = ['person_id', 'camera_id', 'track_id', 'frame_id', 'bbox_id']
 
-class Market1501Extractor(SourceExtractor):
-    def __init__(self, path, subset=None):
+class Market1501Extractor(Extractor):
+    def __init__(self, path):
         if not osp.isdir(path):
             raise NotADirectoryError(
                 "Can't open folder with annotation files '%s'" % path)
 
-        if not subset:
-            subset = ''
-            for p in os.listdir(path):
-                pf = osp.join(path, p)
+        self._path = path
+        super().__init__()
 
-                if p.startswith(Market1501Path.BBOX_DIR) and osp.isdir(pf):
-                    subset = p.replace(Market1501Path.BBOX_DIR, '')
-                    break
+        subsets = {}
+        for p in os.listdir(path):
+            pf = osp.join(path, p)
 
-                if p.startswith(Market1501Path.LIST_PREFIX) and osp.isfile(pf):
-                    subset = p.replace(Market1501Path.LIST_PREFIX, '')
-                    subset = osp.splitext(subset)[0]
-                    break
-        super().__init__(subset=subset)
+            if p.startswith(Market1501Path.BBOX_DIR) and osp.isdir(pf):
+                subset = p.replace(Market1501Path.BBOX_DIR, '')
+                subsets[subset] = pf
 
-        self._path = path
-        self._items = list(self._load_items(path).values())
+            if p.startswith(Market1501Path.LIST_PREFIX) and osp.isfile(pf):
+                subset = p.replace(Market1501Path.LIST_PREFIX, '')
+                subset = osp.splitext(subset)[0]
+                subsets[subset] = pf
+
+            if p.startswith(Market1501Path.QUERY_DIR) and osp.isdir(pf):
+                subset = Market1501Path.QUERY_DIR
+                subsets[subset] = pf
+
+        self._items = []
+        for subset, subset_path in subsets.items():
+            self._items.extend(list(
+                self._load_items(subset, subset_path).values()))
+
+    def __iter__(self):
+        yield from self._items
 
-    def _load_items(self, rootdir):
+    def _load_items(self, subset, subset_path):
         items = {}
 
         paths = []
-        anno_file = osp.join(rootdir,
-            Market1501Path.LIST_PREFIX + self._subset + '.txt')
-        if osp.isfile(anno_file):
-            with open(anno_file, encoding='utf-8') as f:
+        if osp.isfile(subset_path):
+            with open(subset_path, encoding='utf-8') as f:
                 for line in f:
-                    paths.append(osp.join(rootdir, line.strip()))
+                    paths.append(osp.join(self._path, line.strip()))
         else:
-            paths = list(chain(
-                find_images(osp.join(rootdir,
-                        Market1501Path.QUERY_DIR),
-                    recursive=True),
-                find_images(osp.join(rootdir,
-                        Market1501Path.BBOX_DIR + self._subset),
-                    recursive=True),
-            ))
-
-        for image_path in paths:
+            paths = list(find_images(subset_path, recursive=True))
+
+        for image_path in sorted(paths):
             item_id = osp.splitext(osp.normpath(image_path))[0]
             if osp.isabs(image_path):
-                item_id = osp.relpath(item_id, rootdir)
-            subdir, item_id = item_id.split(os.sep, maxsplit=1)
+                item_id = osp.relpath(item_id, self._path)
+            item_id = item_id.split(osp.sep, maxsplit=1)[1]
 
-            pid = Market1501Path.UNKNOWN_ID
-            camid = Market1501Path.UNKNOWN_ID
+            attributes = {}
             search = Market1501Path.PATTERN.search(osp.basename(item_id))
             if search:
-                pid, camid = map(int, search.groups()[0:2])
-                camid -= 1 # make ids 0-based
-                custom_name = search.groups()[2]
+                attribute_values = search.groups()[0:5]
+                attributes = {
+                    'person_id': attribute_values[0],
+                    'camera_id': int(attribute_values[1]) - 1,
+                    'track_id': int(attribute_values[2]),
+                    'frame_id': int(attribute_values[3]),
+                    'bbox_id': int(attribute_values[4]),
+                    'query': subset == Market1501Path.QUERY_DIR
+                }
+
+                custom_name = search.groups()[5]
                 if custom_name:
                     item_id = osp.join(osp.dirname(item_id), custom_name)
 
             item = items.get(item_id)
             if item is None:
-                item = DatasetItem(id=item_id, subset=self._subset,
-                    image=image_path)
+                item = DatasetItem(id=item_id, subset=subset, image=image_path,
+                    attributes=attributes)
                 items[item_id] = item
 
-            if pid != Market1501Path.UNKNOWN_ID or \
-                    camid != Market1501Path.UNKNOWN_ID:
-                attributes = item.attributes
-                attributes['query'] = subdir == Market1501Path.QUERY_DIR
-                attributes['person_id'] = pid
-                attributes['camera_id'] = camid
         return items
 
 class Market1501Importer(Importer):
     @classmethod
     def find_sources(cls, path):
-        if not osp.isdir(path):
-            return []
-        return [{ 'url': path, 'format': Market1501Extractor.NAME }]
+        for dirname in os.listdir(path):
+            if dirname.startswith((Market1501Path.BBOX_DIR,
+                    Market1501Path.QUERY_DIR, Market1501Path.LIST_PREFIX)):
+                return [{'url': path, 'format': Market1501Extractor.NAME}]
 
 class Market1501Converter(Converter):
     DEFAULT_IMAGE_EXT = Market1501Path.IMAGE_EXT
 
+    def _make_dir_name(self, item):
+        dirname = Market1501Path.BBOX_DIR + item.subset
+        query = item.attributes.get('query')
+        if query is not None and isinstance(query, str):
+            query = strtobool(query)
+        if query:
+            dirname = Market1501Path.QUERY_DIR
+        return dirname
+
     def apply(self):
         for subset_name, subset in self._extractor.subsets().items():
             annotation = ''
+            used_frames = {}
 
             for item in subset:
+                dirname = self._make_dir_name(item)
+
                 image_name = item.id
-                if Market1501Path.PATTERN.search(image_name) is None:
-                    if 'person_id' in item.attributes and \
-                            'camera_id' in item.attributes:
-                        image_pattern = '{:04d}_c{}s1_000000_00{}'
-                        pid = int(item.attributes['person_id'])
-                        camid = int(item.attributes['camera_id']) + 1
-                        dirname, basename = osp.split(item.id)
-                        image_name = osp.join(dirname,
-                            image_pattern.format(pid, camid, basename))
-
-                dirname = Market1501Path.BBOX_DIR + subset_name
-                if 'query' in item.attributes:
-                    query = item.attributes.get('query')
-                    if isinstance(query, str):
-                        query = strtobool(query)
-                    if query:
-                        dirname = Market1501Path.QUERY_DIR
+                pid = item.attributes.get('person_id')
+                match = Market1501Path.PATTERN.fullmatch(item.id)
+                if not match and pid is not None:
+                    cid = int(item.attributes.get('camera_id', 0)) + 1
+                    tid = int(item.attributes.get('track_id', 1))
+                    bbid = int(item.attributes.get('bbox_id', 0))
+                    fid = int(item.attributes.get('frame_id',
+                        max(used_frames.get((pid, cid, tid), [-1])) + 1))
+                    image_name = f'{pid}_c{cid}s{tid}_{fid:06d}_{bbid:02d}'
 
                 image_path = self._make_image_filename(item,
                     name=image_name, subdir=dirname)
                 if self._save_images and item.has_image:
                     self._save_image(item, osp.join(self._save_dir, image_path))
 
+                attrs = Market1501Path.PATTERN.search(image_name)
+                if attrs:
+                    attrs = attrs.groups()
+                    used_frames.setdefault(attrs[0:2], []).append(int(attrs[3]))
                 annotation += '%s\n' % image_path
 
             annotation_file = osp.join(self._save_dir,
diff --git a/datumaro/plugins/mars_format.py b/datumaro/plugins/mars_format.py
index 6029c0f108..2a77d85d79 100644
--- a/datumaro/plugins/mars_format.py
+++ b/datumaro/plugins/mars_format.py
@@ -7,7 +7,7 @@
 import os
 import os.path as osp
 
-from datumaro.components.annotation import AnnotationType, Bbox, LabelCategories
+from datumaro.components.annotation import AnnotationType, Bbox, LabelCategories, Label
 from datumaro.components.dataset import DatasetItem
 from datumaro.components.extractor import Extractor, Importer
 from datumaro.components.format_detection import FormatDetectionContext
@@ -61,13 +61,12 @@ def _load_items(self, subset, path):
             label_id = self._categories[AnnotationType.label].find(label)[0]
             for image_path in find_images(osp.join(path, label)):
                 image_name = osp.basename(image_path)
+                item_id = osp.splitext(image_name)[0]
                 pedestrian_id = image_name[0:4]
 
                 if not fnmatch.fnmatch(image_name,
                         label + MarsPath.IMAGE_NAME_POSTFIX):
-                    log.warning(f'The image {image_path} will be skipped '
-                        'because it has incorrect name. See the docs to get '
-                        'more information')
+                    items.append(DatasetItem(id=item_id, image=image_path))
                     continue
 
                 if pedestrian_id != label:
@@ -76,14 +75,12 @@ def _load_items(self, subset, path):
                         f'the directory name: {label}')
                     continue
 
-                image = Image(path=image_path)
-                width, height = image.size
-                items.append(DatasetItem(id=osp.splitext(image_name)[0],
-                    image=image, subset=subset,
-                    annotations=[Bbox(0, 0, width, height, label=label_id)],
-                    attributes={'camera_id': int(image_name[5]),
-                            'track_id': int(image_name[7:11]),
-                            'frame_id': int(image_name[12:15])
+                items.append(DatasetItem(id=item_id, image=image_path,
+                    subset=subset, annotations=[Label(label=label_id)],
+                    attributes={'person_id': pedestrian_id,
+                        'camera_id': int(image_name[5]),
+                        'track_id': int(image_name[7:11]),
+                        'frame_id': int(image_name[12:15])
                     })
                 )
 

From 165c0bba1625de7fff8e510421cd7e924d9711a0 Mon Sep 17 00:00:00 2001
From: "kirill.sizov" <kirill.sizov@intel.com>
Date: Mon, 27 Dec 2021 13:16:05 +0300
Subject: [PATCH 13/23] Update tests

---
 tests/test_market1501_format.py | 117 ++++++++++++--------------------
 1 file changed, 42 insertions(+), 75 deletions(-)

diff --git a/tests/test_market1501_format.py b/tests/test_market1501_format.py
index e88ae5e4e3..66a4784cb2 100644
--- a/tests/test_market1501_format.py
+++ b/tests/test_market1501_format.py
@@ -20,28 +20,19 @@ class Market1501FormatTest(TestCase):
     def test_can_save_and_load(self):
         source_dataset = Dataset.from_iterable([
             DatasetItem(id='0001_c2s3_000001_00',
-                subset='test', image=np.ones((2, 5, 3)),
-                attributes = {
-                    'camera_id': 1,
-                    'person_id': 1,
-                    'query': True
-                }
+                subset='query', image=np.ones((2, 5, 3)),
+                attributes = {'camera_id': 1, 'person_id': '0001', 'track_id': 3,
+                    'frame_id': 1, 'bbox_id': 0, 'query': True}
             ),
             DatasetItem(id='0002_c4s2_000002_00',
                 subset='test', image=np.ones((2, 5, 3)),
-                attributes = {
-                    'camera_id': 3,
-                    'person_id': 2,
-                    'query': False
-                }
+                attributes = {'camera_id': 3, 'person_id': '0002', 'track_id': 2,
+                    'frame_id': 2, 'bbox_id': 0, 'query': False}
             ),
             DatasetItem(id='0001_c1s1_000003_00',
                 subset='test', image=np.ones((2, 5, 3)),
-                attributes = {
-                    'camera_id': 0,
-                    'person_id': 1,
-                    'query': False
-                }
+                attributes = {'camera_id': 0, 'person_id': '0001', 'track_id': 1,
+                    'frame_id': 3, 'bbox_id': 0, 'query': False}
             ),
         ])
 
@@ -56,11 +47,8 @@ def test_can_save_dataset_with_no_subsets(self):
         source_dataset = Dataset.from_iterable([
             DatasetItem(id='0001_c2s3_000001_00',
                 image=np.ones((2, 5, 3)),
-                attributes = {
-                    'camera_id': 1,
-                    'person_id': 1,
-                    'query': True
-                }
+                attributes = {'camera_id': 1, 'person_id': '0001', 'track_id': 3,
+                    'frame_id': 1, 'bbox_id': 0, 'query': False}
             ),
         ])
 
@@ -75,11 +63,15 @@ def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self):
         source_dataset = Dataset.from_iterable([
             DatasetItem(id='кириллица с пробелом',
                 image=np.ones((2, 5, 3)),
-                attributes = {
-                    'camera_id': 1,
-                    'person_id': 1,
-                    'query': True
-                }
+                attributes = {'camera_id': 0, 'person_id': '0001', 'query': False}
+            ),
+        ])
+
+        expected_dataset = Dataset.from_iterable([
+            DatasetItem(id='0001_c1s1_000000_00',
+                image=np.ones((2, 5, 3)),
+                attributes = {'camera_id': 0, 'person_id': '0001', 'track_id': 1,
+                    'frame_id': 0, 'bbox_id': 0, 'query': False}
             ),
         ])
 
@@ -87,27 +79,16 @@ def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self):
             Market1501Converter.convert(source_dataset, test_dir, save_images=True)
             parsed_dataset = Dataset.import_from(test_dir, 'market1501')
 
-            compare_datasets(self, source_dataset, parsed_dataset,
+            compare_datasets(self, expected_dataset, parsed_dataset,
                 require_images=True)
 
     @mark_requirement(Requirements.DATUM_GENERAL_REQ)
     def test_can_save_dataset_with_no_save_images(self):
         source_dataset = Dataset.from_iterable([
             DatasetItem(id='0001_c2s3_000001_00',
-                subset='test', image=np.ones((2, 5, 3)),
-                attributes = {
-                    'camera_id': 1,
-                    'person_id': 1,
-                    'query': True
-                }
-            ),
-            DatasetItem(id='test1',
-                subset='test', image=np.ones((2, 5, 3)),
-                attributes = {
-                    'camera_id': 1,
-                    'person_id': 2,
-                    'query': False
-                }
+                subset='query', image=np.ones((2, 5, 3)),
+                attributes = {'camera_id': 1, 'person_id': '0001', 'track_id': 3,
+                    'frame_id': 1, 'bbox_id': 0, 'query': True}
             ),
         ])
 
@@ -120,20 +101,16 @@ def test_can_save_dataset_with_no_save_images(self):
     @mark_requirement(Requirements.DATUM_GENERAL_REQ)
     def test_can_save_and_load_image_with_arbitrary_extension(self):
         expected = Dataset.from_iterable([
-            DatasetItem(id='q/1', image=Image(
-                    path='q/1.JPEG', data=np.zeros((4, 3, 3))),
-                attributes={
-                    'camera_id': 1,
-                    'person_id': 1,
-                    'query': False
-                }),
-            DatasetItem(id='a/b/c/2', image=Image(
-                    path='a/b/c/2.bmp', data=np.zeros((3, 4, 3))),
-                attributes={
-                    'camera_id': 1,
-                    'person_id': 2,
-                    'query': True
-                }),
+            DatasetItem(id='0001_c1s1_0000_00', image=Image(
+                    path='0001_c1s1_0000_00.JPEG', data=np.zeros((4, 3, 3))),
+                attributes={'camera_id': 0, 'person_id': '0001', 'track_id': 1,
+                    'frame_id': 0, 'bbox_id': 0, 'query': False}
+            ),
+            DatasetItem(id='0002_c2s2_0001_00', image=Image(
+                    path='0002_c2s2_0001_00.bmp', data=np.zeros((3, 4, 3))),
+                attributes={'camera_id': 1, 'person_id': '0002', 'track_id': 2,
+                    'frame_id': 1, 'bbox_id': 0, 'query': False}
+            ),
         ])
 
         with TestDir() as test_dir:
@@ -149,14 +126,6 @@ def test_can_save_dataset_with_no_attributes(self):
             DatasetItem(id='test1',
                 subset='test', image=np.ones((2, 5, 3)),
             ),
-            DatasetItem(id='test2',
-                subset='test', image=np.ones((2, 5, 3)),
-                attributes={
-                    'camera_id': 1,
-                    'person_id': -1,
-                    'query': True
-                }
-            ),
         ])
 
         with TestDir() as test_dir:
@@ -177,23 +146,21 @@ def test_can_detect(self):
     def test_can_import(self):
         expected_dataset = Dataset.from_iterable([
             DatasetItem(id='0001_c2s3_000111_00',
-                subset='test', image=np.ones((2, 5, 3)),
-                attributes = {
-                    'camera_id': 1,
-                    'person_id': 1,
-                    'query': True
-                }
+                subset='query', image=np.ones((2, 5, 3)),
+                attributes = {'camera_id': 1, 'person_id': '0001', 'track_id': 3,
+                    'frame_id': 111, 'bbox_id': 0, 'query': True}
             ),
             DatasetItem(id='0001_c1s1_001051_00',
                 subset='test', image=np.ones((2, 5, 3)),
-                attributes = {
-                    'camera_id': 0,
-                    'person_id': 1,
-                    'query': False
-                }
+                attributes = {'camera_id': 0, 'person_id': '0001', 'track_id': 1,
+                    'frame_id': 1051, 'bbox_id': 0, 'query': False}
+            ),
+            DatasetItem(id='0002_c1s3_000151_00',
+                subset='train', image=np.ones((2, 5, 3)),
+                attributes = {'camera_id': 0, 'person_id': '0002', 'track_id': 3,
+                    'frame_id': 151, 'bbox_id': 0, 'query': False}
             ),
         ])
-
         dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'market1501')
 
         compare_datasets(self, expected_dataset, dataset)

From 7f852c1e4711ada0510553303a636545fb0445e8 Mon Sep 17 00:00:00 2001
From: "kirill.sizov" <kirill.sizov@intel.com>
Date: Mon, 27 Dec 2021 13:17:44 +0300
Subject: [PATCH 14/23] Sort imports

---
 datumaro/plugins/mars_format.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/datumaro/plugins/mars_format.py b/datumaro/plugins/mars_format.py
index 2a77d85d79..1822133808 100644
--- a/datumaro/plugins/mars_format.py
+++ b/datumaro/plugins/mars_format.py
@@ -7,11 +7,12 @@
 import os
 import os.path as osp
 
-from datumaro.components.annotation import AnnotationType, Bbox, LabelCategories, Label
+from datumaro.components.annotation import (
+    AnnotationType, Label, LabelCategories,
+)
 from datumaro.components.dataset import DatasetItem
 from datumaro.components.extractor import Extractor, Importer
 from datumaro.components.format_detection import FormatDetectionContext
-from datumaro.components.media import Image
 from datumaro.util.image import find_images
 
 
From 57bfb99902774a83b46c873a9715d87112c56f5e Mon Sep 17 00:00:00 2001
From: "kirill.sizov" <kirill.sizov@intel.com>
Date: Mon, 27 Dec 2021 13:17:56 +0300
Subject: [PATCH 15/23] Update docs

---
 site/content/en/docs/formats/mars.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/site/content/en/docs/formats/mars.md b/site/content/en/docs/formats/mars.md
index e94830f4b0..03f1c917a9 100644
--- a/site/content/en/docs/formats/mars.md
+++ b/site/content/en/docs/formats/mars.md
@@ -7,8 +7,7 @@ weight: 14
 
 ## Format specification
 
-MARS is a dataset for the motion analysis and person identification task,
-and this dataset it's extension of Market-1501 dataset format.
+MARS is a dataset for the motion analysis and person identification task.
 MARS dataset is available for downloading
 [here](https://zheng-lab.cecs.anu.edu.au/Project/project_mars.html)
 

From 9242f85d49f5525cc3466719f383486609bf0e31 Mon Sep 17 00:00:00 2001
From: "kirill.sizov" <kirill.sizov@intel.com>
Date: Mon, 27 Dec 2021 13:22:52 +0300
Subject: [PATCH 16/23] Update tests for MARS format

---
 tests/test_mars_format.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/tests/test_mars_format.py b/tests/test_mars_format.py
index 7445e0eda4..8f04dff843 100644
--- a/tests/test_mars_format.py
+++ b/tests/test_mars_format.py
@@ -7,7 +7,7 @@
 
 import numpy as np
 
-from datumaro.components.annotation import Bbox
+from datumaro.components.annotation import Label
 from datumaro.components.dataset import Dataset, DatasetItem
 from datumaro.components.environment import Environment
 from datumaro.plugins.mars_format import MarsImporter
@@ -23,16 +23,19 @@ class MarsImporterTest(TestCase):
     def test_can_import(self):
         expected_dataset = Dataset.from_iterable([
             DatasetItem(id='0001C1T0001F001', image=np.ones((10, 10, 3)),
-                subset='train', annotations=[Bbox(0, 0, 10, 10, label=2)],
-                attributes={'camera_id': 1, 'track_id': 1, 'frame_id': 1}
+                subset='train', annotations=[Label(label=2)],
+                attributes={'person_id': '0001', 'camera_id': 1, 'track_id': 1,
+                    'frame_id': 1}
             ),
             DatasetItem(id='0000C6T0101F001', image=np.ones((10, 10, 3)),
-                subset='train', annotations=[Bbox(0, 0, 10, 10, label=1)],
-                attributes={'camera_id': 6, 'track_id': 101, 'frame_id': 1}
+                subset='train', annotations=[Label(label=1)],
+                attributes={'person_id': '0000', 'camera_id': 6, 'track_id': 101,
+                    'frame_id': 1}
             ),
             DatasetItem(id='00-1C2T0081F201', image=np.ones((10, 10, 3)),
-                subset='test', annotations=[Bbox(0, 0, 10, 10, label=0)],
-                attributes={'camera_id': 2, 'track_id': 81, 'frame_id': 201}
+                subset='test', annotations=[Label(label=0)],
+                attributes={'person_id': '00-1', 'camera_id': 2, 'track_id': 81,
+                    'frame_id': 201}
             ),
         ], categories=['00-1', '0000', '0001'])
 

From 993204b3c70d23605c99e06bc59942bbdf1dab38 Mon Sep 17 00:00:00 2001
From: "kirill.sizov" <kirill.sizov@intel.com>
Date: Mon, 27 Dec 2021 13:23:06 +0300
Subject: [PATCH 17/23] Sort imports

---
 datumaro/plugins/market1501_format.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/datumaro/plugins/market1501_format.py b/datumaro/plugins/market1501_format.py
index aba1b01ba8..93b7597239 100644
--- a/datumaro/plugins/market1501_format.py
+++ b/datumaro/plugins/market1501_format.py
@@ -8,7 +8,7 @@
 import re
 
 from datumaro.components.converter import Converter
-from datumaro.components.extractor import DatasetItem, Importer, Extractor
+from datumaro.components.extractor import DatasetItem, Extractor, Importer
 from datumaro.util.image import find_images
 
 
From 6d3386cfcd3cd78621f4a9a378d7806d765e3451 Mon Sep 17 00:00:00 2001
From: "kirill.sizov" <kirill.sizov@intel.com>
Date: Mon, 27 Dec 2021 13:45:34 +0300
Subject: [PATCH 18/23] Update assets

---
 .../bounding_box_train/0002_c1s3_000151_00.jpg     | Bin 0 -> 75 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 tests/assets/market1501_dataset/bounding_box_train/0002_c1s3_000151_00.jpg

diff --git a/tests/assets/market1501_dataset/bounding_box_train/0002_c1s3_000151_00.jpg b/tests/assets/market1501_dataset/bounding_box_train/0002_c1s3_000151_00.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..92e4057cfb4a734f2a6e2c788dbc03f8b2428916
GIT binary patch
literal 75
zcmeAS@N?(olHy`uVBq!ia0vp^tU%1f!2~4bIT}F{LY^*;Asn*FEKF=F2RK-GJb+?X
W7&w)0-_--M7(8A5T-G@yGywpArwoMv

literal 0
HcmV?d00001


From 0225b9d4c5575ea305bcf792cc6256d77cb27642 Mon Sep 17 00:00:00 2001
From: "kirill.sizov" <kirill.sizov@intel.com>
Date: Mon, 27 Dec 2021 16:05:26 +0300
Subject: [PATCH 19/23] Not allow empty person id; support relative path

---
 datumaro/plugins/market1501_format.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/datumaro/plugins/market1501_format.py b/datumaro/plugins/market1501_format.py
index 93b7597239..c4bd141c36 100644
--- a/datumaro/plugins/market1501_format.py
+++ b/datumaro/plugins/market1501_format.py
@@ -128,13 +128,14 @@ def apply(self):
                 image_name = item.id
                 pid = item.attributes.get('person_id')
                 match = Market1501Path.PATTERN.fullmatch(item.id)
-                if not match and pid is not None:
+                if not match and pid:
                     cid = int(item.attributes.get('camera_id', 0)) + 1
                     tid = int(item.attributes.get('track_id', 1))
                     bbid = int(item.attributes.get('bbox_id', 0))
                     fid = int(item.attributes.get('frame_id',
                         max(used_frames.get((pid, cid, tid), [-1])) + 1))
-                    image_name = f'{pid}_c{cid}s{tid}_{fid:06d}_{bbid:02d}'
+                    image_name = osp.join(osp.dirname(image_name),
+                        f'{pid}_c{cid}s{tid}_{fid:06d}_{bbid:02d}')
 
                 image_path = self._make_image_filename(item,
                     name=image_name, subdir=dirname)

From 97951016bf5c8cb837bfc27feef073cb5f0ffa90 Mon Sep 17 00:00:00 2001
From: "kirill.sizov" <kirill.sizov@intel.com>
Date: Mon, 27 Dec 2021 16:05:47 +0300
Subject: [PATCH 20/23] Update tests

---
 tests/test_market1501_format.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/test_market1501_format.py b/tests/test_market1501_format.py
index 66a4784cb2..d2751d54a1 100644
--- a/tests/test_market1501_format.py
+++ b/tests/test_market1501_format.py
@@ -101,13 +101,13 @@ def test_can_save_dataset_with_no_save_images(self):
     @mark_requirement(Requirements.DATUM_GENERAL_REQ)
     def test_can_save_and_load_image_with_arbitrary_extension(self):
         expected = Dataset.from_iterable([
-            DatasetItem(id='0001_c1s1_0000_00', image=Image(
-                    path='0001_c1s1_0000_00.JPEG', data=np.zeros((4, 3, 3))),
+            DatasetItem(id='c/0001_c1s1_000000_00', image=Image(
+                    path='c/0001_c1s1_0000_00.JPEG', data=np.zeros((4, 3, 3))),
                 attributes={'camera_id': 0, 'person_id': '0001', 'track_id': 1,
                     'frame_id': 0, 'bbox_id': 0, 'query': False}
             ),
-            DatasetItem(id='0002_c2s2_0001_00', image=Image(
-                    path='0002_c2s2_0001_00.bmp', data=np.zeros((3, 4, 3))),
+            DatasetItem(id='a/b/0002_c2s2_000001_00', image=Image(
+                    path='a/b/0002_c2s2_0001_00.bmp', data=np.zeros((3, 4, 3))),
                 attributes={'camera_id': 1, 'person_id': '0002', 'track_id': 2,
                     'frame_id': 1, 'bbox_id': 0, 'query': False}
             ),

From a48a6268dd96c668a605bc9d1e5c46dce4ecce94 Mon Sep 17 00:00:00 2001
From: "kirill.sizov" <kirill.sizov@intel.com>
Date: Tue, 28 Dec 2021 12:23:32 +0300
Subject: [PATCH 21/23] Docs: add info about person_id attribute

---
 site/content/en/docs/formats/mars.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/site/content/en/docs/formats/mars.md b/site/content/en/docs/formats/mars.md
index 03f1c917a9..4b0e171edc 100644
--- a/site/content/en/docs/formats/mars.md
+++ b/site/content/en/docs/formats/mars.md
@@ -15,6 +15,7 @@ Supported types of annotations:
 - `Bbox`
 
 Required attributes:
+- `person_id` (str): four-digit number that represents the pedestrian ID;
 - `camera_id` (int): one-digit number that represent ID of camera that took
   the image (original dataset has totally 6 cameras);
 - `track_id` (int): four-digit number that represent ID of the track with

From 5050bbdde60ff4b94d693a8f108b77b737369369 Mon Sep 17 00:00:00 2001
From: "kirill.sizov" <kirill.sizov@intel.com>
Date: Tue, 28 Dec 2021 12:51:17 +0300
Subject: [PATCH 22/23] Add docs for Market-1501

---
 site/content/en/docs/formats/market1501.md | 103 +++++++++++++++++++++
 1 file changed, 103 insertions(+)
 create mode 100644 site/content/en/docs/formats/market1501.md

diff --git a/site/content/en/docs/formats/market1501.md b/site/content/en/docs/formats/market1501.md
new file mode 100644
index 0000000000..f1c3e14c37
--- /dev/null
+++ b/site/content/en/docs/formats/market1501.md
@@ -0,0 +1,103 @@
+---
+title: 'Market-1501'
+linkTitle: 'Market-1501'
+description: ''
+weight: 14
+---
+
+## Format specification
+
+Market-1501 is a dataset for the person re-identification task. The dataset
+can be downloaded
+[here](https://zheng-lab.cecs.anu.edu.au/Project/project_reid.html).
+
+Supported item attributes:
+- `person_id` (str): four-digit number that represents the pedestrian ID;
+- `camera_id` (int): one-digit number that represents the ID of the camera
+  that took the image (the original dataset has 6 cameras in total);
+- `track_id` (int): one-digit number that represents the ID of the track with
+  the particular pedestrian; this attribute corresponds to `sequence_id`
+  in the original dataset;
+- `frame_id` (int): six-digit number that denotes the number of the
+  frame within this track. For the tracks, their names are accumulated
+  for each ID, but for frames, they start from "0001" in each track;
+- `bbox_id` (int): two-digit number that denotes the number of the
+  bounding box that was selected for that image
+  (see the
+  [original docs](https://zheng-lab.cecs.anu.edu.au/Project/project_reid.html)
+  for more info).
+
+These item attributes are encoded in the image name using this convention:
+```
+0000_c1s1_000000_00.jpg
+```
+- the first four digits indicate the `person_id`;
+- the digit after `c` is the `camera_id` plus 1 (the attribute is zero-based);
+- the digit after `s` indicates the `track_id`;
+- the six digits after `s1_` indicate the `frame_id`;
+- the last two digits before `.jpg` indicate the `bbox_id`.
+
+## Import Market-1501 dataset
+
+Importing a Market-1501 dataset into a Datumaro project:
+```
+datum create
+datum import -f market1501 <path_to_market1501>
+```
+See more information about adding datasets to the project in the
+[docs](/docs/user-manual/command-reference/sources/#source-add).
+
+Or you can import Market-1501 using the Python API:
+
+```python
+from datumaro.components.dataset import Dataset
+dataset = Dataset.import_from('<path_to_dataset>', 'market1501')
+```
+
+
+To import the Market-1501 dataset successfully, its directory
+should have the following structure:
+
+```
+market1501_dataset/
+├── query # optional directory with query images
+│   ├── 0001_c1s1_001051_00.jpg
+│   ├── 0002_c1s1_001051_00.jpg
+│   ├── ...
+├── bounding_box_<subset_name1>
+│   ├── 0003_c1s1_001051_00.jpg
+│   ├── 0003_c2s1_001054_01.jpg
+│   ├── 0004_c1s1_001051_00.jpg
+│   ├── ...
+├── bounding_box_<subset_name2>
+│   ├── 0005_c1s1_001051_00.jpg
+│   ├── 0006_c1s1_001051_00.jpg
+│   ├── ...
+├── ...
+```
+
+## Export dataset to the Market-1501 format
+
+With Datumaro you can export a dataset that has the `person_id` item attribute
+to the Market-1501 format, for example:
+
+```
+# Converting MARS dataset into the Market-1501
+datum convert -if mars -i ./mars_dataset \
+    -f market1501 -o ./output_dir
+
+# Export dataset to the Market-1501 format through the Datumaro project:
+datum create
+datum add -f mars ../mars
+datum export -f market1501 -o ./output_dir -- --save-images --image-ext png
+```
+
+> Note: if your dataset contains only person_id attributes, Datumaro
+> will assign default values for other attributes (camera_id, track_id, bbox_id)
+> and increment frame_id for collisions.
+
+Available extra export options for Market-1501 dataset format:
+- `--save-images` allows exporting the dataset together with its images
+  (by default `False`)
+- `--image-ext IMAGE_EXT` allows specifying the image extension
+  for the exported dataset (by default - keep original)

From c73bc1c677778629f8881fe12cd463ce4bc78fba Mon Sep 17 00:00:00 2001
From: "kirill.sizov" <kirill.sizov@intel.com>
Date: Tue, 28 Dec 2021 13:27:56 +0300
Subject: [PATCH 23/23] Update CHANGELOG

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7a2c0d16aa..1d5ccb48e1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   (<https://github.com/openvinotoolkit/datumaro/pull/582>)
 - Extension autodetection in `ByteImage`
   (<https://github.com/openvinotoolkit/datumaro/pull/595>)
+- Add MARS format (import only)
+  (<https://github.com/openvinotoolkit/datumaro/pull/585>)
 
 ### Changed
 - `smooth_line` from `datumaro.util.annotation_util` - the function