Skip to content

Commit

Permalink
reidentification to reid and align 80 char
Browse files Browse the repository at this point in the history
  • Loading branch information
harimkang committed Apr 21, 2021
1 parent 10d7f9d commit 1d45ec6
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 14 deletions.
17 changes: 9 additions & 8 deletions datumaro/plugins/splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
NEAR_ZERO = 1e-7

SplitTask = Enum(
"split", ["classification", "detection", "segmentation", "reidentification"]
"split", ["classification", "detection", "segmentation", "reid"]
)


Expand All @@ -30,8 +30,8 @@ class Split(Transform, CliPlugin):
Each image can have multiple object annotations -
(bbox, mask, polygon). Since an image shouldn't be included
in multiple subsets at the same time, and image annotations
shouldn't be split, in general, dataset annotations are unlikely to be split
exactly in the specified ratio. |n
shouldn't be split, in general, dataset annotations are unlikely
to be split exactly in the specified ratio. |n
This split tries to split dataset images as close as possible
to the specified ratio, keeping the initial class distribution.|n
|n
Expand All @@ -50,19 +50,20 @@ class Split(Transform, CliPlugin):
|n
Notes:|n
- Each image is expected to have only one Annotation. Unlabeled or
multi-labeled images will be split into subsets randomly (or 'not-supported' in reidentification). |n
multi-labeled images will be split into subsets randomly. |n
- If Labels also have attributes, also splits by attribute values.|n
- If there are not enough images in some class or attributes group,
the split ratio can't be guaranteed.|n
- Object ID can be described by Label, or by attribute (--attr parameter) in reidentification task|n
- The splits of the test set are controlled by '--query' parameter in reidentification task. |n
In the reidentification task, |n
- Object ID can be described by Label, or by attribute (--attr parameter)|n
- The splits of the test set are controlled by '--query' parameter |n
|s|sGallery ratio would be 1.0 - query.|n
|n
Example:|n
|s|s%(prog)s -t classification --subset train:.5 --subset val:.2 --subset test:.3 |n
|s|s%(prog)s -t detection --subset train:.5 --subset val:.2 --subset test:.3 |n
|s|s%(prog)s -t segmentation --subset train:.5 --subset val:.2 --subset test:.3 |n
|s|s%(prog)s -t reidentification --subset train:.5 --subset val:.2 --subset test:.3 --query .5 |n
|s|s%(prog)s -t reid --subset train:.5 --subset val:.2 --subset test:.3 --query .5 |n
Example: use 'person_id' attribute for splitting|n
|s|s%(prog)s --attr person_id
"""
Expand Down Expand Up @@ -138,7 +139,7 @@ def _get_splitter(task, dataset, splits, seed, query, attr_for_id):
splitter = _InstanceSpecificSplit(
dataset=dataset, splits=splits, seed=seed, task=task
)
elif task == SplitTask.reidentification.name:
elif task == SplitTask.reid.name:
splitter = _ReidentificationSplit(
dataset=dataset,
splits=splits,
Expand Down
2 changes: 1 addition & 1 deletion docs/user_manual.md
Original file line number Diff line number Diff line change
Expand Up @@ -1048,7 +1048,7 @@ datum transform -t split -- \
-t segmentation --subset train:.5 --subset val:.2 --subset test:.3

datum transform -t split -- \
-t reidentification --subset train:.5 --subset val:.2 --subset test:.3 \
-t reid --subset train:.5 --subset val:.2 --subset test:.3 \
--query .5
```

Expand Down
10 changes: 5 additions & 5 deletions tests/test_splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ def _get_present(stat):
attr_for_id = None
source = self._generate_dataset(config)
splits = [("train", 0.5), ("val", 0.2), ("test", 0.3)]
task = splitter.SplitTask.reidentification.name
task = splitter.SplitTask.reid.name
query = 0.4 / 0.7
actual = splitter.Split(source, task, splits, query, attr_for_id)

Expand Down Expand Up @@ -384,7 +384,7 @@ def test_split_for_reidentification_randomseed(self):
counts[label] = count
config[label] = {"attrs": None, "counts": count}
source = self._generate_dataset(config)
task = splitter.SplitTask.reidentification.name
task = splitter.SplitTask.reid.name
splits = [("train", 0.5), ("test", 0.5)]
query = 0.4 / 0.7
r1 = splitter.Split(source, task, splits, query, seed=1234)
Expand All @@ -402,7 +402,7 @@ def test_split_for_reidentification_rebalance(self):
label = "label%03d" % i
config[label] = {"attrs": None, "counts": 7}
source = self._generate_dataset(config)
task = splitter.SplitTask.reidentification.name
task = splitter.SplitTask.reid.name
splits = [("train", 0.5), ("val", 0.2), ("test", 0.3)]
query = 0.4 / 0.7
actual = splitter.Split(source, task, splits, query)
Expand All @@ -414,7 +414,7 @@ def test_split_for_reidentification_rebalance(self):

def test_split_for_reidentification_unlabeled(self):
query = 0.5
task = splitter.SplitTask.reidentification.name
task = splitter.SplitTask.reid.name

with self.subTest("no label"):
iterable = [DatasetItem(i, annotations=[]) for i in range(10)]
Expand All @@ -434,7 +434,7 @@ def test_split_for_reidentification_unlabeled(self):

def test_split_for_reidentification_gives_error(self):
query = 0.4 / 0.7 # valid query ratio
task = splitter.SplitTask.reidentification.name
task = splitter.SplitTask.reid.name

counts = {i: (i % 3 + 1) * 7 for i in range(10)}
config = {"person": {"attrs": ["PID"], "counts": counts}}
Expand Down

0 comments on commit 1d45ec6

Please sign in to comment.