Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add validation item for instance segmentation #227

Merged
merged 4 commits into from
Apr 29, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions datumaro/cli/contexts/project/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import os
import os.path as osp
import shutil
import numpy as np
from enum import Enum

from datumaro.components.dataset_filter import DatasetItemEncoder
Expand Down Expand Up @@ -815,23 +816,27 @@ def validate_command(args):
project = load_project(args.project_dir)
task_type = args.task_type
subset_name = args.subset_name
dst_file_name = 'validation_results'
dst_file_name = f'validation_results-{task_type}'

dataset = project.make_dataset()
if subset_name is not None:
dataset = dataset.get_subset(subset_name)
dst_file_name += f'-{subset_name}'
validation_results = validate_annotations(dataset, task_type)

def _convert_tuple_keys_to_str(d):
def _make_serializable(d):
for key, val in list(d.items()):
# tuple key to str
if isinstance(key, tuple):
d[str(key)] = val
d.pop(key)
if isinstance(val, dict):
_convert_tuple_keys_to_str(val)

_convert_tuple_keys_to_str(validation_results)
if isinstance(val, np.uint32): # uint32 to str
d[key] = str(val)
elif isinstance(val, dict):
_make_serializable(val)

_make_serializable(validation_results)

dst_file = generate_next_file_name(dst_file_name, ext='.json')
log.info("Writing project validation results to '%s'" % dst_file)
Expand Down
35 changes: 16 additions & 19 deletions datumaro/components/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,10 +119,13 @@ def __str__(self):
return "Metadata (ex. LabelCategories) should be defined" \
" to validate a dataset."


@attrs
class MissingLabelAnnotation(DatasetItemValidationError):
class MissingAnnotation(DatasetItemValidationError):
ann_type = attrib()
def __str__(self):
return "Item needs a label, but not found."
return f"Item needs '{self.ann_type}' annotation(s), " \
"but not found."

@attrs
class MultiLabelAnnotations(DatasetItemValidationError):
Expand Down Expand Up @@ -228,40 +231,34 @@ def __str__(self):
f" '{self. attr_name}' for the label '{self.label_name}'."

@attrs
class ImbalancedBboxDistInLabel(DatasetValidationError):
class ImbalancedDistInLabel(DatasetValidationError):
label_name = attrib()
prop = attrib()

def __str__(self):
return f"Values of bbox '{self.prop}' are not evenly " \
return f"Values of '{self.prop}' are not evenly " \
f"distributed for '{self.label_name}' label."

@attrs
class ImbalancedBboxDistInAttribute(DatasetValidationError):
class ImbalancedDistInAttribute(DatasetValidationError):
label_name = attrib()
attr_name = attrib()
attr_value = attrib()
prop = attrib()

def __str__(self):
return f"Values of bbox '{self.prop}' are not evenly " \
return f"Values of '{self.prop}' are not evenly " \
f"distributed for '{self.attr_name}' = '{self.attr_value}' for " \
f"the '{self.label_name}' label."

@attrs
class MissingBboxAnnotation(DatasetItemValidationError):
def __str__(self):
return 'Item needs one or more bounding box annotations, ' \
'but not found.'

@attrs
class NegativeLength(DatasetItemValidationError):
ann_id = attrib()
prop = attrib()
val = attrib()

def __str__(self):
return f"Bounding box annotation '{self.ann_id}' in " \
return f"Annotation '{self.ann_id}' in " \
"the item should have a positive value of " \
f"'{self.prop}' but got '{self.val}'."

Expand All @@ -271,9 +268,9 @@ class InvalidValue(DatasetItemValidationError):
prop = attrib()

def __str__(self):
return f"Bounding box annotation '{self.ann_id}' in " \
return f"Annotation '{self.ann_id}' in " \
'the item has an inf or a NaN value of ' \
f"bounding box '{self.prop}'."
f"'{self.prop}'."

@attrs
class FarFromLabelMean(DatasetItemValidationError):
Expand All @@ -284,8 +281,8 @@ class FarFromLabelMean(DatasetItemValidationError):
val = attrib()

def __str__(self):
return f"Bounding box annotation '{self.ann_id}' in " \
f"the item has a value of bounding box '{self.prop}' that " \
return f"Annotation '{self.ann_id}' in " \
f"the item has a value of '{self.prop}' that " \
"is too far from the label average. (mean of " \
f"'{self.label_name}' label: {self.mean}, got '{self.val}')."

Expand All @@ -300,8 +297,8 @@ class FarFromAttrMean(DatasetItemValidationError):
val = attrib()

def __str__(self):
return f"Bounding box annotation '{self.ann_id}' in the " \
f"item has a value of bounding box '{self.prop}' that " \
return f"Annotation '{self.ann_id}' in the " \
f"item has a value of '{self.prop}' that " \
"is too far from the attribute average. (mean of " \
f"'{self.attr_name}' = '{self.attr_value}' for the " \
f"'{self.label_name}' label: {self.mean}, got '{self.val}')."
Loading