Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix dataset import for Datumaro format #4544

Merged
merged 22 commits into from
Nov 1, 2022
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ Skeleton (<https://github.com/cvat-ai/cvat/pull/1>), (<https://github.com/opencv
- Fix build dependencies for Siammask (<https://github.com/openvinotoolkit/cvat/pull/4486>)
- Bug: Exif orientation information handled incorrectly (<https://github.com/openvinotoolkit/cvat/pull/4529>)
- Fixed build of retinanet function image (<https://github.com/cvat-ai/cvat/pull/54>)
- Dataset import for Datumaro format (<https://github.com/opencv/cvat/pull/4544>)
SpecLad marked this conversation as resolved.
Show resolved Hide resolved
- Bug: Import dataset of Imagenet format fails (<https://github.com/opencv/cvat/issues/4850>)

## \[2.0.0] - 2022-03-04
Expand Down
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,14 +132,15 @@ For more information about the supported formats, look at the
| --------------------------------------------------------------------------------------------------------- | ------ | ------ |
| [CVAT for images](https://opencv.github.io/cvat/docs/manual/advanced/xml_format/#annotation) | ✔️ | ✔️ |
| [CVAT for a video](https://opencv.github.io/cvat/docs/manual/advanced/xml_format/#interpolation) | ✔️ | ✔️ |
| [Datumaro](https://github.com/cvat-ai/datumaro) | | ✔️ |
| [Datumaro](https://github.com/cvat-ai/datumaro) | ✔️ | ✔️ |
| [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/) | ✔️ | ✔️ |
| Segmentation masks from [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/) | ✔️ | ✔️ |
| [YOLO](https://pjreddie.com/darknet/yolo/) | ✔️ | ✔️ |
| [MS COCO Object Detection](http://cocodataset.org/#format-data) | ✔️ | ✔️ |
| [MS COCO Keypoints Detection](http://cocodataset.org/#format-data) | ✔️ | ✔️ |
| [TFrecord](https://www.tensorflow.org/tutorials/load_data/tfrecord) | ✔️ | ✔️ |
| [MOT](https://motchallenge.net/) | ✔️ | ✔️ |
| [MOTS PNG](https://www.vision.rwth-aachen.de/page/mots) | ✔️ | ✔️ |
| [LabelMe 3.0](http://labelme.csail.mit.edu/Release3.0) | ✔️ | ✔️ |
| [ImageNet](http://www.image-net.org) | ✔️ | ✔️ |
| [CamVid](http://mi.eng.cam.ac.uk/research/projects/VideoRec/CamVid/) | ✔️ | ✔️ |
Expand All @@ -150,7 +151,9 @@ For more information about the supported formats, look at the
| [Open Images V6](https://storage.googleapis.com/openimages/web/index.html) | ✔️ | ✔️ |
| [Cityscapes](https://www.cityscapes-dataset.com/login/) | ✔️ | ✔️ |
| [KITTI](http://www.cvlibs.net/datasets/kitti/) | ✔️ | ✔️ |
| [Kitti Raw Format](https://www.cvlibs.net/datasets/kitti/raw_data.php) | ✔️ | ✔️ |
| [LFW](http://vis-www.cs.umass.edu/lfw/) | ✔️ | ✔️ |
| [Sly Point Cloud Format](https://docs.supervise.ly/data-organization/00_ann_format_navi) | ✔️ | ✔️ |
yasakova-anastasia marked this conversation as resolved.
Show resolved Hide resolved

<!--lint enable maximum-line-length-->

Expand Down
8 changes: 6 additions & 2 deletions cvat/apps/dataset_manager/formats/datumaro.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,14 @@ def _export(dst_file, instance_data, save_images=False):
make_zip_archive(tmp_dir, dst_file)

# Importer for the "Datumaro" format (ZIP archive, version "1.0").
# NOTE: this is a diff hunk, so two signatures appear below: the first `def`
# line is the previous signature and the second adds the optional
# `load_data_callback` parameter (default None).
@importer(name="Datumaro", ext="ZIP", version="1.0")
def _import(src_file, instance_data):
def _import(src_file, instance_data, load_data_callback=None):
with TemporaryDirectory() as tmp_dir:
# Extract the archive at src_file.name into the temporary directory.
Archive(src_file.name).extractall(tmp_dir)

# Load the extracted files as a dataset in 'datumaro' format.
dataset = Dataset.import_from(tmp_dir, 'datumaro', env=dm_env)

# When a callback is supplied, call it with the loaded dataset and
# instance_data before the annotations are imported.
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)

@exporter(name="Datumaro 3D", ext="ZIP", version="1.0", dimension=DimensionType.DIM_3D)
Expand All @@ -58,10 +60,12 @@ def _export(dst_file, instance_data, save_images=False):
make_zip_archive(tmp_dir, dst_file)

# Importer for the "Datumaro 3D" format (ZIP archive, version "1.0",
# dimension DimensionType.DIM_3D).
# NOTE: this is a diff hunk, so two signatures appear below: the first `def`
# line is the previous signature and the second adds the optional
# `load_data_callback` parameter (default None).
@importer(name="Datumaro 3D", ext="ZIP", version="1.0", dimension=DimensionType.DIM_3D)
def _import(src_file, instance_data):
def _import(src_file, instance_data, load_data_callback=None):
with TemporaryDirectory() as tmp_dir:
# Extract the archive at src_file.name into the temporary directory.
Archive(src_file.name).extractall(tmp_dir)

# Load the extracted files as a dataset in 'datumaro' format.
dataset = Dataset.import_from(tmp_dir, 'datumaro', env=dm_env)

# When a callback is supplied, call it with the loaded dataset and
# instance_data before the annotations are imported.
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
4 changes: 3 additions & 1 deletion cvat/apps/dataset_manager/formats/kitti.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def _export(dst_file, instance_data, save_images=False):
make_zip_archive(tmp_dir, dst_file)

@importer(name='KITTI', ext='ZIP', version='1.0')
def _import(src_file, instance_data):
def _import(src_file, instance_data, load_data_callback=None):
with TemporaryDirectory() as tmp_dir:
Archive(src_file.name).extractall(tmp_dir)

Expand All @@ -51,4 +51,6 @@ def _import(src_file, instance_data):
filter_annotations=True)
dataset.transform('masks_to_polygons')

if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
5 changes: 3 additions & 2 deletions cvat/apps/dataset_manager/formats/vggface2.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from datumaro.components.dataset import Dataset

from cvat.apps.dataset_manager.bindings import GetCVATDataExtractor, \
from cvat.apps.dataset_manager.bindings import GetCVATDataExtractor, TaskData, \
import_dm_annotations
from cvat.apps.dataset_manager.util import make_zip_archive

Expand All @@ -30,7 +30,8 @@ def _import(src_file, instance_data, load_data_callback=None):
zipfile.ZipFile(src_file).extractall(tmp_dir)

dataset = Dataset.import_from(tmp_dir, 'vgg_face2', env=dm_env)
dataset.transform('rename', regex=r"|([^/]+/)?(.+)|\2|")
if isinstance(instance_data, TaskData):
dataset.transform('rename', regex=r"|([^/]+/)?(.+)|\2|")
if load_data_callback is not None:
load_data_callback(dataset, instance_data)
import_dm_annotations(dataset, instance_data)
30 changes: 30 additions & 0 deletions cvat/apps/dataset_manager/tests/assets/tasks.json
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,36 @@
}
]
},
"KITTI 1.0": {
"name": "kitti task",
"overlap": 0,
"segment_size": 100,
"labels": [
{
"name": "car",
"color": "#2080c0",
"attributes": [
{
"name": "is_crowd",
"mutable": false,
"input_type": "checkbox",
"default_value": "false",
"values": ["false", "true"]
}
]
},
{
"name": "person",
"color": "#c06060",
"attributes": []
},
{
"name": "background",
"color": "#000000",
"attributes": []
}
]
},
"wrong_checkbox_value": {
"name": "wrong checkbox value task",
"overlap": 0,
Expand Down
86 changes: 66 additions & 20 deletions cvat/apps/dataset_manager/tests/test_rest_api_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,11 @@ def _put_request_with_data(self, path, data, user):
response = self.client.put(path, data)
return response

def _post_request_with_data(self, path, data, user):
    """POST ``data`` to ``path`` inside a ``ForceLogin(user, client)`` context.

    Returns the response object produced by ``self.client.post``.
    """
    client = self.client
    with ForceLogin(user, client):
        return client.post(path, data)

def _delete_request(self, path, user):
with ForceLogin(user, self.client):
response = self.client.delete(path)
Expand Down Expand Up @@ -349,6 +354,9 @@ def _generate_url_dump_project_annotations(self, project_id, format_name):
def _generate_url_dump_project_dataset(self, project_id, format_name):
return f"/api/projects/{project_id}/dataset?format={format_name}"

def _generate_url_upload_project_dataset(self, project_id, format_name):
return f"/api/projects/{project_id}/dataset?format={format_name}"

def _remove_annotations(self, url, user):
response = self._delete_request(url, user)
self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT)
Expand Down Expand Up @@ -436,7 +444,6 @@ def test_api_v2_dump_and_upload_annotations_with_objects_type_is_shape(self):
if upload_format_name == "CVAT 1.1":
file_zip_name = osp.join(test_dir, f'{test_name}_admin_CVAT for images 1.1.zip')
else:

file_zip_name = osp.join(test_dir, f'{test_name}_admin_{upload_format_name}.zip')
if not upload_format.ENABLED or not osp.exists(file_zip_name):
continue
Expand Down Expand Up @@ -925,33 +932,32 @@ def test_api_v2_rewriting_annotations(self):
dump_formats = dm.views.get_export_formats()
with TestDir() as test_dir:
for dump_format in dump_formats:
if not dump_format.ENABLED:
if not dump_format.ENABLED or dump_format.DIMENSION == dm.bindings.DimensionType.DIM_3D:
continue
dump_format_name = dump_format.DISPLAY_NAME

with self.subTest(format=dump_format_name):
if dump_format_name in [
"MOTS PNG 1.0", # issue #2925 and changed points values
'Kitti Raw Format 1.0',
'Sly Point Cloud Format 1.0',
'Datumaro 3D 1.0',
"Cityscapes 1.0" # expanding annotations due to background mask
]:
self.skipTest("Format is fail")

images = self._generate_task_images(3)
if dump_format_name in [
"Market-1501 1.0", "Cityscapes 1.0", \
"Market-1501 1.0",
"ICDAR Localization 1.0", "ICDAR Recognition 1.0", \
"ICDAR Segmentation 1.0", "COCO Keypoints 1.0",
]:
task = self._create_task(tasks[dump_format_name], images)
else:
task = self._create_task(tasks["main"], images)
task_id = task["id"]

if dump_format_name in [
"MOT 1.1", "MOTS PNG 1.0",
"PASCAL VOC 1.1", "Segmentation mask 1.1",
"MOT 1.1", "PASCAL VOC 1.1", "Segmentation mask 1.1",
"TFRecord 1.0", "YOLO 1.1", "ImageNet 1.0",
"WiderFace 1.0", "VGGFace2 1.0", "Cityscapes 1.0",
"WiderFace 1.0", "VGGFace2 1.0",
"Datumaro 1.0", "Open Images V6 1.0", "KITTI 1.0"
]:
self._create_annotations(task, dump_format_name, "default")
Expand Down Expand Up @@ -981,6 +987,7 @@ def test_api_v2_rewriting_annotations(self):

with open(file_zip_name, 'rb') as binary_file:
self._upload_file(url, binary_file, self.admin)

task_ann = TaskAnnotation(task_id)
task_ann.init_from_db()
task_ann_data = task_ann.data
Expand Down Expand Up @@ -1209,10 +1216,11 @@ def test_api_v2_check_attribute_import_in_tracks(self):
data_from_task_after_upload = self._get_data_from_task(task_id, include_images)
compare_datasets(self, data_from_task_before_upload, data_from_task_after_upload)

class ProjectDump(_DbTestBase):
def test_api_v2_export_dataset(self):
class ProjectDumpUpload(_DbTestBase):
def test_api_v2_export_import_dataset(self):
test_name = self._testMethodName
dump_formats = dm.views.get_export_formats()
upload_formats = dm.views.get_import_formats()

expected = {
self.admin: {'name': 'admin', 'code': status.HTTP_200_OK, 'create code': status.HTTP_201_CREATED,
Expand All @@ -1228,41 +1236,79 @@ def test_api_v2_export_dataset(self):
if not dump_format.ENABLED or dump_format.DIMENSION == dm.bindings.DimensionType.DIM_3D:
continue
dump_format_name = dump_format.DISPLAY_NAME
if dump_format_name in ('Market-1501 1.0', 'Cityscapes 1.0'):
self.skipTest('TO-DO: fix bug for this formats')
SpecLad marked this conversation as resolved.
Show resolved Hide resolved

with self.subTest(format=dump_format_name):
project = self._create_project(projects['main'])
pid = project['id']
images = self._generate_task_images(3)
tasks['task in project #1']['project_id'] = pid
self._create_task(tasks['task in project #1'], images)
images = self._generate_task_images(3, 3)
tasks['task in project #2']['project_id'] = pid
self._create_task(tasks['task in project #2'], images)
project = projects['main']
if dump_format_name in tasks:
project['labels'] = tasks[dump_format_name]['labels']
project = self._create_project(project)
tasks['task in project #1']['project_id'] = project['id']
task = self._create_task(tasks['task in project #1'], self._generate_task_images(3))

url = self._generate_url_dump_project_dataset(project['id'], dump_format_name)

if dump_format_name in [
"MOT 1.1", "MOTS PNG 1.0", \
"PASCAL VOC 1.1", "Segmentation mask 1.1", \
"TFRecord 1.0", "YOLO 1.1", "ImageNet 1.0", \
"WiderFace 1.0", "VGGFace2 1.0", "Cityscapes 1.0", \
"Datumaro 1.0"\
SpecLad marked this conversation as resolved.
Show resolved Hide resolved
]:
self._create_annotations(task, dump_format_name, "default")
else:
self._create_annotations(task, dump_format_name, "random")

for user, edata in list(expected.items()):
user_name = edata['name']
file_zip_name = osp.join(test_dir, f'{test_name}_{user_name}_{dump_format_name}.zip')
data = {
"format": dump_format_name,
}

response = self._get_request_with_data(url, data, user)
self.assertEqual(response.status_code, edata["accept code"])

response = self._get_request_with_data(url, data, user)
self.assertEqual(response.status_code, edata["create code"])

data = {
"format": dump_format_name,
"action": "download",
}
response = self._get_request_with_data(url, data, user)
self.assertEqual(response.status_code, edata["code"])

if response.status_code == status.HTTP_200_OK:
content = BytesIO(b"".join(response.streaming_content))
with open(file_zip_name, "wb") as f:
f.write(content.getvalue())

self.assertEqual(response.status_code, edata['code'])
self.assertEqual(osp.exists(file_zip_name), edata['file_exists'])

def test_api_v2_export_annotatios(self):
for upload_format in upload_formats:
if not upload_format.ENABLED or upload_format.DIMENSION == dm.bindings.DimensionType.DIM_3D:
continue
upload_format_name = upload_format.DISPLAY_NAME

with self.subTest(format=upload_format_name):
for user, edata in list(expected.items()):
project = projects['main']
if upload_format_name in tasks:
project['labels'] = tasks[upload_format_name]['labels']
project = self._create_project(project)
file_zip_name = osp.join(test_dir, f"{test_name}_{edata['name']}_{upload_format_name}.zip")
url = self._generate_url_upload_project_dataset(project['id'], upload_format_name)

if osp.exists(file_zip_name):
with open(file_zip_name, 'rb') as binary_file:
response = self._post_request_with_data(url, {"dataset_file": binary_file}, user)
print(response.status_code)
SpecLad marked this conversation as resolved.
Show resolved Hide resolved
self.assertEqual(response.status_code, edata['accept code'])

def test_api_v2_export_annotations(self):
test_name = self._testMethodName
dump_formats = dm.views.get_export_formats()

Expand Down
18 changes: 18 additions & 0 deletions tests/python/rest_api/test_projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,24 @@ def test_can_import_export_dataset_with_imagenet_format(self):

self._test_import_project(username, project_id, format_name, import_data)

def test_can_import_export_dataset_with_datumaro_format(self):
    """Export project 4 as "Datumaro 1.0", then import the result into the same project.

    See https://github.com/opencv/cvat/issues/4410
    """
    user, dataset_format, pid = "admin1", "Datumaro 1.0", 4

    exported = self._test_export_project(user, pid, dataset_format)

    # Wrap the exported bytes in a named in-memory file for the import request.
    archive = io.BytesIO(exported.data)
    archive.name = "dataset.zip"

    self._test_import_project(user, pid, dataset_format, {"dataset_file": archive})

zhiltsov-max marked this conversation as resolved.
Show resolved Hide resolved

@pytest.mark.usefixtures("restore_db_per_function")
class TestPatchProjectLabel:
Expand Down