From 38b5b10b7475d1fe84b53cc7f6bb4b3a1485f8e2 Mon Sep 17 00:00:00 2001 From: Maya Date: Wed, 28 Apr 2021 13:08:16 +0300 Subject: [PATCH 01/10] Add logger, reverse func --- cvat/apps/engine/migrations/0038_manifest.py | 87 ++++++++++++++++++-- 1 file changed, 82 insertions(+), 5 deletions(-) diff --git a/cvat/apps/engine/migrations/0038_manifest.py b/cvat/apps/engine/migrations/0038_manifest.py index 7447aa6f5740..036d1751442d 100644 --- a/cvat/apps/engine/migrations/0038_manifest.py +++ b/cvat/apps/engine/migrations/0038_manifest.py @@ -1,6 +1,8 @@ # Generated by Django 3.1.1 on 2021-02-20 08:36 import glob +import itertools +import logging import os from re import search @@ -11,26 +13,91 @@ StorageMethodChoice) from utils.dataset_manifest import ImageManifestManager, VideoManifestManager -def migrate_data(apps, shema_editor): +def get_logger(): + migration = os.path.basename(__file__).split(".")[0] + logger = logging.getLogger(name=migration) + logger.setLevel(logging.INFO) + file_handler = logging.FileHandler(os.path.join(settings.MIGRATIONS_LOGS_ROOT, f"{migration}.log")) + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) + return logger + +def _get_query_set(apps): Data = apps.get_model("engine", "Data") query_set = Data.objects.filter(storage_method=StorageMethodChoice.CACHE) + return query_set + +def migrate2meta(apps, shema_editor): + logger = get_logger() + query_set = _get_query_set(apps) + for db_data in query_set: + try: + upload_dir = '{}/{}/raw'.format(settings.MEDIA_DATA_ROOT, db_data.id) + logger.info('Migrate data({}), folder - {}'.format(db_data.id, upload_dir)) + meta_path = os.path.join(upload_dir, "meta_info.txt") + if os.path.exists(os.path.join(upload_dir, 'manifest.jsonl')): + os.remove(os.path.join(upload_dir, 'manifest.jsonl')) + logger.info('A manifest file has been deleted') + if os.path.exists(os.path.join(upload_dir, 'index.json')): + os.remove(os.path.join(upload_dir, 'index.json')) + logger.info('A manifest index file has been deleted') + data_dir = upload_dir if db_data.storage == StorageChoice.LOCAL else settings.SHARE_ROOT + if hasattr(db_data, 'video'): + if os.path.exists(meta_path): + logger.info('A meta_info.txt alredy exists') + continue + media_file = os.path.join(data_dir, db_data.video.path) + logger.info('Preparing of the video meta has begun') + meta = VideoManifestManager(manifest_path=upload_dir) \ + .prepare_meta(media_file=media_file, force=True) + with open(meta_path, "w") as meta_file: + for idx, pts, _ in meta: + meta_file.write(f"{idx} {pts}\n") + meta_file.write(str(meta.frames)) + logger.info('Succesfull migration for the data({})'.format(db_data.id)) + else: + name_format = "dummy_{}.txt" + sources = [db_image.path for db_image in db_data.images.all().order_by('frame')] + counter = itertools.count() + for idx, img_paths in itertools.groupby(sources, lambda x: next(counter) // db_data.chunk_size): + if os.path.exists(os.path.join(upload_dir, name_format.format(idx))): + logger.info(name_format.format(idx) + " alredy exists") + continue + with open(os.path.join(upload_dir, name_format.format(idx)), "w") as dummy_chunk: + dummy_chunk.writelines([f"{img_path}\n" for img_path in img_paths]) + except Exception as ex: + logger.error(str(ex)) + +def migrate2manifest(apps, shema_editor): + logger = get_logger() + logger.info('The data migration has been started for creating manifest`s files') + query_set = _get_query_set(apps) + logger.info('Need to update {} data objects'.format(len(query_set))) for db_data in query_set: try: upload_dir = '{}/{}/raw'.format(settings.MEDIA_DATA_ROOT, db_data.id) + logger.info('Migrate data({}), folder - {}'.format(db_data.id, upload_dir)) if os.path.exists(os.path.join(upload_dir, 'meta_info.txt')): - os.remove(os.path.join(upload_dir, 'meta_info.txt')) + os.remove(os.path.join(upload_dir, 'meta_info.txt')) + logger.info('{}/meta_info.txt has been deleted'.format(upload_dir)) else: for path in glob.glob(f'{upload_dir}/dummy_*.txt'): os.remove(path) + logger.info(f"{path} has been deleted") # it's necessary for case with long data migration if os.path.exists(os.path.join(upload_dir, 'manifest.jsonl')): + logger.info('Manifest file alredy exists') continue data_dir = upload_dir if db_data.storage == StorageChoice.LOCAL else settings.SHARE_ROOT if hasattr(db_data, 'video'): media_file = os.path.join(data_dir, db_data.video.path) manifest = VideoManifestManager(manifest_path=upload_dir) - meta_info = manifest.prepare_meta(media_file=media_file) + logger.info('Preparing of the video meta information has begun') + meta_info = manifest.prepare_meta(media_file=media_file, force=True) + logger.info('Manifest creating has begun') manifest.create(meta_info) + logger.info('Index creating has begun') manifest.init_index() else: manifest = ImageManifestManager(manifest_path=upload_dir) @@ -43,6 +110,7 @@ def migrate_data(apps, shema_editor): else: sources = [os.path.join(data_dir, db_image.path) for db_image in db_data.images.all().order_by('frame')] if any(list(filter(lambda x: x.dimension==DimensionType.DIM_3D, db_data.tasks.all()))): + logger.info('Preparing of images 3d meta information has begun') content = [] for source in sources: name, ext = os.path.splitext(os.path.relpath(source, upload_dir)) @@ -51,6 +119,7 @@ def migrate_data(apps, shema_editor): 'extension': ext }) else: + logger.info('Preparing of 2d images meta information has begun') meta_info = manifest.prepare_meta(sources=sources, data_dir=data_dir) content = meta_info.content @@ -58,6 +127,7 @@ def migrate_data(apps, shema_editor): def _get_frame_step(str_): match = search("step\s*=\s*([1-9]\d*)", str_) return int(match.group(1)) if match else 1 + logger.info('Data is located on the share, metadata update has been started') step = _get_frame_step(db_data.frame_filter) start = db_data.start_frame stop = db_data.stop_frame + 1 @@ -67,10 +137,14 @@ def _get_frame_step(str_): item = content.pop(0) if i in images_range else dict() result_content.append(item) content = result_content + logger.info('') + logger.info('Manifest creating has begun') manifest.create(content) + logger.info('Index creating has begun') manifest.init_index() + logger.info('Succesfull migration for the data({})'.format(db_data.id)) except Exception as ex: - print(str(ex)) + logger.error(str(ex)) class Migration(migrations.Migration): @@ -79,5 +153,8 @@ class Migration(migrations.Migration): ] operations = [ - migrations.RunPython(migrate_data) + migrations.RunPython( + code=migrate2manifest, + reverse_code=migrate2meta + ) ] From 358af707c67850f21e3e04f10aaf9595c9baea55 Mon Sep 17 00:00:00 2001 From: Maya Date: Thu, 29 Apr 2021 13:15:07 +0300 Subject: [PATCH 02/10] Fix image filtering --- cvat/apps/engine/migrations/0038_manifest.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cvat/apps/engine/migrations/0038_manifest.py b/cvat/apps/engine/migrations/0038_manifest.py index 036d1751442d..8e9626df76ff 100644 --- a/cvat/apps/engine/migrations/0038_manifest.py +++ b/cvat/apps/engine/migrations/0038_manifest.py @@ -11,6 +11,7 @@ from cvat.apps.engine.models import (DimensionType, StorageChoice, StorageMethodChoice) +from cvat.apps.engine.media_extractors import get_mime from utils.dataset_manifest import ImageManifestManager, VideoManifestManager def get_logger(): @@ -104,7 +105,7 @@ def migrate2manifest(apps, shema_editor): sources = [] if db_data.storage == StorageChoice.LOCAL: for (root, _, files) in os.walk(data_dir): - sources.extend([os.path.join(root, f) for f in files]) + sources.extend([os.path.join(root, f) for f in files if get_mime(f) == 'image']) sources.sort() # using share, this means that we can not explicitly restore the entire data structure else: From be98cb5f118c89c605dcfa72f73d90061e9400d8 Mon Sep 17 00:00:00 2001 From: Maya Date: Fri, 30 Apr 2021 11:32:47 +0300 Subject: [PATCH 03/10] Fix upload video manifest --- utils/dataset_manifest/core.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/utils/dataset_manifest/core.py b/utils/dataset_manifest/core.py index 78a00b0b98bf..edb68fb21af6 100644 --- a/utils/dataset_manifest/core.py +++ b/utils/dataset_manifest/core.py @@ -325,7 +325,7 @@ def index(self): return self._index class VideoManifestManager(_ManifestManager): - def __init__(self, manifest_path, *args, **kwargs): + def __init__(self, manifest_path): super().__init__(manifest_path) setattr(self._manifest, 'TYPE', 'video') self.BASE_INFORMATION['properties'] = 3 @@ -381,9 +381,15 @@ def validate_base_info(self): assert self._manifest.TYPE != json.loads(manifest_file.readline())['type'] class VideoManifestValidator(VideoManifestManager): - def __init__(self, **kwargs): - self.source_path = kwargs.pop('source_path') - super().__init__(self, **kwargs) + def __init__(self, source_path, manifest_path): + self.source_path = source_path + super().__init__(manifest_path) + + @staticmethod + def _get_video_stream(container): + video_stream = next(stream for stream in container.streams if stream.type == 'video') + video_stream.thread_type = 'AUTO' + return video_stream def validate_key_frame(self, container, video_stream, key_frame): for packet in container.demux(video_stream): From 3d4bb4a86ecf02a600c42b05a537060c0077e201 Mon Sep 17 00:00:00 2001 From: Maya Date: Fri, 30 Apr 2021 12:01:14 +0300 Subject: [PATCH 04/10] Fix --- cvat/apps/engine/migrations/0038_manifest.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cvat/apps/engine/migrations/0038_manifest.py b/cvat/apps/engine/migrations/0038_manifest.py index 8e9626df76ff..dd5c0aa18874 100644 --- a/cvat/apps/engine/migrations/0038_manifest.py +++ b/cvat/apps/engine/migrations/0038_manifest.py @@ -55,7 +55,6 @@ def migrate2meta(apps, shema_editor): with open(meta_path, "w") as meta_file: for idx, pts, _ in meta: meta_file.write(f"{idx} {pts}\n") - meta_file.write(str(meta.frames)) logger.info('Succesfull migration for the data({})'.format(db_data.id)) else: name_format = "dummy_{}.txt" From 47dadab32ca9c6994522056c4eab50b8308a7358 Mon Sep 17 00:00:00 2001 From: Maya Date: Fri, 30 Apr 2021 12:07:57 +0300 Subject: [PATCH 05/10] Fix logger messages --- cvat/apps/engine/migrations/0038_manifest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cvat/apps/engine/migrations/0038_manifest.py b/cvat/apps/engine/migrations/0038_manifest.py index dd5c0aa18874..bdf6c8ec29c2 100644 --- a/cvat/apps/engine/migrations/0038_manifest.py +++ b/cvat/apps/engine/migrations/0038_manifest.py @@ -55,17 +55,18 @@ def migrate2meta(apps, shema_editor): with open(meta_path, "w") as meta_file: for idx, pts, _ in meta: meta_file.write(f"{idx} {pts}\n") - logger.info('Succesfull migration for the data({})'.format(db_data.id)) else: name_format = "dummy_{}.txt" sources = [db_image.path for db_image in db_data.images.all().order_by('frame')] counter = itertools.count() + logger.info('Preparing of the dummy chunks has begun') for idx, img_paths in itertools.groupby(sources, lambda x: next(counter) // db_data.chunk_size): if os.path.exists(os.path.join(upload_dir, name_format.format(idx))): logger.info(name_format.format(idx) + " alredy exists") continue with open(os.path.join(upload_dir, name_format.format(idx)), "w") as dummy_chunk: dummy_chunk.writelines([f"{img_path}\n" for img_path in img_paths]) + logger.info('Succesfull migration for the data({})'.format(db_data.id)) except Exception as ex: logger.error(str(ex)) @@ -137,7 +138,6 @@ def _get_frame_step(str_): item = content.pop(0) if i in images_range else dict() result_content.append(item) content = result_content - logger.info('') logger.info('Manifest creating has begun') manifest.create(content) logger.info('Index creating has begun') From 5d659192473d554f264665857ff0d0d7ab36fc5a Mon Sep 17 00:00:00 2001 From: Maya Date: Fri, 30 Apr 2021 13:09:21 +0300 Subject: [PATCH 06/10] Add stdout & stderr --- cvat/apps/engine/migrations/0038_manifest.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cvat/apps/engine/migrations/0038_manifest.py b/cvat/apps/engine/migrations/0038_manifest.py index bdf6c8ec29c2..777f3df74c7d 100644 --- a/cvat/apps/engine/migrations/0038_manifest.py +++ b/cvat/apps/engine/migrations/0038_manifest.py @@ -4,6 +4,7 @@ import itertools import logging import os +import sys from re import search from django.conf import settings @@ -22,6 +23,8 @@ def get_logger(): formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') file_handler.setFormatter(formatter) logger.addHandler(file_handler) + logger.addHandler(logging.StreamHandler(sys.stdout)) + logger.addHandler(logging.StreamHandler(sys.stderr)) return logger def _get_query_set(apps): From 94cb016ba11c761c92d044e3d582e7c0b7f56214 Mon Sep 17 00:00:00 2001 From: Maya Date: Fri, 30 Apr 2021 20:33:10 +0300 Subject: [PATCH 07/10] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 763a1047be15..bcedcc12e84a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Export of instance masks with holes () - Changing a label on canvas does not work when 'Show object details' enabled () - Make sure frame unzip web worker correctly terminates after unzipping all images in a requested chunk () +- Manifest: migration () ### Security From 25c5d99d896a143490dd1f0a9f09586797bcb50d Mon Sep 17 00:00:00 2001 From: Nikita Manovich Date: Tue, 4 May 2021 19:28:57 +0300 Subject: [PATCH 08/10] Fix typo --- cvat/apps/engine/migrations/0038_manifest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cvat/apps/engine/migrations/0038_manifest.py b/cvat/apps/engine/migrations/0038_manifest.py index 777f3df74c7d..21e9ce5696f9 100644 --- a/cvat/apps/engine/migrations/0038_manifest.py +++ b/cvat/apps/engine/migrations/0038_manifest.py @@ -65,7 +65,7 @@ def migrate2meta(apps, shema_editor): logger.info('Preparing of the dummy chunks has begun') for idx, img_paths in itertools.groupby(sources, lambda x: next(counter) // db_data.chunk_size): if os.path.exists(os.path.join(upload_dir, name_format.format(idx))): - logger.info(name_format.format(idx) + " alredy exists") + logger.info(name_format.format(idx) + " already exists") continue with open(os.path.join(upload_dir, name_format.format(idx)), "w") as dummy_chunk: dummy_chunk.writelines([f"{img_path}\n" for img_path in img_paths]) From e10f59e234bcde5206cc1eb6d719766415d514d4 Mon Sep 17 00:00:00 2001 From: Nikita Manovich Date: Tue, 4 May 2021 19:34:21 +0300 Subject: [PATCH 09/10] Fix typo --- cvat/apps/engine/migrations/0038_manifest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cvat/apps/engine/migrations/0038_manifest.py b/cvat/apps/engine/migrations/0038_manifest.py index 21e9ce5696f9..cfc867b711ea 100644 --- a/cvat/apps/engine/migrations/0038_manifest.py +++ b/cvat/apps/engine/migrations/0038_manifest.py @@ -91,7 +91,7 @@ def migrate2manifest(apps, shema_editor): logger.info(f"{path} has been deleted") # it's necessary for case with long data migration if os.path.exists(os.path.join(upload_dir, 'manifest.jsonl')): - logger.info('Manifest file alredy exists') + logger.info('Manifest file already exists') continue data_dir = upload_dir if db_data.storage == StorageChoice.LOCAL else settings.SHARE_ROOT if hasattr(db_data, 'video'): From 492d1fe73314e20dca38f753f2b538ff890c7104 Mon Sep 17 00:00:00 2001 From: Nikita Manovich Date: Tue, 4 May 2021 19:38:50 +0300 Subject: [PATCH 10/10] Fix typo --- cvat/apps/engine/migrations/0038_manifest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cvat/apps/engine/migrations/0038_manifest.py b/cvat/apps/engine/migrations/0038_manifest.py index cfc867b711ea..05a5b11b0b8e 100644 --- a/cvat/apps/engine/migrations/0038_manifest.py +++ b/cvat/apps/engine/migrations/0038_manifest.py @@ -49,7 +49,7 @@ def migrate2meta(apps, shema_editor): data_dir = upload_dir if db_data.storage == StorageChoice.LOCAL else settings.SHARE_ROOT if hasattr(db_data, 'video'): if os.path.exists(meta_path): - logger.info('A meta_info.txt alredy exists') + logger.info('A meta_info.txt already exists') continue media_file = os.path.join(data_dir, db_data.video.path) logger.info('Preparing of the video meta has begun')