From 36d23e843fa2eeb446514af6c6f00fb742bd2086 Mon Sep 17 00:00:00 2001 From: Maya Date: Tue, 30 Nov 2021 23:05:22 +0300 Subject: [PATCH 1/3] fix --- cvat/apps/engine/media_extractors.py | 21 +++++++++++---------- cvat/apps/engine/task.py | 22 ++++++++++++---------- 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py index 109fd7bad861..4a68bdca0659 100644 --- a/cvat/apps/engine/media_extractors.py +++ b/cvat/apps/engine/media_extractors.py @@ -179,14 +179,14 @@ def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionT ) class ArchiveReader(DirectoryReader): - def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D): + def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D, extract_dir=None): self._archive_source = source_path[0] - extract_dir = source_path[1] if len(source_path) > 1 else os.path.dirname(source_path[0]) - Archive(self._archive_source).extractall(extract_dir) - if extract_dir == os.path.dirname(source_path[0]): + tmp_dir = extract_dir if extract_dir else os.path.dirname(source_path[0]) + Archive(self._archive_source).extractall(tmp_dir) + if not extract_dir: os.remove(self._archive_source) super().__init__( - source_path=[extract_dir], + source_path=[tmp_dir], step=step, start=start, stop=stop, @@ -194,7 +194,7 @@ def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionT ) class PdfReader(ImageListReader): - def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D): + def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D, extract_dir=None): if not source_path: raise Exception('No PDF found') @@ -207,7 +207,7 @@ def _make_name(): yield '{}{:09d}.jpeg'.format(_basename, page_num) from pdf2image import convert_from_path - self._tmp_dir = os.path.dirname(source_path[0]) + self._tmp_dir = extract_dir if extract_dir else os.path.dirname(source_path[0]) os.makedirs(self._tmp_dir, exist_ok=True) # Avoid OOM: https://github.com/openvinotoolkit/cvat/issues/940 @@ -215,7 +215,8 @@ def _make_name(): last_page=stop, paths_only=True, output_folder=self._tmp_dir, fmt="jpeg", output_file=_make_name()) - os.remove(source_path[0]) + if not extract_dir: + os.remove(source_path[0]) super().__init__( source_path=paths, @@ -226,9 +227,9 @@ def _make_name(): ) class ZipReader(ImageListReader): - def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D): + def __init__(self, source_path, step=1, start=0, stop=None, dimension=DimensionType.DIM_2D, extract_dir=None): self._zip_source = zipfile.ZipFile(source_path[0], mode='r') - self.extract_dir = source_path[1] if len(source_path) > 1 else None + self.extract_dir = extract_dir file_list = [f for f in self._zip_source.namelist() if files_to_ignore(f) and get_mime(f) == 'image'] super().__init__(file_list, step=step, start=start, stop=stop, dimension=dimension) diff --git a/cvat/apps/engine/task.py b/cvat/apps/engine/task.py index 2003d541c559..de34b6b954aa 100644 --- a/cvat/apps/engine/task.py +++ b/cvat/apps/engine/task.py @@ -293,22 +293,24 @@ def _create_thread(tid, data, isImport=False): if extractor is not None: raise Exception('Combined data types are not supported') source_paths=[os.path.join(upload_dir, f) for f in media_files] - if media_type in {'archive', 'zip'} and db_data.storage == models.StorageChoice.SHARE: - source_paths.append(db_data.get_upload_dirname()) - upload_dir = db_data.get_upload_dirname() - db_data.storage = models.StorageChoice.LOCAL if isImport and media_type == 'image' and db_data.storage == models.StorageChoice.SHARE: manifest_index = _get_manifest_frame_indexer(db_data.start_frame, db_data.get_frame_step()) db_data.start_frame = 0 data['stop_frame'] = None db_data.frame_filter = '' - extractor = MEDIA_TYPES[media_type]['extractor']( - source_path=source_paths, - step=db_data.get_frame_step(), - start=db_data.start_frame, - stop=data['stop_frame'], - ) + details = { + 'source_path': source_paths, + 'step': db_data.get_frame_step(), + 'start': db_data.start_frame, + 'stop': data['stop_frame'], + } + if media_type in {'archive', 'zip', 'pdf'} and db_data.storage == models.StorageChoice.SHARE: + details['extract_dir'] = db_data.get_upload_dirname() + upload_dir = db_data.get_upload_dirname() + db_data.storage = models.StorageChoice.LOCAL + + extractor = MEDIA_TYPES[media_type]['extractor'](**details) validate_dimension = ValidateDimension() From cdf16b2d64384f64376ccc95bd1089fbcb2ea206 Mon Sep 17 00:00:00 2001 From: Maya Date: Mon, 13 Dec 2021 13:16:50 +0300 Subject: [PATCH 2/3] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index af4c55bc2430..df03aa425501 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Order of labels in tasks and projects () - Fixed task creating with large files via webpage () - Added information to export CVAT_HOST when performing local installation for accessing over network () +- Original pdf file is deleted when using share() ### Security - TDB From 9e320396b7287d9e89fc16383d091c9f72500faa Mon Sep 17 00:00:00 2001 From: Maya Date: Thu, 16 Dec 2021 11:01:44 +0300 Subject: [PATCH 3/3] Add test --- cvat/apps/engine/tests/test_rest_api.py | 34 ++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/cvat/apps/engine/tests/test_rest_api.py b/cvat/apps/engine/tests/test_rest_api.py index f5499c62cb41..20b2ee06daa3 100644 --- a/cvat/apps/engine/tests/test_rest_api.py +++ b/cvat/apps/engine/tests/test_rest_api.py @@ -2767,6 +2767,13 @@ def setUpClass(cls): shutil.rmtree(root_path) cls._image_sizes[filename] = image_sizes + file_name = 'test_1.pdf' + path = os.path.join(settings.SHARE_ROOT, file_name) + img_sizes, data = generate_pdf_file(file_name, page_count=5) + with open(path, "wb") as pdf_file: + pdf_file.write(data.read()) + cls._image_sizes[file_name] = img_sizes + generate_manifest_file(data_type='video', manifest_path=os.path.join(settings.SHARE_ROOT, 'videos', 'manifest.jsonl'), sources=[os.path.join(settings.SHARE_ROOT, 'videos', 'test_video_1.mp4')]) @@ -2804,6 +2811,9 @@ def tearDownClass(cls): path = os.path.join(settings.SHARE_ROOT, "manifest.jsonl") os.remove(path) + path = os.path.join(settings.SHARE_ROOT, "test_1.pdf") + os.remove(path) + def _run_api_v1_tasks_id_data_post(self, tid, user, data): with ForceLogin(user, self.client): response = self.client.post('/api/v1/tasks/{}/data'.format(tid), @@ -2886,10 +2896,12 @@ def _test_api_v1_tasks_id_data_spec(self, user, spec, data, expected_compressed_ db_data = Task.objects.get(pk=task_id).data self.assertEqual(expected_storage_method, db_data.storage_method) self.assertEqual(expected_uploaded_data_location, db_data.storage) - # check if used share without copying inside and files doesn`t exist in ../raw/ + # check if used share without copying inside and files doesn`t exist in ../raw/ and exist in share if expected_uploaded_data_location is StorageChoice.SHARE: - self.assertEqual(False, - os.path.exists(os.path.join(db_data.get_upload_dirname(), next(iter(data.values()))))) + raw_file_path = os.path.join(db_data.get_upload_dirname(), next(iter(data.values()))) + share_file_path = os.path.join(settings.SHARE_ROOT, next(iter(data.values()))) + self.assertEqual(False, os.path.exists(raw_file_path)) + self.assertEqual(True, os.path.exists(share_file_path)) # check preview response = self._get_preview(task_id, user) @@ -2956,6 +2968,10 @@ def _test_api_v1_tasks_id_data_spec(self, user, spec, data, expected_compressed_ for f in source_files: if zipfile.is_zipfile(f): source_images.extend(self._extract_zip_chunk(f, dimension=dimension)) + elif isinstance(f, str) and f.endswith('.pdf'): + with open(f, 'rb') as pdf_file: + source_images.extend(convert_from_bytes(pdf_file.read(), + fmt='png')) elif isinstance(f, io.BytesIO) and \ str(getattr(f, 'name', None)).endswith('.pdf'): source_images.extend(convert_from_bytes(f.getvalue(), @@ -3475,6 +3491,18 @@ def _test_api_v1_tasks_id_data(self, user): self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET, self.ChunkType.IMAGESET, image_sizes, StorageMethodChoice.CACHE, StorageChoice.SHARE) + task_spec.update([('name', 'task pdf in the shared folder #30')]) + task_data = { + "server_files[0]": "test_1.pdf", + "image_quality": 70, + "copy_data": False, + "use_cache": True, + } + image_sizes = self._image_sizes[task_data["server_files[0]"]] + + self._test_api_v1_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET, self.ChunkType.IMAGESET, + image_sizes, StorageMethodChoice.CACHE, StorageChoice.LOCAL) + def test_api_v1_tasks_id_data_admin(self): self._test_api_v1_tasks_id_data(self.admin)