Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix tus loading if multiple apache processes are used #6006

Merged
merged 6 commits into from
Apr 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Server micro version support check in SDK/CLI (<https://github.com/opencv/cvat/pull/5991>)
- \[SDK\] Compatibility with upcoming urllib 2.1.0
(<https://github.com/opencv/cvat/pull/6002>)
- Fix TUS file uploading if multiple apache processes are used (<https://github.com/opencv/cvat/pull/6006>)

### Security
- TDB
Expand Down
157 changes: 104 additions & 53 deletions cvat/apps/engine/mixins.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,92 @@
# Copyright (C) 2021-2022 Intel Corporation
# Copyright (C) 2022 CVAT.ai Corporation
# Copyright (C) 2022-2023 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT

import os
import base64
from unittest import mock
import json
import os
import uuid
from dataclasses import asdict, dataclass
from distutils.util import strtobool
from unittest import mock

from django.conf import settings
from django.core.cache import cache
from distutils.util import strtobool
from rest_framework import status, mixins
from rest_framework import mixins, status
from rest_framework.response import Response

from cvat.apps.engine.models import Location
from cvat.apps.engine.location import StorageType, get_location_configuration
from cvat.apps.engine.models import Location
from cvat.apps.engine.serializers import DataSerializer


class TusFile:
_tus_cache_timeout = 3600
def __init__(self, file_id, upload_dir):
@dataclass
class TusMeta:
metadata: dict
filename: str
file_size: int
offset: int = 0

class TusMetaFile():
def __init__(self, path) -> None:
self._path = path
self._meta = None
if os.path.exists(self._path):
self._meta = self._read()

@property
def meta(self):
return self._meta

@meta.setter
def meta(self, meta):
self._meta = meta

def _read(self):
with open(self._path, "r") as fp:
data = json.load(fp)
return TusFile.TusMeta(**data)

def save(self):
if self._meta is not None:
os.makedirs(os.path.dirname(self._path), exist_ok=True)
with open(self._path, "w") as fp:
json.dump(asdict(self._meta), fp)

def exists(self):
return os.path.exists(self._path)

def delete(self):
os.remove(self._path)

def __init__(self, file_id, upload_dir, meta=None):
self.file_id = file_id
self.upload_dir = upload_dir
self.file_path = os.path.join(self.upload_dir, self.file_id)
self.filename = cache.get("tus-uploads/{}/filename".format(file_id))
self.file_size = int(cache.get("tus-uploads/{}/file_size".format(file_id)))
self.metadata = cache.get("tus-uploads/{}/metadata".format(file_id))
self.offset = cache.get("tus-uploads/{}/offset".format(file_id))
self.meta_file = self.TusMetaFile(self._get_tus_meta_file_path(file_id, upload_dir))
if meta is not None:
self.meta_file.meta = meta
self.meta_file.save()

@property
def filename(self):
return self.meta_file.meta.filename

@property
def file_size(self):
return self.meta_file.meta.file_size

@property
def offset(self):
return self.meta_file.meta.offset

def exists(self):
return self.meta_file.exists()

@staticmethod
def _get_tus_meta_file_path(file_id, upload_dir):
return os.path.join(upload_dir, f"{file_id}.meta")

def init_file(self):
os.makedirs(self.upload_dir, exist_ok=True)
Expand All @@ -40,50 +99,43 @@ def write_chunk(self, chunk):
with open(self.file_path, 'r+b') as file:
file.seek(chunk.offset)
file.write(chunk.content)
self.offset = cache.incr("tus-uploads/{}/offset".format(self.file_id), chunk.size)
self.meta_file.meta.offset += chunk.size
self.meta_file.save()

def is_complete(self):
return self.offset == self.file_size

def rename(self):
file_id_path = os.path.join(self.upload_dir, self.file_id)
file_path = os.path.join(self.upload_dir, self.filename)
file_exists = os.path.lexists(os.path.join(self.upload_dir, self.filename))
if file_exists:
if os.path.lexists(file_path):
original_file_name, extension = os.path.splitext(self.filename)
file_amount = 1
while os.path.lexists(os.path.join(self.upload_dir, self.filename)):
self.filename = "{}_{}{}".format(original_file_name, file_amount, extension)
self.meta_file.meta.filename = "{}_{}{}".format(original_file_name, file_amount, extension)
file_path = os.path.join(self.upload_dir, self.filename)
file_amount += 1
os.rename(file_id_path, file_path)
os.rename(self.file_path, file_path)

def clean(self):
cache.delete_many([
"tus-uploads/{}/file_size".format(self.file_id),
"tus-uploads/{}/filename".format(self.file_id),
"tus-uploads/{}/offset".format(self.file_id),
"tus-uploads/{}/metadata".format(self.file_id),
])

@staticmethod
def get_tusfile(file_id, upload_dir):
file_exists = cache.get("tus-uploads/{}/filename".format(file_id), None) is not None
if file_exists:
return TusFile(file_id, upload_dir)
return None
self.meta_file.delete()

@staticmethod
def create_file(metadata, file_size, upload_dir):
file_id = str(uuid.uuid4())
filename = metadata.get("filename")
cache.add("tus-uploads/{}/filename".format(file_id), "{}".format(filename), TusFile._tus_cache_timeout)
cache.add("tus-uploads/{}/file_size".format(file_id), file_size, TusFile._tus_cache_timeout)
cache.add("tus-uploads/{}/offset".format(file_id), 0, TusFile._tus_cache_timeout)
cache.add("tus-uploads/{}/metadata".format(file_id), metadata, TusFile._tus_cache_timeout)
azhavoro marked this conversation as resolved.
Show resolved Hide resolved

tus_file = TusFile(file_id, upload_dir)
tus_file = TusFile(
file_id,
upload_dir,
TusFile.TusMeta(
filename=filename,
file_size=file_size,
offset=0,
metadata=metadata,
),
)
tus_file.init_file()

return tus_file

class TusChunk:
Expand Down Expand Up @@ -190,32 +242,31 @@ def init_tus_upload(self, request):
'Upload-Filename': tus_file.filename})

def append_tus_chunk(self, request, file_id):
tus_file = TusFile(str(file_id), self.get_upload_dir())
if request.method == 'HEAD':
tus_file = TusFile.get_tusfile(str(file_id), self.get_upload_dir())
if tus_file:
if tus_file.exists():
return self._tus_response(status=status.HTTP_200_OK, extra_headers={
'Upload-Offset': tus_file.offset,
'Upload-Length': tus_file.file_size})
return self._tus_response(status=status.HTTP_404_NOT_FOUND)
else:
tus_file = TusFile.get_tusfile(str(file_id), self.get_upload_dir())
chunk = TusChunk(request)

if chunk.offset != tus_file.offset:
return self._tus_response(status=status.HTTP_409_CONFLICT)
chunk = TusChunk(request)

if chunk.offset != tus_file.offset:
return self._tus_response(status=status.HTTP_409_CONFLICT)

if chunk.offset > tus_file.file_size:
return self._tus_response(status=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE)
if chunk.offset > tus_file.file_size:
return self._tus_response(status=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE)

tus_file.write_chunk(chunk)
tus_file.write_chunk(chunk)

if tus_file.is_complete():
tus_file.rename()
tus_file.clean()
if tus_file.is_complete():
tus_file.rename()
tus_file.clean()

return self._tus_response(status=status.HTTP_204_NO_CONTENT,
extra_headers={'Upload-Offset': tus_file.offset,
'Upload-Filename': tus_file.filename})
return self._tus_response(status=status.HTTP_204_NO_CONTENT,
extra_headers={'Upload-Offset': tus_file.offset,
'Upload-Filename': tus_file.filename})

def validate_filename(self, filename):
upload_dir = self.get_upload_dir()
Expand Down