From 6c7db474329d79c95e8a98ed28da567aed641980 Mon Sep 17 00:00:00 2001 From: Javier Torres Date: Tue, 13 Feb 2024 09:58:04 +0100 Subject: [PATCH] url generation --- nucliadb/nucliadb/ingest/tests/fixtures.py | 2 +- nucliadb/nucliadb/reader/tests/fixtures.py | 2 +- nucliadb/nucliadb/train/tests/fixtures.py | 2 +- nucliadb/nucliadb/writer/api/v1/upload.py | 9 +- nucliadb/nucliadb/writer/tests/fixtures.py | 1 - nucliadb/nucliadb/writer/tests/test_files.py | 95 +++++++++++++++++++- nucliadb/requirements.lock.txt | 2 +- nucliadb/requirements.txt | 2 +- 8 files changed, 105 insertions(+), 10 deletions(-) diff --git a/nucliadb/nucliadb/ingest/tests/fixtures.py b/nucliadb/nucliadb/ingest/tests/fixtures.py index bf64dc1fc9..b41232a0d1 100644 --- a/nucliadb/nucliadb/ingest/tests/fixtures.py +++ b/nucliadb/nucliadb/ingest/tests/fixtures.py @@ -251,7 +251,7 @@ async def indexing_utility_ingest(natsd): @pytest.fixture(scope="function") -async def _natsd_reset(natsd, event_loop): +async def _natsd_reset(natsd): nc = await nats.connect(servers=[natsd]) js = nc.jetstream() try: diff --git a/nucliadb/nucliadb/reader/tests/fixtures.py b/nucliadb/nucliadb/reader/tests/fixtures.py index 0f3537792f..30d8264ed6 100644 --- a/nucliadb/nucliadb/reader/tests/fixtures.py +++ b/nucliadb/nucliadb/reader/tests/fixtures.py @@ -50,7 +50,7 @@ def test_settings_reader(cache, gcs, fake_node, maindb_driver): # type: ignore @pytest.fixture(scope="function") -async def reader_api(test_settings_reader: None, local_files, event_loop): # type: ignore +async def reader_api(test_settings_reader: None, local_files): # type: ignore from nucliadb.reader.app import create_application application = create_application() diff --git a/nucliadb/nucliadb/train/tests/fixtures.py b/nucliadb/nucliadb/train/tests/fixtures.py index aff8e7b835..b0faac6865 100644 --- a/nucliadb/nucliadb/train/tests/fixtures.py +++ b/nucliadb/nucliadb/train/tests/fixtures.py @@ -321,7 +321,7 @@ def test_settings_train(cache, gcs, fake_node, 
maindb_driver): # type: ignore @pytest.fixture(scope="function") -async def train_api(test_settings_train: None, local_files, event_loop): # type: ignore +async def train_api(test_settings_train: None, local_files): # type: ignore from nucliadb.train.utils import start_train_grpc, stop_train_grpc await start_shard_manager() diff --git a/nucliadb/nucliadb/writer/api/v1/upload.py b/nucliadb/nucliadb/writer/api/v1/upload.py index 81b3751e9b..b46f1484c2 100644 --- a/nucliadb/nucliadb/writer/api/v1/upload.py +++ b/nucliadb/nucliadb/writer/api/v1/upload.py @@ -294,7 +294,10 @@ async def _tus_post( await storage_manager.start(dm, path=path, kbid=kbid) await dm.save() - location = api.url_path_for("Upload information", kbid=kbid, upload_id=upload_id) + # Find the URL for upload, with the same parameter as this call + location = api.url_path_for( + "Upload information", upload_id=upload_id, **request.path_params + ) return Response( status_code=201, headers={ @@ -325,7 +328,7 @@ async def tus_head_rslug_prefix( @api.head( - f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}/file/{{field}}/{TUSUPLOAD}/{{upload_id}}", + f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{path_rid}}/file/{{field}}/{TUSUPLOAD}/{{upload_id}}", tags=["Resource field TUS uploads"], status_code=200, openapi_extra={"x-operation-order": 3}, @@ -336,7 +339,7 @@ async def tus_head_rslug_prefix( async def tus_head_rid_prefix( request: Request, kbid: str, - rid: str, + path_rid: str, field: str, upload_id: str, ) -> Response: diff --git a/nucliadb/nucliadb/writer/tests/fixtures.py b/nucliadb/nucliadb/writer/tests/fixtures.py index a438c32439..9466dfdd27 100644 --- a/nucliadb/nucliadb/writer/tests/fixtures.py +++ b/nucliadb/nucliadb/writer/tests/fixtures.py @@ -49,7 +49,6 @@ async def writer_api( transaction_utility, processing_utility, tus_manager, - event_loop, ) -> AsyncIterator[Callable[[list[Enum], str, str], AsyncClient]]: nucliadb_settings.nucliadb_ingest = grpc_servicer.host from nucliadb.writer.app import 
create_application diff --git a/nucliadb/nucliadb/writer/tests/test_files.py b/nucliadb/nucliadb/writer/tests/test_files.py index 3a1ec95c0e..df62442f55 100644 --- a/nucliadb/nucliadb/writer/tests/test_files.py +++ b/nucliadb/nucliadb/writer/tests/test_files.py @@ -28,7 +28,7 @@ from nucliadb_protos.resources_pb2 import FieldType from nucliadb_protos.writer_pb2 import BrokerMessage, ResourceFieldId -from nucliadb.writer.api.v1.router import KB_PREFIX, RSLUG_PREFIX +from nucliadb.writer.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, RSLUG_PREFIX from nucliadb.writer.api.v1.upload import maybe_b64decode from nucliadb.writer.tus import TUSUPLOAD, UPLOAD, get_storage_manager from nucliadb_models.resource import NucliaDBRoles @@ -535,6 +535,99 @@ async def test_file_tus_upload_field_by_slug(writer_api, knowledgebox_writer, re assert len(data.read()) == len(raw_bytes) +@pytest.mark.asyncio +async def test_file_tus_upload_field_by_resource_id( + writer_api, knowledgebox_writer, resource +): + kb = knowledgebox_writer + + async with writer_api(roles=[NucliaDBRoles.WRITER]) as client: + language = base64.b64encode(b"ca").decode() + filename = base64.b64encode(b"image.jpg").decode() + md5 = base64.b64encode(b"7af0916dba8b70e29d99e72941923529").decode() + headers = { + "tus-resumable": "1.0.0", + "upload-metadata": f"filename {filename},language {language},md5 {md5}", + "content-type": "image/jpg", + "upload-defer-length": "1", + } + + resp = await client.post( + f"/{KB_PREFIX}/{kb}/resource/idonotexist/file/field1/{TUSUPLOAD}", + headers=headers, + ) + assert resp.status_code == 404 + + resp = await client.post( + f"/{KB_PREFIX}/{kb}/resource/{resource}/file/field1/{TUSUPLOAD}", + headers=headers, + ) + assert resp.status_code == 201 + url = resp.headers["location"] + + # Check that we are using the resource id for the whole file upload + assert f"{RESOURCE_PREFIX}/{resource}" in url + + offset = 0 + min_chunk_size = get_storage_manager().min_upload_size + raw_bytes = b"x" * 
min_chunk_size + b"y" * 500 + io_bytes = io.BytesIO(raw_bytes) + data = io_bytes.read(min_chunk_size) + while data != b"": + resp = await client.head(url) + + assert resp.headers["Upload-Length"] == f"0" + assert resp.headers["Upload-Offset"] == f"{offset}" + + headers = { + "upload-offset": f"{offset}", + "content-length": f"{len(data)}", + } + is_last_chunk = len(data) < min_chunk_size + if is_last_chunk: + headers["upload-length"] = f"{offset + len(data)}" + + resp = await client.patch( + url, + content=data, + headers=headers, + ) + assert resp.status_code == 200 + offset += len(data) + data = io_bytes.read(min_chunk_size) + + assert resp.headers["Tus-Upload-Finished"] == "1" + + transaction = get_transaction_utility() + + sub = await transaction.js.pull_subscribe( + const.Streams.INGEST.subject.format(partition="1"), "auto" + ) + msgs = await sub.fetch(2) + + writer = BrokerMessage() + writer.ParseFromString(msgs[1].data) + await msgs[1].ack() + + path = resp.headers["ndb-field"] + field = path.split("/")[-1] + assert path.split("/")[-3] == resource + assert writer.uuid == resource + assert writer.basic.icon == "image/jpg" + assert writer.basic.title == "" + assert writer.files[field].language == "ca" + assert writer.files[field].file.size == len(raw_bytes) + assert writer.files[field].file.filename == "image.jpg" + assert writer.files[field].file.md5 == "7af0916dba8b70e29d99e72941923529" + + storage = await get_storage() + data = await storage.downloadbytes( + bucket=writer.files[field].file.bucket_name, + key=writer.files[field].file.uri, + ) + assert len(data.read()) == len(raw_bytes) + + @pytest.mark.asyncio async def test_multiple_tus_file_upload_tries( writer_api, knowledgebox_writer, resource diff --git a/nucliadb/requirements.lock.txt b/nucliadb/requirements.lock.txt index 466b6cbd39..04fcca9ee7 100644 --- a/nucliadb/requirements.lock.txt +++ b/nucliadb/requirements.lock.txt @@ -86,7 +86,7 @@ PyJWT==2.8.0 pyparsing==3.1.1 python-dateutil==2.8.2 
PyYAML==6.0.1 -redis==5.0.1 +redis==5.0.0 requests==2.31.0 rsa==4.9 sentry-sdk==1.40.3 diff --git a/nucliadb/requirements.txt b/nucliadb/requirements.txt index 14e7aab588..498dfa2464 100644 --- a/nucliadb/requirements.txt +++ b/nucliadb/requirements.txt @@ -23,7 +23,7 @@ types-setuptools # pydantic 2 need a careful migration pydantic>=1.9.0,<2.0 aiobotocore>=2.5.2 -botocore<1.30.0 +botocore>=1.34.0 google-cloud-storage gcloud oauth2client