url generation

nuclia · Feb 13, 2024 · 6c7db47 · 6c7db47 · github-actions · Feb 13, 2024
1 parent dde6bca
commit 6c7db47
Show file tree

Hide file tree

Showing 8 changed files with 105 additions and 10 deletions.
diff --git a/nucliadb/nucliadb/ingest/tests/fixtures.py b/nucliadb/nucliadb/ingest/tests/fixtures.py
@@ -251,7 +251,7 @@ async def indexing_utility_ingest(natsd):
 
 
 @pytest.fixture(scope="function")
-async def _natsd_reset(natsd, event_loop):
+async def _natsd_reset(natsd):
     nc = await nats.connect(servers=[natsd])
     js = nc.jetstream()
     try:

diff --git a/nucliadb/nucliadb/reader/tests/fixtures.py b/nucliadb/nucliadb/reader/tests/fixtures.py
@@ -50,7 +50,7 @@ def test_settings_reader(cache, gcs, fake_node, maindb_driver):  # type: ignore
 
 
 @pytest.fixture(scope="function")
-async def reader_api(test_settings_reader: None, local_files, event_loop):  # type: ignore
+async def reader_api(test_settings_reader: None, local_files):  # type: ignore
     from nucliadb.reader.app import create_application
 
     application = create_application()

diff --git a/nucliadb/nucliadb/train/tests/fixtures.py b/nucliadb/nucliadb/train/tests/fixtures.py
@@ -321,7 +321,7 @@ def test_settings_train(cache, gcs, fake_node, maindb_driver):  # type: ignore
 
 
 @pytest.fixture(scope="function")
-async def train_api(test_settings_train: None, local_files, event_loop):  # type: ignore
+async def train_api(test_settings_train: None, local_files):  # type: ignore
     from nucliadb.train.utils import start_train_grpc, stop_train_grpc
 
     await start_shard_manager()

diff --git a/nucliadb/nucliadb/writer/api/v1/upload.py b/nucliadb/nucliadb/writer/api/v1/upload.py
@@ -294,7 +294,10 @@ async def _tus_post(
     await storage_manager.start(dm, path=path, kbid=kbid)
     await dm.save()
 
-    location = api.url_path_for("Upload information", kbid=kbid, upload_id=upload_id)
+    # Build the upload-information URL from the same path parameters as this request
+    location = api.url_path_for(
+        "Upload information", upload_id=upload_id, **request.path_params
+    )
     return Response(
         status_code=201,
         headers={
@@ -325,7 +328,7 @@ async def tus_head_rslug_prefix(
 
 
 @api.head(
-    f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}/file/{{field}}/{TUSUPLOAD}/{{upload_id}}",
+    f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{path_rid}}/file/{{field}}/{TUSUPLOAD}/{{upload_id}}",
     tags=["Resource field TUS uploads"],
     status_code=200,
     openapi_extra={"x-operation-order": 3},
@@ -336,7 +339,7 @@ async def tus_head_rslug_prefix(
 async def tus_head_rid_prefix(
     request: Request,
     kbid: str,
-    rid: str,
+    path_rid: str,
     field: str,
     upload_id: str,
 ) -> Response:

diff --git a/nucliadb/nucliadb/writer/tests/fixtures.py b/nucliadb/nucliadb/writer/tests/fixtures.py
@@ -49,7 +49,6 @@ async def writer_api(
     transaction_utility,
     processing_utility,
     tus_manager,
-    event_loop,
 ) -> AsyncIterator[Callable[[list[Enum], str, str], AsyncClient]]:
     nucliadb_settings.nucliadb_ingest = grpc_servicer.host
     from nucliadb.writer.app import create_application

diff --git a/nucliadb/nucliadb/writer/tests/test_files.py b/nucliadb/nucliadb/writer/tests/test_files.py
@@ -28,7 +28,7 @@
 from nucliadb_protos.resources_pb2 import FieldType
 from nucliadb_protos.writer_pb2 import BrokerMessage, ResourceFieldId
 
-from nucliadb.writer.api.v1.router import KB_PREFIX, RSLUG_PREFIX
+from nucliadb.writer.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, RSLUG_PREFIX
 from nucliadb.writer.api.v1.upload import maybe_b64decode
 from nucliadb.writer.tus import TUSUPLOAD, UPLOAD, get_storage_manager
 from nucliadb_models.resource import NucliaDBRoles
@@ -535,6 +535,99 @@ async def test_file_tus_upload_field_by_slug(writer_api, knowledgebox_writer, re
     assert len(data.read()) == len(raw_bytes)
 
 
+@pytest.mark.asyncio
+async def test_file_tus_upload_field_by_resource_id(  # TUS upload to a file field addressed by resource id
+    writer_api, knowledgebox_writer, resource
+):
+    kb = knowledgebox_writer
+
+    async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
+        language = base64.b64encode(b"ca").decode()  # upload-metadata values are sent base64-encoded
+        filename = base64.b64encode(b"image.jpg").decode()
+        md5 = base64.b64encode(b"7af0916dba8b70e29d99e72941923529").decode()
+        headers = {
+            "tus-resumable": "1.0.0",
+            "upload-metadata": f"filename {filename},language {language},md5 {md5}",
+            "content-type": "image/jpg",
+            "upload-defer-length": "1",  # total length is only sent with the last chunk (see below)
+        }
+
+        resp = await client.post(
+            f"/{KB_PREFIX}/{kb}/resource/idonotexist/file/field1/{TUSUPLOAD}",
+            headers=headers,
+        )
+        assert resp.status_code == 404  # an unknown resource id must be rejected
+
+        resp = await client.post(
+            f"/{KB_PREFIX}/{kb}/resource/{resource}/file/field1/{TUSUPLOAD}",
+            headers=headers,
+        )
+        assert resp.status_code == 201
+        url = resp.headers["location"]
+
+        # Check that the returned upload URL is addressed by resource id (not by slug)
+        assert f"{RESOURCE_PREFIX}/{resource}" in url
+
+        offset = 0
+        min_chunk_size = get_storage_manager().min_upload_size
+        raw_bytes = b"x" * min_chunk_size + b"y" * 500  # one full chunk plus a 500-byte tail
+        io_bytes = io.BytesIO(raw_bytes)
+        data = io_bytes.read(min_chunk_size)
+        while data != b"":
+            resp = await client.head(url)
+
+            assert resp.headers["Upload-Length"] == f"0"  # length is still deferred before each PATCH
+            assert resp.headers["Upload-Offset"] == f"{offset}"
+
+            headers = {
+                "upload-offset": f"{offset}",
+                "content-length": f"{len(data)}",
+            }
+            is_last_chunk = len(data) < min_chunk_size  # assumes min_upload_size > 500 so the tail is short — TODO confirm
+            if is_last_chunk:
+                headers["upload-length"] = f"{offset + len(data)}"
+
+            resp = await client.patch(
+                url,
+                content=data,
+                headers=headers,
+            )
+            assert resp.status_code == 200
+            offset += len(data)
+            data = io_bytes.read(min_chunk_size)
+
+        assert resp.headers["Tus-Upload-Finished"] == "1"
+
+    transaction = get_transaction_utility()
+
+    sub = await transaction.js.pull_subscribe(
+        const.Streams.INGEST.subject.format(partition="1"), "auto"
+    )
+    msgs = await sub.fetch(2)  # NOTE(review): only msgs[1] is inspected; presumably msgs[0] comes from the resource fixture — confirm
+
+    writer = BrokerMessage()
+    writer.ParseFromString(msgs[1].data)
+    await msgs[1].ack()
+
+    path = resp.headers["ndb-field"]
+    field = path.split("/")[-1]  # last path segment is used as the key into writer.files
+    assert path.split("/")[-3] == resource  # third-from-last segment must be the resource id
+    assert writer.uuid == resource
+    assert writer.basic.icon == "image/jpg"
+    assert writer.basic.title == ""
+    assert writer.files[field].language == "ca"
+    assert writer.files[field].file.size == len(raw_bytes)
+    assert writer.files[field].file.filename == "image.jpg"
+    assert writer.files[field].file.md5 == "7af0916dba8b70e29d99e72941923529"
+
+    storage = await get_storage()
+    data = await storage.downloadbytes(
+        bucket=writer.files[field].file.bucket_name,
+        key=writer.files[field].file.uri,
+    )
+    assert len(data.read()) == len(raw_bytes)
+
+
 @pytest.mark.asyncio
 async def test_multiple_tus_file_upload_tries(
     writer_api, knowledgebox_writer, resource

diff --git a/nucliadb/requirements.lock.txt b/nucliadb/requirements.lock.txt
@@ -86,7 +86,7 @@ PyJWT==2.8.0
 pyparsing==3.1.1
 python-dateutil==2.8.2
 PyYAML==6.0.1
-redis==5.0.1
+redis==5.0.0
 requests==2.31.0
 rsa==4.9
 sentry-sdk==1.40.3

diff --git a/nucliadb/requirements.txt b/nucliadb/requirements.txt
@@ -23,7 +23,7 @@ types-setuptools
 # pydantic 2 need a careful migration
 pydantic>=1.9.0,<2.0
 aiobotocore>=2.5.2
-botocore<1.30.0
+botocore>=1.34.0
 google-cloud-storage
 gcloud
 oauth2client