Skip to content

Commit

Permalink
url generation
Browse files Browse the repository at this point in the history
  • Loading branch information
javitonino committed Feb 13, 2024
1 parent dde6bca commit 6c7db47
Show file tree
Hide file tree
Showing 8 changed files with 105 additions and 10 deletions.
2 changes: 1 addition & 1 deletion nucliadb/nucliadb/ingest/tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ async def indexing_utility_ingest(natsd):


@pytest.fixture(scope="function")
async def _natsd_reset(natsd, event_loop):
async def _natsd_reset(natsd):
nc = await nats.connect(servers=[natsd])
js = nc.jetstream()
try:
Expand Down
2 changes: 1 addition & 1 deletion nucliadb/nucliadb/reader/tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def test_settings_reader(cache, gcs, fake_node, maindb_driver): # type: ignore


@pytest.fixture(scope="function")
async def reader_api(test_settings_reader: None, local_files, event_loop): # type: ignore
async def reader_api(test_settings_reader: None, local_files): # type: ignore
from nucliadb.reader.app import create_application

application = create_application()
Expand Down
2 changes: 1 addition & 1 deletion nucliadb/nucliadb/train/tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ def test_settings_train(cache, gcs, fake_node, maindb_driver): # type: ignore


@pytest.fixture(scope="function")
async def train_api(test_settings_train: None, local_files, event_loop): # type: ignore
async def train_api(test_settings_train: None, local_files): # type: ignore
from nucliadb.train.utils import start_train_grpc, stop_train_grpc

await start_shard_manager()
Expand Down
9 changes: 6 additions & 3 deletions nucliadb/nucliadb/writer/api/v1/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,10 @@ async def _tus_post(
await storage_manager.start(dm, path=path, kbid=kbid)
await dm.save()

location = api.url_path_for("Upload information", kbid=kbid, upload_id=upload_id)
# Find the URL for upload, with the same parameter as this call
location = api.url_path_for(
"Upload information", upload_id=upload_id, **request.path_params
)
return Response(
status_code=201,
headers={
Expand Down Expand Up @@ -325,7 +328,7 @@ async def tus_head_rslug_prefix(


@api.head(
f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{rid}}/file/{{field}}/{TUSUPLOAD}/{{upload_id}}",
f"/{KB_PREFIX}/{{kbid}}/{RESOURCE_PREFIX}/{{path_rid}}/file/{{field}}/{TUSUPLOAD}/{{upload_id}}",
tags=["Resource field TUS uploads"],
status_code=200,
openapi_extra={"x-operation-order": 3},
Expand All @@ -336,7 +339,7 @@ async def tus_head_rslug_prefix(
async def tus_head_rid_prefix(
request: Request,
kbid: str,
rid: str,
path_rid: str,
field: str,
upload_id: str,
) -> Response:
Expand Down
1 change: 0 additions & 1 deletion nucliadb/nucliadb/writer/tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ async def writer_api(
transaction_utility,
processing_utility,
tus_manager,
event_loop,
) -> AsyncIterator[Callable[[list[Enum], str, str], AsyncClient]]:
nucliadb_settings.nucliadb_ingest = grpc_servicer.host
from nucliadb.writer.app import create_application
Expand Down
95 changes: 94 additions & 1 deletion nucliadb/nucliadb/writer/tests/test_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from nucliadb_protos.resources_pb2 import FieldType
from nucliadb_protos.writer_pb2 import BrokerMessage, ResourceFieldId

from nucliadb.writer.api.v1.router import KB_PREFIX, RSLUG_PREFIX
from nucliadb.writer.api.v1.router import KB_PREFIX, RESOURCE_PREFIX, RSLUG_PREFIX
from nucliadb.writer.api.v1.upload import maybe_b64decode
from nucliadb.writer.tus import TUSUPLOAD, UPLOAD, get_storage_manager
from nucliadb_models.resource import NucliaDBRoles
Expand Down Expand Up @@ -535,6 +535,99 @@ async def test_file_tus_upload_field_by_slug(writer_api, knowledgebox_writer, re
assert len(data.read()) == len(raw_bytes)


@pytest.mark.asyncio
async def test_file_tus_upload_field_by_resource_id(
    writer_api, knowledgebox_writer, resource
):
    """Upload a file field through TUS, addressing the resource by its id.

    Checks that:
    - creating an upload on an unknown resource id answers 404,
    - the ``location`` returned for a valid resource keeps the resource-id
      route (``RESOURCE_PREFIX/<rid>``) for the rest of the upload,
    - the chunked HEAD/PATCH cycle finishes the upload,
    - the resulting broker message and the stored bytes match what was sent.
    """
    kb = knowledgebox_writer

    async with writer_api(roles=[NucliaDBRoles.WRITER]) as client:
        language = base64.b64encode(b"ca").decode()
        filename = base64.b64encode(b"image.jpg").decode()
        md5 = base64.b64encode(b"7af0916dba8b70e29d99e72941923529").decode()
        headers = {
            "tus-resumable": "1.0.0",
            "upload-metadata": f"filename {filename},language {language},md5 {md5}",
            "content-type": "image/jpg",
            # The total size is only announced together with the last chunk.
            "upload-defer-length": "1",
        }

        # Unknown resource id: the upload must not be created.
        resp = await client.post(
            f"/{KB_PREFIX}/{kb}/{RESOURCE_PREFIX}/idonotexist/file/field1/{TUSUPLOAD}",
            headers=headers,
        )
        assert resp.status_code == 404

        resp = await client.post(
            f"/{KB_PREFIX}/{kb}/{RESOURCE_PREFIX}/{resource}/file/field1/{TUSUPLOAD}",
            headers=headers,
        )
        assert resp.status_code == 201
        url = resp.headers["location"]

        # Check that we are using the resource id for the whole file upload
        assert f"{RESOURCE_PREFIX}/{resource}" in url

        offset = 0
        min_chunk_size = get_storage_manager().min_upload_size
        # One full chunk plus a shorter trailing chunk.
        raw_bytes = b"x" * min_chunk_size + b"y" * 500
        io_bytes = io.BytesIO(raw_bytes)
        chunk = io_bytes.read(min_chunk_size)
        while chunk != b"":
            resp = await client.head(url)

            # Length is deferred, so it is reported as 0 before each PATCH.
            assert resp.headers["Upload-Length"] == "0"
            assert resp.headers["Upload-Offset"] == f"{offset}"

            chunk_headers = {
                "upload-offset": f"{offset}",
                "content-length": f"{len(chunk)}",
            }
            is_last_chunk = len(chunk) < min_chunk_size
            if is_last_chunk:
                # Announce the final size together with the last chunk.
                chunk_headers["upload-length"] = f"{offset + len(chunk)}"

            resp = await client.patch(
                url,
                content=chunk,
                headers=chunk_headers,
            )
            assert resp.status_code == 200
            offset += len(chunk)
            chunk = io_bytes.read(min_chunk_size)

        # `resp` is the response to the last PATCH.
        assert resp.headers["Tus-Upload-Finished"] == "1"

        transaction = get_transaction_utility()

        sub = await transaction.js.pull_subscribe(
            const.Streams.INGEST.subject.format(partition="1"), "auto"
        )
        msgs = await sub.fetch(2)

        # NOTE(review): the second fetched message is taken as the one produced
        # by this upload; presumably the first comes from the `resource`
        # fixture -- confirm against the fixture.
        writer = BrokerMessage()
        writer.ParseFromString(msgs[1].data)
        await msgs[1].ack()

        path = resp.headers["ndb-field"]
        path_parts = path.split("/")
        field = path_parts[-1]
        assert path_parts[-3] == resource
        assert writer.uuid == resource
        assert writer.basic.icon == "image/jpg"
        assert writer.basic.title == ""
        assert writer.files[field].language == "ca"
        assert writer.files[field].file.size == len(raw_bytes)
        assert writer.files[field].file.filename == "image.jpg"
        assert writer.files[field].file.md5 == "7af0916dba8b70e29d99e72941923529"

        storage = await get_storage()
        stored = await storage.downloadbytes(
            bucket=writer.files[field].file.bucket_name,
            key=writer.files[field].file.uri,
        )
        assert len(stored.read()) == len(raw_bytes)


@pytest.mark.asyncio
async def test_multiple_tus_file_upload_tries(
writer_api, knowledgebox_writer, resource
Expand Down
2 changes: 1 addition & 1 deletion nucliadb/requirements.lock.txt
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ PyJWT==2.8.0
pyparsing==3.1.1
python-dateutil==2.8.2
PyYAML==6.0.1
redis==5.0.1
redis==5.0.0
requests==2.31.0
rsa==4.9
sentry-sdk==1.40.3
Expand Down
2 changes: 1 addition & 1 deletion nucliadb/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ types-setuptools
# pydantic 2 need a careful migration
pydantic>=1.9.0,<2.0
aiobotocore>=2.5.2
botocore<1.30.0
botocore>=1.34.0
google-cloud-storage
gcloud
oauth2client
Expand Down

3 comments on commit 6c7db47

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: 6c7db47 Previous: d4afd82 Ratio
nucliadb/search/tests/unit/search/test_fetch.py::test_highligh_error 13364.45423170066 iter/sec (stddev: 6.954242229166724e-7) 13028.533525895236 iter/sec (stddev: 4.192637045977425e-7) 0.97

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: 6c7db47 Previous: d4afd82 Ratio
nucliadb/search/tests/unit/search/test_fetch.py::test_highligh_error 13256.006636055266 iter/sec (stddev: 2.1444538218864e-7) 13028.533525895236 iter/sec (stddev: 4.192637045977425e-7) 0.98

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: 6c7db47 Previous: d4afd82 Ratio
nucliadb/search/tests/unit/search/test_fetch.py::test_highligh_error 12677.517605741306 iter/sec (stddev: 1.8345230403151718e-7) 13028.533525895236 iter/sec (stddev: 4.192637045977425e-7) 1.03

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.