Skip to content

Commit

Permalink
Fix exists object storage logic (#2958)
Browse files Browse the repository at this point in the history
  • Loading branch information
lferran authored Mar 10, 2025
1 parent 37c331d commit ccd53e6
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 11 deletions.
3 changes: 1 addition & 2 deletions nucliadb/src/nucliadb/backups/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
)
from nucliadb.backups.models import BackupMetadata, CreateBackupRequest
from nucliadb.backups.settings import settings
from nucliadb.backups.utils import exists_in_storge
from nucliadb.common import datamanagers
from nucliadb.common.context import ApplicationContext
from nucliadb.export_import.utils import (
Expand Down Expand Up @@ -261,7 +260,7 @@ async def delete_metadata(context: ApplicationContext, kbid: str, backup_id: str


# Report whether the object referenced by the CloudFile `cf` exists in the
# application's blob storage.
# NOTE(review): this is a flattened diff hunk (indentation and +/- markers lost).
# The file header reports 1 addition and 2 deletions for create.py, so the first
# `return` below is presumably the removed line and the second the added one --
# confirm against the commit.
async def exists_cf(context: ApplicationContext, cf: resources_pb2.CloudFile) -> bool:
# Presumably removed: went through the exists_in_storge helper, which lists
# objects using the uri as a prefix.
return await exists_in_storge(context.blob_storage, cf.bucket_name, cf.uri)
# Presumably added: asks the storage directly whether an object exists for
# this bucket name and uri.
return await context.blob_storage.exists_object(bucket=cf.bucket_name, key=cf.uri)


async def upload_to_bucket(context: ApplicationContext, bytes_iterator: AsyncIterator[bytes], key: str):
Expand Down
11 changes: 3 additions & 8 deletions nucliadb/src/nucliadb/backups/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,7 @@


# Report whether a backup with the given id exists in the configured backups bucket.
# NOTE(review): flattened diff hunk mixing removed and added lines. Per the file
# header (3 additions, 8 deletions for utils.py) the exists_in_storge call is
# presumably the removed code and the exists_object call the added code; the
# closing parenthesis is shared by both versions -- confirm against the commit.
async def exists_backup(storage: Storage, backup_id: str) -> bool:
# Presumably removed: checked for any object listed under the backup prefix.
return await exists_in_storge(
storage, settings.backups_bucket, StorageKeys.BACKUP_PREFIX.format(backup_id=backup_id)
# Presumably added: checks for the backup's labels object instead of
# listing everything under the backup prefix.
# As the labels file is always created, we use it to check if the backup exists
return await storage.exists_object(
settings.backups_bucket, StorageKeys.LABELS.format(backup_id=backup_id)
)


# Return True if iterating the objects of `bucket` with `key` as prefix yields
# at least one object, False otherwise.
# Because `key` is passed as a prefix, this presumably also matches longer keys
# that merely start with `key` -- verify against Storage.iterate_objects.
# NOTE(review): the name is misspelled ("storge"); the diff header counts for
# utils.py suggest this helper is deleted by the commit -- confirm.
async def exists_in_storge(storage: Storage, bucket: str, key: str) -> bool:
async for _ in storage.iterate_objects(bucket=bucket, prefix=key):
# The first yielded object is enough; stop iterating immediately.
return True
# The iteration yielded nothing for this prefix.
return False
5 changes: 4 additions & 1 deletion nucliadb_utils/src/nucliadb_utils/storages/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,10 @@ async def exists(self) -> Optional[ObjectMetadata]:
bucket = self.bucket
else:
return None
return await self.storage.object_store.get_metadata(bucket, key)
try:
return await self.storage.object_store.get_metadata(bucket, key)
except ObjectNotFoundError:
return None

async def upload(self, iterator: AsyncIterator, origin: CloudFile) -> CloudFile:
self.field = await self.start(origin)
Expand Down
8 changes: 8 additions & 0 deletions nucliadb_utils/src/nucliadb_utils/storages/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,3 +308,11 @@ async def insert_object(self, bucket: str, key: str, data: bytes) -> None:
os.makedirs(os.path.dirname(path), exist_ok=True)
with open(path, "wb") as file:
file.write(data)
metadata_path = f"{path}.metadata"
metadata = ObjectMetadata(
filename=key.split("/")[-1],
content_type="application/octet-stream",
size=len(data),
)
with open(metadata_path, "w") as file:
file.write(json.dumps(metadata.model_dump()))
4 changes: 4 additions & 0 deletions nucliadb_utils/src/nucliadb_utils/storages/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,10 @@ def file_field(
key = KB_RESOURCE_FIELD.format(kbid=kbid, uuid=uuid, field=field)
return self.field_klass(storage=self, bucket=bucket, fullkey=key, field=old_field)

# Return True when the storage field built for `bucket`/`key` reports that it
# exists, False otherwise.
# NOTE(review): shown here as a flattened diff hunk; in the real file this is a
# method of the storage class and is indented accordingly.
async def exists_object(self, bucket: str, key: str) -> bool:
# Build a field object for this exact key using the storage's own field class.
sf: StorageField = self.field_klass(storage=self, bucket=bucket, fullkey=key)
# exists() returning None is treated as "not found"; any other value
# (presumably the object's metadata) is treated as "found".
return await sf.exists() is not None

def file_extracted(
self, kbid: str, uuid: str, field_type: str, field: str, key: str
) -> StorageField:
Expand Down
12 changes: 12 additions & 0 deletions nucliadb_utils/tests/integration/storages/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ async def storage_test(storage: Storage):
names.append(object_info.name)
assert names == [key1]

await _test_exists_object(storage)
await _test_iterate_objects(storage)

# Check insert object
Expand All @@ -117,6 +118,17 @@ async def storage_test(storage: Storage):
await storage.delete_kb(kbid2)


# Check Storage.exists_object before and after uploading an object into a
# dedicated test bucket.
async def _test_exists_object(storage: Storage):
bucket = "existtest"
await storage.create_bucket(bucket)

# Nothing has been uploaded under "foo" yet, so it must be reported as missing.
assert await storage.exists_object(bucket, "foo") is False

await storage.upload_object(bucket, "foo", b"bar")

# After the upload the same key must be reported as present.
assert await storage.exists_object(bucket, "foo") is True


async def _test_iterate_objects(storage: Storage):
bucket = "itertest"
await storage.create_bucket(bucket)
Expand Down

0 comments on commit ccd53e6

Please sign in to comment.