diff --git a/nucliadb/src/nucliadb/backups/create.py b/nucliadb/src/nucliadb/backups/create.py index 779de12e26..4b48181de3 100644 --- a/nucliadb/src/nucliadb/backups/create.py +++ b/nucliadb/src/nucliadb/backups/create.py @@ -30,7 +30,6 @@ ) from nucliadb.backups.models import BackupMetadata, CreateBackupRequest from nucliadb.backups.settings import settings -from nucliadb.backups.utils import exists_in_storge from nucliadb.common import datamanagers from nucliadb.common.context import ApplicationContext from nucliadb.export_import.utils import ( @@ -261,7 +260,7 @@ async def delete_metadata(context: ApplicationContext, kbid: str, backup_id: str async def exists_cf(context: ApplicationContext, cf: resources_pb2.CloudFile) -> bool: - return await exists_in_storge(context.blob_storage, cf.bucket_name, cf.uri) + return await context.blob_storage.exists_object(bucket=cf.bucket_name, key=cf.uri) async def upload_to_bucket(context: ApplicationContext, bytes_iterator: AsyncIterator[bytes], key: str): diff --git a/nucliadb/src/nucliadb/backups/utils.py b/nucliadb/src/nucliadb/backups/utils.py index 847b26695c..7c25c16cd5 100644 --- a/nucliadb/src/nucliadb/backups/utils.py +++ b/nucliadb/src/nucliadb/backups/utils.py @@ -24,12 +24,7 @@ async def exists_backup(storage: Storage, backup_id: str) -> bool: - return await exists_in_storge( - storage, settings.backups_bucket, StorageKeys.BACKUP_PREFIX.format(backup_id=backup_id) + # As the labels file is always created, we use it to check if the backup exists + return await storage.exists_object( + settings.backups_bucket, StorageKeys.LABELS.format(backup_id=backup_id) ) - - -async def exists_in_storge(storage: Storage, bucket: str, key: str) -> bool: - async for _ in storage.iterate_objects(bucket=bucket, prefix=key): - return True - return False diff --git a/nucliadb_utils/src/nucliadb_utils/storages/azure.py b/nucliadb_utils/src/nucliadb_utils/storages/azure.py index 8284fdfbd4..5183068a5c 100644 --- a/nucliadb_utils/src/nucliadb_utils/storages/azure.py +++ b/nucliadb_utils/src/nucliadb_utils/storages/azure.py @@ -141,7 +141,10 @@ async def exists(self) -> Optional[ObjectMetadata]: bucket = self.bucket else: return None - return await self.storage.object_store.get_metadata(bucket, key) + try: + return await self.storage.object_store.get_metadata(bucket, key) + except ObjectNotFoundError: + return None async def upload(self, iterator: AsyncIterator, origin: CloudFile) -> CloudFile: self.field = await self.start(origin) diff --git a/nucliadb_utils/src/nucliadb_utils/storages/local.py b/nucliadb_utils/src/nucliadb_utils/storages/local.py index 86900f018e..5640ca96b6 100644 --- a/nucliadb_utils/src/nucliadb_utils/storages/local.py +++ b/nucliadb_utils/src/nucliadb_utils/storages/local.py @@ -308,3 +308,11 @@ async def insert_object(self, bucket: str, key: str, data: bytes) -> None: os.makedirs(os.path.dirname(path), exist_ok=True) with open(path, "wb") as file: file.write(data) + metadata_path = f"{path}.metadata" + metadata = ObjectMetadata( + filename=key.split("/")[-1], + content_type="application/octet-stream", + size=len(data), + ) + with open(metadata_path, "w") as file: + file.write(json.dumps(metadata.model_dump())) diff --git a/nucliadb_utils/src/nucliadb_utils/storages/storage.py b/nucliadb_utils/src/nucliadb_utils/storages/storage.py index 53864cb834..a30a814bff 100644 --- a/nucliadb_utils/src/nucliadb_utils/storages/storage.py +++ b/nucliadb_utils/src/nucliadb_utils/storages/storage.py @@ -332,6 +332,10 @@ def file_field( key = KB_RESOURCE_FIELD.format(kbid=kbid, uuid=uuid, field=field) return self.field_klass(storage=self, bucket=bucket, fullkey=key, field=old_field) + async def exists_object(self, bucket: str, key: str) -> bool: + sf: StorageField = self.field_klass(storage=self, bucket=bucket, fullkey=key) + return await sf.exists() is not None + def file_extracted( self, kbid: str, uuid: str, field_type: str, field: str, key: str ) -> StorageField: diff --git a/nucliadb_utils/tests/integration/storages/test_storage.py b/nucliadb_utils/tests/integration/storages/test_storage.py index d2116d6136..faeaef2a86 100644 --- a/nucliadb_utils/tests/integration/storages/test_storage.py +++ b/nucliadb_utils/tests/integration/storages/test_storage.py @@ -98,6 +98,7 @@ async def storage_test(storage: Storage): names.append(object_info.name) assert names == [key1] + await _test_exists_object(storage) await _test_iterate_objects(storage) # Check insert object @@ -117,6 +118,17 @@ async def storage_test(storage: Storage): await storage.delete_kb(kbid2) +async def _test_exists_object(storage: Storage): + bucket = "existtest" + await storage.create_bucket(bucket) + + assert await storage.exists_object(bucket, "foo") is False + + await storage.upload_object(bucket, "foo", b"bar") + + assert await storage.exists_object(bucket, "foo") is True + + async def _test_iterate_objects(storage: Storage): bucket = "itertest" await storage.create_bucket(bucket)