Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix exists object storage logic #2958

Merged
merged 4 commits into from
Mar 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions nucliadb/src/nucliadb/backups/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
)
from nucliadb.backups.models import BackupMetadata, CreateBackupRequest
from nucliadb.backups.settings import settings
from nucliadb.backups.utils import exists_in_storge
from nucliadb.common import datamanagers
from nucliadb.common.context import ApplicationContext
from nucliadb.export_import.utils import (
Expand Down Expand Up @@ -261,7 +260,7 @@ async def delete_metadata(context: ApplicationContext, kbid: str, backup_id: str


async def exists_cf(context: ApplicationContext, cf: resources_pb2.CloudFile) -> bool:
    """Report whether the object referenced by a cloud file is present in blob storage.

    The check is an exact-key lookup through ``exists_object``. A prefix
    listing is deliberately not used here: listing with ``prefix=cf.uri``
    can be satisfied by a different object whose key merely begins with the
    same string, which would report a file as present when it is not.

    Args:
        context: Application context; its ``blob_storage`` is queried.
        cf: Cloud file whose ``bucket_name`` and ``uri`` identify the object.

    Returns:
        Whatever ``exists_object`` reports for that bucket/key pair.
    """
    return await context.blob_storage.exists_object(bucket=cf.bucket_name, key=cf.uri)


async def upload_to_bucket(context: ApplicationContext, bytes_iterator: AsyncIterator[bytes], key: str):
Expand Down
11 changes: 3 additions & 8 deletions nucliadb/src/nucliadb/backups/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,7 @@


async def exists_backup(storage: Storage, backup_id: str) -> bool:
    """Report whether a backup with the given id exists in the backups bucket.

    Args:
        storage: Storage driver used to perform the lookup.
        backup_id: Identifier substituted into the labels key template.

    Returns:
        The result of ``storage.exists_object`` for the backup's labels key.
    """
    # As the labels file is always created, we use it to check if the backup exists
    return await storage.exists_object(
        settings.backups_bucket, StorageKeys.LABELS.format(backup_id=backup_id)
    )


async def exists_in_storge(storage: Storage, bucket: str, key: str) -> bool:
    """Return True as soon as the storage lists any object for ``key`` used as a prefix.

    This is a prefix listing, not an exact-key lookup: the first item produced
    by ``iterate_objects(bucket=..., prefix=key)`` counts as a hit.
    """
    listing = storage.iterate_objects(bucket=bucket, prefix=key)
    async for _first_match in listing:
        # A single listed object is enough; stop iterating immediately.
        return True
    return False
5 changes: 4 additions & 1 deletion nucliadb_utils/src/nucliadb_utils/storages/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,10 @@ async def exists(self) -> Optional[ObjectMetadata]:
bucket = self.bucket
else:
return None
return await self.storage.object_store.get_metadata(bucket, key)
try:
return await self.storage.object_store.get_metadata(bucket, key)
except ObjectNotFoundError:
return None

async def upload(self, iterator: AsyncIterator, origin: CloudFile) -> CloudFile:
self.field = await self.start(origin)
Expand Down
8 changes: 8 additions & 0 deletions nucliadb_utils/src/nucliadb_utils/storages/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,3 +308,11 @@ async def insert_object(self, bucket: str, key: str, data: bytes) -> None:
os.makedirs(os.path.dirname(path), exist_ok=True)
with open(path, "wb") as file:
file.write(data)
metadata_path = f"{path}.metadata"
metadata = ObjectMetadata(
filename=key.split("/")[-1],
content_type="application/octet-stream",
size=len(data),
)
with open(metadata_path, "w") as file:
file.write(json.dumps(metadata.model_dump()))
4 changes: 4 additions & 0 deletions nucliadb_utils/src/nucliadb_utils/storages/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,10 @@ def file_field(
key = KB_RESOURCE_FIELD.format(kbid=kbid, uuid=uuid, field=field)
return self.field_klass(storage=self, bucket=bucket, fullkey=key, field=old_field)

async def exists_object(self, bucket: str, key: str) -> bool:
    """Tell whether an object is stored under ``key`` in ``bucket``.

    A storage field is built for the exact key and the value returned by its
    ``exists()`` coroutine is inspected: ``None`` means the object is absent,
    anything else means it is present.
    """
    field: StorageField = self.field_klass(storage=self, bucket=bucket, fullkey=key)
    metadata = await field.exists()
    return metadata is not None

def file_extracted(
self, kbid: str, uuid: str, field_type: str, field: str, key: str
) -> StorageField:
Expand Down
12 changes: 12 additions & 0 deletions nucliadb_utils/tests/integration/storages/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ async def storage_test(storage: Storage):
names.append(object_info.name)
assert names == [key1]

await _test_exists_object(storage)
await _test_iterate_objects(storage)

# Check insert object
Expand All @@ -117,6 +118,17 @@ async def storage_test(storage: Storage):
await storage.delete_kb(kbid2)


async def _test_exists_object(storage: Storage):
    """Check ``exists_object`` before and after uploading an object."""
    bucket = "existtest"
    key = "foo"
    await storage.create_bucket(bucket)

    # Nothing has been uploaded yet, so the key must be reported as absent.
    found_before = await storage.exists_object(bucket, key)
    assert found_before is False

    await storage.upload_object(bucket, key, b"bar")

    # After the upload the very same key must be reported as present.
    found_after = await storage.exists_object(bucket, key)
    assert found_after is True


async def _test_iterate_objects(storage: Storage):
bucket = "itertest"
await storage.create_bucket(bucket)
Expand Down
Loading