From 1fe9d0afe6223ff81e9f8605e5435698f3a8253a Mon Sep 17 00:00:00 2001 From: Ferran Llamas Date: Tue, 18 Jun 2024 09:12:28 +0200 Subject: [PATCH] Azure blob storage connection tweaks (#2252) --- nucliadb/src/nucliadb/ingest/processing.py | 43 ++++-- nucliadb/src/nucliadb/search/predict.py | 6 +- .../src/nucliadb/search/search/chat/prompt.py | 30 ++-- nucliadb/src/nucliadb/writer/tus/__init__.py | 9 +- nucliadb/src/nucliadb/writer/tus/azure.py | 6 +- nucliadb/tests/fixtures.py | 1 + nucliadb_node/src/settings.rs | 13 +- nucliadb_utils/requirements-storages.txt | 1 + nucliadb_utils/src/nucliadb_utils/settings.py | 9 +- .../src/nucliadb_utils/storages/azure.py | 17 ++- .../src/nucliadb_utils/tests/azure.py | 3 + .../src/nucliadb_utils/utilities.py | 5 +- pdm.lock | 129 ++++++++++++------ pyproject.toml | 1 + 14 files changed, 178 insertions(+), 95 deletions(-) diff --git a/nucliadb/src/nucliadb/ingest/processing.py b/nucliadb/src/nucliadb/ingest/processing.py index 8280d548f8..7212632fda 100644 --- a/nucliadb/src/nucliadb/ingest/processing.py +++ b/nucliadb/src/nucliadb/ingest/processing.py @@ -37,7 +37,12 @@ from nucliadb_protos.resources_pb2 import FieldFile as FieldFilePB from nucliadb_telemetry import metrics from nucliadb_utils.exceptions import LimitsExceededError, SendToProcessError -from nucliadb_utils.settings import FileBackendConfig, nuclia_settings, storage_settings +from nucliadb_utils.settings import ( + FileBackendConfig, + is_onprem_nucliadb, + nuclia_settings, + storage_settings, +) from nucliadb_utils.storages.storage import Storage from nucliadb_utils.utilities import Utility, set_utility @@ -129,22 +134,34 @@ async def start_processing_engine(): set_utility(Utility.PROCESSING, processing_engine) -def to_processing_driver_type(file_backend_driver: FileBackendConfig) -> int: +class ProcessingDriverType(Enum): + # XXX IMPORTANT XXX: Make sure the values are in sync with + # the ones defined in nuclia/learning/processing repository + GCS = 0 + S3 = 1 + 
LOCAL = 2 + + +def to_processing_driver_type(file_backend_driver: FileBackendConfig) -> ProcessingDriverType: """ Outputs a nuclia-internal backend driver identifier that is used by processing to store the blobs of processed metadata in the right bucket folder. """ - if file_backend_driver == FileBackendConfig.GCS: - return 0 - elif file_backend_driver == FileBackendConfig.S3: - return 1 - elif file_backend_driver == FileBackendConfig.LOCAL: - return 2 - else: + if is_onprem_nucliadb(): + # On-prem installations are always regarded as local storage from the processing perspective, + # as Nuclia processing engine will not have direct access to the storage. + return ProcessingDriverType.LOCAL + + try: + return { + FileBackendConfig.GCS: ProcessingDriverType.GCS, + FileBackendConfig.S3: ProcessingDriverType.S3, + }[file_backend_driver] + except KeyError: logger.error( f"Not a valid file backend driver to processing, fallback to local: {file_backend_driver}" ) - return 2 + return ProcessingDriverType.LOCAL class ProcessingEngine: @@ -180,7 +197,7 @@ def __init__( self.nuclia_jwt_key = nuclia_jwt_key self.days_to_keep = days_to_keep - self.driver = to_processing_driver_type(driver) + self.driver: ProcessingDriverType = to_processing_driver_type(driver) self._exit_stack = AsyncExitStack() async def initialize(self): @@ -203,7 +220,7 @@ def generate_file_token_from_cloudfile(self, cf: CloudFile) -> str: "iat": now, "md5": cf.md5, "source": 1, # To indicate that this files comes internally - "driver": self.driver, + "driver": self.driver.value, "jti": uuid.uuid4().hex, "bucket_name": cf.bucket_name, "filename": cf.filename, @@ -227,7 +244,7 @@ def generate_file_token_from_fieldfile(self, file: FieldFilePB) -> str: "iat": now, "md5": file.file.md5, "source": 1, # To indicate that this files comes internally - "driver": self.driver, + "driver": self.driver.value, "jti": uuid.uuid4().hex, "bucket_name": file.file.bucket_name, "filename": file.file.filename, diff --git 
a/nucliadb/src/nucliadb/search/predict.py b/nucliadb/src/nucliadb/search/predict.py index e6fc161284..3d3eb730c0 100644 --- a/nucliadb/src/nucliadb/search/predict.py +++ b/nucliadb/src/nucliadb/search/predict.py @@ -244,7 +244,6 @@ async def check_response(self, resp: aiohttp.ClientResponse, expected_status: in if resp.status == 402: data = await resp.json() raise LimitsExceededError(402, data["detail"]) - try: data = await resp.json() try: @@ -256,7 +255,10 @@ async def check_response(self, resp: aiohttp.ClientResponse, expected_status: in aiohttp.client_exceptions.ContentTypeError, ): detail = await resp.text() - logger.error(f"Predict API error at {resp.url}: {detail}") + if str(resp.status).startswith("5"): + logger.error(f"Predict API error at {resp.url}: {detail}") + else: + logger.info(f"Predict API error at {resp.url}: {detail}") raise ProxiedPredictAPIError(status=resp.status, detail=detail) @backoff.on_exception( diff --git a/nucliadb/src/nucliadb/search/search/chat/prompt.py b/nucliadb/src/nucliadb/search/search/chat/prompt.py index b317641f41..b8341af94c 100644 --- a/nucliadb/src/nucliadb/search/search/chat/prompt.py +++ b/nucliadb/src/nucliadb/search/search/chat/prompt.py @@ -379,9 +379,7 @@ async def build( context = ccontext.output context_images = ccontext.images - context_order = { - text_block_id: order for order, text_block_id in enumerate(context.keys()) - } + context_order = {text_block_id: order for order, text_block_id in enumerate(context.keys())} return context, context_order, context_images async def _build_context_images(self, context: CappedPromptContext) -> None: @@ -401,27 +399,17 @@ async def _build_context_images(self, context: CappedPromptContext) -> None: for paragraph in self.ordered_paragraphs: if paragraph.page_with_visual and paragraph.position: - if ( - gather_pages - and paragraph.position.page_number - and len(context.images) < page_count - ): + if gather_pages and paragraph.position.page_number and len(context.images) < 
page_count: field = "/".join(paragraph.id.split("/")[:3]) page = paragraph.position.page_number page_id = f"{field}/{page}" if page_id not in context.images: - context.images[page_id] = await get_page_image( - self.kbid, paragraph.id, page - ) + context.images[page_id] = await get_page_image(self.kbid, paragraph.id, page) # Only send tables if enabled by strategy, by default, send paragraph images - send_images = ( - gather_tables and paragraph.is_a_table - ) or not paragraph.is_a_table + send_images = (gather_tables and paragraph.is_a_table) or not paragraph.is_a_table if send_images and paragraph.reference and paragraph.reference != "": image = paragraph.reference - context.images[paragraph.id] = await get_paragraph_image( - self.kbid, paragraph.id, image - ) + context.images[paragraph.id] = await get_paragraph_image(self.kbid, paragraph.id, image) async def _build_context(self, context: CappedPromptContext) -> None: if self.strategies is None or len(self.strategies) == 0: @@ -473,9 +461,7 @@ class ExtraCharsParagraph: paragraphs: List[Tuple[FindParagraph, str]] -async def get_extra_chars( - kbid: str, ordered_paragraphs: list[FindParagraph], distance: int -): +async def get_extra_chars(kbid: str, ordered_paragraphs: list[FindParagraph], distance: int): etcache = paragraphs.ExtractedTextCache() resources: Dict[str, ExtraCharsParagraph] = {} for paragraph in ordered_paragraphs: @@ -531,7 +517,9 @@ async def get_extra_chars( paragraph.text = "" if first_paragraph is not None: - first_paragraph.text = f"DOCUMENT: {title_text} \n SUMMARY: {summary_text} \n RESOURCE CONTENT: {text}" + first_paragraph.text = ( + f"DOCUMENT: {title_text} \n SUMMARY: {summary_text} \n RESOURCE CONTENT: {text}" + ) def _clean_paragraph_text(paragraph: FindParagraph) -> str: diff --git a/nucliadb/src/nucliadb/writer/tus/__init__.py b/nucliadb/src/nucliadb/writer/tus/__init__.py index fe2bd0fd2f..596a278f80 100644 --- a/nucliadb/src/nucliadb/writer/tus/__init__.py +++ 
b/nucliadb/src/nucliadb/writer/tus/__init__.py @@ -92,11 +92,14 @@ async def initialize(): DRIVER = TusStorageDriver(backend=storage_backend, manager=storage_manager) elif storage_settings.file_backend == FileBackendConfig.AZURE: - if storage_settings.azure_connection_string is None: - raise ConfigurationError("AZURE_CONNECTION_STRING env variable not configured") + if storage_settings.azure_account_url is None: + raise ConfigurationError("AZURE_ACCOUNT_URL env variable not configured") storage_backend = AzureBlobStore() - await storage_backend.initialize(storage_settings.azure_connection_string) + await storage_backend.initialize( + storage_settings.azure_account_url, + connection_string=storage_settings.azure_connection_string, + ) storage_manager = AzureFileStorageManager(storage_backend) DRIVER = TusStorageDriver(backend=storage_backend, manager=storage_manager) diff --git a/nucliadb/src/nucliadb/writer/tus/azure.py b/nucliadb/src/nucliadb/writer/tus/azure.py index 138da68fc6..c384184582 100644 --- a/nucliadb/src/nucliadb/writer/tus/azure.py +++ b/nucliadb/src/nucliadb/writer/tus/azure.py @@ -19,6 +19,8 @@ # from __future__ import annotations +from typing import Optional + from nucliadb.writer import logger from nucliadb.writer.tus.dm import FileDataManager from nucliadb.writer.tus.storage import BlobStore, FileStorageManager @@ -39,10 +41,10 @@ async def finalize(self): logger.exception("Error closing AzureBlobStore") self._object_store = None - async def initialize(self, connection_string: str): + async def initialize(self, account_url: str, connection_string: Optional[str] = None): self.bucket = "nucliadb-{kbid}" self.source = CloudFile.Source.AZURE - self._object_store = AzureObjectStore(connection_string) + self._object_store = AzureObjectStore(account_url, connection_string=connection_string) await self._object_store.initialize() @property diff --git a/nucliadb/tests/fixtures.py b/nucliadb/tests/fixtures.py index ff9ed65276..6848930073 100644 --- 
a/nucliadb/tests/fixtures.py +++ b/nucliadb/tests/fixtures.py @@ -675,6 +675,7 @@ def local_storage_settings(tmpdir): def azure_storage_settings(azurite: AzuriteFixture): return { "file_backend": FileBackendConfig.AZURE, + "azure_account_url": azurite.account_url, "azure_connection_string": azurite.connection_string, } diff --git a/nucliadb_node/src/settings.rs b/nucliadb_node/src/settings.rs index 2ce2bb1a11..9812c9e89f 100644 --- a/nucliadb_node/src/settings.rs +++ b/nucliadb_node/src/settings.rs @@ -134,8 +134,9 @@ pub fn build_object_store_driver(settings: &EnvSettings) -> Arc Arc::new(builder.build().unwrap()) } ObjectStoreType::AZURE => { - let builder = - MicrosoftAzureBuilder::new().with_allow_http(true).with_url(settings.azure_url.clone().unwrap()); + let builder = MicrosoftAzureBuilder::new() + .with_allow_http(true) + .with_url(settings.azure_account_url.clone().unwrap()); Arc::new(builder.build().unwrap()) } // Any other type is not supported for now @@ -250,7 +251,7 @@ pub struct EnvSettings { pub s3_region_name: String, pub s3_indexing_bucket: String, pub s3_endpoint: Option, - pub azure_url: Option, + pub azure_account_url: Option, } impl EnvSettings { @@ -331,7 +332,7 @@ impl Default for EnvSettings { s3_region_name: Default::default(), s3_indexing_bucket: Default::default(), s3_endpoint: None, - azure_url: Default::default(), + azure_account_url: Default::default(), } } } @@ -366,8 +367,8 @@ mod tests { let settings = from_pairs(&[("FILE_BACKEND", "s3")]).unwrap(); assert_eq!(settings.file_backend, super::ObjectStoreType::S3); - let azure_url = "https://myaccount.blob.core.windows.net/mycontainer/myblob"; - let settings = from_pairs(&[("FILE_BACKEND", "azure"), ("azure_url", azure_url)]).unwrap(); + let azure_account_url = "https://myaccount.blob.core.windows.net/mycontainer/myblob"; + let settings = from_pairs(&[("FILE_BACKEND", "azure"), ("azure_account_url", azure_account_url)]).unwrap(); assert_eq!(settings.file_backend, 
super::ObjectStoreType::AZURE); let settings = from_pairs(&[("FILE_BACKEND", "unknown")]).unwrap(); diff --git a/nucliadb_utils/requirements-storages.txt b/nucliadb_utils/requirements-storages.txt index e801eee4c4..ded2b7b69e 100644 --- a/nucliadb_utils/requirements-storages.txt +++ b/nucliadb_utils/requirements-storages.txt @@ -6,3 +6,4 @@ aiofiles>=0.8.0 backoff>=1.11.1 google-auth>=2.4.1 azure-storage-blob>=12.20.0 +azure-identity>=1.16.1 \ No newline at end of file diff --git a/nucliadb_utils/src/nucliadb_utils/settings.py b/nucliadb_utils/src/nucliadb_utils/settings.py index e8ad573a46..46cea9e76f 100644 --- a/nucliadb_utils/src/nucliadb_utils/settings.py +++ b/nucliadb_utils/src/nucliadb_utils/settings.py @@ -114,11 +114,16 @@ class StorageSettings(BaseSettings): description="Number of days that uploaded files are kept in Nulia's processing engine", ) - azure_connection_string: Optional[str] = Field( + azure_account_url: Optional[str] = Field( default=None, - description="Azure Storage connection string: https://docs.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string", # noqa + description="Azure Account URL. 
The driver implementation uses Azure's default credential authentication method: https://learn.microsoft.com/en-us/python/api/azure-identity/azure.identity.defaultazurecredential?view=azure-python", # noqa + examples=["https://<storage-account-name>.blob.core.windows.net"], ) + # For testing purposes: Azurite docker image requires a connection string as it + # doesn't support Azure's default credential authentication method + azure_connection_string: Optional[str] = None + storage_settings = StorageSettings() diff --git a/nucliadb_utils/src/nucliadb_utils/storages/azure.py b/nucliadb_utils/src/nucliadb_utils/storages/azure.py index 867a867831..417c2fce47 100644 --- a/nucliadb_utils/src/nucliadb_utils/storages/azure.py +++ b/nucliadb_utils/src/nucliadb_utils/storages/azure.py @@ -25,6 +25,7 @@ from typing import AsyncGenerator, AsyncIterator, Optional, Union from azure.core.exceptions import ResourceExistsError, ResourceNotFoundError +from azure.identity import DefaultAzureCredential from azure.storage.blob import BlobProperties, BlobType, ContentSettings from azure.storage.blob.aio import BlobServiceClient @@ -161,11 +162,12 @@ class AzureStorage(Storage): def __init__( self, - connection_string: str, + account_url: str, deadletter_bucket: str = "deadletter", indexing_bucket: str = "indexing", + connection_string: Optional[str] = None, ): - self.object_store = AzureObjectStore(connection_string) + self.object_store = AzureObjectStore(account_url, connection_string=connection_string) self.deadletter_bucket = deadletter_bucket self.indexing_bucket = indexing_bucket @@ -215,7 +217,8 @@ async def iterate_objects(self, bucket: str, prefix: str) -> AsyncGenerator[Obje class AzureObjectStore(ObjectStore): - def __init__(self, connection_string: str): + def __init__(self, account_url: str, connection_string: Optional[str] = None): + self.account_url = account_url self.connection_string = connection_string self._service_client: Optional[BlobServiceClient] = None @@ -226,7 +229,13 @@ def
service_client(self) -> BlobServiceClient: return self._service_client async def initialize(self): - self._service_client = BlobServiceClient.from_connection_string(self.connection_string) + if self.connection_string: + # For testing purposes + self._service_client = BlobServiceClient.from_connection_string(self.connection_string) + else: + self._service_client = BlobServiceClient( + self.account_url, credential=DefaultAzureCredential() + ) async def finalize(self): try: diff --git a/nucliadb_utils/src/nucliadb_utils/tests/azure.py b/nucliadb_utils/src/nucliadb_utils/tests/azure.py index 672f6e47fd..2998bd817e 100644 --- a/nucliadb_utils/src/nucliadb_utils/tests/azure.py +++ b/nucliadb_utils/src/nucliadb_utils/tests/azure.py @@ -74,6 +74,7 @@ class AzuriteFixture: port: int container: BaseImage connection_string: str + account_url: str def get_connection_string(host, port) -> str: @@ -99,6 +100,7 @@ def azurite() -> Generator[AzuriteFixture, None, None]: port=port, container=container.container_obj, connection_string=get_connection_string(host, port), + account_url=f"http://{host}:{port}/devstoreaccount1", ) finally: container.stop() @@ -107,6 +109,7 @@ def azurite() -> Generator[AzuriteFixture, None, None]: @pytest.fixture(scope="function") async def azure_storage(azurite): storage = AzureStorage( + account_url=azurite.account_url, connection_string=azurite.connection_string, ) MAIN[Utility.STORAGE] = storage diff --git a/nucliadb_utils/src/nucliadb_utils/utilities.py b/nucliadb_utils/src/nucliadb_utils/utilities.py index 850dec1e92..cd76ea3f8c 100644 --- a/nucliadb_utils/src/nucliadb_utils/utilities.py +++ b/nucliadb_utils/src/nucliadb_utils/utilities.py @@ -101,10 +101,11 @@ async def get_storage( if storage_settings.file_backend == FileBackendConfig.AZURE: from nucliadb_utils.storages.azure import AzureStorage - if storage_settings.azure_connection_string is None: - raise ConfigurationError("AZURE_CONNECTION_STRING env var not configured") + if 
storage_settings.azure_account_url is None: + raise ConfigurationError("AZURE_ACCOUNT_URL env variable not configured") azureutil = AzureStorage( + account_url=storage_settings.azure_account_url, connection_string=storage_settings.azure_connection_string, ) diff --git a/pdm.lock b/pdm.lock index 2d703b0ddd..d359032a02 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev"] strategy = ["cross_platform", "inherit_metadata"] lock_version = "4.4.1" -content_hash = "sha256:68bc0acdef65833172520fc60cdc0ce00ea6e87b9ec9a93f116b4486c2648784" +content_hash = "sha256:5a28ae05adc8b61de87787956726c77f4339e7e6ac80b6b6eb0aae11beda75f1" [[package]] name = "aiobotocore" @@ -309,7 +309,7 @@ name = "azure-core" version = "1.30.2" requires_python = ">=3.8" summary = "Microsoft Azure Core Library for Python" -groups = ["dev"] +groups = ["default"] dependencies = [ "requests>=2.21.0", "six>=1.11.0", @@ -321,20 +321,20 @@ files = [ ] [[package]] -name = "azure-storage-blob" -version = "12.20.0" +name = "azure-identity" +version = "1.16.1" requires_python = ">=3.8" -summary = "Microsoft Azure Blob Storage Client Library for Python" -groups = ["dev"] +summary = "Microsoft Azure Identity Library for Python" +groups = ["default"] dependencies = [ - "azure-core>=1.28.0", - "cryptography>=2.1.4", - "isodate>=0.6.1", - "typing-extensions>=4.6.0", + "azure-core>=1.23.0", + "cryptography>=2.5", + "msal-extensions>=0.3.0", + "msal>=1.24.0", ] files = [ - {file = "azure-storage-blob-12.20.0.tar.gz", hash = "sha256:eeb91256e41d4b5b9bad6a87fd0a8ade07dd58aa52344e2c8d2746e27a017d3b"}, - {file = "azure_storage_blob-12.20.0-py3-none-any.whl", hash = "sha256:de6b3bf3a90e9341a6bcb96a2ebe981dffff993e9045818f6549afea827a52a9"}, + {file = "azure-identity-1.16.1.tar.gz", hash = "sha256:6d93f04468f240d59246d8afde3091494a5040d4f141cad0f49fc0c399d0d91e"}, + {file = "azure_identity-1.16.1-py3-none-any.whl", hash = "sha256:8fb07c25642cd4ac422559a8b50d3e77f73dcc2bbfaba419d06d6c9d7cff6726"}, 
] [[package]] @@ -381,7 +381,7 @@ name = "certifi" version = "2024.6.2" requires_python = ">=3.6" summary = "Python package for providing Mozilla's CA Bundle." -groups = ["dev"] +groups = ["default", "dev"] files = [ {file = "certifi-2024.6.2-py3-none-any.whl", hash = "sha256:ddc6c8ce995e6987e7faf5e3f1b02b302836a0e5d98ece18392cb1a36c72ad56"}, {file = "certifi-2024.6.2.tar.gz", hash = "sha256:3cd43f1c6fa7dedc5899d69d3ad0398fd018ad1a17fba83ddaf78aa46c747516"}, @@ -392,7 +392,7 @@ name = "cffi" version = "1.16.0" requires_python = ">=3.8" summary = "Foreign Function Interface for Python calling C code." -groups = ["dev"] +groups = ["default", "dev"] dependencies = [ "pycparser", ] @@ -459,7 +459,7 @@ name = "charset-normalizer" version = "3.3.2" requires_python = ">=3.7.0" summary = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." -groups = ["dev"] +groups = ["default", "dev"] files = [ {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, @@ -663,7 +663,7 @@ name = "cryptography" version = "42.0.8" requires_python = ">=3.7" summary = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
-groups = ["dev"] +groups = ["default", "dev"] dependencies = [ "cffi>=1.12; platform_python_implementation != \"PyPy\"", ] @@ -1463,7 +1463,7 @@ name = "idna" version = "3.7" requires_python = ">=3.5" summary = "Internationalized Domain Names in Applications (IDNA)" -groups = ["dev"] +groups = ["default", "dev"] files = [ {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, @@ -1494,19 +1494,6 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] -[[package]] -name = "isodate" -version = "0.6.1" -summary = "An ISO 8601 date/time/duration parser and formatter" -groups = ["dev"] -dependencies = [ - "six", -] -files = [ - {file = "isodate-0.6.1-py2.py3-none-any.whl", hash = "sha256:0751eece944162659049d35f4f549ed815792b38793f07cf73381c1c87cbed96"}, - {file = "isodate-0.6.1.tar.gz", hash = "sha256:48c5881de7e8b0a0d648cb024c8062dc84e7b840ed81e864c7614fd3c127bde9"}, -] - [[package]] name = "jinja2" version = "3.1.4" @@ -1933,6 +1920,39 @@ files = [ {file = "mrflagly-0.2.8-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dbb1ed71c11a43a34b883c296b934ad9d7ec42526b4f358fac30f0300d1cbeea"}, ] +[[package]] +name = "msal" +version = "1.28.1" +requires_python = ">=3.7" +summary = "The Microsoft Authentication Library (MSAL) for Python library enables your app to access the Microsoft Cloud by supporting authentication of users with Microsoft Azure Active Directory accounts (AAD) and Microsoft Accounts (MSA) using industry standard OAuth2 and OpenID Connect." 
+groups = ["default"] +dependencies = [ + "PyJWT[crypto]<3,>=1.0.0", + "cryptography<45,>=2.5", + "requests<3,>=2.0.0", +] +files = [ + {file = "msal-1.28.1-py3-none-any.whl", hash = "sha256:563c2d70de77a2ca9786aab84cb4e133a38a6897e6676774edc23d610bfc9e7b"}, + {file = "msal-1.28.1.tar.gz", hash = "sha256:d72bbfe2d5c2f2555f4bc6205be4450ddfd12976610dd9a16a9ab0f05c68b64d"}, +] + +[[package]] +name = "msal-extensions" +version = "1.1.0" +requires_python = ">=3.7" +summary = "Microsoft Authentication Library extensions (MSAL EX) provides a persistence API that can save your data on disk, encrypted on Windows, macOS and Linux. Concurrent data access will be coordinated by a file lock mechanism." +groups = ["default"] +dependencies = [ + "msal<2.0.0,>=0.4.1", + "packaging", + "portalocker<3,>=1.0; platform_system != \"Windows\"", + "portalocker<3,>=1.6; platform_system == \"Windows\"", +] +files = [ + {file = "msal-extensions-1.1.0.tar.gz", hash = "sha256:6ab357867062db7b253d0bd2df6d411c7891a0ee7308d54d1e4317c1d1c54252"}, + {file = "msal_extensions-1.1.0-py3-none-any.whl", hash = "sha256:01be9711b4c0b1a151450068eeb2c4f0997df3bba085ac299de3a66f585e382f"}, +] + [[package]] name = "multidict" version = "6.0.5" @@ -2328,7 +2348,6 @@ groups = ["dev"] dependencies = [ "aiobotocore>=2.9.0", "aiofiles>=0.8.0", - "azure-storage-blob>=12.20.0", "backoff>=1.11.1", "fastapi>=0.95.2", "google-api-python-client>=2.37.0", @@ -2611,7 +2630,7 @@ name = "packaging" version = "24.0" requires_python = ">=3.7" summary = "Core utilities for Python packages" -groups = ["dev"] +groups = ["default", "dev"] files = [ {file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"}, {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"}, @@ -2654,6 +2673,20 @@ files = [ {file = "pluggy-1.5.0.tar.gz", hash = 
"sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, ] +[[package]] +name = "portalocker" +version = "2.8.2" +requires_python = ">=3.8" +summary = "Wraps the portalocker recipe for easy usage" +groups = ["default"] +dependencies = [ + "pywin32>=226; platform_system == \"Windows\"", +] +files = [ + {file = "portalocker-2.8.2-py3-none-any.whl", hash = "sha256:cfb86acc09b9aa7c3b43594e19be1345b9d16af3feb08bf92f23d4dce513a28e"}, + {file = "portalocker-2.8.2.tar.gz", hash = "sha256:2b035aa7828e46c58e9b31390ee1f169b98e1066ab10b9a6a861fe7e25ee4f33"}, +] + [[package]] name = "pre-commit" version = "2.20.0" @@ -2883,7 +2916,7 @@ name = "pycparser" version = "2.22" requires_python = ">=3.8" summary = "C parser in Python" -groups = ["dev"] +groups = ["default", "dev"] files = [ {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, @@ -3015,7 +3048,23 @@ name = "pyjwt" version = "2.8.0" requires_python = ">=3.7" summary = "JSON Web Token implementation in Python" -groups = ["dev"] +groups = ["default", "dev"] +files = [ + {file = "PyJWT-2.8.0-py3-none-any.whl", hash = "sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320"}, + {file = "PyJWT-2.8.0.tar.gz", hash = "sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de"}, +] + +[[package]] +name = "pyjwt" +version = "2.8.0" +extras = ["crypto"] +requires_python = ">=3.7" +summary = "JSON Web Token implementation in Python" +groups = ["default"] +dependencies = [ + "PyJWT==2.8.0", + "cryptography>=3.4.0", +] files = [ {file = "PyJWT-2.8.0-py3-none-any.whl", hash = "sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320"}, {file = "PyJWT-2.8.0.tar.gz", hash = "sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de"}, @@ -3264,8 +3313,8 @@ files = [ name = 
"pywin32" version = "306" summary = "Python for Window Extensions" -groups = ["dev"] -marker = "sys_platform == \"win32\"" +groups = ["default", "dev"] +marker = "sys_platform == \"win32\" or platform_system == \"Windows\"" files = [ {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"}, {file = "pywin32-306-cp310-cp310-win_amd64.whl", hash = "sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8"}, @@ -3354,7 +3403,7 @@ name = "requests" version = "2.32.3" requires_python = ">=3.8" summary = "Python HTTP for Humans." -groups = ["dev"] +groups = ["default", "dev"] dependencies = [ "certifi>=2017.4.17", "charset-normalizer<4,>=2", @@ -3574,7 +3623,7 @@ name = "six" version = "1.16.0" requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" summary = "Python 2 and 3 compatibility utilities" -groups = ["dev"] +groups = ["default", "dev"] files = [ {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, @@ -3818,7 +3867,7 @@ name = "typing-extensions" version = "4.12.1" requires_python = ">=3.8" summary = "Backported and Experimental Type Hints for Python 3.8+" -groups = ["dev"] +groups = ["default", "dev"] files = [ {file = "typing_extensions-4.12.1-py3-none-any.whl", hash = "sha256:6024b58b69089e5a89c347397254e35f1bf02a907728ec7fee9bf0fe837d203a"}, {file = "typing_extensions-4.12.1.tar.gz", hash = "sha256:915f5e35ff76f56588223f15fdd5938f9a1cf9195c0de25130c627e4d597f6d1"}, @@ -3917,7 +3966,7 @@ name = "urllib3" version = "1.26.18" requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" summary = "HTTP library with thread-safe connection pooling, file post, and more." 
-groups = ["dev"] +groups = ["default", "dev"] files = [ {file = "urllib3-1.26.18-py2.py3-none-any.whl", hash = "sha256:34b97092d7e0a3a8cf7cd10e386f401b3737364026c45e622aa02903dffe0f07"}, {file = "urllib3-1.26.18.tar.gz", hash = "sha256:f8ecc1bba5667413457c529ab955bf8c67b45db799d159066261719e328580a0"}, diff --git a/pyproject.toml b/pyproject.toml index e31b909859..64d4fdb733 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,6 +3,7 @@ name = "nucliadb_workspace" requires-python = ">=3.9" version = "0.0.0" dependencies = [ + "azure-identity>=1.16.1", ] [tool.pdm.dev-dependencies]