From 18a7d2db18bab17f89c10f95e4f72db658eb1904 Mon Sep 17 00:00:00 2001 From: Hari Selvarajan <105197202+HariGS-DB@users.noreply.github.com> Date: Tue, 5 Mar 2024 19:15:52 +0000 Subject: [PATCH] Added `databricks labs ucx create-uber-principal` command to create Azure Service Principal for migration (#976) ## Changes - Added new cli cmd for create-master-principal in labs.yml, cli.py - Added separate class for AzureApiClient to separate out azure API calls - Added logic to create SPN, secret, roleassignment in resources and update workspace config with spn client_id - added logic to call create spn, update rbac of all storage account to that spn, update ucx cluster policy with spn secret for each storage account - test unit and int test cases Resolves #881 Related issues: - https://github.com/databrickslabs/ucx/pull/993 - https://github.com/databrickslabs/ucx/issues/693 ### Functionality - [ ] added relevant user documentation - [X] added new CLI command - [ ] modified existing command: `databricks labs ucx ...` - [ ] added a new workflow - [ ] modified existing workflow: `...` - [ ] added a new table - [ ] modified existing table: `...` ### Tests - [X] manually tested - [X] added unit tests - [X] added integration tests - [ ] verified on staging environment (screenshot attached) --- labs.yml | 7 + pyproject.toml | 2 +- src/databricks/labs/ucx/azure/access.py | 133 +++++++++++- src/databricks/labs/ucx/azure/credentials.py | 9 +- src/databricks/labs/ucx/azure/resources.py | 167 ++++++++++++--- src/databricks/labs/ucx/cli.py | 20 ++ src/databricks/labs/ucx/config.py | 1 + src/databricks/labs/ucx/install.py | 9 + tests/integration/azure/test_access.py | 75 ++++++- tests/integration/azure/test_credentials.py | 9 +- tests/unit/azure/__init__.py | 20 ++ tests/unit/azure/azure/mappings.json | 19 ++ tests/unit/azure/conftest.py | 15 -- tests/unit/azure/test_access.py | 211 +++++++++++++++---- tests/unit/azure/test_resources.py | 168 ++++++++++++--- tests/unit/test_cli.py | 29 +++ tests/unit/test_install.py | 35 +++ 17 files changed, 796 insertions(+), 133 deletions(-) delete mode 100644 tests/unit/azure/conftest.py diff --git a/labs.yml b/labs.yml index bc0dd5380c..bb92770f41 100644 --- a/labs.yml +++ b/labs.yml @@ -108,6 +108,13 @@ commands: - name: aws-profile description: AWS Profile to use for authentication + - name: create-uber-principal + description: For azure cloud, creates a service principal and gives STORAGE BLOB READER access on all the storage account + used by tables in the workspace and stores the spn info in the UCX cluster policy. + flags: + - name: subscription-id + description: Subscription to scan storage account in + - name: validate-groups-membership description: Validate groups to check if the groups at account level and workspace level have different memberships table_template: |- diff --git a/pyproject.toml b/pyproject.toml index 5149bfe0f9..0ee3b31742 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: CPython", ] dependencies = ["databricks-sdk~=0.20.0", - "databricks-labs-blueprint~=0.3.0", + "databricks-labs-blueprint~=0.3.1", "PyYAML>=6.0.0,<7.0.0"] [project.entry-points.databricks] diff --git a/src/databricks/labs/ucx/azure/access.py b/src/databricks/labs/ucx/azure/access.py index 130cab403c..8acf45f914 100644 --- a/src/databricks/labs/ucx/azure/access.py +++ b/src/databricks/labs/ucx/azure/access.py @@ -1,11 +1,20 @@ +import json +import uuid from dataclasses import dataclass from databricks.labs.blueprint.installation import Installation +from databricks.labs.blueprint.tui import Prompts from databricks.sdk import WorkspaceClient +from databricks.sdk.errors import NotFound, ResourceAlreadyExists from databricks.sdk.service.catalog import Privilege from databricks.labs.ucx.assessment.crawlers import logger -from databricks.labs.ucx.azure.resources import AzureResource, AzureResources +from databricks.labs.ucx.azure.resources import ( + AzureAPIClient, + AzureResource, + AzureResources, + PrincipalSecret, +) from databricks.labs.ucx.config import WorkspaceConfig from databricks.labs.ucx.framework.crawlers import StatementExecutionBackend from databricks.labs.ucx.hive_metastore.locations import ExternalLocations @@ -46,7 +55,12 @@ def for_cli(cls, ws: WorkspaceClient, product='ucx', include_subscriptions=None) installation = Installation.current(ws, product) config = installation.load(WorkspaceConfig) sql_backend = StatementExecutionBackend(ws, config.warehouse_id) - azurerm = AzureResources(ws, include_subscriptions=include_subscriptions) + azure_mgmt_client = AzureAPIClient( + ws.config.arm_environment.resource_manager_endpoint, + ws.config.arm_environment.service_management_endpoint, + ) + graph_client = AzureAPIClient("https://graph.microsoft.com", "https://graph.microsoft.com") + azurerm = AzureResources(azure_mgmt_client, graph_client, include_subscriptions) locations = ExternalLocations(ws, sql_backend, config.inventory_database) return cls(installation, ws, azurerm, locations) @@ -91,6 +105,121 @@ def save_spn_permissions(self) -> str | None: return None return self._installation.save(storage_account_infos, filename=self._filename) + def _update_cluster_policy_definition( + self, + policy_definition: str, + storage_accounts: list[AzureResource], + uber_principal: PrincipalSecret, + inventory_database: str, + ) -> str: + policy_dict = json.loads(policy_definition) + tenant_id = self._azurerm.tenant_id() + endpoint = f"https://login.microsoftonline.com/{tenant_id}/oauth2/token" + for storage in storage_accounts: + policy_dict[ + f"spark_conf.fs.azure.account.oauth2.client.id.{storage.storage_account}.dfs.core.windows.net" + ] = self._policy_config(uber_principal.client.client_id) + policy_dict[ + f"spark_conf.fs.azure.account.oauth.provider.type.{storage.storage_account}.dfs.core.windows.net" + ] = self._policy_config("org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider") + policy_dict[ + f"spark_conf.fs.azure.account.oauth2.client.endpoint.{storage.storage_account}.dfs.core.windows.net" + ] = self._policy_config(endpoint) + policy_dict[f"spark_conf.fs.azure.account.auth.type.{storage.storage_account}.dfs.core.windows.net"] = ( + self._policy_config("OAuth") + ) + policy_dict[ + f"spark_conf.fs.azure.account.oauth2.client.secret.{storage.storage_account}.dfs.core.windows.net" + ] = self._policy_config(f"{{secrets/{inventory_database}/uber_principal_secret}}") + return json.dumps(policy_dict) + + @staticmethod + def _policy_config(value: str): + return {"type": "fixed", "value": value} + + def _update_cluster_policy_with_spn( + self, + policy_id: str, + storage_accounts: list[AzureResource], + uber_principal: PrincipalSecret, + inventory_database: str, + ): + try: + policy_definition = "" + cluster_policy = self._ws.cluster_policies.get(policy_id) + + self._installation.save(cluster_policy, filename="policy-backup.json") + + if cluster_policy.definition is not None: + policy_definition = self._update_cluster_policy_definition( + cluster_policy.definition, storage_accounts, uber_principal, inventory_database + ) + if cluster_policy.name is not None: + self._ws.cluster_policies.edit(policy_id, cluster_policy.name, definition=policy_definition) + except NotFound: + msg = f"cluster policy {policy_id} not found, please run UCX installation to create UCX cluster policy" + raise NotFound(msg) from None + + def create_uber_principal(self, prompts: Prompts): + config = self._installation.load(WorkspaceConfig) + inventory_database = config.inventory_database + display_name = f"unity-catalog-migration-{inventory_database}-{self._ws.get_workspace_id()}" + uber_principal_name = prompts.question( + "Enter a name for the uber service principal to be created", default=display_name + ) + policy_id = config.policy_id + if policy_id is None: + msg = "UCX cluster policy not found in config. Please run latest UCX installation to set cluster policy" + logger.error(msg) + raise ValueError(msg) from None + if config.uber_spn_id is not None: + logger.warning("Uber service principal already created for this workspace.") + return + used_storage_accounts = self._get_storage_accounts() + if len(used_storage_accounts) == 0: + logger.warning( + "There are no external table present with azure storage account. " + "Please check if assessment job is run" + ) + return + storage_account_info = [] + for storage in self._azurerm.storage_accounts(): + if storage.storage_account in used_storage_accounts: + storage_account_info.append(storage) + logger.info("Creating service principal") + uber_principal = self._azurerm.create_service_principal(uber_principal_name) + self._create_scope(uber_principal, inventory_database) + config.uber_spn_id = uber_principal.client.client_id + logger.info( + f"Created service principal of client_id {config.uber_spn_id}. " f"Applying permission on storage accounts" + ) + try: + self._apply_storage_permission(storage_account_info, uber_principal) + self._installation.save(config) + self._update_cluster_policy_with_spn(policy_id, storage_account_info, uber_principal, inventory_database) + except PermissionError: + self._azurerm.delete_service_principal(uber_principal.client.object_id) + logger.info(f"Update UCX cluster policy {policy_id} with spn connection details for storage accounts") + + def _apply_storage_permission(self, storage_account_info: list[AzureResource], uber_principal: PrincipalSecret): + for storage in storage_account_info: + role_name = str(uuid.uuid4()) + self._azurerm.apply_storage_permission( + uber_principal.client.object_id, storage, "STORAGE_BLOB_DATA_READER", role_name + ) + logger.debug( + f"Storage Data Blob Reader permission applied for spn {uber_principal.client.client_id} " + f"to storage account {storage.storage_account}" + ) + + def _create_scope(self, uber_principal: PrincipalSecret, inventory_database: str): + logger.info(f"Creating secret scope {inventory_database}.") + try: + self._ws.secrets.create_scope(inventory_database) + except ResourceAlreadyExists: + logger.warning(f"Secret scope {inventory_database} already exists, using the same") + self._ws.secrets.put_secret(inventory_database, "uber_principal_secret", string_value=uber_principal.secret) + def load(self): return self._installation.load(list[StoragePermissionMapping], filename=self._filename) diff --git a/src/databricks/labs/ucx/azure/credentials.py b/src/databricks/labs/ucx/azure/credentials.py index 6ed416537e..e0e4e19895 100644 --- a/src/databricks/labs/ucx/azure/credentials.py +++ b/src/databricks/labs/ucx/azure/credentials.py @@ -18,7 +18,7 @@ AzureResourcePermissions, StoragePermissionMapping, ) -from databricks.labs.ucx.azure.resources import AzureResources +from databricks.labs.ucx.azure.resources import AzureAPIClient, AzureResources from databricks.labs.ucx.config import WorkspaceConfig from databricks.labs.ucx.framework.crawlers import StatementExecutionBackend from databricks.labs.ucx.hive_metastore.locations import ExternalLocations @@ -171,7 +171,12 @@ def for_cli(cls, ws: WorkspaceClient, installation: Installation, prompts: Promp config = installation.load(WorkspaceConfig) sql_backend = StatementExecutionBackend(ws, config.warehouse_id) - azurerm = AzureResources(ws) + azure_mgmt_client = AzureAPIClient( + ws.config.arm_environment.resource_manager_endpoint, + ws.config.arm_environment.service_management_endpoint, + ) + graph_client = AzureAPIClient("https://graph.microsoft.com", "https://graph.microsoft.com") + azurerm = AzureResources(azure_mgmt_client, graph_client) locations = ExternalLocations(ws, sql_backend, config.inventory_database) resource_permissions = AzureResourcePermissions(installation, ws, azurerm, locations) diff --git a/src/databricks/labs/ucx/azure/resources.py b/src/databricks/labs/ucx/azure/resources.py index 3d19253239..7ebd34d122 100644 --- a/src/databricks/labs/ucx/azure/resources.py +++ b/src/databricks/labs/ucx/azure/resources.py @@ -1,17 +1,19 @@ from collections.abc import Iterable from dataclasses import dataclass -from databricks.sdk import WorkspaceClient from databricks.sdk.core import ( ApiClient, AzureCliTokenSource, Config, credentials_provider, ) -from databricks.sdk.errors import NotFound +from databricks.sdk.errors import NotFound, PermissionDenied, ResourceConflict from databricks.labs.ucx.assessment.crawlers import logger +# https://learn.microsoft.com/en-us/azure/role-based-access-control/built-in-roles +_ROLES = {"STORAGE_BLOB_DATA_READER": "2a2b9908-6ea1-4ae2-8e65-a410df84e7d1"} + @dataclass class AzureSubscription: @@ -78,6 +80,12 @@ class Principal: directory_id: str | None = None +@dataclass +class PrincipalSecret: + client: Principal + secret: str + + @dataclass class AzureRoleAssignment: resource: AzureResource @@ -86,27 +94,15 @@ class AzureRoleAssignment: role_name: str -class AzureResources: - def __init__(self, ws: WorkspaceClient, *, include_subscriptions=None): - if not include_subscriptions: - include_subscriptions = [] - rm_host = ws.config.arm_environment.resource_manager_endpoint - self._resource_manager = ApiClient( - Config( - host=rm_host, - credentials_provider=self._provider_for(ws.config.arm_environment.service_management_endpoint), - ) - ) - self._graph = ApiClient( +class AzureAPIClient: + def __init__(self, host_endpoint: str, service_endpoint: str): + self.api_client = ApiClient( Config( - host="https://graph.microsoft.com", - credentials_provider=self._provider_for("https://graph.microsoft.com"), + host=host_endpoint, + credentials_provider=self._provider_for(service_endpoint), ) ) - self._token_source = AzureCliTokenSource(rm_host) - self._include_subscriptions = include_subscriptions - self._role_definitions = {} # type: dict[str, str] - self._principals: dict[str, Principal | None] = {} + self._token_source = AzureCliTokenSource(host_endpoint) def _provider_for(self, endpoint: str): @credentials_provider("azure-cli", ["host"]) @@ -121,20 +117,127 @@ def inner() -> dict[str, str]: return _credentials + def get(self, path: str, api_version: str | None = None): + headers = {"Accept": "application/json"} + query = {} + if api_version is not None: + query = {"api-version": api_version} + return self.api_client.do("GET", path, query, headers) + + def put(self, path: str, api_version: str | None = None, body: dict | None = None): + headers = {"Content-Type": "application/json"} + query: dict[str, str] = {} + if api_version is not None: + query = {"api-version": api_version} + if body is not None: + return self.api_client.do("PUT", path, query, headers, body) + return None + + def post(self, path: str, body: dict | None = None): + headers = {"Content-Type": "application/json"} + query: dict[str, str] = {} + if body is not None: + return self.api_client.do("POST", path, query, headers, body) + return self.api_client.do("POST", path, query, headers) + + def delete(self, path: str): + # this method is added only to be used in int test to delete the application once tests pass + headers = {"Content-Type": "application/json"} + query: dict[str, str] = {} + return self.api_client.do("DELETE", path, query, headers) + + def token(self): + return self._token_source.token() + + +class AzureResources: + def __init__(self, azure_mgmt: AzureAPIClient, azure_graph: AzureAPIClient, include_subscriptions=None): + if not include_subscriptions: + include_subscriptions = [] + self._mgmt = azure_mgmt + self._graph = azure_graph + self._include_subscriptions = include_subscriptions + self._role_definitions = {} # type: dict[str, str] + self._principals: dict[str, Principal | None] = {} + def _get_subscriptions(self) -> Iterable[AzureSubscription]: - for subscription in self._get_resource("/subscriptions", api_version="2022-12-01").get("value", []): + for subscription in self._mgmt.get("/subscriptions", "2022-12-01").get("value", []): yield AzureSubscription( name=subscription["displayName"], subscription_id=subscription["subscriptionId"], tenant_id=subscription["tenantId"], ) - def _tenant_id(self): - token = self._token_source.token() + def create_service_principal(self, display_name: str) -> PrincipalSecret: + try: + application_info: dict[str, str] = self._graph.post("/v1.0/applications", {"displayName": display_name}) + app_id = application_info.get("appId") + assert app_id is not None + service_principal_info: dict[str, str] = self._graph.post("/v1.0/servicePrincipals", {"appId": app_id}) + object_id = service_principal_info.get("id") + assert object_id is not None + secret_info: dict[str, str] = self._graph.post(f"/v1.0/servicePrincipals/{object_id}/addPassword") + + except PermissionDenied: + msg = ( + "Permission denied. Please run this cmd under the identity of a user who has" + " create service principal permission." + ) + logger.error(msg) + raise PermissionDenied(msg) from None + secret = secret_info.get("secretText") + client_id = service_principal_info.get("appId") + principal_type = service_principal_info.get("servicePrincipalType") + directory_id = service_principal_info.get("appOwnerOrganizationId") + assert client_id is not None + assert object_id is not None + assert principal_type is not None + assert directory_id is not None + assert secret is not None + return PrincipalSecret(Principal(client_id, display_name, object_id, principal_type, directory_id), secret) + + def delete_service_principal(self, principal_id: str): + try: + self._graph.delete(f"/v1.0/applications(appId='{principal_id}')") + except PermissionDenied: + msg = f"User doesnt have permission to delete application {principal_id}" + logger.error(msg) + raise PermissionDenied(msg) from None + + def apply_storage_permission(self, principal_id: str, resource: AzureResource, role_name: str, role_guid: str): + try: + role_id = _ROLES[role_name] + path = f"{str(resource)}/providers/Microsoft.Authorization/roleAssignments/{role_guid}" + role_definition_id = ( + f"/subscriptions/{resource.subscription_id}/providers/Microsoft.Authorization/roleDefinitions/{role_id}" + ) + body = { + "properties": { + "roleDefinitionId": role_definition_id, + "principalId": principal_id, + "principalType": "ServicePrincipal", + } + } + self._mgmt.put(path, "2022-04-01", body) + except ResourceConflict: + logger.warning( + f"Role assignment already exists for role {role_guid} on storage {resource.storage_account}" + f" for spn {principal_id}." + ) + except PermissionDenied: + msg = ( + "Permission denied. Please run this cmd under the identity of a user who has " + "create service principal permission." + ) + logger.error(msg) + raise PermissionDenied(msg) from None + + def tenant_id(self): + token = self._mgmt.token() return token.jwt_claims().get("tid") def subscriptions(self): - tenant_id = self._tenant_id() + tenant_id = self.tenant_id() for subscription in self._get_subscriptions(): if subscription.tenant_id != tenant_id: continue @@ -142,23 +245,18 @@ def subscriptions(self): continue yield subscription - def _get_resource(self, path: str, api_version: str): - headers = {"Accept": "application/json"} - query = {"api-version": api_version} - return self._resource_manager.do("GET", path, query=query, headers=headers) - def storage_accounts(self) -> Iterable[AzureResource]: for subscription in self.subscriptions(): logger.info(f"Checking in subscription {subscription.name} for storage accounts") path = f"/subscriptions/{subscription.subscription_id}/providers/Microsoft.Storage/storageAccounts" - for storage in self._get_resource(path, "2023-01-01").get("value", []): + for storage in self._mgmt.get(path, "2023-01-01").get("value", []): resource_id = storage.get("id") if not resource_id: continue yield AzureResource(resource_id) def containers(self, storage: AzureResource): - for raw in self._get_resource(f"{storage}/blobServices/default/containers", "2023-01-01").get("value", []): + for raw in self._mgmt.get(f"{storage}/blobServices/default/containers", "2023-01-01").get("value", []): resource_id = raw.get("id") if not resource_id: continue @@ -169,11 +267,12 @@ def _get_principal(self, principal_id: str) -> Principal | None: return self._principals[principal_id] try: path = f"/v1.0/directoryObjects/{principal_id}" - raw: dict[str, str] = self._graph.do("GET", path) # type: ignore[assignment] + raw: dict[str, str] = self._graph.get(path) # type: ignore[assignment] except NotFound: # don't load principals from external directories twice self._principals[principal_id] = None return self._principals[principal_id] + client_id = raw.get("appId") display_name = raw.get("displayName") object_id = raw.get("id") @@ -196,7 +295,7 @@ def role_assignments( """See https://learn.microsoft.com/en-us/rest/api/authorization/role-assignments/list-for-resource""" if not principal_types: principal_types = ["ServicePrincipal"] - result = self._get_resource(f"{resource_id}/providers/Microsoft.Authorization/roleAssignments", "2022-04-01") + result = self._mgmt.get(f"{resource_id}/providers/Microsoft.Authorization/roleAssignments", "2022-04-01") for role_assignment in result.get("value", []): assignment = self._role_assignment(role_assignment, resource_id, principal_types) if not assignment: @@ -235,7 +334,7 @@ def _role_assignment( def _role_name(self, role_definition_id) -> str | None: if role_definition_id not in self._role_definitions: - role_definition = self._get_resource(role_definition_id, "2022-04-01") + role_definition = self._mgmt.get(role_definition_id, "2022-04-01") definition_properties = role_definition.get("properties", {}) role_name: str = definition_properties.get("roleName") if not role_name: diff --git a/src/databricks/labs/ucx/cli.py b/src/databricks/labs/ucx/cli.py index da07aab4f4..18a6c0d9f0 100644 --- a/src/databricks/labs/ucx/cli.py +++ b/src/databricks/labs/ucx/cli.py @@ -324,5 +324,25 @@ def migrate_credentials(w: WorkspaceClient): logger.error("migrate_credentials is not yet supported in GCP") +@ucx.command +def create_uber_principal(w: WorkspaceClient, subscription_id: str): + """For azure cloud, creates a service principal and gives STORAGE BLOB READER access on all the storage account + used by tables in the workspace and stores the spn info in the UCX cluster policy.""" + if not w.config.is_azure: + logger.error("This command is only supported on azure workspaces.") + return + if w.config.auth_type != "azure-cli": + logger.error("In order to obtain AAD token, Please run azure cli to authenticate.") + return + if not subscription_id: + logger.error("Please enter subscription id to scan storage account in.") + return + prompts = Prompts() + include_subscriptions = [subscription_id] if subscription_id else None + azure_resource_permissions = AzureResourcePermissions.for_cli(w, include_subscriptions=include_subscriptions) + azure_resource_permissions.create_uber_principal(prompts) + return + + if __name__ == "__main__": ucx() diff --git a/src/databricks/labs/ucx/config.py b/src/databricks/labs/ucx/config.py index 3366ed8a9c..54796f5874 100644 --- a/src/databricks/labs/ucx/config.py +++ b/src/databricks/labs/ucx/config.py @@ -36,6 +36,7 @@ class WorkspaceConfig: # pylint: disable=too-many-instance-attributes override_clusters: dict[str, str] | None = None policy_id: str | None = None num_days_submit_runs_history: int = 30 + uber_spn_id: str | None = None # Flag to see if terraform has been used for deploying certain entities is_terraform_used: bool = False diff --git a/src/databricks/labs/ucx/install.py b/src/databricks/labs/ucx/install.py index 2c7d987bca..2b3b482876 100644 --- a/src/databricks/labs/ucx/install.py +++ b/src/databricks/labs/ucx/install.py @@ -942,6 +942,7 @@ def uninstall(self): self._remove_jobs() self._remove_warehouse() self._remove_policies() + self._remove_secret_scope() self._installation.remove() logger.info("UnInstalling UCX complete") @@ -961,6 +962,14 @@ def _remove_policies(self): except NotFound: logger.error("UCX Policy already deleted") + def _remove_secret_scope(self): + logger.info("Deleting secret scope") + try: + if self.config.uber_spn_id is not None: + self._ws.secrets.delete_scope(self.config.inventory_database) + except NotFound: + logger.error("Secret scope already deleted") + def _remove_jobs(self): logger.info("Deleting jobs") if not self._state.jobs: diff --git a/tests/integration/azure/test_access.py b/tests/integration/azure/test_access.py index 94c371addb..8cadfa2671 100644 --- a/tests/integration/azure/test_access.py +++ b/tests/integration/azure/test_access.py @@ -1,10 +1,13 @@ +import json import logging import pytest from databricks.labs.blueprint.installation import Installation +from databricks.labs.blueprint.tui import MockPrompts from databricks.labs.ucx.azure.access import AzureResourcePermissions -from databricks.labs.ucx.azure.resources import AzureResources +from databricks.labs.ucx.azure.resources import AzureAPIClient, AzureResources +from databricks.labs.ucx.config import WorkspaceConfig from databricks.labs.ucx.hive_metastore.locations import ( ExternalLocation, ExternalLocations, @@ -20,13 +23,17 @@ def test_azure_storage_accounts(ws, sql_backend, inventory_schema, make_random): ] sql_backend.save_table(f"{inventory_schema}.external_locations", tables, ExternalLocation) location = ExternalLocations(ws, sql_backend, inventory_schema) - installation = Installation(ws, make_random) - az_res_perm = AzureResourcePermissions(installation, ws, AzureResources(ws), location) + installation = Installation(ws, make_random(4)) + azure_mgmt_client = AzureAPIClient( + ws.config.arm_environment.resource_manager_endpoint, + ws.config.arm_environment.service_management_endpoint, + ) + graph_client = AzureAPIClient("https://graph.microsoft.com", "https://graph.microsoft.com") + azure_resources = AzureResources(azure_mgmt_client, graph_client) + az_res_perm = AzureResourcePermissions(installation, ws, azure_resources, location) az_res_perm.save_spn_permissions() - mapping = az_res_perm.load() - assert len(mapping) == 1 - assert mapping[0].prefix == "labsazurethings" + assert mapping[0].prefix == "abfss://things@labsazurethings.dfs.core.windows.net/" @pytest.mark.skip @@ -37,6 +44,60 @@ def test_save_spn_permissions_local(ws, sql_backend, inventory_schema, make_rand sql_backend.save_table(f"{inventory_schema}.external_locations", tables, ExternalLocation) location = ExternalLocations(ws, sql_backend, inventory_schema) installation = Installation(ws, make_random(4)) - az_res_perm = AzureResourcePermissions(installation, ws, AzureResources(ws, include_subscriptions=""), location) + azure_mgmt_client = AzureAPIClient( + ws.config.arm_environment.resource_manager_endpoint, + ws.config.arm_environment.service_management_endpoint, + ) + graph_client = AzureAPIClient("https://graph.microsoft.com", "https://graph.microsoft.com") + azure_resources = AzureResources(azure_mgmt_client, graph_client) + az_res_perm = AzureResourcePermissions(installation, ws, azure_resources, location) path = az_res_perm.save_spn_permissions() assert ws.workspace.get_status(path) + + +@pytest.mark.skip +def test_create_global_spn(ws, sql_backend, inventory_schema, make_random, make_cluster_policy, env_or_skip): + tables = [ + ExternalLocation("abfss://things@labsazurethings.dfs.core.windows.net/folder1", 1), + ] + sql_backend.save_table(f"{inventory_schema}.external_locations", tables, ExternalLocation) + installation = Installation(ws, make_random(4)) + policy = make_cluster_policy() + installation.save(WorkspaceConfig(inventory_database='ucx', policy_id=policy.policy_id)) + azure_mgmt_client = AzureAPIClient( + ws.config.arm_environment.resource_manager_endpoint, + ws.config.arm_environment.service_management_endpoint, + ) + graph_client = AzureAPIClient("https://graph.microsoft.com", "https://graph.microsoft.com") + azure_resources = AzureResources(azure_mgmt_client, graph_client) + az_res_perm = AzureResourcePermissions( + installation, ws, azure_resources, ExternalLocations(ws, sql_backend, inventory_schema) + ) + az_res_perm.create_uber_principal( + MockPrompts({"Enter a name for the uber service principal to be created*": "UCXServicePrincipal"}) + ) + config = installation.load(WorkspaceConfig) + assert config.uber_spn_id is not None + policy_definition = json.loads(ws.cluster_policies.get(policy_id=policy.policy_id).definition) + resource_id = env_or_skip("TEST_STORAGE_RESOURCE") + role_assignments = azure_resources.role_assignments(resource_id) + global_spn_assignment = None + for assignment in role_assignments: + if assignment.principal.client_id == config.uber_spn_id: + global_spn_assignment = assignment + break + assert global_spn_assignment + assert global_spn_assignment.principal.client_id == config.uber_spn_id + assert global_spn_assignment.role_name == "Storage Blob Data Reader" + assert str(global_spn_assignment.scope) == resource_id + assert ( + policy_definition["spark_conf.fs.azure.account.oauth2.client.id.labsazurethings.dfs.core.windows.net"]["value"] + == config.uber_spn_id + ) + assert ( + policy_definition["spark_conf.fs.azure.account.oauth2.client.endpoint.labsazurethings.dfs.core.windows.net"][ + "value" + ] + == "https://login.microsoftonline.com/9f37a392-f0ae-4280-9796-f1864a10effc/oauth2/token" + ) + graph_client.delete(f"/v1.0/applications(appId='{config.uber_spn_id}')") diff --git a/tests/integration/azure/test_credentials.py b/tests/integration/azure/test_credentials.py index 0df60389a1..4308bdeadb 100644 --- a/tests/integration/azure/test_credentials.py +++ b/tests/integration/azure/test_credentials.py @@ -16,7 +16,7 @@ StorageCredentialManager, StorageCredentialValidationResult, ) -from databricks.labs.ucx.azure.resources import AzureResources +from databricks.labs.ucx.azure.resources import AzureAPIClient, AzureResources from databricks.labs.ucx.hive_metastore import ExternalLocations from tests.integration.conftest import StaticServicePrincipalCrawler @@ -60,7 +60,12 @@ def run_migration(ws, sql_backend): def inner( test_info: MigrationTestInfo, credentials: set[str], read_only=False ) -> list[StorageCredentialValidationResult]: - azurerm = AzureResources(ws) + azure_mgmt_client = AzureAPIClient( + ws.config.arm_environment.resource_manager_endpoint, + ws.config.arm_environment.service_management_endpoint, + ) + graph_client = AzureAPIClient("https://graph.microsoft.com", "https://graph.microsoft.com") + azurerm = AzureResources(azure_mgmt_client, graph_client) locations = ExternalLocations(ws, sql_backend, "dont_need_a_schema") installation = MockInstallation( diff --git a/tests/unit/azure/__init__.py b/tests/unit/azure/__init__.py index ac74c5eb56..60b8a9189f 100644 --- a/tests/unit/azure/__init__.py +++ b/tests/unit/azure/__init__.py @@ -1,5 +1,11 @@ +import base64 import json import pathlib +from unittest.mock import create_autospec + +from databricks.sdk.oauth import Token + +from databricks.labs.ucx.azure.resources import AzureAPIClient __dir = pathlib.Path(__file__).parent @@ -11,6 +17,20 @@ def _load_fixture(filename: str): def get_az_api_mapping(*args, **_): mapping = _load_fixture("azure/mappings.json")[0] + if args[0] in mapping: + return mapping[args[0]] if args[1] in mapping: return mapping[args[1]] return {} + + +def azure_api_client(): + token = json.dumps({"aud": "foo", "tid": "bar"}).encode("utf-8") + str_token = base64.b64encode(token).decode("utf-8").replace("=", "") + tok = Token(access_token=f"header.{str_token}.sig") + api_client = create_autospec(AzureAPIClient) + api_client.token.return_value = tok + api_client.get.side_effect = get_az_api_mapping + api_client.put.side_effect = get_az_api_mapping + api_client.post.side_effect = get_az_api_mapping + return api_client diff --git a/tests/unit/azure/azure/mappings.json b/tests/unit/azure/azure/mappings.json index 55611c77ac..e595c90f72 100644 --- a/tests/unit/azure/azure/mappings.json +++ b/tests/unit/azure/azure/mappings.json @@ -178,6 +178,25 @@ "id": "rol2" } ] + }, + "/v1.0/servicePrincipals": { + "appId": "appIduser1", + "displayName": "disNameuser1", + "id": "Iduser1", + "appOwnerOrganizationId": "dir1", + "servicePrincipalType": "Application" + }, + "/v1.0/servicePrincipals/Iduser1/addPassword": { + "secretText": "mypwd" + }, + "/resourceid1/providers/Microsoft.Authorization/roleAssignments/2a2b9908-6ea1-4ae2-8e65-a410df84e7d1?api-version=2022-04-01": { + "foo": "bar" + }, + "subscriptions/002/resourceGroups/rg1/storageAccounts/sto2/providers/Microsoft.Authorization/roleAssignments/12345": { + "foo": "bar" + }, + "/v1.0/applications": { + "appId": "appIduser1" } } ] \ No newline at end of file diff --git a/tests/unit/azure/conftest.py b/tests/unit/azure/conftest.py deleted file mode 100644 index 8ded511369..0000000000 --- a/tests/unit/azure/conftest.py +++ /dev/null @@ -1,15 +0,0 @@ -import base64 -import json - -import pytest -from databricks.sdk.oauth import Token - -pytest.register_assert_rewrite('databricks.labs.blueprint.installation') - - -@pytest.fixture -def az_token(mocker): - token = json.dumps({"aud": "foo", "tid": "bar"}).encode("utf-8") - str_token = base64.b64encode(token).decode("utf-8").replace("=", "") - tok = Token(access_token=f"header.{str_token}.sig") - mocker.patch("databricks.sdk.oauth.Refreshable.token", return_value=tok) diff --git a/tests/unit/azure/test_access.py b/tests/unit/azure/test_access.py index 1c77fdc227..aa569d2b6a 100644 --- a/tests/unit/azure/test_access.py +++ b/tests/unit/azure/test_access.py @@ -1,7 +1,13 @@ -from unittest.mock import create_autospec +import json +from unittest.mock import call, create_autospec +import pytest from databricks.labs.blueprint.installation import MockInstallation +from databricks.labs.blueprint.tui import MockPrompts from databricks.sdk import WorkspaceClient +from databricks.sdk.errors import NotFound +from databricks.sdk.service.compute import Policy +from databricks.sdk.service.workspace import GetSecretResponse from databricks.labs.ucx.azure.access import AzureResourcePermissions from databricks.labs.ucx.azure.resources import ( @@ -13,7 +19,7 @@ from databricks.labs.ucx.hive_metastore import ExternalLocations from ..framework.mocks import MockBackend -from . import get_az_api_mapping +from . import azure_api_client def test_save_spn_permissions_no_external_table(caplog): @@ -22,23 +28,37 @@ def test_save_spn_permissions_no_external_table(caplog): backend = MockBackend(rows=rows) location = ExternalLocations(w, backend, "ucx") installation = MockInstallation() - azure_resource_permission = AzureResourcePermissions( - installation, w, AzureResources(w, include_subscriptions="002"), location - ) + azure_resources = create_autospec(AzureResources) + azure_resource_permission = AzureResourcePermissions(installation, w, azure_resources, location) + azure_resources.storage_accounts.return_value = [] azure_resource_permission.save_spn_permissions() msg = "There are no external table present with azure storage account. Please check if assessment job is run" assert [rec.message for rec in caplog.records if msg in rec.message] -def test_save_spn_permissions_no_azure_storage_account(): +def test_save_spn_permissions_no_external_tables(): w = create_autospec(WorkspaceClient) rows = {"SELECT \\* FROM ucx.external_locations": [["s3://bucket1/folder1", "0"]]} backend = MockBackend(rows=rows) location = ExternalLocations(w, backend, "ucx") installation = MockInstallation() - azure_resource_permission = AzureResourcePermissions( - installation, w, AzureResources(w, include_subscriptions="002"), location - ) + azure_resources = create_autospec(AzureResources) + azure_resource_permission = AzureResourcePermissions(installation, w, azure_resources, location) + azure_resources.storage_accounts.return_value = [] + assert not azure_resource_permission.save_spn_permissions() + + +def test_save_spn_permissions_no_azure_storage_account(): + w = create_autospec(WorkspaceClient) + rows = { + "SELECT \\* FROM ucx.external_locations": [["abfss://container1@storage1.dfs.core.windows.net/folder1", "1"]] + } + backend = MockBackend(rows=rows) + location = ExternalLocations(w, backend, "ucx") + installation = MockInstallation() + azure_resources = create_autospec(AzureResources) + azure_resource_permission = AzureResourcePermissions(installation, w, azure_resources, location) + azure_resources.storage_accounts.return_value = [] assert not azure_resource_permission.save_spn_permissions() @@ -103,47 +123,156 @@ def test_save_spn_permissions_valid_azure_storage_account(): ) -def test_save_spn_permissions_no_valid_storage_accounts(caplog, mocker, az_token): +def test_create_global_spn_no_policy(): + w = create_autospec(WorkspaceClient) + location = ExternalLocations(w, MockBackend(), "ucx") + installation = MockInstallation( + { + 'config.yml': { + 'inventory_database': 'ucx', + 'connect': { + 'host': 'foo', + 'token': 'bar', + }, + } + } + ) + azure_resources = create_autospec(AzureResources) + azure_resource_permission = AzureResourcePermissions(installation, w, azure_resources, location) + prompts = MockPrompts({"Enter a name for the uber service principal to be created*": "UCXServicePrincipal"}) + with pytest.raises(ValueError): + azure_resource_permission.create_uber_principal(prompts) + + +def test_create_global_spn_spn_present(): + w = create_autospec(WorkspaceClient) + location = ExternalLocations(w, MockBackend(), "ucx") + installation = MockInstallation( + { + 'config.yml': { + 'inventory_database': 'ucx', + 'policy_id': 'foo1', + 'uber_spn_id': '123', + 'connect': { + 'host': 'foo', + 'token': 'bar', + }, + } + } + ) + azure_resources = create_autospec(AzureResources) + prompts = MockPrompts({"Enter a name for the uber service principal to be created*": "UCXServicePrincipal"}) + azure_resource_permission = AzureResourcePermissions(installation, w, azure_resources, location) + assert not azure_resource_permission.create_uber_principal(prompts) + + +def test_create_global_spn_no_storage(): w = create_autospec(WorkspaceClient) - rows = {"SELECT \\* FROM ucx.external_locations": [["abfss://continer1@sto3.dfs.core.windows.net/folder1", 1]]} - mocker.patch("databricks.sdk.core.ApiClient.do", side_effect=get_az_api_mapping) + rows = {"SELECT \\* FROM ucx.external_locations": [["s3://bucket1/folder1", "0"]]} backend = MockBackend(rows=rows) + installation = MockInstallation( + { + 'config.yml': { + 'inventory_database': 'ucx', + 'policy_id': 'foo1', + 'connect': { + 'host': 'foo', + 'token': 'bar', + }, + } + } + ) location = ExternalLocations(w, backend, "ucx") - installation = MockInstallation() - azure_resource_permission = AzureResourcePermissions( - installation, w, AzureResources(w, include_subscriptions="002"), location + prompts = MockPrompts({"Enter a name for the uber service principal to be created*": "UCXServicePrincipal"}) + azure_resources = create_autospec(AzureResources) + azure_resource_permission = AzureResourcePermissions(installation, w, azure_resources, location) + assert not azure_resource_permission.create_uber_principal(prompts) + + +def test_create_global_spn_cluster_policy_not_found(mocker): + w = create_autospec(WorkspaceClient) + w.cluster_policies.get.side_effect = NotFound() + rows = {"SELECT \\* FROM ucx.external_locations": [["abfss://container1@sto2.dfs.core.windows.net/folder1", "1"]]} + backend = MockBackend(rows=rows) + location = ExternalLocations(w, backend, "ucx") + installation = MockInstallation( + { + 'config.yml': { + 'inventory_database': 'ucx', + 'policy_id': 'foo1', + 'connect': { + 'host': 'foo', + 'token': 'bar', + }, + } + } ) - azure_resource_permission.save_spn_permissions() - assert [rec.message for rec in caplog.records if "No storage account found in current tenant" in rec.message] + api_client = azure_api_client() + prompts = MockPrompts({"Enter a name for the uber service principal to be created*": "UCXServicePrincipal"}) + azure_resources = AzureResources(api_client, api_client, include_subscriptions="002") + azure_resource_permission = AzureResourcePermissions(installation, w, azure_resources, location) + with pytest.raises(NotFound): + azure_resource_permission.create_uber_principal(prompts) -def test_save_spn_permissions_valid_storage_accounts(caplog, mocker, az_token): +def test_create_global_spn(mocker): w = create_autospec(WorkspaceClient) - rows = {"SELECT \\* FROM ucx.external_locations": [["abfss://continer1@sto2.dfs.core.windows.net/folder1", 1]]} - mocker.patch("databricks.sdk.core.ApiClient.do", side_effect=get_az_api_mapping) + cluster_policy = Policy( + policy_id="foo", name="Unity Catalog Migration (ucx) (me@example.com)", definition=json.dumps({"foo": "bar"}) + ) + w.cluster_policies.get.return_value = cluster_policy + w.secrets.get_secret.return_value = GetSecretResponse("uber_principal_secret", "mypwd") + rows = {"SELECT \\* FROM ucx.external_locations": [["abfss://container1@sto2.dfs.core.windows.net/folder1", "1"]]} backend = MockBackend(rows=rows) location = ExternalLocations(w, backend, "ucx") - installation = MockInstallation() - azure_resource_permission = AzureResourcePermissions( - installation, w, AzureResources(w, include_subscriptions="002"), location + installation = MockInstallation( + { + 'config.yml': { + 'inventory_database': 'ucx', + 'policy_id': 'foo1', + 'connect': { + 'host': 'foo', + 'token': 'bar', + }, + } + } ) - azure_resource_permission.save_spn_permissions() + api_client = azure_api_client() + prompts = MockPrompts({"Enter a name for the uber service principal to be created*": "UCXServicePrincipal"}) + azure_resources = AzureResources(api_client, api_client, include_subscriptions="002") + azure_resource_permission = AzureResourcePermissions(installation, w, azure_resources, location) + azure_resource_permission.create_uber_principal(prompts) installation.assert_file_written( - 'azure_storage_account_info.csv', - [ - { - 'client_id': 'appIduser3', - 'prefix': 'abfss://container3@sto2.dfs.core.windows.net/', - 'principal': 'disNameuser3', - 'privilege': 'WRITE_FILES', - 'type': 'ManagedIdentity', - }, - { - 'client_id': 'appIduser3', - 'prefix': 'abfss://container3@sto2.dfs.core.windows.net/', - 'principal': 'disNameuser3', - 'privilege': 'WRITE_FILES', - 'type': 'ManagedIdentity', - }, - ], + 'policy-backup.json', + {'definition': '{"foo": "bar"}', 'name': 'Unity Catalog Migration (ucx) (me@example.com)', 'policy_id': 'foo'}, + ) + call_1 = call("/v1.0/applications", {"displayName": "UCXServicePrincipal"}) + call_2 = call("/v1.0/servicePrincipals", {"appId": "appIduser1"}) + call_3 = call("/v1.0/servicePrincipals/Iduser1/addPassword") + api_client.post.assert_has_calls([call_1, call_2, call_3], any_order=True) + api_client.put.assert_called_once() + definition = { + "foo": "bar", + "spark_conf.fs.azure.account.oauth2.client.id.sto2.dfs.core.windows.net": { + "type": "fixed", + "value": "appIduser1", + }, + "spark_conf.fs.azure.account.oauth.provider.type.sto2.dfs.core.windows.net": { + "type": "fixed", + "value": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider", + }, + "spark_conf.fs.azure.account.oauth2.client.endpoint.sto2.dfs.core.windows.net": { + "type": "fixed", + "value": "https://login.microsoftonline.com/bar/oauth2/token", + }, + "spark_conf.fs.azure.account.auth.type.sto2.dfs.core.windows.net": {"type": "fixed", "value": "OAuth"}, + "spark_conf.fs.azure.account.oauth2.client.secret.sto2.dfs.core.windows.net": { + "type": "fixed", + "value": "{secrets/ucx/uber_principal_secret}", + }, + } + w.cluster_policies.edit.assert_called_with( + 'foo1', 'Unity Catalog Migration (ucx) (me@example.com)', definition=json.dumps(definition) ) + w.secrets.create_scope.assert_called_with("ucx") + w.secrets.put_secret.assert_called_with("ucx", "uber_principal_secret", string_value="mypwd") diff --git a/tests/unit/azure/test_resources.py b/tests/unit/azure/test_resources.py index 807145ccb8..8cf3c96500 100644 --- a/tests/unit/azure/test_resources.py +++ b/tests/unit/azure/test_resources.py @@ -1,34 +1,36 @@ -from unittest.mock import create_autospec +import pytest +from databricks.sdk.errors import PermissionDenied, ResourceConflict -from databricks.sdk import WorkspaceClient +from databricks.labs.ucx.azure.resources import ( + AzureAPIClient, + AzureResource, + AzureResources, + Principal, +) -from databricks.labs.ucx.azure.resources import AzureResource, AzureResources, Principal +from . import azure_api_client, get_az_api_mapping -from . import get_az_api_mapping - -def test_subscriptions_no_subscription(mocker, az_token): - w = create_autospec(WorkspaceClient) - mocker.patch("databricks.sdk.core.ApiClient.do", side_effect=get_az_api_mapping) - azure_resource = AzureResources(w, include_subscriptions="001") +def test_subscriptions_no_subscription(mocker): + api_client = azure_api_client() + azure_resource = AzureResources(api_client, api_client, include_subscriptions="001") subscriptions = list(azure_resource.subscriptions()) + assert len(subscriptions) == 0 -def test_subscriptions_valid_subscription(mocker, az_token): - w = create_autospec(WorkspaceClient) - mocker.patch("databricks.sdk.core.ApiClient.do", side_effect=get_az_api_mapping) - azure_resource = AzureResources(w, include_subscriptions="002") +def test_subscriptions_valid_subscription(mocker): + api_client = azure_api_client() + azure_resource = AzureResources(api_client, api_client, include_subscriptions="002") subscriptions = list(azure_resource.subscriptions()) assert len(subscriptions) == 1 for subscription in subscriptions: assert subscription.name == "sub2" -def test_storage_accounts(mocker, az_token): - w = create_autospec(WorkspaceClient) - mocker.patch("databricks.sdk.core.ApiClient.do", side_effect=get_az_api_mapping) - azure_resource = AzureResources(w, include_subscriptions="002") +def test_storage_accounts(mocker): + api_client = azure_api_client() + azure_resource = AzureResources(api_client, api_client, include_subscriptions="002") storage_accounts = list(azure_resource.storage_accounts()) assert len(storage_accounts) == 2 for storage_account in storage_accounts: @@ -38,10 +40,9 @@ def test_storage_accounts(mocker, az_token): assert storage_account.subscription_id == "002" -def test_containers(mocker, az_token): - w = create_autospec(WorkspaceClient) - mocker.patch("databricks.sdk.core.ApiClient.do", side_effect=get_az_api_mapping) - azure_resource = AzureResources(w, include_subscriptions="002") +def test_containers(mocker): + api_client = azure_api_client() + azure_resource = AzureResources(api_client, api_client, include_subscriptions="002") azure_storage = AzureResource("subscriptions/002/resourceGroups/rg1/storageAccounts/sto2") containers = list(azure_resource.containers(azure_storage)) assert len(containers) == 3 @@ -52,10 +53,9 @@ def test_containers(mocker, az_token): assert container.subscription_id == "002" -def test_role_assignments_storage(mocker, az_token): - w = create_autospec(WorkspaceClient) - mocker.patch("databricks.sdk.core.ApiClient.do", side_effect=get_az_api_mapping) - azure_resource = AzureResources(w, include_subscriptions="002") +def test_role_assignments_storage(mocker): + api_client = azure_api_client() + azure_resource = AzureResources(api_client, api_client, include_subscriptions="002") resource_id = "subscriptions/002/resourceGroups/rg1/storageAccounts/sto2" role_assignments = list(azure_resource.role_assignments(resource_id)) assert len(role_assignments) == 1 @@ -68,10 +68,9 @@ def test_role_assignments_storage(mocker, az_token): assert role_assignment.resource == AzureResource(resource_id) -def test_role_assignments_container(mocker, az_token): - w = create_autospec(WorkspaceClient) - mocker.patch("databricks.sdk.core.ApiClient.do", side_effect=get_az_api_mapping) - azure_resource = AzureResources(w, include_subscriptions="002") +def test_role_assignments_container(mocker): + api_client = azure_api_client() + azure_resource = AzureResources(api_client, api_client, include_subscriptions="002") resource_id = "subscriptions/002/resourceGroups/rg1/storageAccounts/sto2/containers/container1" role_assignments = list(azure_resource.role_assignments(resource_id)) assert len(role_assignments) == 1 @@ -82,3 +81,114 @@ def test_role_assignments_container(mocker, az_token): ) assert str(role_assignment.scope) == resource_id assert role_assignment.resource == AzureResource(resource_id) + + +def test_create_service_principal(mocker): + api_client = azure_api_client() + azure_resource = AzureResources(api_client, api_client) + global_spn = azure_resource.create_service_principal("disNameuser1") + assert global_spn.client.client_id == "appIduser1" + assert global_spn.client.object_id == "Iduser1" + assert global_spn.client.display_name == "disNameuser1" + assert global_spn.client.directory_id == "dir1" + assert global_spn.secret == "mypwd" + + +def test_create_service_principal_no_access(mocker): + api_client = azure_api_client() + api_client.post.side_effect = PermissionDenied() + azure_resource = AzureResources(api_client, api_client) + with pytest.raises(PermissionDenied): + azure_resource.create_service_principal("disNameuser1") + + +def test_apply_storage_permission(mocker): + api_client = azure_api_client() + azure_resource = AzureResources(api_client, api_client) + azure_storage = AzureResource("subscriptions/002/resourceGroups/rg1/storageAccounts/sto2") + azure_resource.apply_storage_permission("test", azure_storage, "STORAGE_BLOB_DATA_READER", "12345") + path = "subscriptions/002/resourceGroups/rg1/storageAccounts/sto2/providers/Microsoft.Authorization/roleAssignments/12345" + body = { + 'properties': { + 'principalId': 'test', + 'principalType': 'ServicePrincipal', + 'roleDefinitionId': '/subscriptions/002/providers/Microsoft.Authorization/roleDefinitions/2a2b9908-6ea1-4ae2-8e65-a410df84e7d1', + } + } + + api_client.put.assert_called_with(path, "2022-04-01", body) + + +def test_apply_storage_permission_no_access(mocker): + api_client = azure_api_client() + api_client.put.side_effect = PermissionDenied() + azure_storage = AzureResource("subscriptions/002/resourceGroups/rg1/storageAccounts/sto2") + azure_resource = AzureResources(api_client, api_client) + with pytest.raises(PermissionDenied): + azure_resource.apply_storage_permission("test", azure_storage, "STORAGE_BLOB_DATA_READER", "12345") + + +def test_delete_service_principal_no_access(mocker): + api_client = azure_api_client() + api_client.delete.side_effect = PermissionDenied() + azure_resource = AzureResources(api_client, api_client) + with pytest.raises(PermissionDenied): + azure_resource.delete_service_principal("test") + + +def test_delete_service_principal(mocker): + api_client = azure_api_client() + azure_resource = AzureResources(api_client, api_client) + azure_resource.delete_service_principal("test") + api_client.delete.assert_called_with("/v1.0/applications(appId='test')") + + +def test_apply_storage_permission_assignment_present(mocker): + api_client = azure_api_client() + api_client.put.side_effect = ResourceConflict() + azure_storage = AzureResource("subscriptions/002/resourceGroups/rg1/storageAccounts/sto2") + azure_resource = AzureResources(api_client, api_client) + azure_resource.apply_storage_permission("test", azure_storage, "STORAGE_BLOB_DATA_READER", "12345") + path = "subscriptions/002/resourceGroups/rg1/storageAccounts/sto2/providers/Microsoft.Authorization/roleAssignments/12345" + body = { + 'properties': { + 'principalId': 'test', + 'principalType': 'ServicePrincipal', + 'roleDefinitionId': '/subscriptions/002/providers/Microsoft.Authorization/roleDefinitions/2a2b9908-6ea1-4ae2-8e65-a410df84e7d1', + } + } + api_client.put.assert_called_with(path, "2022-04-01", body) + + +def test_azure_client_api_put_graph(mocker): + api_client = AzureAPIClient("foo", "bar") + api_client.api_client.do = get_az_api_mapping + result = api_client.post("/v1.0/servicePrincipals", {"foo": "bar"}) + assert result.get("appId") == "appIduser1" + + +def test_azure_client_api_put_mgmt(mocker): + api_client = AzureAPIClient("foo", "bar") + api_client.api_client.do = get_az_api_mapping + result = api_client.put("id001", "2022-04-01", {"foo": "bar"}) + assert result.get("id") == "role1" + + +def test_azure_client_api_get_azure(mocker): + api_client = AzureAPIClient("foo", "bar") + api_client.api_client.do = get_az_api_mapping + subscriptions = api_client.get("/subscriptions") + assert len(subscriptions) == 1 + + +def test_azure_client_api_get_graph(mocker): + api_client = AzureAPIClient("foo", "bar") + api_client.api_client.do = get_az_api_mapping + principal = api_client.get("/v1.0/directoryObjects/user1", "2022-02-01") + assert principal.get("appId") == "appIduser1" + + +def test_azure_client_api_delete_spn(mocker): + api_client = AzureAPIClient("foo", "bar") + api_client.api_client.do = get_az_api_mapping + api_client.delete("/applications/1234") diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index 6561742b5c..3187b0a79e 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -13,6 +13,7 @@ alias, create_account_groups, create_table_mapping, + create_uber_principal, ensure_assessment_run, installations, manual_workspace_info, @@ -328,3 +329,31 @@ def test_migrate_credentials_azure(ws): with patch("databricks.labs.blueprint.tui.Prompts.confirm", return_value=True): migrate_credentials(ws) ws.storage_credentials.list.assert_called() + + +def test_create_master_principal_not_azure(ws): + ws.config.is_azure = False + create_uber_principal(ws, subscription_id="") + ws.workspace.get_status.assert_not_called() + + +def test_create_master_principal_no_azure_cli(ws): + ws.config.auth_type = "azure_clis" + ws.config.is_azure = True + create_uber_principal(ws, subscription_id="") + ws.workspace.get_status.assert_not_called() + + +def test_create_master_principal_no_subscription(ws): + ws.config.auth_type = "azure-cli" + ws.config.is_azure = True + create_uber_principal(ws, subscription_id="") + ws.workspace.get_status.assert_not_called() + + +def test_create_master_principal(ws): + ws.config.auth_type = "azure-cli" + ws.config.is_azure = True + with patch("databricks.labs.blueprint.tui.Prompts.question", return_value=True): + with pytest.raises(ValueError): + create_uber_principal(ws, subscription_id="12") diff --git a/tests/unit/test_install.py b/tests/unit/test_install.py index 371f2e7c89..14b9e5a7d6 100644 --- a/tests/unit/test_install.py +++ b/tests/unit/test_install.py @@ -941,6 +941,41 @@ def test_not_remove_warehouse_with_a_different_prefix(ws): ws.warehouses.delete.assert_not_called() +def test_remove_secret_scope(ws, caplog): + wheels = create_autospec(WheelsV2) + prompts = MockPrompts( + { + r'Do you want to uninstall ucx.*': 'yes', + 'Do you want to delete the inventory database ucx too?': 'no', + } + ) + installation = MockInstallation() + config = WorkspaceConfig(inventory_database='ucx', uber_spn_id="123") + timeout = timedelta(seconds=1) + # ws.secrets.delete_scope.side_effect = NotFound() + workspace_installation = WorkspaceInstallation(config, installation, MockBackend(), wheels, ws, prompts, timeout) + workspace_installation.uninstall() + ws.secrets.delete_scope.assert_called_with('ucx') + + +def test_remove_secret_scope_no_scope(ws, caplog): + wheels = create_autospec(WheelsV2) + prompts = MockPrompts( + { + r'Do you want to uninstall ucx.*': 'yes', + 'Do you want to delete the inventory database ucx too?': 'no', + } + ) + installation = MockInstallation() + config = WorkspaceConfig(inventory_database='ucx', uber_spn_id="123") + timeout = timedelta(seconds=1) + ws.secrets.delete_scope.side_effect = NotFound() + workspace_installation = WorkspaceInstallation(config, installation, MockBackend(), wheels, ws, prompts, timeout) + with caplog.at_level('ERROR'): + workspace_installation.uninstall() + assert 'Secret scope already deleted' in caplog.messages + + def test_remove_cluster_policy_not_exists(ws, caplog): sql_backend = MockBackend() wheels = create_autospec(WheelsV2)