From 73a5ce0a4facc4e996cd0e8e4d9612d2c6f2f329 Mon Sep 17 00:00:00 2001 From: qziyuan <91635877+qziyuan@users.noreply.github.com> Date: Thu, 29 Feb 2024 01:22:31 -0800 Subject: [PATCH] Handle None directory_id if managed identity encountered during the crawling of StoragePermissionMapping (#986) While creating StoragePermissionMapping, a principal could be managed identity which does not have directory_id. This PR will allow managed identity to be stored in StoragePermissionMapping, and allow None directory_id. - Add `type` field to dataclass `StoragePermissionMapping` and `Principal` to indicate if a principal is service principal or managed identity. - Allow None `directory_id` if the principal is not a service principal. - Ignore the managed identity while migrating to UC storage credentials for now. fix #339 - [ ] added relevant user documentation - [ ] added new CLI command - [ ] modified existing command: `databricks labs ucx ...` - [ ] added a new workflow - [ ] modified existing workflow: `...` - [ ] added a new table - [ ] modified existing table: `...` - [ ] manually tested - [ ] added unit tests - [ ] added integration tests - [ ] verified on staging environment (screenshot attached) --- src/databricks/labs/ucx/azure/access.py | 4 ++- src/databricks/labs/ucx/azure/credentials.py | 12 ++++++-- src/databricks/labs/ucx/azure/resources.py | 14 +++++++-- tests/integration/azure/test_credentials.py | 20 +++++++++++-- tests/unit/azure/azure/mappings.json | 3 +- tests/unit/azure/test_access.py | 10 ++++--- tests/unit/azure/test_credentials.py | 31 ++++++++++++++++---- tests/unit/azure/test_resources.py | 8 +++-- 8 files changed, 81 insertions(+), 21 deletions(-) diff --git a/src/databricks/labs/ucx/azure/access.py b/src/databricks/labs/ucx/azure/access.py index 78a8f7f917..130cab403c 100644 --- a/src/databricks/labs/ucx/azure/access.py +++ b/src/databricks/labs/ucx/azure/access.py @@ -17,8 +17,9 @@ class StoragePermissionMapping: client_id: str principal: str privilege: str + type: str # Need this directory_id/tenant_id when create UC storage credentials using service principal - directory_id: str + directory_id: str | None = None class AzureResourcePermissions: @@ -65,6 +66,7 @@ def _map_storage(self, storage: AzureResource) -> list[StoragePermissionMapping] client_id=role_assignment.principal.client_id, principal=role_assignment.principal.display_name, privilege=privilege, + type=role_assignment.principal.type, directory_id=role_assignment.principal.directory_id, ) ) diff --git a/src/databricks/labs/ucx/azure/credentials.py b/src/databricks/labs/ucx/azure/credentials.py index 961a7d765e..d34fef5e68 100644 --- a/src/databricks/labs/ucx/azure/credentials.py +++ b/src/databricks/labs/ucx/azure/credentials.py @@ -37,9 +37,9 @@ class ServicePrincipalMigrationInfo: class StorageCredentialValidationResult: name: str application_id: str - directory_id: str read_only: bool validated_on: str + directory_id: str | None = None failures: list[str] | None = None @classmethod @@ -47,9 +47,9 @@ def from_validation(cls, permission_mapping: StoragePermissionMapping, failures: return cls( permission_mapping.principal, permission_mapping.client_id, - permission_mapping.directory_id, permission_mapping.privilege == Privilege.READ_FILES.value, permission_mapping.prefix, + permission_mapping.directory_id, failures, ) @@ -85,6 +85,10 @@ def list(self, include_names: set[str] | None = None) -> set[str]: return application_ids def create_with_client_secret(self, spn: ServicePrincipalMigrationInfo) -> StorageCredentialInfo: + # this function should only be used to migrate service principal, fail the command here if + # it's misused to create storage credential with managed identity + assert spn.permission_mapping.directory_id is not None + # prepare the storage credential properties name = spn.permission_mapping.principal service_principal = AzureServicePrincipal( @@ -228,7 +232,9 @@ def _generate_migration_list(self, include_names: set[str] | None = None) -> lis Create the list of SP that need to be migrated, output an action plan as a csv file for users to confirm """ # load sp list from azure_storage_account_info.csv - sp_list = self._resource_permissions.load() + permission_mappings = self._resource_permissions.load() + # For now we only migrate Service Principal and ignore Managed Identity + sp_list = [mapping for mapping in permission_mappings if mapping.type == "Application"] # list existed storage credentials sc_set = self._storage_credential_manager.list(include_names) # check if the sp is already used in UC storage credential diff --git a/src/databricks/labs/ucx/azure/resources.py b/src/databricks/labs/ucx/azure/resources.py index 6c86d464b6..3d19253239 100644 --- a/src/databricks/labs/ucx/azure/resources.py +++ b/src/databricks/labs/ucx/azure/resources.py @@ -70,8 +70,12 @@ class Principal: client_id: str display_name: str object_id: str + # service principal will have type: "Application" + # managed identity will have type: "ManagedIdentity" + type: str # Need this directory_id/tenant_id when create UC storage credentials using service principal - directory_id: str + # it will be None if type is managed identity + directory_id: str | None = None @dataclass @@ -173,13 +177,17 @@ def _get_principal(self, principal_id: str) -> Principal | None: client_id = raw.get("appId") display_name = raw.get("displayName") object_id = raw.get("id") + principal_type = raw.get("servicePrincipalType") # Need this directory_id/tenant_id when create UC storage credentials using service principal directory_id = raw.get("appOwnerOrganizationId") assert client_id is not None assert display_name is not None assert object_id is not None - assert directory_id is not None - self._principals[principal_id] = Principal(client_id, display_name, object_id, directory_id) + assert principal_type is not None + if principal_type == "Application": + # service principal must have directory_id + assert directory_id is not None + self._principals[principal_id] = Principal(client_id, display_name, object_id, principal_type, directory_id) return self._principals[principal_id] def role_assignments( diff --git a/tests/integration/azure/test_credentials.py b/tests/integration/azure/test_credentials.py index d330646245..25c8d29588 100644 --- a/tests/integration/azure/test_credentials.py +++ b/tests/integration/azure/test_credentials.py @@ -1,10 +1,13 @@ import base64 import re from dataclasses import dataclass +from datetime import timedelta import pytest from databricks.labs.blueprint.installation import MockInstallation from databricks.labs.blueprint.tui import MockPrompts +from databricks.sdk.errors import InternalError, NotFound +from databricks.sdk.retries import retried from databricks.labs.ucx.assessment.azure import AzureServicePrincipalInfo from databricks.labs.ucx.azure.access import AzureResourcePermissions @@ -68,6 +71,7 @@ def inner( 'client_id': test_info.application_id, 'principal': test_info.credential_name, 'privilege': "READ_FILES" if read_only else "WRITE_FILES", + 'type': "Application", 'directory_id': test_info.directory_id, }, ] @@ -97,7 +101,8 @@ def inner( return inner -def test_spn_migration_existed_storage_credential(extract_test_info, make_storage_credential, run_migration): +@retried(on=[InternalError], timeout=timedelta(minutes=2)) +def test_spn_migration_existed_storage_credential(extract_test_info, make_storage_credential_spn, run_migration): # create a storage credential for this test make_storage_credential( credential_name=extract_test_info.credential_name, @@ -113,13 +118,24 @@ def test_spn_migration_existed_storage_credential(extract_test_info, make_storag assert not migration_result +def save_delete_credential(ws, name): + try: + ws.storage_credentials.delete(name, force=True) + except NotFound: + # If test failed with exception threw before storage credential is created, + # don't fail the test with storage credential cannot be deleted error, + # instead let the original exception be reported. + pass + + +@retried(on=[InternalError], timeout=timedelta(minutes=2)) @pytest.mark.parametrize("read_only", [False, True]) def test_spn_migration(ws, extract_test_info, run_migration, read_only): try: migration_results = run_migration(extract_test_info, {"lets_migrate_the_spn"}, read_only) storage_credential = ws.storage_credentials.get(extract_test_info.credential_name) finally: - ws.storage_credentials.delete(extract_test_info.credential_name, force=True) + save_delete_credential(ws, extract_test_info.credential_name) assert storage_credential is not None assert storage_credential.read_only is read_only diff --git a/tests/unit/azure/azure/mappings.json b/tests/unit/azure/azure/mappings.json index 7df33168fd..55611c77ac 100644 --- a/tests/unit/azure/azure/mappings.json +++ b/tests/unit/azure/azure/mappings.json @@ -9,13 +9,14 @@ "appId": "appIduser2", "displayName": "disNameuser2", "id": "Iduser2", + "servicePrincipalType": "Application", "appOwnerOrganizationId": "0000-0000" }, "/v1.0/directoryObjects/user3": { "appId": "appIduser3", "displayName": "disNameuser3", "id": "Iduser3", - "appOwnerOrganizationId": "0000-0000" + "servicePrincipalType": "ManagedIdentity" }, "/subscriptions": { "value": [ diff --git a/tests/unit/azure/test_access.py b/tests/unit/azure/test_access.py index 04b84d0e72..1c77fdc227 100644 --- a/tests/unit/azure/test_access.py +++ b/tests/unit/azure/test_access.py @@ -68,13 +68,13 @@ def test_save_spn_permissions_valid_azure_storage_account(): AzureRoleAssignment( resource=AzureResource(f'{containers}/container1'), scope=AzureResource(f'{containers}/container1'), - principal=Principal('a', 'b', 'c', '0000-0000'), + principal=Principal('a', 'b', 'c', 'Application', '0000-0000'), role_name='Storage Blob Data Contributor', ), AzureRoleAssignment( resource=AzureResource(f'{storage_accounts}/storage1'), scope=AzureResource(f'{storage_accounts}/storage1'), - principal=Principal('d', 'e', 'f', '0000-0000'), + principal=Principal('d', 'e', 'f', 'Application', '0000-0000'), role_name='Button Clicker', ), ] @@ -88,6 +88,7 @@ def test_save_spn_permissions_valid_azure_storage_account(): 'prefix': 'abfss://container1@storage1.dfs.core.windows.net/', 'principal': 'b', 'privilege': 'WRITE_FILES', + 'type': 'Application', 'directory_id': '0000-0000', }, { @@ -95,6 +96,7 @@ def test_save_spn_permissions_valid_azure_storage_account(): 'prefix': 'abfss://container2@storage1.dfs.core.windows.net/', 'principal': 'b', 'privilege': 'WRITE_FILES', + 'type': 'Application', 'directory_id': '0000-0000', }, ], @@ -134,14 +136,14 @@ def test_save_spn_permissions_valid_storage_accounts(caplog, mocker, az_token): 'prefix': 'abfss://container3@sto2.dfs.core.windows.net/', 'principal': 'disNameuser3', 'privilege': 'WRITE_FILES', - 'directory_id': '0000-0000', + 'type': 'ManagedIdentity', }, { 'client_id': 'appIduser3', 'prefix': 'abfss://container3@sto2.dfs.core.windows.net/', 'principal': 'disNameuser3', 'privilege': 'WRITE_FILES', - 'directory_id': '0000-0000', + 'type': 'ManagedIdentity', }, ], ) diff --git a/tests/unit/azure/test_credentials.py b/tests/unit/azure/test_credentials.py index 511a8b7e85..c9530dbea8 100644 --- a/tests/unit/azure/test_credentials.py +++ b/tests/unit/azure/test_credentials.py @@ -54,6 +54,7 @@ def installation(): 'client_id': 'app_secret1', 'principal': 'principal_1', 'privilege': 'WRITE_FILES', + 'type': 'Application', 'directory_id': 'directory_id_1', }, { @@ -61,6 +62,7 @@ def installation(): 'client_id': 'app_secret2', 'principal': 'principal_read', 'privilege': 'READ_FILES', + 'type': 'Application', 'directory_id': 'directory_id_1', }, { @@ -68,6 +70,7 @@ def installation(): 'client_id': 'app_secret3', 'principal': 'principal_write', 'privilege': 'WRITE_FILES', + 'type': 'Application', 'directory_id': 'directory_id_2', }, { @@ -75,8 +78,16 @@ def installation(): 'client_id': 'app_secret4', 'principal': 'principal_overlap', 'privilege': 'WRITE_FILES', + 'type': 'Application', 'directory_id': 'directory_id_2', }, + { + 'prefix': 'prefix5', + 'client_id': 'app_secret4', + 'principal': 'managed_identity', + 'privilege': 'WRITE_FILES', + 'type': 'ManagedIdentity', + }, ], } ) @@ -154,6 +165,7 @@ def test_create_storage_credentials(credential_manager): "app_secret1", "principal_write", "WRITE_FILES", + "Application", "directory_id_1", ), "test", @@ -164,6 +176,7 @@ def test_create_storage_credentials(credential_manager): "app_secret2", "principal_read", "READ_FILES", + "Application", "directory_id_1", ), "test", @@ -179,7 +192,9 @@ def test_create_storage_credentials(credential_manager): def test_validate_storage_credentials(credential_manager): - permission_mapping = StoragePermissionMapping("prefix", "client_id", "principal_1", "WRITE_FILES", "directory_id") + permission_mapping = StoragePermissionMapping( + "prefix", "client_id", "principal_1", "WRITE_FILES", "Application", "directory_id" + ) # validate normal storage credential validation = credential_manager.validate(permission_mapping) @@ -190,7 +205,7 @@ def test_validate_storage_credentials(credential_manager): def test_validate_read_only_storage_credentials(credential_manager): permission_mapping = StoragePermissionMapping( - "prefix", "client_id", "principal_read", "READ_FILES", "directory_id_1" + "prefix", "client_id", "principal_read", "READ_FILES", "Application", "directory_id_1" ) # validate read-only storage credential @@ -201,7 +216,9 @@ def test_validate_read_only_storage_credentials(credential_manager): def test_validate_storage_credentials_overlap_location(credential_manager): - permission_mapping = StoragePermissionMapping("prefix", "client_id", "overlap", "WRITE_FILES", "directory_id_2") + permission_mapping = StoragePermissionMapping( + "prefix", "client_id", "overlap", "WRITE_FILES", "Application", "directory_id_2" + ) # prefix used for validation overlaps with existing external location will raise InvalidParameterValue # assert InvalidParameterValue is handled @@ -212,14 +229,18 @@ def test_validate_storage_credentials_overlap_location(credential_manager): def test_validate_storage_credentials_non_response(credential_manager): - permission_mapping = StoragePermissionMapping("prefix", "client_id", "none", "WRITE_FILES", "directory_id") + permission_mapping = StoragePermissionMapping( + "prefix", "client_id", "none", "WRITE_FILES", "Application", "directory_id" + ) validation = credential_manager.validate(permission_mapping) assert validation.failures == ["Validation returned no results."] def test_validate_storage_credentials_failed_operation(credential_manager): - permission_mapping = StoragePermissionMapping("prefix", "client_id", "fail", "WRITE_FILES", "directory_id_2") + permission_mapping = StoragePermissionMapping( + "prefix", "client_id", "fail", "WRITE_FILES", "Application", "directory_id_2" + ) validation = credential_manager.validate(permission_mapping) assert validation.failures == ["LIST validation failed with message: fail"] diff --git a/tests/unit/azure/test_resources.py b/tests/unit/azure/test_resources.py index c0c468a20c..807145ccb8 100644 --- a/tests/unit/azure/test_resources.py +++ b/tests/unit/azure/test_resources.py @@ -61,7 +61,9 @@ def test_role_assignments_storage(mocker, az_token): assert len(role_assignments) == 1 for role_assignment in role_assignments: assert role_assignment.role_name == "Contributor" - assert role_assignment.principal == Principal("appIduser2", "disNameuser2", "Iduser2", "0000-0000") + assert role_assignment.principal == Principal( + "appIduser2", "disNameuser2", "Iduser2", "Application", "0000-0000" + ) assert str(role_assignment.scope) == resource_id assert role_assignment.resource == AzureResource(resource_id) @@ -75,6 +77,8 @@ def test_role_assignments_container(mocker, az_token): assert len(role_assignments) == 1 for role_assignment in role_assignments: assert role_assignment.role_name == "Contributor" - assert role_assignment.principal == Principal("appIduser2", "disNameuser2", "Iduser2", "0000-0000") + assert role_assignment.principal == Principal( + "appIduser2", "disNameuser2", "Iduser2", "Application", "0000-0000" + ) assert str(role_assignment.scope) == resource_id assert role_assignment.resource == AzureResource(resource_id)