Handle None directory_id if managed identity encountered during the crawling of StoragePermissionMapping (#986)

While creating `StoragePermissionMapping`, a principal could be a managed
identity, which does not have a `directory_id`. This PR allows managed
identities to be stored in `StoragePermissionMapping` and allows a None
`directory_id`.


- Add a `type` field to the `StoragePermissionMapping` and `Principal`
dataclasses to indicate whether a principal is a service principal or a
managed identity.
- Allow a None `directory_id` if the principal is not a service principal.
- Ignore managed identities when migrating to UC storage credentials for now
(see the sketch after this list).
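
A minimal sketch of the resulting data shape, for illustration only: the field names follow this PR, but the sample mapping values are hypothetical, and the filter line merely mirrors the `type == "Application"` check added in `_generate_migration_list`.

```python
from dataclasses import dataclass


@dataclass
class StoragePermissionMapping:
    prefix: str
    client_id: str
    principal: str
    privilege: str
    # "Application" for a service principal, "ManagedIdentity" for a managed identity
    type: str
    # tenant/directory id; None when the principal is a managed identity
    directory_id: str | None = None


# Hypothetical sample data, for illustration only
mappings = [
    StoragePermissionMapping(
        prefix="abfss://container1@storage1.dfs.core.windows.net/",
        client_id="app-1",
        principal="spn-1",
        privilege="WRITE_FILES",
        type="Application",
        directory_id="0000-0000",
    ),
    StoragePermissionMapping(
        prefix="abfss://container2@storage1.dfs.core.windows.net/",
        client_id="app-2",
        principal="mi-1",
        privilege="READ_FILES",
        type="ManagedIdentity",  # directory_id defaults to None
    ),
]

# Only service principals are migrated to UC storage credentials for now;
# managed identities are skipped.
sp_list = [m for m in mappings if m.type == "Application"]
assert all(m.directory_id is not None for m in sp_list)
```

The migration code in this PR additionally asserts a non-None `directory_id` before creating a credential with a client secret (see `create_with_client_secret` in the diff below).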


fix #339

- [ ] added relevant user documentation
- [ ] added new CLI command
- [ ] modified existing command: `databricks labs ucx ...`
- [ ] added a new workflow
- [ ] modified existing workflow: `...`
- [ ] added a new table
- [ ] modified existing table: `...`


- [ ] manually tested
- [ ] added unit tests
- [ ] added integration tests
- [ ] verified on staging environment (screenshot attached)
qziyuan authored and nkvuong committed Mar 6, 2024
1 parent 9dba8ee commit 73a5ce0
Showing 8 changed files with 81 additions and 21 deletions.
4 changes: 3 additions & 1 deletion src/databricks/labs/ucx/azure/access.py
@@ -17,8 +17,9 @@ class StoragePermissionMapping:
client_id: str
principal: str
privilege: str
type: str
# Need this directory_id/tenant_id when creating UC storage credentials using a service principal
directory_id: str
directory_id: str | None = None


class AzureResourcePermissions:
@@ -65,6 +66,7 @@ def _map_storage(self, storage: AzureResource) -> list[StoragePermissionMapping]
client_id=role_assignment.principal.client_id,
principal=role_assignment.principal.display_name,
privilege=privilege,
type=role_assignment.principal.type,
directory_id=role_assignment.principal.directory_id,
)
)
12 changes: 9 additions & 3 deletions src/databricks/labs/ucx/azure/credentials.py
@@ -37,19 +37,19 @@ class ServicePrincipalMigrationInfo:
class StorageCredentialValidationResult:
name: str
application_id: str
directory_id: str
read_only: bool
validated_on: str
directory_id: str | None = None
failures: list[str] | None = None

@classmethod
def from_validation(cls, permission_mapping: StoragePermissionMapping, failures: list[str] | None):
return cls(
permission_mapping.principal,
permission_mapping.client_id,
permission_mapping.directory_id,
permission_mapping.privilege == Privilege.READ_FILES.value,
permission_mapping.prefix,
permission_mapping.directory_id,
failures,
)

@@ -85,6 +85,10 @@ def list(self, include_names: set[str] | None = None) -> set[str]:
return application_ids

def create_with_client_secret(self, spn: ServicePrincipalMigrationInfo) -> StorageCredentialInfo:
# This function should only be used to migrate service principals; fail the command here if
# it is misused to create a storage credential with a managed identity
assert spn.permission_mapping.directory_id is not None

# prepare the storage credential properties
name = spn.permission_mapping.principal
service_principal = AzureServicePrincipal(
@@ -228,7 +232,9 @@ def _generate_migration_list(self, include_names: set[str] | None = None) -> lis
Create the list of SP that need to be migrated, output an action plan as a csv file for users to confirm
"""
# load sp list from azure_storage_account_info.csv
sp_list = self._resource_permissions.load()
permission_mappings = self._resource_permissions.load()
# For now we only migrate Service Principal and ignore Managed Identity
sp_list = [mapping for mapping in permission_mappings if mapping.type == "Application"]
# list existed storage credentials
sc_set = self._storage_credential_manager.list(include_names)
# check if the sp is already used in UC storage credential
14 changes: 11 additions & 3 deletions src/databricks/labs/ucx/azure/resources.py
@@ -70,8 +70,12 @@ class Principal:
client_id: str
display_name: str
object_id: str
# service principal will have type: "Application"
# managed identity will have type: "ManagedIdentity"
type: str
# Need this directory_id/tenant_id when creating UC storage credentials using a service principal
directory_id: str
# it will be None if type is managed identity
directory_id: str | None = None


@dataclass
@@ -173,13 +177,17 @@ def _get_principal(self, principal_id: str) -> Principal | None:
client_id = raw.get("appId")
display_name = raw.get("displayName")
object_id = raw.get("id")
principal_type = raw.get("servicePrincipalType")
# Need this directory_id/tenant_id when creating UC storage credentials using a service principal
directory_id = raw.get("appOwnerOrganizationId")
assert client_id is not None
assert display_name is not None
assert object_id is not None
assert directory_id is not None
self._principals[principal_id] = Principal(client_id, display_name, object_id, directory_id)
assert principal_type is not None
if principal_type == "Application":
# service principal must have directory_id
assert directory_id is not None
self._principals[principal_id] = Principal(client_id, display_name, object_id, principal_type, directory_id)
return self._principals[principal_id]

def role_assignments(
20 changes: 18 additions & 2 deletions tests/integration/azure/test_credentials.py
@@ -1,10 +1,13 @@
import base64
import re
from dataclasses import dataclass
from datetime import timedelta

import pytest
from databricks.labs.blueprint.installation import MockInstallation
from databricks.labs.blueprint.tui import MockPrompts
from databricks.sdk.errors import InternalError, NotFound
from databricks.sdk.retries import retried

from databricks.labs.ucx.assessment.azure import AzureServicePrincipalInfo
from databricks.labs.ucx.azure.access import AzureResourcePermissions
@@ -68,6 +71,7 @@ def inner(
'client_id': test_info.application_id,
'principal': test_info.credential_name,
'privilege': "READ_FILES" if read_only else "WRITE_FILES",
'type': "Application",
'directory_id': test_info.directory_id,
},
]
@@ -97,7 +101,8 @@ def inner(
return inner


def test_spn_migration_existed_storage_credential(extract_test_info, make_storage_credential, run_migration):
@retried(on=[InternalError], timeout=timedelta(minutes=2))
def test_spn_migration_existed_storage_credential(extract_test_info, make_storage_credential_spn, run_migration):
# create a storage credential for this test
make_storage_credential(
credential_name=extract_test_info.credential_name,
@@ -113,13 +118,24 @@ def test_spn_migration_existed_storage_credential(extract_test_info, make_storag
assert not migration_result


def save_delete_credential(ws, name):
try:
ws.storage_credentials.delete(name, force=True)
except NotFound:
# If the test failed with an exception thrown before the storage credential was created,
# don't fail the test with a "storage credential cannot be deleted" error;
# instead, let the original exception be reported.
pass


@retried(on=[InternalError], timeout=timedelta(minutes=2))
@pytest.mark.parametrize("read_only", [False, True])
def test_spn_migration(ws, extract_test_info, run_migration, read_only):
try:
migration_results = run_migration(extract_test_info, {"lets_migrate_the_spn"}, read_only)
storage_credential = ws.storage_credentials.get(extract_test_info.credential_name)
finally:
ws.storage_credentials.delete(extract_test_info.credential_name, force=True)
save_delete_credential(ws, extract_test_info.credential_name)

assert storage_credential is not None
assert storage_credential.read_only is read_only
3 changes: 2 additions & 1 deletion tests/unit/azure/azure/mappings.json
@@ -9,13 +9,14 @@
"appId": "appIduser2",
"displayName": "disNameuser2",
"id": "Iduser2",
"servicePrincipalType": "Application",
"appOwnerOrganizationId": "0000-0000"
},
"/v1.0/directoryObjects/user3": {
"appId": "appIduser3",
"displayName": "disNameuser3",
"id": "Iduser3",
"appOwnerOrganizationId": "0000-0000"
"servicePrincipalType": "ManagedIdentity"
},
"/subscriptions": {
"value": [
10 changes: 6 additions & 4 deletions tests/unit/azure/test_access.py
@@ -68,13 +68,13 @@ def test_save_spn_permissions_valid_azure_storage_account():
AzureRoleAssignment(
resource=AzureResource(f'{containers}/container1'),
scope=AzureResource(f'{containers}/container1'),
principal=Principal('a', 'b', 'c', '0000-0000'),
principal=Principal('a', 'b', 'c', 'Application', '0000-0000'),
role_name='Storage Blob Data Contributor',
),
AzureRoleAssignment(
resource=AzureResource(f'{storage_accounts}/storage1'),
scope=AzureResource(f'{storage_accounts}/storage1'),
principal=Principal('d', 'e', 'f', '0000-0000'),
principal=Principal('d', 'e', 'f', 'Application', '0000-0000'),
role_name='Button Clicker',
),
]
@@ -88,13 +88,15 @@ def test_save_spn_permissions_valid_azure_storage_account():
'prefix': 'abfss://container1@storage1.dfs.core.windows.net/',
'principal': 'b',
'privilege': 'WRITE_FILES',
'type': 'Application',
'directory_id': '0000-0000',
},
{
'client_id': 'a',
'prefix': 'abfss://container2@storage1.dfs.core.windows.net/',
'principal': 'b',
'privilege': 'WRITE_FILES',
'type': 'Application',
'directory_id': '0000-0000',
},
],
@@ -134,14 +136,14 @@ def test_save_spn_permissions_valid_storage_accounts(caplog, mocker, az_token):
'prefix': 'abfss://container3@sto2.dfs.core.windows.net/',
'principal': 'disNameuser3',
'privilege': 'WRITE_FILES',
'directory_id': '0000-0000',
'type': 'ManagedIdentity',
},
{
'client_id': 'appIduser3',
'prefix': 'abfss://container3@sto2.dfs.core.windows.net/',
'principal': 'disNameuser3',
'privilege': 'WRITE_FILES',
'directory_id': '0000-0000',
'type': 'ManagedIdentity',
},
],
)
31 changes: 26 additions & 5 deletions tests/unit/azure/test_credentials.py
@@ -54,29 +54,40 @@ def installation():
'client_id': 'app_secret1',
'principal': 'principal_1',
'privilege': 'WRITE_FILES',
'type': 'Application',
'directory_id': 'directory_id_1',
},
{
'prefix': 'prefix2',
'client_id': 'app_secret2',
'principal': 'principal_read',
'privilege': 'READ_FILES',
'type': 'Application',
'directory_id': 'directory_id_1',
},
{
'prefix': 'prefix3',
'client_id': 'app_secret3',
'principal': 'principal_write',
'privilege': 'WRITE_FILES',
'type': 'Application',
'directory_id': 'directory_id_2',
},
{
'prefix': 'overlap_with_external_location',
'client_id': 'app_secret4',
'principal': 'principal_overlap',
'privilege': 'WRITE_FILES',
'type': 'Application',
'directory_id': 'directory_id_2',
},
{
'prefix': 'prefix5',
'client_id': 'app_secret4',
'principal': 'managed_identity',
'privilege': 'WRITE_FILES',
'type': 'ManagedIdentity',
},
],
}
)
@@ -154,6 +165,7 @@ def test_create_storage_credentials(credential_manager):
"app_secret1",
"principal_write",
"WRITE_FILES",
"Application",
"directory_id_1",
),
"test",
@@ -164,6 +176,7 @@ def test_create_storage_credentials(credential_manager):
"app_secret2",
"principal_read",
"READ_FILES",
"Application",
"directory_id_1",
),
"test",
@@ -179,7 +192,9 @@


def test_validate_storage_credentials(credential_manager):
permission_mapping = StoragePermissionMapping("prefix", "client_id", "principal_1", "WRITE_FILES", "directory_id")
permission_mapping = StoragePermissionMapping(
"prefix", "client_id", "principal_1", "WRITE_FILES", "Application", "directory_id"
)

# validate normal storage credential
validation = credential_manager.validate(permission_mapping)
@@ -190,7 +205,7 @@

def test_validate_read_only_storage_credentials(credential_manager):
permission_mapping = StoragePermissionMapping(
"prefix", "client_id", "principal_read", "READ_FILES", "directory_id_1"
"prefix", "client_id", "principal_read", "READ_FILES", "Application", "directory_id_1"
)

# validate read-only storage credential
@@ -201,7 +216,9 @@


def test_validate_storage_credentials_overlap_location(credential_manager):
permission_mapping = StoragePermissionMapping("prefix", "client_id", "overlap", "WRITE_FILES", "directory_id_2")
permission_mapping = StoragePermissionMapping(
"prefix", "client_id", "overlap", "WRITE_FILES", "Application", "directory_id_2"
)

# prefix used for validation overlaps with existing external location will raise InvalidParameterValue
# assert InvalidParameterValue is handled
@@ -212,14 +229,18 @@ def test_validate_storage_credentials_non_response(credential_manager):


def test_validate_storage_credentials_non_response(credential_manager):
permission_mapping = StoragePermissionMapping("prefix", "client_id", "none", "WRITE_FILES", "directory_id")
permission_mapping = StoragePermissionMapping(
"prefix", "client_id", "none", "WRITE_FILES", "Application", "directory_id"
)

validation = credential_manager.validate(permission_mapping)
assert validation.failures == ["Validation returned no results."]


def test_validate_storage_credentials_failed_operation(credential_manager):
permission_mapping = StoragePermissionMapping("prefix", "client_id", "fail", "WRITE_FILES", "directory_id_2")
permission_mapping = StoragePermissionMapping(
"prefix", "client_id", "fail", "WRITE_FILES", "Application", "directory_id_2"
)

validation = credential_manager.validate(permission_mapping)
assert validation.failures == ["LIST validation failed with message: fail"]
8 changes: 6 additions & 2 deletions tests/unit/azure/test_resources.py
@@ -61,7 +61,9 @@ def test_role_assignments_storage(mocker, az_token):
assert len(role_assignments) == 1
for role_assignment in role_assignments:
assert role_assignment.role_name == "Contributor"
assert role_assignment.principal == Principal("appIduser2", "disNameuser2", "Iduser2", "0000-0000")
assert role_assignment.principal == Principal(
"appIduser2", "disNameuser2", "Iduser2", "Application", "0000-0000"
)
assert str(role_assignment.scope) == resource_id
assert role_assignment.resource == AzureResource(resource_id)

@@ -75,6 +77,8 @@ def test_role_assignments_container(mocker, az_token):
assert len(role_assignments) == 1
for role_assignment in role_assignments:
assert role_assignment.role_name == "Contributor"
assert role_assignment.principal == Principal("appIduser2", "disNameuser2", "Iduser2", "0000-0000")
assert role_assignment.principal == Principal(
"appIduser2", "disNameuser2", "Iduser2", "Application", "0000-0000"
)
assert str(role_assignment.scope) == resource_id
assert role_assignment.resource == AzureResource(resource_id)
