diff --git a/src/databricks/labs/ucx/contexts/application.py b/src/databricks/labs/ucx/contexts/application.py index b21bdff4fe..24132afd1f 100644 --- a/src/databricks/labs/ucx/contexts/application.py +++ b/src/databricks/labs/ucx/contexts/application.py @@ -28,7 +28,7 @@ from databricks.labs.ucx.assessment.export import AssessmentExporter from databricks.labs.ucx.aws.credentials import CredentialManager from databricks.labs.ucx.config import WorkspaceConfig -from databricks.labs.ucx.framework.owners import AdministratorLocator +from databricks.labs.ucx.framework.owners import AdministratorLocator, WorkspacePathOwnership from databricks.labs.ucx.hive_metastore import ExternalLocations, MountsCrawler, TablesCrawler from databricks.labs.ucx.hive_metastore.catalog_schema import CatalogSchema from databricks.labs.ucx.hive_metastore.grants import ( @@ -265,6 +265,10 @@ def tables_crawler(self) -> TablesCrawler: def table_ownership(self) -> TableOwnership: return TableOwnership(self.administrator_locator) + @cached_property + def workspace_path_ownership(self) -> WorkspacePathOwnership: + return WorkspacePathOwnership(self.administrator_locator, self.workspace_client) + @cached_property def tables_migrator(self) -> TablesMigrator: return TablesMigrator( diff --git a/src/databricks/labs/ucx/framework/owners.py b/src/databricks/labs/ucx/framework/owners.py index a8ca725ed7..0350ecfd60 100644 --- a/src/databricks/labs/ucx/framework/owners.py +++ b/src/databricks/labs/ucx/framework/owners.py @@ -1,12 +1,16 @@ import logging from abc import ABC, abstractmethod from collections.abc import Callable, Iterable, Sequence +from datetime import timedelta from functools import cached_property from typing import Generic, TypeVar, final +from databricks.labs.blueprint.paths import WorkspacePath from databricks.sdk import WorkspaceClient -from databricks.sdk.errors import NotFound -from databricks.sdk.service.iam import User +from databricks.sdk.errors import NotFound, InternalError +from databricks.sdk.retries import retried +from databricks.sdk.service.iam import User, PermissionLevel +from databricks.sdk.service.workspace import ObjectType logger = logging.getLogger(__name__) @@ -190,3 +194,46 @@ def owner_of(self, record: Record) -> str: def _maybe_direct_owner(self, record: Record) -> str | None: """Obtain the record-specific user-name associated with the given record, if any.""" return None + + +class WorkspacePathOwnership(Ownership[WorkspacePath]): + def __init__(self, administrator_locator: AdministratorLocator, ws: WorkspaceClient) -> None: + super().__init__(administrator_locator) + self._ws = ws + + @retried(on=[InternalError], timeout=timedelta(minutes=1)) + def _maybe_direct_owner(self, record: WorkspacePath) -> str | None: + maybe_type_and_id = self._maybe_type_and_id(record) + if not maybe_type_and_id: + return None + object_type, object_id = maybe_type_and_id + try: + object_permissions = self._ws.permissions.get(object_type, object_id) + return self._infer_from_first_can_manage(object_permissions) + except NotFound: + logger.warning(f"removed on backend: {object_type} {object_id}") + return None + + @staticmethod + def _maybe_type_and_id(path: WorkspacePath) -> tuple[str, str] | None: + object_info = path._object_info # pylint: disable=protected-access + object_id = str(object_info.object_id) + match object_info.object_type: + case ObjectType.NOTEBOOK: + return 'notebooks', object_id + case ObjectType.FILE: + return 'files', object_id + return None + + @staticmethod + def _infer_from_first_can_manage(object_permissions): + for acl in object_permissions.access_control_list: + for permission in acl.all_permissions: + if permission.permission_level != PermissionLevel.CAN_MANAGE: + continue + if acl.user_name: + return acl.user_name + if acl.group_name: + return acl.group_name + return acl.service_principal_name + return None diff --git a/tests/integration/framework/test_owners.py b/tests/integration/framework/test_owners.py index e570378f06..756ddff362 100644 --- a/tests/integration/framework/test_owners.py +++ b/tests/integration/framework/test_owners.py @@ -2,8 +2,10 @@ from databricks.sdk import WorkspaceClient from databricks.sdk.service import iam +from databricks.sdk.service.iam import PermissionLevel from databricks.labs.ucx.contexts.workflow_task import RuntimeContext +from databricks.labs.ucx.framework.owners import AdministratorLocator, WorkspacePathOwnership def _find_admins_group_id(ws: WorkspaceClient) -> str: @@ -51,3 +53,33 @@ def test_fallback_admin_user(ws, installation_ctx: RuntimeContext) -> None: assert an_admin == the_user.user_name and the_user.active assert _user_is_member_of_group(the_user, admins_group_id) or _user_has_role(the_user, "account_admin") + + +def test_notebook_owner(make_notebook, make_notebook_permissions, make_group, ws): + notebook = make_notebook() + new_group = make_group() + make_notebook_permissions( + object_id=notebook, + permission_level=PermissionLevel.CAN_MANAGE, + group_name=new_group.display_name, + ) + + admin_locator = AdministratorLocator(ws) + notebook_ownership = WorkspacePathOwnership(admin_locator, ws) + + name = notebook_ownership.owner_of(notebook) + + my_user = ws.current_user.me() + assert name == my_user.user_name + + +def test_file_owner(make_workspace_file, ws): + ws_file = make_workspace_file() + + admin_locator = AdministratorLocator(ws) + notebook_ownership = WorkspacePathOwnership(admin_locator, ws) + + name = notebook_ownership.owner_of(ws_file) + + my_user = ws.current_user.me() + assert name == my_user.user_name