Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added WorkspacePathOwnership to determine transitive owners for files and notebooks #3047

Merged
merged 1 commit into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/databricks/labs/ucx/contexts/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from databricks.labs.ucx.assessment.export import AssessmentExporter
from databricks.labs.ucx.aws.credentials import CredentialManager
from databricks.labs.ucx.config import WorkspaceConfig
from databricks.labs.ucx.framework.owners import AdministratorLocator
from databricks.labs.ucx.framework.owners import AdministratorLocator, WorkspacePathOwnership
from databricks.labs.ucx.hive_metastore import ExternalLocations, MountsCrawler, TablesCrawler
from databricks.labs.ucx.hive_metastore.catalog_schema import CatalogSchema
from databricks.labs.ucx.hive_metastore.grants import (
Expand Down Expand Up @@ -265,6 +265,10 @@ def tables_crawler(self) -> TablesCrawler:
def table_ownership(self) -> TableOwnership:
return TableOwnership(self.administrator_locator)

@cached_property
def workspace_path_ownership(self) -> WorkspacePathOwnership:
return WorkspacePathOwnership(self.administrator_locator, self.workspace_client)

@cached_property
def tables_migrator(self) -> TablesMigrator:
return TablesMigrator(
Expand Down
51 changes: 49 additions & 2 deletions src/databricks/labs/ucx/framework/owners.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
import logging
from abc import ABC, abstractmethod
from collections.abc import Callable, Iterable, Sequence
from datetime import timedelta
from functools import cached_property
from typing import Generic, TypeVar, final

from databricks.labs.blueprint.paths import WorkspacePath
from databricks.sdk import WorkspaceClient
from databricks.sdk.errors import NotFound
from databricks.sdk.service.iam import User
from databricks.sdk.errors import NotFound, InternalError
from databricks.sdk.retries import retried
from databricks.sdk.service.iam import User, PermissionLevel
from databricks.sdk.service.workspace import ObjectType

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -190,3 +194,46 @@ def owner_of(self, record: Record) -> str:
def _maybe_direct_owner(self, record: Record) -> str | None:
"""Obtain the record-specific user-name associated with the given record, if any."""
return None


class WorkspacePathOwnership(Ownership[WorkspacePath]):
def __init__(self, administrator_locator: AdministratorLocator, ws: WorkspaceClient) -> None:
super().__init__(administrator_locator)
self._ws = ws

@retried(on=[InternalError], timeout=timedelta(minutes=1))
def _maybe_direct_owner(self, record: WorkspacePath) -> str | None:
maybe_type_and_id = self._maybe_type_and_id(record)
if not maybe_type_and_id:
return None
object_type, object_id = maybe_type_and_id
try:
object_permissions = self._ws.permissions.get(object_type, object_id)
return self._infer_from_first_can_manage(object_permissions)
except NotFound:
logger.warning(f"removed on backend: {object_type} {object_id}")
return None

@staticmethod
def _maybe_type_and_id(path: WorkspacePath) -> tuple[str, str] | None:
object_info = path._object_info # pylint: disable=protected-access
object_id = str(object_info.object_id)
match object_info.object_type:
case ObjectType.NOTEBOOK:
return 'notebooks', object_id
case ObjectType.FILE:
return 'files', object_id
return None

@staticmethod
def _infer_from_first_can_manage(object_permissions):
for acl in object_permissions.access_control_list:
for permission in acl.all_permissions:
if permission.permission_level != PermissionLevel.CAN_MANAGE:
continue
if acl.user_name:
return acl.user_name
if acl.group_name:
return acl.group_name
return acl.service_principal_name
return None
32 changes: 32 additions & 0 deletions tests/integration/framework/test_owners.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@

from databricks.sdk import WorkspaceClient
from databricks.sdk.service import iam
from databricks.sdk.service.iam import PermissionLevel

from databricks.labs.ucx.contexts.workflow_task import RuntimeContext
from databricks.labs.ucx.framework.owners import AdministratorLocator, WorkspacePathOwnership


def _find_admins_group_id(ws: WorkspaceClient) -> str:
Expand Down Expand Up @@ -51,3 +53,33 @@ def test_fallback_admin_user(ws, installation_ctx: RuntimeContext) -> None:

assert an_admin == the_user.user_name and the_user.active
assert _user_is_member_of_group(the_user, admins_group_id) or _user_has_role(the_user, "account_admin")


def test_notebook_owner(make_notebook, make_notebook_permissions, make_group, ws):
notebook = make_notebook()
new_group = make_group()
make_notebook_permissions(
object_id=notebook,
permission_level=PermissionLevel.CAN_MANAGE,
group_name=new_group.display_name,
)

admin_locator = AdministratorLocator(ws)
notebook_ownership = WorkspacePathOwnership(admin_locator, ws)

name = notebook_ownership.owner_of(notebook)

my_user = ws.current_user.me()
assert name == my_user.user_name


def test_file_owner(make_workspace_file, ws):
ws_file = make_workspace_file()

admin_locator = AdministratorLocator(ws)
notebook_ownership = WorkspacePathOwnership(admin_locator, ws)

name = notebook_ownership.owner_of(ws_file)

my_user = ws.current_user.me()
assert name == my_user.user_name
Loading