Skip to content

Commit

Permalink
Removed pydantic dependency (#138)
Browse files Browse the repository at this point in the history
  • Loading branch information
nfx authored Sep 2, 2023
1 parent 999678a commit e214477
Show file tree
Hide file tree
Showing 11 changed files with 109 additions and 53 deletions.
9 changes: 4 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,12 @@ classifiers = [
]
dependencies = [
"databricks-sdk~=0.7.0",
"typer[all]>=0.9.0,<0.10.0",
"pyhocon>=0.3.60,<0.4.0",
"pydantic>=2.0.3, <3.0.0",
"PyYAML>=6.0.0,<7.0.0",
"ratelimit>=2.2.1,<3.0.0",

# TODO: remove later
"typer[all]>=0.9.0,<0.10.0",
"pandas>=2.0.3,<3.0.0",
"python-dotenv>=1.0.0,<=2.0.0",
"ratelimit>=2.2.1,<3.0.0",
"tenacity>=8.2.2,<9.0.0",
]

Expand Down
4 changes: 2 additions & 2 deletions src/databricks/labs/ucx/cli/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@

@app.command()
def migrate_groups(config_file: Annotated[Path, typer.Argument(help="Path to config file")] = "migration_config.yml"):
from databricks.labs.ucx.cli.utils import get_migration_config
from databricks.labs.ucx.config import MigrationConfig
from databricks.labs.ucx.toolkits.group_migration import GroupMigrationToolkit

config = get_migration_config(config_file)
config = MigrationConfig.from_file(config_file)
toolkit = GroupMigrationToolkit(config)
toolkit.prepare_environment()

Expand Down
11 changes: 0 additions & 11 deletions src/databricks/labs/ucx/cli/utils.py

This file was deleted.

60 changes: 55 additions & 5 deletions src/databricks/labs/ucx/config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from dataclasses import dataclass
from pathlib import Path

from databricks.sdk.core import Config
from pydantic import RootModel
from pydantic.dataclasses import dataclass

from databricks.labs.ucx.__about__ import __version__

Expand All @@ -17,6 +18,10 @@ def __repr__(self):
def to_spark(self):
return self.__repr__()

@classmethod
def from_dict(cls, raw: dict):
    """Create an instance from a plain mapping.

    Every key of ``raw`` is forwarded to the constructor as a keyword
    argument, so the keys must match the constructor's parameter names.
    """
    return cls(**raw)


@dataclass
class GroupsConfig:
Expand All @@ -32,11 +37,19 @@ def __post_init__(self):
msg = "No selected groups provided, but auto-collection is disabled"
raise ValueError(msg)

@classmethod
def from_dict(cls, raw: dict):
    """Create an instance from a plain mapping.

    The entries of ``raw`` are unpacked into keyword arguments for the
    constructor; unknown keys are rejected by the constructor itself.
    """
    return cls(**raw)


@dataclass
class InventoryConfig:
    """Configuration section that points at the inventory table."""

    # Table reference, produced by InventoryTable.from_dict when loading from a mapping.
    table: InventoryTable

    @classmethod
    def from_dict(cls, raw: dict):
        """Create the section from a plain mapping.

        The value stored under ``"table"`` is handed to
        ``InventoryTable.from_dict``. When the key is absent, ``None`` is
        handed over instead (no default is substituted here).
        """
        table_section = raw.get("table")
        table = InventoryTable.from_dict(table_section)
        return cls(table=table)


@dataclass
class ConnectConfig:
Expand Down Expand Up @@ -72,6 +85,10 @@ def from_databricks_config(cfg: Config) -> "ConnectConfig":
rate_limit=cfg.rate_limit,
)

@classmethod
def from_dict(cls, raw: dict):
    """Create an instance from a plain mapping.

    ``raw`` is expanded into keyword arguments for the constructor, so
    each key has to be a valid constructor parameter name.
    """
    return cls(**raw)


@dataclass
class MigrationConfig:
Expand All @@ -83,10 +100,46 @@ class MigrationConfig:
log_level: str | None = "INFO"

def __post_init__(self):
    """Apply defaults and reject options that are not supported.

    Raises:
        NotImplementedError: if ``with_table_acls`` is truthy.
    """
    if self.connect is None:
        # No connection section was supplied: use a default-constructed one.
        self.connect = ConnectConfig()
    if not self.with_table_acls:
        return
    msg = "Table ACLS are not yet implemented"
    raise NotImplementedError(msg)

def as_dict(self) -> dict:
    """Convert this dataclass, and any dataclass nested in its fields, to a ``dict``.

    Fields are omitted when their value is ``None`` or an empty
    string/collection; a nested dataclass that converts to an empty dict
    is omitted as well. Explicit ``False`` and ``0`` values are kept, so
    that such settings are not silently lost when the result is written
    out and loaded again.

    Returns:
        A dict keyed by field name. Values that are not dataclass
        instances are returned unchanged (not copied).
    """
    from dataclasses import fields, is_dataclass

    def is_blank(value) -> bool:
        # Only "nothing was provided" values are dropped; False and 0 are real settings.
        if value is None:
            return True
        return isinstance(value, (str, bytes, list, tuple, dict, set, frozenset)) and not value

    def inner(x):
        # is_dataclass() is also true for dataclass *types*; only instances carry field values.
        if is_dataclass(x) and not isinstance(x, type):
            converted = ((f.name, inner(getattr(x, f.name))) for f in fields(x))
            return {name: value for name, value in converted if not is_blank(value)}
        return x

    return inner(self)

@classmethod
def from_dict(cls, raw: dict) -> "MigrationConfig":
    """Create a ``MigrationConfig`` from a plain mapping.

    The ``inventory``, ``groups`` and ``connect`` sections are delegated
    to the ``from_dict`` of their own config classes; an absent section
    is passed on as an empty dict. Absent scalar options fall back to
    ``with_table_acls=False``, ``num_threads=4`` and ``log_level="INFO"``.
    """
    # Sections are built in the same order as the constructor arguments below.
    inventory_section = InventoryConfig.from_dict(raw.get("inventory", {}))
    table_acls_enabled = raw.get("with_table_acls", False)
    groups_section = GroupsConfig.from_dict(raw.get("groups", {}))
    connect_section = ConnectConfig.from_dict(raw.get("connect", {}))
    return cls(
        inventory=inventory_section,
        with_table_acls=table_acls_enabled,
        groups=groups_section,
        connect=connect_section,
        num_threads=raw.get("num_threads", 4),
        log_level=raw.get("log_level", "INFO"),
    )

@classmethod
def from_file(cls, config_file: Path) -> "MigrationConfig":
    """Load a configuration from a YAML file.

    Args:
        config_file: location of the YAML document; a ``str`` or any
            other path-like value is accepted as well as a ``Path``.

    Returns:
        The configuration built by ``from_dict``. An empty document is
        treated as an empty mapping.
    """
    from yaml import safe_load

    # Decode explicitly as UTF-8 instead of relying on the platform's locale encoding.
    text = Path(config_file).read_text(encoding="utf-8")
    raw = safe_load(text)
    # Go through ``cls`` so that subclasses are constructed as themselves.
    return cls.from_dict(raw or {})

def to_databricks_config(self) -> Config:
connect = self.connect
if connect is None:
Expand All @@ -109,6 +162,3 @@ def to_databricks_config(self) -> Config:
product="ucx",
product_version=__version__,
)

def to_json(self) -> str:
return RootModel[MigrationConfig](self).model_dump_json(indent=4)
2 changes: 1 addition & 1 deletion src/databricks/labs/ucx/inventory/inventorizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ def _prepare_permissions_inventory_item(self, scope: SecretScope) -> Permissions
object_id=scope.name,
logical_object_type=LogicalObjectType.SECRET_SCOPE,
request_object_type=None,
raw_object_permissions=json.dumps(acls_container.model_dump(mode="json")),
raw_object_permissions=json.dumps(acls_container.as_dict()),
)

def inventorize(self) -> list[PermissionsInventoryItem]:
Expand Down
1 change: 1 addition & 0 deletions src/databricks/labs/ucx/inventory/permissions.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ def _scope_permissions_applicator(self, request_payload: SecretsPermissionReques
scope=request_payload.object_id, principal=_acl_item.principal, permission=_acl_item.permission
)
logger.debug(f"Applied new permissions for scope {request_payload.object_id}: {_acl_item}")
# TODO: add mixin to SDK
# in-flight check for the applied permissions
# the api might be inconsistent, therefore we need to check that the permissions were applied
for _ in range(3):
Expand Down
2 changes: 1 addition & 1 deletion src/databricks/labs/ucx/inventory/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def cleanup(self):
def save(self, items: list[PermissionsInventoryItem]):
# TODO: update instead of append
logger.info(f"Saving {len(items)} items to inventory table {self.config.table}")
serialized_items = pd.DataFrame([item.model_dump(mode="json") for item in items])
serialized_items = pd.DataFrame([item.as_dict() for item in items])
df = self.spark.createDataFrame(serialized_items, schema=self._table_schema)
df.write.mode("append").format("delta").saveAsTable(self.config.table.to_spark())
logger.info("Successfully saved the items to inventory table")
Expand Down
58 changes: 38 additions & 20 deletions src/databricks/labs/ucx/inventory/types.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import json
from dataclasses import asdict, dataclass

import pandas as pd
from databricks.sdk.service.iam import ObjectPermissions
from databricks.sdk.service.workspace import AclItem as SdkAclItem
from databricks.sdk.service.workspace import AclPermission as SdkAclPermission
from pydantic import BaseModel
from pydantic.tools import parse_obj_as

from databricks.labs.ucx.generic import StrEnum

Expand All @@ -30,16 +29,6 @@ def __repr__(self):
return self.value


class SqlRequestObjectType(StrEnum):
ALERTS = "alerts"
DASHBOARDS = "dashboards"
DATA_SOURCES = "data-sources"
QUERIES = "queries"

def __repr__(self):
return self.value


class LogicalObjectType(StrEnum):
ENTITLEMENTS = "ENTITLEMENTS"
ROLES = "ROLES"
Expand Down Expand Up @@ -69,12 +58,18 @@ class AclPermission(StrEnum):
MANAGE = "MANAGE"


class AclItem(BaseModel):
@dataclass
class AclItem:
principal: str
permission: AclPermission

@classmethod
def from_dict(cls, raw: dict):
    """Create an ACL item from a plain mapping.

    ``principal`` is taken as-is (``None`` when absent); ``permission``
    is converted to an ``AclPermission`` member.
    """
    principal = raw.get("principal", None)
    permission = AclPermission(raw.get("permission"))
    return cls(principal=principal, permission=permission)


class AclItemsContainer(BaseModel):
@dataclass
class AclItemsContainer:
acls: list[AclItem]

@staticmethod
Expand All @@ -89,16 +84,25 @@ def to_sdk(self) -> list[SdkAclItem]:
SdkAclItem(principal=acl.principal, permission=SdkAclPermission(acl.permission.value)) for acl in self.acls
]

@classmethod
def from_dict(cls, raw: dict) -> "AclItemsContainer":
    """Create a container from a plain mapping.

    Each entry of the ``"acls"`` list is converted with
    ``AclItem.from_dict``; an absent key yields an empty container.
    """
    acl_entries = raw.get("acls", [])
    return cls(acls=list(map(AclItem.from_dict, acl_entries)))

def as_dict(self) -> dict:
    """Return this dataclass instance as a dict, as produced by ``dataclasses.asdict``."""
    return asdict(self)


class RolesAndEntitlements(BaseModel):
@dataclass
class RolesAndEntitlements:
roles: list
entitlements: list


class PermissionsInventoryItem(BaseModel):
@dataclass
class PermissionsInventoryItem:
object_id: str
logical_object_type: LogicalObjectType
request_object_type: RequestObjectType | SqlRequestObjectType | None
request_object_type: RequestObjectType | None
raw_object_permissions: str

@property
Expand All @@ -108,13 +112,27 @@ def object_permissions(self) -> dict:
@property
def typed_object_permissions(self) -> ObjectPermissions | AclItemsContainer | RolesAndEntitlements:
if self.logical_object_type == LogicalObjectType.SECRET_SCOPE:
return parse_obj_as(AclItemsContainer, self.object_permissions)
return AclItemsContainer.from_dict(self.object_permissions)
elif self.logical_object_type in [LogicalObjectType.ROLES, LogicalObjectType.ENTITLEMENTS]:
return parse_obj_as(RolesAndEntitlements, self.object_permissions)
return RolesAndEntitlements(**self.object_permissions)
else:
return ObjectPermissions.from_dict(self.object_permissions)

@staticmethod
def from_pandas(source: pd.DataFrame) -> list["PermissionsInventoryItem"]:
items = source.to_dict(orient="records")
return [PermissionsInventoryItem(**item) for item in items]
return [PermissionsInventoryItem.from_dict(item) for item in items]

def as_dict(self) -> dict:
    """Return this dataclass instance as a dict, as produced by ``dataclasses.asdict``."""
    return asdict(self)

@classmethod
def from_dict(cls, raw: dict) -> "PermissionsInventoryItem":
    """Create an inventory item from a plain mapping.

    ``object_id`` and ``logical_object_type`` are required keys.
    ``request_object_type`` is converted to a ``RequestObjectType`` only
    when present and not ``None``; ``raw_object_permissions`` defaults to
    ``None`` when absent.
    """
    # Values are resolved in constructor-argument order so that the first
    # failing lookup/conversion is the same one as with inline expressions.
    object_id = raw["object_id"]
    logical_type = LogicalObjectType(raw["logical_object_type"])
    if raw.get("request_object_type", None) is None:
        request_type = None
    else:
        request_type = RequestObjectType(raw["request_object_type"])
    return cls(
        object_id=object_id,
        logical_object_type=logical_type,
        request_object_type=request_type,
        raw_object_permissions=raw.get("raw_object_permissions", None),
    )
1 change: 0 additions & 1 deletion tests/integration/test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,6 @@ def test_e2e(
with_table_acls=False,
inventory=InventoryConfig(table=inventory_table),
groups=GroupsConfig(selected=[g[0].display_name for g in env.groups]),
auth=None,
log_level="DEBUG",
)
toolkit = GroupMigrationToolkit(config)
Expand Down
7 changes: 3 additions & 4 deletions tests/unit/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,7 @@

import pytest
import yaml
from pydantic import RootModel

from databricks.labs.ucx.cli.utils import get_migration_config
from databricks.labs.ucx.config import (
GroupsConfig,
InventoryConfig,
Expand Down Expand Up @@ -61,8 +59,9 @@ def test_reader(tmp_path: Path):
config: MigrationConfig = mc(with_table_acls=False)
config_file = tmp_path / "config.yml"

as_dict = config.as_dict()
with config_file.open("w") as writable:
yaml.safe_dump(RootModel[MigrationConfig](config).model_dump(), writable)
yaml.safe_dump(as_dict, writable)

loaded = get_migration_config(config_file)
loaded = MigrationConfig.from_file(config_file)
assert loaded == config
7 changes: 4 additions & 3 deletions tests/unit/test_serde.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import dataclasses

from databricks.sdk.service.workspace import AclItem as SdkAclItem
from databricks.sdk.service.workspace import AclPermission as SdkAclPermission
from pydantic.tools import parse_obj_as

from databricks.labs.ucx.inventory.types import AclItemsContainer

Expand All @@ -17,7 +18,7 @@ def test_acl_items_container_serde():

assert after == sdk_items

_dump = container.model_dump(mode="json")
_str = parse_obj_as(AclItemsContainer, _dump)
_dump = dataclasses.asdict(container)
_str = AclItemsContainer.from_dict(_dump)

assert _str == container

0 comments on commit e214477

Please sign in to comment.