Skip to content

Commit

Permalink
refactor(powerbi): dashboards, reports and independent_datasets accessed by id (#12431)
Browse files Browse the repository at this point in the history
  • Loading branch information
sgomezvillamor authored Jan 23, 2025
1 parent 5309ae0 commit 3471857
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 40 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1322,14 +1322,14 @@ def extract_independent_datasets(
context=",".join(
[
dataset.name
for dataset in workspace.independent_datasets
for dataset in workspace.independent_datasets.values()
if dataset.name
]
),
)
return

for dataset in workspace.independent_datasets:
for dataset in workspace.independent_datasets.values():
yield from auto_workunit(
stream=self.mapper.to_datahub_dataset(
dataset=dataset,
Expand Down Expand Up @@ -1440,7 +1440,7 @@ def get_workspace_workunit(

yield from auto_workunit(self.emit_app(workspace=workspace))

for dashboard in workspace.dashboards:
for dashboard in workspace.dashboards.values():
try:
# Fetch PowerBi users for dashboards
dashboard.users = self.powerbi_client.get_dashboard_users(dashboard)
Expand All @@ -1459,7 +1459,7 @@ def get_workspace_workunit(
if wu is not None:
yield wu

for report in workspace.reports:
for report in workspace.reports.values():
for work_unit in self.mapper.report_to_datahub_work_units(
report, workspace
):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,13 @@ class Workspace:
id: str
name: str
type: str # This is used as a subtype of the Container entity.
dashboards: List["Dashboard"]
reports: List["Report"]
datasets: Dict[str, "PowerBIDataset"]
report_endorsements: Dict[str, List[str]]
dashboard_endorsements: Dict[str, List[str]]
dashboards: Dict[str, "Dashboard"] # key = dashboard id
reports: Dict[str, "Report"] # key = report id
datasets: Dict[str, "PowerBIDataset"] # key = dataset id
report_endorsements: Dict[str, List[str]] # key = report id
dashboard_endorsements: Dict[str, List[str]] # key = dashboard id
scan_result: dict
independent_datasets: List["PowerBIDataset"]
independent_datasets: Dict[str, "PowerBIDataset"] # key = dataset id
app: Optional["App"]

def get_urn_part(self, workspace_id_as_urn_part: Optional[bool] = False) -> str:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,15 +193,18 @@ def get_dashboard_users(self, dashboard: Dashboard) -> List[User]:
def get_report_users(self, workspace_id: str, report_id: str) -> List[User]:
return self._get_entity_users(workspace_id, Constant.REPORTS, report_id)

def get_reports(self, workspace: Workspace) -> List[Report]:
def get_reports(self, workspace: Workspace) -> Dict[str, Report]:
"""
Fetch the report from PowerBi for the given Workspace
"""
reports: List[Report] = []
reports: Dict[str, Report] = {}
try:
reports = self._get_resolver().get_reports(workspace)
reports = {
report.id: report
for report in self._get_resolver().get_reports(workspace)
}
# Fill Report dataset
for report in reports:
for report in reports.values():
if report.dataset_id:
report.dataset = self.dataset_registry.get(report.dataset_id)
if report.dataset is None:
Expand All @@ -222,7 +225,7 @@ def fill_ownership() -> None:
)
return

for report in reports:
for report in reports.values():
report.users = self.get_report_users(
workspace_id=workspace.id, report_id=report.id
)
Expand All @@ -234,7 +237,7 @@ def fill_tags() -> None:
)
return

for report in reports:
for report in reports.values():
report.tags = workspace.report_endorsements.get(report.id, [])

fill_ownership()
Expand Down Expand Up @@ -270,12 +273,12 @@ def get_workspaces(self) -> List[Workspace]:
name=workspace[Constant.NAME],
type=workspace[Constant.TYPE],
datasets={},
dashboards=[],
reports=[],
dashboards={},
reports={},
report_endorsements={},
dashboard_endorsements={},
scan_result={},
independent_datasets=[],
independent_datasets={},
app=None, # It will be populated in _fill_metadata_from_scan_result method
)
for workspace in groups
Expand Down Expand Up @@ -561,12 +564,12 @@ def _fill_metadata_from_scan_result(
name=workspace_metadata[Constant.NAME],
type=workspace_metadata[Constant.TYPE],
datasets={},
dashboards=[],
reports=[],
dashboards={},
reports={},
report_endorsements={},
dashboard_endorsements={},
scan_result={},
independent_datasets=[],
independent_datasets={},
app=None, # It is getting set from scan-result
)
cur_workspace.scan_result = workspace_metadata
Expand Down Expand Up @@ -597,25 +600,28 @@ def _fill_metadata_from_scan_result(
def _fill_independent_datasets(self, workspace: Workspace) -> None:
reachable_datasets: List[str] = []
# Find out reachable datasets
for dashboard in workspace.dashboards:
for dashboard in workspace.dashboards.values():
for tile in dashboard.tiles:
if tile.dataset is not None:
reachable_datasets.append(tile.dataset.id)

for report in workspace.reports:
for report in workspace.reports.values():
if report.dataset is not None:
reachable_datasets.append(report.dataset.id)

# Set datasets not present in reachable_datasets
for dataset in workspace.datasets.values():
if dataset.id not in reachable_datasets:
workspace.independent_datasets.append(dataset)
workspace.independent_datasets[dataset.id] = dataset

def _fill_regular_metadata_detail(self, workspace: Workspace) -> None:
def fill_dashboards() -> None:
workspace.dashboards = self._get_resolver().get_dashboards(workspace)
workspace.dashboards = {
dashboard.id: dashboard
for dashboard in self._get_resolver().get_dashboards(workspace)
}
# set tiles of Dashboard
for dashboard in workspace.dashboards:
for dashboard in workspace.dashboards.values():
dashboard.tiles = self._get_resolver().get_tiles(
workspace, dashboard=dashboard
)
Expand Down Expand Up @@ -644,7 +650,7 @@ def fill_dashboard_tags() -> None:
"Skipping tag retrieval for dashboard as extract_endorsements_to_tags is set to false"
)
return
for dashboard in workspace.dashboards:
for dashboard in workspace.dashboards.values():
dashboard.tags = workspace.dashboard_endorsements.get(dashboard.id, [])

if self.__config.extract_dashboards:
Expand Down
25 changes: 13 additions & 12 deletions metadata-ingestion/tests/integration/powerbi/test_powerbi.py
Original file line number Diff line number Diff line change
Expand Up @@ -978,16 +978,16 @@ def validate_pipeline(pipeline: Pipeline) -> None:
name="demo-workspace",
type="Workspace",
datasets={},
dashboards=[],
reports=[],
dashboards={},
reports={},
report_endorsements={},
dashboard_endorsements={},
scan_result={},
independent_datasets=[],
independent_datasets={},
app=None,
)
# Fetch actual reports
reports: List[Report] = cast(
reports: Dict[str, Report] = cast(
PowerBiDashboardSource, pipeline.source
).powerbi_client.get_reports(workspace=mock_workspace)

Expand Down Expand Up @@ -1020,8 +1020,8 @@ def validate_pipeline(pipeline: Pipeline) -> None:
"pages": [],
},
]
expected_reports: List[Report] = [
Report(
expected_reports: Dict[str, Report] = {
report[Constant.ID]: Report(
id=report[Constant.ID],
name=report[Constant.NAME],
type=ReportType.PowerBIReport,
Expand All @@ -1045,14 +1045,15 @@ def validate_pipeline(pipeline: Pipeline) -> None:
dataset=mock_workspace.datasets.get(report[Constant.DATASET_ID]),
)
for report in mock_reports
]
}
# Compare actual and expected reports
for i in range(2):
assert reports[i].id == expected_reports[i].id
assert reports[i].name == expected_reports[i].name
assert reports[i].description == expected_reports[i].description
assert reports[i].dataset == expected_reports[i].dataset
assert reports[i].pages == expected_reports[i].pages
report_id = mock_reports[i][Constant.ID]
assert reports[report_id].id == expected_reports[report_id].id
assert reports[report_id].name == expected_reports[report_id].name
assert reports[report_id].description == expected_reports[report_id].description
assert reports[report_id].dataset == expected_reports[report_id].dataset
assert reports[report_id].pages == expected_reports[report_id].pages


@freeze_time(FROZEN_TIME)
Expand Down

0 comments on commit 3471857

Please sign in to comment.