Skip to content

Commit

Permalink
Merge pull request #21 from jgarciaf106/feat/add-cli-export-assessment-reviewed
Browse files Browse the repository at this point in the history

Feat/add cli export assessment reviewed
  • Loading branch information
rportilla-databricks authored Oct 2, 2024
2 parents 4d2e9a6 + 77786a2 commit 84830fd
Show file tree
Hide file tree
Showing 10 changed files with 102 additions and 21 deletions.
7 changes: 7 additions & 0 deletions .github/workflows/solacc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,10 @@ jobs:

- name: Verify linters on solution accelerators
run: make solacc

- name: Upload reports
uses: actions/upload-artifact@v4
with:
name: report
path: build/
if-no-files-found: error
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1174,6 +1174,24 @@ The export-assessment command is used to export UCX assessment results to a spec

[[back to top](#databricks-labs-ucx)]

## `export-assessment` command

```commandline
databricks labs ucx export-assessment
```
The export-assessment command is used to export UCX assessment results to a specified location. When you run this command, you will be prompted to provide details on the destination path and the type of report you wish to generate. If you do not specify these details, the command will default to exporting the main results to the current directory. The exported file is named after the report type you select, in the format `export_{query_choice}_results.zip` (e.g. `export_main_results.zip`).
- **Choose a path to save the UCX Assessment results:**
- **Description:** Specify the path where the results should be saved. If not provided, results will be saved in the current directory.

- **Choose which assessment results to export:**
- **Description:** Select the type of results to export. Options include:
- `azure`
- `estimates`
- `interactive`
- `main`

[[back to top](#databricks-labs-ucx)]

# Metastore related commands

These commands are used to assign a Unity Catalog metastore to a workspace. The metastore assignment is a pre-requisite
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ dependencies = ["databricks-sdk~=0.30",
"databricks-labs-lsql>=0.5,<0.13",
"databricks-labs-blueprint>=0.8,<0.10",
"PyYAML>=6.0.0,<7.0.0",
"sqlglot>=25.5.0,<25.23",
"sqlglot>=25.5.0,<25.25",
"astroid>=3.3.1"]

[project.optional-dependencies]
Expand Down
2 changes: 1 addition & 1 deletion src/databricks/labs/ucx/hive_metastore/table_size.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class TableSize:
size_in_bytes: int


class TableSizeCrawler(CrawlerBase):
class TableSizeCrawler(CrawlerBase[TableSize]):
def __init__(self, backend: SqlBackend, schema, include_databases: list[str] | None = None):
"""
Initializes a TablesSizeCrawler instance.
Expand Down
2 changes: 1 addition & 1 deletion src/databricks/labs/ucx/hive_metastore/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ def _describe(self, catalog: str, database: str, table: str) -> Table | None:
return None


class FasterTableScanCrawler(CrawlerBase):
class FasterTableScanCrawler(CrawlerBase[Table]):
"""
FasterTableScanCrawler is a specialized version of TablesCrawler that uses spark._jsparkSession to utilize
faster scanning with Scala APIs.
Expand Down
2 changes: 1 addition & 1 deletion src/databricks/labs/ucx/hive_metastore/udfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def key(self) -> str:
return f"{self.catalog}.{self.database}.{self.name}".lower()


class UdfsCrawler(CrawlerBase):
class UdfsCrawler(CrawlerBase[Udf]):
def __init__(self, backend: SqlBackend, schema: str, include_databases: list[str] | None = None):
"""
Initializes a UdfsCrawler instance.
Expand Down
12 changes: 7 additions & 5 deletions src/databricks/labs/ucx/installer/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,7 @@
f'--parent_run_id=' + dbutils.widgets.get('parent_run_id'))
"""

EXPORT_TO_EXCEL_NOTEBOOK = """
# Databricks notebook source
EXPORT_TO_EXCEL_NOTEBOOK = """# Databricks notebook source
# MAGIC %md
# MAGIC ##### Exporter of UCX assessment results
# MAGIC ##### Instructions:
Expand Down Expand Up @@ -165,7 +164,7 @@
# DBTITLE 1,Assessment Export
FILE_NAME = "ucx_assessment_main.xlsx"
TMP_PATH = f"/Workspace{ctx.installation.install_folder()}/tmp/"
TMP_PATH = f"/Workspace{{ctx.installation.install_folder()}}/tmp/"
DOWNLOAD_PATH = "/dbfs/FileStore/excel-export"
Expand Down Expand Up @@ -194,8 +193,10 @@ def _to_excel(dataset: Dataset, writer: ...) -> None:
def _render_export() -> None:
'''Render an HTML link for downloading the results.'''
html_content = f'''
<style>@font-face{{font-family:'DM Sans';src:url(https://cdn.bfldr.com/9AYANS2F/at/p9qfs3vgsvnp5c7txz583vgs/dm-sans-regular.ttf?auto=webp&format=ttf) format('truetype');font-weight:400;font-style:normal}}body{{font-family:'DM Sans',Arial,sans-serif}}.export-container{{text-align:center;margin-top:20px}}.export-container h2{{color:#1B3139;font-size:24px;margin-bottom:20px}}.export-container a{{display:inline-block;padding:12px 25px;background-color:#1B3139;color:#fff;text-decoration:none;border-radius:4px;font-size:18px;font-weight:500;transition:background-color 0.3s ease,transform 0.3s ease}}.export-container a:hover{{background-color:#FF3621;transform:translateY(-2px)}}</style><div class="export-container"><h2>Export Results</h2><a href='{workspace_host}files/excel-export/ucx_assessment_main.xlsx?o={workspace_id}' target='_blank' download>Download Results</a></div>
html_content = '''
<style>@font-face{{font-family:'DM Sans';src:url(https://cdn.bfldr.com/9AYANS2F/at/p9qfs3vgsvnp5c7txz583vgs/dm-sans-regular.ttf?auto=webp&format=ttf) format('truetype');font-weight:400;font-style:normal}}body{{font-family:'DM Sans',Arial,sans-serif}}.export-container{{text-align:center;margin-top:20px}}.export-container h2{{color:#1B3139;font-size:24px;margin-bottom:20px}}.export-container a{{display:inline-block;padding:12px 25px;background-color:#1B3139;color:#fff;text-decoration:none;border-radius:4px;font-size:18px;font-weight:500;transition:background-color 0.3s ease,transform:translateY(-2px) ease}}.export-container a:hover{{background-color:#FF3621;transform:translateY(-2px)}}</style>
<div class="export-container"><h2>Export Results</h2><a href='{workspace_host}/files/excel-export/ucx_assessment_main.xlsx?o={workspace_id}' target='_blank' download>Download Results</a></div>
'''
displayHTML(html_content)
Expand Down Expand Up @@ -597,6 +598,7 @@ def create_jobs(self) -> None:
self.remove_jobs(keep=desired_workflows)
self._install_state.save()
self._create_debug(remote_wheels)
self._create_export(remote_wheels)
self._create_readme()

def remove_jobs(self, *, keep: set[str] | None = None) -> None:
Expand Down
5 changes: 4 additions & 1 deletion src/databricks/labs/ucx/source_code/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,10 @@ def message_relative_to(self, base: Path, *, default: Path | None = None) -> str
logger.debug(f'THIS IS A BUG! {advice.code}:{advice.message} has unknown path')
if default is not None:
path = default
path = path.relative_to(base)
try:
path = path.relative_to(base)
except ValueError:
logger.debug(f'Not a relative path: {path} to base: {base}')
# increment start_line because it is 0-based whereas IDEs are usually 1-based
return f"./{path.as_posix()}:{advice.start_line+1}:{advice.start_col}: [{advice.code}] {advice.message}"

Expand Down
8 changes: 7 additions & 1 deletion src/databricks/labs/ucx/source_code/notebooks/sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,13 @@ def __init__(
self._python_trees: dict[PythonCell, Tree] = {} # the original trees to be linted

def lint(self) -> Iterable[Advice]:
yield from self._load_tree_from_notebook(self._notebook, True)
has_failure = False
for advice in self._load_tree_from_notebook(self._notebook, True):
if isinstance(advice, Failure): # happens when a cell is unparseable
has_failure = True
yield advice
if has_failure:
return
for cell in self._notebook.cells:
if not self._context.is_supported(cell.language.language):
continue
Expand Down
65 changes: 55 additions & 10 deletions tests/integration/source_code/solacc.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import dataclasses
import json
import logging
import os
import shutil
import sys
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path

import requests
Expand All @@ -20,6 +23,8 @@

this_file = Path(__file__)
dist = (this_file / '../../../../dist').resolve().absolute()
build = dist.parent / "build"
build.mkdir(exist_ok=True)


def _get_repos_to_clone() -> dict[str, str]:
Expand Down Expand Up @@ -72,23 +77,41 @@ def _collect_uninferrable_count(advices: list[LocatedAdvice]):


def _collect_unparseable(advices: list[LocatedAdvice]):
return set(located_advice for located_advice in advices if located_advice.advice.code == 'parse-error')
return list(located_advice for located_advice in advices if located_advice.advice.code == 'parse-error')


def _print_advices(advices: list[LocatedAdvice]):
for located_advice in advices:
message = located_advice.message_relative_to(dist.parent)
sys.stdout.write(f"{message}\n")
messages = list(
located_advice.message_relative_to(dist.parent).replace('\n', ' ') + '\n' for located_advice in advices
)
if os.getenv("CI"):
advices_path = build / "advices.txt"
with advices_path.open("a") as advices_file:
advices_file.writelines(messages)
else:
for message in messages:
sys.stdout.write(message)


@dataclass
class _SolaccStats:
run_id: str
name: str
start_timestamp: datetime
end_timestamp: datetime
files_count: int
files_size: int


@dataclass
class _SolaccContext:
unparsed_files_path: Path | None = None
files_to_skip: set[str] | None = None
files_to_skip: set[Path] | None = None
total_count = 0
parseable_count = 0
uninferrable_count = 0
missing_imports: dict[str, dict[str, int]] = field(default_factory=dict)
stats: list[_SolaccStats] = field(default_factory=list)

@classmethod
def create(cls, for_all_dirs: bool):
Expand All @@ -98,11 +121,11 @@ def create(cls, for_all_dirs: bool):
unparsed_path = Path(Path(__file__).parent, "solacc-unparsed.txt")
if unparsed_path.exists():
os.remove(unparsed_path)
files_to_skip: set[str] | None = None
files_to_skip: set[Path] | None = None
malformed = Path(__file__).parent / "solacc-malformed.txt"
if for_all_dirs and malformed.exists():
lines = malformed.read_text(encoding="utf-8").split("\n")
files_to_skip = set(line for line in lines if len(line) > 0 and not line.startswith("#"))
files_to_skip = set(dist / line for line in lines if len(line) > 0 and not line.startswith("#"))
return _SolaccContext(unparsed_files_path=unparsed_path, files_to_skip=files_to_skip)

def register_missing_import(self, missing_import: str):
Expand Down Expand Up @@ -153,7 +176,19 @@ def _lint_dir(solacc: _SolaccContext, soldir: Path):
files_to_skip = set(solacc.files_to_skip) if solacc.files_to_skip else set()
linted_files = set(files_to_skip)
# lint solution
start_timestamp = datetime.now(timezone.utc)
advices = list(ctx.local_code_linter.lint_path(soldir, linted_files))
end_timestamp = datetime.now(timezone.utc)
# record stats
stats = _SolaccStats(
run_id=os.getenv("GITHUB_RUN_ATTEMPT") or "local",
start_timestamp=start_timestamp,
end_timestamp=end_timestamp,
name=soldir.name,
files_count=len(all_files),
files_size=sum(path.stat().st_size for path in [soldir / filename for filename in all_files]),
)
solacc.stats.append(stats)
# collect unparseable files
unparseables = _collect_unparseable(advices)
solacc.parseable_count += len(linted_files) - len(files_to_skip) - len(set(advice.path for advice in unparseables))
Expand All @@ -162,7 +197,11 @@ def _lint_dir(solacc: _SolaccContext, soldir: Path):
logger.error(f"Error during parsing of {unparseable.path}: {unparseable.advice.message}".replace("\n", " "))
# populate solacc-unparsed.txt
with solacc.unparsed_files_path.open(mode="a", encoding="utf-8") as f:
f.write(unparseable.path.relative_to(dist).as_posix())
try:
path = unparseable.path.relative_to(dist)
except ValueError:
path = unparseable.path
f.write(path.as_posix())
f.write("\n")
# collect missing imports
for missing_import in _collect_missing_imports(advices):
Expand All @@ -178,8 +217,8 @@ def _lint_dir(solacc: _SolaccContext, soldir: Path):
def _lint_repos(clone_urls, sol_to_lint: str | None):
solacc = _SolaccContext.create(sol_to_lint is not None)
if sol_to_lint:
# don't clone if linting just one file, assumption is we're troubleshooting
_lint_dir(solacc, dist / sol_to_lint)
sol_dir = _clone_repo(clone_urls[sol_to_lint], sol_to_lint)
_lint_dir(solacc, sol_dir)
else:
names: list[str] = list(clone_urls.keys())
for name in sorted(names, key=str.casefold):
Expand All @@ -199,6 +238,12 @@ def _lint_repos(clone_urls, sol_to_lint: str | None):
f"not computed: {solacc.uninferrable_count}"
)
solacc.log_missing_imports()
# log stats
stats_path = build / "stats.json"
with stats_path.open("a") as stats_file:
for stats in solacc.stats:
message = json.dumps(dataclasses.asdict(stats), default=str)
stats_file.writelines([message])
# fail the job if files are unparseable
if parseable_pct < 100:
sys.exit(1)
Expand Down

0 comments on commit 84830fd

Please sign in to comment.