diff --git a/.github/workflows/solacc.yml b/.github/workflows/solacc.yml
index 82c10bb2f2..69995672b0 100644
--- a/.github/workflows/solacc.yml
+++ b/.github/workflows/solacc.yml
@@ -26,3 +26,10 @@ jobs:
 
       - name: Verify linters on solution accelerators
         run: make solacc
+
+      - name: Upload reports
+        uses: actions/upload-artifact@v4
+        with:
+          name: report
+          path: build/
+          if-no-files-found: error
diff --git a/README.md b/README.md
index f15bd6c7ca..f571af0864 100644
--- a/README.md
+++ b/README.md
@@ -1174,6 +1174,24 @@ The export-assessment command is used to export UCX assessment results to a spec
 
 [[back to top](#databricks-labs-ucx)]
 
+## `export-assessment` command
+
+```commandline
+databricks labs ucx export-assessment
+```
+The `export-assessment` command exports UCX assessment results to a specified location. When you run this command, you will be prompted for the destination path and the type of report you wish to generate. If you do not specify these details, the command defaults to exporting the main results to the current directory. The exported file is named after the report type you select, e.g. `export_{query_choice}_results.zip`.
+- **Choose a path to save the UCX Assessment results:**
+  - **Description:** Specify the path where the results should be saved. If not provided, results will be saved in the current directory.
+
+- **Choose which assessment results to export:**
+  - **Description:** Select the type of results to export. Options include:
+    - `azure`
+    - `estimates`
+    - `interactive`
+    - `main`
+
+[[back to top](#databricks-labs-ucx)]
+
 # Metastore related commands
 
 These commands are used to assign a Unity Catalog metastore to a workspace. The metastore assignment is a pre-requisite
diff --git a/pyproject.toml b/pyproject.toml
index 7afc837663..4812d5bff9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,7 +48,7 @@ dependencies = ["databricks-sdk~=0.30",
     "databricks-labs-lsql>=0.5,<0.13",
     "databricks-labs-blueprint>=0.8,<0.10",
     "PyYAML>=6.0.0,<7.0.0",
-    "sqlglot>=25.5.0,<25.23",
+    "sqlglot>=25.5.0,<25.25",
     "astroid>=3.3.1"]
 
 [project.optional-dependencies]
diff --git a/src/databricks/labs/ucx/hive_metastore/table_size.py b/src/databricks/labs/ucx/hive_metastore/table_size.py
index aadb7576e5..3e5c61f81c 100644
--- a/src/databricks/labs/ucx/hive_metastore/table_size.py
+++ b/src/databricks/labs/ucx/hive_metastore/table_size.py
@@ -22,7 +22,7 @@ class TableSize:
     size_in_bytes: int
 
 
-class TableSizeCrawler(CrawlerBase):
+class TableSizeCrawler(CrawlerBase[TableSize]):
     def __init__(self, backend: SqlBackend, schema, include_databases: list[str] | None = None):
         """
         Initializes a TablesSizeCrawler instance.
diff --git a/src/databricks/labs/ucx/hive_metastore/tables.py b/src/databricks/labs/ucx/hive_metastore/tables.py
index 7f8ecdfca3..f935aada95 100644
--- a/src/databricks/labs/ucx/hive_metastore/tables.py
+++ b/src/databricks/labs/ucx/hive_metastore/tables.py
@@ -476,7 +476,7 @@ def _describe(self, catalog: str, database: str, table: str) -> Table | None:
             return None
 
 
-class FasterTableScanCrawler(CrawlerBase):
+class FasterTableScanCrawler(CrawlerBase[Table]):
    """
    FasterTableScanCrawler is a specialized version of TablesCrawler that uses spark._jsparkSession
    to utilize faster scanning with Scala APIs.
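The hunks above and below all apply the same change: each crawler now parameterizes CrawlerBase with the row type it produces. The sketch that follows illustrates why that matters for type checking; it is a simplified stand-in, not the actual UCX CrawlerBase API, and the method names and the sample Udf row are assumptions made for the example.

# Simplified sketch of a result-typed crawler; not the real CrawlerBase implementation.
from dataclasses import dataclass
from typing import Generic, Iterable, TypeVar

Result = TypeVar("Result")


class CrawlerBase(Generic[Result]):
    """Stand-in base class: subclasses declare the row type they emit."""

    def _crawl(self) -> Iterable[Result]:
        raise NotImplementedError

    def snapshot(self) -> list[Result]:
        # Because each subclass binds Result, type checkers know the element type here.
        return list(self._crawl())


@dataclass
class Udf:
    catalog: str
    database: str
    name: str


class UdfsCrawler(CrawlerBase[Udf]):
    def _crawl(self) -> Iterable[Udf]:
        yield Udf("hive_metastore", "default", "my_udf")


udfs: list[Udf] = UdfsCrawler().snapshot()  # checked statically as list[Udf]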
diff --git a/src/databricks/labs/ucx/hive_metastore/udfs.py b/src/databricks/labs/ucx/hive_metastore/udfs.py
index 6bfd173449..6ee1eefd38 100644
--- a/src/databricks/labs/ucx/hive_metastore/udfs.py
+++ b/src/databricks/labs/ucx/hive_metastore/udfs.py
@@ -33,7 +33,7 @@ def key(self) -> str:
         return f"{self.catalog}.{self.database}.{self.name}".lower()
 
 
-class UdfsCrawler(CrawlerBase):
+class UdfsCrawler(CrawlerBase[Udf]):
     def __init__(self, backend: SqlBackend, schema: str, include_databases: list[str] | None = None):
         """
         Initializes a UdfsCrawler instance.
diff --git a/src/databricks/labs/ucx/installer/workflows.py b/src/databricks/labs/ucx/installer/workflows.py
index ae766fb730..5248303dbc 100644
--- a/src/databricks/labs/ucx/installer/workflows.py
+++ b/src/databricks/labs/ucx/installer/workflows.py
@@ -112,8 +112,7 @@
         f'--parent_run_id=' + dbutils.widgets.get('parent_run_id'))
 """
 
-EXPORT_TO_EXCEL_NOTEBOOK = """
-# Databricks notebook source
+EXPORT_TO_EXCEL_NOTEBOOK = """# Databricks notebook source
 # MAGIC %md
 # MAGIC ##### Exporter of UCX assessment results
 # MAGIC ##### Instructions:
@@ -165,7 +164,7 @@
 
 # DBTITLE 1,Assessment Export
 FILE_NAME = "ucx_assessment_main.xlsx"
-TMP_PATH = f"/Workspace{ctx.installation.install_folder()}/tmp/"
+TMP_PATH = f"/Workspace{{ctx.installation.install_folder()}}/tmp/"
 DOWNLOAD_PATH = "/dbfs/FileStore/excel-export"
 
 
@@ -194,8 +193,10 @@ def _to_excel(dataset: Dataset, writer: ...) -> None:
 
 def _render_export() -> None:
     '''Render an HTML link for downloading the results.'''
-    html_content = f'''
-    Export Results
-    Download Results
-    '''
+    html_content = '''
+
+    Export Results
+
+    Download Results
+    '''
     displayHTML(html_content)
@@ -597,6 +598,7 @@ def create_jobs(self) -> None:
         self.remove_jobs(keep=desired_workflows)
         self._install_state.save()
         self._create_debug(remote_wheels)
+        self._create_export(remote_wheels)
         self._create_readme()
 
     def remove_jobs(self, *, keep: set[str] | None = None) -> None:
diff --git a/src/databricks/labs/ucx/source_code/base.py b/src/databricks/labs/ucx/source_code/base.py
index 4e22e74a1a..3457751688 100644
--- a/src/databricks/labs/ucx/source_code/base.py
+++ b/src/databricks/labs/ucx/source_code/base.py
@@ -96,7 +96,10 @@ def message_relative_to(self, base: Path, *, default: Path | None = None) -> str
             logger.debug(f'THIS IS A BUG! {advice.code}:{advice.message} has unknown path')
             if default is not None:
                 path = default
-        path = path.relative_to(base)
+        try:
+            path = path.relative_to(base)
+        except ValueError:
+            logger.debug(f'Not a relative path: {path} to base: {base}')
         # increment start_line because it is 0-based whereas IDEs are usually 1-based
         return f"./{path.as_posix()}:{advice.start_line+1}:{advice.start_col}: [{advice.code}] {advice.message}"
 
diff --git a/src/databricks/labs/ucx/source_code/notebooks/sources.py b/src/databricks/labs/ucx/source_code/notebooks/sources.py
index febe224ed3..afc45a04cc 100644
--- a/src/databricks/labs/ucx/source_code/notebooks/sources.py
+++ b/src/databricks/labs/ucx/source_code/notebooks/sources.py
@@ -163,7 +163,13 @@ def __init__(
         self._python_trees: dict[PythonCell, Tree] = {}  # the original trees to be linted
 
     def lint(self) -> Iterable[Advice]:
-        yield from self._load_tree_from_notebook(self._notebook, True)
+        has_failure = False
+        for advice in self._load_tree_from_notebook(self._notebook, True):
+            if isinstance(advice, Failure):  # happens when a cell is unparseable
+                has_failure = True
+            yield advice
+        if has_failure:
+            return
         for cell in self._notebook.cells:
             if not self._context.is_supported(cell.language.language):
                 continue
diff --git a/tests/integration/source_code/solacc.py b/tests/integration/source_code/solacc.py
index 5f61b30b40..1fe921f3fb 100644
--- a/tests/integration/source_code/solacc.py
+++ b/tests/integration/source_code/solacc.py
@@ -1,8 +1,11 @@
+import dataclasses
+import json
 import logging
 import os
 import shutil
 import sys
 from dataclasses import dataclass, field
+from datetime import datetime, timezone
 from pathlib import Path
 
 import requests
@@ -20,6 +23,8 @@
 
 this_file = Path(__file__)
 dist = (this_file / '../../../../dist').resolve().absolute()
+build = dist.parent / "build"
+build.mkdir(exist_ok=True)
 
 
 def _get_repos_to_clone() -> dict[str, str]:
@@ -72,23 +77,41 @@ def _collect_uninferrable_count(advices: list[LocatedAdvice]):
 
 
 def _collect_unparseable(advices: list[LocatedAdvice]):
-    return set(located_advice for located_advice in advices if located_advice.advice.code == 'parse-error')
+    return list(located_advice for located_advice in advices if located_advice.advice.code == 'parse-error')
 
 
 def _print_advices(advices: list[LocatedAdvice]):
-    for located_advice in advices:
-        message = located_advice.message_relative_to(dist.parent)
-        sys.stdout.write(f"{message}\n")
+    messages = list(
+        located_advice.message_relative_to(dist.parent).replace('\n', ' ') + '\n' for located_advice in advices
+    )
+    if os.getenv("CI"):
+        advices_path = build / "advices.txt"
+        with advices_path.open("a") as advices_file:
+            advices_file.writelines(messages)
+    else:
+        for message in messages:
+            sys.stdout.write(message)
+
+
+@dataclass
+class _SolaccStats:
+    run_id: str
+    name: str
+    start_timestamp: datetime
+    end_timestamp: datetime
+    files_count: int
+    files_size: int
 
 
 @dataclass
 class _SolaccContext:
     unparsed_files_path: Path | None = None
-    files_to_skip: set[str] | None = None
+    files_to_skip: set[Path] | None = None
     total_count = 0
     parseable_count = 0
     uninferrable_count = 0
     missing_imports: dict[str, dict[str, int]] = field(default_factory=dict)
+    stats: list[_SolaccStats] = field(default_factory=list)
 
     @classmethod
     def create(cls, for_all_dirs: bool):
@@ -98,11 +121,11 @@ def create(cls, for_all_dirs: bool):
         unparsed_path = Path(Path(__file__).parent, "solacc-unparsed.txt")
         if unparsed_path.exists():
             os.remove(unparsed_path)
-        files_to_skip: set[str] | None = None
+        files_to_skip: set[Path] | None = None
         malformed = Path(__file__).parent / "solacc-malformed.txt"
         if for_all_dirs and malformed.exists():
             lines = malformed.read_text(encoding="utf-8").split("\n")
-            files_to_skip = set(line for line in lines if len(line) > 0 and not line.startswith("#"))
+            files_to_skip = set(dist / line for line in lines if len(line) > 0 and not line.startswith("#"))
         return _SolaccContext(unparsed_files_path=unparsed_path, files_to_skip=files_to_skip)
 
     def register_missing_import(self, missing_import: str):
@@ -153,7 +176,19 @@ def _lint_dir(solacc: _SolaccContext, soldir: Path):
     files_to_skip = set(solacc.files_to_skip) if solacc.files_to_skip else set()
     linted_files = set(files_to_skip)
     # lint solution
+    start_timestamp = datetime.now(timezone.utc)
     advices = list(ctx.local_code_linter.lint_path(soldir, linted_files))
+    end_timestamp = datetime.now(timezone.utc)
+    # record stats
+    stats = _SolaccStats(
+        run_id=os.getenv("GITHUB_RUN_ATTEMPT") or "local",
+        start_timestamp=start_timestamp,
+        end_timestamp=end_timestamp,
+        name=soldir.name,
+        files_count=len(all_files),
+        files_size=sum(path.stat().st_size for path in [soldir / filename for filename in all_files]),
+    )
+    solacc.stats.append(stats)
     # collect unparseable files
     unparseables = _collect_unparseable(advices)
     solacc.parseable_count += len(linted_files) - len(files_to_skip) - len(set(advice.path for advice in unparseables))
@@ -162,7 +197,11 @@ def _lint_dir(solacc: _SolaccContext, soldir: Path):
         logger.error(f"Error during parsing of {unparseable.path}: {unparseable.advice.message}".replace("\n", " "))
         # populate solacc-unparsed.txt
         with solacc.unparsed_files_path.open(mode="a", encoding="utf-8") as f:
-            f.write(unparseable.path.relative_to(dist).as_posix())
+            try:
+                path = unparseable.path.relative_to(dist)
+            except ValueError:
+                path = unparseable.path
+            f.write(path.as_posix())
             f.write("\n")
     # collect missing imports
     for missing_import in _collect_missing_imports(advices):
@@ -178,8 +217,8 @@ def _lint_dir(solacc: _SolaccContext, soldir: Path):
 def _lint_repos(clone_urls, sol_to_lint: str | None):
     solacc = _SolaccContext.create(sol_to_lint is not None)
     if sol_to_lint:
-        # don't clone if linting just one file, assumption is we're troubleshooting
-        _lint_dir(solacc, dist / sol_to_lint)
+        sol_dir = _clone_repo(clone_urls[sol_to_lint], sol_to_lint)
+        _lint_dir(solacc, sol_dir)
     else:
         names: list[str] = list(clone_urls.keys())
         for name in sorted(names, key=str.casefold):
@@ -199,6 +238,12 @@ def _lint_repos(clone_urls, sol_to_lint: str | None):
             f"not computed: {solacc.uninferrable_count}"
         )
     solacc.log_missing_imports()
+    # log stats
+    stats_path = build / "stats.json"
+    with stats_path.open("a") as stats_file:
+        for stats in solacc.stats:
+            message = json.dumps(dataclasses.asdict(stats), default=str)
+            stats_file.writelines([message])
     # fail the job if files are unparseable
     if parseable_pct < 100:
         sys.exit(1)
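Two of the hunks above (source_code/base.py and tests/integration/source_code/solacc.py) apply the same guard around Path.relative_to, which raises ValueError when the path does not live under the given base directory. Below is a minimal, self-contained sketch of that pattern; the function name and the sample paths are made up for illustration.

# Minimal sketch of the relative_to fallback used in the patch; paths are illustrative only.
from pathlib import Path


def relative_or_original(path: Path, base: Path) -> Path:
    """Return `path` relative to `base`, or `path` unchanged if it is not under `base`."""
    try:
        return path.relative_to(base)
    except ValueError:  # raised when `path` is not located under `base`
        return path


print(relative_or_original(Path("/repo/dist/solacc/notebook.py"), Path("/repo/dist")))  # solacc/notebook.py
print(relative_or_original(Path("/tmp/other.py"), Path("/repo/dist")))  # /tmp/other.py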