diff --git a/src/databricks/labs/ucx/source_code/notebooks/cells.py b/src/databricks/labs/ucx/source_code/notebooks/cells.py
index 020cd279bc..712acee54a 100644
--- a/src/databricks/labs/ucx/source_code/notebooks/cells.py
+++ b/src/databricks/labs/ucx/source_code/notebooks/cells.py
@@ -484,9 +484,17 @@ def _split(cls, code: str) -> list[str]:
         Sources:
             https://docs.databricks.com/en/libraries/notebooks-python-libraries.html#manage-libraries-with-pip-commands
         """
+        # strip preliminary comments: prefer an explicit '%pip' / '!pip' marker so that the word
+        # "pip" inside a leading comment (e.g. a cell title) is not mistaken for the command itself
+        magic_idxs = [idx for idx in (code.find("%pip"), code.find("!pip")) if idx >= 0]
+        pip_idx = min(magic_idxs) if magic_idxs else code.find("pip")
+        # str.find returns -1 when nothing matches; slicing with -1 would keep only the last character
+        if pip_idx > 0:
+            code = code[pip_idx:]
+        # look for standalone '\n'
         match = cls._splitter.search(code)
         if match:
             code = code[: match.start()]  # Remove code after non-escaped newline
+        # make single line
         code = code.replace("\\\n", " ")
-        lexer = shlex.split(code, posix=True)
-        return list(lexer)
+        return shlex.split(code, posix=True)
diff --git a/tests/integration/source_code/test_cells.py b/tests/integration/source_code/test_cells.py
new file mode 100644
index 0000000000..a1621ad94a
--- /dev/null
+++ b/tests/integration/source_code/test_cells.py
@@ -0,0 +1,27 @@
+from pathlib import Path
+
+from databricks.sdk.service.workspace import Language
+
+from databricks.labs.ucx.source_code.base import CurrentSessionState
+from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph
+from databricks.labs.ucx.source_code.linters.files import FileLoader
+from databricks.labs.ucx.source_code.notebooks.sources import Notebook
+
+
+def test_malformed_pip_cell_is_supported(simple_ctx):
+    source = """# Databricks notebook source
+# MAGIC %md This notebook sets up the companion cluster(s) to run the solution accelerator. It also creates the Workflow to illustrate the order of execution. Happy exploring! 
+
+# COMMAND ----------
+
+# DBTITLE 0,Install util packages
+# MAGIC %pip install git+https://github.com/databricks-academy/dbacademy@v1.0.13 git+https://github.com/databricks-industry-solutions/notebook-solution-companion@safe-print-html --quiet --disable-pip-version-check
+
+"""
+    notebook = Notebook.parse(Path(""), source=source, default_language=Language.PYTHON)
+    dependency = Dependency(FileLoader(), Path(""))
+    parent = DependencyGraph(
+        dependency, None, simple_ctx.dependency_resolver, simple_ctx.path_lookup, CurrentSessionState()
+    )
+    problems = notebook.build_dependency_graph(parent)
+    assert not problems