Skip to content

Commit

Permalink
Strip preliminary comments in pip cells (databrickslabs#2763)
Browse files Browse the repository at this point in the history
## Changes
The current implementation fails when a pip command is preceded by
non-MAGIC comments.
This PR fixes the issue by stripping those preliminary comments.

### Linked issues
None

### Functionality
None

### Tests
- [x] added unit tests

Co-authored-by: Eric Vergnaud <eric.vergnaud@databricks.com>
  • Loading branch information
ericvergnaud and ericvergnaud authored Sep 27, 2024
1 parent d14a2cf commit 97b9996
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 2 deletions.
10 changes: 8 additions & 2 deletions src/databricks/labs/ucx/source_code/notebooks/cells.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,9 +484,15 @@ def _split(cls, code: str) -> list[str]:
Sources:
https://docs.databricks.com/en/libraries/notebooks-python-libraries.html#manage-libraries-with-pip-commands
"""
# strip preliminary comments
pip_idx = code.find("pip")
if pip_idx > 0 and code[pip_idx - 1] in {'%', '!'}:
pip_idx -= 1
code = code[pip_idx:]
# look for standalone '\n'
match = cls._splitter.search(code)
if match:
code = code[: match.start()] # Remove code after non-escaped newline
# make single line
code = code.replace("\\\n", " ")
lexer = shlex.split(code, posix=True)
return list(lexer)
return shlex.split(code, posix=True)
27 changes: 27 additions & 0 deletions tests/integration/source_code/test_cells.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from pathlib import Path

from databricks.sdk.service.workspace import Language

from databricks.labs.ucx.source_code.base import CurrentSessionState
from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph
from databricks.labs.ucx.source_code.linters.files import FileLoader
from databricks.labs.ucx.source_code.notebooks.sources import Notebook


def test_malformed_pip_cell_is_supported(simple_ctx):
    """Check that a ``%pip`` cell preceded by a non-MAGIC comment yields no problems.

    The notebook text below places a ``# DBTITLE`` comment line directly before
    the ``# MAGIC %pip install ...`` line. The notebook is parsed and its
    dependency graph is built; the test passes when no problems are reported.
    """
    # The literal is kept flush-left on purpose: its lines are the notebook's
    # content, so any added indentation would change what is parsed.
    notebook_text = """# Databricks notebook source
# MAGIC %md This notebook sets up the companion cluster(s) to run the solution accelerator. It also creates the Workflow to illustrate the order of execution. Happy exploring!
# COMMAND ----------
# DBTITLE 0,Install util packages
# MAGIC %pip install git+https://github.com/databricks-academy/dbacademy@v1.0.13 git+https://github.com/databricks-industry-solutions/notebook-solution-companion@safe-print-html --quiet --disable-pip-version-check
"""
    parsed_notebook = Notebook.parse(
        Path(""),
        source=notebook_text,
        default_language=Language.PYTHON,
    )
    # Root graph with no parent (second argument is None), wired to the
    # resolver and path lookup supplied by the fixture.
    root_graph = DependencyGraph(
        Dependency(FileLoader(), Path("")),
        None,
        simple_ctx.dependency_resolver,
        simple_ctx.path_lookup,
        CurrentSessionState(),
    )
    reported = parsed_notebook.build_dependency_graph(root_graph)
    assert not reported

0 comments on commit 97b9996

Please sign in to comment.