Skip to content

Commit

Permalink
Fix failing integration tests that perform a real assessment (databrickslabs#2736)
Browse files Browse the repository at this point in the history

## Changes
Ensure 'assessment' workflow only runs minimal assessment in integration
tests

### Linked issues
None

### Functionality
None

### Tests
- [x] changed integration tests

Co-authored-by: Eric Vergnaud <eric.vergnaud@databricks.com>
  • Loading branch information
2 people authored and jgarciaf106 committed Sep 26, 2024
1 parent 93d496e commit ee20112
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 18 deletions.
14 changes: 4 additions & 10 deletions tests/integration/assessment/test_ext_hms.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import dataclasses
import datetime as dt
import io

from databricks.labs.lsql.backends import CommandExecutionBackend
from databricks.sdk.service.iam import PermissionLevel
Expand All @@ -9,12 +8,11 @@
def test_running_real_assessment_job_ext_hms(
ws,
installation_ctx,
product_info,
env_or_skip,
make_cluster_policy,
make_cluster_policy_permissions,
make_notebook,
make_job,
make_dashboard,
populate_for_linting,
):
cluster_id = env_or_skip('TEST_EXT_HMS_CLUSTER_ID')
ext_hms_ctx = installation_ctx.replace(
Expand All @@ -41,14 +39,10 @@ def test_running_real_assessment_job_ext_hms(
ext_hms_ctx.__dict__['include_object_permissions'] = [f"cluster-policies:{cluster_policy.policy_id}"]
ext_hms_ctx.workspace_installation.run()

populate_for_linting(installation_ctx.installation)

# Under ideal circumstances this can take 10-16 minutes (depending on whether there are compute instances available
# via the integration pool). Allow some margin to reduce spurious failures.
notebook_path = make_notebook(content=io.BytesIO(b"import xyz"))
job = make_job(notebook_path=notebook_path)
installation_ctx.config.include_job_ids = [job.job_id]

dashboard = make_dashboard()
installation_ctx.config.include_dashboard_ids = [dashboard.id]
ext_hms_ctx.deployed_workflows.run_workflow("assessment", max_wait=dt.timedelta(minutes=25))

# assert the workflow is successful. the tasks on sql warehouse will fail so skip checking them
Expand Down
14 changes: 6 additions & 8 deletions tests/integration/assessment/test_workflows.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import io
from datetime import timedelta

from databricks.sdk.errors import NotFound, InvalidParameterValue
Expand All @@ -8,7 +7,11 @@

@retried(on=[NotFound, InvalidParameterValue], timeout=timedelta(minutes=8))
def test_running_real_assessment_job(
ws, installation_ctx, make_cluster_policy, make_cluster_policy_permissions, make_job, make_notebook, make_dashboard
ws,
installation_ctx,
make_cluster_policy,
make_cluster_policy_permissions,
populate_for_linting,
):
ws_group, _ = installation_ctx.make_ucx_group()
cluster_policy = make_cluster_policy()
Expand All @@ -20,12 +23,7 @@ def test_running_real_assessment_job(
installation_ctx.__dict__['include_object_permissions'] = [f"cluster-policies:{cluster_policy.policy_id}"]
installation_ctx.workspace_installation.run()

notebook_path = make_notebook(content=io.BytesIO(b"import xyz"))
job = make_job(notebook_path=notebook_path)
installation_ctx.config.include_job_ids = [job.job_id]

dashboard = make_dashboard()
installation_ctx.config.include_dashboard_ids = [dashboard.id]
populate_for_linting(installation_ctx.installation)

installation_ctx.deployed_workflows.run_workflow("assessment")
assert installation_ctx.deployed_workflows.validate_step("assessment")
Expand Down
27 changes: 27 additions & 0 deletions tests/integration/conftest.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import io
import json
from collections.abc import Callable, Generator
import functools
Expand All @@ -9,11 +10,15 @@
from functools import cached_property
import shutil
import subprocess
from pathlib import Path

import pytest # pylint: disable=wrong-import-order
import yaml
from databricks.labs.blueprint.commands import CommandExecutor
from databricks.labs.blueprint.entrypoint import is_in_debug
from databricks.labs.blueprint.installation import Installation, MockInstallation
from databricks.labs.blueprint.parallel import Threads
from databricks.labs.blueprint.paths import WorkspacePath
from databricks.labs.blueprint.tui import MockPrompts
from databricks.labs.blueprint.wheels import ProductInfo
from databricks.labs.lsql.backends import SqlBackend
Expand Down Expand Up @@ -1175,3 +1180,25 @@ def _run(command: str) -> str:
except ValueError as err:
logger.debug(f"pytest_ignore_collect: error: {err}")
return False


@pytest.fixture
def populate_for_linting(ws, make_random, make_job, make_notebook, make_query, make_dashboard, watchdog_purge_suffix):
    """Fixture factory: seed an installation with a minimal set of lintable assets.

    Returns a callable that, given an installation, creates one notebook-backed
    job and one query-backed dashboard, then rewrites the installation's
    ``config.yml`` so the assessment workflow only inspects those two objects.
    """

    def populate_workspace(installation):
        # keep linting scope to minimum to avoid test timeouts
        dummy_path = Path(installation.install_folder()) / f"dummy-{make_random(4)}-{watchdog_purge_suffix}"
        notebook = make_notebook(path=dummy_path, content=io.BytesIO(b"spark.read.parquet('dbfs://mnt/foo/bar')"))
        linted_job = make_job(notebook_path=notebook)
        linted_dashboard = make_dashboard(query=make_query(sql_query='SELECT * from parquet.`dbfs://mnt/foo/bar`'))
        # can't use installation.load(WorkspaceConfig)/installation.save() because they populate empty credentials
        config_path = WorkspacePath(ws, installation.install_folder()) / "config.yml"
        config = yaml.safe_load(config_path.read_text())
        config["include_job_ids"] = [linted_job.job_id]
        config["include_dashboard_ids"] = [linted_dashboard.id]
        serialized = yaml.dump(config)
        config_path.unlink()
        config_path.write_text(serialized)

    return populate_workspace

0 comments on commit ee20112

Please sign in to comment.