Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix failing integration tests that perform a real assessment #2736

Merged
merged 1 commit into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 4 additions & 10 deletions tests/integration/assessment/test_ext_hms.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import dataclasses
import datetime as dt
import io

from databricks.labs.lsql.backends import CommandExecutionBackend
from databricks.sdk.service.iam import PermissionLevel
Expand All @@ -9,12 +8,11 @@
def test_running_real_assessment_job_ext_hms(
ws,
installation_ctx,
product_info,
env_or_skip,
make_cluster_policy,
make_cluster_policy_permissions,
make_notebook,
make_job,
make_dashboard,
populate_for_linting,
):
cluster_id = env_or_skip('TEST_EXT_HMS_CLUSTER_ID')
ext_hms_ctx = installation_ctx.replace(
Expand All @@ -41,14 +39,10 @@ def test_running_real_assessment_job_ext_hms(
ext_hms_ctx.__dict__['include_object_permissions'] = [f"cluster-policies:{cluster_policy.policy_id}"]
ext_hms_ctx.workspace_installation.run()

populate_for_linting(installation_ctx.installation)

# Under ideal circumstances this can take 10-16 minutes (depending on whether there are compute instances available
# via the integration pool). Allow some margin to reduce spurious failures.
notebook_path = make_notebook(content=io.BytesIO(b"import xyz"))
job = make_job(notebook_path=notebook_path)
installation_ctx.config.include_job_ids = [job.job_id]

dashboard = make_dashboard()
installation_ctx.config.include_dashboard_ids = [dashboard.id]
ext_hms_ctx.deployed_workflows.run_workflow("assessment", max_wait=dt.timedelta(minutes=25))

# assert the workflow is successful. the tasks on sql warehouse will fail so skip checking them
Expand Down
14 changes: 6 additions & 8 deletions tests/integration/assessment/test_workflows.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import io
from datetime import timedelta

from databricks.sdk.errors import NotFound, InvalidParameterValue
Expand All @@ -8,7 +7,11 @@

@retried(on=[NotFound, InvalidParameterValue], timeout=timedelta(minutes=8))
def test_running_real_assessment_job(
ws, installation_ctx, make_cluster_policy, make_cluster_policy_permissions, make_job, make_notebook, make_dashboard
ws,
installation_ctx,
make_cluster_policy,
make_cluster_policy_permissions,
populate_for_linting,
):
ws_group, _ = installation_ctx.make_ucx_group()
cluster_policy = make_cluster_policy()
Expand All @@ -20,12 +23,7 @@ def test_running_real_assessment_job(
installation_ctx.__dict__['include_object_permissions'] = [f"cluster-policies:{cluster_policy.policy_id}"]
installation_ctx.workspace_installation.run()

notebook_path = make_notebook(content=io.BytesIO(b"import xyz"))
job = make_job(notebook_path=notebook_path)
installation_ctx.config.include_job_ids = [job.job_id]

dashboard = make_dashboard()
installation_ctx.config.include_dashboard_ids = [dashboard.id]
populate_for_linting(installation_ctx.installation)

installation_ctx.deployed_workflows.run_workflow("assessment")
assert installation_ctx.deployed_workflows.validate_step("assessment")
Expand Down
27 changes: 27 additions & 0 deletions tests/integration/conftest.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import io
import json
from collections.abc import Callable, Generator
import functools
Expand All @@ -9,11 +10,15 @@
from functools import cached_property
import shutil
import subprocess
from pathlib import Path

import pytest # pylint: disable=wrong-import-order
import yaml
from databricks.labs.blueprint.commands import CommandExecutor
from databricks.labs.blueprint.entrypoint import is_in_debug
from databricks.labs.blueprint.installation import Installation, MockInstallation
from databricks.labs.blueprint.parallel import Threads
from databricks.labs.blueprint.paths import WorkspacePath
from databricks.labs.blueprint.tui import MockPrompts
from databricks.labs.blueprint.wheels import ProductInfo
from databricks.labs.lsql.backends import SqlBackend
Expand Down Expand Up @@ -1175,3 +1180,25 @@ def _run(command: str) -> str:
except ValueError as err:
logger.debug(f"pytest_ignore_collect: error: {err}")
return False


@pytest.fixture
def populate_for_linting(ws, make_random, make_job, make_notebook, make_query, make_dashboard, watchdog_purge_suffix):
    """Fixture factory: returns a callable that seeds an installation with assets to lint.

    The callable creates one notebook-backed job and one query-backed dashboard,
    then rewrites the installation's ``config.yml`` so the assessment workflow
    only lints those two assets.
    """

    def populate_workspace(installation):
        # keep linting scope to minimum to avoid test timeouts
        install_folder = installation.install_folder()
        dummy_path = Path(install_folder) / f"dummy-{make_random(4)}-{watchdog_purge_suffix}"
        notebook = make_notebook(path=dummy_path, content=io.BytesIO(b"spark.read.parquet('dbfs://mnt/foo/bar')"))
        linted_job = make_job(notebook_path=notebook)
        linted_query = make_query(sql_query='SELECT * from parquet.`dbfs://mnt/foo/bar`')
        linted_dashboard = make_dashboard(query=linted_query)
        # can't use installation.load(WorkspaceConfig)/installation.save() because they populate empty credentials
        config_path = WorkspacePath(ws, install_folder) / "config.yml"
        config = yaml.safe_load(config_path.read_text())
        config["include_job_ids"] = [linted_job.job_id]
        config["include_dashboard_ids"] = [linted_dashboard.id]
        config_path.unlink()
        config_path.write_text(yaml.dump(config))

    return populate_workspace