From 6e671a8db8854b5fbc3f461e3978a1fe5c5dfd76 Mon Sep 17 00:00:00 2001 From: Tudor Date: Thu, 16 Jan 2025 12:16:39 +0200 Subject: [PATCH 1/2] ADAP-1166: Add table format telemetry reporting to Spark adapter (#517) --- dbt/adapters/spark/impl.py | 20 +++++++++++ tests/unit/test_adapter_telemetry.py | 51 ++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 tests/unit/test_adapter_telemetry.py diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index d33ebde20..5f8178a9d 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -516,6 +516,26 @@ def debug_query(self) -> None: """Override for DebugTask method""" self.execute("select 1 as id") + @classmethod + def _get_adapter_specific_run_info(cls, config: RelationConfig) -> Dict[str, Any]: + table_format: Optional[str] = None + # Full table_format support within this adapter is coming. Until then, for telemetry, + # we're relying on table_formats_within_file_formats - a subset of file_format values + table_formats_within_file_formats = ["delta", "iceberg", "hive", "hudi"] + + if ( + config + and hasattr(config, "_extra") + and (file_format := config._extra.get("file_format")) + ): + if file_format in table_formats_within_file_formats: + table_format = file_format + + return { + "adapter_type": "spark", + "table_format": table_format, + } + # spark does something interesting with joins when both tables have the same # static values for the join condition and complains that the join condition is diff --git a/tests/unit/test_adapter_telemetry.py b/tests/unit/test_adapter_telemetry.py new file mode 100644 index 000000000..b0de952b6 --- /dev/null +++ b/tests/unit/test_adapter_telemetry.py @@ -0,0 +1,51 @@ +from unittest import mock + +import dbt.adapters.spark.__version__ + +from dbt.adapters.spark.impl import SparkAdapter +from dbt.adapters.base.relation import AdapterTrackingRelationInfo + + +def assert_telemetry_data(adapter_type: str, file_format: str): + table_formats_within_file_formats = ["delta", "iceberg", "hive", "hudi"] + expected_table_format = None + if file_format in table_formats_within_file_formats: + expected_table_format = file_format + + mock_model_config = mock.MagicMock() + mock_model_config._extra = mock.MagicMock() + mock_model_config._extra = { + "adapter_type": adapter_type, + "file_format": file_format, + } + + res = SparkAdapter.get_adapter_run_info(mock_model_config) + + assert res.adapter_name == adapter_type + assert res.base_adapter_version == dbt.adapters.__about__.version + assert res.adapter_version == dbt.adapters.spark.__version__.version + + assert res.model_adapter_details == { + "adapter_type": adapter_type, + "table_format": expected_table_format, + } + + assert type(res) is AdapterTrackingRelationInfo + + +def test_telemetry_with_spark_details(): + spark_file_formats = [ + "text", + "csv", + "json", + "jdbc", + "parquet", + "orc", + "hive", + "delta", + "iceberg", + "libsvm", + "hudi", + ] + for file_format in spark_file_formats: + assert_telemetry_data("spark", file_format) From 3aff81e019cab1182e5f7047f30d4816ea1ff37d Mon Sep 17 00:00:00 2001 From: Mike Alfare Date: Wed, 5 Feb 2025 14:53:36 -0500 Subject: [PATCH 2/2] pin runner to ubuntu-22.04 for PR checks --- .github/workflows/main.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 287e5acb7..9544277b4 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -37,7 +37,7 @@ jobs: code-quality: name: code-quality - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 timeout-minutes: 10 steps: @@ -69,7 +69,7 @@ jobs: unit: name: unit test / python ${{ matrix.python-version }} - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 timeout-minutes: 10 strategy: @@ -114,7 +114,7 @@ jobs: build: name: build packages - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 outputs: is_alpha: ${{ steps.check-is-alpha.outputs.is_alpha }} @@ -172,7 +172,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, macos-14, windows-latest] + os: [ubuntu-22.04, macos-14, windows-latest] python-version: ["3.9", "3.10", "3.11", "3.12"] dist-type: ["whl", "gz"]