ADAP-1166: Add table format telemetry reporting to Spark adapter (#517) #1174

Merged 2 commits on Feb 5, 2025
.github/workflows/main.yml: 8 changes (4 additions, 4 deletions)

@@ -37,7 +37,7 @@ jobs:
   code-quality:
     name: code-quality

-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     timeout-minutes: 10

     steps:
@@ -69,7 +69,7 @@ jobs:
   unit:
     name: unit test / python ${{ matrix.python-version }}

-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     timeout-minutes: 10

     strategy:
@@ -114,7 +114,7 @@ jobs:
   build:
     name: build packages

-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04

     outputs:
       is_alpha: ${{ steps.check-is-alpha.outputs.is_alpha }}
@@ -172,7 +172,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-latest, macos-14, windows-latest]
+        os: [ubuntu-22.04, macos-14, windows-latest]
         python-version: ["3.9", "3.10", "3.11", "3.12"]
         dist-type: ["whl", "gz"]
dbt/adapters/spark/impl.py: 20 changes (20 additions, 0 deletions)

@@ -516,6 +516,26 @@ def debug_query(self) -> None:
         """Override for DebugTask method"""
         self.execute("select 1 as id")

+    @classmethod
+    def _get_adapter_specific_run_info(cls, config: RelationConfig) -> Dict[str, Any]:
+        table_format: Optional[str] = None
+        # Full table_format support within this adapter is coming. Until then, for telemetry,
+        # we're relying on table_formats_within_file_formats - a subset of file_format values
+        table_formats_within_file_formats = ["delta", "iceberg", "hive", "hudi"]
+
+        if (
+            config
+            and hasattr(config, "_extra")
+            and (file_format := config._extra.get("file_format"))
+        ):
+            if file_format in table_formats_within_file_formats:
+                table_format = file_format
+
+        return {
+            "adapter_type": "spark",
+            "table_format": table_format,
+        }
+

 # spark does something interesting with joins when both tables have the same
 # static values for the join condition and complains that the join condition is
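For illustration only (not part of the PR diff), a minimal sketch of how the new hook resolves table_format from a model config. The SimpleNamespace stand-in for the RelationConfig argument is an assumption for this example; any object exposing an _extra dict behaves the same way here:

from types import SimpleNamespace

from dbt.adapters.spark.impl import SparkAdapter

# Hypothetical stand-in for a RelationConfig carrying model config extras.
config = SimpleNamespace(_extra={"file_format": "iceberg"})
print(SparkAdapter._get_adapter_specific_run_info(config))
# -> {'adapter_type': 'spark', 'table_format': 'iceberg'}

# file_format values outside the tracked subset report table_format=None.
config = SimpleNamespace(_extra={"file_format": "parquet"})
print(SparkAdapter._get_adapter_specific_run_info(config))
# -> {'adapter_type': 'spark', 'table_format': None}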
tests/unit/test_adapter_telemetry.py: 51 changes (51 additions, 0 deletions), new file

@@ -0,0 +1,51 @@
from unittest import mock

import dbt.adapters.__about__
import dbt.adapters.spark.__version__

from dbt.adapters.spark.impl import SparkAdapter
from dbt.adapters.base.relation import AdapterTrackingRelationInfo


def assert_telemetry_data(adapter_type: str, file_format: str):
    table_formats_within_file_formats = ["delta", "iceberg", "hive", "hudi"]
    expected_table_format = None
    if file_format in table_formats_within_file_formats:
        expected_table_format = file_format

    mock_model_config = mock.MagicMock()
    mock_model_config._extra = {
        "adapter_type": adapter_type,
        "file_format": file_format,
    }

    res = SparkAdapter.get_adapter_run_info(mock_model_config)

    assert res.adapter_name == adapter_type
    assert res.base_adapter_version == dbt.adapters.__about__.version
    assert res.adapter_version == dbt.adapters.spark.__version__.version

    assert res.model_adapter_details == {
        "adapter_type": adapter_type,
        "table_format": expected_table_format,
    }

    assert type(res) is AdapterTrackingRelationInfo


def test_telemetry_with_spark_details():
    spark_file_formats = [
        "text",
        "csv",
        "json",
        "jdbc",
        "parquet",
        "orc",
        "hive",
        "delta",
        "iceberg",
        "libsvm",
        "hudi",
    ]
    for file_format in spark_file_formats:
        assert_telemetry_data("spark", file_format)