Remove columns schema redundancy for external sources #47

Merged Nov 23, 2023 (1 commit)
6 changes: 5 additions & 1 deletion README.md
@@ -172,7 +172,11 @@ Currently, these rules can cause linting failures:
The dbt package [dbt-external-tables][dbt-external-tables] gives dbt support for staging and managing
[external tables][bq-external-tables]. These sources do not produce any compiled SQL in the manifest, so it is not
possible for the dry runner to predict their schema. Therefore, you must specify the resulting schema manually in the
metadata of the source.

However, if the `columns` schema is already defined under the table's `name` entry in the YAML config, you do not need to repeat it as `dry_run_columns` under `external`: the dry runner falls back to the `columns` schema whenever `dry_run_columns` is not specified. This avoids duplicated schema definitions.
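As a sketch of this fallback (the source, table, and column names here are hypothetical, not taken from the package's documentation), a source whose `columns` are declared once can omit `dry_run_columns` entirely:

```yaml
version: 2

sources:
  - name: my_external_source        # hypothetical source name
    tables:
      - name: my_external_table
        external:
          location: "gs://my-bucket/path/*"   # hypothetical GCS location
        columns:
          - name: id
            data_type: STRING
          - name: created_at
            data_type: TIMESTAMP
```

With no `dry_run_columns` under `external`, the dry runner would predict the table's schema from the two `columns` entries above.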

For example, if you were importing data from a GCS bucket:

```yaml
version: 2
# …
```
6 changes: 5 additions & 1 deletion dbt_dry_run/node_runner/source_runner.py
@@ -20,9 +20,13 @@ def run(self, node: Node) -> DryRunResult:
        if node.is_external_source():
            external_config = cast(ExternalConfig, node.external)
            try:
                # Use columns schema if dry_run_columns is not specified
                columns_to_map = (
                    external_config.dry_run_columns_map
                    if external_config.dry_run_columns
                    else node.columns
                )
                predicted_table = map_columns_to_table(columns_to_map)
            except (InvalidColumnSpecification, UnknownDataTypeException) as e:
                status = DryRunStatus.FAILURE
                exception = e
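The selection logic in `run` can be illustrated with a self-contained sketch. The dataclasses below are simplified stand-ins for the package's real manifest models (which carry many more fields), and `choose_columns` is a hypothetical helper that isolates the conditional shown in the diff:

```python
from dataclasses import dataclass, field
from typing import Dict, List


@dataclass
class Column:
    name: str
    data_type: str


@dataclass
class ExternalConfig:
    location: str
    dry_run_columns: List[Column] = field(default_factory=list)


def choose_columns(
    external: ExternalConfig, node_columns: Dict[str, Column]
) -> Dict[str, Column]:
    # Prefer an explicit dry_run_columns override; otherwise fall back
    # to the columns already declared on the source table.
    if external.dry_run_columns:
        return {c.name: c for c in external.dry_run_columns}
    return node_columns


# No dry_run_columns: the declared columns are used as-is.
node_columns = {"id": Column("id", "STRING")}
no_override = ExternalConfig(location="gs://bucket/path")
assert choose_columns(no_override, node_columns) == node_columns

# dry_run_columns present: it takes precedence over node columns.
override = ExternalConfig(
    location="gs://bucket/path",
    dry_run_columns=[Column("amount", "NUMERIC")],
)
assert "amount" in choose_columns(override, node_columns)
```

Note the falsy-check on `dry_run_columns` means an empty list behaves the same as an absent one, which matches the fallback the README describes.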
34 changes: 34 additions & 0 deletions dbt_dry_run/test/node_runner/test_source_runner.py
@@ -0,0 +1,34 @@
from unittest.mock import MagicMock

from dbt_dry_run.models.manifest import ExternalConfig, ManifestColumn, Node, NodeConfig
from dbt_dry_run.node_runner.source_runner import SourceRunner
from dbt_dry_run.results import DryRunStatus, Results


def test_external_source_with_columns_but_no_dry_run_columns() -> None:
    # Create a Node with an external source that has columns but no dry_run_columns
    node = Node(
        unique_id="S",
        resource_type="source",
        config=NodeConfig(),
        name="s",
        database="db1",
        schema="schema1",
        original_file_path="/filepath1.yaml",
        root_path="/filepath1",
        columns={
            "column1": ManifestColumn(name="column1", data_type="STRING"),
            "column2": ManifestColumn(name="column2", data_type="RECORD[]"),
        },
        alias="s",
        external=ExternalConfig(location="location"),  # No dry_run_columns specified
    )

    mock_sql_runner = MagicMock()
    mock_results = MagicMock()

    source_runner = SourceRunner(mock_sql_runner, mock_results)
    result = source_runner.run(node)

    # The test should pass if no InvalidColumnSpecification exception is raised
    assert result.status != DryRunStatus.FAILURE