From 9ba91cff712d0c4824e1ac810f92a09103c8b5ff Mon Sep 17 00:00:00 2001
From: Hariharan Banukumar
Date: Mon, 23 Aug 2021 07:13:54 -0400
Subject: [PATCH] fixed get_columns_in_relation for open source delta table
 (#207)

* fixed get_columns_in_relation for open source delta table

* fixed E501 linting error and added change log
---
 CHANGELOG.md               |  2 ++
 dbt/adapters/spark/impl.py | 11 ++++++++---
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 60d85d50a..cbce884c8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,10 +1,12 @@
 ## dbt-spark 0.21.0 (Release TBD)
 
 ### Fixes
+- Enhance `get_columns_in_relation` to handle a bug in open source Delta Lake, which doesn't return schema details in the output of the `show table extended in databasename like '*'` query. This impacts dbt snapshots if the file format is open source Delta Lake ([#207](https://github.com/dbt-labs/dbt-spark/pull/207))
 - Add pyodbc import error message to dbt.exceptions.RuntimeException to get more detailed information when running `dbt debug` ([#192](https://github.com/dbt-labs/dbt-spark/pull/192))
 - Add support for ODBC Server Side Parameters, allowing options that need to be set with the `SET` statement to be used ([#201](https://github.com/dbt-labs/dbt-spark/pull/201))
 
 ### Contributors
+- [@harryharanb](https://github.com/harryharanb) ([#207](https://github.com/dbt-labs/dbt-spark/pull/207))
 - [@JCZuurmond](https://github.com/JCZuurmond) ([#192](https://github.com/fishtown-analytics/dbt-spark/pull/192))
 - [@jethron](https://github.com/jethron) ([#201](https://github.com/fishtown-analytics/dbt-spark/pull/201))
 
diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py
index f8e72449a..03fba9fac 100644
--- a/dbt/adapters/spark/impl.py
+++ b/dbt/adapters/spark/impl.py
@@ -212,11 +212,16 @@ def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]:
                                 for cached_relation in cached_relations
                                 if str(cached_relation) == str(relation)),
                                None)
-        if cached_relation is None or cached_relation.information is None:
+        columns = []
+        if cached_relation and cached_relation.information:
+            columns = self.parse_columns_from_information(cached_relation)
+        if not columns:
+            # open source Delta's 'show table extended' output doesn't
+            # return the relation's schema. If columns are empty from the
+            # cache, fall back to the get_columns_in_relation Spark macro,
+            # which executes a 'describe extended tablename' query
             rows: List[agate.Row] = super().get_columns_in_relation(relation)
             columns = self.parse_describe_extended(relation, rows)
-        else:
-            columns = self.parse_columns_from_information(cached_relation)
         return columns
 
     def parse_columns_from_information(
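
For readers skimming the hunk above, the control flow it introduces can be restated in isolation. The sketch below mirrors the cache-first-with-fallback logic using hypothetical stand-ins (`resolve_columns` and its callable parameters are illustrative only, not dbt-spark's API; the real method lives on `SparkAdapter` and works with `Relation` and `SparkColumn` objects):

```python
from typing import Callable, List, Optional

Column = str  # hypothetical stand-in for dbt-spark's SparkColumn


def resolve_columns(
    cached_information: Optional[str],
    parse_from_information: Callable[[str], List[Column]],
    describe_extended: Callable[[], List[Column]],
) -> List[Column]:
    """Cache-first column lookup with a 'describe extended' fallback."""
    columns: List[Column] = []
    if cached_information:
        # Prefer the cached 'show table extended' output when present.
        columns = parse_from_information(cached_information)
    if not columns:
        # Open source Delta omits the schema from 'show table extended',
        # so the cached information parses to an empty column list and
        # we fall back to an explicit 'describe extended' lookup.
        columns = describe_extended()
    return columns


# The open source Delta case: cached info exists but holds no schema,
# so the fallback fires instead of returning an empty column list.
cols = resolve_columns(
    cached_information="Provider: delta",            # no schema section
    parse_from_information=lambda info: [],          # parser finds nothing
    describe_extended=lambda: ["id", "updated_at"],  # authoritative lookup
)
assert cols == ["id", "updated_at"]
```

The key design choice in the patch is testing `if not columns` rather than `if cached_relation is None`: the fallback now fires even when a cache entry exists but its information parses to an empty schema, which is exactly the open source Delta case this PR fixes.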