-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Attempt getting snowflake to go faster and use less ram
- Thread information schema queries, one per db - Pass schema list into catalog queries and filter on that in SQL - break the existing interface for get_catalog (sorry, not sorry)
- Loading branch information
Jacob Beck
committed
Jan 9, 2020
1 parent
a348c3f
commit f1bf300
Showing
3 changed files
with
125 additions
and
69 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
132 changes: 67 additions & 65 deletions
132
plugins/snowflake/dbt/include/snowflake/macros/catalog.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,68 +1,70 @@ | ||
|
||
{% macro snowflake__get_catalog(information_schemas) -%} | ||
|
||
{%- call statement('catalog', fetch_result=True) -%} | ||
{% for information_schema in information_schemas %} | ||
|
||
( | ||
with tables as ( | ||
|
||
select | ||
table_catalog as "table_database", | ||
table_schema as "table_schema", | ||
table_name as "table_name", | ||
table_type as "table_type", | ||
|
||
-- note: this is the _role_ that owns the table | ||
table_owner as "table_owner", | ||
|
||
'Clustering Key' as "stats:clustering_key:label", | ||
clustering_key as "stats:clustering_key:value", | ||
'The key used to cluster this table' as "stats:clustering_key:description", | ||
(clustering_key is not null) as "stats:clustering_key:include", | ||
|
||
'Row Count' as "stats:row_count:label", | ||
row_count as "stats:row_count:value", | ||
'An approximate count of rows in this table' as "stats:row_count:description", | ||
(row_count is not null) as "stats:row_count:include", | ||
|
||
'Approximate Size' as "stats:bytes:label", | ||
bytes as "stats:bytes:value", | ||
'Approximate size of the table as reported by Snowflake' as "stats:bytes:description", | ||
(bytes is not null) as "stats:bytes:include" | ||
|
||
from {{ information_schema }}.tables | ||
|
||
), | ||
|
||
columns as ( | ||
|
||
select | ||
table_catalog as "table_database", | ||
table_schema as "table_schema", | ||
table_name as "table_name", | ||
null as "table_comment", | ||
|
||
column_name as "column_name", | ||
ordinal_position as "column_index", | ||
data_type as "column_type", | ||
null as "column_comment" | ||
|
||
from {{ information_schema }}.columns | ||
|
||
) | ||
|
||
select * | ||
from tables | ||
join columns using ("table_database", "table_schema", "table_name") | ||
where "table_schema" != 'INFORMATION_SCHEMA' | ||
order by "column_index" | ||
) | ||
{% if not loop.last %} union all {% endif %} | ||
|
||
{% endfor %} | ||
{%- endcall -%} | ||
|
||
{{ return(load_result('catalog').table) }} | ||
{% macro snowflake__get_catalog() %} | ||
{# snowflake has a different argspec, because it filters by schema inside the database #} | ||
{% do exceptions.raise_compiler_error('get_catalog is not valid on snowflake, use snowflake_get_catalog') %} | ||
{% endmacro %} | ||
|
||
|
||
{% macro snowflake_get_catalog(information_schema, schemas) -%} | ||
{% set query %} | ||
with tables as ( | ||
|
||
select | ||
table_catalog as "table_database", | ||
table_schema as "table_schema", | ||
table_name as "table_name", | ||
table_type as "table_type", | ||
|
||
-- note: this is the _role_ that owns the table | ||
table_owner as "table_owner", | ||
|
||
'Clustering Key' as "stats:clustering_key:label", | ||
clustering_key as "stats:clustering_key:value", | ||
'The key used to cluster this table' as "stats:clustering_key:description", | ||
(clustering_key is not null) as "stats:clustering_key:include", | ||
|
||
'Row Count' as "stats:row_count:label", | ||
row_count as "stats:row_count:value", | ||
'An approximate count of rows in this table' as "stats:row_count:description", | ||
(row_count is not null) as "stats:row_count:include", | ||
|
||
'Approximate Size' as "stats:bytes:label", | ||
bytes as "stats:bytes:value", | ||
'Approximate size of the table as reported by Snowflake' as "stats:bytes:description", | ||
(bytes is not null) as "stats:bytes:include" | ||
|
||
from {{ information_schema }}.tables | ||
|
||
), | ||
|
||
columns as ( | ||
|
||
select | ||
table_catalog as "table_database", | ||
table_schema as "table_schema", | ||
table_name as "table_name", | ||
null as "table_comment", | ||
|
||
column_name as "column_name", | ||
ordinal_position as "column_index", | ||
data_type as "column_type", | ||
null as "column_comment" | ||
|
||
from {{ information_schema }}.columns | ||
) | ||
|
||
select * | ||
from tables | ||
join columns using ("table_database", "table_schema", "table_name") | ||
where "table_schema" != 'INFORMATION_SCHEMA' | ||
and ( | ||
{%- for schema in schemas -%} | ||
"table_schema" = '{{ schema }}'{%- if not loop.last %} or {% endif -%} | ||
{%- endfor -%} | ||
) | ||
order by "column_index" | ||
{%- endset -%} | ||
|
||
{{ return(run_query(query)) }} | ||
|
||
{%- endmacro %} |