Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ADAP-865: Parameterize where clause, add option to supply list of relations #758

Merged
merged 21 commits into from
Oct 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
546f132
parameterize where clause, add option to supply list of relations
mikealfare Aug 29, 2023
ac21429
parameterize where clause, add option to supply list of relations
mikealfare Aug 29, 2023
b1a2f7d
revert whitespace fix
mikealfare Aug 29, 2023
cad0555
revert whitespace fix
mikealfare Aug 29, 2023
9fb0839
revert whitespace fix
mikealfare Aug 29, 2023
6e4e0b2
fix missing macro keyword
mikealfare Aug 29, 2023
673e1c5
point to the dev branch on core, revert before pushing to main
mikealfare Sep 13, 2023
9db81be
Merge branch 'main' into feature/applied-state/get-catalog-by-object
mikealfare Sep 14, 2023
9bdacf9
add new macro get_catalog_relations, update get_catalog to share comm…
mikealfare Sep 15, 2023
e624ff1
fixed reference in get_catalog_relations, added original dict version…
mikealfare Sep 15, 2023
de33932
remove dict version of relations based get_catalog, point to List[Bas…
mikealfare Sep 19, 2023
c72be56
Merge branch 'main' into feature/applied-state/get-catalog-by-object
mikealfare Sep 26, 2023
6ed39f0
Merge branch 'main' into feature/applied-state/get-catalog-by-object
mikealfare Oct 3, 2023
26db949
point dev reqs back to main on core
mikealfare Oct 3, 2023
3b7dbff
fix typo in schemas argument
mikealfare Oct 4, 2023
08212d2
add feature flag to turn on relation filtering for get_catalog
mikealfare Oct 4, 2023
d908ff8
update changelog to point to the PR instead of a broken url
mikealfare Oct 9, 2023
7d5f735
Merge branch 'main' into feature/applied-state/get-catalog-by-object
mikealfare Oct 11, 2023
290cb77
Merge branch 'main' into feature/applied-state/get-catalog-by-object
mikealfare Oct 11, 2023
e591600
Merge branch 'main' into feature/applied-state/get-catalog-by-object
mikealfare Oct 11, 2023
423535e
Support changes to dbt-core capability system
peterallenwebb Oct 11, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20230829-152412.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Support limiting get_catalog by object name
time: 2023-08-29T15:24:12.649104-04:00
custom:
  Author: mikealfare
  Issue: "758"
5 changes: 5 additions & 0 deletions dbt/adapters/snowflake/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from dbt.adapters.base.impl import AdapterConfig, ConstraintSupport # type: ignore
from dbt.adapters.base.meta import available
from dbt.adapters.capability import CapabilityDict, CapabilitySupport, Support, Capability
from dbt.adapters.sql import SQLAdapter # type: ignore
from dbt.adapters.sql.impl import (
LIST_SCHEMAS_MACRO_NAME,
Expand Down Expand Up @@ -49,6 +50,10 @@ class SnowflakeAdapter(SQLAdapter):
ConstraintType.foreign_key: ConstraintSupport.NOT_ENFORCED,
}

_capabilities = CapabilityDict(
{Capability.SchemaMetadataByRelations: CapabilitySupport(support=Support.Full)}
)

@classmethod
def date_function(cls):
return "CURRENT_TIMESTAMP()"
Expand Down
185 changes: 118 additions & 67 deletions dbt/include/snowflake/macros/catalog.sql
Original file line number Diff line number Diff line change
@@ -1,72 +1,123 @@
{% macro snowflake__get_catalog(information_schema, schemas) -%}
    {#-
        Builds and runs the catalog query restricted to a set of schemas.

        Args:
            information_schema: rendered as the prefix of the `.tables` and
                `.columns` views queried by the helper macros.
            schemas: iterable of schema names; every name becomes one
                case-insensitive predicate in the where clause.

        Returns:
            Whatever `run_query` returns for the assembled statement.

        The `tables` and `columns` CTEs get the same where clause so both
        sides of the final join are filtered before they are joined.
    -#}

    {% set query %}
        with tables as (
            {{ snowflake__get_catalog_tables_sql(information_schema) }}
            {{ snowflake__get_catalog_schemas_where_clause_sql(schemas) }}
        ),
        columns as (
            {{ snowflake__get_catalog_columns_sql(information_schema) }}
            {{ snowflake__get_catalog_schemas_where_clause_sql(schemas) }}
        )
        {{ snowflake__get_catalog_results_sql() }}
    {%- endset -%}

    {{ return(run_query(query)) }}

{%- endmacro %}


{% macro snowflake__get_catalog_relations(information_schema, relations) -%}
    {#-
        Builds and runs the catalog query restricted to specific relations.

        Args:
            information_schema: rendered as the prefix of the `.tables` and
                `.columns` views queried by the helper macros.
            relations: iterable of objects exposing `schema` and, optionally,
                `identifier`; see
                snowflake__get_catalog_relations_where_clause_sql for how
                each one is turned into a predicate.

        Returns:
            Whatever `run_query` returns for the assembled statement.

        The `tables` and `columns` CTEs get the same where clause so both
        sides of the final join are filtered before they are joined.
    -#}

    {% set query %}
        with tables as (
            {{ snowflake__get_catalog_tables_sql(information_schema) }}
            {{ snowflake__get_catalog_relations_where_clause_sql(relations) }}
        ),
        columns as (
            {{ snowflake__get_catalog_columns_sql(information_schema) }}
            {{ snowflake__get_catalog_relations_where_clause_sql(relations) }}
        )
        {{ snowflake__get_catalog_results_sql() }}
    {%- endset -%}

    {{ return(run_query(query)) }}

{%- endmacro %}


{% macro snowflake__get_catalog_tables_sql(information_schema) -%}
    {#-
        Renders the table-level half of the catalog query: one row per entry
        in `<information_schema>.tables`, with identifying columns plus four
        "stats:<name>:{label,value,description,include}" column groups.

        No where clause is emitted here; the caller appends its own filter
        directly after the rendered text.
    -#}
    select
        table_catalog as "table_database",
        table_schema as "table_schema",
        table_name as "table_name",
        table_type as "table_type",
        comment as "table_comment",

        -- note: this is the _role_ that owns the table
        table_owner as "table_owner",

        {# stat: clustering key #}
        'Clustering Key' as "stats:clustering_key:label",
        clustering_key as "stats:clustering_key:value",
        'The key used to cluster this table' as "stats:clustering_key:description",
        (clustering_key is not null) as "stats:clustering_key:include",

        {# stat: row count #}
        'Row Count' as "stats:row_count:label",
        row_count as "stats:row_count:value",
        'An approximate count of rows in this table' as "stats:row_count:description",
        (row_count is not null) as "stats:row_count:include",

        {# stat: size in bytes #}
        'Approximate Size' as "stats:bytes:label",
        bytes as "stats:bytes:value",
        'Approximate size of the table as reported by Snowflake' as "stats:bytes:description",
        (bytes is not null) as "stats:bytes:include",

        {# stat: last modified, flagged for inclusion on base tables only #}
        'Last Modified' as "stats:last_modified:label",
        to_varchar(convert_timezone('UTC', last_altered), 'yyyy-mm-dd HH24:MI'||'UTC') as "stats:last_modified:value",
        'The timestamp for last update/change' as "stats:last_modified:description",
        (last_altered is not null and table_type = 'BASE TABLE') as "stats:last_modified:include"
    from {{ information_schema }}.tables
{%- endmacro %}


{% macro snowflake__get_catalog_columns_sql(information_schema) -%}
    {#-
        Renders the column-level half of the catalog query: one row per entry
        in `<information_schema>.columns`, carrying the three identifying
        columns used as join keys plus name, position, type and comment.

        No where clause is emitted here; the caller appends its own filter
        directly after the rendered text.
    -#}
    select
        {# join keys shared with the tables select #}
        table_catalog as "table_database",
        table_schema as "table_schema",
        table_name as "table_name",

        {# per-column attributes #}
        column_name as "column_name",
        ordinal_position as "column_index",
        data_type as "column_type",
        comment as "column_comment"
    from {{ information_schema }}.columns
{%- endmacro %}


{% macro snowflake__get_catalog_results_sql() -%}
    {#-
        Renders the final select of the catalog query. It expects the
        enclosing statement to define CTEs named `tables` and `columns`,
        joins them on their three shared identifying columns, and orders
        the result by column position.
    -#}
    select *
    from tables
    join columns using ("table_database", "table_schema", "table_name")
    order by "column_index"
{%- endmacro %}


{% macro snowflake__get_catalog_schemas_where_clause_sql(schemas) -%}
    {#-
        Renders `where (<p1> or <p2> ...)` with one case-insensitive
        "table_schema" equality predicate per entry in `schemas`.
        The " or " separator is written before every predicate except the
        first, which yields the same text as appending it after every
        predicate except the last.
    -#}
    where (
    {%- for schema_name in schemas -%}
        {%- if not loop.first %} or {% endif -%}
        upper("table_schema") = upper('{{ schema_name }}')
    {%- endfor -%}
    )
{%- endmacro %}


{% macro snowflake__get_catalog_relations_where_clause_sql(relations) -%}
    {#-
        Renders `where (<p1> or <p2> ...)` with one predicate per relation.

        Args:
            relations: iterable of objects exposing `schema` and, optionally,
                `identifier`.

        Per relation:
            - schema and identifier set: match that schema and table name.
            - only schema set: match every table in that schema.
            - no schema: raise a compiler error.

        Raises a compiler error when `relations` is empty, because the loop
        would otherwise render `where ()`, which is not valid SQL.
    -#}
    {%- if not relations -%}
        {%- do exceptions.raise_compiler_error(
            '`get_catalog_relations` requires a non-empty list of relations'
        ) -%}
    {%- endif -%}
    where (
        {%- for relation in relations -%}
            {% if relation.schema and relation.identifier %}
                (
                    upper("table_schema") = upper('{{ relation.schema }}')
                    and upper("table_name") = upper('{{ relation.identifier }}')
                )
            {% elif relation.schema %}
                (
                    upper("table_schema") = upper('{{ relation.schema }}')
                )
            {% else %}
                {% do exceptions.raise_compiler_error(
                    '`get_catalog_relations` requires a list of relations, each with a schema'
                ) %}
            {% endif %}

            {%- if not loop.last %} or {% endif -%}
        {%- endfor -%}
    )
{%- endmacro %}