From 3a9ed62884ff0affb47f1968a781a59968530a0e Mon Sep 17 00:00:00 2001
From: NiallRees
Date: Wed, 3 Mar 2021 22:00:53 +0000
Subject: [PATCH] Schema-ing and incrementaling

---
Review note: the last hunk of models/incremental/int_dbt__model_executions.sql
now switches the final select to the new `fields` CTE, which was otherwise
never referenced. The blob hashes on the `index` lines are left as in the
original patch and therefore no longer describe that file's post-image.

 README.md                                     |  5 ++-
 models/dim_dbt__models.sql                    |  1 -
 models/incremental/dim_dbt__models.sql        | 39 +++++++++++++++++++
 .../fct_dbt__model_executions.sql             | 20 ++++++++--
 .../incremental/int_dbt__model_executions.sql | 22 +++++++++-
 models/schemas.yml                            | 26 +++++++++++++
 6 files changed, 105 insertions(+), 8 deletions(-)
 delete mode 100644 models/dim_dbt__models.sql
 create mode 100644 models/incremental/dim_dbt__models.sql
 rename models/{ => incremental}/fct_dbt__model_executions.sql (64%)

diff --git a/README.md b/README.md
index 77ecab6a..ef108fec 100644
--- a/README.md
+++ b/README.md
@@ -3,10 +3,11 @@ This package builds a mart of tables from dbt artifacts loaded into a table. It
 
 Models included:
 
-- `fct_dbt_model_executions`
-- `fct_dbt_run_results`
+- `dim_dbt__models`
+- `fct_dbt__model_executions`
 - `fct_dbt__latest_full_model_executions`
 - `fct_dbt__critical_path`
+- `fct_dbt_run_results`
 
 The critical path model determines the slowest route through your DAG, which provides you with the information needed to make a targeted effort to reducing `dbt run` times.
 For example:
diff --git a/models/dim_dbt__models.sql b/models/dim_dbt__models.sql
deleted file mode 100644
index 5ef6a895..00000000
--- a/models/dim_dbt__models.sql
+++ /dev/null
@@ -1 +0,0 @@
-select * from {{ ref("stg_dbt__models") }}
\ No newline at end of file
diff --git a/models/incremental/dim_dbt__models.sql b/models/incremental/dim_dbt__models.sql
new file mode 100644
index 00000000..a38fbd9d
--- /dev/null
+++ b/models/incremental/dim_dbt__models.sql
@@ -0,0 +1,39 @@
+{{ config( materialized='incremental', unique_key='manifest_model_id' ) }}
+
+with dbt_models as (
+
+    select * from {{ ref('stg_dbt__models') }}
+
+),
+
+dbt_models_incremental as (
+
+    select *
+    from dbt_models
+
+    {% if is_incremental() %}
+    -- this filter will only be applied on an incremental run
+    where artifact_generated_at > (select max(artifact_generated_at) from {{ this }})
+    {% endif %}
+
+),
+
+fields as (
+
+    select
+        manifest_model_id,
+        command_invocation_id,
+        artifact_generated_at,
+        node_id,
+        name,
+        model_schema,
+        depends_on_nodes,
+        package_name,
+        model_path,
+        checksum,
+        model_materialization
+    from dbt_models_incremental
+
+)
+
+select * from fields
diff --git a/models/fct_dbt__model_executions.sql b/models/incremental/fct_dbt__model_executions.sql
similarity index 64%
rename from models/fct_dbt__model_executions.sql
rename to models/incremental/fct_dbt__model_executions.sql
index 1f84150c..6edeb8f5 100644
--- a/models/fct_dbt__model_executions.sql
+++ b/models/incremental/fct_dbt__model_executions.sql
@@ -14,17 +14,29 @@ model_executions as (
 
 ),
 
+model_executions_incremental as (
+
+    select *
+    from model_executions
+
+    {% if is_incremental() %}
+    -- this filter will only be applied on an incremental run
+    where artifact_generated_at > (select max(artifact_generated_at) from {{ this }})
+    {% endif %}
+
+),
+
 model_executions_with_materialization as (
 
     select
-        model_executions.*,
+        model_executions_incremental.*,
         models.model_materialization,
         models.model_schema,
         models.name
-    from model_executions
+    from model_executions_incremental
     left join models on (
-        model_executions.command_invocation_id = models.command_invocation_id
-        and model_executions.node_id = models.node_id
+        model_executions_incremental.command_invocation_id = models.command_invocation_id
+        and model_executions_incremental.node_id = models.node_id
     )
 
 ),
diff --git a/models/incremental/int_dbt__model_executions.sql b/models/incremental/int_dbt__model_executions.sql
index f7d4d4f3..51402cbd 100644
--- a/models/incremental/int_dbt__model_executions.sql
+++ b/models/incremental/int_dbt__model_executions.sql
@@ -1,3 +1,5 @@
+{{ config( materialized='incremental', unique_key='model_execution_id' ) }}
+
 with model_executions as (
 
     select *
@@ -15,6 +17,24 @@ model_executions_incremental as (
     where artifact_generated_at > (select max(artifact_generated_at) from {{ this }})
     {% endif %}
 
+),
+
+fields as (
+
+    select
+        model_execution_id,
+        command_invocation_id,
+        artifact_generated_at,
+        was_full_refresh,
+        node_id,
+        thread_id,
+        status,
+        compile_started_at,
+        query_completed_at,
+        total_node_runtime,
+        rows_affected
+    from model_executions_incremental
+
 )
 
-select * from model_executions_incremental
\ No newline at end of file
+select * from fields
\ No newline at end of file
diff --git a/models/schemas.yml b/models/schemas.yml
index 50e3dfab..4153d35d 100644
--- a/models/schemas.yml
+++ b/models/schemas.yml
@@ -103,3 +103,29 @@ models:
         description: Was the run executed with a --full-refresh flag?
       - name: env_*
         description: Columns for the environment variables set when the command was executed.
+
+  - name: dim_dbt__models
+    description: All dbt model metadata from every manifest.json.
+    columns:
+      - name: manifest_model_id
+        description: Primary key generated from the command_invocation_id and checksum.
+        tests:
+          - unique
+          - not_null
+      - name: command_invocation_id
+        description: The id of the command which resulted in the source artifact's generation.
+      - name: artifact_generated_at
+        description: Timestamp of when the source artifact was generated.
+      - name: node_id
+        description: Unique id for the node, in the form of model.[package_name].[model_name]
+      - name: name
+        description: The model name.
+      - name: model_schema
+      - name: depends_on_nodes
+        description: List of node ids the model depends on.
+      - name: package_name
+      - name: model_path
+        description: Filepath of the model.
+      - name: checksum
+        description: Unique identifier for the model. If a model is unchanged between separate executions this will remain the same.
+      - name: model_materialization