From fe23ef462271eaa3bcec2ea7cca5bceab796c44c Mon Sep 17 00:00:00 2001 From: Noy Arie Date: Tue, 13 Aug 2024 18:15:50 +0300 Subject: [PATCH 1/3] WIP --- .../dbt_project/macros/get_test_results.sql | 96 ++++++++++--------- 1 file changed, 53 insertions(+), 43 deletions(-) diff --git a/elementary/monitor/dbt_project/macros/get_test_results.sql b/elementary/monitor/dbt_project/macros/get_test_results.sql index 67d8ee7dc..f81e5a038 100644 --- a/elementary/monitor/dbt_project/macros/get_test_results.sql +++ b/elementary/monitor/dbt_project/macros/get_test_results.sql @@ -1,4 +1,6 @@ {%- macro get_test_results(days_back = 7, invocations_per_test = 720, disable_passed_test_metrics = false) -%} + {% set elementary_tests_allowlist_status = ['fail', 'warn'] if disable_passed_test_metrics else ['fail', 'warn', 'pass'] %} + {% set select_test_results %} with test_results as ( {{ elementary_cli.current_tests_run_results_query(days_back=days_back) }} @@ -11,51 +13,62 @@ {# When we split test into multiple test results, we want to have the same invocation order for the test results from the same run so we use rank. #} rank() over (partition by elementary_unique_id order by detected_at desc) as invocations_rank_index from test_results + ), + + test_result_rows as ( + select * + from {{ ref("elementary", "test_result_rows") }} ) select - id, - invocation_id, - test_execution_id, - model_unique_id, - test_unique_id, - elementary_unique_id, - detected_at, - database_name, - schema_name, - table_name, - column_name, - test_type, - test_sub_type, - test_results_description, - original_path, - package_name, - owners, - model_owner, - tags, - test_tags, - model_tags, - meta, - model_meta, - case when invocations_rank_index = 1 then test_results_query else NULL end as test_results_query, - other, - test_name, - test_params, - severity, - status, - execution_time, - days_diff, - invocations_rank_index, - failures, - result_rows - from ordered_test_results - where invocations_rank_index <= {{ invocations_per_test }} - order by elementary_unique_id, invocations_rank_index desc + test_results.id, + test_results.invocation_id, + test_results.test_execution_id, + test_results.model_unique_id, + test_results.test_unique_id, + test_results.elementary_unique_id, + test_results.detected_at, + test_results.database_name, + test_results.schema_name, + test_results.table_name, + test_results.column_name, + test_results.test_type, + test_results.test_sub_type, + test_results.test_results_description, + test_results.original_path, + test_results.package_name, + test_results.owners, + test_results.model_owner, + test_results.tags, + test_results.test_tags, + test_results.model_tags, + test_results.meta, + test_results.model_meta, + case when test_results.invocations_rank_index = 1 then test_results.test_results_query else NULL end as test_results_query, + test_results.other, + test_results.test_name, + test_results.test_params, + test_results.severity, + test_results.status, + + case when test_results.invocations_rank_index = 1 and ((test_results.test_type == 'dbt_test' and test_results.status in ('fail', 'warn')) or (test_type != 'dbt_test' and status in {{ elementary.strings_list_to_tuple(elementary_tests_allowlist_status) }})) then test_result_rows.result_row else NULL end as test_rows_sample, + + test_results.execution_time, + test_results.days_diff, + test_results.invocations_rank_index, + test_results.failures, + test_results.result_rows + from ordered_test_results as test_results + join test_result_rows on test_result_rows.elementary_test_results_id = ordered_test_results.id + where test_results.invocations_rank_index <= {{ invocations_per_test }} + order by test_results.elementary_unique_id, test_results.invocations_rank_index desc + + {%- endset -%} {% set test_results = [] %} {% set test_results_agate = elementary.run_query(select_test_results) %} - {% set test_result_rows_agate = elementary_cli.get_result_rows_agate(days_back) %} + {# {% set test_result_rows_agate = elementary_cli.get_result_rows_agate(days_back) %} #} {% set tests = elementary.agate_to_dicts(test_results_agate) %} {% set filtered_tests = [] %} @@ -73,12 +86,9 @@ {% set test_params = fromjson(test.test_params) %} {% set status = test.status | lower %} - {% set elementary_tests_allowlist_status = ['fail', 'warn'] %} - {% if not disable_passed_test_metrics %} - {% do elementary_tests_allowlist_status.append('pass') %} - {% endif %} {%- if (test_type == 'dbt_test' and status in ['fail', 'warn']) or (test_type != 'dbt_test' and status in elementary_tests_allowlist_status) -%} - {% set test_rows_sample = elementary_cli.get_test_rows_sample(test.result_rows, test_result_rows_agate.get(test.id)) %} + {# {% set test_rows_sample = elementary_cli.get_test_rows_sample(test.result_rows, test_result_rows_agate.get(test.id)) %} #} + {% set test_rows_sample = test.test_rows_sample %} {# Dimension anomalies return multiple dimensions for the test rows sample, and needs to be handle differently. #} {# Currently we show only the anomalous for all of the dimensions. #} {% if test.test_sub_type == 'dimension' or test_params.dimensions %} From f588bef659470538041ff538103327a5c680bf6f Mon Sep 17 00:00:00 2001 From: Noy Arie Date: Tue, 13 Aug 2024 19:06:53 +0300 Subject: [PATCH 2/3] query only samples of last invocation --- .../macros/get_result_rows_agate.sql | 5 ++- .../dbt_project/macros/get_test_results.sql | 32 ++++++++++--------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/elementary/monitor/dbt_project/macros/get_result_rows_agate.sql b/elementary/monitor/dbt_project/macros/get_result_rows_agate.sql index 206aa33c3..d92bcbe41 100644 --- a/elementary/monitor/dbt_project/macros/get_result_rows_agate.sql +++ b/elementary/monitor/dbt_project/macros/get_result_rows_agate.sql @@ -1,10 +1,13 @@ -{% macro get_result_rows_agate(days_back) %} +{% macro get_result_rows_agate(days_back, valid_ids_query = none) %} {% set query %} select elementary_test_results_id, result_row from {{ ref("elementary", "test_result_rows") }} where {{ elementary.edr_datediff(elementary.edr_cast_as_timestamp('detected_at'), elementary.edr_current_timestamp(), 'day') }} < {{ days_back }} + {% if valid_ids_query %} + and elementary_test_results_id in ({{ valid_ids_query }}) + {% endif %} {% endset %} {% do return(elementary.run_query(query).group_by("elementary_test_results_id")) %} {% endmacro %} diff --git a/elementary/monitor/dbt_project/macros/get_test_results.sql b/elementary/monitor/dbt_project/macros/get_test_results.sql index f81e5a038..56eade21a 100644 --- a/elementary/monitor/dbt_project/macros/get_test_results.sql +++ b/elementary/monitor/dbt_project/macros/get_test_results.sql @@ -13,11 +13,6 @@ {# When we split test into multiple test results, we want to have the same invocation order for the test results from the same run so we use rank. #} rank() over (partition by elementary_unique_id order by detected_at desc) as invocations_rank_index from test_results - ), - - test_result_rows as ( - select * - from {{ ref("elementary", "test_result_rows") }} ) select @@ -50,25 +45,33 @@ test_results.test_params, test_results.severity, test_results.status, - - case when test_results.invocations_rank_index = 1 and ((test_results.test_type == 'dbt_test' and test_results.status in ('fail', 'warn')) or (test_type != 'dbt_test' and status in {{ elementary.strings_list_to_tuple(elementary_tests_allowlist_status) }})) then test_result_rows.result_row else NULL end as test_rows_sample, - test_results.execution_time, test_results.days_diff, test_results.invocations_rank_index, test_results.failures, test_results.result_rows from ordered_test_results as test_results - join test_result_rows on test_result_rows.elementary_test_results_id = ordered_test_results.id where test_results.invocations_rank_index <= {{ invocations_per_test }} order by test_results.elementary_unique_id, test_results.invocations_rank_index desc - - {%- endset -%} {% set test_results = [] %} - {% set test_results_agate = elementary.run_query(select_test_results) %} - {# {% set test_result_rows_agate = elementary_cli.get_result_rows_agate(days_back) %} #} + + {% set elementary_database, elementary_schema = elementary.get_package_database_and_schema() %} + {% set relation = elementary.create_temp_table(elementary_database, elementary_schema, 'ordered_test_results', select_test_results) %} + + {% set test_results_agate_sql %} + select * from {{ relation }} + {% endset %} + + {% set valid_ids_query %} + select distinct id + from {{ relation }} + where invocations_rank_index = 1 + {% endset %} + + {% set test_results_agate = elementary.run_query(test_results_agate_sql) %} + {% set test_result_rows_agate = elementary_cli.get_result_rows_agate(days_back, valid_ids_query) %} {% set tests = elementary.agate_to_dicts(test_results_agate) %} {% set filtered_tests = [] %} @@ -87,8 +90,7 @@ {% set status = test.status | lower %} {%- if (test_type == 'dbt_test' and status in ['fail', 'warn']) or (test_type != 'dbt_test' and status in elementary_tests_allowlist_status) -%} - {# {% set test_rows_sample = elementary_cli.get_test_rows_sample(test.result_rows, test_result_rows_agate.get(test.id)) %} #} - {% set test_rows_sample = test.test_rows_sample %} + {% set test_rows_sample = elementary_cli.get_test_rows_sample(test.result_rows, test_result_rows_agate.get(test.id)) %} {# Dimension anomalies return multiple dimensions for the test rows sample, and needs to be handle differently. #} {# Currently we show only the anomalous for all of the dimensions. #} {% if test.test_sub_type == 'dimension' or test_params.dimensions %} From 9ec16eff5e5fac9d9f05102478cba2424b7340e4 Mon Sep 17 00:00:00 2001 From: Noy Arie Date: Tue, 13 Aug 2024 19:22:20 +0300 Subject: [PATCH 3/3] rename --- elementary/monitor/dbt_project/macros/get_test_results.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/elementary/monitor/dbt_project/macros/get_test_results.sql b/elementary/monitor/dbt_project/macros/get_test_results.sql index 56eade21a..2bbca69ec 100644 --- a/elementary/monitor/dbt_project/macros/get_test_results.sql +++ b/elementary/monitor/dbt_project/macros/get_test_results.sql @@ -58,15 +58,15 @@ {% set test_results = [] %} {% set elementary_database, elementary_schema = elementary.get_package_database_and_schema() %} - {% set relation = elementary.create_temp_table(elementary_database, elementary_schema, 'ordered_test_results', select_test_results) %} + {% set ordered_test_results_relation = elementary.create_temp_table(elementary_database, elementary_schema, 'ordered_test_results', select_test_results) %} {% set test_results_agate_sql %} - select * from {{ relation }} + select * from {{ ordered_test_results_relation }} {% endset %} {% set valid_ids_query %} select distinct id - from {{ relation }} + from {{ ordered_test_results_relation }} where invocations_rank_index = 1 {% endset %}