-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrobust_scale.sql
51 lines (41 loc) · 1.63 KB
/
robust_scale.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
{% macro robust_scale(column, iqr=0.5, source_relation='') %}
{{ return(adapter.dispatch('robust_scale', 'dbt_ml_inline_preprocessing')(column, iqr, source_relation)) }}
{% endmacro %}
{% macro default__robust_scale(column, iqr, source_relation) %}
{% if source_relation == '' %}
{% do exceptions.warn('Source relation is required for percentile impute in Postgresql 9.4+') %}
{% endif %}
{% set median_query %}
select percentile_cont(0.5) within group (order by {{ column }} ) from {{ source_relation }}
{% endset %}
{% set iqr_minus_query %}
select percentile_cont(0.5 - {{ iqr }}/2) within group (order by {{ column }} ) from {{ source_relation }}
{% endset %}
{% set iqr_plus_query %}
select percentile_cont(0.5 + {{ iqr }}/2) within group (order by {{ column }} ) from {{ source_relation }}
{% endset %}
{% set median = dbt_utils.get_single_value(median_query) %}
{% set iqr_minus = dbt_utils.get_single_value(iqr_minus_query) %}
{% set iqr_plus = dbt_utils.get_single_value(iqr_plus_query) %}
(
{{ column }} - {{ median }}
)
/
(
{{ iqr_plus }} - {{ iqr_minus }}
)
{% endmacro %}
{% macro snowflake__robust_scale(column, iqr, source_relation) %}
{#
(value - median) / (IQR_plus - IQR_minus)
#}
(
{{ column }} - (percentile_cont(0.5) within group (order by {{ column }}) over ())
)
/
(
percentile_cont(0.5 + {{ iqr }}/2) within group (order by {{ column }}) over ()
-
percentile_cont(0.5 - {{ iqr }}/2) within group (order by {{ column }}) over ()
)
{% endmacro %}