Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleanup environments #333

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ combined AS (
NULL AS machine,
NULL AS system_release,
NULL AS is_dev_build,
NULL AS environment_name_hash,
NULL AS python_implementation,
NULL AS system_name,
NULL AS system_version,
Expand Down Expand Up @@ -158,7 +157,6 @@ combined AS (
unstructured_executions.machine,
unstructured_executions.system_release,
unstructured_executions.is_dev_build,
unstructured_executions.environment_name_hash,
unstructured_executions.python_implementation,
unstructured_executions.system_name,
unstructured_executions.system_version,
Expand Down Expand Up @@ -262,7 +260,6 @@ SELECT
combined.machine,
combined.system_release,
combined.is_dev_build,
combined.environment_name_hash,
combined.python_implementation,
combined.system_name,
combined.system_version,
Expand Down
44 changes: 44 additions & 0 deletions data/transform/models/marts/telemetry/base/environment_dim.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{{
    config(materialized='table')
}}

-- Environment dimension: one row per distinct (project_id, environment hash)
-- pair seen in either structured or unstructured executions.
--
-- The source models are read at execution grain, so the pairs must be
-- de-duplicated before they can serve as a dimension. Without that,
-- environment_pk repeats once per execution, the `unique` test on
-- environment_pk fails, and every join on
-- execution_env_map.environment_fk = environment_dim.environment_pk
-- multiplies fact rows.
WITH structured_envs AS (

    -- Environment parsed from the CLI command of each structured execution.
    -- The surrogate-key inputs are kept identical to the ones used for
    -- environment_fk in execution_env_map so the keys line up.
    SELECT
        {{ dbt_utils.surrogate_key(
            [
                'structured_executions.project_id',
                'cmd_parsed_all.environment'
            ]
        ) }} AS environment_pk,
        structured_executions.project_id,
        cmd_parsed_all.environment AS env_hash
    FROM {{ ref('structured_executions') }}
    LEFT JOIN {{ ref('cmd_parsed_all') }}
        ON structured_executions.command = cmd_parsed_all.command

),

unstructured_envs AS (

    -- Unstructured executions already carry the environment hash directly.
    SELECT
        {{ dbt_utils.surrogate_key(
            [
                'unstructured_executions.project_id',
                'unstructured_executions.environment_name_hash'
            ]
        ) }} AS environment_pk,
        unstructured_executions.project_id,
        unstructured_executions.environment_name_hash AS env_hash
    FROM {{ ref('unstructured_executions') }}

),

distinct_envs AS (

    -- UNION (not UNION ALL) is deliberate: it removes duplicates both within
    -- and across the two sources, collapsing execution grain to one row per
    -- environment.
    SELECT
        structured_envs.environment_pk,
        structured_envs.project_id,
        structured_envs.env_hash
    FROM structured_envs

    UNION

    SELECT
        unstructured_envs.environment_pk,
        unstructured_envs.project_id,
        unstructured_envs.env_hash
    FROM unstructured_envs

)

SELECT
    distinct_envs.environment_pk,
    distinct_envs.project_id,
    distinct_envs.env_hash,
    -- Fall back to the raw hash when no unhashed name is known.
    COALESCE(
        hash_lookup.unhashed_value,
        distinct_envs.env_hash
    ) AS env_name
FROM distinct_envs
-- NOTE(review): assumes hash_lookup has at most one row per
-- (hash_value, category); otherwise this join re-introduces duplicate
-- environment_pk values -- confirm against the hash_lookup model.
LEFT JOIN {{ ref('hash_lookup') }}
    ON distinct_envs.env_hash = hash_lookup.hash_value
    AND hash_lookup.category = 'environment'
14 changes: 0 additions & 14 deletions data/transform/models/marts/telemetry/base/environments.sql

This file was deleted.

28 changes: 28 additions & 0 deletions data/transform/models/marts/telemetry/base/execution_env_map.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{{
    config(materialized='table')
}}

-- Bridge from executions to environments: each row pairs an execution_id
-- with the surrogate key (environment_fk) built from the execution's
-- project_id and environment hash.
WITH structured_map AS (

    -- Structured executions: the environment comes from the parsed command.
    -- NOTE(review): row count per execution depends on cmd_parsed_all having
    -- a single row per command -- confirm against that model.
    SELECT
        {{ dbt_utils.surrogate_key(
            [
                'structured_executions.project_id',
                'cmd_parsed_all.environment'
            ]
        ) }} AS environment_fk,
        structured_executions.execution_id
    FROM {{ ref('structured_executions') }}
    LEFT JOIN {{ ref('cmd_parsed_all') }}
        ON structured_executions.command = cmd_parsed_all.command

),

unstructured_map AS (

    -- Unstructured executions: the environment hash is a column on the row.
    SELECT
        {{ dbt_utils.surrogate_key(
            [
                'unstructured_executions.project_id',
                'unstructured_executions.environment_name_hash'
            ]
        ) }} AS environment_fk,
        unstructured_executions.execution_id
    FROM {{ ref('unstructured_executions') }}

)

SELECT
    structured_map.environment_fk,
    structured_map.execution_id
FROM structured_map

UNION ALL

SELECT
    unstructured_map.environment_fk,
    unstructured_map.execution_id
FROM unstructured_map
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ WITH plugin_prep AS (
SELECT
plugin_executions.execution_id,
cli_executions_base.project_id,
cli_executions_base.environment_name_hash AS env_id,
environment_dim.env_hash AS env_id,
ARRAY_AGG(
COALESCE(
plugin_executions.plugin_surrogate_key,
Expand All @@ -12,6 +12,10 @@ WITH plugin_prep AS (
FROM {{ ref('plugin_executions') }}
LEFT JOIN {{ ref('cli_executions_base') }}
ON plugin_executions.execution_id = cli_executions_base.execution_id
LEFT JOIN {{ ref('execution_env_map') }}
ON plugin_executions.execution_id = execution_env_map.execution_id
LEFT JOIN {{ ref('environment_dim') }}
ON execution_env_map.environment_fk = environment_dim.environment_pk
WHERE cli_executions_base.cli_command IN ('elt', 'run')
GROUP BY 1, 2, 3
)
Expand Down
17 changes: 16 additions & 1 deletion data/transform/models/marts/telemetry/base/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -134,4 +134,19 @@ models:
- name: event_date
tests:
- not_null


- name: environment_dim
description: This table contains attributes about project environments.
columns:
- name: environment_pk
tests:
- not_null
- unique

- name: execution_env_map
columns:
- name: environment_fk
tests:
- relationships:
to: ref('environment_dim')
field: environment_pk
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ SELECT
cli_executions_base.python_version,
ip_address_dim.ip_address_hash,
ip_address_dim.cloud_provider,
ip_address_dim.execution_location
ip_address_dim.execution_location,
environment_dim.env_name
FROM {{ ref('cli_executions_base') }}
LEFT JOIN {{ ref('pipeline_executions') }}
ON cli_executions_base.execution_id = pipeline_executions.execution_id
Expand All @@ -19,3 +20,7 @@ LEFT JOIN {{ ref('date_dim') }}
ON cli_executions_base.event_date = date_dim.date_day
LEFT JOIN {{ ref('ip_address_dim') }}
ON cli_executions_base.ip_address_hash = ip_address_dim.ip_address_hash
LEFT JOIN {{ ref('execution_env_map') }}
ON cli_executions_base.execution_id = execution_env_map.execution_id
LEFT JOIN {{ ref('environment_dim') }}
ON execution_env_map.environment_fk = environment_dim.environment_pk
11 changes: 5 additions & 6 deletions data/transform/models/marts/telemetry/fact_plugin_usage.sql
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@ SELECT
plugin_executions.plugin_surrogate_key,
-- CLI Attributes
cli_executions_base.cli_command,
cli_executions_base.environment_name_hash AS env_id,
hash_lookup.unhashed_value AS env_name,
environment_dim.env_name,
cli_executions_base.exit_code AS cli_exit_code,
cli_executions_base.meltano_version,
cli_executions_base.num_cpu_cores_available,
Expand Down Expand Up @@ -58,7 +57,7 @@ LEFT JOIN {{ ref('project_dim') }}
ON cli_executions_base.project_id = project_dim.project_id
LEFT JOIN {{ ref('ip_address_dim') }}
ON cli_executions_base.ip_address_hash = ip_address_dim.ip_address_hash
-- TODO: move this parsing up stream
LEFT JOIN {{ ref('hash_lookup') }}
ON cli_executions_base.environment_name_hash = hash_lookup.hash_value
AND hash_lookup.category = 'environment'
LEFT JOIN {{ ref('execution_env_map') }}
ON cli_executions_base.execution_id = execution_env_map.execution_id
LEFT JOIN {{ ref('environment_dim') }}
ON execution_env_map.environment_fk = environment_dim.environment_pk
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,9 @@ WITH source AS (

{% if env_var("MELTANO_ENVIRONMENT") == "cicd" %}

FROM raw.snowplow.events
WHERE derived_tstamp::TIMESTAMP >= DATEADD('day', -7, CURRENT_DATE)
-- filter test events
AND app_id != 'test'
FROM raw.snowplow.events SAMPLE ROW (100000 ROWS)
WHERE COALESCE(app_id, '') != 'test'

{% else %}

FROM {{ source('snowplow', 'events') }}
Expand Down