Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor: Benefits Amplitude events #3468

Merged
merged 9 commits into from
Oct 25, 2024
36 changes: 32 additions & 4 deletions warehouse/models/mart/benefits/_mart_benefits.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,16 @@ models:
description: UUID
- name: processed_time
description: UTC ISO-8601 timestamp
- name: event_properties_enrollment_method
description: The `enrollment_method` value from the `event_properties` column
- name: event_properties_auth_provider
description: The `auth_provider` value from the `event_properties` column
description: The `claims_provider` value from the `event_properties` column
deprecated:
enabled: true
reason: "This column is deprecated, use `event_properties_claims_provider` instead"
date: "2024-10-10"
- name: event_properties_claims_provider
description: The `claims_provider` value from the `event_properties` column
- name: event_properties_card_tokenize_func
description: The `card_tokenize_func` value from the `event_properties` column
- name: event_properties_card_tokenize_url
Expand All @@ -78,13 +86,27 @@ models:
- name: event_properties_path
description: The `path` value from the `event_properties` column
- name: event_properties_payment_group
description: The `payment_group` value from the `event_properties` column
description: The `enrollment_group` value from the `event_properties` column
deprecated:
enabled: true
reason: "This column is deprecated, use `event_properties_enrollment_group` instead"
date: "2024-10-10"
- name: event_properties_enrollment_group
description: The `enrollment_group` value from the `event_properties` column
- name: event_properties_status
description: The `status` value from the `event_properties` column
- name: event_properties_transit_agency
description: The `transit_agency` value from the `event_properties` column
- name: event_properties_eligibility_types
description: A semi-colon delimited list of `eligibility_types` values from the `event_properties` column
description: A semi-colon delimited list of `enrollment_flows` values from the `event_properties` column
deprecated:
enabled: true
reason: "This column is deprecated, use `event_properties_enrollment_flows` instead"
date: "2024-09-19"
- name: event_properties_enrollment_flows
description: A semi-colon delimited list of `enrollment_flows` values from the `event_properties` column
- name: user_properties_enrollment_method
description: The `enrollment_method` value from the `user_properties` column
- name: user_properties_eligibility_verifier
description: The `eligibility_verifier` value from the `user_properties` column
- name: user_properties_initial_referrer
Expand All @@ -100,4 +122,10 @@ models:
- name: user_properties_user_agent
description: The `user_agent` value from the `user_properties` column
- name: user_properties_eligibility_types
description: A semi-colon delimited list of `eligibility_types` values from the `user_properties` column
description: A semi-colon delimited list of `enrollment_flows` values from the `user_properties` column
deprecated:
enabled: true
reason: "This column is deprecated, use `user_properties_enrollment_flows` instead"
date: "2024-09-19"
- name: user_properties_enrollment_flows
description: A semi-colon delimited list of `enrollment_flows` values from the `user_properties` column
115 changes: 100 additions & 15 deletions warehouse/models/mart/benefits/fct_benefits_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,32 @@ WITH fct_benefits_events AS (
client_event_time,
event_id,
session_id,
event_type,
version_name,
case
when event_type = "selected eligibility verifier"
then "selected enrollment flow"
when event_type = "started payment connection"
then "started card tokenization"
when event_type = "closed payment connection" or event_type = "ended card tokenization"
then "finished card tokenization"
else event_type
end as event_type,
-- Correct the version strings that a bug in the Docker build process recorded incorrectly
-- https://github.com/cal-itp/benefits/pull/2392
case
when version_name = "2024.7.3.dev0+gcd3b083.d20240731"
then "2024.7.2"
when version_name = "2024.8.2.dev0+g7664917.d20240821"
then "2024.8.1"
when version_name = "2024.9.2.dev0+gadf41b9.d20240909"
then "2024.9.1"
when version_name = "2024.9.3.dev0+gfeb06d2.d20240918"
then "2024.9.2"
when version_name = "2024.9.4.dev0+g861519e.d20240926"
then "2024.9.3"
when version_name = "2024.10.2.dev0+g158e1b0.d20241010"
then "2024.10.1"
else version_name
end as version_name,
os_name,
os_version,
device_family,
Expand All @@ -25,12 +49,27 @@ WITH fct_benefits_events AS (
server_upload_time,
server_received_time,
amplitude_id,
start_version,
-- Correct the version strings that a bug in the Docker build process recorded incorrectly
-- https://github.com/cal-itp/benefits/pull/2392
case
when start_version = "2024.7.3.dev0+gcd3b083.d20240731"
then "2024.7.2"
when start_version = "2024.8.2.dev0+g7664917.d20240821"
then "2024.8.1"
when start_version = "2024.9.2.dev0+gadf41b9.d20240909"
then "2024.9.1"
when start_version = "2024.9.3.dev0+gfeb06d2.d20240918"
then "2024.9.2"
when start_version = "2024.9.4.dev0+g861519e.d20240926"
then "2024.9.3"
when start_version = "2024.10.2.dev0+g158e1b0.d20241010"
then "2024.10.1"
else start_version
end as start_version,
uuid,
processed_time,

-- Event Properties (https://app.amplitude.com/data/compiler/Benefits/properties/main/latest/event)
{{ json_extract_column('event_properties', 'auth_provider') }},
{{ json_extract_column('event_properties', 'card_tokenize_func') }},
{{ json_extract_column('event_properties', 'card_tokenize_url') }},
{{ json_extract_column('event_properties', 'eligibility_verifier') }},
Expand All @@ -41,10 +80,36 @@ WITH fct_benefits_events AS (
{{ json_extract_column('event_properties', 'language') }},
{{ json_extract_column('event_properties', 'origin') }},
{{ json_extract_column('event_properties', 'path') }},
{{ json_extract_column('event_properties', 'payment_group') }},
{{ json_extract_column('event_properties', 'status') }},
{{ json_extract_column('event_properties', 'transit_agency') }},
{{ json_extract_flattened_column('event_properties', 'eligibility_types') }},

-- New property `enrollment_method`: historical events that lack it default to "digital"
-- https://github.com/cal-itp/benefits/pull/2402
COALESCE(
{{ json_extract_column('event_properties', 'enrollment_method', no_alias = true) }},
"digital"
) AS event_properties_enrollment_method,

-- Historical data existed in `auth_provider` but new data is in `claims_provider`
-- https://github.com/cal-itp/benefits/pull/2401
COALESCE(
{{ json_extract_column('event_properties', 'claims_provider', no_alias = true) }},
{{ json_extract_column('event_properties', 'auth_provider', no_alias = true) }}
) AS event_properties_claims_provider,

-- Historical data existed in `eligibility_types` but new data is in `enrollment_flows`
-- https://github.com/cal-itp/benefits/pull/2379
COALESCE(
{{ json_extract_flattened_column('event_properties', 'enrollment_flows', no_alias = true) }},
{{ json_extract_flattened_column('event_properties', 'eligibility_types', no_alias = true) }}
) AS event_properties_enrollment_flows,

-- Historical data existed in `payment_group` but new data is in `enrollment_group`
-- https://github.com/cal-itp/benefits/pull/2391
-- Read both properties with the scalar `json_extract_column` macro (the macro
-- previously used for `payment_group`), not the list-flattening macro that is
-- used for `enrollment_flows`.
-- NOTE(review): assumes `enrollment_group` is a single value like `payment_group`
-- was, as the model's YAML description states; confirm against the Amplitude schema.
COALESCE(
    {{ json_extract_column('event_properties', 'enrollment_group', no_alias = true) }},
    {{ json_extract_column('event_properties', 'payment_group', no_alias = true) }}
) AS event_properties_enrollment_group,

-- User Properties (https://app.amplitude.com/data/compiler/Benefits/properties/main/latest/user)
{{ json_extract_column('user_properties', 'eligibility_verifier') }},
Expand All @@ -61,7 +126,20 @@ WITH fct_benefits_events AS (
{{ json_extract_column('user_properties', 'referrer') }},
{{ json_extract_column('user_properties', 'referring_domain') }},
{{ json_extract_column('user_properties', 'user_agent') }},
{{ json_extract_flattened_column('user_properties', 'eligibility_types') }}

-- New property `enrollment_method`: historical events that lack it default to "digital"
-- https://github.com/cal-itp/benefits/pull/2402
COALESCE(
{{ json_extract_column('user_properties', 'enrollment_method', no_alias = true) }},
"digital"
) AS user_properties_enrollment_method,

-- Historical data existed in `eligibility_types` but new data is in `enrollment_flows`
-- https://github.com/cal-itp/benefits/pull/2379
COALESCE(
{{ json_extract_flattened_column('user_properties', 'enrollment_flows', no_alias = true) }},
{{ json_extract_flattened_column('user_properties', 'eligibility_types', no_alias = true) }}
) AS user_properties_enrollment_flows

FROM {{ ref('stg_amplitude__benefits_events') }}
),
Expand Down Expand Up @@ -92,14 +170,20 @@ fct_old_enrollments AS (
start_version,
uuid,
processed_time,
event_properties_auth_provider,
"digital" as event_properties_enrollment_method,
CASE
WHEN client_event_time < '2022-08-12T07:00:00Z'
THEN "ca-dmv"
WHEN client_event_time >= '2022-08-12T07:00:00Z'
THEN "cdt-logingov"
END as event_properties_claims_provider,
event_properties_card_tokenize_func,
event_properties_card_tokenize_url,
CASE
WHEN client_event_time < '2022-08-12T07:00:00Z'
THEN "DMV"
THEN "ca-dmv"
WHEN client_event_time >= '2022-08-12T07:00:00Z'
THEN "(MST) CDT claims via Login.gov"
THEN "cdt-logingov"
END as event_properties_eligibility_verifier,
event_properties_error_name,
event_properties_error_status,
Expand All @@ -108,23 +192,24 @@ fct_old_enrollments AS (
event_properties_language,
event_properties_origin,
event_properties_path,
"5170d37b-43d5-4049-899c-b4d850e14990" as event_properties_payment_group,
"5170d37b-43d5-4049-899c-b4d850e14990" as event_properties_enrollment_group,
"success" as event_properties_status,
"Monterey-Salinas Transit" as event_properties_transit_agency,
"senior" as event_properties_eligibility_types,
"senior" as event_properties_enrollment_flows,
"digital" as user_properties_enrollment_method,
CASE
WHEN client_event_time < '2022-08-12T07:00:00Z'
THEN "DMV"
THEN "ca-dmv"
WHEN client_event_time >= '2022-08-12T07:00:00Z'
THEN "(MST) CDT claims via Login.gov"
THEN "cdt-logingov"
END as user_properties_eligibility_verifier,
user_properties_initial_referrer,
user_properties_initial_referring_domain,
"Monterey-Salinas Transit" as user_properties_transit_agency,
user_properties_user_agent,
user_properties_referrer,
user_properties_referring_domain,
"senior" as user_properties_eligibility_types
"senior" as user_properties_enrollment_flows
FROM fct_benefits_events
WHERE client_event_time >= '2021-12-08T08:00:00Z'
and client_event_time < '2022-08-29T07:00:00Z'
Expand Down
Loading