Skip to content

Commit

Permalink
Refactor: Benefits Amplitude events (#3468)
Browse files Browse the repository at this point in the history
  • Loading branch information
thekaveman authored Oct 25, 2024
2 parents aff55ca + 34aa4a0 commit dfab12a
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 19 deletions.
36 changes: 32 additions & 4 deletions warehouse/models/mart/benefits/_mart_benefits.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,16 @@ models:
description: UUID
- name: processed_time
description: UTC ISO-8601 timestamp
- name: event_properties_enrollment_method
description: The `enrollment_method` value from the `event_properties` column
- name: event_properties_auth_provider
description: The `auth_provider` value from the `event_properties` column
description: The `claims_provider` value from the `event_properties` column
deprecated:
enabled: true
reason: "This column is deprecated, use `event_properties_claims_provider` instead"
date: "2024-10-10"
- name: event_properties_claims_provider
description: The `claims_provider` value from the `event_properties` column
- name: event_properties_card_tokenize_func
description: The `card_tokenize_func` value from the `event_properties` column
- name: event_properties_card_tokenize_url
Expand All @@ -78,13 +86,27 @@ models:
- name: event_properties_path
description: The `path` value from the `event_properties` column
- name: event_properties_payment_group
description: The `payment_group` value from the `event_properties` column
description: The `enrollment_group` value from the `event_properties` column
deprecated:
enabled: true
reason: "This column is deprecated, use `event_properties_enrollment_group` instead"
date: "2024-10-10"
- name: event_properties_enrollment_group
description: The `enrollment_group` value from the `event_properties` column
- name: event_properties_status
description: The `status` value from the `event_properties` column
- name: event_properties_transit_agency
description: The `transit_agency` value from the `event_properties` column
- name: event_properties_eligibility_types
description: A semi-colon delimited list of `eligibility_types` values from the `event_properties` column
description: A semi-colon delimited list of `enrollment_flows` values from the `event_properties` column
deprecated:
enabled: true
reason: "This column is deprecated, use `event_properties_enrollment_flows` instead"
date: "2024-09-19"
- name: event_properties_enrollment_flows
description: A semi-colon delimited list of `enrollment_flows` values from the `event_properties` column
- name: user_properties_enrollment_method
description: The `enrollment_method` value from the `user_properties` column
- name: user_properties_eligibility_verifier
description: The `eligibility_verifier` value from the `user_properties` column
- name: user_properties_initial_referrer
Expand All @@ -100,4 +122,10 @@ models:
- name: user_properties_user_agent
description: The `user_agent` value from the `user_properties` column
- name: user_properties_eligibility_types
description: A semi-colon delimited list of `eligibility_types` values from the `user_properties` column
description: A semi-colon delimited list of `enrollment_flows` values from the `user_properties` column
deprecated:
enabled: true
reason: "This column is deprecated, use `user_properties_enrollment_flows` instead"
date: "2024-09-19"
- name: user_properties_enrollment_flows
description: A semi-colon delimited list of `enrollment_flows` values from the `user_properties` column
115 changes: 100 additions & 15 deletions warehouse/models/mart/benefits/fct_benefits_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,32 @@ WITH fct_benefits_events AS (
client_event_time,
event_id,
session_id,
event_type,
version_name,
-- Collapse renamed event types onto a single name each; any event type
-- without an entry below passes through unchanged.
case event_type
    when "selected eligibility verifier" then "selected enrollment flow"
    when "started payment connection" then "started card tokenization"
    -- two different source names both map to the same target name
    when "closed payment connection" then "finished card tokenization"
    when "ended card tokenization" then "finished card tokenization"
    else event_type
end as event_type,
-- A bug in the Docker build process produced incorrect version strings.
-- Each known bad string is rewritten to a plain release number; every other
-- value is kept as reported.
-- https://github.com/cal-itp/benefits/pull/2392
case version_name
    when "2024.7.3.dev0+gcd3b083.d20240731" then "2024.7.2"
    when "2024.8.2.dev0+g7664917.d20240821" then "2024.8.1"
    when "2024.9.2.dev0+gadf41b9.d20240909" then "2024.9.1"
    when "2024.9.3.dev0+gfeb06d2.d20240918" then "2024.9.2"
    when "2024.9.4.dev0+g861519e.d20240926" then "2024.9.3"
    when "2024.10.2.dev0+g158e1b0.d20241010" then "2024.10.1"
    else version_name
end as version_name,
os_name,
os_version,
device_family,
Expand All @@ -25,12 +49,27 @@ WITH fct_benefits_events AS (
server_upload_time,
server_received_time,
amplitude_id,
start_version,
-- A bug in the Docker build process produced incorrect version strings.
-- Each known bad string is rewritten to a plain release number; every other
-- value is kept as reported.
-- https://github.com/cal-itp/benefits/pull/2392
case start_version
    when "2024.7.3.dev0+gcd3b083.d20240731" then "2024.7.2"
    when "2024.8.2.dev0+g7664917.d20240821" then "2024.8.1"
    when "2024.9.2.dev0+gadf41b9.d20240909" then "2024.9.1"
    when "2024.9.3.dev0+gfeb06d2.d20240918" then "2024.9.2"
    when "2024.9.4.dev0+g861519e.d20240926" then "2024.9.3"
    when "2024.10.2.dev0+g158e1b0.d20241010" then "2024.10.1"
    else start_version
end as start_version,
uuid,
processed_time,

-- Event Properties (https://app.amplitude.com/data/compiler/Benefits/properties/main/latest/event)
{{ json_extract_column('event_properties', 'auth_provider') }},
{{ json_extract_column('event_properties', 'card_tokenize_func') }},
{{ json_extract_column('event_properties', 'card_tokenize_url') }},
{{ json_extract_column('event_properties', 'eligibility_verifier') }},
Expand All @@ -41,10 +80,36 @@ WITH fct_benefits_events AS (
{{ json_extract_column('event_properties', 'language') }},
{{ json_extract_column('event_properties', 'origin') }},
{{ json_extract_column('event_properties', 'path') }},
{{ json_extract_column('event_properties', 'payment_group') }},
{{ json_extract_column('event_properties', 'status') }},
{{ json_extract_column('event_properties', 'transit_agency') }},
{{ json_extract_flattened_column('event_properties', 'eligibility_types') }},

-- New column `enrollment_method`, historical values should be set to "digital"
-- https://github.com/cal-itp/benefits/pull/2402
-- COALESCE returns the extracted property when it is non-NULL and the literal
-- "digital" otherwise, so rows without the property still get a value.
-- NOTE(review): `no_alias = true` presumably stops the macro from emitting its
-- own column alias so the alias can be applied after COALESCE; confirm against
-- the macro definition.
COALESCE(
{{ json_extract_column('event_properties', 'enrollment_method', no_alias = true) }},
"digital"
) AS event_properties_enrollment_method,

-- Historical data existed in `auth_provider` but new data is in `claims_provider`
-- https://github.com/cal-itp/benefits/pull/2401
-- Argument order matters: `claims_provider` wins when present, and
-- `auth_provider` is used only when `claims_provider` is NULL.
COALESCE(
{{ json_extract_column('event_properties', 'claims_provider', no_alias = true) }},
{{ json_extract_column('event_properties', 'auth_provider', no_alias = true) }}
) AS event_properties_claims_provider,

-- Historical data existed in `eligibility_types` but new data is in `enrollment_flows`
-- https://github.com/cal-itp/benefits/pull/2379
-- Both sides use the flattening macro; `enrollment_flows` is preferred and
-- `eligibility_types` is the fallback when it is NULL.
COALESCE(
{{ json_extract_flattened_column('event_properties', 'enrollment_flows', no_alias = true) }},
{{ json_extract_flattened_column('event_properties', 'eligibility_types', no_alias = true) }}
) AS event_properties_enrollment_flows,

-- Historical data existed in `payment_group` but new data is in `enrollment_group`
-- https://github.com/cal-itp/benefits/pull/2391
-- `enrollment_group` is preferred; `payment_group` is the fallback when it is NULL.
-- The scalar extraction macro is used here (not the flattening variant used for
-- list-valued properties): `payment_group` was previously read as a single value
-- and the historical backfill for this column is a single identifier.
-- NOTE(review): assumes `enrollment_group` is likewise a single value, not a
-- list; confirm against the Amplitude event schema.
COALESCE(
    {{ json_extract_column('event_properties', 'enrollment_group', no_alias = true) }},
    {{ json_extract_column('event_properties', 'payment_group', no_alias = true) }}
) AS event_properties_enrollment_group,

-- User Properties (https://app.amplitude.com/data/compiler/Benefits/properties/main/latest/user)
{{ json_extract_column('user_properties', 'eligibility_verifier') }},
Expand All @@ -61,7 +126,20 @@ WITH fct_benefits_events AS (
{{ json_extract_column('user_properties', 'referrer') }},
{{ json_extract_column('user_properties', 'referring_domain') }},
{{ json_extract_column('user_properties', 'user_agent') }},
{{ json_extract_flattened_column('user_properties', 'eligibility_types') }}

-- New column `enrollment_method`, historical values should be set to "digital"
-- https://github.com/cal-itp/benefits/pull/2402
-- COALESCE returns the extracted user property when it is non-NULL and the
-- literal "digital" otherwise.
-- NOTE(review): `no_alias = true` presumably stops the macro from emitting its
-- own column alias so the alias can be applied after COALESCE; confirm against
-- the macro definition.
COALESCE(
{{ json_extract_column('user_properties', 'enrollment_method', no_alias = true) }},
"digital"
) AS user_properties_enrollment_method,

-- Historical data existed in `eligibility_types` but new data is in `enrollment_flows`
-- https://github.com/cal-itp/benefits/pull/2379
-- `enrollment_flows` is preferred; `eligibility_types` is the fallback when it
-- is NULL. This is the last column of the select list, so it has no trailing comma.
COALESCE(
{{ json_extract_flattened_column('user_properties', 'enrollment_flows', no_alias = true) }},
{{ json_extract_flattened_column('user_properties', 'eligibility_types', no_alias = true) }}
) AS user_properties_enrollment_flows

FROM {{ ref('stg_amplitude__benefits_events') }}
),
Expand Down Expand Up @@ -92,14 +170,20 @@ fct_old_enrollments AS (
start_version,
uuid,
processed_time,
event_properties_auth_provider,
"digital" as event_properties_enrollment_method,
-- Assign the claims provider from the event time: rows before the
-- 2022-08-12T07:00:00Z cutoff get "ca-dmv", rows at or after it get "cdt-logingov".
-- There is no ELSE branch, so a NULL client_event_time would produce NULL here;
-- the WHERE clause further down bounds client_event_time on both sides.
-- NOTE(review): the timestamps are compared against string literals; this relies
-- on the engine coercing the literal to the column's type. Confirm the type of
-- client_event_time.
CASE
WHEN client_event_time < '2022-08-12T07:00:00Z'
THEN "ca-dmv"
WHEN client_event_time >= '2022-08-12T07:00:00Z'
THEN "cdt-logingov"
END as event_properties_claims_provider,
event_properties_card_tokenize_func,
event_properties_card_tokenize_url,
CASE
WHEN client_event_time < '2022-08-12T07:00:00Z'
THEN "DMV"
THEN "ca-dmv"
WHEN client_event_time >= '2022-08-12T07:00:00Z'
THEN "(MST) CDT claims via Login.gov"
THEN "cdt-logingov"
END as event_properties_eligibility_verifier,
event_properties_error_name,
event_properties_error_status,
Expand All @@ -108,23 +192,24 @@ fct_old_enrollments AS (
event_properties_language,
event_properties_origin,
event_properties_path,
"5170d37b-43d5-4049-899c-b4d850e14990" as event_properties_payment_group,
"5170d37b-43d5-4049-899c-b4d850e14990" as event_properties_enrollment_group,
"success" as event_properties_status,
"Monterey-Salinas Transit" as event_properties_transit_agency,
"senior" as event_properties_eligibility_types,
"senior" as event_properties_enrollment_flows,
"digital" as user_properties_enrollment_method,
CASE
WHEN client_event_time < '2022-08-12T07:00:00Z'
THEN "DMV"
THEN "ca-dmv"
WHEN client_event_time >= '2022-08-12T07:00:00Z'
THEN "(MST) CDT claims via Login.gov"
THEN "cdt-logingov"
END as user_properties_eligibility_verifier,
user_properties_initial_referrer,
user_properties_initial_referring_domain,
"Monterey-Salinas Transit" as user_properties_transit_agency,
user_properties_user_agent,
user_properties_referrer,
user_properties_referring_domain,
"senior" as user_properties_eligibility_types
"senior" as user_properties_enrollment_flows
FROM fct_benefits_events
WHERE client_event_time >= '2021-12-08T08:00:00Z'
and client_event_time < '2022-08-29T07:00:00Z'
Expand Down

0 comments on commit dfab12a

Please sign in to comment.