From 2de116256f031de6d7775d136b66df3c14a7c760 Mon Sep 17 00:00:00 2001
From: cdelabre-pass <103637148+cdelabre-pass@users.noreply.github.com>
Date: Fri, 27 Dec 2024 18:11:27 +0100
Subject: [PATCH] (DE-830) feat(dbt): add diversity metric in dbt

---
 .../column__metric.md                         |   0
 .../metric/column_diversity_booking.md        |   9 ++
 ...on__int_metric__diversity_daily_booking.md |  22 +++++
 .../int_metric__diversity_daily_booking.yaml  |  24 +++++
 .../int_metric__diversity_daily_booking.sql   | 105 ++++++++++++++++++
 .../diversity/int_metric__diversity_score.sql |   9 ++
 6 files changed, 169 insertions(+)
 rename orchestration/dags/data_gcp_dbt/models/_dbt_docs/glossary/{applicative_database => metric}/column__metric.md (100%)
 create mode 100644 orchestration/dags/data_gcp_dbt/models/_dbt_docs/glossary/metric/column_diversity_booking.md
 create mode 100644 orchestration/dags/data_gcp_dbt/models/_dbt_docs/models/intermediate/metric/diversity/description__int_metric__diversity_daily_booking.md
 create mode 100644 orchestration/dags/data_gcp_dbt/models/intermediate/metric/diversity/_schema/int_metric__diversity_daily_booking.yaml
 create mode 100644 orchestration/dags/data_gcp_dbt/models/intermediate/metric/diversity/int_metric__diversity_daily_booking.sql
 create mode 100644 orchestration/dags/data_gcp_dbt/models/intermediate/metric/diversity/int_metric__diversity_score.sql

diff --git a/orchestration/dags/data_gcp_dbt/models/_dbt_docs/glossary/applicative_database/column__metric.md b/orchestration/dags/data_gcp_dbt/models/_dbt_docs/glossary/metric/column__metric.md
similarity index 100%
rename from orchestration/dags/data_gcp_dbt/models/_dbt_docs/glossary/applicative_database/column__metric.md
rename to orchestration/dags/data_gcp_dbt/models/_dbt_docs/glossary/metric/column__metric.md
diff --git a/orchestration/dags/data_gcp_dbt/models/_dbt_docs/glossary/metric/column_diversity_booking.md b/orchestration/dags/data_gcp_dbt/models/_dbt_docs/glossary/metric/column_diversity_booking.md
new file mode 100644
index 0000000000..4620f85ce3
--- /dev/null
+++ b/orchestration/dags/data_gcp_dbt/models/_dbt_docs/glossary/metric/column_diversity_booking.md
@@ -0,0 +1,9 @@
+---
+title: Diversity Booking
+description: Description of the `diversity booking` columns.
+---
+
+{% docs column__diversity_booking_entity_rank %} The rank of a booking entity for a user, determined by the order of booking creation. {% enddocs %}
+{% docs column__diversity_booked_entity_type %} The type of entity booked, which can be one of several categories such as OFFER_CATEGORY, VENUE_TYPE, OFFER_SUBCATEGORY, VENUE, or EXTRA_CATEGORY. {% enddocs %}
+{% docs column__diversity_booked_entity %} The specific entity booked, which can be an offer category ID, venue type label, offer subcategory ID, venue ID, or extra category. {% enddocs %}
+{% docs column__diversity_score %} A score assigned to a booking based on its rank and entity type, with a multiplier applied. {% enddocs %}
diff --git a/orchestration/dags/data_gcp_dbt/models/_dbt_docs/models/intermediate/metric/diversity/description__int_metric__diversity_daily_booking.md b/orchestration/dags/data_gcp_dbt/models/_dbt_docs/models/intermediate/metric/diversity/description__int_metric__diversity_daily_booking.md
new file mode 100644
index 0000000000..e53827f77f
--- /dev/null
+++ b/orchestration/dags/data_gcp_dbt/models/_dbt_docs/models/intermediate/metric/diversity/description__int_metric__diversity_daily_booking.md
@@ -0,0 +1,22 @@
+---
+title: Diversification of Cultural Practices
+description: Description of the `int_metric__diversity_daily_booking` table.
+---
+
+{% docs description__int_metric__diversity_daily_booking %}
+
+The `int_metric__diversity_daily_booking` table captures the diversification of cultural practices within the pass Culture application. Diversification is defined by booking an offer with different characteristics from those previously booked, indicating a cultural discovery by the user.
+
+For each reservation, the analyzed characteristics include:
+
+- **Diversity in category**: from book to cinema, from live performance to music.
+- **Diversity in subcategory**: from comic book to detective novel, from drama to comedy.
+- **Diversity in genre**: from science fiction to fantasy, from thriller to romance.
+- **Diversity in place**: from an independent bookstore to a large network, from a cinema to a performance hall.
+- **Diversity in type of place**: from a museum to a library, from a theater to a concert hall.
+
+{% enddocs %}
+
+## Table description
+
+{% docs table__int_metric__diversity_daily_booking %}{% enddocs %}
diff --git a/orchestration/dags/data_gcp_dbt/models/intermediate/metric/diversity/_schema/int_metric__diversity_daily_booking.yaml b/orchestration/dags/data_gcp_dbt/models/intermediate/metric/diversity/_schema/int_metric__diversity_daily_booking.yaml
new file mode 100644
index 0000000000..56151bd722
--- /dev/null
+++ b/orchestration/dags/data_gcp_dbt/models/intermediate/metric/diversity/_schema/int_metric__diversity_daily_booking.yaml
@@ -0,0 +1,24 @@
+version: 2
+
+models:
+  - name: int_metric__diversity_daily_booking
+    description: "{{ doc('description__int_metric__diversity_daily_booking') }}"
+    columns:
+      - name: booking_id
+        description: "{{ doc('column__booking_id') }}"
+      - name: booking_created_at
+        description: "{{ doc('column__booking_created_at') }}"
+      - name: booking_creation_date
+        description: "{{ doc('column__booking_creation_date') }}"
+      - name: user_id
+        description: "{{ doc('column__user_id') }}"
+      - name: booking_rank
+        description: "{{ doc('column__booking_rank') }}"
+      - name: diversity_booking_entity_rank
+        description: "{{ doc('column__diversity_booking_entity_rank') }}"
+      - name: diversity_booked_entity_type
+        description: "{{ doc('column__diversity_booked_entity_type') }}"
+      - name: diversity_booked_entity
+        description: "{{ doc('column__diversity_booked_entity') }}"
+      - name: diversity_score
+        description: "{{ doc('column__diversity_score') }}"
diff --git a/orchestration/dags/data_gcp_dbt/models/intermediate/metric/diversity/int_metric__diversity_daily_booking.sql b/orchestration/dags/data_gcp_dbt/models/intermediate/metric/diversity/int_metric__diversity_daily_booking.sql
new file mode 100644
index 0000000000..397e20231f
--- /dev/null
+++ b/orchestration/dags/data_gcp_dbt/models/intermediate/metric/diversity/int_metric__diversity_daily_booking.sql
@@ -0,0 +1,105 @@
+{{
+    config(
+        **custom_incremental_config(
+            incremental_strategy="insert_overwrite",
+            partition_by={"field": "booking_creation_date", "data_type": "date"},
+        )
+    )
+}}
+
+{#
+    For every non-cancelled booking, emits one row per entity type listed below.
+    A row scores its score_multiplier when the booking is the user's first
+    booking overall or the user's first booking of that entity value, else 0.
+
+    NOTE(review): there is no is_incremental() filter in this model and both
+    ranks are computed over each user's full booking history. Confirm how
+    custom_incremental_config limits the partitions that get rewritten.
+#}
+
+{# Entity columns of raw_data to score, and the points a first booking earns. #}
+{% set entities = [
+    {
+        "entity": "offer_category_id",
+        "type": "OFFER_CATEGORY",
+        "score_multiplier": 25,
+    },
+    {
+        "entity": "venue_type_label",
+        "type": "VENUE_TYPE",
+        "score_multiplier": 20,
+    },
+    {
+        "entity": "offer_subcategory_id",
+        "type": "OFFER_SUBCATEGORY",
+        "score_multiplier": 10,
+    },
+    {"entity": "venue_id", "type": "VENUE", "score_multiplier": 5},
+    {
+        "entity": "extra_category",
+        "type": "EXTRA_CATEGORY",
+        "score_multiplier": 5,
+    },
+] %}
+
+with
+    -- Non-cancelled bookings, ranked chronologically per user.
+    raw_data as (
+        select
+            booking_id,
+            booking_created_at,
+            booking_creation_date,
+            user_id,
+            offer_subcategory_id,
+            venue_type_label,
+            offer_category_id,
+            venue_id,
+            coalesce(offer_type_label, venue_id) as extra_category,  -- venue_id is used as extra_category when offer_type_label is null
+            -- booking_id breaks timestamp ties so the rank is deterministic
+            row_number() over (
+                partition by user_id order by booking_created_at, booking_id
+            ) as booking_rank
+        from {{ ref("int_global__booking") }}
+        where booking_status != 'CANCELLED'
+    ),
+
+    -- One select per entity type, stacked with union all.
+    entity_calculations as (
+        {% for entity in entities %}
+            select distinct
+                booking_id,
+                booking_created_at,
+                booking_creation_date,
+                booking_rank,
+                user_id,
+                {{ entity.entity }} as diversity_booked_entity,
+                '{{ entity.type }}' as diversity_booked_entity_type,
+                {{ entity.score_multiplier }} as score_multiplier,
+                row_number() over (
+                    partition by user_id, {{ entity.entity }}
+                    order by booking_created_at, booking_id
+                ) as diversity_booking_entity_rank
+            from raw_data
+            {% if not loop.last %}
+                union all
+            {% endif %}
+        {% endfor %}
+    )
+
+select
+    booking_rank,
+    booking_id,
+    booking_created_at,
+    diversity_booking_entity_rank,
+    diversity_booked_entity_type,
+    diversity_booked_entity,
+    user_id,
+    booking_creation_date,
+    case
+        when booking_rank = 1
+        then score_multiplier
+        when diversity_booking_entity_rank = 1
+        then score_multiplier
+        else 0
+    end as diversity_score
+from entity_calculations
diff --git a/orchestration/dags/data_gcp_dbt/models/intermediate/metric/diversity/int_metric__diversity_score.sql b/orchestration/dags/data_gcp_dbt/models/intermediate/metric/diversity/int_metric__diversity_score.sql
new file mode 100644
index 0000000000..acbfce4ec6
--- /dev/null
+++ b/orchestration/dags/data_gcp_dbt/models/intermediate/metric/diversity/int_metric__diversity_score.sql
@@ -0,0 +1,9 @@
+-- Total diversity score per booking, summed over its entity-type rows.
+select
+    booking_id,
+    booking_creation_date,
+    booking_created_at,
+    user_id,
+    sum(diversity_score) as diversity_score
+from {{ ref("int_metric__diversity_daily_booking") }}
+group by booking_id, booking_creation_date, booking_created_at, user_id