From 83e2627ba39d3c5acb5687be9701ea636bbed4ff Mon Sep 17 00:00:00 2001 From: m-d-bowerman Date: Fri, 11 Oct 2024 06:45:57 -0700 Subject: [PATCH 1/4] create serp_events_clients_daily table and view --- .../search/serp_events_clients_daily/view.sql | 7 ++ .../metadata.yaml | 20 ++++ .../serp_events_clients_daily_v1/query.sql | 55 ++++++++++ .../serp_events_clients_daily_v1/schema.yaml | 100 ++++++++++++++++++ 4 files changed, 182 insertions(+) create mode 100644 sql/moz-fx-data-shared-prod/search/serp_events_clients_daily/view.sql create mode 100644 sql/moz-fx-data-shared-prod/search_derived/serp_events_clients_daily_v1/metadata.yaml create mode 100644 sql/moz-fx-data-shared-prod/search_derived/serp_events_clients_daily_v1/query.sql create mode 100644 sql/moz-fx-data-shared-prod/search_derived/serp_events_clients_daily_v1/schema.yaml diff --git a/sql/moz-fx-data-shared-prod/search/serp_events_clients_daily/view.sql b/sql/moz-fx-data-shared-prod/search/serp_events_clients_daily/view.sql new file mode 100644 index 00000000000..485926e28f8 --- /dev/null +++ b/sql/moz-fx-data-shared-prod/search/serp_events_clients_daily/view.sql @@ -0,0 +1,7 @@ +CREATE OR REPLACE VIEW + `moz-fx-data-shared-prod.search.serp_events_clients_daily` +AS +SELECT + * +FROM + `moz-fx-data-shared-prod.search_derived.serp_events_clients_daily_v1` diff --git a/sql/moz-fx-data-shared-prod/search_derived/serp_events_clients_daily_v1/metadata.yaml b/sql/moz-fx-data-shared-prod/search_derived/serp_events_clients_daily_v1/metadata.yaml new file mode 100644 index 00000000000..7290235a046 --- /dev/null +++ b/sql/moz-fx-data-shared-prod/search_derived/serp_events_clients_daily_v1/metadata.yaml @@ -0,0 +1,20 @@ +friendly_name: SERP Events Clients Daily +description: |- + Aggregation of the desktop SERP Events data to the client-daily level. +owners: +- mozilla/revenue_forecasting_data_reviewers +labels: + incremental: true + schedule: daily + dag: bqetl_search_dashboard +scheduling: + dag_name: bqetl_search_dashboard +bigquery: + time_partitioning: + type: day + field: submission_date + require_partition_filter: true + expiration_days: null + range_partitioning: null + clustering: null +references: {} diff --git a/sql/moz-fx-data-shared-prod/search_derived/serp_events_clients_daily_v1/query.sql b/sql/moz-fx-data-shared-prod/search_derived/serp_events_clients_daily_v1/query.sql new file mode 100644 index 00000000000..2edfd1ec3d0 --- /dev/null +++ b/sql/moz-fx-data-shared-prod/search_derived/serp_events_clients_daily_v1/query.sql @@ -0,0 +1,55 @@ +SELECT + submission_date, + glean_client_id, + legacy_telemetry_client_id, + profile_group_id, + `moz-fx-data-shared-prod`.udf.normalize_search_engine(search_engine) AS partner, + 'desktop' AS device, + normalized_country_code, + normalized_channel, + os, + browser_version_info.major_version AS browser_major_version, + browser_version_info.minor_version AS browser_minor_version, + ANY_VALUE(experiments) AS experiments, + LOGICAL_OR(ad_blocker_inferred) AS ad_blocker_inferred, + COUNT( + DISTINCT IF( + REGEXP_CONTAINS(sap_source, 'urlbar') + OR sap_source IN ('searchbar', 'contextmenu', 'webextension', 'system'), + impression_id, + NULL + ) + ) AS sap, + COUNTIF(is_tagged) AS tagged_sap, + COUNTIF(is_tagged AND REGEXP_CONTAINS(sap_source, 'follow_on')) AS tagged_follow_on, + SUM(num_ad_clicks) AS ad_click, + COUNTIF(num_ads_visible > 0) AS search_with_ads, + COUNTIF(NOT is_tagged) AS organic, + SUM(IF(NOT is_tagged, num_ad_clicks, 0)) AS ad_click_organic, + COUNTIF(num_ads_visible > 0 AND NOT is_tagged) AS search_with_ads_organic, + -- serp_events does not have distribution ID or partner codes to calculate monetizable SAP + COUNTIF(ad_blocker_inferred) AS sap_with_ad_blocker_inferred, + SUM(num_ads_visible) AS num_ads_visible, + SUM(num_ads_blocked) AS num_ads_blocked, + SUM(num_ads_notshowing) AS num_ads_notshowing, + COUNTIF(abandon_reason IS NOT NULL) AS num_abandoned_serp +FROM + `moz-fx-data-shared-prod.firefox_desktop.serp_events` +WHERE + {% if is_init() %} + submission_date >= '2023-07-14' + {% else %} + submission_date = @submission_date + {% endif %} +GROUP BY + submission_date, + glean_client_id, + legacy_telemetry_client_id, + profile_group_id, + partner, + device, + normalized_country_code, + normalized_channel, + os, + browser_major_version, + browser_minor_version diff --git a/sql/moz-fx-data-shared-prod/search_derived/serp_events_clients_daily_v1/schema.yaml b/sql/moz-fx-data-shared-prod/search_derived/serp_events_clients_daily_v1/schema.yaml new file mode 100644 index 00000000000..e93d193076c --- /dev/null +++ b/sql/moz-fx-data-shared-prod/search_derived/serp_events_clients_daily_v1/schema.yaml @@ -0,0 +1,100 @@ +fields: +- mode: NULLABLE + name: submission_date + type: DATE +- mode: NULLABLE + name: glean_client_id + type: STRING +- mode: NULLABLE + name: legacy_telemetry_client_id + type: STRING +- mode: NULLABLE + name: profile_group_id + type: STRING +- mode: NULLABLE + name: partner + type: STRING +- mode: NULLABLE + name: device + type: STRING +- mode: NULLABLE + name: normalized_country_code + type: STRING +- mode: NULLABLE + name: normalized_channel + type: STRING +- mode: NULLABLE + name: os + type: STRING +- mode: NULLABLE + name: browser_major_version + type: NUMERIC +- mode: NULLABLE + name: browser_minor_version + type: NUMERIC +- fields: + - mode: NULLABLE + name: key + type: STRING + - fields: + - mode: NULLABLE + name: branch + type: STRING + - fields: + - mode: NULLABLE + name: type + type: STRING + - mode: NULLABLE + name: enrollment_id + type: STRING + mode: NULLABLE + name: extra + type: RECORD + mode: NULLABLE + name: value + type: RECORD + mode: REPEATED + name: experiments + type: RECORD +- mode: NULLABLE + name: ad_blocker_inferred + type: BOOLEAN +- mode: NULLABLE + name: sap + type: INTEGER +- mode: NULLABLE + name: tagged_sap + type: INTEGER +- mode: NULLABLE + name: tagged_follow_on + type: INTEGER +- mode: NULLABLE + name: ad_click + type: INTEGER +- mode: NULLABLE + name: search_with_ads + type: INTEGER +- mode: NULLABLE + name: organic + type: INTEGER +- mode: NULLABLE + name: ad_click_organic + type: INTEGER +- mode: NULLABLE + name: search_with_ads_organic + type: INTEGER +- mode: NULLABLE + name: sap_with_ad_blocker_inferred + type: INTEGER +- mode: NULLABLE + name: num_ads_visible + type: INTEGER +- mode: NULLABLE + name: num_ads_blocked + type: INTEGER +- mode: NULLABLE + name: num_ads_notshowing + type: INTEGER +- mode: NULLABLE + name: num_abandoned_serp + type: INTEGER From 730e05d2b3ab0728f446e77aacc99bd89dea2725 Mon Sep 17 00:00:00 2001 From: m-d-bowerman Date: Fri, 11 Oct 2024 12:27:32 -0700 Subject: [PATCH 2/4] submission date filter to account for lag --- .../search_derived/serp_events_clients_daily_v1/query.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/moz-fx-data-shared-prod/search_derived/serp_events_clients_daily_v1/query.sql b/sql/moz-fx-data-shared-prod/search_derived/serp_events_clients_daily_v1/query.sql index 2edfd1ec3d0..fd666ee1843 100644 --- a/sql/moz-fx-data-shared-prod/search_derived/serp_events_clients_daily_v1/query.sql +++ b/sql/moz-fx-data-shared-prod/search_derived/serp_events_clients_daily_v1/query.sql @@ -39,7 +39,7 @@ WHERE {% if is_init() %} submission_date >= '2023-07-14' {% else %} - submission_date = @submission_date + submission_date = DATE_SUB(@submission_date, INTERVAL 1 DAY) {% endif %} GROUP BY submission_date, From b019c56acede56497f87f730c9c48e2c753ba2a3 Mon Sep 17 00:00:00 2001 From: m-d-bowerman Date: Fri, 11 Oct 2024 12:29:08 -0700 Subject: [PATCH 3/4] remove init templating --- .../search_derived/serp_events_clients_daily_v1/query.sql | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sql/moz-fx-data-shared-prod/search_derived/serp_events_clients_daily_v1/query.sql b/sql/moz-fx-data-shared-prod/search_derived/serp_events_clients_daily_v1/query.sql index fd666ee1843..6808faede92 100644 --- a/sql/moz-fx-data-shared-prod/search_derived/serp_events_clients_daily_v1/query.sql +++ b/sql/moz-fx-data-shared-prod/search_derived/serp_events_clients_daily_v1/query.sql @@ -36,11 +36,7 @@ SELECT FROM `moz-fx-data-shared-prod.firefox_desktop.serp_events` WHERE - {% if is_init() %} - submission_date >= '2023-07-14' - {% else %} submission_date = DATE_SUB(@submission_date, INTERVAL 1 DAY) - {% endif %} GROUP BY submission_date, glean_client_id, From 0487b8832673698afc01cf6a3ad63e1c5a9d8b08 Mon Sep 17 00:00:00 2001 From: m-d-bowerman Date: Fri, 11 Oct 2024 12:29:29 -0700 Subject: [PATCH 4/4] query format --- .../search_derived/serp_events_clients_daily_v1/query.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/moz-fx-data-shared-prod/search_derived/serp_events_clients_daily_v1/query.sql b/sql/moz-fx-data-shared-prod/search_derived/serp_events_clients_daily_v1/query.sql index 6808faede92..75dc3c48dc7 100644 --- a/sql/moz-fx-data-shared-prod/search_derived/serp_events_clients_daily_v1/query.sql +++ b/sql/moz-fx-data-shared-prod/search_derived/serp_events_clients_daily_v1/query.sql @@ -36,7 +36,7 @@ SELECT FROM `moz-fx-data-shared-prod.firefox_desktop.serp_events` WHERE - submission_date = DATE_SUB(@submission_date, INTERVAL 1 DAY) + submission_date = DATE_SUB(@submission_date, INTERVAL 1 DAY) GROUP BY submission_date, glean_client_id,