diff --git a/dags/bqetl_google_analytics_derived_ga4.py b/dags/bqetl_google_analytics_derived_ga4.py
index 24a22ef5895..d1404f078ea 100644
--- a/dags/bqetl_google_analytics_derived_ga4.py
+++ b/dags/bqetl_google_analytics_derived_ga4.py
@@ -103,8 +103,21 @@
         depends_on_past=False,
     )
 
+    ga_derived__www_site_page_metrics__v2 = bigquery_etl_query(
+        task_id="ga_derived__www_site_page_metrics__v2",
+        destination_table="www_site_page_metrics_v2",
+        dataset_id="ga_derived",
+        project_id="moz-fx-data-marketing-prod",
+        owner="kwindau@mozilla.com",
+        email=["kwindau@mozilla.com", "telemetry-alerts@mozilla.com"],
+        date_partition_parameter="submission_date",
+        depends_on_past=False,
+    )
+
     ga_derived__www_site_events_metrics__v2.set_upstream(ga_derived__www_site_hits__v2)
 
     ga_derived__www_site_landing_page_metrics__v2.set_upstream(
         ga_derived__www_site_hits__v2
     )
+
+    ga_derived__www_site_page_metrics__v2.set_upstream(ga_derived__www_site_hits__v2)
diff --git a/sql/moz-fx-data-marketing-prod/ga/www_site_page_metrics/metadata.yaml b/sql/moz-fx-data-marketing-prod/ga/www_site_page_metrics/metadata.yaml
new file mode 100644
index 00000000000..1ae27e6193a
--- /dev/null
+++ b/sql/moz-fx-data-marketing-prod/ga/www_site_page_metrics/metadata.yaml
@@ -0,0 +1,15 @@
+friendly_name: WWW Site Page Metrics
+description: |-
+  View over ga_derived.www_site_page_metrics_v2: aggregated metrics per page on www.mozilla.org from Google Analytics 4
+owners:
+- kwindau@mozilla.com
+labels: {}
+bigquery: null
+workgroup_access:
+- role: roles/bigquery.dataViewer
+  members:
+  - workgroup:mozilla-confidential
+references:
+  view.sql:
+  - moz-fx-data-marketing-prod.ga_derived.www_site_page_metrics_v2
+deprecated: false
diff --git a/sql/moz-fx-data-marketing-prod/ga/www_site_page_metrics/view.sql b/sql/moz-fx-data-marketing-prod/ga/www_site_page_metrics/view.sql
new file mode 100644
index 00000000000..87cffda1d00
--- /dev/null
+++ b/sql/moz-fx-data-marketing-prod/ga/www_site_page_metrics/view.sql
@@ -0,0 +1,7 @@
+CREATE OR REPLACE VIEW
+  `moz-fx-data-marketing-prod.ga.www_site_page_metrics`
+AS
+SELECT
+  *
+FROM
+  `moz-fx-data-marketing-prod.ga_derived.www_site_page_metrics_v2`
diff --git a/sql/moz-fx-data-marketing-prod/ga_derived/www_site_page_metrics_v2/metadata.yaml b/sql/moz-fx-data-marketing-prod/ga_derived/www_site_page_metrics_v2/metadata.yaml
new file mode 100644
index 00000000000..cb45d0ed415
--- /dev/null
+++ b/sql/moz-fx-data-marketing-prod/ga_derived/www_site_page_metrics_v2/metadata.yaml
@@ -0,0 +1,29 @@
+friendly_name: WWW Site Page Metrics V2
+description: |-
+  Aggregated metrics per page on www.mozilla.org from Google Analytics 4
+owners:
+- kwindau@mozilla.com
+labels:
+  incremental: true
+  owner1: kwindau
+  dag: bqetl_google_analytics_derived_ga4
+scheduling:
+  dag_name: bqetl_google_analytics_derived_ga4
+bigquery:
+  time_partitioning:
+    type: day
+    field: date
+    require_partition_filter: true
+    expiration_days: null
+  clustering:
+    fields:
+    - page_name
+    - locale
+    - country
+    - medium
+workgroup_access:
+- role: roles/bigquery.dataViewer
+  members:
+  - workgroup:mozilla-confidential
+references: {}
+deprecated: false
diff --git a/sql/moz-fx-data-marketing-prod/ga_derived/www_site_page_metrics_v2/query.sql b/sql/moz-fx-data-marketing-prod/ga_derived/www_site_page_metrics_v2/query.sql
new file mode 100644
index 00000000000..30375057fd8
--- /dev/null
+++ b/sql/moz-fx-data-marketing-prod/ga_derived/www_site_page_metrics_v2/query.sql
@@ -0,0 +1,193 @@
+--first, calculate the next page view's start time relative to when the session started in seconds using lead
+WITH page_view_staging AS (
+  SELECT
+    *,
+    LEAD(hit_time) OVER (
+      PARTITION BY
+        full_visitor_id,
+        visit_start_time
+      ORDER BY
+        hit_time
+    ) AS next_pageview,
+  FROM
+    `moz-fx-data-marketing-prod.ga_derived.www_site_hits_v2`
+  WHERE
+    date = @submission_date
+    AND hit_type = 'PAGE'
+),
+--now, subtract current page view start time from next page view start time to get time on current page
+page_views_only AS (
+  SELECT
+    a.date,
+    a.page_path AS page,
+    a.page_path_level1 AS locale,
+    a.page_name,
+    a.page_level_1,
+    a.page_level_2,
+    a.page_level_3,
+    a.page_level_4,
+    a.page_level_5,
+    a.device_category,
+    a.operating_system,
+    a.language,
+    a.browser,
+    a.browser_version,
+    a.country,
+    a.source,
+    a.medium,
+    a.campaign,
+    a.ad_content,
+    SUM(a.next_pageview - a.hit_time) AS total_time_on_page
+  FROM
+    page_view_staging AS a
+  GROUP BY
+    a.date,
+    a.page_path,
+    a.page_path_level1,
+    a.page_name,
+    a.page_level_1,
+    a.page_level_2,
+    a.page_level_3,
+    a.page_level_4,
+    a.page_level_5,
+    a.device_category,
+    a.operating_system,
+    a.language,
+    a.browser,
+    a.browser_version,
+    a.country,
+    a.source,
+    a.medium,
+    a.campaign,
+    a.ad_content
+),
+all_events_staging AS (
+  SELECT
+    a.date,
+    a.page_path AS page,
+    a.page_path_level1 AS locale,
+    a.page_name,
+    a.page_level_1,
+    a.page_level_2,
+    a.page_level_3,
+    a.page_level_4,
+    a.page_level_5,
+    a.device_category,
+    a.operating_system,
+    a.language,
+    a.browser,
+    a.browser_version,
+    a.country,
+    a.source,
+    a.medium,
+    a.campaign,
+    a.ad_content,
+    COUNTIF(a.event_name = 'page_view') AS pageviews,
+    COUNT(
+      DISTINCT(CASE WHEN a.event_name = 'page_view' THEN a.visit_identifier ELSE NULL END)
+    ) AS unique_pageviews,
+    SUM(a.entrances) AS entrances,
+    SUM(a.exits) AS exits,
+    COUNTIF(event_name = 'page_view' AND is_exit IS FALSE) AS non_exit_pageviews,
+    COUNTIF(hit_type = 'EVENT') AS total_events,
+    COUNT(
+      DISTINCT(CASE WHEN hit_type = 'EVENT' THEN visit_identifier ELSE NULL END)
+    ) AS unique_events,
+    COUNT(
+      DISTINCT(CASE WHEN single_page_session IS TRUE THEN visit_identifier ELSE NULL END)
+    ) AS single_page_sessions,
+    COUNT(
+      DISTINCT(
+        CASE
+          WHEN bounces = 1
+            AND event_name = 'page_view'
+            THEN visit_identifier
+          ELSE NULL
+        END
+      )
+    ) AS bounces
+  FROM
+    `moz-fx-data-marketing-prod.ga_derived.www_site_hits_v2` AS a
+  WHERE
+    date = @submission_date
+  GROUP BY
+    a.date,
+    a.page_path,
+    a.page_path_level1,
+    a.page_name,
+    a.page_level_1,
+    a.page_level_2,
+    a.page_level_3,
+    a.page_level_4,
+    a.page_level_5,
+    a.device_category,
+    a.operating_system,
+    a.language,
+    a.browser,
+    a.browser_version,
+    a.country,
+    a.source,
+    a.medium,
+    a.campaign,
+    a.ad_content
+)
+--join it all together to get everything plus total time on each page
+--page_views_only is built from the hit_type = 'PAGE' subset of the same rows and is
+--grouped by the same columns as all_events_staging, so each of its groups also exists
+--in all_events_staging. A LEFT JOIN therefore keeps every row, and unlike a FULL OUTER
+--JOIN it can never emit a row whose dimensions (all selected from a) are NULL.
+--NOTE(review): COALESCE(x, '') makes NULL and '' compare equal; if both values occur
+--for the same column, those groups cross-match and duplicate rows - confirm upstream.
+SELECT
+  a.date,
+  a.page,
+  a.locale,
+  a.page_name,
+  a.page_level_1,
+  a.page_level_2,
+  a.page_level_3,
+  a.page_level_4,
+  a.page_level_5,
+  a.device_category,
+  a.operating_system,
+  a.language,
+  a.browser,
+  a.browser_version,
+  a.country,
+  a.source,
+  a.medium,
+  a.campaign,
+  a.ad_content,
+  a.pageviews,
+  a.unique_pageviews,
+  a.entrances,
+  a.exits,
+  a.non_exit_pageviews,
+  b.total_time_on_page,
+  a.total_events,
+  a.unique_events,
+  a.single_page_sessions,
+  a.bounces,
+FROM
+  all_events_staging a
+LEFT JOIN
+  page_views_only b
+  ON a.date = b.date
+  AND COALESCE(a.page, '') = COALESCE(b.page, '')
+  AND COALESCE(a.locale, '') = COALESCE(b.locale, '')
+  AND COALESCE(a.page_name, '') = COALESCE(b.page_name, '')
+  AND COALESCE(a.page_level_1, '') = COALESCE(b.page_level_1, '')
+  AND COALESCE(a.page_level_2, '') = COALESCE(b.page_level_2, '')
+  AND COALESCE(a.page_level_3, '') = COALESCE(b.page_level_3, '')
+  AND COALESCE(a.page_level_4, '') = COALESCE(b.page_level_4, '')
+  AND COALESCE(a.page_level_5, '') = COALESCE(b.page_level_5, '')
+  AND COALESCE(a.device_category, '') = COALESCE(b.device_category, '')
+  AND COALESCE(a.operating_system, '') = COALESCE(b.operating_system, '')
+  AND COALESCE(a.language, '') = COALESCE(b.language, '')
+  AND COALESCE(a.browser, '') = COALESCE(b.browser, '')
+  AND COALESCE(a.browser_version, '') = COALESCE(b.browser_version, '')
+  AND COALESCE(a.country, '') = COALESCE(b.country, '')
+  AND COALESCE(a.source, '') = COALESCE(b.source, '')
+  AND COALESCE(a.medium, '') = COALESCE(b.medium, '')
+  AND COALESCE(a.campaign, '') = COALESCE(b.campaign, '')
+  AND COALESCE(a.ad_content, '') = COALESCE(b.ad_content, '')
diff --git a/sql/moz-fx-data-marketing-prod/ga_derived/www_site_page_metrics_v2/schema.yaml b/sql/moz-fx-data-marketing-prod/ga_derived/www_site_page_metrics_v2/schema.yaml
new file mode 100644
index 00000000000..43bc5c7aa05
--- /dev/null
+++ b/sql/moz-fx-data-marketing-prod/ga_derived/www_site_page_metrics_v2/schema.yaml
@@ -0,0 +1,117 @@
+fields:
+- mode: NULLABLE
+  name: date
+  type: DATE
+  description: The date of the visit
+- mode: NULLABLE
+  name: page
+  type: STRING
+  description: Page
+- mode: NULLABLE
+  name: locale
+  type: STRING
+  description: Locale
+- mode: NULLABLE
+  name: page_name
+  type: STRING
+  description: Page Name
+- mode: NULLABLE
+  name: page_level_1
+  type: STRING
+  description: Page Level 1
+- mode: NULLABLE
+  name: page_level_2
+  type: STRING
+  description: Page Level 2
+- mode: NULLABLE
+  name: page_level_3
+  type: STRING
+  description: Page Level 3
+- mode: NULLABLE
+  name: page_level_4
+  type: STRING
+  description: Page Level 4
+- mode: NULLABLE
+  name: page_level_5
+  type: STRING
+  description: Page Level 5
+- mode: NULLABLE
+  name: device_category
+  type: STRING
+  description: Device Category - The device category the visitor used to visit the site
+- mode: NULLABLE
+  name: operating_system
+  type: STRING
+  description: Operating System - The device operating system that the visitor used to visit the site
+- mode: NULLABLE
+  name: language
+  type: STRING
+  description: Language
+- mode: NULLABLE
+  name: browser
+  type: STRING
+  description: Browser
+- mode: NULLABLE
+  name: browser_version
+  type: STRING
+  description: Browser Version
+- mode: NULLABLE
+  name: country
+  type: STRING
+  description: Country
+- mode: NULLABLE
+  name: source
+  type: STRING
+  description: Source
+- mode: NULLABLE
+  name: medium
+  type: STRING
+  description: Medium
+- mode: NULLABLE
+  name: campaign
+  type: STRING
+  description: Campaign
+- mode: NULLABLE
+  name: ad_content
+  type: STRING
+  description: Ad Content
+- mode: NULLABLE
+  name: pageviews
+  type: INT64
+  description: Page Views
+- mode: NULLABLE
+  name: unique_pageviews
+  type: INT64
+  description: Unique Page Views
+- mode: NULLABLE
+  name: entrances
+  type: INT64
+  description: Entrances
+- mode: NULLABLE
+  name: exits
+  type: INT64
+  description: Exits
+- mode: NULLABLE
+  name: non_exit_pageviews
+  type: INT64
+  description: Non Exit Page Views
+- mode: NULLABLE
+  name: total_time_on_page
+  type: FLOAT64
+  description: Total Time On Page
+- mode: NULLABLE
+  name: total_events
+  type: INT64
+  description: Total Events
+- mode: NULLABLE
+  name: unique_events
+  type: INT64
+  description: Unique Events
+- mode: NULLABLE
+  name: single_page_sessions
+  type: INT64
+  description: Single Page Sessions
+- mode: NULLABLE
+  name: bounces
+  type: INT64
+  description: Bounces