Skip to content

Commit

Permalink
Auto-push due to change on main branch [ci skip]
Browse files Browse the repository at this point in the history
  • Loading branch information
CircleCI generate-sql job committed Feb 2, 2024
1 parent d63abbf commit 72dfbd4
Show file tree
Hide file tree
Showing 6 changed files with 367 additions and 0 deletions.
13 changes: 13 additions & 0 deletions dags/bqetl_google_analytics_derived_ga4.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,21 @@
depends_on_past=False,
)

# Task that populates ga_derived.www_site_page_metrics_v2 in the
# moz-fx-data-marketing-prod project. The query receives the run date through
# the "submission_date" parameter, and depends_on_past=False means a run does
# not wait on the previous day's run of this task.
# NOTE(review): the "[email protected]" values below look like e-mail addresses
# obfuscated by the page this diff was copied from -- confirm the real
# owner/alert addresses against the repository before relying on them.
ga_derived__www_site_page_metrics__v2 = bigquery_etl_query(
task_id="ga_derived__www_site_page_metrics__v2",
destination_table="www_site_page_metrics_v2",
dataset_id="ga_derived",
project_id="moz-fx-data-marketing-prod",
owner="[email protected]",
email=["[email protected]", "[email protected]"],
date_partition_parameter="submission_date",
depends_on_past=False,
)

# Dependency wiring: each derived metrics task below runs only after the
# www_site_hits_v2 task, whose output table they read.
ga_derived__www_site_events_metrics__v2.set_upstream(ga_derived__www_site_hits__v2)

ga_derived__www_site_landing_page_metrics__v2.set_upstream(
ga_derived__www_site_hits__v2
)

# The page metrics query selects from ga_derived.www_site_hits_v2, hence the
# same upstream as the tasks above.
ga_derived__www_site_page_metrics__v2.set_upstream(ga_derived__www_site_hits__v2)
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
friendly_name: Www Site Page Metrics
description: |-
User-facing view over ga_derived.www_site_page_metrics_v2: aggregated metrics per page on www.mozilla.org from Google Analytics 4
owners: []
labels: {}
bigquery: null
workgroup_access:
- role: roles/bigquery.dataViewer
members:
- workgroup:mozilla-confidential
references:
view.sql:
- moz-fx-data-marketing-prod.ga_derived.www_site_page_metrics_v2
deprecated: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- View ga.www_site_page_metrics: a pass-through over the derived table
-- ga_derived.www_site_page_metrics_v2. It selects every column unchanged and
-- applies no filtering or renaming, so consumers can query the unversioned
-- name while the versioned table underneath holds the data.
CREATE OR REPLACE VIEW
`moz-fx-data-marketing-prod.ga.www_site_page_metrics`
AS
SELECT
*
FROM
`moz-fx-data-marketing-prod.ga_derived.www_site_page_metrics_v2`
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
friendly_name: WWW Site Page Metrics V2
description: |-
Aggregated metrics per page on www.mozilla.org from Google Analytics 4
owners:
- [email protected]
labels:
incremental: true
owner1: kwindau
dag: bqetl_google_analytics_derived_ga4
scheduling:
dag_name: bqetl_google_analytics_derived_ga4
bigquery:
time_partitioning:
type: day
field: date
require_partition_filter: true
expiration_days: null
clustering:
fields:
- page_name
- locale
- country
- medium
workgroup_access:
- role: roles/bigquery.dataViewer
members:
- workgroup:mozilla-confidential
references: {}
deprecated: false
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
-- Daily per-page metrics for www.mozilla.org, built from the single day of
-- ga_derived.www_site_hits_v2 selected by @submission_date.
--
-- Output grain: one row per combination of date, page, locale, page name,
-- page levels 1-5, device category, operating system, language, browser,
-- browser version, country, source, medium, campaign and ad content.
--
-- Time on page and the event counts are computed in the same pass over the
-- hits table rather than in separate CTEs joined back together on the
-- dimensions. Joining on COALESCE(dimension, '') would treat NULL and '' as
-- the same key even though GROUP BY keeps them as separate groups, which
-- multiplies rows (and total_time_on_page) whenever both values occur on the
-- same day. A single aggregation has no join keys, so that cannot happen, and
-- the hits table is scanned only once.
WITH hits_with_time_on_page AS (
  SELECT
    hits.date,
    hits.page_path,
    hits.page_path_level1,
    hits.page_name,
    hits.page_level_1,
    hits.page_level_2,
    hits.page_level_3,
    hits.page_level_4,
    hits.page_level_5,
    hits.device_category,
    hits.operating_system,
    hits.language,
    hits.browser,
    hits.browser_version,
    hits.country,
    hits.source,
    hits.medium,
    hits.campaign,
    hits.ad_content,
    hits.event_name,
    hits.hit_type,
    hits.visit_identifier,
    hits.entrances,
    hits.exits,
    hits.is_exit,
    hits.single_page_session,
    hits.bounces,
    -- Time spent on a page = start of the next page view in the same session
    -- minus the start of this one. hit_type is part of the window partition
    -- so that LEAD only ever looks at other 'PAGE' hits of the same session;
    -- the last page view of a session has no successor and yields NULL.
    -- NOTE(review): hit_time is presumably seconds since session start --
    -- confirm against www_site_hits_v2.
    IF(
      hits.hit_type = 'PAGE',
      LEAD(hits.hit_time) OVER (
        PARTITION BY
          hits.full_visitor_id,
          hits.visit_start_time,
          hits.hit_type
        ORDER BY
          hits.hit_time
      ) - hits.hit_time,
      NULL
    ) AS time_on_page
  FROM
    `moz-fx-data-marketing-prod.ga_derived.www_site_hits_v2` AS hits
  WHERE
    hits.date = @submission_date
)
SELECT
  h.date,
  h.page_path AS page,
  h.page_path_level1 AS locale,
  h.page_name,
  h.page_level_1,
  h.page_level_2,
  h.page_level_3,
  h.page_level_4,
  h.page_level_5,
  h.device_category,
  h.operating_system,
  h.language,
  h.browser,
  h.browser_version,
  h.country,
  h.source,
  h.medium,
  h.campaign,
  h.ad_content,
  COUNTIF(h.event_name = 'page_view') AS pageviews,
  -- Sessions (visit_identifier) with at least one page_view in this group.
  COUNT(
    DISTINCT(CASE WHEN h.event_name = 'page_view' THEN h.visit_identifier ELSE NULL END)
  ) AS unique_pageviews,
  SUM(h.entrances) AS entrances,
  SUM(h.exits) AS exits,
  COUNTIF(h.event_name = 'page_view' AND h.is_exit IS FALSE) AS non_exit_pageviews,
  -- SUM ignores NULLs, so this is NULL when the group has no page view with a
  -- following page view in its session.
  SUM(h.time_on_page) AS total_time_on_page,
  COUNTIF(h.hit_type = 'EVENT') AS total_events,
  COUNT(
    DISTINCT(CASE WHEN h.hit_type = 'EVENT' THEN h.visit_identifier ELSE NULL END)
  ) AS unique_events,
  COUNT(
    DISTINCT(CASE WHEN h.single_page_session IS TRUE THEN h.visit_identifier ELSE NULL END)
  ) AS single_page_sessions,
  COUNT(
    DISTINCT(
      CASE
        WHEN h.bounces = 1
          AND h.event_name = 'page_view'
          THEN h.visit_identifier
        ELSE NULL
      END
    )
  ) AS bounces
FROM
  hits_with_time_on_page AS h
GROUP BY
  h.date,
  h.page_path,
  h.page_path_level1,
  h.page_name,
  h.page_level_1,
  h.page_level_2,
  h.page_level_3,
  h.page_level_4,
  h.page_level_5,
  h.device_category,
  h.operating_system,
  h.language,
  h.browser,
  h.browser_version,
  h.country,
  h.source,
  h.medium,
  h.campaign,
  h.ad_content
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
fields:
- mode: NULLABLE
name: date
type: DATE
description: The date of the visit
- mode: NULLABLE
name: page
type: STRING
description: Page
- mode: NULLABLE
name: locale
type: STRING
description: Locale
- mode: NULLABLE
name: page_name
type: STRING
description: Page Name
- mode: NULLABLE
name: page_level_1
type: STRING
description: Page Level 1
- mode: NULLABLE
name: page_level_2
type: STRING
description: Page Level 2
- mode: NULLABLE
name: page_level_3
type: STRING
description: Page Level 3
- mode: NULLABLE
name: page_level_4
type: STRING
description: Page Level 4
- mode: NULLABLE
name: page_level_5
type: STRING
description: Page Level 5
- mode: NULLABLE
name: device_category
type: STRING
description: Device Category - The device category the visitor used to visit the site
- mode: NULLABLE
name: operating_system
type: STRING
description: Operating System - The device operating system that the visitor used to visit the site
- mode: NULLABLE
name: language
type: STRING
description: Language
- mode: NULLABLE
name: browser
type: STRING
description: Browser
- mode: NULLABLE
name: browser_version
type: STRING
description: Browser Version
- mode: NULLABLE
name: country
type: STRING
description: Country
- mode: NULLABLE
name: source
type: STRING
description: Source
- mode: NULLABLE
name: medium
type: STRING
description: Medium
- mode: NULLABLE
name: campaign
type: STRING
description: Campaign
- mode: NULLABLE
name: ad_content
type: STRING
description: Ad Content
- mode: NULLABLE
name: pageviews
type: INT64
description: Page Views
- mode: NULLABLE
name: unique_pageviews
type: INT64
description: Unique Page Views
- mode: NULLABLE
name: entrances
type: INT64
description: Entrances
- mode: NULLABLE
name: exits
type: INT64
description: Exits
- mode: NULLABLE
name: non_exit_pageviews
type: INT64
description: Non Exit Page Views
- mode: NULLABLE
name: total_time_on_page
type: FLOAT64
description: Total Time On Page
- mode: NULLABLE
name: total_events
type: INT64
description: Total Events
- mode: NULLABLE
name: unique_events
type: INT64
description: Unique Events
- mode: NULLABLE
name: single_page_sessions
type: INT64
description: Single Page Sessions
- mode: NULLABLE
name: bounces
type: INT64
description: Bounces

0 comments on commit 72dfbd4

Please sign in to comment.