Skip to content

Commit

Permalink
feat: make sure that device_manufacturer is lowercased prior to joining
Browse files Browse the repository at this point in the history
  • Loading branch information
kik-kik committed Jan 14, 2025
1 parent cccde89 commit c1afd0f
Show file tree
Hide file tree
Showing 7 changed files with 24 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,7 @@ CREATE OR REPLACE VIEW
`{{ project_id }}.{{ dataset }}.{{ name }}`
AS
SELECT
* EXCEPT (isp) REPLACE(
-- Lower device_manufacturer as in some cases the same manufacturer value has different casing.
LOWER(device_manufacturer) AS device_manufacturer
),
* EXCEPT (isp),
CASE
WHEN LOWER(isp) = "browserstack"
THEN CONCAT("{{ friendly_name }}", " ", isp)
Expand Down Expand Up @@ -60,5 +57,8 @@ SELECT
ELSE
CAST(NULL AS STRING)
END AS device_type,
-- Lowercase device_manufacturer because the same manufacturer can appear with different casing.
-- The original column is kept unchanged so downstream calculations are not suddenly affected.
LOWER(device_manufacturer) AS device_manufacturer_lowered,
FROM
`{{ project_id }}.{{ dataset }}.baseline_clients_last_seen`
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
WITH device_manufacturer_counts AS (
SELECT
submission_date,
device_manufacturer,
DENSE_RANK() OVER(PARTITION BY submission_date ORDER BY COUNT(*) DESC) AS manufacturer_rank,
device_manufacturer_lowered,
RANK() OVER(PARTITION BY submission_date ORDER BY COUNT(*) DESC) AS manufacturer_rank,
FROM
`{{ project_id }}.{{ dataset }}.engagement_clients`
WHERE
Expand All @@ -16,7 +16,7 @@ WITH device_manufacturer_counts AS (
{% endraw %}
GROUP BY
submission_date,
device_manufacturer
device_manufacturer_lowered
)

SELECT
Expand All @@ -36,12 +36,12 @@ SELECT
COUNTIF(is_mau) AS mau,
device_type,
-- Bucket device manufacturers with low count prior to aggregation
IF(manufacturer_rank <= 150, device_manufacturer, "other") AS device_manufacturer,
IF(manufacturer_rank <= 150, device_manufacturer_lowered, "other") AS device_manufacturer,
FROM
`{{ project_id }}.{{ dataset }}.engagement_clients`
LEFT JOIN
device_manufacturer_counts
USING(submission_date, device_manufacturer)
USING(submission_date, device_manufacturer_lowered)
WHERE
{% raw %}
{% if is_init() %}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ WITH active_users AS (
is_mau,
is_mobile,
device_type,
device_manufacturer,
device_manufacturer_lowered,
FROM
`{{ project_id }}.{{ dataset }}.active_users`
),
Expand Down Expand Up @@ -65,7 +65,7 @@ SELECT
ELSE 'Unknown'
END AS lifecycle_stage,
device_type,
device_manufacturer,
device_manufacturer_lowered,
FROM
active_users
LEFT JOIN
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ SELECT
normalized_os AS os,
normalized_os_version AS os_version,
device_model,
device_manufacturer,
device_manufacturer_lowered,
is_mobile,
{% for attribution_field in product_attribution_fields %}
attribution.{{ attribution_field }},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
WITH device_manufacturer_counts AS (
SELECT
first_seen_date,
device_manufacturer,
DENSE_RANK() OVER(PARTITION BY submission_date ORDER BY COUNT(*) DESC) AS manufacturer_rank,
device_manufacturer_lowered,
RANK() OVER(PARTITION BY first_seen_date ORDER BY COUNT(*) DESC) AS manufacturer_rank,
FROM
`{{ project_id }}.{{ dataset }}.new_profile_clients`
WHERE
Expand All @@ -16,7 +16,7 @@ WITH device_manufacturer_counts AS (
{% endraw %}
GROUP BY
first_seen_date,
device_manufacturer
device_manufacturer_lowered
)

SELECT
Expand All @@ -29,7 +29,7 @@ SELECT
os,
os_version,
-- Bucket device manufacturers with low count prior to aggregation
IF(manufacturer_rank <= 150, device_manufacturer, "other") AS device_manufacturer,
IF(manufacturer_rank <= 150, device_manufacturer_lowered, "other") AS device_manufacturer,
is_mobile,
{% for field in product_attribution_fields.values() if not field.client_only %}
{{ field.name }},
Expand All @@ -40,7 +40,7 @@ FROM
`{{ project_id }}.{{ dataset }}.new_profile_clients`
LEFT JOIN
device_manufacturer_counts
USING(first_seen_date, device_manufacturer)
USING(first_seen_date, device_manufacturer_lowered)
WHERE
{% raw %}
{% if is_init() %}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
WITH device_manufacturer_counts AS (
SELECT
submission_date,
device_manufacturer,
DENSE_RANK() OVER(PARTITION BY submission_date ORDER BY COUNT(*) DESC) AS manufacturer_rank,
device_manufacturer_lowered,
RANK() OVER(PARTITION BY submission_date ORDER BY COUNT(*) DESC) AS manufacturer_rank,
FROM
`{{ project_id }}.{{ dataset }}.retention_clients`
WHERE
Expand All @@ -18,7 +18,7 @@ WITH device_manufacturer_counts AS (
{% endraw %}
GROUP BY
submission_date,
device_manufacturer
device_manufacturer_lowered
)

SELECT
Expand All @@ -42,12 +42,12 @@ SELECT
COUNTIF(repeat_profile) AS repeat_profiles,
device_type,
-- Bucket device manufacturers with low count prior to aggregation
IF(manufacturer_rank <= 150, device_manufacturer, "other") AS device_manufacturer,
IF(manufacturer_rank <= 150, device_manufacturer_lowered, "other") AS device_manufacturer,
FROM
`{{ project_id }}.{{ dataset }}.retention_clients`
LEFT JOIN
device_manufacturer_counts
USING(submission_date, device_manufacturer)
USING(submission_date, device_manufacturer_lowered)
WHERE
{% raw %}
{% if is_init() %}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ WITH active_users AS (
days_active_bits,
is_mobile,
device_type,
device_manufacturer,
device_manufacturer_lowered,
FROM
`{{ project_id }}.{{ dataset }}.active_users`
),
Expand Down Expand Up @@ -84,7 +84,7 @@ SELECT
ELSE 'Unknown'
END AS lifecycle_stage,
active_users.device_type,
active_users.device_manufacturer,
active_users.device_manufacturer_lowered,
FROM
`{{ project_id }}.{{ dataset }}.baseline_clients_daily` AS clients_daily
INNER JOIN
Expand Down

0 comments on commit c1afd0f

Please sign in to comment.