Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tech Report: Audit pass rates #49

Merged
merged 37 commits into from
Feb 14, 2025
Merged
Show file tree
Hide file tree
Changes from 35 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
cf33544
versions
max-ostapenko Jan 12, 2025
547f63e
tech filter
max-ostapenko Jan 12, 2025
e1f0e60
Merge branch 'main' into central-flyingfish
max-ostapenko Jan 14, 2025
3ec247c
Merge branch 'main' into central-flyingfish
max-ostapenko Jan 21, 2025
65acf68
Merge branch 'main' into central-flyingfish
max-ostapenko Jan 26, 2025
e8580d0
new table with versions
max-ostapenko Jan 26, 2025
bfac4f6
typo
max-ostapenko Jan 26, 2025
ac2f597
versions table
max-ostapenko Jan 26, 2025
bdd46b8
fix
max-ostapenko Jan 26, 2025
23864b9
no retries
max-ostapenko Jan 26, 2025
4d42453
tech_report_* tables
max-ostapenko Jan 26, 2025
4dd3f9b
clusters renamed
max-ostapenko Jan 26, 2025
8032aab
lint
max-ostapenko Jan 26, 2025
a41ed32
adjust export config
max-ostapenko Jan 26, 2025
2aec142
fix clustering
max-ostapenko Jan 26, 2025
396d664
origin renamed
max-ostapenko Jan 27, 2025
e9b666e
deduplicated good_cwv
max-ostapenko Jan 27, 2025
ff2f5a4
Merge branch 'main' into central-flyingfish
max-ostapenko Jan 27, 2025
58eea31
include minor
max-ostapenko Jan 30, 2025
747a18f
Merge branch 'main' into main
max-ostapenko Jan 30, 2025
8c0455c
Merge branch 'central-flyingfish' into central-flyingfish
max-ostapenko Jan 30, 2025
c88ef18
fix
max-ostapenko Jan 30, 2025
3268e28
Merge branch 'central-flyingfish' into central-flyingfish
max-ostapenko Jan 30, 2025
bd07f78
cleanup
max-ostapenko Jan 30, 2025
5967524
pattern fix
max-ostapenko Jan 30, 2025
146978d
Merge branch 'central-flyingfish' into central-flyingfish
max-ostapenko Jan 30, 2025
7ff9151
tech detections only
max-ostapenko Jan 31, 2025
718e3c4
fix
max-ostapenko Jan 31, 2025
34a4bb7
relaxed pattern
max-ostapenko Jan 31, 2025
15e5555
Merge branch 'main' into central-flyingfish
max-ostapenko Feb 1, 2025
8bde2fb
remove similar_technologies
max-ostapenko Feb 1, 2025
55529ec
audits pass rates
max-ostapenko Feb 2, 2025
0ac00e9
pass_rate
max-ostapenko Feb 2, 2025
5b51bb5
fix
max-ostapenko Feb 2, 2025
c810c25
Merge branch 'main' into equal-hornet
max-ostapenko Feb 14, 2025
1b3e2ec
drop PWA
max-ostapenko Feb 14, 2025
7ac201e
Merge branch 'main' into equal-hornet
max-ostapenko Feb 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
242 changes: 177 additions & 65 deletions definitions/output/reports/tech_crux.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,31 @@ CREATE TEMP FUNCTION IS_NON_ZERO(
) RETURNS BOOL AS (
good + needs_improvement + poor > 0
);

-- get_passed_audits: lists the Lighthouse audits that passed in one report.
--
-- Input:  lighthouse - the Lighthouse report as JSON (may be NULL).
-- Output: one (category, id) struct per audit reference whose audit has a
--         score of exactly 1 and whose group is neither 'metrics' nor 'hidden'.
--         An audit referenced from several categories is returned once per
--         category. Missing or incomplete reports produce an empty array
--         instead of raising, so a single malformed row cannot fail the query.
CREATE TEMP FUNCTION get_passed_audits(lighthouse JSON)
RETURNS ARRAY<STRUCT<
  category STRING,
  id STRING
>>
LANGUAGE js AS """
  // Audit references in these groups are never reported as passed audits.
  const excludedGroups = ['metrics', 'hidden']
  const results = []

  // A NULL report, or one without categories / audits, yields no results.
  const categories = lighthouse?.categories || {}
  const audits = lighthouse?.audits || {}

  for (const category of Object.keys(categories)) {
    // Tolerate a category that carries no auditRefs list.
    for (const audit of categories[category]?.auditRefs || []) {
      if (
        // Optional chaining: a reference to an audit that is absent from
        // the audits map is treated as not passed rather than throwing.
        audits[audit.id]?.score === 1 &&
        !excludedGroups.includes(audit.group)
      ) {
        results.push({
          category,
          id: audit.id
        })
      }
    }
  }

  return results;
""";
`).query(ctx => `
WITH pages AS (
SELECT
Expand Down Expand Up @@ -172,7 +197,6 @@ technologies AS (
WHERE
tech.technology IS NOT NULL


UNION ALL

SELECT
Expand All @@ -183,26 +207,7 @@ technologies AS (
FROM pages
),

categories AS (
SELECT
tech.technology,
ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category
FROM pages,
UNNEST(technologies) AS tech,
UNNEST(tech.categories) AS category
GROUP BY technology

UNION ALL

SELECT
'ALL' AS technology,
ARRAY_TO_STRING(ARRAY_AGG(DISTINCT category IGNORE NULLS ORDER BY category), ', ') AS category
FROM pages,
UNNEST(technologies) AS tech,
UNNEST(tech.categories) AS category
),

lab_metrics AS (
lab_data AS (
SELECT
client,
page,
Expand All @@ -218,13 +223,34 @@ lab_metrics AS (
FROM pages
),

lab_data AS (
-- audits: distinct passed Lighthouse audits per client, root_page, technology
-- and version. `page` is used only as a join key and is dropped from the
-- output, so SELECT DISTINCT collapses rows that differ only by page.
audits AS (
SELECT DISTINCT
client,
root_page,
technology,
version,
audit_category,
audit_id
FROM (
-- One row per page and passed audit, expanded from get_passed_audits().
SELECT
client,
page,
root_page,
audits.category AS audit_category,
audits.id AS audit_id
FROM pages
INNER JOIN UNNEST(get_passed_audits(pages.lighthouse)) AS audits
) AS audits_data
-- Pair every passed audit with the rows of the technologies CTE for the same
-- client and page (technology / version presumably come from that CTE).
INNER JOIN technologies
USING (client, page)
),

lab_metrics AS (
SELECT
client,
root_page,
technology,
version,
ANY_VALUE(category) AS category,
AVG(bytesTotal) AS bytesTotal,
AVG(bytesJS) AS bytesJS,
AVG(bytesImg) AS bytesImg,
Expand All @@ -233,16 +259,130 @@ lab_data AS (
AVG(performance) AS performance,
AVG(pwa) AS pwa,
AVG(seo) AS seo
FROM lab_metrics
FROM lab_data
INNER JOIN technologies
USING (client, page)
INNER JOIN categories
USING (technology)
GROUP BY
client,
root_page,
technology,
version
),

-- origins_summary: number of distinct root pages (exposed as `origins`) for
-- every geo, client, rank, technology and version combination.
-- geo and rank are not selected by lab_metrics, so they presumably come from
-- the crux CTE -- confirm against its definition.
origins_summary AS (
SELECT
geo,
client,
rank,
technology,
version,
COUNT(DISTINCT root_page) AS origins
FROM lab_metrics
-- The inner join keeps only root pages that have a matching crux row.
INNER JOIN crux
USING (client, root_page)
GROUP BY
geo,
client,
rank,
technology,
version

),


-- audits_summary: for every geo, client, rank, technology and version, an
-- array with one struct per passed audit (category, id, pass_rate).
-- pass_rate = distinct root pages that passed the audit divided by the
-- `origins` count of the same group in origins_summary.
audits_summary AS (
SELECT
geo,
client,
rank,
technology,
version,
ARRAY_AGG(STRUCT(
audit_category AS category,
audit_id AS id,
-- SAFE_DIVIDE returns NULL instead of failing when the denominator is 0.
SAFE_DIVIDE(audits.origins, origins_summary.origins) AS pass_rate
)) AS audits
FROM (
-- Distinct root pages passing each audit within each group.
-- Note: the subquery alias `audits` shadows the audits CTE it reads from.
SELECT
geo,
client,
rank,
technology,
version,
audit_category,
audit_id,
COUNT(DISTINCT root_page) AS origins
FROM audits
INNER JOIN crux
USING (client, root_page)
GROUP BY
geo,
client,
rank,
technology,
version,
audit_category,
audit_id
) AS audits
-- LEFT JOIN keeps audit rows even without a matching origins_summary row;
-- their pass_rate is then NULL.
LEFT JOIN origins_summary
USING (geo, client, rank, technology, version)
GROUP BY
geo,
client,
rank,
technology,
version
),

-- other_summary: per geo, client, rank, technology and version, three nested
-- structs: CrUX origin counts, median Lighthouse scores and median page weight.
-- The good_* / any_* flags are not produced by lab_metrics, so they presumably
-- come from the crux CTE -- confirm against its definition.
other_summary AS (
SELECT
geo,
client,
rank,
technology,
version,

-- Counts of joined rows for which each flag is true, plus the share of
-- CWV-eligible rows (any_lcp AND any_cls) that have good_cwv.
STRUCT(
COUNTIF(good_fid) AS origins_with_good_fid,
COUNTIF(good_cls) AS origins_with_good_cls,
COUNTIF(good_lcp) AS origins_with_good_lcp,
COUNTIF(good_fcp) AS origins_with_good_fcp,
COUNTIF(good_ttfb) AS origins_with_good_ttfb,
COUNTIF(good_inp) AS origins_with_good_inp,
COUNTIF(any_fid) AS origins_with_any_fid,
COUNTIF(any_cls) AS origins_with_any_cls,
COUNTIF(any_lcp) AS origins_with_any_lcp,
COUNTIF(any_fcp) AS origins_with_any_fcp,
COUNTIF(any_ttfb) AS origins_with_any_ttfb,
COUNTIF(any_inp) AS origins_with_any_inp,
COUNTIF(good_cwv) AS origins_with_good_cwv,
COUNTIF(any_lcp AND any_cls) AS origins_eligible_for_cwv,
SAFE_DIVIDE(COUNTIF(good_cwv), COUNTIF(any_lcp AND any_cls)) AS pct_eligible_origins_with_good_cwv
) AS crux,

-- Approximate medians: element 500 of 1000 approximate quantiles.
-- Note that best_practices is exposed under the shorter name `practices`.
STRUCT(
SAFE_CAST(APPROX_QUANTILES(accessibility, 1000)[OFFSET(500)] AS NUMERIC) AS accessibility,
SAFE_CAST(APPROX_QUANTILES(best_practices, 1000)[OFFSET(500)] AS NUMERIC) AS practices,
SAFE_CAST(APPROX_QUANTILES(performance, 1000)[OFFSET(500)] AS NUMERIC) AS performance,
SAFE_CAST(APPROX_QUANTILES(pwa, 1000)[OFFSET(500)] AS NUMERIC) AS pwa,
SAFE_CAST(APPROX_QUANTILES(seo, 1000)[OFFSET(500)] AS NUMERIC) AS seo
) AS median_lighthouse_score,

-- Approximate median byte sizes, cast to whole numbers.
STRUCT(
SAFE_CAST(APPROX_QUANTILES(bytesTotal, 1000)[OFFSET(500)] AS INT64) AS total,
SAFE_CAST(APPROX_QUANTILES(bytesJS, 1000)[OFFSET(500)] AS INT64) AS js,
SAFE_CAST(APPROX_QUANTILES(bytesImg, 1000)[OFFSET(500)] AS INT64) AS images
) AS median_page_weight_bytes

FROM lab_metrics
INNER JOIN crux
USING (client, root_page)
GROUP BY
geo,
client,
rank,
technology,
version
)

SELECT
Expand All @@ -252,44 +392,16 @@ SELECT
rank,
technology,
version,
COUNT(DISTINCT root_page) AS origins,

# CrUX data
COUNTIF(good_fid) AS origins_with_good_fid,
COUNTIF(good_cls) AS origins_with_good_cls,
COUNTIF(good_lcp) AS origins_with_good_lcp,
COUNTIF(good_fcp) AS origins_with_good_fcp,
COUNTIF(good_ttfb) AS origins_with_good_ttfb,
COUNTIF(good_inp) AS origins_with_good_inp,
COUNTIF(any_fid) AS origins_with_any_fid,
COUNTIF(any_cls) AS origins_with_any_cls,
COUNTIF(any_lcp) AS origins_with_any_lcp,
COUNTIF(any_fcp) AS origins_with_any_fcp,
COUNTIF(any_ttfb) AS origins_with_any_ttfb,
COUNTIF(any_inp) AS origins_with_any_inp,
COUNTIF(good_cwv) AS origins_with_good_cwv,
COUNTIF(any_lcp AND any_cls) AS origins_eligible_for_cwv,
SAFE_DIVIDE(COUNTIF(good_cwv), COUNTIF(any_lcp AND any_cls)) AS pct_eligible_origins_with_good_cwv,

# Lighthouse data
SAFE_CAST(APPROX_QUANTILES(accessibility, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_accessibility,
SAFE_CAST(APPROX_QUANTILES(best_practices, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_best_practices,
SAFE_CAST(APPROX_QUANTILES(performance, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_performance,
SAFE_CAST(APPROX_QUANTILES(pwa, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_pwa,
SAFE_CAST(APPROX_QUANTILES(seo, 1000)[OFFSET(500)] AS NUMERIC) AS median_lighthouse_score_seo,

# Page weight stats
SAFE_CAST(APPROX_QUANTILES(bytesTotal, 1000)[OFFSET(500)] AS INT64) AS median_bytes_total,
SAFE_CAST(APPROX_QUANTILES(bytesJS, 1000)[OFFSET(500)] AS INT64) AS median_bytes_js,
SAFE_CAST(APPROX_QUANTILES(bytesImg, 1000)[OFFSET(500)] AS INT64) AS median_bytes_image

FROM lab_data
INNER JOIN crux
USING (client, root_page)
GROUP BY
geo,
client,
rank,
technology,
version
# Metrics
origins,
crux,
median_lighthouse_score,
median_page_weight_bytes,
audits
FROM origins_summary
LEFT JOIN other_summary
USING (geo, client, rank, technology, version)
LEFT JOIN audits_summary
USING (geo, client, rank, technology, version)
`)
89 changes: 89 additions & 0 deletions definitions/output/reports/tech_report_audits.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// Month covered by this report run: the result of constants.fnPastMonth()
// applied to constants.currentMonth.
const pastMonth = constants.fnPastMonth(constants.currentMonth)

/**
 * reports.tech_report_audits
 *
 * Incremental, date-partitioned table with one row per date, geo, rank,
 * technology and version. The `audits` column merges the per-client audit
 * arrays read from reports.tech_crux into a single array that carries a
 * mobile and a desktop pass rate for every (category, id) pair.
 *
 * Pre-operations:
 *  - declare the GET_AUDITS JavaScript UDF used by the query;
 *  - delete rows already stored for `pastMonth` so that a re-run replaces the
 *    month instead of duplicating it. The DELETE is emitted only for
 *    incremental runs: on the very first run the table does not exist yet and
 *    an unconditional DELETE would fail the whole action.
 */
publish('tech_report_audits', {
  schema: 'reports',
  type: 'incremental',
  protected: true,
  bigquery: {
    partitionBy: 'date',
    clusterBy: ['rank', 'geo']
  },
  tags: ['tech_report']
}).preOps(ctx => `
CREATE TEMP FUNCTION GET_AUDITS(
  records ARRAY<STRUCT<
    client STRING,
    audits ARRAY<STRUCT<
      category STRING,
      id STRING,
      pass_rate FLOAT64
    >>
  >>
)
RETURNS ARRAY<STRUCT<
  category STRING,
  id STRING,
  mobile STRUCT<
    pass_rate FLOAT64
  >,
  desktop STRUCT<
    pass_rate FLOAT64
  >
>>
LANGUAGE js AS '''
  // Accumulates audits under a unique key (category + id), in first-seen order.
  const auditMap = {};

  // Loop over each per-client record.
  for (const record of records) {
    // Loop over each audit in the record.
    for (const audit of record.audits) {
      // Unique key used to combine the same audit across clients.
      const key = audit.category + '|' + audit.id;

      // Initialize the audit in the map if not present. A client that never
      // reports the audit keeps a pass_rate of 0.
      if (!auditMap[key]) {
        auditMap[key] = {
          category: audit.category,
          id: audit.id,
          mobile: { pass_rate: 0 },
          desktop: { pass_rate: 0 }
        };
      }

      // Add the pass_rate to the proper client type; other clients are ignored.
      if (record.client === 'mobile') {
        auditMap[key].mobile.pass_rate += audit.pass_rate;
      } else if (record.client === 'desktop') {
        auditMap[key].desktop.pass_rate += audit.pass_rate;
      }
    }
  }

  // Convert the map into an array of audits.
  return Object.values(auditMap);
''';

${ctx.when(ctx.incremental(), `
DELETE FROM ${ctx.self()}
WHERE date = '${pastMonth}';
`)}
`).query(ctx => `
/* {"dataform_trigger": "tech_report_complete", "date": "${pastMonth}", "name": "audits", "type": "report"} */
SELECT
  date,
  geo,
  rank,
  technology,
  version,
  GET_AUDITS(ARRAY_AGG(STRUCT(
    client,
    audits
  ))) AS audits
FROM ${ctx.ref('reports', 'tech_crux')}
WHERE date = '${pastMonth}'
GROUP BY
  date,
  geo,
  rank,
  technology,
  version
`)
Loading
Loading