diff --git a/sql/2024/04/inaccurate-sizes-attribute-impact.sql b/sql/2024/04/inaccurate-sizes-attribute-impact.sql
new file mode 100644
index 0000000..f68c880
--- /dev/null
+++ b/sql/2024/04/inaccurate-sizes-attribute-impact.sql
@@ -0,0 +1,123 @@
+# HTTP Archive query to measure impact of inaccurate sizes attributes per <img> for WordPress sites.
+#
+# WPP Research, Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# See https://github.com/GoogleChromeLabs/wpp-research/pull/108
+
+# Crawl date to analyze; matched against the `date` column of `httparchive.all.pages`.
+DECLARE DATE_TO_QUERY DATE DEFAULT '2024-03-01';
+
+# Parses the `responsive_images.responsive-images` array from the custom metrics JSON
+# string into one struct per image entry. Fields missing from the JSON become NULL, and
+# both `relative*` ratios use SAFE_DIVIDE, so a zero or NULL "ideal" value yields NULL
+# rather than a division error.
+CREATE TEMPORARY FUNCTION GET_IMG_SIZES_ACCURACY(custom_metrics STRING) RETURNS
+  ARRAY<STRUCT<
+    hasSrcset BOOL,
+    hasSizes BOOL,
+    sizesAbsoluteError FLOAT64,
+    sizesRelativeError FLOAT64,
+    idealSizesSelectedResourceEstimatedPixels INT64,
+    actualSizesEstimatedWastedLoadedPixels INT64,
+    relativeSizesEstimatedWastedLoadedPixels FLOAT64,
+    idealSizesSelectedResourceEstimatedBytes FLOAT64,
+    actualSizesEstimatedWastedLoadedBytes FLOAT64,
+    relativeSizesEstimatedWastedLoadedBytes FLOAT64
+  >>
+AS (
+  ARRAY(
+    SELECT AS STRUCT
+      CAST(JSON_EXTRACT_SCALAR(image, '$.hasSrcset') AS BOOL) AS hasSrcset,
+      CAST(JSON_EXTRACT_SCALAR(image, '$.hasSizes') AS BOOL) AS hasSizes,
+      CAST(JSON_EXTRACT_SCALAR(image, '$.sizesAbsoluteError') AS FLOAT64) AS sizesAbsoluteError,
+      CAST(JSON_EXTRACT_SCALAR(image, '$.sizesRelativeError') AS FLOAT64) AS sizesRelativeError,
+      CAST(JSON_EXTRACT_SCALAR(image, '$.idealSizesSelectedResourceEstimatedPixels') AS INT64) AS idealSizesSelectedResourceEstimatedPixels,
+      CAST(JSON_EXTRACT_SCALAR(image, '$.actualSizesEstimatedWastedLoadedPixels') AS INT64) AS actualSizesEstimatedWastedLoadedPixels,
+      # Wasted pixels divided by the pixels of the ideally selected resource.
+      SAFE_DIVIDE(
+        CAST(JSON_EXTRACT_SCALAR(image, '$.actualSizesEstimatedWastedLoadedPixels') AS INT64),
+        CAST(JSON_EXTRACT_SCALAR(image, '$.idealSizesSelectedResourceEstimatedPixels') AS INT64)
+      ) AS relativeSizesEstimatedWastedLoadedPixels,
+      CAST(JSON_EXTRACT_SCALAR(image, '$.idealSizesSelectedResourceEstimatedBytes') AS FLOAT64) AS idealSizesSelectedResourceEstimatedBytes,
+      CAST(JSON_EXTRACT_SCALAR(image, '$.actualSizesEstimatedWastedLoadedBytes') AS FLOAT64) AS actualSizesEstimatedWastedLoadedBytes,
+      # Wasted bytes divided by the bytes of the ideally selected resource.
+      SAFE_DIVIDE(
+        CAST(JSON_EXTRACT_SCALAR(image, '$.actualSizesEstimatedWastedLoadedBytes') AS FLOAT64),
+        CAST(JSON_EXTRACT_SCALAR(image, '$.idealSizesSelectedResourceEstimatedBytes') AS FLOAT64)
+      ) AS relativeSizesEstimatedWastedLoadedBytes,
+    FROM
+      UNNEST(JSON_EXTRACT_ARRAY(custom_metrics, '$.responsive_images.responsive-images')) AS image
+  )
+);
+
+# Returns TRUE when `technologies` has an entry named `cms` with an `info` value that
+# satisfies `version`: an empty `version` accepts any `info` value; a `version` ending
+# in ".x" (e.g. "6.x") accepts values starting with "6." or equal to "6"; any other
+# `version` must equal an `info` value exactly.
+# NOTE(review): `info` is cross-joined, so an entry whose `info` array is empty never
+# matches, even with version = "" - confirm `info` always holds at least one element.
+CREATE TEMPORARY FUNCTION IS_CMS(
+  technologies ARRAY<STRUCT<technology STRING, categories ARRAY<STRING>, info ARRAY<STRING>>>,
+  cms STRING,
+  version STRING
+) RETURNS BOOL AS (
+  EXISTS(
+    SELECT * FROM UNNEST(technologies) AS technology, UNNEST(technology.info) AS info
+    WHERE technology.technology = cms
+    AND (
+      version = ""
+      OR ENDS_WITH(version, ".x") AND (STARTS_WITH(info, RTRIM(version, "x")) OR info = RTRIM(version, ".x"))
+      OR info = version
+    )
+  )
+);
+
+# One row per image entry that has both srcset and sizes, on WordPress root pages of the selected crawl.
+WITH wordpressSizesData AS (
+  SELECT
+    client,
+    image
+  FROM
+    `httparchive.all.pages`,
+    UNNEST(GET_IMG_SIZES_ACCURACY(custom_metrics)) AS image
+  WHERE
+    date = DATE_TO_QUERY
+    AND IS_CMS(technologies, 'WordPress', '')
+    AND is_root_page = TRUE
+    AND image.hasSrcset = TRUE
+    AND image.hasSizes = TRUE
+)
+
+# 10th through 90th percentile (in steps of 10) of every metric, computed separately per client.
+SELECT
+  percentile,
+  client,
+  APPROX_QUANTILES(image.sizesAbsoluteError, 100)[OFFSET(percentile)] AS sizesAbsoluteError,
+  APPROX_QUANTILES(image.sizesRelativeError, 100)[OFFSET(percentile)] AS sizesRelativeError,
+  APPROX_QUANTILES(image.idealSizesSelectedResourceEstimatedPixels, 100)[OFFSET(percentile)] AS idealSizesSelectedResourceEstimatedPixels,
+  APPROX_QUANTILES(image.actualSizesEstimatedWastedLoadedPixels, 100)[OFFSET(percentile)] AS actualSizesEstimatedWastedLoadedPixels,
+  APPROX_QUANTILES(image.relativeSizesEstimatedWastedLoadedPixels, 100)[OFFSET(percentile)] AS relativeSizesEstimatedWastedLoadedPixels,
+  APPROX_QUANTILES(image.idealSizesSelectedResourceEstimatedBytes, 100)[OFFSET(percentile)] AS idealSizesSelectedResourceEstimatedBytes,
+  APPROX_QUANTILES(image.actualSizesEstimatedWastedLoadedBytes, 100)[OFFSET(percentile)] AS actualSizesEstimatedWastedLoadedBytes,
+  APPROX_QUANTILES(image.relativeSizesEstimatedWastedLoadedBytes, 100)[OFFSET(percentile)] AS relativeSizesEstimatedWastedLoadedBytes,
+FROM
+  wordpressSizesData,
+  UNNEST([10, 20, 30, 40, 50, 60, 70, 80, 90]) AS percentile
+GROUP BY
+  percentile,
+  client
+ORDER BY
+  client,
+  percentile
diff --git a/sql/README.md b/sql/README.md
index 1e455cd..2e7e37a 100644
--- a/sql/README.md
+++ b/sql/README.md
@@ -23,6 +23,7 @@ For additional considerations for writing BigQuery queries against HTTP Archive,
 ### 2024/04
 
 * [Diff for Web Vitals passing rates of sites that enabled the Speculation Rules API from one month to the next](./2024/04/web-vitals-diff-for-sites-enabling-speculation-rules.sql)
+* [Impact of inaccurate sizes attributes in WordPress](./2024/04/inaccurate-sizes-attribute-impact.sql)
 
 ### 2024/01
 