From 57e53e8dee5cc499a72ece349a66f4174c8af236 Mon Sep 17 00:00:00 2001 From: JerrySentry <142266253+JerrySentry@users.noreply.github.com> Date: Mon, 3 Jun 2024 12:07:38 -0400 Subject: [PATCH] Bundle Analysis: associate past assets to current parsed bundle (#231) * Bundle analysis: add asset association helper * typing change * add unit test * reword comment * typing and comment * refactor associate_previous_assets to helper functions * typing --- shared/bundle_analysis/report.py | 101 +++++++++++++++++- tests/samples/asset_link_curr_a.json | 57 ++++++++++ tests/samples/asset_link_curr_b.json | 57 ++++++++++ tests/samples/asset_link_prev_a.json | 57 ++++++++++ tests/samples/asset_link_prev_b.json | 57 ++++++++++ .../bundle_analysis/test_asset_association.py | 92 ++++++++++++++++ 6 files changed, 420 insertions(+), 1 deletion(-) create mode 100644 tests/samples/asset_link_curr_a.json create mode 100644 tests/samples/asset_link_curr_b.json create mode 100644 tests/samples/asset_link_prev_a.json create mode 100644 tests/samples/asset_link_prev_b.json create mode 100644 tests/unit/bundle_analysis/test_asset_association.py diff --git a/shared/bundle_analysis/report.py b/shared/bundle_analysis/report.py index 9701c182f..da0f6bf7b 100644 --- a/shared/bundle_analysis/report.py +++ b/shared/bundle_analysis/report.py @@ -2,8 +2,9 @@ import os import sqlite3 import tempfile -from typing import Any, Dict, Iterator, Optional +from typing import Any, Dict, Iterator, Optional, Set, Tuple +from sqlalchemy import text from sqlalchemy.exc import OperationalError from sqlalchemy.sql import func @@ -170,6 +171,104 @@ def ingest(self, path: str) -> int: self.db_session.commit() return session_id + def _associate_bundle_report_assets_by_name( + self, curr_bundle_report: BundleReport, prev_bundle_report: BundleReport + ) -> Set[Tuple[str, str]]: + """ + Rule 1 + Returns a set of pairs of UUIDs (the current asset UUID and prev asset UUID) + representing that the curr asset UUID should be updated to the prev asset UUID + because the curr asset has the same hashed name as the previous asset + """ + ret = set() + prev_asset_hashed_names = { + a.hashed_name: a.uuid for a in prev_bundle_report.asset_reports() + } + for curr_asset in curr_bundle_report.asset_reports(): + if curr_asset.asset_type == models.AssetType.JAVASCRIPT: + if curr_asset.hashed_name in prev_asset_hashed_names: + ret.add( + ( + prev_asset_hashed_names[curr_asset.hashed_name], + curr_asset.uuid, + ) + ) + return ret + + def _associate_bundle_report_assets_by_module_names( + self, curr_bundle_report: BundleReport, prev_bundle_report: BundleReport + ) -> Set[Tuple[str, str]]: + """ + Rule 2 + Returns a set of pairs of UUIDs (the current asset UUID and prev asset UUID) + representing that the curr asset UUID should be updated to the prev asset UUID + because there exists a prev asset where all its module names are the same as the + curr asset module names + """ + ret = set() + prev_module_asset_mapping = {} + for prev_asset in prev_bundle_report.asset_reports(): + if prev_asset.asset_type == models.AssetType.JAVASCRIPT: + prev_modules = tuple( + sorted(frozenset([m.name for m in prev_asset.modules()])) + ) + # NOTE: Assume two non-related assets CANNOT have the same set of modules + # though in reality there can be rare cases of this but we + # will deal with that later if it becomes a prevalent problem + prev_module_asset_mapping[prev_modules] = prev_asset.uuid + + for curr_asset in curr_bundle_report.asset_reports(): + if curr_asset.asset_type == models.AssetType.JAVASCRIPT: + curr_modules = tuple( + sorted(frozenset([m.name for m in curr_asset.modules()])) + ) + if curr_modules in prev_module_asset_mapping: + ret.add( + ( + prev_module_asset_mapping[curr_modules], + curr_asset.uuid, + ) + ) + return ret + + def associate_previous_assets( + self, prev_bundle_analysis_report: Any + ) -> "BundleAnalysisReport": + """ + Only associate past asset if it is Javascript or Typescript types + and belonging to the same bundle name + Associated if one of the following is true + Rule 1. Previous and current asset have the same hashed name + Rule 2. Previous and current asset shared the same set of module names + """ + for curr_bundle_report in self.bundle_reports(): + for prev_bundle_report in prev_bundle_analysis_report.bundle_reports(): + if curr_bundle_report.name == prev_bundle_report.name: + associated_assets_found = set() + + # Rule 1 check + associated_assets_found |= ( + self._associate_bundle_report_assets_by_name( + curr_bundle_report, prev_bundle_report + ) + ) + + # Rule 2 check + associated_assets_found |= ( + self._associate_bundle_report_assets_by_module_names( + curr_bundle_report, prev_bundle_report + ) + ) + + # Update the Assets table for the bundle + # TODO: Use SQLalchemy ORM to update instead of raw SQL + # https://github.com/codecov/engineering-team/issues/1846 + for pair in associated_assets_found: + prev_uuid, curr_uuid = pair + stmt = f"UPDATE assets SET uuid='{prev_uuid}' WHERE uuid='{curr_uuid}'" + self.db_session.execute(text(stmt)) + self.db_session.commit() + def metadata(self) -> Dict[models.MetadataKey, Any]: with models.get_db_session(self.db_path) as session: metadata = session.query(models.Metadata).all() diff --git a/tests/samples/asset_link_curr_a.json b/tests/samples/asset_link_curr_a.json new file mode 100644 index 000000000..e479c26b7 --- /dev/null +++ b/tests/samples/asset_link_curr_a.json @@ -0,0 +1,57 @@ +{ + "version": "1", + "builtAt": 1716480551148, + "duration": 294, + "bundleName": "BundleA", + "outputPath": "/vite/dist", + "bundler": { "name": "rollup", "version": "4.16.2" }, + "plugin": { "name": "@codecov/vite-plugin", "version": "0.0.1-beta.8" }, + "assets": [ + { "name": "asset-same-name-diff-modules.js", "size": 4126, "normalized": "asset-*.js" }, + { "name": "asset-diff-name-same-modules-TWO.js", "size": 1421, "normalized": "asset-*.js" }, + { "name": "asset-diff-name-diff-modules-TWO.js", "size": 161, "normalized": "asset-*.js" }, + { "name": "asset-css-A-TWO.css", "size": 4, "normalized": "assets/index-*.css"}, + { "name": "asset-css-B-TWO.css", "size": 5, "normalized": "assets/index-*.css"}, + { "name": "asset-font-A-TWO.woff", "size": 40, "normalized": "assets/index-*.woff"}, + { "name": "asset-font-B-TWO.ttf", "size": 50, "normalized": "assets/index-*.ttf"}, + { "name": "asset-image-A-TWO.jpg", "size": 400, "normalized": "assets/index-*.jpg"}, + { "name": "asset-image-B-TWO.svg", "size": 500, "normalized": "assets/index-*.svg"} + ], + "chunks": [ + { + "id": "asset", + "uniqueId": "0-asset", + "entry": false, + "initial": true, + "files": ["asset-same-name-diff-modules.js"], + "names": ["index"] + }, + { + "id": "asset", + "uniqueId": "1-asset", + "entry": false, + "initial": true, + "files": ["asset-diff-name-same-modules-TWO.js"], + "names": ["asset"] + }, + { + "id": "asset", + "uniqueId": "2-asset", + "entry": true, + "initial": false, + "files": ["asset-diff-name-diff-modules-TWO.js"], + "names": ["asset"] + } + ], + "modules": [ + { "name": "./src/file-aTwo.js", "size": 189, "chunkUniqueIds": ["0-asset"] }, + { "name": "./src/file-bTwo.js", "size": 353, "chunkUniqueIds": ["0-asset"] }, + { "name": "./src/file-cTwo.js", "size": 497, "chunkUniqueIds": ["0-asset"] }, + { "name": "./src/file-d.js", "size": 154, "chunkUniqueIds": ["1-asset"] }, + { "name": "./src/file-e.js", "size": 140, "chunkUniqueIds": ["1-asset"] }, + { "name": "./src/file-f.js", "size": 315, "chunkUniqueIds": ["1-asset"] }, + { "name": "./src/file-sTWO.js", "size": 406, "chunkUniqueIds": ["2-asset"] }, + { "name": "./src/file-tTWO.js", "size": 262, "chunkUniqueIds": ["2-asset"] }, + { "name": "./src/file-uTWO.js", "size": 308, "chunkUniqueIds": ["2-asset"] } + ] +} diff --git a/tests/samples/asset_link_curr_b.json b/tests/samples/asset_link_curr_b.json new file mode 100644 index 000000000..caa5d91c8 --- /dev/null +++ b/tests/samples/asset_link_curr_b.json @@ -0,0 +1,57 @@ +{ + "version": "1", + "builtAt": 1716480551148, + "duration": 294, + "bundleName": "BundleB", + "outputPath": "/vite/dist", + "bundler": { "name": "rollup", "version": "4.16.2" }, + "plugin": { "name": "@codecov/vite-plugin", "version": "0.0.1-beta.8" }, + "assets": [ + { "name": "asset-same-name-diff-modules.js", "size": 4126, "normalized": "asset-*.js" }, + { "name": "asset-diff-name-same-modules-TWO.js", "size": 1421, "normalized": "asset-*.js" }, + { "name": "asset-diff-name-diff-modules-TWO.js", "size": 161, "normalized": "asset-*.js" }, + { "name": "asset-css-A-TWO.css", "size": 4, "normalized": "assets/index-*.css"}, + { "name": "asset-css-B-TWO.css", "size": 5, "normalized": "assets/index-*.css"}, + { "name": "asset-font-A-TWO.woff", "size": 40, "normalized": "assets/index-*.woff"}, + { "name": "asset-font-B-TWO.ttf", "size": 50, "normalized": "assets/index-*.ttf"}, + { "name": "asset-image-A-TWO.jpg", "size": 400, "normalized": "assets/index-*.jpg"}, + { "name": "asset-image-B-TWO.svg", "size": 500, "normalized": "assets/index-*.svg"} + ], + "chunks": [ + { + "id": "asset", + "uniqueId": "0-asset", + "entry": false, + "initial": true, + "files": ["asset-same-name-diff-modules.js"], + "names": ["index"] + }, + { + "id": "asset", + "uniqueId": "1-asset", + "entry": false, + "initial": true, + "files": ["asset-diff-name-same-modules-TWO.js"], + "names": ["asset"] + }, + { + "id": "asset", + "uniqueId": "2-asset", + "entry": true, + "initial": false, + "files": ["asset-diff-name-diff-modules-TWO.js"], + "names": ["asset"] + } + ], + "modules": [ + { "name": "./src/file-aTwo.js", "size": 189, "chunkUniqueIds": ["0-asset"] }, + { "name": "./src/file-bTwo.js", "size": 353, "chunkUniqueIds": ["0-asset"] }, + { "name": "./src/file-cTwo.js", "size": 497, "chunkUniqueIds": ["0-asset"] }, + { "name": "./src/file-d.js", "size": 154, "chunkUniqueIds": ["1-asset"] }, + { "name": "./src/file-e.js", "size": 140, "chunkUniqueIds": ["1-asset"] }, + { "name": "./src/file-f.js", "size": 315, "chunkUniqueIds": ["1-asset"] }, + { "name": "./src/file-sTWO.js", "size": 406, "chunkUniqueIds": ["2-asset"] }, + { "name": "./src/file-tTWO.js", "size": 262, "chunkUniqueIds": ["2-asset"] }, + { "name": "./src/file-uTWO.js", "size": 308, "chunkUniqueIds": ["2-asset"] } + ] +} diff --git a/tests/samples/asset_link_prev_a.json b/tests/samples/asset_link_prev_a.json new file mode 100644 index 000000000..3943e6cef --- /dev/null +++ b/tests/samples/asset_link_prev_a.json @@ -0,0 +1,57 @@ +{ + "version": "1", + "builtAt": 1716480551136, + "duration": 294, + "bundleName": "BundleA", + "outputPath": "/vite/dist", + "bundler": { "name": "rollup", "version": "4.16.2" }, + "plugin": { "name": "@codecov/vite-plugin", "version": "0.0.1-beta.8" }, + "assets": [ + { "name": "asset-same-name-diff-modules.js", "size": 4126, "normalized": "asset-*.js" }, + { "name": "asset-diff-name-same-modules-ONE.js", "size": 1421, "normalized": "asset-*.js" }, + { "name": "asset-diff-name-diff-modules-ONE.js", "size": 161, "normalized": "asset-*.js" }, + { "name": "asset-css-A-ONE.css", "size": 2, "normalized": "assets/index-*.css"}, + { "name": "asset-css-B-ONE.css", "size": 3, "normalized": "assets/index-*.css"}, + { "name": "asset-font-A-ONE.woff", "size": 20, "normalized": "assets/index-*.woff"}, + { "name": "asset-font-B-ONE.ttf", "size": 30, "normalized": "assets/index-*.ttf"}, + { "name": "asset-image-A-ONE.jpg", "size": 200, "normalized": "assets/index-*.jpg"}, + { "name": "asset-image-B-ONE.svg", "size": 300, "normalized": "assets/index-*.svg"} + ], + "chunks": [ + { + "id": "asset", + "uniqueId": "0-asset", + "entry": false, + "initial": true, + "files": ["asset-same-name-diff-modules.js"], + "names": ["index"] + }, + { + "id": "asset", + "uniqueId": "1-asset", + "entry": false, + "initial": true, + "files": ["asset-diff-name-same-modules-ONE.js"], + "names": ["asset"] + }, + { + "id": "asset", + "uniqueId": "2-asset", + "entry": true, + "initial": false, + "files": ["asset-diff-name-diff-modules-ONE.js"], + "names": ["asset"] + } + ], + "modules": [ + { "name": "./src/file-aOne.js", "size": 189, "chunkUniqueIds": ["0-asset"] }, + { "name": "./src/file-bOne.js", "size": 353, "chunkUniqueIds": ["0-asset"] }, + { "name": "./src/file-cOne.js", "size": 497, "chunkUniqueIds": ["0-asset"] }, + { "name": "./src/file-d.js", "size": 154, "chunkUniqueIds": ["1-asset"] }, + { "name": "./src/file-e.js", "size": 140, "chunkUniqueIds": ["1-asset"] }, + { "name": "./src/file-f.js", "size": 315, "chunkUniqueIds": ["1-asset"] }, + { "name": "./src/file-gONE.js", "size": 406, "chunkUniqueIds": ["2-asset"] }, + { "name": "./src/file-hONE.js", "size": 262, "chunkUniqueIds": ["2-asset"] }, + { "name": "./src/file-iONE.js", "size": 308, "chunkUniqueIds": ["2-asset"] } + ] +} diff --git a/tests/samples/asset_link_prev_b.json b/tests/samples/asset_link_prev_b.json new file mode 100644 index 000000000..5a2c4b930 --- /dev/null +++ b/tests/samples/asset_link_prev_b.json @@ -0,0 +1,57 @@ +{ + "version": "1", + "builtAt": 1716480551136, + "duration": 294, + "bundleName": "BundleB", + "outputPath": "/vite/dist", + "bundler": { "name": "rollup", "version": "4.16.2" }, + "plugin": { "name": "@codecov/vite-plugin", "version": "0.0.1-beta.8" }, + "assets": [ + { "name": "asset-same-name-diff-modules.js", "size": 4126, "normalized": "asset-*.js" }, + { "name": "asset-diff-name-same-modules-ONE.js", "size": 1421, "normalized": "asset-*.js" }, + { "name": "asset-diff-name-diff-modules-ONE.js", "size": 161, "normalized": "asset-*.js" }, + { "name": "asset-css-A-ONE.css", "size": 2, "normalized": "assets/index-*.css"}, + { "name": "asset-css-B-ONE.css", "size": 3, "normalized": "assets/index-*.css"}, + { "name": "asset-font-A-ONE.woff", "size": 20, "normalized": "assets/index-*.woff"}, + { "name": "asset-font-B-ONE.ttf", "size": 30, "normalized": "assets/index-*.ttf"}, + { "name": "asset-image-A-ONE.jpg", "size": 200, "normalized": "assets/index-*.jpg"}, + { "name": "asset-image-B-ONE.svg", "size": 300, "normalized": "assets/index-*.svg"} + ], + "chunks": [ + { + "id": "asset", + "uniqueId": "0-asset", + "entry": false, + "initial": true, + "files": ["asset-same-name-diff-modules.js"], + "names": ["index"] + }, + { + "id": "asset", + "uniqueId": "1-asset", + "entry": false, + "initial": true, + "files": ["asset-diff-name-same-modules-ONE.js"], + "names": ["asset"] + }, + { + "id": "asset", + "uniqueId": "2-asset", + "entry": true, + "initial": false, + "files": ["asset-diff-name-diff-modules-ONE.js"], + "names": ["asset"] + } + ], + "modules": [ + { "name": "./src/file-aOne.js", "size": 189, "chunkUniqueIds": ["0-asset"] }, + { "name": "./src/file-bOne.js", "size": 353, "chunkUniqueIds": ["0-asset"] }, + { "name": "./src/file-cOne.js", "size": 497, "chunkUniqueIds": ["0-asset"] }, + { "name": "./src/file-d.js", "size": 154, "chunkUniqueIds": ["1-asset"] }, + { "name": "./src/file-e.js", "size": 140, "chunkUniqueIds": ["1-asset"] }, + { "name": "./src/file-f.js", "size": 315, "chunkUniqueIds": ["1-asset"] }, + { "name": "./src/file-gONE.js", "size": 406, "chunkUniqueIds": ["2-asset"] }, + { "name": "./src/file-hONE.js", "size": 262, "chunkUniqueIds": ["2-asset"] }, + { "name": "./src/file-iONE.js", "size": 308, "chunkUniqueIds": ["2-asset"] } + ] +} diff --git a/tests/unit/bundle_analysis/test_asset_association.py b/tests/unit/bundle_analysis/test_asset_association.py new file mode 100644 index 000000000..957e98335 --- /dev/null +++ b/tests/unit/bundle_analysis/test_asset_association.py @@ -0,0 +1,92 @@ +from pathlib import Path +from typing import Dict +from unittest.mock import patch + +import pytest + +from shared.bundle_analysis import BundleAnalysisReport, BundleAnalysisReportLoader +from shared.bundle_analysis.models import Asset, AssetType, MetadataKey +from shared.storage.exceptions import PutRequestRateLimitError +from shared.storage.memory import MemoryStorageService + +bundle_stats_prev_a_path = ( + Path(__file__).parent.parent.parent / "samples" / "asset_link_prev_a.json" +) + +bundle_stats_prev_b_path = ( + Path(__file__).parent.parent.parent / "samples" / "asset_link_prev_b.json" +) + +bundle_stats_curr_a_path = ( + Path(__file__).parent.parent.parent / "samples" / "asset_link_curr_a.json" +) + +bundle_stats_curr_b_path = ( + Path(__file__).parent.parent.parent / "samples" / "asset_link_curr_b.json" +) + + +def _get_asset_mapping( + bundle_analysis_report: BundleAnalysisReport, bundle_name: Asset +) -> Dict[str, str]: + bundle_report = bundle_analysis_report.bundle_report(bundle_name) + asset_report = list(bundle_report.asset_reports()) + return {asset.hashed_name: asset for asset in asset_report} + + +def test_asset_association(): + try: + prev_bar = BundleAnalysisReport() + prev_bar.ingest(bundle_stats_prev_a_path) + prev_bar.ingest(bundle_stats_prev_b_path) + + prev_a_asset_mapping = _get_asset_mapping(prev_bar, "BundleA") + prev_b_asset_mapping = _get_asset_mapping(prev_bar, "BundleB") + + curr_bar = BundleAnalysisReport() + curr_bar.ingest(bundle_stats_curr_a_path) + curr_bar.ingest(bundle_stats_curr_b_path) + + curr_a_asset_mapping_before = _get_asset_mapping(curr_bar, "BundleA") + curr_b_asset_mapping_before = _get_asset_mapping(curr_bar, "BundleB") + + curr_bar.associate_previous_assets(prev_bar) + + curr_a_asset_mapping_after = _get_asset_mapping(curr_bar, "BundleA") + curr_b_asset_mapping_after = _get_asset_mapping(curr_bar, "BundleB") + + # Check that non javscript asset types didn't have their UUIDs updated + for hashed_name, asset in curr_a_asset_mapping_before.items(): + if asset.asset_type != AssetType.JAVASCRIPT: + assert asset.uuid == curr_a_asset_mapping_after[hashed_name].uuid + for hashed_name, asset in curr_b_asset_mapping_before.items(): + if asset.asset_type != AssetType.JAVASCRIPT: + assert asset.uuid == curr_b_asset_mapping_after[hashed_name].uuid + + # Same name -> asset associated + asset_a = prev_a_asset_mapping["asset-same-name-diff-modules.js"] + assert ( + curr_a_asset_mapping_after["asset-same-name-diff-modules.js"].uuid + == asset_a.uuid + ) + asset_b = prev_b_asset_mapping["asset-same-name-diff-modules.js"] + assert ( + curr_b_asset_mapping_after["asset-same-name-diff-modules.js"].uuid + == asset_b.uuid + ) + + # Diff name, same modules -> asset associated + asset_a = prev_a_asset_mapping["asset-diff-name-same-modules-ONE.js"] + assert curr_a_asset_mapping_after["asset-diff-name-same-modules-TWO.js"] + asset_b = prev_b_asset_mapping["asset-diff-name-same-modules-ONE.js"] + assert curr_b_asset_mapping_after["asset-diff-name-same-modules-TWO.js"] + + # Diff name, diff modules -> asset not associated + asset_a = prev_a_asset_mapping["asset-diff-name-diff-modules-ONE.js"] + assert curr_a_asset_mapping_after["asset-diff-name-diff-modules-TWO.js"] + asset_b = prev_b_asset_mapping["asset-diff-name-diff-modules-ONE.js"] + assert curr_b_asset_mapping_after["asset-diff-name-diff-modules-TWO.js"] + + finally: + prev_bar.cleanup() + curr_bar.cleanup()