Skip to content

Commit

Permalink
Bundle Analysis: Routing report and comparisons (#447)
Browse files Browse the repository at this point in the history
* Bundle Analysis: Build BundleRouteReport

* refactor some stuff from parsers

* add route comparison service

* code review changes
  • Loading branch information
JerrySentry authored Dec 5, 2024
1 parent 3b22b03 commit 130c885
Show file tree
Hide file tree
Showing 11 changed files with 917 additions and 17 deletions.
2 changes: 2 additions & 0 deletions shared/bundle_analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
MissingBaseReportError,
MissingBundleError,
MissingHeadReportError,
RouteChange,
)
from shared.bundle_analysis.parser import Parser
from shared.bundle_analysis.report import (
Expand All @@ -33,4 +34,5 @@
"ModuleReport",
"BundleAnalysisReportLoader",
"StoragePaths",
"RouteChange",
]
142 changes: 130 additions & 12 deletions shared/bundle_analysis/comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from dataclasses import dataclass
from enum import Enum
from functools import cached_property
from typing import Iterator, List, MutableSet, Optional, Tuple
from typing import Dict, Iterator, List, MutableSet, Optional, Tuple

import sentry_sdk

Expand All @@ -12,6 +12,7 @@
AssetReport,
BundleAnalysisReport,
BundleReport,
BundleRouteReport,
)
from shared.bundle_analysis.storage import BundleAnalysisReportLoader
from shared.django_apps.core.models import Repository
Expand All @@ -33,36 +34,47 @@ class MissingBundleError(Exception):


@dataclass(frozen=True)
class BundleChange:
class BaseChange:
"""
Info about how a bundle has changed between two different reports.
Base class for representing changes between two different reports.
"""

class ChangeType(Enum):
ADDED = "added"
REMOVED = "removed"
CHANGED = "changed"

bundle_name: str
change_type: ChangeType
size_delta: int


@dataclass(frozen=True)
class BundleChange(BaseChange):
"""
Info about how a bundle has changed between two different reports.
"""

bundle_name: str
percentage_delta: float


@dataclass(frozen=True)
class AssetChange:
class RouteChange(BaseChange):
"""
Info about how an asset has changed between two different reports.
Info about how a bundle route has changed between two different reports.
"""

class ChangeType(Enum):
ADDED = "added"
REMOVED = "removed"
CHANGED = "changed"
route_name: str
percentage_delta: float


@dataclass(frozen=True)
class AssetChange(BaseChange):
"""
Info about how an asset has changed between two different reports.
"""

asset_name: str
change_type: ChangeType
size_delta: int


AssetMatch = Tuple[Optional[AssetReport], Optional[AssetReport]]
Expand Down Expand Up @@ -176,6 +188,68 @@ def _match_assets(
return matches


class BundleRoutesComparison:
    """
    Compares all routes of two bundle route reports for a given bundle.

    Either report may be ``None`` (the bundle does not exist on that side of
    the comparison); a missing report is treated as having no routes at all.
    """

    def __init__(
        self,
        base_report: Optional[BundleRouteReport],
        head_report: Optional[BundleRouteReport],
    ):
        self.base_report = base_report
        self.head_report = head_report

    @sentry_sdk.trace
    def size_changes(self) -> List[RouteChange]:
        """
        Returns a list of changes for each unique route that exists between the base and head.

        - Route missing from head: "removed", size delta is minus the base
          size and percentage delta is -100.
        - Route missing from base, or present on base with a size of 0:
          "added", size delta is the head size and percentage delta is +100.
        - Otherwise: "changed", with
          percentage delta = (diff_size / base_size) * 100, rounded to 2 places.
        """
        # A missing report contributes no routes instead of raising AttributeError.
        base_sizes = (
            self.base_report.get_sizes() if self.base_report is not None else {}
        )
        head_sizes = (
            self.head_report.get_sizes() if self.head_report is not None else {}
        )

        results = []
        for route_name in base_sizes.keys() | head_sizes.keys():
            base_size = base_sizes.get(route_name)
            head_size = head_sizes.get(route_name)

            if head_size is None:
                # Removed old route. This is checked first so that a route with
                # a base size of 0 that is absent from head is reported as
                # removed rather than looked up in head_sizes (KeyError).
                # Every name comes from the union of both key sets, so the
                # route is guaranteed to be in base_sizes here.
                results.append(
                    RouteChange(
                        route_name=route_name,
                        change_type=RouteChange.ChangeType.REMOVED,
                        size_delta=-base_size,
                        percentage_delta=-100.0,
                    )
                )
            elif not base_size:
                # Added new route: not in base, or in base with size 0 (which
                # would otherwise divide by zero in the percentage below).
                results.append(
                    RouteChange(
                        route_name=route_name,
                        change_type=RouteChange.ChangeType.ADDED,
                        size_delta=head_size,
                        percentage_delta=100.0,
                    )
                )
            else:
                # Changed
                size_delta = head_size - base_size
                results.append(
                    RouteChange(
                        route_name=route_name,
                        change_type=RouteChange.ChangeType.CHANGED,
                        size_delta=size_delta,
                        percentage_delta=round((size_delta / base_size) * 100, 2),
                    )
                )

        return results


class BundleAnalysisComparison:
"""
Compares two different bundle analysis reports.
Expand Down Expand Up @@ -310,3 +384,47 @@ def bundle_comparison(self, bundle_name: str) -> BundleComparison:
if base_bundle_report is None or head_bundle_report is None:
raise MissingBundleError()
return BundleComparison(base_bundle_report, head_bundle_report)

@sentry_sdk.trace
def bundle_routes_changes(self) -> Dict[str, List[RouteChange]]:
    """
    Route size changes for every bundle found in the base or head report.

    Returns a mapping of bundle name to the list of RouteChange produced by
    comparing that bundle's full route report on base against head.
    """
    base_route_reports = {}
    for bundle_report in self.base_report.bundle_reports():
        base_route_reports[bundle_report.name] = bundle_report.full_route_report()

    head_route_reports = {}
    for bundle_report in self.head_report.bundle_reports():
        head_route_reports[bundle_report.name] = bundle_report.full_route_report()

    # Walk the union of bundle names; a bundle that only exists on one side
    # is handed to the comparison with None for the other side.
    changes_by_bundle = {}
    for name in base_route_reports.keys() | head_route_reports.keys():
        comparison = BundleRoutesComparison(
            base_route_reports.get(name), head_route_reports.get(name)
        )
        changes_by_bundle[name] = comparison.size_changes()

    return changes_by_bundle

@sentry_sdk.trace
def bundle_routes_changes_by_bundle(self, bundle_name: str) -> List[RouteChange]:
    """
    Route size changes for a single bundle, looked up by name in both reports.

    Raises MissingBundleError when the bundle is absent from either the base
    or the head report.
    """
    base_bundle = self.base_report.bundle_report(bundle_name)
    head_bundle = self.head_report.bundle_report(bundle_name)

    # Both sides are required for a single-bundle comparison.
    if base_bundle is None:
        raise MissingBundleError()
    if head_bundle is None:
        raise MissingBundleError()

    comparison = BundleRoutesComparison(
        base_bundle.full_route_report(),
        head_bundle.full_route_report(),
    )
    return comparison.size_changes()
18 changes: 13 additions & 5 deletions shared/bundle_analysis/parsers/v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import re
import uuid
from collections import defaultdict
from typing import List, Tuple
from typing import Dict, List, Tuple

import ijson
import sentry_sdk
Expand Down Expand Up @@ -99,7 +99,7 @@ def reset(self):
self.module_list = []

# dynamic imports: mapping between Chunk and each file name of its dynamic imports
self.dynamic_imports_mapping = defaultdict(
self.dynamic_import_file_names_by_chunk = defaultdict(
list
) # typing: Dict[Chunk, List[str]]

Expand Down Expand Up @@ -284,7 +284,7 @@ def _parse_chunks_event(self, prefix: str, event: str, value: str):
elif prefix == "chunks.item.files.item":
self.chunk_asset_names.append(value)
elif prefix == "chunks.item.dynamicImports.item":
self.dynamic_imports_mapping[self.chunk].append(value)
self.dynamic_import_file_names_by_chunk[self.chunk].append(value)
elif (prefix, event) == ("chunks.item", "end_map"):
self.chunk_list.append(self.chunk)

Expand Down Expand Up @@ -322,9 +322,17 @@ def _parse_modules_event(self, prefix: str, event: str, value: str):
self.module = None
self.module_chunk_unique_external_ids = []

def _parse_dynamic_imports(self) -> List[dict]:
def _parse_dynamic_imports(self) -> List[Dict[str, int]]:
"""
Computes all the dynamic imports that needs to be inserted to the DB
Returns a list of dictionary objects representing the insert params
[{
"chunk_id": chunk.id,
"asset_id": asset.id,
}]
"""
dynamic_imports_list = []
for chunk, filenames in self.dynamic_imports_mapping.items():
for chunk, filenames in self.dynamic_import_file_names_by_chunk.items():
imported_assets = {}
for filename in filenames:
try:
Expand Down
93 changes: 93 additions & 0 deletions shared/bundle_analysis/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
import os
import sqlite3
import tempfile
from collections import defaultdict, deque
from typing import Any, Dict, Iterator, List, Optional, Set, Tuple

import sentry_sdk
from sqlalchemy import asc, desc, text
from sqlalchemy.exc import OperationalError
from sqlalchemy.orm import Session as DbSession
from sqlalchemy.orm import aliased
from sqlalchemy.orm.query import Query
from sqlalchemy.sql import func
from sqlalchemy.sql.functions import coalesce
Expand All @@ -21,6 +23,7 @@
AssetType,
Bundle,
Chunk,
DynamicImport,
Metadata,
MetadataKey,
Module,
Expand Down Expand Up @@ -114,6 +117,57 @@ def routes(self) -> Optional[List[str]]:

return list(routes)

def dynamically_imported_assets(self) -> List["AssetReport"]:
    """
    Returns all dynamically imported assets of the current Asset.

    These are the distinct Assets referenced by DynamicImport rows whose
    chunk is one of the current Asset's chunks. The result is a concrete
    list (matching the return annotation), so it can be iterated more than
    once and has a length.
    """
    with get_db_session(self.db_path) as session:
        # Reattach self.asset to the current session to avoid DetachedInstanceError
        asset = session.merge(self.asset)

        chunk_ids = [chunk.id for chunk in asset.chunks]
        if not chunk_ids:
            # No chunks means no DynamicImport row can match; skip the query.
            return []

        # Alias the chunks table for the Asset.chunks relationship
        asset_chunks = aliased(Chunk)

        # NOTE(review): Chunk and asset_chunks are both joined on
        # DynamicImport.chunk_id; the plain Chunk join looks redundant.
        # Left as-is, confirm before removing.
        assets = (
            session.query(Asset)
            .distinct()
            .join(DynamicImport, DynamicImport.asset_id == Asset.id)
            .join(Chunk, DynamicImport.chunk_id == Chunk.id)
            .join(asset_chunks, asset_chunks.id == DynamicImport.chunk_id)
            .filter(asset_chunks.id.in_(chunk_ids))
        )

        return [
            AssetReport(self.db_path, imported_asset, self.bundle_info)
            for imported_asset in assets.all()
        ]


class BundleRouteReport:
    """
    Report wrapper for asset route analytics, mainly consumed by
    BundleRoutesComparison.

    Holds a dictionary where
        keys: all routes of the bundle
        values: the list of distinct Assets (as AssetReports) associated with the route
    """

    def __init__(self, db_path: str, data: Dict[str, List[AssetReport]]):
        self.db_path = db_path
        self.data = data

    def get_sizes(self) -> Dict[str, int]:
        """Total asset size per route, for every route in the report."""
        return {
            route: sum(asset.size for asset in asset_reports)
            for route, asset_reports in self.data.items()
        }

    def get_size(self, route: str) -> Optional[int]:
        """Total asset size of a single route, or None if the route is unknown."""
        # Membership test (not indexing) so an unknown route is never inserted
        # when the underlying mapping is a defaultdict.
        if route in self.data:
            return sum(asset.size for asset in self.data[route])
        return None


class BundleReport:
"""
Expand Down Expand Up @@ -243,6 +297,45 @@ def is_cached(self) -> bool:
result = session.query(Bundle).filter(Bundle.id == self.bundle.id).first()
return result.is_cached

def routes(self) -> Dict[str, List[AssetReport]]:
    """
    Returns a mapping of routes to all Assets (as AssetReports) that belong to them.

    Note that this ignores dynamically imported Assets (ie only the direct asset).
    Assets whose routes() call yields None or an empty list contribute nothing.
    """
    route_map = defaultdict(list)
    for asset_report in self.asset_reports():
        # AssetReport.routes() is declared Optional[List[str]]; treat None as
        # "no routes" instead of failing with "NoneType is not iterable".
        for route in asset_report.routes() or ():
            route_map[route].append(asset_report)
    return route_map

@sentry_sdk.trace
def full_route_report(self) -> BundleRouteReport:
    """
    Extended version of routes() that also attributes dynamically imported
    Assets to the route of the Asset that imports them, following imports
    transitively.

    The result is wrapped in a BundleRouteReport so it can be used for
    comparison and further data manipulation.
    """
    assets_by_route = defaultdict(list)  # typing: Dict[str, List[AssetReport]]

    for route, direct_assets in self.routes().items():
        # Breadth-first walk over the graph whose nodes are Assets and whose
        # edges are DynamicImport links, starting from the route's direct assets.
        seen_asset_ids = set()
        reachable_assets = []
        pending = deque(direct_assets)

        while pending:
            candidate = pending.popleft()
            if candidate.id in seen_asset_ids:
                # Already collected for this route (shared or cyclic import).
                continue
            seen_asset_ids.add(candidate.id)
            reachable_assets.append(candidate)
            # Queue everything this asset dynamically imports for a later visit.
            pending.extend(candidate.dynamically_imported_assets())

        assets_by_route[route] = reachable_assets

    return BundleRouteReport(self.db_path, assets_by_route)


class BundleAnalysisReport:
"""
Expand Down
Loading

0 comments on commit 130c885

Please sign in to comment.