From 2e7c5a73039561a6af1074d9f8bef3783386db8e Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Fri, 15 Nov 2024 13:32:49 +0100 Subject: [PATCH 01/30] hyperscan --- aiohttp/web_urldispatcher.py | 231 +++++++++++++++++++++++++--------- requirements/runtime-deps.in | 1 + requirements/runtime-deps.txt | 2 + setup.cfg | 2 + 4 files changed, 175 insertions(+), 61 deletions(-) diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index 65de218612f..ee88727de3d 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -1,9 +1,12 @@ +from __future__ import annotations + import abc import asyncio import base64 import functools import hashlib import html +import itertools import keyword import os import re @@ -41,6 +44,7 @@ from .abc import AbstractMatchInfo, AbstractRouter, AbstractView from .helpers import DEBUG from .http import HttpVersion11 +from .log import web_logger from .typedefs import Handler, PathLike from .web_exceptions import ( HTTPException, @@ -68,6 +72,14 @@ ) +try: + import hyperscan + + HAS_HYPERSCAN = True +except ImportError: + HAS_HYPERSCAN = False + + if TYPE_CHECKING: from .web_app import Application @@ -104,23 +116,23 @@ class _InfoDict(TypedDict, total=False): directory: Path prefix: str - routes: Mapping[str, "AbstractRoute"] + routes: Mapping[str, AbstractRoute] - app: "Application" + app: Application domain: str - rule: "AbstractRuleMatching" + rule: AbstractRuleMatching http_exception: HTTPException class AbstractResource(Sized, Iterable["AbstractRoute"]): - def __init__(self, *, name: Optional[str] = None) -> None: + def __init__(self, *, name: str | None = None) -> None: self._name = name @property - def name(self) -> Optional[str]: + def name(self) -> str | None: return self._name @property @@ -166,10 +178,10 @@ class AbstractRoute(abc.ABC): def __init__( self, method: str, - handler: Union[Handler, Type[AbstractView]], + handler: Handler | type[AbstractView], *, - expect_handler: Optional[_ExpectHandler] = None, - resource: Optional[AbstractResource] = None, + expect_handler: _ExpectHandler | None = None, + resource: AbstractResource | None = None, ) -> None: if expect_handler is None: expect_handler = _default_expect_handler @@ -207,11 +219,11 @@ def handler(self) -> Handler: @property @abc.abstractmethod - def name(self) -> Optional[str]: + def name(self) -> str | None: """Optional route's name, always equals to resource's name.""" @property - def resource(self) -> Optional[AbstractResource]: + def resource(self) -> AbstractResource | None: return self._resource @abc.abstractmethod @@ -222,7 +234,7 @@ def get_info(self) -> _InfoDict: def url_for(self, *args: str, **kwargs: str) -> URL: """Construct url for route with additional params.""" - async def handle_expect_header(self, request: Request) -> Optional[StreamResponse]: + async def handle_expect_header(self, request: Request) -> StreamResponse | None: return await self._expect_handler(request) @@ -230,11 +242,11 @@ class UrlMappingMatchInfo(BaseDict, AbstractMatchInfo): __slots__ = ("_route", "_apps", "_current_app", "_frozen") - def __init__(self, match_dict: Dict[str, str], route: AbstractRoute) -> None: + def __init__(self, match_dict: dict[str, str], route: AbstractRoute) -> None: super().__init__(match_dict) self._route = route - self._apps: List[Application] = [] - self._current_app: Optional[Application] = None + self._apps: list[Application] = [] + self._current_app: Application | None = None self._frozen = False @property @@ -250,17 +262,17 @@ def expect_handler(self) -> _ExpectHandler: return self._route.handle_expect_header @property - def http_exception(self) -> Optional[HTTPException]: + def http_exception(self) -> HTTPException | None: return None def get_info(self) -> _InfoDict: # type: ignore[override] return self._route.get_info() @property - def apps(self) -> Tuple["Application", ...]: + def apps(self) -> tuple[Application, ...]: return tuple(self._apps) - def add_app(self, app: "Application") -> None: + def add_app(self, app: Application) -> None: if self._frozen: raise RuntimeError("Cannot change apps stack after .freeze() call") if self._current_app is None: @@ -268,13 +280,13 @@ def add_app(self, app: "Application") -> None: self._apps.insert(0, app) @property - def current_app(self) -> "Application": + def current_app(self) -> Application: app = self._current_app assert app is not None return app @current_app.setter - def current_app(self, app: "Application") -> None: + def current_app(self, app: Application) -> None: if DEBUG: # pragma: no cover if app not in self._apps: raise RuntimeError( @@ -326,17 +338,17 @@ async def _default_expect_handler(request: Request) -> None: class Resource(AbstractResource): - def __init__(self, *, name: Optional[str] = None) -> None: + def __init__(self, *, name: str | None = None) -> None: super().__init__(name=name) - self._routes: List[ResourceRoute] = [] + self._routes: list[ResourceRoute] = [] def add_route( self, method: str, - handler: Union[Type[AbstractView], Handler], + handler: type[AbstractView] | Handler, *, - expect_handler: Optional[_ExpectHandler] = None, - ) -> "ResourceRoute": + expect_handler: _ExpectHandler | None = None, + ) -> ResourceRoute: for route_obj in self._routes: if route_obj.method == method or route_obj.method == hdrs.METH_ANY: raise RuntimeError( @@ -349,14 +361,14 @@ def add_route( self.register_route(route_obj) return route_obj - def register_route(self, route: "ResourceRoute") -> None: + def register_route(self, route: ResourceRoute) -> None: assert isinstance( route, ResourceRoute ), f"Instance of Route class is required, got {route!r}" self._routes.append(route) async def resolve(self, request: Request) -> _Resolve: - allowed_methods: Set[str] = set() + allowed_methods: set[str] = set() match_dict = self._match(request.rel_url.path_safe) if match_dict is None: @@ -372,20 +384,20 @@ async def resolve(self, request: Request) -> _Resolve: return None, allowed_methods @abc.abstractmethod - def _match(self, path: str) -> Optional[Dict[str, str]]: + def _match(self, path: str) -> dict[str, str] | None: pass # pragma: no cover def __len__(self) -> int: return len(self._routes) - def __iter__(self) -> Iterator["ResourceRoute"]: + def __iter__(self) -> Iterator[ResourceRoute]: return iter(self._routes) # TODO: implement all abstract methods class PlainResource(Resource): - def __init__(self, path: str, *, name: Optional[str] = None) -> None: + def __init__(self, path: str, *, name: str | None = None) -> None: super().__init__(name=name) assert not path or path.startswith("/") self._path = path @@ -404,7 +416,7 @@ def add_prefix(self, prefix: str) -> None: assert len(prefix) > 1 self._path = prefix + self._path - def _match(self, path: str) -> Optional[Dict[str, str]]: + def _match(self, path: str) -> dict[str, str] | None: # string comparison is about 10 times faster than regexp matching if self._path == path: return {} @@ -429,7 +441,7 @@ class DynamicResource(Resource): DYN_WITH_RE = re.compile(r"\{(?P[_a-zA-Z][_a-zA-Z0-9]*):(?P.+)\}") GOOD = r"[^{}/]+" - def __init__(self, path: str, *, name: Optional[str] = None) -> None: + def __init__(self, path: str, *, name: str | None = None) -> None: super().__init__(name=name) self._orig_path = path pattern = "" @@ -474,7 +486,7 @@ def add_prefix(self, prefix: str) -> None: self._pattern = re.compile(re.escape(prefix) + self._pattern.pattern) self._formatter = prefix + self._formatter - def _match(self, path: str) -> Optional[Dict[str, str]]: + def _match(self, path: str) -> dict[str, str] | None: match = self._pattern.fullmatch(path) if match is None: return None @@ -500,7 +512,7 @@ def __repr__(self) -> str: class PrefixResource(AbstractResource): - def __init__(self, prefix: str, *, name: Optional[str] = None) -> None: + def __init__(self, prefix: str, *, name: str | None = None) -> None: assert not prefix or prefix.startswith("/"), prefix assert prefix in ("", "/") or not prefix.endswith("/"), prefix super().__init__(name=name) @@ -532,8 +544,8 @@ def __init__( prefix: str, directory: PathLike, *, - name: Optional[str] = None, - expect_handler: Optional[_ExpectHandler] = None, + name: str | None = None, + expect_handler: _ExpectHandler | None = None, chunk_size: int = 256 * 1024, show_index: bool = False, follow_symlinks: bool = False, @@ -566,7 +578,7 @@ def url_for( # type: ignore[override] self, *, filename: PathLike, - append_version: Optional[bool] = None, + append_version: bool | None = None, ) -> URL: if append_version is None: append_version = self._append_version @@ -730,7 +742,7 @@ def __repr__(self) -> str: class PrefixedSubAppResource(PrefixResource): - def __init__(self, prefix: str, app: "Application") -> None: + def __init__(self, prefix: str, app: Application) -> None: super().__init__(prefix) self._app = app self._add_prefix_to_resources(prefix) @@ -847,7 +859,7 @@ def match_domain(self, host: str) -> bool: class MatchedSubAppResource(PrefixedSubAppResource): - def __init__(self, rule: AbstractRuleMatching, app: "Application") -> None: + def __init__(self, rule: AbstractRuleMatching, app: Application) -> None: AbstractResource.__init__(self) self._prefix = "" self._app = app @@ -881,10 +893,10 @@ class ResourceRoute(AbstractRoute): def __init__( self, method: str, - handler: Union[Handler, Type[AbstractView]], + handler: Handler | type[AbstractView], resource: AbstractResource, *, - expect_handler: Optional[_ExpectHandler] = None, + expect_handler: _ExpectHandler | None = None, ) -> None: super().__init__( method, handler, expect_handler=expect_handler, resource=resource @@ -896,7 +908,7 @@ def __repr__(self) -> str: ) @property - def name(self) -> Optional[str]: + def name(self) -> str | None: if self._resource is None: return None return self._resource.name @@ -920,7 +932,7 @@ def url_for(self, *args: str, **kwargs: str) -> URL: raise RuntimeError(".url_for() is not allowed for SystemRoute") @property - def name(self) -> Optional[str]: + def name(self) -> str | None: return None def get_info(self) -> _InfoDict: @@ -945,7 +957,7 @@ class View(AbstractView): async def _iter(self) -> StreamResponse: if self.request.method not in hdrs.METH_ALL: self._raise_allowed_methods() - method: Optional[Callable[[], Awaitable[StreamResponse]]] = getattr( + method: Callable[[], Awaitable[StreamResponse]] | None = getattr( self, self.request.method.lower(), None ) if method is None: @@ -961,7 +973,7 @@ def _raise_allowed_methods(self) -> NoReturn: class ResourcesView(Sized, Iterable[AbstractResource], Container[AbstractResource]): - def __init__(self, resources: List[AbstractResource]) -> None: + def __init__(self, resources: list[AbstractResource]) -> None: self._resources = resources def __len__(self) -> int: @@ -975,8 +987,8 @@ def __contains__(self, resource: object) -> bool: class RoutesView(Sized, Iterable[AbstractRoute], Container[AbstractRoute]): - def __init__(self, resources: List[AbstractResource]): - self._routes: List[AbstractRoute] = [] + def __init__(self, resources: list[AbstractResource]): + self._routes: list[AbstractRoute] = [] for resource in resources: for route in resource: self._routes.append(route) @@ -997,21 +1009,52 @@ class UrlDispatcher(AbstractRouter, Mapping[str, AbstractResource]): def __init__(self) -> None: super().__init__() - self._resources: List[AbstractResource] = [] - self._named_resources: Dict[str, AbstractResource] = {} + self._resources: list[AbstractResource] = [] + self._named_resources: dict[str, AbstractResource] = {} self._resource_index: dict[str, list[AbstractResource]] = {} - self._matched_sub_app_resources: List[MatchedSubAppResource] = [] + self._matched_sub_app_resources: list[MatchedSubAppResource] = [] + self._hyperdb: hyperscan.Database | None = None + self._hyper_index: dict[int, AbstractResource] = {} + + async def _resolve_fast( + self, request: Request, path: str, allowed_methods: set[str] + ) -> UrlMappingMatchInfo | None: + # N.B.: allowed_methods variable in modified in-place + found: list[int] = [] + + def on_match( + id_: int, from_: int, to: int, flags: int, context: Any | None + ) -> bool | None: + found.append(id_) + + self._hyperdb.scan(path.encode("utf8"), match_event_handler=on_match) + if not found: + return None + if len(found) > 1: + # Multiple matches are found, + # fall back to explicit iteration to find the FIRST match + return await self._resolve_fallback(request, path, allowed_methods) + + resource = self._hyper_index[found[0]] + match_dict, allowed = await resource.resolve(request) + if match_dict is not None: + return match_dict + else: + allowed_methods |= allowed + return None + + async def _resolve_fallback( + self, request: Request, url_part: str, allowed_methods: set[str] + ) -> UrlMappingMatchInfo | None: + # N.B.: allowed_methods variable in modified in-place - async def resolve(self, request: Request) -> UrlMappingMatchInfo: resource_index = self._resource_index - allowed_methods: Set[str] = set() # Walk the url parts looking for candidates. We walk the url backwards # to ensure the most explicit match is found first. If there are multiple # candidates for a given url part because there are multiple resources # registered for the same canonical path, we resolve them in a linear # fashion to ensure registration order is respected. - url_part = request.rel_url.path_safe while url_part: for candidate in resource_index.get(url_part, ()): match_dict, allowed = await candidate.resolve(request) @@ -1023,6 +1066,28 @@ async def resolve(self, request: Request) -> UrlMappingMatchInfo: break url_part = url_part.rpartition("/")[0] or "/" + return None + + async def resolve(self, request: Request) -> UrlMappingMatchInfo: + allowed_methods: set[str] = set() + + if self._hyperdb is not None: + if ( + (ret := await self._resolve_fast(request, + request.rel_url.path_safe, allowed_methods + )) + is not None + ): + return ret + else: + if ( + (ret := await self._resolve_fallback(request, + request.rel_url.path_safe, allowed_methods + )) + is not None + ): + return ret + # # We didn't find any candidates, so we'll try the matched sub-app # resources which we have to walk in a linear fashion because they @@ -1127,7 +1192,7 @@ def unindex_resource(self, resource: AbstractResource) -> None: resource_key = self._get_resource_index_key(resource) self._resource_index[resource_key].remove(resource) - def add_resource(self, path: str, *, name: Optional[str] = None) -> Resource: + def add_resource(self, path: str, *, name: str | None = None) -> Resource: if path and not path.startswith("/"): raise ValueError("path should be started with / or be empty") # Reuse last added resource if path and name are the same @@ -1147,10 +1212,10 @@ def add_route( self, method: str, path: str, - handler: Union[Handler, Type[AbstractView]], + handler: Handler | type[AbstractView], *, - name: Optional[str] = None, - expect_handler: Optional[_ExpectHandler] = None, + name: str | None = None, + expect_handler: _ExpectHandler | None = None, ) -> AbstractRoute: resource = self.add_resource(path, name=name) return resource.add_route(method, handler, expect_handler=expect_handler) @@ -1160,8 +1225,8 @@ def add_static( prefix: str, path: PathLike, *, - name: Optional[str] = None, - expect_handler: Optional[_ExpectHandler] = None, + name: str | None = None, + expect_handler: _ExpectHandler | None = None, chunk_size: int = 256 * 1024, show_index: bool = False, follow_symlinks: bool = False, @@ -1202,7 +1267,7 @@ def add_get( path: str, handler: Handler, *, - name: Optional[str] = None, + name: str | None = None, allow_head: bool = True, **kwargs: Any, ) -> AbstractRoute: @@ -1233,7 +1298,7 @@ def add_delete(self, path: str, handler: Handler, **kwargs: Any) -> AbstractRout return self.add_route(hdrs.METH_DELETE, path, handler, **kwargs) def add_view( - self, path: str, handler: Type[AbstractView], **kwargs: Any + self, path: str, handler: type[AbstractView], **kwargs: Any ) -> AbstractRoute: """Shortcut for add_route with ANY methods for a class-based view.""" return self.add_route(hdrs.METH_ANY, path, handler, **kwargs) @@ -1242,8 +1307,52 @@ def freeze(self) -> None: super().freeze() for resource in self._resources: resource.freeze() + if HAS_HYPERSCAN: + self._hyperdb = hyperscan.Database() + patterns: list[bytes] = [] + ids: list[int] = [] + counter = itertools.count() + for resource in self._resources: + pattern: str | None = None + if isinstance(resource, PlainResource): + pattern = re.escape(resource.get_info()["path"]) + elif isinstance(resource, DynamicResource): + pattern = resource.get_info()["pattern"].pattern + elif isinstance(resource, StaticResource): + pattern = resource.get_info()["prefix"] + "/.+" + else: + pattern = None + + if pattern is None: + continue + + id_ = next(counter) + patterns.append(f"^{pattern}$".encode()) + self._hyper_index[id_] = resource + ids.append(id_) + + count = len(patterns) + try: + self._hyperdb.compile( + expressions=patterns, + ids=ids, + elements=count, + flags=[ + hyperscan.HS_FLAG_UTF8 + | hyperscan.HS_FLAG_UCP + | hyperscan.HS_FLAG_SINGLEMATCH + ] + * count, + ) + except hyperscan.error as exc: + web_logger.warning( + "Cannot compile hyperscan database: %s, switching to fallback url resolver", + repr(exc), + ) + self._hyperdb = None + self._hyper_index.clear() - def add_routes(self, routes: Iterable[AbstractRouteDef]) -> List[AbstractRoute]: + def add_routes(self, routes: Iterable[AbstractRouteDef]) -> list[AbstractRoute]: """Append routes to route table. Parameter should be a sequence of RouteDef objects. diff --git a/requirements/runtime-deps.in b/requirements/runtime-deps.in index 48ee7016d13..f21770b2547 100644 --- a/requirements/runtime-deps.in +++ b/requirements/runtime-deps.in @@ -7,6 +7,7 @@ async-timeout >= 4.0, < 6.0 ; python_version < "3.11" Brotli; platform_python_implementation == 'CPython' brotlicffi; platform_python_implementation != 'CPython' frozenlist >= 1.1.1 +hyperscan >= 0.7.8; platform_python_implementation == 'CPython' multidict >=4.5, < 7.0 propcache >= 0.2.0 yarl >= 1.17.0, < 2.0 diff --git a/requirements/runtime-deps.txt b/requirements/runtime-deps.txt index 078e905dd6f..8f164c8d0d3 100644 --- a/requirements/runtime-deps.txt +++ b/requirements/runtime-deps.txt @@ -20,6 +20,8 @@ frozenlist==1.5.0 # via # -r requirements/runtime-deps.in # aiosignal +hyperscan == 0.7.8 ; platform_python_implementation == "CPython" + # via -r requirements/runtime-deps.in idna==3.6 # via yarl multidict==6.1.0 diff --git a/setup.cfg b/setup.cfg index 6a288316e74..a04d750782a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -70,6 +70,8 @@ speedups = aiodns >= 3.2.0; sys_platform=="linux" or sys_platform=="darwin" Brotli; platform_python_implementation == 'CPython' brotlicffi; platform_python_implementation != 'CPython' + Brotli; platform_python_implementation == 'CPython' + hyperscan >= 0.7.8; platform_python_implementation == 'CPython' [options.packages.find] exclude = From 1a5a10da8063f0e7fcf6ee3cdbb65dcd71d9861f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 15 Nov 2024 12:36:31 +0000 Subject: [PATCH 02/30] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- aiohttp/web_urldispatcher.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index ee88727de3d..a7630ef08cc 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -1073,19 +1073,17 @@ async def resolve(self, request: Request) -> UrlMappingMatchInfo: if self._hyperdb is not None: if ( - (ret := await self._resolve_fast(request, - request.rel_url.path_safe, allowed_methods - )) - is not None - ): + ret := await self._resolve_fast( + request, request.rel_url.path_safe, allowed_methods + ) + ) is not None: return ret else: if ( - (ret := await self._resolve_fallback(request, - request.rel_url.path_safe, allowed_methods - )) - is not None - ): + ret := await self._resolve_fallback( + request, request.rel_url.path_safe, allowed_methods + ) + ) is not None: return ret # From def5f076b1262764339b4c8c9003e6cd721c7c6f Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Fri, 15 Nov 2024 13:43:49 +0100 Subject: [PATCH 03/30] Fix mypy --- .mypy.ini | 3 +++ aiohttp/web_urldispatcher.py | 27 ++++++++++++--------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/.mypy.ini b/.mypy.ini index 4a1333c4704..12aac2be56c 100644 --- a/.mypy.ini +++ b/.mypy.ini @@ -34,3 +34,6 @@ ignore_missing_imports = True [mypy-gunicorn.*] ignore_missing_imports = True + +[mypy-hyperscan] +ignore_missing_imports = True diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index a7630ef08cc..bbb6ec8e893 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -1013,12 +1013,15 @@ def __init__(self) -> None: self._named_resources: dict[str, AbstractResource] = {} self._resource_index: dict[str, list[AbstractResource]] = {} self._matched_sub_app_resources: list[MatchedSubAppResource] = [] - self._hyperdb: hyperscan.Database | None = None + self._hyperdb: hyperscan.Database | None = None # type: ignore[no-any-unimported] self._hyper_index: dict[int, AbstractResource] = {} async def _resolve_fast( self, request: Request, path: str, allowed_methods: set[str] ) -> UrlMappingMatchInfo | None: + if self._hyperdb is None: + return await self._resolve_fallback(request, path, allowed_methods) + # N.B.: allowed_methods variable in modified in-place found: list[int] = [] @@ -1026,6 +1029,7 @@ def on_match( id_: int, from_: int, to: int, flags: int, context: Any | None ) -> bool | None: found.append(id_) + return None self._hyperdb.scan(path.encode("utf8"), match_event_handler=on_match) if not found: @@ -1071,20 +1075,13 @@ async def _resolve_fallback( async def resolve(self, request: Request) -> UrlMappingMatchInfo: allowed_methods: set[str] = set() - if self._hyperdb is not None: - if ( - ret := await self._resolve_fast( - request, request.rel_url.path_safe, allowed_methods - ) - ) is not None: - return ret - else: - if ( - ret := await self._resolve_fallback( - request, request.rel_url.path_safe, allowed_methods - ) - ) is not None: - return ret + if ( + (ret := await self._resolve_fast(request, + request.rel_url.path_safe, allowed_methods + )) + is not None + ): + return ret # # We didn't find any candidates, so we'll try the matched sub-app From bea95da75099decc1c5250145763422bb12c3155 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Fri, 15 Nov 2024 13:44:36 +0100 Subject: [PATCH 04/30] Reforman --- aiohttp/web_urldispatcher.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index bbb6ec8e893..af8ce58c353 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -1076,11 +1076,10 @@ async def resolve(self, request: Request) -> UrlMappingMatchInfo: allowed_methods: set[str] = set() if ( - (ret := await self._resolve_fast(request, - request.rel_url.path_safe, allowed_methods - )) - is not None - ): + ret := await self._resolve_fast( + request, request.rel_url.path_safe, allowed_methods + ) + ) is not None: return ret # From bd1f61fdec4ba08d79c482061bdcf4cf75f3b63e Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Fri, 15 Nov 2024 16:02:27 +0100 Subject: [PATCH 05/30] Update setup.cfg Co-authored-by: Sam Bull --- setup.cfg | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index a04d750782a..53d0b5b3b2b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -70,7 +70,6 @@ speedups = aiodns >= 3.2.0; sys_platform=="linux" or sys_platform=="darwin" Brotli; platform_python_implementation == 'CPython' brotlicffi; platform_python_implementation != 'CPython' - Brotli; platform_python_implementation == 'CPython' hyperscan >= 0.7.8; platform_python_implementation == 'CPython' [options.packages.find] From 934f755c18f72ceb91532a10a50e00a4cd11c059 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sat, 16 Nov 2024 08:22:57 +0100 Subject: [PATCH 06/30] step --- aiohttp/web_urldispatcher.py | 42 ++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index af8ce58c353..7560e9c73f7 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -1,5 +1,3 @@ -from __future__ import annotations - import abc import asyncio import base64 @@ -116,13 +114,13 @@ class _InfoDict(TypedDict, total=False): directory: Path prefix: str - routes: Mapping[str, AbstractRoute] + routes: Mapping[str, "AbstractRoute"] - app: Application + app: "Application" domain: str - rule: AbstractRuleMatching + rule: "AbstractRuleMatching" http_exception: HTTPException @@ -269,10 +267,10 @@ def get_info(self) -> _InfoDict: # type: ignore[override] return self._route.get_info() @property - def apps(self) -> tuple[Application, ...]: + def apps(self) -> tuple["Application", ...]: return tuple(self._apps) - def add_app(self, app: Application) -> None: + def add_app(self, app: "Application") -> None: if self._frozen: raise RuntimeError("Cannot change apps stack after .freeze() call") if self._current_app is None: @@ -280,13 +278,13 @@ def add_app(self, app: Application) -> None: self._apps.insert(0, app) @property - def current_app(self) -> Application: + def current_app(self) -> "Application": app = self._current_app assert app is not None return app @current_app.setter - def current_app(self, app: Application) -> None: + def current_app(self, app: "Application") -> None: if DEBUG: # pragma: no cover if app not in self._apps: raise RuntimeError( @@ -348,7 +346,7 @@ def add_route( handler: type[AbstractView] | Handler, *, expect_handler: _ExpectHandler | None = None, - ) -> ResourceRoute: + ) -> "ResourceRoute": for route_obj in self._routes: if route_obj.method == method or route_obj.method == hdrs.METH_ANY: raise RuntimeError( @@ -361,7 +359,7 @@ def add_route( self.register_route(route_obj) return route_obj - def register_route(self, route: ResourceRoute) -> None: + def register_route(self, route: "ResourceRoute") -> None: assert isinstance( route, ResourceRoute ), f"Instance of Route class is required, got {route!r}" @@ -390,7 +388,7 @@ def _match(self, path: str) -> dict[str, str] | None: def __len__(self) -> int: return len(self._routes) - def __iter__(self) -> Iterator[ResourceRoute]: + def __iter__(self) -> Iterator["ResourceRoute"]: return iter(self._routes) # TODO: implement all abstract methods @@ -742,7 +740,7 @@ def __repr__(self) -> str: class PrefixedSubAppResource(PrefixResource): - def __init__(self, prefix: str, app: Application) -> None: + def __init__(self, prefix: str, app: "Application") -> None: super().__init__(prefix) self._app = app self._add_prefix_to_resources(prefix) @@ -859,7 +857,7 @@ def match_domain(self, host: str) -> bool: class MatchedSubAppResource(PrefixedSubAppResource): - def __init__(self, rule: AbstractRuleMatching, app: Application) -> None: + def __init__(self, rule: AbstractRuleMatching, app: "Application") -> None: AbstractResource.__init__(self) self._prefix = "" self._app = app @@ -1016,6 +1014,12 @@ def __init__(self) -> None: self._hyperdb: hyperscan.Database | None = None # type: ignore[no-any-unimported] self._hyper_index: dict[int, AbstractResource] = {} + def _on_match( + self, id_: int, from_: int, to: int, flags: int, found: list[int] + ) -> bool | None: + found.append(id_) + return None + async def _resolve_fast( self, request: Request, path: str, allowed_methods: set[str] ) -> UrlMappingMatchInfo | None: @@ -1025,13 +1029,9 @@ async def _resolve_fast( # N.B.: allowed_methods variable in modified in-place found: list[int] = [] - def on_match( - id_: int, from_: int, to: int, flags: int, context: Any | None - ) -> bool | None: - found.append(id_) - return None - - self._hyperdb.scan(path.encode("utf8"), match_event_handler=on_match) + self._hyperdb.scan( + path.encode("utf8"), match_event_handler=self._on_match, context=found + ) if not found: return None if len(found) > 1: From 29403238d30b564a70befb16b9c5f8bb1515c668 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sat, 16 Nov 2024 08:38:56 +0100 Subject: [PATCH 07/30] step --- aiohttp/web_urldispatcher.py | 88 ++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index 7560e9c73f7..b8ae81effc7 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -126,11 +126,11 @@ class _InfoDict(TypedDict, total=False): class AbstractResource(Sized, Iterable["AbstractRoute"]): - def __init__(self, *, name: str | None = None) -> None: + def __init__(self, *, name: Optional[str] = None) -> None: self._name = name @property - def name(self) -> str | None: + def name(self) -> Optional[str]: return self._name @property @@ -176,10 +176,10 @@ class AbstractRoute(abc.ABC): def __init__( self, method: str, - handler: Handler | type[AbstractView], + handler: Union[Handler, Type[AbstractView]], *, - expect_handler: _ExpectHandler | None = None, - resource: AbstractResource | None = None, + expect_handler: Optional[_ExpectHandler] = None, + resource: Optional[AbstractResource] = None, ) -> None: if expect_handler is None: expect_handler = _default_expect_handler @@ -217,11 +217,11 @@ def handler(self) -> Handler: @property @abc.abstractmethod - def name(self) -> str | None: + def name(self) -> Optional[str]: """Optional route's name, always equals to resource's name.""" @property - def resource(self) -> AbstractResource | None: + def resource(self) -> Optional[AbstractResource]: return self._resource @abc.abstractmethod @@ -232,7 +232,7 @@ def get_info(self) -> _InfoDict: def url_for(self, *args: str, **kwargs: str) -> URL: """Construct url for route with additional params.""" - async def handle_expect_header(self, request: Request) -> StreamResponse | None: + async def handle_expect_header(self, request: Request) -> Optional[StreamResponse]: return await self._expect_handler(request) @@ -240,11 +240,11 @@ class UrlMappingMatchInfo(BaseDict, AbstractMatchInfo): __slots__ = ("_route", "_apps", "_current_app", "_frozen") - def __init__(self, match_dict: dict[str, str], route: AbstractRoute) -> None: + def __init__(self, match_dict: Dict[str, str], route: AbstractRoute) -> None: super().__init__(match_dict) self._route = route - self._apps: list[Application] = [] - self._current_app: Application | None = None + self._apps: List[Application] = [] + self._current_app: Optional[Application] = None self._frozen = False @property @@ -260,14 +260,14 @@ def expect_handler(self) -> _ExpectHandler: return self._route.handle_expect_header @property - def http_exception(self) -> HTTPException | None: + def http_exception(self) -> Optional[HTTPException]: return None def get_info(self) -> _InfoDict: # type: ignore[override] return self._route.get_info() @property - def apps(self) -> tuple["Application", ...]: + def apps(self) -> Tuple["Application", ...]: return tuple(self._apps) def add_app(self, app: "Application") -> None: @@ -336,16 +336,16 @@ async def _default_expect_handler(request: Request) -> None: class Resource(AbstractResource): - def __init__(self, *, name: str | None = None) -> None: + def __init__(self, *, name: Optional[str] = None) -> None: super().__init__(name=name) - self._routes: list[ResourceRoute] = [] + self._routes: List[ResourceRoute] = [] def add_route( self, method: str, - handler: type[AbstractView] | Handler, + handler: Union[Type[AbstractView], Handler], *, - expect_handler: _ExpectHandler | None = None, + expect_handler: Optional[_ExpectHandler] = None, ) -> "ResourceRoute": for route_obj in self._routes: if route_obj.method == method or route_obj.method == hdrs.METH_ANY: @@ -382,7 +382,7 @@ async def resolve(self, request: Request) -> _Resolve: return None, allowed_methods @abc.abstractmethod - def _match(self, path: str) -> dict[str, str] | None: + def _match(self, path: str) -> Optional[Dict[str, str]]: pass # pragma: no cover def __len__(self) -> int: @@ -395,7 +395,7 @@ def __iter__(self) -> Iterator["ResourceRoute"]: class PlainResource(Resource): - def __init__(self, path: str, *, name: str | None = None) -> None: + def __init__(self, path: str, *, name: Optional[str] = None) -> None: super().__init__(name=name) assert not path or path.startswith("/") self._path = path @@ -414,7 +414,7 @@ def add_prefix(self, prefix: str) -> None: assert len(prefix) > 1 self._path = prefix + self._path - def _match(self, path: str) -> dict[str, str] | None: + def _match(self, path: str) -> Optional[Dict[str, str]]: # string comparison is about 10 times faster than regexp matching if self._path == path: return {} @@ -439,7 +439,7 @@ class DynamicResource(Resource): DYN_WITH_RE = re.compile(r"\{(?P[_a-zA-Z][_a-zA-Z0-9]*):(?P.+)\}") GOOD = r"[^{}/]+" - def __init__(self, path: str, *, name: str | None = None) -> None: + def __init__(self, path: str, *, name: Optional[str] = None) -> None: super().__init__(name=name) self._orig_path = path pattern = "" @@ -484,7 +484,7 @@ def add_prefix(self, prefix: str) -> None: self._pattern = re.compile(re.escape(prefix) + self._pattern.pattern) self._formatter = prefix + self._formatter - def _match(self, path: str) -> dict[str, str] | None: + def _match(self, path: str) -> Optional[Dict[str, str]]: match = self._pattern.fullmatch(path) if match is None: return None @@ -510,7 +510,7 @@ def __repr__(self) -> str: class PrefixResource(AbstractResource): - def __init__(self, prefix: str, *, name: str | None = None) -> None: + def __init__(self, prefix: str, *, name: Optional[str] = None) -> None: assert not prefix or prefix.startswith("/"), prefix assert prefix in ("", "/") or not prefix.endswith("/"), prefix super().__init__(name=name) @@ -542,8 +542,8 @@ def __init__( prefix: str, directory: PathLike, *, - name: str | None = None, - expect_handler: _ExpectHandler | None = None, + name: Optional[str] = None, + expect_handler: Optional[_ExpectHandler] = None, chunk_size: int = 256 * 1024, show_index: bool = False, follow_symlinks: bool = False, @@ -576,7 +576,7 @@ def url_for( # type: ignore[override] self, *, filename: PathLike, - append_version: bool | None = None, + append_version: Optional[bool] = None, ) -> URL: if append_version is None: append_version = self._append_version @@ -891,10 +891,10 @@ class ResourceRoute(AbstractRoute): def __init__( self, method: str, - handler: Handler | type[AbstractView], + handler: Union[Handler, Type[AbstractView]], resource: AbstractResource, *, - expect_handler: _ExpectHandler | None = None, + expect_handler: Optional[_ExpectHandler] = None, ) -> None: super().__init__( method, handler, expect_handler=expect_handler, resource=resource @@ -906,7 +906,7 @@ def __repr__(self) -> str: ) @property - def name(self) -> str | None: + def name(self) -> Optional[str]: if self._resource is None: return None return self._resource.name @@ -930,7 +930,7 @@ def url_for(self, *args: str, **kwargs: str) -> URL: raise RuntimeError(".url_for() is not allowed for SystemRoute") @property - def name(self) -> str | None: + def name(self) -> Optional[str]: return None def get_info(self) -> _InfoDict: @@ -955,7 +955,7 @@ class View(AbstractView): async def _iter(self) -> StreamResponse: if self.request.method not in hdrs.METH_ALL: self._raise_allowed_methods() - method: Callable[[], Awaitable[StreamResponse]] | None = getattr( + method: Optional[Callable[[], Awaitable[StreamResponse]]] = getattr( self, self.request.method.lower(), None ) if method is None: @@ -971,7 +971,7 @@ def _raise_allowed_methods(self) -> NoReturn: class ResourcesView(Sized, Iterable[AbstractResource], Container[AbstractResource]): - def __init__(self, resources: list[AbstractResource]) -> None: + def __init__(self, resources: List[AbstractResource]) -> None: self._resources = resources def __len__(self) -> int: @@ -986,7 +986,7 @@ def __contains__(self, resource: object) -> bool: class RoutesView(Sized, Iterable[AbstractRoute], Container[AbstractRoute]): def __init__(self, resources: list[AbstractResource]): - self._routes: list[AbstractRoute] = [] + self._routes: List[AbstractRoute] = [] for resource in resources: for route in resource: self._routes.append(route) @@ -1007,8 +1007,8 @@ class UrlDispatcher(AbstractRouter, Mapping[str, AbstractResource]): def __init__(self) -> None: super().__init__() - self._resources: list[AbstractResource] = [] - self._named_resources: dict[str, AbstractResource] = {} + self._resources: List[AbstractResource] = [] + self._named_resources: Dict[str, AbstractResource] = {} self._resource_index: dict[str, list[AbstractResource]] = {} self._matched_sub_app_resources: list[MatchedSubAppResource] = [] self._hyperdb: hyperscan.Database | None = None # type: ignore[no-any-unimported] @@ -1186,7 +1186,7 @@ def unindex_resource(self, resource: AbstractResource) -> None: resource_key = self._get_resource_index_key(resource) self._resource_index[resource_key].remove(resource) - def add_resource(self, path: str, *, name: str | None = None) -> Resource: + def add_resource(self, path: str, *, name: Optional[str] = None) -> Resource: if path and not path.startswith("/"): raise ValueError("path should be started with / or be empty") # Reuse last added resource if path and name are the same @@ -1206,10 +1206,10 @@ def add_route( self, method: str, path: str, - handler: Handler | type[AbstractView], + handler: Union[Handler, Type[AbstractView]], *, - name: str | None = None, - expect_handler: _ExpectHandler | None = None, + name: Optional[str] = None, + expect_handler: Optional[_ExpectHandler] = None, ) -> AbstractRoute: resource = self.add_resource(path, name=name) return resource.add_route(method, handler, expect_handler=expect_handler) @@ -1219,8 +1219,8 @@ def add_static( prefix: str, path: PathLike, *, - name: str | None = None, - expect_handler: _ExpectHandler | None = None, + name: Optional[str] = None, + expect_handler: Optional[_ExpectHandler] = None, chunk_size: int = 256 * 1024, show_index: bool = False, follow_symlinks: bool = False, @@ -1261,7 +1261,7 @@ def add_get( path: str, handler: Handler, *, - name: str | None = None, + name: Optional[str] = None, allow_head: bool = True, **kwargs: Any, ) -> AbstractRoute: @@ -1292,7 +1292,7 @@ def add_delete(self, path: str, handler: Handler, **kwargs: Any) -> AbstractRout return self.add_route(hdrs.METH_DELETE, path, handler, **kwargs) def add_view( - self, path: str, handler: type[AbstractView], **kwargs: Any + self, path: str, handler: Type[AbstractView], **kwargs: Any ) -> AbstractRoute: """Shortcut for add_route with ANY methods for a class-based view.""" return self.add_route(hdrs.METH_ANY, path, handler, **kwargs) @@ -1346,7 +1346,7 @@ def freeze(self) -> None: self._hyperdb = None self._hyper_index.clear() - def add_routes(self, routes: Iterable[AbstractRouteDef]) -> list[AbstractRoute]: + def add_routes(self, routes: Iterable[AbstractRouteDef]) -> List[AbstractRoute]: """Append routes to route table. Parameter should be a sequence of RouteDef objects. From 87056a2874a48bea664bfb8a2812e0a9fec52454 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sat, 16 Nov 2024 09:44:46 +0100 Subject: [PATCH 08/30] fix --- aiohttp/web_urldispatcher.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index 20f83f11cac..cab41d7c7b6 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -982,7 +982,7 @@ def __contains__(self, resource: object) -> bool: class RoutesView(Sized, Iterable[AbstractRoute], Container[AbstractRoute]): - def __init__(self, resources: list[AbstractResource]): + def __init__(self, resources: List[AbstractResource]): self._routes: List[AbstractRoute] = [] for resource in resources: for route in resource: @@ -1007,7 +1007,7 @@ def __init__(self) -> None: self._resources: List[AbstractResource] = [] self._named_resources: Dict[str, AbstractResource] = {} self._resource_index: dict[str, list[AbstractResource]] = {} - self._matched_sub_app_resources: list[MatchedSubAppResource] = [] + self._matched_sub_app_resources: List[MatchedSubAppResource] = [] self._hyperdb: hyperscan.Database | None = None # type: ignore[no-any-unimported] self._hyper_index: dict[int, AbstractResource] = {} From ac2f8e372c351557895c593098f8f0c865adbff7 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sat, 16 Nov 2024 11:44:49 +0100 Subject: [PATCH 09/30] fix --- aiohttp/web_urldispatcher.py | 119 ++++++++++++++++++++--------------- 1 file changed, 67 insertions(+), 52 deletions(-) diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index cab41d7c7b6..0a8e514ce92 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -4,7 +4,6 @@ import functools import hashlib import html -import itertools import keyword import os import re @@ -745,6 +744,7 @@ def __init__(self, prefix: str, app: "Application") -> None: def add_prefix(self, prefix: str) -> None: super().add_prefix(prefix) self._add_prefix_to_resources(prefix) + self._app.router._rebuild() def _add_prefix_to_resources(self, prefix: str) -> None: router = self._app.router @@ -1009,7 +1009,6 @@ def __init__(self) -> None: self._resource_index: dict[str, list[AbstractResource]] = {} self._matched_sub_app_resources: List[MatchedSubAppResource] = [] self._hyperdb: hyperscan.Database | None = None # type: ignore[no-any-unimported] - self._hyper_index: dict[int, AbstractResource] = {} def _on_match( self, id_: int, from_: int, to: int, flags: int, found: list[int] @@ -1025,6 +1024,7 @@ async def _resolve_fast( # N.B.: allowed_methods variable in modified in-place found: list[int] = [] + res_count = len(self._resources) self._hyperdb.scan( path.encode("utf8"), match_event_handler=self._on_match, context=found @@ -1033,16 +1033,20 @@ async def _resolve_fast( return None if len(found) > 1: # Multiple matches are found, - # fall back to explicit iteration to find the FIRST match - return await self._resolve_fallback(request, path, allowed_methods) + # use the FIRST match. + # Match ids are basically indexes in self._resources + # with an offset for variable resources + found.sort() - resource = self._hyper_index[found[0]] - match_dict, allowed = await resource.resolve(request) - if match_dict is not None: - return match_dict - else: - allowed_methods |= allowed - return None + for idx in found: + resource = self._resources[idx if idx < res_count else idx - res_count] + match_dict, allowed = await resource.resolve(request) + if match_dict is not None: + return match_dict + else: + allowed_methods |= allowed + + return None async def _resolve_fallback( self, request: Request, url_part: str, allowed_methods: set[str] @@ -1298,50 +1302,61 @@ def freeze(self) -> None: super().freeze() for resource in self._resources: resource.freeze() - if HAS_HYPERSCAN: - self._hyperdb = hyperscan.Database() - patterns: list[bytes] = [] - ids: list[int] = [] - counter = itertools.count() - for resource in self._resources: - pattern: str | None = None - if isinstance(resource, PlainResource): - pattern = re.escape(resource.get_info()["path"]) - elif isinstance(resource, DynamicResource): - pattern = resource.get_info()["pattern"].pattern - elif isinstance(resource, StaticResource): - pattern = resource.get_info()["prefix"] + "/.+" - else: - pattern = None - - if pattern is None: + self._rebuild() + + def _rebuild(self) -> None: + self._hyperdb = None + if not HAS_HYPERSCAN: + return + + patterns: list[bytes] = [] + ids: list[int] = [] + res_count = len(self._resources) + for id_, resource in enumerate(self._resources): + pattern: str | None = None + if isinstance(resource, PlainResource): + pattern = re.escape(resource.get_info()["path"]) + elif isinstance(resource, DynamicResource): + pattern = resource.get_info()["pattern"].pattern + # put variable resources at the end of search order list + id_ += res_count + elif isinstance(resource, PrefixResource): + if isinstance(resource, MatchedSubAppResource): + # wildcard resources doesn't fit hyperscan table continue - - id_ = next(counter) - patterns.append(f"^{pattern}$".encode()) - self._hyper_index[id_] = resource - ids.append(id_) - - count = len(patterns) - try: - self._hyperdb.compile( - expressions=patterns, - ids=ids, - elements=count, - flags=[ - hyperscan.HS_FLAG_UTF8 - | hyperscan.HS_FLAG_UCP - | hyperscan.HS_FLAG_SINGLEMATCH - ] - * count, - ) - except hyperscan.error as exc: + pattern = resource.get_info()["prefix"] + "(/.*)?" + else: web_logger.warning( - "Cannot compile hyperscan database: %s, switching to fallback url resolver", - repr(exc), + "Unsupported resource [%s] %s, switching to fallback url resolver", + type(resource), + resource.canonical, ) - self._hyperdb = None - self._hyper_index.clear() + return + + patterns.append(f"^{pattern}$".encode()) + ids.append(id_) + + count = len(patterns) + self._hyperdb = hyperscan.Database() + self._hyperpatterns = patterns + try: + self._hyperdb.compile( + expressions=patterns, + ids=ids, + elements=count, + flags=[ + hyperscan.HS_FLAG_UTF8 + | hyperscan.HS_FLAG_UCP + | hyperscan.HS_FLAG_SINGLEMATCH + ] + * count, + ) + except hyperscan.error as exc: + web_logger.warning( + "Cannot compile hyperscan database: %s, switching to fallback url resolver", + repr(exc), + ) + self._hyperdb = None def add_routes(self, routes: Iterable[AbstractRouteDef]) -> List[AbstractRoute]: """Append routes to route table. From 91e890a00e1e051d3f7951cb7189c711ea983ac1 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sat, 16 Nov 2024 11:46:08 +0100 Subject: [PATCH 10/30] Drop debug info --- aiohttp/web_urldispatcher.py | 1 - 1 file changed, 1 deletion(-) diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index 0a8e514ce92..e5ea5fa75d8 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -1338,7 +1338,6 @@ def _rebuild(self) -> None: count = len(patterns) self._hyperdb = hyperscan.Database() - self._hyperpatterns = patterns try: self._hyperdb.compile( expressions=patterns, From e0eb9669efba06caf6f43a84bb652161be5eadde Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sat, 16 Nov 2024 12:00:16 +0100 Subject: [PATCH 11/30] Tune deps --- requirements/runtime-deps.in | 2 +- requirements/runtime-deps.txt | 2 +- setup.cfg | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements/runtime-deps.in b/requirements/runtime-deps.in index f21770b2547..be075fe615a 100644 --- a/requirements/runtime-deps.in +++ b/requirements/runtime-deps.in @@ -7,7 +7,7 @@ async-timeout >= 4.0, < 6.0 ; python_version < "3.11" Brotli; platform_python_implementation == 'CPython' brotlicffi; platform_python_implementation != 'CPython' frozenlist >= 1.1.1 -hyperscan >= 0.7.8; platform_python_implementation == 'CPython' +hyperscan >= 0.7.8; platform_python_implementation == 'CPython' and (sys_platform=="linux" or sys_platform=="darwin") multidict >=4.5, < 7.0 propcache >= 0.2.0 yarl >= 1.17.0, < 2.0 diff --git a/requirements/runtime-deps.txt b/requirements/runtime-deps.txt index 8f164c8d0d3..b2d642febc1 100644 --- a/requirements/runtime-deps.txt +++ b/requirements/runtime-deps.txt @@ -20,7 +20,7 @@ frozenlist==1.5.0 # via # -r requirements/runtime-deps.in # aiosignal -hyperscan == 0.7.8 ; platform_python_implementation == "CPython" +hyperscan == 0.7.8 ; platform_python_implementation == "CPython" and (sys_platform=="linux" or sys_platform=="darwin") # via -r requirements/runtime-deps.in idna==3.6 # via yarl diff --git a/setup.cfg b/setup.cfg index 53d0b5b3b2b..c02e3c65bd2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -70,7 +70,7 @@ speedups = aiodns >= 3.2.0; sys_platform=="linux" or sys_platform=="darwin" Brotli; platform_python_implementation == 'CPython' brotlicffi; platform_python_implementation != 'CPython' - hyperscan >= 0.7.8; platform_python_implementation == 'CPython' + hyperscan >= 0.7.8; platform_python_implementation == 'CPython' and (sys_platform=="linux" or sys_platform=="darwin") [options.packages.find] exclude = From d6c019b6a882005dac8c4f96026e7a9a24614b9d Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sat, 16 Nov 2024 12:04:17 +0100 Subject: [PATCH 12/30] fix --- aiohttp/web_urldispatcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index e5ea5fa75d8..51d8ffb1142 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -1012,7 +1012,7 @@ def __init__(self) -> None: def _on_match( self, id_: int, from_: int, to: int, flags: int, found: list[int] - ) -> bool | None: + ) -> Optional[bool]: found.append(id_) return None From 5db0bfda2baefaa7a02c474939f9505d4a99c816 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sat, 16 Nov 2024 12:08:22 +0100 Subject: [PATCH 13/30] fix --- aiohttp/web_urldispatcher.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index 51d8ffb1142..b648802d398 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -1008,7 +1008,7 @@ def __init__(self) -> None: self._named_resources: Dict[str, AbstractResource] = {} self._resource_index: dict[str, list[AbstractResource]] = {} self._matched_sub_app_resources: List[MatchedSubAppResource] = [] - self._hyperdb: hyperscan.Database | None = None # type: ignore[no-any-unimported] + self._hyperdb: Optional[hyperscan.Database] = None # type: ignore[no-any-unimported] def _on_match( self, id_: int, from_: int, to: int, flags: int, found: list[int] @@ -1018,7 +1018,7 @@ def _on_match( async def _resolve_fast( self, request: Request, path: str, allowed_methods: set[str] - ) -> UrlMappingMatchInfo | None: + ) -> Optional[UrlMappingMatchInfo]: if self._hyperdb is None: return await self._resolve_fallback(request, path, allowed_methods) @@ -1050,7 +1050,7 @@ async def _resolve_fast( async def _resolve_fallback( self, request: Request, url_part: str, allowed_methods: set[str] - ) -> UrlMappingMatchInfo | None: + ) -> Optional[UrlMappingMatchInfo]: # N.B.: allowed_methods variable in modified in-place resource_index = self._resource_index From 7ce5180b95e354566c3aabb4798b430812857a30 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sat, 16 Nov 2024 12:08:39 +0100 Subject: [PATCH 14/30] fix --- aiohttp/web_urldispatcher.py | 1 - 1 file changed, 1 deletion(-) diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index b648802d398..f1fd00bf892 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -1313,7 +1313,6 @@ def _rebuild(self) -> None: ids: list[int] = [] res_count = len(self._resources) for id_, resource in enumerate(self._resources): - pattern: str | None = None if isinstance(resource, PlainResource): pattern = re.escape(resource.get_info()["path"]) elif isinstance(resource, DynamicResource): From d8c5da1d5958784c1854f25968a669b852d7ab7d Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sat, 16 Nov 2024 12:28:06 +0100 Subject: [PATCH 15/30] fix --- requirements/runtime-deps.in | 2 +- requirements/runtime-deps.txt | 2 +- setup.cfg | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements/runtime-deps.in b/requirements/runtime-deps.in index be075fe615a..191f10c08fe 100644 --- a/requirements/runtime-deps.in +++ b/requirements/runtime-deps.in @@ -7,7 +7,7 @@ async-timeout >= 4.0, < 6.0 ; python_version < "3.11" Brotli; platform_python_implementation == 'CPython' brotlicffi; platform_python_implementation != 'CPython' frozenlist >= 1.1.1 -hyperscan >= 0.7.8; platform_python_implementation == 'CPython' and (sys_platform=="linux" or sys_platform=="darwin") +hyperscan >= 0.7.8; platform_python_implementation == 'CPython' and (sys_platform=="linux" or sys_platform=="darwin") and (python_version < "3.14") multidict >=4.5, < 7.0 propcache >= 0.2.0 yarl >= 1.17.0, < 2.0 diff --git a/requirements/runtime-deps.txt b/requirements/runtime-deps.txt index b2d642febc1..fe0d01c4774 100644 --- a/requirements/runtime-deps.txt +++ b/requirements/runtime-deps.txt @@ -20,7 +20,7 @@ frozenlist==1.5.0 # via # -r requirements/runtime-deps.in # aiosignal -hyperscan == 0.7.8 ; platform_python_implementation == "CPython" and (sys_platform=="linux" or sys_platform=="darwin") +hyperscan == 0.7.8 ; platform_python_implementation == "CPython" and (sys_platform=="linux" or sys_platform=="darwin") and (python_version < "3.14") # via -r requirements/runtime-deps.in idna==3.6 # via yarl diff --git a/setup.cfg b/setup.cfg index c02e3c65bd2..c0e288e23d0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -70,7 +70,7 @@ speedups = aiodns >= 3.2.0; sys_platform=="linux" or sys_platform=="darwin" Brotli; platform_python_implementation == 'CPython' brotlicffi; platform_python_implementation != 'CPython' - hyperscan >= 0.7.8; platform_python_implementation == 'CPython' and (sys_platform=="linux" or sys_platform=="darwin") + hyperscan >= 0.7.8; platform_python_implementation == 'CPython' and (sys_platform=="linux" or sys_platform=="darwin") and (python_version < "3.14") [options.packages.find] exclude = From d27eea9ca93b62f2fe63277ca43b1403c476ce5b Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sat, 16 Nov 2024 12:38:52 +0100 Subject: [PATCH 16/30] tune --- aiohttp/web_urldispatcher.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index f1fd00bf892..fb461f96a9f 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -1076,12 +1076,20 @@ async def _resolve_fallback( async def resolve(self, request: Request) -> UrlMappingMatchInfo: allowed_methods: set[str] = set() - if ( - ret := await self._resolve_fast( - request, request.rel_url.path_safe, allowed_methods - ) - ) is not None: - return ret + if self._hyperdb is not None: + if ( + ret := await self._resolve_fast( + request, request.rel_url.path_safe, allowed_methods + ) + ) is not None: + return ret + else: + if ( + ret := await self._resolve_fallback( + request, request.rel_url.path_safe, allowed_methods + ) + ) is not None: + return ret # # We didn't find any candidates, so we'll try the matched sub-app From 03719e9ff6d8e6902c64b005125f4ce245179e46 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sat, 16 Nov 2024 13:08:08 +0100 Subject: [PATCH 17/30] tune --- aiohttp/web_urldispatcher.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index fb461f96a9f..3074d8d1159 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -1024,7 +1024,8 @@ async def _resolve_fast( # N.B.: allowed_methods variable in modified in-place found: list[int] = [] - res_count = len(self._resources) + resources = self._resources + res_count = len(resources) self._hyperdb.scan( path.encode("utf8"), match_event_handler=self._on_match, context=found @@ -1039,7 +1040,7 @@ async def _resolve_fast( found.sort() for idx in found: - resource = self._resources[idx if idx < res_count else idx - res_count] + resource = resources[idx if idx < res_count else idx - res_count] match_dict, allowed = await resource.resolve(request) if match_dict is not None: return match_dict From 89bac7a0e21c0b78e7f44fb2c5c4a172d86b8576 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sat, 16 Nov 2024 13:43:26 +0100 Subject: [PATCH 18/30] inline --- aiohttp/web_urldispatcher.py | 111 +++++++++++++---------------------- 1 file changed, 41 insertions(+), 70 deletions(-) diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index 3074d8d1159..f445b49a1af 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -1016,81 +1016,52 @@ def _on_match( found.append(id_) return None - async def _resolve_fast( - self, request: Request, path: str, allowed_methods: set[str] - ) -> Optional[UrlMappingMatchInfo]: - if self._hyperdb is None: - return await self._resolve_fallback(request, path, allowed_methods) - - # N.B.: allowed_methods variable in modified in-place - found: list[int] = [] - resources = self._resources - res_count = len(resources) - - self._hyperdb.scan( - path.encode("utf8"), match_event_handler=self._on_match, context=found - ) - if not found: - return None - if len(found) > 1: - # Multiple matches are found, - # use the FIRST match. - # Match ids are basically indexes in self._resources - # with an offset for variable resources - found.sort() - - for idx in found: - resource = resources[idx if idx < res_count else idx - res_count] - match_dict, allowed = await resource.resolve(request) - if match_dict is not None: - return match_dict - else: - allowed_methods |= allowed - - return None - - async def _resolve_fallback( - self, request: Request, url_part: str, allowed_methods: set[str] - ) -> Optional[UrlMappingMatchInfo]: - # N.B.: allowed_methods variable in modified in-place - - resource_index = self._resource_index - - # Walk the url parts looking for candidates. We walk the url backwards - # to ensure the most explicit match is found first. If there are multiple - # candidates for a given url part because there are multiple resources - # registered for the same canonical path, we resolve them in a linear - # fashion to ensure registration order is respected. - while url_part: - for candidate in resource_index.get(url_part, ()): - match_dict, allowed = await candidate.resolve(request) - if match_dict is not None: - return match_dict - else: - allowed_methods |= allowed - if url_part == "/": - break - url_part = url_part.rpartition("/")[0] or "/" - - return None - async def resolve(self, request: Request) -> UrlMappingMatchInfo: allowed_methods: set[str] = set() if self._hyperdb is not None: - if ( - ret := await self._resolve_fast( - request, request.rel_url.path_safe, allowed_methods - ) - ) is not None: - return ret + path = request.rel_url.path_safe + found: list[int] = [] + resources = self._resources + res_count = len(resources) + + self._hyperdb.scan( + path.encode("utf8"), match_event_handler=self._on_match, context=found + ) + if found: + if len(found) > 1: + # Multiple matches are found, + # use the FIRST match. + # Match ids are basically indexes in self._resources + # with an offset for variable resources + found.sort() + + for idx in found: + resource = resources[idx if idx < res_count else idx - res_count] + match_dict, allowed = await resource.resolve(request) + if match_dict is not None: + return match_dict + else: + allowed_methods |= allowed else: - if ( - ret := await self._resolve_fallback( - request, request.rel_url.path_safe, allowed_methods - ) - ) is not None: - return ret + url_part = request.rel_url.path_safe + resource_index = self._resource_index + + # Walk the url parts looking for candidates. We walk the url backwards + # to ensure the most explicit match is found first. If there are multiple + # candidates for a given url part because there are multiple resources + # registered for the same canonical path, we resolve them in a linear + # fashion to ensure registration order is respected. + while url_part: + for candidate in resource_index.get(url_part, ()): + match_dict, allowed = await candidate.resolve(request) + if match_dict is not None: + return match_dict + else: + allowed_methods |= allowed + if url_part == "/": + break + url_part = url_part.rpartition("/")[0] or "/" # # We didn't find any candidates, so we'll try the matched sub-app From 4a299038c9bc4021c8dd904560681711b6882d09 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sat, 16 Nov 2024 16:24:13 +0100 Subject: [PATCH 19/30] Use dict lookup for plain resources --- aiohttp/web_urldispatcher.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index f445b49a1af..bc082b5412f 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -1009,6 +1009,7 @@ def __init__(self) -> None: self._resource_index: dict[str, list[AbstractResource]] = {} self._matched_sub_app_resources: List[MatchedSubAppResource] = [] self._hyperdb: Optional[hyperscan.Database] = None # type: ignore[no-any-unimported] + self._plain_resources: dict[str, AbstractResource] = {} def _on_match( self, id_: int, from_: int, to: int, flags: int, found: list[int] @@ -1018,9 +1019,16 @@ def _on_match( async def resolve(self, request: Request) -> UrlMappingMatchInfo: allowed_methods: set[str] = set() + path = request.rel_url.path_safe + + if (resource := self._plain_resources.get(path)) is not None: + match_dict, allowed = await resource.resolve(request) + if match_dict is not None: + return match_dict + else: + allowed_methods |= allowed if self._hyperdb is not None: - path = request.rel_url.path_safe found: list[int] = [] resources = self._resources res_count = len(resources) @@ -1044,7 +1052,7 @@ async def resolve(self, request: Request) -> UrlMappingMatchInfo: else: allowed_methods |= allowed else: - url_part = request.rel_url.path_safe + url_part = path resource_index = self._resource_index # Walk the url parts looking for candidates. We walk the url backwards @@ -1286,15 +1294,14 @@ def freeze(self) -> None: def _rebuild(self) -> None: self._hyperdb = None - if not HAS_HYPERSCAN: - return - + self._plain_resources.clear() patterns: list[bytes] = [] ids: list[int] = [] res_count = len(self._resources) for id_, resource in enumerate(self._resources): if isinstance(resource, PlainResource): - pattern = re.escape(resource.get_info()["path"]) + self._plain_resources[resource.get_info()["path"]] = resource + continue elif isinstance(resource, DynamicResource): pattern = resource.get_info()["pattern"].pattern # put variable resources at the end of search order list @@ -1305,16 +1312,14 @@ def _rebuild(self) -> None: continue pattern = resource.get_info()["prefix"] + "(/.*)?" else: - web_logger.warning( - "Unsupported resource [%s] %s, switching to fallback url resolver", - type(resource), - resource.canonical, - ) - return + raise RuntimeError(f"Unsupported resource type {type(resource)}") patterns.append(f"^{pattern}$".encode()) ids.append(id_) + if not HAS_HYPERSCAN: + return + count = len(patterns) self._hyperdb = hyperscan.Database() try: From 51c26c07d85f7c025a786f0c090d19817733cd20 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sat, 16 Nov 2024 18:03:43 +0100 Subject: [PATCH 20/30] Add separate dict for prefix resources --- aiohttp/web_urldispatcher.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index bc082b5412f..3e28ddc340c 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -1009,7 +1009,8 @@ def __init__(self) -> None: self._resource_index: dict[str, list[AbstractResource]] = {} self._matched_sub_app_resources: List[MatchedSubAppResource] = [] self._hyperdb: Optional[hyperscan.Database] = None # type: ignore[no-any-unimported] - self._plain_resources: dict[str, AbstractResource] = {} + self._plain_resources: dict[str, PlainResource] = {} + self._prefix_resources: dict[str, PrefixResource] = {} def _on_match( self, id_: int, from_: int, to: int, flags: int, found: list[int] @@ -1021,8 +1022,15 @@ async def resolve(self, request: Request) -> UrlMappingMatchInfo: allowed_methods: set[str] = set() path = request.rel_url.path_safe - if (resource := self._plain_resources.get(path)) is not None: - match_dict, allowed = await resource.resolve(request) + if (plain_resource := self._plain_resources.get(path)) is not None: + match_dict, allowed = await plain_resource.resolve(request) + if match_dict is not None: + return match_dict + else: + allowed_methods |= allowed + + for prefix, prefix_resource in self._prefix_resources.items(): + match_dict, allowed = await prefix_resource.resolve(request) if match_dict is not None: return match_dict else: @@ -1031,7 +1039,6 @@ async def resolve(self, request: Request) -> UrlMappingMatchInfo: if self._hyperdb is not None: found: list[int] = [] resources = self._resources - res_count = len(resources) self._hyperdb.scan( path.encode("utf8"), match_event_handler=self._on_match, context=found @@ -1045,7 +1052,7 @@ async def resolve(self, request: Request) -> UrlMappingMatchInfo: found.sort() for idx in found: - resource = resources[idx if idx < res_count else idx - res_count] + resource = resources[idx] match_dict, allowed = await resource.resolve(request) if match_dict is not None: return match_dict @@ -1295,22 +1302,21 @@ def freeze(self) -> None: def _rebuild(self) -> None: self._hyperdb = None self._plain_resources.clear() + self._prefix_resources.clear() patterns: list[bytes] = [] ids: list[int] = [] - res_count = len(self._resources) for id_, resource in enumerate(self._resources): if isinstance(resource, PlainResource): self._plain_resources[resource.get_info()["path"]] = resource continue elif isinstance(resource, DynamicResource): pattern = resource.get_info()["pattern"].pattern - # put variable resources at the end of search order list - id_ += res_count elif isinstance(resource, PrefixResource): if isinstance(resource, MatchedSubAppResource): # wildcard resources doesn't fit hyperscan table continue - pattern = resource.get_info()["prefix"] + "(/.*)?" + self._prefix_resources[resource.get_info()["prefix"]] = resource + continue else: raise RuntimeError(f"Unsupported resource type {type(resource)}") From e6e9e489942487881a9165ae0780afc2524655f6 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sat, 16 Nov 2024 18:08:35 +0100 Subject: [PATCH 21/30] Tune --- aiohttp/web_urldispatcher.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index 3e28ddc340c..4a7be51586c 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -1010,7 +1010,7 @@ def __init__(self) -> None: self._matched_sub_app_resources: List[MatchedSubAppResource] = [] self._hyperdb: Optional[hyperscan.Database] = None # type: ignore[no-any-unimported] self._plain_resources: dict[str, PlainResource] = {} - self._prefix_resources: dict[str, PrefixResource] = {} + self._prefix_resources: list[tuple[str, PrefixResource]] = [] def _on_match( self, id_: int, from_: int, to: int, flags: int, found: list[int] @@ -1029,7 +1029,7 @@ async def resolve(self, request: Request) -> UrlMappingMatchInfo: else: allowed_methods |= allowed - for prefix, prefix_resource in self._prefix_resources.items(): + for prefix, prefix_resource in self._prefix_resources: match_dict, allowed = await prefix_resource.resolve(request) if match_dict is not None: return match_dict @@ -1302,7 +1302,7 @@ def freeze(self) -> None: def _rebuild(self) -> None: self._hyperdb = None self._plain_resources.clear() - self._prefix_resources.clear() + del self._prefix_resources[:] patterns: list[bytes] = [] ids: list[int] = [] for id_, resource in enumerate(self._resources): @@ -1315,7 +1315,7 @@ def _rebuild(self) -> None: if isinstance(resource, MatchedSubAppResource): # wildcard resources doesn't fit hyperscan table continue - self._prefix_resources[resource.get_info()["prefix"]] = resource + self._prefix_resources.append((resource.get_info()["prefix"], resource)) continue else: raise RuntimeError(f"Unsupported resource type {type(resource)}") From f4526758abe7af6bfa835d409dc861fe0f5f2f38 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sun, 17 Nov 2024 08:30:51 +0100 Subject: [PATCH 22/30] Refactor prefixed resources routing --- aiohttp/web_urldispatcher.py | 44 ++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index 4a7be51586c..bb31e23a7be 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -1,6 +1,7 @@ import abc import asyncio import base64 +import dataclasses import functools import hashlib import html @@ -998,6 +999,12 @@ def __contains__(self, route: object) -> bool: return route in self._routes +@dataclasses.dataclass +class _PrefixSubtree: + common_prefix: str + resources: list[PrefixResource] + + class UrlDispatcher(AbstractRouter, Mapping[str, AbstractResource]): NAME_SPLIT_RE = re.compile(r"[.:-]") HTTP_NOT_FOUND = HTTPNotFound() @@ -1010,7 +1017,7 @@ def __init__(self) -> None: self._matched_sub_app_resources: List[MatchedSubAppResource] = [] self._hyperdb: Optional[hyperscan.Database] = None # type: ignore[no-any-unimported] self._plain_resources: dict[str, PlainResource] = {} - self._prefix_resources: list[tuple[str, PrefixResource]] = [] + self._prefix_resources: dict[str, _PrefixSubtree] = {} def _on_match( self, id_: int, from_: int, to: int, flags: int, found: list[int] @@ -1029,12 +1036,17 @@ async def resolve(self, request: Request) -> UrlMappingMatchInfo: else: allowed_methods |= allowed - for prefix, prefix_resource in self._prefix_resources: - match_dict, allowed = await prefix_resource.resolve(request) - if match_dict is not None: - return match_dict - else: - allowed_methods |= allowed + parts = path.split("/") + # path.startswith("/"), thus parts[0] == "". + # parts[1] is the first prefix segment + if (subtree := self._prefix_resources.get(parts[1])) is not None: + if path.startswith(subtree.common_prefix): + for prefix_resource in subtree.resources: + match_dict, allowed = await prefix_resource.resolve(request) + if match_dict is not None: + return match_dict + else: + allowed_methods |= allowed if self._hyperdb is not None: found: list[int] = [] @@ -1302,7 +1314,7 @@ def freeze(self) -> None: def _rebuild(self) -> None: self._hyperdb = None self._plain_resources.clear() - del self._prefix_resources[:] + self._prefix_resources.clear() patterns: list[bytes] = [] ids: list[int] = [] for id_, resource in enumerate(self._resources): @@ -1315,7 +1327,21 @@ def _rebuild(self) -> None: if isinstance(resource, MatchedSubAppResource): # wildcard resources doesn't fit hyperscan table continue - self._prefix_resources.append((resource.get_info()["prefix"], resource)) + prefix = resource.get_info()["prefix"] + parts = prefix.split("/") + segment = parts[0] + subtree = self._prefix_resources.get(segment) + if subtree is None: + subtree = _PrefixSubtree(prefix, [resource]) + self._prefix_resources[segment] = subtree + else: + subtree_parts = subtree.common_prefix.split("/") + segments = [] + for lft, rgt in zip(parts, subtree_parts): + if lft == rgt: + segments.append(lft) + subtree.common_prefix = "/".join(segments) + subtree.resources.append(resource) continue else: raise RuntimeError(f"Unsupported resource type {type(resource)}") From f56a12a01b278433751871dc21e0a0253334eab9 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sun, 17 Nov 2024 08:33:02 +0100 Subject: [PATCH 23/30] address review --- aiohttp/web_urldispatcher.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index bb31e23a7be..d854e6498f1 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -1055,21 +1055,20 @@ async def resolve(self, request: Request) -> UrlMappingMatchInfo: self._hyperdb.scan( path.encode("utf8"), match_event_handler=self._on_match, context=found ) - if found: - if len(found) > 1: - # Multiple matches are found, - # use the FIRST match. - # Match ids are basically indexes in self._resources - # with an offset for variable resources - found.sort() - - for idx in found: - resource = resources[idx] - match_dict, allowed = await resource.resolve(request) - if match_dict is not None: - return match_dict - else: - allowed_methods |= allowed + if len(found) > 1: + # Multiple matches are found, + # use the FIRST match. + # Match ids are basically indexes in self._resources + # with an offset for variable resources + found.sort() + + for idx in found: + resource = resources[idx] + match_dict, allowed = await resource.resolve(request) + if match_dict is not None: + return match_dict + else: + allowed_methods |= allowed else: url_part = path resource_index = self._resource_index From e14bad9f3708a8fd5d4c2d650283dade68599eb3 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sun, 17 Nov 2024 09:08:57 +0100 Subject: [PATCH 24/30] tune --- aiohttp/web_urldispatcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index d854e6498f1..20e6e55f92e 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -1040,7 +1040,7 @@ async def resolve(self, request: Request) -> UrlMappingMatchInfo: # path.startswith("/"), thus parts[0] == "". # parts[1] is the first prefix segment if (subtree := self._prefix_resources.get(parts[1])) is not None: - if path.startswith(subtree.common_prefix): + if len(subtree.resources) == 1 or path.startswith(subtree.common_prefix): for prefix_resource in subtree.resources: match_dict, allowed = await prefix_resource.resolve(request) if match_dict is not None: From 35631730ba8e9ee2dd52644df8cc39cbf3f0b85b Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Sun, 17 Nov 2024 11:35:46 +0100 Subject: [PATCH 25/30] comment --- aiohttp/web_urldispatcher.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index 20e6e55f92e..fce819faf9a 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -1058,8 +1058,7 @@ async def resolve(self, request: Request) -> UrlMappingMatchInfo: if len(found) > 1: # Multiple matches are found, # use the FIRST match. - # Match ids are basically indexes in self._resources - # with an offset for variable resources + # Match ids are basically indexes in self._resources. found.sort() for idx in found: From 00e6331e34fb9c4fd37596b951249a2e47553ae0 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Mon, 18 Nov 2024 10:52:50 +0100 Subject: [PATCH 26/30] Fix prefix resource lookup --- aiohttp/web_urldispatcher.py | 183 +++++++++++++++++------------------ 1 file changed, 90 insertions(+), 93 deletions(-) diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index fce819faf9a..f3dc347de99 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -1,7 +1,6 @@ import abc import asyncio import base64 -import dataclasses import functools import hashlib import html @@ -999,12 +998,6 @@ def __contains__(self, route: object) -> bool: return route in self._routes -@dataclasses.dataclass -class _PrefixSubtree: - common_prefix: str - resources: list[PrefixResource] - - class UrlDispatcher(AbstractRouter, Mapping[str, AbstractResource]): NAME_SPLIT_RE = re.compile(r"[.:-]") HTTP_NOT_FOUND = HTTPNotFound() @@ -1017,7 +1010,8 @@ def __init__(self) -> None: self._matched_sub_app_resources: List[MatchedSubAppResource] = [] self._hyperdb: Optional[hyperscan.Database] = None # type: ignore[no-any-unimported] self._plain_resources: dict[str, PlainResource] = {} - self._prefix_resources: dict[str, _PrefixSubtree] = {} + self._prefix_resources: dict[str, list[PrefixResource]] = {} + self._has_variable_resources = True def _on_match( self, id_: int, from_: int, to: int, flags: int, found: list[int] @@ -1029,6 +1023,7 @@ async def resolve(self, request: Request) -> UrlMappingMatchInfo: allowed_methods: set[str] = set() path = request.rel_url.path_safe + # plain resource lookup if (plain_resource := self._plain_resources.get(path)) is not None: match_dict, allowed = await plain_resource.resolve(request) if match_dict is not None: @@ -1036,58 +1031,71 @@ async def resolve(self, request: Request) -> UrlMappingMatchInfo: else: allowed_methods |= allowed - parts = path.split("/") - # path.startswith("/"), thus parts[0] == "". - # parts[1] is the first prefix segment - if (subtree := self._prefix_resources.get(parts[1])) is not None: - if len(subtree.resources) == 1 or path.startswith(subtree.common_prefix): - for prefix_resource in subtree.resources: - match_dict, allowed = await prefix_resource.resolve(request) - if match_dict is not None: - return match_dict - else: - allowed_methods |= allowed - - if self._hyperdb is not None: - found: list[int] = [] - resources = self._resources - - self._hyperdb.scan( - path.encode("utf8"), match_event_handler=self._on_match, context=found - ) - if len(found) > 1: - # Multiple matches are found, - # use the FIRST match. - # Match ids are basically indexes in self._resources. - found.sort() - - for idx in found: - resource = resources[idx] - match_dict, allowed = await resource.resolve(request) + # prefix resource lookup + url_part = path + prefix_resources = self._prefix_resources + + # Walk the url parts looking for candidates. We walk the url backwards + # to ensure the most explicit match is found first. If there are multiple + # candidates for a given url part because there are multiple resources + # registered for the same canonical path, we resolve them in a linear + # fashion to ensure registration order is respected. + while url_part: + for prefix_resource in prefix_resources.get(url_part, ()): + match_dict, allowed = await prefix_resource.resolve(request) if match_dict is not None: return match_dict else: allowed_methods |= allowed - else: - url_part = path - resource_index = self._resource_index - - # Walk the url parts looking for candidates. We walk the url backwards - # to ensure the most explicit match is found first. If there are multiple - # candidates for a given url part because there are multiple resources - # registered for the same canonical path, we resolve them in a linear - # fashion to ensure registration order is respected. - while url_part: - for candidate in resource_index.get(url_part, ()): - match_dict, allowed = await candidate.resolve(request) + if url_part == "/": + break + url_part = url_part.rpartition("/")[0] or "/" + + # variable resource lookup + if self._has_variable_resources: + if self._hyperdb is not None: + found: list[int] = [] + resources = self._resources + + self._hyperdb.scan( + path.encode("utf8"), + match_event_handler=self._on_match, + context=found, + ) + if len(found) > 1: + # Multiple matches are found, + # use the FIRST match. + # Match ids are basically indexes in self._resources. + found.sort() + + for idx in found: + resource = resources[idx] + match_dict, allowed = await resource.resolve(request) if match_dict is not None: return match_dict else: allowed_methods |= allowed - if url_part == "/": - break - url_part = url_part.rpartition("/")[0] or "/" - + else: + url_part = path + resource_index = self._resource_index + + # Walk the url parts looking for candidates. We walk the url backwards + # to ensure the most explicit match is found first. If there are multiple + # candidates for a given url part because there are multiple resources + # registered for the same canonical path, we resolve them in a linear + # fashion to ensure registration order is respected. + while url_part: + for candidate in resource_index.get(url_part, ()): + match_dict, allowed = await candidate.resolve(request) + if match_dict is not None: + return match_dict + else: + allowed_methods |= allowed + if url_part == "/": + break + url_part = url_part.rpartition("/")[0] or "/" + + # domain resource lookup # # We didn't find any candidates, so we'll try the matched sub-app # resources which we have to walk in a linear fashion because they @@ -1318,58 +1326,47 @@ def _rebuild(self) -> None: for id_, resource in enumerate(self._resources): if isinstance(resource, PlainResource): self._plain_resources[resource.get_info()["path"]] = resource - continue elif isinstance(resource, DynamicResource): pattern = resource.get_info()["pattern"].pattern + patterns.append(f"^{pattern}$".encode()) + ids.append(id_) elif isinstance(resource, PrefixResource): if isinstance(resource, MatchedSubAppResource): # wildcard resources doesn't fit hyperscan table continue prefix = resource.get_info()["prefix"] - parts = prefix.split("/") - segment = parts[0] - subtree = self._prefix_resources.get(segment) - if subtree is None: - subtree = _PrefixSubtree(prefix, [resource]) - self._prefix_resources[segment] = subtree - else: - subtree_parts = subtree.common_prefix.split("/") - segments = [] - for lft, rgt in zip(parts, subtree_parts): - if lft == rgt: - segments.append(lft) - subtree.common_prefix = "/".join(segments) - subtree.resources.append(resource) - continue + # There may be multiple resources for a prefix + # so we keep them in a list to ensure that registration + # order is respected. + self._prefix_resources.setdefault(prefix.rstrip("/") or "/", []).append( + resource + ) else: raise RuntimeError(f"Unsupported resource type {type(resource)}") - patterns.append(f"^{pattern}$".encode()) - ids.append(id_) - - if not HAS_HYPERSCAN: - return - count = len(patterns) - self._hyperdb = hyperscan.Database() - try: - self._hyperdb.compile( - expressions=patterns, - ids=ids, - elements=count, - flags=[ - hyperscan.HS_FLAG_UTF8 - | hyperscan.HS_FLAG_UCP - | hyperscan.HS_FLAG_SINGLEMATCH - ] - * count, - ) - except hyperscan.error as exc: - web_logger.warning( - "Cannot compile hyperscan database: %s, switching to fallback url resolver", - repr(exc), - ) - self._hyperdb = None + self._has_variable_resources = count > 0 + if self._has_variable_resources: + if HAS_HYPERSCAN: + self._hyperdb = hyperscan.Database() + try: + self._hyperdb.compile( + expressions=patterns, + ids=ids, + elements=count, + flags=[ + hyperscan.HS_FLAG_UTF8 + | hyperscan.HS_FLAG_UCP + | hyperscan.HS_FLAG_SINGLEMATCH + ] + * count, + ) + except hyperscan.error as exc: + web_logger.warning( + "Cannot compile hyperscan database: %s, switching to fallback url resolver", + repr(exc), + ) + self._hyperdb = None def add_routes(self, routes: Iterable[AbstractRouteDef]) -> List[AbstractRoute]: """Append routes to route table. From 55a0c9df906b3dc5e9e6af3cc99364b5a7b7a8c1 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Mon, 18 Nov 2024 13:54:47 +0100 Subject: [PATCH 27/30] Alternative strategy for prefix resource matching --- aiohttp/web_urldispatcher.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index f3dc347de99..c9b7591c6ee 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -1011,6 +1011,7 @@ def __init__(self) -> None: self._hyperdb: Optional[hyperscan.Database] = None # type: ignore[no-any-unimported] self._plain_resources: dict[str, PlainResource] = {} self._prefix_resources: dict[str, list[PrefixResource]] = {} + self._max_prefix_cardinality = 0 self._has_variable_resources = True def _on_match( @@ -1032,24 +1033,27 @@ async def resolve(self, request: Request) -> UrlMappingMatchInfo: allowed_methods |= allowed # prefix resource lookup - url_part = path + parts = tuple( + path[1:].split("/", self._max_prefix_cardinality)[ + : self._max_prefix_cardinality + ] + ) prefix_resources = self._prefix_resources - # Walk the url parts looking for candidates. We walk the url backwards # to ensure the most explicit match is found first. If there are multiple # candidates for a given url part because there are multiple resources # registered for the same canonical path, we resolve them in a linear # fashion to ensure registration order is respected. - while url_part: - for prefix_resource in prefix_resources.get(url_part, ()): + while True: + for prefix_resource in prefix_resources.get(parts, ()): match_dict, allowed = await prefix_resource.resolve(request) if match_dict is not None: return match_dict else: allowed_methods |= allowed - if url_part == "/": + if len(parts) <= 1: break - url_part = url_part.rpartition("/")[0] or "/" + parts = parts[:-1] # variable resource lookup if self._has_variable_resources: @@ -1321,6 +1325,7 @@ def _rebuild(self) -> None: self._hyperdb = None self._plain_resources.clear() self._prefix_resources.clear() + self._max_prefix_cardinality = 0 patterns: list[bytes] = [] ids: list[int] = [] for id_, resource in enumerate(self._resources): @@ -1338,9 +1343,12 @@ def _rebuild(self) -> None: # There may be multiple resources for a prefix # so we keep them in a list to ensure that registration # order is respected. - self._prefix_resources.setdefault(prefix.rstrip("/") or "/", []).append( - resource + parts = tuple(prefix.split("/")[1:]) + self._prefix_resources.setdefault(parts, []).append(resource) + self._max_prefix_cardinality = max( + self._max_prefix_cardinality, len(parts) ) + # breakpoint() else: raise RuntimeError(f"Unsupported resource type {type(resource)}") From a8bed537634a8e39aa9f61303a2ea1aa94d588ce Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Mon, 18 Nov 2024 13:56:25 +0100 Subject: [PATCH 28/30] relax deps --- requirements/runtime-deps.in | 2 +- requirements/runtime-deps.txt | 2 +- setup.cfg | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements/runtime-deps.in b/requirements/runtime-deps.in index 191f10c08fe..be075fe615a 100644 --- a/requirements/runtime-deps.in +++ b/requirements/runtime-deps.in @@ -7,7 +7,7 @@ async-timeout >= 4.0, < 6.0 ; python_version < "3.11" Brotli; platform_python_implementation == 'CPython' brotlicffi; platform_python_implementation != 'CPython' frozenlist >= 1.1.1 -hyperscan >= 0.7.8; platform_python_implementation == 'CPython' and (sys_platform=="linux" or sys_platform=="darwin") and (python_version < "3.14") +hyperscan >= 0.7.8; platform_python_implementation == 'CPython' and (sys_platform=="linux" or sys_platform=="darwin") multidict >=4.5, < 7.0 propcache >= 0.2.0 yarl >= 1.17.0, < 2.0 diff --git a/requirements/runtime-deps.txt b/requirements/runtime-deps.txt index fe0d01c4774..b2d642febc1 100644 --- a/requirements/runtime-deps.txt +++ b/requirements/runtime-deps.txt @@ -20,7 +20,7 @@ frozenlist==1.5.0 # via # -r requirements/runtime-deps.in # aiosignal -hyperscan == 0.7.8 ; platform_python_implementation == "CPython" and (sys_platform=="linux" or sys_platform=="darwin") and (python_version < "3.14") +hyperscan == 0.7.8 ; platform_python_implementation == "CPython" and (sys_platform=="linux" or sys_platform=="darwin") # via -r requirements/runtime-deps.in idna==3.6 # via yarl diff --git a/setup.cfg b/setup.cfg index 76c764ec360..e9dd87ce2df 100644 --- a/setup.cfg +++ b/setup.cfg @@ -70,7 +70,7 @@ speedups = aiodns >= 3.2.0; sys_platform=="linux" or sys_platform=="darwin" Brotli; platform_python_implementation == 'CPython' brotlicffi; platform_python_implementation != 'CPython' - hyperscan >= 0.7.8; platform_python_implementation == 'CPython' and (sys_platform=="linux" or sys_platform=="darwin") and (python_version < "3.14") + hyperscan >= 0.7.8; platform_python_implementation == 'CPython' and (sys_platform=="linux" or sys_platform=="darwin") [options.packages.find] exclude = From 2ac1eca3b72079cb024997abcbe0c6e21ead9ff0 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Mon, 18 Nov 2024 14:06:12 +0100 Subject: [PATCH 29/30] fix --- aiohttp/web_urldispatcher.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index c9b7591c6ee..bcd4b2cd2a2 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -1045,12 +1045,13 @@ async def resolve(self, request: Request) -> UrlMappingMatchInfo: # registered for the same canonical path, we resolve them in a linear # fashion to ensure registration order is respected. while True: - for prefix_resource in prefix_resources.get(parts, ()): - match_dict, allowed = await prefix_resource.resolve(request) - if match_dict is not None: - return match_dict - else: - allowed_methods |= allowed + if (found_prefix_resources := prefix_resources.get(parts)) is not None: + for prefix_resource in found_prefix_resources: + match_dict, allowed = await prefix_resource.resolve(request) + if match_dict is not None: + return match_dict + else: + allowed_methods |= allowed if len(parts) <= 1: break parts = parts[:-1] From a718f6e9d99dff126948c8e08655535469668441 Mon Sep 17 00:00:00 2001 From: Andrew Svetlov Date: Mon, 18 Nov 2024 14:19:40 +0100 Subject: [PATCH 30/30] fix --- aiohttp/web_urldispatcher.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index bcd4b2cd2a2..5f1b38d7dc4 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -1010,7 +1010,7 @@ def __init__(self) -> None: self._matched_sub_app_resources: List[MatchedSubAppResource] = [] self._hyperdb: Optional[hyperscan.Database] = None # type: ignore[no-any-unimported] self._plain_resources: dict[str, PlainResource] = {} - self._prefix_resources: dict[str, list[PrefixResource]] = {} + self._prefix_resources: dict[tuple[str, ...], list[PrefixResource]] = {} self._max_prefix_cardinality = 0 self._has_variable_resources = True @@ -1045,13 +1045,12 @@ async def resolve(self, request: Request) -> UrlMappingMatchInfo: # registered for the same canonical path, we resolve them in a linear # fashion to ensure registration order is respected. while True: - if (found_prefix_resources := prefix_resources.get(parts)) is not None: - for prefix_resource in found_prefix_resources: - match_dict, allowed = await prefix_resource.resolve(request) - if match_dict is not None: - return match_dict - else: - allowed_methods |= allowed + for prefix_resource in prefix_resources.get(parts, ()): + match_dict, allowed = await prefix_resource.resolve(request) + if match_dict is not None: + return match_dict + else: + allowed_methods |= allowed if len(parts) <= 1: break parts = parts[:-1]