diff --git a/CHANGES/7829.misc b/CHANGES/7829.misc new file mode 100644 index 00000000000..9eb060f4713 --- /dev/null +++ b/CHANGES/7829.misc @@ -0,0 +1,3 @@ +Improved URL handler resolution time by indexing resources in the UrlDispatcher. +For applications with a large number of handlers, this should increase performance significantly. +-- by :user:`bdraco` diff --git a/aiohttp/web_urldispatcher.py b/aiohttp/web_urldispatcher.py index 5c8032364c2..36b8ea8a550 100644 --- a/aiohttp/web_urldispatcher.py +++ b/aiohttp/web_urldispatcher.py @@ -716,13 +716,20 @@ class PrefixedSubAppResource(PrefixResource): def __init__(self, prefix: str, app: "Application") -> None: super().__init__(prefix) self._app = app - for resource in app.router.resources(): - resource.add_prefix(prefix) + self._add_prefix_to_resources(prefix) def add_prefix(self, prefix: str) -> None: super().add_prefix(prefix) - for resource in self._app.router.resources(): + self._add_prefix_to_resources(prefix) + + def _add_prefix_to_resources(self, prefix: str) -> None: + router = self._app.router + for resource in router.resources(): + # Since the canonical path of a resource is about + # to change, we need to unindex it and then reindex + router.unindex_resource(resource) resource.add_prefix(prefix) + router.index_resource(resource) def url_for(self, *args: str, **kwargs: str) -> URL: raise RuntimeError(".url_for() is not supported " "by sub-application root") @@ -731,11 +738,6 @@ def get_info(self) -> _InfoDict: return {"app": self._app, "prefix": self._prefix} async def resolve(self, request: Request) -> _Resolve: - if ( - not request.url.raw_path.startswith(self._prefix2) - and request.url.raw_path != self._prefix - ): - return None, set() match_info = await self._app.router.resolve(request) match_info.add_app(self._app) if isinstance(match_info.http_exception, HTTPMethodNotAllowed): @@ -974,17 +976,45 @@ def __contains__(self, route: object) -> bool: class UrlDispatcher(AbstractRouter, Mapping[str, AbstractResource]): NAME_SPLIT_RE = re.compile(r"[.:-]") + HTTP_NOT_FOUND = HTTPNotFound() def __init__(self) -> None: super().__init__() self._resources: List[AbstractResource] = [] self._named_resources: Dict[str, AbstractResource] = {} + self._resource_index: dict[str, list[AbstractResource]] = {} + self._matched_sub_app_resources: List[MatchedSubAppResource] = [] async def resolve(self, request: Request) -> UrlMappingMatchInfo: - method = request.method + resource_index = self._resource_index allowed_methods: Set[str] = set() - for resource in self._resources: + # Walk the url parts looking for candidates. We walk the url backwards + # to ensure the most explicit match is found first. If there are multiple + # candidates for a given url part because there are multiple resources + # registered for the same canonical path, we resolve them in a linear + # fashion to ensure registration order is respected. + url_part = request.rel_url.raw_path + while url_part: + for candidate in resource_index.get(url_part, ()): + match_dict, allowed = await candidate.resolve(request) + if match_dict is not None: + return match_dict + else: + allowed_methods |= allowed + if url_part == "/": + break + url_part = url_part.rpartition("/")[0] or "/" + + # + # We didn't find any candidates, so we'll try the matched sub-app + # resources which we have to walk in a linear fashion because they + # have regex/wildcard match rules and we cannot index them. + # + # For most cases we do not expect there to be many of these since + # currently they are only added by `add_domain` + # + for resource in self._matched_sub_app_resources: match_dict, allowed = await resource.resolve(request) if match_dict is not None: return match_dict @@ -992,9 +1022,9 @@ async def resolve(self, request: Request) -> UrlMappingMatchInfo: allowed_methods |= allowed if allowed_methods: - return MatchInfoError(HTTPMethodNotAllowed(method, allowed_methods)) - else: - return MatchInfoError(HTTPNotFound()) + return MatchInfoError(HTTPMethodNotAllowed(request.method, allowed_methods)) + + return MatchInfoError(self.HTTP_NOT_FOUND) def __iter__(self) -> Iterator[str]: return iter(self._named_resources) @@ -1050,6 +1080,30 @@ def register_resource(self, resource: AbstractResource) -> None: self._named_resources[name] = resource self._resources.append(resource) + if isinstance(resource, MatchedSubAppResource): + # We cannot index match sub-app resources because they have match rules + self._matched_sub_app_resources.append(resource) + else: + self.index_resource(resource) + + def _get_resource_index_key(self, resource: AbstractResource) -> str: + """Return a key to index the resource in the resource index.""" + # strip at the first { to allow for variables + return resource.canonical.partition("{")[0].rstrip("/") or "/" + + def index_resource(self, resource: AbstractResource) -> None: + """Add a resource to the resource index.""" + resource_key = self._get_resource_index_key(resource) + # There may be multiple resources for a canonical path + # so we keep them in a list to ensure that registration + # order is respected. + self._resource_index.setdefault(resource_key, []).append(resource) + + def unindex_resource(self, resource: AbstractResource) -> None: + """Remove a resource from the resource index.""" + resource_key = self._get_resource_index_key(resource) + self._resource_index[resource_key].remove(resource) + def add_resource(self, path: str, *, name: Optional[str] = None) -> Resource: if path and not path.startswith("/"): raise ValueError("path should be started with / or be empty") diff --git a/docs/web_reference.rst b/docs/web_reference.rst index 6652edb8490..f84b06d8045 100644 --- a/docs/web_reference.rst +++ b/docs/web_reference.rst @@ -1865,20 +1865,38 @@ unique *name* and at least one :term:`route`. :term:`web-handler` lookup is performed in the following way: -1. Router iterates over *resources* one-by-one. -2. If *resource* matches to requested URL the resource iterates over - own *routes*. -3. If route matches to requested HTTP method (or ``'*'`` wildcard) the - route's handler is used as found :term:`web-handler`. The lookup is - finished. -4. Otherwise router tries next resource from the *routing table*. -5. If the end of *routing table* is reached and no *resource* / - *route* pair found the *router* returns special :class:`~aiohttp.abc.AbstractMatchInfo` +1. The router splits the URL and checks the index from longest to shortest. + For example, '/one/two/three' will first check the index for + '/one/two/three', then '/one/two' and finally '/'. +2. If the URL part is found in the index, the list of routes for + that URL part is iterated over. If a route matches to requested HTTP + method (or ``'*'`` wildcard) the route's handler is used as the chosen + :term:`web-handler`. The lookup is finished. +3. If the route is not found in the index, the router tries to find + the route in the list of :class:`~aiohttp.web.MatchedSubAppResource`, + (current only created from :meth:`~aiohttp.web.Application.add_domain`), + and will iterate over the list of + :class:`~aiohttp.web.MatchedSubAppResource` in a linear fashion + until a match is found. +4. If no *resource* / *route* pair was found, the *router* + returns the special :class:`~aiohttp.abc.AbstractMatchInfo` instance with :attr:`aiohttp.abc.AbstractMatchInfo.http_exception` is not ``None`` but :exc:`HTTPException` with either *HTTP 404 Not Found* or *HTTP 405 Method Not Allowed* status code. Registered :meth:`~aiohttp.abc.AbstractMatchInfo.handler` raises this exception on call. +Fixed paths are preferred over variable paths. For example, +if you have two routes ``/a/b`` and ``/a/{name}``, then the first +route will always be preferred over the second one. + +If there are multiple dynamic paths with the same fixed prefix, +they will be resolved in order of registration. + +For example, if you have two dynamic routes that are prefixed +with the fixed ``/users`` path such as ``/users/{x}/{y}/z`` and +``/users/{x}/y/z``, the first one will be preferred over the +second one. + User should never instantiate resource classes but give it by :meth:`UrlDispatcher.add_resource` call. @@ -1900,7 +1918,10 @@ Resource classes hierarchy:: Resource PlainResource DynamicResource + PrefixResource StaticResource + PrefixedSubAppResource + MatchedSubAppResource .. class:: AbstractResource diff --git a/tests/test_urldispatch.py b/tests/test_urldispatch.py index 5f5687eacc3..adb1e52e781 100644 --- a/tests/test_urldispatch.py +++ b/tests/test_urldispatch.py @@ -1264,10 +1264,17 @@ async def test_prefixed_subapp_overlap(app: Any) -> None: subapp2.router.add_get("/b", handler2) app.add_subapp("/ss", subapp2) + subapp3 = web.Application() + handler3 = make_handler() + subapp3.router.add_get("/c", handler3) + app.add_subapp("/s/s", subapp3) + match_info = await app.router.resolve(make_mocked_request("GET", "/s/a")) assert match_info.route.handler is handler1 match_info = await app.router.resolve(make_mocked_request("GET", "/ss/b")) assert match_info.route.handler is handler2 + match_info = await app.router.resolve(make_mocked_request("GET", "/s/s/c")) + assert match_info.route.handler is handler3 async def test_prefixed_subapp_empty_route(app: Any) -> None: diff --git a/tests/test_web_urldispatcher.py b/tests/test_web_urldispatcher.py index 79bd42a3a01..716fa78fd4a 100644 --- a/tests/test_web_urldispatcher.py +++ b/tests/test_web_urldispatcher.py @@ -10,7 +10,7 @@ from aiohttp import web from aiohttp.pytest_plugin import AiohttpClient -from aiohttp.web_urldispatcher import SystemRoute +from aiohttp.web_urldispatcher import Resource, SystemRoute @pytest.mark.parametrize( @@ -142,7 +142,6 @@ async def test_access_to_the_file_with_spaces( r = await client.get(url) assert r.status == 200 assert (await r.text()) == data - await r.release() async def test_access_non_existing_resource( @@ -544,3 +543,86 @@ async def handler(request: web.Request) -> web.Response: r = await client.get(yarl.URL(urlencoded_path, encoded=True)) assert r.status == expected_http_resp_status await r.release() + + +async def test_order_is_preserved(aiohttp_client: AiohttpClient) -> None: + """Test route order is preserved. + + Note that fixed/static paths are always preferred over a regex path. + """ + app = web.Application() + + async def handler(request: web.Request) -> web.Response: + assert isinstance(request.match_info._route.resource, Resource) + return web.Response(text=request.match_info._route.resource.canonical) + + app.router.add_get("/first/x/{b}/", handler) + app.router.add_get(r"/first/{x:.*/b}", handler) + + app.router.add_get(r"/second/{user}/info", handler) + app.router.add_get("/second/bob/info", handler) + + app.router.add_get("/third/bob/info", handler) + app.router.add_get(r"/third/{user}/info", handler) + + app.router.add_get(r"/forth/{name:\d+}", handler) + app.router.add_get("/forth/42", handler) + + app.router.add_get("/fifth/42", handler) + app.router.add_get(r"/fifth/{name:\d+}", handler) + + client = await aiohttp_client(app) + + r = await client.get("/first/x/b/") + assert r.status == 200 + assert await r.text() == "/first/x/{b}/" + + r = await client.get("/second/frank/info") + assert r.status == 200 + assert await r.text() == "/second/{user}/info" + + # Fixed/static paths are always preferred over regex paths + r = await client.get("/second/bob/info") + assert r.status == 200 + assert await r.text() == "/second/bob/info" + + r = await client.get("/third/bob/info") + assert r.status == 200 + assert await r.text() == "/third/bob/info" + + r = await client.get("/third/frank/info") + assert r.status == 200 + assert await r.text() == "/third/{user}/info" + + r = await client.get("/forth/21") + assert r.status == 200 + assert await r.text() == "/forth/{name}" + + # Fixed/static paths are always preferred over regex paths + r = await client.get("/forth/42") + assert r.status == 200 + assert await r.text() == "/forth/42" + + r = await client.get("/fifth/21") + assert r.status == 200 + assert await r.text() == "/fifth/{name}" + + r = await client.get("/fifth/42") + assert r.status == 200 + assert await r.text() == "/fifth/42" + + +async def test_url_with_many_slashes(aiohttp_client: AiohttpClient) -> None: + app = web.Application() + + class MyView(web.View): + async def get(self) -> web.Response: + return web.Response() + + app.router.add_routes([web.view("/a", MyView)]) + + client = await aiohttp_client(app) + + r = await client.get("///a") + assert r.status == 200 + await r.release()