From cf080332160558c948a00cae9ce43d1d24ccabb0 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sat, 5 Oct 2024 09:51:40 -0500 Subject: [PATCH] Improve performance of building the origin (#1175) --- CHANGES/1175.misc.rst | 1 + tests/test_url.py | 5 +++++ yarl/_url.py | 40 +++++++++++++++++++++++++--------------- 3 files changed, 31 insertions(+), 15 deletions(-) create mode 100644 CHANGES/1175.misc.rst diff --git a/CHANGES/1175.misc.rst b/CHANGES/1175.misc.rst new file mode 100644 index 000000000..8d998cf1c --- /dev/null +++ b/CHANGES/1175.misc.rst @@ -0,0 +1 @@ +Improved performance of the :py:meth:`~yarl.URL.origin` method -- by :user:`bdraco`. diff --git a/tests/test_url.py b/tests/test_url.py index 9947b9501..05ac20917 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -63,6 +63,11 @@ def test_origin(): assert URL("http://example.com:8888") == url.origin() +def test_origin_with_no_auth(): + url = URL("http://example.com:8888/path/to?a=1&b=2") + assert URL("http://example.com:8888") == url.origin() + + def test_origin_nonascii(): url = URL("http://user:password@оун-упа.укр:8888/path/to?a=1&b=2") assert str(url.origin()) == "http://xn----8sb1bdhvc.xn--j1amh:8888" diff --git a/yarl/_url.py b/yarl/_url.py index bac600d7c..85cb41250 100644 --- a/yarl/_url.py +++ b/yarl/_url.py @@ -95,6 +95,7 @@ class _SplitResultDict(TypedDict, total=False): class _InternalURLCache(TypedDict, total=False): + _origin: "URL" absolute: bool scheme: str raw_authority: str @@ -543,13 +544,24 @@ def origin(self) -> "URL": """ # TODO: add a keyword-only option for keeping user/pass maybe? - if not self.absolute: + return self._origin + + @cached_property + def _origin(self) -> "URL": + """Return an URL with scheme, host and port parts only. + + user, password, path, query and fragment are removed. + """ + v = self._val + if not v.netloc: raise ValueError("URL should be absolute") - if not self._val.scheme: + if not v.scheme: raise ValueError("URL should have scheme") - v = self._val - netloc = self._make_netloc(None, None, v.hostname, v.port) - val = v._replace(netloc=netloc, path="", query="", fragment="") + if "@" not in v.netloc: + val = v._replace(path="", query="", fragment="") + else: + netloc = self._make_netloc(None, None, v.hostname, v.port) + val = v._replace(netloc=netloc, path="", query="", fragment="") return URL(val, encoded=True) def relative(self) -> "URL": @@ -1052,26 +1064,24 @@ def _make_netloc( ) -> str: if host is None: return "" - quoter = cls._REQUOTER if requote else cls._QUOTER - if encode_host: - ret = cls._encode_host(host) - else: - ret = host + ret = cls._encode_host(host) if encode_host else host if port is not None: ret = f"{ret}:{port}" + if user is None and password is None: + return ret + quoter = cls._REQUOTER if requote else cls._QUOTER if password is not None: if not user: user = "" - else: - if encode: - user = quoter(user) + elif encode: + user = quoter(user) if encode: password = quoter(password) - user = user + ":" + password + user = f"{user}:{password}" elif user and encode: user = quoter(user) if user: - ret = user + "@" + ret + ret = f"{user}@{ret}" return ret @classmethod