From ad663a1a5340aa2049b5f2768360368622d59f33 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 23 Sep 2024 11:56:35 -0500 Subject: [PATCH 01/13] Safe path test --- yarl/_url.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/yarl/_url.py b/yarl/_url.py index 01f9d8fab..d0332c5af 100644 --- a/yarl/_url.py +++ b/yarl/_url.py @@ -225,7 +225,8 @@ class URL: _FRAGMENT_REQUOTER = _Quoter(safe="?/:@") _UNQUOTER = _Unquoter() - _PATH_UNQUOTER = _Unquoter(ignore="/", unsafe="+") + _PATH_UNQUOTER = _Unquoter(unsafe="+") + _SAFE_PATH_UNQUOTER = _Unquoter(ignore="/%", unsafe="+") _QS_UNQUOTER = _Unquoter(qs=True) _val: SplitResult @@ -710,6 +711,17 @@ def path(self) -> str: """ return self._PATH_UNQUOTER(self.raw_path) + @cached_property + def safe_path(self) -> str: + """Decoded path of URL. + + / for absolute URLs without path part. + + / (%2F) and % (%25) are not decoded + + """ + return self._SAFE_PATH_UNQUOTER(self.raw_path) + @cached_property def _parsed_query(self) -> List[Tuple[str, str]]: """Parse query part of URL.""" From ea20dd8ee7c7decc1226709f9252e1d8439ef84e Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 23 Sep 2024 11:57:36 -0500 Subject: [PATCH 02/13] Update yarl/_url.py --- yarl/_url.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yarl/_url.py b/yarl/_url.py index d0332c5af..bf827e54d 100644 --- a/yarl/_url.py +++ b/yarl/_url.py @@ -712,7 +712,7 @@ def path(self) -> str: return self._PATH_UNQUOTER(self.raw_path) @cached_property - def safe_path(self) -> str: + def path_safe(self) -> str: """Decoded path of URL. / for absolute URLs without path part. From 307f894e01f68f5af3805f6d117eaad7db6e77ae Mon Sep 17 00:00:00 2001 From: "J. 
Nick Koston" Date: Mon, 23 Sep 2024 11:57:51 -0500 Subject: [PATCH 03/13] Update yarl/_url.py --- yarl/_url.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yarl/_url.py b/yarl/_url.py index bf827e54d..53d0d0435 100644 --- a/yarl/_url.py +++ b/yarl/_url.py @@ -226,7 +226,7 @@ class URL: _UNQUOTER = _Unquoter() _PATH_UNQUOTER = _Unquoter(unsafe="+") - _SAFE_PATH_UNQUOTER = _Unquoter(ignore="/%", unsafe="+") + _PATH_SAFE_UNQUOTER = _Unquoter(ignore="/%", unsafe="+") _QS_UNQUOTER = _Unquoter(qs=True) _val: SplitResult From ba815c24d77f00242bbfd959f11c38033a6cc933 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 23 Sep 2024 12:03:56 -0500 Subject: [PATCH 04/13] fix --- yarl/_url.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yarl/_url.py b/yarl/_url.py index 53d0d0435..26da688ac 100644 --- a/yarl/_url.py +++ b/yarl/_url.py @@ -720,7 +720,7 @@ def path_safe(self) -> str: / (%2F) and % (%25) are not decoded """ - return self._SAFE_PATH_UNQUOTER(self.raw_path) + return self._PATH_SAFE_UNQUOTER(self.raw_path) @cached_property def _parsed_query(self) -> List[Tuple[str, str]]: From 64ee23aec00d540930465c2c9a9b1fe880382383 Mon Sep 17 00:00:00 2001 From: Sam Bull Date: Mon, 23 Sep 2024 18:31:12 +0100 Subject: [PATCH 05/13] Update api.rst --- docs/api.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docs/api.rst b/docs/api.rst index b9e950b52..1293603d3 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -270,6 +270,23 @@ There are two kinds of properties: *decoded* and *encoded* (with >>> URL('http://example.com').path '/' + .. warning:: + + In many situations it is important to distinguish between path separators + (a literal ``/``) and other forward slashes (a literal `%2F`). Use + :attr:`URL.path_safe` for these cases. + +.. attribute:: URL.path_safe + + Similar to :attr:`URL.path` except it doesn't decode ``%2F`` or ``%25``. 
+ This allows distinguishing between path separators (``/``) and encoded + slashes (``%2F``). + + Note that ``%25`` is also not decoded to avoid issues with double unquoting + of values. e.g. You can unquote the value with + ``URL.path_safe.replace("%2F", "/").replace("%25", "%")`` to get the same + result as :meth:`URL.path_safe`. If the `%25` was unqouted, it would be + impossible to tell the difference between ``%2F`` and ``%252F``. .. attribute:: URL.path_qs From e6040e66cf17b9edcb9a7ee57b59b450742f5ae2 Mon Sep 17 00:00:00 2001 From: Sam Bull Date: Mon, 23 Sep 2024 18:55:42 +0100 Subject: [PATCH 06/13] Update docs/api.rst --- docs/api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api.rst b/docs/api.rst index 1293603d3..e57102861 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -285,7 +285,7 @@ There are two kinds of properties: *decoded* and *encoded* (with Note that ``%25`` is also not decoded to avoid issues with double unquoting of values. e.g. You can unquote the value with ``URL.path_safe.replace("%2F", "/").replace("%25", "%")`` to get the same - result as :meth:`URL.path_safe`. If the `%25` was unqouted, it would be + result as :meth:`URL.path_safe`. If the `%25` was unquoted, it would be impossible to tell the difference between ``%2F`` and ``%252F``. .. attribute:: URL.path_qs From 7674f6acabe0ed3520e0e067a9f171ce32fbce12 Mon Sep 17 00:00:00 2001 From: "J. 
Nick Koston" Date: Mon, 23 Sep 2024 13:10:30 -0500 Subject: [PATCH 07/13] Update yarl/_url.py --- yarl/_url.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yarl/_url.py b/yarl/_url.py index 26da688ac..a80c3192e 100644 --- a/yarl/_url.py +++ b/yarl/_url.py @@ -225,7 +225,7 @@ class URL: _FRAGMENT_REQUOTER = _Quoter(safe="?/:@") _UNQUOTER = _Unquoter() - _PATH_UNQUOTER = _Unquoter(unsafe="+") + _PATH_UNQUOTER = _Unquoter(ignore="/", unsafe="+") _PATH_SAFE_UNQUOTER = _Unquoter(ignore="/%", unsafe="+") _QS_UNQUOTER = _Unquoter(qs=True) From 307a588f4bef306ff462750af67da3f573003443 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 23 Sep 2024 13:24:32 -0500 Subject: [PATCH 08/13] lint --- docs/api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api.rst b/docs/api.rst index e57102861..d8fdd2bec 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -273,7 +273,7 @@ There are two kinds of properties: *decoded* and *encoded* (with .. warning:: In many situations it is important to distinguish between path separators - (a literal ``/``) and other forward slashes (a literal `%2F`). Use + (a literal ``/``) and other forward slashes (a literal ``%2F``). Use :attr:`URL.path_safe` for these cases. .. attribute:: URL.path_safe From 5dfceb358fa35f84cebe745f4ae46b55bb10833f Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 23 Sep 2024 13:28:04 -0500 Subject: [PATCH 09/13] another --- docs/api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api.rst b/docs/api.rst index d8fdd2bec..b5492d2f7 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -285,7 +285,7 @@ There are two kinds of properties: *decoded* and *encoded* (with Note that ``%25`` is also not decoded to avoid issues with double unquoting of values. e.g. You can unquote the value with ``URL.path_safe.replace("%2F", "/").replace("%25", "%")`` to get the same - result as :meth:`URL.path_safe`. 
If the `%25` was unquoted, it would be + result as :meth:`URL.path_safe`. If the ``%25`` was unquoted, it would be impossible to tell the difference between ``%2F`` and ``%252F``. .. attribute:: URL.path_qs From bcf8927f80c2068efb4d47d881c662f88d66b047 Mon Sep 17 00:00:00 2001 From: Sam Bull Date: Mon, 23 Sep 2024 19:28:19 +0100 Subject: [PATCH 10/13] Update test_url.py --- tests/test_url.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/test_url.py b/tests/test_url.py index 085cf78d8..68d368390 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -352,6 +352,22 @@ def test_path_with_2F(): assert url.path == "/foo/bar%2Fbaz" +def test_path_safe_with_2F(): + """Path should not decode %2F, otherwise it may look like a path separator.""" + + url = URL("http://example.com/foo/bar%2fbaz") + assert url.path_safe == "/foo/bar%2Fbaz" + + +def test_path_safe_with_25(): + """Path should not decode %25, otherwise it is prone to double unquoting.""" + + url = URL("http://example.com/foo/bar%252Fbaz") + assert url.path_safe == "/foo/bar%252Fbaz" + unquoted = url.path_safe.replace("%2F", "/").replace("%25", "%") + assert unquoted == "/foo/bar%2Fbaz" + + def test_raw_path_for_empty_url(): url = URL() assert "" == url.raw_path From 3cbd087089a4ac485c29023a2f16e5f14c07552e Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 23 Sep 2024 13:30:12 -0500 Subject: [PATCH 11/13] changelog --- CHANGES/1150.feature.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 CHANGES/1150.feature.rst diff --git a/CHANGES/1150.feature.rst b/CHANGES/1150.feature.rst new file mode 100644 index 000000000..fbcb1f323 --- /dev/null +++ b/CHANGES/1150.feature.rst @@ -0,0 +1 @@ +Added :attr:`~yarl.URL.path_safe` to be able to fetch the path without ``%2F`` and ``%25`` decoded -- by :user:`bdraco`. 
From 4cc6c8bb19d0a65c41a886f4a1324ac50aedad59 Mon Sep 17 00:00:00 2001 From: Sam Bull Date: Mon, 23 Sep 2024 19:39:26 +0100 Subject: [PATCH 12/13] Update docs/api.rst --- docs/api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api.rst b/docs/api.rst index b5492d2f7..b552e4f41 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -285,7 +285,7 @@ There are two kinds of properties: *decoded* and *encoded* (with Note that ``%25`` is also not decoded to avoid issues with double unquoting of values. e.g. You can unquote the value with ``URL.path_safe.replace("%2F", "/").replace("%25", "%")`` to get the same - result as :meth:`URL.path_safe`. If the ``%25`` was unquoted, it would be + result as :meth:`URL.path`. If the ``%25`` was unquoted, it would be impossible to tell the difference between ``%2F`` and ``%252F``. .. attribute:: URL.path_qs From d65dea6f2573a5a51ee44ae5486bb799549a6a3a Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 23 Sep 2024 13:54:22 -0500 Subject: [PATCH 13/13] add round trip test --- tests/test_url.py | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/tests/test_url.py b/tests/test_url.py index 4c846144f..1dbdd80a0 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -1,5 +1,5 @@ from enum import Enum -from urllib.parse import SplitResult +from urllib.parse import SplitResult, quote, unquote import pytest @@ -353,14 +353,14 @@ def test_path_with_2F(): def test_path_safe_with_2F(): - """Path should not decode %2F, otherwise it may look like a path separator.""" + """Path safe should not decode %2F, otherwise it may look like a path separator.""" url = URL("http://example.com/foo/bar%2fbaz") assert url.path_safe == "/foo/bar%2Fbaz" def test_path_safe_with_25(): - """Path should not decode %25, otherwise it is prone to double unquoting.""" + """Path safe should not decode %25, otherwise it is prone to double unquoting.""" url = URL("http://example.com/foo/bar%252Fbaz") 
assert url.path_safe == "/foo/bar%252Fbaz" @@ -368,6 +368,26 @@ def test_path_safe_with_25(): assert unquoted == "/foo/bar%2Fbaz" +@pytest.mark.parametrize( + "original_path", + [ + "m+@bar/baz", + "m%2B@bar/baz", + "m%252B@bar/baz", + "m%2F@bar/baz", + ], +) +def test_path_safe_only_round_trips(original_path: str) -> None: + """Path safe can round trip with documented decode method.""" + encoded_once = quote(original_path, safe="") + encoded_twice = quote(encoded_once, safe="") + + url = URL(f"http://example.com/{encoded_twice}") + unquoted = url.path_safe.replace("%2F", "/").replace("%25", "%") + assert unquoted == f"/{encoded_once}" + assert unquote(unquoted) == f"/{original_path}" + + def test_raw_path_for_empty_url(): url = URL() assert "" == url.raw_path