From 1e969abb9cef055c5d4350d60532ee63775a1708 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Mon, 9 Sep 2024 13:29:01 -0500 Subject: [PATCH] Small speed up to normalizing the path (#1137) --- CHANGES/1137.misc.rst | 1 + tests/test_normalize_path.py | 2 ++ tests/test_url.py | 29 +++++++++++++++++++++++++++++ yarl/_url.py | 5 ++++- 4 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 CHANGES/1137.misc.rst diff --git a/CHANGES/1137.misc.rst b/CHANGES/1137.misc.rst new file mode 100644 index 000000000..a5abe16e4 --- /dev/null +++ b/CHANGES/1137.misc.rst @@ -0,0 +1 @@ +Improved performance of normalizing paths -- by :user:`bdraco`. diff --git a/tests/test_normalize_path.py b/tests/test_normalize_path.py index defc4d8dd..20d89818c 100644 --- a/tests/test_normalize_path.py +++ b/tests/test_normalize_path.py @@ -8,6 +8,7 @@ ("/", "/"), ("//", "//"), ("///", "///"), + ("path", "path"), # Single-dot ("path/to", "path/to"), ("././path/to", "path/to"), @@ -15,6 +16,7 @@ ("path/././to", "path/to"), ("path/to/.", "path/to/"), ("path/to/./.", "path/to/"), + ("/path/to/.", "/path/to/"), # Double-dots ("../path/to", "path/to"), ("path/../to", "to"), diff --git a/tests/test_url.py b/tests/test_url.py index 5f6e033cb..085cf78d8 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -993,6 +993,35 @@ def test_joinpath_path_starting_from_slash_is_forbidden(): assert url.joinpath("/to/others") +PATHS = [ + # No dots + ("", ""), + ("path", "path"), + # Single-dot + ("path/to", "path/to"), + ("././path/to", "path/to"), + ("path/./to", "path/to"), + ("path/././to", "path/to"), + ("path/to/.", "path/to/"), + ("path/to/./.", "path/to/"), + # Double-dots + ("../path/to", "path/to"), + ("path/../to", "to"), + ("path/../../to", "to"), + # Non-ASCII characters + ("μονοπάτι/../../να/ᴜɴɪ/ᴄᴏᴅᴇ", "να/ᴜɴɪ/ᴄᴏᴅᴇ"), + ("μονοπάτι/../../να/𝕦𝕟𝕚/𝕔𝕠𝕕𝕖/.", "να/𝕦𝕟𝕚/𝕔𝕠𝕕𝕖/"), +] + + +@pytest.mark.parametrize("original,expected", PATHS) +def test_join_path_normalized(original: str, expected: str) -> None: + """Test that joinpath normalizes paths.""" + base_url = URL("http://example.com") + new_url = base_url.joinpath(original) + assert new_url.path == f"/{expected}" + + # with_path diff --git a/yarl/_url.py b/yarl/_url.py index b73562259..effe524e9 100644 --- a/yarl/_url.py +++ b/yarl/_url.py @@ -899,9 +899,12 @@ def _make_child(self, paths: "Sequence[str]", encoded: bool = False) -> "URL": @classmethod def _normalize_path(cls, path: str) -> str: # Drop '.' and '..' from str path + if "." not in path: + # No need to normalize if there are no '.' or '..' segments + return path prefix = "" - if path.startswith("/"): + if path and path[0] == "/": # preserve the "/" root element of absolute paths, copying it to the # normalised output as per sections 5.2.4 and 6.2.2.3 of rfc3986. prefix = "/"