Skip to content

Commit 8ff69ae

Browse files
committed
Normalize URL paths: convert /.//p, /..//p, and //p to p
1 parent 7492360 commit 8ff69ae

File tree

3 files changed

+93
-5
lines changed

3 files changed

+93
-5
lines changed

url/src/lib.rs

+53-1
Original file line numberDiff line numberDiff line change
@@ -1757,6 +1757,39 @@ impl Url {
17571757
let old_after_path_pos = to_u32(self.serialization.len()).unwrap();
17581758
let cannot_be_a_base = self.cannot_be_a_base();
17591759
let scheme_type = SchemeType::from(self.scheme());
1760+
let mut path_empty = false;
1761+
1762+
// Check ':' and then see if the next character is '/'
1763+
let mut has_host = if let Some(index) = self.serialization.find(":") {
1764+
if self.serialization.len() > index + 1
1765+
&& self.serialization.as_bytes().get(index + 1) == Some(&b'/')
1766+
{
1767+
let rest = &self.serialization[(index + ":/".len())..];
1768+
let host_part = rest.split('/').next().unwrap_or("");
1769+
path_empty = rest.is_empty();
1770+
!host_part.is_empty() && !host_part.contains('@')
1771+
} else {
1772+
false
1773+
}
1774+
} else {
1775+
false
1776+
};
1777+
1778+
// Ensure the combined length of the old and new paths is greater than 1 to account
1779+
// for cases where "/." has already been added by serialization.
1780+
// If we set path, then we already checked the other two conditions:
1781+
// https://url.spec.whatwg.org/#url-serializing
1782+
// 1. The host is null
1783+
// 2. The first segment of the URL's path is an empty string
1784+
if self.path().len() + path.len() > 1 {
1785+
if let Some(index) = self.serialization.find(":") {
1786+
let removal_start = index + ":".len();
1787+
if self.serialization[removal_start..].starts_with("/.") {
1788+
self.path_start = removal_start as u32;
1789+
}
1790+
}
1791+
}
1792+
17601793
self.serialization.truncate(self.path_start as usize);
17611794
self.mutate(|parser| {
17621795
if cannot_be_a_base {
@@ -1766,14 +1799,33 @@ impl Url {
17661799
}
17671800
parser.parse_cannot_be_a_base_path(parser::Input::new_no_trim(path));
17681801
} else {
1769-
let mut has_host = true; // FIXME
17701802
parser.parse_path_start(
17711803
scheme_type,
17721804
&mut has_host,
17731805
parser::Input::new_no_trim(path),
17741806
);
17751807
}
17761808
});
1809+
1810+
// For cases where normalization is applied across both the serialization and the path.
1811+
// Append "/." immediately after the scheme (up to ":")
1812+
// This is done if three conditions are met.
1813+
// https://url.spec.whatwg.org/#url-serializing
1814+
// 1. The host is null
1815+
// 2. The url's path length is greater than 1
1816+
// 3. The first segment of the URL's path is an empty string
1817+
if !has_host && path.len() > 1 && path_empty {
1818+
if let Some(index) = self.serialization.find(":") {
1819+
if self.serialization.len() > index + 2
1820+
&& self.serialization.as_bytes().get(index + 1) == Some(&b'/')
1821+
&& self.serialization.as_bytes().get(index + 2) == Some(&b'/')
1822+
{
1823+
self.serialization.insert_str(index + ":".len(), "/.");
1824+
self.path_start += "/.".len() as u32;
1825+
}
1826+
}
1827+
}
1828+
17771829
self.restore_after_path(old_after_path_pos, &after_path);
17781830
}
17791831

url/tests/expected_failures.txt

-4
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,3 @@
4343
<file://monkey/> set pathname to <\\\\>
4444
<file:///unicorn> set pathname to <//\\/>
4545
<file:///unicorn> set pathname to <//monkey/..//>
46-
<non-spec:/> set pathname to </.//p>
47-
<non-spec:/> set pathname to </..//p>
48-
<non-spec:/> set pathname to <//p>
49-
<non-spec:/.//> set pathname to <p>

url/tests/unit.rs

+40
Original file line numberDiff line numberDiff line change
@@ -1379,3 +1379,43 @@ fn serde_error_message() {
13791379
r#"relative URL without a base: "§invalid#+#*Ä" at line 1 column 25"#
13801380
);
13811381
}
1382+
// Regression test for path setting on non-special URLs without a host.
// It pins the exact serialization after `set_path`, after pushing a path
// segment, and after changing the hostname through `quirks::set_hostname`,
// covering paths whose first segment merely starts with "." as well as
// paths that begin with an empty segment ("//...").
1383+
#[test]
1384+
fn test_fuzzing_uri_failures() {
1385+
use url::quirks;
1386+
let mut url = Url::parse("data:/.dummy.path").unwrap();
1387+
assert!(!url.cannot_be_a_base());
1388+
// A path whose first segment starts with "." makes the serialization
// contain "/." right after the scheme; setting the path must keep that
// segment intact rather than treating "/." as a removable prefix.
1389+
url.set_path(".dummy.path");
1390+
assert_eq!(url.as_str(), "data:/.dummy.path");
1391+
assert_eq!(url.path(), "/.dummy.path");
1392+
url.check_invariants().unwrap();
1393+
// Pushing another dot-prefixed segment appends it after the existing path.
1394+
url.path_segments_mut()
1395+
.expect("should have path segments")
1396+
.push(".another.dummy.path");
1397+
assert_eq!(url.as_str(), "data:/.dummy.path/.another.dummy.path");
1398+
assert_eq!(url.path(), "/.dummy.path/.another.dummy.path");
1399+
url.check_invariants().unwrap();
1400+
// Second scenario: start again from a host-less URL whose path is just "/".
1401+
url = Url::parse("web+demo:/").unwrap();
1402+
assert!(!url.cannot_be_a_base());
1403+
// Set a path that begins with an empty segment; `path()` reports it unchanged.
1404+
url.set_path("//.dummy.path");
1405+
assert_eq!(url.path(), "//.dummy.path");
1406+
// The segments are an empty first segment followed by the name, while the
// full serialization carries an extra "/." between the scheme and the path.
// NOTE(review): presumably this is the "/." the WHATWG URL serializer emits
// for a null host with a leading empty path segment - confirm against the spec.
1407+
let segments: Vec<_> = url
1408+
.path_segments()
1409+
.expect("should have path segments")
1410+
.collect();
1411+
assert_eq!(segments, vec!["", ".dummy.path"]);
1412+
assert_eq!(url.as_str(), "web+demo:/.//.dummy.path");
1413+
// Once a hostname is set, the "/." is gone and the path follows the host directly.
1414+
quirks::set_hostname(&mut url, ".dummy.host").unwrap();
1415+
assert_eq!(url.as_str(), "web+demo://.dummy.host//.dummy.path");
1416+
url.check_invariants().unwrap();
1417+
// Setting an empty hostname keeps the "//" authority marker, so the
// serialization has four slashes and still no "/." prefix.
1418+
quirks::set_hostname(&mut url, "").unwrap();
1419+
assert_eq!(url.as_str(), "web+demo:////.dummy.path");
1420+
url.check_invariants().unwrap();
1421+
}

0 commit comments

Comments
 (0)