diff --git a/src/host.rs b/src/host.rs index 9afc6d8e7..02bae9e25 100644 --- a/src/host.rs +++ b/src/host.rs @@ -24,9 +24,10 @@ pub(crate) enum HostInternal { Ipv6(Ipv6Addr), } -impl From> for HostInternal { - fn from(host: Host) -> HostInternal { +impl From> for HostInternal { + fn from(host: Host) -> HostInternal { match host { + Host::Domain(ref s) if s.is_empty() => HostInternal::None, Host::Domain(_) => HostInternal::Domain, Host::Ipv4(address) => HostInternal::Ipv4(address), Host::Ipv6(address) => HostInternal::Ipv6(address), diff --git a/src/lib.rs b/src/lib.rs index d60935c29..2ad421d08 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -456,13 +456,15 @@ impl Url { if self.slice(self.scheme_end + 1..).starts_with("//") { // URL with authority - match self.byte_at(self.username_end) { - b':' => { - assert!(self.host_start >= self.username_end + 2); - assert_eq!(self.byte_at(self.host_start - 1), b'@'); + if self.username_end != self.serialization.len() as u32 { + match self.byte_at(self.username_end) { + b':' => { + assert!(self.host_start >= self.username_end + 2); + assert_eq!(self.byte_at(self.host_start - 1), b'@'); + } + b'@' => assert!(self.host_start == self.username_end + 1), + _ => assert_eq!(self.username_end, self.scheme_end + 3), } - b'@' => assert!(self.host_start == self.username_end + 1), - _ => assert_eq!(self.username_end, self.scheme_end + 3), } assert!(self.host_start >= self.username_end); assert!(self.host_end >= self.host_start); @@ -490,7 +492,10 @@ impl Url { Some(port_str.parse::().expect("Couldn't parse port?")) ); } - assert_eq!(self.byte_at(self.path_start), b'/'); + assert!( + self.path_start as usize == self.serialization.len() + || matches!(self.byte_at(self.path_start), b'/' | b'#' | b'?') + ); } else { // Anarchist URL (no authority) assert_eq!(self.username_end, self.scheme_end + 1); @@ -501,11 +506,11 @@ impl Url { assert_eq!(self.path_start, self.scheme_end + 1); } if let Some(start) = self.query_start { - assert!(start > self.path_start); + assert!(start >= self.path_start); assert_eq!(self.byte_at(start), b'?'); } if let Some(start) = self.fragment_start { - assert!(start > self.path_start); + assert!(start >= self.path_start); assert_eq!(self.byte_at(start), b'#'); } if let (Some(query_start), Some(fragment_start)) = (self.query_start, self.fragment_start) { @@ -685,7 +690,7 @@ impl Url { /// ``` #[inline] pub fn cannot_be_a_base(&self) -> bool { - !self.slice(self.path_start..).starts_with('/') + !self.slice(self.scheme_end + 1..).starts_with('/') } /// Return the username for this URL (typically the empty string) @@ -745,7 +750,10 @@ impl Url { pub fn password(&self) -> Option<&str> { // This ':' is not the one marking a port number since a host can not be empty. // (Except for file: URLs, which do not have port numbers.) - if self.has_authority() && self.byte_at(self.username_end) == b':' { + if self.has_authority() + && self.username_end != self.serialization.len() as u32 + && self.byte_at(self.username_end) == b':' + { debug_assert!(self.byte_at(self.host_start - 1) == b'@'); Some(self.slice(self.username_end + 1..self.host_start - 1)) } else { @@ -1226,7 +1234,7 @@ impl Url { if let Some(input) = fragment { self.fragment_start = Some(to_u32(self.serialization.len()).unwrap()); self.serialization.push('#'); - self.mutate(|parser| parser.parse_fragment(parser::Input::new(input))) + self.mutate(|parser| parser.parse_fragment(parser::Input::no_trim(input))) } else { self.fragment_start = None } @@ -1284,7 +1292,12 @@ impl Url { let scheme_type = SchemeType::from(self.scheme()); let scheme_end = self.scheme_end; self.mutate(|parser| { - parser.parse_query(scheme_type, scheme_end, parser::Input::new(input)) + let vfn = parser.violation_fn; + parser.parse_query( + scheme_type, + scheme_end, + parser::Input::trim_tab_and_newlines(input, vfn), + ) }); } @@ -1625,14 +1638,34 @@ impl Url { if host == "" && SchemeType::from(self.scheme()).is_special() { return Err(ParseError::EmptyHost); } + let mut host_substr = host; + // Otherwise, if c is U+003A (:) and the [] flag is unset, then + if !host.starts_with('[') || !host.ends_with(']') { + match host.find(':') { + Some(0) => { + // If buffer is the empty string, validation error, return failure. + return Err(ParseError::InvalidDomainCharacter); + } + // Let host be the result of host parsing buffer + Some(colon_index) => { + host_substr = &host[..colon_index]; + } + None => {} + } + } if SchemeType::from(self.scheme()).is_special() { - self.set_host_internal(Host::parse(host)?, None) + self.set_host_internal(Host::parse(host_substr)?, None); } else { - self.set_host_internal(Host::parse_opaque(host)?, None) + self.set_host_internal(Host::parse_opaque(host_substr)?, None); } } else if self.has_host() { - if SchemeType::from(self.scheme()).is_special() { + let scheme_type = SchemeType::from(self.scheme()); + if scheme_type.is_special() { return Err(ParseError::EmptyHost); + } else { + if self.serialization.len() == self.path_start as usize { + self.serialization.push('/'); + } } debug_assert!(self.byte_at(self.scheme_end) == b':'); debug_assert!(self.byte_at(self.path_start) == b'/'); @@ -1935,14 +1968,28 @@ impl Url { /// /// # fn run() -> Result<(), ParseError> { /// let mut url = Url::parse("https://example.net")?; - /// let result = url.set_scheme("foo"); - /// assert_eq!(url.as_str(), "foo://example.net/"); + /// let result = url.set_scheme("http"); + /// assert_eq!(url.as_str(), "http://example.net/"); /// assert!(result.is_ok()); /// # Ok(()) /// # } /// # run().unwrap(); /// ``` + /// Change the URL’s scheme from `foo` to `bar`: /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let mut url = Url::parse("foo://example.net")?; + /// let result = url.set_scheme("bar"); + /// assert_eq!(url.as_str(), "bar://example.net"); + /// assert!(result.is_ok()); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` /// /// Cannot change URL’s scheme from `https` to `foõ`: /// @@ -1975,14 +2022,55 @@ impl Url { /// # } /// # run().unwrap(); /// ``` + /// Cannot change the URL’s scheme from `foo` to `https`: + /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let mut url = Url::parse("foo://example.net")?; + /// let result = url.set_scheme("https"); + /// assert_eq!(url.as_str(), "foo://example.net"); + /// assert!(result.is_err()); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` + /// Cannot change the URL’s scheme from `http` to `foo`: + /// + /// ``` + /// use url::Url; + /// # use url::ParseError; + /// + /// # fn run() -> Result<(), ParseError> { + /// let mut url = Url::parse("http://example.net")?; + /// let result = url.set_scheme("foo"); + /// assert_eq!(url.as_str(), "http://example.net/"); + /// assert!(result.is_err()); + /// # Ok(()) + /// # } + /// # run().unwrap(); + /// ``` pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> { let mut parser = Parser::for_setter(String::new()); let remaining = parser.parse_scheme(parser::Input::new(scheme))?; - if !remaining.is_empty() - || (!self.has_host() && SchemeType::from(&parser.serialization).is_special()) + let new_scheme_type = SchemeType::from(&parser.serialization); + let old_scheme_type = SchemeType::from(self.scheme()); + // If url’s scheme is a special scheme and buffer is not a special scheme, then return. + if (new_scheme_type.is_special() && !old_scheme_type.is_special()) || + // If url’s scheme is not a special scheme and buffer is a special scheme, then return. + (!new_scheme_type.is_special() && old_scheme_type.is_special()) || + // If url includes credentials or has a non-null port, and buffer is "file", then return. + // If url’s scheme is "file" and its host is an empty host or null, then return. + (new_scheme_type.is_file() && self.has_authority()) { return Err(()); } + + if !remaining.is_empty() || (!self.has_host() && new_scheme_type.is_special()) { + return Err(()); + } let old_scheme_end = self.scheme_end; let new_scheme_end = to_u32(parser.serialization.len()).unwrap(); let adjust = |index: &mut u32| { @@ -2004,6 +2092,14 @@ impl Url { parser.serialization.push_str(self.slice(old_scheme_end..)); self.serialization = parser.serialization; + + // Update the port so it can be removed + // If it is the scheme's default + // we don't mind it silently failing + // if there was no port in the first place + let previous_port = self.port(); + let _ = self.set_port(previous_port); + Ok(()) } @@ -2408,6 +2504,7 @@ fn path_to_file_url_segments_windows( } let mut components = path.components(); + let host_start = serialization.len() + 1; let host_end; let host_internal; match components.next() { @@ -2434,15 +2531,24 @@ fn path_to_file_url_segments_windows( _ => return Err(()), } + let mut path_only_has_prefix = true; for component in components { if component == Component::RootDir { continue; } + path_only_has_prefix = false; // FIXME: somehow work with non-unicode? let component = component.as_os_str().to_str().ok_or(())?; serialization.push('/'); serialization.extend(percent_encode(component.as_bytes(), PATH_SEGMENT)); } + // A windows drive letter must end with a slash. + if serialization.len() > host_start + && parser::is_windows_drive_letter(&serialization[host_start..]) + && path_only_has_prefix + { + serialization.push('/'); + } Ok((host_end, host_internal)) } @@ -2467,6 +2573,14 @@ fn file_url_segments_to_pathbuf( bytes.push(b'/'); bytes.extend(percent_decode(segment.as_bytes())); } + // A windows drive letter must end with a slash. + if bytes.len() > 2 { + if matches!(bytes[bytes.len() - 2], b'a'..=b'z' | b'A'..=b'Z') + && matches!(bytes[bytes.len() - 1], b':' | b'|') + { + bytes.push(b'/'); + } + } let os_str = OsStr::from_bytes(&bytes); let path = PathBuf::from(os_str); debug_assert!( diff --git a/src/parser.rs b/src/parser.rs index e2ea36bfa..6c84ba412 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -156,7 +156,7 @@ impl fmt::Display for SyntaxViolation { } } -#[derive(Copy, Clone)] +#[derive(Copy, Clone, PartialEq)] pub enum SchemeType { File, SpecialNotFile, @@ -201,6 +201,30 @@ impl<'i> Input<'i> { Input::with_log(input, None) } + pub fn no_trim(input: &'i str) -> Self { + Input { + chars: input.chars(), + } + } + + pub fn trim_tab_and_newlines( + original_input: &'i str, + vfn: Option<&dyn Fn(SyntaxViolation)>, + ) -> Self { + let input = original_input.trim_matches(ascii_tab_or_new_line); + if let Some(vfn) = vfn { + if input.len() < original_input.len() { + vfn(SyntaxViolation::C0SpaceIgnored) + } + if input.chars().any(|c| matches!(c, '\t' | '\n' | '\r')) { + vfn(SyntaxViolation::TabOrNewlineIgnored) + } + } + Input { + chars: input.chars(), + } + } + pub fn with_log(original_input: &'i str, vfn: Option<&dyn Fn(SyntaxViolation)>) -> Self { let input = original_input.trim_matches(c0_control_or_space); if let Some(vfn) = vfn { @@ -488,15 +512,112 @@ impl<'a> Parser<'a> { mut self, input: Input, scheme_type: SchemeType, - mut base_file_url: Option<&Url>, + base_file_url: Option<&Url>, ) -> ParseResult { use SyntaxViolation::Backslash; // file state debug_assert!(self.serialization.is_empty()); let (first_char, input_after_first_char) = input.split_first(); - match first_char { - None => { - if let Some(base_url) = base_file_url { + if matches!(first_char, Some('/') | Some('\\')) { + self.log_violation_if(SyntaxViolation::Backslash, || first_char == Some('\\')); + // file slash state + let (next_char, input_after_next_char) = input_after_first_char.split_first(); + if matches!(next_char, Some('/') | Some('\\')) { + self.log_violation_if(Backslash, || next_char == Some('\\')); + // file host state + self.serialization.push_str("file://"); + let scheme_end = "file".len() as u32; + let host_start = "file://".len() as u32; + let (path_start, mut host, remaining) = + self.parse_file_host(input_after_next_char)?; + let mut host_end = to_u32(self.serialization.len())?; + let mut has_host = !matches!(host, HostInternal::None); + let remaining = if path_start { + self.parse_path_start(SchemeType::File, &mut has_host, remaining) + } else { + let path_start = self.serialization.len(); + self.serialization.push('/'); + self.parse_path(SchemeType::File, &mut has_host, path_start, remaining) + }; + + // For file URLs that have a host and whose path starts + // with the windows drive letter we just remove the host. + if !has_host { + self.serialization + .drain(host_start as usize..host_end as usize); + host_end = host_start; + host = HostInternal::None; + } + let (query_start, fragment_start) = + self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?; + return Ok(Url { + serialization: self.serialization, + scheme_end: scheme_end, + username_end: host_start, + host_start: host_start, + host_end: host_end, + host: host, + port: None, + path_start: host_end, + query_start: query_start, + fragment_start: fragment_start, + }); + } else { + self.serialization.push_str("file://"); + let scheme_end = "file".len() as u32; + let host_start = "file://".len(); + let mut host_end = host_start; + let mut host = HostInternal::None; + if !starts_with_windows_drive_letter_segment(&input_after_first_char) { + if let Some(base_url) = base_file_url { + let first_segment = base_url.path_segments().unwrap().next().unwrap(); + if is_normalized_windows_drive_letter(first_segment) { + self.serialization.push('/'); + self.serialization.push_str(first_segment); + } else if let Some(host_str) = base_url.host_str() { + self.serialization.push_str(host_str); + host_end = self.serialization.len(); + host = base_url.host.clone(); + } + } + } + // If c is the EOF code point, U+002F (/), U+005C (\), U+003F (?), or U+0023 (#), then decrease pointer by one + let parse_path_input = if let Some(c) = first_char { + if c == '/' || c == '\\' || c == '?' || c == '#' { + input + } else { + input_after_first_char + } + } else { + input_after_first_char + }; + + let remaining = + self.parse_path(SchemeType::File, &mut false, host_end, parse_path_input); + + let host_start = host_start as u32; + + let (query_start, fragment_start) = + self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?; + + let host_end = host_end as u32; + return Ok(Url { + serialization: self.serialization, + scheme_end: scheme_end, + username_end: host_start, + host_start, + host_end, + host, + port: None, + path_start: host_end, + query_start: query_start, + fragment_start: fragment_start, + }); + } + } + if let Some(base_url) = base_file_url { + match first_char { + None => { // Copy everything except the fragment let before_fragment = match base_url.fragment_start { Some(i) => &base_url.serialization[..i as usize], @@ -508,26 +629,8 @@ impl<'a> Parser<'a> { fragment_start: None, ..*base_url }) - } else { - self.serialization.push_str("file:///"); - let scheme_end = "file".len() as u32; - let path_start = "file://".len() as u32; - Ok(Url { - serialization: self.serialization, - scheme_end, - username_end: path_start, - host_start: path_start, - host_end: path_start, - host: HostInternal::None, - port: None, - path_start, - query_start: None, - fragment_start: None, - }) } - } - Some('?') => { - if let Some(base_url) = base_file_url { + Some('?') => { // Copy everything up to the query string let before_query = match (base_url.query_start, base_url.fragment_start) { (None, None) => &*base_url.serialization, @@ -542,179 +645,77 @@ impl<'a> Parser<'a> { fragment_start, ..*base_url }) - } else { - self.serialization.push_str("file:///"); - let scheme_end = "file".len() as u32; - let path_start = "file://".len() as u32; - let (query_start, fragment_start) = - self.parse_query_and_fragment(scheme_type, scheme_end, input)?; - Ok(Url { - serialization: self.serialization, - scheme_end, - username_end: path_start, - host_start: path_start, - host_end: path_start, - host: HostInternal::None, - port: None, - path_start, - query_start, - fragment_start, - }) } - } - Some('#') => { - if let Some(base_url) = base_file_url { - self.fragment_only(base_url, input) - } else { - self.serialization.push_str("file:///"); - let scheme_end = "file".len() as u32; - let path_start = "file://".len() as u32; - let fragment_start = "file:///".len() as u32; - self.serialization.push('#'); - self.parse_fragment(input_after_first_char); - Ok(Url { - serialization: self.serialization, - scheme_end, - username_end: path_start, - host_start: path_start, - host_end: path_start, - host: HostInternal::None, - port: None, - path_start, - query_start: None, - fragment_start: Some(fragment_start), - }) - } - } - Some('/') | Some('\\') => { - self.log_violation_if(Backslash, || first_char == Some('\\')); - // file slash state - let (next_char, input_after_next_char) = input_after_first_char.split_first(); - self.log_violation_if(Backslash, || next_char == Some('\\')); - if matches!(next_char, Some('/') | Some('\\')) { - // file host state - self.serialization.push_str("file://"); - let scheme_end = "file".len() as u32; - let host_start = "file://".len() as u32; - let (path_start, mut host, remaining) = - self.parse_file_host(input_after_next_char)?; - let mut host_end = to_u32(self.serialization.len())?; - let mut has_host = !matches!(host, HostInternal::None); - let remaining = if path_start { - self.parse_path_start(SchemeType::File, &mut has_host, remaining) + Some('#') => self.fragment_only(base_url, input), + _ => { + if !starts_with_windows_drive_letter_segment(&input) { + let before_query = match (base_url.query_start, base_url.fragment_start) { + (None, None) => &*base_url.serialization, + (Some(i), _) | (None, Some(i)) => base_url.slice(..i), + }; + self.serialization.push_str(before_query); + self.shorten_path(SchemeType::File, base_url.path_start as usize); + let remaining = self.parse_path( + SchemeType::File, + &mut true, + base_url.path_start as usize, + input, + ); + self.with_query_and_fragment( + SchemeType::File, + base_url.scheme_end, + base_url.username_end, + base_url.host_start, + base_url.host_end, + base_url.host, + base_url.port, + base_url.path_start, + remaining, + ) } else { - let path_start = self.serialization.len(); - self.serialization.push('/'); - self.parse_path(SchemeType::File, &mut has_host, path_start, remaining) - }; - // For file URLs that have a host and whose path starts - // with the windows drive letter we just remove the host. - if !has_host { - self.serialization - .drain(host_start as usize..host_end as usize); - host_end = host_start; - host = HostInternal::None; - } - let (query_start, fragment_start) = - self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?; - Ok(Url { - serialization: self.serialization, - scheme_end, - username_end: host_start, - host_start, - host_end, - host, - port: None, - path_start: host_end, - query_start, - fragment_start, - }) - } else { - self.serialization.push_str("file:///"); - let scheme_end = "file".len() as u32; - let path_start = "file://".len(); - if let Some(base_url) = base_file_url { - let first_segment = base_url.path_segments().unwrap().next().unwrap(); - // FIXME: *normalized* drive letter - if is_windows_drive_letter(first_segment) { - self.serialization.push_str(first_segment); - self.serialization.push('/'); - } + self.serialization.push_str("file:///"); + let scheme_end = "file".len() as u32; + let path_start = "file://".len(); + let remaining = + self.parse_path(SchemeType::File, &mut false, path_start, input); + let (query_start, fragment_start) = + self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?; + let path_start = path_start as u32; + Ok(Url { + serialization: self.serialization, + scheme_end: scheme_end, + username_end: path_start, + host_start: path_start, + host_end: path_start, + host: HostInternal::None, + port: None, + path_start: path_start, + query_start: query_start, + fragment_start: fragment_start, + }) } - let remaining = self.parse_path( - SchemeType::File, - &mut false, - path_start, - input_after_first_char, - ); - let (query_start, fragment_start) = - self.parse_query_and_fragment(scheme_type, scheme_end, remaining)?; - let path_start = path_start as u32; - Ok(Url { - serialization: self.serialization, - scheme_end, - username_end: path_start, - host_start: path_start, - host_end: path_start, - host: HostInternal::None, - port: None, - path_start, - query_start, - fragment_start, - }) - } - } - _ => { - if starts_with_windows_drive_letter_segment(&input) { - base_file_url = None; - } - if let Some(base_url) = base_file_url { - let before_query = match (base_url.query_start, base_url.fragment_start) { - (None, None) => &*base_url.serialization, - (Some(i), _) | (None, Some(i)) => base_url.slice(..i), - }; - self.serialization.push_str(before_query); - self.pop_path(SchemeType::File, base_url.path_start as usize); - let remaining = self.parse_path( - SchemeType::File, - &mut true, - base_url.path_start as usize, - input, - ); - self.with_query_and_fragment( - SchemeType::File, - base_url.scheme_end, - base_url.username_end, - base_url.host_start, - base_url.host_end, - base_url.host, - base_url.port, - base_url.path_start, - remaining, - ) - } else { - self.serialization.push_str("file:///"); - let scheme_end = "file".len() as u32; - let path_start = "file://".len(); - let remaining = - self.parse_path(SchemeType::File, &mut false, path_start, input); - let (query_start, fragment_start) = - self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?; - let path_start = path_start as u32; - Ok(Url { - serialization: self.serialization, - scheme_end, - username_end: path_start, - host_start: path_start, - host_end: path_start, - host: HostInternal::None, - port: None, - path_start, - query_start, - fragment_start, - }) } } + } else { + self.serialization.push_str("file:///"); + let scheme_end = "file".len() as u32; + let path_start = "file://".len(); + let remaining = self.parse_path(SchemeType::File, &mut false, path_start, input); + let (query_start, fragment_start) = + self.parse_query_and_fragment(SchemeType::File, scheme_end, remaining)?; + let path_start = path_start as u32; + Ok(Url { + serialization: self.serialization, + scheme_end: scheme_end, + username_end: path_start, + host_start: path_start, + host_end: path_start, + host: HostInternal::None, + port: None, + path_start: path_start, + query_start: query_start, + fragment_start: fragment_start, + }) } } @@ -772,12 +773,14 @@ impl<'a> Parser<'a> { debug_assert!(base_url.byte_at(scheme_end) == b':'); self.serialization .push_str(base_url.slice(..scheme_end + 1)); + if let Some(after_prefix) = input.split_prefix("//") { + return self.after_double_slash(after_prefix, scheme_type, scheme_end); + } return self.after_double_slash(remaining, scheme_type, scheme_end); } let path_start = base_url.path_start; - debug_assert!(base_url.byte_at(path_start) == b'/'); - self.serialization - .push_str(base_url.slice(..path_start + 1)); + self.serialization.push_str(base_url.slice(..path_start)); + self.serialization.push_str("/"); let remaining = self.parse_path( scheme_type, &mut true, @@ -804,8 +807,24 @@ impl<'a> Parser<'a> { self.serialization.push_str(before_query); // FIXME spec says just "remove last entry", not the "pop" algorithm self.pop_path(scheme_type, base_url.path_start as usize); - let remaining = - self.parse_path(scheme_type, &mut true, base_url.path_start as usize, input); + // A special url always has a path. + // A path always starts with '/' + if self.serialization.len() == base_url.path_start as usize { + if SchemeType::from(base_url.scheme()).is_special() || !input.is_empty() { + self.serialization.push('/'); + } + } + let remaining = match input.split_first() { + (Some('/'), remaining) => self.parse_path( + scheme_type, + &mut true, + base_url.path_start as usize, + remaining, + ), + _ => { + self.parse_path(scheme_type, &mut true, base_url.path_start as usize, input) + } + }; self.with_query_and_fragment( scheme_type, base_url.scheme_end, @@ -830,11 +849,16 @@ impl<'a> Parser<'a> { self.serialization.push('/'); self.serialization.push('/'); // authority state + let before_authority = self.serialization.len(); let (username_end, remaining) = self.parse_userinfo(input, scheme_type)?; + let has_authority = before_authority != self.serialization.len(); // host state let host_start = to_u32(self.serialization.len())?; let (host_end, host, port, remaining) = self.parse_host_and_port(remaining, scheme_end, scheme_type)?; + if host == HostInternal::None && has_authority { + return Err(ParseError::EmptyHost); + } // path state let path_start = to_u32(self.serialization.len())?; let remaining = self.parse_path_start(scheme_type, &mut true, remaining); @@ -878,7 +902,18 @@ impl<'a> Parser<'a> { } let (mut userinfo_char_count, remaining) = match last_at { None => return Ok((to_u32(self.serialization.len())?, input)), - Some((0, remaining)) => return Ok((to_u32(self.serialization.len())?, remaining)), + Some((0, remaining)) => { + // Otherwise, if one of the following is true + // c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#) + // url is special and c is U+005C (\) + // If @ flag is set and buffer is the empty string, validation error, return failure. + if let (Some(c), _) = remaining.split_first() { + if c == '/' || c == '?' || c == '#' || (scheme_type.is_special() && c == '\\') { + return Err(ParseError::EmptyHost); + } + } + return Ok((to_u32(self.serialization.len())?, remaining)); + } Some(x) => x, }; @@ -924,6 +959,18 @@ impl<'a> Parser<'a> { let (host, remaining) = Parser::parse_host(input, scheme_type)?; write!(&mut self.serialization, "{}", host).unwrap(); let host_end = to_u32(self.serialization.len())?; + if let Host::Domain(h) = &host { + if h.is_empty() { + // Port with an empty host + if remaining.starts_with(":") { + return Err(ParseError::EmptyHost); + } + if scheme_type.is_special() { + return Err(ParseError::EmptyHost); + } + } + }; + let (port, remaining) = if let Some(remaining) = remaining.split_prefix(':') { let scheme = || default_port(&self.serialization[..scheme_end as usize]); Parser::parse_port(remaining, scheme, self.context)? @@ -940,6 +987,9 @@ impl<'a> Parser<'a> { mut input: Input, scheme_type: SchemeType, ) -> ParseResult<(Host, Input)> { + if scheme_type.is_file() { + return Parser::get_file_host(input); + } // Undo the Input abstraction here to avoid allocating in the common case // where the host part of the input does not contain any tab or newline let input_str = input.chars.as_str(); @@ -979,7 +1029,7 @@ impl<'a> Parser<'a> { host_str = &input_str[..bytes] } } - if scheme_type.is_special() && host_str.is_empty() { + if scheme_type == SchemeType::SpecialNotFile && host_str.is_empty() { return Err(ParseError::EmptyHost); } if !scheme_type.is_special() { @@ -990,10 +1040,41 @@ impl<'a> Parser<'a> { Ok((host, input)) } - pub(crate) fn parse_file_host<'i>( + fn get_file_host<'i>(input: Input<'i>) -> ParseResult<(Host, Input)> { + let (_, host_str, remaining) = Parser::file_host(input)?; + let host = match Host::parse(&host_str)? { + Host::Domain(ref d) if d == "localhost" => Host::Domain("".to_string()), + host => host, + }; + Ok((host, remaining)) + } + + fn parse_file_host<'i>( &mut self, input: Input<'i>, ) -> ParseResult<(bool, HostInternal, Input<'i>)> { + let has_host; + let (_, host_str, remaining) = Parser::file_host(input)?; + let host = if host_str.is_empty() { + has_host = false; + HostInternal::None + } else { + match Host::parse(&host_str)? { + Host::Domain(ref d) if d == "localhost" => { + has_host = false; + HostInternal::None + } + host => { + write!(&mut self.serialization, "{}", host).unwrap(); + has_host = true; + host.into() + } + } + }; + Ok((has_host, host, remaining)) + } + + pub fn file_host<'i>(input: Input<'i>) -> ParseResult<(bool, String, Input<'i>)> { // Undo the Input abstraction here to avoid allocating in the common case // where the host part of the input does not contain any tab or newline let input_str = input.chars.as_str(); @@ -1022,20 +1103,9 @@ impl<'a> Parser<'a> { } } if is_windows_drive_letter(host_str) { - return Ok((false, HostInternal::None, input)); + return Ok((false, "".to_string(), input)); } - let host = if host_str.is_empty() { - HostInternal::None - } else { - match Host::parse(host_str)? { - Host::Domain(ref d) if d == "localhost" => HostInternal::None, - host => { - write!(&mut self.serialization, "{}", host).unwrap(); - host.into() - } - } - }; - Ok((true, host, remaining)) + Ok((true, host_str.to_string(), remaining)) } pub fn parse_port

( @@ -1073,21 +1143,34 @@ impl<'a> Parser<'a> { &mut self, scheme_type: SchemeType, has_host: &mut bool, - mut input: Input<'i>, + input: Input<'i>, ) -> Input<'i> { - // Path start state - match input.split_first() { - (Some('/'), remaining) => input = remaining, - (Some('\\'), remaining) => { - if scheme_type.is_special() { - self.log_violation(SyntaxViolation::Backslash); - input = remaining + let path_start = self.serialization.len(); + let (maybe_c, remaining) = input.split_first(); + // If url is special, then: + if scheme_type.is_special() { + if maybe_c == Some('\\') { + // If c is U+005C (\), validation error. + self.log_violation(SyntaxViolation::Backslash); + } + // A special URL always has a non-empty path. + if !self.serialization.ends_with("/") { + self.serialization.push('/'); + // We have already made sure the forward slash is present. + if maybe_c == Some('/') || maybe_c == Some('\\') { + return self.parse_path(scheme_type, has_host, path_start, remaining); } } - _ => {} + return self.parse_path(scheme_type, has_host, path_start, input); + } else if maybe_c == Some('?') || maybe_c == Some('#') { + // Otherwise, if state override is not given and c is U+003F (?), + // set url’s query to the empty string and state to query state. + // Otherwise, if state override is not given and c is U+0023 (#), + // set url’s fragment to the empty string and state to fragment state. + // The query and path states will be handled by the caller. + return input; } - let path_start = self.serialization.len(); - self.serialization.push('/'); + // Otherwise, if c is not the EOF code point: self.parse_path(scheme_type, has_host, path_start, input) } @@ -1099,7 +1182,6 @@ impl<'a> Parser<'a> { mut input: Input<'i>, ) -> Input<'i> { // Relative path state - debug_assert!(self.serialization.ends_with('/')); loop { let segment_start = self.serialization.len(); let mut ends_with_slash = false; @@ -1112,6 +1194,7 @@ impl<'a> Parser<'a> { }; match c { '/' if self.context != Context::PathSegmentSetter => { + self.serialization.push(c); ends_with_slash = true; break; } @@ -1119,6 +1202,7 @@ impl<'a> Parser<'a> { && scheme_type.is_special() => { self.log_violation(SyntaxViolation::Backslash); + self.serialization.push('/'); ends_with_slash = true; break; } @@ -1142,44 +1226,104 @@ impl<'a> Parser<'a> { } } } - match &self.serialization[segment_start..] { + // Going from &str to String to &str to please the 1.33.0 borrow checker + let before_slash_string = if ends_with_slash { + self.serialization[segment_start..self.serialization.len() - 1].to_owned() + } else { + self.serialization[segment_start..self.serialization.len()].to_owned() + }; + let segment_before_slash: &str = &before_slash_string; + match segment_before_slash { + // If buffer is a double-dot path segment, shorten url’s path, ".." | "%2e%2e" | "%2e%2E" | "%2E%2e" | "%2E%2E" | "%2e." | "%2E." | ".%2e" | ".%2E" => { debug_assert!(self.serialization.as_bytes()[segment_start - 1] == b'/'); - self.serialization.truncate(segment_start - 1); // Truncate "/.." - self.pop_path(scheme_type, path_start); - if !self.serialization[path_start..].ends_with('/') { - self.serialization.push('/') + self.serialization.truncate(segment_start); + if self.serialization.ends_with("/") + && Parser::last_slash_can_be_removed(&self.serialization, path_start) + { + self.serialization.pop(); + } + self.shorten_path(scheme_type, path_start); + + // and then if neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path. + if ends_with_slash && !self.serialization.ends_with("/") { + self.serialization.push('/'); } } + // Otherwise, if buffer is a single-dot path segment and if neither c is U+002F (/), + // nor url is special and c is U+005C (\), append the empty string to url’s path. "." | "%2e" | "%2E" => { self.serialization.truncate(segment_start); + if !self.serialization.ends_with("/") { + self.serialization.push('/'); + } } _ => { - if scheme_type.is_file() - && is_windows_drive_letter(&self.serialization[path_start + 1..]) - { - if self.serialization.ends_with('|') { - self.serialization.pop(); + // If url’s scheme is "file", url’s path is empty, and buffer is a Windows drive letter, then + if scheme_type.is_file() && is_windows_drive_letter(segment_before_slash) { + // Replace the second code point in buffer with U+003A (:). + if let Some(c) = segment_before_slash.chars().nth(0) { + self.serialization.truncate(segment_start); + self.serialization.push(c); self.serialization.push(':'); + if ends_with_slash { + self.serialization.push('/'); + } } + // If url’s host is neither the empty string nor null, + // validation error, set url’s host to the empty string. if *has_host { self.log_violation(SyntaxViolation::FileWithHostAndWindowsDrive); *has_host = false; // FIXME account for this in callers } } - if ends_with_slash { - self.serialization.push('/') - } } } if !ends_with_slash { break; } } + if scheme_type.is_file() { + // while url’s path’s size is greater than 1 + // and url’s path[0] is the empty string, + // validation error, remove the first item from url’s path. + //FIXME: log violation + let path = self.serialization.split_off(path_start); + self.serialization.push('/'); + self.serialization.push_str(&path.trim_start_matches("/")); + } input } + fn last_slash_can_be_removed(serialization: &String, path_start: usize) -> bool { + let url_before_segment = &serialization[..serialization.len() - 1]; + if let Some(segment_before_start) = url_before_segment.rfind("/") { + // Do not remove the root slash + segment_before_start >= path_start + // Or a windows drive letter slash + && !path_starts_with_windows_drive_letter(&serialization[segment_before_start..]) + } else { + false + } + } + + /// https://url.spec.whatwg.org/#shorten-a-urls-path + fn shorten_path(&mut self, scheme_type: SchemeType, path_start: usize) { + // If path is empty, then return. + if self.serialization.len() == path_start { + return; + } + // If url’s scheme is "file", path’s size is 1, and path[0] is a normalized Windows drive letter, then return. + if scheme_type.is_file() + && is_normalized_windows_drive_letter(&self.serialization[path_start..]) + { + return; + } + // Remove path’s last item. + self.pop_path(scheme_type, path_start); + } + /// https://url.spec.whatwg.org/#pop-a-urls-path fn pop_path(&mut self, scheme_type: SchemeType, path_start: usize) { if self.serialization.len() > path_start { @@ -1187,9 +1331,8 @@ impl<'a> Parser<'a> { // + 1 since rfind returns the position before the slash. let segment_start = path_start + slash_position + 1; // Don’t pop a Windows drive letter - // FIXME: *normalized* Windows drive letter if !(scheme_type.is_file() - && is_windows_drive_letter(&self.serialization[segment_start..])) + && is_normalized_windows_drive_letter(&self.serialization[segment_start..])) { self.serialization.truncate(segment_start); } @@ -1329,14 +1472,8 @@ impl<'a> Parser<'a> { self.log_violation(SyntaxViolation::NullInFragment) } else { self.check_url_code_point(c, &input); - self.serialization.extend(utf8_percent_encode( - utf8_c, - // FIXME: tests fail when we use the FRAGMENT set here - // as defined in the spec as of 2019-07-17, - // likely because tests are out of date. - // See https://github.com/servo/rust-url/issues/290 - CONTROLS, - )); + self.serialization + .extend(utf8_percent_encode(utf8_c, FRAGMENT)); } } } @@ -1394,6 +1531,12 @@ fn c0_control_or_space(ch: char) -> bool { ch <= ' ' // U+0000 to U+0020 } +/// https://infra.spec.whatwg.org/#ascii-tab-or-newline +#[inline] +fn ascii_tab_or_new_line(ch: char) -> bool { + matches!(ch, '\t' | '\r' | '\n') +} + /// https://url.spec.whatwg.org/#ascii-alpha #[inline] pub fn ascii_alpha(ch: char) -> bool { @@ -1409,18 +1552,48 @@ pub fn to_u32(i: usize) -> ParseResult { } } +fn is_normalized_windows_drive_letter(segment: &str) -> bool { + is_windows_drive_letter(segment) && segment.as_bytes()[1] == b':' +} + /// Wether the scheme is file:, the path has a single segment, and that segment /// is a Windows drive letter -fn is_windows_drive_letter(segment: &str) -> bool { +#[inline] +pub fn is_windows_drive_letter(segment: &str) -> bool { segment.len() == 2 && starts_with_windows_drive_letter(segment) } +/// Wether path starts with a root slash +/// and a windows drive letter eg: "/c:" or "/a:/" +fn path_starts_with_windows_drive_letter(s: &str) -> bool { + if let Some(c) = s.as_bytes().get(0) { + matches!(c, b'/' | b'\\' | b'?' | b'#') && starts_with_windows_drive_letter(&s[1..]) + } else { + false + } +} + fn starts_with_windows_drive_letter(s: &str) -> bool { - ascii_alpha(s.as_bytes()[0] as char) && matches!(s.as_bytes()[1], b':' | b'|') + s.len() >= 2 + && ascii_alpha(s.as_bytes()[0] as char) + && matches!(s.as_bytes()[1], b':' | b'|') + && (s.len() == 2 || matches!(s.as_bytes()[2], b'/' | b'\\' | b'?' | b'#')) } +/// https://url.spec.whatwg.org/#start-with-a-windows-drive-letter fn starts_with_windows_drive_letter_segment(input: &Input) -> bool { let mut input = input.clone(); - matches!((input.next(), input.next(), input.next()), (Some(a), Some(b), Some(c)) - if ascii_alpha(a) && matches!(b, ':' | '|') && matches!(c, '/' | '\\' | '?' | '#')) + match (input.next(), input.next(), input.next()) { + // its first two code points are a Windows drive letter + // its third code point is U+002F (/), U+005C (\), U+003F (?), or U+0023 (#). + (Some(a), Some(b), Some(c)) + if ascii_alpha(a) && matches!(b, ':' | '|') && matches!(c, '/' | '\\' | '?' | '#') => + { + true + } + // its first two code points are a Windows drive letter + // its length is 2 + (Some(a), Some(b), None) if ascii_alpha(a) && matches!(b, ':' | '|') => true, + _ => false, + } } diff --git a/src/path_segments.rs b/src/path_segments.rs index 97055e777..6f5679887 100644 --- a/src/path_segments.rs +++ b/src/path_segments.rs @@ -45,7 +45,15 @@ pub struct PathSegmentsMut<'a> { pub fn new(url: &mut Url) -> PathSegmentsMut { let after_path = url.take_after_path(); let old_after_path_position = to_u32(url.serialization.len()).unwrap(); - debug_assert!(url.byte_at(url.path_start) == b'/'); + // Special urls always have a non empty path + if SchemeType::from(url.scheme()).is_special() { + debug_assert!(url.byte_at(url.path_start) == b'/'); + } else { + debug_assert!( + url.serialization.len() == url.path_start as usize + || url.byte_at(url.path_start) == b'/' + ); + } PathSegmentsMut { after_first_slash: url.path_start as usize + "/".len(), url, @@ -212,7 +220,10 @@ impl<'a> PathSegmentsMut<'a> { if matches!(segment, "." | "..") { continue; } - if parser.serialization.len() > path_start + 1 { + if parser.serialization.len() > path_start + 1 + // Non special url's path might still be empty + || parser.serialization.len() == path_start + { parser.serialization.push('/'); } let mut has_host = true; // FIXME account for this? diff --git a/src/quirks.rs b/src/quirks.rs index 285ee21b6..caab354cc 100644 --- a/src/quirks.rs +++ b/src/quirks.rs @@ -99,26 +99,47 @@ pub fn host(url: &Url) -> &str { /// Setter for https://url.spec.whatwg.org/#dom-url-host pub fn set_host(url: &mut Url, new_host: &str) -> Result<(), ()> { + // If context object’s url’s cannot-be-a-base-URL flag is set, then return. if url.cannot_be_a_base() { return Err(()); } + // Host parsing rules are strict, + // We don't want to trim the input + let input = Input::no_trim(new_host); let host; let opt_port; { let scheme = url.scheme(); - let result = Parser::parse_host(Input::new(new_host), SchemeType::from(scheme)); - match result { - Ok((h, remaining)) => { - host = h; - opt_port = if let Some(remaining) = remaining.split_prefix(':') { + let scheme_type = SchemeType::from(scheme); + if let Ok((h, remaining)) = Parser::parse_host(input, scheme_type) { + host = h; + opt_port = if let Some(remaining) = remaining.split_prefix(':') { + if remaining.is_empty() { + None + } else { Parser::parse_port(remaining, || default_port(scheme), Context::Setter) .ok() .map(|(port, _remaining)| port) - } else { - None - }; + } + } else { + None + }; + } else { + return Err(()); + } + } + // Make sure we won't set an empty host to a url with a username or a port + if host == Host::Domain("".to_string()) { + if !username(&url).is_empty() { + return Err(()); + } + if let Some(p) = opt_port { + if let Some(_) = p { + return Err(()); } - Err(_) => return Err(()), + } + if url.port().is_some() { + return Err(()); } } url.set_host_internal(host, opt_port); @@ -136,8 +157,24 @@ pub fn set_hostname(url: &mut Url, new_hostname: &str) -> Result<(), ()> { if url.cannot_be_a_base() { return Err(()); } - let result = Parser::parse_host(Input::new(new_hostname), SchemeType::from(url.scheme())); - if let Ok((host, _remaining)) = result { + // Host parsing rules are strict we don't want to trim the input + let input = Input::no_trim(new_hostname); + let scheme_type = SchemeType::from(url.scheme()); + if let Ok((host, _remaining)) = Parser::parse_host(input, scheme_type) { + if let Host::Domain(h) = &host { + if h.is_empty() { + // Empty host on special not file url + if SchemeType::from(url.scheme()) == SchemeType::SpecialNotFile + // Port with an empty host + ||!port(&url).is_empty() + // Empty host that includes credentials + || !url.username().is_empty() + || !url.password().unwrap_or(&"").is_empty() + { + return Err(()); + } + } + } url.set_host_internal(host, None); Ok(()) } else { @@ -182,8 +219,19 @@ pub fn pathname(url: &Url) -> &str { /// Setter for https://url.spec.whatwg.org/#dom-url-pathname pub fn set_pathname(url: &mut Url, new_pathname: &str) { - if !url.cannot_be_a_base() { + if url.cannot_be_a_base() { + return; + } + if Some('/') == new_pathname.chars().nth(0) + || (SchemeType::from(url.scheme()).is_special() + // \ is a segment delimiter for 'special' URLs" + && Some('\\') == new_pathname.chars().nth(0)) + { url.set_path(new_pathname) + } else { + let mut path_to_set = String::from("/"); + path_to_set.push_str(new_pathname); + url.set_path(&path_to_set) } } @@ -208,13 +256,14 @@ pub fn hash(url: &Url) -> &str { /// Setter for https://url.spec.whatwg.org/#dom-url-hash pub fn set_hash(url: &mut Url, new_hash: &str) { - if url.scheme() != "javascript" { - url.set_fragment(match new_hash { - "" => None, - _ if new_hash.starts_with('#') => Some(&new_hash[1..]), - _ => Some(new_hash), - }) - } + url.set_fragment(match new_hash { + // If the given value is the empty string, + // then set context object’s url’s fragment to null and return. + "" => None, + // Let input be the given value with a single leading U+0023 (#) removed, if any. + _ if new_hash.starts_with('#') => Some(&new_hash[1..]), + _ => Some(new_hash), + }) } fn trim(s: &str) -> &str { diff --git a/tests/setters_tests.json b/tests/setters_tests.json index a45171bf3..db23d9247 100644 --- a/tests/setters_tests.json +++ b/tests/setters_tests.json @@ -27,7 +27,7 @@ "href": "a://example.net", "new_value": "", "expected": { - "href": "a://example.net/", + "href": "a://example.net", "protocol": "a:" } }, @@ -35,16 +35,24 @@ "href": "a://example.net", "new_value": "b", "expected": { - "href": "b://example.net/", + "href": "b://example.net", "protocol": "b:" } }, + { + "href": "javascript:alert(1)", + "new_value": "defuse", + "expected": { + "href": "defuse:alert(1)", + "protocol": "defuse:" + } + }, { "comment": "Upper-case ASCII is lower-cased", "href": "a://example.net", "new_value": "B", "expected": { - "href": "b://example.net/", + "href": "b://example.net", "protocol": "b:" } }, @@ -53,7 +61,7 @@ "href": "a://example.net", "new_value": "é", "expected": { - "href": "a://example.net/", + "href": "a://example.net", "protocol": "a:" } }, @@ -62,7 +70,7 @@ "href": "a://example.net", "new_value": "0b", "expected": { - "href": "a://example.net/", + "href": "a://example.net", "protocol": "a:" } }, @@ -71,7 +79,7 @@ "href": "a://example.net", "new_value": "+b", "expected": { - "href": "a://example.net/", + "href": "a://example.net", "protocol": "a:" } }, @@ -79,7 +87,7 @@ "href": "a://example.net", "new_value": "bC0+-.", "expected": { - "href": "bc0+-.://example.net/", + "href": "bc0+-.://example.net", "protocol": "bc0+-.:" } }, @@ -88,7 +96,7 @@ "href": "a://example.net", "new_value": "b,c", "expected": { - "href": "a://example.net/", + "href": "a://example.net", "protocol": "a:" } }, @@ -97,10 +105,35 @@ "href": "a://example.net", "new_value": "bé", "expected": { - "href": "a://example.net/", + "href": "a://example.net", "protocol": "a:" } }, + { + "comment": "Can’t switch from URL containing username/password/port to file", + "href": "http://test@example.net", + "new_value": "file", + "expected": { + "href": "http://test@example.net/", + "protocol": "http:" + } + }, + { + "href": "gopher://example.net:1234", + "new_value": "file", + "expected": { + "href": "gopher://example.net:1234/", + "protocol": "gopher:" + } + }, + { + "href": "wss://x:x@example.net:1234", + "new_value": "file", + "expected": { + "href": "wss://x:x@example.net:1234/", + "protocol": "wss:" + } + }, { "comment": "Can’t switch from file URL with no host", "href": "file://localhost/", @@ -127,12 +160,36 @@ } }, { - "comment": "Spec deviation: from special scheme to not is not problematic. https://github.com/whatwg/url/issues/104", + "comment": "Can’t switch from special scheme to non-special", "href": "http://example.net", "new_value": "b", "expected": { - "href": "b://example.net/", - "protocol": "b:" + "href": "http://example.net/", + "protocol": "http:" + } + }, + { + "href": "file://hi/path", + "new_value": "s", + "expected": { + "href": "file://hi/path", + "protocol": "file:" + } + }, + { + "href": "https://example.net", + "new_value": "s", + "expected": { + "href": "https://example.net/", + "protocol": "https:" + } + }, + { + "href": "ftp://example.net", + "new_value": "test", + "expected": { + "href": "ftp://example.net/", + "protocol": "ftp:" } }, { @@ -145,12 +202,44 @@ } }, { - "comment": "Spec deviation: from non-special scheme with a host to special is not problematic. https://github.com/whatwg/url/issues/104", + "comment": "Can’t switch from non-special scheme to special", "href": "ssh://me@example.net", "new_value": "http", "expected": { - "href": "http://me@example.net/", - "protocol": "http:" + "href": "ssh://me@example.net", + "protocol": "ssh:" + } + }, + { + "href": "ssh://me@example.net", + "new_value": "gopher", + "expected": { + "href": "ssh://me@example.net", + "protocol": "ssh:" + } + }, + { + "href": "ssh://me@example.net", + "new_value": "file", + "expected": { + "href": "ssh://me@example.net", + "protocol": "ssh:" + } + }, + { + "href": "ssh://example.net", + "new_value": "file", + "expected": { + "href": "ssh://example.net", + "protocol": "ssh:" + } + }, + { + "href": "nonsense:///test", + "new_value": "https", + "expected": { + "href": "nonsense:///test", + "protocol": "nonsense:" } }, { @@ -170,6 +259,16 @@ "href": "view-source+data:text/html,

Test", "protocol": "view-source+data:" } + }, + { + "comment": "Port is set to null if it is the default for new scheme.", + "href": "http://foo.com:443/", + "new_value": "https", + "expected": { + "href": "https://foo.com/", + "protocol": "https:", + "port": "" + } } ], "username": [ @@ -266,14 +365,6 @@ "username": "" } }, - { - "href": "file://test/", - "new_value": "test", - "expected": { - "href": "file://test/", - "username": "" - } - }, { "href": "javascript://x/", "new_value": "wario", @@ -281,6 +372,14 @@ "href": "javascript://wario@x/", "username": "wario" } + }, + { + "href": "file://test/", + "new_value": "test", + "expected": { + "href": "file://test/", + "username": "" + } } ], "password": [ @@ -369,14 +468,6 @@ "password": "" } }, - { - "href": "file://test/", - "new_value": "test", - "expected": { - "href": "file://test/", - "password": "" - } - }, { "href": "javascript://x/", "new_value": "bowser", @@ -384,9 +475,27 @@ "href": "javascript://:bowser@x/", "password": "bowser" } + }, + { + "href": "file://test/", + "new_value": "test", + "expected": { + "href": "file://test/", + "password": "" + } } ], "host": [ + { + "comment": "Non-special scheme", + "href": "sc://x/", + "new_value": "\u0000", + "expected": { + "href": "sc://x/", + "host": "x", + "hostname": "x" + } + }, { "href": "sc://x/", "new_value": "\u0009", @@ -414,6 +523,15 @@ "hostname": "" } }, + { + "href": "sc://x/", + "new_value": " ", + "expected": { + "href": "sc://x/", + "host": "x", + "hostname": "x" + } + }, { "href": "sc://x/", "new_value": "#", @@ -459,6 +577,16 @@ "hostname": "%C3%9F" } }, + { + "comment": "IDNA Nontransitional_Processing", + "href": "https://x/", + "new_value": "ß", + "expected": { + "href": "https://xn--zca/", + "host": "xn--zca", + "hostname": "xn--zca" + } + }, { "comment": "Cannot-be-a-base means no host", "href": "mailto:me@example.net", @@ -499,14 +627,14 @@ } }, { - "comment": "Port number is removed if empty in the new value: https://github.com/whatwg/url/pull/113", + "comment": "Port number is unchanged if not specified", "href": "http://example.net:8080", "new_value": "example.com:", "expected": { - "href": "http://example.com/", - "host": "example.com", + "href": "http://example.com:8080/", + "host": "example.com:8080", "hostname": "example.com", - "port": "" + "port": "8080" } }, { @@ -591,6 +719,17 @@ "port": "80" } }, + { + "comment": "Port number is removed if new port is scheme default and existing URL has a non-default port", + "href": "http://example.net:8080", + "new_value": "example.com:80", + "expected": { + "href": "http://example.com/", + "host": "example.com", + "hostname": "example.com", + "port": "" + } + }, { "comment": "Stuff after a / delimiter is ignored", "href": "http://example.net/path", @@ -790,9 +929,69 @@ "host": "example.net", "hostname": "example.net" } + }, + { + "href": "file://y/", + "new_value": "x:123", + "expected": { + "href": "file://y/", + "host": "y", + "hostname": "y", + "port": "" + } + }, + { + "href": "file://y/", + "new_value": "loc%41lhost", + "expected": { + "href": "file:///", + "host": "", + "hostname": "", + "port": "" + } + }, + { + "href": "file://hi/x", + "new_value": "", + "expected": { + "href": "file:///x", + "host": "", + "hostname": "", + "port": "" + } + }, + { + "href": "sc://test@test/", + "new_value": "", + "expected": { + "href": "sc://test@test/", + "host": "test", + "hostname": "test", + "username": "test" + } + }, + { + "href": "sc://test:12/", + "new_value": "", + "expected": { + "href": "sc://test:12/", + "host": "test:12", + "hostname": "test", + "port": "12" + } } ], "hostname": [ + { + "comment": "Non-special scheme", + "href": "sc://x/", + "new_value": "\u0000", + "expected": { + "href": "sc://x/", + "host": "x", + "hostname": "x" + } + }, { "href": "sc://x/", "new_value": "\u0009", @@ -820,6 +1019,15 @@ "hostname": "" } }, + { + "href": "sc://x/", + "new_value": " ", + "expected": { + "href": "sc://x/", + "host": "x", + "hostname": "x" + } + }, { "href": "sc://x/", "new_value": "#", @@ -1055,6 +1263,56 @@ "host": "example.net", "hostname": "example.net" } + }, + { + "href": "file://y/", + "new_value": "x:123", + "expected": { + "href": "file://y/", + "host": "y", + "hostname": "y", + "port": "" + } + }, + { + "href": "file://y/", + "new_value": "loc%41lhost", + "expected": { + "href": "file:///", + "host": "", + "hostname": "", + "port": "" + } + }, + { + "href": "file://hi/x", + "new_value": "", + "expected": { + "href": "file:///x", + "host": "", + "hostname": "", + "port": "" + } + }, + { + "href": "sc://test@test/", + "new_value": "", + "expected": { + "href": "sc://test@test/", + "host": "test", + "hostname": "test", + "username": "test" + } + }, + { + "href": "sc://test:12/", + "new_value": "", + "expected": { + "href": "sc://test:12/", + "host": "test:12", + "hostname": "test", + "port": "12" + } } ], "port": [ @@ -1324,12 +1582,12 @@ } }, { - "comment": "UTF-8 percent encoding with the default encode set. Tabs and newlines are removed. Leading or training C0 controls and space are removed.", + "comment": "UTF-8 percent encoding with the default encode set. Tabs and newlines are removed.", "href": "a:/", - "new_value": "\u0000\u0001\t\n\r\u001f !\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", + "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", "expected": { - "href": "a:/!%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E%3F@AZ[\\]^_%60az%7B|%7D~%7F%C2%80%C2%81%C3%89%C3%A9", - "pathname": "/!%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E%3F@AZ[\\]^_%60az%7B|%7D~%7F%C2%80%C2%81%C3%89%C3%A9" + "href": "a:/%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E%3F@AZ[\\]^_%60az%7B|%7D~%7F%C2%80%C2%81%C3%89%C3%A9", + "pathname": "/%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E%3F@AZ[\\]^_%60az%7B|%7D~%7F%C2%80%C2%81%C3%89%C3%A9" } }, { @@ -1376,6 +1634,33 @@ "href": "sc://example.net/%23", "pathname": "/%23" } + }, + { + "comment": "File URLs and (back)slashes", + "href": "file://monkey/", + "new_value": "\\\\", + "expected": { + "href": "file://monkey/", + "pathname": "/" + } + }, + { + "comment": "File URLs and (back)slashes", + "href": "file:///unicorn", + "new_value": "//\\/", + "expected": { + "href": "file:///", + "pathname": "/" + } + }, + { + "comment": "File URLs and (back)slashes", + "href": "file:///unicorn", + "new_value": "//monkey/..//", + "expected": { + "href": "file:///", + "pathname": "/" + } } ], "search": [ @@ -1444,12 +1729,12 @@ } }, { - "comment": "UTF-8 percent encoding with the query encode set. Tabs and newlines are removed. Leading or training C0 controls and space are removed.", + "comment": "UTF-8 percent encoding with the query encode set. Tabs and newlines are removed.", "href": "a:/", - "new_value": "\u0000\u0001\t\n\r\u001f !\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", + "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", "expected": { - "href": "a:/?!%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9", - "search": "?!%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9" + "href": "a:/?%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9", + "search": "?%00%01%1F%20!%22%23$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9" } }, { @@ -1511,13 +1796,53 @@ "hash": "" } }, + { + "href": "http://example.net", + "new_value": "#foo bar", + "expected": { + "href": "http://example.net/#foo%20bar", + "hash": "#foo%20bar" + } + }, + { + "href": "http://example.net", + "new_value": "#foo\"bar", + "expected": { + "href": "http://example.net/#foo%22bar", + "hash": "#foo%22bar" + } + }, + { + "href": "http://example.net", + "new_value": "#foobar", + "expected": { + "href": "http://example.net/#foo%3Ebar", + "hash": "#foo%3Ebar" + } + }, + { + "href": "http://example.net", + "new_value": "#foo`bar", + "expected": { + "href": "http://example.net/#foo%60bar", + "hash": "#foo%60bar" + } + }, { "comment": "Simple percent-encoding; nuls, tabs, and newlines are removed", "href": "a:/", - "new_value": "\u0000\u0001\t\n\r\u001f !\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", + "new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé", "expected": { - "href": "a:/#!%01%1F !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9", - "hash": "#!%01%1F !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9" + "href": "a:/#%01%1F%20!%22#$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_%60az{|}~%7F%C2%80%C2%81%C3%89%C3%A9", + "hash": "#%01%1F%20!%22#$%&'()*+,-./09:;%3C=%3E?@AZ[\\]^_%60az{|}~%7F%C2%80%C2%81%C3%89%C3%A9" } }, { @@ -1528,6 +1853,14 @@ "href": "http://example.net/#%c3%89t%C3%A9", "hash": "#%c3%89t%C3%A9" } + }, + { + "href": "javascript:alert(1)", + "new_value": "castle", + "expected": { + "href": "javascript:alert(1)#castle", + "hash": "#castle" + } } ] } diff --git a/tests/unit.rs b/tests/unit.rs index 9918ea316..9cc7c53fe 100644 --- a/tests/unit.rs +++ b/tests/unit.rs @@ -23,6 +23,49 @@ fn size() { assert_eq!(size_of::(), size_of::>()); } +#[test] +fn test_relative() { + let base: Url = "sc://%C3%B1".parse().unwrap(); + let url = base.join("/resources/testharness.js").unwrap(); + assert_eq!(url.as_str(), "sc://%C3%B1/resources/testharness.js"); +} + +#[test] +fn test_relative_empty() { + let base: Url = "sc://%C3%B1".parse().unwrap(); + let url = base.join("").unwrap(); + assert_eq!(url.as_str(), "sc://%C3%B1"); +} + +#[test] +fn test_set_empty_host() { + let mut base: Url = "moz://foo:bar@servo/baz".parse().unwrap(); + base.set_username("").unwrap(); + assert_eq!(base.as_str(), "moz://:bar@servo/baz"); + base.set_host(None).unwrap(); + assert_eq!(base.as_str(), "moz:/baz"); + base.set_host(Some("servo")).unwrap(); + assert_eq!(base.as_str(), "moz://servo/baz"); +} + +#[test] +fn test_set_empty_hostname() { + use url::quirks; + let mut base: Url = "moz://foo@servo/baz".parse().unwrap(); + assert!( + quirks::set_hostname(&mut base, "").is_err(), + "setting an empty hostname to a url with a username should fail" + ); + base = "moz://:pass@servo/baz".parse().unwrap(); + assert!( + quirks::set_hostname(&mut base, "").is_err(), + "setting an empty hostname to a url with a password should fail" + ); + base = "moz://servo/baz".parse().unwrap(); + quirks::set_hostname(&mut base, "").unwrap(); + assert_eq!(base.as_str(), "moz:///baz"); +} + macro_rules! assert_from_file_path { ($path: expr) => { assert_from_file_path!($path, $path) @@ -413,9 +456,9 @@ fn test_set_host() { assert_eq!(url.as_str(), "foobar:/hello"); let mut url = Url::parse("foo://ș").unwrap(); - assert_eq!(url.as_str(), "foo://%C8%99/"); + assert_eq!(url.as_str(), "foo://%C8%99"); url.set_host(Some("goșu.ro")).unwrap(); - assert_eq!(url.as_str(), "foo://go%C8%99u.ro/"); + assert_eq!(url.as_str(), "foo://go%C8%99u.ro"); } #[test] @@ -550,3 +593,29 @@ fn test_options_reuse() { assert_eq!(url.as_str(), "http://mozilla.org/sub/path"); assert_eq!(*violations.borrow(), vec!(ExpectedDoubleSlash, Backslash)); } + +/// https://github.com/servo/rust-url/issues/505 +#[cfg(windows)] +#[test] +fn test_url_from_file_path() { + use std::path::PathBuf; + use url::Url; + + let p = PathBuf::from("c:///"); + let u = Url::from_file_path(p).unwrap(); + let path = u.to_file_path().unwrap(); + assert_eq!("C:\\", path.to_str().unwrap()); +} + +/// https://github.com/servo/rust-url/issues/505 +#[cfg(not(windows))] +#[test] +fn test_url_from_file_path() { + use std::path::PathBuf; + use url::Url; + + let p = PathBuf::from("/c:/"); + let u = Url::from_file_path(p).unwrap(); + let path = u.to_file_path().unwrap(); + assert_eq!("/c:/", path.to_str().unwrap()); +} diff --git a/tests/urltestdata.json b/tests/urltestdata.json index 5565c938f..bf4e2a783 100644 --- a/tests/urltestdata.json +++ b/tests/urltestdata.json @@ -153,7 +153,7 @@ { "input": "http://f:21/ b ? d # e ", "base": "http://example.org/foo/bar", - "href": "http://f:21/%20b%20?%20d%20# e", + "href": "http://f:21/%20b%20?%20d%20#%20e", "origin": "http://f:21", "protocol": "http:", "username": "", @@ -163,12 +163,12 @@ "port": "21", "pathname": "/%20b%20", "search": "?%20d%20", - "hash": "# e" + "hash": "#%20e" }, { "input": "lolscheme:x x#x x", "base": "about:blank", - "href": "lolscheme:x x#x x", + "href": "lolscheme:x x#x%20x", "protocol": "lolscheme:", "username": "", "password": "", @@ -177,7 +177,7 @@ "port": "", "pathname": "x x", "search": "", - "hash": "#x x" + "hash": "#x%20x" }, { "input": "http://f:/c", @@ -572,7 +572,7 @@ { "input": "foo://", "base": "http://example.org/foo/bar", - "href": "foo:///", + "href": "foo://", "origin": "null", "protocol": "foo:", "username": "", @@ -580,7 +580,7 @@ "host": "", "hostname": "", "port": "", - "pathname": "/", + "pathname": "", "search": "", "hash": "" }, @@ -1433,6 +1433,22 @@ "search": "", "hash": "" }, + "# Based on https://felixfbecker.github.io/whatwg-url-custom-host-repro/", + { + "input": "ssh://example.com/foo/bar.git", + "base": "http://example.org/", + "href": "ssh://example.com/foo/bar.git", + "origin": "null", + "protocol": "ssh:", + "username": "", + "password": "", + "host": "example.com", + "hostname": "example.com", + "port": "", + "pathname": "/foo/bar.git", + "search": "", + "hash": "" + }, "# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/file.html", { "input": "file:c:\\foo\\bar.html", @@ -2260,7 +2276,7 @@ { "input": "http://www.google.com/foo?bar=baz# »", "base": "about:blank", - "href": "http://www.google.com/foo?bar=baz# %C2%BB", + "href": "http://www.google.com/foo?bar=baz#%20%C2%BB", "origin": "http://www.google.com", "protocol": "http:", "username": "", @@ -2270,12 +2286,12 @@ "port": "", "pathname": "/foo", "search": "?bar=baz", - "hash": "# %C2%BB" + "hash": "#%20%C2%BB" }, { "input": "data:test# »", "base": "about:blank", - "href": "data:test# %C2%BB", + "href": "data:test#%20%C2%BB", "origin": "null", "protocol": "data:", "username": "", @@ -2285,7 +2301,7 @@ "port": "", "pathname": "test", "search": "", - "hash": "# %C2%BB" + "hash": "#%20%C2%BB" }, { "input": "http://www.google.com", @@ -4015,6 +4031,37 @@ "search": "?`{}", "hash": "" }, + "byte is ' and url is special", + { + "input": "http://host/?'", + "base": "about:blank", + "href": "http://host/?%27", + "origin": "http://host", + "protocol": "http:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/", + "search": "?%27", + "hash": "" + }, + { + "input": "notspecial://host/?'", + "base": "about:blank", + "href": "notspecial://host/?'", + "origin": "null", + "protocol": "notspecial:", + "username": "", + "password": "", + "host": "host", + "hostname": "host", + "port": "", + "pathname": "/", + "search": "?'", + "hash": "" + }, "# Credentials in base", { "input": "/some/path", @@ -4473,6 +4520,26 @@ "search": "", "hash": "" }, + { + "input": "sc://@/", + "base": "about:blank", + "failure": true + }, + { + "input": "sc://te@s:t@/", + "base": "about:blank", + "failure": true + }, + { + "input": "sc://:/", + "base": "about:blank", + "failure": true + }, + { + "input": "sc://:12/", + "base": "about:blank", + "failure": true + }, { "input": "sc://[/", "base": "about:blank", @@ -4566,6 +4633,22 @@ "search": "", "hash": "" }, + "# unknown scheme with non-URL characters in the path", + { + "input": "wow:\uFFFF", + "base": "about:blank", + "href": "wow:%EF%BF%BF", + "origin": "null", + "protocol": "wow:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "%EF%BF%BF", + "search": "", + "hash": "" + }, "# Hosts and percent-encoding", { "input": "ftp://example.com%80/", @@ -4767,6 +4850,70 @@ "searchParams": "qux=", "hash": "#foo%08bar" }, + { + "input": "http://foo.bar/baz?qux#foo\"bar", + "base": "about:blank", + "href": "http://foo.bar/baz?qux#foo%22bar", + "origin": "http://foo.bar", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo.bar", + "hostname": "foo.bar", + "port": "", + "pathname": "/baz", + "search": "?qux", + "searchParams": "qux=", + "hash": "#foo%22bar" + }, + { + "input": "http://foo.bar/baz?qux#foobar", + "base": "about:blank", + "href": "http://foo.bar/baz?qux#foo%3Ebar", + "origin": "http://foo.bar", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo.bar", + "hostname": "foo.bar", + "port": "", + "pathname": "/baz", + "search": "?qux", + "searchParams": "qux=", + "hash": "#foo%3Ebar" + }, + { + "input": "http://foo.bar/baz?qux#foo`bar", + "base": "about:blank", + "href": "http://foo.bar/baz?qux#foo%60bar", + "origin": "http://foo.bar", + "protocol": "http:", + "username": "", + "password": "", + "host": "foo.bar", + "hostname": "foo.bar", + "port": "", + "pathname": "/baz", + "search": "?qux", + "searchParams": "qux=", + "hash": "#foo%60bar" + }, "# IPv4 parsing (via https://github.com/nodejs/node/pull/10317)", { "input": "http://192.168.257", @@ -4954,6 +5101,11 @@ "hash": "" }, "More IPv4 parsing (via https://github.com/jsdom/whatwg-url/issues/92)", + { + "input": "https://0x100000000/test", + "base": "about:blank", + "failure": true + }, { "input": "https://256.0.0.1/test", "base": "about:blank", @@ -5187,6 +5339,90 @@ "hash": "#x" }, "# File URLs and many (back)slashes", + { + "input": "file:\\\\//", + "base": "about:blank", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file:\\\\\\\\", + "base": "about:blank", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file:\\\\\\\\?fox", + "base": "about:blank", + "href": "file:///?fox", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "?fox", + "hash": "" + }, + { + "input": "file:\\\\\\\\#guppy", + "base": "about:blank", + "href": "file:///#guppy", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "#guppy" + }, + { + "input": "file://spider///", + "base": "about:blank", + "href": "file://spider/", + "protocol": "file:", + "username": "", + "password": "", + "host": "spider", + "hostname": "spider", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "file:\\\\localhost//", + "base": "about:blank", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, { "input": "file:///localhost//cat", "base": "about:blank", @@ -5201,6 +5437,48 @@ "search": "", "hash": "" }, + { + "input": "file://\\/localhost//cat", + "base": "about:blank", + "href": "file:///localhost//cat", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/localhost//cat", + "search": "", + "hash": "" + }, + { + "input": "file://localhost//a//../..//", + "base": "about:blank", + "href": "file:///", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/", + "search": "", + "hash": "" + }, + { + "input": "/////mouse", + "base": "file:///elephant", + "href": "file:///mouse", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/mouse", + "search": "", + "hash": "" + }, { "input": "\\//pig", "base": "file://lion/", @@ -5215,6 +5493,48 @@ "search": "", "hash": "" }, + { + "input": "\\/localhost//pig", + "base": "file://lion/", + "href": "file:///pig", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pig", + "search": "", + "hash": "" + }, + { + "input": "//localhost//pig", + "base": "file://lion/", + "href": "file:///pig", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/pig", + "search": "", + "hash": "" + }, + { + "input": "/..//localhost//pig", + "base": "file://lion/", + "href": "file://lion/localhost//pig", + "protocol": "file:", + "username": "", + "password": "", + "host": "lion", + "hostname": "lion", + "port": "", + "pathname": "/localhost//pig", + "search": "", + "hash": "" + }, { "input": "file://", "base": "file://ape/", @@ -5229,7 +5549,50 @@ "search": "", "hash": "" }, + "# File URLs with non-empty hosts", + { + "input": "/rooibos", + "base": "file://tea/", + "href": "file://tea/rooibos", + "protocol": "file:", + "username": "", + "password": "", + "host": "tea", + "hostname": "tea", + "port": "", + "pathname": "/rooibos", + "search": "", + "hash": "" + }, + { + "input": "/?chai", + "base": "file://tea/", + "href": "file://tea/?chai", + "protocol": "file:", + "username": "", + "password": "", + "host": "tea", + "hostname": "tea", + "port": "", + "pathname": "/", + "search": "?chai", + "hash": "" + }, "# Windows drive letter handling with the 'file:' base URL", + { + "input": "C|", + "base": "file://host/dir/file", + "href": "file:///C:", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/C:", + "search": "", + "hash": "" + }, { "input": "C|#", "base": "file://host/dir/file", @@ -5329,6 +5692,48 @@ "hash": "" }, "# Windows drive letter quirk in the file slash state", + { + "input": "/c:/foo/bar", + "base": "file:///c:/baz/qux", + "href": "file:///c:/foo/bar", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/c:/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "/c|/foo/bar", + "base": "file:///c:/baz/qux", + "href": "file:///c:/foo/bar", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/c:/foo/bar", + "search": "", + "hash": "" + }, + { + "input": "file:\\c:\\foo\\bar", + "base": "file:///c:/baz/qux", + "href": "file:///c:/foo/bar", + "protocol": "file:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "/c:/foo/bar", + "search": "", + "hash": "" + }, { "input": "/c:/foo/bar", "base": "file://host/path", @@ -5343,9 +5748,9 @@ "search": "", "hash": "" }, - "# Windows drive letter quirk (no host)", + "# Windows drive letter quirk with not empty host", { - "input": "file:/C|/", + "input": "file://example.net/C:/", "base": "about:blank", "href": "file:///C:/", "protocol": "file:", @@ -5359,7 +5764,7 @@ "hash": "" }, { - "input": "file://C|/", + "input": "file://1.2.3.4/C:/", "base": "about:blank", "href": "file:///C:/", "protocol": "file:", @@ -5372,9 +5777,8 @@ "search": "", "hash": "" }, - "# Windows drive letter quirk with not empty host", { - "input": "file://example.net/C:/", + "input": "file://[1::8]/C:/", "base": "about:blank", "href": "file:///C:/", "protocol": "file:", @@ -5387,8 +5791,9 @@ "search": "", "hash": "" }, + "# Windows drive letter quirk (no host)", { - "input": "file://1.2.3.4/C:/", + "input": "file:/C|/", "base": "about:blank", "href": "file:///C:/", "protocol": "file:", @@ -5402,7 +5807,7 @@ "hash": "" }, { - "input": "file://[1::8]/C:/", + "input": "file://C|/", "base": "about:blank", "href": "file:///C:/", "protocol": "file:", @@ -5544,6 +5949,109 @@ "failure": true }, "# Non-special-URL path tests", + { + "input": "sc://ñ", + "base": "about:blank", + "href": "sc://%C3%B1", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, + { + "input": "sc://ñ?x", + "base": "about:blank", + "href": "sc://%C3%B1?x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "?x", + "hash": "" + }, + { + "input": "sc://ñ#x", + "base": "about:blank", + "href": "sc://%C3%B1#x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "", + "hash": "#x" + }, + { + "input": "#x", + "base": "sc://ñ", + "href": "sc://%C3%B1#x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "", + "hash": "#x" + }, + { + "input": "?x", + "base": "sc://ñ", + "href": "sc://%C3%B1?x", + "origin": "null", + "protocol": "sc:", + "username": "", + "password": "", + "host": "%C3%B1", + "hostname": "%C3%B1", + "port": "", + "pathname": "", + "search": "?x", + "hash": "" + }, + { + "input": "sc://?", + "base": "about:blank", + "href": "sc://?", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, + { + "input": "sc://#", + "base": "about:blank", + "href": "sc://#", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "", + "search": "", + "hash": "" + }, { "input": "///", "base": "sc://x/", @@ -5558,6 +6066,34 @@ "search": "", "hash": "" }, + { + "input": "////", + "base": "sc://x/", + "href": "sc:////", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//", + "search": "", + "hash": "" + }, + { + "input": "////x/", + "base": "sc://x/", + "href": "sc:////x/", + "protocol": "sc:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "//x/", + "search": "", + "hash": "" + }, { "input": "tftp://foobar.com/someconfig;mode=netascii", "base": "about:blank", @@ -6048,27 +6584,34 @@ "search": "?a", "hash": "#%GH" }, - "Bad bases", + "URLs that require a non-about:blank base. (Also serve as invalid base tests.)", { - "input": "test-a.html", - "base": "a", + "input": "a", + "base": "about:blank", "failure": true }, { - "input": "test-a-slash.html", - "base": "a/", + "input": "a/", + "base": "about:blank", "failure": true }, { - "input": "test-a-slash-slash.html", - "base": "a//", + "input": "a//", + "base": "about:blank", "failure": true }, + "Bases that don't fail to parse but fail to be bases", { "input": "test-a-colon.html", "base": "a:", "failure": true }, + { + "input": "test-a-colon-b.html", + "base": "a:b", + "failure": true + }, + "Other base URL tests, that must succeed", { "input": "test-a-colon-slash.html", "base": "a:/", @@ -6097,11 +6640,6 @@ "search": "", "hash": "" }, - { - "input": "test-a-colon-b.html", - "base": "a:b", - "failure": true - }, { "input": "test-a-colon-slash-b.html", "base": "a:/b",