diff --git a/src/uri/mod.rs b/src/uri/mod.rs index 716ea1ad..38df0287 100644 --- a/src/uri/mod.rs +++ b/src/uri/mod.rs @@ -785,12 +785,14 @@ impl From<Uri> for Parts { } } +// parse_full() parses a Uri that includes more than just a path. It +// expects that at least one of the scheme or authority will be present +// as well. fn parse_full(mut s: Bytes) -> Result<Uri, InvalidUri> { // Parse the scheme let scheme = match Scheme2::parse(&s[..])? { Scheme2::None => Scheme2::None, Scheme2::Standard(p) => { - // TODO: use truncate let _ = s.split_to(p.len() + 3); Scheme2::Standard(p) } @@ -798,10 +800,12 @@ fn parse_full(mut s: Bytes) -> Result<Uri, InvalidUri> { // Grab the protocol let mut scheme = s.split_to(n + 3); - // Strip ://, TODO: truncate - let _ = scheme.split_off(n); + // Strip :// + scheme.truncate(n); // Allocate the ByteStr + // Safety: the postcondition on Scheme2::parse() means that + // s[0..n+3] is valid UTF-8. scheme is a subslice of s[0..n+3]. let val = unsafe { ByteStr::from_utf8_unchecked(scheme) }; Scheme2::Other(Box::new(val)) @@ -813,28 +817,23 @@ fn parse_full(mut s: Bytes) -> Result<Uri, InvalidUri> { let authority_end = Authority::parse(&s[..])?; if scheme.is_none() { + // Path is not allowed if there is no scheme. if authority_end != s.len() { return Err(ErrorKind::InvalidFormat.into()); } - - let authority = Authority { - data: unsafe { ByteStr::from_utf8_unchecked(s) }, - }; - - return Ok(Uri { - scheme: scheme.into(), - authority: authority, - path_and_query: PathAndQuery::empty(), - }); - } - - // Authority is required when absolute - if authority_end == 0 { - return Err(ErrorKind::InvalidFormat.into()); + } else { + // Authority is required when absolute + if authority_end == 0 { + return Err(ErrorKind::InvalidFormat.into()); + } } let authority = s.split_to(authority_end); let authority = Authority { + // Safety: The postcondition on Authority::parse() means that + // s[0..authority_end] is valid UTF-8 after that call.
The call + // to s.split_to() means that authority here is what s[0..authority_end] + // was after the call to Authority::parse(). data: unsafe { ByteStr::from_utf8_unchecked(authority) }, }; diff --git a/src/uri/scheme.rs b/src/uri/scheme.rs index 5bbab11e..be881c33 100644 --- a/src/uri/scheme.rs +++ b/src/uri/scheme.rs @@ -253,6 +253,7 @@ impl Scheme2 { } } + // Postcondition: On Ok(Scheme2::Other(n)) return, s[0..n+3] is valid UTF-8 pub(super) fn parse(s: &[u8]) -> Result<Scheme2<usize>, InvalidUri> { if s.len() >= 7 { // Check for HTTP @@ -270,6 +271,9 @@ impl Scheme2 { } if s.len() > 3 { + // The only Ok(Scheme2::Other(n)) return from this function is an + // early exit from this loop. This loop checks each byte in s against + // SCHEME_CHARS until one of the early exit conditions is met. for i in 0..s.len() { let b = s[i]; @@ -290,10 +294,15 @@ impl Scheme2 { } // Return scheme + // Postcondition: Every byte in s[0..i] has matched the + // _ arm so is valid UTF-8. s[i..i+3] matches "://" which + // is also valid UTF-8. Thus s[0..i+3] is valid UTF-8. return Ok(Scheme2::Other(i)); } // Invald scheme character, abort 0 => break, + // Valid scheme character: implies that b is a valid, single + // byte UTF-8 codepoint.
_ => {} } } diff --git a/src/uri/tests.rs b/src/uri/tests.rs index 719cb94e..35dd1dc7 100644 --- a/src/uri/tests.rs +++ b/src/uri/tests.rs @@ -1,4 +1,5 @@ use std::str::FromStr; +use std::convert::TryFrom; use super::{ErrorKind, InvalidUri, Port, Uri, URI_CHARS}; @@ -517,3 +518,51 @@ fn test_partial_eq_path_with_terminating_questionmark() { assert_eq!(uri, a); } + +#[test] +fn test_uri_from_u8_slice() { + let uri = Uri::try_from(b"http://example.com".as_ref()).expect("conversion"); + + assert_eq!(uri, "http://example.com"); +} + +#[test] +fn test_uri_from_u8_slice_error() { + fn err(s: &[u8]) { + Uri::try_from(s).unwrap_err(); + } + + err(b"http://"); + err(b"htt:p//host"); + err(b"hyper.rs/"); + err(b"hyper.rs?key=val"); + err(b"?key=val"); + err(b"localhost/"); + err(b"localhost?key=val"); + err(b"\0"); + err(b"http://[::1"); + err(b"http://::1]"); + err(b"localhost:8080:3030"); + err(b"@"); + err(b"http://username:password@/wut"); + + // illegal queries + err(b"/?foo\rbar"); + err(b"/?foo\nbar"); + err(b"/?<"); + err(b"/?>"); + + // invalid UTF-8 (the byte 0xc0 never occurs in well-formed UTF-8) + err(&[0xc0]); + err(&[b'h', b't', b't', b'p', b':', b'/', b'/', 0xc0]); + err(b"http://example.com/\xc0"); + err(b"http://example.com/path?\xc0"); +} + +#[test] +fn test_uri_with_invalid_fragment_is_valid() { + let uri_bytes = b"http://example.com/path?query#\xc0"; + let uri = Uri::try_from(uri_bytes.as_ref()).expect("conversion error"); + + assert_eq!(uri, "http://example.com/path?query"); +}