Skip to content

Commit 59733e1

Browse files
authored
Audit use of unsafe in uri/authority.rs (#414)
* Add unit test for rejecting invalid UTF-8 * Add Authority::from_static() test * Refactor uri::Authority Extract the common code from three ways of creating an Authority into a private create_authority() function. * Add comments to explain the safety of Authority The comments describe the preconditions and postconditions that together ensure that the one use of 'unsafe' in uri/authority.rs is sound. * Fix typo
1 parent 3ef1133 commit 59733e1

File tree

2 files changed

+63
-31
lines changed

2 files changed

+63
-31
lines changed

src/uri/authority.rs

+57-31
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,9 @@ impl Authority {
2323

2424
// Not public while `bytes` is unstable.
2525
pub(super) fn from_shared(s: Bytes) -> Result<Self, InvalidUri> {
26-
let authority_end = Authority::parse_non_empty(&s[..])?;
27-
28-
if authority_end != s.len() {
29-
return Err(ErrorKind::InvalidUriChar.into());
30-
}
31-
32-
Ok(Authority {
33-
data: unsafe { ByteStr::from_utf8_unchecked(s) },
34-
})
26+
// Precondition on create_authority: trivially satisfied by the
27+
// identity clousre
28+
create_authority(s, |s| s)
3529
}
3630

3731
/// Attempt to convert an `Authority` from a static string.
@@ -52,18 +46,8 @@ impl Authority {
5246
/// assert_eq!(authority.host(), "example.com");
5347
/// ```
5448
pub fn from_static(src: &'static str) -> Self {
55-
let s = src.as_bytes();
56-
let b = Bytes::from_static(s);
57-
let authority_end =
58-
Authority::parse_non_empty(&b[..]).expect("static str is not valid authority");
59-
60-
if authority_end != b.len() {
61-
panic!("static str is not valid authority");
62-
}
63-
64-
Authority {
65-
data: unsafe { ByteStr::from_utf8_unchecked(b) },
66-
}
49+
Authority::from_shared(Bytes::from_static(src.as_bytes()))
50+
.expect("static str is not valid authority")
6751
}
6852

6953

@@ -83,6 +67,8 @@ impl Authority {
8367
}
8468

8569
// Note: this may return an *empty* Authority. You might want `parse_non_empty`.
70+
// Postcondition: for all Ok() returns, s[..ret.unwrap()] is valid UTF-8 where
71+
// ret is the return value.
8672
pub(super) fn parse(s: &[u8]) -> Result<usize, InvalidUri> {
8773
let mut colon_cnt = 0;
8874
let mut start_bracket = false;
@@ -91,6 +77,10 @@ impl Authority {
9177
let mut end = s.len();
9278
let mut at_sign_pos = None;
9379

80+
// Among other things, this loop checks that every byte in s up to the
81+
// first '/', '?', or '#' is a valid URI character (or in some contexts,
82+
// a '%'). This means that each such byte is a valid single-byte UTF-8
83+
// code point.
9484
for (i, &b) in s.iter().enumerate() {
9585
match URI_CHARS[b as usize] {
9686
b'/' | b'?' | b'#' => {
@@ -168,6 +158,9 @@ impl Authority {
168158
//
169159
// This should be used by functions that allow a user to parse
170160
// an `Authority` by itself.
161+
//
162+
// Postcondition: for all Ok() returns, s[..ret.unwrap()] is valid UTF-8 where
163+
// ret is the return value.
171164
fn parse_non_empty(s: &[u8]) -> Result<usize, InvalidUri> {
172165
if s.is_empty() {
173166
return Err(ErrorKind::Empty.into());
@@ -432,17 +425,10 @@ impl<'a> TryFrom<&'a [u8]> for Authority {
432425
#[inline]
433426
fn try_from(s: &'a [u8]) -> Result<Self, Self::Error> {
434427
// parse first, and only turn into Bytes if valid
435-
let end = Authority::parse_non_empty(s)?;
436-
437-
if end != s.len() {
438-
return Err(ErrorKind::InvalidAuthority.into());
439-
}
440428

441-
Ok(Authority {
442-
data: unsafe {
443-
ByteStr::from_utf8_unchecked(Bytes::copy_from_slice(s))
444-
},
445-
})
429+
// Preconditon on create_authority: copy_from_slice() copies all of
430+
// bytes from the [u8] parameter into a new Bytes
431+
create_authority(s, |s| Bytes::copy_from_slice(s))
446432
}
447433
}
448434

@@ -494,6 +480,30 @@ fn host(auth: &str) -> &str {
494480
}
495481
}
496482

483+
// Precondition: f converts all of the bytes in the passed in B into the
484+
// returned Bytes.
485+
fn create_authority<B, F>(b: B, f: F) -> Result<Authority, InvalidUri>
486+
where
487+
B: AsRef<[u8]>,
488+
F: FnOnce(B) -> Bytes,
489+
{
490+
let s = b.as_ref();
491+
let authority_end = Authority::parse_non_empty(s)?;
492+
493+
if authority_end != s.len() {
494+
return Err(ErrorKind::InvalidUriChar.into());
495+
}
496+
497+
let bytes = f(b);
498+
499+
Ok(Authority {
500+
// Safety: the postcondition on parse_non_empty() and the check against
501+
// s.len() ensure that b is valid UTF-8. The precondition on f ensures
502+
// that this is carried through to bytes.
503+
data: unsafe { ByteStr::from_utf8_unchecked(bytes) },
504+
})
505+
}
506+
497507
#[cfg(test)]
498508
mod tests {
499509
use super::*;
@@ -529,6 +539,12 @@ mod tests {
529539
assert_eq!("EXAMPLE.com", authority);
530540
}
531541

542+
#[test]
543+
fn from_static_equates_with_a_str() {
544+
let authority = Authority::from_static("example.com");
545+
assert_eq!(authority, "example.com");
546+
}
547+
532548
#[test]
533549
fn not_equal_with_a_str_of_a_different_authority() {
534550
let authority: Authority = "example.com".parse().unwrap();
@@ -616,4 +632,14 @@ mod tests {
616632
let err = Authority::parse_non_empty(b"[fe80::1:2:3:4]%20").unwrap_err();
617633
assert_eq!(err.0, ErrorKind::InvalidAuthority);
618634
}
635+
636+
#[test]
637+
fn rejects_invalid_utf8() {
638+
let err = Authority::try_from([0xc0u8].as_ref()).unwrap_err();
639+
assert_eq!(err.0, ErrorKind::InvalidUriChar);
640+
641+
let err = Authority::from_shared(Bytes::from_static([0xc0u8].as_ref()))
642+
.unwrap_err();
643+
assert_eq!(err.0, ErrorKind::InvalidUriChar);
644+
}
619645
}

src/uri/mod.rs

+6
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,12 @@ enum ErrorKind {
143143
// u16::MAX is reserved for None
144144
const MAX_LEN: usize = (u16::MAX - 1) as usize;
145145

146+
// URI_CHARS is a table of valid characters in a URI. An entry in the table is
147+
// 0 for invalid characters. For valid characters the entry is itself (i.e.
148+
// the entry for 33 is b'!' because b'!' == 33u8). An important characteristic
149+
// of this table is that all entries above 127 are invalid. This makes all of the
150+
// valid entries a valid single-byte UTF-8 code point. This means that a slice
151+
// of such valid entries is valid UTF-8.
146152
const URI_CHARS: [u8; 256] = [
147153
// 0 1 2 3 4 5 6 7 8 9
148154
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // x

0 commit comments

Comments
 (0)