Skip to content

Commit ccf8889

Browse files
committed
Add safety comments to header::name module
The comments document the invariant, preconditions, and post-conditions that together ensure that the uses of unsafe related to UTF-8 assumptions (in calls to ByteStr::from_utf8_unchecked()) are sound.
1 parent a3a7800 commit ccf8889

File tree

1 file changed

+64
-19
lines changed

1 file changed

+64
-19
lines changed

src/header/name.rs

Lines changed: 64 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ enum Repr<T> {
5151
struct Custom(ByteStr);
5252

5353
#[derive(Debug, Clone)]
54+
// Invariant: If lower then buf is valid UTF-8.
5455
struct MaybeLower<'a> {
5556
buf: &'a [u8],
5657
lower: bool,
@@ -979,6 +980,8 @@ standard_headers! {
979980
/// / DIGIT / ALPHA
980981
/// ; any VCHAR, except delimiters
981982
/// ```
983+
// HEADER_CHARS maps every byte that is 128 or larger to 0 so everything that is
984+
// mapped by HEADER_CHARS, maps to a valid single-byte UTF-8 codepoint.
982985
const HEADER_CHARS: [u8; 256] = [
983986
// 0 1 2 3 4 5 6 7 8 9
984987
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // x
@@ -1010,6 +1013,8 @@ const HEADER_CHARS: [u8; 256] = [
10101013
];
10111014

10121015
/// Valid header name characters for HTTP/2.0 and HTTP/3.0
1016+
// HEADER_CHARS_H2 maps every byte that is 128 or larger to 0 so everything that is
1017+
// mapped by HEADER_CHARS_H2, maps to a valid single-byte UTF-8 codepoint.
10131018
const HEADER_CHARS_H2: [u8; 256] = [
10141019
// 0 1 2 3 4 5 6 7 8 9
10151020
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // x
@@ -1044,6 +1049,7 @@ const HEADER_CHARS_H2: [u8; 256] = [
10441049
/// This version is best under optimized mode, however in a wasm debug compile,
10451050
/// the `eq` macro expands to 1 + 1 + 1 + 1... and wasm explodes when this chain gets too long
10461051
/// See https://github.com/DenisKolodin/yew/issues/478
1052+
// Precondition: table maps all bytes that are not valid single-byte UTF-8 to something that is.
10471053
fn parse_hdr<'a>(
10481054
data: &'a [u8],
10491055
b: &'a mut [MaybeUninit<u8>; SCRATCH_BUF_SIZE],
@@ -1053,30 +1059,33 @@ fn parse_hdr<'a>(
10531059

10541060
let len = data.len();
10551061

1056-
// Precondition: each element of buf must be intitialized
1062+
// Precondition: each element of buf must be initialized and must be
1063+
// a valid single-byte UTF-8 codepoint.
10571064
let validate = |buf: &'a [MaybeUninit<u8>]| {
10581065
// Safety: follows from the precondition
10591066
let buf = unsafe {slice_assume_init(buf)};
10601067
if buf.iter().any(|&b| b == 0) {
10611068
Err(InvalidHeaderName::new())
10621069
} else {
1070+
// Precondition: satisfied by the precondition of validate.
10631071
Ok(HdrName::custom(buf, true))
10641072
}
10651073
};
10661074

10671075
// Called as either eq!(b == b'a' b'b' b'c') or eq!(b[i] == b'a' b'b' b'c')
1068-
// Precondition: the first n elements of b (or the first n starting at i)
1069-
// must be intitialized, where n is the number of bytes listed after the '=='
1070-
// in the invocation.
1076+
// Precondition: the first n elements of b (or the first n starting at i) must be
1077+
// initialized, where n is the number of bytes listed after the '==' in the
1078+
// invocation.
10711079
macro_rules! eq {
10721080
(($($cmp:expr,)*) $v:ident[$n:expr] ==) => {
10731081
$($cmp) && *
10741082
};
10751083
(($($cmp:expr,)*) $v:ident[$n:expr] == $a:tt $($rest:tt)*) => {
1076-
// Safety: this arm is matched once for each byte after the '==' in
1077-
// the invocation (starting at 0 or i depending on the form of the call).
1078-
// By the precondtion $v[$n] is intitialized for each such match.
1079-
eq!(($($cmp,)* unsafe {*($v[$n].as_ptr())} == $a ,) $v[$n+1] == $($rest)*)
1084+
// Safety: this arm is matched once for each byte after the '==' in the
1085+
// invocation (starting at 0 or i depending on the form of the call). By
1086+
// the precondition $v[$n] is initialized for each such match.
1087+
eq!(($($cmp,)* unsafe {*($v[$n].as_ptr())} == $a ,) $v[$n+1] ==
1088+
$($rest)*)
10801089
};
10811090
($v:ident == $($rest:tt)+) => {
10821091
eq!(() $v[0] == $($rest)+)
@@ -1086,11 +1095,12 @@ fn parse_hdr<'a>(
10861095
};
10871096
}
10881097

1089-
1090-
// Post-condition: the first n elements of $d are intitialized where n is the
1091-
// third paramter to the macro. Note that this macro overwrite the first n elements
1092-
// of $d without dropping the existing contents (if any) but the elements of $d
1093-
// are u8's so no drop is necessary.
1098+
// Post-condition: the first n elements of $d are initialized to a valid
1099+
// single-byte UTF-8 codepoint where n is the third parameter to the macro. Note
1100+
// that this macro overwrites the first n elements of $d without dropping the
1101+
// existing contents (if any) but the elements of $d are u8's so no drop is
1102+
// necessary. The UTF-8 part of the post-condition follows from the precondition
1103+
// on table that is a part of parse_hdr().
10941104
macro_rules! to_lower {
10951105
($d:ident, $src:ident, 1) => { $d[0] = MaybeUninit::new(table[$src[0] as usize]); };
10961106
($d:ident, $src:ident, 2) => { to_lower!($d, $src, 1); $d[1] = MaybeUninit::new(table[$src[1] as usize]); };
@@ -1155,8 +1165,9 @@ fn parse_hdr<'a>(
11551165
Ok(Te.into())
11561166
} else {
11571167
// Precondition: the post-condition on to_lower!() ensures that the
1158-
// first 2 elements of b are intitialized. len == 2 so all of
1159-
// b[..len] is intitialized.
1168+
// first 2 elements of b are initialized and are valid single-byte
1169+
// UTF-8. len == 2 so all of b[..len] is initialized and is valid
1170+
// UTF-8.
11601171
validate(&b[..len])
11611172
}
11621173
}
@@ -1573,11 +1584,13 @@ fn parse_hdr<'a>(
15731584
_ => {
15741585
if len < 64 {
15751586
for i in 0..len {
1587+
// The precondition on table for parse_hdr() means that b[i] is
1588+
// initialized to a valid single-byte UTF-8 codepoint.
15761589
b[i] = MaybeUninit::new(table[data[i] as usize]);
15771590
}
15781591

15791592
// Precondition: the first len bytes of b are initialized in the loop above so
1580-
// b[..len] is intitialized.
1593+
// b[..len] is initialized and is valid UTF-8.
15811594
validate(&b[..len])
15821595
} else {
15831596
Ok(HdrName::custom(data, false))
@@ -1588,6 +1601,7 @@ fn parse_hdr<'a>(
15881601

15891602
#[cfg(all(debug_assertions, target_arch = "wasm32"))]
15901603
/// This version works best in debug mode in wasm
1604+
// Precondition: table maps all bytes that are not valid single-byte UTF-8 to something that is.
15911605
fn parse_hdr<'a>(
15921606
data: &'a [u8],
15931607
b: &'a mut [MaybeUninit<u8>; SCRATCH_BUF_SIZE],
@@ -1597,11 +1611,13 @@ fn parse_hdr<'a>(
15971611

15981612
let len = data.len();
15991613

1614+
// Precondition: the first len bytes of buf are valid UTF-8.
16001615
let validate = |buf: &'a [u8], len: usize| {
16011616
let buf = &buf[..len];
16021617
if buf.iter().any(|&b| b == 0) {
16031618
Err(InvalidHeaderName::new())
16041619
} else {
1620+
// Precondition: follows from the precondition on validate.
16051621
Ok(HdrName::custom(buf, true))
16061622
}
16071623
};
@@ -1617,9 +1633,13 @@ fn parse_hdr<'a>(
16171633
len if len > 64 => Ok(HdrName::custom(data, false)),
16181634
len => {
16191635
// Read from data into the buffer - transforming using `table` as we go.
1620-
// The assignment to *out ensures that each byte is intitialized. Since
1621-
// *out is a u8 it doesn't matter that we are not dropping *out before accessing it.
1622-
data.iter().zip(b.iter_mut()).for_each(|(index, out)| *out = MaybeUninit::new(table[*index as usize]));
1636+
// The assignment to *out ensures that each byte is initialized. Since
1637+
// *out is a u8 it doesn't matter that we are not dropping *out before
1638+
// accessing it. The precondition on table for parse_hdr() means that
1639+
// each initialized byte of b is valid UTF-8.
1640+
data.iter().zip(b.iter_mut()).for_each(|(index, out)| *out =
1641+
MaybeUninit::new(table[*index as
1642+
usize]));
16231643
// Safety: We just initialized the first len bytes of b in the previous line.
16241644
let b = unsafe {slice_assume_init(&b[..len])};
16251645
match &b[0..len] {
@@ -1704,6 +1724,8 @@ fn parse_hdr<'a>(
17041724
b"content-security-policy-report-only" => {
17051725
Ok(ContentSecurityPolicyReportOnly.into())
17061726
}
1727+
// Precondition: other is the first len bytes of b which was
1728+
// initialized to valid UTF-8 above.
17071729
other => validate(other, len),
17081730
}
17091731
}
@@ -1724,10 +1746,12 @@ impl HeaderName {
17241746
/// This function normalizes the input.
17251747
pub fn from_bytes(src: &[u8]) -> Result<HeaderName, InvalidHeaderName> {
17261748
let mut buf = uninit_u8_array();
1749+
// Precondition: HEADER_CHARS is a valid table for parse_hdr().
17271750
match parse_hdr(src, &mut buf, &HEADER_CHARS)?.inner {
17281751
Repr::Standard(std) => Ok(std.into()),
17291752
Repr::Custom(MaybeLower { buf, lower: true }) => {
17301753
let buf = Bytes::copy_from_slice(buf);
1754+
// Safety: the invariant on MaybeLower ensures buf is valid UTF-8.
17311755
let val = unsafe { ByteStr::from_utf8_unchecked(buf) };
17321756
Ok(Custom(val).into())
17331757
}
@@ -1736,6 +1760,7 @@ impl HeaderName {
17361760
let mut dst = BytesMut::with_capacity(buf.len());
17371761

17381762
for b in buf.iter() {
1763+
// HEADER_CHARS maps all bytes to valid single-byte UTF-8
17391764
let b = HEADER_CHARS[*b as usize];
17401765

17411766
if b == 0 {
@@ -1745,6 +1770,9 @@ impl HeaderName {
17451770
dst.put_u8(b);
17461771
}
17471772

1773+
// Safety: the loop above maps all bytes in buf to valid single byte
1774+
// UTF-8 before copying them into dst. This means that dst (and hence
1775+
// dst.freeze()) is valid UTF-8.
17481776
let val = unsafe { ByteStr::from_utf8_unchecked(dst.freeze()) };
17491777

17501778
Ok(Custom(val).into())
@@ -1772,21 +1800,27 @@ impl HeaderName {
17721800
/// ```
17731801
pub fn from_lowercase(src: &[u8]) -> Result<HeaderName, InvalidHeaderName> {
17741802
let mut buf = uninit_u8_array();
1803+
// Precondition: HEADER_CHARS_H2 is a valid table for parse_hdr()
17751804
match parse_hdr(src, &mut buf, &HEADER_CHARS_H2)?.inner {
17761805
Repr::Standard(std) => Ok(std.into()),
17771806
Repr::Custom(MaybeLower { buf, lower: true }) => {
17781807
let buf = Bytes::copy_from_slice(buf);
1808+
// Safety: the invariant on MaybeLower ensures buf is valid UTF-8.
17791809
let val = unsafe { ByteStr::from_utf8_unchecked(buf) };
17801810
Ok(Custom(val).into())
17811811
}
17821812
Repr::Custom(MaybeLower { buf, lower: false }) => {
17831813
for &b in buf.iter() {
1814+
// HEADER_CHARS maps all bytes that are not valid single-byte
1815+
// UTF-8 to 0 so this check returns an error for invalid UTF-8.
17841816
if b != HEADER_CHARS[b as usize] {
17851817
return Err(InvalidHeaderName::new());
17861818
}
17871819
}
17881820

17891821
let buf = Bytes::copy_from_slice(buf);
1822+
// Safety: the loop above checks that each byte of buf (either
1823+
// version) is valid UTF-8.
17901824
let val = unsafe { ByteStr::from_utf8_unchecked(buf) };
17911825
Ok(Custom(val).into())
17921826
}
@@ -1831,6 +1865,7 @@ impl HeaderName {
18311865
pub fn from_static(src: &'static str) -> HeaderName {
18321866
let bytes = src.as_bytes();
18331867
let mut buf = uninit_u8_array();
1868+
// Precondition: HEADER_CHARS_H2 is a valid table for parse_hdr()
18341869
match parse_hdr(bytes, &mut buf, &HEADER_CHARS_H2) {
18351870
Ok(hdr_name) => match hdr_name.inner {
18361871
Repr::Standard(std) => std.into(),
@@ -2073,8 +2108,10 @@ impl Error for InvalidHeaderName {}
20732108
// ===== HdrName =====
20742109

20752110
impl<'a> HdrName<'a> {
2111+
// Precondition: if lower then buf is valid UTF-8
20762112
fn custom(buf: &'a [u8], lower: bool) -> HdrName<'a> {
20772113
HdrName {
2114+
// Invariant (on MaybeLower): follows from the precondition
20782115
inner: Repr::Custom(MaybeLower {
20792116
buf: buf,
20802117
lower: lower,
@@ -2086,6 +2123,7 @@ impl<'a> HdrName<'a> {
20862123
where F: FnOnce(HdrName<'_>) -> U,
20872124
{
20882125
let mut buf = uninit_u8_array();
2126+
// Precondition: HEADER_CHARS is a valid table for parse_hdr().
20892127
let hdr = parse_hdr(hdr, &mut buf, &HEADER_CHARS)?;
20902128
Ok(f(hdr))
20912129
}
@@ -2096,6 +2134,7 @@ impl<'a> HdrName<'a> {
20962134
{
20972135
let mut buf = uninit_u8_array();
20982136
let hdr =
2137+
// Precondition: HEADER_CHARS is a valid table for parse_hdr().
20992138
parse_hdr(hdr.as_bytes(), &mut buf, &HEADER_CHARS).expect("static str is invalid name");
21002139
f(hdr)
21012140
}
@@ -2111,6 +2150,7 @@ impl<'a> From<HdrName<'a>> for HeaderName {
21112150
Repr::Custom(maybe_lower) => {
21122151
if maybe_lower.lower {
21132152
let buf = Bytes::copy_from_slice(&maybe_lower.buf[..]);
2153+
// Safety: the invariant on MaybeLower ensures buf is valid UTF-8.
21142154
let byte_str = unsafe { ByteStr::from_utf8_unchecked(buf) };
21152155

21162156
HeaderName {
@@ -2121,9 +2161,14 @@ impl<'a> From<HdrName<'a>> for HeaderName {
21212161
let mut dst = BytesMut::with_capacity(maybe_lower.buf.len());
21222162

21232163
for b in maybe_lower.buf.iter() {
2164+
// HEADER_CHARS maps each byte to a valid single-byte UTF-8
2165+
// codepoint.
21242166
dst.put_u8(HEADER_CHARS[*b as usize]);
21252167
}
21262168

2169+
// Safety: the loop above maps each byte of maybe_lower.buf to a
2170+
// valid single-byte UTF-8 codepoint before copying it into dst.
2171+
// dst (and hence dst.freeze()) is thus valid UTF-8.
21272172
let buf = unsafe { ByteStr::from_utf8_unchecked(dst.freeze()) };
21282173

21292174
HeaderName {

0 commit comments

Comments
 (0)