Skip to content

Commit f399df0

Browse files
committed
Merge remote-tracking branch 'remotes/sbosnick/audit_header_name' into constname_merge
Merges #428 into #499
2 parents 50642bd + ccf8889 commit f399df0

File tree

3 files changed

+119
-18
lines changed

3 files changed

+119
-18
lines changed

Cargo.toml

+6
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ seahash = "3.0.5"
3636
serde = "1.0"
3737
serde_json = "1.0"
3838
doc-comment = "0.3"
39+
criterion = "0.3.2"
3940

4041
[[bench]]
4142
name = "header_map"
@@ -45,6 +46,11 @@ path = "benches/header_map/mod.rs"
4546
name = "header_name"
4647
path = "benches/header_name.rs"
4748

49+
[[bench]]
50+
name = "header_name2"
51+
path = "benches/header_name2.rs"
52+
harness = false
53+
4854
[[bench]]
4955
name = "header_value"
5056
path = "benches/header_value.rs"

benches/header_name2.rs

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
use criterion::{criterion_group, criterion_main, BenchmarkId,Criterion, Throughput};
2+
use http::header::HeaderName;
3+
4+
// This is a list of some of the standard headers ordered by increasing size.
5+
// It has exactly one standard header per size (some sizes don't have a standard
6+
// header).
7+
const STANDARD_HEADERS_BY_SIZE: &[&str] = &[
8+
"te",
9+
"age",
10+
"date",
11+
"allow",
12+
"accept",
13+
"alt-svc",
14+
"if-match",
15+
"forwarded",
16+
"connection",
17+
"retry-after",
18+
"content-type",
19+
"accept-ranges",
20+
"accept-charset",
21+
"accept-encoding",
22+
"content-encoding",
23+
"if-modified-since",
24+
"proxy-authenticate",
25+
"content-disposition",
26+
"sec-websocket-accept",
27+
"sec-websocket-version",
28+
"access-control-max-age",
29+
"content-security-policy",
30+
"sec-websocket-extensions",
31+
"strict-transport-security",
32+
"access-control-allow-origin",
33+
"access-control-allow-headers",
34+
"access-control-expose-headers",
35+
"access-control-request-headers",
36+
"access-control-allow-credentials",
37+
"content-security-policy-report-only",
38+
];
39+
40+
fn header_name_by_size(c: &mut Criterion) {
41+
let mut group = c.benchmark_group("std_hdr");
42+
for name in STANDARD_HEADERS_BY_SIZE {
43+
group.throughput(Throughput::Bytes(name.len() as u64));
44+
group.bench_with_input(BenchmarkId::from_parameter(name), name, |b, name| {
45+
b.iter(|| HeaderName::from_static(name) );
46+
});
47+
}
48+
group.finish();
49+
}
50+
51+
criterion_group!(benches, header_name_by_size);
52+
criterion_main!(benches);

src/header/name.rs

+61-18
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@ use std::borrow::Borrow;
55
use std::error::Error;
66
use std::convert::{TryFrom};
77
use std::hash::{Hash, Hasher};
8+
use std::mem::MaybeUninit;
89
use std::str::FromStr;
9-
use std::{fmt, mem};
10+
use std::fmt;
1011

1112
/// Represents an HTTP header field name
1213
///
@@ -50,6 +51,7 @@ enum Repr<T> {
5051
struct Custom(ByteStr);
5152

5253
#[derive(Debug, Clone)]
54+
// Invariant: If lower then buf is valid UTF-8.
5355
struct MaybeLower<'a> {
5456
buf: &'a [u8],
5557
lower: bool,
@@ -986,6 +988,8 @@ standard_headers! {
986988
/// / DIGIT / ALPHA
987989
/// ; any VCHAR, except delimiters
988990
/// ```
991+
// HEADER_CHARS maps every byte that is 128 or larger to 0 so everything that is
992+
// mapped by HEADER_CHARS, maps to a valid single-byte UTF-8 codepoint.
989993
const HEADER_CHARS: [u8; 256] = [
990994
// 0 1 2 3 4 5 6 7 8 9
991995
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // x
@@ -1017,6 +1021,8 @@ const HEADER_CHARS: [u8; 256] = [
10171021
];
10181022

10191023
/// Valid header name characters for HTTP/2.0 and HTTP/3.0
1024+
// HEADER_CHARS_H2 maps every byte that is 128 or larger to 0 so everything that is
1025+
// mapped by HEADER_CHARS_H2, maps to a valid single-byte UTF-8 codepoint.
10201026
const HEADER_CHARS_H2: [u8; 256] = [
10211027
// 0 1 2 3 4 5 6 7 8 9
10221028
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // x
@@ -1049,15 +1055,18 @@ const HEADER_CHARS_H2: [u8; 256] = [
10491055

10501056
fn parse_hdr<'a>(
10511057
data: &'a [u8],
1052-
b: &'a mut [u8; 64],
1058+
b: &'a mut [MaybeUninit<u8>; SCRATCH_BUF_SIZE],
10531059
table: &[u8; 256],
10541060
) -> Result<HdrName<'a>, InvalidHeaderName> {
10551061
match data.len() {
10561062
0 => Err(InvalidHeaderName::new()),
1057-
len @ 1..=64 => {
1063+
len @ 1..=SCRATCH_BUF_SIZE => {
10581064
// Read from data into the buffer - transforming using `table` as we go
1059-
data.iter().zip(b.iter_mut()).for_each(|(index, out)| *out = table[*index as usize]);
1060-
let name = &b[0..len];
1065+
data.iter()
1066+
.zip(b.iter_mut())
1067+
.for_each(|(index, out)| *out = MaybeUninit::new(table[*index as usize]));
1068+
// Safety: len bytes of b were just initialized.
1069+
let name: &'a [u8] = unsafe { slice_assume_init(&b[0..len]) };
10611070
match StandardHeader::from_bytes(name) {
10621071
Some(sh) => Ok(sh.into()),
10631072
None => {
@@ -1069,7 +1078,7 @@ fn parse_hdr<'a>(
10691078
}
10701079
}
10711080
}
1072-
65..=super::MAX_HEADER_NAME_LEN => Ok(HdrName::custom(data, false)),
1081+
0..=super::MAX_HEADER_NAME_LEN => Ok(HdrName::custom(data, false)),
10731082
_ => Err(InvalidHeaderName::new()),
10741083
}
10751084
}
@@ -1086,14 +1095,14 @@ impl HeaderName {
10861095
/// Converts a slice of bytes to an HTTP header name.
10871096
///
10881097
/// This function normalizes the input.
1089-
#[allow(deprecated)]
10901098
pub fn from_bytes(src: &[u8]) -> Result<HeaderName, InvalidHeaderName> {
1091-
#[allow(deprecated)]
1092-
let mut buf = unsafe { mem::uninitialized() };
1099+
let mut buf = uninit_u8_array();
1100+
// Precondition: HEADER_CHARS is a valid table for parse_hdr().
10931101
match parse_hdr(src, &mut buf, &HEADER_CHARS)?.inner {
10941102
Repr::Standard(std) => Ok(std.into()),
10951103
Repr::Custom(MaybeLower { buf, lower: true }) => {
10961104
let buf = Bytes::copy_from_slice(buf);
1105+
// Safety: the invariant on MaybeLower ensures buf is valid UTF-8.
10971106
let val = unsafe { ByteStr::from_utf8_unchecked(buf) };
10981107
Ok(Custom(val).into())
10991108
}
@@ -1102,6 +1111,7 @@ impl HeaderName {
11021111
let mut dst = BytesMut::with_capacity(buf.len());
11031112

11041113
for b in buf.iter() {
1114+
// HEADER_CHARS maps all bytes to valid single-byte UTF-8
11051115
let b = HEADER_CHARS[*b as usize];
11061116

11071117
if b == 0 {
@@ -1111,6 +1121,9 @@ impl HeaderName {
11111121
dst.put_u8(b);
11121122
}
11131123

1124+
// Safety: the loop above maps all bytes in buf to valid single byte
1125+
// UTF-8 before copying them into dst. This means that dst (and hence
1126+
// dst.freeze()) is valid UTF-8.
11141127
let val = unsafe { ByteStr::from_utf8_unchecked(dst.freeze()) };
11151128

11161129
Ok(Custom(val).into())
@@ -1136,25 +1149,29 @@ impl HeaderName {
11361149
/// // Parsing a header that contains uppercase characters
11371150
/// assert!(HeaderName::from_lowercase(b"Content-Length").is_err());
11381151
/// ```
1139-
#[allow(deprecated)]
11401152
pub fn from_lowercase(src: &[u8]) -> Result<HeaderName, InvalidHeaderName> {
1141-
#[allow(deprecated)]
1142-
let mut buf = unsafe { mem::uninitialized() };
1153+
let mut buf = uninit_u8_array();
1154+
// Precondition: HEADER_CHARS_H2 is a valid table for parse_hdr()
11431155
match parse_hdr(src, &mut buf, &HEADER_CHARS_H2)?.inner {
11441156
Repr::Standard(std) => Ok(std.into()),
11451157
Repr::Custom(MaybeLower { buf, lower: true }) => {
11461158
let buf = Bytes::copy_from_slice(buf);
1159+
// Safety: the invariant on MaybeLower ensures buf is valid UTF-8.
11471160
let val = unsafe { ByteStr::from_utf8_unchecked(buf) };
11481161
Ok(Custom(val).into())
11491162
}
11501163
Repr::Custom(MaybeLower { buf, lower: false }) => {
11511164
for &b in buf.iter() {
1165+
// HEADER_CHARS maps all bytes that are not valid single-byte
1166+
// UTF-8 to 0 so this check returns an error for invalid UTF-8.
11521167
if b != HEADER_CHARS[b as usize] {
11531168
return Err(InvalidHeaderName::new());
11541169
}
11551170
}
11561171

11571172
let buf = Bytes::copy_from_slice(buf);
1173+
// Safety: the loop above checks that each byte of buf (either
1174+
// version) is valid UTF-8.
11581175
let val = unsafe { ByteStr::from_utf8_unchecked(buf) };
11591176
Ok(Custom(val).into())
11601177
}
@@ -1481,33 +1498,33 @@ impl Error for InvalidHeaderName {}
14811498
// ===== HdrName =====
14821499

14831500
impl<'a> HdrName<'a> {
1501+
// Precondition: if lower then buf is valid UTF-8
14841502
fn custom(buf: &'a [u8], lower: bool) -> HdrName<'a> {
14851503
HdrName {
1504+
// Invariant (on MaybeLower): follows from the precondition
14861505
inner: Repr::Custom(MaybeLower {
14871506
buf: buf,
14881507
lower: lower,
14891508
}),
14901509
}
14911510
}
14921511

1493-
#[allow(deprecated)]
14941512
pub fn from_bytes<F, U>(hdr: &[u8], f: F) -> Result<U, InvalidHeaderName>
14951513
where F: FnOnce(HdrName<'_>) -> U,
14961514
{
1497-
#[allow(deprecated)]
1498-
let mut buf = unsafe { mem::uninitialized() };
1515+
let mut buf = uninit_u8_array();
1516+
// Precondition: HEADER_CHARS is a valid table for parse_hdr().
14991517
let hdr = parse_hdr(hdr, &mut buf, &HEADER_CHARS)?;
15001518
Ok(f(hdr))
15011519
}
15021520

1503-
#[allow(deprecated)]
15041521
pub fn from_static<F, U>(hdr: &'static str, f: F) -> U
15051522
where
15061523
F: FnOnce(HdrName<'_>) -> U,
15071524
{
1508-
#[allow(deprecated)]
1509-
let mut buf = unsafe { mem::uninitialized() };
1525+
let mut buf = uninit_u8_array();
15101526
let hdr =
1527+
// Precondition: HEADER_CHARS is a valid table for parse_hdr().
15111528
parse_hdr(hdr.as_bytes(), &mut buf, &HEADER_CHARS).expect("static str is invalid name");
15121529
f(hdr)
15131530
}
@@ -1523,6 +1540,7 @@ impl<'a> From<HdrName<'a>> for HeaderName {
15231540
Repr::Custom(maybe_lower) => {
15241541
if maybe_lower.lower {
15251542
let buf = Bytes::copy_from_slice(&maybe_lower.buf[..]);
1543+
// Safety: the invariant on MaybeLower ensures buf is valid UTF-8.
15261544
let byte_str = unsafe { ByteStr::from_utf8_unchecked(buf) };
15271545

15281546
HeaderName {
@@ -1533,9 +1551,14 @@ impl<'a> From<HdrName<'a>> for HeaderName {
15331551
let mut dst = BytesMut::with_capacity(maybe_lower.buf.len());
15341552

15351553
for b in maybe_lower.buf.iter() {
1554+
// HEADER_CHARS maps each byte to a valid single-byte UTF-8
1555+
// codepoint.
15361556
dst.put_u8(HEADER_CHARS[*b as usize]);
15371557
}
15381558

1559+
// Safety: the loop above maps each byte of maybe_lower.buf to a
1560+
// valid single-byte UTF-8 codepoint before copying it into dst.
1561+
// dst (and hence dst.freeze()) is thus valid UTF-8.
15391562
let buf = unsafe { ByteStr::from_utf8_unchecked(dst.freeze()) };
15401563

15411564
HeaderName {
@@ -1606,6 +1629,25 @@ fn eq_ignore_ascii_case(lower: &[u8], s: &[u8]) -> bool {
16061629
})
16071630
}
16081631

1632+
// Utility functions for MaybeUninit<>. These are drawn from unstable APIs on
1633+
// MaybeUninit<> itself.
1634+
const SCRATCH_BUF_SIZE: usize = 64;
1635+
1636+
fn uninit_u8_array() -> [MaybeUninit<u8>; SCRATCH_BUF_SIZE] {
1637+
let arr = MaybeUninit::<[MaybeUninit<u8>; SCRATCH_BUF_SIZE]>::uninit();
1638+
// Safety: assume_init() is claiming that an array of MaybeUninit<>
1639+
// has been initialized, but MaybeUninit<>'s do not require initialization.
1640+
unsafe { arr.assume_init() }
1641+
}
1642+
1643+
// Assuming all the elements are initialized, get a slice of them.
1644+
//
1645+
// Safety: All elements of `slice` must be initialized to prevent
1646+
// undefined behavior.
1647+
unsafe fn slice_assume_init<T>(slice: &[MaybeUninit<T>]) -> &[T] {
1648+
&*(slice as *const [MaybeUninit<T>] as *const [T])
1649+
}
1650+
16091651
#[cfg(test)]
16101652
mod tests {
16111653
use super::*;
@@ -1652,6 +1694,7 @@ mod tests {
16521694
#[test]
16531695
#[should_panic]
16541696
fn test_static_invalid_name_lengths() {
1697+
// Safety: ONE_TOO_LONG contains only the UTF-8 safe, single-byte codepoint b'a'.
16551698
let _ = HeaderName::from_static(unsafe { std::str::from_utf8_unchecked(ONE_TOO_LONG) });
16561699
}
16571700

0 commit comments

Comments
 (0)