Skip to content

Convert header::name to use MaybeUninit #555

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ seahash = "3.0.5"
serde = "1.0"
serde_json = "1.0"
doc-comment = "0.3"
criterion = "0.3.2"

[[bench]]
name = "header_map"
Expand All @@ -45,6 +46,11 @@ path = "benches/header_map/mod.rs"
name = "header_name"
path = "benches/header_name.rs"

[[bench]]
name = "header_name2"
path = "benches/header_name2.rs"
harness = false

[[bench]]
name = "header_value"
path = "benches/header_value.rs"
Expand Down
52 changes: 52 additions & 0 deletions benches/header_name2.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
use criterion::{criterion_group, criterion_main, BenchmarkId,Criterion, Throughput};
use http::header::HeaderName;

// This is a list of some of the standard headers ordered by increasing size.
// It has exactly one standard header per size (some sizes don't have a standard
// header).
const STANDARD_HEADERS_BY_SIZE: &[&str] = &[
"te",
"age",
"date",
"allow",
"accept",
"alt-svc",
"if-match",
"forwarded",
"connection",
"retry-after",
"content-type",
"accept-ranges",
"accept-charset",
"accept-encoding",
"content-encoding",
"if-modified-since",
"proxy-authenticate",
"content-disposition",
"sec-websocket-accept",
"sec-websocket-version",
"access-control-max-age",
"content-security-policy",
"sec-websocket-extensions",
"strict-transport-security",
"access-control-allow-origin",
"access-control-allow-headers",
"access-control-expose-headers",
"access-control-request-headers",
"access-control-allow-credentials",
"content-security-policy-report-only",
];

fn header_name_by_size(c: &mut Criterion) {
let mut group = c.benchmark_group("std_hdr");
for name in STANDARD_HEADERS_BY_SIZE {
group.throughput(Throughput::Bytes(name.len() as u64));
group.bench_with_input(BenchmarkId::from_parameter(name), name, |b, name| {
b.iter(|| HeaderName::from_static(name) );
});
}
group.finish();
}

criterion_group!(benches, header_name_by_size);
criterion_main!(benches);
80 changes: 62 additions & 18 deletions src/header/name.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@ use std::borrow::Borrow;
use std::error::Error;
use std::convert::{TryFrom};
use std::hash::{Hash, Hasher};
use std::mem::MaybeUninit;
use std::str::FromStr;
use std::{fmt, mem};
use std::fmt;

/// Represents an HTTP header field name
///
Expand Down Expand Up @@ -50,6 +51,7 @@ enum Repr<T> {
struct Custom(ByteStr);

#[derive(Debug, Clone)]
// Invariant: If lower then buf is valid UTF-8.
struct MaybeLower<'a> {
buf: &'a [u8],
lower: bool,
Expand Down Expand Up @@ -986,6 +988,8 @@ standard_headers! {
/// / DIGIT / ALPHA
/// ; any VCHAR, except delimiters
/// ```
// HEADER_CHARS maps every byte that is 128 or larger to 0 so everything that is
// mapped by HEADER_CHARS, maps to a valid single-byte UTF-8 codepoint.
const HEADER_CHARS: [u8; 256] = [
// 0 1 2 3 4 5 6 7 8 9
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // x
Expand Down Expand Up @@ -1017,6 +1021,8 @@ const HEADER_CHARS: [u8; 256] = [
];

/// Valid header name characters for HTTP/2.0 and HTTP/3.0
// HEADER_CHARS_H2 maps every byte that is 128 or larger to 0 so everything that is
// mapped by HEADER_CHARS_H2, maps to a valid single-byte UTF-8 codepoint.
const HEADER_CHARS_H2: [u8; 256] = [
// 0 1 2 3 4 5 6 7 8 9
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // x
Expand Down Expand Up @@ -1049,15 +1055,18 @@ const HEADER_CHARS_H2: [u8; 256] = [

fn parse_hdr<'a>(
data: &'a [u8],
b: &'a mut [u8; 64],
b: &'a mut [MaybeUninit<u8>; SCRATCH_BUF_SIZE],
table: &[u8; 256],
) -> Result<HdrName<'a>, InvalidHeaderName> {
match data.len() {
0 => Err(InvalidHeaderName::new()),
len @ 1..=64 => {
len @ 1..=SCRATCH_BUF_SIZE => {
// Read from data into the buffer - transforming using `table` as we go
data.iter().zip(b.iter_mut()).for_each(|(index, out)| *out = table[*index as usize]);
let name = &b[0..len];
data.iter()
.zip(b.iter_mut())
.for_each(|(index, out)| *out = MaybeUninit::new(table[*index as usize]));
// Safety: len bytes of b were just initialized.
let name: &'a [u8] = unsafe { slice_assume_init(&b[0..len]) };
match StandardHeader::from_bytes(name) {
Some(sh) => Ok(sh.into()),
None => {
Expand All @@ -1069,7 +1078,7 @@ fn parse_hdr<'a>(
}
}
}
65..=super::MAX_HEADER_NAME_LEN => Ok(HdrName::custom(data, false)),
SCRATCH_BUF_OVERFLOW..=super::MAX_HEADER_NAME_LEN => Ok(HdrName::custom(data, false)),
_ => Err(InvalidHeaderName::new()),
}
}
Expand All @@ -1086,14 +1095,14 @@ impl HeaderName {
/// Converts a slice of bytes to an HTTP header name.
///
/// This function normalizes the input.
#[allow(deprecated)]
pub fn from_bytes(src: &[u8]) -> Result<HeaderName, InvalidHeaderName> {
#[allow(deprecated)]
let mut buf = unsafe { mem::uninitialized() };
let mut buf = uninit_u8_array();
// Precondition: HEADER_CHARS is a valid table for parse_hdr().
match parse_hdr(src, &mut buf, &HEADER_CHARS)?.inner {
Repr::Standard(std) => Ok(std.into()),
Repr::Custom(MaybeLower { buf, lower: true }) => {
let buf = Bytes::copy_from_slice(buf);
// Safety: the invariant on MaybeLower ensures buf is valid UTF-8.
let val = unsafe { ByteStr::from_utf8_unchecked(buf) };
Ok(Custom(val).into())
}
Expand All @@ -1102,6 +1111,7 @@ impl HeaderName {
let mut dst = BytesMut::with_capacity(buf.len());

for b in buf.iter() {
// HEADER_CHARS maps all bytes to valid single-byte UTF-8
let b = HEADER_CHARS[*b as usize];

if b == 0 {
Expand All @@ -1111,6 +1121,9 @@ impl HeaderName {
dst.put_u8(b);
}

// Safety: the loop above maps all bytes in buf to valid single byte
// UTF-8 before copying them into dst. This means that dst (and hence
// dst.freeze()) is valid UTF-8.
let val = unsafe { ByteStr::from_utf8_unchecked(dst.freeze()) };

Ok(Custom(val).into())
Expand All @@ -1136,25 +1149,29 @@ impl HeaderName {
/// // Parsing a header that contains uppercase characters
/// assert!(HeaderName::from_lowercase(b"Content-Length").is_err());
/// ```
#[allow(deprecated)]
pub fn from_lowercase(src: &[u8]) -> Result<HeaderName, InvalidHeaderName> {
#[allow(deprecated)]
let mut buf = unsafe { mem::uninitialized() };
let mut buf = uninit_u8_array();
// Precondition: HEADER_CHARS_H2 is a valid table for parse_hdr()
match parse_hdr(src, &mut buf, &HEADER_CHARS_H2)?.inner {
Repr::Standard(std) => Ok(std.into()),
Repr::Custom(MaybeLower { buf, lower: true }) => {
let buf = Bytes::copy_from_slice(buf);
// Safety: the invariant on MaybeLower ensures buf is valid UTF-8.
let val = unsafe { ByteStr::from_utf8_unchecked(buf) };
Ok(Custom(val).into())
}
Repr::Custom(MaybeLower { buf, lower: false }) => {
for &b in buf.iter() {
// HEADER_CHARS maps all bytes that are not valid single-byte
// UTF-8 to 0 so this check returns an error for invalid UTF-8.
if b != HEADER_CHARS[b as usize] {
return Err(InvalidHeaderName::new());
}
}

let buf = Bytes::copy_from_slice(buf);
// Safety: the loop above checks that each byte of buf (either
// version) is valid UTF-8.
let val = unsafe { ByteStr::from_utf8_unchecked(buf) };
Ok(Custom(val).into())
}
Expand Down Expand Up @@ -1481,33 +1498,33 @@ impl Error for InvalidHeaderName {}
// ===== HdrName =====

impl<'a> HdrName<'a> {
// Precondition: if lower then buf is valid UTF-8
fn custom(buf: &'a [u8], lower: bool) -> HdrName<'a> {
HdrName {
// Invariant (on MaybeLower): follows from the precondition
inner: Repr::Custom(MaybeLower {
buf: buf,
lower: lower,
}),
}
}

#[allow(deprecated)]
pub fn from_bytes<F, U>(hdr: &[u8], f: F) -> Result<U, InvalidHeaderName>
where F: FnOnce(HdrName<'_>) -> U,
{
#[allow(deprecated)]
let mut buf = unsafe { mem::uninitialized() };
let mut buf = uninit_u8_array();
// Precondition: HEADER_CHARS is a valid table for parse_hdr().
let hdr = parse_hdr(hdr, &mut buf, &HEADER_CHARS)?;
Ok(f(hdr))
}

#[allow(deprecated)]
pub fn from_static<F, U>(hdr: &'static str, f: F) -> U
where
F: FnOnce(HdrName<'_>) -> U,
{
#[allow(deprecated)]
let mut buf = unsafe { mem::uninitialized() };
let mut buf = uninit_u8_array();
let hdr =
// Precondition: HEADER_CHARS is a valid table for parse_hdr().
parse_hdr(hdr.as_bytes(), &mut buf, &HEADER_CHARS).expect("static str is invalid name");
f(hdr)
}
Expand All @@ -1523,6 +1540,7 @@ impl<'a> From<HdrName<'a>> for HeaderName {
Repr::Custom(maybe_lower) => {
if maybe_lower.lower {
let buf = Bytes::copy_from_slice(&maybe_lower.buf[..]);
// Safety: the invariant on MaybeLower ensures buf is valid UTF-8.
let byte_str = unsafe { ByteStr::from_utf8_unchecked(buf) };

HeaderName {
Expand All @@ -1533,9 +1551,14 @@ impl<'a> From<HdrName<'a>> for HeaderName {
let mut dst = BytesMut::with_capacity(maybe_lower.buf.len());

for b in maybe_lower.buf.iter() {
// HEADER_CHARS maps each byte to a valid single-byte UTF-8
// codepoint.
dst.put_u8(HEADER_CHARS[*b as usize]);
}

// Safety: the loop above maps each byte of maybe_lower.buf to a
// valid single-byte UTF-8 codepoint before copying it into dst.
// dst (and hence dst.freeze()) is thus valid UTF-8.
let buf = unsafe { ByteStr::from_utf8_unchecked(dst.freeze()) };

HeaderName {
Expand Down Expand Up @@ -1606,6 +1629,26 @@ fn eq_ignore_ascii_case(lower: &[u8], s: &[u8]) -> bool {
})
}

// Utility functions for MaybeUninit<>. These are drawn from unstable API's on
// MaybeUninit<> itself.
const SCRATCH_BUF_SIZE: usize = 64;
const SCRATCH_BUF_OVERFLOW: usize = SCRATCH_BUF_SIZE + 1;

fn uninit_u8_array() -> [MaybeUninit<u8>; SCRATCH_BUF_SIZE] {
let arr = MaybeUninit::<[MaybeUninit<u8>; SCRATCH_BUF_SIZE]>::uninit();
// Safety: assume_init() is claiming that an array of MaybeUninit<>
// has been initilized, but MaybeUninit<>'s do not require initilizaton.
unsafe { arr.assume_init() }
}

// Assuming all the elements are initilized, get a slice of them.
//
// Safety: All elements of `slice` must be initilized to prevent
// undefined behavior.
unsafe fn slice_assume_init<T>(slice: &[MaybeUninit<T>]) -> &[T] {
&*(slice as *const [MaybeUninit<T>] as *const [T])
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -1652,6 +1695,7 @@ mod tests {
#[test]
#[should_panic]
fn test_static_invalid_name_lengths() {
// Safety: ONE_TOO_LONG contains only the UTF-8 safe, single-byte codepoint b'a'.
let _ = HeaderName::from_static(unsafe { std::str::from_utf8_unchecked(ONE_TOO_LONG) });
}

Expand Down