@@ -51,6 +51,7 @@ enum Repr<T> {
51
51
struct Custom ( ByteStr ) ;
52
52
53
53
#[ derive( Debug , Clone ) ]
54
+ // Invariant: If lower then buf is valid UTF-8.
54
55
struct MaybeLower < ' a > {
55
56
buf : & ' a [ u8 ] ,
56
57
lower : bool ,
@@ -979,6 +980,8 @@ standard_headers! {
979
980
/// / DIGIT / ALPHA
980
981
/// ; any VCHAR, except delimiters
981
982
/// ```
983
+ // HEADER_CHARS maps every byte that is 128 or larger to 0 so everything that is
984
+ // mapped by HEADER_CHARS, maps to a valid single-byte UTF-8 codepoint.
982
985
const HEADER_CHARS : [ u8 ; 256 ] = [
983
986
// 0 1 2 3 4 5 6 7 8 9
984
987
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // x
@@ -1010,6 +1013,8 @@ const HEADER_CHARS: [u8; 256] = [
1010
1013
] ;
1011
1014
1012
1015
/// Valid header name characters for HTTP/2.0 and HTTP/3.0
1016
+ // HEADER_CHARS_H2 maps every byte that is 128 or larger to 0 so everything that is
1017
+ // mapped by HEADER_CHARS_H2, maps to a valid single-byte UTF-8 codepoint.
1013
1018
const HEADER_CHARS_H2 : [ u8 ; 256 ] = [
1014
1019
// 0 1 2 3 4 5 6 7 8 9
1015
1020
0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // x
@@ -1044,6 +1049,7 @@ const HEADER_CHARS_H2: [u8; 256] = [
1044
1049
/// This version is best under optimized mode, however in a wasm debug compile,
1045
1050
/// the `eq` macro expands to 1 + 1 + 1 + 1... and wasm explodes when this chain gets too long
1046
1051
/// See https://github.com/DenisKolodin/yew/issues/478
1052
+ // Precondition: table maps all bytes that are not valid single-byte UTF-8 to something that is.
1047
1053
fn parse_hdr < ' a > (
1048
1054
data : & ' a [ u8 ] ,
1049
1055
b : & ' a mut [ MaybeUninit < u8 > ; SCRATCH_BUF_SIZE ] ,
@@ -1053,30 +1059,33 @@ fn parse_hdr<'a>(
1053
1059
1054
1060
let len = data. len ( ) ;
1055
1061
1056
- // Precondition: each element of buf must be intitialized
1062
+ // Precondition: each element of buf must be initialized and must be
1063
+ // a valid single-byte UTF-8 codepoint.
1057
1064
let validate = |buf : & ' a [ MaybeUninit < u8 > ] | {
1058
1065
// Safety: follows from the precondition
1059
1066
let buf = unsafe { slice_assume_init ( buf) } ;
1060
1067
if buf. iter ( ) . any ( |& b| b == 0 ) {
1061
1068
Err ( InvalidHeaderName :: new ( ) )
1062
1069
} else {
1070
+ // Precondition: satisfied by the precondition of validate.
1063
1071
Ok ( HdrName :: custom ( buf, true ) )
1064
1072
}
1065
1073
} ;
1066
1074
1067
1075
// Called as either eq!(b == b'a' b'b' b'c') or eq!(b[i] == b'a' b'b' b'c')
1068
- // Precondition: the first n elements of b (or the first n starting at i)
1069
- // must be intitialized, where n is the number of bytes listed after the '=='
1070
- // in the invocation.
1076
+ // Precondition: the first n elements of b (or the first n starting at i) must be
1077
+ // initialized, where n is the number of bytes listed after the '==' in the
1078
+ // invocation.
1071
1079
macro_rules! eq {
1072
1080
( ( $( $cmp: expr, ) * ) $v: ident[ $n: expr] ==) => {
1073
1081
$( $cmp) && *
1074
1082
} ;
1075
1083
( ( $( $cmp: expr, ) * ) $v: ident[ $n: expr] == $a: tt $( $rest: tt) * ) => {
1076
- // Safety: this arm is matched once for each byte after the '==' in
1077
- // the invocation (starting at 0 or i depending on the form of the call).
1078
- // By the precondtion $v[$n] is intitialized for each such match.
1079
- eq!( ( $( $cmp, ) * unsafe { * ( $v[ $n] . as_ptr( ) ) } == $a , ) $v[ $n+1 ] == $( $rest) * )
1084
+ // Safety: this arm is matched once for each byte after the '==' in the
1085
+ // invocation (starting at 0 or i depending on the form of the call). By
1086
+ // the precondition $v[$n] is initialized for each such match.
1087
+ eq!( ( $( $cmp, ) * unsafe { * ( $v[ $n] . as_ptr( ) ) } == $a , ) $v[ $n+1 ] ==
1088
+ $( $rest) * )
1080
1089
} ;
1081
1090
( $v: ident == $( $rest: tt) +) => {
1082
1091
eq!( ( ) $v[ 0 ] == $( $rest) +)
@@ -1086,11 +1095,12 @@ fn parse_hdr<'a>(
1086
1095
} ;
1087
1096
}
1088
1097
1089
-
1090
- // Post-condition: the first n elements of $d are intitialized where n is the
1091
- // third paramter to the macro. Note that this macro overwrite the first n elements
1092
- // of $d without dropping the existing contents (if any) but the elements of $d
1093
- // are u8's so no drop is necessary.
1098
+ // Post-condition: the first n elements of $d are initialized to a valid
1099
+ // single-byte UTF-8 codepoint where n is the third parameter to the macro. Note
1100
+ // that this macro overwrites the first n elements of $d without dropping the
1101
+ // existing contents (if any) but the elements of $d are u8's so no drop is
1102
+ // necessary. The UTF-8 part of the post-condition follows from the precondition
1103
+ // on table that is a part of parse_hdr().
1094
1104
macro_rules! to_lower {
1095
1105
( $d: ident, $src: ident, 1 ) => { $d[ 0 ] = MaybeUninit :: new( table[ $src[ 0 ] as usize ] ) ; } ;
1096
1106
( $d: ident, $src: ident, 2 ) => { to_lower!( $d, $src, 1 ) ; $d[ 1 ] = MaybeUninit :: new( table[ $src[ 1 ] as usize ] ) ; } ;
@@ -1155,8 +1165,9 @@ fn parse_hdr<'a>(
1155
1165
Ok ( Te . into ( ) )
1156
1166
} else {
1157
1167
// Precondition: the post-condition on to_lower!() ensures that the
1158
- // first 2 elements of b are intitialized. len == 2 so all of
1159
- // b[..len] is intitialized.
1168
+ // first 2 elements of b are initialized and are valid single-byte
1169
+ // UTF-8. len == 2 so all of b[..len] is initialized and is valid
1170
+ // UTF-8.
1160
1171
validate ( & b[ ..len] )
1161
1172
}
1162
1173
}
@@ -1573,11 +1584,13 @@ fn parse_hdr<'a>(
1573
1584
_ => {
1574
1585
if len < 64 {
1575
1586
for i in 0 ..len {
1587
+ // The precondition on table for parse_hdr() means that b[i] is
1588
+ // initialized to a valid single-byte UTF-8 codepoint.
1576
1589
b[ i] = MaybeUninit :: new ( table[ data[ i] as usize ] ) ;
1577
1590
}
1578
1591
1579
1592
// Precondition: the first len bytes of b are initialized in the loop above so
1580
- // b[..len] is intitialized.
1593
+ // b[..len] is initialized and is valid UTF-8.
1581
1594
validate ( & b[ ..len] )
1582
1595
} else {
1583
1596
Ok ( HdrName :: custom ( data, false ) )
@@ -1588,6 +1601,7 @@ fn parse_hdr<'a>(
1588
1601
1589
1602
#[ cfg( all( debug_assertions, target_arch = "wasm32" ) ) ]
1590
1603
/// This version works best in debug mode in wasm
1604
+ // Precondition: table maps all bytes that are not valid single-byte UTF-8 to something that is.
1591
1605
fn parse_hdr < ' a > (
1592
1606
data : & ' a [ u8 ] ,
1593
1607
b : & ' a mut [ MaybeUninit < u8 > ; SCRATCH_BUF_SIZE ] ,
@@ -1597,11 +1611,13 @@ fn parse_hdr<'a>(
1597
1611
1598
1612
let len = data. len ( ) ;
1599
1613
1614
+ // Precondition: the first len bytes of buf are valid UTF-8.
1600
1615
let validate = |buf : & ' a [ u8 ] , len : usize | {
1601
1616
let buf = & buf[ ..len] ;
1602
1617
if buf. iter ( ) . any ( |& b| b == 0 ) {
1603
1618
Err ( InvalidHeaderName :: new ( ) )
1604
1619
} else {
1620
+ // Precondition: follows from the precondition on validate.
1605
1621
Ok ( HdrName :: custom ( buf, true ) )
1606
1622
}
1607
1623
} ;
@@ -1617,9 +1633,13 @@ fn parse_hdr<'a>(
1617
1633
len if len > 64 => Ok ( HdrName :: custom ( data, false ) ) ,
1618
1634
len => {
1619
1635
// Read from data into the buffer - transforming using `table` as we go.
1620
- // The assignment to *out ensures that each byte is intitialized. Since
1621
- // *out is a u8 it doesn't matter that we are not dropping *out before accessing it.
1622
- data. iter ( ) . zip ( b. iter_mut ( ) ) . for_each ( |( index, out) | * out = MaybeUninit :: new ( table[ * index as usize ] ) ) ;
1636
+ // The assignment to *out ensures that each byte is initialized. Since
1637
+ // *out is a u8 it doesn't matter that we are not dropping *out before
1638
+ // accessing it. The precondition on table for parse_hdr() means that
1639
+ // each initialized byte of b is valid UTF-8.
1640
+ data. iter ( ) . zip ( b. iter_mut ( ) ) . for_each ( |( index, out) | * out =
1641
+ MaybeUninit :: new ( table[ * index as
1642
+ usize ] ) ) ;
1623
1643
// Safety: We just initialized the first len bytes of b in the previous line.
1624
1644
let b = unsafe { slice_assume_init ( & b[ ..len] ) } ;
1625
1645
match & b[ 0 ..len] {
@@ -1704,6 +1724,8 @@ fn parse_hdr<'a>(
1704
1724
b"content-security-policy-report-only" => {
1705
1725
Ok ( ContentSecurityPolicyReportOnly . into ( ) )
1706
1726
}
1727
+ // Precondition: other is the first len bytes of b which was
1728
+ // initialized to valid UTF-8 above.
1707
1729
other => validate ( other, len) ,
1708
1730
}
1709
1731
}
@@ -1724,10 +1746,12 @@ impl HeaderName {
1724
1746
/// This function normalizes the input.
1725
1747
pub fn from_bytes ( src : & [ u8 ] ) -> Result < HeaderName , InvalidHeaderName > {
1726
1748
let mut buf = uninit_u8_array ( ) ;
1749
+ // Precondition: HEADER_CHARS is a valid table for parse_hdr().
1727
1750
match parse_hdr ( src, & mut buf, & HEADER_CHARS ) ?. inner {
1728
1751
Repr :: Standard ( std) => Ok ( std. into ( ) ) ,
1729
1752
Repr :: Custom ( MaybeLower { buf, lower : true } ) => {
1730
1753
let buf = Bytes :: copy_from_slice ( buf) ;
1754
+ // Safety: the invariant on MaybeLower ensures buf is valid UTF-8.
1731
1755
let val = unsafe { ByteStr :: from_utf8_unchecked ( buf) } ;
1732
1756
Ok ( Custom ( val) . into ( ) )
1733
1757
}
@@ -1736,6 +1760,7 @@ impl HeaderName {
1736
1760
let mut dst = BytesMut :: with_capacity ( buf. len ( ) ) ;
1737
1761
1738
1762
for b in buf. iter ( ) {
1763
+ // HEADER_CHARS maps all bytes to valid single-byte UTF-8
1739
1764
let b = HEADER_CHARS [ * b as usize ] ;
1740
1765
1741
1766
if b == 0 {
@@ -1745,6 +1770,9 @@ impl HeaderName {
1745
1770
dst. put_u8 ( b) ;
1746
1771
}
1747
1772
1773
+ // Safety: the loop above maps all bytes in buf to valid single byte
1774
+ // UTF-8 before copying them into dst. This means that dst (and hence
1775
+ // dst.freeze()) is valid UTF-8.
1748
1776
let val = unsafe { ByteStr :: from_utf8_unchecked ( dst. freeze ( ) ) } ;
1749
1777
1750
1778
Ok ( Custom ( val) . into ( ) )
@@ -1772,21 +1800,27 @@ impl HeaderName {
1772
1800
/// ```
1773
1801
pub fn from_lowercase ( src : & [ u8 ] ) -> Result < HeaderName , InvalidHeaderName > {
1774
1802
let mut buf = uninit_u8_array ( ) ;
1803
+ // Precondition: HEADER_CHARS_H2 is a valid table for parse_hdr()
1775
1804
match parse_hdr ( src, & mut buf, & HEADER_CHARS_H2 ) ?. inner {
1776
1805
Repr :: Standard ( std) => Ok ( std. into ( ) ) ,
1777
1806
Repr :: Custom ( MaybeLower { buf, lower : true } ) => {
1778
1807
let buf = Bytes :: copy_from_slice ( buf) ;
1808
+ // Safety: the invariant on MaybeLower ensures buf is valid UTF-8.
1779
1809
let val = unsafe { ByteStr :: from_utf8_unchecked ( buf) } ;
1780
1810
Ok ( Custom ( val) . into ( ) )
1781
1811
}
1782
1812
Repr :: Custom ( MaybeLower { buf, lower : false } ) => {
1783
1813
for & b in buf. iter ( ) {
1814
+ // HEADER_CHARS maps all bytes that are not valid single-byte
1815
+ // UTF-8 to 0 so this check returns an error for invalid UTF-8.
1784
1816
if b != HEADER_CHARS [ b as usize ] {
1785
1817
return Err ( InvalidHeaderName :: new ( ) ) ;
1786
1818
}
1787
1819
}
1788
1820
1789
1821
let buf = Bytes :: copy_from_slice ( buf) ;
1822
+ // Safety: the loop above checks that each byte of buf (either
1823
+ // version) is valid UTF-8.
1790
1824
let val = unsafe { ByteStr :: from_utf8_unchecked ( buf) } ;
1791
1825
Ok ( Custom ( val) . into ( ) )
1792
1826
}
@@ -1831,6 +1865,7 @@ impl HeaderName {
1831
1865
pub fn from_static ( src : & ' static str ) -> HeaderName {
1832
1866
let bytes = src. as_bytes ( ) ;
1833
1867
let mut buf = uninit_u8_array ( ) ;
1868
+ // Precondition: HEADER_CHARS_H2 is a valid table for parse_hdr()
1834
1869
match parse_hdr ( bytes, & mut buf, & HEADER_CHARS_H2 ) {
1835
1870
Ok ( hdr_name) => match hdr_name. inner {
1836
1871
Repr :: Standard ( std) => std. into ( ) ,
@@ -2073,8 +2108,10 @@ impl Error for InvalidHeaderName {}
2073
2108
// ===== HdrName =====
2074
2109
2075
2110
impl < ' a > HdrName < ' a > {
2111
+ // Precondition: if lower then buf is valid UTF-8
2076
2112
fn custom ( buf : & ' a [ u8 ] , lower : bool ) -> HdrName < ' a > {
2077
2113
HdrName {
2114
+ // Invariant (on MaybeLower): follows from the precondition
2078
2115
inner : Repr :: Custom ( MaybeLower {
2079
2116
buf : buf,
2080
2117
lower : lower,
@@ -2086,6 +2123,7 @@ impl<'a> HdrName<'a> {
2086
2123
where F : FnOnce ( HdrName < ' _ > ) -> U ,
2087
2124
{
2088
2125
let mut buf = uninit_u8_array ( ) ;
2126
+ // Precondition: HEADER_CHARS is a valid table for parse_hdr().
2089
2127
let hdr = parse_hdr ( hdr, & mut buf, & HEADER_CHARS ) ?;
2090
2128
Ok ( f ( hdr) )
2091
2129
}
@@ -2096,6 +2134,7 @@ impl<'a> HdrName<'a> {
2096
2134
{
2097
2135
let mut buf = uninit_u8_array ( ) ;
2098
2136
let hdr =
2137
+ // Precondition: HEADER_CHARS is a valid table for parse_hdr().
2099
2138
parse_hdr ( hdr. as_bytes ( ) , & mut buf, & HEADER_CHARS ) . expect ( "static str is invalid name" ) ;
2100
2139
f ( hdr)
2101
2140
}
@@ -2111,6 +2150,7 @@ impl<'a> From<HdrName<'a>> for HeaderName {
2111
2150
Repr :: Custom ( maybe_lower) => {
2112
2151
if maybe_lower. lower {
2113
2152
let buf = Bytes :: copy_from_slice ( & maybe_lower. buf [ ..] ) ;
2153
+ // Safety: the invariant on MaybeLower ensures buf is valid UTF-8.
2114
2154
let byte_str = unsafe { ByteStr :: from_utf8_unchecked ( buf) } ;
2115
2155
2116
2156
HeaderName {
@@ -2121,9 +2161,14 @@ impl<'a> From<HdrName<'a>> for HeaderName {
2121
2161
let mut dst = BytesMut :: with_capacity ( maybe_lower. buf . len ( ) ) ;
2122
2162
2123
2163
for b in maybe_lower. buf . iter ( ) {
2164
+ // HEADER_CHARS maps each byte to a valid single-byte UTF-8
2165
+ // codepoint.
2124
2166
dst. put_u8 ( HEADER_CHARS [ * b as usize ] ) ;
2125
2167
}
2126
2168
2169
+ // Safety: the loop above maps each byte of maybe_lower.buf to a
2170
+ // valid single-byte UTF-8 codepoint before copying it into dst.
2171
+ // dst (and hence dst.freeze()) is thus valid UTF-8.
2127
2172
let buf = unsafe { ByteStr :: from_utf8_unchecked ( dst. freeze ( ) ) } ;
2128
2173
2129
2174
HeaderName {
0 commit comments