From 1c77cf8fd77b13e969ce223e64cc908edef80e2b Mon Sep 17 00:00:00 2001
From: Matthias Einwag
Date: Sat, 8 May 2021 17:01:06 -0700
Subject: [PATCH] Optimize VarInt decoding

Upfront: This isn't the most elegant change. But given VarInts sit in
the hot path it seems worth it.

The current VarInt decoding performs some duplicate bounds checks.
`VarInt::decode` checks for available bytes in order to return an error.
And `bytes::Buf` performs additional bounds checks in various helper
methods. By using `Buf::chunk` and `Buf::advance` directly we can save
some of those. We could improve even more by only supporting decoding
from contiguous buffers, but that would change the API contract.

In addition it seemed like the `u16` be->le conversion wasn't as free as
it should be. A 2-byte LE buffer improves things here.

Overall this reduces CPU time spent in VarInt decoding from about 2% to
around 1%.
---
 quinn-proto/src/varint.rs | 43 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 38 insertions(+), 5 deletions(-)

diff --git a/quinn-proto/src/varint.rs b/quinn-proto/src/varint.rs
index 7f57bc3cb..6b124ef36 100644
--- a/quinn-proto/src/varint.rs
+++ b/quinn-proto/src/varint.rs
@@ -150,7 +150,11 @@ impl Codec for VarInt {
             return Err(UnexpectedEnd);
         }
         let mut buf = [0; 8];
-        buf[0] = r.get_u8();
+        // This does not make use of `r.get_u8()` to avoid the additional bounds
+        // check. We already performed this one earlier on.
+        buf[0] = r.chunk()[0];
+        r.advance(1);
+
         let tag = buf[0] >> 6;
         buf[0] &= 0b0011_1111;
         let x = match tag {
@@ -159,21 +163,35 @@ impl Codec for VarInt {
                 if r.remaining() < 1 {
                     return Err(UnexpectedEnd);
                 }
-                r.copy_to_slice(&mut buf[1..2]);
-                u64::from(u16::from_be_bytes(buf[..2].try_into().unwrap()))
+
+                // A little-endian buffer is used here since that's our most
+                // likely target platform and it performs slightly better than
+                // the be conversion. For bigger numbers we don't bother, because
+                // it makes the byte copy more complicated.
+                let mut b = [0, buf[0]];
+                b[0] = r.chunk()[0];
+                r.advance(1);
+
+                u64::from(u16::from_le_bytes(b))
             }
             0b10 => {
                 if r.remaining() < 3 {
                     return Err(UnexpectedEnd);
                 }
-                r.copy_to_slice(&mut buf[1..4]);
+
+                // We performed the bounds check upfront
+                unsafe { copy_unchecked(r, &mut buf[1..4]) };
+
                 u64::from(u32::from_be_bytes(buf[..4].try_into().unwrap()))
             }
             0b11 => {
                 if r.remaining() < 7 {
                     return Err(UnexpectedEnd);
                 }
-                r.copy_to_slice(&mut buf[1..8]);
+
+                // We performed the bounds check upfront
+                unsafe { copy_unchecked(r, &mut buf[1..8]) };
+
                 u64::from_be_bytes(buf)
             }
             _ => unreachable!(),
@@ -196,3 +214,32 @@ impl Codec for VarInt {
         }
     }
 }
+
+/// Copies exactly `dst.len()` bytes from the front of `src` into `dst`, advancing `src`
+/// past them. The bytes may be spread over several of `src`'s chunks.
+///
+/// Unlike `Buf::copy_to_slice`, this skips the upfront check that `src` holds enough
+/// bytes, which the callers in `VarInt::decode` have already performed.
+///
+/// # Safety
+///
+/// The caller must guarantee `src.remaining() >= dst.len()`.
+unsafe fn copy_unchecked<B: Buf>(src: &mut B, dst: &mut [u8]) {
+    let len = dst.len();
+    let mut off = 0;
+
+    while off < len {
+        let chunk = src.chunk();
+        let to_copy = chunk.len().min(len - off);
+        // An empty chunk here means the caller broke the contract above; fail loudly
+        // in debug builds instead of spinning forever.
+        debug_assert!(to_copy > 0, "source exhausted before `dst` was filled");
+
+        // SAFETY: `to_copy` never exceeds `chunk.len()` or the room left in
+        // `dst[off..]`, and `dst` is an exclusive borrow, so `chunk` cannot alias it.
+        std::ptr::copy_nonoverlapping(chunk.as_ptr(), dst[off..].as_mut_ptr(), to_copy);
+
+        off += to_copy;
+        src.advance(to_copy);
+    }
+}