diff --git a/dc/s2n-quic-dc/Cargo.toml b/dc/s2n-quic-dc/Cargo.toml index 8b4bad8e83..209f9e1171 100644 --- a/dc/s2n-quic-dc/Cargo.toml +++ b/dc/s2n-quic-dc/Cargo.toml @@ -44,6 +44,7 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"], optional = tr zerocopy = { version = "0.7", features = ["derive"] } zeroize = "1" parking_lot = "0.12" +slab = "0.4" [dev-dependencies] bolero = "0.12" diff --git a/dc/s2n-quic-dc/src/path/secret/receiver.rs b/dc/s2n-quic-dc/src/path/secret/receiver.rs index 41e413dac2..521b2d8844 100644 --- a/dc/s2n-quic-dc/src/path/secret/receiver.rs +++ b/dc/s2n-quic-dc/src/path/secret/receiver.rs @@ -1,371 +1,683 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 -use crate::credentials::{Credentials, Id, KeyId}; -use s2n_quic_core::packet::number::{ - PacketNumber, PacketNumberSpace, SlidingWindow, SlidingWindowError, -}; -use std::{ - cell::UnsafeCell, - ptr::NonNull, - sync::{ - atomic::{AtomicU64, Ordering}, - Arc, Mutex, - }, -}; - -const SHARED_ENTRIES: usize = 1 << 20; -// Maximum page size on current machines (macOS aarch64 has 16kb pages) -// -// mmap is documented as failing if we don't request a page boundary. Currently our sizes work out -// such that rounding is useless, but this is good future proofing. -const MAX_PAGE: usize = 16_384; -const SHARED_ALLOCATION: usize = { - let element = std::mem::size_of::(); - let size = element * SHARED_ENTRIES; - // TODO use `next_multiple_of` once MSRV is >=1.73 - (size + MAX_PAGE - 1) / MAX_PAGE * MAX_PAGE -}; +use self::allocator::Allocator; +use crate::credentials::{Credentials, KeyId}; +use std::alloc::Layout; +use std::mem::MaybeUninit; +use std::ptr::NonNull; +use std::sync::{Arc, Mutex}; + +mod allocator; #[derive(Debug)] pub struct Shared { - secret: u64, - backing: NonNull, + alloc: Allocator, + entries: Mutex>, } unsafe impl Send for Shared {} unsafe impl Sync for Shared {} -impl Drop for Shared { - fn drop(&mut self) { - unsafe { - if libc::munmap(self.backing.as_ptr().cast(), SHARED_ALLOCATION) != 0 { - // Avoid panicking in a destructor, just let the memory leak while logging. We - // expect this to be essentially a global singleton in most production cases so - // likely we're exiting the process anyway. - eprintln!( - "Failed to unmap memory: {:?}", - std::io::Error::last_os_error() - ); - } +impl Shared { + pub fn without_region() -> Arc { + Arc::new(Shared { + alloc: Allocator::with_capacity(0), + entries: Mutex::new(slab::Slab::new()), + }) + } + + pub fn new() -> Arc { + Arc::new(Shared { + // ~20MB + alloc: Allocator::with_capacity(20 * 1024 * 1024), + entries: Mutex::new(slab::Slab::new()), + }) + } + + pub fn new_receiver(self: Arc) -> State { + let mut guard = self.entries.lock().unwrap(); + let key = guard.insert(InnerState::new()); + State { + shared: self.clone(), + entry: key, } } -} -const fn assert_copy() {} + fn remove(&self, entry: usize) -> InnerState { + let mut guard = self.entries.lock().unwrap_or_else(|e| e.into_inner()); + guard.remove(entry) + } +} -struct SharedSlot { - id: UnsafeCell, - key_id: AtomicU64, +#[derive(Debug)] +pub struct State { + // FIXME: Avoid storing Shared pointer inside every path secret entry. + // Instead thread the pointer through all the methods. + shared: Arc, + // FIXME: shrink to u32 index? 
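+    // Slab key into `Shared::entries` identifying this receiver's `InnerState`.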
+ entry: usize, } -impl SharedSlot { - fn try_lock(&self) -> Option> { - let current = self.key_id.load(Ordering::Relaxed); - if current & LOCK != 0 { - // If we are already locked, then give up. - // A concurrent thread updated this slot, any write we do would squash that thread's - // write. Doing so if that thread remove()d may make sense in the future but not right - // now. - return None; +impl Drop for State { + fn drop(&mut self) { + let entry = self.shared.remove(self.entry); + if let SharedIndex::Bitset(handle) | SharedIndex::Array(handle) = entry.shared.unpack() { + // SAFETY: Entry is being dropped, so this is called at most once. + unsafe { self.shared.alloc.deallocate(handle) }; } - let Ok(_) = self.key_id.compare_exchange( - current, - current | LOCK, - Ordering::Acquire, - Ordering::Relaxed, - ) else { - return None; - }; - - Some(SharedSlotGuard { - slot: self, - key_id: current, - }) } } -struct SharedSlotGuard<'a> { - slot: &'a SharedSlot, - key_id: u64, +// KeyIDs move through two filters: +// +// * `max_seen` + bitset absorbs traffic with minimal reordering. Conceptually they are a single +// 33-bit bitset ending at (inclusively) `max_seen`. 1-bits indicate seen entries. This is +// currently expected to be enough to absorb the vast majority (>99.99%) of traffic seen in +// practice. This space is always available to every Path Secret. +// * If we don't see a key ID (i.e., we shift out a zero bit from the bitset) we insert into a list +// or bitset within the Shared state. This list tracks *only* unseen entries, so we expect it to +// generally be short. Currently the list can track entries within a region 2**16 wide. Note that +// this region is independent of `max_seen` and so only needs to potentially be changed if we +// evict a zero bit (which happens pretty rarely), and even then only if we still haven't caught +// a packet that's 2**16 old. See more details on `SortedListHeader` and `BitsetHeader`. +#[derive(Clone, Debug)] +pub struct InnerState { + max_seen: u64, + + // Directly stored bitset, adjacent to max_seen. + bitset: u32, + + // Any key ID > to this is either AlreadyExists or Ok. + // Note that == is Unknown, since += 1 is *not* a safe operation. + // + // This is updated when we evict from the list/bitset (i.e., drop a still-Ok value). + // FIXME: actually not updated today, because we need to thread this into deallocation for + // proper updates. + minimum_evicted: u64, + + // Index into the shared allocator's parents/entry array, if any. + shared: SharedIndexMemory, + + // FIXME: Move into shared allocation. + list: Vec, } -impl SharedSlotGuard<'_> { - fn write_id(&mut self, id: Id) { - // Store the new ID. - // SAFETY: We hold the lock since we are in the guard. - unsafe { - // Note: no destructor is run for the previously stored element, but Id is Copy. - // If we did want to run a destructor we'd have to ensure that we replaced a PRESENT - // entry. - assert_copy::(); - std::ptr::write(self.slot.id.get(), id); - } +// "u24" indices keep the in-memory size down. +#[derive(Copy, Clone)] +enum SharedIndexMemory { + None, + Array([u8; 3]), + Bitset([u8; 3]), +} + +impl std::fmt::Debug for SharedIndexMemory { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.unpack().fmt(f) } +} - fn id(&self) -> Id { - // SAFETY: We hold the lock, so copying out the Id is safe. 
- unsafe { *self.slot.id.get() } +#[derive(Debug, Copy, Clone)] +enum SharedIndex { + None, + Array(usize), + Bitset(usize), +} + +impl SharedIndexMemory { + fn unpack(self) -> SharedIndex { + match self { + SharedIndexMemory::None => SharedIndex::None, + SharedIndexMemory::Array([a, b, c]) => { + SharedIndex::Array(u32::from_le_bytes([a, b, c, 0]) as usize) + } + SharedIndexMemory::Bitset([a, b, c]) => { + SharedIndex::Bitset(u32::from_le_bytes([a, b, c, 0]) as usize) + } + } } } -impl Drop for SharedSlotGuard<'_> { - fn drop(&mut self) { - self.slot.key_id.store(self.key_id, Ordering::Release); +impl SharedIndex { + fn pack(self) -> SharedIndexMemory { + match self { + SharedIndex::None => SharedIndexMemory::None, + SharedIndex::Array(i) => { + assert!(i < (1 << 24)); + let [a, b, c, d] = (i as u32).to_le_bytes(); + assert!(d == 0); + SharedIndexMemory::Array([a, b, c]) + } + SharedIndex::Bitset(i) => { + assert!(i < (1 << 24)); + let [a, b, c, d] = (i as u32).to_le_bytes(); + assert!(d == 0); + SharedIndexMemory::Bitset([a, b, c]) + } + } } } -const LOCK: u64 = 1 << 62; -const PRESENT: u64 = 1 << 63; +impl InnerState { + fn new() -> Self { + Self { + max_seen: u64::MAX, + minimum_evicted: u64::MAX, + bitset: 0, + shared: SharedIndexMemory::None, -impl Shared { - pub fn new() -> Arc { - let mut secret = [0; 8]; - aws_lc_rs::rand::fill(&mut secret).expect("random is available"); - let shared = Shared { - secret: u64::from_ne_bytes(secret), - backing: unsafe { - // Note: We rely on the zero-initialization provided by the kernel. That ensures - // that an entry in the map is not LOCK'd to begin with and is not PRESENT as well. - let ptr = libc::mmap( - std::ptr::null_mut(), - SHARED_ALLOCATION, - libc::PROT_READ | libc::PROT_WRITE, - libc::MAP_ANONYMOUS | libc::MAP_PRIVATE, - 0, - 0, - ); - // -1 - if ptr as usize == usize::MAX { - panic!( - "Failed to allocate backing allocation for shared: {:?}", - std::io::Error::last_os_error() - ); - } - NonNull::new(ptr).unwrap().cast() - }, + list: vec![], + } + } + + // Iterate over the unseen IDs that were > previous max seen, and + // will not *become* tracked now (i.e., don't fall into the new bitset). + // + // The bitset tracks (max_seen-32)..=(max_seen-1) + fn skipped_bitset(&self, previous_max: Option) -> std::ops::Range { + let end = self.max_seen.saturating_sub(u32::BITS as u64); + // Push start up so we don't push more than 65k elements, which is our list limit. + // This avoids a too-long loop if we jump forward too much. + let start = match previous_max { + Some(previous_max) => (previous_max + 1).max(end.saturating_sub(u16::MAX as u64)), + None => end.saturating_sub(u16::MAX as u64), }; + start..end + } +} - // We need to modify the slot to which an all-zero path secert ID and key ID map. Otherwise - // we'd return Err(AlreadyExists) for that entry which isn't correct - it has not been - // inserted or removed, so it should be Err(Unknown). - // - // This is the only slot that needs modification. All other slots are never used for lookup - // of this set of credentials and so containing this set of credentials is fine. - let slot = shared.slot(&Credentials { - id: Id::from([0; 16]), - key_id: KeyId::new(0).unwrap(), - }); - // The max key ID is never used by senders (checked on the sending side), while avoiding - // taking a full bit out of the range of key IDs. We also statically return Unknown for it - // on removal to avoid a non-local invariant. 
- slot.key_id.store(KeyId::MAX.as_u64(), Ordering::Relaxed); +impl State { + pub fn without_shared() -> State { + let shared = Shared::without_region(); + shared.new_receiver() + } - Arc::new(shared) + #[cfg(test)] + pub fn snapshot(&self) -> InnerState { + self.shared.entries.lock().unwrap()[self.entry].clone() } - pub fn new_receiver(self: Arc) -> State { - State::with_shared(self) + pub fn with_shared(shared: Arc) -> State { + shared.new_receiver() } - fn insert(&self, identity: &Credentials) { - let slot = self.slot(identity); - let Some(mut guard) = slot.try_lock() else { - return; - }; - guard.write_id(identity.id); - guard.key_id = *identity.key_id | PRESENT; + pub fn minimum_unseen_key_id(&self) -> KeyId { + // wrapping_add ensures that our sentinel u64::MAX is zero, which is accurate (i.e., if we + // have not seen any keys, then we have not seen the zeroth key either). + KeyId::new( + self.shared.entries.lock().unwrap()[self.entry] + .max_seen + .wrapping_add(1), + ) + .unwrap() } - fn remove(&self, identity: &Credentials) -> Result<(), Error> { - // See `new` for details. - if identity.key_id == KeyId::MAX.as_u64() { - return Err(Error::Unknown); - } + pub fn pre_authentication(&self, _credentials: &Credentials) -> Result<(), Error> { + // always pass for now + Ok(()) + } - let slot = self.slot(identity); - let previous = slot.key_id.load(Ordering::Relaxed); - if previous & LOCK != 0 { - // If we are already locked, then give up. - // A concurrent thread updated this slot, any write we do would squash that thread's - // write. No concurrent thread could have inserted what we're looking for since - // both insert and remove for a single path secret ID run under a Mutex. - return Err(Error::Unknown); + pub fn post_authentication(&self, credentials: &Credentials) -> Result<(), Error> { + let entry = &mut self.shared.entries.lock().unwrap()[self.entry]; + + if entry.max_seen == u64::MAX { + // no need to touch the bitset, we've not seen any of the previous entries. + entry.max_seen = *credentials.key_id; + + for id in entry.skipped_bitset(None) { + self.push_list(entry, id); + } + + Ok(()) + } else if credentials.key_id > entry.max_seen { + let previous_max = entry.max_seen; + entry.max_seen = *credentials.key_id; + let delta = entry.max_seen - previous_max; + + // This is the range that is going to get shifted out. + // + // Any bit not set means we haven't yet seen it, so we should add it to our list. + // + // If we shifted by 1, then the range we want is 31..=31 (1 bit, 1 << 31, top bit) + // If we shifted by 2, then the range we want is 30..=31 (2 bits) + // If we shifted by 30, then the range we want is 2..=31 (30 bits) + // If we shifted by 60, then the range we want is 0..=31 (all 32 bits) + for bit in (32u64.saturating_sub(delta)..=31).rev() { + // +1 since bit 0 is previous_max - 1 + let Some(id) = previous_max.checked_sub(bit + 1) else { + continue; + }; + if entry.bitset & (1 << bit) == 0 { + self.push_list(entry, id); + } + } + + for id in entry.skipped_bitset(Some(previous_max)) { + self.push_list(entry, id); + } + + if delta <= u32::BITS as u64 { + // as u32 is safe since we checked we're less than 32. + let delta = delta as u32; + + // Shift the no longer fitting bits out + // 0s mean we have *not* seen the entry, so shifting those in for the middle part + entry.bitset = entry.bitset.checked_shl(delta).unwrap_or(0); + // Set the bit corresponding to previously max-seen. 
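+                // previous_max is now max_seen - delta, i.e. bit (delta - 1) of the new window
+                // (bit 0 corresponds to max_seen - 1).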
+ entry.bitset |= 1 << (delta - 1); + } else { + entry.bitset = 0; + } + + // forward shift is always successful + Ok(()) + } else if credentials.key_id == entry.max_seen { + Err(Error::AlreadyExists) + } else { + let delta = entry.max_seen - *credentials.key_id; + if delta <= u32::BITS as u64 { + // -1 for the transition from max seen to the bitset + if (entry.bitset & (1 << (delta - 1) as u32)) != 0 { + Err(Error::AlreadyExists) + } else { + entry.bitset |= 1 << (delta - 1) as u32; + Ok(()) + } + } else if let Ok(()) = self.try_remove_list(entry, *credentials.key_id) { + Ok(()) + } else if *credentials.key_id > entry.minimum_evicted { + Err(Error::AlreadyExists) + } else { + Err(Error::Unknown) + } } - if previous & (!PRESENT) != *identity.key_id { - // If the currently stored entry does not match our desired KeyId, - // then we don't know whether this key has been replayed or not. - return Err(Error::Unknown); + } + + fn deallocate_shared(&self, entry: &mut InnerState) { + if let SharedIndex::Bitset(handle) | SharedIndex::Array(handle) = entry.shared.unpack() { + entry.shared = SharedIndexMemory::None; + // SAFETY: we've cleared the shared field, so won't get called again. + unsafe { self.shared.alloc.deallocate(handle) }; } + } - let Some(mut guard) = slot.try_lock() else { - // Don't try to win the race by spinning, let the other thread proceed. - return Err(Error::Unknown); - }; + fn push_list(&self, entry: &mut InnerState, id: u64) { + for _ in 0..2 { + match entry.shared.unpack() { + SharedIndex::None => { + let guard = self.shared.alloc.allocate(SortedList::layout_for_cap(1)); + entry.shared = SharedIndex::Array(guard.handle()).pack(); + unsafe { + let mut list = SortedList::initialize(guard.as_ptr(), 1); + // Safe to unwrap because it can't need to grow -- we allocated with capacity + // for 1 element and that element will get used up here. + list.insert(id).unwrap(); + } - // Check if the path secret ID matches. - if guard.id() != identity.id { - return Err(Error::Unknown); + // we're done, exit + return; + } + SharedIndex::Array(handle) => { + let Some(existing) = self.shared.alloc.read_allocation(handle) else { + self.deallocate_shared(entry); + // loop around to try again with a new allocation + continue; + }; + + let mut list = unsafe { SortedList::from_existing(existing.as_ptr()) }; + let Err(err) = list.insert(id) else { + // successfully inserted, done. + return; + }; + + // drop the lock before we allocate, cannot hold entry lock across + // allocation or we may deadlock. + drop(existing); + + let (_new_guard, mut list) = match err { + CapacityError::Array(cap) => { + let guard = self.shared.alloc.allocate(SortedList::layout_for_cap(cap)); + entry.shared = SharedIndex::Array(guard.handle()).pack(); + let list = unsafe { + SortedList::initialize(guard.as_ptr(), cap.try_into().unwrap()) + }; + (guard, list) + } + CapacityError::Bitset => { + todo!() + } + }; + + let previous = self.shared.alloc.read_allocation(handle); + if let Some(previous) = previous { + let mut prev_list = unsafe { SortedList::from_existing(previous.as_ptr()) }; + prev_list.copy_to(&mut list); + } + + // Safe to unwrap because it can't need to grow -- we allocated with + // capacity for at least one more element and that element will get used up + // here. We haven't released the lock on this list since allocation so it's + // impossible for some other thread to have used up the space. + // + // FIXME: that assumption is not true if we failed to copy, since we probably + // need to *shrink* then. 
Maybe we should allocate a temporary buffer to copy + // into? + list.insert(id).unwrap(); + + return; + } + SharedIndex::Bitset(_) => { + todo!() + } + } } - // Ok, at this point we know that the key ID and the path secret ID both match. + // Should be unreachable - we should always exit from the loop in at most two "turns" via + // `return`. + unreachable!() + } - let ret = if guard.key_id & PRESENT != 0 { + fn try_remove_list(&self, entry: &mut InnerState, id: u64) -> Result<(), ()> { + if let Ok(idx) = entry.list.binary_search(&id) { + // FIXME: augment with bitset for fast removal + entry.list.remove(idx); Ok(()) } else { - Err(Error::AlreadyExists) - }; + Err(()) + } + } +} - // Release the lock, removing the PRESENT bit (which may already be missing). - guard.key_id = *identity.key_id; +impl super::map::SizeOf for State {} - ret - } +#[derive(Copy, Clone)] +struct SortedListHeader { + len: u16, + count: u16, + cap: u16, + minimum: u64, +} - fn index(&self, identity: &Credentials) -> usize { - let hash = u64::from_ne_bytes(identity.id[..8].try_into().unwrap()) - ^ *identity.key_id - ^ self.secret; - let index = hash & (SHARED_ENTRIES as u64 - 1); - index as usize +struct SortedList { + p: NonNull, +} + +impl SortedList { + unsafe fn initialize(ptr: NonNull, cap: u16) -> SortedList { + ptr.as_ptr() + .cast::() + .write(SortedListHeader { + len: 0, + count: 0, + cap, + minimum: 0, + }); + SortedList { p: ptr.cast() } } - fn slot(&self, identity: &Credentials) -> &SharedSlot { - let index = self.index(identity); - // SAFETY: in-bounds -- the & above truncates such that we're always in the appropriate - // range that we allocated with mmap above. - // - // Casting to a reference is safe -- the Slot type has an UnsafeCell around all of the data - // (either inside the atomic or directly). - unsafe { self.backing.as_ptr().add(index).as_ref().unwrap_unchecked() } + fn layout_for_cap(cap: usize) -> Layout { + Layout::new::() + .extend(Layout::array::(cap).unwrap()) + .unwrap() + .0 + .extend(Layout::array::(cap.div_ceil(8)).unwrap()) + .unwrap() + .0 + } + fn bitset_offset(cap: usize) -> usize { + Layout::new::() + .extend(Layout::array::(cap).unwrap()) + .unwrap() + .0 + .extend(Layout::array::(cap.div_ceil(8)).unwrap()) + .unwrap() + .1 } -} -#[derive(Debug)] -pub struct State { - // Minimum that we're potentially willing to accept. - // This is lazily updated and so may be out of date. - min_key_id: AtomicU64, + fn slice_offset(cap: usize) -> usize { + Layout::new::() + .extend(Layout::array::(cap).unwrap()) + .unwrap() + .1 + } - // This is the maximum ID we've seen so far. This is sent to peers for when we cannot determine - // if the packet sent is replayed as it falls outside our replay window. Peers use this - // information to resynchronize on the latest state. - max_seen_key_id: AtomicU64, + fn minimum(&self) -> u64 { + // aligned to 8 bytes, so should be aligned. + unsafe { self.p.cast::().as_ref().minimum } + } - seen: Mutex, + fn set_minimum(&self, min: u64) { + unsafe { + self.p.cast::().as_mut().minimum = min; + } + } - shared: Option>, -} + fn len(&self) -> usize { + unsafe { usize::from(self.p.cast::().as_ref().len) } + } -impl super::map::SizeOf for Mutex { - fn size(&self) -> usize { - // If we don't need drop, it's very likely that this type is fully contained in size_of - // Self. This simplifies implementing this trait for e.g. std types. - // - // Mutex on macOS (at least) has a more expensive, pthread-based impl that allocates. 
But - // on Linux there's no extra allocation. - if cfg!(target_os = "linux") { - assert!( - !std::mem::needs_drop::(), - "{:?} requires custom SizeOf impl", - std::any::type_name::() - ); + fn set_len(&self, len: usize) { + unsafe { + self.p.cast::().as_mut().len = len.try_into().unwrap(); } - std::mem::size_of::() } -} -impl super::map::SizeOf for State { - fn size(&self) -> usize { - let State { - min_key_id, - max_seen_key_id, - seen, - shared, - } = self; - // shared is shared across all State's (effectively) so we don't currently account for that - // allocation. - min_key_id.size() + max_seen_key_id.size() + seen.size() + std::mem::size_of_val(shared) + fn capacity(&self) -> usize { + unsafe { usize::from(self.p.cast::().as_ref().cap) } } -} -#[derive(Clone, Copy, Debug, PartialEq, Eq, thiserror::Error)] -pub enum Error { - /// This indicates that we know about this element and it *definitely* already exists. - #[error("packet definitely already seen before")] - AlreadyExists, - /// We don't know whether we've seen this element before. It may or may not have already been - /// received. - #[error("packet may have been seen before")] - Unknown, -} + fn set_capacity(&self, cap: usize) { + unsafe { + self.p.cast::().as_mut().cap = cap.try_into().unwrap(); + } + } -impl State { - pub fn without_shared() -> State { - State { - min_key_id: Default::default(), - max_seen_key_id: Default::default(), - seen: Default::default(), - shared: None, + fn count(&self) -> usize { + unsafe { usize::from(self.p.cast::().as_ref().count) } + } + + fn set_count(&self, count: usize) { + unsafe { + self.p.cast::().as_mut().count = count.try_into().unwrap(); } } - pub fn with_shared(shared: Arc) -> State { - State { - min_key_id: Default::default(), - max_seen_key_id: Default::default(), - seen: Default::default(), - shared: Some(shared), + #[inline(never)] + fn insert(&mut self, value: u64) -> Result<(), CapacityError> { + let value = match self.to_offset(value) { + Some(v) => v, + None => { + self.compact_ensuring(value); + self.to_offset(value).expect("compact ensuring guarantee") + } + }; + if self.len() == self.capacity() { + // FIXME: might actually need to go to bitset or compact + return Err(CapacityError::Array(self.len() + 1)); + } + unsafe { + // move past the header + self.p + .as_ptr() + .add(Self::slice_offset(self.capacity())) + .cast::() + .add(self.len()) + .write(value); + self.set_len(self.len() + 1); + self.set_count(self.count() + 1); } + + Ok(()) } - pub fn pre_authentication(&self, identity: &Credentials) -> Result<(), Error> { - if self.min_key_id.load(Ordering::Relaxed) > *identity.key_id { + #[inline(never)] + fn remove(&mut self, value: u64) -> Result<(), Error> { + let Some(value) = self.to_offset(value) else { + // If the value is >= minimum, but we can't compute an offset, we know for sure that it + // was not inserted into the array. As such it must have been received already. 
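+            // (Offsets are stored as u16s relative to `minimum`, so only the window
+            // minimum..=minimum + u16::MAX is representable in the list at all.)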
+ return if value >= self.minimum() { + Err(Error::AlreadyExists) + } else { + Err(Error::Unknown) + }; + }; + let slice = unsafe { + std::slice::from_raw_parts::( + self.p + .as_ptr() + .add(Self::slice_offset(self.capacity())) + .cast::(), + self.len(), + ) + }; + + let Ok(idx) = slice.binary_search(&value) else { return Err(Error::Unknown); + }; + let bitset = unsafe { + std::slice::from_raw_parts_mut::( + self.p.as_ptr().add(Self::bitset_offset(self.capacity())), + self.len().div_ceil(8), + ) + }; + let pos = idx / 8; + let mask = 1 << (idx % 8); + if bitset[pos] & mask != 0 { + return Err(Error::AlreadyExists); + } + bitset[pos] |= mask; + + self.set_count(self.count() - 1); + + if self.count() * 2 < self.len() { + self.shrink(); } Ok(()) } - pub fn minimum_unseen_key_id(&self) -> KeyId { - KeyId::try_from(self.max_seen_key_id.load(Ordering::Relaxed) + 1).unwrap() - } - - /// Called after decryption has been performed - pub fn post_authentication(&self, identity: &Credentials) -> Result<(), Error> { - let key_id = identity.key_id; - self.max_seen_key_id.fetch_max(*key_id, Ordering::Relaxed); - let pn = PacketNumberSpace::Initial.new_packet_number(key_id); - - // Note: intentionally retaining this lock across potential insertion into the shared map. - // This avoids the case where we have evicted an entry but cannot see it in the shared map - // yet from a concurrent thread. This should not be required for correctness but helps - // reasoning about the state of the world. - let mut seen = self.seen.lock().unwrap(); - match seen.insert_with_evicted(pn) { - Ok(evicted) => { - if let Some(shared) = &self.shared { - // FIXME: Consider bounding the number of evicted entries to insert or - // otherwise optimizing? This can run for at most 128 entries today... - for evicted in evicted { - shared.insert(&Credentials { - id: identity.id, - key_id: PacketNumber::as_varint(evicted), - }); - } - } - Ok(()) - } - Err(SlidingWindowError::TooOld) => { - if let Some(shared) = &self.shared { - shared.remove(identity) - } else { - Err(Error::Unknown) - } - } - Err(SlidingWindowError::Duplicate) => Err(Error::AlreadyExists), + //fn grow(&mut self) { + // todo!() + // let new_cap = (self.capacity() + 1) + // .next_power_of_two() + // .clamp(0, u16::MAX as usize); + // self.reallocate_to(new_cap); + //} + + fn copy_to(&mut self, new: &mut SortedList) { + unsafe { + let new_cap = new.capacity(); + let new = new.p; + + // copy header + self.p + .as_ptr() + .copy_to_nonoverlapping(new.as_ptr(), std::mem::size_of::()); + + // copy bitset + self.p + .as_ptr() + .add(Self::bitset_offset(self.capacity())) + .copy_to_nonoverlapping( + new.as_ptr().add(Self::bitset_offset(new_cap)), + self.capacity().div_ceil(8), + ); + + // Zero out tail of the new bitset (that didn't get init'd by the copy above). + std::slice::from_raw_parts_mut::>( + new.as_ptr().add(Self::bitset_offset(new_cap)).cast(), + new_cap.div_ceil(8), + )[self.capacity().div_ceil(8)..] + .fill(MaybeUninit::zeroed()); + + // Copy the actual values + self.p + .as_ptr() + .add(Self::slice_offset(self.capacity())) + .cast::() + .copy_to_nonoverlapping( + new.as_ptr().add(Self::slice_offset(new_cap)).cast(), + self.len(), + ); + + self.p = new; + self.set_capacity(new_cap); } } + + // this also updates `minimum` to be best-possible given the data. 
+ fn shrink(&mut self) { + todo!() + //let slice = unsafe { + // std::slice::from_raw_parts::( + // self.p + // .as_ptr() + // .add(Self::slice_offset(self.capacity())) + // .cast::(), + // self.len(), + // ) + //}; + //let bitset = unsafe { + // std::slice::from_raw_parts::( + // self.p.as_ptr().add(Self::bitset_offset(self.capacity())), + // self.len().div_ceil(8), + // ) + //}; + + //let mut new = Self::new(); + //let mut cap = 0; + //while cap < self.count() { + // // should match grow()'s impl + // cap = (cap + 1).next_power_of_two().clamp(0, u16::MAX as usize); + //} + //new.reallocate_to(cap); + //for (idx, value) in slice.iter().copied().enumerate() { + // let pos = idx / 8; + // let mask = 1 << (idx % 8); + // // not yet removed... + // if bitset[pos] & mask == 0 { + // new.insert(self.minimum() + value as u64); + // } + //} + //*self = new; + } + + fn to_offset(&mut self, value: u64) -> Option { + if self.minimum() == u64::MAX { + self.set_minimum(value); + } + let value = value.checked_sub(self.minimum())?; + u16::try_from(value).ok() + } + + unsafe fn from_existing(p: NonNull) -> SortedList { + SortedList { p } + } + + /// Re-pack the sorted list, potentially dropping values, to ensure that `can_fit` fits into + /// the list. + fn compact_ensuring(&self, can_fit: u64) { + todo!() + } +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, thiserror::Error)] +enum CapacityError { + #[error("need to grow or shrink to an array with capacity {0}")] + Array(usize), + #[error("need to grow or shrink to a bitset")] + Bitset, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, thiserror::Error)] +pub enum Error { + /// This indicates that we know about this element and it *definitely* already exists. + #[error("packet definitely already seen before")] + AlreadyExists, + /// We don't know whether we've seen this element before. It may or may not have already been + /// received. + #[error("packet may have been seen before")] + Unknown, } #[cfg(test)] diff --git a/dc/s2n-quic-dc/src/path/secret/receiver/allocator.rs b/dc/s2n-quic-dc/src/path/secret/receiver/allocator.rs new file mode 100644 index 0000000000..5052b67bb0 --- /dev/null +++ b/dc/s2n-quic-dc/src/path/secret/receiver/allocator.rs @@ -0,0 +1,514 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Slab allocator drawing from a fixed arena. +//! +//! The arena is allocated at initialization time, providing a fixed memory region from which to +//! allocate entries from. We support allocating a compile-time fixed set of types, however, the +//! internals are mostly uncaring about *what* that set is (including the size). +//! +//! The arena has three types of pages: +//! +//! * Free (empty) +//! * Partially allocated +//! * Fully initialized/allocated +//! +//! Initially, all pages start empty. When an allocation request is made, a page is moved into the +//! partially allocated state. A u16 counter is placed at the top of the page for the # of entries +//! allocated so far,. A given size class always allocates from this page until the page is +//! exhausted. A page, when moved into the partially allocated state, is also threaded into an +//! intrusive doubly linked list of allocated pages for this size class. This list supports +//! deallocation operations. A partially-empty page, if it exists, is always at the top of this +//! list. +//! +//! Effectively, we have a `LinkedList>` for each T, with at most one of the Vecs being +//! non-fixed-size. +//! +//! 
On deallocation, we swap the entry we just allocated with one from the top of the page list. +//! This ensures that at most one page for this type is not contiguously allocated, meaning that +//! wasted memory due to fragmentation is bounded to a single page per allocatable type. + +#![allow(dead_code)] + +use std::alloc::Layout; +use std::collections::{BTreeMap, HashMap, VecDeque}; +use std::ptr::NonNull; +use std::sync::Mutex; + +#[derive(Debug)] +pub struct Allocator { + inner: Mutex, +} + +#[derive(Debug)] +struct AllocatorInner { + // layout and region are only used for Drop, otherwise we always manage through the other + // fields. + layout: Layout, + region: NonNull, + + free_pages: Vec>, + + // This slab indirects into the allocator's internally memory, allowing us to move allocated + // entries ~transparently to callers. + // + // FIXME: Remove the indirection by moving the containing memory (path secret entries) into the + // allocator and/or guarantee that they don't move without invalidation of the child via `Pin`. + parents: slab::Slab>>, + + // These are lists of *allocated* entries. + allocated_pages: BTreeMap>>, + + // For each size class, the page we are currently allocating from. + // + // When this runs out we can grab a new region from `free_pages`. + reserved_page: HashMap>>, +} + +#[derive(Debug, PartialEq, Eq)] +struct BySize(Layout); + +impl std::ops::Deref for BySize { + type Target = Layout; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl Ord for BySize { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.0 + .size() + .cmp(&other.0.size()) + .then(self.0.align().cmp(&other.0.align())) + } +} + +impl PartialOrd for BySize { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +// Note: must contain at least one entry for every size class. +// 2**14 is sufficient for our purposes. +const PAGE_SIZE: usize = 1 << 13; + +impl Allocator { + pub fn with_capacity(capacity: usize) -> Allocator { + let layout = + Layout::from_size_align(capacity.next_multiple_of(PAGE_SIZE), PAGE_SIZE).unwrap(); + let region = unsafe { NonNull::new(std::alloc::alloc(layout)).unwrap() }; + // ensures step_by does the right thing. + assert_eq!(layout.size() % PAGE_SIZE, 0); + let free_pages: Vec<_> = (0..layout.size()) + .step_by(PAGE_SIZE) + .map(|offset| unsafe { NonNull::new(region.as_ptr().add(offset)).unwrap() }) + .collect(); + let end = (region.as_ptr() as usize) + .checked_add(layout.size()) + .unwrap(); + for page in free_pages.iter().copied() { + let start = (page.as_ptr() as usize).checked_add(PAGE_SIZE).unwrap(); + assert!(start <= end, "{:x} < {:x}", start, end); + + let end = page.as_ptr().wrapping_add(PAGE_SIZE & !(PAGE_SIZE - 1)); + assert_eq!( + page.as_ptr() as usize, + (end as usize - 1) & !(PAGE_SIZE - 1) + ); + } + let inner = AllocatorInner { + layout, + region, + parents: slab::Slab::new(), + free_pages, + allocated_pages: Default::default(), + reserved_page: Default::default(), + }; + Allocator { + inner: Mutex::new(inner), + } + } + + /// Allocate `Layout`. + /// + /// Returns a handle which can be used to lookup the allocation. + pub fn allocate(&self, layout: Layout) -> AllocationGuard<'_> { + let mut inner = self.inner.lock().unwrap_or_else(|e| e.into_inner()); + let handle = inner.allocate(layout); + // this allocation cannot be freed yet as we didn't release the `inner` lock. 
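+        // so `parents[handle]` is still `Some` and this unwrap cannot fail.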
+ inner.read_allocation(self, handle).unwrap() + } + + pub fn read_allocation(&self, handle: usize) -> Option> { + let inner = self.inner.lock().unwrap_or_else(|e| e.into_inner()); + + let guard = inner.parents[handle].lock(); + let Some(offset) = *guard else { + return None; + }; + + parking_lot::MutexGuard::leak(guard); + + Some(AllocationGuard { + this: self, + mutex: handle, + ptr: unsafe { + NonNull::new(inner.region.as_ptr().add(usize::try_from(offset).unwrap())).unwrap() + }, + }) + } + + /// Must only be called once. + pub unsafe fn deallocate(&self, handle: usize) { + let mut inner = self.inner.lock().unwrap_or_else(|e| e.into_inner()); + + let entry = inner.parents.remove(handle); + // FIXME: ABA avoidance needed? + assert!(!entry.is_locked()); + let Some(offset) = entry.into_inner() else { + // If already deallocated, nothing more to do. + return; + }; + let deallocate_ptr = unsafe { inner.region.as_ptr().add(usize::try_from(offset).unwrap()) }; + // Round the pointer we are deallocating down to the page size, giving us a pointer to the + // start of the page. + let deallocating_page = map_addr(deallocate_ptr, |addr| addr & !(PAGE_SIZE - 1)); + let layout = deallocating_page + .add(std::mem::size_of::()) + .cast::() + .read_unaligned(); + + // Lookup `layout` (after adjustment) in reserved_page, if possible, popping the last + // allocated entry, and potentially freeing the page. If not possible, that means + // reserved_page is empty; + if let Some(Some(start)) = inner.reserved_page.get_mut(&layout) { + unsafe { + let page_start = start.as_ptr(); + + let mut page = PageWithLayout { + start: page_start, + layout, + }; + + page.pop_to(&mut inner, deallocate_ptr); + + if page.count() == 0 { + // remove this reserved page. + assert!(inner.reserved_page.remove(&layout).is_some()); + + // we already popped the page, so just put it back on the free pages list. + inner.free_pages.push(NonNull::new(page_start).unwrap()); + } + + return; + } + } + + // Lookup `layout` in allocated pages and pop an entry off of a page, moving that page into + // `reserved` and replacing the deallocated entry with the popped one. + // + // Note that no reserved page exists (since we didn't exit above). + if let Some(pages) = inner.allocated_pages.get_mut(&BySize(layout)) { + unsafe { + let page_start = pages.pop_back().unwrap().as_ptr(); + + let mut page = PageWithLayout { + start: page_start, + layout, + }; + + page.pop_to(&mut inner, deallocate_ptr); + + // This is reachable if this is the only entry on the page. + if page.count() == 0 { + // we already popped the page, so just put it back on the free pages list. + inner.free_pages.push(NonNull::new(page_start).unwrap()); + } + + // OK, at this point `deallocated` is filled, and we have a page with N-1 entries + // that we add to reserved_page. + inner + .reserved_page + .insert(layout, Some(NonNull::new(page_start).unwrap())); + + return; + } + } + + // This entry cannot be in a partial page pre-dealloc (it would be a reserved page in that + // case) and it cannot be a full page (2nd case was hit above). No other cases exist. + unreachable!() + } +} + +struct PageWithLayout { + start: *mut u8, + layout: Layout, +} + +impl PageWithLayout { + fn count(&self) -> u16 { + unsafe { self.start.cast::().read() } + } + + fn add_count(&mut self, v: i16) { + unsafe { + *self.start.cast::() = self.count().checked_add_signed(v).unwrap(); + } + } + + /// Pop an entry from this page and move the contents into `dest`. 
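+    /// The popped entry's parent slot is re-pointed at `dest`, so existing handles stay valid.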
+ /// + /// `dest` is assumed to be an entry (possibly on this page) which is of size `layout`. + unsafe fn pop_to(&mut self, inner: &mut AllocatorInner, dest: *mut u8) { + let page_end = self.start.add(PAGE_SIZE & !(self.layout.align() - 1)); + + let last_allocated = page_end.sub(self.layout.size() * self.count() as usize); + // last_allocated.is_aligned_to(layout.align()), except stable. + assert!(last_allocated as usize & (self.layout.align() - 1) == 0); + + self.add_count(-1); + + // If we are deallocating the last entry on this page, no parent swapping is + // needed, we can just drop it. + if last_allocated != dest { + // Lock the parent pointer for the entry we're popping and replacing the + // deallocated entry with. + let src_parent_idx = last_allocated.cast::().read() as usize; + let mut src_parent = inner.parents[src_parent_idx].lock(); + assert!(src_parent.is_some()); + + // Copy the data into the now-deallocated entry. + dest.copy_from_nonoverlapping(last_allocated, self.layout.size()); + + // Update parent pointer to point to new entry. + *src_parent = + Some(u32::try_from(dest as usize - inner.region.as_ptr() as usize).unwrap()); + drop(src_parent); + } + } + + /// Add an entry to this page. + /// + /// Returns Some(entry address) if this was successful. + fn push(&mut self) -> Option<*mut u8> { + unsafe { + let page_end = self.start.add(PAGE_SIZE & !(self.layout.align() - 1)); + + let last_allocated = page_end.sub(self.layout.size() * self.count() as usize); + // last_allocated.is_aligned_to(layout.align()), except stable. + assert!(last_allocated as usize & (self.layout.align() - 1) == 0); + + let new_allocation = last_allocated.wrapping_sub(self.layout.size()); + + if new_allocation + >= self + .start + .add(std::mem::size_of::()) + .add(std::mem::size_of::()) + { + self.add_count(1); + Some(new_allocation) + } else { + None + } + } + } +} + +pub struct AllocationGuard<'a> { + this: &'a Allocator, + mutex: usize, + ptr: NonNull, +} + +impl AllocationGuard<'_> { + pub fn as_ptr(&self) -> NonNull { + self.ptr + } + + pub fn handle(&self) -> usize { + self.mutex + } +} + +impl Drop for AllocationGuard<'_> { + fn drop(&mut self) { + // creation leaked the guard, so now we force unlock. + unsafe { + self.this + .inner + .lock() + .unwrap_or_else(|e| e.into_inner()) + .parents[self.mutex] + .force_unlock(); + } + } +} + +impl AllocatorInner { + fn allocate(&mut self, layout: Layout) -> usize { + // Add parent pointer field. + let (layout, _) = Layout::new::().extend(layout).unwrap(); + let layout = layout.pad_to_align(); + + let reserved = self.reserved_page.entry(layout).or_insert_with(|| None); + let align = layout.align(); + assert!(align.is_power_of_two()); + + let allocation = 'allocate: loop { + match reserved { + Some(page_start) => { + let mut page = PageWithLayout { + start: page_start.as_ptr(), + layout, + }; + if let Some(ptr) = page.push() { + break ptr; + } else { + // move before the counter + self.allocated_pages + .entry(BySize(layout)) + .or_default() + .push_back(*page_start); + // no more entries left... + *reserved = None; + // fallthrough + } + } + None => {} + } + + // Ok, we failed to pull from the reserved page, re-populate reserved. + if let Some(page) = self.free_pages.pop() { + unsafe { + // Each page has a u16 counter at the front of allocated entries. + // Initialize the counter. + // It is discoverable later by aligning the end of the page down. 
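+                    // (`deallocate()` finds this header again by rounding an entry pointer down to PAGE_SIZE.)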
+ page.as_ptr().cast::().write(0); + page.as_ptr() + .add(std::mem::size_of::()) + .cast::() + .write_unaligned(layout); + *reserved = Some(page); + + // and loop around to allocate from the reserved page... + continue; + } + } + + // Ok, no free pages left either, we need to deallocate entries. + for (page_layout, pages) in self.allocated_pages.iter_mut().rev() { + let Some(page) = pages.pop_front() else { + continue; + }; + + // OK, we are going to empty this page and return it to free pages, which will then + // move it into the reserved pages for `layout`. + // + // We need to deallocate all of the entries on this page (in theory we can try to + // move them into a reserved page if it's available, but for simplicitly we're just + // deallocating right now). + // + // FIXME: This does mean that when we call allocate() to *grow* we might actually + // deallocate our own memory. That's not ideal, but seems like an OK failure mode - + // one workaround could be to pin the local page to avoid using it. + + // SAFETY: `count` is protected by the mutex we're in and pages are initialized to + // a zero count when we move them out of free pages. + let count = unsafe { page.as_ptr().cast::().read() }; + + unsafe { + let mut next = page.as_ptr().add(PAGE_SIZE); + for _ in 0..count { + next = map_addr(next, |v| { + v.checked_sub(page_layout.size()) + .map(|v| v & !(align - 1)) + // should never overflow / underflow since we're iterating by `count`. + .unwrap() + }); + + // We prepend a u32 to layouts which contains the parent index. + let parent = next.cast::().read(); + + // Mark the parent as deallocated. + *self.parents[parent as usize].lock() = None; + } + } + + // All entries on the page have been deallocated and are no longer in use, so this + // page is now free. + self.free_pages.push(page); + + // We don't need more than one page to be freed, so break out. + continue 'allocate; + } + + unreachable!("if no free pages must have at least some allocated pages") + }; + + // OK, we've allocated a block of memory (allocation). Now we need to initialize the parent + // pointer. + + let parent_idx = self.parents.insert(parking_lot::Mutex::new(Some( + u32::try_from(allocation as usize - self.region.as_ptr() as usize).unwrap(), + ))); + unsafe { + allocation + .cast::() + .write(u32::try_from(parent_idx).unwrap()); + } + + parent_idx + } + + fn read_allocation<'a>( + &self, + parent: &'a Allocator, + handle: usize, + ) -> Option> { + let guard = self.parents[handle].lock(); + let Some(offset) = *guard else { + return None; + }; + + // FIXME: we leak this guard, and then release the `inner` mutex lock which is a bit + // problematic since &mut could let you get_mut() on the Mutex... some safety condition is + // probably missing somewhere. + parking_lot::MutexGuard::leak(guard); + + Some(AllocationGuard { + this: parent, + mutex: handle, + ptr: unsafe { + NonNull::new(self.region.as_ptr().add(usize::try_from(offset).unwrap())).unwrap() + }, + }) + } +} + +#[cfg(miri)] +fn map_addr(v: *mut T, mapper: impl FnOnce(usize) -> usize) -> *mut T { + v.map_addr(mapper) +} + +// Actually this is "new enough Rust", i.e., support for Strict Provenance. +// Remove when we bump MSRV to 1.84. 
+#[cfg(not(miri))] +fn map_addr(v: *mut T, mapper: impl FnOnce(usize) -> usize) -> *mut T { + mapper(v as usize) as *mut T +} + +impl Drop for AllocatorInner { + fn drop(&mut self) { + unsafe { + std::alloc::dealloc(self.region.as_ptr(), self.layout); + } + } +} + +#[cfg(test)] +mod test; diff --git a/dc/s2n-quic-dc/src/path/secret/receiver/allocator/test.rs b/dc/s2n-quic-dc/src/path/secret/receiver/allocator/test.rs new file mode 100644 index 0000000000..aedaf51f43 --- /dev/null +++ b/dc/s2n-quic-dc/src/path/secret/receiver/allocator/test.rs @@ -0,0 +1,135 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +use super::Allocator; +use std::alloc::Layout; + +#[test] +fn trivial_check() { + let allocator = Allocator::with_capacity(8192); + let handle1 = allocator.allocate(Layout::new::()).handle(); + let handle2 = allocator.allocate(Layout::new::()).handle(); + let ptr1 = allocator.read_allocation(handle1).unwrap(); + let ptr2 = allocator.read_allocation(handle2).unwrap(); + assert_ne!(ptr1.as_ptr(), ptr2.as_ptr()); + drop(ptr1); + drop(ptr2); + unsafe { + allocator.deallocate(handle1); + allocator.deallocate(handle2); + } +} + +#[test] +fn fills_page() { + // 1 means we allocate a single page + let allocator = Allocator::with_capacity(1); + let mut handles = vec![]; + for _ in 0..1021 { + handles.push(allocator.allocate(Layout::new::()).handle()); + } + let mut count = 0; + for handle in handles.iter() { + count += allocator.read_allocation(*handle).is_some() as usize; + } + assert_eq!(count, handles.len()); +} + +#[test] +fn allocates_indefinitely() { + // 1 means we allocate a single page + let allocator = Allocator::with_capacity(1); + assert_eq!(allocator.inner.lock().unwrap().free_pages.len(), 1); + let layout = Layout::new::(); + let mut handles = vec![]; + for _ in 0..(1021 * if cfg!(miri) { 2 } else { 1000 }) { + handles.push(allocator.allocate(layout).handle()); + } + let mut count = 0; + for handle in handles { + count += allocator.read_allocation(handle).is_some() as usize; + + unsafe { + allocator.deallocate(handle); + } + } + // no fragmentation - we emptied a bunch of pages but we still have a full page allocated at + // the end. 
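+    // 1021 is the per-page capacity for a u32 entry plus its u32 parent index, once the
+    // page's u16 counter and Layout header are accounted for.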
+ assert_eq!(count, 1021); +} + +#[test] +fn allocate_and_deallocate_multipage() { + let allocator = Allocator::with_capacity(super::PAGE_SIZE * 3); + assert_eq!(allocator.inner.lock().unwrap().free_pages.len(), 3); + let mut handles = vec![]; + let layout = Layout::new::(); + for _ in 0..3000 { + handles.push(allocator.allocate(layout).handle()); + } + let mut count = 0; + for handle in handles.iter() { + count += allocator.read_allocation(*handle).is_some() as usize; + } + assert_eq!(count, 3000); + + for handle in handles { + unsafe { + allocator.deallocate(handle); + } + } +} + +#[test] +fn allocate_and_deallocate_multilayout() { + let allocator = Allocator::with_capacity(super::PAGE_SIZE * 10); + assert_eq!(allocator.inner.lock().unwrap().free_pages.len(), 10); + let mut handles = vec![]; + let layout1 = Layout::new::<[u32; 1]>(); + let layout2 = Layout::new::<[u32; 2]>(); + let layout3 = Layout::new::<[u32; 3]>(); + for _ in 0..1000 { + handles.push(allocator.allocate(layout1).handle()); + } + for _ in 0..1000 { + handles.push(allocator.allocate(layout2).handle()); + } + for _ in 0..1000 { + handles.push(allocator.allocate(layout3).handle()); + } + let mut count = 0; + for handle in handles.iter() { + count += allocator.read_allocation(*handle).is_some() as usize; + } + assert_eq!(count, 3000); + + for handle in handles[..1000].iter() { + unsafe { + allocator.deallocate(*handle); + } + } + for handle in handles[1000..2000].iter() { + unsafe { + allocator.deallocate(*handle); + } + } + for handle in handles[2000..].iter() { + unsafe { + allocator.deallocate(*handle); + } + } +} + +#[test] +fn reuse_handle() { + let allocator = Allocator::with_capacity(1); + let handle1 = allocator.allocate(Layout::new::()).handle(); + unsafe { + allocator.deallocate(handle1); + } + let handle2 = allocator.allocate(Layout::new::()).handle(); + unsafe { + allocator.deallocate(handle2); + } + assert_eq!(handle1, handle2); +} diff --git a/dc/s2n-quic-dc/src/path/secret/receiver/tests.rs b/dc/s2n-quic-dc/src/path/secret/receiver/tests.rs index 8e49634a2b..67f5a5f96e 100644 --- a/dc/s2n-quic-dc/src/path/secret/receiver/tests.rs +++ b/dc/s2n-quic-dc/src/path/secret/receiver/tests.rs @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use super::*; +use crate::credentials::Id; use bolero::check; use rand::{seq::SliceRandom, Rng, SeedableRng}; use std::collections::{binary_heap::PeekMut, BinaryHeap, HashSet}; @@ -90,61 +91,6 @@ fn check_ordered_u16() { }); } -#[test] -fn shared() { - let subject = Shared::new(); - let id1 = Id::from([0; 16]); - let mut id2 = Id::from([0; 16]); - // This is a part of the key ID not used for hashing. - id2[10] = 1; - let key1 = KeyId::new(0).unwrap(); - let key2 = KeyId::new(1).unwrap(); - subject.insert(&Credentials { - id: id1, - key_id: key1, - }); - assert_eq!( - subject.remove(&Credentials { - id: id1, - key_id: key1, - }), - Ok(()) - ); - assert_eq!( - subject.remove(&Credentials { - id: id1, - key_id: key1, - }), - Err(Error::AlreadyExists) - ); - subject.insert(&Credentials { - id: id2, - key_id: key1, - }); - assert_eq!( - subject.remove(&Credentials { - id: id1, - key_id: key1, - }), - Err(Error::Unknown) - ); - assert_eq!( - subject.remove(&Credentials { - id: id1, - key_id: key2, - }), - Err(Error::Unknown) - ); - // Removal never taints an entry, so this is still fine. 
- assert_eq!( - subject.remove(&Credentials { - id: id2, - key_id: key1, - }), - Ok(()) - ); -} - // This test is not particularly interesting, it's mostly just the same as the random tests above // which insert ordered and unordered values. Mostly it tests that we continue to allow 129 IDs of // arbitrary reordering. @@ -209,6 +155,9 @@ fn check_delayed_specific() { // delay represents the *minimum* delay a delayed entry sees. The maximum is up to SHARED_ENTRIES. fn check_delayed_inner(seed: u64, delay: u16) { + // FIXME: re-word + const SHARED_ENTRIES: usize = 65_000; + // We expect that the shared map is always big enough to absorb our delay. // (This is statically true; u16::MAX < SHARED_ENTRIES). assert!((delay as usize) < SHARED_ENTRIES); @@ -267,19 +216,22 @@ impl Model { fn insert(&mut self, op: u64) { let pid = Id::from([0; 16]); let id = KeyId::new(op).unwrap(); - let expected = self.oracle.insert(op); - if expected { + let expected = match self.oracle.insert(op) { + true => Ok(()), + false => Err(Error::AlreadyExists), + }; + if expected.is_ok() { self.insert_order.push(op); } let actual = self.subject.post_authentication(&Credentials { id: pid, key_id: id, }); - if actual.is_ok() != expected { + if actual.is_ok() != expected.is_ok() { let mut oracle = self.oracle.iter().collect::>(); oracle.sort_unstable(); panic!( - "Inserting {:?} failed, in oracle: {}, in subject: {:?}, inserted: {:?}", + "Inserting {:?} failed, in oracle: {:?}, in subject: {:?}, inserted: {:?}", op, expected, actual, self.insert_order ); } @@ -287,66 +239,51 @@ impl Model { } #[test] -fn shared_no_collisions() { - let mut seen = HashSet::new(); - let shared = Shared::new(); - for key_id in 0..SHARED_ENTRIES as u64 { - let index = shared.index(&Credentials { - id: Id::from([0; 16]), - key_id: KeyId::new(key_id).unwrap(), - }); - assert!(seen.insert(index)); - } - - // The next entry should collide, since we will wrap around. - let index = shared.index(&Credentials { - id: Id::from([0; 16]), - key_id: KeyId::new(SHARED_ENTRIES as u64 + 1).unwrap(), - }); - assert!(!seen.insert(index)); -} +fn check_manual_state() { + let state = State::without_shared(); + let pid = Id::from([0; 16]); + let creds = |id: u64| Credentials { + id: pid, + key_id: KeyId::new(id).unwrap(), + }; + state.post_authentication(&creds(0)).unwrap(); + assert_eq!(state.snapshot().max_seen, 0); + assert_eq!(state.snapshot().bitset, 0); + assert_eq!(state.snapshot().list, vec![]); -#[test] -fn shared_id_pair_no_collisions() { - let shared = Shared::new(); + state.post_authentication(&creds(32)).unwrap(); + assert_eq!(state.snapshot().max_seen, 32); + // bitset tracks 0..=31 + assert_eq!(state.snapshot().bitset, 0x8000_0000); + assert_eq!(state.snapshot().list, vec![]); - // Two random IDs. Exact constants shouldn't matter much, we're mainly aiming to test overall - // quality of our mapping from Id + KeyId. 
- let id1 = Id::from(u128::to_ne_bytes(0x25add729cce683cd0cda41d35436bdc6)); - let id2 = Id::from(u128::to_ne_bytes(0x2862115d0691fe180f2aeb26af3c2e5e)); + state.post_authentication(&creds(33)).unwrap(); + assert_eq!(state.snapshot().max_seen, 33); + // bitset tracks 1..=32 + assert_eq!(state.snapshot().bitset, 0x0000_0001); + assert_eq!(state.snapshot().list, vec![]); - for key_id in 0..SHARED_ENTRIES as u64 { - let index1 = shared.index(&Credentials { - id: id1, - key_id: KeyId::new(key_id).unwrap(), - }); - let index2 = shared.index(&Credentials { - id: id2, - key_id: KeyId::new(key_id).unwrap(), - }); + state.post_authentication(&creds(35)).unwrap(); + assert_eq!(state.snapshot().max_seen, 35); + // bitset tracks 3..=34 + assert_eq!(state.snapshot().bitset, 0x0000_0006); + assert_eq!(state.snapshot().list, vec![1, 2]); - // Our path secret IDs are sufficiently different that we expect that for any given index - // we map to a different slot. This test is not *really* saying much since it's highly - // dependent on the exact values of the path secret IDs, but it prevents simple bugs like - // ignoring the IDs entirely. - assert_ne!(index1, index2); - } -} + state.post_authentication(&creds(70)).unwrap(); + assert_eq!(state.snapshot().max_seen, 70); + // bitset tracks 38..=69 + assert_eq!(state.snapshot().bitset, 0x0000_0000); + assert_eq!( + state.snapshot().list, + (1..=37) + .filter(|v| ![32, 33, 35].contains(v)) + .collect::>() + ); -// Confirms that we start out without any entries present in the map. -#[test] -fn shared_no_entries() { - let shared = Shared::new(); - // We have to check all slots to be sure. The index used for lookup is going to be shuffled due - // to the hashing in of the secret. We need to use an all-zero path secret ID since the entries - // in the map start out zero-initialized today. - for key_id in 0..SHARED_ENTRIES as u64 { - assert_eq!( - shared.remove(&Credentials { - id: Id::from([0; 16]), - key_id: KeyId::new(key_id).unwrap(), - }), - Err(Error::Unknown) - ); - } + // zero has fallen out of tracking + assert_eq!( + state.post_authentication(&creds(0)).unwrap_err(), + // FIXME: this should be AlreadyExists as we have not evicted any unseen entries yet. + Error::Unknown + ); }
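+
+// Illustrative sketch, not part of the original change: the test name and body below are
+// hypothetical and only reuse APIs introduced above (`Shared::new`, `new_receiver`,
+// `post_authentication`, `minimum_unseen_key_id`). It exercises the in-window bitset path
+// through a receiver backed by a shared allocator rather than `without_shared()`.
+#[test]
+fn check_shared_bitset_path() {
+    let state = Shared::new().new_receiver();
+    let pid = Id::from([0; 16]);
+    let creds = |id: u64| Credentials {
+        id: pid,
+        key_id: KeyId::new(id).unwrap(),
+    };
+
+    // First sighting of key 5 is accepted; a second delivery is definitely a replay.
+    state.post_authentication(&creds(5)).unwrap();
+    assert_eq!(
+        state.post_authentication(&creds(5)).unwrap_err(),
+        Error::AlreadyExists
+    );
+
+    // An older, never-seen key within the 32-wide bitset window is accepted exactly once.
+    state.post_authentication(&creds(3)).unwrap();
+    assert_eq!(
+        state.post_authentication(&creds(3)).unwrap_err(),
+        Error::AlreadyExists
+    );
+
+    // `minimum_unseen_key_id` reports one past the maximum key ID seen so far.
+    assert_eq!(*state.minimum_unseen_key_id(), 6);
+}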