From a3eee05ee2c71b6e987a5a96eba8b5f301f96739 Mon Sep 17 00:00:00 2001 From: Hadrien G Date: Fri, 8 Nov 2019 17:55:37 +0100 Subject: [PATCH] Provide abstractions for properly aligning abomonated bytes --- src/align/alloc.rs | 259 +++++++++++++++++++++++++++++++++++++++++++++ src/align/mod.rs | 3 + src/lib.rs | 4 + tests/tests.rs | 19 +++- 4 files changed, 281 insertions(+), 4 deletions(-) create mode 100644 src/align/alloc.rs diff --git a/src/align/alloc.rs b/src/align/alloc.rs new file mode 100644 index 0000000..ec2487b --- /dev/null +++ b/src/align/alloc.rs @@ -0,0 +1,259 @@ +/// Tools for storing abomonated objects with correct alignment +/// +/// Use of `decode::()` requires that the input bytes are aligned on a +/// `T::alignment()` boundary, or else undefined behavior will ensue. +/// +/// This module provides tools for ensuring this alignment constraint on input +/// bytes of unknown or known-incorrect alignment before calling `decode()`. + +use crate::{ + Entomb, + Exhume, +}; + +use std::{ + alloc::{self, Layout}, + marker::PhantomData, + ops::{Deref, DerefMut}, + ptr::NonNull, +}; + + +/// Overaligned `Box<[u8]>` for abomonated objects of type T +/// +/// Compared with a regular `Box<[u8]>`, this heap-allocated bag of bytes also +/// ensures that the heap allocation is aligned on `T::alignment()`, and thus +/// suitable for use as input to `decode::()`. +pub struct Coffin(NonNull<[u8]>, PhantomData); + +impl Coffin { + /// Copy abomonated bytes into a suitably aligned heap allocation + /// + /// May abort the computation if memory is exhausted or the system allocator + /// is not able to satisfy the size or alignment requirements. + pub fn new(bytes: &[u8]) -> Self { + // Perform the memory allocation using the system allocator. This is + // safe because all safety preconditions are checked by Self::layout(). + let size = bytes.len(); + let layout = Self::layout(size); + let ptr = unsafe { alloc::alloc(layout) }; + + // Abort on memory allocation errors the recommended way. Since the + // system allocator may abort, no point in not aborting ourselves... + if ptr.is_null() { alloc::handle_alloc_error(layout); } + + // Transfer the input bytes on our new allocation. This is safe as... + // - `bytes.as_ptr()` has to be valid for `size` by slice construction + // - `ptr` is non-null and must point to a memory region of `size` bytes + // - Pointers are always byte-aligned, so alignment is irrelevant. + // - Heap allocations may not overlap with existing objects. + unsafe { ptr.copy_from_nonoverlapping(bytes.as_ptr(), size); } + + // Produce the output slice. The transmute is safe as... + // - We don't care about lifetimes as we want a NonNull in the end + // - As discussed above, `ptr` is non-null and well-aligned. + // - The bytes of the slice have been initialized above + Self(unsafe { std::slice::from_raw_parts_mut(ptr, size) }.into(), + PhantomData) + } + + /// Compute the proper layout for a coffin allocation, checking the safety + /// preconditions of the system memory allocator along the way. + /// + /// We handle errors via panics because they all emerge from edge cases that + /// should only be encountered by users actively trying to break this code. + fn layout(size: usize) -> Layout { + // Basic sanity check for debug builds + debug_assert!(size >= std::mem::size_of::(), + "Requested size is quite obviously not big enough"); + + // We're going to use the system allocator, so we cannot accept + // zero-sized slices of bytes. + assert!(size > 0, "Allocation size must be positive"); + + // At this point, the only layout errors that remain are those caused by + // a bad Abomonation::alignment implementation (alignment is zero or not + // a power of 2) or by a huge input size (close to usize::MAX). + Layout::from_size_align(size, T::alignment()) + .expect("Bad Abomonation::alignment() impl or excessive size") + } +} + +impl Deref for Coffin { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + // This is safe as... + // - The target allocation is live until the Coffin will be dropped. + // - Normal borrow-checking rules apply and prevent the user from + // aliasing or retaining the output reference in an invalid way. + // + // ...but see the Drop documentation for a possible edge case :( + unsafe { self.0.as_ref() } + } +} + +impl DerefMut for Coffin { + fn deref_mut(&mut self) -> &mut Self::Target { + // This is safe for the same reason that Deref is. + unsafe { self.0.as_mut() } + } +} + +impl Drop for Coffin { + fn drop(&mut self) { + // In principle, this should be safe for the same reason that DerefMut + // is, however there is a wrinkle for all of those... + // + // If we want any form of Deref to be safe, the Rust compiler must + // prevent LLVM from inserting memory reads from the slice after + // deallocation, and currently it doesn't. + // + // There is no clear reason why LLVM would do this, though, and `std` + // encounters the same problem everywhere, so we'll take the risk... + // + // FIXME: Once the Rust team has figured out the right way to handle + // this, use it here if it requires manual action. + // + // Here's one ongoing discussion of this topic for reference: + // https://github.com/rust-lang/rust/issues/55005 + let slice = unsafe { self.0.as_mut() }; + + // This is safe because... + // - Every Coffin is always created with its own allocation, only Drop + // can liberate it, and Drop will only be called once. + // - Layout is computed in the same way as in `Coffin::new()`, and the + // size of the target slice is the same as that of new's input bytes. + unsafe { alloc::dealloc(slice.as_mut_ptr(), + Self::layout(slice.len())); } + } +} + + +/// `Cow`-style abstraction for aligning abomonated bytes before `decode()` +/// +/// Often, one needs to decode input bytes which are _probably_ well-aligned, +/// but may not always to be. For example, POSIX memory allocations are aligned +/// on 16-byte boundaries, which is sufficient for most types... as long as +/// multiple abomonated objects are not stored in a sequence without padding +/// bytes in between. +/// +/// In those circumstances, pessimistically using `Coffin` all the time +/// would cause unnecessarily intensive use of the system memory allocator. +/// Instead, it is better to check if the input bytes are well-aligned and only +/// reallocate them if necessary, which is what this abstraction does. +pub enum AlignedBytes<'bytes, T: Exhume<'bytes>> { + /// The orignal bytes were sufficiently well-aligned + Borrowed(&'bytes mut [u8]), + + /// The abomonated bytes were relocated into a well-aligned heap location + Owned(Coffin), +} + +impl<'bytes, T: Exhume<'bytes>> AlignedBytes<'bytes, T> { + /// Prepare possibly misaligned bytes for decoding + pub fn new(bytes: &'bytes mut [u8]) -> Self { + let misalignment = (bytes.as_ptr() as usize) % T::alignment(); + if misalignment == 0 { + Self::Borrowed(bytes) + } else { + Self::Owned(Coffin::new(bytes)) + } + } +} + +impl<'bytes, T: Exhume<'bytes>> From<&'bytes mut [u8]> for AlignedBytes<'bytes, T> { + fn from(bytes: &'bytes mut [u8]) -> Self { + Self::new(bytes) + } +} + +impl<'bytes, T: Exhume<'bytes>> From> for AlignedBytes<'bytes, T> { + fn from(coffin: Coffin) -> Self { + Self::Owned(coffin) + } +} + +impl<'bytes, T: Exhume<'bytes>> Deref for AlignedBytes<'bytes, T> { + type Target = [u8]; + + fn deref(&self) -> &[u8] { + match self { + Self::Borrowed(b) => b, + Self::Owned(o) => o, + } + } +} + +impl<'bytes, T: Exhume<'bytes>> DerefMut for AlignedBytes<'bytes, T> { + fn deref_mut(&mut self) -> &mut [u8] { + match self { + Self::Borrowed(b) => b, + Self::Owned(o) => o, + } + } +} + + +#[cfg(test)] +mod tests { + use super::{AlignedBytes, Coffin, Entomb, Exhume}; + + #[test] + fn coffin() { + check_coffin::(); + check_coffin::(); + check_coffin::(); + check_coffin::(); + check_coffin::(); + } + + fn check_coffin() { + let bytes = make_test_bytes_for::(); + let coffin = Coffin::::new(&bytes[..]); + assert_eq!(&coffin[..], &bytes[..], + "Coffin data is incorrect"); + assert_eq!(coffin.as_ptr() as usize % T::alignment(), 0, + "Coffin alignment is not strong enough"); + } + + #[test] + fn aligned_bytes() { + check_aligned_bytes::(); + check_aligned_bytes::(); + check_aligned_bytes::(); + check_aligned_bytes::(); + } + + fn check_aligned_bytes() + where for<'a> T: Exhume<'a> + { + assert!(std::mem::align_of::() > 1, + "This test requires generating misaligned data"); + + let mut bytes = make_test_bytes_for::(); + let mut coffin = Coffin::::new(&bytes[..]); + let aligned_bytes = AlignedBytes::::new(&mut coffin[..]); + match aligned_bytes { + AlignedBytes::Borrowed(_) => {} + AlignedBytes::Owned(_) => panic!("Should not allocate here"), + } + assert_eq!(&aligned_bytes[..], &bytes[..]); + + bytes.push(42); + let mut coffin = Coffin::::new(&bytes[..]); + let aligned_bytes = AlignedBytes::::new(&mut coffin[1..]); + match aligned_bytes { + AlignedBytes::Borrowed(_) => panic!("Should allocate here"), + AlignedBytes::Owned(_) => {}, + } + assert_eq!(&aligned_bytes[..], &bytes[1..]); + } + + fn make_test_bytes_for() -> Vec { + let mut i = 0; + std::iter::repeat_with(|| { i += 1; i }) + .take(std::mem::size_of::()) + .collect::>() + } +} diff --git a/src/align/mod.rs b/src/align/mod.rs index 7aa85b4..eff9c4f 100644 --- a/src/align/mod.rs +++ b/src/align/mod.rs @@ -1,6 +1,9 @@ /// Utilities for handling alignment in abomonated data mod io; +mod alloc; #[deprecated(note = "Made pub for internal unsafe_abomonate use only")] pub use self::io::{AlignedReader, AlignedWriter}; + +pub use self::alloc::{AlignedBytes, Coffin}; diff --git a/src/lib.rs b/src/lib.rs index 1543825..453318d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -119,6 +119,10 @@ pub unsafe fn encode(typed: &T, write: W) -> IOResult<()> { /// abomonated data of type T, which you can check with `T::alignment()`. /// Failure to meet this requirement will result in undefined behavior. /// +/// If you are not able to guarantee sufficient alignment from your data source, you may find the +/// `align::AlignedBytes` utility useful. It checks if your data is well-aligned, and moves it +/// into a well-aligned heap allocation otherwise. +/// /// # Examples /// ``` /// use abomonation::{encode, decode}; diff --git a/tests/tests.rs b/tests/tests.rs index 26b43cb..0fec0e4 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -1,6 +1,7 @@ extern crate abomonation; use abomonation::*; +use abomonation::align::AlignedBytes; use std::fmt::Debug; // Test struct for the unsafe_abomonate macro @@ -135,10 +136,20 @@ fn test_multiple_encode_decode() { unsafe { encode(&vec![1,2,3], &mut bytes).unwrap(); } unsafe { encode(&"grawwwwrr".to_owned(), &mut bytes).unwrap(); } - let (t, r) = unsafe { decode::(&mut bytes) }.unwrap(); assert_eq!(*t, 0); - let (t, r) = unsafe { decode::(r) }.unwrap(); assert_eq!(*t, 7); - let (t, r) = unsafe { decode::>(r) }.unwrap(); assert_eq!(*t, vec![1,2,3]); - let (t, _r) = unsafe { decode::(r) }.unwrap(); assert_eq!(*t, "grawwwwrr".to_owned()); + let (t, r) = unsafe { decode::(&mut bytes) }.unwrap(); + assert_eq!(*t, 0); + + let mut r = AlignedBytes::::new(r); + let (t, r) = unsafe { decode::(&mut r) }.unwrap(); + assert_eq!(*t, 7); + + let mut r = AlignedBytes::>::new(r); + let (t, r) = unsafe { decode::>(&mut r) }.unwrap(); + assert_eq!(*t, vec![1,2,3]); + + let mut r = AlignedBytes::::new(r); + let (t, _r) = unsafe { decode::(&mut r) }.unwrap(); + assert_eq!(*t, "grawwwwrr".to_owned()); } #[test]