diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml index d947bc1..df9f423 100644 --- a/.github/workflows/audit.yml +++ b/.github/workflows/audit.yml @@ -8,12 +8,19 @@ on: jobs: audit: - name: Cargo Audit runs-on: ubuntu-latest + steps: - name: Checkout sources - uses: actions/checkout@v1 + uses: actions/checkout@v4 + - name: Run cargo audit - uses: actions-rs/audit-check@v1 + uses: rustsec/audit-check@v1.4.1 with: token: ${{ secrets.GITHUB_TOKEN }} + + permissions: + issues: write + issues-reason: to create issues + checks: write + checks-reason: to create check diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 74415f9..a9a9098 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -11,17 +11,13 @@ jobs: os: [ubuntu-latest, windows-latest] steps: - name: Checkout sources - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Install stable toolchain - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - override: true + uses: dtolnay/rust-toolchain@stable - name: Run cargo check - uses: actions-rs/cargo@v1 + uses: clechasseur/rs-cargo@v2 with: command: check @@ -33,17 +29,13 @@ jobs: os: [ubuntu-latest, windows-latest] steps: - name: Checkout sources - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Install nightly toolchain - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: nightly - override: true + uses: dtolnay/rust-toolchain@nightly - name: Run cargo test - uses: actions-rs/cargo@v1 + uses: clechasseur/rs-cargo@v2 with: command: test args: --no-default-features --features dev @@ -56,17 +48,13 @@ jobs: os: [ubuntu-latest, windows-latest] steps: - name: Checkout sources - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Install nightly toolchain - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: nightly - override: true + uses: dtolnay/rust-toolchain@nightly - name: Run cargo test - uses: actions-rs/cargo@v1 + uses: clechasseur/rs-cargo@v2 with: command: test args: --all-features @@ -78,17 +66,13 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout sources - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Install nightly toolchain - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: nightly - override: true + uses: dtolnay/rust-toolchain@nightly - name: Run cargo doc - uses: actions-rs/cargo@v1 + uses: clechasseur/rs-cargo@v2 with: command: doc args: --all-features @@ -98,18 +82,15 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout sources - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Install nightly toolchain - uses: actions-rs/toolchain@v1 + uses: dtolnay/rust-toolchain@nightly with: - profile: minimal - toolchain: nightly - override: true components: rustfmt - name: Run cargo fmt - uses: actions-rs/cargo@v1 + uses: clechasseur/rs-cargo@v2 with: command: fmt args: -- --check @@ -119,16 +100,16 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout sources - uses: actions/checkout@v2 + uses: actions/checkout@v4 + - name: Install nightly toolchain - uses: actions-rs/toolchain@v1 + uses: dtolnay/rust-toolchain@nightly with: - toolchain: nightly components: clippy - override: true - - uses: actions-rs/clippy-check@v1 + + - uses: clechasseur/rs-cargo@v2 with: - token: ${{ secrets.GITHUB_TOKEN }} + command: clippy args: --all-features miri: @@ -136,17 +117,15 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout sources - uses: actions/checkout@v2 + uses: 
actions/checkout@v4 - name: Get latest toolchain version with miri run: echo "TOOLCHAIN=$(curl -s https://rust-lang.github.io/rustup-components-history/x86_64-unknown-linux-gnu/miri)" >> $GITHUB_ENV - name: Install latest nightly toolchain with miri - uses: actions-rs/toolchain@v1 + uses: dtolnay/rust-toolchain@master with: - profile: minimal toolchain: nightly-${{ env.TOOLCHAIN }} - override: true components: rust-src, miri - name: Run cargo miri test @@ -154,4 +133,4 @@ jobs: env: MIRI_LOG: 1 MIRI_BACKTRACE: 1 - MIRIFLAGS: -Zmiri-strict-provenance -Zmiri-check-number-validity + MIRIFLAGS: -Zmiri-strict-provenance diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml index a6c10e6..3cbc4f3 100644 --- a/.github/workflows/codecov.yml +++ b/.github/workflows/codecov.yml @@ -3,29 +3,22 @@ name: Code Coverage on: [push, pull_request] jobs: - code_cov: - name: Code Coverage Tarpaulin + Codecov + test: + name: coverage runs-on: ubuntu-latest + container: + image: xd009642/tarpaulin:develop-nightly + options: --security-opt seccomp=unconfined steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 - - name: Install nightly toolchain - uses: actions-rs/toolchain@v1 - with: - toolchain: nightly - override: true - - - name: Run cargo-tarpaulin - uses: actions-rs/tarpaulin@v0.1 - with: - args: --ignore-tests --all-features - out-type: Xml - run-types: AllTargets - env: - RUSTFLAGS: "-Ctarget-cpu=native" + - name: Generate code coverage + run: | + cargo +nightly tarpaulin --verbose --all-features --workspace --timeout 120 --out xml - name: Upload to codecov.io - uses: codecov/codecov-action@v2 + uses: codecov/codecov-action@v4 with: - files: ./cobertura.xml + token: ${{secrets.CODECOV_TOKEN}} + fail_ci_if_error: true diff --git a/Cargo.toml b/Cargo.toml index e9aa735..0824851 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,8 +18,8 @@ all-features = true [features] default = ["std"] -std = ["thiserror", "winapi/std"] -nightly_allocator_api = [] +std = ["dep:thiserror", "allocator-api2/std"] +nightly_allocator_api = ["allocator-api2/nightly"] nightly_core_intrinsics = [] # even though stabilised, still necessary for avx512 nightly_stdsimd = [] @@ -33,20 +33,23 @@ nightly = [ "nightly_strict_provenance", ] # required features to run tests; additional features enable more tests -dev = ["std", "nightly_allocator_api"] +dev = ["std"] [dependencies] +allocator-api2 = { version = "0.2", default-features = false } cfg-if = "1.0" -libc = "0.2" mirai-annotations = "1.12" sptr = "0.3" -thiserror = {version = "1.0", optional = true} +thiserror = { version = "1.0", optional = true } + +[target.'cfg(unix)'.dependencies] +rustix = { version = "0.38", features = ["mm", "param"] } [target.'cfg(windows)'.dependencies] -winapi = {version = "0.3.9", features = ["impl-default", "memoryapi", "sysinfoapi"]} +windows = { version = "0.56.0", features = ["Win32_System_SystemInformation", "Win32_System_Memory"] } [dev-dependencies] -criterion = "0.3" +criterion = "0.5" [[bench]] name = "bench_zeroizers" diff --git a/benches/bench_zeroizers.rs b/benches/bench_zeroizers.rs index 2c62483..47ac729 100644 --- a/benches/bench_zeroizers.rs +++ b/benches/bench_zeroizers.rs @@ -1,13 +1,6 @@ use criterion::{criterion_group, criterion_main, Criterion}; -#[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))] -use secmem_alloc::zeroize::AsmRepStosZeroizer; -#[cfg(all(target_arch = "x86_64", target_feature = "avx"))] -use secmem_alloc::zeroize::X86_64AvxZeroizer; 
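[Note on the benchmark hunk above] The benches now exercise a `MemsetAsmBarierZeroizer` in place of `LibcZeroizer` and the plain volatile-write zeroizers. The diff does not show that type's implementation; the following is only a sketch of the general "memset followed by an inline-asm optimization barrier" technique its name suggests, with a hypothetical function name.

```rust
/// Sketch of a "memset then asm barrier" zeroizer, the general technique the
/// new `MemsetAsmBarierZeroizer` name suggests; illustrative only, not the
/// crate's implementation.
///
/// # Safety
/// `ptr` must be valid for writes of `len` bytes.
pub unsafe fn zeroize_memset_asm_barrier(ptr: *mut u8, len: usize) {
    // Ordinary, freely optimizable memset...
    unsafe { core::ptr::write_bytes(ptr, 0, len) };
    // ...followed by an empty asm block that takes the pointer and may read
    // memory (no `nomem`/`readonly` option), so the compiler cannot discard
    // the stores above as dead writes.
    unsafe {
        core::arch::asm!("/* {0} */", in(reg) ptr, options(nostack, preserves_flags));
    }
}

fn main() {
    let mut secret = [0xAAu8; 32];
    // SAFETY: the buffer is valid for writes of its full length.
    unsafe { zeroize_memset_asm_barrier(secret.as_mut_ptr(), secret.len()) };
    assert!(secret.iter().all(|&b| b == 0));
}
```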
-#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] -use secmem_alloc::zeroize::X86_64Sse2Zeroizer; use secmem_alloc::zeroize::{ - LibcZeroizer, MemZeroizer, VolatileMemsetZeroizer, VolatileWrite8Zeroizer, - VolatileWriteZeroizer, + MemZeroizer, MemsetAsmBarierZeroizer, VolatileMemsetZeroizer, VolatileWrite8Zeroizer, }; fn zeroize_b127(z: Z, array: &mut [u8; 127]) { @@ -50,21 +43,9 @@ macro_rules! bench_zeroizers { $cgroup.bench_function("VolatileMemsetZeroizer", |b| { b.iter(|| $bench_function(VolatileMemsetZeroizer, &mut $array.0)) }); - $cgroup.bench_function("LibcZeroizer", |b| { - b.iter(|| $bench_function(LibcZeroizer, &mut $array.0)) + $cgroup.bench_function("MemsetAsmBarierZeroizer", |b| { + b.iter(|| $bench_function(MemsetAsmBarierZeroizer, &mut $array.0)) }); - $cgroup.bench_function("VolatileWriteZeroizer", |b| { - b.iter(|| $bench_function(VolatileWriteZeroizer, &mut $array.0)) - }); - $cgroup.bench_function("VolatileWrite8Zeroizer", |b| { - b.iter(|| $bench_function(VolatileWrite8Zeroizer, &mut $array.0)) - }); - #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] - { - $cgroup.bench_function("X86_64Sse2Zeroizer", |b| { - b.iter(|| $bench_function(X86_64Sse2Zeroizer, &mut $array.0)) - }); - } #[cfg(all(target_arch = "x86_64", target_feature = "avx"))] { $cgroup.bench_function("X86_64AvxZeroizer", |b| { diff --git a/src/allocator_api.rs b/src/allocator_api.rs deleted file mode 100644 index 825054e..0000000 --- a/src/allocator_api.rs +++ /dev/null @@ -1,437 +0,0 @@ -//! Nightly allocator api, code copied from the standard library. -// Copyright (c) 2021 rust standard library contributors -// Dual-licensed under Apache 2.0 and MIT licenses -// Code is slightly modified to make it work on stable rust. - -use crate::util::nonnull_as_mut_ptr; -use core::alloc::Layout; -use core::fmt; -use core::ptr::{self, NonNull}; -use mirai_annotations::debug_checked_precondition; - -/// The `AllocError` error indicates an allocation failure -/// that may be due to resource exhaustion or to -/// something wrong when combining the given input arguments with this -/// allocator. -#[derive(Copy, Clone, PartialEq, Eq, Debug)] -pub struct AllocError; - -impl fmt::Display for AllocError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str("memory allocation failed") - } -} - -/// An implementation of `Allocator` can allocate, grow, shrink, and deallocate -/// arbitrary blocks of data described via [`Layout`][]. -/// -/// `Allocator` is designed to be implemented on ZSTs, references, or smart -/// pointers because having an allocator like `MyAlloc([u8; N])` cannot be -/// moved, without updating the pointers to the allocated memory. -/// -/// Unlike [`GlobalAlloc`][], zero-sized allocations are allowed in `Allocator`. -/// If an underlying allocator does not support this (like jemalloc) or return a -/// null pointer (such as `libc::malloc`), this must be caught by the -/// implementation. -/// -/// ### Currently allocated memory -/// -/// Some of the methods require that a memory block be *currently allocated* via -/// an allocator. This means that: -/// -/// * the starting address for that memory block was previously returned by -/// [`allocate`], [`grow`], or [`shrink`], and -/// -/// * the memory block has not been subsequently deallocated, where blocks are -/// either deallocated directly by being passed to [`deallocate`] or were -/// changed by being passed to [`grow`] or [`shrink`] that returns `Ok`. 
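[Note on the removal of src/allocator_api.rs] The vendored copy of the unstable allocator API deleted here is superseded by the new `allocator-api2` dependency from the Cargo.toml hunk above, which exposes the same `Allocator`/`AllocError` items on stable. A minimal consumption sketch, assuming downstream code simply switches imports from `secmem_alloc::allocator_api` to `allocator_api2::alloc`:

```rust
use allocator_api2::alloc::{AllocError, Allocator, Global};
use core::alloc::Layout;

fn roundtrip() -> Result<(), AllocError> {
    let layout = Layout::array::<u8>(64).expect("valid layout");
    // `allocate` returns a `NonNull<[u8]>` covering at least `layout.size()` bytes.
    let block = Global.allocate(layout)?;
    // SAFETY: `block` was just allocated by `Global` with `layout`.
    unsafe { Global.deallocate(block.cast(), layout) };
    Ok(())
}

fn main() {
    roundtrip().expect("allocation failed");
}
```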
If -/// `grow` or `shrink` have returned `Err`, the passed pointer remains valid. -/// -/// [`allocate`]: Allocator::allocate -/// [`grow`]: Allocator::grow -/// [`shrink`]: Allocator::shrink -/// [`deallocate`]: Allocator::deallocate -/// -/// ### Memory fitting -/// -/// Some of the methods require that a layout *fit* a memory block. What it -/// means for a layout to "fit" a memory block means (or equivalently, for a -/// memory block to "fit" a layout) is that the following conditions must hold: -/// -/// * The block must be allocated with the same alignment as [`layout.align()`], -/// and -/// -/// * The provided [`layout.size()`] must fall in the range `min ..= max`, -/// where: -/// - `min` is the size of the layout most recently used to allocate the -/// block, and -/// - `max` is the latest actual size returned from [`allocate`], [`grow`], or -/// [`shrink`]. -/// -/// [`layout.align()`]: Layout::align -/// [`layout.size()`]: Layout::size -/// -/// # Safety -/// -/// * Memory blocks returned from an allocator must point to valid memory and -/// retain their validity until the instance and all of its clones are -/// dropped, -/// -/// * cloning or moving the allocator must not invalidate memory blocks returned -/// from this allocator. A cloned allocator must behave like the same -/// allocator, and -/// -/// * any pointer to a memory block which is [*currently allocated*] may be -/// passed to any other method of the allocator. -/// -/// [*currently allocated*]: #currently-allocated-memory -pub unsafe trait Allocator { - /// Attempts to allocate a block of memory. - /// - /// On success, returns a [`NonNull<[u8]>`][NonNull] meeting the size and - /// alignment guarantees of `layout`. - /// - /// The returned block may have a larger size than specified by - /// `layout.size()`, and may or may not have its contents initialized. - /// - /// # Errors - /// - /// Returning `Err` indicates that either memory is exhausted or `layout` - /// does not meet allocator's size or alignment constraints. - /// - /// Implementations are encouraged to return `Err` on memory exhaustion - /// rather than panicking or aborting, but this is not a strict - /// requirement. (Specifically: it is *legal* to implement this trait - /// atop an underlying native allocation library that aborts on memory - /// exhaustion.) - /// - /// Clients wishing to abort computation in response to an allocation error - /// are encouraged to call the [`handle_alloc_error`] function, rather - /// than directly invoking `panic!` or similar. - /// - /// [`handle_alloc_error`]: ../../alloc/alloc/fn.handle_alloc_error.html - fn allocate(&self, layout: Layout) -> Result, AllocError>; - - /// Behaves like `allocate`, but also ensures that the returned memory is - /// zero-initialized. - /// - /// # Errors - /// - /// Returning `Err` indicates that either memory is exhausted or `layout` - /// does not meet allocator's size or alignment constraints. - /// - /// Implementations are encouraged to return `Err` on memory exhaustion - /// rather than panicking or aborting, but this is not a strict - /// requirement. (Specifically: it is *legal* to implement this trait - /// atop an underlying native allocation library that aborts on memory - /// exhaustion.) - /// - /// Clients wishing to abort computation in response to an allocation error - /// are encouraged to call the [`handle_alloc_error`] function, rather - /// than directly invoking `panic!` or similar. 
- /// - /// [`handle_alloc_error`]: ../../alloc/alloc/fn.handle_alloc_error.html - fn allocate_zeroed(&self, layout: Layout) -> Result, AllocError> { - let ptr = self.allocate(layout)?; - // SAFETY: `alloc` returns a valid memory block - unsafe { nonnull_as_mut_ptr(ptr).write_bytes(0, ptr.as_ref().len()) } - Ok(ptr) - } - - /// Deallocates the memory referenced by `ptr`. - /// - /// # Safety - /// - /// * `ptr` must denote a block of memory [*currently allocated*] via this - /// allocator, and - /// * `layout` must [*fit*] that block of memory. - /// - /// [*currently allocated*]: #currently-allocated-memory - /// [*fit*]: #memory-fitting - unsafe fn deallocate(&self, ptr: NonNull, layout: Layout); - - /// Attempts to extend the memory block. - /// - /// Returns a new [`NonNull<[u8]>`][NonNull] containing a pointer and the - /// actual size of the allocated memory. The pointer is suitable for - /// holding data described by `new_layout`. To accomplish - /// this, the allocator may extend the allocation referenced by `ptr` to fit - /// the new layout. - /// - /// If this returns `Ok`, then ownership of the memory block referenced by - /// `ptr` has been transferred to this allocator. The memory may or may - /// not have been freed, and should be considered unusable unless it was - /// transferred back to the caller again via the return value - /// of this method. - /// - /// If this method returns `Err`, then ownership of the memory block has not - /// been transferred to this allocator, and the contents of the memory - /// block are unaltered. - /// - /// # Safety - /// - /// * `ptr` must denote a block of memory [*currently allocated*] via this - /// allocator. - /// * `old_layout` must [*fit*] that block of memory (The `new_layout` - /// argument need not fit it.). - /// * `new_layout.size()` must be greater than or equal to - /// `old_layout.size()`. - /// - /// [*currently allocated*]: #currently-allocated-memory - /// [*fit*]: #memory-fitting - /// - /// # Errors - /// - /// Returns `Err` if the new layout does not meet the allocator's size and - /// alignment constraints of the allocator, or if growing otherwise - /// fails. - /// - /// Implementations are encouraged to return `Err` on memory exhaustion - /// rather than panicking or aborting, but this is not a strict - /// requirement. (Specifically: it is *legal* to implement this trait - /// atop an underlying native allocation library that aborts on memory - /// exhaustion.) - /// - /// Clients wishing to abort computation in response to an allocation error - /// are encouraged to call the [`handle_alloc_error`] function, rather - /// than directly invoking `panic!` or similar. - /// - /// [`handle_alloc_error`]: ../../alloc/alloc/fn.handle_alloc_error.html - unsafe fn grow( - &self, - ptr: NonNull, - old_layout: Layout, - new_layout: Layout, - ) -> Result, AllocError> { - debug_checked_precondition!( - new_layout.size() >= old_layout.size(), - "`new_layout.size()` must be greater than or equal to `old_layout.size()`" - ); - - let new_ptr = self.allocate(new_layout)?; - - // SAFETY: because `new_layout.size()` must be greater than or equal to - // `old_layout.size()`, both the old and new memory allocation are valid for - // reads and writes for `old_layout.size()` bytes. Also, because the old - // allocation wasn't yet deallocated, it cannot overlap `new_ptr`. Thus, - // the call to `copy_nonoverlapping` is safe. The safety contract for - // `dealloc` must be upheld by the caller. 
- unsafe { - ptr::copy_nonoverlapping(ptr.as_ptr(), nonnull_as_mut_ptr(new_ptr), old_layout.size()); - self.deallocate(ptr, old_layout); - } - - Ok(new_ptr) - } - - /// Behaves like `grow`, but also ensures that the new contents are set to - /// zero before being returned. - /// - /// The memory block will contain the following contents after a successful - /// call to `grow_zeroed`: - /// * Bytes `0..old_layout.size()` are preserved from the original - /// allocation. - /// * Bytes `old_layout.size()..old_size` will either be preserved or - /// zeroed, depending on the allocator implementation. `old_size` refers - /// to the size of the memory block prior to the `grow_zeroed` call, - /// which may be larger than the size that was originally requested when - /// it was allocated. - /// * Bytes `old_size..new_size` are zeroed. `new_size` refers to the size - /// of the memory block returned by the `grow_zeroed` call. - /// - /// # Safety - /// - /// * `ptr` must denote a block of memory [*currently allocated*] via this - /// allocator. - /// * `old_layout` must [*fit*] that block of memory (The `new_layout` - /// argument need not fit it.). - /// * `new_layout.size()` must be greater than or equal to - /// `old_layout.size()`. - /// - /// [*currently allocated*]: #currently-allocated-memory - /// [*fit*]: #memory-fitting - /// - /// # Errors - /// - /// Returns `Err` if the new layout does not meet the allocator's size and - /// alignment constraints of the allocator, or if growing otherwise - /// fails. - /// - /// Implementations are encouraged to return `Err` on memory exhaustion - /// rather than panicking or aborting, but this is not a strict - /// requirement. (Specifically: it is *legal* to implement this trait - /// atop an underlying native allocation library that aborts on memory - /// exhaustion.) - /// - /// Clients wishing to abort computation in response to an allocation error - /// are encouraged to call the [`handle_alloc_error`] function, rather - /// than directly invoking `panic!` or similar. - /// - /// [`handle_alloc_error`]: ../../alloc/alloc/fn.handle_alloc_error.html - unsafe fn grow_zeroed( - &self, - ptr: NonNull, - old_layout: Layout, - new_layout: Layout, - ) -> Result, AllocError> { - debug_checked_precondition!( - new_layout.size() >= old_layout.size(), - "`new_layout.size()` must be greater than or equal to `old_layout.size()`" - ); - - let new_ptr = self.allocate_zeroed(new_layout)?; - - // SAFETY: because `new_layout.size()` must be greater than or equal to - // `old_layout.size()`, both the old and new memory allocation are valid for - // reads and writes for `old_layout.size()` bytes. Also, because the old - // allocation wasn't yet deallocated, it cannot overlap `new_ptr`. Thus, - // the call to `copy_nonoverlapping` is safe. The safety contract for - // `dealloc` must be upheld by the caller. - unsafe { - ptr::copy_nonoverlapping(ptr.as_ptr(), nonnull_as_mut_ptr(new_ptr), old_layout.size()); - self.deallocate(ptr, old_layout); - } - - Ok(new_ptr) - } - - /// Attempts to shrink the memory block. - /// - /// Returns a new [`NonNull<[u8]>`][NonNull] containing a pointer and the - /// actual size of the allocated memory. The pointer is suitable for - /// holding data described by `new_layout`. To accomplish - /// this, the allocator may shrink the allocation referenced by `ptr` to fit - /// the new layout. - /// - /// If this returns `Ok`, then ownership of the memory block referenced by - /// `ptr` has been transferred to this allocator. 
The memory may or may - /// not have been freed, and should be considered unusable unless it was - /// transferred back to the caller again via the return value - /// of this method. - /// - /// If this method returns `Err`, then ownership of the memory block has not - /// been transferred to this allocator, and the contents of the memory - /// block are unaltered. - /// - /// # Safety - /// - /// * `ptr` must denote a block of memory [*currently allocated*] via this - /// allocator. - /// * `old_layout` must [*fit*] that block of memory (The `new_layout` - /// argument need not fit it.). - /// * `new_layout.size()` must be smaller than or equal to - /// `old_layout.size()`. - /// - /// [*currently allocated*]: #currently-allocated-memory - /// [*fit*]: #memory-fitting - /// - /// # Errors - /// - /// Returns `Err` if the new layout does not meet the allocator's size and - /// alignment constraints of the allocator, or if shrinking otherwise - /// fails. - /// - /// Implementations are encouraged to return `Err` on memory exhaustion - /// rather than panicking or aborting, but this is not a strict - /// requirement. (Specifically: it is *legal* to implement this trait - /// atop an underlying native allocation library that aborts on memory - /// exhaustion.) - /// - /// Clients wishing to abort computation in response to an allocation error - /// are encouraged to call the [`handle_alloc_error`] function, rather - /// than directly invoking `panic!` or similar. - /// - /// [`handle_alloc_error`]: ../../alloc/alloc/fn.handle_alloc_error.html - unsafe fn shrink( - &self, - ptr: NonNull, - old_layout: Layout, - new_layout: Layout, - ) -> Result, AllocError> { - debug_checked_precondition!( - new_layout.size() <= old_layout.size(), - "`new_layout.size()` must be smaller than or equal to `old_layout.size()`" - ); - - let new_ptr = self.allocate(new_layout)?; - - // SAFETY: because `new_layout.size()` must be lower than or equal to - // `old_layout.size()`, both the old and new memory allocation are valid for - // reads and writes for `new_layout.size()` bytes. Also, because the old - // allocation wasn't yet deallocated, it cannot overlap `new_ptr`. Thus, - // the call to `copy_nonoverlapping` is safe. The safety contract for - // `dealloc` must be upheld by the caller. - unsafe { - ptr::copy_nonoverlapping(ptr.as_ptr(), nonnull_as_mut_ptr(new_ptr), new_layout.size()); - self.deallocate(ptr, old_layout); - } - - Ok(new_ptr) - } - - /// Creates a "by reference" adaptor for this instance of `Allocator`. - /// - /// The returned adaptor also implements `Allocator` and will simply borrow - /// this. 
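[Note on the `grow`/`shrink` contract documented above] The removed docs require that the old layout fit the block, that `new_layout.size() >= old_layout.size()`, and that ownership of the old block return to the allocator on success. A hedged caller-side sketch of that contract, written against the equivalent `allocator-api2` trait rather than the deleted vendored one:

```rust
use allocator_api2::alloc::{AllocError, Allocator, Global};
use core::alloc::Layout;

fn grow_example() -> Result<(), AllocError> {
    let old = Layout::array::<u8>(64).expect("valid layout");
    let new = Layout::array::<u8>(128).expect("valid layout");

    let block = Global.allocate(old)?;
    // SAFETY: `block` is currently allocated by `Global`, `old` fits it, and
    // `new.size() >= old.size()`. On success the old pointer must not be used.
    let grown = unsafe { Global.grow(block.cast(), old, new)? };
    // SAFETY: `grown` is currently allocated by `Global` and `new` fits it.
    unsafe { Global.deallocate(grown.cast(), new) };
    Ok(())
}

fn main() {
    grow_example().expect("grow failed");
}
```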
- #[inline(always)] - fn by_ref(&self) -> &Self - where - Self: Sized, - { - self - } -} - -unsafe impl Allocator for &A -where - A: Allocator + ?Sized, -{ - #[inline] - fn allocate(&self, layout: Layout) -> Result, AllocError> { - (**self).allocate(layout) - } - - #[inline] - fn allocate_zeroed(&self, layout: Layout) -> Result, AllocError> { - (**self).allocate_zeroed(layout) - } - - #[inline] - unsafe fn deallocate(&self, ptr: NonNull, layout: Layout) { - // SAFETY: the safety contract must be upheld by the caller - unsafe { (**self).deallocate(ptr, layout) } - } - - #[inline] - unsafe fn grow( - &self, - ptr: NonNull, - old_layout: Layout, - new_layout: Layout, - ) -> Result, AllocError> { - // SAFETY: the safety contract must be upheld by the caller - unsafe { (**self).grow(ptr, old_layout, new_layout) } - } - - #[inline] - unsafe fn grow_zeroed( - &self, - ptr: NonNull, - old_layout: Layout, - new_layout: Layout, - ) -> Result, AllocError> { - // SAFETY: the safety contract must be upheld by the caller - unsafe { (**self).grow_zeroed(ptr, old_layout, new_layout) } - } - - #[inline] - unsafe fn shrink( - &self, - ptr: NonNull, - old_layout: Layout, - new_layout: Layout, - ) -> Result, AllocError> { - // SAFETY: the safety contract must be upheld by the caller - unsafe { (**self).shrink(ptr, old_layout, new_layout) } - } -} diff --git a/src/boxed.rs b/src/boxed.rs deleted file mode 100644 index 07c978f..0000000 --- a/src/boxed.rs +++ /dev/null @@ -1,294 +0,0 @@ -//! Module providing a simple replacement for [`std::boxed::Box`] with allocator -//! support. -//! -//! # Motivation -//! - The allocator api of [`std::boxed::Box`] is still unstable (at the time of -//! writing). The [`Box`] provided by this module can be used on stable with -//! allocators. -//! - [Issue #78459](https://github.com/rust-lang/rust/issues/78459) prevents -//! the use of non-zero sized allocators with [`std::boxed::Box`] even on -//! nightly. -// some code and documentation is copied from the standard library - -use crate::allocator_api::{AllocError, Allocator}; -use alloc::alloc::handle_alloc_error; -use core::alloc::Layout; -use core::marker::PhantomData; -use core::mem::{ManuallyDrop, MaybeUninit}; -use core::ops::{Deref, DerefMut}; -use core::ptr::NonNull; - -/// A replacement for [`std::boxed::Box`] that works with custom allocators. -/// -/// See the module-level documentation for more. -pub struct Box { - /// Pointer to the inner value, allocated with `self.alloc`. - // Safety: must always point to a valid instance of `T`. - ptr: NonNull, - // we own an instance of type `T` - _phantom_heapmem: PhantomData, - /// Allocator used for heap allocation - alloc: A, -} - -impl Box { - /// Create [`Box`] from a pointer and an allocator. - /// - /// # Safety - /// - `ptr` has to be allocated using the allocator `alloc` (and not yet - /// deallocated) - /// - `ptr` must point to a valid instance of `T` (otherwise using e.g. - /// [`Deref::deref`] on the resulting [`Box`] is unsound) - /// - in particular `ptr` must point to an allocation that fits - /// `Layout::for_value(*ptr)` - unsafe fn from_raw_parts(ptr: NonNull, alloc: A) -> Self { - Self { - ptr, - alloc, - _phantom_heapmem: PhantomData::, - } - } - - /// Destruct a [`Box`] into the pointer and allocator without dropping the - /// [`Box`]. 
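[Note on the removal of src/boxed.rs] The stable `Box` replacement deleted below existed only because the std allocator API was unstable; `allocator-api2` ships an equivalent allocator-aware `Box`, which is presumably the intended replacement (an assumption, since the diff does not show migrated call sites). A minimal sketch mirroring the removed `new_in` examples:

```rust
// Assumed stand-in for the removed `secmem_alloc::boxed::Box`.
use allocator_api2::alloc::Global;
use allocator_api2::boxed::Box;

fn main() {
    // Allocate in an explicit allocator instead of the global default,
    // as the removed `Box::new_in` examples did.
    let five = Box::new_in(5u32, Global);
    assert_eq!(*five, 5);
}
```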
- fn into_raw_parts(self) -> (NonNull, A) { - let ptr = self.ptr; - let me = ManuallyDrop::new(self); - let alloc_ptr = &me.deref().alloc as *const A; - // SAFETY: `alloc_ptr` is valid for reads, properly aligned, initialised... - // SAFETY: the contents of `me` are never dropped so `alloc` can be safely - // dropped later - let alloc = unsafe { alloc_ptr.read() }; - (ptr, alloc) - } -} - -// documentation and implementations copied from the standard library -// Copyright (c) 2021 rust standard library contributors -// slight modifications to accomodate for missing APIs, different `Box` -// definition -impl Box { - /// Allocates memory in the given allocator then places `x` into it. - /// - /// This doesn't actually allocate if `T` is zero-sized. - /// - /// # Examples - /// - /// ``` - /// #![feature(allocator_api)] - /// - /// use secmem_alloc::boxed::Box; - /// use std::alloc::System; - /// - /// let five = Box::new_in(5, System); - /// ``` - #[inline] - pub fn new_in(x: T, alloc: A) -> Self { - let mut boxed = Self::new_uninit_in(alloc); - unsafe { - boxed.as_mut_ptr().write(x); - boxed.assume_init() - } - } - - /// Allocates memory in the given allocator then places `x` into it, - /// returning an error if the allocation fails - /// - /// This doesn't actually allocate if `T` is zero-sized. - /// - /// # Examples - /// - /// ``` - /// #![feature(allocator_api)] - /// - /// use secmem_alloc::boxed::Box; - /// use std::alloc::System; - /// - /// let five = Box::try_new_in(5, System)?; - /// # Ok::<(), core::alloc::AllocError>(()) - /// ``` - #[inline] - pub fn try_new_in(x: T, alloc: A) -> Result { - let mut boxed = Self::try_new_uninit_in(alloc)?; - unsafe { - boxed.as_mut_ptr().write(x); - Ok(boxed.assume_init()) - } - } - - /// Constructs a new box with uninitialized contents in the provided - /// allocator. - /// - /// # Examples - /// - /// ``` - /// #![feature(allocator_api)] - /// - /// use secmem_alloc::boxed::Box; - /// use std::alloc::System; - /// - /// let mut five = Box::::new_uninit_in(System); - /// - /// let five = unsafe { - /// // Deferred initialization: - /// five.as_mut_ptr().write(5); - /// - /// five.assume_init() - /// }; - /// - /// assert_eq!(*five, 5) - /// ``` - pub fn new_uninit_in(alloc: A) -> Box, A> { - let layout = Layout::new::>(); - // NOTE: Prefer match over unwrap_or_else since closure sometimes not - // inlineable. That would make code size bigger. 
- match Box::try_new_uninit_in(alloc) { - Ok(m) => m, - Err(_) => handle_alloc_error(layout), - } - } - - /// Constructs a new box with uninitialized contents in the provided - /// allocator, returning an error if the allocation fails - /// - /// # Examples - /// - /// ``` - /// #![feature(allocator_api)] - /// - /// use secmem_alloc::boxed::Box; - /// use std::alloc::System; - /// - /// let mut five = Box::::try_new_uninit_in(System)?; - /// - /// let five = unsafe { - /// // Deferred initialization: - /// five.as_mut_ptr().write(5); - /// - /// five.assume_init() - /// }; - /// - /// assert_eq!(*five, 5); - /// # Ok::<(), core::alloc::AllocError>(()) - /// ``` - pub fn try_new_uninit_in(alloc: A) -> Result, A>, AllocError> { - let layout = Layout::new::>(); - let ptr: NonNull> = alloc.allocate(layout)?.cast(); - unsafe { Ok(Box::from_raw_parts(ptr, alloc)) } - } -} - -// documentation and implementations copied from the standard library -// Copyright (c) 2021 rust standard library contributors -// slight modifications to accomodate for missing APIs, different `Box` -// definition -impl Box, A> { - /// Converts to `Box`. - /// - /// # Safety - /// - /// As with [`MaybeUninit::assume_init`], - /// it is up to the caller to guarantee that the value - /// really is in an initialized state. - /// Calling this when the content is not yet fully initialized - /// causes immediate undefined behavior. - /// - /// # Examples - /// - /// ``` - /// #![feature(allocator_api)] - /// - /// use secmem_alloc::boxed::Box; - /// use std::alloc::System; - /// - /// let mut five = Box::::new_uninit_in(System); - /// - /// let five: Box = unsafe { - /// // Deferred initialization: - /// five.as_mut_ptr().write(5); - /// - /// five.assume_init() - /// }; - /// - /// assert_eq!(*five, 5) - /// ``` - #[inline] - pub unsafe fn assume_init(self) -> Box { - let (ptr, alloc) = Box::into_raw_parts(self); - let ptr_init: NonNull = ptr.cast(); - unsafe { Box::from_raw_parts(ptr_init, alloc) } - } -} - -impl Deref for Box { - type Target = T; - - fn deref(&self) -> &T { - // SAFETY: `self.ptr` always points to a valid instance of `T` - unsafe { &*self.ptr.as_ptr() } - } -} - -impl DerefMut for Box { - fn deref_mut(&mut self) -> &mut T { - // SAFETY: `self.ptr` always points to a valid instance of `T` - unsafe { &mut *self.ptr.as_ptr() } - } -} - -impl Drop for Box { - fn drop(&mut self) { - // obtain the Layout of the value stored in this Box - let ref_to_inner: &T = self.deref(); - let layout = Layout::for_value::(ref_to_inner); - // `self.ptr` points to an allocation that fits `layout` - - // SAFETY: `self.ptr.as_ptr()` is valid for reads and writes, properly aligned - unsafe { - self.ptr.as_ptr().drop_in_place(); - } - // SAFETY: from now on it is unsound to dereference `self.ptr` (hence `self`) - - // deallocate memory - let ptr: NonNull = self.ptr.cast(); - // SAFETY: `self.ptr` was allocated with allocator `self.alloc` and fits - // `layout` - unsafe { - self.alloc.deallocate(ptr, layout); - } - // `self.ptr` is now dangling, but this is sound since `NonNull` is - // not `Drop` `self.alloc` is dropped automatically - } -} - -#[cfg(test)] -mod tests { - use super::Box; - use std::alloc::System; - use std::mem::MaybeUninit; - - #[test] - fn new_in() { - let boxed = Box::new_in([37; 256], System); - assert_eq!(*boxed, [37; 256]); - } - - #[test] - fn try_new_in() { - let boxed = Box::try_new_in([37; 256], System).expect("error creating box"); - assert_eq!(*boxed, [37; 256]); - } - - #[test] - fn uninit_initialise() { 
- let mut boxed: Box, System> = - Box::<[u8; 256], _>::new_uninit_in(System); - unsafe { - // initialise `boxed` - boxed.as_mut_ptr().write([37; 256]); - } - // SAFETY: `boxed` is now initialised - let boxed: Box<[u8; 256], System> = unsafe { boxed.assume_init() }; - assert_eq!(*boxed, [37; 256]); - } -} diff --git a/src/internals/mem.rs b/src/internals/mem.rs index 67f60cc..59735c4 100644 --- a/src/internals/mem.rs +++ b/src/internals/mem.rs @@ -1,91 +1,19 @@ //! Helper functions for allocating memory and working with memory pages. -#[cfg(unix)] -use core::ffi::c_void; use core::ptr::NonNull; -#[cfg(unix)] -use libc::{c_int, off_t, size_t}; -#[cfg(feature = "std")] -use thiserror::Error; -#[cfg(windows)] -use winapi::ctypes::c_void; - -/// Return the page size on the running system. -/// -/// Is constant during the entire execution of a process. -// TODO: should we store the page size in a static to avoid repeat FFI calls to -// get the page size? with cross language LTO and static libc linking that -// shouldn't be necessary -pub fn page_size() -> usize { - get_sys_page_size() -} - -cfg_if::cfg_if! { - if #[cfg(miri)] { - /// Page size shim for miri. - #[cfg(not(tarpaulin_include))] - fn get_sys_page_size() -> usize { - 4096 - } - } else if #[cfg(unix)] { - /// Return the page size on the running system by querying libc. - fn get_sys_page_size() -> usize { - unsafe { - // the pagesize must always fit in a `size_t` (`usize`) - #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] - { - libc::sysconf(libc::_SC_PAGESIZE) as size_t - } - } - } - } else if #[cfg(windows)] { - /// Return the page size on the running system by querying kernel32.lib. - fn get_sys_page_size() -> usize { - use winapi::um::sysinfoapi::{LPSYSTEM_INFO, GetSystemInfo, SYSTEM_INFO}; - - let mut sysinfo = SYSTEM_INFO::default(); - let sysinfo_ptr: LPSYSTEM_INFO = &mut sysinfo as *mut SYSTEM_INFO; - // SAFETY: `sysinfo_ptr` points to a valid (empty/all zeros) `SYSTEM_INFO` - unsafe { - GetSystemInfo(sysinfo_ptr) - }; - // the pagesize must always fit in a `usize` (on windows it is a `u32`) - #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] - { - sysinfo.dwPageSize as usize - } - } - } -} - -/// Could not allocate a memory page. -#[derive(Debug, Clone)] -#[cfg_attr(feature = "std", derive(Error))] -#[cfg_attr(feature = "std", error("could not map a memory page"))] -pub struct PageAllocError; - -/// Could not mlock a range of pages. -#[derive(Debug, Clone)] -#[cfg_attr(feature = "std", derive(Error))] -#[cfg_attr( - feature = "std", - error("could not lock the memory page to physical memory") -)] -struct MemLockError; /// An single allocated page of memory. pub struct Page { /// Pointer to the start of the page. page_ptr: NonNull, - ///// This type owns a page of memory as raw bytes - //_phantom_pagemem: PhantomData<[u8]>, /// Size of a memory page. /// /// It is not strictly necessary to store this as it is constant during the /// entire execution of a process. This will therefore at all times /// equal the result of `page_size`. - // TODO: if we decide to store the page size in a static then this field can be removed page_size: usize, + /// This type owns a page of memory as raw bytes + _phantom_pagemem: core::marker::PhantomData<[u8]>, } impl Page { @@ -104,11 +32,6 @@ impl Page { self.page_ptr.as_ptr() } - /// Get a mutable pointer to the start of the memory page. 
- fn as_c_ptr_mut(&self) -> *mut c_void { - self.as_ptr_mut() as *mut c_void - } - /// Get a non-mutable pointer to the start of the memory page. pub fn as_ptr(&self) -> *const u8 { self.page_ptr.as_ptr() as *const u8 @@ -117,270 +40,13 @@ impl Page { cfg_if::cfg_if! { if #[cfg(miri)] { - // miri shim - #[cfg(not(tarpaulin_include))] - impl Drop for Page { - fn drop(&mut self) { - let ptr = self.as_c_ptr_mut(); - let page_size = self.page_size(); - unsafe { - // SAFETY: we allocated/mapped this page in the constructor so it is safe to - // unmap now `munmap` also unlocks a page if it was locked so it is - // not necessary to `munlock` the page if it was locked. - //libc::munmap(ptr, self.page_size()); - std::alloc::dealloc( - ptr as *mut u8, - std::alloc::Layout::from_size_align(page_size, page_size).unwrap(), - ); - } - // SAFETY: `NonNull` and `usize` both do not drop so we need not - // worry about subsequent drops - } - } + mod miri; + pub use miri::PageAllocError; } else if #[cfg(unix)] { - impl Drop for Page { - fn drop(&mut self) { - let ptr = self.as_c_ptr_mut(); - unsafe { - // SAFETY: we allocated/mapped this page in the constructor so it is safe to - // unmap now `munmap` also unlocks a page if it was locked so it is - // not necessary to `munlock` the page if it was locked. - libc::munmap(ptr, self.page_size()); - } - // SAFETY: `NonNull` and `usize` both do not drop so we need not - // worry about subsequent drops - } - } + mod unix; + pub use unix::PageAllocError; } else if #[cfg(windows)] { - impl Drop for Page { - fn drop(&mut self) { - use winapi::um::memoryapi::VirtualFree; - use winapi::um::winnt::MEM_RELEASE; - use winapi::shared::minwindef::LPVOID; - - let ptr: LPVOID = self.as_c_ptr_mut(); - unsafe { - // SAFETY: we allocated/mapped this page in the constructor so it is safe to - // unmap now - VirtualFree(ptr, 0, MEM_RELEASE); - } - // SAFETY: `NonNull` and `usize` both do not drop so we need not - // worry about subsequent drops - } - } - } -} - -cfg_if::cfg_if! 
{ - if #[cfg(miri)] { - // miri shims, better than nothing but not very accurate - #[cfg(not(tarpaulin_include))] - impl Page { - fn alloc_new() -> Result { - let _addr: *mut c_void = core::ptr::null_mut(); - let page_size: size_t = page_size(); - let _prot: c_int = libc::PROT_READ | libc::PROT_WRITE; - // NORESERVE disables backing the memory map with swap space - let _flags = libc::MAP_PRIVATE | libc::MAP_NORESERVE | libc::MAP_ANONYMOUS; - let _fd: c_int = -1; - let _offset: off_t = 0; - - let page_ptr: *mut u8 = unsafe { - //libc::mmap(_addr, page_size, _prot, _flags, _fd, _offset) - std::alloc::alloc_zeroed( - std::alloc::Layout::from_size_align(page_size, page_size).unwrap(), - ) - }; - - if page_ptr.is_null() { - Err(PageAllocError) - } else { - let page_ptr = unsafe { - // SAFETY: we just checked that `page_ptr` is non-null - NonNull::new_unchecked(page_ptr as *mut u8) - }; - Ok(Self { - page_ptr, - page_size, - }) - } - } - - fn mlock(&mut self) -> Result<(), MemLockError> { - let res = { - //libc::mlock(self.as_c_ptr_mut(), self.page_size()) - let _ptr = self.as_c_ptr_mut(); - let _ps = self.page_size(); - 0 - }; - - if res == 0 { - Ok(()) - } else { - Err(MemLockError) - } - } - - pub fn alloc_new_lock() -> Result { - let mut page = Self::alloc_new()?; - // if this fails then `page` is deallocated by it's drop implementation - page.mlock().map_err(|_| PageAllocError)?; - Ok(page) - } - } - } else if #[cfg(unix)] { - impl Page { - /// Allocate a new page of memory using (anonymous) `mmap` with the - /// noreserve flag. - /// - /// The noreserve flag disables swapping of the memory page. As a - /// consequence, the OS may unmap the page of memory, in which case - /// writing to it causes a SIGSEGV. Therefore, the page - /// should be mlocked before actual use. - /// - /// # Errors - /// The function returns an `PageAllocError` if the `mmap` call fails. - fn alloc_new_noreserve() -> Result { - let addr: *mut c_void = core::ptr::null_mut(); - let page_size: size_t = page_size(); - let prot: c_int = libc::PROT_READ | libc::PROT_WRITE; - // NORESERVE disables backing the memory map with swap space - // it is not available (anymore) on FreeBSD/DragonFlyBSD (never implemented) - // also unimplemented on other BSDs, but the flag is there for compat... - // FreeBSD + DragonFlyBSD have a `MAP_NOCORE` flag which excludes this memory - // from being included in a core dump (but ideally, disable core dumps entirely) - cfg_if::cfg_if!{ - if #[cfg(any(target_os = "freebsd", target_os = "dragonfly"))] { - let flags = libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | libc::MAP_NOCORE; - } else { - let flags = libc::MAP_PRIVATE | libc::MAP_NORESERVE | libc::MAP_ANONYMOUS; - } - } - - let fd: c_int = -1; - let offset: off_t = 0; - - let page_ptr: *mut c_void = unsafe { - libc::mmap(addr, page_size, prot, flags, fd, offset) - }; - - if page_ptr.is_null() || page_ptr == libc::MAP_FAILED { - Err(PageAllocError) - } else { - let page_ptr = unsafe { - // SAFETY: we just checked that `page_ptr` is non-null - NonNull::new_unchecked(page_ptr as *mut u8) - }; - Ok(Self { - page_ptr, - page_size, - }) - } - } - - /// Lock the memory page to physical memory. - /// - /// When this function returns successfully then the memory page is - /// guarantied to be backed by physical memory, i.e. not (only) swapped. - /// In combination with the noreserve flag during the allocation, this - /// guaranties the memory to not be swapped at all, except on hibernation - /// or memory starvation. 
This is really the best we can achieve. If memory - /// contents are really secret than there is no other solution than to - /// use a swap space encrypted with an ephemeral secret key, and - /// hibernation should be disabled (both on the OS level). - fn mlock(&mut self) -> Result<(), MemLockError> { - let res = unsafe { libc::mlock(self.as_c_ptr_mut(), self.page_size()) }; - - if res == 0 { - Ok(()) - } else { - Err(MemLockError) - } - } - - /// Allocate a new page of memory using (anonymous) `mmap` with the - /// noreserve flag and mlock page. - /// - /// The noreserve flag disables swapping of the memory page. The page is - /// then mlocked to force it into physical memory. - /// - /// # Errors - /// The function returns an `PageAllocError` if the `mmap` or `mlock` call - /// fails. - pub fn alloc_new_lock() -> Result { - let mut page = Self::alloc_new_noreserve()?; - page.mlock().map_err(|_| PageAllocError)?; - Ok(page) - } - } - } else if #[cfg(windows)] { - impl Page { - /// Allocate a new page of memory using `VirtualAlloc`. - /// - /// # Errors - /// The function returns an `PageAllocError` if the `VirtualAlloc` call fails. - fn alloc_new() -> Result { - use winapi::um::memoryapi::VirtualAlloc; - use winapi::um::winnt::{MEM_COMMIT, MEM_RESERVE, PAGE_READWRITE}; - use winapi::shared::{minwindef::{DWORD, LPVOID}, basetsd::SIZE_T}; - - let addr: LPVOID = core::ptr::null_mut(); - let page_size: SIZE_T = page_size(); - let alloc_type: DWORD = MEM_RESERVE | MEM_COMMIT; - let protect: DWORD = PAGE_READWRITE; - - let page_ptr: LPVOID = unsafe { - VirtualAlloc(addr, page_size, alloc_type, protect) - }; - - if page_ptr.is_null() { - Err(PageAllocError) - } else { - let page_ptr = unsafe { - // SAFETY: we just checked that `page_ptr` is non-null - NonNull::new_unchecked(page_ptr as *mut u8) - }; - Ok(Self { - page_ptr, - page_size, - }) - } - } - - /// Lock the memory page to physical memory. - /// - /// When this function returns successfully then the memory page is - /// guarantied to be backed by physical memory, i.e. not (only) swapped. - /// This guaranties the memory to not be swapped at all, except on hibernation - /// or memory starvation. This is really the best we can achieve. If memory - /// contents are really secret than there is no other solution than to - /// use a swap space encrypted with an ephemeral secret key, and - /// hibernation should be disabled (both on the OS level). - fn lock(&mut self) -> Result<(), MemLockError> { - use winapi::um::memoryapi::VirtualLock; - use winapi::shared::minwindef::BOOL; - - let res: BOOL = unsafe { VirtualLock(self.as_c_ptr_mut(), self.page_size()) }; - - if res == 0 { - Err(MemLockError) - } else { - Ok(()) - } - } - - /// Allocate a new page of memory using `VirtualAlloc` and `VirtualLock` page. - /// - /// The page is locked to force it into physical memory. - /// - /// # Errors - /// The function returns an `PageAllocError` if the `VirtualAlloc` or `VirtualLock` - /// call fails. - pub fn alloc_new_lock() -> Result { - let mut page = Self::alloc_new()?; - page.lock().map_err(|_| PageAllocError)?; - Ok(page) - } - } + mod windows; + pub use windows::PageAllocError; } } diff --git a/src/internals/mem/miri.rs b/src/internals/mem/miri.rs new file mode 100644 index 0000000..a287bf8 --- /dev/null +++ b/src/internals/mem/miri.rs @@ -0,0 +1,83 @@ +//! Miri shims for memory management. Not accurate, but better than nothing. + +use super::Page; +use core::ptr::NonNull; + +/// Page size shim for miri. 
+#[cfg(not(tarpaulin_include))] +pub fn page_size() -> usize { + 4096 +} + +#[derive(Debug, Clone)] +#[cfg_attr(feature = "std", derive(thiserror::Error))] +pub enum PageAllocError { + #[cfg_attr(feature = "std", error("trying to create invalid layout"))] + Layout(std::alloc::LayoutError), + #[cfg_attr(feature = "std", error("could not allocate memory"))] + Alloc, + #[cfg_attr(feature = "std", error("could not lock memory"))] + Lock, +} + +#[cfg(not(tarpaulin_include))] +impl Page { + fn alloc_new() -> Result { + let page_size = page_size(); + + //libc::mmap(_addr, page_size, _prot, _flags, _fd, _offset) + let layout = std::alloc::Layout::from_size_align(page_size, page_size) + .map_err(|e| PageAllocError::Layout(e))?; + let page_ptr: *mut u8 = unsafe { std::alloc::alloc_zeroed(layout) }; + + if page_ptr.is_null() { + Err(PageAllocError::Alloc) + } else { + let page_ptr = unsafe { + // SAFETY: we just checked that `page_ptr` is non-null + NonNull::new_unchecked(page_ptr as *mut u8) + }; + Ok(Self { + page_ptr, + page_size, + _phantom_pagemem: core::marker::PhantomData, + }) + } + } + + fn mlock(&mut self) -> Result<(), PageAllocError> { + let res = { + //libc::mlock(self.as_c_ptr_mut(), self.page_size()) + let _ptr = self.as_ptr_mut(); + let _ps = self.page_size(); + 0 + }; + + if res == 0 { + Ok(()) + } else { + Err(PageAllocError::Lock) + } + } + + pub fn alloc_new_lock() -> Result { + let mut page = Self::alloc_new()?; + // if this fails then `page` is deallocated by it's drop implementation + page.mlock()?; + Ok(page) + } +} + +#[cfg(not(tarpaulin_include))] +impl Drop for Page { + fn drop(&mut self) { + let ptr = self.as_ptr_mut(); + let page_size = self.page_size(); + + //libc::munmap(ptr, self.page_size()); + let layout = std::alloc::Layout::from_size_align(page_size, page_size).unwrap(); + // SAFETY: we allocated this page in the constructor so it is safe to deallocate + // now. + unsafe { std::alloc::dealloc(ptr, layout) }; + } +} diff --git a/src/internals/mem/unix.rs b/src/internals/mem/unix.rs new file mode 100644 index 0000000..b96f57a --- /dev/null +++ b/src/internals/mem/unix.rs @@ -0,0 +1,114 @@ +//! Unix `mmap` private anonymous memory pages. + +use super::Page; + +use core::ffi::c_void; +use core::ptr::NonNull; + +/// Return the page size on the running system using the `rustix` crate. +pub fn page_size() -> usize { + rustix::param::page_size() +} + +#[derive(Debug, Clone)] +#[cfg_attr(feature = "std", derive(thiserror::Error))] +pub enum PageAllocError { + #[cfg_attr(feature = "std", error("could not map a memory page: {0}"))] + Mmap(rustix::io::Errno), + #[cfg_attr(feature = "std", error("could not lock memory page: {0}"))] + Mlock(rustix::io::Errno), +} + +impl Page { + /// Get a mutable pointer to the start of the memory page. + fn as_c_ptr_mut(&self) -> *mut c_void { + self.as_ptr_mut() as *mut c_void + } + + /// Allocate a new page of memory using (anonymous) `mmap` with the + /// noreserve flag. + /// + /// The noreserve flag disables swapping of the memory page. As a + /// consequence, the OS may unmap the page of memory, in which case + /// writing to it causes a SIGSEGV. Therefore, the page + /// should be mlocked before actual use. + /// + /// # Errors + /// The function returns an `PageAllocError` if the `mmap` call fails. 
+ fn alloc_new_noreserve() -> Result { + use rustix::mm::{MapFlags, ProtFlags}; + + let addr: *mut c_void = core::ptr::null_mut(); + let page_size = page_size(); + let prot = ProtFlags::READ | ProtFlags::WRITE; + // NORESERVE disables backing the memory map with swap space. It requires + // `mlock` to be used on the resulting page before use. Redox, FreeBSD + // and DragonFlyBSD don't have NORESERVE. Other BSDs also don't implement it, + // but it is available for compatibility. FreeBSD and DragonflyBSD have a NOCORE + // flag, which hides the page from core dumps (memory dumps when the process + // crashes). + cfg_if::cfg_if! { + if #[cfg(target_os = "redox")] { + let flags = MapFlags::PRIVATE; + } else if #[cfg(any(target_os = "freebsd", target_os = "dragonfly"))] { + let flags = MapFlags::PRIVATE | MapFlags::NOCORE; + } else { + let flags = MapFlags::PRIVATE | MapFlags::NORESERVE; + } + } + + let page_ptr: *mut c_void = + unsafe { rustix::mm::mmap_anonymous(addr, page_size, prot, flags) }?; + + // SAFETY: if `mmap` is successful, the result is non-zero + let page_ptr = unsafe { NonNull::new_unchecked(page_ptr as *mut u8) }; + Ok(Self { + page_ptr, + page_size, + _phantom_pagemem: core::marker::PhantomData, + }) + } + + /// Lock the memory page to physical memory. + /// + /// When this function returns successfully then the memory page is + /// guarantied to be backed by physical memory, i.e. not (only) swapped. + /// In combination with the noreserve flag during the allocation, this + /// guaranties the memory to not be swapped at all, except on hibernation + /// or memory starvation. This is really the best we can achieve. If memory + /// contents are really secret than there is no other solution than to + /// use a swap space encrypted with an ephemeral secret key, and + /// hibernation should be disabled (both on the OS level). + fn mlock(&mut self) -> Result<(), rustix::io::Errno> { + unsafe { rustix::mm::mlock(self.as_c_ptr_mut(), self.page_size()) } + } + + /// Allocate a new page of memory using (anonymous) `mmap` with the + /// noreserve flag and mlock page. + /// + /// The noreserve flag disables swapping of the memory page. The page is + /// then mlocked to force it into physical memory. + /// + /// # Errors + /// The function returns an `PageAllocError` if the `mmap` or `mlock` call + /// fails. + pub fn alloc_new_lock() -> Result { + let mut page = Self::alloc_new_noreserve().map_err(|e| PageAllocError::Mmap(e))?; + page.mlock().map_err(|e| PageAllocError::Mlock(e))?; + Ok(page) + } +} + +impl Drop for Page { + fn drop(&mut self) { + let ptr = self.as_c_ptr_mut(); + unsafe { + // SAFETY: we allocated/mapped this page in the constructor so it is safe to + // unmap now. `munmap` also unlocks a page if it was locked so it is + // not necessary to `munlock` the page if it was locked. + rustix::mm::munmap(ptr, self.page_size()).unwrap(); + } + // SAFETY: `NonNull` and `usize` both do not drop so we need not + // worry about subsequent drops + } +} diff --git a/src/internals/mem/windows.rs b/src/internals/mem/windows.rs new file mode 100644 index 0000000..930b5e5 --- /dev/null +++ b/src/internals/mem/windows.rs @@ -0,0 +1,110 @@ +//! Windows `VirtualAlloc` memory page allocation. + +use super::Page; + +use core::ffi::c_void; +use core::ptr::NonNull; + +/// Return the page size on the running system by querying kernel32.lib. 
+pub fn page_size() -> usize { + use windows::Win32::System::SystemInformation::{GetSystemInfo, SYSTEM_INFO}; + + let mut sysinfo = SYSTEM_INFO::default(); + let sysinfo_ptr = &mut sysinfo as *mut SYSTEM_INFO; + // SAFETY: `sysinfo_ptr` points to a valid (empty/all zeros) `SYSTEM_INFO` + unsafe { GetSystemInfo(sysinfo_ptr) }; + // the pagesize must always fit in a `usize` (on windows it is a `u32`) + #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] + { + sysinfo.dwPageSize as usize + } +} + +#[derive(Debug, Clone)] +#[cfg_attr(feature = "std", derive(thiserror::Error))] +pub enum PageAllocError { + #[cfg_attr(feature = "std", error("could not map a memory page"))] + VirtualAlloc, + #[cfg_attr(feature = "std", error("could not lock memory page: {0}"))] + VirtualLock(windows::core::Error), +} + +impl Page { + /// Get a mutable pointer to the start of the memory page. + fn as_c_ptr_mut(&self) -> *mut c_void { + self.as_ptr_mut() as *mut c_void + } + + /// Allocate a new page of memory using `VirtualAlloc`. + /// + /// # Errors + /// The function returns an `PageAllocError` if the `VirtualAlloc` call + /// fails. + fn alloc_new() -> Result { + use windows::Win32::System::Memory::{ + VirtualAlloc, MEM_COMMIT, MEM_RESERVE, PAGE_PROTECTION_FLAGS, PAGE_READWRITE, + VIRTUAL_ALLOCATION_TYPE, + }; + + let page_size = page_size(); + let alloc_type: VIRTUAL_ALLOCATION_TYPE = MEM_RESERVE | MEM_COMMIT; + let protect: PAGE_PROTECTION_FLAGS = PAGE_READWRITE; + + let page_ptr: *mut c_void = unsafe { VirtualAlloc(None, page_size, alloc_type, protect) }; + + if page_ptr.is_null() { + Err(()) + } else { + let page_ptr = unsafe { + // SAFETY: we just checked that `page_ptr` is non-null + NonNull::new_unchecked(page_ptr as *mut u8) + }; + Ok(Self { + page_ptr, + page_size, + _phantom_pagemem: core::marker::PhantomData, + }) + } + } + + /// Lock the memory page to physical memory. + /// + /// When this function returns successfully then the memory page is + /// guarantied to be backed by physical memory, i.e. not (only) swapped. + /// This guaranties the memory to not be swapped at all, except on + /// hibernation or memory starvation. This is really the best we can + /// achieve. If memory contents are really secret than there is no other + /// solution than to use a swap space encrypted with an ephemeral secret + /// key, and hibernation should be disabled (both on the OS level). + fn lock(&mut self) -> Result<(), windows::core::Error> { + use windows::Win32::System::Memory::VirtualLock; + + unsafe { VirtualLock(self.as_c_ptr_mut(), self.page_size()) } + } + + /// Allocate a new page of memory using `VirtualAlloc` and `VirtualLock` + /// page. + /// + /// The page is locked to force it into physical memory. + /// + /// # Errors + /// The function returns an `PageAllocError` if the `VirtualAlloc` or + /// `VirtualLock` call fails. 
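[Note on the shared backend surface] The miri, unix, and windows modules each provide `page_size()`, a `PageAllocError`, `Page::alloc_new_lock()`, and a `Drop` impl that unmaps the page. A sketch of how that shared contract might be exercised from inside the crate; only the signatures come from this diff, the usage itself is assumed and the helper name is hypothetical.

```rust
// Could live next to the `Page` type in `src/internals/mem.rs`; illustrative only.
fn zeroed_locked_page() -> Result<Page, PageAllocError> {
    // mmap/VirtualAlloc one page and lock it into physical memory.
    let page = Page::alloc_new_lock()?;
    // SAFETY: the page is mapped read/write for `page.page_size()` bytes.
    unsafe { core::ptr::write_bytes(page.as_ptr_mut(), 0, page.page_size()) };
    Ok(page)
    // Dropping the returned `Page` later unmaps (and thereby unlocks) it.
}
```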
+ pub fn alloc_new_lock() -> Result { + let mut page = Self::alloc_new().map_err(|_| PageAllocError::VirtualAlloc)?; + page.lock().map_err(|e| PageAllocError::VirtualLock(e))?; + Ok(page) + } +} + +impl Drop for Page { + fn drop(&mut self) { + use windows::Win32::System::Memory::{VirtualFree, MEM_RELEASE}; + + // SAFETY: we allocated/mapped this page in the constructor so it is safe to + // unmap now + unsafe { VirtualFree(self.as_c_ptr_mut(), 0, MEM_RELEASE) }.unwrap(); + // SAFETY: `NonNull` and `usize` both do not drop so we need not + // worry about subsequent drops + } +} diff --git a/src/internals/zeroize.rs b/src/internals/zeroize.rs index 65bb31d..c2da080 100644 --- a/src/internals/zeroize.rs +++ b/src/internals/zeroize.rs @@ -1,35 +1,103 @@ //! Utility functions for securely wiping memory. //! -//! Contains wrappers around intrinsics and ffi functions necessary for the -//! [`crate::zeroize`] module. +//! Utility functions for the [`crate::zeroize`] module, to securely wiping +//! memory, implemented using cross-platform pure Rust volatile writes. -#[cfg(target_arch = "x86_64")] -mod asm_x86_64; -#[cfg(target_arch = "x86_64")] -pub use asm_x86_64::*; +use crate::macros::precondition_memory_range; +use crate::util::is_aligned_ptr_mut; +use mirai_annotations::debug_checked_precondition; -mod system; -pub use system::*; +/// Zeroize the memory pointed to by `ptr` and of size `len` bytes, by +/// overwriting it byte for byte using volatile writes. +/// +/// This is guarantied to be not elided by the compiler. +/// +/// # Safety +/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see +/// the [`std::ptr`] documentation. In particular this function is not atomic. +pub unsafe fn volatile_write_zeroize(mut ptr: *mut u8, len: usize) { + precondition_memory_range!(ptr, len); + for _i in 0..len { + // SAFETY: `ptr` originally pointed into an allocation of `len` bytes so now, + // after `_i` steps `len - _i > 0` bytes are left, so `ptr` is valid for + // a byte write + unsafe { + core::ptr::write_volatile(ptr, 0u8); + } + // SAFETY: after increment, `ptr` points into the same allocation if `_i == len` + // or one byte past it, so `add` is sound + ptr = unsafe { ptr.add(1) }; + } +} -mod volatile_write; -pub use volatile_write::*; +/// Zeroize the memory pointed to by `ptr` for `len` rounded down to a multiple +/// of 8 bytes. +/// +/// This function rounds down `len` to a multiple of 8 and then zeroizes the +/// memory pointed to by `ptr` for that length. This operation is guarantied to +/// be not elided by the compiler. If `len` is a multiple of 8 then this +/// zeroizes the entire specified block of memory. Returns a pointer to the byte +/// after the last zeroed byte, with the provenance of `ptr`. +/// +/// # Safety +/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see +/// the [`std::ptr`] documentation. In particular this function is not atomic. +/// +/// Furthermore, `ptr` *must* be at least 8 byte aligned. 
+pub unsafe fn zeroize_align8_block8(mut ptr: *mut u8, len: usize) -> *mut u8 {
+    precondition_memory_range!(ptr, len);
+    debug_checked_precondition!(is_aligned_ptr_mut(ptr, 8));
+
+    let nblocks = (len - len % 8) / 8;
+    for _i in 0..nblocks {
+        // SAFETY: `ptr` originally pointed into an allocation of `len` bytes so now,
+        // after `_i` steps `len - 8*_i >= 8` bytes are left, so `ptr` is valid
+        // for an 8 byte write. SAFETY: `ptr` was originally 8 byte aligned by
+        // caller contract and we only added a multiple of 8 so it is still 8
+        // byte aligned
+        unsafe {
+            core::ptr::write_volatile(ptr.cast::<u64>(), 0u64);
+        }
+        // SAFETY: after increment, `ptr` points into the same allocation or (if
+        // `8*(_i + 1) == len`) one past the end of it, so `add` is sound; `ptr`
+        // stays 8 byte aligned
+        ptr = unsafe { ptr.add(8) };
+    }
+    ptr
+}
 
-/// Volatile write byte to memory.
+/// Zeroize the memory pointed to by `ptr` and of size `len % 8` bytes.
 ///
-/// This uses the [`core::intrinsics::volatile_set_memory`] intrinsic and can
-/// only be used on nightly, with the `nightly` feature enabled.
+/// This can be used to zeroize the bytes left unzeroized by
+/// `zeroize_align8_block8` if `len` is not a multiple of 8. This operation is
+/// guaranteed not to be elided by the compiler.
 ///
 /// # Safety
 /// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see
 /// the [`std::ptr`] documentation. In particular this function is not atomic.
-// In addition `ptr` needs to be properly aligned, but because we are talking
-// about bytes (therefore byte alignment), it *always* is.
-#[cfg(feature = "nightly_core_intrinsics")]
-pub unsafe fn volatile_memset(ptr: *mut u8, val: u8, len: usize) {
-    crate::macros::precondition_memory_range!(ptr, len);
-    // SAFETY: the caller must uphold the safety contract
-    unsafe {
-        core::intrinsics::volatile_set_memory(ptr, val, len);
+///
+/// Furthermore, `ptr` *must* be at least 4 byte aligned.
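Aside, not part of the patch: a minimal sketch of how these helpers are meant to compose, roughly what `VolatileWrite8Zeroizer` does later in this diff. `zeroize_align8_block8` (above) wipes the whole 8-byte blocks and returns a pointer just past the last zeroed byte; `zeroize_align4_tail8` (defined just below) then wipes the remaining `len % 8` bytes. The wrapper name `zeroize_buffer` is hypothetical and assumes the imports of this module.

// Sketch only: composes the helpers defined in this file.
//
// # Safety
// `ptr` must be valid for writes of `len` bytes.
unsafe fn zeroize_buffer(ptr: *mut u8, len: usize) {
    if is_aligned_ptr_mut(ptr, 8) {
        // wipe whole 8-byte blocks; returns a pointer past the last zeroed byte
        let tail = unsafe { zeroize_align8_block8(ptr, len) };
        // `tail` is still 8 (hence 4) byte aligned and `len % 8` bytes remain
        unsafe { zeroize_align4_tail8(tail, len) };
    } else {
        // no usable alignment: fall back to byte-for-byte volatile writes
        unsafe { volatile_write_zeroize(ptr, len) };
    }
}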
+pub unsafe fn zeroize_align4_tail8(mut ptr: *mut u8, len: usize) {
+    precondition_memory_range!(ptr, len % 8);
+    debug_checked_precondition!(is_aligned_ptr_mut(ptr, 4));
+
+    if len % 8 >= 4 {
+        // SAFETY: `ptr` is valid for `len % 8` bytes by caller contract
+        // SAFETY: `ptr` is still 4 byte aligned by caller contract
+        unsafe {
+            core::ptr::write_volatile(ptr.cast::<u32>(), 0u32);
+        }
+        ptr = unsafe { ptr.add(4) };
+    }
+    // the final remainder (at most 3 bytes) is zeroed byte-for-byte
+    // SAFETY: `ptr` has been incremented by a multiple of 4 <= `len` so `ptr`
+    // points to an allocation of `len % 4` bytes, so `ptr` can be written to
+    // and incremented `len % 4` times
+    for _i in 0..(len % 4) {
+        unsafe {
+            core::ptr::write_volatile(ptr, 0u8);
+        }
+        ptr = unsafe { ptr.add(1) };
     }
 }
 
@@ -76,135 +144,18 @@ mod tests {
         assert_eq!(&array[..], &expected[..]);
     }
 
-    #[cfg(feature = "nightly_core_intrinsics")]
-    #[test]
-    fn test_volatile_memset() {
-        test_b128_zeroizer(|ptr: *mut u8, len: usize| unsafe { volatile_memset(ptr, 0, len) })
-    }
-
-    #[cfg(any(
-        target_os = "freebsd",
-        target_os = "dragonfly",
-        target_os = "openbsd",
-        target_os = "netbsd",
-        target_os = "macos",
-        target_os = "ios",
-        target_env = "gnu",
-        target_env = "musl"
-    ))]
-    #[test]
-    #[cfg_attr(miri, ignore)] // ffi
-    fn test_explicit_bzero() {
-        test_b128_zeroizer(|ptr: *mut u8, len: usize| unsafe { libc_explicit_bzero(ptr, len) })
-    }
-
-    #[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))]
-    #[test]
-    #[cfg_attr(miri, ignore)] // asm
-    fn test_asm_ermsb_zeroize() {
-        test_b128_zeroizer(|ptr: *mut u8, len: usize| unsafe { asm_ermsb_zeroize(ptr, len) })
-    }
-
-    #[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))]
     #[test]
     fn test_volatile_write_zeroize() {
-        test_b128_zeroizer(|ptr: *mut u8, len: usize| unsafe { volatile_write_zeroize(ptr, len) })
+        test_b128_zeroizer(|ptr, len| unsafe { volatile_write_zeroize(ptr, len) })
    }
 
-    #[cfg(feature = "nightly_core_intrinsics")]
    #[test]
-    fn test_volatile_memset_lowalign() {
-        test_b239_lowalign_zeroizer(|ptr: *mut u8, len: usize| unsafe {
-            volatile_memset(ptr, 0, len)
-        })
-    }
-
-    #[cfg(any(
-        target_os = "freebsd",
-        target_os = "dragonfly",
-        target_os = "openbsd",
-        target_os = "netbsd",
-        target_os = "macos",
-        target_os = "ios",
-        target_env = "gnu",
-        target_env = "musl"
-    ))]
-    #[test]
-    #[cfg_attr(miri, ignore)] // ffi
-    fn test_explicit_bzero_lowalign() {
-        test_b239_lowalign_zeroizer(|ptr: *mut u8, len: usize| unsafe {
-            libc_explicit_bzero(ptr, len)
-        })
-    }
-
-    #[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))]
-    #[test]
-    #[cfg_attr(miri, ignore)] // asm
-    fn test_asm_ermsb_zeroize_lowalign() {
-        test_b239_lowalign_zeroizer(|ptr: *mut u8, len: usize| unsafe {
-            asm_ermsb_zeroize(ptr, len)
-        })
-    }
-
-    #[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))]
-    #[test]
-    fn test_volatile_write_zeroize_lowalign() {
-        test_b239_lowalign_zeroizer(|ptr: *mut u8, len: usize| unsafe {
-            volatile_write_zeroize(ptr, len)
-        })
+    fn test_lowalign_volatile_write_zeroize() {
+        test_b239_lowalign_zeroizer(|ptr, len| unsafe { volatile_write_zeroize(ptr, len) })
    }
 
    #[test]
    fn test_zeroize_align8_block8() {
        test_b257_align64_block_zeroizer(|ptr, len| unsafe { zeroize_align8_block8(ptr, len) })
    }
-
-    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
-    #[test]
-    #[cfg_attr(miri, ignore)] // asm
-    fn test_x86_64_simd16_zeroize_align16_block16() {
-        test_b257_align64_block_zeroizer(|ptr, len| unsafe {
-            x86_64_simd16_zeroize_align16_block16(ptr,
len) - }) - } - - #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] - #[test] - #[cfg_attr(miri, ignore)] // asm - fn test_x86_64_simd16_unroll2_zeroize_align16_block16() { - test_b257_align64_block_zeroizer(|ptr, len| unsafe { - x86_64_simd16_unroll2_zeroize_align16_block16(ptr, len) - }) - } - - #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))] - #[test] - #[cfg_attr(miri, ignore)] // asm - fn test_x86_64_simd32_zeroize_align32_block32() { - test_b257_align64_block_zeroizer(|ptr, len| unsafe { - x86_64_simd32_zeroize_align32_block32(ptr, len) - }) - } - - #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))] - #[test] - #[cfg_attr(miri, ignore)] // asm - fn test_x86_64_simd32_unroll2_zeroize_align32_block32() { - test_b257_align64_block_zeroizer(|ptr, len| unsafe { - x86_64_simd32_unroll2_zeroize_align32_block32(ptr, len) - }) - } - - #[cfg(all( - target_arch = "x86_64", - target_feature = "avx512f", - feature = "nightly_stdsimd" - ))] - #[test] - #[cfg_attr(miri, ignore)] // asm - fn test_x86_64_simd64_zeroize_align64_block64() { - test_b257_align64_block_zeroizer(|ptr, len| unsafe { - x86_64_simd64_zeroize_align64_block64(ptr, len) - }) - } } diff --git a/src/internals/zeroize/asm_x86_64.rs b/src/internals/zeroize/asm_x86_64.rs deleted file mode 100644 index 69c3113..0000000 --- a/src/internals/zeroize/asm_x86_64.rs +++ /dev/null @@ -1,326 +0,0 @@ -//! Utility functions for securely wiping memory, implemented in asm for x86_64 -//! cpus. - -use crate::macros::precondition_memory_range; -use crate::util::is_aligned_ptr_mut; -use mirai_annotations::debug_checked_precondition; - -/// Overwrite memory with zeros. This operation will not be elided by the -/// compiler. -/// -/// This uses inline assembly in Rust. The implementation makes use of the -/// efficient `rep stosb` memory set functionality on modern x86_64 cpus. This -/// is very slow for small amounts of data but very efficient for zeroizing -/// large amounts of data (depending an CPU architecture though), works on -/// stable, and does not require a libc. -/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -// In addition `ptr` needs to be properly aligned, but because we are talking -// about bytes (therefore byte alignment), it *always* is. -#[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))] -pub unsafe fn asm_ermsb_zeroize(ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - - unsafe { - core::arch::asm!( - "rep stosb byte ptr es:[rdi], al", - // `len` in the rcx register - inout("rcx") len => _, - // `ptr` int the rdi register - inout("rdi") ptr => _, - // zero byte to al (first byte of rax) register - in("al") 0u8, - options(nostack), - ); - } -} - -/// Zeroize the memory pointed to by `ptr` for `len` rounded down to a multiple -/// of 16 bytes. -/// -/// This function rounds down `len` to a multiple of 16 and then zeroizes the -/// memory pointed to by `ptr` for that length. This operation is guarantied to -/// be not elided by the compiler. If `len` is a multiple of 16 then this -/// zeroizes the entire specified block of memory. Returns a pointer to the byte -/// after the last zeroed byte, with the provenance of `ptr`. -/// -/// This uses sse2 instructions in inline asm to zeroize the memory with blocks -/// of 16 bytes at a time. 
-/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -/// -/// Furthermore, `ptr` *must* be at least 16 byte aligned. -#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] -pub unsafe fn x86_64_simd16_zeroize_align16_block16(mut ptr: *mut u8, len: usize) -> *mut u8 { - use core::arch::x86_64 as arch; - - precondition_memory_range!(ptr, len); - debug_checked_precondition!(is_aligned_ptr_mut(ptr, 16)); - - let nblocks = (len - len % 16) / 16; - - for _i in 0..nblocks { - // SAFETY: `ptr` is valid for a `len >= nblocks*16` byte write, so we can write - // `nblocks` times 16 bytes and increment `ptr` by 16 bytes; `ptr` stays 16 byte - // aligned - unsafe { - // SAFETY: `ptr` originally pointed into an allocation of `len` bytes so now, - // after `_i` steps `len - 16*_i >= 16` bytes are left, so `ptr` is valid - // for a 16 byte write; also `ptr` is 16 byte aligned - core::arch::asm!( - " - /* write 16 zero bytes to ptr */ - vmovdqa xmmword ptr [{0}], {1} - ", - in(reg) ptr, - in(xmm_reg) arch::_mm_setzero_si128(), - options(nostack), - ); - // NOTE: increment `ptr` outside of the asm to maintain provenance - // SAFETY: this stays within the memory where `ptr` is valid for writes and - // maintains 16 byte alignment - ptr = ptr.add(16); - } - } - ptr -} - -/// Zeroize the memory pointed to by `ptr` for `len` rounded down to a multiple -/// of 16 bytes. -/// -/// This function rounds down `len` to a multiple of 16 and then zeroizes the -/// memory pointed to by `ptr` for that length. This operation is guarantied to -/// be not elided by the compiler. If `len` is a multiple of 16 then this -/// zeroizes the entire specified block of memory. Returns a pointer to the byte -/// after the last zeroed byte, with the provenance of `ptr`. -/// -/// This uses sse2 instructions in inline asm to zeroize the memory with blocks -/// of 16 bytes at a time. -/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -/// -/// Furthermore, `ptr` *must* be at least 16 byte aligned. -#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] -pub unsafe fn x86_64_simd16_unroll2_zeroize_align16_block16( - mut ptr: *mut u8, - len: usize, -) -> *mut u8 { - use core::arch::x86_64 as arch; - - precondition_memory_range!(ptr, len); - debug_checked_precondition!(is_aligned_ptr_mut(ptr, 16)); - - let nblocks = (len - len % 16) / 16; - - // SAFETY: `ptr` is valid for a `len >= nblocks*16` byte write, so we can write - // `nblocks` times 16 bytes and increment `ptr` by 16 bytes; `ptr` stays 16 byte - // aligned - for _i in 0..nblocks / 2 { - unsafe { - core::arch::asm!( - " - /* write 16 zero bytes to ptr */ - vmovdqa xmmword ptr [{0}], {1} - vmovdqa xmmword ptr [{0} + 16], {1} - ", - in(reg) ptr, - in(xmm_reg) arch::_mm_setzero_si128(), - options(nostack), - ); - ptr = ptr.add(32); - } - } - if nblocks % 2 == 1 { - unsafe { - core::arch::asm!( - " - /* write 16 zero bytes to ptr */ - vmovdqa xmmword ptr [{0}], {1} - ", - in(reg) ptr, - in(xmm_reg) arch::_mm_setzero_si128(), - options(nostack), - ); - ptr = ptr.add(16); - } - } - ptr -} - -/// Zeroize the memory pointed to by `ptr` for `len` rounded down to a multiple -/// of 32 bytes. -/// -/// This function rounds down `len` to a multiple of 32 and then zeroizes the -/// memory pointed to by `ptr` for that length. 
This operation is guarantied to -/// be not elided by the compiler. If `len` is a multiple of 32 then this -/// zeroizes the entire specified block of memory. Returns a pointer to the byte -/// after the last zeroed byte, with the provenance of `ptr`. -/// -/// This uses avx2 instructions in inline asm to zeroize the memory with blocks -/// of 32 bytes at a time. -/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -/// -/// Furthermore, `ptr` *must* be at least 32 byte aligned. -#[cfg(all(target_arch = "x86_64", target_feature = "avx"))] -pub unsafe fn x86_64_simd32_zeroize_align32_block32(mut ptr: *mut u8, len: usize) -> *mut u8 { - use core::arch::x86_64 as arch; - - precondition_memory_range!(ptr, len); - debug_checked_precondition!(is_aligned_ptr_mut(ptr, 32)); - - let nblocks = (len - len % 32) / 32; - - for _i in 0..nblocks { - // SAFETY: `ptr` is valid for a `len >= nblocks*32` byte write, so we can write - // `nblocks` times 32 bytes and increment `ptr` by 32 bytes; `ptr` stays 32 byte - // aligned - unsafe { - // SAFETY: `ptr` originally pointed into an allocation of `len` bytes so now, - // after `_i` steps `len - 32*_i >= 32` bytes are left, so `ptr` is valid - // for a 32 byte write; also `ptr` is 32 byte aligned - core::arch::asm!( - " - /* write 32 zero bytes to ptr */ - vmovdqa ymmword ptr [{0}], {1} - ", - in(reg) ptr, - in(ymm_reg) arch::_mm256_setzero_si256(), - options(nostack), - ); - // NOTE: increment `ptr` outside of the asm to maintain provenance - // SAFETY: this stays within the memory where `ptr` is valid for writes and - // maintains 32 byte alignment - ptr = ptr.add(32); - } - } - ptr -} - -/// Zeroize the memory pointed to by `ptr` for `len` rounded down to a multiple -/// of 32 bytes. -/// -/// This function rounds down `len` to a multiple of 32 and then zeroizes the -/// memory pointed to by `ptr` for that length. This operation is guarantied to -/// be not elided by the compiler. If `len` is a multiple of 32 then this -/// zeroizes the entire specified block of memory. Returns a pointer to the byte -/// after the last zeroed byte, with the provenance of `ptr`. -/// -/// This uses avx2 instructions in inline asm to zeroize the memory with blocks -/// of 32 bytes at a time. -/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -/// -/// Furthermore, `ptr` *must* be at least 32 byte aligned. 
-#[cfg(all(target_arch = "x86_64", target_feature = "avx"))] -pub unsafe fn x86_64_simd32_unroll2_zeroize_align32_block32( - mut ptr: *mut u8, - len: usize, -) -> *mut u8 { - use core::arch::x86_64 as arch; - - precondition_memory_range!(ptr, len); - debug_checked_precondition!(is_aligned_ptr_mut(ptr, 32)); - - let nblocks = (len - len % 32) / 32; - - // SAFETY: `ptr` is valid for a `len >= nblocks*32` byte write, so we can write - // `nblocks` times 32 bytes and increment `ptr` by 32 bytes; `ptr` stays 32 byte - // aligned - for _i in 0..(nblocks / 2) { - unsafe { - core::arch::asm!( - " - /* write 64 zero bytes to ptr */ - vmovdqa ymmword ptr [{0}], {1} - vmovdqa ymmword ptr [{0} + 32], {1} - ", - in(reg) ptr, - in(ymm_reg) arch::_mm256_setzero_si256(), - options(nostack), - ); - ptr = ptr.add(64); - } - } - if nblocks % 2 == 1 { - unsafe { - core::arch::asm!( - " - /* write 32 zero bytes to ptr */ - vmovdqa ymmword ptr [{0}], {1} - ", - in(reg) ptr, - in(ymm_reg) arch::_mm256_setzero_si256(), - options(nostack), - ); - ptr = ptr.add(32); - } - } - ptr -} - -/// Zeroize the memory pointed to by `ptr` for `len` rounded down to a multiple -/// of 64 bytes. -/// -/// This function rounds down `len` to a multiple of 64 and then zeroizes the -/// memory pointed to by `ptr` for that length. This operation is guarantied to -/// be not elided by the compiler. If `len` is a multiple of 64 then this -/// zeroizes the entire specified block of memory. Returns a pointer to the byte -/// after the last zeroed byte, with the provenance of `ptr`. -/// -/// This uses avx512 instructions in inline asm to zeroize the memory with -/// blocks of 64 bytes at a time. -/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -/// -/// Furthermore, `ptr` *must* be at least 64 byte aligned. -#[cfg(all( - target_arch = "x86_64", - target_feature = "avx512f", - feature = "nightly_stdsimd" -))] -pub unsafe fn x86_64_simd64_zeroize_align64_block64(mut ptr: *mut u8, len: usize) -> *mut u8 { - use core::arch::x86_64 as arch; - - precondition_memory_range!(ptr, len); - debug_checked_precondition!(is_aligned_ptr_mut(ptr, 64)); - - let nblocks = (len - len % 64) / 64; - - for _i in 0..nblocks { - // SAFETY: `ptr` is valid for a `len >= nblocks*64` byte write, so we can write - // `nblocks` times 64 bytes and increment `ptr` by 64 bytes; `ptr` stays 64 byte - // aligned - unsafe { - // SAFETY: `ptr` originally pointed into an allocation of `len` bytes so now, - // after `_i` steps `len - 64*_i >= 64` bytes are left, so `ptr` is valid - // for a 64 byte write; also `ptr` is 64 byte aligned - core::arch::asm!( - " - /* write 64 zero bytes to ptr */ - vmovdqa64 zmmword ptr [{0}], {1} - ", - in(reg) ptr, - in(zmm_reg) arch::_mm512_setzero_si512(), - options(nostack), - ); - // NOTE: increment `ptr` outside of the asm to maintain provenance - // SAFETY: this stays within the memory where `ptr` is valid for writes and - // maintains 64 byte alignment - ptr = ptr.add(64); - } - } - ptr -} diff --git a/src/internals/zeroize/system.rs b/src/internals/zeroize/system.rs deleted file mode 100644 index 287a595..0000000 --- a/src/internals/zeroize/system.rs +++ /dev/null @@ -1,104 +0,0 @@ -//! Bindings to system functions for securely wiping memory. - -use crate::macros::precondition_memory_range; - -/// Overwrite memory with zeros. This operation will not be elided by the -/// compiler. 
-/// -/// This uses the `explicit_bzero` function present in many recent libcs. -/// -/// # Safety -/// It's C. But the safety requirement is quite obvious: The caller *must* -/// ensure that `ptr` is valid for writes of `len` bytes, see the [`std::ptr`] -/// documentation. In particular this function is not atomic. -// In addition `ptr` needs to be properly aligned, but because we are talking -// about bytes (therefore byte alignment), it *always* is. -#[cfg(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "openbsd", - all(target_env = "gnu", unix), - target_env = "musl" -))] -pub unsafe fn libc_explicit_bzero(ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - // SAFETY: the caller must uphold the safety contract - unsafe { - libc::explicit_bzero(ptr as *mut libc::c_void, len as libc::size_t); - } -} - -/// Overwrite memory with zeros. This operation will not be elided by the -/// compiler. -/// -/// This uses the `explicit_bzero` function present in many recent libcs. -/// -/// # Safety -/// It's C. But the safety requirement is quite obvious: The caller *must* -/// ensure that `ptr` is valid for writes of `len` bytes, see the [`std::ptr`] -/// documentation. In particular this function is not atomic. -// In addition `ptr` needs to be properly aligned, but because we are talking -// about bytes (therefore byte alignment), it *always* is. -#[cfg(all(target_env = "gnu", windows))] -pub unsafe fn libc_explicit_bzero(ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - extern "C" { - fn explicit_bzero(ptr: *mut libc::c_void, len: libc::size_t); - } - - // SAFETY: the caller must uphold the safety contract - unsafe { - explicit_bzero(ptr as *mut libc::c_void, len as libc::size_t); - } -} - -/// Overwrite memory with zeros. This operation will not be elided by the -/// compiler. -/// -/// This uses the `explicit_bzero` function present in many recent libcs. -/// -/// # Safety -/// It's C. But the safety requirement is quite obvious: The caller *must* -/// ensure that `ptr` is valid for writes of `len` bytes, see the [`std::ptr`] -/// documentation. In particular this function is not atomic. -// In addition `ptr` needs to be properly aligned, but because we are talking -// about bytes (therefore byte alignment), it *always* is. -#[cfg(target_os = "netbsd")] -pub unsafe fn libc_explicit_bzero(ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - // SAFETY: the caller must uphold the safety contract - unsafe { - libc::explicit_memset( - ptr as *mut libc::c_void, - 0 as libc::c_int, - len as libc::size_t, - ); - } -} - -/// Overwrite memory with zeros. This operation will not be elided by the -/// compiler. -/// -/// This uses the `explicit_bzero` function present in many recent libcs. -/// -/// # Safety -/// It's C. But the safety requirement is quite obvious: The caller *must* -/// ensure that `ptr` is valid for writes of `len` bytes, see the [`std::ptr`] -/// documentation. In particular this function is not atomic. -// In addition `ptr` needs to be properly aligned, but because we are talking -// about bytes (therefore byte alignment), it *always* is. 
-#[cfg(any(target_os = "macos", target_os = "ios"))] -pub unsafe fn libc_explicit_bzero(ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - // SAFETY: the caller must uphold the safety contract - unsafe { - // the zero value is a `c_int` (`i32` by default), but then converted to - // `unsigned char` (`u8`) - libc::memset_s( - ptr as *mut libc::c_void, - len as libc::size_t, - 0 as libc::c_int, - len as libc::size_t, - ); - } -} diff --git a/src/internals/zeroize/volatile_write.rs b/src/internals/zeroize/volatile_write.rs deleted file mode 100644 index 674356e..0000000 --- a/src/internals/zeroize/volatile_write.rs +++ /dev/null @@ -1,100 +0,0 @@ -//! Utility functions for securely wiping memory, implemented using -//! cross-platform volatile writes. - -use crate::macros::precondition_memory_range; -use crate::util::is_aligned_ptr_mut; -use mirai_annotations::debug_checked_precondition; - -/// Zeroize the memory pointed to by `ptr` and of size `len` bytes, by -/// overwriting it byte for byte using volatile writes. -/// -/// This is guarantied to be not elided by the compiler. -/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -pub unsafe fn volatile_write_zeroize(mut ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - for _i in 0..len { - // SAFETY: `ptr` originally pointed into an allocation of `len` bytes so now, - // after `_i` steps `len - _i > 0` bytes are left, so `ptr` is valid for - // a byte write - unsafe { - core::ptr::write_volatile(ptr, 0u8); - } - // SAFETY: after increment, `ptr` points into the same allocation if `_i == len` - // or one byte past it, so `add` is sound - ptr = unsafe { ptr.add(1) }; - } -} - -/// Zeroize the memory pointed to by `ptr` for `len` rounded down to a multiple -/// of 8 bytes. -/// -/// This function rounds down `len` to a multiple of 8 and then zeroizes the -/// memory pointed to by `ptr` for that length. This operation is guarantied to -/// be not elided by the compiler. If `len` is a multiple of 8 then this -/// zeroizes the entire specified block of memory. Returns a pointer to the byte -/// after the last zeroed byte, with the provenance of `ptr`. -/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -/// -/// Furthermore, `ptr` *must* be at least 8 byte aligned. -pub unsafe fn zeroize_align8_block8(mut ptr: *mut u8, len: usize) -> *mut u8 { - precondition_memory_range!(ptr, len); - debug_checked_precondition!(is_aligned_ptr_mut(ptr, 8)); - - let nblocks = (len - len % 8) / 8; - for _i in 0..nblocks { - // SAFETY: `ptr` originally pointed into an allocation of `len` bytes so now, - // after `_i` steps `len - 8*_i >= 8` bytes are left, so `ptr` is valid - // for an 8 byte write SAFETY: `ptr` was originally 8 byte aligned by - // caller contract and we only added a multiple of 8 so it is still 8 - // byte aligned - unsafe { - core::ptr::write_volatile(ptr.cast::(), 0u64); - } - // SAFETY: after increment, `ptr` points into the same allocation or (if `8*_i - // == len`) at most one byte past it, so `add` is sound; `ptr` stays 8 - // byte aligned - ptr = unsafe { ptr.add(8) }; - } - ptr -} - -/// Zeroize the memory pointed to by `ptr` and of size `len % 8` bytes. 
-/// -/// This can be used to zeroize the bytes left unzeroized by -/// `zeroize_align8_block8` if `len` is not a multiple of 8. This operation is -/// guarantied to be not elided by the compiler. -/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -/// -/// Furthermore, `ptr` *must* be at least 4 byte aligned. -pub unsafe fn zeroize_align4_tail8(mut ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len % 8); - debug_checked_precondition!(is_aligned_ptr_mut(ptr, 4)); - - if len % 8 >= 4 { - // SAFETY: `ptr` is valid for `len % 8` bytes by caller contract - // SAFETY: `ptr` is still 4 byte aligned by caller contract - unsafe { - core::ptr::write_volatile(ptr.cast::(), 0u32); - } - ptr = unsafe { ptr.add(4) }; - } - // the final remainder (at most 3 bytes) is zeroed byte-for-byte - // SAFETY: `ptr` has been incremented by a multiple of 4 <= `len` so `ptr` - // points to an allocation of `len % 4` bytes, so `ptr` can be written to - // and incremented `len % 4` times - for _i in 0..(len % 4) { - unsafe { - core::ptr::write_volatile(ptr, 0u8); - } - ptr = unsafe { ptr.add(1) }; - } -} diff --git a/src/lib.rs b/src/lib.rs index ebc5514..7a975bb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,10 @@ +// https://github.com/rust-lang/rust/issues/32838 #![cfg_attr(feature = "nightly_allocator_api", feature(allocator_api))] +// for `volatile_memset` #![cfg_attr(feature = "nightly_core_intrinsics", feature(core_intrinsics))] -#![cfg_attr(feature = "nightly_stdsimd", feature(stdsimd))] +// https://github.com/rust-lang/rust/issues/111137 +#![cfg_attr(feature = "nightly_stdsimd", feature(stdarch_x86_avx512))] +// https://github.com/rust-lang/rust/issues/95228 #![cfg_attr(feature = "nightly_strict_provenance", feature(strict_provenance))] #![cfg_attr( feature = "nightly_strict_provenance", @@ -25,9 +29,8 @@ //! ``` //! #![feature(allocator_api)] //! // requires `nightly_allocator_api` crate feature to be enabled and a nightly compiler -//! use secmem_alloc::allocator_api::Allocator; +//! use secmem_alloc::allocator_api::{Allocator, Global, Vec}; //! use secmem_alloc::zeroizing_alloc::ZeroizeAlloc; -//! use std::alloc::Global; //! //! fn read_password(buf: &mut Vec) { //! // query password from the user and put it in `buf` @@ -57,8 +60,7 @@ //! // if you enable the `nightly_allocator_api` crate feature, the following line is necessary //! #![feature(allocator_api)] //! -//! use secmem_alloc::allocator_api::Allocator; -//! use secmem_alloc::boxed::Box; +//! use secmem_alloc::allocator_api::{Allocator, Box}; //! use secmem_alloc::sec_alloc::SecStackSinglePageAlloc; //! //! fn get_secret_key(buf: &mut Box<[u8; 256], A>) { @@ -110,19 +112,19 @@ extern crate alloc; +/// Re-exports the most important items of the [`allocator-api2` crate]. +/// +/// [`allocator-api2` crate]: https://crates.io/crates/allocator-api2 +pub mod allocator_api { + pub use allocator_api2::alloc::{Allocator, Global}; + pub use allocator_api2::boxed::Box; + pub use allocator_api2::vec::Vec; +} + mod internals; mod macros; mod util; -#[cfg(not(feature = "nightly_allocator_api"))] -pub mod allocator_api; -#[cfg(feature = "nightly_allocator_api")] -/// Nightly allocator api, imported from the standard library. 
-pub mod allocator_api { - pub use core::alloc::{AllocError, Allocator}; -} - -pub mod boxed; pub mod sec_alloc; pub mod zeroize; pub mod zeroizing_alloc; diff --git a/src/sec_alloc.rs b/src/sec_alloc.rs index 0ac6c99..2a63495 100644 --- a/src/sec_alloc.rs +++ b/src/sec_alloc.rs @@ -20,13 +20,13 @@ //! in the global allocator leaks). This *could* make some exploits harder, //! but not impossible. -use crate::allocator_api::{AllocError, Allocator}; use crate::internals::mem; use crate::util::{ align_up_ptr_mut, align_up_usize, is_aligned_ptr, large_offset_from, nonnull_as_mut_ptr, unlikely, }; use crate::zeroize::{DefaultMemZeroizer, MemZeroizer}; +use allocator_api2::alloc::{AllocError, Allocator}; use core::alloc::Layout; use core::cell::Cell; use core::ptr::{self, NonNull}; @@ -125,7 +125,7 @@ impl SecStackSinglePageAlloc { impl Drop for SecStackSinglePageAlloc { // panic in drop leads to abort, so we better just abort // however, abort is only stably available with `std` (not `core`) - #[cfg(featue = "std")] + #[cfg(feature = "std")] fn drop(&mut self) { // check for leaks if self.bytes.get() != 0 { @@ -143,7 +143,7 @@ impl Drop for SecStackSinglePageAlloc { } } - #[cfg(not(featue = "std"))] + #[cfg(not(feature = "std"))] fn drop(&mut self) { // check for leaks debug_assert!(self.bytes.get() == 0); @@ -706,6 +706,7 @@ unsafe impl Allocator for SecStackSinglePageAlloc { #[cfg(test)] mod tests { use super::*; + use crate::allocator_api::{Box, Vec}; use crate::zeroize::TestZeroizer; use std::mem::drop; @@ -726,8 +727,6 @@ mod tests { #[test] fn box_allocation_8b() { - use crate::boxed::Box; - let allocator = SecStackSinglePageAlloc::::new().expect("allocator creation failed"); allocator.consistency_check(); @@ -741,8 +740,6 @@ mod tests { #[test] fn box_allocation_9b() { - use crate::boxed::Box; - let allocator = SecStackSinglePageAlloc::::new().expect("allocator creation failed"); allocator.consistency_check(); @@ -756,8 +753,6 @@ mod tests { #[test] fn box_allocation_zst() { - use crate::boxed::Box; - let allocator = SecStackSinglePageAlloc::::new().expect("allocator creation failed"); allocator.consistency_check(); @@ -771,8 +766,6 @@ mod tests { #[test] fn multiple_box_allocations() { - use crate::boxed::Box; - let allocator = SecStackSinglePageAlloc::::new().expect("allocator creation failed"); allocator.consistency_check(); @@ -796,8 +789,6 @@ mod tests { #[test] fn multiple_box_allocations_high_align() { - use crate::boxed::Box; - let allocator = SecStackSinglePageAlloc::::new().expect("allocator creation failed"); allocator.consistency_check(); @@ -821,8 +812,6 @@ mod tests { #[test] fn multiple_box_allocations_mixed_align() { - use crate::boxed::Box; - let allocator = SecStackSinglePageAlloc::::new().expect("allocator creation failed"); allocator.consistency_check(); @@ -846,8 +835,6 @@ mod tests { #[test] fn many_box_allocations_mixed_align_nonstacked_drop() { - use crate::boxed::Box; - let allocator = SecStackSinglePageAlloc::::new().expect("allocator creation failed"); allocator.consistency_check(); @@ -911,7 +898,6 @@ mod tests { #[test] fn vec_allocation_nonfinal_grow() { - use crate::boxed::Box; type A = SecStackSinglePageAlloc; let allocator: A = SecStackSinglePageAlloc::new().expect("allocator creation failed"); @@ -953,7 +939,6 @@ mod tests { #[test] fn vec_allocation_nonfinal_shrink() { - use crate::boxed::Box; type A = SecStackSinglePageAlloc; let allocator: A = SecStackSinglePageAlloc::new().expect("allocator creation failed"); diff --git a/src/zeroize.rs 
b/src/zeroize.rs index 3c2f8b1..7be5e02 100644 --- a/src/zeroize.rs +++ b/src/zeroize.rs @@ -72,43 +72,9 @@ cfg_if::cfg_if! { /// the selected features and the version of this library. pub type DefaultMemZeroizer = VolatileMemsetZeroizer; pub(crate) use VolatileMemsetZeroizer as DefaultMemZeroizerConstructor; - } else if #[cfg(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "openbsd", - target_os = "netbsd", - target_os = "macos", - target_os = "ios", - target_env = "gnu", - target_env = "musl" - ))] { - /// Best (i.e. fastest) [`MemZeroizer`] available for the target. - /// - /// Which [`MemZeroizer`] this is is an implementation detail, can depend on the target and - /// the selected features and the version of this library. - pub type DefaultMemZeroizer = LibcZeroizer; - pub(crate) use LibcZeroizer as DefaultMemZeroizerConstructor; - } else if #[cfg(all(target_arch = "x86_64", target_feature = "avx"))] { - /// Best (i.e. fastest) [`MemZeroizer`] available for the target. - /// - /// Which [`MemZeroizer`] this is is an implementation detail, can depend on the target and - /// the selected features and the version of this library. - pub type DefaultMemZeroizer = X86_64AvxZeroizer; - pub(crate) use X86_64AvxZeroizer as DefaultMemZeroizerConstructor; - } else if #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] { - /// Best (i.e. fastest) [`MemZeroizer`] available for the target. - /// - /// Which [`MemZeroizer`] this is is an implementation detail, can depend on the target and - /// the selected features and the version of this library. - pub type DefaultMemZeroizer = X86_64Sse2Zeroizer; - pub(crate) use X86_64Sse2Zeroizer as DefaultMemZeroizerConstructor; } else { - /// Best (i.e. fastest) [`MemZeroizer`] available for the target. - /// - /// Which [`MemZeroizer`] this is is an implementation detail, can depend on the target and - /// the selected features and the version of this library. - pub type DefaultMemZeroizer = VolatileWrite8Zeroizer; - pub(crate) use VolatileWrite8Zeroizer as DefaultMemZeroizerConstructor; + pub type DefaultMemZeroizer = MemsetAsmBarierZeroizer; + pub(crate) use MemsetAsmBarierZeroizer as DefaultMemZeroizerConstructor; } } @@ -118,12 +84,6 @@ pub(crate) use VolatileWrite8Zeroizer as TestZeroizer; /// This zeroizer uses the volatile memset intrinsic which does not /// yet have a stable counterpart. It should be very fast, but requires /// nightly. -/// -/// In addition to the volatile write we place a compiler fence right next to -/// the volatile write. This should not be necessary for secure zeroization -/// since the volatile semantics guarenties our writes are not elided, and they -/// can not be delayed since we are deallocating the memory after zeroization. -/// The use of this fence is therefore only a precaution. #[cfg(feature = "nightly_core_intrinsics")] #[derive(Debug, Copy, Clone, Default)] pub struct VolatileMemsetZeroizer; @@ -133,124 +93,41 @@ impl MemZeroizer for VolatileMemsetZeroizer { unsafe fn zeroize_mem_blocks(&self, ptr: *mut u8, len: usize) { precondition_memory_range!(ptr, len); debug_precondition_logaligned!(A, ptr); - // SAFETY: the caller must uphold the safety contract of - // `internals::volatile_memset` - unsafe { - internals::volatile_memset(ptr, 0, len); - } - fence(); - } -} - -/// This zeroizer uses volatile zeroization functions provided by libc. -/// It should be fast but is only available on certain platforms. 
-/// -/// In addition to the volatile write we place a compiler fence right next to -/// the volatile write. This should not be necessary for secure zeroization -/// since the volatile semantics guarenties our writes are not elided, and they -/// can not be delayed since we are deallocating the memory after zeroization. -/// The use of this fence is therefore only a precaution. -#[cfg(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "openbsd", - target_os = "netbsd", - target_os = "macos", - target_os = "ios", - target_env = "gnu", - target_env = "musl" -))] -#[derive(Debug, Copy, Clone, Default)] -pub struct LibcZeroizer; - -#[cfg(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "openbsd", - target_os = "netbsd", - target_os = "macos", - target_os = "ios", - target_env = "gnu", - target_env = "musl" -))] -impl MemZeroizer for LibcZeroizer { - unsafe fn zeroize_mem_blocks(&self, ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - debug_precondition_logaligned!(A, ptr); - debug_precondition_logmultiple!(B, len); - // SAFETY: the caller must uphold the safety contract of - // `internals::libc_explicit_bzero` - unsafe { - internals::libc_explicit_bzero(ptr, len); - } - fence(); - } -} - -/// This zeroizer uses volatile assembly (`rep stosb`) for modern x86_64, -/// performing very well for large amounts of memory. To make this available on -/// stable, it uses a C compiler at build time. -/// -/// In addition to the volatile write we place a compiler fence right next to -/// the volatile write. This should not be necessary for secure zeroization -/// since the volatile semantics guarenties our writes are not elided, and they -/// can not be delayed since we are deallocating the memory after zeroization. -/// The use of this fence is therefore only a precaution. -#[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))] -#[derive(Debug, Copy, Clone, Default)] -pub struct AsmRepStosZeroizer; - -#[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))] -impl MemZeroizer for AsmRepStosZeroizer { - unsafe fn zeroize_mem_blocks(&self, ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - debug_precondition_logaligned!(A, ptr); - debug_precondition_logmultiple!(B, len); - // SAFETY: the caller must uphold the safety contract of - // `internals::asm_ermsb_zeroize` + // SAFETY: the caller must uphold the safety contract unsafe { - internals::asm_ermsb_zeroize(ptr, len); + core::intrinsics::volatile_set_memory(ptr, 0, len); } - fence(); } } -/// This zeroizer uses a volatile write per byte. This zeroization technique is -/// similar to the `zeroize` crate, available for all target platforms on -/// stable, but extremely slow. -/// -/// In addition to the volatile write we place a compiler fence right next to -/// the volatile write. This should not be necessary for secure zeroization -/// since the volatile semantics guarenties our writes are not elided, and they -/// can not be delayed since we are deallocating the memory after zeroization. -/// The use of this fence is therefore only a precaution. +/// This zeroizer uses a non-volatile memset, followed by an empty asm block +/// acting as an optimisation barier. It should be very fast, and according to +/// my current understanding of the op.sem. the compiler is not allowed to +/// remove the writes. 
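Aside, not part of the patch: the barrier pattern that `MemsetAsmBarierZeroizer` (declared just below) relies on, shown as a free-standing sketch. The function name `wipe` is hypothetical; the empty asm block is the same one used in the impl below. Because the asm takes the pointer as an input operand, the compiler must assume the zeroed bytes may still be read and therefore cannot drop the preceding `write_bytes`.

// Sketch only: non-volatile memset followed by an empty asm optimisation barrier.
fn wipe(buf: &mut [u8]) {
    let ptr = buf.as_mut_ptr();
    // SAFETY: `ptr` is valid for writes of `buf.len()` bytes
    unsafe { ptr.write_bytes(0, buf.len()) };
    // empty asm block acting as an optimisation barrier
    unsafe {
        core::arch::asm!(
            "/* {0} */",
            in(reg) ptr,
            options(nostack, readonly, preserves_flags),
        )
    };
}

// e.g. wiping a stack buffer that held key material:
// let mut key = [0x42u8; 32];
// /* ... use `key` ... */
// wipe(&mut key);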
#[derive(Debug, Copy, Clone, Default)] -pub struct VolatileWriteZeroizer; +pub struct MemsetAsmBarierZeroizer; -impl MemZeroizer for VolatileWriteZeroizer { +impl MemZeroizer for MemsetAsmBarierZeroizer { unsafe fn zeroize_mem_blocks(&self, ptr: *mut u8, len: usize) { precondition_memory_range!(ptr, len); debug_precondition_logaligned!(A, ptr); - debug_precondition_logmultiple!(B, len); - // SAFETY: the caller must uphold the safety contract of - // `volatile_write_zeroize_mem` + // SAFETY: the caller must uphold the safety contract of `write_bytes` + unsafe { ptr.write_bytes(0, len) }; + // Optimisation barier, so the writes can not be optimised out unsafe { - internals::volatile_write_zeroize(ptr, len); - } - fence(); + core::arch::asm!( + "/* {0} */", + in(reg) ptr, + options(nostack, readonly, preserves_flags), + ) + }; } } /// This zeroizer uses a volatile write per 8 bytes if the pointer is 8 byte /// aligned, and otherwise uses `VolatileWriteZeroizer`. This zeroization -/// technique is available for all target platforms on stable, but not very -/// fast. -/// -/// In addition to the volatile write we place a compiler fence right next to -/// the volatile write. This should not be necessary for secure zeroization -/// since the volatile semantics guarenties our writes are not elided, and they -/// can not be delayed since we are deallocating the memory after zeroization. -/// The use of this fence is therefore only a precaution. +/// technique is pure Rust and available for all target platforms on stable, but +/// not very fast. /// /// This zeroization method can benefit (in terms of performance) from using the /// [`MemZeroizer::zeroize_mem_blocks`] function instead of @@ -279,132 +156,8 @@ impl MemZeroizer for VolatileWrite8Zeroizer { internals::volatile_write_zeroize(ptr, len); } } - fence(); - } -} - -/// This zeroizer uses inline asm with avx2 instructions if the pointer is 32 -/// byte aligned, and otherwise uses `VolatileWrite8Zeroizer`. This zeroization -/// technique is available for x86_64 platforms with avx2 cpu support on stable, -/// and reasonably fast for 32 byte aligned pointers. -/// -/// In addition to the volatile write we place a compiler fence right next to -/// the volatile write. This should not be necessary for secure zeroization -/// since the volatile semantics guarenties our writes are not elided, and they -/// can not be delayed since we are deallocating the memory after zeroization. -/// The use of this fence is therefore only a precaution. -/// -/// This zeroization method can benefit (in terms of performance) from using the -/// [`MemZeroizer::zeroize_mem_blocks`] function instead of -/// [`MemZeroizer::zeroize_mem`] function if a minimum alignment is known -/// at compile time. 
-#[cfg(all(target_arch = "x86_64", target_feature = "avx"))] -#[derive(Debug, Copy, Clone, Default)] -pub struct X86_64AvxZeroizer; - -#[cfg(all(target_arch = "x86_64", target_feature = "avx"))] -impl MemZeroizer for X86_64AvxZeroizer { - unsafe fn zeroize_mem_blocks(&self, mut ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - debug_precondition_logaligned!(A, ptr); - debug_precondition_logmultiple!(B, len); - // if we have 32 = 2^5 byte alignment then write 32 bytes at a time, - // with 8 = 2^3 byte align do 8 bytes at a time, otherwise 1 byte at a time - if (A >= 5) | is_aligned_ptr_mut(ptr, 32) { - // SAFETY: `ptr` is 32 byte aligned - ptr = unsafe { internals::x86_64_simd32_unroll2_zeroize_align32_block32(ptr, len) }; - // zeroize tail - if B < 5 { - ptr = unsafe { internals::zeroize_align8_block8(ptr, len % 32) }; - } - if B < 3 { - unsafe { internals::zeroize_align4_tail8(ptr, len % 8) }; - } - } else if (A >= 3) | is_aligned_ptr_mut(ptr, 8) { - // SAFETY: `ptr` is 8 byte aligned - ptr = unsafe { internals::zeroize_align8_block8(ptr, len) }; - if B < 3 { - unsafe { internals::zeroize_align4_tail8(ptr, len % 8) }; - } - } else { - // SAFETY: no alignment requirement - unsafe { - internals::volatile_write_zeroize(ptr, len); - } - } - fence(); - } -} - -/// This zeroizer uses inline asm with sse2 instructions if the pointer is 16 -/// byte aligned, and otherwise uses `VolatileWrite8Zeroizer`. This zeroization -/// technique is available for x86_64 platforms with sse2 cpu support on stable, -/// and reasonably fast for 16 byte aligned pointers. -/// -/// In addition to the volatile write we place a compiler fence right next to -/// the volatile write. This should not be necessary for secure zeroization -/// since the volatile semantics guarenties our writes are not elided, and they -/// can not be delayed since we are deallocating the memory after zeroization. -/// The use of this fence is therefore only a precaution. -/// -/// This zeroization method can benefit (in terms of performance) from using the -/// [`MemZeroizer::zeroize_mem_blocks`] function instead of -/// [`MemZeroizer::zeroize_mem`] function if a minimum alignment is known -/// at compile time. -#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] -#[derive(Debug, Copy, Clone, Default)] -pub struct X86_64Sse2Zeroizer; - -#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] -impl MemZeroizer for X86_64Sse2Zeroizer { - unsafe fn zeroize_mem_blocks(&self, mut ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - debug_precondition_logaligned!(A, ptr); - debug_precondition_logmultiple!(B, len); - // if we have 16 = 2^4 byte alignment then write 16 bytes at a time, - // with 8 = 2^3 byte align do 8 bytes at a time, otherwise 1 byte at a time - if (A >= 4) | is_aligned_ptr_mut(ptr, 16) { - // SAFETY: `ptr` is 16 byte aligned - - ptr = unsafe { internals::x86_64_simd16_unroll2_zeroize_align16_block16(ptr, len) }; - // zeroize tail - if B < 4 { - ptr = unsafe { internals::zeroize_align8_block8(ptr, len % 16) }; - } - if B < 3 { - unsafe { internals::zeroize_align4_tail8(ptr, len % 8) }; - } - } else if (A >= 3) | is_aligned_ptr_mut(ptr, 8) { - // SAFETY: `ptr` is 8 byte aligned - ptr = unsafe { internals::zeroize_align8_block8(ptr, len) }; - if B < 3 { - unsafe { internals::zeroize_align4_tail8(ptr, len % 8) }; - } - } else { - // SAFETY: no alignment requirement - unsafe { - internals::volatile_write_zeroize(ptr, len); - } - } - fence(); } } -/// Compiler fence. 
-/// -/// Forces sequentially consistent access across this fence at compile time. At -/// runtime the CPU can still reorder memory accesses. This should not be -/// necessary for secure zeroization since the volatile semantics guaranties our -/// writes are not elided, and they can not be delayed since we are deallocating -/// the memory after zeroization. The use of this fence is therefore only a -/// precaution. For the same reasons it probably does not add security, it also -/// probably does not hurt performance significantly. -#[inline] -fn fence() { - use core::sync::atomic::{compiler_fence, Ordering}; - - compiler_fence(Ordering::SeqCst); -} - #[cfg(test)] mod tests; diff --git a/src/zeroize/tests.rs b/src/zeroize/tests.rs index 8571cc0..ed0ba44 100644 --- a/src/zeroize/tests.rs +++ b/src/zeroize/tests.rs @@ -33,98 +33,18 @@ fn test_b127_volatile_memset_zeroizer() { test_b127_zeroizer(VolatileMemsetZeroizer); } -#[cfg(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "openbsd", - target_os = "netbsd", - target_os = "macos", - target_os = "ios", - target_env = "gnu", - target_env = "musl" -))] -#[test] -#[cfg_attr(miri, ignore)] // ffi -fn test_b127_libc_zeroizer() { - test_b127_zeroizer(LibcZeroizer); -} - -#[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))] -#[test] -#[cfg_attr(miri, ignore)] // ffi, asm -fn test_b127_asm_rep_stos_zeroizer() { - test_b127_zeroizer(AsmRepStosZeroizer); -} - -#[test] -fn test_b127_volatile_write_zeroizer() { - test_b127_zeroizer(VolatileWriteZeroizer); -} - #[test] fn test_b127_volatile_write8_zeroizer() { test_b127_zeroizer(VolatileWrite8Zeroizer); } -#[cfg(all(target_arch = "x86_64", target_feature = "avx"))] -#[test] -fn test_b127_x86_64_avx_zeroizer() { - test_b127_zeroizer(X86_64AvxZeroizer); -} - -#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] -#[test] -fn test_b127_x86_64_sse2_zeroizer() { - test_b127_zeroizer(X86_64Sse2Zeroizer); -} - #[cfg(feature = "nightly_core_intrinsics")] #[test] fn test_b239_lowalign_volatile_memset_zeroizer() { test_b239_lowalign_zeroizer(VolatileMemsetZeroizer); } -#[cfg(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "openbsd", - target_os = "netbsd", - target_os = "macos", - target_os = "ios", - target_env = "gnu", - target_env = "musl" -))] -#[test] -#[cfg_attr(miri, ignore)] // ffi -fn test_b239_lowalign_libc_zeroizer() { - test_b239_lowalign_zeroizer(LibcZeroizer); -} - -#[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))] -#[test] -#[cfg_attr(miri, ignore)] // ffi, asm -fn test_b239_lowalign_asm_rep_stos_zeroizer() { - test_b239_lowalign_zeroizer(AsmRepStosZeroizer); -} - -#[test] -fn test_b239_lowalign_volatile_write_zeroizer() { - test_b239_lowalign_zeroizer(VolatileWriteZeroizer); -} - #[test] fn test_b239_lowalign_volatile_write8_zeroizer() { test_b239_lowalign_zeroizer(VolatileWrite8Zeroizer); } - -#[cfg(all(target_arch = "x86_64", target_feature = "avx"))] -#[test] -fn test_b239_lowalign_x86_64_avx_zeroizer() { - test_b239_lowalign_zeroizer(X86_64AvxZeroizer); -} - -#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] -#[test] -fn test_b239_lowalign_x86_64_sse2_zeroizer() { - test_b239_lowalign_zeroizer(X86_64Sse2Zeroizer); -} diff --git a/src/zeroizing_alloc.rs b/src/zeroizing_alloc.rs index f48e028..494954a 100644 --- a/src/zeroizing_alloc.rs +++ b/src/zeroizing_alloc.rs @@ -9,13 +9,13 @@ //! in memory but not dropped. This can happen for example when resizing //! [`Vec`]s. 
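Aside, not part of the patch: a sketch of the intended use of `ZeroizeAlloc` together with the `allocator-api2` types re-exported from `crate::allocator_api`. The constructor `ZeroizeAlloc::new` is an assumption (only `ZeroizeAlloc::with_zeroizer` is visible in the tests of this hunk); everything else follows the test code below.

// Sketch only, under the assumption that `ZeroizeAlloc::new` wraps an inner
// allocator with the default zeroizer.
fn collect_secret() {
    use secmem_alloc::allocator_api::Vec;
    use secmem_alloc::zeroizing_alloc::ZeroizeAlloc;

    // hypothetical constructor; only `with_zeroizer` appears in the tests below
    let alloc = ZeroizeAlloc::new(std::alloc::System);
    let mut buf: Vec<u8, _> = Vec::new_in(&alloc);
    buf.extend_from_slice(b"secret bytes");
    // on drop, and on every reallocation while the `Vec` grows, the old backing
    // memory is zeroized before being handed back to `System`
    drop(buf);
}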
-use crate::allocator_api::{AllocError, Allocator}; use crate::macros::{ debug_handleallocerror_precondition, debug_handleallocerror_precondition_valid_layout, precondition_memory_range, }; use crate::zeroize::{DefaultMemZeroizer, DefaultMemZeroizerConstructor, MemZeroizer}; use alloc::alloc::handle_alloc_error; +use allocator_api2::alloc::{AllocError, Allocator}; use core::alloc::{GlobalAlloc, Layout}; use core::ptr::NonNull; #[cfg(not(feature = "nightly_strict_provenance"))] @@ -182,13 +182,12 @@ where #[cfg(test)] mod tests { use super::*; + use crate::allocator_api::{Box, Vec}; use crate::zeroize::TestZeroizer; use std::alloc::System; #[test] fn box_allocation_8b() { - use crate::boxed::Box; - let allocator = ZeroizeAlloc::with_zeroizer(System, TestZeroizer); let _heap_mem = Box::new_in([1u8; 8], &allocator); // drop `_heap_mem` @@ -197,8 +196,6 @@ mod tests { #[test] fn box_allocation_9b() { - use crate::boxed::Box; - let allocator = ZeroizeAlloc::with_zeroizer(System, TestZeroizer); let _heap_mem = Box::new_in([1u8; 9], &allocator); // drop `_heap_mem` @@ -207,8 +204,6 @@ mod tests { #[test] fn box_allocation_zst() { - use crate::boxed::Box; - let allocator = ZeroizeAlloc::with_zeroizer(System, TestZeroizer); let _heap_mem = Box::new_in([(); 8], &allocator); // drop `_heap_mem`