From 6519b13ce15a99ef3e9273139a55c6fe7268a909 Mon Sep 17 00:00:00 2001
From: niluxv
Date: Thu, 2 May 2024 10:30:57 +0200
Subject: [PATCH] Introduce zeroizer based on (non-volatile) `write_bytes` and
 `asm!` optimisation barrier and remove many slower ones

*Breaking:* Removes many zeroizers previously available.

This new zeroizer cannot be optimised out, according to my current
understanding of the operational semantics of Rust. It can be implemented on
stable, is target-independent, and is faster than all of our handwritten
ones. Therefore we can remove all the handwritten target-specific ones. This
is good, since their implementation turned out to be prone to mistakes.
---
 Cargo.toml                              |   1 -
 benches/bench_zeroizers.rs              |  25 +-
 src/internals/zeroize.rs                | 233 +++++++----------
 src/internals/zeroize/asm_x86_64.rs     | 326 ------------------------
 src/internals/zeroize/system.rs         | 104 --------
 src/internals/zeroize/volatile_write.rs | 100 --------
 src/zeroize.rs                          | 289 ++------------------
 src/zeroize/tests.rs                    |  80 ------
 8 files changed, 116 insertions(+), 1042 deletions(-)
 delete mode 100644 src/internals/zeroize/asm_x86_64.rs
 delete mode 100644 src/internals/zeroize/system.rs
 delete mode 100644 src/internals/zeroize/volatile_write.rs

diff --git a/Cargo.toml b/Cargo.toml
index 251ec1e..0824851 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -38,7 +38,6 @@ dev = ["std"]
 [dependencies]
 allocator-api2 = { version = "0.2", default-features = false }
 cfg-if = "1.0"
-libc = "0.2"
 mirai-annotations = "1.12"
 sptr = "0.3"
 thiserror = { version = "1.0", optional = true }
diff --git a/benches/bench_zeroizers.rs b/benches/bench_zeroizers.rs
index 2c62483..47ac729 100644
--- a/benches/bench_zeroizers.rs
+++ b/benches/bench_zeroizers.rs
@@ -1,13 +1,6 @@
 use criterion::{criterion_group, criterion_main, Criterion};
-#[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))]
-use secmem_alloc::zeroize::AsmRepStosZeroizer;
-#[cfg(all(target_arch = "x86_64", target_feature = "avx"))]
-use secmem_alloc::zeroize::X86_64AvxZeroizer;
-#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
-use secmem_alloc::zeroize::X86_64Sse2Zeroizer;
 use secmem_alloc::zeroize::{
-    LibcZeroizer, MemZeroizer, VolatileMemsetZeroizer, VolatileWrite8Zeroizer,
-    VolatileWriteZeroizer,
+    MemZeroizer, MemsetAsmBarierZeroizer, VolatileMemsetZeroizer, VolatileWrite8Zeroizer,
 };
 
 fn zeroize_b127<Z: MemZeroizer>(z: Z, array: &mut [u8; 127]) {
@@ -50,21 +43,9 @@ macro_rules! bench_zeroizers {
         $cgroup.bench_function("VolatileMemsetZeroizer", |b| {
             b.iter(|| $bench_function(VolatileMemsetZeroizer, &mut $array.0))
         });
-        $cgroup.bench_function("LibcZeroizer", |b| {
-            b.iter(|| $bench_function(LibcZeroizer, &mut $array.0))
+        $cgroup.bench_function("MemsetAsmBarierZeroizer", |b| {
+            b.iter(|| $bench_function(MemsetAsmBarierZeroizer, &mut $array.0))
         });
-        $cgroup.bench_function("VolatileWriteZeroizer", |b| {
-            b.iter(|| $bench_function(VolatileWriteZeroizer, &mut $array.0))
-        });
-        $cgroup.bench_function("VolatileWrite8Zeroizer", |b| {
-            b.iter(|| $bench_function(VolatileWrite8Zeroizer, &mut $array.0))
-        });
-        #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
-        {
-            $cgroup.bench_function("X86_64Sse2Zeroizer", |b| {
-                b.iter(|| $bench_function(X86_64Sse2Zeroizer, &mut $array.0))
-            });
-        }
         #[cfg(all(target_arch = "x86_64", target_feature = "avx"))]
         {
             $cgroup.bench_function("X86_64AvxZeroizer", |b| {
diff --git a/src/internals/zeroize.rs b/src/internals/zeroize.rs
index 65bb31d..c2da080 100644
--- a/src/internals/zeroize.rs
+++ b/src/internals/zeroize.rs
@@ -1,35 +1,103 @@
 //! Utility functions for securely wiping memory.
 //!
-//! Contains wrappers around intrinsics and ffi functions necessary for the
-//! [`crate::zeroize`] module.
+//! Utility functions for the [`crate::zeroize`] module to securely wipe
+//! memory, implemented using cross-platform pure Rust volatile writes.
 
-#[cfg(target_arch = "x86_64")]
-mod asm_x86_64;
-#[cfg(target_arch = "x86_64")]
-pub use asm_x86_64::*;
+use crate::macros::precondition_memory_range;
+use crate::util::is_aligned_ptr_mut;
+use mirai_annotations::debug_checked_precondition;
 
-mod system;
-pub use system::*;
+/// Zeroize the memory pointed to by `ptr` and of size `len` bytes, by
+/// overwriting it byte for byte using volatile writes.
+///
+/// This is guaranteed not to be elided by the compiler.
+///
+/// # Safety
+/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see
+/// the [`std::ptr`] documentation. In particular this function is not atomic.
+pub unsafe fn volatile_write_zeroize(mut ptr: *mut u8, len: usize) {
+    precondition_memory_range!(ptr, len);
+    for _i in 0..len {
+        // SAFETY: `ptr` originally pointed into an allocation of `len` bytes so now,
+        // after `_i` steps `len - _i > 0` bytes are left, so `ptr` is valid for
+        // a byte write
+        unsafe {
+            core::ptr::write_volatile(ptr, 0u8);
+        }
+        // SAFETY: after increment, `ptr` points into the same allocation if `_i == len`
+        // or one byte past it, so `add` is sound
+        ptr = unsafe { ptr.add(1) };
+    }
+}
 
-mod volatile_write;
-pub use volatile_write::*;
+/// Zeroize the memory pointed to by `ptr` for `len` rounded down to a multiple
+/// of 8 bytes.
+///
+/// This function rounds down `len` to a multiple of 8 and then zeroizes the
+/// memory pointed to by `ptr` for that length. This operation is guaranteed
+/// not to be elided by the compiler. If `len` is a multiple of 8 then this
+/// zeroizes the entire specified block of memory. Returns a pointer to the byte
+/// after the last zeroed byte, with the provenance of `ptr`.
+///
+/// # Safety
+/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see
+/// the [`std::ptr`] documentation. In particular this function is not atomic.
+///
+/// Furthermore, `ptr` *must* be at least 8 byte aligned.
+pub unsafe fn zeroize_align8_block8(mut ptr: *mut u8, len: usize) -> *mut u8 {
+    precondition_memory_range!(ptr, len);
+    debug_checked_precondition!(is_aligned_ptr_mut(ptr, 8));
+
+    let nblocks = (len - len % 8) / 8;
+    for _i in 0..nblocks {
+        // SAFETY: `ptr` originally pointed into an allocation of `len` bytes so now,
+        // after `_i` steps `len - 8*_i >= 8` bytes are left, so `ptr` is valid
+        // for an 8 byte write
+        // SAFETY: `ptr` was originally 8 byte aligned by caller contract and we
+        // only added a multiple of 8, so it is still 8 byte aligned
+        unsafe {
+            core::ptr::write_volatile(ptr.cast::<u64>(), 0u64);
+        }
+        // SAFETY: after increment, `ptr` points into the same allocation or (if `8*_i
+        // == len`) at most one byte past it, so `add` is sound; `ptr` stays 8
+        // byte aligned
+        ptr = unsafe { ptr.add(8) };
+    }
+    ptr
+}
 
-/// Volatile write byte to memory.
+/// Zeroize the memory pointed to by `ptr` and of size `len % 8` bytes.
 ///
-/// This uses the [`core::intrinsics::volatile_set_memory`] intrinsic and can
-/// only be used on nightly, with the `nightly` feature enabled.
+/// This can be used to zeroize the bytes left unzeroized by
+/// `zeroize_align8_block8` if `len` is not a multiple of 8. This operation is
+/// guaranteed not to be elided by the compiler.
/// /// # Safety /// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see /// the [`std::ptr`] documentation. In particular this function is not atomic. -// In addition `ptr` needs to be properly aligned, but because we are talking -// about bytes (therefore byte alignment), it *always* is. -#[cfg(feature = "nightly_core_intrinsics")] -pub unsafe fn volatile_memset(ptr: *mut u8, val: u8, len: usize) { - crate::macros::precondition_memory_range!(ptr, len); - // SAFETY: the caller must uphold the safety contract - unsafe { - core::intrinsics::volatile_set_memory(ptr, val, len); +/// +/// Furthermore, `ptr` *must* be at least 4 byte aligned. +pub unsafe fn zeroize_align4_tail8(mut ptr: *mut u8, len: usize) { + precondition_memory_range!(ptr, len % 8); + debug_checked_precondition!(is_aligned_ptr_mut(ptr, 4)); + + if len % 8 >= 4 { + // SAFETY: `ptr` is valid for `len % 8` bytes by caller contract + // SAFETY: `ptr` is still 4 byte aligned by caller contract + unsafe { + core::ptr::write_volatile(ptr.cast::(), 0u32); + } + ptr = unsafe { ptr.add(4) }; + } + // the final remainder (at most 3 bytes) is zeroed byte-for-byte + // SAFETY: `ptr` has been incremented by a multiple of 4 <= `len` so `ptr` + // points to an allocation of `len % 4` bytes, so `ptr` can be written to + // and incremented `len % 4` times + for _i in 0..(len % 4) { + unsafe { + core::ptr::write_volatile(ptr, 0u8); + } + ptr = unsafe { ptr.add(1) }; } } @@ -76,135 +144,18 @@ mod tests { assert_eq!(&array[..], &expected[..]); } - #[cfg(feature = "nightly_core_intrinsics")] - #[test] - fn test_volatile_memset() { - test_b128_zeroizer(|ptr: *mut u8, len: usize| unsafe { volatile_memset(ptr, 0, len) }) - } - - #[cfg(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "openbsd", - target_os = "netbsd", - target_os = "macos", - target_os = "ios", - target_env = "gnu", - target_env = "musl" - ))] - #[test] - #[cfg_attr(miri, ignore)] // ffi - fn test_explicit_bzero() { - test_b128_zeroizer(|ptr: *mut u8, len: usize| unsafe { libc_explicit_bzero(ptr, len) }) - } - - #[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))] - #[test] - #[cfg_attr(miri, ignore)] // asm - fn test_asm_ermsb_zeroize() { - test_b128_zeroizer(|ptr: *mut u8, len: usize| unsafe { asm_ermsb_zeroize(ptr, len) }) - } - - #[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))] #[test] fn test_volatile_write_zeroize() { - test_b128_zeroizer(|ptr: *mut u8, len: usize| unsafe { volatile_write_zeroize(ptr, len) }) + test_b128_zeroizer(|ptr, len| unsafe { volatile_write_zeroize(ptr, len) }) } - #[cfg(feature = "nightly_core_intrinsics")] #[test] - fn test_volatile_memset_lowalign() { - test_b239_lowalign_zeroizer(|ptr: *mut u8, len: usize| unsafe { - volatile_memset(ptr, 0, len) - }) - } - - #[cfg(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "openbsd", - target_os = "netbsd", - target_os = "macos", - target_os = "ios", - target_env = "gnu", - target_env = "musl" - ))] - #[test] - #[cfg_attr(miri, ignore)] // ffi - fn test_explicit_bzero_lowalign() { - test_b239_lowalign_zeroizer(|ptr: *mut u8, len: usize| unsafe { - libc_explicit_bzero(ptr, len) - }) - } - - #[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))] - #[test] - #[cfg_attr(miri, ignore)] // asm - fn test_asm_ermsb_zeroize_lowalign() { - test_b239_lowalign_zeroizer(|ptr: *mut u8, len: usize| unsafe { - asm_ermsb_zeroize(ptr, len) - }) - } - - #[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))] - #[test] - fn 
test_volatile_write_zeroize_lowalign() { - test_b239_lowalign_zeroizer(|ptr: *mut u8, len: usize| unsafe { - volatile_write_zeroize(ptr, len) - }) + fn test_lowalign_volatile_write_zeroize() { + test_b239_lowalign_zeroizer(|ptr, len| unsafe { volatile_write_zeroize(ptr, len) }) } #[test] fn test_zeroize_align8_block8() { test_b257_align64_block_zeroizer(|ptr, len| unsafe { zeroize_align8_block8(ptr, len) }) } - - #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] - #[test] - #[cfg_attr(miri, ignore)] // asm - fn test_x86_64_simd16_zeroize_align16_block16() { - test_b257_align64_block_zeroizer(|ptr, len| unsafe { - x86_64_simd16_zeroize_align16_block16(ptr, len) - }) - } - - #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] - #[test] - #[cfg_attr(miri, ignore)] // asm - fn test_x86_64_simd16_unroll2_zeroize_align16_block16() { - test_b257_align64_block_zeroizer(|ptr, len| unsafe { - x86_64_simd16_unroll2_zeroize_align16_block16(ptr, len) - }) - } - - #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))] - #[test] - #[cfg_attr(miri, ignore)] // asm - fn test_x86_64_simd32_zeroize_align32_block32() { - test_b257_align64_block_zeroizer(|ptr, len| unsafe { - x86_64_simd32_zeroize_align32_block32(ptr, len) - }) - } - - #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))] - #[test] - #[cfg_attr(miri, ignore)] // asm - fn test_x86_64_simd32_unroll2_zeroize_align32_block32() { - test_b257_align64_block_zeroizer(|ptr, len| unsafe { - x86_64_simd32_unroll2_zeroize_align32_block32(ptr, len) - }) - } - - #[cfg(all( - target_arch = "x86_64", - target_feature = "avx512f", - feature = "nightly_stdsimd" - ))] - #[test] - #[cfg_attr(miri, ignore)] // asm - fn test_x86_64_simd64_zeroize_align64_block64() { - test_b257_align64_block_zeroizer(|ptr, len| unsafe { - x86_64_simd64_zeroize_align64_block64(ptr, len) - }) - } } diff --git a/src/internals/zeroize/asm_x86_64.rs b/src/internals/zeroize/asm_x86_64.rs deleted file mode 100644 index 69c3113..0000000 --- a/src/internals/zeroize/asm_x86_64.rs +++ /dev/null @@ -1,326 +0,0 @@ -//! Utility functions for securely wiping memory, implemented in asm for x86_64 -//! cpus. - -use crate::macros::precondition_memory_range; -use crate::util::is_aligned_ptr_mut; -use mirai_annotations::debug_checked_precondition; - -/// Overwrite memory with zeros. This operation will not be elided by the -/// compiler. -/// -/// This uses inline assembly in Rust. The implementation makes use of the -/// efficient `rep stosb` memory set functionality on modern x86_64 cpus. This -/// is very slow for small amounts of data but very efficient for zeroizing -/// large amounts of data (depending an CPU architecture though), works on -/// stable, and does not require a libc. -/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -// In addition `ptr` needs to be properly aligned, but because we are talking -// about bytes (therefore byte alignment), it *always* is. 
-#[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))] -pub unsafe fn asm_ermsb_zeroize(ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - - unsafe { - core::arch::asm!( - "rep stosb byte ptr es:[rdi], al", - // `len` in the rcx register - inout("rcx") len => _, - // `ptr` int the rdi register - inout("rdi") ptr => _, - // zero byte to al (first byte of rax) register - in("al") 0u8, - options(nostack), - ); - } -} - -/// Zeroize the memory pointed to by `ptr` for `len` rounded down to a multiple -/// of 16 bytes. -/// -/// This function rounds down `len` to a multiple of 16 and then zeroizes the -/// memory pointed to by `ptr` for that length. This operation is guarantied to -/// be not elided by the compiler. If `len` is a multiple of 16 then this -/// zeroizes the entire specified block of memory. Returns a pointer to the byte -/// after the last zeroed byte, with the provenance of `ptr`. -/// -/// This uses sse2 instructions in inline asm to zeroize the memory with blocks -/// of 16 bytes at a time. -/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -/// -/// Furthermore, `ptr` *must* be at least 16 byte aligned. -#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] -pub unsafe fn x86_64_simd16_zeroize_align16_block16(mut ptr: *mut u8, len: usize) -> *mut u8 { - use core::arch::x86_64 as arch; - - precondition_memory_range!(ptr, len); - debug_checked_precondition!(is_aligned_ptr_mut(ptr, 16)); - - let nblocks = (len - len % 16) / 16; - - for _i in 0..nblocks { - // SAFETY: `ptr` is valid for a `len >= nblocks*16` byte write, so we can write - // `nblocks` times 16 bytes and increment `ptr` by 16 bytes; `ptr` stays 16 byte - // aligned - unsafe { - // SAFETY: `ptr` originally pointed into an allocation of `len` bytes so now, - // after `_i` steps `len - 16*_i >= 16` bytes are left, so `ptr` is valid - // for a 16 byte write; also `ptr` is 16 byte aligned - core::arch::asm!( - " - /* write 16 zero bytes to ptr */ - vmovdqa xmmword ptr [{0}], {1} - ", - in(reg) ptr, - in(xmm_reg) arch::_mm_setzero_si128(), - options(nostack), - ); - // NOTE: increment `ptr` outside of the asm to maintain provenance - // SAFETY: this stays within the memory where `ptr` is valid for writes and - // maintains 16 byte alignment - ptr = ptr.add(16); - } - } - ptr -} - -/// Zeroize the memory pointed to by `ptr` for `len` rounded down to a multiple -/// of 16 bytes. -/// -/// This function rounds down `len` to a multiple of 16 and then zeroizes the -/// memory pointed to by `ptr` for that length. This operation is guarantied to -/// be not elided by the compiler. If `len` is a multiple of 16 then this -/// zeroizes the entire specified block of memory. Returns a pointer to the byte -/// after the last zeroed byte, with the provenance of `ptr`. -/// -/// This uses sse2 instructions in inline asm to zeroize the memory with blocks -/// of 16 bytes at a time. -/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -/// -/// Furthermore, `ptr` *must* be at least 16 byte aligned. 
-#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] -pub unsafe fn x86_64_simd16_unroll2_zeroize_align16_block16( - mut ptr: *mut u8, - len: usize, -) -> *mut u8 { - use core::arch::x86_64 as arch; - - precondition_memory_range!(ptr, len); - debug_checked_precondition!(is_aligned_ptr_mut(ptr, 16)); - - let nblocks = (len - len % 16) / 16; - - // SAFETY: `ptr` is valid for a `len >= nblocks*16` byte write, so we can write - // `nblocks` times 16 bytes and increment `ptr` by 16 bytes; `ptr` stays 16 byte - // aligned - for _i in 0..nblocks / 2 { - unsafe { - core::arch::asm!( - " - /* write 16 zero bytes to ptr */ - vmovdqa xmmword ptr [{0}], {1} - vmovdqa xmmword ptr [{0} + 16], {1} - ", - in(reg) ptr, - in(xmm_reg) arch::_mm_setzero_si128(), - options(nostack), - ); - ptr = ptr.add(32); - } - } - if nblocks % 2 == 1 { - unsafe { - core::arch::asm!( - " - /* write 16 zero bytes to ptr */ - vmovdqa xmmword ptr [{0}], {1} - ", - in(reg) ptr, - in(xmm_reg) arch::_mm_setzero_si128(), - options(nostack), - ); - ptr = ptr.add(16); - } - } - ptr -} - -/// Zeroize the memory pointed to by `ptr` for `len` rounded down to a multiple -/// of 32 bytes. -/// -/// This function rounds down `len` to a multiple of 32 and then zeroizes the -/// memory pointed to by `ptr` for that length. This operation is guarantied to -/// be not elided by the compiler. If `len` is a multiple of 32 then this -/// zeroizes the entire specified block of memory. Returns a pointer to the byte -/// after the last zeroed byte, with the provenance of `ptr`. -/// -/// This uses avx2 instructions in inline asm to zeroize the memory with blocks -/// of 32 bytes at a time. -/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -/// -/// Furthermore, `ptr` *must* be at least 32 byte aligned. -#[cfg(all(target_arch = "x86_64", target_feature = "avx"))] -pub unsafe fn x86_64_simd32_zeroize_align32_block32(mut ptr: *mut u8, len: usize) -> *mut u8 { - use core::arch::x86_64 as arch; - - precondition_memory_range!(ptr, len); - debug_checked_precondition!(is_aligned_ptr_mut(ptr, 32)); - - let nblocks = (len - len % 32) / 32; - - for _i in 0..nblocks { - // SAFETY: `ptr` is valid for a `len >= nblocks*32` byte write, so we can write - // `nblocks` times 32 bytes and increment `ptr` by 32 bytes; `ptr` stays 32 byte - // aligned - unsafe { - // SAFETY: `ptr` originally pointed into an allocation of `len` bytes so now, - // after `_i` steps `len - 32*_i >= 32` bytes are left, so `ptr` is valid - // for a 32 byte write; also `ptr` is 32 byte aligned - core::arch::asm!( - " - /* write 32 zero bytes to ptr */ - vmovdqa ymmword ptr [{0}], {1} - ", - in(reg) ptr, - in(ymm_reg) arch::_mm256_setzero_si256(), - options(nostack), - ); - // NOTE: increment `ptr` outside of the asm to maintain provenance - // SAFETY: this stays within the memory where `ptr` is valid for writes and - // maintains 32 byte alignment - ptr = ptr.add(32); - } - } - ptr -} - -/// Zeroize the memory pointed to by `ptr` for `len` rounded down to a multiple -/// of 32 bytes. -/// -/// This function rounds down `len` to a multiple of 32 and then zeroizes the -/// memory pointed to by `ptr` for that length. This operation is guarantied to -/// be not elided by the compiler. If `len` is a multiple of 32 then this -/// zeroizes the entire specified block of memory. 
Returns a pointer to the byte -/// after the last zeroed byte, with the provenance of `ptr`. -/// -/// This uses avx2 instructions in inline asm to zeroize the memory with blocks -/// of 32 bytes at a time. -/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -/// -/// Furthermore, `ptr` *must* be at least 32 byte aligned. -#[cfg(all(target_arch = "x86_64", target_feature = "avx"))] -pub unsafe fn x86_64_simd32_unroll2_zeroize_align32_block32( - mut ptr: *mut u8, - len: usize, -) -> *mut u8 { - use core::arch::x86_64 as arch; - - precondition_memory_range!(ptr, len); - debug_checked_precondition!(is_aligned_ptr_mut(ptr, 32)); - - let nblocks = (len - len % 32) / 32; - - // SAFETY: `ptr` is valid for a `len >= nblocks*32` byte write, so we can write - // `nblocks` times 32 bytes and increment `ptr` by 32 bytes; `ptr` stays 32 byte - // aligned - for _i in 0..(nblocks / 2) { - unsafe { - core::arch::asm!( - " - /* write 64 zero bytes to ptr */ - vmovdqa ymmword ptr [{0}], {1} - vmovdqa ymmword ptr [{0} + 32], {1} - ", - in(reg) ptr, - in(ymm_reg) arch::_mm256_setzero_si256(), - options(nostack), - ); - ptr = ptr.add(64); - } - } - if nblocks % 2 == 1 { - unsafe { - core::arch::asm!( - " - /* write 32 zero bytes to ptr */ - vmovdqa ymmword ptr [{0}], {1} - ", - in(reg) ptr, - in(ymm_reg) arch::_mm256_setzero_si256(), - options(nostack), - ); - ptr = ptr.add(32); - } - } - ptr -} - -/// Zeroize the memory pointed to by `ptr` for `len` rounded down to a multiple -/// of 64 bytes. -/// -/// This function rounds down `len` to a multiple of 64 and then zeroizes the -/// memory pointed to by `ptr` for that length. This operation is guarantied to -/// be not elided by the compiler. If `len` is a multiple of 64 then this -/// zeroizes the entire specified block of memory. Returns a pointer to the byte -/// after the last zeroed byte, with the provenance of `ptr`. -/// -/// This uses avx512 instructions in inline asm to zeroize the memory with -/// blocks of 64 bytes at a time. -/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -/// -/// Furthermore, `ptr` *must* be at least 64 byte aligned. 
-#[cfg(all( - target_arch = "x86_64", - target_feature = "avx512f", - feature = "nightly_stdsimd" -))] -pub unsafe fn x86_64_simd64_zeroize_align64_block64(mut ptr: *mut u8, len: usize) -> *mut u8 { - use core::arch::x86_64 as arch; - - precondition_memory_range!(ptr, len); - debug_checked_precondition!(is_aligned_ptr_mut(ptr, 64)); - - let nblocks = (len - len % 64) / 64; - - for _i in 0..nblocks { - // SAFETY: `ptr` is valid for a `len >= nblocks*64` byte write, so we can write - // `nblocks` times 64 bytes and increment `ptr` by 64 bytes; `ptr` stays 64 byte - // aligned - unsafe { - // SAFETY: `ptr` originally pointed into an allocation of `len` bytes so now, - // after `_i` steps `len - 64*_i >= 64` bytes are left, so `ptr` is valid - // for a 64 byte write; also `ptr` is 64 byte aligned - core::arch::asm!( - " - /* write 64 zero bytes to ptr */ - vmovdqa64 zmmword ptr [{0}], {1} - ", - in(reg) ptr, - in(zmm_reg) arch::_mm512_setzero_si512(), - options(nostack), - ); - // NOTE: increment `ptr` outside of the asm to maintain provenance - // SAFETY: this stays within the memory where `ptr` is valid for writes and - // maintains 64 byte alignment - ptr = ptr.add(64); - } - } - ptr -} diff --git a/src/internals/zeroize/system.rs b/src/internals/zeroize/system.rs deleted file mode 100644 index 287a595..0000000 --- a/src/internals/zeroize/system.rs +++ /dev/null @@ -1,104 +0,0 @@ -//! Bindings to system functions for securely wiping memory. - -use crate::macros::precondition_memory_range; - -/// Overwrite memory with zeros. This operation will not be elided by the -/// compiler. -/// -/// This uses the `explicit_bzero` function present in many recent libcs. -/// -/// # Safety -/// It's C. But the safety requirement is quite obvious: The caller *must* -/// ensure that `ptr` is valid for writes of `len` bytes, see the [`std::ptr`] -/// documentation. In particular this function is not atomic. -// In addition `ptr` needs to be properly aligned, but because we are talking -// about bytes (therefore byte alignment), it *always* is. -#[cfg(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "openbsd", - all(target_env = "gnu", unix), - target_env = "musl" -))] -pub unsafe fn libc_explicit_bzero(ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - // SAFETY: the caller must uphold the safety contract - unsafe { - libc::explicit_bzero(ptr as *mut libc::c_void, len as libc::size_t); - } -} - -/// Overwrite memory with zeros. This operation will not be elided by the -/// compiler. -/// -/// This uses the `explicit_bzero` function present in many recent libcs. -/// -/// # Safety -/// It's C. But the safety requirement is quite obvious: The caller *must* -/// ensure that `ptr` is valid for writes of `len` bytes, see the [`std::ptr`] -/// documentation. In particular this function is not atomic. -// In addition `ptr` needs to be properly aligned, but because we are talking -// about bytes (therefore byte alignment), it *always* is. -#[cfg(all(target_env = "gnu", windows))] -pub unsafe fn libc_explicit_bzero(ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - extern "C" { - fn explicit_bzero(ptr: *mut libc::c_void, len: libc::size_t); - } - - // SAFETY: the caller must uphold the safety contract - unsafe { - explicit_bzero(ptr as *mut libc::c_void, len as libc::size_t); - } -} - -/// Overwrite memory with zeros. This operation will not be elided by the -/// compiler. -/// -/// This uses the `explicit_bzero` function present in many recent libcs. 
-/// -/// # Safety -/// It's C. But the safety requirement is quite obvious: The caller *must* -/// ensure that `ptr` is valid for writes of `len` bytes, see the [`std::ptr`] -/// documentation. In particular this function is not atomic. -// In addition `ptr` needs to be properly aligned, but because we are talking -// about bytes (therefore byte alignment), it *always* is. -#[cfg(target_os = "netbsd")] -pub unsafe fn libc_explicit_bzero(ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - // SAFETY: the caller must uphold the safety contract - unsafe { - libc::explicit_memset( - ptr as *mut libc::c_void, - 0 as libc::c_int, - len as libc::size_t, - ); - } -} - -/// Overwrite memory with zeros. This operation will not be elided by the -/// compiler. -/// -/// This uses the `explicit_bzero` function present in many recent libcs. -/// -/// # Safety -/// It's C. But the safety requirement is quite obvious: The caller *must* -/// ensure that `ptr` is valid for writes of `len` bytes, see the [`std::ptr`] -/// documentation. In particular this function is not atomic. -// In addition `ptr` needs to be properly aligned, but because we are talking -// about bytes (therefore byte alignment), it *always* is. -#[cfg(any(target_os = "macos", target_os = "ios"))] -pub unsafe fn libc_explicit_bzero(ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - // SAFETY: the caller must uphold the safety contract - unsafe { - // the zero value is a `c_int` (`i32` by default), but then converted to - // `unsigned char` (`u8`) - libc::memset_s( - ptr as *mut libc::c_void, - len as libc::size_t, - 0 as libc::c_int, - len as libc::size_t, - ); - } -} diff --git a/src/internals/zeroize/volatile_write.rs b/src/internals/zeroize/volatile_write.rs deleted file mode 100644 index 674356e..0000000 --- a/src/internals/zeroize/volatile_write.rs +++ /dev/null @@ -1,100 +0,0 @@ -//! Utility functions for securely wiping memory, implemented using -//! cross-platform volatile writes. - -use crate::macros::precondition_memory_range; -use crate::util::is_aligned_ptr_mut; -use mirai_annotations::debug_checked_precondition; - -/// Zeroize the memory pointed to by `ptr` and of size `len` bytes, by -/// overwriting it byte for byte using volatile writes. -/// -/// This is guarantied to be not elided by the compiler. -/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -pub unsafe fn volatile_write_zeroize(mut ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - for _i in 0..len { - // SAFETY: `ptr` originally pointed into an allocation of `len` bytes so now, - // after `_i` steps `len - _i > 0` bytes are left, so `ptr` is valid for - // a byte write - unsafe { - core::ptr::write_volatile(ptr, 0u8); - } - // SAFETY: after increment, `ptr` points into the same allocation if `_i == len` - // or one byte past it, so `add` is sound - ptr = unsafe { ptr.add(1) }; - } -} - -/// Zeroize the memory pointed to by `ptr` for `len` rounded down to a multiple -/// of 8 bytes. -/// -/// This function rounds down `len` to a multiple of 8 and then zeroizes the -/// memory pointed to by `ptr` for that length. This operation is guarantied to -/// be not elided by the compiler. If `len` is a multiple of 8 then this -/// zeroizes the entire specified block of memory. Returns a pointer to the byte -/// after the last zeroed byte, with the provenance of `ptr`. 
-/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -/// -/// Furthermore, `ptr` *must* be at least 8 byte aligned. -pub unsafe fn zeroize_align8_block8(mut ptr: *mut u8, len: usize) -> *mut u8 { - precondition_memory_range!(ptr, len); - debug_checked_precondition!(is_aligned_ptr_mut(ptr, 8)); - - let nblocks = (len - len % 8) / 8; - for _i in 0..nblocks { - // SAFETY: `ptr` originally pointed into an allocation of `len` bytes so now, - // after `_i` steps `len - 8*_i >= 8` bytes are left, so `ptr` is valid - // for an 8 byte write SAFETY: `ptr` was originally 8 byte aligned by - // caller contract and we only added a multiple of 8 so it is still 8 - // byte aligned - unsafe { - core::ptr::write_volatile(ptr.cast::(), 0u64); - } - // SAFETY: after increment, `ptr` points into the same allocation or (if `8*_i - // == len`) at most one byte past it, so `add` is sound; `ptr` stays 8 - // byte aligned - ptr = unsafe { ptr.add(8) }; - } - ptr -} - -/// Zeroize the memory pointed to by `ptr` and of size `len % 8` bytes. -/// -/// This can be used to zeroize the bytes left unzeroized by -/// `zeroize_align8_block8` if `len` is not a multiple of 8. This operation is -/// guarantied to be not elided by the compiler. -/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -/// -/// Furthermore, `ptr` *must* be at least 4 byte aligned. -pub unsafe fn zeroize_align4_tail8(mut ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len % 8); - debug_checked_precondition!(is_aligned_ptr_mut(ptr, 4)); - - if len % 8 >= 4 { - // SAFETY: `ptr` is valid for `len % 8` bytes by caller contract - // SAFETY: `ptr` is still 4 byte aligned by caller contract - unsafe { - core::ptr::write_volatile(ptr.cast::(), 0u32); - } - ptr = unsafe { ptr.add(4) }; - } - // the final remainder (at most 3 bytes) is zeroed byte-for-byte - // SAFETY: `ptr` has been incremented by a multiple of 4 <= `len` so `ptr` - // points to an allocation of `len % 4` bytes, so `ptr` can be written to - // and incremented `len % 4` times - for _i in 0..(len % 4) { - unsafe { - core::ptr::write_volatile(ptr, 0u8); - } - ptr = unsafe { ptr.add(1) }; - } -} diff --git a/src/zeroize.rs b/src/zeroize.rs index 3c2f8b1..7be5e02 100644 --- a/src/zeroize.rs +++ b/src/zeroize.rs @@ -72,43 +72,9 @@ cfg_if::cfg_if! { /// the selected features and the version of this library. pub type DefaultMemZeroizer = VolatileMemsetZeroizer; pub(crate) use VolatileMemsetZeroizer as DefaultMemZeroizerConstructor; - } else if #[cfg(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "openbsd", - target_os = "netbsd", - target_os = "macos", - target_os = "ios", - target_env = "gnu", - target_env = "musl" - ))] { - /// Best (i.e. fastest) [`MemZeroizer`] available for the target. - /// - /// Which [`MemZeroizer`] this is is an implementation detail, can depend on the target and - /// the selected features and the version of this library. - pub type DefaultMemZeroizer = LibcZeroizer; - pub(crate) use LibcZeroizer as DefaultMemZeroizerConstructor; - } else if #[cfg(all(target_arch = "x86_64", target_feature = "avx"))] { - /// Best (i.e. fastest) [`MemZeroizer`] available for the target. 
- /// - /// Which [`MemZeroizer`] this is is an implementation detail, can depend on the target and - /// the selected features and the version of this library. - pub type DefaultMemZeroizer = X86_64AvxZeroizer; - pub(crate) use X86_64AvxZeroizer as DefaultMemZeroizerConstructor; - } else if #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] { - /// Best (i.e. fastest) [`MemZeroizer`] available for the target. - /// - /// Which [`MemZeroizer`] this is is an implementation detail, can depend on the target and - /// the selected features and the version of this library. - pub type DefaultMemZeroizer = X86_64Sse2Zeroizer; - pub(crate) use X86_64Sse2Zeroizer as DefaultMemZeroizerConstructor; } else { - /// Best (i.e. fastest) [`MemZeroizer`] available for the target. - /// - /// Which [`MemZeroizer`] this is is an implementation detail, can depend on the target and - /// the selected features and the version of this library. - pub type DefaultMemZeroizer = VolatileWrite8Zeroizer; - pub(crate) use VolatileWrite8Zeroizer as DefaultMemZeroizerConstructor; + pub type DefaultMemZeroizer = MemsetAsmBarierZeroizer; + pub(crate) use MemsetAsmBarierZeroizer as DefaultMemZeroizerConstructor; } } @@ -118,12 +84,6 @@ pub(crate) use VolatileWrite8Zeroizer as TestZeroizer; /// This zeroizer uses the volatile memset intrinsic which does not /// yet have a stable counterpart. It should be very fast, but requires /// nightly. -/// -/// In addition to the volatile write we place a compiler fence right next to -/// the volatile write. This should not be necessary for secure zeroization -/// since the volatile semantics guarenties our writes are not elided, and they -/// can not be delayed since we are deallocating the memory after zeroization. -/// The use of this fence is therefore only a precaution. #[cfg(feature = "nightly_core_intrinsics")] #[derive(Debug, Copy, Clone, Default)] pub struct VolatileMemsetZeroizer; @@ -133,124 +93,41 @@ impl MemZeroizer for VolatileMemsetZeroizer { unsafe fn zeroize_mem_blocks(&self, ptr: *mut u8, len: usize) { precondition_memory_range!(ptr, len); debug_precondition_logaligned!(A, ptr); - // SAFETY: the caller must uphold the safety contract of - // `internals::volatile_memset` - unsafe { - internals::volatile_memset(ptr, 0, len); - } - fence(); - } -} - -/// This zeroizer uses volatile zeroization functions provided by libc. -/// It should be fast but is only available on certain platforms. -/// -/// In addition to the volatile write we place a compiler fence right next to -/// the volatile write. This should not be necessary for secure zeroization -/// since the volatile semantics guarenties our writes are not elided, and they -/// can not be delayed since we are deallocating the memory after zeroization. -/// The use of this fence is therefore only a precaution. 
-#[cfg(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "openbsd", - target_os = "netbsd", - target_os = "macos", - target_os = "ios", - target_env = "gnu", - target_env = "musl" -))] -#[derive(Debug, Copy, Clone, Default)] -pub struct LibcZeroizer; - -#[cfg(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "openbsd", - target_os = "netbsd", - target_os = "macos", - target_os = "ios", - target_env = "gnu", - target_env = "musl" -))] -impl MemZeroizer for LibcZeroizer { - unsafe fn zeroize_mem_blocks(&self, ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - debug_precondition_logaligned!(A, ptr); - debug_precondition_logmultiple!(B, len); - // SAFETY: the caller must uphold the safety contract of - // `internals::libc_explicit_bzero` - unsafe { - internals::libc_explicit_bzero(ptr, len); - } - fence(); - } -} - -/// This zeroizer uses volatile assembly (`rep stosb`) for modern x86_64, -/// performing very well for large amounts of memory. To make this available on -/// stable, it uses a C compiler at build time. -/// -/// In addition to the volatile write we place a compiler fence right next to -/// the volatile write. This should not be necessary for secure zeroization -/// since the volatile semantics guarenties our writes are not elided, and they -/// can not be delayed since we are deallocating the memory after zeroization. -/// The use of this fence is therefore only a precaution. -#[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))] -#[derive(Debug, Copy, Clone, Default)] -pub struct AsmRepStosZeroizer; - -#[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))] -impl MemZeroizer for AsmRepStosZeroizer { - unsafe fn zeroize_mem_blocks(&self, ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - debug_precondition_logaligned!(A, ptr); - debug_precondition_logmultiple!(B, len); - // SAFETY: the caller must uphold the safety contract of - // `internals::asm_ermsb_zeroize` + // SAFETY: the caller must uphold the safety contract unsafe { - internals::asm_ermsb_zeroize(ptr, len); + core::intrinsics::volatile_set_memory(ptr, 0, len); } - fence(); } } -/// This zeroizer uses a volatile write per byte. This zeroization technique is -/// similar to the `zeroize` crate, available for all target platforms on -/// stable, but extremely slow. -/// -/// In addition to the volatile write we place a compiler fence right next to -/// the volatile write. This should not be necessary for secure zeroization -/// since the volatile semantics guarenties our writes are not elided, and they -/// can not be delayed since we are deallocating the memory after zeroization. -/// The use of this fence is therefore only a precaution. +/// This zeroizer uses a non-volatile memset, followed by an empty asm block +/// acting as an optimisation barier. It should be very fast, and according to +/// my current understanding of the op.sem. the compiler is not allowed to +/// remove the writes. 
#[derive(Debug, Copy, Clone, Default)] -pub struct VolatileWriteZeroizer; +pub struct MemsetAsmBarierZeroizer; -impl MemZeroizer for VolatileWriteZeroizer { +impl MemZeroizer for MemsetAsmBarierZeroizer { unsafe fn zeroize_mem_blocks(&self, ptr: *mut u8, len: usize) { precondition_memory_range!(ptr, len); debug_precondition_logaligned!(A, ptr); - debug_precondition_logmultiple!(B, len); - // SAFETY: the caller must uphold the safety contract of - // `volatile_write_zeroize_mem` + // SAFETY: the caller must uphold the safety contract of `write_bytes` + unsafe { ptr.write_bytes(0, len) }; + // Optimisation barier, so the writes can not be optimised out unsafe { - internals::volatile_write_zeroize(ptr, len); - } - fence(); + core::arch::asm!( + "/* {0} */", + in(reg) ptr, + options(nostack, readonly, preserves_flags), + ) + }; } } /// This zeroizer uses a volatile write per 8 bytes if the pointer is 8 byte /// aligned, and otherwise uses `VolatileWriteZeroizer`. This zeroization -/// technique is available for all target platforms on stable, but not very -/// fast. -/// -/// In addition to the volatile write we place a compiler fence right next to -/// the volatile write. This should not be necessary for secure zeroization -/// since the volatile semantics guarenties our writes are not elided, and they -/// can not be delayed since we are deallocating the memory after zeroization. -/// The use of this fence is therefore only a precaution. +/// technique is pure Rust and available for all target platforms on stable, but +/// not very fast. /// /// This zeroization method can benefit (in terms of performance) from using the /// [`MemZeroizer::zeroize_mem_blocks`] function instead of @@ -279,132 +156,8 @@ impl MemZeroizer for VolatileWrite8Zeroizer { internals::volatile_write_zeroize(ptr, len); } } - fence(); - } -} - -/// This zeroizer uses inline asm with avx2 instructions if the pointer is 32 -/// byte aligned, and otherwise uses `VolatileWrite8Zeroizer`. This zeroization -/// technique is available for x86_64 platforms with avx2 cpu support on stable, -/// and reasonably fast for 32 byte aligned pointers. -/// -/// In addition to the volatile write we place a compiler fence right next to -/// the volatile write. This should not be necessary for secure zeroization -/// since the volatile semantics guarenties our writes are not elided, and they -/// can not be delayed since we are deallocating the memory after zeroization. -/// The use of this fence is therefore only a precaution. -/// -/// This zeroization method can benefit (in terms of performance) from using the -/// [`MemZeroizer::zeroize_mem_blocks`] function instead of -/// [`MemZeroizer::zeroize_mem`] function if a minimum alignment is known -/// at compile time. 
-#[cfg(all(target_arch = "x86_64", target_feature = "avx"))] -#[derive(Debug, Copy, Clone, Default)] -pub struct X86_64AvxZeroizer; - -#[cfg(all(target_arch = "x86_64", target_feature = "avx"))] -impl MemZeroizer for X86_64AvxZeroizer { - unsafe fn zeroize_mem_blocks(&self, mut ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - debug_precondition_logaligned!(A, ptr); - debug_precondition_logmultiple!(B, len); - // if we have 32 = 2^5 byte alignment then write 32 bytes at a time, - // with 8 = 2^3 byte align do 8 bytes at a time, otherwise 1 byte at a time - if (A >= 5) | is_aligned_ptr_mut(ptr, 32) { - // SAFETY: `ptr` is 32 byte aligned - ptr = unsafe { internals::x86_64_simd32_unroll2_zeroize_align32_block32(ptr, len) }; - // zeroize tail - if B < 5 { - ptr = unsafe { internals::zeroize_align8_block8(ptr, len % 32) }; - } - if B < 3 { - unsafe { internals::zeroize_align4_tail8(ptr, len % 8) }; - } - } else if (A >= 3) | is_aligned_ptr_mut(ptr, 8) { - // SAFETY: `ptr` is 8 byte aligned - ptr = unsafe { internals::zeroize_align8_block8(ptr, len) }; - if B < 3 { - unsafe { internals::zeroize_align4_tail8(ptr, len % 8) }; - } - } else { - // SAFETY: no alignment requirement - unsafe { - internals::volatile_write_zeroize(ptr, len); - } - } - fence(); - } -} - -/// This zeroizer uses inline asm with sse2 instructions if the pointer is 16 -/// byte aligned, and otherwise uses `VolatileWrite8Zeroizer`. This zeroization -/// technique is available for x86_64 platforms with sse2 cpu support on stable, -/// and reasonably fast for 16 byte aligned pointers. -/// -/// In addition to the volatile write we place a compiler fence right next to -/// the volatile write. This should not be necessary for secure zeroization -/// since the volatile semantics guarenties our writes are not elided, and they -/// can not be delayed since we are deallocating the memory after zeroization. -/// The use of this fence is therefore only a precaution. -/// -/// This zeroization method can benefit (in terms of performance) from using the -/// [`MemZeroizer::zeroize_mem_blocks`] function instead of -/// [`MemZeroizer::zeroize_mem`] function if a minimum alignment is known -/// at compile time. -#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] -#[derive(Debug, Copy, Clone, Default)] -pub struct X86_64Sse2Zeroizer; - -#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] -impl MemZeroizer for X86_64Sse2Zeroizer { - unsafe fn zeroize_mem_blocks(&self, mut ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - debug_precondition_logaligned!(A, ptr); - debug_precondition_logmultiple!(B, len); - // if we have 16 = 2^4 byte alignment then write 16 bytes at a time, - // with 8 = 2^3 byte align do 8 bytes at a time, otherwise 1 byte at a time - if (A >= 4) | is_aligned_ptr_mut(ptr, 16) { - // SAFETY: `ptr` is 16 byte aligned - - ptr = unsafe { internals::x86_64_simd16_unroll2_zeroize_align16_block16(ptr, len) }; - // zeroize tail - if B < 4 { - ptr = unsafe { internals::zeroize_align8_block8(ptr, len % 16) }; - } - if B < 3 { - unsafe { internals::zeroize_align4_tail8(ptr, len % 8) }; - } - } else if (A >= 3) | is_aligned_ptr_mut(ptr, 8) { - // SAFETY: `ptr` is 8 byte aligned - ptr = unsafe { internals::zeroize_align8_block8(ptr, len) }; - if B < 3 { - unsafe { internals::zeroize_align4_tail8(ptr, len % 8) }; - } - } else { - // SAFETY: no alignment requirement - unsafe { - internals::volatile_write_zeroize(ptr, len); - } - } - fence(); } } -/// Compiler fence. 
-/// -/// Forces sequentially consistent access across this fence at compile time. At -/// runtime the CPU can still reorder memory accesses. This should not be -/// necessary for secure zeroization since the volatile semantics guaranties our -/// writes are not elided, and they can not be delayed since we are deallocating -/// the memory after zeroization. The use of this fence is therefore only a -/// precaution. For the same reasons it probably does not add security, it also -/// probably does not hurt performance significantly. -#[inline] -fn fence() { - use core::sync::atomic::{compiler_fence, Ordering}; - - compiler_fence(Ordering::SeqCst); -} - #[cfg(test)] mod tests; diff --git a/src/zeroize/tests.rs b/src/zeroize/tests.rs index 8571cc0..ed0ba44 100644 --- a/src/zeroize/tests.rs +++ b/src/zeroize/tests.rs @@ -33,98 +33,18 @@ fn test_b127_volatile_memset_zeroizer() { test_b127_zeroizer(VolatileMemsetZeroizer); } -#[cfg(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "openbsd", - target_os = "netbsd", - target_os = "macos", - target_os = "ios", - target_env = "gnu", - target_env = "musl" -))] -#[test] -#[cfg_attr(miri, ignore)] // ffi -fn test_b127_libc_zeroizer() { - test_b127_zeroizer(LibcZeroizer); -} - -#[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))] -#[test] -#[cfg_attr(miri, ignore)] // ffi, asm -fn test_b127_asm_rep_stos_zeroizer() { - test_b127_zeroizer(AsmRepStosZeroizer); -} - -#[test] -fn test_b127_volatile_write_zeroizer() { - test_b127_zeroizer(VolatileWriteZeroizer); -} - #[test] fn test_b127_volatile_write8_zeroizer() { test_b127_zeroizer(VolatileWrite8Zeroizer); } -#[cfg(all(target_arch = "x86_64", target_feature = "avx"))] -#[test] -fn test_b127_x86_64_avx_zeroizer() { - test_b127_zeroizer(X86_64AvxZeroizer); -} - -#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] -#[test] -fn test_b127_x86_64_sse2_zeroizer() { - test_b127_zeroizer(X86_64Sse2Zeroizer); -} - #[cfg(feature = "nightly_core_intrinsics")] #[test] fn test_b239_lowalign_volatile_memset_zeroizer() { test_b239_lowalign_zeroizer(VolatileMemsetZeroizer); } -#[cfg(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "openbsd", - target_os = "netbsd", - target_os = "macos", - target_os = "ios", - target_env = "gnu", - target_env = "musl" -))] -#[test] -#[cfg_attr(miri, ignore)] // ffi -fn test_b239_lowalign_libc_zeroizer() { - test_b239_lowalign_zeroizer(LibcZeroizer); -} - -#[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))] -#[test] -#[cfg_attr(miri, ignore)] // ffi, asm -fn test_b239_lowalign_asm_rep_stos_zeroizer() { - test_b239_lowalign_zeroizer(AsmRepStosZeroizer); -} - -#[test] -fn test_b239_lowalign_volatile_write_zeroizer() { - test_b239_lowalign_zeroizer(VolatileWriteZeroizer); -} - #[test] fn test_b239_lowalign_volatile_write8_zeroizer() { test_b239_lowalign_zeroizer(VolatileWrite8Zeroizer); } - -#[cfg(all(target_arch = "x86_64", target_feature = "avx"))] -#[test] -fn test_b239_lowalign_x86_64_avx_zeroizer() { - test_b239_lowalign_zeroizer(X86_64AvxZeroizer); -} - -#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] -#[test] -fn test_b239_lowalign_x86_64_sse2_zeroizer() { - test_b239_lowalign_zeroizer(X86_64Sse2Zeroizer); -}
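
For reference, the technique introduced by this patch can be reproduced in
isolation with the sketch below. It uses the same ingredients as
`MemsetAsmBarierZeroizer` above: a plain (non-volatile) `write_bytes`,
followed by an empty `asm!` block that takes the pointer as an input operand,
so the compiler has to assume the asm may read the buffer and (to my current
understanding of the operational semantics) cannot elide the preceding
writes. The free function `zeroize_with_asm_barrier` and the `main` driver
are illustrative only and are not part of the crate's API.

    /// Illustrative stand-alone version of the barrier-based zeroization.
    ///
    /// # Safety
    /// `ptr` must be valid for writes of `len` bytes.
    unsafe fn zeroize_with_asm_barrier(ptr: *mut u8, len: usize) {
        // Plain, non-volatile memset; on its own the optimiser may remove it
        // when it can prove the memory is never read afterwards.
        unsafe { ptr.write_bytes(0, len) };
        // Empty asm block with `ptr` as input: the compiler must assume the
        // asm reads the pointed-to memory, so the writes above cannot be
        // optimised out.
        unsafe {
            core::arch::asm!(
                "/* {0} */",
                in(reg) ptr,
                options(nostack, readonly, preserves_flags),
            );
        }
    }

    fn main() {
        let mut secret = [0x42u8; 32];
        // SAFETY: `secret` is valid for writes of `secret.len()` bytes.
        unsafe { zeroize_with_asm_barrier(secret.as_mut_ptr(), secret.len()) };
        assert!(secret.iter().all(|&b| b == 0));
    }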