
Commit 6519b13
Introduce zeroizer based on (non-volatile) write_bytes and asm! optimisation barrier, and remove many slower ones

*Breaking:* Removes many zeroizers previously available.

This new zeroizer cannot be optimised out, according to my current understanding of the operational semantics of Rust.
It can be implemented on stable, is target-independent, and is faster than all our handwritten ones.
Therefore we can remove all the handwritten target-specific ones. This is good, since their implementations
turned out to be prone to mistakes.
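Editor's aside: for readers unfamiliar with the pattern, here is a minimal sketch of the technique the commit message describes — an ordinary `core::ptr::write_bytes` memset followed by an empty `asm!` block acting as an optimisation barrier. The function name and the exact `asm!` options are illustrative assumptions, not necessarily the crate's actual implementation, and stable `asm!` is only available on the major architectures (x86, x86_64, arm, aarch64, riscv, ...):

use core::arch::asm;

/// Sketch only: zeroize `len` bytes at `ptr` with an ordinary memset,
/// then keep the compiler from eliding it with an `asm!` barrier.
///
/// # Safety
/// `ptr` must be valid for writes of `len` bytes.
pub unsafe fn zeroize_with_barrier(ptr: *mut u8, len: usize) {
    // Plain, vectorizable memset; on its own the optimiser may remove it
    // when it can prove the memory is never read afterwards.
    unsafe { core::ptr::write_bytes(ptr, 0, len) };
    // Empty asm block taking `ptr` as input. It is opaque to the optimiser
    // and (absent `nomem`) may read the pointed-to memory, so the writes
    // above must actually be performed.
    unsafe { asm!("", in(reg) ptr, options(nostack, readonly, preserves_flags)) };
}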
niluxv committed May 2, 2024
1 parent f0e489f commit 6519b13
Showing 8 changed files with 116 additions and 1,042 deletions.
1 change: 0 additions & 1 deletion Cargo.toml
@@ -38,7 +38,6 @@ dev = ["std"]
 [dependencies]
 allocator-api2 = { version = "0.2", default-features = false }
 cfg-if = "1.0"
-libc = "0.2"
 mirai-annotations = "1.12"
 sptr = "0.3"
 thiserror = { version = "1.0", optional = true }
25 changes: 3 additions & 22 deletions benches/bench_zeroizers.rs
@@ -1,13 +1,6 @@
 use criterion::{criterion_group, criterion_main, Criterion};
-#[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))]
-use secmem_alloc::zeroize::AsmRepStosZeroizer;
-#[cfg(all(target_arch = "x86_64", target_feature = "avx"))]
-use secmem_alloc::zeroize::X86_64AvxZeroizer;
-#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
-use secmem_alloc::zeroize::X86_64Sse2Zeroizer;
 use secmem_alloc::zeroize::{
-    LibcZeroizer, MemZeroizer, VolatileMemsetZeroizer, VolatileWrite8Zeroizer,
-    VolatileWriteZeroizer,
+    MemZeroizer, MemsetAsmBarierZeroizer, VolatileMemsetZeroizer, VolatileWrite8Zeroizer,
 };

 fn zeroize_b127<Z: MemZeroizer>(z: Z, array: &mut [u8; 127]) {
@@ -50,21 +43,9 @@ macro_rules! bench_zeroizers {
         $cgroup.bench_function("VolatileMemsetZeroizer", |b| {
             b.iter(|| $bench_function(VolatileMemsetZeroizer, &mut $array.0))
         });
-        $cgroup.bench_function("LibcZeroizer", |b| {
-            b.iter(|| $bench_function(LibcZeroizer, &mut $array.0))
+        $cgroup.bench_function("MemsetAsmBarierZeroizer", |b| {
+            b.iter(|| $bench_function(MemsetAsmBarierZeroizer, &mut $array.0))
         });
-        $cgroup.bench_function("VolatileWriteZeroizer", |b| {
-            b.iter(|| $bench_function(VolatileWriteZeroizer, &mut $array.0))
-        });
         $cgroup.bench_function("VolatileWrite8Zeroizer", |b| {
             b.iter(|| $bench_function(VolatileWrite8Zeroizer, &mut $array.0))
         });
-        #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
-        {
-            $cgroup.bench_function("X86_64Sse2Zeroizer", |b| {
-                b.iter(|| $bench_function(X86_64Sse2Zeroizer, &mut $array.0))
-            });
-        }
         #[cfg(all(target_arch = "x86_64", target_feature = "avx"))]
         {
             $cgroup.bench_function("X86_64AvxZeroizer", |b| {
233 changes: 92 additions & 141 deletions src/internals/zeroize.rs
@@ -1,35 +1,103 @@
-//! Utility functions for securely wiping memory.
-//!
-//! Contains wrappers around intrinsics and ffi functions necessary for the
-//! [`crate::zeroize`] module.
+//! Utility functions for the [`crate::zeroize`] module, for securely wiping
+//! memory, implemented using cross-platform pure Rust volatile writes.

-#[cfg(target_arch = "x86_64")]
-mod asm_x86_64;
-#[cfg(target_arch = "x86_64")]
-pub use asm_x86_64::*;
+use crate::macros::precondition_memory_range;
+use crate::util::is_aligned_ptr_mut;
+use mirai_annotations::debug_checked_precondition;

-mod system;
-pub use system::*;
+/// Zeroize the memory pointed to by `ptr` and of size `len` bytes, by
+/// overwriting it byte for byte using volatile writes.
+///
+/// This is guaranteed not to be elided by the compiler.
+///
+/// # Safety
+/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see
+/// the [`std::ptr`] documentation. In particular this function is not atomic.
+pub unsafe fn volatile_write_zeroize(mut ptr: *mut u8, len: usize) {
+    precondition_memory_range!(ptr, len);
+    for _i in 0..len {
+        // SAFETY: `ptr` originally pointed into an allocation of `len` bytes so
+        // now, after `_i` steps, `len - _i > 0` bytes are left, so `ptr` is
+        // valid for a byte write
+        unsafe {
+            core::ptr::write_volatile(ptr, 0u8);
+        }
+        // SAFETY: after increment, `ptr` points into the same allocation or, if
+        // `_i + 1 == len`, one past the end of it, so `add` is sound
+        ptr = unsafe { ptr.add(1) };
+    }
+}
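Editor's aside, not part of the diff: a hypothetical caller of the primitive above, showing the safety contract being met.

fn wipe_key() {
    let mut key = [0xA5u8; 32];
    // SAFETY: the pointer is valid for writes of the buffer's full length,
    // and no other thread accesses `key` concurrently.
    unsafe { volatile_write_zeroize(key.as_mut_ptr(), key.len()) };
    assert_eq!(key, [0u8; 32]);
}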

-mod volatile_write;
-pub use volatile_write::*;
+/// Zeroize the memory pointed to by `ptr` for `len` rounded down to a multiple
+/// of 8 bytes.
+///
+/// This function rounds down `len` to a multiple of 8 and then zeroizes the
+/// memory pointed to by `ptr` for that length. This operation is guaranteed
+/// not to be elided by the compiler. If `len` is a multiple of 8 then this
+/// zeroizes the entire specified block of memory. Returns a pointer to the byte
+/// after the last zeroed byte, with the provenance of `ptr`.
+///
+/// # Safety
+/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see
+/// the [`std::ptr`] documentation. In particular this function is not atomic.
+///
+/// Furthermore, `ptr` *must* be at least 8 byte aligned.
+pub unsafe fn zeroize_align8_block8(mut ptr: *mut u8, len: usize) -> *mut u8 {
+    precondition_memory_range!(ptr, len);
+    debug_checked_precondition!(is_aligned_ptr_mut(ptr, 8));
+
+    let nblocks = (len - len % 8) / 8;
+    for _i in 0..nblocks {
+        // SAFETY: `ptr` originally pointed into an allocation of `len` bytes so
+        // now, after `_i` steps, `len - 8*_i >= 8` bytes are left, so `ptr` is
+        // valid for an 8 byte write
+        // SAFETY: `ptr` was originally 8 byte aligned by caller contract and we
+        // only added a multiple of 8, so it is still 8 byte aligned
+        unsafe {
+            core::ptr::write_volatile(ptr.cast::<u64>(), 0u64);
+        }
+        // SAFETY: after increment, `ptr` points into the same allocation or (if
+        // `8*(_i + 1) == len`) one past the end of it, so `add` is sound; `ptr`
+        // stays 8 byte aligned
+        ptr = unsafe { ptr.add(8) };
+    }
+    ptr
+}

-/// Volatile write byte to memory.
+/// Zeroize the memory pointed to by `ptr` and of size `len % 8` bytes.
 ///
-/// This uses the [`core::intrinsics::volatile_set_memory`] intrinsic and can
-/// only be used on nightly, with the `nightly` feature enabled.
+/// This can be used to zeroize the bytes left unzeroized by
+/// `zeroize_align8_block8` if `len` is not a multiple of 8. This operation is
+/// guaranteed not to be elided by the compiler.
 ///
 /// # Safety
 /// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see
 /// the [`std::ptr`] documentation. In particular this function is not atomic.
-// In addition `ptr` needs to be properly aligned, but because we are talking
-// about bytes (therefore byte alignment), it *always* is.
-#[cfg(feature = "nightly_core_intrinsics")]
-pub unsafe fn volatile_memset(ptr: *mut u8, val: u8, len: usize) {
-    crate::macros::precondition_memory_range!(ptr, len);
-    // SAFETY: the caller must uphold the safety contract
-    unsafe {
-        core::intrinsics::volatile_set_memory(ptr, val, len);
-    }
-}
+///
+/// Furthermore, `ptr` *must* be at least 4 byte aligned.
+pub unsafe fn zeroize_align4_tail8(mut ptr: *mut u8, len: usize) {
+    precondition_memory_range!(ptr, len % 8);
+    debug_checked_precondition!(is_aligned_ptr_mut(ptr, 4));
+
+    if len % 8 >= 4 {
+        // SAFETY: `ptr` is valid for `len % 8` bytes by caller contract
+        // SAFETY: `ptr` is still 4 byte aligned by caller contract
+        unsafe {
+            core::ptr::write_volatile(ptr.cast::<u32>(), 0u32);
+        }
+        ptr = unsafe { ptr.add(4) };
+    }
+    // the final remainder (at most 3 bytes) is zeroed byte-for-byte
+    // SAFETY: `ptr` has been incremented by a multiple of 4 <= `len % 8`, so
+    // `ptr` now points to an allocation of `len % 4` bytes, so `ptr` can be
+    // written to and incremented `len % 4` times
+    for _i in 0..(len % 4) {
+        unsafe {
+            core::ptr::write_volatile(ptr, 0u8);
+        }
+        ptr = unsafe { ptr.add(1) };
+    }
+}
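Editor's illustration, not part of the diff: under the documented preconditions, the two new primitives compose to wipe an arbitrary-length, 8-byte-aligned buffer. The `Buf` wrapper and test are hypothetical:

#[test]
fn wipe_whole_buffer() {
    #[repr(align(8))]
    struct Buf([u8; 257]);

    let mut buf = Buf([0xA5; 257]);
    let len = buf.0.len();
    // SAFETY: the pointer is valid for writes of `len` bytes and is 8 byte aligned.
    unsafe {
        // Zero the largest prefix whose length is a multiple of 8 ...
        let tail = zeroize_align8_block8(buf.0.as_mut_ptr(), len);
        // ... then the remaining `len % 8` bytes; `tail` is still 8 byte
        // aligned, so the 4 byte alignment precondition holds.
        zeroize_align4_tail8(tail, len);
    }
    assert!(buf.0.iter().all(|&b| b == 0));
}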

@@ -76,135 +144,18 @@ mod tests {
         assert_eq!(&array[..], &expected[..]);
     }

-    #[cfg(feature = "nightly_core_intrinsics")]
-    #[test]
-    fn test_volatile_memset() {
-        test_b128_zeroizer(|ptr: *mut u8, len: usize| unsafe { volatile_memset(ptr, 0, len) })
-    }
-
-    #[cfg(any(
-        target_os = "freebsd",
-        target_os = "dragonfly",
-        target_os = "openbsd",
-        target_os = "netbsd",
-        target_os = "macos",
-        target_os = "ios",
-        target_env = "gnu",
-        target_env = "musl"
-    ))]
-    #[test]
-    #[cfg_attr(miri, ignore)] // ffi
-    fn test_explicit_bzero() {
-        test_b128_zeroizer(|ptr: *mut u8, len: usize| unsafe { libc_explicit_bzero(ptr, len) })
-    }
-
-    #[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))]
-    #[test]
-    #[cfg_attr(miri, ignore)] // asm
-    fn test_asm_ermsb_zeroize() {
-        test_b128_zeroizer(|ptr: *mut u8, len: usize| unsafe { asm_ermsb_zeroize(ptr, len) })
-    }
-
-    #[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))]
     #[test]
     fn test_volatile_write_zeroize() {
-        test_b128_zeroizer(|ptr: *mut u8, len: usize| unsafe { volatile_write_zeroize(ptr, len) })
+        test_b128_zeroizer(|ptr, len| unsafe { volatile_write_zeroize(ptr, len) })
     }

-    #[cfg(feature = "nightly_core_intrinsics")]
-    #[test]
-    fn test_volatile_memset_lowalign() {
-        test_b239_lowalign_zeroizer(|ptr: *mut u8, len: usize| unsafe {
-            volatile_memset(ptr, 0, len)
-        })
-    }
-
-    #[cfg(any(
-        target_os = "freebsd",
-        target_os = "dragonfly",
-        target_os = "openbsd",
-        target_os = "netbsd",
-        target_os = "macos",
-        target_os = "ios",
-        target_env = "gnu",
-        target_env = "musl"
-    ))]
-    #[test]
-    #[cfg_attr(miri, ignore)] // ffi
-    fn test_explicit_bzero_lowalign() {
-        test_b239_lowalign_zeroizer(|ptr: *mut u8, len: usize| unsafe {
-            libc_explicit_bzero(ptr, len)
-        })
-    }
-
-    #[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))]
-    #[test]
-    #[cfg_attr(miri, ignore)] // asm
-    fn test_asm_ermsb_zeroize_lowalign() {
-        test_b239_lowalign_zeroizer(|ptr: *mut u8, len: usize| unsafe {
-            asm_ermsb_zeroize(ptr, len)
-        })
-    }
-
-    #[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))]
     #[test]
-    fn test_volatile_write_zeroize_lowalign() {
-        test_b239_lowalign_zeroizer(|ptr: *mut u8, len: usize| unsafe {
-            volatile_write_zeroize(ptr, len)
-        })
+    fn test_lowalign_volatile_write_zeroize() {
+        test_b239_lowalign_zeroizer(|ptr, len| unsafe { volatile_write_zeroize(ptr, len) })
     }

     #[test]
     fn test_zeroize_align8_block8() {
         test_b257_align64_block_zeroizer(|ptr, len| unsafe { zeroize_align8_block8(ptr, len) })
     }

-    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
-    #[test]
-    #[cfg_attr(miri, ignore)] // asm
-    fn test_x86_64_simd16_zeroize_align16_block16() {
-        test_b257_align64_block_zeroizer(|ptr, len| unsafe {
-            x86_64_simd16_zeroize_align16_block16(ptr, len)
-        })
-    }
-
-    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
-    #[test]
-    #[cfg_attr(miri, ignore)] // asm
-    fn test_x86_64_simd16_unroll2_zeroize_align16_block16() {
-        test_b257_align64_block_zeroizer(|ptr, len| unsafe {
-            x86_64_simd16_unroll2_zeroize_align16_block16(ptr, len)
-        })
-    }
-
-    #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
-    #[test]
-    #[cfg_attr(miri, ignore)] // asm
-    fn test_x86_64_simd32_zeroize_align32_block32() {
-        test_b257_align64_block_zeroizer(|ptr, len| unsafe {
-            x86_64_simd32_zeroize_align32_block32(ptr, len)
-        })
-    }
-
-    #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
-    #[test]
-    #[cfg_attr(miri, ignore)] // asm
-    fn test_x86_64_simd32_unroll2_zeroize_align32_block32() {
-        test_b257_align64_block_zeroizer(|ptr, len| unsafe {
-            x86_64_simd32_unroll2_zeroize_align32_block32(ptr, len)
-        })
-    }
-
-    #[cfg(all(
-        target_arch = "x86_64",
-        target_feature = "avx512f",
-        feature = "nightly_stdsimd"
-    ))]
-    #[test]
-    #[cfg_attr(miri, ignore)] // asm
-    fn test_x86_64_simd64_zeroize_align64_block64() {
-        test_b257_align64_block_zeroizer(|ptr, len| unsafe {
-            x86_64_simd64_zeroize_align64_block64(ptr, len)
-        })
-    }
 }
