diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml index d947bc1..df9f423 100644 --- a/.github/workflows/audit.yml +++ b/.github/workflows/audit.yml @@ -8,12 +8,19 @@ on: jobs: audit: - name: Cargo Audit runs-on: ubuntu-latest + steps: - name: Checkout sources - uses: actions/checkout@v1 + uses: actions/checkout@v4 + - name: Run cargo audit - uses: actions-rs/audit-check@v1 + uses: rustsec/audit-check@v1.4.1 with: token: ${{ secrets.GITHUB_TOKEN }} + + permissions: + issues: write + issues-reason: to create issues + checks: write + checks-reason: to create check diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 74415f9..a9a9098 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -11,17 +11,13 @@ jobs: os: [ubuntu-latest, windows-latest] steps: - name: Checkout sources - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Install stable toolchain - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: stable - override: true + uses: dtolnay/rust-toolchain@stable - name: Run cargo check - uses: actions-rs/cargo@v1 + uses: clechasseur/rs-cargo@v2 with: command: check @@ -33,17 +29,13 @@ jobs: os: [ubuntu-latest, windows-latest] steps: - name: Checkout sources - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Install nightly toolchain - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: nightly - override: true + uses: dtolnay/rust-toolchain@nightly - name: Run cargo test - uses: actions-rs/cargo@v1 + uses: clechasseur/rs-cargo@v2 with: command: test args: --no-default-features --features dev @@ -56,17 +48,13 @@ jobs: os: [ubuntu-latest, windows-latest] steps: - name: Checkout sources - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Install nightly toolchain - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: nightly - override: true + uses: dtolnay/rust-toolchain@nightly - name: Run cargo test - uses: actions-rs/cargo@v1 + uses: clechasseur/rs-cargo@v2 with: command: test args: --all-features @@ -78,17 +66,13 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout sources - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Install nightly toolchain - uses: actions-rs/toolchain@v1 - with: - profile: minimal - toolchain: nightly - override: true + uses: dtolnay/rust-toolchain@nightly - name: Run cargo doc - uses: actions-rs/cargo@v1 + uses: clechasseur/rs-cargo@v2 with: command: doc args: --all-features @@ -98,18 +82,15 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout sources - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Install nightly toolchain - uses: actions-rs/toolchain@v1 + uses: dtolnay/rust-toolchain@nightly with: - profile: minimal - toolchain: nightly - override: true components: rustfmt - name: Run cargo fmt - uses: actions-rs/cargo@v1 + uses: clechasseur/rs-cargo@v2 with: command: fmt args: -- --check @@ -119,16 +100,16 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout sources - uses: actions/checkout@v2 + uses: actions/checkout@v4 + - name: Install nightly toolchain - uses: actions-rs/toolchain@v1 + uses: dtolnay/rust-toolchain@nightly with: - toolchain: nightly components: clippy - override: true - - uses: actions-rs/clippy-check@v1 + + - uses: clechasseur/rs-cargo@v2 with: - token: ${{ secrets.GITHUB_TOKEN }} + command: clippy args: --all-features miri: @@ -136,17 +117,15 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout sources - uses: actions/checkout@v2 + uses: 
actions/checkout@v4 - name: Get latest toolchain version with miri run: echo "TOOLCHAIN=$(curl -s https://rust-lang.github.io/rustup-components-history/x86_64-unknown-linux-gnu/miri)" >> $GITHUB_ENV - name: Install latest nightly toolchain with miri - uses: actions-rs/toolchain@v1 + uses: dtolnay/rust-toolchain@master with: - profile: minimal toolchain: nightly-${{ env.TOOLCHAIN }} - override: true components: rust-src, miri - name: Run cargo miri test @@ -154,4 +133,4 @@ jobs: env: MIRI_LOG: 1 MIRI_BACKTRACE: 1 - MIRIFLAGS: -Zmiri-strict-provenance -Zmiri-check-number-validity + MIRIFLAGS: -Zmiri-strict-provenance diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml index a6c10e6..3cbc4f3 100644 --- a/.github/workflows/codecov.yml +++ b/.github/workflows/codecov.yml @@ -3,29 +3,22 @@ name: Code Coverage on: [push, pull_request] jobs: - code_cov: - name: Code Coverage Tarpaulin + Codecov + test: + name: coverage runs-on: ubuntu-latest + container: + image: xd009642/tarpaulin:develop-nightly + options: --security-opt seccomp=unconfined steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 - - name: Install nightly toolchain - uses: actions-rs/toolchain@v1 - with: - toolchain: nightly - override: true - - - name: Run cargo-tarpaulin - uses: actions-rs/tarpaulin@v0.1 - with: - args: --ignore-tests --all-features - out-type: Xml - run-types: AllTargets - env: - RUSTFLAGS: "-Ctarget-cpu=native" + - name: Generate code coverage + run: | + cargo +nightly tarpaulin --verbose --all-features --workspace --timeout 120 --out xml - name: Upload to codecov.io - uses: codecov/codecov-action@v2 + uses: codecov/codecov-action@v4 with: - files: ./cobertura.xml + token: ${{secrets.CODECOV_TOKEN}} + fail_ci_if_error: true diff --git a/Cargo.toml b/Cargo.toml index e9aa735..0824851 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,8 +18,8 @@ all-features = true [features] default = ["std"] -std = ["thiserror", "winapi/std"] -nightly_allocator_api = [] +std = ["dep:thiserror", "allocator-api2/std"] +nightly_allocator_api = ["allocator-api2/nightly"] nightly_core_intrinsics = [] # even though stabilised, still necessary for avx512 nightly_stdsimd = [] @@ -33,20 +33,23 @@ nightly = [ "nightly_strict_provenance", ] # required features to run tests; additional features enable more tests -dev = ["std", "nightly_allocator_api"] +dev = ["std"] [dependencies] +allocator-api2 = { version = "0.2", default-features = false } cfg-if = "1.0" -libc = "0.2" mirai-annotations = "1.12" sptr = "0.3" -thiserror = {version = "1.0", optional = true} +thiserror = { version = "1.0", optional = true } + +[target.'cfg(unix)'.dependencies] +rustix = { version = "0.38", features = ["mm", "param"] } [target.'cfg(windows)'.dependencies] -winapi = {version = "0.3.9", features = ["impl-default", "memoryapi", "sysinfoapi"]} +windows = { version = "0.56.0", features = ["Win32_System_SystemInformation", "Win32_System_Memory"] } [dev-dependencies] -criterion = "0.3" +criterion = "0.5" [[bench]] name = "bench_zeroizers" diff --git a/benches/bench_zeroizers.rs b/benches/bench_zeroizers.rs index 2c62483..47ac729 100644 --- a/benches/bench_zeroizers.rs +++ b/benches/bench_zeroizers.rs @@ -1,13 +1,6 @@ use criterion::{criterion_group, criterion_main, Criterion}; -#[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))] -use secmem_alloc::zeroize::AsmRepStosZeroizer; -#[cfg(all(target_arch = "x86_64", target_feature = "avx"))] -use secmem_alloc::zeroize::X86_64AvxZeroizer; 
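[Note on the benchmark hunk above] The benches now exercise a `MemsetAsmBarierZeroizer` in place of `LibcZeroizer` and the plain volatile-write zeroizers. The diff does not show that type's implementation; the following is only a sketch of the general "memset followed by an inline-asm optimization barrier" technique its name suggests, with a hypothetical function name.

```rust
/// Sketch of a "memset then asm barrier" zeroizer, the general technique the
/// new `MemsetAsmBarierZeroizer` name suggests; illustrative only, not the
/// crate's implementation.
///
/// # Safety
/// `ptr` must be valid for writes of `len` bytes.
pub unsafe fn zeroize_memset_asm_barrier(ptr: *mut u8, len: usize) {
    // Ordinary, freely optimizable memset...
    unsafe { core::ptr::write_bytes(ptr, 0, len) };
    // ...followed by an empty asm block that takes the pointer and may read
    // memory (no `nomem`/`readonly` option), so the compiler cannot discard
    // the stores above as dead writes.
    unsafe {
        core::arch::asm!("/* {0} */", in(reg) ptr, options(nostack, preserves_flags));
    }
}

fn main() {
    let mut secret = [0xAAu8; 32];
    // SAFETY: the buffer is valid for writes of its full length.
    unsafe { zeroize_memset_asm_barrier(secret.as_mut_ptr(), secret.len()) };
    assert!(secret.iter().all(|&b| b == 0));
}
```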
-#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] -use secmem_alloc::zeroize::X86_64Sse2Zeroizer; use secmem_alloc::zeroize::{ - LibcZeroizer, MemZeroizer, VolatileMemsetZeroizer, VolatileWrite8Zeroizer, - VolatileWriteZeroizer, + MemZeroizer, MemsetAsmBarierZeroizer, VolatileMemsetZeroizer, VolatileWrite8Zeroizer, }; fn zeroize_b127(z: Z, array: &mut [u8; 127]) { @@ -50,21 +43,9 @@ macro_rules! bench_zeroizers { $cgroup.bench_function("VolatileMemsetZeroizer", |b| { b.iter(|| $bench_function(VolatileMemsetZeroizer, &mut $array.0)) }); - $cgroup.bench_function("LibcZeroizer", |b| { - b.iter(|| $bench_function(LibcZeroizer, &mut $array.0)) + $cgroup.bench_function("MemsetAsmBarierZeroizer", |b| { + b.iter(|| $bench_function(MemsetAsmBarierZeroizer, &mut $array.0)) }); - $cgroup.bench_function("VolatileWriteZeroizer", |b| { - b.iter(|| $bench_function(VolatileWriteZeroizer, &mut $array.0)) - }); - $cgroup.bench_function("VolatileWrite8Zeroizer", |b| { - b.iter(|| $bench_function(VolatileWrite8Zeroizer, &mut $array.0)) - }); - #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] - { - $cgroup.bench_function("X86_64Sse2Zeroizer", |b| { - b.iter(|| $bench_function(X86_64Sse2Zeroizer, &mut $array.0)) - }); - } #[cfg(all(target_arch = "x86_64", target_feature = "avx"))] { $cgroup.bench_function("X86_64AvxZeroizer", |b| { diff --git a/src/allocator_api.rs b/src/allocator_api.rs deleted file mode 100644 index 825054e..0000000 --- a/src/allocator_api.rs +++ /dev/null @@ -1,437 +0,0 @@ -//! Nightly allocator api, code copied from the standard library. -// Copyright (c) 2021 rust standard library contributors -// Dual-licensed under Apache 2.0 and MIT licenses -// Code is slightly modified to make it work on stable rust. - -use crate::util::nonnull_as_mut_ptr; -use core::alloc::Layout; -use core::fmt; -use core::ptr::{self, NonNull}; -use mirai_annotations::debug_checked_precondition; - -/// The `AllocError` error indicates an allocation failure -/// that may be due to resource exhaustion or to -/// something wrong when combining the given input arguments with this -/// allocator. -#[derive(Copy, Clone, PartialEq, Eq, Debug)] -pub struct AllocError; - -impl fmt::Display for AllocError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str("memory allocation failed") - } -} - -/// An implementation of `Allocator` can allocate, grow, shrink, and deallocate -/// arbitrary blocks of data described via [`Layout`][]. -/// -/// `Allocator` is designed to be implemented on ZSTs, references, or smart -/// pointers because having an allocator like `MyAlloc([u8; N])` cannot be -/// moved, without updating the pointers to the allocated memory. -/// -/// Unlike [`GlobalAlloc`][], zero-sized allocations are allowed in `Allocator`. -/// If an underlying allocator does not support this (like jemalloc) or return a -/// null pointer (such as `libc::malloc`), this must be caught by the -/// implementation. -/// -/// ### Currently allocated memory -/// -/// Some of the methods require that a memory block be *currently allocated* via -/// an allocator. This means that: -/// -/// * the starting address for that memory block was previously returned by -/// [`allocate`], [`grow`], or [`shrink`], and -/// -/// * the memory block has not been subsequently deallocated, where blocks are -/// either deallocated directly by being passed to [`deallocate`] or were -/// changed by being passed to [`grow`] or [`shrink`] that returns `Ok`. 
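[Note on the removal of src/allocator_api.rs] The vendored copy of the unstable allocator API deleted here is superseded by the new `allocator-api2` dependency from the Cargo.toml hunk above, which exposes the same `Allocator`/`AllocError` items on stable. A minimal consumption sketch, assuming downstream code simply switches imports from `secmem_alloc::allocator_api` to `allocator_api2::alloc`:

```rust
use allocator_api2::alloc::{AllocError, Allocator, Global};
use core::alloc::Layout;

fn roundtrip() -> Result<(), AllocError> {
    let layout = Layout::array::<u8>(64).expect("valid layout");
    // `allocate` returns a `NonNull<[u8]>` covering at least `layout.size()` bytes.
    let block = Global.allocate(layout)?;
    // SAFETY: `block` was just allocated by `Global` with `layout`.
    unsafe { Global.deallocate(block.cast(), layout) };
    Ok(())
}

fn main() {
    roundtrip().expect("allocation failed");
}
```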
If -/// `grow` or `shrink` have returned `Err`, the passed pointer remains valid. -/// -/// [`allocate`]: Allocator::allocate -/// [`grow`]: Allocator::grow -/// [`shrink`]: Allocator::shrink -/// [`deallocate`]: Allocator::deallocate -/// -/// ### Memory fitting -/// -/// Some of the methods require that a layout *fit* a memory block. What it -/// means for a layout to "fit" a memory block means (or equivalently, for a -/// memory block to "fit" a layout) is that the following conditions must hold: -/// -/// * The block must be allocated with the same alignment as [`layout.align()`], -/// and -/// -/// * The provided [`layout.size()`] must fall in the range `min ..= max`, -/// where: -/// - `min` is the size of the layout most recently used to allocate the -/// block, and -/// - `max` is the latest actual size returned from [`allocate`], [`grow`], or -/// [`shrink`]. -/// -/// [`layout.align()`]: Layout::align -/// [`layout.size()`]: Layout::size -/// -/// # Safety -/// -/// * Memory blocks returned from an allocator must point to valid memory and -/// retain their validity until the instance and all of its clones are -/// dropped, -/// -/// * cloning or moving the allocator must not invalidate memory blocks returned -/// from this allocator. A cloned allocator must behave like the same -/// allocator, and -/// -/// * any pointer to a memory block which is [*currently allocated*] may be -/// passed to any other method of the allocator. -/// -/// [*currently allocated*]: #currently-allocated-memory -pub unsafe trait Allocator { - /// Attempts to allocate a block of memory. - /// - /// On success, returns a [`NonNull<[u8]>`][NonNull] meeting the size and - /// alignment guarantees of `layout`. - /// - /// The returned block may have a larger size than specified by - /// `layout.size()`, and may or may not have its contents initialized. - /// - /// # Errors - /// - /// Returning `Err` indicates that either memory is exhausted or `layout` - /// does not meet allocator's size or alignment constraints. - /// - /// Implementations are encouraged to return `Err` on memory exhaustion - /// rather than panicking or aborting, but this is not a strict - /// requirement. (Specifically: it is *legal* to implement this trait - /// atop an underlying native allocation library that aborts on memory - /// exhaustion.) - /// - /// Clients wishing to abort computation in response to an allocation error - /// are encouraged to call the [`handle_alloc_error`] function, rather - /// than directly invoking `panic!` or similar. - /// - /// [`handle_alloc_error`]: ../../alloc/alloc/fn.handle_alloc_error.html - fn allocate(&self, layout: Layout) -> Result, AllocError>; - - /// Behaves like `allocate`, but also ensures that the returned memory is - /// zero-initialized. - /// - /// # Errors - /// - /// Returning `Err` indicates that either memory is exhausted or `layout` - /// does not meet allocator's size or alignment constraints. - /// - /// Implementations are encouraged to return `Err` on memory exhaustion - /// rather than panicking or aborting, but this is not a strict - /// requirement. (Specifically: it is *legal* to implement this trait - /// atop an underlying native allocation library that aborts on memory - /// exhaustion.) - /// - /// Clients wishing to abort computation in response to an allocation error - /// are encouraged to call the [`handle_alloc_error`] function, rather - /// than directly invoking `panic!` or similar. 
- /// - /// [`handle_alloc_error`]: ../../alloc/alloc/fn.handle_alloc_error.html - fn allocate_zeroed(&self, layout: Layout) -> Result, AllocError> { - let ptr = self.allocate(layout)?; - // SAFETY: `alloc` returns a valid memory block - unsafe { nonnull_as_mut_ptr(ptr).write_bytes(0, ptr.as_ref().len()) } - Ok(ptr) - } - - /// Deallocates the memory referenced by `ptr`. - /// - /// # Safety - /// - /// * `ptr` must denote a block of memory [*currently allocated*] via this - /// allocator, and - /// * `layout` must [*fit*] that block of memory. - /// - /// [*currently allocated*]: #currently-allocated-memory - /// [*fit*]: #memory-fitting - unsafe fn deallocate(&self, ptr: NonNull, layout: Layout); - - /// Attempts to extend the memory block. - /// - /// Returns a new [`NonNull<[u8]>`][NonNull] containing a pointer and the - /// actual size of the allocated memory. The pointer is suitable for - /// holding data described by `new_layout`. To accomplish - /// this, the allocator may extend the allocation referenced by `ptr` to fit - /// the new layout. - /// - /// If this returns `Ok`, then ownership of the memory block referenced by - /// `ptr` has been transferred to this allocator. The memory may or may - /// not have been freed, and should be considered unusable unless it was - /// transferred back to the caller again via the return value - /// of this method. - /// - /// If this method returns `Err`, then ownership of the memory block has not - /// been transferred to this allocator, and the contents of the memory - /// block are unaltered. - /// - /// # Safety - /// - /// * `ptr` must denote a block of memory [*currently allocated*] via this - /// allocator. - /// * `old_layout` must [*fit*] that block of memory (The `new_layout` - /// argument need not fit it.). - /// * `new_layout.size()` must be greater than or equal to - /// `old_layout.size()`. - /// - /// [*currently allocated*]: #currently-allocated-memory - /// [*fit*]: #memory-fitting - /// - /// # Errors - /// - /// Returns `Err` if the new layout does not meet the allocator's size and - /// alignment constraints of the allocator, or if growing otherwise - /// fails. - /// - /// Implementations are encouraged to return `Err` on memory exhaustion - /// rather than panicking or aborting, but this is not a strict - /// requirement. (Specifically: it is *legal* to implement this trait - /// atop an underlying native allocation library that aborts on memory - /// exhaustion.) - /// - /// Clients wishing to abort computation in response to an allocation error - /// are encouraged to call the [`handle_alloc_error`] function, rather - /// than directly invoking `panic!` or similar. - /// - /// [`handle_alloc_error`]: ../../alloc/alloc/fn.handle_alloc_error.html - unsafe fn grow( - &self, - ptr: NonNull, - old_layout: Layout, - new_layout: Layout, - ) -> Result, AllocError> { - debug_checked_precondition!( - new_layout.size() >= old_layout.size(), - "`new_layout.size()` must be greater than or equal to `old_layout.size()`" - ); - - let new_ptr = self.allocate(new_layout)?; - - // SAFETY: because `new_layout.size()` must be greater than or equal to - // `old_layout.size()`, both the old and new memory allocation are valid for - // reads and writes for `old_layout.size()` bytes. Also, because the old - // allocation wasn't yet deallocated, it cannot overlap `new_ptr`. Thus, - // the call to `copy_nonoverlapping` is safe. The safety contract for - // `dealloc` must be upheld by the caller. 
- unsafe { - ptr::copy_nonoverlapping(ptr.as_ptr(), nonnull_as_mut_ptr(new_ptr), old_layout.size()); - self.deallocate(ptr, old_layout); - } - - Ok(new_ptr) - } - - /// Behaves like `grow`, but also ensures that the new contents are set to - /// zero before being returned. - /// - /// The memory block will contain the following contents after a successful - /// call to `grow_zeroed`: - /// * Bytes `0..old_layout.size()` are preserved from the original - /// allocation. - /// * Bytes `old_layout.size()..old_size` will either be preserved or - /// zeroed, depending on the allocator implementation. `old_size` refers - /// to the size of the memory block prior to the `grow_zeroed` call, - /// which may be larger than the size that was originally requested when - /// it was allocated. - /// * Bytes `old_size..new_size` are zeroed. `new_size` refers to the size - /// of the memory block returned by the `grow_zeroed` call. - /// - /// # Safety - /// - /// * `ptr` must denote a block of memory [*currently allocated*] via this - /// allocator. - /// * `old_layout` must [*fit*] that block of memory (The `new_layout` - /// argument need not fit it.). - /// * `new_layout.size()` must be greater than or equal to - /// `old_layout.size()`. - /// - /// [*currently allocated*]: #currently-allocated-memory - /// [*fit*]: #memory-fitting - /// - /// # Errors - /// - /// Returns `Err` if the new layout does not meet the allocator's size and - /// alignment constraints of the allocator, or if growing otherwise - /// fails. - /// - /// Implementations are encouraged to return `Err` on memory exhaustion - /// rather than panicking or aborting, but this is not a strict - /// requirement. (Specifically: it is *legal* to implement this trait - /// atop an underlying native allocation library that aborts on memory - /// exhaustion.) - /// - /// Clients wishing to abort computation in response to an allocation error - /// are encouraged to call the [`handle_alloc_error`] function, rather - /// than directly invoking `panic!` or similar. - /// - /// [`handle_alloc_error`]: ../../alloc/alloc/fn.handle_alloc_error.html - unsafe fn grow_zeroed( - &self, - ptr: NonNull, - old_layout: Layout, - new_layout: Layout, - ) -> Result, AllocError> { - debug_checked_precondition!( - new_layout.size() >= old_layout.size(), - "`new_layout.size()` must be greater than or equal to `old_layout.size()`" - ); - - let new_ptr = self.allocate_zeroed(new_layout)?; - - // SAFETY: because `new_layout.size()` must be greater than or equal to - // `old_layout.size()`, both the old and new memory allocation are valid for - // reads and writes for `old_layout.size()` bytes. Also, because the old - // allocation wasn't yet deallocated, it cannot overlap `new_ptr`. Thus, - // the call to `copy_nonoverlapping` is safe. The safety contract for - // `dealloc` must be upheld by the caller. - unsafe { - ptr::copy_nonoverlapping(ptr.as_ptr(), nonnull_as_mut_ptr(new_ptr), old_layout.size()); - self.deallocate(ptr, old_layout); - } - - Ok(new_ptr) - } - - /// Attempts to shrink the memory block. - /// - /// Returns a new [`NonNull<[u8]>`][NonNull] containing a pointer and the - /// actual size of the allocated memory. The pointer is suitable for - /// holding data described by `new_layout`. To accomplish - /// this, the allocator may shrink the allocation referenced by `ptr` to fit - /// the new layout. - /// - /// If this returns `Ok`, then ownership of the memory block referenced by - /// `ptr` has been transferred to this allocator. 
The memory may or may - /// not have been freed, and should be considered unusable unless it was - /// transferred back to the caller again via the return value - /// of this method. - /// - /// If this method returns `Err`, then ownership of the memory block has not - /// been transferred to this allocator, and the contents of the memory - /// block are unaltered. - /// - /// # Safety - /// - /// * `ptr` must denote a block of memory [*currently allocated*] via this - /// allocator. - /// * `old_layout` must [*fit*] that block of memory (The `new_layout` - /// argument need not fit it.). - /// * `new_layout.size()` must be smaller than or equal to - /// `old_layout.size()`. - /// - /// [*currently allocated*]: #currently-allocated-memory - /// [*fit*]: #memory-fitting - /// - /// # Errors - /// - /// Returns `Err` if the new layout does not meet the allocator's size and - /// alignment constraints of the allocator, or if shrinking otherwise - /// fails. - /// - /// Implementations are encouraged to return `Err` on memory exhaustion - /// rather than panicking or aborting, but this is not a strict - /// requirement. (Specifically: it is *legal* to implement this trait - /// atop an underlying native allocation library that aborts on memory - /// exhaustion.) - /// - /// Clients wishing to abort computation in response to an allocation error - /// are encouraged to call the [`handle_alloc_error`] function, rather - /// than directly invoking `panic!` or similar. - /// - /// [`handle_alloc_error`]: ../../alloc/alloc/fn.handle_alloc_error.html - unsafe fn shrink( - &self, - ptr: NonNull, - old_layout: Layout, - new_layout: Layout, - ) -> Result, AllocError> { - debug_checked_precondition!( - new_layout.size() <= old_layout.size(), - "`new_layout.size()` must be smaller than or equal to `old_layout.size()`" - ); - - let new_ptr = self.allocate(new_layout)?; - - // SAFETY: because `new_layout.size()` must be lower than or equal to - // `old_layout.size()`, both the old and new memory allocation are valid for - // reads and writes for `new_layout.size()` bytes. Also, because the old - // allocation wasn't yet deallocated, it cannot overlap `new_ptr`. Thus, - // the call to `copy_nonoverlapping` is safe. The safety contract for - // `dealloc` must be upheld by the caller. - unsafe { - ptr::copy_nonoverlapping(ptr.as_ptr(), nonnull_as_mut_ptr(new_ptr), new_layout.size()); - self.deallocate(ptr, old_layout); - } - - Ok(new_ptr) - } - - /// Creates a "by reference" adaptor for this instance of `Allocator`. - /// - /// The returned adaptor also implements `Allocator` and will simply borrow - /// this. 
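[Note on the `grow`/`shrink` contract documented above] The removed docs require that the old layout fit the block, that `new_layout.size() >= old_layout.size()`, and that ownership of the old block return to the allocator on success. A hedged caller-side sketch of that contract, written against the equivalent `allocator-api2` trait rather than the deleted vendored one:

```rust
use allocator_api2::alloc::{AllocError, Allocator, Global};
use core::alloc::Layout;

fn grow_example() -> Result<(), AllocError> {
    let old = Layout::array::<u8>(64).expect("valid layout");
    let new = Layout::array::<u8>(128).expect("valid layout");

    let block = Global.allocate(old)?;
    // SAFETY: `block` is currently allocated by `Global`, `old` fits it, and
    // `new.size() >= old.size()`. On success the old pointer must not be used.
    let grown = unsafe { Global.grow(block.cast(), old, new)? };
    // SAFETY: `grown` is currently allocated by `Global` and `new` fits it.
    unsafe { Global.deallocate(grown.cast(), new) };
    Ok(())
}

fn main() {
    grow_example().expect("grow failed");
}
```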
- #[inline(always)] - fn by_ref(&self) -> &Self - where - Self: Sized, - { - self - } -} - -unsafe impl Allocator for &A -where - A: Allocator + ?Sized, -{ - #[inline] - fn allocate(&self, layout: Layout) -> Result, AllocError> { - (**self).allocate(layout) - } - - #[inline] - fn allocate_zeroed(&self, layout: Layout) -> Result, AllocError> { - (**self).allocate_zeroed(layout) - } - - #[inline] - unsafe fn deallocate(&self, ptr: NonNull, layout: Layout) { - // SAFETY: the safety contract must be upheld by the caller - unsafe { (**self).deallocate(ptr, layout) } - } - - #[inline] - unsafe fn grow( - &self, - ptr: NonNull, - old_layout: Layout, - new_layout: Layout, - ) -> Result, AllocError> { - // SAFETY: the safety contract must be upheld by the caller - unsafe { (**self).grow(ptr, old_layout, new_layout) } - } - - #[inline] - unsafe fn grow_zeroed( - &self, - ptr: NonNull, - old_layout: Layout, - new_layout: Layout, - ) -> Result, AllocError> { - // SAFETY: the safety contract must be upheld by the caller - unsafe { (**self).grow_zeroed(ptr, old_layout, new_layout) } - } - - #[inline] - unsafe fn shrink( - &self, - ptr: NonNull, - old_layout: Layout, - new_layout: Layout, - ) -> Result, AllocError> { - // SAFETY: the safety contract must be upheld by the caller - unsafe { (**self).shrink(ptr, old_layout, new_layout) } - } -} diff --git a/src/boxed.rs b/src/boxed.rs deleted file mode 100644 index 07c978f..0000000 --- a/src/boxed.rs +++ /dev/null @@ -1,294 +0,0 @@ -//! Module providing a simple replacement for [`std::boxed::Box`] with allocator -//! support. -//! -//! # Motivation -//! - The allocator api of [`std::boxed::Box`] is still unstable (at the time of -//! writing). The [`Box`] provided by this module can be used on stable with -//! allocators. -//! - [Issue #78459](https://github.com/rust-lang/rust/issues/78459) prevents -//! the use of non-zero sized allocators with [`std::boxed::Box`] even on -//! nightly. -// some code and documentation is copied from the standard library - -use crate::allocator_api::{AllocError, Allocator}; -use alloc::alloc::handle_alloc_error; -use core::alloc::Layout; -use core::marker::PhantomData; -use core::mem::{ManuallyDrop, MaybeUninit}; -use core::ops::{Deref, DerefMut}; -use core::ptr::NonNull; - -/// A replacement for [`std::boxed::Box`] that works with custom allocators. -/// -/// See the module-level documentation for more. -pub struct Box { - /// Pointer to the inner value, allocated with `self.alloc`. - // Safety: must always point to a valid instance of `T`. - ptr: NonNull, - // we own an instance of type `T` - _phantom_heapmem: PhantomData, - /// Allocator used for heap allocation - alloc: A, -} - -impl Box { - /// Create [`Box`] from a pointer and an allocator. - /// - /// # Safety - /// - `ptr` has to be allocated using the allocator `alloc` (and not yet - /// deallocated) - /// - `ptr` must point to a valid instance of `T` (otherwise using e.g. - /// [`Deref::deref`] on the resulting [`Box`] is unsound) - /// - in particular `ptr` must point to an allocation that fits - /// `Layout::for_value(*ptr)` - unsafe fn from_raw_parts(ptr: NonNull, alloc: A) -> Self { - Self { - ptr, - alloc, - _phantom_heapmem: PhantomData::, - } - } - - /// Destruct a [`Box`] into the pointer and allocator without dropping the - /// [`Box`]. 
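[Note on the removal of src/boxed.rs] The stable `Box` replacement deleted below existed only because the std allocator API was unstable; `allocator-api2` ships an equivalent allocator-aware `Box`, which is presumably the intended replacement (an assumption, since the diff does not show migrated call sites). A minimal sketch mirroring the removed `new_in` examples:

```rust
// Assumed stand-in for the removed `secmem_alloc::boxed::Box`.
use allocator_api2::alloc::Global;
use allocator_api2::boxed::Box;

fn main() {
    // Allocate in an explicit allocator instead of the global default,
    // as the removed `Box::new_in` examples did.
    let five = Box::new_in(5u32, Global);
    assert_eq!(*five, 5);
}
```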
- fn into_raw_parts(self) -> (NonNull, A) { - let ptr = self.ptr; - let me = ManuallyDrop::new(self); - let alloc_ptr = &me.deref().alloc as *const A; - // SAFETY: `alloc_ptr` is valid for reads, properly aligned, initialised... - // SAFETY: the contents of `me` are never dropped so `alloc` can be safely - // dropped later - let alloc = unsafe { alloc_ptr.read() }; - (ptr, alloc) - } -} - -// documentation and implementations copied from the standard library -// Copyright (c) 2021 rust standard library contributors -// slight modifications to accomodate for missing APIs, different `Box` -// definition -impl Box { - /// Allocates memory in the given allocator then places `x` into it. - /// - /// This doesn't actually allocate if `T` is zero-sized. - /// - /// # Examples - /// - /// ``` - /// #![feature(allocator_api)] - /// - /// use secmem_alloc::boxed::Box; - /// use std::alloc::System; - /// - /// let five = Box::new_in(5, System); - /// ``` - #[inline] - pub fn new_in(x: T, alloc: A) -> Self { - let mut boxed = Self::new_uninit_in(alloc); - unsafe { - boxed.as_mut_ptr().write(x); - boxed.assume_init() - } - } - - /// Allocates memory in the given allocator then places `x` into it, - /// returning an error if the allocation fails - /// - /// This doesn't actually allocate if `T` is zero-sized. - /// - /// # Examples - /// - /// ``` - /// #![feature(allocator_api)] - /// - /// use secmem_alloc::boxed::Box; - /// use std::alloc::System; - /// - /// let five = Box::try_new_in(5, System)?; - /// # Ok::<(), core::alloc::AllocError>(()) - /// ``` - #[inline] - pub fn try_new_in(x: T, alloc: A) -> Result { - let mut boxed = Self::try_new_uninit_in(alloc)?; - unsafe { - boxed.as_mut_ptr().write(x); - Ok(boxed.assume_init()) - } - } - - /// Constructs a new box with uninitialized contents in the provided - /// allocator. - /// - /// # Examples - /// - /// ``` - /// #![feature(allocator_api)] - /// - /// use secmem_alloc::boxed::Box; - /// use std::alloc::System; - /// - /// let mut five = Box::::new_uninit_in(System); - /// - /// let five = unsafe { - /// // Deferred initialization: - /// five.as_mut_ptr().write(5); - /// - /// five.assume_init() - /// }; - /// - /// assert_eq!(*five, 5) - /// ``` - pub fn new_uninit_in(alloc: A) -> Box, A> { - let layout = Layout::new::>(); - // NOTE: Prefer match over unwrap_or_else since closure sometimes not - // inlineable. That would make code size bigger. 
- match Box::try_new_uninit_in(alloc) { - Ok(m) => m, - Err(_) => handle_alloc_error(layout), - } - } - - /// Constructs a new box with uninitialized contents in the provided - /// allocator, returning an error if the allocation fails - /// - /// # Examples - /// - /// ``` - /// #![feature(allocator_api)] - /// - /// use secmem_alloc::boxed::Box; - /// use std::alloc::System; - /// - /// let mut five = Box::::try_new_uninit_in(System)?; - /// - /// let five = unsafe { - /// // Deferred initialization: - /// five.as_mut_ptr().write(5); - /// - /// five.assume_init() - /// }; - /// - /// assert_eq!(*five, 5); - /// # Ok::<(), core::alloc::AllocError>(()) - /// ``` - pub fn try_new_uninit_in(alloc: A) -> Result, A>, AllocError> { - let layout = Layout::new::>(); - let ptr: NonNull> = alloc.allocate(layout)?.cast(); - unsafe { Ok(Box::from_raw_parts(ptr, alloc)) } - } -} - -// documentation and implementations copied from the standard library -// Copyright (c) 2021 rust standard library contributors -// slight modifications to accomodate for missing APIs, different `Box` -// definition -impl Box, A> { - /// Converts to `Box`. - /// - /// # Safety - /// - /// As with [`MaybeUninit::assume_init`], - /// it is up to the caller to guarantee that the value - /// really is in an initialized state. - /// Calling this when the content is not yet fully initialized - /// causes immediate undefined behavior. - /// - /// # Examples - /// - /// ``` - /// #![feature(allocator_api)] - /// - /// use secmem_alloc::boxed::Box; - /// use std::alloc::System; - /// - /// let mut five = Box::::new_uninit_in(System); - /// - /// let five: Box = unsafe { - /// // Deferred initialization: - /// five.as_mut_ptr().write(5); - /// - /// five.assume_init() - /// }; - /// - /// assert_eq!(*five, 5) - /// ``` - #[inline] - pub unsafe fn assume_init(self) -> Box { - let (ptr, alloc) = Box::into_raw_parts(self); - let ptr_init: NonNull = ptr.cast(); - unsafe { Box::from_raw_parts(ptr_init, alloc) } - } -} - -impl Deref for Box { - type Target = T; - - fn deref(&self) -> &T { - // SAFETY: `self.ptr` always points to a valid instance of `T` - unsafe { &*self.ptr.as_ptr() } - } -} - -impl DerefMut for Box { - fn deref_mut(&mut self) -> &mut T { - // SAFETY: `self.ptr` always points to a valid instance of `T` - unsafe { &mut *self.ptr.as_ptr() } - } -} - -impl Drop for Box { - fn drop(&mut self) { - // obtain the Layout of the value stored in this Box - let ref_to_inner: &T = self.deref(); - let layout = Layout::for_value::(ref_to_inner); - // `self.ptr` points to an allocation that fits `layout` - - // SAFETY: `self.ptr.as_ptr()` is valid for reads and writes, properly aligned - unsafe { - self.ptr.as_ptr().drop_in_place(); - } - // SAFETY: from now on it is unsound to dereference `self.ptr` (hence `self`) - - // deallocate memory - let ptr: NonNull = self.ptr.cast(); - // SAFETY: `self.ptr` was allocated with allocator `self.alloc` and fits - // `layout` - unsafe { - self.alloc.deallocate(ptr, layout); - } - // `self.ptr` is now dangling, but this is sound since `NonNull` is - // not `Drop` `self.alloc` is dropped automatically - } -} - -#[cfg(test)] -mod tests { - use super::Box; - use std::alloc::System; - use std::mem::MaybeUninit; - - #[test] - fn new_in() { - let boxed = Box::new_in([37; 256], System); - assert_eq!(*boxed, [37; 256]); - } - - #[test] - fn try_new_in() { - let boxed = Box::try_new_in([37; 256], System).expect("error creating box"); - assert_eq!(*boxed, [37; 256]); - } - - #[test] - fn uninit_initialise() { 
- let mut boxed: Box, System> = - Box::<[u8; 256], _>::new_uninit_in(System); - unsafe { - // initialise `boxed` - boxed.as_mut_ptr().write([37; 256]); - } - // SAFETY: `boxed` is now initialised - let boxed: Box<[u8; 256], System> = unsafe { boxed.assume_init() }; - assert_eq!(*boxed, [37; 256]); - } -} diff --git a/src/internals/mem.rs b/src/internals/mem.rs index 67f60cc..59735c4 100644 --- a/src/internals/mem.rs +++ b/src/internals/mem.rs @@ -1,91 +1,19 @@ //! Helper functions for allocating memory and working with memory pages. -#[cfg(unix)] -use core::ffi::c_void; use core::ptr::NonNull; -#[cfg(unix)] -use libc::{c_int, off_t, size_t}; -#[cfg(feature = "std")] -use thiserror::Error; -#[cfg(windows)] -use winapi::ctypes::c_void; - -/// Return the page size on the running system. -/// -/// Is constant during the entire execution of a process. -// TODO: should we store the page size in a static to avoid repeat FFI calls to -// get the page size? with cross language LTO and static libc linking that -// shouldn't be necessary -pub fn page_size() -> usize { - get_sys_page_size() -} - -cfg_if::cfg_if! { - if #[cfg(miri)] { - /// Page size shim for miri. - #[cfg(not(tarpaulin_include))] - fn get_sys_page_size() -> usize { - 4096 - } - } else if #[cfg(unix)] { - /// Return the page size on the running system by querying libc. - fn get_sys_page_size() -> usize { - unsafe { - // the pagesize must always fit in a `size_t` (`usize`) - #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] - { - libc::sysconf(libc::_SC_PAGESIZE) as size_t - } - } - } - } else if #[cfg(windows)] { - /// Return the page size on the running system by querying kernel32.lib. - fn get_sys_page_size() -> usize { - use winapi::um::sysinfoapi::{LPSYSTEM_INFO, GetSystemInfo, SYSTEM_INFO}; - - let mut sysinfo = SYSTEM_INFO::default(); - let sysinfo_ptr: LPSYSTEM_INFO = &mut sysinfo as *mut SYSTEM_INFO; - // SAFETY: `sysinfo_ptr` points to a valid (empty/all zeros) `SYSTEM_INFO` - unsafe { - GetSystemInfo(sysinfo_ptr) - }; - // the pagesize must always fit in a `usize` (on windows it is a `u32`) - #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] - { - sysinfo.dwPageSize as usize - } - } - } -} - -/// Could not allocate a memory page. -#[derive(Debug, Clone)] -#[cfg_attr(feature = "std", derive(Error))] -#[cfg_attr(feature = "std", error("could not map a memory page"))] -pub struct PageAllocError; - -/// Could not mlock a range of pages. -#[derive(Debug, Clone)] -#[cfg_attr(feature = "std", derive(Error))] -#[cfg_attr( - feature = "std", - error("could not lock the memory page to physical memory") -)] -struct MemLockError; /// An single allocated page of memory. pub struct Page { /// Pointer to the start of the page. page_ptr: NonNull, - ///// This type owns a page of memory as raw bytes - //_phantom_pagemem: PhantomData<[u8]>, /// Size of a memory page. /// /// It is not strictly necessary to store this as it is constant during the /// entire execution of a process. This will therefore at all times /// equal the result of `page_size`. - // TODO: if we decide to store the page size in a static then this field can be removed page_size: usize, + /// This type owns a page of memory as raw bytes + _phantom_pagemem: core::marker::PhantomData<[u8]>, } impl Page { @@ -104,11 +32,6 @@ impl Page { self.page_ptr.as_ptr() } - /// Get a mutable pointer to the start of the memory page. 
- fn as_c_ptr_mut(&self) -> *mut c_void { - self.as_ptr_mut() as *mut c_void - } - /// Get a non-mutable pointer to the start of the memory page. pub fn as_ptr(&self) -> *const u8 { self.page_ptr.as_ptr() as *const u8 @@ -117,270 +40,13 @@ impl Page { cfg_if::cfg_if! { if #[cfg(miri)] { - // miri shim - #[cfg(not(tarpaulin_include))] - impl Drop for Page { - fn drop(&mut self) { - let ptr = self.as_c_ptr_mut(); - let page_size = self.page_size(); - unsafe { - // SAFETY: we allocated/mapped this page in the constructor so it is safe to - // unmap now `munmap` also unlocks a page if it was locked so it is - // not necessary to `munlock` the page if it was locked. - //libc::munmap(ptr, self.page_size()); - std::alloc::dealloc( - ptr as *mut u8, - std::alloc::Layout::from_size_align(page_size, page_size).unwrap(), - ); - } - // SAFETY: `NonNull` and `usize` both do not drop so we need not - // worry about subsequent drops - } - } + mod miri; + pub use miri::PageAllocError; } else if #[cfg(unix)] { - impl Drop for Page { - fn drop(&mut self) { - let ptr = self.as_c_ptr_mut(); - unsafe { - // SAFETY: we allocated/mapped this page in the constructor so it is safe to - // unmap now `munmap` also unlocks a page if it was locked so it is - // not necessary to `munlock` the page if it was locked. - libc::munmap(ptr, self.page_size()); - } - // SAFETY: `NonNull` and `usize` both do not drop so we need not - // worry about subsequent drops - } - } + mod unix; + pub use unix::PageAllocError; } else if #[cfg(windows)] { - impl Drop for Page { - fn drop(&mut self) { - use winapi::um::memoryapi::VirtualFree; - use winapi::um::winnt::MEM_RELEASE; - use winapi::shared::minwindef::LPVOID; - - let ptr: LPVOID = self.as_c_ptr_mut(); - unsafe { - // SAFETY: we allocated/mapped this page in the constructor so it is safe to - // unmap now - VirtualFree(ptr, 0, MEM_RELEASE); - } - // SAFETY: `NonNull` and `usize` both do not drop so we need not - // worry about subsequent drops - } - } - } -} - -cfg_if::cfg_if! 
{ - if #[cfg(miri)] { - // miri shims, better than nothing but not very accurate - #[cfg(not(tarpaulin_include))] - impl Page { - fn alloc_new() -> Result { - let _addr: *mut c_void = core::ptr::null_mut(); - let page_size: size_t = page_size(); - let _prot: c_int = libc::PROT_READ | libc::PROT_WRITE; - // NORESERVE disables backing the memory map with swap space - let _flags = libc::MAP_PRIVATE | libc::MAP_NORESERVE | libc::MAP_ANONYMOUS; - let _fd: c_int = -1; - let _offset: off_t = 0; - - let page_ptr: *mut u8 = unsafe { - //libc::mmap(_addr, page_size, _prot, _flags, _fd, _offset) - std::alloc::alloc_zeroed( - std::alloc::Layout::from_size_align(page_size, page_size).unwrap(), - ) - }; - - if page_ptr.is_null() { - Err(PageAllocError) - } else { - let page_ptr = unsafe { - // SAFETY: we just checked that `page_ptr` is non-null - NonNull::new_unchecked(page_ptr as *mut u8) - }; - Ok(Self { - page_ptr, - page_size, - }) - } - } - - fn mlock(&mut self) -> Result<(), MemLockError> { - let res = { - //libc::mlock(self.as_c_ptr_mut(), self.page_size()) - let _ptr = self.as_c_ptr_mut(); - let _ps = self.page_size(); - 0 - }; - - if res == 0 { - Ok(()) - } else { - Err(MemLockError) - } - } - - pub fn alloc_new_lock() -> Result { - let mut page = Self::alloc_new()?; - // if this fails then `page` is deallocated by it's drop implementation - page.mlock().map_err(|_| PageAllocError)?; - Ok(page) - } - } - } else if #[cfg(unix)] { - impl Page { - /// Allocate a new page of memory using (anonymous) `mmap` with the - /// noreserve flag. - /// - /// The noreserve flag disables swapping of the memory page. As a - /// consequence, the OS may unmap the page of memory, in which case - /// writing to it causes a SIGSEGV. Therefore, the page - /// should be mlocked before actual use. - /// - /// # Errors - /// The function returns an `PageAllocError` if the `mmap` call fails. - fn alloc_new_noreserve() -> Result { - let addr: *mut c_void = core::ptr::null_mut(); - let page_size: size_t = page_size(); - let prot: c_int = libc::PROT_READ | libc::PROT_WRITE; - // NORESERVE disables backing the memory map with swap space - // it is not available (anymore) on FreeBSD/DragonFlyBSD (never implemented) - // also unimplemented on other BSDs, but the flag is there for compat... - // FreeBSD + DragonFlyBSD have a `MAP_NOCORE` flag which excludes this memory - // from being included in a core dump (but ideally, disable core dumps entirely) - cfg_if::cfg_if!{ - if #[cfg(any(target_os = "freebsd", target_os = "dragonfly"))] { - let flags = libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | libc::MAP_NOCORE; - } else { - let flags = libc::MAP_PRIVATE | libc::MAP_NORESERVE | libc::MAP_ANONYMOUS; - } - } - - let fd: c_int = -1; - let offset: off_t = 0; - - let page_ptr: *mut c_void = unsafe { - libc::mmap(addr, page_size, prot, flags, fd, offset) - }; - - if page_ptr.is_null() || page_ptr == libc::MAP_FAILED { - Err(PageAllocError) - } else { - let page_ptr = unsafe { - // SAFETY: we just checked that `page_ptr` is non-null - NonNull::new_unchecked(page_ptr as *mut u8) - }; - Ok(Self { - page_ptr, - page_size, - }) - } - } - - /// Lock the memory page to physical memory. - /// - /// When this function returns successfully then the memory page is - /// guarantied to be backed by physical memory, i.e. not (only) swapped. - /// In combination with the noreserve flag during the allocation, this - /// guaranties the memory to not be swapped at all, except on hibernation - /// or memory starvation. 
This is really the best we can achieve. If memory - /// contents are really secret than there is no other solution than to - /// use a swap space encrypted with an ephemeral secret key, and - /// hibernation should be disabled (both on the OS level). - fn mlock(&mut self) -> Result<(), MemLockError> { - let res = unsafe { libc::mlock(self.as_c_ptr_mut(), self.page_size()) }; - - if res == 0 { - Ok(()) - } else { - Err(MemLockError) - } - } - - /// Allocate a new page of memory using (anonymous) `mmap` with the - /// noreserve flag and mlock page. - /// - /// The noreserve flag disables swapping of the memory page. The page is - /// then mlocked to force it into physical memory. - /// - /// # Errors - /// The function returns an `PageAllocError` if the `mmap` or `mlock` call - /// fails. - pub fn alloc_new_lock() -> Result { - let mut page = Self::alloc_new_noreserve()?; - page.mlock().map_err(|_| PageAllocError)?; - Ok(page) - } - } - } else if #[cfg(windows)] { - impl Page { - /// Allocate a new page of memory using `VirtualAlloc`. - /// - /// # Errors - /// The function returns an `PageAllocError` if the `VirtualAlloc` call fails. - fn alloc_new() -> Result { - use winapi::um::memoryapi::VirtualAlloc; - use winapi::um::winnt::{MEM_COMMIT, MEM_RESERVE, PAGE_READWRITE}; - use winapi::shared::{minwindef::{DWORD, LPVOID}, basetsd::SIZE_T}; - - let addr: LPVOID = core::ptr::null_mut(); - let page_size: SIZE_T = page_size(); - let alloc_type: DWORD = MEM_RESERVE | MEM_COMMIT; - let protect: DWORD = PAGE_READWRITE; - - let page_ptr: LPVOID = unsafe { - VirtualAlloc(addr, page_size, alloc_type, protect) - }; - - if page_ptr.is_null() { - Err(PageAllocError) - } else { - let page_ptr = unsafe { - // SAFETY: we just checked that `page_ptr` is non-null - NonNull::new_unchecked(page_ptr as *mut u8) - }; - Ok(Self { - page_ptr, - page_size, - }) - } - } - - /// Lock the memory page to physical memory. - /// - /// When this function returns successfully then the memory page is - /// guarantied to be backed by physical memory, i.e. not (only) swapped. - /// This guaranties the memory to not be swapped at all, except on hibernation - /// or memory starvation. This is really the best we can achieve. If memory - /// contents are really secret than there is no other solution than to - /// use a swap space encrypted with an ephemeral secret key, and - /// hibernation should be disabled (both on the OS level). - fn lock(&mut self) -> Result<(), MemLockError> { - use winapi::um::memoryapi::VirtualLock; - use winapi::shared::minwindef::BOOL; - - let res: BOOL = unsafe { VirtualLock(self.as_c_ptr_mut(), self.page_size()) }; - - if res == 0 { - Err(MemLockError) - } else { - Ok(()) - } - } - - /// Allocate a new page of memory using `VirtualAlloc` and `VirtualLock` page. - /// - /// The page is locked to force it into physical memory. - /// - /// # Errors - /// The function returns an `PageAllocError` if the `VirtualAlloc` or `VirtualLock` - /// call fails. - pub fn alloc_new_lock() -> Result { - let mut page = Self::alloc_new()?; - page.lock().map_err(|_| PageAllocError)?; - Ok(page) - } - } + mod windows; + pub use windows::PageAllocError; } } diff --git a/src/internals/mem/miri.rs b/src/internals/mem/miri.rs new file mode 100644 index 0000000..a287bf8 --- /dev/null +++ b/src/internals/mem/miri.rs @@ -0,0 +1,83 @@ +//! Miri shims for memory management. Not accurate, but better than nothing. + +use super::Page; +use core::ptr::NonNull; + +/// Page size shim for miri. 
+#[cfg(not(tarpaulin_include))] +pub fn page_size() -> usize { + 4096 +} + +#[derive(Debug, Clone)] +#[cfg_attr(feature = "std", derive(thiserror::Error))] +pub enum PageAllocError { + #[cfg_attr(feature = "std", error("trying to create invalid layout"))] + Layout(std::alloc::LayoutError), + #[cfg_attr(feature = "std", error("could not allocate memory"))] + Alloc, + #[cfg_attr(feature = "std", error("could not lock memory"))] + Lock, +} + +#[cfg(not(tarpaulin_include))] +impl Page { + fn alloc_new() -> Result { + let page_size = page_size(); + + //libc::mmap(_addr, page_size, _prot, _flags, _fd, _offset) + let layout = std::alloc::Layout::from_size_align(page_size, page_size) + .map_err(|e| PageAllocError::Layout(e))?; + let page_ptr: *mut u8 = unsafe { std::alloc::alloc_zeroed(layout) }; + + if page_ptr.is_null() { + Err(PageAllocError::Alloc) + } else { + let page_ptr = unsafe { + // SAFETY: we just checked that `page_ptr` is non-null + NonNull::new_unchecked(page_ptr as *mut u8) + }; + Ok(Self { + page_ptr, + page_size, + _phantom_pagemem: core::marker::PhantomData, + }) + } + } + + fn mlock(&mut self) -> Result<(), PageAllocError> { + let res = { + //libc::mlock(self.as_c_ptr_mut(), self.page_size()) + let _ptr = self.as_ptr_mut(); + let _ps = self.page_size(); + 0 + }; + + if res == 0 { + Ok(()) + } else { + Err(PageAllocError::Lock) + } + } + + pub fn alloc_new_lock() -> Result { + let mut page = Self::alloc_new()?; + // if this fails then `page` is deallocated by it's drop implementation + page.mlock()?; + Ok(page) + } +} + +#[cfg(not(tarpaulin_include))] +impl Drop for Page { + fn drop(&mut self) { + let ptr = self.as_ptr_mut(); + let page_size = self.page_size(); + + //libc::munmap(ptr, self.page_size()); + let layout = std::alloc::Layout::from_size_align(page_size, page_size).unwrap(); + // SAFETY: we allocated this page in the constructor so it is safe to deallocate + // now. + unsafe { std::alloc::dealloc(ptr, layout) }; + } +} diff --git a/src/internals/mem/unix.rs b/src/internals/mem/unix.rs new file mode 100644 index 0000000..b96f57a --- /dev/null +++ b/src/internals/mem/unix.rs @@ -0,0 +1,114 @@ +//! Unix `mmap` private anonymous memory pages. + +use super::Page; + +use core::ffi::c_void; +use core::ptr::NonNull; + +/// Return the page size on the running system using the `rustix` crate. +pub fn page_size() -> usize { + rustix::param::page_size() +} + +#[derive(Debug, Clone)] +#[cfg_attr(feature = "std", derive(thiserror::Error))] +pub enum PageAllocError { + #[cfg_attr(feature = "std", error("could not map a memory page: {0}"))] + Mmap(rustix::io::Errno), + #[cfg_attr(feature = "std", error("could not lock memory page: {0}"))] + Mlock(rustix::io::Errno), +} + +impl Page { + /// Get a mutable pointer to the start of the memory page. + fn as_c_ptr_mut(&self) -> *mut c_void { + self.as_ptr_mut() as *mut c_void + } + + /// Allocate a new page of memory using (anonymous) `mmap` with the + /// noreserve flag. + /// + /// The noreserve flag disables swapping of the memory page. As a + /// consequence, the OS may unmap the page of memory, in which case + /// writing to it causes a SIGSEGV. Therefore, the page + /// should be mlocked before actual use. + /// + /// # Errors + /// The function returns an `PageAllocError` if the `mmap` call fails. 
+ fn alloc_new_noreserve() -> Result { + use rustix::mm::{MapFlags, ProtFlags}; + + let addr: *mut c_void = core::ptr::null_mut(); + let page_size = page_size(); + let prot = ProtFlags::READ | ProtFlags::WRITE; + // NORESERVE disables backing the memory map with swap space. It requires + // `mlock` to be used on the resulting page before use. Redox, FreeBSD + // and DragonFlyBSD don't have NORESERVE. Other BSDs also don't implement it, + // but it is available for compatibility. FreeBSD and DragonflyBSD have a NOCORE + // flag, which hides the page from core dumps (memory dumps when the process + // crashes). + cfg_if::cfg_if! { + if #[cfg(target_os = "redox")] { + let flags = MapFlags::PRIVATE; + } else if #[cfg(any(target_os = "freebsd", target_os = "dragonfly"))] { + let flags = MapFlags::PRIVATE | MapFlags::NOCORE; + } else { + let flags = MapFlags::PRIVATE | MapFlags::NORESERVE; + } + } + + let page_ptr: *mut c_void = + unsafe { rustix::mm::mmap_anonymous(addr, page_size, prot, flags) }?; + + // SAFETY: if `mmap` is successful, the result is non-zero + let page_ptr = unsafe { NonNull::new_unchecked(page_ptr as *mut u8) }; + Ok(Self { + page_ptr, + page_size, + _phantom_pagemem: core::marker::PhantomData, + }) + } + + /// Lock the memory page to physical memory. + /// + /// When this function returns successfully then the memory page is + /// guarantied to be backed by physical memory, i.e. not (only) swapped. + /// In combination with the noreserve flag during the allocation, this + /// guaranties the memory to not be swapped at all, except on hibernation + /// or memory starvation. This is really the best we can achieve. If memory + /// contents are really secret than there is no other solution than to + /// use a swap space encrypted with an ephemeral secret key, and + /// hibernation should be disabled (both on the OS level). + fn mlock(&mut self) -> Result<(), rustix::io::Errno> { + unsafe { rustix::mm::mlock(self.as_c_ptr_mut(), self.page_size()) } + } + + /// Allocate a new page of memory using (anonymous) `mmap` with the + /// noreserve flag and mlock page. + /// + /// The noreserve flag disables swapping of the memory page. The page is + /// then mlocked to force it into physical memory. + /// + /// # Errors + /// The function returns an `PageAllocError` if the `mmap` or `mlock` call + /// fails. + pub fn alloc_new_lock() -> Result { + let mut page = Self::alloc_new_noreserve().map_err(|e| PageAllocError::Mmap(e))?; + page.mlock().map_err(|e| PageAllocError::Mlock(e))?; + Ok(page) + } +} + +impl Drop for Page { + fn drop(&mut self) { + let ptr = self.as_c_ptr_mut(); + unsafe { + // SAFETY: we allocated/mapped this page in the constructor so it is safe to + // unmap now. `munmap` also unlocks a page if it was locked so it is + // not necessary to `munlock` the page if it was locked. + rustix::mm::munmap(ptr, self.page_size()).unwrap(); + } + // SAFETY: `NonNull` and `usize` both do not drop so we need not + // worry about subsequent drops + } +} diff --git a/src/internals/mem/windows.rs b/src/internals/mem/windows.rs new file mode 100644 index 0000000..930b5e5 --- /dev/null +++ b/src/internals/mem/windows.rs @@ -0,0 +1,110 @@ +//! Windows `VirtualAlloc` memory page allocation. + +use super::Page; + +use core::ffi::c_void; +use core::ptr::NonNull; + +/// Return the page size on the running system by querying kernel32.lib. 
+pub fn page_size() -> usize { + use windows::Win32::System::SystemInformation::{GetSystemInfo, SYSTEM_INFO}; + + let mut sysinfo = SYSTEM_INFO::default(); + let sysinfo_ptr = &mut sysinfo as *mut SYSTEM_INFO; + // SAFETY: `sysinfo_ptr` points to a valid (empty/all zeros) `SYSTEM_INFO` + unsafe { GetSystemInfo(sysinfo_ptr) }; + // the pagesize must always fit in a `usize` (on windows it is a `u32`) + #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] + { + sysinfo.dwPageSize as usize + } +} + +#[derive(Debug, Clone)] +#[cfg_attr(feature = "std", derive(thiserror::Error))] +pub enum PageAllocError { + #[cfg_attr(feature = "std", error("could not map a memory page"))] + VirtualAlloc, + #[cfg_attr(feature = "std", error("could not lock memory page: {0}"))] + VirtualLock(windows::core::Error), +} + +impl Page { + /// Get a mutable pointer to the start of the memory page. + fn as_c_ptr_mut(&self) -> *mut c_void { + self.as_ptr_mut() as *mut c_void + } + + /// Allocate a new page of memory using `VirtualAlloc`. + /// + /// # Errors + /// The function returns an `PageAllocError` if the `VirtualAlloc` call + /// fails. + fn alloc_new() -> Result { + use windows::Win32::System::Memory::{ + VirtualAlloc, MEM_COMMIT, MEM_RESERVE, PAGE_PROTECTION_FLAGS, PAGE_READWRITE, + VIRTUAL_ALLOCATION_TYPE, + }; + + let page_size = page_size(); + let alloc_type: VIRTUAL_ALLOCATION_TYPE = MEM_RESERVE | MEM_COMMIT; + let protect: PAGE_PROTECTION_FLAGS = PAGE_READWRITE; + + let page_ptr: *mut c_void = unsafe { VirtualAlloc(None, page_size, alloc_type, protect) }; + + if page_ptr.is_null() { + Err(()) + } else { + let page_ptr = unsafe { + // SAFETY: we just checked that `page_ptr` is non-null + NonNull::new_unchecked(page_ptr as *mut u8) + }; + Ok(Self { + page_ptr, + page_size, + _phantom_pagemem: core::marker::PhantomData, + }) + } + } + + /// Lock the memory page to physical memory. + /// + /// When this function returns successfully then the memory page is + /// guarantied to be backed by physical memory, i.e. not (only) swapped. + /// This guaranties the memory to not be swapped at all, except on + /// hibernation or memory starvation. This is really the best we can + /// achieve. If memory contents are really secret than there is no other + /// solution than to use a swap space encrypted with an ephemeral secret + /// key, and hibernation should be disabled (both on the OS level). + fn lock(&mut self) -> Result<(), windows::core::Error> { + use windows::Win32::System::Memory::VirtualLock; + + unsafe { VirtualLock(self.as_c_ptr_mut(), self.page_size()) } + } + + /// Allocate a new page of memory using `VirtualAlloc` and `VirtualLock` + /// page. + /// + /// The page is locked to force it into physical memory. + /// + /// # Errors + /// The function returns an `PageAllocError` if the `VirtualAlloc` or + /// `VirtualLock` call fails. 
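[Note on the shared backend surface] The miri, unix, and windows modules each provide `page_size()`, a `PageAllocError`, `Page::alloc_new_lock()`, and a `Drop` impl that unmaps the page. A sketch of how that shared contract might be exercised from inside the crate; only the signatures come from this diff, the usage itself is assumed and the helper name is hypothetical.

```rust
// Could live next to the `Page` type in `src/internals/mem.rs`; illustrative only.
fn zeroed_locked_page() -> Result<Page, PageAllocError> {
    // mmap/VirtualAlloc one page and lock it into physical memory.
    let page = Page::alloc_new_lock()?;
    // SAFETY: the page is mapped read/write for `page.page_size()` bytes.
    unsafe { core::ptr::write_bytes(page.as_ptr_mut(), 0, page.page_size()) };
    Ok(page)
    // Dropping the returned `Page` later unmaps (and thereby unlocks) it.
}
```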
+ pub fn alloc_new_lock() -> Result { + let mut page = Self::alloc_new().map_err(|_| PageAllocError::VirtualAlloc)?; + page.lock().map_err(|e| PageAllocError::VirtualLock(e))?; + Ok(page) + } +} + +impl Drop for Page { + fn drop(&mut self) { + use windows::Win32::System::Memory::{VirtualFree, MEM_RELEASE}; + + // SAFETY: we allocated/mapped this page in the constructor so it is safe to + // unmap now + unsafe { VirtualFree(self.as_c_ptr_mut(), 0, MEM_RELEASE) }.unwrap(); + // SAFETY: `NonNull` and `usize` both do not drop so we need not + // worry about subsequent drops + } +} diff --git a/src/internals/zeroize.rs b/src/internals/zeroize.rs index 65bb31d..c2da080 100644 --- a/src/internals/zeroize.rs +++ b/src/internals/zeroize.rs @@ -1,35 +1,103 @@ //! Utility functions for securely wiping memory. //! -//! Contains wrappers around intrinsics and ffi functions necessary for the -//! [`crate::zeroize`] module. +//! Utility functions for the [`crate::zeroize`] module, to securely wiping +//! memory, implemented using cross-platform pure Rust volatile writes. -#[cfg(target_arch = "x86_64")] -mod asm_x86_64; -#[cfg(target_arch = "x86_64")] -pub use asm_x86_64::*; +use crate::macros::precondition_memory_range; +use crate::util::is_aligned_ptr_mut; +use mirai_annotations::debug_checked_precondition; -mod system; -pub use system::*; +/// Zeroize the memory pointed to by `ptr` and of size `len` bytes, by +/// overwriting it byte for byte using volatile writes. +/// +/// This is guarantied to be not elided by the compiler. +/// +/// # Safety +/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see +/// the [`std::ptr`] documentation. In particular this function is not atomic. +pub unsafe fn volatile_write_zeroize(mut ptr: *mut u8, len: usize) { + precondition_memory_range!(ptr, len); + for _i in 0..len { + // SAFETY: `ptr` originally pointed into an allocation of `len` bytes so now, + // after `_i` steps `len - _i > 0` bytes are left, so `ptr` is valid for + // a byte write + unsafe { + core::ptr::write_volatile(ptr, 0u8); + } + // SAFETY: after increment, `ptr` points into the same allocation if `_i == len` + // or one byte past it, so `add` is sound + ptr = unsafe { ptr.add(1) }; + } +} -mod volatile_write; -pub use volatile_write::*; +/// Zeroize the memory pointed to by `ptr` for `len` rounded down to a multiple +/// of 8 bytes. +/// +/// This function rounds down `len` to a multiple of 8 and then zeroizes the +/// memory pointed to by `ptr` for that length. This operation is guarantied to +/// be not elided by the compiler. If `len` is a multiple of 8 then this +/// zeroizes the entire specified block of memory. Returns a pointer to the byte +/// after the last zeroed byte, with the provenance of `ptr`. +/// +/// # Safety +/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see +/// the [`std::ptr`] documentation. In particular this function is not atomic. +/// +/// Furthermore, `ptr` *must* be at least 8 byte aligned. 
+pub unsafe fn zeroize_align8_block8(mut ptr: *mut u8, len: usize) -> *mut u8 {
+    precondition_memory_range!(ptr, len);
+    debug_checked_precondition!(is_aligned_ptr_mut(ptr, 8));
+
+    let nblocks = (len - len % 8) / 8;
+    for _i in 0..nblocks {
+        // SAFETY: `ptr` originally pointed into an allocation of `len` bytes so now,
+        // after `_i` steps `len - 8*_i >= 8` bytes are left, so `ptr` is valid
+        // for an 8 byte write. SAFETY: `ptr` was originally 8 byte aligned by
+        // caller contract and we only added a multiple of 8 so it is still 8
+        // byte aligned
+        unsafe {
+            core::ptr::write_volatile(ptr.cast::<u64>(), 0u64);
+        }
+        // SAFETY: after increment, `ptr` points into the same allocation or (if
+        // `8*(_i + 1) == len`) one past the end of it, so `add` is sound; `ptr`
+        // stays 8 byte aligned
+        ptr = unsafe { ptr.add(8) };
+    }
+    ptr
+}
 
-/// Volatile write byte to memory.
+/// Zeroize the memory pointed to by `ptr` and of size `len % 8` bytes.
 ///
-/// This uses the [`core::intrinsics::volatile_set_memory`] intrinsic and can
-/// only be used on nightly, with the `nightly` feature enabled.
+/// This can be used to zeroize the bytes left unzeroized by
+/// `zeroize_align8_block8` if `len` is not a multiple of 8. This operation is
+/// guaranteed not to be elided by the compiler.
 ///
 /// # Safety
 /// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see
 /// the [`std::ptr`] documentation. In particular this function is not atomic.
-// In addition `ptr` needs to be properly aligned, but because we are talking
-// about bytes (therefore byte alignment), it *always* is.
-#[cfg(feature = "nightly_core_intrinsics")]
-pub unsafe fn volatile_memset(ptr: *mut u8, val: u8, len: usize) {
-    crate::macros::precondition_memory_range!(ptr, len);
-    // SAFETY: the caller must uphold the safety contract
-    unsafe {
-        core::intrinsics::volatile_set_memory(ptr, val, len);
+///
+/// Furthermore, `ptr` *must* be at least 4 byte aligned.
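Aside, not part of the patch: a minimal sketch of how these helpers are meant to compose, roughly what `VolatileWrite8Zeroizer` does later in this diff. `zeroize_align8_block8` (above) wipes the whole 8-byte blocks and returns a pointer just past the last zeroed byte; `zeroize_align4_tail8` (defined just below) then wipes the remaining `len % 8` bytes. The wrapper name `zeroize_buffer` is hypothetical and assumes the imports of this module.

// Sketch only: composes the helpers defined in this file.
//
// # Safety
// `ptr` must be valid for writes of `len` bytes.
unsafe fn zeroize_buffer(ptr: *mut u8, len: usize) {
    if is_aligned_ptr_mut(ptr, 8) {
        // wipe whole 8-byte blocks; returns a pointer past the last zeroed byte
        let tail = unsafe { zeroize_align8_block8(ptr, len) };
        // `tail` is still 8 (hence 4) byte aligned and `len % 8` bytes remain
        unsafe { zeroize_align4_tail8(tail, len) };
    } else {
        // no usable alignment: fall back to byte-for-byte volatile writes
        unsafe { volatile_write_zeroize(ptr, len) };
    }
}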
+pub unsafe fn zeroize_align4_tail8(mut ptr: *mut u8, len: usize) {
+    precondition_memory_range!(ptr, len % 8);
+    debug_checked_precondition!(is_aligned_ptr_mut(ptr, 4));
+
+    if len % 8 >= 4 {
+        // SAFETY: `ptr` is valid for `len % 8` bytes by caller contract
+        // SAFETY: `ptr` is still 4 byte aligned by caller contract
+        unsafe {
+            core::ptr::write_volatile(ptr.cast::<u32>(), 0u32);
+        }
+        ptr = unsafe { ptr.add(4) };
+    }
+    // the final remainder (at most 3 bytes) is zeroed byte-for-byte
+    // SAFETY: `ptr` has been incremented by a multiple of 4 <= `len` so `ptr`
+    // points to an allocation of `len % 4` bytes, so `ptr` can be written to
+    // and incremented `len % 4` times
+    for _i in 0..(len % 4) {
+        unsafe {
+            core::ptr::write_volatile(ptr, 0u8);
+        }
+        ptr = unsafe { ptr.add(1) };
     }
 }
 
@@ -76,135 +144,18 @@ mod tests {
         assert_eq!(&array[..], &expected[..]);
     }
 
-    #[cfg(feature = "nightly_core_intrinsics")]
-    #[test]
-    fn test_volatile_memset() {
-        test_b128_zeroizer(|ptr: *mut u8, len: usize| unsafe { volatile_memset(ptr, 0, len) })
-    }
-
-    #[cfg(any(
-        target_os = "freebsd",
-        target_os = "dragonfly",
-        target_os = "openbsd",
-        target_os = "netbsd",
-        target_os = "macos",
-        target_os = "ios",
-        target_env = "gnu",
-        target_env = "musl"
-    ))]
-    #[test]
-    #[cfg_attr(miri, ignore)] // ffi
-    fn test_explicit_bzero() {
-        test_b128_zeroizer(|ptr: *mut u8, len: usize| unsafe { libc_explicit_bzero(ptr, len) })
-    }
-
-    #[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))]
-    #[test]
-    #[cfg_attr(miri, ignore)] // asm
-    fn test_asm_ermsb_zeroize() {
-        test_b128_zeroizer(|ptr: *mut u8, len: usize| unsafe { asm_ermsb_zeroize(ptr, len) })
-    }
-
-    #[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))]
     #[test]
     fn test_volatile_write_zeroize() {
-        test_b128_zeroizer(|ptr: *mut u8, len: usize| unsafe { volatile_write_zeroize(ptr, len) })
+        test_b128_zeroizer(|ptr, len| unsafe { volatile_write_zeroize(ptr, len) })
    }
 
-    #[cfg(feature = "nightly_core_intrinsics")]
    #[test]
-    fn test_volatile_memset_lowalign() {
-        test_b239_lowalign_zeroizer(|ptr: *mut u8, len: usize| unsafe {
-            volatile_memset(ptr, 0, len)
-        })
-    }
-
-    #[cfg(any(
-        target_os = "freebsd",
-        target_os = "dragonfly",
-        target_os = "openbsd",
-        target_os = "netbsd",
-        target_os = "macos",
-        target_os = "ios",
-        target_env = "gnu",
-        target_env = "musl"
-    ))]
-    #[test]
-    #[cfg_attr(miri, ignore)] // ffi
-    fn test_explicit_bzero_lowalign() {
-        test_b239_lowalign_zeroizer(|ptr: *mut u8, len: usize| unsafe {
-            libc_explicit_bzero(ptr, len)
-        })
-    }
-
-    #[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))]
-    #[test]
-    #[cfg_attr(miri, ignore)] // asm
-    fn test_asm_ermsb_zeroize_lowalign() {
-        test_b239_lowalign_zeroizer(|ptr: *mut u8, len: usize| unsafe {
-            asm_ermsb_zeroize(ptr, len)
-        })
-    }
-
-    #[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))]
-    #[test]
-    fn test_volatile_write_zeroize_lowalign() {
-        test_b239_lowalign_zeroizer(|ptr: *mut u8, len: usize| unsafe {
-            volatile_write_zeroize(ptr, len)
-        })
+    fn test_lowalign_volatile_write_zeroize() {
+        test_b239_lowalign_zeroizer(|ptr, len| unsafe { volatile_write_zeroize(ptr, len) })
    }
 
    #[test]
    fn test_zeroize_align8_block8() {
        test_b257_align64_block_zeroizer(|ptr, len| unsafe { zeroize_align8_block8(ptr, len) })
    }
-
-    #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
-    #[test]
-    #[cfg_attr(miri, ignore)] // asm
-    fn test_x86_64_simd16_zeroize_align16_block16() {
-        test_b257_align64_block_zeroizer(|ptr, len| unsafe {
-            x86_64_simd16_zeroize_align16_block16(ptr,
len) - }) - } - - #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] - #[test] - #[cfg_attr(miri, ignore)] // asm - fn test_x86_64_simd16_unroll2_zeroize_align16_block16() { - test_b257_align64_block_zeroizer(|ptr, len| unsafe { - x86_64_simd16_unroll2_zeroize_align16_block16(ptr, len) - }) - } - - #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))] - #[test] - #[cfg_attr(miri, ignore)] // asm - fn test_x86_64_simd32_zeroize_align32_block32() { - test_b257_align64_block_zeroizer(|ptr, len| unsafe { - x86_64_simd32_zeroize_align32_block32(ptr, len) - }) - } - - #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))] - #[test] - #[cfg_attr(miri, ignore)] // asm - fn test_x86_64_simd32_unroll2_zeroize_align32_block32() { - test_b257_align64_block_zeroizer(|ptr, len| unsafe { - x86_64_simd32_unroll2_zeroize_align32_block32(ptr, len) - }) - } - - #[cfg(all( - target_arch = "x86_64", - target_feature = "avx512f", - feature = "nightly_stdsimd" - ))] - #[test] - #[cfg_attr(miri, ignore)] // asm - fn test_x86_64_simd64_zeroize_align64_block64() { - test_b257_align64_block_zeroizer(|ptr, len| unsafe { - x86_64_simd64_zeroize_align64_block64(ptr, len) - }) - } } diff --git a/src/internals/zeroize/asm_x86_64.rs b/src/internals/zeroize/asm_x86_64.rs deleted file mode 100644 index 69c3113..0000000 --- a/src/internals/zeroize/asm_x86_64.rs +++ /dev/null @@ -1,326 +0,0 @@ -//! Utility functions for securely wiping memory, implemented in asm for x86_64 -//! cpus. - -use crate::macros::precondition_memory_range; -use crate::util::is_aligned_ptr_mut; -use mirai_annotations::debug_checked_precondition; - -/// Overwrite memory with zeros. This operation will not be elided by the -/// compiler. -/// -/// This uses inline assembly in Rust. The implementation makes use of the -/// efficient `rep stosb` memory set functionality on modern x86_64 cpus. This -/// is very slow for small amounts of data but very efficient for zeroizing -/// large amounts of data (depending an CPU architecture though), works on -/// stable, and does not require a libc. -/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -// In addition `ptr` needs to be properly aligned, but because we are talking -// about bytes (therefore byte alignment), it *always* is. -#[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))] -pub unsafe fn asm_ermsb_zeroize(ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - - unsafe { - core::arch::asm!( - "rep stosb byte ptr es:[rdi], al", - // `len` in the rcx register - inout("rcx") len => _, - // `ptr` int the rdi register - inout("rdi") ptr => _, - // zero byte to al (first byte of rax) register - in("al") 0u8, - options(nostack), - ); - } -} - -/// Zeroize the memory pointed to by `ptr` for `len` rounded down to a multiple -/// of 16 bytes. -/// -/// This function rounds down `len` to a multiple of 16 and then zeroizes the -/// memory pointed to by `ptr` for that length. This operation is guarantied to -/// be not elided by the compiler. If `len` is a multiple of 16 then this -/// zeroizes the entire specified block of memory. Returns a pointer to the byte -/// after the last zeroed byte, with the provenance of `ptr`. -/// -/// This uses sse2 instructions in inline asm to zeroize the memory with blocks -/// of 16 bytes at a time. 
-/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -/// -/// Furthermore, `ptr` *must* be at least 16 byte aligned. -#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] -pub unsafe fn x86_64_simd16_zeroize_align16_block16(mut ptr: *mut u8, len: usize) -> *mut u8 { - use core::arch::x86_64 as arch; - - precondition_memory_range!(ptr, len); - debug_checked_precondition!(is_aligned_ptr_mut(ptr, 16)); - - let nblocks = (len - len % 16) / 16; - - for _i in 0..nblocks { - // SAFETY: `ptr` is valid for a `len >= nblocks*16` byte write, so we can write - // `nblocks` times 16 bytes and increment `ptr` by 16 bytes; `ptr` stays 16 byte - // aligned - unsafe { - // SAFETY: `ptr` originally pointed into an allocation of `len` bytes so now, - // after `_i` steps `len - 16*_i >= 16` bytes are left, so `ptr` is valid - // for a 16 byte write; also `ptr` is 16 byte aligned - core::arch::asm!( - " - /* write 16 zero bytes to ptr */ - vmovdqa xmmword ptr [{0}], {1} - ", - in(reg) ptr, - in(xmm_reg) arch::_mm_setzero_si128(), - options(nostack), - ); - // NOTE: increment `ptr` outside of the asm to maintain provenance - // SAFETY: this stays within the memory where `ptr` is valid for writes and - // maintains 16 byte alignment - ptr = ptr.add(16); - } - } - ptr -} - -/// Zeroize the memory pointed to by `ptr` for `len` rounded down to a multiple -/// of 16 bytes. -/// -/// This function rounds down `len` to a multiple of 16 and then zeroizes the -/// memory pointed to by `ptr` for that length. This operation is guarantied to -/// be not elided by the compiler. If `len` is a multiple of 16 then this -/// zeroizes the entire specified block of memory. Returns a pointer to the byte -/// after the last zeroed byte, with the provenance of `ptr`. -/// -/// This uses sse2 instructions in inline asm to zeroize the memory with blocks -/// of 16 bytes at a time. -/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -/// -/// Furthermore, `ptr` *must* be at least 16 byte aligned. -#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] -pub unsafe fn x86_64_simd16_unroll2_zeroize_align16_block16( - mut ptr: *mut u8, - len: usize, -) -> *mut u8 { - use core::arch::x86_64 as arch; - - precondition_memory_range!(ptr, len); - debug_checked_precondition!(is_aligned_ptr_mut(ptr, 16)); - - let nblocks = (len - len % 16) / 16; - - // SAFETY: `ptr` is valid for a `len >= nblocks*16` byte write, so we can write - // `nblocks` times 16 bytes and increment `ptr` by 16 bytes; `ptr` stays 16 byte - // aligned - for _i in 0..nblocks / 2 { - unsafe { - core::arch::asm!( - " - /* write 16 zero bytes to ptr */ - vmovdqa xmmword ptr [{0}], {1} - vmovdqa xmmword ptr [{0} + 16], {1} - ", - in(reg) ptr, - in(xmm_reg) arch::_mm_setzero_si128(), - options(nostack), - ); - ptr = ptr.add(32); - } - } - if nblocks % 2 == 1 { - unsafe { - core::arch::asm!( - " - /* write 16 zero bytes to ptr */ - vmovdqa xmmword ptr [{0}], {1} - ", - in(reg) ptr, - in(xmm_reg) arch::_mm_setzero_si128(), - options(nostack), - ); - ptr = ptr.add(16); - } - } - ptr -} - -/// Zeroize the memory pointed to by `ptr` for `len` rounded down to a multiple -/// of 32 bytes. -/// -/// This function rounds down `len` to a multiple of 32 and then zeroizes the -/// memory pointed to by `ptr` for that length. 
This operation is guarantied to -/// be not elided by the compiler. If `len` is a multiple of 32 then this -/// zeroizes the entire specified block of memory. Returns a pointer to the byte -/// after the last zeroed byte, with the provenance of `ptr`. -/// -/// This uses avx2 instructions in inline asm to zeroize the memory with blocks -/// of 32 bytes at a time. -/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -/// -/// Furthermore, `ptr` *must* be at least 32 byte aligned. -#[cfg(all(target_arch = "x86_64", target_feature = "avx"))] -pub unsafe fn x86_64_simd32_zeroize_align32_block32(mut ptr: *mut u8, len: usize) -> *mut u8 { - use core::arch::x86_64 as arch; - - precondition_memory_range!(ptr, len); - debug_checked_precondition!(is_aligned_ptr_mut(ptr, 32)); - - let nblocks = (len - len % 32) / 32; - - for _i in 0..nblocks { - // SAFETY: `ptr` is valid for a `len >= nblocks*32` byte write, so we can write - // `nblocks` times 32 bytes and increment `ptr` by 32 bytes; `ptr` stays 32 byte - // aligned - unsafe { - // SAFETY: `ptr` originally pointed into an allocation of `len` bytes so now, - // after `_i` steps `len - 32*_i >= 32` bytes are left, so `ptr` is valid - // for a 32 byte write; also `ptr` is 32 byte aligned - core::arch::asm!( - " - /* write 32 zero bytes to ptr */ - vmovdqa ymmword ptr [{0}], {1} - ", - in(reg) ptr, - in(ymm_reg) arch::_mm256_setzero_si256(), - options(nostack), - ); - // NOTE: increment `ptr` outside of the asm to maintain provenance - // SAFETY: this stays within the memory where `ptr` is valid for writes and - // maintains 32 byte alignment - ptr = ptr.add(32); - } - } - ptr -} - -/// Zeroize the memory pointed to by `ptr` for `len` rounded down to a multiple -/// of 32 bytes. -/// -/// This function rounds down `len` to a multiple of 32 and then zeroizes the -/// memory pointed to by `ptr` for that length. This operation is guarantied to -/// be not elided by the compiler. If `len` is a multiple of 32 then this -/// zeroizes the entire specified block of memory. Returns a pointer to the byte -/// after the last zeroed byte, with the provenance of `ptr`. -/// -/// This uses avx2 instructions in inline asm to zeroize the memory with blocks -/// of 32 bytes at a time. -/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -/// -/// Furthermore, `ptr` *must* be at least 32 byte aligned. 
-#[cfg(all(target_arch = "x86_64", target_feature = "avx"))] -pub unsafe fn x86_64_simd32_unroll2_zeroize_align32_block32( - mut ptr: *mut u8, - len: usize, -) -> *mut u8 { - use core::arch::x86_64 as arch; - - precondition_memory_range!(ptr, len); - debug_checked_precondition!(is_aligned_ptr_mut(ptr, 32)); - - let nblocks = (len - len % 32) / 32; - - // SAFETY: `ptr` is valid for a `len >= nblocks*32` byte write, so we can write - // `nblocks` times 32 bytes and increment `ptr` by 32 bytes; `ptr` stays 32 byte - // aligned - for _i in 0..(nblocks / 2) { - unsafe { - core::arch::asm!( - " - /* write 64 zero bytes to ptr */ - vmovdqa ymmword ptr [{0}], {1} - vmovdqa ymmword ptr [{0} + 32], {1} - ", - in(reg) ptr, - in(ymm_reg) arch::_mm256_setzero_si256(), - options(nostack), - ); - ptr = ptr.add(64); - } - } - if nblocks % 2 == 1 { - unsafe { - core::arch::asm!( - " - /* write 32 zero bytes to ptr */ - vmovdqa ymmword ptr [{0}], {1} - ", - in(reg) ptr, - in(ymm_reg) arch::_mm256_setzero_si256(), - options(nostack), - ); - ptr = ptr.add(32); - } - } - ptr -} - -/// Zeroize the memory pointed to by `ptr` for `len` rounded down to a multiple -/// of 64 bytes. -/// -/// This function rounds down `len` to a multiple of 64 and then zeroizes the -/// memory pointed to by `ptr` for that length. This operation is guarantied to -/// be not elided by the compiler. If `len` is a multiple of 64 then this -/// zeroizes the entire specified block of memory. Returns a pointer to the byte -/// after the last zeroed byte, with the provenance of `ptr`. -/// -/// This uses avx512 instructions in inline asm to zeroize the memory with -/// blocks of 64 bytes at a time. -/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -/// -/// Furthermore, `ptr` *must* be at least 64 byte aligned. -#[cfg(all( - target_arch = "x86_64", - target_feature = "avx512f", - feature = "nightly_stdsimd" -))] -pub unsafe fn x86_64_simd64_zeroize_align64_block64(mut ptr: *mut u8, len: usize) -> *mut u8 { - use core::arch::x86_64 as arch; - - precondition_memory_range!(ptr, len); - debug_checked_precondition!(is_aligned_ptr_mut(ptr, 64)); - - let nblocks = (len - len % 64) / 64; - - for _i in 0..nblocks { - // SAFETY: `ptr` is valid for a `len >= nblocks*64` byte write, so we can write - // `nblocks` times 64 bytes and increment `ptr` by 64 bytes; `ptr` stays 64 byte - // aligned - unsafe { - // SAFETY: `ptr` originally pointed into an allocation of `len` bytes so now, - // after `_i` steps `len - 64*_i >= 64` bytes are left, so `ptr` is valid - // for a 64 byte write; also `ptr` is 64 byte aligned - core::arch::asm!( - " - /* write 64 zero bytes to ptr */ - vmovdqa64 zmmword ptr [{0}], {1} - ", - in(reg) ptr, - in(zmm_reg) arch::_mm512_setzero_si512(), - options(nostack), - ); - // NOTE: increment `ptr` outside of the asm to maintain provenance - // SAFETY: this stays within the memory where `ptr` is valid for writes and - // maintains 64 byte alignment - ptr = ptr.add(64); - } - } - ptr -} diff --git a/src/internals/zeroize/system.rs b/src/internals/zeroize/system.rs deleted file mode 100644 index 287a595..0000000 --- a/src/internals/zeroize/system.rs +++ /dev/null @@ -1,104 +0,0 @@ -//! Bindings to system functions for securely wiping memory. - -use crate::macros::precondition_memory_range; - -/// Overwrite memory with zeros. This operation will not be elided by the -/// compiler. 
-/// -/// This uses the `explicit_bzero` function present in many recent libcs. -/// -/// # Safety -/// It's C. But the safety requirement is quite obvious: The caller *must* -/// ensure that `ptr` is valid for writes of `len` bytes, see the [`std::ptr`] -/// documentation. In particular this function is not atomic. -// In addition `ptr` needs to be properly aligned, but because we are talking -// about bytes (therefore byte alignment), it *always* is. -#[cfg(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "openbsd", - all(target_env = "gnu", unix), - target_env = "musl" -))] -pub unsafe fn libc_explicit_bzero(ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - // SAFETY: the caller must uphold the safety contract - unsafe { - libc::explicit_bzero(ptr as *mut libc::c_void, len as libc::size_t); - } -} - -/// Overwrite memory with zeros. This operation will not be elided by the -/// compiler. -/// -/// This uses the `explicit_bzero` function present in many recent libcs. -/// -/// # Safety -/// It's C. But the safety requirement is quite obvious: The caller *must* -/// ensure that `ptr` is valid for writes of `len` bytes, see the [`std::ptr`] -/// documentation. In particular this function is not atomic. -// In addition `ptr` needs to be properly aligned, but because we are talking -// about bytes (therefore byte alignment), it *always* is. -#[cfg(all(target_env = "gnu", windows))] -pub unsafe fn libc_explicit_bzero(ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - extern "C" { - fn explicit_bzero(ptr: *mut libc::c_void, len: libc::size_t); - } - - // SAFETY: the caller must uphold the safety contract - unsafe { - explicit_bzero(ptr as *mut libc::c_void, len as libc::size_t); - } -} - -/// Overwrite memory with zeros. This operation will not be elided by the -/// compiler. -/// -/// This uses the `explicit_bzero` function present in many recent libcs. -/// -/// # Safety -/// It's C. But the safety requirement is quite obvious: The caller *must* -/// ensure that `ptr` is valid for writes of `len` bytes, see the [`std::ptr`] -/// documentation. In particular this function is not atomic. -// In addition `ptr` needs to be properly aligned, but because we are talking -// about bytes (therefore byte alignment), it *always* is. -#[cfg(target_os = "netbsd")] -pub unsafe fn libc_explicit_bzero(ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - // SAFETY: the caller must uphold the safety contract - unsafe { - libc::explicit_memset( - ptr as *mut libc::c_void, - 0 as libc::c_int, - len as libc::size_t, - ); - } -} - -/// Overwrite memory with zeros. This operation will not be elided by the -/// compiler. -/// -/// This uses the `explicit_bzero` function present in many recent libcs. -/// -/// # Safety -/// It's C. But the safety requirement is quite obvious: The caller *must* -/// ensure that `ptr` is valid for writes of `len` bytes, see the [`std::ptr`] -/// documentation. In particular this function is not atomic. -// In addition `ptr` needs to be properly aligned, but because we are talking -// about bytes (therefore byte alignment), it *always* is. 
-#[cfg(any(target_os = "macos", target_os = "ios"))] -pub unsafe fn libc_explicit_bzero(ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - // SAFETY: the caller must uphold the safety contract - unsafe { - // the zero value is a `c_int` (`i32` by default), but then converted to - // `unsigned char` (`u8`) - libc::memset_s( - ptr as *mut libc::c_void, - len as libc::size_t, - 0 as libc::c_int, - len as libc::size_t, - ); - } -} diff --git a/src/internals/zeroize/volatile_write.rs b/src/internals/zeroize/volatile_write.rs deleted file mode 100644 index 674356e..0000000 --- a/src/internals/zeroize/volatile_write.rs +++ /dev/null @@ -1,100 +0,0 @@ -//! Utility functions for securely wiping memory, implemented using -//! cross-platform volatile writes. - -use crate::macros::precondition_memory_range; -use crate::util::is_aligned_ptr_mut; -use mirai_annotations::debug_checked_precondition; - -/// Zeroize the memory pointed to by `ptr` and of size `len` bytes, by -/// overwriting it byte for byte using volatile writes. -/// -/// This is guarantied to be not elided by the compiler. -/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -pub unsafe fn volatile_write_zeroize(mut ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - for _i in 0..len { - // SAFETY: `ptr` originally pointed into an allocation of `len` bytes so now, - // after `_i` steps `len - _i > 0` bytes are left, so `ptr` is valid for - // a byte write - unsafe { - core::ptr::write_volatile(ptr, 0u8); - } - // SAFETY: after increment, `ptr` points into the same allocation if `_i == len` - // or one byte past it, so `add` is sound - ptr = unsafe { ptr.add(1) }; - } -} - -/// Zeroize the memory pointed to by `ptr` for `len` rounded down to a multiple -/// of 8 bytes. -/// -/// This function rounds down `len` to a multiple of 8 and then zeroizes the -/// memory pointed to by `ptr` for that length. This operation is guarantied to -/// be not elided by the compiler. If `len` is a multiple of 8 then this -/// zeroizes the entire specified block of memory. Returns a pointer to the byte -/// after the last zeroed byte, with the provenance of `ptr`. -/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -/// -/// Furthermore, `ptr` *must* be at least 8 byte aligned. -pub unsafe fn zeroize_align8_block8(mut ptr: *mut u8, len: usize) -> *mut u8 { - precondition_memory_range!(ptr, len); - debug_checked_precondition!(is_aligned_ptr_mut(ptr, 8)); - - let nblocks = (len - len % 8) / 8; - for _i in 0..nblocks { - // SAFETY: `ptr` originally pointed into an allocation of `len` bytes so now, - // after `_i` steps `len - 8*_i >= 8` bytes are left, so `ptr` is valid - // for an 8 byte write SAFETY: `ptr` was originally 8 byte aligned by - // caller contract and we only added a multiple of 8 so it is still 8 - // byte aligned - unsafe { - core::ptr::write_volatile(ptr.cast::(), 0u64); - } - // SAFETY: after increment, `ptr` points into the same allocation or (if `8*_i - // == len`) at most one byte past it, so `add` is sound; `ptr` stays 8 - // byte aligned - ptr = unsafe { ptr.add(8) }; - } - ptr -} - -/// Zeroize the memory pointed to by `ptr` and of size `len % 8` bytes. 
-/// -/// This can be used to zeroize the bytes left unzeroized by -/// `zeroize_align8_block8` if `len` is not a multiple of 8. This operation is -/// guarantied to be not elided by the compiler. -/// -/// # Safety -/// The caller *must* ensure that `ptr` is valid for writes of `len` bytes, see -/// the [`std::ptr`] documentation. In particular this function is not atomic. -/// -/// Furthermore, `ptr` *must* be at least 4 byte aligned. -pub unsafe fn zeroize_align4_tail8(mut ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len % 8); - debug_checked_precondition!(is_aligned_ptr_mut(ptr, 4)); - - if len % 8 >= 4 { - // SAFETY: `ptr` is valid for `len % 8` bytes by caller contract - // SAFETY: `ptr` is still 4 byte aligned by caller contract - unsafe { - core::ptr::write_volatile(ptr.cast::(), 0u32); - } - ptr = unsafe { ptr.add(4) }; - } - // the final remainder (at most 3 bytes) is zeroed byte-for-byte - // SAFETY: `ptr` has been incremented by a multiple of 4 <= `len` so `ptr` - // points to an allocation of `len % 4` bytes, so `ptr` can be written to - // and incremented `len % 4` times - for _i in 0..(len % 4) { - unsafe { - core::ptr::write_volatile(ptr, 0u8); - } - ptr = unsafe { ptr.add(1) }; - } -} diff --git a/src/lib.rs b/src/lib.rs index ebc5514..7a975bb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,10 @@ +// https://github.com/rust-lang/rust/issues/32838 #![cfg_attr(feature = "nightly_allocator_api", feature(allocator_api))] +// for `volatile_memset` #![cfg_attr(feature = "nightly_core_intrinsics", feature(core_intrinsics))] -#![cfg_attr(feature = "nightly_stdsimd", feature(stdsimd))] +// https://github.com/rust-lang/rust/issues/111137 +#![cfg_attr(feature = "nightly_stdsimd", feature(stdarch_x86_avx512))] +// https://github.com/rust-lang/rust/issues/95228 #![cfg_attr(feature = "nightly_strict_provenance", feature(strict_provenance))] #![cfg_attr( feature = "nightly_strict_provenance", @@ -25,9 +29,8 @@ //! ``` //! #![feature(allocator_api)] //! // requires `nightly_allocator_api` crate feature to be enabled and a nightly compiler -//! use secmem_alloc::allocator_api::Allocator; +//! use secmem_alloc::allocator_api::{Allocator, Global, Vec}; //! use secmem_alloc::zeroizing_alloc::ZeroizeAlloc; -//! use std::alloc::Global; //! //! fn read_password(buf: &mut Vec) { //! // query password from the user and put it in `buf` @@ -57,8 +60,7 @@ //! // if you enable the `nightly_allocator_api` crate feature, the following line is necessary //! #![feature(allocator_api)] //! -//! use secmem_alloc::allocator_api::Allocator; -//! use secmem_alloc::boxed::Box; +//! use secmem_alloc::allocator_api::{Allocator, Box}; //! use secmem_alloc::sec_alloc::SecStackSinglePageAlloc; //! //! fn get_secret_key(buf: &mut Box<[u8; 256], A>) { @@ -110,19 +112,19 @@ extern crate alloc; +/// Re-exports the most important items of the [`allocator-api2` crate]. +/// +/// [`allocator-api2` crate]: https://crates.io/crates/allocator-api2 +pub mod allocator_api { + pub use allocator_api2::alloc::{Allocator, Global}; + pub use allocator_api2::boxed::Box; + pub use allocator_api2::vec::Vec; +} + mod internals; mod macros; mod util; -#[cfg(not(feature = "nightly_allocator_api"))] -pub mod allocator_api; -#[cfg(feature = "nightly_allocator_api")] -/// Nightly allocator api, imported from the standard library. 
-pub mod allocator_api { - pub use core::alloc::{AllocError, Allocator}; -} - -pub mod boxed; pub mod sec_alloc; pub mod zeroize; pub mod zeroizing_alloc; diff --git a/src/sec_alloc.rs b/src/sec_alloc.rs index 0ac6c99..2a63495 100644 --- a/src/sec_alloc.rs +++ b/src/sec_alloc.rs @@ -20,13 +20,13 @@ //! in the global allocator leaks). This *could* make some exploits harder, //! but not impossible. -use crate::allocator_api::{AllocError, Allocator}; use crate::internals::mem; use crate::util::{ align_up_ptr_mut, align_up_usize, is_aligned_ptr, large_offset_from, nonnull_as_mut_ptr, unlikely, }; use crate::zeroize::{DefaultMemZeroizer, MemZeroizer}; +use allocator_api2::alloc::{AllocError, Allocator}; use core::alloc::Layout; use core::cell::Cell; use core::ptr::{self, NonNull}; @@ -125,7 +125,7 @@ impl SecStackSinglePageAlloc { impl Drop for SecStackSinglePageAlloc { // panic in drop leads to abort, so we better just abort // however, abort is only stably available with `std` (not `core`) - #[cfg(featue = "std")] + #[cfg(feature = "std")] fn drop(&mut self) { // check for leaks if self.bytes.get() != 0 { @@ -143,7 +143,7 @@ impl Drop for SecStackSinglePageAlloc { } } - #[cfg(not(featue = "std"))] + #[cfg(not(feature = "std"))] fn drop(&mut self) { // check for leaks debug_assert!(self.bytes.get() == 0); @@ -706,6 +706,7 @@ unsafe impl Allocator for SecStackSinglePageAlloc { #[cfg(test)] mod tests { use super::*; + use crate::allocator_api::{Box, Vec}; use crate::zeroize::TestZeroizer; use std::mem::drop; @@ -726,8 +727,6 @@ mod tests { #[test] fn box_allocation_8b() { - use crate::boxed::Box; - let allocator = SecStackSinglePageAlloc::::new().expect("allocator creation failed"); allocator.consistency_check(); @@ -741,8 +740,6 @@ mod tests { #[test] fn box_allocation_9b() { - use crate::boxed::Box; - let allocator = SecStackSinglePageAlloc::::new().expect("allocator creation failed"); allocator.consistency_check(); @@ -756,8 +753,6 @@ mod tests { #[test] fn box_allocation_zst() { - use crate::boxed::Box; - let allocator = SecStackSinglePageAlloc::::new().expect("allocator creation failed"); allocator.consistency_check(); @@ -771,8 +766,6 @@ mod tests { #[test] fn multiple_box_allocations() { - use crate::boxed::Box; - let allocator = SecStackSinglePageAlloc::::new().expect("allocator creation failed"); allocator.consistency_check(); @@ -796,8 +789,6 @@ mod tests { #[test] fn multiple_box_allocations_high_align() { - use crate::boxed::Box; - let allocator = SecStackSinglePageAlloc::::new().expect("allocator creation failed"); allocator.consistency_check(); @@ -821,8 +812,6 @@ mod tests { #[test] fn multiple_box_allocations_mixed_align() { - use crate::boxed::Box; - let allocator = SecStackSinglePageAlloc::::new().expect("allocator creation failed"); allocator.consistency_check(); @@ -846,8 +835,6 @@ mod tests { #[test] fn many_box_allocations_mixed_align_nonstacked_drop() { - use crate::boxed::Box; - let allocator = SecStackSinglePageAlloc::::new().expect("allocator creation failed"); allocator.consistency_check(); @@ -911,7 +898,6 @@ mod tests { #[test] fn vec_allocation_nonfinal_grow() { - use crate::boxed::Box; type A = SecStackSinglePageAlloc; let allocator: A = SecStackSinglePageAlloc::new().expect("allocator creation failed"); @@ -953,7 +939,6 @@ mod tests { #[test] fn vec_allocation_nonfinal_shrink() { - use crate::boxed::Box; type A = SecStackSinglePageAlloc; let allocator: A = SecStackSinglePageAlloc::new().expect("allocator creation failed"); diff --git a/src/zeroize.rs 
b/src/zeroize.rs index 3c2f8b1..7be5e02 100644 --- a/src/zeroize.rs +++ b/src/zeroize.rs @@ -72,43 +72,9 @@ cfg_if::cfg_if! { /// the selected features and the version of this library. pub type DefaultMemZeroizer = VolatileMemsetZeroizer; pub(crate) use VolatileMemsetZeroizer as DefaultMemZeroizerConstructor; - } else if #[cfg(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "openbsd", - target_os = "netbsd", - target_os = "macos", - target_os = "ios", - target_env = "gnu", - target_env = "musl" - ))] { - /// Best (i.e. fastest) [`MemZeroizer`] available for the target. - /// - /// Which [`MemZeroizer`] this is is an implementation detail, can depend on the target and - /// the selected features and the version of this library. - pub type DefaultMemZeroizer = LibcZeroizer; - pub(crate) use LibcZeroizer as DefaultMemZeroizerConstructor; - } else if #[cfg(all(target_arch = "x86_64", target_feature = "avx"))] { - /// Best (i.e. fastest) [`MemZeroizer`] available for the target. - /// - /// Which [`MemZeroizer`] this is is an implementation detail, can depend on the target and - /// the selected features and the version of this library. - pub type DefaultMemZeroizer = X86_64AvxZeroizer; - pub(crate) use X86_64AvxZeroizer as DefaultMemZeroizerConstructor; - } else if #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] { - /// Best (i.e. fastest) [`MemZeroizer`] available for the target. - /// - /// Which [`MemZeroizer`] this is is an implementation detail, can depend on the target and - /// the selected features and the version of this library. - pub type DefaultMemZeroizer = X86_64Sse2Zeroizer; - pub(crate) use X86_64Sse2Zeroizer as DefaultMemZeroizerConstructor; } else { - /// Best (i.e. fastest) [`MemZeroizer`] available for the target. - /// - /// Which [`MemZeroizer`] this is is an implementation detail, can depend on the target and - /// the selected features and the version of this library. - pub type DefaultMemZeroizer = VolatileWrite8Zeroizer; - pub(crate) use VolatileWrite8Zeroizer as DefaultMemZeroizerConstructor; + pub type DefaultMemZeroizer = MemsetAsmBarierZeroizer; + pub(crate) use MemsetAsmBarierZeroizer as DefaultMemZeroizerConstructor; } } @@ -118,12 +84,6 @@ pub(crate) use VolatileWrite8Zeroizer as TestZeroizer; /// This zeroizer uses the volatile memset intrinsic which does not /// yet have a stable counterpart. It should be very fast, but requires /// nightly. -/// -/// In addition to the volatile write we place a compiler fence right next to -/// the volatile write. This should not be necessary for secure zeroization -/// since the volatile semantics guarenties our writes are not elided, and they -/// can not be delayed since we are deallocating the memory after zeroization. -/// The use of this fence is therefore only a precaution. #[cfg(feature = "nightly_core_intrinsics")] #[derive(Debug, Copy, Clone, Default)] pub struct VolatileMemsetZeroizer; @@ -133,124 +93,41 @@ impl MemZeroizer for VolatileMemsetZeroizer { unsafe fn zeroize_mem_blocks(&self, ptr: *mut u8, len: usize) { precondition_memory_range!(ptr, len); debug_precondition_logaligned!(A, ptr); - // SAFETY: the caller must uphold the safety contract of - // `internals::volatile_memset` - unsafe { - internals::volatile_memset(ptr, 0, len); - } - fence(); - } -} - -/// This zeroizer uses volatile zeroization functions provided by libc. -/// It should be fast but is only available on certain platforms. 
-/// -/// In addition to the volatile write we place a compiler fence right next to -/// the volatile write. This should not be necessary for secure zeroization -/// since the volatile semantics guarenties our writes are not elided, and they -/// can not be delayed since we are deallocating the memory after zeroization. -/// The use of this fence is therefore only a precaution. -#[cfg(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "openbsd", - target_os = "netbsd", - target_os = "macos", - target_os = "ios", - target_env = "gnu", - target_env = "musl" -))] -#[derive(Debug, Copy, Clone, Default)] -pub struct LibcZeroizer; - -#[cfg(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "openbsd", - target_os = "netbsd", - target_os = "macos", - target_os = "ios", - target_env = "gnu", - target_env = "musl" -))] -impl MemZeroizer for LibcZeroizer { - unsafe fn zeroize_mem_blocks(&self, ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - debug_precondition_logaligned!(A, ptr); - debug_precondition_logmultiple!(B, len); - // SAFETY: the caller must uphold the safety contract of - // `internals::libc_explicit_bzero` - unsafe { - internals::libc_explicit_bzero(ptr, len); - } - fence(); - } -} - -/// This zeroizer uses volatile assembly (`rep stosb`) for modern x86_64, -/// performing very well for large amounts of memory. To make this available on -/// stable, it uses a C compiler at build time. -/// -/// In addition to the volatile write we place a compiler fence right next to -/// the volatile write. This should not be necessary for secure zeroization -/// since the volatile semantics guarenties our writes are not elided, and they -/// can not be delayed since we are deallocating the memory after zeroization. -/// The use of this fence is therefore only a precaution. -#[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))] -#[derive(Debug, Copy, Clone, Default)] -pub struct AsmRepStosZeroizer; - -#[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))] -impl MemZeroizer for AsmRepStosZeroizer { - unsafe fn zeroize_mem_blocks(&self, ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - debug_precondition_logaligned!(A, ptr); - debug_precondition_logmultiple!(B, len); - // SAFETY: the caller must uphold the safety contract of - // `internals::asm_ermsb_zeroize` + // SAFETY: the caller must uphold the safety contract unsafe { - internals::asm_ermsb_zeroize(ptr, len); + core::intrinsics::volatile_set_memory(ptr, 0, len); } - fence(); } } -/// This zeroizer uses a volatile write per byte. This zeroization technique is -/// similar to the `zeroize` crate, available for all target platforms on -/// stable, but extremely slow. -/// -/// In addition to the volatile write we place a compiler fence right next to -/// the volatile write. This should not be necessary for secure zeroization -/// since the volatile semantics guarenties our writes are not elided, and they -/// can not be delayed since we are deallocating the memory after zeroization. -/// The use of this fence is therefore only a precaution. +/// This zeroizer uses a non-volatile memset, followed by an empty asm block +/// acting as an optimisation barier. It should be very fast, and according to +/// my current understanding of the op.sem. the compiler is not allowed to +/// remove the writes. 
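Aside, not part of the patch: the barrier pattern that `MemsetAsmBarierZeroizer` (declared just below) relies on, shown as a free-standing sketch. The function name `wipe` is hypothetical; the empty asm block is the same one used in the impl below. Because the asm takes the pointer as an input operand, the compiler must assume the zeroed bytes may still be read and therefore cannot drop the preceding `write_bytes`.

// Sketch only: non-volatile memset followed by an empty asm optimisation barrier.
fn wipe(buf: &mut [u8]) {
    let ptr = buf.as_mut_ptr();
    // SAFETY: `ptr` is valid for writes of `buf.len()` bytes
    unsafe { ptr.write_bytes(0, buf.len()) };
    // empty asm block acting as an optimisation barrier
    unsafe {
        core::arch::asm!(
            "/* {0} */",
            in(reg) ptr,
            options(nostack, readonly, preserves_flags),
        )
    };
}

// e.g. wiping a stack buffer that held key material:
// let mut key = [0x42u8; 32];
// /* ... use `key` ... */
// wipe(&mut key);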
#[derive(Debug, Copy, Clone, Default)] -pub struct VolatileWriteZeroizer; +pub struct MemsetAsmBarierZeroizer; -impl MemZeroizer for VolatileWriteZeroizer { +impl MemZeroizer for MemsetAsmBarierZeroizer { unsafe fn zeroize_mem_blocks(&self, ptr: *mut u8, len: usize) { precondition_memory_range!(ptr, len); debug_precondition_logaligned!(A, ptr); - debug_precondition_logmultiple!(B, len); - // SAFETY: the caller must uphold the safety contract of - // `volatile_write_zeroize_mem` + // SAFETY: the caller must uphold the safety contract of `write_bytes` + unsafe { ptr.write_bytes(0, len) }; + // Optimisation barier, so the writes can not be optimised out unsafe { - internals::volatile_write_zeroize(ptr, len); - } - fence(); + core::arch::asm!( + "/* {0} */", + in(reg) ptr, + options(nostack, readonly, preserves_flags), + ) + }; } } /// This zeroizer uses a volatile write per 8 bytes if the pointer is 8 byte /// aligned, and otherwise uses `VolatileWriteZeroizer`. This zeroization -/// technique is available for all target platforms on stable, but not very -/// fast. -/// -/// In addition to the volatile write we place a compiler fence right next to -/// the volatile write. This should not be necessary for secure zeroization -/// since the volatile semantics guarenties our writes are not elided, and they -/// can not be delayed since we are deallocating the memory after zeroization. -/// The use of this fence is therefore only a precaution. +/// technique is pure Rust and available for all target platforms on stable, but +/// not very fast. /// /// This zeroization method can benefit (in terms of performance) from using the /// [`MemZeroizer::zeroize_mem_blocks`] function instead of @@ -279,132 +156,8 @@ impl MemZeroizer for VolatileWrite8Zeroizer { internals::volatile_write_zeroize(ptr, len); } } - fence(); - } -} - -/// This zeroizer uses inline asm with avx2 instructions if the pointer is 32 -/// byte aligned, and otherwise uses `VolatileWrite8Zeroizer`. This zeroization -/// technique is available for x86_64 platforms with avx2 cpu support on stable, -/// and reasonably fast for 32 byte aligned pointers. -/// -/// In addition to the volatile write we place a compiler fence right next to -/// the volatile write. This should not be necessary for secure zeroization -/// since the volatile semantics guarenties our writes are not elided, and they -/// can not be delayed since we are deallocating the memory after zeroization. -/// The use of this fence is therefore only a precaution. -/// -/// This zeroization method can benefit (in terms of performance) from using the -/// [`MemZeroizer::zeroize_mem_blocks`] function instead of -/// [`MemZeroizer::zeroize_mem`] function if a minimum alignment is known -/// at compile time. 
-#[cfg(all(target_arch = "x86_64", target_feature = "avx"))] -#[derive(Debug, Copy, Clone, Default)] -pub struct X86_64AvxZeroizer; - -#[cfg(all(target_arch = "x86_64", target_feature = "avx"))] -impl MemZeroizer for X86_64AvxZeroizer { - unsafe fn zeroize_mem_blocks(&self, mut ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - debug_precondition_logaligned!(A, ptr); - debug_precondition_logmultiple!(B, len); - // if we have 32 = 2^5 byte alignment then write 32 bytes at a time, - // with 8 = 2^3 byte align do 8 bytes at a time, otherwise 1 byte at a time - if (A >= 5) | is_aligned_ptr_mut(ptr, 32) { - // SAFETY: `ptr` is 32 byte aligned - ptr = unsafe { internals::x86_64_simd32_unroll2_zeroize_align32_block32(ptr, len) }; - // zeroize tail - if B < 5 { - ptr = unsafe { internals::zeroize_align8_block8(ptr, len % 32) }; - } - if B < 3 { - unsafe { internals::zeroize_align4_tail8(ptr, len % 8) }; - } - } else if (A >= 3) | is_aligned_ptr_mut(ptr, 8) { - // SAFETY: `ptr` is 8 byte aligned - ptr = unsafe { internals::zeroize_align8_block8(ptr, len) }; - if B < 3 { - unsafe { internals::zeroize_align4_tail8(ptr, len % 8) }; - } - } else { - // SAFETY: no alignment requirement - unsafe { - internals::volatile_write_zeroize(ptr, len); - } - } - fence(); - } -} - -/// This zeroizer uses inline asm with sse2 instructions if the pointer is 16 -/// byte aligned, and otherwise uses `VolatileWrite8Zeroizer`. This zeroization -/// technique is available for x86_64 platforms with sse2 cpu support on stable, -/// and reasonably fast for 16 byte aligned pointers. -/// -/// In addition to the volatile write we place a compiler fence right next to -/// the volatile write. This should not be necessary for secure zeroization -/// since the volatile semantics guarenties our writes are not elided, and they -/// can not be delayed since we are deallocating the memory after zeroization. -/// The use of this fence is therefore only a precaution. -/// -/// This zeroization method can benefit (in terms of performance) from using the -/// [`MemZeroizer::zeroize_mem_blocks`] function instead of -/// [`MemZeroizer::zeroize_mem`] function if a minimum alignment is known -/// at compile time. -#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] -#[derive(Debug, Copy, Clone, Default)] -pub struct X86_64Sse2Zeroizer; - -#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] -impl MemZeroizer for X86_64Sse2Zeroizer { - unsafe fn zeroize_mem_blocks(&self, mut ptr: *mut u8, len: usize) { - precondition_memory_range!(ptr, len); - debug_precondition_logaligned!(A, ptr); - debug_precondition_logmultiple!(B, len); - // if we have 16 = 2^4 byte alignment then write 16 bytes at a time, - // with 8 = 2^3 byte align do 8 bytes at a time, otherwise 1 byte at a time - if (A >= 4) | is_aligned_ptr_mut(ptr, 16) { - // SAFETY: `ptr` is 16 byte aligned - - ptr = unsafe { internals::x86_64_simd16_unroll2_zeroize_align16_block16(ptr, len) }; - // zeroize tail - if B < 4 { - ptr = unsafe { internals::zeroize_align8_block8(ptr, len % 16) }; - } - if B < 3 { - unsafe { internals::zeroize_align4_tail8(ptr, len % 8) }; - } - } else if (A >= 3) | is_aligned_ptr_mut(ptr, 8) { - // SAFETY: `ptr` is 8 byte aligned - ptr = unsafe { internals::zeroize_align8_block8(ptr, len) }; - if B < 3 { - unsafe { internals::zeroize_align4_tail8(ptr, len % 8) }; - } - } else { - // SAFETY: no alignment requirement - unsafe { - internals::volatile_write_zeroize(ptr, len); - } - } - fence(); } } -/// Compiler fence. 
-/// -/// Forces sequentially consistent access across this fence at compile time. At -/// runtime the CPU can still reorder memory accesses. This should not be -/// necessary for secure zeroization since the volatile semantics guaranties our -/// writes are not elided, and they can not be delayed since we are deallocating -/// the memory after zeroization. The use of this fence is therefore only a -/// precaution. For the same reasons it probably does not add security, it also -/// probably does not hurt performance significantly. -#[inline] -fn fence() { - use core::sync::atomic::{compiler_fence, Ordering}; - - compiler_fence(Ordering::SeqCst); -} - #[cfg(test)] mod tests; diff --git a/src/zeroize/tests.rs b/src/zeroize/tests.rs index 8571cc0..ed0ba44 100644 --- a/src/zeroize/tests.rs +++ b/src/zeroize/tests.rs @@ -33,98 +33,18 @@ fn test_b127_volatile_memset_zeroizer() { test_b127_zeroizer(VolatileMemsetZeroizer); } -#[cfg(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "openbsd", - target_os = "netbsd", - target_os = "macos", - target_os = "ios", - target_env = "gnu", - target_env = "musl" -))] -#[test] -#[cfg_attr(miri, ignore)] // ffi -fn test_b127_libc_zeroizer() { - test_b127_zeroizer(LibcZeroizer); -} - -#[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))] -#[test] -#[cfg_attr(miri, ignore)] // ffi, asm -fn test_b127_asm_rep_stos_zeroizer() { - test_b127_zeroizer(AsmRepStosZeroizer); -} - -#[test] -fn test_b127_volatile_write_zeroizer() { - test_b127_zeroizer(VolatileWriteZeroizer); -} - #[test] fn test_b127_volatile_write8_zeroizer() { test_b127_zeroizer(VolatileWrite8Zeroizer); } -#[cfg(all(target_arch = "x86_64", target_feature = "avx"))] -#[test] -fn test_b127_x86_64_avx_zeroizer() { - test_b127_zeroizer(X86_64AvxZeroizer); -} - -#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] -#[test] -fn test_b127_x86_64_sse2_zeroizer() { - test_b127_zeroizer(X86_64Sse2Zeroizer); -} - #[cfg(feature = "nightly_core_intrinsics")] #[test] fn test_b239_lowalign_volatile_memset_zeroizer() { test_b239_lowalign_zeroizer(VolatileMemsetZeroizer); } -#[cfg(any( - target_os = "freebsd", - target_os = "dragonfly", - target_os = "openbsd", - target_os = "netbsd", - target_os = "macos", - target_os = "ios", - target_env = "gnu", - target_env = "musl" -))] -#[test] -#[cfg_attr(miri, ignore)] // ffi -fn test_b239_lowalign_libc_zeroizer() { - test_b239_lowalign_zeroizer(LibcZeroizer); -} - -#[cfg(all(target_arch = "x86_64", target_feature = "ermsb"))] -#[test] -#[cfg_attr(miri, ignore)] // ffi, asm -fn test_b239_lowalign_asm_rep_stos_zeroizer() { - test_b239_lowalign_zeroizer(AsmRepStosZeroizer); -} - -#[test] -fn test_b239_lowalign_volatile_write_zeroizer() { - test_b239_lowalign_zeroizer(VolatileWriteZeroizer); -} - #[test] fn test_b239_lowalign_volatile_write8_zeroizer() { test_b239_lowalign_zeroizer(VolatileWrite8Zeroizer); } - -#[cfg(all(target_arch = "x86_64", target_feature = "avx"))] -#[test] -fn test_b239_lowalign_x86_64_avx_zeroizer() { - test_b239_lowalign_zeroizer(X86_64AvxZeroizer); -} - -#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))] -#[test] -fn test_b239_lowalign_x86_64_sse2_zeroizer() { - test_b239_lowalign_zeroizer(X86_64Sse2Zeroizer); -} diff --git a/src/zeroizing_alloc.rs b/src/zeroizing_alloc.rs index f48e028..494954a 100644 --- a/src/zeroizing_alloc.rs +++ b/src/zeroizing_alloc.rs @@ -9,13 +9,13 @@ //! in memory but not dropped. This can happen for example when resizing //! [`Vec`]s. 
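Aside, not part of the patch: a sketch of the intended use of `ZeroizeAlloc` together with the `allocator-api2` types re-exported from `crate::allocator_api`. The constructor `ZeroizeAlloc::new` is an assumption (only `ZeroizeAlloc::with_zeroizer` is visible in the tests of this hunk); everything else follows the test code below.

// Sketch only, under the assumption that `ZeroizeAlloc::new` wraps an inner
// allocator with the default zeroizer.
fn collect_secret() {
    use secmem_alloc::allocator_api::Vec;
    use secmem_alloc::zeroizing_alloc::ZeroizeAlloc;

    // hypothetical constructor; only `with_zeroizer` appears in the tests below
    let alloc = ZeroizeAlloc::new(std::alloc::System);
    let mut buf: Vec<u8, _> = Vec::new_in(&alloc);
    buf.extend_from_slice(b"secret bytes");
    // on drop, and on every reallocation while the `Vec` grows, the old backing
    // memory is zeroized before being handed back to `System`
    drop(buf);
}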
-use crate::allocator_api::{AllocError, Allocator}; use crate::macros::{ debug_handleallocerror_precondition, debug_handleallocerror_precondition_valid_layout, precondition_memory_range, }; use crate::zeroize::{DefaultMemZeroizer, DefaultMemZeroizerConstructor, MemZeroizer}; use alloc::alloc::handle_alloc_error; +use allocator_api2::alloc::{AllocError, Allocator}; use core::alloc::{GlobalAlloc, Layout}; use core::ptr::NonNull; #[cfg(not(feature = "nightly_strict_provenance"))] @@ -182,13 +182,12 @@ where #[cfg(test)] mod tests { use super::*; + use crate::allocator_api::{Box, Vec}; use crate::zeroize::TestZeroizer; use std::alloc::System; #[test] fn box_allocation_8b() { - use crate::boxed::Box; - let allocator = ZeroizeAlloc::with_zeroizer(System, TestZeroizer); let _heap_mem = Box::new_in([1u8; 8], &allocator); // drop `_heap_mem` @@ -197,8 +196,6 @@ mod tests { #[test] fn box_allocation_9b() { - use crate::boxed::Box; - let allocator = ZeroizeAlloc::with_zeroizer(System, TestZeroizer); let _heap_mem = Box::new_in([1u8; 9], &allocator); // drop `_heap_mem` @@ -207,8 +204,6 @@ mod tests { #[test] fn box_allocation_zst() { - use crate::boxed::Box; - let allocator = ZeroizeAlloc::with_zeroizer(System, TestZeroizer); let _heap_mem = Box::new_in([(); 8], &allocator); // drop `_heap_mem`