diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f2e4cb2..b6f7546 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -57,7 +57,7 @@ jobs:
       matrix:
         # When updating this, the reminder to update the minimum supported
         # Rust version in Cargo.toml.
-        rust: ['1.38']
+        rust: ['1.59']
     steps:
       - uses: actions/checkout@v3
       - name: Install Rust
diff --git a/Cargo.toml b/Cargo.toml
index 535b2e0..7fd4487 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,7 +10,7 @@ authors = [
     "John Nunley "
 ]
 edition = "2018"
-rust-version = "1.38"
+rust-version = "1.59"
 description = "Concurrent multi-producer multi-consumer queue"
 license = "Apache-2.0 OR MIT"
 repository = "https://github.com/smol-rs/concurrent-queue"
diff --git a/src/lib.rs b/src/lib.rs
index 77cffb5..8807a6c 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -59,7 +59,7 @@ extern crate std;
 use alloc::boxed::Box;
 use core::fmt;
 
-use sync::atomic::{self, AtomicUsize, Ordering};
+use sync::atomic::{self, Ordering};
 
 #[cfg(feature = "std")]
 use std::error;
@@ -538,28 +538,31 @@ impl fmt::Display for PushError {
 /// Equivalent to `atomic::fence(Ordering::SeqCst)`, but in some cases faster.
 #[inline]
 fn full_fence() {
-    if cfg!(all(
-        any(target_arch = "x86", target_arch = "x86_64"),
-        not(miri),
-        not(loom)
-    )) {
+    #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), not(miri)))]
+    {
+        use core::{arch::asm, cell::UnsafeCell};
         // HACK(stjepang): On x86 architectures there are two different ways of executing
         // a `SeqCst` fence.
         //
         // 1. `atomic::fence(SeqCst)`, which compiles into a `mfence` instruction.
-        // 2. `_.compare_exchange(_, _, SeqCst, SeqCst)`, which compiles into a `lock cmpxchg` instruction.
+        // 2. A `lock <op>` instruction.
         //
         // Both instructions have the effect of a full barrier, but empirical benchmarks have shown
         // that the second one is sometimes a bit faster.
-        //
-        // The ideal solution here would be to use inline assembly, but we're instead creating a
-        // temporary atomic variable and compare-and-exchanging its value. No sane compiler to
-        // x86 platforms is going to optimize this away.
-        atomic::compiler_fence(Ordering::SeqCst);
-        let a = AtomicUsize::new(0);
-        let _ = a.compare_exchange(0, 1, Ordering::SeqCst, Ordering::SeqCst);
-        atomic::compiler_fence(Ordering::SeqCst);
-    } else {
+        let a = UnsafeCell::new(0_usize);
+        // It is common to use `lock or` here, but when using a local variable, `lock not`, which
+        // does not change the flag, should be slightly more efficient.
+        // Refs: https://www.felixcloutier.com/x86/not
+        unsafe {
+            #[cfg(target_pointer_width = "64")]
+            asm!("lock not qword ptr [{0}]", in(reg) a.get(), options(nostack, preserves_flags));
+            #[cfg(target_pointer_width = "32")]
+            asm!("lock not dword ptr [{0:e}]", in(reg) a.get(), options(nostack, preserves_flags));
+        }
+        return;
+    }
+    #[allow(unreachable_code)]
+    {
         atomic::fence(Ordering::SeqCst);
     }
 }
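
Review note: for anyone who wants to exercise the new fence outside the crate, below is a minimal standalone sketch of the same technique, assuming Rust 1.59+ (stable `asm!`). The `demo_full_fence` name and the `main` driver are illustrative and not part of this patch; the sketch only covers x86_64 to stay short, with the portable `fence(SeqCst)` path as the fallback, exactly as the non-x86 branch of the patched function does.

```rust
use std::sync::atomic::{fence, Ordering};

/// Standalone sketch of the patch's fence technique (not the crate's code).
#[inline]
fn demo_full_fence() {
    #[cfg(all(target_arch = "x86_64", not(miri)))]
    {
        use core::{arch::asm, cell::UnsafeCell};
        // Any `lock`-prefixed read-modify-write acts as a full barrier on x86.
        // `lock not` on a throwaway stack slot is used because NOT affects no
        // flags, so the asm block can be marked `preserves_flags`.
        let a = UnsafeCell::new(0_usize);
        unsafe {
            asm!("lock not qword ptr [{0}]", in(reg) a.get(), options(nostack, preserves_flags));
        }
        return;
    }
    // Portable fallback, same shape as the patched function's non-x86 branch.
    #[allow(unreachable_code)]
    {
        fence(Ordering::SeqCst);
    }
}

fn main() {
    // Real callers place the fence between a store and a load that must not be
    // reordered (Dekker-style); here we just show it compiles and runs.
    demo_full_fence();
    println!("fence executed");
}
```

The `preserves_flags` option is the reason the patch prefers `lock not` over the more common `lock or`: NOT leaves EFLAGS untouched, so the compiler does not have to treat the asm block as clobbering the flags register.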