From 0ff06a05ee11680e1150a2aca4e7f68caac00104 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Sat, 28 Nov 2020 05:15:33 +0300 Subject: [PATCH 1/5] improve cpuid-bool efficiency --- cpuid-bool/Cargo.toml | 4 +- cpuid-bool/src/lib.rs | 154 +++++++++++++++++++++++++----------------- 2 files changed, 94 insertions(+), 64 deletions(-) diff --git a/cpuid-bool/Cargo.toml b/cpuid-bool/Cargo.toml index d81e01d9..e692197d 100644 --- a/cpuid-bool/Cargo.toml +++ b/cpuid-bool/Cargo.toml @@ -1,9 +1,9 @@ [package] name = "cpuid-bool" -version = "0.1.2" +version = "0.2.0" authors = ["RustCrypto Developers"] license = "MIT OR Apache-2.0" -description = "A lightweight no-std compatible alternative to is_x86_feature_detected" +description = "A lightweight and efficient no-std compatible alternative to the is_x86_feature_detected macro" documentation = "https://docs.rs/cpuid-bool" repository = "https://github.com/RustCrypto/utils" keywords = ["cpuid", "target-feature"] diff --git a/cpuid-bool/src/lib.rs b/cpuid-bool/src/lib.rs index 557539a7..2940174c 100644 --- a/cpuid-bool/src/lib.rs +++ b/cpuid-bool/src/lib.rs @@ -2,16 +2,31 @@ //! //! # Usage example //! ``` -//! if cpuid_bool::cpuid_bool!("sha", "aes") { +//! cpuid_bool::new!(cpuid_aes_sha, "aes", "sha"); +//! +//! // `token` is a Zero Sized Type value, which guarantees +//! // that underlying static storage got properly initialized, +//! // which allows to omit initialization branch +//! let token = cpuid_aes_sha::init(); +//! if token.get() { //! println!("CPU supports both SHA and AES extensions"); //! } else { //! println!("SHA and AES extensions are not supported"); //! } +//! +//! // If stored value needed only once you can get stored value +//! // omitting the token +//! let val = cpuid_aes_sha::get(); +//! assert_eq!(val, token.get()); +//! +//! // Additionally you can get both token and value +//! 
let (token, val) = cpuid_aes_sha::init_get(); +//! assert_eq!(val, token.get()); //! ``` //! Note that if all tested target features are enabled via compiler options -//! (e.g. by using `RUSTFLAGS`), `cpuid_bool!` macro immideatly will expand -//! to `true` and will not use CPUID instruction. Such behavior allows -//! compiler to eliminate fallback code. +//! (e.g. by using `RUSTFLAGS`), the `get` method will always return `true` +//! and `init` will not use CPUID instruction. Such behavior allows +//! compiler to completely eliminate fallback code. //! //! After first call macro caches result and returns it in subsequent //! calls, thus runtime overhead for them is minimal. @@ -19,33 +34,84 @@ #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] compile_error!("This crate works only on x86 and x86-64 targets."); -use core::sync::atomic::{AtomicU8, Ordering::Relaxed}; +/// Create module with CPUID bool code. +#[macro_export] +macro_rules! new { + ($mod_name:ident, $($tf:tt),+ $(,)? ) => { + mod $mod_name { + use core::sync::atomic::{AtomicU8, Ordering::Relaxed}; + + const UNINIT: u8 = u8::max_value(); + static STORAGE: AtomicU8 = AtomicU8::new(UNINIT); -/// This structure represents a lazily initialized static boolean value. -/// -/// Useful when it is preferable to just rerun initialization instead of -/// locking. Used internally by the `cpuid_bool` macro. 
-pub struct LazyBool(AtomicU8); + /// Initialization token + #[derive(Copy, Clone, Debug)] + pub struct InitToken(()); + + impl InitToken { + /// Get initialized value + #[inline(always)] + pub fn get(&self) -> bool { + // CPUID is not available on SGX targets + #[cfg(all(not(target_env = "sgx"), not(all($(target_feature=$tf, )*))))] + let res = STORAGE.load(Relaxed) == 1; + #[cfg(all(target_env = "sgx", not(all($(target_feature=$tf, )*))))] + let res = false; + #[cfg(all($(target_feature=$tf, )*))] + let res = true; + res + } + } + + /// Initialize underlying storage if needed and get + /// stored value and initialization token. + #[inline] + pub fn init_get() -> (InitToken, bool) { + // CPUID is not available on SGX targets + #[cfg(all(not(target_env = "sgx"), not(all($(target_feature=$tf, )*))))] + let res = { + #[cfg(target_arch = "x86")] + use core::arch::x86::{__cpuid, __cpuid_count}; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64::{__cpuid, __cpuid_count}; + + // Relaxed ordering is fine, as we only have a single atomic variable. + let val = STORAGE.load(Relaxed); + if val == UNINIT { + #[allow(unused_variables)] + let cr = unsafe { + [__cpuid(1), __cpuid_count(7, 0)] + }; + let res = $(cpuid_bool::check!(cr, $tf) & )+ true; + STORAGE.store(res as u8, Relaxed); + res + } else { + val == 1 + } + }; + #[cfg(all(target_env = "sgx", not(all($(target_feature=$tf, )*))))] + let res = false; + #[cfg(all($(target_feature=$tf, )*))] + let res = true; -impl LazyBool { - const UNINIT: u8 = u8::max_value(); + (InitToken(()), res) + } - pub const fn new() -> Self { - Self(AtomicU8::new(Self::UNINIT)) - } + /// Initialize underlying storage if needed and get + /// initialization token. + #[inline] + pub fn init() -> InitToken { + init_get().0 + } - // Runs the init() function at least once, returning the value of some run - // of init(). Multiple callers can run their init() functions in parallel. - // init() should always return the same value, if it succeeds. 
- pub fn unsync_init(&self, init: impl FnOnce() -> bool) -> bool { - // Relaxed ordering is fine, as we only have a single atomic variable. - let mut val = self.0.load(Relaxed); - if val == Self::UNINIT { - val = init() as u8; - self.0.store(val as u8, Relaxed); + /// Initialize underlying storage if needed and get + /// stored value. + #[inline] + pub fn get() -> bool { + init_get().1 + } } - val != 0 - } + }; } // TODO: find how to define private macro usable inside a public one @@ -83,39 +149,3 @@ expand_check_macro! { ("adx", 1, ebx, 19), ("sha", 1, ebx, 29), } - -/// Check at runtime if CPU supports sequence of target features. -/// -/// During first execution this macro will use CPUID to check requested -/// target features, results will be cached and further calls will return -/// it instead. -#[macro_export] -macro_rules! cpuid_bool { - ($($tf:tt),+ $(,)? ) => {{ - // CPUID is not available on SGX targets - #[cfg(all(not(target_env = "sgx"), not(all($(target_feature=$tf, )*))))] - let res = { - #[cfg(target_arch = "x86")] - use core::arch::x86::{__cpuid, __cpuid_count}; - #[cfg(target_arch = "x86_64")] - use core::arch::x86_64::{__cpuid, __cpuid_count}; - - static CPUID_BOOL: cpuid_bool::LazyBool = cpuid_bool::LazyBool::new(); - CPUID_BOOL.unsync_init(|| { - #[allow(unused_variables)] - let cr = unsafe { - [__cpuid(1), __cpuid_count(7, 0)] - }; - // TODO: find how to remove `true` - $(cpuid_bool::check!(cr, $tf) & )+ true - }) - }; - - #[cfg(all(target_env = "sgx", not(all($(target_feature=$tf, )*))))] - let res = false; - #[cfg(all($(target_feature=$tf, )*))] - let res = true; - - res - }}; -} From 316b021874971e8cc2e627a989c8f6f3e4aad6d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Sat, 28 Nov 2020 05:22:26 +0300 Subject: [PATCH 2/5] update changelog --- cpuid-bool/CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git 
a/cpuid-bool/CHANGELOG.md b/cpuid-bool/CHANGELOG.md index 961ba48c..1b11df62 100644 --- a/cpuid-bool/CHANGELOG.md +++ b/cpuid-bool/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## 0.2.0 (2020-12-01) +### Changed +- Macro now creates a module with several functions and the `InitToken` type ([#86]) + +[#86]: https://github.com/RustCrypto/utils/pull/86 + ## 0.1.2 (2020-07-20) ### Added - LICENSE files ([#70]) From 7d78bf1bbd293c9868f6c0a652df4ce5fb448d73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Sat, 28 Nov 2020 05:23:31 +0300 Subject: [PATCH 3/5] fmt fix --- cpuid-bool/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpuid-bool/src/lib.rs b/cpuid-bool/src/lib.rs index 2940174c..314b1476 100644 --- a/cpuid-bool/src/lib.rs +++ b/cpuid-bool/src/lib.rs @@ -13,7 +13,7 @@ //! } else { //! println!("SHA and AES extensions are not supported"); //! } -//! +//! //! // If stored value needed only once you can get stored value //! // omitting the token //! let val = cpuid_aes_sha::get(); From 599d2515ba44fd619cf83993061e8281007a00d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Sat, 28 Nov 2020 05:26:15 +0300 Subject: [PATCH 4/5] update docs --- cpuid-bool/src/lib.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpuid-bool/src/lib.rs b/cpuid-bool/src/lib.rs index 314b1476..be0b71b3 100644 --- a/cpuid-bool/src/lib.rs +++ b/cpuid-bool/src/lib.rs @@ -1,13 +1,14 @@ //! Macro for checking CPU capabilities at runtime. //! -//! # Usage example +//! # Example //! ``` +//! 
// This macro creates `cpuid_aes_sha` module //! cpuid_bool::new!(cpuid_aes_sha, "aes", "sha"); //! //! // `token` is a Zero Sized Type value, which guarantees //! // that underlying static storage got properly initialized, //! // which allows to omit initialization branch -//! let token = cpuid_aes_sha::init(); +//! let token: cpuid_aes_sha::InitToken = cpuid_aes_sha::init(); //! if token.get() { //! println!("CPU supports both SHA and AES extensions"); //! } else { From 81691bdc9a8c78243d816d264c602a6a35000279 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Sat, 28 Nov 2020 05:26:31 +0300 Subject: [PATCH 5/5] update Cargo.lock --- Cargo.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index e0766486..8195dd27 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -29,7 +29,7 @@ version = "0.2.0" [[package]] name = "cpuid-bool" -version = "0.1.2" +version = "0.2.0" [[package]] name = "dbl"