Skip to content

Improve cpuid-bool efficiency #86

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Nov 29, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions cpuid-bool/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## 0.2.0 (2020-12-01)
### Changed
- Macro now creates a module with several functions and the `InitToken` type ([#86])

[#86]: https://github.com/RustCrypto/utils/pull/86

## 0.1.2 (2020-07-20)
### Added
- LICENSE files ([#70])
Expand Down
4 changes: 2 additions & 2 deletions cpuid-bool/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
[package]
name = "cpuid-bool"
version = "0.1.2"
version = "0.2.0"
authors = ["RustCrypto Developers"]
license = "MIT OR Apache-2.0"
description = "A lightweight no-std compatible alternative to is_x86_feature_detected"
description = "A lightweight and efficient no-std compatible alternative to the is_x86_feature_detected macro"
documentation = "https://docs.rs/cpuid-bool"
repository = "https://github.com/RustCrypto/utils"
keywords = ["cpuid", "target-feature"]
Expand Down
157 changes: 94 additions & 63 deletions cpuid-bool/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,51 +1,118 @@
//! Macro for checking CPU capabilities at runtime.
//!
//! # Usage example
//! # Example
//! ```
//! if cpuid_bool::cpuid_bool!("sha", "aes") {
//! // This macro creates `cpuid_aes_sha` module
//! cpuid_bool::new!(cpuid_aes_sha, "aes", "sha");
//!
//! // `token` is a zero-sized value which guarantees that the
//! // underlying static storage has been properly initialized,
//! // which allows the initialization branch to be omitted
//! let token: cpuid_aes_sha::InitToken = cpuid_aes_sha::init();
//! if token.get() {
//! println!("CPU supports both SHA and AES extensions");
//! } else {
//! println!("SHA and AES extensions are not supported");
//! }
//!
//! // If the stored value is needed only once, you can get it
//! // directly, without keeping the token
//! let val = cpuid_aes_sha::get();
//! assert_eq!(val, token.get());
//!
//! // Additionally you can get both token and value
//! let (token, val) = cpuid_aes_sha::init_get();
//! assert_eq!(val, token.get());
//! ```
//! Note that if all tested target features are enabled via compiler options
//! (e.g. by using `RUSTFLAGS`), the `cpuid_bool!` macro will immediately expand
//! to `true` and will not use CPUID instruction. Such behavior allows
//! compiler to eliminate fallback code.
//! (e.g. by using `RUSTFLAGS`), the `get` method will always return `true`
//! and `init` will not use the CPUID instruction. Such behavior allows
//! the compiler to completely eliminate fallback code.
//!
//! After the first call the macro caches the result and returns it in
//! subsequent calls, so their runtime overhead is minimal.
#![no_std]
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
compile_error!("This crate works only on x86 and x86-64 targets.");

use core::sync::atomic::{AtomicU8, Ordering::Relaxed};
/// Create module with CPUID bool code.
#[macro_export]
macro_rules! new {
($mod_name:ident, $($tf:tt),+ $(,)? ) => {
mod $mod_name {
use core::sync::atomic::{AtomicU8, Ordering::Relaxed};

const UNINIT: u8 = u8::max_value();
static STORAGE: AtomicU8 = AtomicU8::new(UNINIT);

/// This structure represents a lazily initialized static boolean value.
///
/// Useful when it is preferable to just rerun initialization instead of
/// locking. Used internally by the `cpuid_bool` macro.
pub struct LazyBool(AtomicU8);
/// Initialization token
#[derive(Copy, Clone, Debug)]
pub struct InitToken(());

impl InitToken {
/// Get initialized value
#[inline(always)]
pub fn get(&self) -> bool {
// CPUID is not available on SGX targets
#[cfg(all(not(target_env = "sgx"), not(all($(target_feature=$tf, )*))))]
let res = STORAGE.load(Relaxed) == 1;
#[cfg(all(target_env = "sgx", not(all($(target_feature=$tf, )*))))]
let res = false;
#[cfg(all($(target_feature=$tf, )*))]
let res = true;
res
}
}

/// Initialize underlying storage if needed and get
/// stored value and initialization token.
#[inline]
pub fn init_get() -> (InitToken, bool) {
// CPUID is not available on SGX targets
#[cfg(all(not(target_env = "sgx"), not(all($(target_feature=$tf, )*))))]
let res = {
#[cfg(target_arch = "x86")]
use core::arch::x86::{__cpuid, __cpuid_count};
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::{__cpuid, __cpuid_count};

// Relaxed ordering is fine, as we only have a single atomic variable.
let val = STORAGE.load(Relaxed);
if val == UNINIT {
#[allow(unused_variables)]
let cr = unsafe {
[__cpuid(1), __cpuid_count(7, 0)]
};
let res = $(cpuid_bool::check!(cr, $tf) & )+ true;
STORAGE.store(res as u8, Relaxed);
res
} else {
val == 1
}
};
#[cfg(all(target_env = "sgx", not(all($(target_feature=$tf, )*))))]
let res = false;
#[cfg(all($(target_feature=$tf, )*))]
let res = true;

impl LazyBool {
const UNINIT: u8 = u8::max_value();
(InitToken(()), res)
}

pub const fn new() -> Self {
Self(AtomicU8::new(Self::UNINIT))
}
/// Initialize underlying storage if needed and get
/// initialization token.
#[inline]
pub fn init() -> InitToken {
init_get().0
}

// Runs the init() function at least once, returning the value of some run
// of init(). Multiple callers can run their init() functions in parallel.
// init() should always return the same value, if it succeeds.
pub fn unsync_init(&self, init: impl FnOnce() -> bool) -> bool {
// Relaxed ordering is fine, as we only have a single atomic variable.
let mut val = self.0.load(Relaxed);
if val == Self::UNINIT {
val = init() as u8;
self.0.store(val as u8, Relaxed);
/// Initialize underlying storage if needed and get
/// stored value.
#[inline]
pub fn get() -> bool {
init_get().1
}
}
val != 0
}
};
}

// TODO: find how to define private macro usable inside a public one
Expand Down Expand Up @@ -83,39 +150,3 @@ expand_check_macro! {
("adx", 1, ebx, 19),
("sha", 1, ebx, 29),
}

/// Check at runtime if the CPU supports a sequence of target features.
///
/// Accepts one or more target-feature tokens (e.g. `"aes", "sha"`), with an
/// optional trailing comma, and evaluates to a `bool`:
///
/// - if every listed feature is enabled at compile time, the macro expands
///   to `true` and no CPUID query is emitted;
/// - otherwise, on `target_env = "sgx"` it expands to `false`, since CPUID
///   is not available there;
/// - otherwise, the first execution uses CPUID to check the requested
///   features; the result is cached in a `LazyBool` static local to the
///   expansion and later executions return the cached value.
///
/// Crate items are referenced through `$crate`, so the expansion resolves
/// even when the dependency is renamed or the macro is used inside this crate.
#[macro_export]
macro_rules! cpuid_bool {
    ($($tf:tt),+ $(,)? ) => {{
        // CPUID is not available on SGX targets
        #[cfg(all(not(target_env = "sgx"), not(all($(target_feature=$tf, )*))))]
        let res = {
            #[cfg(target_arch = "x86")]
            use core::arch::x86::{__cpuid, __cpuid_count};
            #[cfg(target_arch = "x86_64")]
            use core::arch::x86_64::{__cpuid, __cpuid_count};

            // One cache per macro expansion: the static lives inside this block.
            static CPUID_BOOL: $crate::LazyBool = $crate::LazyBool::new();
            CPUID_BOOL.unsync_init(|| {
                // NOTE(review): presumably some `check!` arms do not read `cr`,
                // hence the lint suppression; confirm against `check!`.
                #[allow(unused_variables)]
                // SAFETY: relies on the CPUID instruction being available; the
                // SGX case, where it is not, is excluded by the `cfg` above.
                let cr = unsafe {
                    [__cpuid(1), __cpuid_count(7, 0)]
                };
                // Each check is followed by `&`, so a trailing `true` closes the chain.
                // TODO: find how to remove `true`
                $($crate::check!(cr, $tf) & )+ true
            })
        };

        #[cfg(all(target_env = "sgx", not(all($(target_feature=$tf, )*))))]
        let res = false;
        #[cfg(all($(target_feature=$tf, )*))]
        let res = true;

        res
    }};
}