Skip to content

Commit be91954

Browse files
authored
Improve cpuid-bool efficiency (#86)
1 parent b292950 commit be91954

File tree

4 files changed

+103
-66
lines changed

4 files changed

+103
-66
lines changed

Cargo.lock

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cpuid-bool/CHANGELOG.md

+6
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8+
## 0.2.0 (2020-12-01)
9+
### Changed
10+
- Macro now creates a module with several functions and the `InitToken` type ([#86])
11+
12+
[#86]: https://github.com/RustCrypto/utils/pull/86
13+
814
## 0.1.2 (2020-07-20)
915
### Added
1016
- LICENSE files ([#70])

cpuid-bool/Cargo.toml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
[package]
22
name = "cpuid-bool"
3-
version = "0.1.2"
3+
version = "0.2.0"
44
authors = ["RustCrypto Developers"]
55
license = "MIT OR Apache-2.0"
6-
description = "A lightweight no-std compatible alternative to is_x86_feature_detected"
6+
description = "A lightweight and efficient no-std compatible alternative to the is_x86_feature_detected macro"
77
documentation = "https://docs.rs/cpuid-bool"
88
repository = "https://github.com/RustCrypto/utils"
99
keywords = ["cpuid", "target-feature"]

cpuid-bool/src/lib.rs

+94-63
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,118 @@
11
//! Macro for checking CPU capabilities at runtime.
22
//!
3-
//! # Usage example
3+
//! # Example
44
//! ```
5-
//! if cpuid_bool::cpuid_bool!("sha", "aes") {
5+
//! // This macro creates `cpuid_aes_sha` module
6+
//! cpuid_bool::new!(cpuid_aes_sha, "aes", "sha");
7+
//!
8+
//! // `token` is a zero-sized type (ZST) value which guarantees
9+
//! // that the underlying static storage has been properly initialized,
10+
//! // which allows the initialization branch to be omitted
11+
//! let token: cpuid_aes_sha::InitToken = cpuid_aes_sha::init();
12+
//! if token.get() {
613
//! println!("CPU supports both SHA and AES extensions");
714
//! } else {
815
//! println!("SHA and AES extensions are not supported");
916
//! }
17+
//!
18+
//! // If the stored value is needed only once, you can get it
19+
//! // without obtaining the token
20+
//! let val = cpuid_aes_sha::get();
21+
//! assert_eq!(val, token.get());
22+
//!
23+
//! // Additionally, you can get both the token and the value
24+
//! let (token, val) = cpuid_aes_sha::init_get();
25+
//! assert_eq!(val, token.get());
1026
//! ```
1127
//! Note that if all tested target features are enabled via compiler options
12-
//! (e.g. by using `RUSTFLAGS`), `cpuid_bool!` macro immediately will expand
13-
//! to `true` and will not use CPUID instruction. Such behavior allows
14-
//! compiler to eliminate fallback code.
28+
//! (e.g. by using `RUSTFLAGS`), the `get` method will always return `true`
29+
//! and `init` will not use the CPUID instruction. Such behavior allows
30+
//! the compiler to completely eliminate fallback code.
1531
//!
1632
//! After the first call, the macro caches the result and returns it in subsequent
1733
//! calls, thus runtime overhead for them is minimal.
1834
#![no_std]
1935
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
2036
compile_error!("This crate works only on x86 and x86-64 targets.");
2137

22-
use core::sync::atomic::{AtomicU8, Ordering::Relaxed};
38+
/// Create module with CPUID bool code.
39+
#[macro_export]
40+
macro_rules! new {
41+
($mod_name:ident, $($tf:tt),+ $(,)? ) => {
42+
mod $mod_name {
43+
use core::sync::atomic::{AtomicU8, Ordering::Relaxed};
44+
45+
const UNINIT: u8 = u8::max_value();
46+
static STORAGE: AtomicU8 = AtomicU8::new(UNINIT);
2347

24-
/// This structure represents a lazily initialized static boolean value.
25-
///
26-
/// Useful when it is preferable to just rerun initialization instead of
27-
/// locking. Used internally by the `cpuid_bool` macro.
28-
pub struct LazyBool(AtomicU8);
48+
/// Initialization token
49+
#[derive(Copy, Clone, Debug)]
50+
pub struct InitToken(());
51+
52+
impl InitToken {
53+
/// Get initialized value
54+
#[inline(always)]
55+
pub fn get(&self) -> bool {
56+
// CPUID is not available on SGX targets
57+
#[cfg(all(not(target_env = "sgx"), not(all($(target_feature=$tf, )*))))]
58+
let res = STORAGE.load(Relaxed) == 1;
59+
#[cfg(all(target_env = "sgx", not(all($(target_feature=$tf, )*))))]
60+
let res = false;
61+
#[cfg(all($(target_feature=$tf, )*))]
62+
let res = true;
63+
res
64+
}
65+
}
66+
67+
/// Initialize underlying storage if needed and get
68+
/// stored value and initialization token.
69+
#[inline]
70+
pub fn init_get() -> (InitToken, bool) {
71+
// CPUID is not available on SGX targets
72+
#[cfg(all(not(target_env = "sgx"), not(all($(target_feature=$tf, )*))))]
73+
let res = {
74+
#[cfg(target_arch = "x86")]
75+
use core::arch::x86::{__cpuid, __cpuid_count};
76+
#[cfg(target_arch = "x86_64")]
77+
use core::arch::x86_64::{__cpuid, __cpuid_count};
78+
79+
// Relaxed ordering is fine, as we only have a single atomic variable.
80+
let val = STORAGE.load(Relaxed);
81+
if val == UNINIT {
82+
#[allow(unused_variables)]
83+
let cr = unsafe {
84+
[__cpuid(1), __cpuid_count(7, 0)]
85+
};
86+
let res = $(cpuid_bool::check!(cr, $tf) & )+ true;
87+
STORAGE.store(res as u8, Relaxed);
88+
res
89+
} else {
90+
val == 1
91+
}
92+
};
93+
#[cfg(all(target_env = "sgx", not(all($(target_feature=$tf, )*))))]
94+
let res = false;
95+
#[cfg(all($(target_feature=$tf, )*))]
96+
let res = true;
2997

30-
impl LazyBool {
31-
const UNINIT: u8 = u8::max_value();
98+
(InitToken(()), res)
99+
}
32100

33-
pub const fn new() -> Self {
34-
Self(AtomicU8::new(Self::UNINIT))
35-
}
101+
/// Initialize underlying storage if needed and get
102+
/// initialization token.
103+
#[inline]
104+
pub fn init() -> InitToken {
105+
init_get().0
106+
}
36107

37-
// Runs the init() function at least once, returning the value of some run
38-
// of init(). Multiple callers can run their init() functions in parallel.
39-
// init() should always return the same value, if it succeeds.
40-
pub fn unsync_init(&self, init: impl FnOnce() -> bool) -> bool {
41-
// Relaxed ordering is fine, as we only have a single atomic variable.
42-
let mut val = self.0.load(Relaxed);
43-
if val == Self::UNINIT {
44-
val = init() as u8;
45-
self.0.store(val as u8, Relaxed);
108+
/// Initialize underlying storage if needed and get
109+
/// stored value.
110+
#[inline]
111+
pub fn get() -> bool {
112+
init_get().1
113+
}
46114
}
47-
val != 0
48-
}
115+
};
49116
}
50117

51118
// TODO: find how to define private macro usable inside a public one
@@ -83,39 +150,3 @@ expand_check_macro! {
83150
("adx", 1, ebx, 19),
84151
("sha", 1, ebx, 29),
85152
}
86-
87-
/// Check at runtime if CPU supports sequence of target features.
88-
///
89-
/// During first execution this macro will use CPUID to check requested
90-
/// target features, results will be cached and further calls will return
91-
/// it instead.
92-
#[macro_export]
93-
macro_rules! cpuid_bool {
94-
($($tf:tt),+ $(,)? ) => {{
95-
// CPUID is not available on SGX targets
96-
#[cfg(all(not(target_env = "sgx"), not(all($(target_feature=$tf, )*))))]
97-
let res = {
98-
#[cfg(target_arch = "x86")]
99-
use core::arch::x86::{__cpuid, __cpuid_count};
100-
#[cfg(target_arch = "x86_64")]
101-
use core::arch::x86_64::{__cpuid, __cpuid_count};
102-
103-
static CPUID_BOOL: cpuid_bool::LazyBool = cpuid_bool::LazyBool::new();
104-
CPUID_BOOL.unsync_init(|| {
105-
#[allow(unused_variables)]
106-
let cr = unsafe {
107-
[__cpuid(1), __cpuid_count(7, 0)]
108-
};
109-
// TODO: find how to remove `true`
110-
$(cpuid_bool::check!(cr, $tf) & )+ true
111-
})
112-
};
113-
114-
#[cfg(all(target_env = "sgx", not(all($(target_feature=$tf, )*))))]
115-
let res = false;
116-
#[cfg(all($(target_feature=$tf, )*))]
117-
let res = true;
118-
119-
res
120-
}};
121-
}

0 commit comments

Comments
 (0)