|
| 1 | +//! SIMD support |
| 2 | +//! |
| 3 | +//! This crate provides the fundamentals of supporting SIMD in Rust. This crate |
| 4 | +//! should compile on all platforms and provide `simd` and `vendor` modules at |
| 5 | +//! the top-level. The `simd` module contains *portable vector types* which |
| 6 | +//! should work across all platforms and be implemented in the most efficient |
| 7 | +//! manner possible for the platform at hand. The `vendor` module contains |
| 8 | +//! vendor intrinsics that operate over these SIMD types, typically |
| 9 | +//! corresponding to a particular CPU instruction |
| 10 | +//! |
| 11 | +//! ```rust |
| 12 | +//! extern crate coresimd as stdsimd; |
| 13 | +//! use stdsimd::simd::u32x4; |
| 14 | +//! |
| 15 | +//! fn main() { |
| 16 | +//! let a = u32x4::new(1, 2, 3, 4); |
| 17 | +//! let b = u32x4::splat(10); |
| 18 | +//! assert_eq!(a + b, u32x4::new(11, 12, 13, 14)); |
| 19 | +//! } |
| 20 | +//! ``` |
| 21 | +//! |
| 22 | +//! > **Note**: This crate is *nightly only* at the moment, and requires a |
| 23 | +//! > nightly rust toolchain to compile. |
| 24 | +//! |
| 25 | +//! This documentation is only for one particular architecture, you can find |
| 26 | +//! others at: |
| 27 | +//! |
| 28 | +//! * [i686](https://rust-lang-nursery.github.io/stdsimd/i686/stdsimd/) |
| 29 | +//! * [`x86_64`](https://rust-lang-nursery.github.io/stdsimd/x86_64/stdsimd/) |
| 30 | +//! * [arm](https://rust-lang-nursery.github.io/stdsimd/arm/stdsimd/) |
| 31 | +//! * [aarch64](https://rust-lang-nursery.github.io/stdsimd/aarch64/stdsimd/) |
| 32 | +//! |
| 33 | +//! ## Portability |
| 34 | +//! |
| 35 | +//! The `simd` module and its types should be portable to all platforms. The |
| 36 | +//! runtime characteristics of these types may vary per platform and per CPU |
| 37 | +//! feature enabled, but they should always have the most optimized |
| 38 | +//! implementation for the target at hand. |
| 39 | +//! |
| 40 | +//! The `vendor` module provides no portability guarantees. The `vendor` module |
| 41 | +//! is per CPU architecture currently and provides intrinsics corresponding to |
| 42 | +//! functions for that particular CPU architecture. Note that the functions |
| 43 | +//! provided in this module are intended to correspond to CPU instructions and |
| 44 | +//! have no runtime support for whether you CPU actually supports the |
| 45 | +//! instruction. |
| 46 | +//! |
| 47 | +//! CPU target feature detection is done via the `cfg_feature_enabled!` macro |
| 48 | +//! at runtime. This macro will detect at runtime whether the specified feature |
| 49 | +//! is available or not, returning true or false depending on the current CPU. |
| 50 | +//! |
| 51 | +//! ``` |
| 52 | +//! #![feature(cfg_target_feature)] |
| 53 | +//! |
| 54 | +//! #[macro_use] |
| 55 | +//! extern crate coresimd as stdsimd; |
| 56 | +//! |
| 57 | +//! fn main() { |
| 58 | +//! if cfg_feature_enabled!("avx2") { |
| 59 | +//! println!("avx2 intrinsics will work"); |
| 60 | +//! } else { |
| 61 | +//! println!("avx2 intrinsics will not work"); |
| 62 | +//! // undefined behavior: may generate a `SIGILL`. |
| 63 | +//! } |
| 64 | +//! } |
| 65 | +//! ``` |
| 66 | +//! |
| 67 | +//! After verifying that a specified feature is available, use `target_feature` |
| 68 | +//! to enable a given feature and use the desired intrinsic. |
| 69 | +//! |
| 70 | +//! ```ignore |
| 71 | +//! # #![feature(cfg_target_feature)] |
| 72 | +//! # #![feature(target_feature)] |
| 73 | +//! # #[macro_use] |
| 74 | +//! # extern crate coresimd as stdsimd; |
| 75 | +//! # fn main() { |
| 76 | +//! # if cfg_feature_enabled!("avx2") { |
| 77 | +//! // avx2 specific code may be used in this function |
| 78 | +//! #[target_feature = "+avx2"] |
| 79 | +//! fn and_256() { |
| 80 | +//! // avx2 feature specific intrinsics will work here! |
| 81 | +//! use stdsimd::vendor::{__m256i, _mm256_and_si256}; |
| 82 | +//! |
| 83 | +//! let a = __m256i::splat(5); |
| 84 | +//! let b = __m256i::splat(3); |
| 85 | +//! |
| 86 | +//! let got = unsafe { _mm256_and_si256(a, b) }; |
| 87 | +//! |
| 88 | +//! assert_eq!(got, __m256i::splat(1)); |
| 89 | +//! } |
| 90 | +//! # and_256(); |
| 91 | +//! # } |
| 92 | +//! # } |
| 93 | +//! ``` |
| 94 | +//! |
| 95 | +//! # Status |
| 96 | +//! |
| 97 | +//! This crate is intended for eventual inclusion into the standard library, |
| 98 | +//! but some work and experimentation is needed to get there! First and |
| 99 | +//! foremost you can help out by kicking the tires on this crate and seeing if |
| 100 | +//! it works for your use case! Next up you can help us fill out the [vendor |
| 101 | +//! intrinsics][vendor] to ensure that we've got all the SIMD support |
| 102 | +//! necessary. |
| 103 | +//! |
| 104 | +//! The language support and status of SIMD is also still a little up in the |
| 105 | +//! air right now, you may be interested in a few issues along these lines: |
| 106 | +//! |
| 107 | +//! * [Overal tracking issue for SIMD support][simd_tracking_issue] |
| 108 | +//! * [`cfg_target_feature` tracking issue][cfg_target_feature_issue] |
| 109 | +//! * [SIMD types currently not sound][simd_soundness_bug] |
| 110 | +//! * [`#[target_feature]` improvements][target_feature_impr] |
| 111 | +//! |
| 112 | +//! [vendor]: https://github.com/rust-lang-nursery/stdsimd/issues/40 |
| 113 | +//! [simd_tracking_issue]: https://github.com/rust-lang/rust/issues/27731 |
| 114 | +//! [cfg_target_feature_issue]: https://github.com/rust-lang/rust/issues/29717 |
| 115 | +//! [simd_soundness_bug]: https://github.com/rust-lang/rust/issues/44367 |
| 116 | +//! [target_feature_impr]: https://github.com/rust-lang/rust/issues/44839 |
| 117 | +
|
| 118 | +#![cfg_attr(feature = "strict", deny(warnings))] |
| 119 | +#![allow(dead_code)] |
| 120 | +#![allow(unused_features)] |
| 121 | +#![feature(const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd, |
| 122 | + simd_ffi, target_feature, cfg_target_feature, i128_type, asm, |
| 123 | + const_atomic_usize_new, stmt_expr_attributes)] |
| 124 | +#![cfg_attr(test, feature(proc_macro, test, repr_align, attr_literals))] |
| 125 | +#![cfg_attr(feature = "cargo-clippy", |
| 126 | + allow(inline_always, too_many_arguments, cast_sign_loss, |
| 127 | + cast_lossless, cast_possible_wrap, |
| 128 | + cast_possible_truncation, cast_precision_loss, |
| 129 | + shadow_reuse, cyclomatic_complexity, similar_names, |
| 130 | + many_single_char_names))] |
| 131 | +#![no_std] |
| 132 | + |
| 133 | +#[cfg(test)] |
| 134 | +#[macro_use] |
| 135 | +extern crate std; |
| 136 | + |
| 137 | +#[cfg(test)] |
| 138 | +extern crate stdsimd_test; |
| 139 | + |
| 140 | +#[cfg(test)] |
| 141 | +extern crate test; |
| 142 | + |
| 143 | +/// Platform independent SIMD vector types and operations. |
| 144 | +pub mod simd { |
| 145 | + pub use v128::*; |
| 146 | + pub use v256::*; |
| 147 | + pub use v512::*; |
| 148 | + pub use v64::*; |
| 149 | +} |
| 150 | + |
| 151 | +/// Platform dependent vendor intrinsics. |
| 152 | +pub mod vendor { |
| 153 | + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
| 154 | + pub use x86::*; |
| 155 | + |
| 156 | + #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] |
| 157 | + pub use arm::*; |
| 158 | + |
| 159 | + #[cfg(target_arch = "aarch64")] |
| 160 | + pub use aarch64::*; |
| 161 | + |
| 162 | + // FIXME: rust does not expose the nvptx and nvptx64 targets yet |
| 163 | + #[cfg(not(any(target_arch = "x86", target_arch = "x86_64", |
| 164 | + target_arch = "arm", target_arch = "aarch64")))] |
| 165 | + pub use nvptx::*; |
| 166 | + |
| 167 | + #[cfg( |
| 168 | + // x86/x86_64: |
| 169 | + any(target_arch = "x86", target_arch = "x86_64") |
| 170 | + )] |
| 171 | + pub use runtime::{__unstable_detect_feature, __Feature}; |
| 172 | +} |
| 173 | + |
| 174 | +#[cfg( |
| 175 | + // x86/x86_64: |
| 176 | + any(target_arch = "x86", target_arch = "x86_64") |
| 177 | +)] |
| 178 | +#[macro_use] |
| 179 | +mod runtime; |
| 180 | + |
| 181 | +#[macro_use] |
| 182 | +mod macros; |
| 183 | +mod simd_llvm; |
| 184 | +mod v128; |
| 185 | +mod v256; |
| 186 | +mod v512; |
| 187 | +mod v64; |
| 188 | + |
| 189 | +/// 32-bit wide vector tpyes |
| 190 | +mod v32 { |
| 191 | + use simd_llvm::*; |
| 192 | + |
| 193 | + define_ty! { i16x2, i16, i16 } |
| 194 | + define_impl! { i16x2, i16, 2, i16x2, x0, x1 } |
| 195 | + define_ty! { u16x2, u16, u16 } |
| 196 | + define_impl! { u16x2, u16, 2, i16x2, x0, x1 } |
| 197 | + |
| 198 | + define_ty! { i8x4, i8, i8, i8, i8 } |
| 199 | + define_impl! { i8x4, i8, 4, i8x4, x0, x1, x2, x3 } |
| 200 | + define_ty! { u8x4, u8, u8, u8, u8 } |
| 201 | + define_impl! { u8x4, u8, 4, i8x4, x0, x1, x2, x3 } |
| 202 | + |
| 203 | + define_casts!( |
| 204 | + (i16x2, i64x2, as_i64x2), |
| 205 | + (u16x2, i64x2, as_i64x2), |
| 206 | + (i8x4, i32x4, as_i32x4), |
| 207 | + (u8x4, i32x4, as_i32x4) |
| 208 | + ); |
| 209 | +} |
| 210 | + |
| 211 | +/// 16-bit wide vector tpyes |
| 212 | +mod v16 { |
| 213 | + use simd_llvm::*; |
| 214 | + |
| 215 | + define_ty! { i8x2, i8, i8 } |
| 216 | + define_impl! { i8x2, i8, 2, i8x2, x0, x1 } |
| 217 | + define_ty! { u8x2, u8, u8 } |
| 218 | + define_impl! { u8x2, u8, 2, i8x2, x0, x1 } |
| 219 | + |
| 220 | + define_casts!((i8x2, i64x2, as_i64x2), (u8x2, i64x2, as_i64x2)); |
| 221 | +} |
| 222 | + |
| 223 | +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] |
| 224 | +mod x86; |
| 225 | + |
| 226 | +#[cfg(any(target_arch = "arm", target_arch = "aarch64"))] |
| 227 | +mod arm; |
| 228 | +#[cfg(target_arch = "aarch64")] |
| 229 | +mod aarch64; |
| 230 | + |
| 231 | +mod nvptx; |
0 commit comments