Skip to content

Commit

Permalink
refactor the x86 module (rust-lang#195)
Browse files Browse the repository at this point in the history
* refactor the x86 module

* document the i686 check

* document strict and intel_sde feature

* document nvptx module
  • Loading branch information
gnzlbg authored and alexcrichton committed Nov 19, 2017
1 parent c68f66a commit d985a84
Show file tree
Hide file tree
Showing 29 changed files with 873 additions and 742 deletions.
6 changes: 5 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ stdsimd-test = { version = "0.*", path = "stdsimd-test" }
cupid = "0.4.0"

[features]
strict = []
std = []

# Internal-only: denies all warnings.
strict = []
# Internal-only: enables only those intrinsics supported by Intel's
# Software Development Environment (SDE).
intel_sde = []
2 changes: 1 addition & 1 deletion ci/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ case ${TARGET} in
esac

FEATURES="strict,$FEATURES"
FEATURES_STD="${FEATURES},std"
FEATURES_STD="$std,${FEATURES}"

echo "RUSTFLAGS=${RUSTFLAGS}"
echo "FEATURES=${FEATURES}"
Expand Down
12 changes: 11 additions & 1 deletion src/nvptx/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
//! nvptx intrinsics
//! NVPTX intrinsics (experimental)
//!
//! These intrinsics form the foundation of the CUDA
//! programming model.
//!
//! The reference is the [CUDA C Programming Guide][cuda_c]. Relevant is also the [LLVM NVPTX Backend documentation][llvm_docs].
//!
//! [cuda_c]:
//! http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html
//! [llvm_docs]:
//! https://llvm.org/docs/NVPTXUsage.html
#[allow(improper_ctypes)]
extern "C" {
Expand Down
4 changes: 2 additions & 2 deletions src/x86/ia32.rs → src/x86/i386/eflags.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//! `i386/ia32` intrinsics
//! `i386` intrinsics
/// Reads EFLAGS.
#[cfg(target_arch = "x86")]
Expand Down Expand Up @@ -34,7 +34,7 @@ pub unsafe fn __writeeflags(eflags: u64) {

#[cfg(test)]
mod tests {
use super::*;
use x86::i386::*;

#[test]
fn test_eflags() {
Expand Down
4 changes: 4 additions & 0 deletions src/x86/i386/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
//! `i386` intrinsics
mod eflags;
pub use self::eflags::*;
2 changes: 1 addition & 1 deletion src/x86/abm.rs → src/x86/i586/abm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ pub unsafe fn _popcnt64(x: u64) -> u64 {
mod tests {
use stdsimd_test::simd_test;

use x86::abm;
use x86::i586::abm;

#[simd_test = "lzcnt"]
unsafe fn _lzcnt_u32() {
Expand Down
30 changes: 15 additions & 15 deletions src/x86/avx.rs → src/x86/i586/avx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -986,7 +986,7 @@ pub unsafe fn _mm256_permute_ps(a: f32x8, imm8: i32) -> f32x8 {
#[target_feature = "+avx,+sse"]
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))]
pub unsafe fn _mm_permute_ps(a: f32x4, imm8: i32) -> f32x4 {
use x86::sse::_mm_undefined_ps;
use x86::i586::sse::_mm_undefined_ps;

let imm8 = (imm8 & 0xFF) as u8;
macro_rules! shuffle4 {
Expand Down Expand Up @@ -1100,7 +1100,7 @@ pub unsafe fn _mm256_permute_pd(a: f64x4, imm8: i32) -> f64x4 {
#[target_feature = "+avx,+sse2"]
#[cfg_attr(test, assert_instr(vpermilpd, imm8 = 0x1))]
pub unsafe fn _mm_permute_pd(a: f64x2, imm8: i32) -> f64x2 {
use x86::sse2::_mm_undefined_pd;
use x86::i586::sse2::_mm_undefined_pd;

let imm8 = (imm8 & 0xFF) as u8;
macro_rules! shuffle2 {
Expand Down Expand Up @@ -2159,7 +2159,7 @@ pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
#[inline(always)]
#[target_feature = "+avx,+sse"]
pub unsafe fn _mm256_zextps128_ps256(a: f32x4) -> f32x8 {
use x86::sse::_mm_setzero_ps;
use x86::i586::sse::_mm_setzero_ps;
simd_shuffle8(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7])
}

Expand All @@ -2169,7 +2169,7 @@ pub unsafe fn _mm256_zextps128_ps256(a: f32x4) -> f32x8 {
#[inline(always)]
#[target_feature = "+avx,+sse2"]
pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
use x86::sse2::_mm_setzero_si128;
use x86::i586::sse2::_mm_setzero_si128;
let b = mem::transmute(_mm_setzero_si128());
let dst: i64x4 = simd_shuffle4(i64x2::from(a), b, [0, 1, 2, 3]);
__m256i::from(dst)
Expand All @@ -2182,7 +2182,7 @@ pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
#[inline(always)]
#[target_feature = "+avx,+sse2"]
pub unsafe fn _mm256_zextpd128_pd256(a: f64x2) -> f64x4 {
use x86::sse2::_mm_setzero_pd;
use x86::i586::sse2::_mm_setzero_pd;
simd_shuffle4(a, _mm_setzero_pd(), [0, 1, 2, 3])
}

Expand Down Expand Up @@ -2268,7 +2268,7 @@ pub unsafe fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i {
pub unsafe fn _mm256_loadu2_m128(
hiaddr: *const f32, loaddr: *const f32
) -> f32x8 {
use x86::sse::_mm_loadu_ps;
use x86::i586::sse::_mm_loadu_ps;
let a = _mm256_castps128_ps256(_mm_loadu_ps(loaddr));
_mm256_insertf128_ps(a, _mm_loadu_ps(hiaddr), 1)
}
Expand All @@ -2282,7 +2282,7 @@ pub unsafe fn _mm256_loadu2_m128(
pub unsafe fn _mm256_loadu2_m128d(
hiaddr: *const f64, loaddr: *const f64
) -> f64x4 {
use x86::sse2::_mm_loadu_pd;
use x86::i586::sse2::_mm_loadu_pd;
let a = _mm256_castpd128_pd256(_mm_loadu_pd(loaddr));
_mm256_insertf128_pd(a, _mm_loadu_pd(hiaddr), 1)
}
Expand All @@ -2295,7 +2295,7 @@ pub unsafe fn _mm256_loadu2_m128d(
pub unsafe fn _mm256_loadu2_m128i(
hiaddr: *const __m128i, loaddr: *const __m128i
) -> __m256i {
use x86::sse2::_mm_loadu_si128;
use x86::i586::sse2::_mm_loadu_si128;
let a = _mm256_castsi128_si256(_mm_loadu_si128(loaddr));
_mm256_insertf128_si256(a, _mm_loadu_si128(hiaddr), 1)
}
Expand All @@ -2309,7 +2309,7 @@ pub unsafe fn _mm256_loadu2_m128i(
pub unsafe fn _mm256_storeu2_m128(
hiaddr: *mut f32, loaddr: *mut f32, a: f32x8
) {
use x86::sse::_mm_storeu_ps;
use x86::i586::sse::_mm_storeu_ps;
let lo = _mm256_castps256_ps128(a);
_mm_storeu_ps(loaddr, lo);
let hi = _mm256_extractf128_ps(a, 1);
Expand All @@ -2325,7 +2325,7 @@ pub unsafe fn _mm256_storeu2_m128(
pub unsafe fn _mm256_storeu2_m128d(
hiaddr: *mut f64, loaddr: *mut f64, a: f64x4
) {
use x86::sse2::_mm_storeu_pd;
use x86::i586::sse2::_mm_storeu_pd;
let lo = _mm256_castpd256_pd128(a);
_mm_storeu_pd(loaddr, lo);
let hi = _mm256_extractf128_pd(a, 1);
Expand All @@ -2340,7 +2340,7 @@ pub unsafe fn _mm256_storeu2_m128d(
pub unsafe fn _mm256_storeu2_m128i(
hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i
) {
use x86::sse2::_mm_storeu_si128;
use x86::i586::sse2::_mm_storeu_si128;
let lo = _mm256_castsi256_si128(a);
_mm_storeu_si128(loaddr, lo);
let hi = _mm256_extractf128_si256(a, 1);
Expand Down Expand Up @@ -2501,7 +2501,7 @@ mod tests {

use v128::{f32x4, f64x2, i32x4, i64x2, i8x16};
use v256::*;
use x86::avx;
use x86::i586::avx;
use x86::{__m128i, __m256i};

#[simd_test = "avx"]
Expand Down Expand Up @@ -4173,7 +4173,7 @@ mod tests {

#[simd_test = "avx"]
unsafe fn _mm256_storeu2_m128() {
use x86::sse::_mm_undefined_ps;
use x86::i586::sse::_mm_undefined_ps;
let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
let mut hi = _mm_undefined_ps();
let mut lo = _mm_undefined_ps();
Expand All @@ -4188,7 +4188,7 @@ mod tests {

#[simd_test = "avx"]
unsafe fn _mm256_storeu2_m128d() {
use x86::sse2::_mm_undefined_pd;
use x86::i586::sse2::_mm_undefined_pd;
let a = f64x4::new(1., 2., 3., 4.);
let mut hi = _mm_undefined_pd();
let mut lo = _mm_undefined_pd();
Expand All @@ -4203,7 +4203,7 @@ mod tests {

#[simd_test = "avx"]
unsafe fn _mm256_storeu2_m128i() {
use x86::sse2::_mm_undefined_si128;
use x86::i586::sse2::_mm_undefined_si128;
#[cfg_attr(rustfmt, rustfmt_skip)]
let a = i8x32::new(
1, 2, 3, 4, 5, 6, 7, 8,
Expand Down
2 changes: 1 addition & 1 deletion src/x86/avx2.rs → src/x86/i586/avx2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2217,7 +2217,7 @@ mod tests {

use v256::*;
use v128::*;
use x86::avx2;
use x86::i586::avx2;
use x86::__m256i;
use std;

Expand Down
2 changes: 1 addition & 1 deletion src/x86/bmi.rs → src/x86/i586/bmi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ extern "C" {
mod tests {
use stdsimd_test::simd_test;

use x86::bmi;
use x86::i586::bmi;

#[simd_test = "bmi"]
unsafe fn _bextr_u32() {
Expand Down
2 changes: 1 addition & 1 deletion src/x86/bmi2.rs → src/x86/i586/bmi2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ extern "C" {
mod tests {
use stdsimd_test::simd_test;

use x86::bmi2;
use x86::i586::bmi2;

#[simd_test = "bmi2"]
unsafe fn _pext_u32() {
Expand Down
10 changes: 5 additions & 5 deletions src/x86/cpuid.rs → src/x86/i586/cpuid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ pub fn has_cpuid() -> bool {
}
#[cfg(target_arch = "x86")]
{
use super::ia32::{__readeflags, __writeeflags};
use x86::i386::{__readeflags, __writeeflags};

// On `x86` the `cpuid` instruction is not always available.
// This follows the approach indicated in:
Expand Down Expand Up @@ -119,23 +119,23 @@ pub unsafe fn __get_cpuid_max(leaf: u32) -> (u32, u32) {

#[cfg(test)]
mod tests {
use super::*;
use x86::i586::cpuid;

#[test]
fn test_always_has_cpuid() {
// all currently-tested targets have the instruction
// FIXME: add targets without `cpuid` to CI
assert!(has_cpuid());
assert!(cpuid::has_cpuid());
}

#[cfg(target_arch = "x86")]
#[test]
fn test_has_cpuid() {
use vendor::__readeflags;
use x86::i386::__readeflags;
unsafe {
let before = __readeflags();

if has_cpuid() {
if cpuid::has_cpuid() {
assert!(before != __readeflags());
} else {
assert!(before == __readeflags());
Expand Down
39 changes: 39 additions & 0 deletions src/x86/i586/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
//! `i586` intrinsics
pub use self::cpuid::*;
pub use self::xsave::*;

pub use self::sse::*;
pub use self::sse2::*;
pub use self::sse3::*;
pub use self::ssse3::*;
pub use self::sse41::*;
pub use self::sse42::*;
pub use self::avx::*;
pub use self::avx2::*;

pub use self::abm::*;
pub use self::bmi::*;
pub use self::bmi2::*;

#[cfg(not(feature = "intel_sde"))]
pub use self::tbm::*;

mod cpuid;
mod xsave;

mod sse;
mod sse2;
mod sse3;
mod ssse3;
mod sse41;
mod sse42;
mod avx;
mod avx2;

mod abm;
mod bmi;
mod bmi2;

#[cfg(not(feature = "intel_sde"))]
mod tbm;
Loading

0 comments on commit d985a84

Please sign in to comment.