From b40f23bf9c3020ad78890a3b0baf73108cc4fe58 Mon Sep 17 00:00:00 2001
From: gnzlbg
Date: Thu, 16 Nov 2017 10:25:09 +0100
Subject: [PATCH 1/3] add nvptx architecture

---
Review notes (placed after the `---` separator, so they are not part of the
commit message):
- src/lib.rs: `pub use nvptx::*;` and `mod nvptx;` are added without a
  `#[cfg(target_arch = ...)]` attribute, while the `aarch64` items in the
  surrounding context carry one. NOTE(review): confirm that compiling and
  re-exporting this module on every target is intended.
- src/nvptx/mod.rs: a private `extern "C"` block binds thirteen functions to
  the names given in their `#[link_name]` attributes; each one is re-exposed
  through a `pub unsafe fn` wrapper whose name has a leading underscore and
  which is marked `#[inline(always)]`.
- The wrappers are `unsafe` but carry no `# Safety` section. TODO: document
  what callers must guarantee.

 src/lib.rs       |   4 ++
 src/nvptx/mod.rs | 109 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 113 insertions(+)
 create mode 100644 src/nvptx/mod.rs

diff --git a/src/lib.rs b/src/lib.rs
index e8dfc558d6..0ace4f8435 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -153,6 +153,8 @@ pub mod vendor {
 
     #[cfg(target_arch = "aarch64")]
     pub use aarch64::*;
+
+    pub use nvptx::*;
 }
 
 #[macro_use]
@@ -194,3 +196,5 @@ mod x86;
 mod arm;
 #[cfg(target_arch = "aarch64")]
 mod aarch64;
+
+mod nvptx;
diff --git a/src/nvptx/mod.rs b/src/nvptx/mod.rs
new file mode 100644
index 0000000000..f99f7ac1ba
--- /dev/null
+++ b/src/nvptx/mod.rs
@@ -0,0 +1,109 @@
+//! nvptx intrinsics
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.cuda.syncthreads"]
+    fn syncthreads() -> ();
+    #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.x"]
+    fn block_dim_x() -> i32;
+    #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.y"]
+    fn block_dim_y() -> i32;
+    #[link_name = "llvm.nvvm.read.ptx.sreg.ntid.z"]
+    fn block_dim_z() -> i32;
+    #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.x"]
+    fn block_idx_x() -> i32;
+    #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.y"]
+    fn block_idx_y() -> i32;
+    #[link_name = "llvm.nvvm.read.ptx.sreg.ctaid.z"]
+    fn block_idx_z() -> i32;
+    #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.x"]
+    fn grid_dim_x() -> i32;
+    #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.y"]
+    fn grid_dim_y() -> i32;
+    #[link_name = "llvm.nvvm.read.ptx.sreg.nctaid.z"]
+    fn grid_dim_z() -> i32;
+    #[link_name = "llvm.nvvm.read.ptx.sreg.tid.x"]
+    fn thread_idx_x() -> i32;
+    #[link_name = "llvm.nvvm.read.ptx.sreg.tid.y"]
+    fn thread_idx_y() -> i32;
+    #[link_name = "llvm.nvvm.read.ptx.sreg.tid.z"]
+    fn thread_idx_z() -> i32;
+}
+
+/// Synchronizes all threads in the block.
+#[inline(always)]
+pub unsafe fn _syncthreads() -> () {
+    syncthreads()
+}
+
+/// x-th thread-block dimension.
+#[inline(always)]
+pub unsafe fn _block_dim_x() -> i32 {
+    block_dim_x()
+}
+
+/// y-th thread-block dimension.
+#[inline(always)]
+pub unsafe fn _block_dim_y() -> i32 {
+    block_dim_y()
+}
+
+/// z-th thread-block dimension.
+#[inline(always)]
+pub unsafe fn _block_dim_z() -> i32 {
+    block_dim_z()
+}
+
+/// x-th thread-block index.
+#[inline(always)]
+pub unsafe fn _block_idx_x() -> i32 {
+    block_idx_x()
+}
+
+/// y-th thread-block index.
+#[inline(always)]
+pub unsafe fn _block_idx_y() -> i32 {
+    block_idx_y()
+}
+
+/// z-th thread-block index.
+#[inline(always)]
+pub unsafe fn _block_idx_z() -> i32 {
+    block_idx_z()
+}
+
+/// x-th block-grid dimension.
+#[inline(always)]
+pub unsafe fn _grid_dim_x() -> i32 {
+    grid_dim_x()
+}
+
+/// y-th block-grid dimension.
+#[inline(always)]
+pub unsafe fn _grid_dim_y() -> i32 {
+    grid_dim_y()
+}
+
+/// z-th block-grid dimension.
+#[inline(always)]
+pub unsafe fn _grid_dim_z() -> i32 {
+    grid_dim_z()
+}
+
+/// x-th thread index.
+#[inline(always)]
+pub unsafe fn _thread_idx_x() -> i32 {
+    thread_idx_x()
+}
+
+/// y-th thread index.
+#[inline(always)]
+pub unsafe fn _thread_idx_y() -> i32 {
+    thread_idx_y()
+}
+
+/// z-th thread index.
+#[inline(always)]
+pub unsafe fn _thread_idx_z() -> i32 {
+    thread_idx_z()
+}

From 9bd9ba43ff41b9c29c45ec47eebc5d05d0cb90ac Mon Sep 17 00:00:00 2001
From: gnzlbg
Date: Thu, 16 Nov 2017 11:23:56 +0100
Subject: [PATCH 2/3] add support for no_std

---
Review notes (placed after the `---` separator, so they are not part of the
commit message):
- Cargo.toml: adds an empty `std` feature. The file now ends without a
  trailing newline (see the `\ No newline at end of file` marker below).
- ci/run.sh: the two existing `cargo test` invocations (features "strict")
  are kept and two more with "strict,std" are added, so the suite runs both
  with and without the `std` feature.
- src/lib.rs: when `std` is disabled the crate becomes `#![no_std]` and
  `core` is imported under the name `std` - presumably so that existing
  `std::mem` / `std::ptr` paths keep resolving; confirm every `std::` path
  used by the crate exists in `core`.
- src/x86/mod.rs: without `std`, `c_void` is a local two-variant
  `#[repr(u8)]` enum declared `pub`; with `std` it is a private
  `use std::os::raw::c_void;`. NOTE(review): the item's visibility differs
  between the two configurations - confirm this is intended.
- src/x86/runtime.rs: the `runtime_detection_x86_nocapture` test is now
  compiled only when the `std` feature is enabled.
- src/x86/sse.rs, src/x86/sse2.rs: `c_void` is taken from the parent module
  (`super::c_void`) instead of `std::os::raw`.

 Cargo.toml         |  1 +
 ci/run.sh          |  3 +++
 src/lib.rs         |  4 ++++
 src/x86/mod.rs     | 11 +++++++++++
 src/x86/runtime.rs |  1 +
 src/x86/sse.rs     |  2 +-
 src/x86/sse2.rs    |  4 ++--
 7 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index a4d67f84be..874f9d4f08 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -32,3 +32,4 @@ cupid = "0.3"
 
 [features]
 strict = []
+std = []
\ No newline at end of file
diff --git a/ci/run.sh b/ci/run.sh
index c8b41e9c78..f4d3382cb0 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -19,3 +19,6 @@ echo "RUSTFLAGS=${RUSTFLAGS}"
 
 cargo test --target $TARGET --features "strict"
 cargo test --release --target $TARGET --features "strict"
+
+cargo test --target $TARGET --features "strict,std"
+cargo test --release --target $TARGET --features "strict,std"
diff --git a/src/lib.rs b/src/lib.rs
index 0ace4f8435..d9161168d8 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -128,6 +128,10 @@
                   cast_possible_truncation, cast_precision_loss, shadow_reuse,
                   cyclomatic_complexity, similar_names, doc_markdown,
                   many_single_char_names))]
+#![cfg_attr(not(feature = "std"), no_std)]
+
+#[cfg(not(feature = "std"))]
+extern crate core as std;
 
 #[cfg(test)]
 extern crate stdsimd_test;
diff --git a/src/x86/mod.rs b/src/x86/mod.rs
index ba84f9d890..96011521ab 100644
--- a/src/x86/mod.rs
+++ b/src/x86/mod.rs
@@ -41,3 +41,14 @@ mod abm;
 mod bmi;
 mod bmi2;
 mod tbm;
+
+#[allow(non_camel_case_types)]
+#[cfg(not(feature = "std"))]
+#[repr(u8)]
+pub enum c_void {
+    #[doc(hidden)] __variant1,
+    #[doc(hidden)] __variant2,
+}
+
+#[cfg(feature = "std")]
+use std::os::raw::c_void;
diff --git a/src/x86/runtime.rs b/src/x86/runtime.rs
index 4b7e3aa56e..1549c4f7a3 100644
--- a/src/x86/runtime.rs
+++ b/src/x86/runtime.rs
@@ -283,6 +283,7 @@ pub fn __unstable_detect_feature(x: __Feature) -> bool {
 
 #[cfg(test)]
 mod tests {
+    #[cfg(feature = "std")]
     #[test]
     fn runtime_detection_x86_nocapture() {
         println!("sse: {:?}", cfg_feature_enabled!("sse"));
diff --git a/src/x86/sse.rs b/src/x86/sse.rs
index 49e10efdd1..2d1be6d134 100644
--- a/src/x86/sse.rs
+++ b/src/x86/sse.rs
@@ -3,7 +3,7 @@
 use simd_llvm::simd_shuffle4;
 use v128::*;
 use v64::f32x2;
-use std::os::raw::c_void;
+use super::c_void;
 use std::mem;
 use std::ptr;
 
diff --git a/src/x86/sse2.rs b/src/x86/sse2.rs
index d1832f2b34..75e6ac58ce 100644
--- a/src/x86/sse2.rs
+++ b/src/x86/sse2.rs
@@ -4,7 +4,7 @@
 use stdsimd_test::assert_instr;
 
 use std::mem;
-use std::os::raw::c_void;
+use super::c_void;
 use std::ptr;
 
 use simd_llvm::{simd_cast, simd_shuffle16, simd_shuffle2, simd_shuffle4,
@@ -2242,7 +2242,7 @@ extern "C" {
 
 #[cfg(test)]
 mod tests {
-    use std::os::raw::c_void;
+    use super::c_void;
     use stdsimd_test::simd_test;
     use test::black_box; // Used to inhibit constant-folding.
 

From ef6284fcbd177c2c2ec43153d7ccb0cbcd269777 Mon Sep 17 00:00:00 2001
From: gnzlbg
Date: Thu, 16 Nov 2017 13:57:45 +0100
Subject: [PATCH 3/3] formatting

---
Review notes (placed after the `---` separator, so they are not part of the
commit message):
- Whitespace-only change: in each of the four hunks the removed and added
  lines differ only in the leading whitespace before `macro_rules!`.

 src/macros.rs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/macros.rs b/src/macros.rs
index c2018acc40..c2009fa939 100644
--- a/src/macros.rs
+++ b/src/macros.rs
@@ -485,7 +485,7 @@ macro_rules! test_arithmetic_ {
 
 #[cfg(test)]
 #[macro_export]
- macro_rules! test_neg_ {
+macro_rules! test_neg_ {
     ($tn:ident, $zero:expr, $one:expr, $two:expr, $four:expr) => {
         {
             let z = $tn::splat($zero);
@@ -573,7 +573,7 @@ macro_rules! test_bit_arithmetic_ {
 
 #[cfg(test)]
 #[macro_export]
- macro_rules! test_ops_si {
+macro_rules! test_ops_si {
     ($($tn:ident),+) => {
         $(
             test_arithmetic_!($tn, 0, 1, 2, 4);
@@ -585,7 +585,7 @@ macro_rules! test_bit_arithmetic_ {
 
 #[cfg(test)]
 #[macro_export]
- macro_rules! test_ops_ui {
+macro_rules! test_ops_ui {
     ($($tn:ident),+) => {
         $(
             test_arithmetic_!($tn, 0, 1, 2, 4);
@@ -596,7 +596,7 @@ macro_rules! test_bit_arithmetic_ {
 
 #[cfg(test)]
 #[macro_export]
- macro_rules! test_ops_f {
+macro_rules! test_ops_f {
     ($($tn:ident),+) => {
         $(
             test_arithmetic_!($tn, 0., 1., 2., 4.);