Skip to content

Commit da9f72d

Browse files
authored
Start adding some avx512 intrinsics (#618)
First one is the quite simple `_mm512_abs_epi32` intrinsic!
1 parent 359b736 commit da9f72d

File tree

9 files changed

+257
-0
lines changed

9 files changed

+257
-0
lines changed

.appveyor.yml

+3
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@ environment:
44
# default so pass a flag to disable it to ensure our tests work ok.
55
RUSTFLAGS: -Clink-args=/OPT:NOICF
66

7+
# VS2017 looks to be the first with avx-512 support, notably in dumpbin
8+
APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
9+
710
matrix:
811
- TARGET: x86_64-pc-windows-msvc
912

Cargo.toml

+2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ exclude = [
1010
[profile.release]
1111
debug = true
1212
opt-level = 3
13+
incremental = true
1314

1415
[profile.bench]
1516
debug = 1
1617
opt-level = 3
18+
incremental = true

coresimd/simd.rs

+8
Original file line numberDiff line numberDiff line change
@@ -181,3 +181,11 @@ simd_ty!(i32x8[i32]:
181181
i32, i32, i32, i32, i32, i32, i32, i32
182182
| x0, x1, x2, x3, x4, x5, x6, x7);
183183
simd_ty!(i64x4[i64]: i64, i64, i64, i64 | x0, x1, x2, x3);
184+
185+
// 512-bit wide types:
186+
187+
simd_ty!(i32x16[i32]:
188+
i32, i32, i32, i32, i32, i32, i32, i32,
189+
i32, i32, i32, i32, i32, i32, i32, i32
190+
| x0, x1, x2, x3, x4, x5, x6, x7,
191+
x8, x9, x10, x11, x12, x13, x14, x15);

coresimd/x86/avx512f.rs

+189
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
use coresimd::simd::*;
2+
use coresimd::x86::*;
3+
use mem;
4+
5+
#[cfg(test)]
6+
use stdsimd_test::assert_instr;
7+
8+
/// Computes the absolute values of packed 32-bit integers in `a`.
9+
///
10+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33&text=_mm512_abs_epi32)
11+
#[inline]
12+
#[target_feature(enable = "avx512f")]
13+
#[cfg_attr(test, assert_instr(vpabsd))]
14+
pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i {
15+
mem::transmute(pabsd(a.as_i32x16(), _mm512_setzero_si512().as_i32x16(), -1))
16+
}
17+
18+
/// Compute the absolute value of packed 32-bit integers in `a`, and store the
19+
/// unsigned results in `dst` using writemask `k` (elements are copied from
20+
/// `src` when the corresponding mask bit is not set).
21+
///
22+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33&text=_mm512_abs_epi32)
23+
#[inline]
24+
#[target_feature(enable = "avx512f")]
25+
#[cfg_attr(test, assert_instr(vpabsd))]
26+
pub unsafe fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
27+
mem::transmute(pabsd(a.as_i32x16(), src.as_i32x16(), k))
28+
}
29+
30+
/// Compute the absolute value of packed 32-bit integers in `a`, and store the
31+
/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
32+
/// the corresponding mask bit is not set).
33+
///
34+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990,33,34,35,35&text=_mm512_maskz_abs_epi32)
35+
#[inline]
36+
#[target_feature(enable = "avx512f")]
37+
#[cfg_attr(test, assert_instr(vpabsd))]
38+
pub unsafe fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
39+
mem::transmute(pabsd(a.as_i32x16(), _mm512_setzero_si512().as_i32x16(), k))
40+
}
41+
42+
/// Return vector of type `__m512i` with all elements set to zero.
43+
///
44+
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#avx512techs=AVX512F&expand=33,34,4990&text=_mm512_setzero_si512)
45+
#[inline]
46+
#[target_feature(enable = "avx512f")]
47+
#[cfg_attr(test, assert_instr(vxorps))]
48+
pub unsafe fn _mm512_setzero_si512() -> __m512i {
49+
mem::zeroed()
50+
}
51+
52+
/// Set packed 32-bit integers in `dst` with the supplied values in reverse
53+
/// order.
54+
#[inline]
55+
#[target_feature(enable = "avx512f")]
56+
pub unsafe fn _mm512_setr_epi32(
57+
e15: i32,
58+
e14: i32,
59+
e13: i32,
60+
e12: i32,
61+
e11: i32,
62+
e10: i32,
63+
e9: i32,
64+
e8: i32,
65+
e7: i32,
66+
e6: i32,
67+
e5: i32,
68+
e4: i32,
69+
e3: i32,
70+
e2: i32,
71+
e1: i32,
72+
e0: i32,
73+
) -> __m512i {
74+
let r = i32x16(
75+
e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
76+
);
77+
mem::transmute(r)
78+
}
79+
80+
#[allow(improper_ctypes)]
81+
extern "C" {
82+
#[link_name = "llvm.x86.avx512.mask.pabs.d.512"]
83+
fn pabsd(a: i32x16, b: i32x16, c: i16) -> i32x16;
84+
}
85+
86+
#[cfg(test)]
87+
mod tests {
88+
use std;
89+
use stdsimd_test::simd_test;
90+
91+
use coresimd::x86::*;
92+
93+
#[simd_test(enable = "avx512f")]
94+
unsafe fn test_mm512_abs_epi32() {
95+
#[rustfmt::skip]
96+
let a = _mm512_setr_epi32(
97+
0, 1, -1, std::i32::MAX,
98+
std::i32::MIN, 100, -100, -32,
99+
0, 1, -1, std::i32::MAX,
100+
std::i32::MIN, 100, -100, -32,
101+
);
102+
let r = _mm512_abs_epi32(a);
103+
let e = _mm512_setr_epi32(
104+
0,
105+
1,
106+
1,
107+
std::i32::MAX,
108+
std::i32::MAX.wrapping_add(1),
109+
100,
110+
100,
111+
32,
112+
0,
113+
1,
114+
1,
115+
std::i32::MAX,
116+
std::i32::MAX.wrapping_add(1),
117+
100,
118+
100,
119+
32,
120+
);
121+
assert_eq_m512i(r, e);
122+
}
123+
124+
#[simd_test(enable = "avx512f")]
125+
unsafe fn test_mm512_mask_abs_epi32() {
126+
#[rustfmt::skip]
127+
let a = _mm512_setr_epi32(
128+
0, 1, -1, std::i32::MAX,
129+
std::i32::MIN, 100, -100, -32,
130+
0, 1, -1, std::i32::MAX,
131+
std::i32::MIN, 100, -100, -32,
132+
);
133+
let r = _mm512_mask_abs_epi32(a, 0, a);
134+
assert_eq_m512i(r, a);
135+
let r = _mm512_mask_abs_epi32(a, 0b11111111, a);
136+
let e = _mm512_setr_epi32(
137+
0,
138+
1,
139+
1,
140+
std::i32::MAX,
141+
std::i32::MAX.wrapping_add(1),
142+
100,
143+
100,
144+
32,
145+
0,
146+
1,
147+
-1,
148+
std::i32::MAX,
149+
std::i32::MIN,
150+
100,
151+
-100,
152+
-32,
153+
);
154+
assert_eq_m512i(r, e);
155+
}
156+
157+
#[simd_test(enable = "avx512f")]
158+
unsafe fn test_mm512_maskz_abs_epi32() {
159+
#[rustfmt::skip]
160+
let a = _mm512_setr_epi32(
161+
0, 1, -1, std::i32::MAX,
162+
std::i32::MIN, 100, -100, -32,
163+
0, 1, -1, std::i32::MAX,
164+
std::i32::MIN, 100, -100, -32,
165+
);
166+
let r = _mm512_maskz_abs_epi32(0, a);
167+
assert_eq_m512i(r, _mm512_setzero_si512());
168+
let r = _mm512_maskz_abs_epi32(0b11111111, a);
169+
let e = _mm512_setr_epi32(
170+
0,
171+
1,
172+
1,
173+
std::i32::MAX,
174+
std::i32::MAX.wrapping_add(1),
175+
100,
176+
100,
177+
32,
178+
0,
179+
0,
180+
0,
181+
0,
182+
0,
183+
0,
184+
0,
185+
0,
186+
);
187+
assert_eq_m512i(r, e);
188+
}
189+
}

coresimd/x86/mod.rs

+25
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,10 @@ types! {
391391
pub struct __m512d(f64, f64, f64, f64, f64, f64, f64, f64);
392392
}
393393

394+
/// The `__mmask16` type used in AVX-512 intrinsics, a 16-bit integer
395+
#[allow(non_camel_case_types)]
396+
pub type __mmask16 = i16;
397+
394398
#[cfg(test)]
395399
mod test;
396400
#[cfg(test)]
@@ -502,6 +506,24 @@ impl m256iExt for __m256i {
502506
}
503507
}
504508

509+
#[allow(non_camel_case_types)]
510+
#[unstable(feature = "stdimd_internal", issue = "0")]
511+
pub(crate) trait m512iExt: Sized {
512+
fn as_m512i(self) -> __m512i;
513+
514+
#[inline]
515+
fn as_i32x16(self) -> ::coresimd::simd::i32x16 {
516+
unsafe { mem::transmute(self.as_m512i()) }
517+
}
518+
}
519+
520+
impl m512iExt for __m512i {
521+
#[inline]
522+
fn as_m512i(self) -> Self {
523+
self
524+
}
525+
}
526+
505527
mod eflags;
506528
pub use self::eflags::*;
507529

@@ -580,3 +602,6 @@ use stdsimd_test::assert_instr;
580602
pub unsafe fn ud2() -> ! {
581603
::intrinsics::abort()
582604
}
605+
606+
mod avx512f;
607+
pub use self::avx512f::*;

coresimd/x86/test.rs

+8
Original file line numberDiff line numberDiff line change
@@ -135,3 +135,11 @@ mod x86_polyfill {
135135
pub use coresimd::x86_64::{_mm256_insert_epi64, _mm_insert_epi64};
136136
}
137137
pub use self::x86_polyfill::*;
138+
139+
pub unsafe fn assert_eq_m512i(a: __m512i, b: __m512i) {
140+
union A {
141+
a: __m512i,
142+
b: [i32; 16],
143+
}
144+
assert_eq!(A { a }.b, A { a: b }.b)
145+
}

crates/coresimd/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
sse4a_target_feature,
3434
arm_target_feature,
3535
aarch64_target_feature,
36+
avx512_target_feature,
3637
mips_target_feature,
3738
powerpc_target_feature,
3839
wasm_target_feature

crates/stdsimd-verify/src/lib.rs

+4
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,10 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
9898
"__m256" => quote! { &M256 },
9999
"__m256d" => quote! { &M256D },
100100
"__m256i" => quote! { &M256I },
101+
"__m512" => quote! { &M512 },
102+
"__m512d" => quote! { &M512D },
103+
"__m512i" => quote! { &M512I },
104+
"__mmask16" => quote! { &MMASK16 },
101105
"__m64" => quote! { &M64 },
102106
"bool" => quote! { &BOOL },
103107
"f32" => quote! { &F32 },

crates/stdsimd-verify/tests/x86-intel.rs

+17
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@ static M128D: Type = Type::M128D;
5454
static M256: Type = Type::M256;
5555
static M256I: Type = Type::M256I;
5656
static M256D: Type = Type::M256D;
57+
static M512: Type = Type::M512;
58+
static M512I: Type = Type::M512I;
59+
static M512D: Type = Type::M512D;
60+
static MMASK16: Type = Type::MMASK16;
5761

5862
static TUPLE: Type = Type::Tuple;
5963
static CPUID: Type = Type::CpuidResult;
@@ -72,6 +76,10 @@ enum Type {
7276
M256,
7377
M256D,
7478
M256I,
79+
M512,
80+
M512D,
81+
M512I,
82+
MMASK16,
7583
Tuple,
7684
CpuidResult,
7785
Never,
@@ -422,6 +430,15 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),
422430
| (&Type::M256, "__m256")
423431
| (&Type::Ptr(&Type::M256), "__m256*") => {}
424432

433+
(&Type::M512I, "__m512i")
434+
| (&Type::Ptr(&Type::M512I), "__m512i*")
435+
| (&Type::M512D, "__m512d")
436+
| (&Type::Ptr(&Type::M512D), "__m512d*")
437+
| (&Type::M512, "__m512")
438+
| (&Type::Ptr(&Type::M512), "__m512*") => {}
439+
440+
(&Type::MMASK16, "__mmask16") => {}
441+
425442
// This is a macro (?) in C which seems to mutate its arguments, but
426443
// that means that we're taking pointers to arguments in rust
427444
// as we're not exposing it as a macro.

0 commit comments

Comments
 (0)