Skip to content

Commit c3960ea

Browse files
authored
Merge pull request #1042 from minybot/avx512
1 parent 6515d14 commit c3960ea

File tree

6 files changed

+2633
-3790
lines changed

6 files changed

+2633
-3790
lines changed

crates/core_arch/src/x86/avx2.rs

+18-67
Original file line number | Diff line number | Diff line change
@@ -2642,74 +2642,25 @@ pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
26422642
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_epi32)
26432643
#[inline]
26442644
#[target_feature(enable = "avx2")]
2645-
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))]
2646-
#[rustc_args_required_const(1)]
2645+
#[cfg_attr(test, assert_instr(vpermilps, MASK = 9))]
2646+
#[rustc_legacy_const_generics(1)]
26472647
#[stable(feature = "simd_x86", since = "1.27.0")]
2648-
pub unsafe fn _mm256_shuffle_epi32(a: __m256i, imm8: i32) -> __m256i {
2649-
// simd_shuffleX requires that its selector parameter be made up of
2650-
// constant values, but we can't enforce that here. In spirit, we need
2651-
// to write a `match` on all possible values of a byte, and for each value,
2652-
// hard-code the correct `simd_shuffleX` call using only constants. We
2653-
// then hope for LLVM to do the rest.
2654-
//
2655-
// Of course, that's... awful. So we try to use macros to do it for us.
2656-
let imm8 = (imm8 & 0xFF) as u8;
2657-
2658-
let a = a.as_i32x8();
2659-
macro_rules! shuffle_done {
2660-
($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
2661-
simd_shuffle8(
2662-
a,
2663-
a,
2664-
[
2665-
$x01,
2666-
$x23,
2667-
$x45,
2668-
$x67,
2669-
4 + $x01,
2670-
4 + $x23,
2671-
4 + $x45,
2672-
4 + $x67,
2673-
],
2674-
)
2675-
};
2676-
}
2677-
macro_rules! shuffle_x67 {
2678-
($x01:expr, $x23:expr, $x45:expr) => {
2679-
match (imm8 >> 6) & 0b11 {
2680-
0b00 => shuffle_done!($x01, $x23, $x45, 0),
2681-
0b01 => shuffle_done!($x01, $x23, $x45, 1),
2682-
0b10 => shuffle_done!($x01, $x23, $x45, 2),
2683-
_ => shuffle_done!($x01, $x23, $x45, 3),
2684-
}
2685-
};
2686-
}
2687-
macro_rules! shuffle_x45 {
2688-
($x01:expr, $x23:expr) => {
2689-
match (imm8 >> 4) & 0b11 {
2690-
0b00 => shuffle_x67!($x01, $x23, 0),
2691-
0b01 => shuffle_x67!($x01, $x23, 1),
2692-
0b10 => shuffle_x67!($x01, $x23, 2),
2693-
_ => shuffle_x67!($x01, $x23, 3),
2694-
}
2695-
};
2696-
}
2697-
macro_rules! shuffle_x23 {
2698-
($x01:expr) => {
2699-
match (imm8 >> 2) & 0b11 {
2700-
0b00 => shuffle_x45!($x01, 0),
2701-
0b01 => shuffle_x45!($x01, 1),
2702-
0b10 => shuffle_x45!($x01, 2),
2703-
_ => shuffle_x45!($x01, 3),
2704-
}
2705-
};
2706-
}
2707-
let r: i32x8 = match imm8 & 0b11 {
2708-
0b00 => shuffle_x23!(0),
2709-
0b01 => shuffle_x23!(1),
2710-
0b10 => shuffle_x23!(2),
2711-
_ => shuffle_x23!(3),
2712-
};
2648+
pub unsafe fn _mm256_shuffle_epi32<const MASK: i32>(a: __m256i) -> __m256i {
2649+
static_assert_imm8!(MASK);
2650+
let r: i32x8 = simd_shuffle8(
2651+
a.as_i32x8(),
2652+
a.as_i32x8(),
2653+
[
2654+
MASK as u32 & 0b11,
2655+
(MASK as u32 >> 2) & 0b11,
2656+
(MASK as u32 >> 4) & 0b11,
2657+
(MASK as u32 >> 6) & 0b11,
2658+
(MASK as u32 & 0b11) + 4,
2659+
((MASK as u32 >> 2) & 0b11) + 4,
2660+
((MASK as u32 >> 4) & 0b11) + 4,
2661+
((MASK as u32 >> 6) & 0b11) + 4,
2662+
],
2663+
);
27132664
transmute(r)
27142665
}
27152666

0 commit comments

Comments
 (0)