@@ -2642,74 +2642,25 @@ pub unsafe fn _mm256_shuffle_epi8(a: __m256i, b: __m256i) -> __m256i {
2642
2642
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_shuffle_epi32)
2643
2643
#[ inline]
2644
2644
#[ target_feature( enable = "avx2" ) ]
2645
- #[ cfg_attr( test, assert_instr( vpermilps, imm8 = 9 ) ) ]
2646
- #[ rustc_args_required_const ( 1 ) ]
2645
+ #[ cfg_attr( test, assert_instr( vpermilps, MASK = 9 ) ) ] // test-only attribute: the instruction check now names the const generic MASK (value 9) instead of imm8
2646
+ #[ rustc_legacy_const_generics ( 1 ) ] // NOTE(review): presumably lets callers keep passing the constant as positional argument 1; confirm against the rustc attribute docs
2647
2647
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
2648
- pub unsafe fn _mm256_shuffle_epi32 ( a : __m256i , imm8 : i32 ) -> __m256i {
2649
- // simd_shuffleX requires that its selector parameter be made up of
2650
- // constant values, but we can't enforce that here. In spirit, we need
2651
- // to write a `match` on all possible values of a byte, and for each value,
2652
- // hard-code the correct `simd_shuffleX` call using only constants. We
2653
- // then hope for LLVM to do the rest.
2654
- //
2655
- // Of course, that's... awful. So we try to use macros to do it for us.
2656
- let imm8 = ( imm8 & 0xFF ) as u8 ;
2657
-
2658
- let a = a. as_i32x8 ( ) ;
2659
- macro_rules! shuffle_done {
2660
- ( $x01: expr, $x23: expr, $x45: expr, $x67: expr) => {
2661
- simd_shuffle8(
2662
- a,
2663
- a,
2664
- [
2665
- $x01,
2666
- $x23,
2667
- $x45,
2668
- $x67,
2669
- 4 + $x01,
2670
- 4 + $x23,
2671
- 4 + $x45,
2672
- 4 + $x67,
2673
- ] ,
2674
- )
2675
- } ;
2676
- }
2677
- macro_rules! shuffle_x67 {
2678
- ( $x01: expr, $x23: expr, $x45: expr) => {
2679
- match ( imm8 >> 6 ) & 0b11 {
2680
- 0b00 => shuffle_done!( $x01, $x23, $x45, 0 ) ,
2681
- 0b01 => shuffle_done!( $x01, $x23, $x45, 1 ) ,
2682
- 0b10 => shuffle_done!( $x01, $x23, $x45, 2 ) ,
2683
- _ => shuffle_done!( $x01, $x23, $x45, 3 ) ,
2684
- }
2685
- } ;
2686
- }
2687
- macro_rules! shuffle_x45 {
2688
- ( $x01: expr, $x23: expr) => {
2689
- match ( imm8 >> 4 ) & 0b11 {
2690
- 0b00 => shuffle_x67!( $x01, $x23, 0 ) ,
2691
- 0b01 => shuffle_x67!( $x01, $x23, 1 ) ,
2692
- 0b10 => shuffle_x67!( $x01, $x23, 2 ) ,
2693
- _ => shuffle_x67!( $x01, $x23, 3 ) ,
2694
- }
2695
- } ;
2696
- }
2697
- macro_rules! shuffle_x23 {
2698
- ( $x01: expr) => {
2699
- match ( imm8 >> 2 ) & 0b11 {
2700
- 0b00 => shuffle_x45!( $x01, 0 ) ,
2701
- 0b01 => shuffle_x45!( $x01, 1 ) ,
2702
- 0b10 => shuffle_x45!( $x01, 2 ) ,
2703
- _ => shuffle_x45!( $x01, 3 ) ,
2704
- }
2705
- } ;
2706
- }
2707
- let r: i32x8 = match imm8 & 0b11 {
2708
- 0b00 => shuffle_x23 ! ( 0 ) ,
2709
- 0b01 => shuffle_x23 ! ( 1 ) ,
2710
- 0b10 => shuffle_x23 ! ( 2 ) ,
2711
- _ => shuffle_x23 ! ( 3 ) ,
2712
- } ;
2648
+ pub unsafe fn _mm256_shuffle_epi32 < const MASK : i32 > ( a : __m256i ) -> __m256i { // the shuffle control is now the const generic MASK rather than the runtime `imm8` argument
2649
+ static_assert_imm8 ! ( MASK ) ; // NOTE(review): presumably a compile-time check that MASK fits in 8 bits; confirm against the macro definition
2650
+ let r: i32x8 = simd_shuffle8 ( // one shuffle call with a constant index array; replaces the removed nested-macro `match` over every imm8 value
2651
+ a. as_i32x8 ( ) , // first shuffle operand: `a` viewed as i32x8
2652
+ a. as_i32x8 ( ) , // second shuffle operand: the same vector again
2653
+ [ // eight indices: four 2-bit fields of MASK, then the same four fields plus 4
2654
+ MASK as u32 & 0b11 , // field from MASK bits 1:0
2655
+ ( MASK as u32 >> 2 ) & 0b11 , // field from MASK bits 3:2
2656
+ ( MASK as u32 >> 4 ) & 0b11 , // field from MASK bits 5:4
2657
+ ( MASK as u32 >> 6 ) & 0b11 , // field from MASK bits 7:6
2658
+ ( MASK as u32 & 0b11 ) + 4 , // bits 1:0 again, offset by 4 (same pattern as the removed `4 + $x01`)
2659
+ ( ( MASK as u32 >> 2 ) & 0b11 ) + 4 , // bits 3:2, offset by 4
2660
+ ( ( MASK as u32 >> 4 ) & 0b11 ) + 4 , // bits 5:4, offset by 4
2661
+ ( ( MASK as u32 >> 6 ) & 0b11 ) + 4 , // bits 7:6, offset by 4
2662
+ ] ,
2663
+ ) ;
2713
2664
transmute ( r) // reinterprets the i32x8 shuffle result as the `__m256i` return type
2714
2665
}
2715
2666
0 commit comments