diff --git a/crates/core_arch/src/x86/avx2.rs b/crates/core_arch/src/x86/avx2.rs index 21f20f9c75..2e83400633 100644 --- a/crates/core_arch/src/x86/avx2.rs +++ b/crates/core_arch/src/x86/avx2.rs @@ -2961,140 +2961,54 @@ pub fn _mm256_srli_si256(a: __m256i) -> __m256i { #[stable(feature = "simd_x86", since = "1.27.0")] pub fn _mm256_bsrli_epi128(a: __m256i) -> __m256i { static_assert_uimm_bits!(IMM8, 8); + const fn mask(shift: i32, i: u32) -> u32 { + let shift = shift as u32 & 0xff; + if shift > 15 || (15 - (i % 16)) < shift { + 0 + } else { + 32 + (i + shift) + } + } unsafe { let a = a.as_i8x32(); - let zero = i8x32::ZERO; - let r: i8x32 = match IMM8 % 16 { - 0 => simd_shuffle!( - a, - zero, - [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, - 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - ], - ), - 1 => simd_shuffle!( - a, - zero, - [ - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 17, 18, 19, 20, 21, 22, - 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, - ], - ), - 2 => simd_shuffle!( - a, - zero, - [ - 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, 32, 32, - ], - ), - 3 => simd_shuffle!( - a, - zero, - [ - 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, 32, 32, 32, - ], - ), - 4 => simd_shuffle!( - a, - zero, - [ - 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 20, 21, 22, 23, 24, - 25, 26, 27, 28, 29, 30, 31, 32, 32, 32, 32, - ], - ), - 5 => simd_shuffle!( - a, - zero, - [ - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, 31, 32, 32, 32, 32, 32, - ], - ), - 6 => simd_shuffle!( - a, - zero, - [ - 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 22, 23, 24, 25, 26, - 27, 28, 29, 30, 31, 32, 32, 32, 32, 32, 32, - ], - ), - 7 => simd_shuffle!( - a, - zero, - [ - 7, 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 23, 24, 25, 26, - 27, 28, 29, 30, 31, 32, 32, 32, 32, 32, 32, 32, - ], - ), - 8 => simd_shuffle!( - a, - zero, - [ - 8, 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 24, 25, 26, 27, - 28, 29, 30, 31, 32, 32, 32, 32, 32, 32, 32, 32, - ], - ), - 9 => simd_shuffle!( - a, - zero, - [ - 9, 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 25, 26, 27, 28, - 29, 30, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, - ], - ), - 10 => simd_shuffle!( - a, - zero, - [ - 10, 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 26, 27, 28, 29, - 30, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - ], - ), - 11 => simd_shuffle!( - a, - zero, - [ - 11, 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 27, 28, 29, 30, - 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - ], - ), - 12 => simd_shuffle!( - a, - zero, - [ - 12, 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 28, 29, 30, 31, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - ], - ), - 13 => simd_shuffle!( - a, - zero, - [ - 13, 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 29, 30, 31, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - ], - ), - 14 => simd_shuffle!( - a, - zero, - [ - 14, 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 30, 31, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - ], - ), - 15 => simd_shuffle!( - a, - zero, - [ - 15, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 31, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - ], - ), - _ => zero, - }; + let r: i8x32 = simd_shuffle!( + i8x32::ZERO, + a, + [ + mask(IMM8, 0), + mask(IMM8, 1), + mask(IMM8, 2), + mask(IMM8, 3), + mask(IMM8, 4), + mask(IMM8, 5), + mask(IMM8, 6), + mask(IMM8, 7), + mask(IMM8, 8), + mask(IMM8, 9), + mask(IMM8, 10), + mask(IMM8, 11), + mask(IMM8, 12), + mask(IMM8, 13), + mask(IMM8, 14), + mask(IMM8, 15), + mask(IMM8, 16), + mask(IMM8, 17), + mask(IMM8, 18), + mask(IMM8, 19), + mask(IMM8, 20), + mask(IMM8, 21), + mask(IMM8, 22), + mask(IMM8, 23), + mask(IMM8, 24), + mask(IMM8, 25), + mask(IMM8, 26), + mask(IMM8, 27), + mask(IMM8, 28), + mask(IMM8, 29), + mask(IMM8, 30), + mask(IMM8, 31), + ], + ); transmute(r) } } diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs index b60b0b7079..ba3c28cd5f 100644 --- a/crates/core_arch/src/x86/avx512bw.rs +++ b/crates/core_arch/src/x86/avx512bw.rs @@ -11208,203 +11208,86 @@ pub fn _mm512_bslli_epi128(a: __m512i) -> __m512i { pub fn _mm512_bsrli_epi128(a: __m512i) -> __m512i { unsafe { static_assert_uimm_bits!(IMM8, 8); + const fn mask(shift: i32, i: u32) -> u32 { + let shift = shift as u32 & 0xff; + if shift > 15 || (15 - (i % 16)) < shift { + 0 + } else { + 64 + (i + shift) + } + } let a = a.as_i8x64(); let zero = i8x64::ZERO; - let r: i8x64 = match IMM8 % 16 { - 0 => { - simd_shuffle!( - a, - zero, - [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, - 59, 60, 61, 62, 63, - ], - ) - } - 1 => { - simd_shuffle!( - a, - zero, - [ - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 17, 18, 19, 20, 21, - 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 80, 33, 34, 35, 36, 37, 38, 39, 40, - 41, 42, 43, 44, 45, 46, 47, 96, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 112, - ], - ) - } - 2 => { - simd_shuffle!( - a, - zero, - [ - 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 18, 19, 20, 21, 22, - 23, 24, 25, 26, 27, 28, 29, 30, 31, 80, 81, 34, 35, 36, 37, 38, 39, 40, 41, - 42, 43, 44, 45, 46, 47, 96, 97, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, - 61, 62, 63, 112, 113, - ], - ) - } - 3 => { - simd_shuffle!( - a, - zero, - [ - 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 19, 20, 21, 22, - 23, 24, 25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 35, 36, 37, 38, 39, 40, 41, - 42, 43, 44, 45, 46, 47, 96, 97, 98, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, - 61, 62, 63, 112, 113, 114, - ], - ) - } - 4 => { - simd_shuffle!( - a, - zero, - [ - 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 36, 37, 38, 39, 40, 41, 42, - 43, 44, 45, 46, 47, 96, 97, 98, 99, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, - 62, 63, 112, 113, 114, 115, - ], - ) - } - 5 => { - simd_shuffle!( - a, - zero, - [ - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 21, 22, 23, 24, - 25, 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 37, 38, 39, 40, 41, 42, 43, - 44, 45, 46, 47, 96, 97, 98, 99, 100, 53, 54, 55, 56, 57, 58, 59, 60, 61, - 62, 63, 112, 113, 114, 115, 116, - ], - ) - } - 6 => { - simd_shuffle!( - a, - zero, - [ - 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 22, 23, 24, 25, - 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 38, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 96, 97, 98, 99, 100, 101, 54, 55, 56, 57, 58, 59, 60, 61, 62, - 63, 112, 113, 114, 115, 116, 117, - ], - ) - } - 7 => { - simd_shuffle!( - a, - zero, - [ - 7, 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 23, 24, 25, - 26, 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 96, 97, 98, 99, 100, 101, 102, 55, 56, 57, 58, 59, 60, 61, 62, - 63, 112, 113, 114, 115, 116, 117, 118, - ], - ) - } - 8 => { - simd_shuffle!( - a, - zero, - [ - 8, 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 24, 25, 26, - 27, 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 40, 41, 42, 43, 44, 45, - 46, 47, 96, 97, 98, 99, 100, 101, 102, 103, 56, 57, 58, 59, 60, 61, 62, 63, - 112, 113, 114, 115, 116, 117, 118, 119, - ], - ) - } - 9 => { - simd_shuffle!( - a, - zero, - [ - 9, 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 25, 26, 27, - 28, 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 41, 42, 43, 44, 45, 46, - 47, 96, 97, 98, 99, 100, 101, 102, 103, 104, 57, 58, 59, 60, 61, 62, 63, - 112, 113, 114, 115, 116, 117, 118, 119, 120, - ], - ) - } - 10 => { - simd_shuffle!( - a, - zero, - [ - 10, 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 26, 27, 28, - 29, 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 42, 43, 44, 45, 46, 47, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 58, 59, 60, 61, 62, 63, 112, - 113, 114, 115, 116, 117, 118, 119, 120, 121, - ], - ) - } - 11 => { - simd_shuffle!( - a, - zero, - [ - 11, 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 27, 28, 29, - 30, 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 43, 44, 45, 46, 47, 96, - 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 59, 60, 61, 62, 63, 112, - 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, - ], - ) - } - 12 => { - simd_shuffle!( - a, - zero, - [ - 12, 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 28, 29, 30, - 31, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 44, 45, 46, 47, 96, 97, - 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 60, 61, 62, 63, 112, 113, - 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, - ], - ) - } - 13 => { - simd_shuffle!( - a, - zero, - [ - 13, 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 29, 30, 31, - 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 45, 46, 47, 96, 97, 98, - 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 61, 62, 63, 112, 113, 114, - 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, - ], - ) - } - 14 => { - simd_shuffle!( - a, - zero, - [ - 14, 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 30, 31, 80, - 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 46, 47, 96, 97, 98, 99, - 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 62, 63, 112, 113, 114, - 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, - ], - ) - } - 15 => { - simd_shuffle!( - a, - zero, - [ - 15, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 31, 80, 81, - 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 47, 96, 97, 98, 99, - 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 63, 112, 113, 114, - 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, - ], - ) - } - _ => zero, - }; + let r: i8x64 = simd_shuffle!( + zero, + a, + [ + mask(IMM8, 0), + mask(IMM8, 1), + mask(IMM8, 2), + mask(IMM8, 3), + mask(IMM8, 4), + mask(IMM8, 5), + mask(IMM8, 6), + mask(IMM8, 7), + mask(IMM8, 8), + mask(IMM8, 9), + mask(IMM8, 10), + mask(IMM8, 11), + mask(IMM8, 12), + mask(IMM8, 13), + mask(IMM8, 14), + mask(IMM8, 15), + mask(IMM8, 16), + mask(IMM8, 17), + mask(IMM8, 18), + mask(IMM8, 19), + mask(IMM8, 20), + mask(IMM8, 21), + mask(IMM8, 22), + mask(IMM8, 23), + mask(IMM8, 24), + mask(IMM8, 25), + mask(IMM8, 26), + mask(IMM8, 27), + mask(IMM8, 28), + mask(IMM8, 29), + mask(IMM8, 30), + mask(IMM8, 31), + mask(IMM8, 32), + mask(IMM8, 33), + mask(IMM8, 34), + mask(IMM8, 35), + mask(IMM8, 36), + mask(IMM8, 37), + mask(IMM8, 38), + mask(IMM8, 39), + mask(IMM8, 40), + mask(IMM8, 41), + mask(IMM8, 42), + mask(IMM8, 43), + mask(IMM8, 44), + mask(IMM8, 45), + mask(IMM8, 46), + mask(IMM8, 47), + mask(IMM8, 48), + mask(IMM8, 49), + mask(IMM8, 50), + mask(IMM8, 51), + mask(IMM8, 52), + mask(IMM8, 53), + mask(IMM8, 54), + mask(IMM8, 55), + mask(IMM8, 56), + mask(IMM8, 57), + mask(IMM8, 58), + mask(IMM8, 59), + mask(IMM8, 60), + mask(IMM8, 61), + mask(IMM8, 62), + mask(IMM8, 63), + ], + ); transmute(r) } }