Skip to content

Commit 87a28c5

Browse files
TDecking authored and Amanieu committed
Refactor avx512bw: max/min
1 parent a64ad45 commit 87a28c5

File tree

1 file changed

+24
-26
lines changed

1 file changed

+24
-26
lines changed

crates/core_arch/src/x86/avx512bw.rs

+24 -26
Original file line number | Diff line number | Diff line change
@@ -1630,7 +1630,9 @@ pub unsafe fn _mm_maskz_mullo_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m1
16301630
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16311631
#[cfg_attr(test, assert_instr(vpmaxuw))]
16321632
pub unsafe fn _mm512_max_epu16(a: __m512i, b: __m512i) -> __m512i {
1633-
transmute(vpmaxuw(a.as_u16x32(), b.as_u16x32()))
1633+
let a = a.as_u16x32();
1634+
let b = b.as_u16x32();
1635+
transmute(simd_select::<i16x32, _>(simd_gt(a, b), a, b))
16341636
}
16351637

16361638
/// Compare packed unsigned 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1716,7 +1718,9 @@ pub unsafe fn _mm_maskz_max_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128
17161718
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17171719
#[cfg_attr(test, assert_instr(vpmaxub))]
17181720
pub unsafe fn _mm512_max_epu8(a: __m512i, b: __m512i) -> __m512i {
1719-
transmute(vpmaxub(a.as_u8x64(), b.as_u8x64()))
1721+
let a = a.as_u8x64();
1722+
let b = b.as_u8x64();
1723+
transmute(simd_select::<i8x64, _>(simd_gt(a, b), a, b))
17201724
}
17211725

17221726
/// Compare packed unsigned 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1802,7 +1806,9 @@ pub unsafe fn _mm_maskz_max_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128
18021806
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18031807
#[cfg_attr(test, assert_instr(vpmaxsw))]
18041808
pub unsafe fn _mm512_max_epi16(a: __m512i, b: __m512i) -> __m512i {
1805-
transmute(vpmaxsw(a.as_i16x32(), b.as_i16x32()))
1809+
let a = a.as_i16x32();
1810+
let b = b.as_i16x32();
1811+
transmute(simd_select::<i16x32, _>(simd_gt(a, b), a, b))
18061812
}
18071813

18081814
/// Compare packed signed 16-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1888,7 +1894,9 @@ pub unsafe fn _mm_maskz_max_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128
18881894
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18891895
#[cfg_attr(test, assert_instr(vpmaxsb))]
18901896
pub unsafe fn _mm512_max_epi8(a: __m512i, b: __m512i) -> __m512i {
1891-
transmute(vpmaxsb(a.as_i8x64(), b.as_i8x64()))
1897+
let a = a.as_i8x64();
1898+
let b = b.as_i8x64();
1899+
transmute(simd_select::<i8x64, _>(simd_gt(a, b), a, b))
18921900
}
18931901

18941902
/// Compare packed signed 8-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -1974,7 +1982,9 @@ pub unsafe fn _mm_maskz_max_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128
19741982
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19751983
#[cfg_attr(test, assert_instr(vpminuw))]
19761984
pub unsafe fn _mm512_min_epu16(a: __m512i, b: __m512i) -> __m512i {
1977-
transmute(vpminuw(a.as_u16x32(), b.as_u16x32()))
1985+
let a = a.as_u16x32();
1986+
let b = b.as_u16x32();
1987+
transmute(simd_select::<i16x32, _>(simd_lt(a, b), a, b))
19781988
}
19791989

19801990
/// Compare packed unsigned 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2060,7 +2070,9 @@ pub unsafe fn _mm_maskz_min_epu16(k: __mmask8, a: __m128i, b: __m128i) -> __m128
20602070
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20612071
#[cfg_attr(test, assert_instr(vpminub))]
20622072
pub unsafe fn _mm512_min_epu8(a: __m512i, b: __m512i) -> __m512i {
2063-
transmute(vpminub(a.as_u8x64(), b.as_u8x64()))
2073+
let a = a.as_u8x64();
2074+
let b = b.as_u8x64();
2075+
transmute(simd_select::<i8x64, _>(simd_lt(a, b), a, b))
20642076
}
20652077

20662078
/// Compare packed unsigned 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2146,7 +2158,9 @@ pub unsafe fn _mm_maskz_min_epu8(k: __mmask16, a: __m128i, b: __m128i) -> __m128
21462158
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21472159
#[cfg_attr(test, assert_instr(vpminsw))]
21482160
pub unsafe fn _mm512_min_epi16(a: __m512i, b: __m512i) -> __m512i {
2149-
transmute(vpminsw(a.as_i16x32(), b.as_i16x32()))
2161+
let a = a.as_i16x32();
2162+
let b = b.as_i16x32();
2163+
transmute(simd_select::<i16x32, _>(simd_lt(a, b), a, b))
21502164
}
21512165

21522166
/// Compare packed signed 16-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -2232,7 +2246,9 @@ pub unsafe fn _mm_maskz_min_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128
22322246
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22332247
#[cfg_attr(test, assert_instr(vpminsb))]
22342248
pub unsafe fn _mm512_min_epi8(a: __m512i, b: __m512i) -> __m512i {
2235-
transmute(vpminsb(a.as_i8x64(), b.as_i8x64()))
2249+
let a = a.as_i8x64();
2250+
let b = b.as_i8x64();
2251+
transmute(simd_select::<i8x64, _>(simd_lt(a, b), a, b))
22362252
}
22372253

22382254
/// Compare packed signed 8-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -10504,24 +10520,6 @@ extern "C" {
1050410520
#[link_name = "llvm.x86.avx512.mask.cmp.b.128"]
1050510521
fn vpcmpb128(a: i8x16, b: i8x16, op: i32, mask: u16) -> u16;
1050610522

10507-
#[link_name = "llvm.x86.avx512.mask.pmaxu.w.512"]
10508-
fn vpmaxuw(a: u16x32, b: u16x32) -> u16x32;
10509-
#[link_name = "llvm.x86.avx512.mask.pmaxu.b.512"]
10510-
fn vpmaxub(a: u8x64, b: u8x64) -> u8x64;
10511-
#[link_name = "llvm.x86.avx512.mask.pmaxs.w.512"]
10512-
fn vpmaxsw(a: i16x32, b: i16x32) -> i16x32;
10513-
#[link_name = "llvm.x86.avx512.mask.pmaxs.b.512"]
10514-
fn vpmaxsb(a: i8x64, b: i8x64) -> i8x64;
10515-
10516-
#[link_name = "llvm.x86.avx512.mask.pminu.w.512"]
10517-
fn vpminuw(a: u16x32, b: u16x32) -> u16x32;
10518-
#[link_name = "llvm.x86.avx512.mask.pminu.b.512"]
10519-
fn vpminub(a: u8x64, b: u8x64) -> u8x64;
10520-
#[link_name = "llvm.x86.avx512.mask.pmins.w.512"]
10521-
fn vpminsw(a: i16x32, b: i16x32) -> i16x32;
10522-
#[link_name = "llvm.x86.avx512.mask.pmins.b.512"]
10523-
fn vpminsb(a: i8x64, b: i8x64) -> i8x64;
10524-
1052510523
#[link_name = "llvm.x86.avx512.pmaddw.d.512"]
1052610524
fn vpmaddwd(a: i16x32, b: i16x32) -> i32x16;
1052710525
#[link_name = "llvm.x86.avx512.pmaddubs.w.512"]

0 commit comments

Comments
 (0)