diff --git a/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile b/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile index 70ec5feecd..fe5fe474ca 100644 --- a/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile +++ b/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile @@ -8,8 +8,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ wget \ xz-utils -RUN wget https://downloadmirror.intel.com/843185/sde-external-9.48.0-2024-11-25-lin.tar.xz -RUN tar -xJf sde-external-9.48.0-2024-11-25-lin.tar.xz -ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/sde-external-9.48.0-2024-11-25-lin/sde64 \ +RUN wget http://ci-mirrors.rust-lang.org/stdarch/sde-external-9.48.0-2024-11-25-lin.tar.xz -O sde.tar.xz +RUN mkdir intel-sde +RUN tar -xJf sde.tar.xz --strip-components=1 -C intel-sde +ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \ -cpuid-in /checkout/ci/docker/x86_64-unknown-linux-gnu-emulated/cpuid.def \ -rtm-mode full -tsx --" diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index 485aa80cfe..56e522a316 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -12826,6 +12826,403 @@ pub unsafe fn vld4q_u64(a: *const u64) -> uint64x2x4_t { ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [1, 0]) }; ret_val }
+#[doc = "Lookup table read with 2-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 1))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti2_lane_s8<const LANE: i32>(a: int8x8_t, b: uint8x8_t) -> int8x16_t { + static_assert!(LANE >= 0 && LANE <= 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vluti2.lane.v16i8.v8i8" + )] + fn _vluti2_lane_s8(a: int8x8_t, b: uint8x8_t, n: i32) -> int8x16_t; + } + _vluti2_lane_s8(a, b, LANE) +}
+#[doc = "Lookup table read with 2-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 1))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti2q_lane_s8<const LANE: i32>(a: int8x16_t, b: uint8x8_t) -> int8x16_t { + static_assert!(LANE >= 0 && LANE <= 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vluti2.lane.v16i8.v16i8" + )] + fn _vluti2q_lane_s8(a: int8x16_t, b: uint8x8_t, n: i32) -> int8x16_t; + } + _vluti2q_lane_s8(a, b, LANE) +}
+#[doc = "Lookup table read with 2-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 1))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti2_lane_s16<const LANE: i32>(a: int16x4_t, b: uint8x8_t) -> int16x8_t { + static_assert!(LANE >= 0 
&& LANE <= 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vluti2.lane.v8i16.v4i16" + )] + fn _vluti2_lane_s16(a: int16x4_t, b: uint8x8_t, n: i32) -> int16x8_t; + } + _vluti2_lane_s16(a, b, LANE) +}
+#[doc = "Lookup table read with 2-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 1))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti2q_lane_s16<const LANE: i32>(a: int16x8_t, b: uint8x8_t) -> int16x8_t { + static_assert!(LANE >= 0 && LANE <= 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vluti2.lane.v8i16.v8i16" + )] + fn _vluti2q_lane_s16(a: int16x8_t, b: uint8x8_t, n: i32) -> int16x8_t; + } + _vluti2q_lane_s16(a, b, LANE) +}
+#[doc = "Lookup table read with 2-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 1))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti2_lane_u8<const LANE: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x16_t { + static_assert!(LANE >= 0 && LANE <= 1); + transmute(vluti2_lane_s8::<LANE>(transmute(a), b)) +}
+#[doc = "Lookup table read with 2-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 1))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti2q_lane_u8<const LANE: i32>(a: uint8x16_t, b: uint8x8_t) -> uint8x16_t { + static_assert!(LANE >= 0 && LANE <= 1); + transmute(vluti2q_lane_s8::<LANE>(transmute(a), b)) +}
+#[doc = "Lookup table read with 2-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 1))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti2_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint8x8_t) -> uint16x8_t { + static_assert!(LANE >= 0 && LANE <= 3); + transmute(vluti2_lane_s16::<LANE>(transmute(a), b)) +}
+#[doc = "Lookup table read with 2-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 1))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti2q_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t { + static_assert!(LANE >= 0 && LANE <= 3); + transmute(vluti2q_lane_s16::<LANE>(transmute(a), b)) +}
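As orientation for the family above, a minimal usage sketch (illustrative only, not part of the patch; the wrapper name and the constant 0 are assumptions): `LANE` selects which segment of the index vector supplies the sixteen 2-bit indices into the table vector.

    // Hypothetical caller of the new intrinsic; requires a CPU with the `lut` feature.
    #[target_feature(enable = "neon,lut")]
    unsafe fn lut2_demo(table: int8x8_t, indices: uint8x8_t) -> int8x16_t {
        // Sixteen 2-bit indices are read from the segment of `indices`
        // chosen by the const LANE argument (here segment 0).
        vluti2_lane_s8::<0>(table, indices)
    }

+#[doc = "Lookup table read with 2-bit indices"] 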
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 1))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti2_lane_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x16_t { + static_assert!(LANE >= 0 && LANE <= 1); + transmute(vluti2_lane_s8::(transmute(a), b)) +} +#[doc = "Lookup table read with 2-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 1))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti2q_lane_p8(a: poly8x16_t, b: uint8x8_t) -> poly8x16_t { + static_assert!(LANE >= 0 && LANE <= 1); + transmute(vluti2q_lane_s8::(transmute(a), b)) +} +#[doc = "Lookup table read with 2-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 1))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti2_lane_p16(a: poly16x4_t, b: uint8x8_t) -> poly16x8_t { + static_assert!(LANE >= 0 && LANE <= 3); + transmute(vluti2_lane_s16::(transmute(a), b)) +} +#[doc = "Lookup table read with 2-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 1))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti2q_lane_p16(a: poly16x8_t, b: uint8x8_t) -> poly16x8_t { + static_assert!(LANE >= 0 && LANE <= 3); + transmute(vluti2q_lane_s16::(transmute(a), b)) +} +#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut,fp16")] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_lane_f16_x2(a: float16x8x2_t, b: uint8x8_t) -> float16x8_t { + static_assert!(LANE >= 0 && LANE <= 1); + transmute(vluti4q_lane_s16_x2::(transmute(a), b)) +} +#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_u16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_lane_u16_x2(a: uint16x8x2_t, b: uint8x8_t) -> uint16x8_t { + static_assert!(LANE >= 0 && LANE <= 1); + 
transmute(vluti4q_lane_s16_x2::<LANE>(transmute(a), b)) +}
+#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_p16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_lane_p16_x2<const LANE: i32>(a: poly16x8x2_t, b: uint8x8_t) -> poly16x8_t { + static_assert!(LANE >= 0 && LANE <= 1); + transmute(vluti4q_lane_s16_x2::<LANE>(transmute(a), b)) +}
+#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_s16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_lane_s16_x2<const LANE: i32>(a: int16x8x2_t, b: uint8x8_t) -> int16x8_t { + static_assert!(LANE >= 0 && LANE <= 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vluti4q.lane.x2.v8i16" + )] + fn _vluti4q_lane_s16_x2(a: int16x8_t, b: int16x8_t, c: uint8x8_t, n: i32) -> int16x8_t; + } + _vluti4q_lane_s16_x2(a.0, a.1, b, LANE) +}
+#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_lane_s8<const LANE: i32>(a: int8x16_t, b: uint8x8_t) -> int8x16_t { + static_assert!(LANE == 0); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vluti4q.lane.v8i8" + )] + fn _vluti4q_lane_s8(a: int8x16_t, b: uint8x8_t, n: i32) -> int8x16_t; + } + _vluti4q_lane_s8(a, b, LANE) +}
+#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_lane_u8<const LANE: i32>(a: uint8x16_t, b: uint8x8_t) -> uint8x16_t { + static_assert!(LANE == 0); + transmute(vluti4q_lane_s8::<LANE>(transmute(a), b)) +}
+#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_lane_p8<const LANE: i32>(a: poly8x16_t, b: uint8x8_t) -> poly8x16_t { + static_assert!(LANE == 0); + transmute(vluti4q_lane_s8::<LANE>(transmute(a), b)) +}
+#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut,fp16")] +#[cfg_attr(test, assert_instr(nop, LANE = 3))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_laneq_f16_x2<const LANE: i32>( + a: float16x8x2_t, + b: uint8x16_t, +) -> float16x8_t { + static_assert!(LANE >= 0 && LANE <= 3); + transmute(vluti4q_laneq_s16_x2::<LANE>(transmute(a), b)) +}
+#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_u16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 3))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_laneq_u16_x2<const LANE: i32>(a: uint16x8x2_t, b: uint8x16_t) -> uint16x8_t { + static_assert!(LANE >= 0 && LANE <= 3); + transmute(vluti4q_laneq_s16_x2::<LANE>(transmute(a), b)) +}
+#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_p16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 3))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_laneq_p16_x2<const LANE: i32>(a: poly16x8x2_t, b: uint8x16_t) -> poly16x8_t { + static_assert!(LANE >= 0 && LANE <= 3); + transmute(vluti4q_laneq_s16_x2::<LANE>(transmute(a), b)) +}
+#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_s16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 3))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_laneq_s16_x2<const LANE: i32>(a: int16x8x2_t, b: uint8x16_t) -> int16x8_t { + static_assert!(LANE >= 0 && LANE <= 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vluti4q.laneq.x2.v8i16" + )] + fn _vluti4q_laneq_s16_x2(a: int16x8_t, b: int16x8_t, c: uint8x16_t, n: i32) -> int16x8_t; + } + _vluti4q_laneq_s16_x2(a.0, a.1, b, LANE) +}
+#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_laneq_s8<const LANE: i32>(a: int8x16_t, b: uint8x16_t) -> int8x16_t { + static_assert!(LANE >= 0 && LANE <= 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vluti4q.laneq.v16i8" + )] + fn _vluti4q_laneq_s8(a: int8x16_t, b: uint8x16_t, n: i32) -> int8x16_t; + } + _vluti4q_laneq_s8(a, b, LANE) +}
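For the two-register `_x2` forms, a similar hedged sketch (the helper name and constant are illustrative assumptions): the table is a pair of vectors and each result element is selected by a 4-bit index.

    // Hypothetical caller: a 16-entry table spread over two i16x8 vectors.
    #[target_feature(enable = "neon,lut")]
    unsafe fn lut4_demo(table: int16x8x2_t, indices: uint8x8_t) -> int16x8_t {
        // 4-bit indices come from the segment of `indices` chosen by LANE.
        vluti4q_lane_s16_x2::<0>(table, indices)
    }

+#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's 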
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_laneq_u8<const LANE: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert!(LANE >= 0 && LANE <= 1); + transmute(vluti4q_laneq_s8::<LANE>(transmute(a), b)) +}
+#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_laneq_p8<const LANE: i32>(a: poly8x16_t, b: uint8x16_t) -> poly8x16_t { + static_assert!(LANE >= 0 && LANE <= 1); + transmute(vluti4q_laneq_s8::<LANE>(transmute(a), b)) +} #[doc = "Maximum (vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f64)"] #[inline]
diff --git a/crates/core_arch/src/lib.rs b/crates/core_arch/src/lib.rs index 16e52d2d9d..b8f4c2625e 100644 --- a/crates/core_arch/src/lib.rs +++ b/crates/core_arch/src/lib.rs @@ -38,7 +38,8 @@ x86_amx_intrinsics, f16, keylocker_x86, - aarch64_unstable_target_feature + aarch64_unstable_target_feature, + bigint_helper_methods )] #![cfg_attr(test, feature(test, abi_vectorcall, stdarch_internal))] #![deny(clippy::missing_inline_in_public_items)]
diff --git a/crates/core_arch/src/macros.rs b/crates/core_arch/src/macros.rs index f59e278bb7..e00b433536 100644 --- a/crates/core_arch/src/macros.rs +++ b/crates/core_arch/src/macros.rs @@ -132,7 +132,7 @@ macro_rules! types { impl crate::fmt::Debug for $name { #[inline] fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -> crate::fmt::Result { - crate::core_arch::simd::debug_simd_finish(f, stringify!($name), self.0) + crate::core_arch::simd::debug_simd_finish(f, stringify!($name), self.as_array()) } } )*);
diff --git a/crates/core_arch/src/s390x/macros.rs b/crates/core_arch/src/s390x/macros.rs index 2c40fedae2..4f0f84ec91 100644 --- a/crates/core_arch/src/s390x/macros.rs +++ b/crates/core_arch/src/s390x/macros.rs @@ -250,6 +250,19 @@ macro_rules! l_t_t { u8 }; + (vector_bool_long_long) => { + u64 + }; + (vector_bool_int) => { + u32 + }; + (vector_bool_short) => { + u16 + }; + (vector_bool_char) => { + u8 + }; + (vector_float) => { f32 }; @@ -338,6 +351,9 @@ macro_rules! t_u { (vector_bool_int) => { vector_unsigned_int }; + (vector_bool_long_long) => { + vector_unsigned_long_long + }; (vector_unsigned_char) => { vector_unsigned_char }; @@ -380,6 +396,9 @@ macro_rules! 
t_b { (vector_bool_int) => { vector_bool_int }; + (vector_bool_long_long) => { + vector_bool_long_long + }; (vector_signed_char) => { vector_bool_char };
diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs index 795e426384..5103273ed9 100644 --- a/crates/core_arch/src/s390x/vector.rs +++ b/crates/core_arch/src/s390x/vector.rs @@ -7,7 +7,7 @@ #![allow(non_camel_case_types)] -use crate::{core_arch::simd::*, intrinsics::simd::*, mem::transmute}; +use crate::{core_arch::simd::*, intrinsics::simd::*, mem::MaybeUninit, mem::transmute}; #[cfg(test)] use stdarch_test::assert_instr; @@ -97,6 +97,10 @@ unsafe extern "unadjusted" { #[link_name = "llvm.s390.vsrlb"] fn vsrlb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; #[link_name = "llvm.s390.vslb"] fn vslb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; + #[link_name = "llvm.s390.vsldb"] fn vsldb(a: i8x16, b: i8x16, c: u32) -> i8x16; + #[link_name = "llvm.s390.vsld"] fn vsld(a: i8x16, b: i8x16, c: u32) -> i8x16; + #[link_name = "llvm.s390.vsrd"] fn vsrd(a: i8x16, b: i8x16, c: u32) -> i8x16; + #[link_name = "llvm.fshl.v16i8"] fn fshlb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char; #[link_name = "llvm.fshl.v8i16"] fn fshlh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short; #[link_name = "llvm.fshl.v4i32"] fn fshlf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int; @@ -122,6 +126,8 @@ unsafe extern "unadjusted" { #[link_name = "llvm.s390.vsbiq"] fn vsbiq(a: u128, b: u128, c: u128) -> u128; #[link_name = "llvm.s390.vsbcbiq"] fn vsbcbiq(a: u128, b: u128, c: u128) -> u128; + #[link_name = "llvm.s390.vacq"] fn vacq(a: u128, b: u128, c: u128) -> u128; + #[link_name = "llvm.s390.vscbib"] fn vscbib(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; #[link_name = "llvm.s390.vscbih"] fn vscbih(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short; #[link_name = "llvm.s390.vscbif"] fn vscbif(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int; @@ -142,6 +148,189 @@ unsafe extern "unadjusted" { #[link_name = "llvm.s390.vfaezbs"] fn vfaezbs(a: vector_signed_char, b: vector_signed_char, c: i32) -> PackedTuple<vector_signed_char, i32>; #[link_name = "llvm.s390.vfaezhs"] fn vfaezhs(a: vector_signed_short, b: vector_signed_short, c: i32) -> PackedTuple<vector_signed_short, i32>; #[link_name = "llvm.s390.vfaezfs"] fn vfaezfs(a: vector_signed_int, b: vector_signed_int, c: i32) -> PackedTuple<vector_signed_int, i32>;
+ + #[link_name = "llvm.s390.vll"] fn vll(a: u32, b: *const u8) -> vector_signed_char; + #[link_name = "llvm.s390.vstl"] fn vstl(a: vector_signed_char, b: u32, c: *mut u8); + + #[link_name = "llvm.s390.vlrl"] fn vlrl(a: u32, b: *const u8) -> vector_unsigned_char; + #[link_name = "llvm.s390.vstrl"] fn vstrl(a: vector_unsigned_char, b: u32, c: *mut u8); + + #[link_name = "llvm.s390.lcbb"] fn lcbb(a: *const u8, b: u32) -> u32; + #[link_name = "llvm.s390.vlbb"] fn vlbb(a: *const u8, b: u32) -> MaybeUninit<vector_signed_char>;
+ + #[link_name = "llvm.s390.vpksh"] fn vpksh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_char; + #[link_name = "llvm.s390.vpksf"] fn vpksf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_short; + #[link_name = "llvm.s390.vpksg"] fn vpksg(a: vector_signed_long_long, b: vector_signed_long_long) -> vector_signed_int;
+ + #[link_name = "llvm.s390.vpklsh"] fn vpklsh(a: vector_signed_short, b: 
vector_signed_short) -> vector_unsigned_char; + #[link_name = "llvm.s390.vpklsf"] fn vpklsf(a: vector_signed_int, b: vector_signed_int) -> vector_unsigned_short; + #[link_name = "llvm.s390.vpklsg"] fn vpklsg(a: vector_signed_long_long, b: vector_signed_long_long) -> vector_unsigned_int;
+ + #[link_name = "llvm.s390.vpkshs"] fn vpkshs(a: vector_signed_short, b: vector_signed_short) -> PackedTuple<vector_signed_char, i32>; + #[link_name = "llvm.s390.vpksfs"] fn vpksfs(a: vector_signed_int, b: vector_signed_int) -> PackedTuple<vector_signed_short, i32>; + #[link_name = "llvm.s390.vpksgs"] fn vpksgs(a: vector_signed_long_long, b: vector_signed_long_long) -> PackedTuple<vector_signed_int, i32>;
+ + #[link_name = "llvm.s390.vpklshs"] fn vpklshs(a: vector_unsigned_short, b: vector_unsigned_short) -> PackedTuple<vector_unsigned_char, i32>; + #[link_name = "llvm.s390.vpklsfs"] fn vpklsfs(a: vector_unsigned_int, b: vector_unsigned_int) -> PackedTuple<vector_unsigned_short, i32>; + #[link_name = "llvm.s390.vpklsgs"] fn vpklsgs(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> PackedTuple<vector_unsigned_int, i32>;
+ + #[link_name = "llvm.s390.vuplbw"] fn vuplbw(a: vector_signed_char) -> vector_signed_short; + #[link_name = "llvm.s390.vuplhw"] fn vuplhw(a: vector_signed_short) -> vector_signed_int; + #[link_name = "llvm.s390.vuplfw"] fn vuplfw(a: vector_signed_int) -> vector_signed_long_long; + #[link_name = "llvm.s390.vupllb"] fn vupllb(a: vector_unsigned_char) -> vector_unsigned_short; + #[link_name = "llvm.s390.vupllh"] fn vupllh(a: vector_unsigned_short) -> vector_unsigned_int; + #[link_name = "llvm.s390.vupllf"] fn vupllf(a: vector_unsigned_int) -> vector_unsigned_long_long;
+ + #[link_name = "llvm.s390.vavgb"] fn vavgb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; + #[link_name = "llvm.s390.vavgh"] fn vavgh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short; + #[link_name = "llvm.s390.vavgf"] fn vavgf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; + #[link_name = "llvm.s390.vavgg"] fn vavgg(a: vector_signed_long_long, b: vector_signed_long_long) -> vector_signed_long_long;
+ + #[link_name = "llvm.s390.vavglb"] fn vavglb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.s390.vavglh"] fn vavglh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.s390.vavglf"] fn vavglf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int; + #[link_name = "llvm.s390.vavglg"] fn vavglg(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> vector_unsigned_long_long;
+ + #[link_name = "llvm.s390.vcksm"] fn vcksm(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int;
+ + #[link_name = "llvm.s390.vmeb"] fn vmeb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short; + #[link_name = "llvm.s390.vmeh"] fn vmeh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int; + #[link_name = "llvm.s390.vmef"] fn vmef(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long;
+ + #[link_name = "llvm.s390.vmleb"] fn vmleb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short; + #[link_name = "llvm.s390.vmleh"] fn vmleh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int; + #[link_name = "llvm.s390.vmlef"] fn vmlef(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long;
+ + #[link_name = "llvm.s390.vmob"] fn vmob(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short; + #[link_name = "llvm.s390.vmoh"] fn vmoh(a: vector_signed_short, b: 
vector_signed_short) -> vector_signed_int; + #[link_name = "llvm.s390.vmof"] fn vmof(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long; + + #[link_name = "llvm.s390.vmlob"] fn vmlob(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short; + #[link_name = "llvm.s390.vmloh"] fn vmloh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int; + #[link_name = "llvm.s390.vmlof"] fn vmlof(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long; + + #[link_name = "llvm.s390.vmhb"] fn vmhb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; + #[link_name = "llvm.s390.vmhh"] fn vmhh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short; + #[link_name = "llvm.s390.vmhf"] fn vmhf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; + + #[link_name = "llvm.s390.vmlhb"] fn vmlhb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.s390.vmlhh"] fn vmlhh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.s390.vmlhf"] fn vmlhf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int; + + #[link_name = "llvm.s390.vmaeb"] fn vmaeb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_short) -> vector_signed_short; + #[link_name = "llvm.s390.vmaeh"] fn vmaeh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_int) -> vector_signed_int; + #[link_name = "llvm.s390.vmaef"] fn vmaef(a: vector_signed_int, b: vector_signed_int, c: vector_signed_long_long) -> vector_signed_long_long; + + #[link_name = "llvm.s390.vmaleb"] fn vmaleb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.s390.vmaleh"] fn vmaleh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int) -> vector_unsigned_int; + #[link_name = "llvm.s390.vmalef"] fn vmalef(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_long_long) -> vector_unsigned_long_long; + + #[link_name = "llvm.s390.vmaob"] fn vmaob(a: vector_signed_char, b: vector_signed_char, c: vector_signed_short) -> vector_signed_short; + #[link_name = "llvm.s390.vmaoh"] fn vmaoh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_int) -> vector_signed_int; + #[link_name = "llvm.s390.vmaof"] fn vmaof(a: vector_signed_int, b: vector_signed_int, c: vector_signed_long_long) -> vector_signed_long_long; + + #[link_name = "llvm.s390.vmalob"] fn vmalob(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.s390.vmaloh"] fn vmaloh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int) -> vector_unsigned_int; + #[link_name = "llvm.s390.vmalof"] fn vmalof(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_long_long) -> vector_unsigned_long_long; + + #[link_name = "llvm.s390.vmahb"] fn vmahb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char) -> vector_signed_char; + #[link_name = "llvm.s390.vmahh"] fn vmahh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short) -> vector_signed_short; + #[link_name = "llvm.s390.vmahf"] fn vmahf(a: vector_signed_int, b: vector_signed_int, c: vector_signed_int) -> vector_signed_int; + + #[link_name = "llvm.s390.vmalhb"] fn vmalhb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.s390.vmalhh"] fn 
vmalhh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.s390.vmalhf"] fn vmalhf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int;
+ + #[link_name = "llvm.s390.vmalb"] fn vmalb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char) -> vector_signed_char; + #[link_name = "llvm.s390.vmalh"] fn vmalh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short) -> vector_signed_short; + #[link_name = "llvm.s390.vmalf"] fn vmalf(a: vector_signed_int, b: vector_signed_int, c: vector_signed_int) -> vector_signed_int;
+ + #[link_name = "llvm.s390.vmallb"] fn vmallb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.s390.vmallh"] fn vmallh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.s390.vmallf"] fn vmallf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int;
+ + #[link_name = "llvm.s390.vgfmb"] fn vgfmb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short; + #[link_name = "llvm.s390.vgfmh"] fn vgfmh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int; + #[link_name = "llvm.s390.vgfmf"] fn vgfmf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long; + #[link_name = "llvm.s390.vgfmg"] fn vgfmg(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> u128;
+ + #[link_name = "llvm.s390.vgfmab"] fn vgfmab(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.s390.vgfmah"] fn vgfmah(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int) -> vector_unsigned_int; + #[link_name = "llvm.s390.vgfmaf"] fn vgfmaf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_long_long) -> vector_unsigned_long_long; + #[link_name = "llvm.s390.vgfmag"] fn vgfmag(a: vector_unsigned_long_long, b: vector_unsigned_long_long, c: u128) -> u128;
+ + #[link_name = "llvm.s390.vbperm"] fn vbperm(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_long_long;
+ + #[link_name = "llvm.s390.vftcisb"] fn vftcisb(a: vector_float, b: u32) -> PackedTuple<vector_bool_int, i32>; + #[link_name = "llvm.s390.vftcidb"] fn vftcidb(a: vector_double, b: u32) -> PackedTuple<vector_bool_long_long, i32>;
+ + #[link_name = "llvm.s390.vtm"] fn vtm(a: i8x16, b: i8x16) -> i32;
+ + #[link_name = "llvm.s390.vstrsb"] fn vstrsb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32>; + #[link_name = "llvm.s390.vstrsh"] fn vstrsh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32>; + #[link_name = "llvm.s390.vstrsf"] fn vstrsf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32>;
+ + #[link_name = "llvm.s390.vstrszb"] fn vstrszb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32>; + #[link_name = "llvm.s390.vstrszh"] fn vstrszh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32>; + #[link_name = "llvm.s390.vstrszf"] fn vstrszf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32>;
+ + #[link_name = "llvm.s390.vistrb"] fn vistrb(a: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.s390.vistrh"] fn vistrh(a: vector_unsigned_short) -> 
vector_unsigned_short; + #[link_name = "llvm.s390.vistrf"] fn vistrf(a: vector_unsigned_int) -> vector_unsigned_int;
+ + #[link_name = "llvm.s390.vistrbs"] fn vistrbs(a: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32>; + #[link_name = "llvm.s390.vistrhs"] fn vistrhs(a: vector_unsigned_short) -> PackedTuple<vector_unsigned_short, i32>; + #[link_name = "llvm.s390.vistrfs"] fn vistrfs(a: vector_unsigned_int) -> PackedTuple<vector_unsigned_int, i32>;
+ + #[link_name = "llvm.s390.vmslg"] fn vmslg(a: vector_unsigned_long_long, b: vector_unsigned_long_long, c: u128, d: u32) -> u128;
+ + #[link_name = "llvm.s390.vstrcb"] fn vstrcb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char, d: u32) -> vector_bool_char; + #[link_name = "llvm.s390.vstrch"] fn vstrch(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short, d: u32) -> vector_bool_short; + #[link_name = "llvm.s390.vstrcf"] fn vstrcf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int, d: u32) -> vector_bool_int;
+ + #[link_name = "llvm.s390.vstrcbs"] fn vstrcbs(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char, d: u32) -> PackedTuple<vector_bool_char, i32>; + #[link_name = "llvm.s390.vstrchs"] fn vstrchs(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short, d: u32) -> PackedTuple<vector_bool_short, i32>; + #[link_name = "llvm.s390.vstrcfs"] fn vstrcfs(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int, d: u32) -> PackedTuple<vector_bool_int, i32>;
+ + #[link_name = "llvm.s390.vstrczb"] fn vstrczb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char, d: u32) -> vector_bool_char; + #[link_name = "llvm.s390.vstrczh"] fn vstrczh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short, d: u32) -> vector_bool_short; + #[link_name = "llvm.s390.vstrczf"] fn vstrczf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int, d: u32) -> vector_bool_int;
+ + #[link_name = "llvm.s390.vstrczbs"] fn vstrczbs(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char, d: u32) -> PackedTuple<vector_bool_char, i32>; + #[link_name = "llvm.s390.vstrczhs"] fn vstrczhs(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short, d: u32) -> PackedTuple<vector_bool_short, i32>; + #[link_name = "llvm.s390.vstrczfs"] fn vstrczfs(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int, d: u32) -> PackedTuple<vector_bool_int, i32>;
+ + #[link_name = "llvm.s390.vfeeb"] fn vfeeb(a: i8x16, b: i8x16) -> i8x16; + #[link_name = "llvm.s390.vfeeh"] fn vfeeh(a: i16x8, b: i16x8) -> i16x8; + #[link_name = "llvm.s390.vfeef"] fn vfeef(a: i32x4, b: i32x4) -> i32x4;
+ + #[link_name = "llvm.s390.vfeezb"] fn vfeezb(a: i8x16, b: i8x16) -> i8x16; + #[link_name = "llvm.s390.vfeezh"] fn vfeezh(a: i16x8, b: i16x8) -> i16x8; + #[link_name = "llvm.s390.vfeezf"] fn vfeezf(a: i32x4, b: i32x4) -> i32x4;
+ + #[link_name = "llvm.s390.vfeebs"] fn vfeebs(a: i8x16, b: i8x16) -> PackedTuple<i8x16, i32>; + #[link_name = "llvm.s390.vfeehs"] fn vfeehs(a: i16x8, b: i16x8) -> PackedTuple<i16x8, i32>; + #[link_name = "llvm.s390.vfeefs"] fn vfeefs(a: i32x4, b: i32x4) -> PackedTuple<i32x4, i32>;
+ + #[link_name = "llvm.s390.vfeezbs"] fn vfeezbs(a: i8x16, b: i8x16) -> PackedTuple<i8x16, i32>; + #[link_name = "llvm.s390.vfeezhs"] fn vfeezhs(a: i16x8, b: i16x8) -> PackedTuple<i16x8, i32>; + #[link_name = "llvm.s390.vfeezfs"] fn vfeezfs(a: i32x4, b: i32x4) -> PackedTuple<i32x4, i32>;
+ + #[link_name = "llvm.s390.vfeneb"] fn vfeneb(a: i8x16, b: i8x16) -> i8x16; + #[link_name = "llvm.s390.vfeneh"] fn vfeneh(a: i16x8, b: i16x8) -> i16x8; + #[link_name = "llvm.s390.vfenef"] fn vfenef(a: i32x4, b: i32x4) -> i32x4;
+ + #[link_name = "llvm.s390.vfenezb"] fn 
vfenezb(a: i8x16, b: i8x16) -> i8x16; + #[link_name = "llvm.s390.vfenezh"] fn vfenezh(a: i16x8, b: i16x8) -> i16x8; + #[link_name = "llvm.s390.vfenezf"] fn vfenezf(a: i32x4, b: i32x4) -> i32x4;
+ + #[link_name = "llvm.s390.vfenebs"] fn vfenebs(a: i8x16, b: i8x16) -> PackedTuple<i8x16, i32>; + #[link_name = "llvm.s390.vfenehs"] fn vfenehs(a: i16x8, b: i16x8) -> PackedTuple<i16x8, i32>; + #[link_name = "llvm.s390.vfenefs"] fn vfenefs(a: i32x4, b: i32x4) -> PackedTuple<i32x4, i32>;
+ + #[link_name = "llvm.s390.vfenezbs"] fn vfenezbs(a: i8x16, b: i8x16) -> PackedTuple<i8x16, i32>; + #[link_name = "llvm.s390.vfenezhs"] fn vfenezhs(a: i16x8, b: i16x8) -> PackedTuple<i16x8, i32>; + #[link_name = "llvm.s390.vfenezfs"] fn vfenezfs(a: i32x4, b: i32x4) -> PackedTuple<i32x4, i32>; }
impl_from! { i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 } @@ -194,6 +383,39 @@ impl<const N: usize> ShuffleMask<N> { } ShuffleMask(mask) } + + const fn pack() -> Self { + let mut mask = [0; N]; + let mut i = 1; + let mut index = 0; + while index < N { + mask[index] = i as u32; + + i += 2; + index += 1; + } + ShuffleMask(mask) + } + + const fn unpack_low() -> Self { + let mut mask = [0; N]; + let mut i = 0; + while i < N { + mask[i] = (N + i) as u32; + i += 1; + } + ShuffleMask(mask) + } + + const fn unpack_high() -> Self { + let mut mask = [0; N]; + let mut i = 0; + while i < N { + mask[i] = i as u32; + i += 1; + } + ShuffleMask(mask) + } } const fn genmask() -> [u8; 16] { @@ -229,6 +451,23 @@ const fn genmasks(bit_width: u32, a: u8, b: u8) -> u64 { ((1u64.wrapping_shl(a as u32 + 1)) - 1) & !((1u64.wrapping_shl(b as u32)) - 1) }
+const fn validate_block_boundary(block_boundary: u16) -> u32 { + assert!( + block_boundary.is_power_of_two() && block_boundary >= 64 && block_boundary <= 4096, + "block boundary must be a constant power of 2 from 64 to 4096", + ); + + // so that 64 is encoded as 0, 128 as 1, etc. + block_boundary as u32 >> 7 +}
+ +enum FindImm { + Eq = 4, + Ne = 12, + EqIdx = 0, + NeIdx = 8, +} + #[macro_use] mod sealed { use super::*; @@ -552,6 +791,88 @@ mod sealed { impl_vec_trait! { [VectorNabs vec_nabs] vec_nabs_f32 (vector_float) } impl_vec_trait! 
{ [VectorNabs vec_nabs] vec_nabs_f64 (vector_double) }
+ #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorNmsub { + unsafe fn vec_nmsub(self, b: Self, c: Self) -> Self; + }
+ + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr( + all(test, target_feature = "vector-enhancements-2"), + assert_instr(vfnmssb) + )] + unsafe fn vec_nmsub_f32(a: vector_float, b: vector_float, c: vector_float) -> vector_float { + simd_neg(simd_fma(a, b, simd_neg(c))) + }
+ + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorNmsub for vector_float { + #[target_feature(enable = "vector")] + unsafe fn vec_nmsub(self, b: Self, c: Self) -> Self { + vec_nmsub_f32(self, b, c) + } + }
+ + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr( + all(test, target_feature = "vector-enhancements-2"), + assert_instr(vfnmsdb) + )] + unsafe fn vec_nmsub_f64(a: vector_double, b: vector_double, c: vector_double) -> vector_double { + simd_neg(simd_fma(a, b, simd_neg(c))) + }
+ + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorNmsub for vector_double { + #[target_feature(enable = "vector")] + unsafe fn vec_nmsub(self, b: Self, c: Self) -> Self { + vec_nmsub_f64(self, b, c) + } + }
+ + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorNmadd { + unsafe fn vec_nmadd(self, b: Self, c: Self) -> Self; + }
+ + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr( + all(test, target_feature = "vector-enhancements-2"), + assert_instr(vfnmasb) + )] + unsafe fn vec_nmadd_f32(a: vector_float, b: vector_float, c: vector_float) -> vector_float { + simd_neg(simd_fma(a, b, c)) + }
+ + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorNmadd for vector_float { + #[target_feature(enable = "vector")] + unsafe fn vec_nmadd(self, b: Self, c: Self) -> Self { + vec_nmadd_f32(self, b, c) + } + }
+ + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr( + all(test, target_feature = "vector-enhancements-2"), + assert_instr(vfnmadb) + )] + unsafe fn vec_nmadd_f64(a: vector_double, b: vector_double, c: vector_double) -> vector_double { + simd_neg(simd_fma(a, b, c)) + }
+ + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorNmadd for vector_double { + #[target_feature(enable = "vector")] + unsafe fn vec_nmadd(self, b: Self, c: Self) -> Self { + vec_nmadd_f64(self, b, c) + } + } + #[unstable(feature = "stdarch_s390x", issue = "135681")] pub trait VectorSplat { unsafe fn vec_splat(self) -> Self; @@ -871,7 +1192,8 @@ mod sealed { test_impl! { vec_roundc_f32 (a: vector_float) -> vector_float [nearbyint_v4f32, "vector-enhancements-1" vfisb] } test_impl! { vec_roundc_f64 (a: vector_double) -> vector_double [nearbyint_v2f64, vfidb] } - // FIXME(llvm) roundeven does not yet lower to vfidb (but should in the future) + // FIXME(llvm) LLVM trunk already lowers roundeven to vfidb, but Rust does not use it yet; + // use https://godbolt.org/z/cWq95fexe to check, and enable the instruction test when it works test_impl! { vec_round_f32 (a: vector_float) -> vector_float [roundeven_v4f32, _] } test_impl! { vec_round_f64 (a: vector_double) -> vector_double [roundeven_v2f64, _] } @@ -1212,6 +1534,21 @@ mod sealed { unsafe fn vec_reve(self) -> Self; }
+ #[repr(simd)] + struct ReverseMask<const N: usize>([u32; N]);
+ + impl<const N: usize> ReverseMask<N> { + const fn new() -> Self { + let mut index = [0; N]; + let mut i = 0; + while i < N { + index[i] = (N - i - 1) as u32; + i += 1; + } + ReverseMask(index) + } + } + macro_rules! 
impl_reve { ($($ty:ident, $fun:ident, $instr:ident),*) => { $( @@ -1592,10 +1929,9 @@ mod sealed { type Result = $r; #[inline] #[target_feature(enable = "vector")] - unsafe fn $m(self, b: Self, c: *mut i32) -> Self::Result { + unsafe fn $m(self, b: Self) -> (Self::Result, i32) { let PackedTuple { x, y } = $fun::<{ FindImm::$imm as i32 }>(transmute(self), transmute(b)); - c.write(y); - transmute(x) + (transmute(x), y) } } )* @@ -1622,10 +1958,9 @@ mod sealed { type Result = t_b!($ty); #[inline] #[target_feature(enable = "vector")] - unsafe fn $m(self, b: Self, c: *mut i32) -> Self::Result { + unsafe fn $m(self, b: Self) -> (Self::Result, i32) { let PackedTuple { x, y } = $fun::<{ FindImm::$imm as i32 }>(transmute(self), transmute(b)); - c.write(y); - transmute(x) + (transmute(x), y) } } )* @@ -1688,13 +2023,6 @@ mod sealed { }; } - enum FindImm { - Eq = 4, - Ne = 12, - EqIdx = 0, - NeIdx = 8, - } - #[unstable(feature = "stdarch_s390x", issue = "135681")] pub trait VectorFindAnyEq<Other> { type Result; @@ -1784,19 +2112,19 @@ mod sealed { } vfaes_wrapper! { - vfaebs vector_signed_char - vfaehs vector_signed_short - vfaefs vector_signed_int + vfaebs vector_signed_char + vfaehs vector_signed_short + vfaefs vector_signed_int - vfaezbs vector_signed_char - vfaezhs vector_signed_short - vfaezfs vector_signed_int + vfaezbs vector_signed_char + vfaezhs vector_signed_short + vfaezfs vector_signed_int }
#[unstable(feature = "stdarch_s390x", issue = "135681")] pub trait VectorFindAnyEqCC<Other> { type Result; - unsafe fn vec_find_any_eq_cc(self, other: Other, c: *mut i32) -> Self::Result; + unsafe fn vec_find_any_eq_cc(self, other: Other) -> (Self::Result, i32); } impl_vfae! { [cc VectorFindAnyEqCC vec_find_any_eq_cc] Eq vfaebs vfaehs vfaefs }
#[unstable(feature = "stdarch_s390x", issue = "135681")] pub trait VectorFindAnyNeCC<Other> { type Result; - unsafe fn vec_find_any_ne_cc(self, other: Other, c: *mut i32) -> Self::Result; + unsafe fn vec_find_any_ne_cc(self, other: Other) -> (Self::Result, i32); } impl_vfae! { [cc VectorFindAnyNeCC vec_find_any_ne_cc] Ne vfaebs vfaehs vfaefs }
#[unstable(feature = "stdarch_s390x", issue = "135681")] pub trait VectorFindAnyEqIdxCC<Other> { type Result; - unsafe fn vec_find_any_eq_idx_cc(self, other: Other, c: *mut i32) -> Self::Result; + unsafe fn vec_find_any_eq_idx_cc(self, other: Other) -> (Self::Result, i32); } impl_vfae! { [idx_cc VectorFindAnyEqIdxCC vec_find_any_eq_idx_cc] EqIdx vfaebs vfaehs vfaefs }
#[unstable(feature = "stdarch_s390x", issue = "135681")] pub trait VectorFindAnyNeIdxCC<Other> { type Result; - unsafe fn vec_find_any_ne_idx_cc(self, other: Other, c: *mut i32) -> Self::Result; + unsafe fn vec_find_any_ne_idx_cc(self, other: Other) -> (Self::Result, i32); } impl_vfae! { [idx_cc VectorFindAnyNeIdxCC vec_find_any_ne_idx_cc] NeIdx vfaebs vfaehs vfaefs }
#[unstable(feature = "stdarch_s390x", issue = "135681")] pub trait VectorFindAnyEqOrZeroIdxCC<Other> { type Result; - unsafe fn vec_find_any_eq_or_0_idx_cc(self, other: Other, c: *mut i32) -> Self::Result; + unsafe fn vec_find_any_eq_or_0_idx_cc(self, other: Other) -> (Self::Result, i32); } impl_vfae! 
{ [idx_cc VectorFindAnyEqOrZeroIdxCC vec_find_any_eq_or_0_idx_cc] EqIdx vfaezbs vfaezhs vfaezfs } @@ -1836,843 +2164,3441 @@ mod sealed { #[unstable(feature = "stdarch_s390x", issue = "135681")] pub trait VectorFindAnyNeOrZeroIdxCC<Other> { type Result; - unsafe fn vec_find_any_ne_or_0_idx_cc(self, other: Other, c: *mut i32) -> Self::Result; + unsafe fn vec_find_any_ne_or_0_idx_cc(self, other: Other) -> (Self::Result, i32); } impl_vfae! { [idx_cc VectorFindAnyNeOrZeroIdxCC vec_find_any_ne_or_0_idx_cc] NeIdx vfaezbs vfaezhs vfaezfs } -}
-/// Vector element-wise addition. -#[inline] -#[target_feature(enable = "vector")] -#[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_add<T, U>(a: T, b: U) -> <T as sealed::VectorAdd<U>>::Result -where - T: sealed::VectorAdd<U>, -{ - a.vec_add(b) -} + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vl))] + unsafe fn test_vector_load(offset: isize, ptr: *const i32) -> vector_signed_int { + ptr.byte_offset(offset) + .cast::<vector_signed_int>() + .read_unaligned() + }
-/// Vector element-wise subtraction. -#[inline] -#[target_feature(enable = "vector")] -#[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_sub<T, U>(a: T, b: U) -> <T as sealed::VectorSub<U>>::Result -where - T: sealed::VectorSub<U>, -{ - a.vec_sub(b) -} + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vst))] + unsafe fn test_vector_store(vector: vector_signed_int, offset: isize, ptr: *mut i32) { + ptr.byte_offset(offset) + .cast::<vector_signed_int>() + .write_unaligned(vector) + }
-/// Vector element-wise multiplication. -/// -/// ## Purpose -/// Compute the products of corresponding elements of two vectors. -/// -/// ## Result value -/// Each element of r receives the product of the corresponding elements of a and b. -#[inline] -#[target_feature(enable = "vector")] -#[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_mul<T>(a: T, b: T) -> T -where - T: sealed::VectorMul, -{ - a.vec_mul(b) -} + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorLoad: Sized { + type ElementType;
-/// Vector Count Leading Zeros -#[inline] -#[target_feature(enable = "vector")] -#[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_cntlz<T>(a: T) -> <T as sealed::CountBits>::Result -where - T: sealed::CountBits, -{ - a.vec_cntlz() -} + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_xl(offset: isize, ptr: *const Self::ElementType) -> Self { + ptr.byte_offset(offset).cast::<Self>().read_unaligned() + }
-/// Vector Count Trailing Zeros -#[inline] -#[target_feature(enable = "vector")] -#[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_cnttz<T>(a: T) -> <T as sealed::CountBits>::Result -where - T: sealed::CountBits, -{ - a.vec_cnttz() -} + unsafe fn vec_load_len(ptr: *const Self::ElementType, byte_count: u32) -> Self;
-/// Vector Population Count -/// -/// Computes the population count (number of set bits) in each element of the input. -#[inline] -#[target_feature(enable = "vector")] -#[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_popcnt<T>(a: T) -> <T as sealed::CountBits>::Result -where - T: sealed::CountBits, -{ - a.vec_popcnt() -} + unsafe fn vec_load_bndry<const BLOCK_BOUNDARY: u16>( + ptr: *const Self::ElementType, + ) -> MaybeUninit<Self>; + }
-/// Vector element-wise maximum. 
-#[inline] -#[target_feature(enable = "vector")] -#[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_max<T, U>(a: T, b: U) -> <T as sealed::VectorMax<U>>::Result -where - T: sealed::VectorMax<U>, -{ - a.vec_max(b) -} + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorStore: Sized { + type ElementType;
-/// Vector element-wise minimum. -#[inline] -#[target_feature(enable = "vector")] -#[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_min<T, U>(a: T, b: U) -> <T as sealed::VectorMin<U>>::Result -where - T: sealed::VectorMin<U>, -{ - a.vec_min(b) -} + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_xst(self, offset: isize, ptr: *mut Self::ElementType) { + ptr.byte_offset(offset).cast::<Self>().write_unaligned(self) + }
-/// Vector abs. -#[inline] -#[target_feature(enable = "vector")] -#[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_abs<T>(a: T) -> T -where - T: sealed::VectorAbs, -{ - a.vec_abs() -} + unsafe fn vec_store_len(self, ptr: *mut Self::ElementType, byte_count: u32); + }
-/// Vector negative abs. -#[inline] -#[target_feature(enable = "vector")] -#[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_nabs<T>(a: T) -> T -where - T: sealed::VectorNabs, -{ - a.vec_nabs() -} + macro_rules! impl_load_store { + ($($ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorLoad for t_t_l!($ty) { + type ElementType = $ty;
-/// Vector square root. -#[inline] -#[target_feature(enable = "vector")] -#[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_sqrt<T>(a: T) -> T -where - T: sealed::VectorSqrt, -{ - a.vec_sqrt() -} + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_load_len(ptr: *const Self::ElementType, byte_count: u32) -> Self { + transmute(vll(byte_count, ptr.cast())) + }
-/// Vector Splat -#[inline] + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_load_bndry<const BLOCK_BOUNDARY: u16>(ptr: *const Self::ElementType) -> MaybeUninit<Self> { + transmute(vlbb(ptr.cast(), const { validate_block_boundary(BLOCK_BOUNDARY) })) + } + + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorStore for t_t_l!($ty) { + type ElementType = $ty; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_store_len(self, ptr: *mut Self::ElementType, byte_count: u32) { + vstl(transmute(self), byte_count, ptr.cast()) + } + } + )* + } + }
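Before the macro is instantiated below, a usage sketch of the trait surface it implements (hedged; the round-trip function is an illustrative assumption, not part of the patch):

    // Hypothetical round-trip through the unaligned load/store methods.
    #[target_feature(enable = "vector")]
    unsafe fn xl_xst_demo(ptr: *mut i32) -> vector_signed_int {
        // vec_xl reads 16 bytes at `ptr` plus a 4-byte offset, with no alignment requirement.
        let v = vector_signed_int::vec_xl(4, ptr);
        // vec_xst writes the same 16 bytes back.
        v.vec_xst(4, ptr);
        v
    }

+ + impl_load_store! 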
{ i8 u8 i16 u16 i32 u32 i64 u64 f32 f64 }
+ + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vll))] + unsafe fn test_vec_load_len(ptr: *const i32, byte_count: u32) -> vector_signed_int { + vector_signed_int::vec_load_len(ptr, byte_count) + }
+ + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vlbb))] + unsafe fn test_vec_load_bndry(ptr: *const i32) -> MaybeUninit<vector_signed_int> { + vector_signed_int::vec_load_bndry::<512>(ptr) + }
+ + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vst))] + unsafe fn test_vec_store_len(vector: vector_signed_int, ptr: *mut i32, byte_count: u32) { + vector.vec_store_len(ptr, byte_count) + }
+ + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorLoadPair: Sized { + type ElementType; + + unsafe fn vec_load_pair(a: Self::ElementType, b: Self::ElementType) -> Self; + }
+ + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorLoadPair for vector_signed_long_long { + type ElementType = i64; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_load_pair(a: i64, b: i64) -> Self { + vector_signed_long_long([a, b]) + } + }
+ + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorLoadPair for vector_unsigned_long_long { + type ElementType = u64; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_load_pair(a: u64, b: u64) -> Self { + vector_unsigned_long_long([a, b]) + } + }
+ + #[inline] + #[target_feature(enable = "vector")] + unsafe fn pack<T, const N: usize>(a: T, b: T) -> T { + simd_shuffle(a, b, const { ShuffleMask::<N>::pack() }) + }
+ + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vpkh))] + unsafe fn vpkh(a: i16x8, b: i16x8) -> i8x16 { + let a: i8x16 = transmute(a); + let b: i8x16 = transmute(b); + simd_shuffle(a, b, const { ShuffleMask::<16>::pack() }) + } + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vpkf))] + unsafe fn vpkf(a: i32x4, b: i32x4) -> i16x8 { + let a: i16x8 = transmute(a); + let b: i16x8 = transmute(b); + simd_shuffle(a, b, const { ShuffleMask::<8>::pack() }) + } + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vpkg))] + unsafe fn vpkg(a: i64x2, b: i64x2) -> i32x4 { + let a: i32x4 = transmute(a); + let b: i32x4 = transmute(b); + simd_shuffle(a, b, const { ShuffleMask::<4>::pack() }) + }
+ + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorPack<Other> { + type Result; + unsafe fn vec_pack(self, b: Other) -> Self::Result; + }
+ + impl_vec_trait! { [VectorPack vec_pack]+ vpkh (vector_signed_short, vector_signed_short) -> vector_signed_char } + impl_vec_trait! { [VectorPack vec_pack]+ vpkh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_char } + impl_vec_trait! { [VectorPack vec_pack]+ vpkh (vector_bool_short, vector_bool_short) -> vector_bool_char } + impl_vec_trait! { [VectorPack vec_pack]+ vpkf (vector_signed_int, vector_signed_int) -> vector_signed_short } + impl_vec_trait! { [VectorPack vec_pack]+ vpkf (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_short } + impl_vec_trait! { [VectorPack vec_pack]+ vpkf (vector_bool_int, vector_bool_int) -> vector_bool_short } + impl_vec_trait! { [VectorPack vec_pack]+ vpkg (vector_signed_long_long, vector_signed_long_long) -> vector_signed_int } + impl_vec_trait! { [VectorPack vec_pack]+ vpkg (vector_unsigned_long_long, vector_unsigned_long_long) -> vector_unsigned_int } + impl_vec_trait! 
{ [VectorPack vec_pack]+ vpkg (vector_bool_long_long, vector_bool_long_long) -> vector_bool_int }
+ + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorPacks<Other> { + type Result; + unsafe fn vec_packs(self, b: Other) -> Self::Result; + }
+ + impl_vec_trait! { [VectorPacks vec_packs] vpksh (vector_signed_short, vector_signed_short) -> vector_signed_char } + impl_vec_trait! { [VectorPacks vec_packs] vpklsh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_char } + impl_vec_trait! { [VectorPacks vec_packs] vpksf (vector_signed_int, vector_signed_int) -> vector_signed_short } + impl_vec_trait! { [VectorPacks vec_packs] vpklsf (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_short } + impl_vec_trait! { [VectorPacks vec_packs] vpksg (vector_signed_long_long, vector_signed_long_long) -> vector_signed_int } + impl_vec_trait! { [VectorPacks vec_packs] vpklsg (vector_unsigned_long_long, vector_unsigned_long_long) -> vector_unsigned_int }
+ + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorPacksu<Other> { + type Result; + unsafe fn vec_packsu(self, b: Other) -> Self::Result; + }
+ + unsafe fn simd_smax<T>(a: T, b: T) -> T { + simd_select::<T, T>(simd_gt::<T, T>(a, b), a, b) + }
+ + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vpklsh))] + unsafe fn vpacksuh(a: vector_signed_short, b: vector_signed_short) -> vector_unsigned_char { + vpklsh( + simd_smax(a, vector_signed_short([0; 8])), + simd_smax(b, vector_signed_short([0; 8])), + ) + } + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vpklsf))] + unsafe fn vpacksuf(a: vector_signed_int, b: vector_signed_int) -> vector_unsigned_short { + vpklsf( + simd_smax(a, vector_signed_int([0; 4])), + simd_smax(b, vector_signed_int([0; 4])), + ) + } + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vpklsg))] + unsafe fn vpacksug( + a: vector_signed_long_long, + b: vector_signed_long_long, + ) -> vector_unsigned_int { + vpklsg( + simd_smax(a, vector_signed_long_long([0; 2])), + simd_smax(b, vector_signed_long_long([0; 2])), + ) + }
+ + impl_vec_trait! { [VectorPacksu vec_packsu] vpacksuh (vector_signed_short, vector_signed_short) -> vector_unsigned_char } + impl_vec_trait! { [VectorPacksu vec_packsu] vpklsh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_char } + impl_vec_trait! { [VectorPacksu vec_packsu] vpacksuf (vector_signed_int, vector_signed_int) -> vector_unsigned_short } + impl_vec_trait! { [VectorPacksu vec_packsu] vpklsf (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_short } + impl_vec_trait! { [VectorPacksu vec_packsu] vpacksug (vector_signed_long_long, vector_signed_long_long) -> vector_unsigned_int } + impl_vec_trait! { [VectorPacksu vec_packsu] vpklsg (vector_unsigned_long_long, vector_unsigned_long_long) -> vector_unsigned_int }
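A worked sketch of the signed-to-unsigned saturating pack defined above (values are illustrative): `simd_smax` first clamps negative lanes to zero, then the logical pack saturates anything above the unsigned maximum.

    // Hypothetical: lanes [-5, 300, 7, ...] clamp to [0, 300, 7, ...],
    // then pack with unsigned saturation to u8 lanes [0, 255, 7, ...].
    #[target_feature(enable = "vector")]
    unsafe fn packsu_demo(a: vector_signed_short, b: vector_signed_short) -> vector_unsigned_char {
        a.vec_packsu(b)
    }

+ + macro_rules! 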
+    macro_rules! impl_vector_packs_cc {
+        ($($intr:ident $ty:ident $outty:ident)*) => {
+            $(
+                #[inline]
+                #[target_feature(enable = "vector")]
+                #[cfg_attr(test, assert_instr($intr))]
+                unsafe fn $intr(
+                    a: $ty,
+                    b: $ty,
+                ) -> ($outty, i32) {
+                    let PackedTuple { x, y } = super::$intr(a, b);
+                    (x, y)
+                }
+
+                #[unstable(feature = "stdarch_s390x", issue = "135681")]
+                impl VectorPacksCC for $ty {
+                    type Result = $outty;
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_packs_cc(self, b: Self) -> (Self::Result, i32) {
+                        $intr(self, b)
+                    }
+                }
+            )*
+        }
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorPacksCC {
+        type Result;
+        unsafe fn vec_packs_cc(self, b: Self) -> (Self::Result, i32);
+    }
+
+    impl_vector_packs_cc! {
+        vpkshs vector_signed_short vector_signed_char
+        vpklshs vector_unsigned_short vector_unsigned_char
+        vpksfs vector_signed_int vector_signed_short
+        vpklsfs vector_unsigned_int vector_unsigned_short
+        vpksgs vector_signed_long_long vector_signed_int
+        vpklsgs vector_unsigned_long_long vector_unsigned_int
+    }
+
+    macro_rules! impl_vector_packsu_cc {
+        ($($intr:ident $ty:ident $outty:ident)*) => {
+            $(
+                #[unstable(feature = "stdarch_s390x", issue = "135681")]
+                impl VectorPacksuCC for $ty {
+                    type Result = $outty;
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_packsu_cc(self, b: Self) -> (Self::Result, i32) {
+                        $intr(self, b)
+                    }
+                }
+            )*
+        }
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorPacksuCC {
+        type Result;
+        unsafe fn vec_packsu_cc(self, b: Self) -> (Self::Result, i32);
+    }
+
+    impl_vector_packsu_cc! {
+        vpklshs vector_unsigned_short vector_unsigned_char
+        vpklsfs vector_unsigned_int vector_unsigned_short
+        vpklsgs vector_unsigned_long_long vector_unsigned_int
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorMadd {
+        unsafe fn vec_madd(self, b: Self, c: Self) -> Self;
+        unsafe fn vec_msub(self, b: Self, c: Self) -> Self;
+    }
+
+    test_impl! { vfmasb (a: vector_float, b: vector_float, c: vector_float) -> vector_float [simd_fma, "vector-enhancements-1" vfmasb] }
+    test_impl! { vfmadb (a: vector_double, b: vector_double, c: vector_double) -> vector_double [simd_fma, vfmadb] }
+
+    #[inline]
+    unsafe fn simd_fms<T>(a: T, b: T, c: T) -> T {
+        simd_fma(a, b, simd_neg(c))
+    }
+
+    test_impl! { vfmssb (a: vector_float, b: vector_float, c: vector_float) -> vector_float [simd_fms, "vector-enhancements-1" vfmssb] }
+    test_impl! { vfmsdb (a: vector_double, b: vector_double, c: vector_double) -> vector_double [simd_fms, vfmsdb] }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    impl VectorMadd for vector_float {
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_madd(self, b: Self, c: Self) -> Self {
+            vfmasb(self, b, c)
+        }
+
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_msub(self, b: Self, c: Self) -> Self {
+            vfmssb(self, b, c)
+        }
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    impl VectorMadd for vector_double {
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_madd(self, b: Self, c: Self) -> Self {
+            vfmadb(self, b, c)
+        }
+
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_msub(self, b: Self, c: Self) -> Self {
+            vfmsdb(self, b, c)
+        }
+    }
+
+    macro_rules!
impl_vec_unpack { + ($mask:ident $instr:ident $src:ident $shuffled:ident $dst:ident $width:literal) => { + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr($instr))] + unsafe fn $instr(a: $src) -> $dst { + simd_as(simd_shuffle::<_, _, $shuffled>( + a, + a, + const { ShuffleMask::<$width>::$mask() }, + )) + } + }; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorUnpackh { + type Result; + unsafe fn vec_unpackh(self) -> Self::Result; + } + + impl_vec_unpack!(unpack_high vuphb vector_signed_char i8x8 vector_signed_short 8); + impl_vec_unpack!(unpack_high vuphh vector_signed_short i16x4 vector_signed_int 4); + impl_vec_unpack!(unpack_high vuphf vector_signed_int i32x2 vector_signed_long_long 2); + + impl_vec_unpack!(unpack_high vuplhb vector_unsigned_char u8x8 vector_unsigned_short 8); + impl_vec_unpack!(unpack_high vuplhh vector_unsigned_short u16x4 vector_unsigned_int 4); + impl_vec_unpack!(unpack_high vuplhf vector_unsigned_int u32x2 vector_unsigned_long_long 2); + + impl_vec_trait! {[VectorUnpackh vec_unpackh] vuphb (vector_signed_char) -> vector_signed_short} + impl_vec_trait! {[VectorUnpackh vec_unpackh] vuphh (vector_signed_short) -> vector_signed_int} + impl_vec_trait! {[VectorUnpackh vec_unpackh] vuphf (vector_signed_int) -> vector_signed_long_long} + + impl_vec_trait! {[VectorUnpackh vec_unpackh] vuplhb (vector_unsigned_char) -> vector_unsigned_short} + impl_vec_trait! {[VectorUnpackh vec_unpackh] vuplhh (vector_unsigned_short) -> vector_unsigned_int} + impl_vec_trait! {[VectorUnpackh vec_unpackh] vuplhf (vector_unsigned_int) -> vector_unsigned_long_long} + + impl_vec_trait! {[VectorUnpackh vec_unpackh]+ vuplhb (vector_bool_char) -> vector_bool_short} + impl_vec_trait! {[VectorUnpackh vec_unpackh]+ vuplhh (vector_bool_short) -> vector_bool_int} + impl_vec_trait! {[VectorUnpackh vec_unpackh]+ vuplhf (vector_bool_int) -> vector_bool_long_long} + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorUnpackl { + type Result; + unsafe fn vec_unpackl(self) -> Self::Result; + } + + // FIXME(llvm): a shuffle + simd_as does not currently optimize into a single instruction like + // unpachk above. Tracked in https://github.com/llvm/llvm-project/issues/129576. + + impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplbw (vector_signed_char) -> vector_signed_short} + impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplhw (vector_signed_short) -> vector_signed_int} + impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplfw (vector_signed_int) -> vector_signed_long_long} + + impl_vec_trait! {[VectorUnpackl vec_unpackl] vupllb (vector_unsigned_char) -> vector_unsigned_short} + impl_vec_trait! {[VectorUnpackl vec_unpackl] vupllh (vector_unsigned_short) -> vector_unsigned_int} + impl_vec_trait! {[VectorUnpackl vec_unpackl] vupllf (vector_unsigned_int) -> vector_unsigned_long_long} + + impl_vec_trait! {[VectorUnpackl vec_unpackl]+ vupllb (vector_bool_char) -> vector_bool_short} + impl_vec_trait! {[VectorUnpackl vec_unpackl]+ vupllh (vector_bool_short) -> vector_bool_int} + impl_vec_trait! {[VectorUnpackl vec_unpackl]+ vupllf (vector_bool_int) -> vector_bool_long_long} + + test_impl! { vec_vavgb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char [ vavgb, vavgb ] } + test_impl! { vec_vavgh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short [ vavgh, vavgh ] } + test_impl! { vec_vavgf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int [ vavgf, vavgf ] } + test_impl! 
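+
+    // Big-endian note (assumption): the "high" half of a vector is its
+    // lowest-indexed half, so per lane the signed vec_unpackh above behaves
+    // like the sketch below, and vec_unpackl does the same with `a[i + 8]`:
+    //
+    //     fn unpackh_i8(a: [i8; 16]) -> [i16; 8] {
+    //         core::array::from_fn(|i| a[i] as i16)
+    //     }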
{ vec_vavgg(a: vector_signed_long_long, b: vector_signed_long_long) -> vector_signed_long_long [ vavgg, vavgg ] } + + test_impl! { vec_vavglb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char [ vavglb, vavglb ] } + test_impl! { vec_vavglh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short [ vavglh, vavglh ] } + test_impl! { vec_vavglf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int [ vavglf, vavglf ] } + test_impl! { vec_vavglg(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> vector_unsigned_long_long [ vavglg, vavglg ] } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorAvg { + type Result; + unsafe fn vec_avg(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorAvg vec_avg] 2 (vec_vavglb, vec_vavgb, vec_vavglh, vec_vavgh, vec_vavglf, vec_vavgf, vec_vavglg, vec_vavgg) } + + macro_rules! impl_mul { + ([$Trait:ident $m:ident] $fun:ident ($a:ty, $b:ty) -> $r:ty) => { + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl $Trait<$r> for $a { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn $m(self, b: $b) -> $r { + $fun(transmute(self), transmute(b)) + } + } + }; + ([$Trait:ident $m:ident] $fun:ident ($a:ty, $b:ty, $c:ty) -> $r:ty) => { + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl $Trait for $a { + type Result = $r; + #[inline] + #[target_feature(enable = "vector")] + unsafe fn $m(self, b: $b, c: $c) -> $r { + $fun(self, b, c) + } + } + }; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorMule { + unsafe fn vec_mule(self, b: Self) -> Result; + } + + // FIXME(llvm) sadly this does not yet work https://github.com/llvm/llvm-project/issues/129705 + // #[target_feature(enable = "vector")] + // #[cfg_attr(test, assert_instr(vmleh))] + // unsafe fn vec_vmleh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int { + // let even_a: vector_unsigned_int = simd_as(simd_shuffle::<_, _, u16x4>( + // a, + // a, + // const { ShuffleMask([0, 2, 4, 6]) }, + // )); + // + // let even_b: vector_unsigned_int = simd_as(simd_shuffle::<_, _, u16x4>( + // b, + // b, + // const { ShuffleMask([0, 2, 4, 6]) }, + // )); + // + // simd_mul(even_a, even_b) + // } + + test_impl! { vec_vmeb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short [ vmeb, vmeb ] } + test_impl! { vec_vmeh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int[ vmeh, vmeh ] } + test_impl! { vec_vmef(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long [ vmef, vmef ] } + + test_impl! { vec_vmleb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short [ vmleb, vmleb ] } + test_impl! { vec_vmleh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int[ vmleh, vmleh ] } + test_impl! 
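+
+    // Scalar model of one vec_avg lane (assumed from the VAVG definition):
+    // the sum is computed in double width, incremented, then halved.
+    //
+    //     fn avg_i32(a: i32, b: i32) -> i32 {
+    //         (((a as i64) + (b as i64) + 1) >> 1) as i32
+    //     }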
{ vec_vmlef(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long [ vmlef, vmlef ] } + + impl_mul!([VectorMule vec_mule] vec_vmeb (vector_signed_char, vector_signed_char) -> vector_signed_short ); + impl_mul!([VectorMule vec_mule] vec_vmeh (vector_signed_short, vector_signed_short) -> vector_signed_int); + impl_mul!([VectorMule vec_mule] vec_vmef (vector_signed_int, vector_signed_int) -> vector_signed_long_long ); + + impl_mul!([VectorMule vec_mule] vec_vmleb (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_short ); + impl_mul!([VectorMule vec_mule] vec_vmleh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int); + impl_mul!([VectorMule vec_mule] vec_vmlef (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long ); + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorMulo { + unsafe fn vec_mulo(self, b: Self) -> Result; + } + + test_impl! { vec_vmob(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short [ vmob, vmob ] } + test_impl! { vec_vmoh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int[ vmoh, vmoh ] } + test_impl! { vec_vmof(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long [ vmof, vmof ] } + + test_impl! { vec_vmlob(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short [ vmlob, vmlob ] } + test_impl! { vec_vmloh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int[ vmloh, vmloh ] } + test_impl! { vec_vmlof(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long [ vmlof, vmlof ] } + + impl_mul!([VectorMulo vec_mulo] vec_vmob (vector_signed_char, vector_signed_char) -> vector_signed_short ); + impl_mul!([VectorMulo vec_mulo] vec_vmoh (vector_signed_short, vector_signed_short) -> vector_signed_int); + impl_mul!([VectorMulo vec_mulo] vec_vmof (vector_signed_int, vector_signed_int) -> vector_signed_long_long ); + + impl_mul!([VectorMulo vec_mulo] vec_vmlob (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_short ); + impl_mul!([VectorMulo vec_mulo] vec_vmloh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int); + impl_mul!([VectorMulo vec_mulo] vec_vmlof (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long ); + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorMulh { + unsafe fn vec_mulh(self, b: Self) -> Result; + } + + test_impl! { vec_vmhb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char [ vmhb, vmhb ] } + test_impl! { vec_vmhh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short [ vmhh, vmhh ] } + test_impl! { vec_vmhf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int [ vmhf, vmhf ] } + + test_impl! { vec_vmlhb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char [ vmlhb, vmlhb ] } + test_impl! { vec_vmlhh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short [ vmlhh, vmlhh ] } + test_impl! 
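+
+    // Scalar model of the even/odd widening multiplies: vec_mule multiplies
+    // the even-indexed lanes into double-width results, and vec_mulo does
+    // the same with index 2 * i + 1:
+    //
+    //     fn mule_i16(a: [i16; 8], b: [i16; 8]) -> [i32; 4] {
+    //         core::array::from_fn(|i| a[2 * i] as i32 * b[2 * i] as i32)
+    //     }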
{ vec_vmlhf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int [ vmlhf, vmlhf ] } + + impl_mul!([VectorMulh vec_mulh] vec_vmhb (vector_signed_char, vector_signed_char) -> vector_signed_char); + impl_mul!([VectorMulh vec_mulh] vec_vmhh (vector_signed_short, vector_signed_short) -> vector_signed_short); + impl_mul!([VectorMulh vec_mulh] vec_vmhf (vector_signed_int, vector_signed_int) -> vector_signed_int); + + impl_mul!([VectorMulh vec_mulh] vec_vmlhb (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char); + impl_mul!([VectorMulh vec_mulh] vec_vmlhh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short); + impl_mul!([VectorMulh vec_mulh] vec_vmlhf (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int); + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorMeadd { + type Result; + unsafe fn vec_meadd(self, b: Self, c: Self::Result) -> Self::Result; + } + + test_impl! { vec_vmaeb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_short) -> vector_signed_short [ vmaeb, vmaeb ] } + test_impl! { vec_vmaeh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_int) -> vector_signed_int[ vmaeh, vmaeh ] } + test_impl! { vec_vmaef(a: vector_signed_int, b: vector_signed_int, c: vector_signed_long_long) -> vector_signed_long_long [ vmaef, vmaef ] } + + test_impl! { vec_vmaleb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_short) -> vector_unsigned_short [ vmaleb, vmaleb ] } + test_impl! { vec_vmaleh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int) -> vector_unsigned_int[ vmaleh, vmaleh ] } + test_impl! { vec_vmalef(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_long_long) -> vector_unsigned_long_long [ vmalef, vmalef ] } + + impl_mul!([VectorMeadd vec_meadd] vec_vmaeb (vector_signed_char, vector_signed_char, vector_signed_short) -> vector_signed_short ); + impl_mul!([VectorMeadd vec_meadd] vec_vmaeh (vector_signed_short, vector_signed_short, vector_signed_int) -> vector_signed_int); + impl_mul!([VectorMeadd vec_meadd] vec_vmaef (vector_signed_int, vector_signed_int, vector_signed_long_long) -> vector_signed_long_long ); + + impl_mul!([VectorMeadd vec_meadd] vec_vmaleb (vector_unsigned_char, vector_unsigned_char, vector_unsigned_short) -> vector_unsigned_short ); + impl_mul!([VectorMeadd vec_meadd] vec_vmaleh (vector_unsigned_short, vector_unsigned_short, vector_unsigned_int) -> vector_unsigned_int); + impl_mul!([VectorMeadd vec_meadd] vec_vmalef (vector_unsigned_int, vector_unsigned_int, vector_unsigned_long_long) -> vector_unsigned_long_long ); + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorMoadd { + type Result; + unsafe fn vec_moadd(self, b: Self, c: Self::Result) -> Self::Result; + } + + test_impl! { vec_vmaob(a: vector_signed_char, b: vector_signed_char, c: vector_signed_short) -> vector_signed_short [ vmaob, vmaob ] } + test_impl! { vec_vmaoh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_int) -> vector_signed_int[ vmaoh, vmaoh ] } + test_impl! { vec_vmaof(a: vector_signed_int, b: vector_signed_int, c: vector_signed_long_long) -> vector_signed_long_long [ vmaof, vmaof ] } + + test_impl! { vec_vmalob(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_short) -> vector_unsigned_short [ vmalob, vmalob ] } + test_impl! 
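+
+    // Scalar model of one vec_meadd lane (assumption): the even-lane
+    // widening product plus the accumulator, with wrapping addition;
+    // vec_moadd uses the odd lanes instead.
+    //
+    //     fn meadd_i16(a: [i16; 8], b: [i16; 8], c: [i32; 4]) -> [i32; 4] {
+    //         core::array::from_fn(|i| (a[2 * i] as i32 * b[2 * i] as i32).wrapping_add(c[i]))
+    //     }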
{ vec_vmaloh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int) -> vector_unsigned_int[ vmaloh, vmaloh ] } + test_impl! { vec_vmalof(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_long_long) -> vector_unsigned_long_long [ vmalof, vmalof ] } + + impl_mul!([VectorMoadd vec_moadd] vec_vmaob (vector_signed_char, vector_signed_char, vector_signed_short) -> vector_signed_short ); + impl_mul!([VectorMoadd vec_moadd] vec_vmaoh (vector_signed_short, vector_signed_short, vector_signed_int) -> vector_signed_int); + impl_mul!([VectorMoadd vec_moadd] vec_vmaof (vector_signed_int, vector_signed_int, vector_signed_long_long) -> vector_signed_long_long ); + + impl_mul!([VectorMoadd vec_moadd] vec_vmalob (vector_unsigned_char, vector_unsigned_char, vector_unsigned_short) -> vector_unsigned_short ); + impl_mul!([VectorMoadd vec_moadd] vec_vmaloh (vector_unsigned_short, vector_unsigned_short, vector_unsigned_int) -> vector_unsigned_int); + impl_mul!([VectorMoadd vec_moadd] vec_vmalof (vector_unsigned_int, vector_unsigned_int, vector_unsigned_long_long) -> vector_unsigned_long_long ); + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorMhadd { + type Result; + unsafe fn vec_mhadd(self, b: Self, c: Self::Result) -> Self::Result; + } + + test_impl! { vec_vmahb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char) -> vector_signed_char [ vmahb, vmahb ] } + test_impl! { vec_vmahh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short) -> vector_signed_short[ vmahh, vmahh ] } + test_impl! { vec_vmahf(a: vector_signed_int, b: vector_signed_int, c: vector_signed_int) -> vector_signed_int [ vmahf, vmahf ] } + + test_impl! { vec_vmalhb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char [ vmalhb, vmalhb ] } + test_impl! { vec_vmalhh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short[ vmalhh, vmalhh ] } + test_impl! { vec_vmalhf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int [ vmalhf, vmalhf ] } + + impl_mul!([VectorMhadd vec_mhadd] vec_vmahb (vector_signed_char, vector_signed_char, vector_signed_char) -> vector_signed_char ); + impl_mul!([VectorMhadd vec_mhadd] vec_vmahh (vector_signed_short, vector_signed_short, vector_signed_short) -> vector_signed_short); + impl_mul!([VectorMhadd vec_mhadd] vec_vmahf (vector_signed_int, vector_signed_int, vector_signed_int) -> vector_signed_int ); + + impl_mul!([VectorMhadd vec_mhadd] vec_vmalhb (vector_unsigned_char, vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char ); + impl_mul!([VectorMhadd vec_mhadd] vec_vmalhh (vector_unsigned_short, vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short); + impl_mul!([VectorMhadd vec_mhadd] vec_vmalhf (vector_unsigned_int, vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int ); + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorMladd { + type Result; + unsafe fn vec_mladd(self, b: Self, c: Self::Result) -> Self::Result; + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn simd_mladd(a: T, b: T, c: T) -> T { + simd_add(simd_mul(a, b), c) + } + + test_impl! { vec_vmal_ib(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char) -> vector_signed_char [simd_mladd, vmalb ] } + test_impl! 
{ vec_vmal_ih(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short) -> vector_signed_short[simd_mladd, vmalh ] } + test_impl! { vec_vmal_if(a: vector_signed_int, b: vector_signed_int, c: vector_signed_int) -> vector_signed_int [simd_mladd, vmalf ] } + + test_impl! { vec_vmal_ub(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char [simd_mladd, vmalb ] } + test_impl! { vec_vmal_uh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short[simd_mladd, vmalh ] } + test_impl! { vec_vmal_uf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int [simd_mladd, vmalf ] } + + impl_mul!([VectorMladd vec_mladd] vec_vmal_ib (vector_signed_char, vector_signed_char, vector_signed_char) -> vector_signed_char ); + impl_mul!([VectorMladd vec_mladd] vec_vmal_ih (vector_signed_short, vector_signed_short, vector_signed_short) -> vector_signed_short); + impl_mul!([VectorMladd vec_mladd] vec_vmal_if (vector_signed_int, vector_signed_int, vector_signed_int) -> vector_signed_int ); + + impl_mul!([VectorMladd vec_mladd] vec_vmal_ub (vector_unsigned_char, vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char ); + impl_mul!([VectorMladd vec_mladd] vec_vmal_uh (vector_unsigned_short, vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short); + impl_mul!([VectorMladd vec_mladd] vec_vmal_uf (vector_unsigned_int, vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int ); + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorGfmsum { + unsafe fn vec_gfmsum(self, b: Self) -> Result; + } + + test_impl! { vec_vgfmb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short [ vgfmb, vgfmb ] } + test_impl! { vec_vgfmh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int[ vgfmh, vgfmh] } + test_impl! { vec_vgfmf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long [ vgfmf, vgfmf ] } + + impl_mul!([VectorGfmsum vec_gfmsum] vec_vgfmb (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_short ); + impl_mul!([VectorGfmsum vec_gfmsum] vec_vgfmh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int); + impl_mul!([VectorGfmsum vec_gfmsum] vec_vgfmf (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long ); + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorGfmsumAccum { + type Result; + unsafe fn vec_gfmsum_accum(self, b: Self, c: Self::Result) -> Self::Result; + } + + test_impl! { vec_vgfmab(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_short) -> vector_unsigned_short [ vgfmab, vgfmab ] } + test_impl! { vec_vgfmah(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int) -> vector_unsigned_int[ vgfmah, vgfmah] } + test_impl! 
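+
+    // vec_mladd keeps only the low half of the product (simd_mul followed by
+    // simd_add above), so each lane is plain wrapping arithmetic in the
+    // element type:
+    //
+    //     fn mladd_i16(a: i16, b: i16, c: i16) -> i16 {
+    //         a.wrapping_mul(b).wrapping_add(c)
+    //     }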
{ vec_vgfmaf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_long_long) -> vector_unsigned_long_long [ vgfmaf, vgfmaf ] } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorGfmsumAccum for vector_unsigned_char { + type Result = vector_unsigned_short; + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_gfmsum_accum(self, b: Self, c: Self::Result) -> Self::Result { + vec_vgfmab(self, b, c) + } + } + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorGfmsumAccum for vector_unsigned_short { + type Result = vector_unsigned_int; + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_gfmsum_accum(self, b: Self, c: Self::Result) -> Self::Result { + vec_vgfmah(self, b, c) + } + } + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorGfmsumAccum for vector_unsigned_int { + type Result = vector_unsigned_long_long; + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_gfmsum_accum(self, b: Self, c: Self::Result) -> Self::Result { + vec_vgfmaf(self, b, c) + } + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vgef, D = 3))] + unsafe fn vgef( + a: vector_unsigned_int, + b: vector_unsigned_int, + c: *const u32, + ) -> vector_unsigned_int { + static_assert_uimm_bits!(D, 2); + let offset: u32 = simd_extract(b, D); + let ptr = c.byte_add(offset as usize); + let value = ptr.read(); + simd_insert(a, D, value) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vgeg, D = 1))] + unsafe fn vgeg( + a: vector_unsigned_long_long, + b: vector_unsigned_long_long, + c: *const u64, + ) -> vector_unsigned_long_long { + static_assert_uimm_bits!(D, 1); + let offset: u64 = simd_extract(b, D); + let ptr = c.byte_add(offset as usize); + let value = ptr.read(); + simd_insert(a, D, value) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorGatherElement { + type Element; + type Offset; + unsafe fn vec_gather_element( + self, + b: Self::Offset, + c: *const Self::Element, + ) -> Self; + } + + macro_rules! impl_vec_gather_element { + ($($instr:ident $ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorGatherElement for $ty { + type Element = l_t_t!($ty); + type Offset = t_u!($ty); + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_gather_element(self, b: Self::Offset, c: *const Self::Element) -> Self { + transmute($instr::(transmute(self), b, c.cast())) + } + } + )* + } + } + + impl_vec_gather_element! 
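+
+    // Note that the offsets in `b` are byte offsets from `c`, not element
+    // indices (see the `byte_add` in the vgef/vgeg lowering above); a scalar
+    // model of one gathered lane:
+    //
+    //     unsafe fn gather_lane<const D: usize>(mut a: [u32; 4], b: [u32; 4], c: *const u32) -> [u32; 4] {
+    //         a[D] = unsafe { c.byte_add(b[D] as usize).read() };
+    //         a
+    //     }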
{ + vgef vector_signed_int + vgef vector_bool_int + vgef vector_unsigned_int + + vgeg vector_signed_long_long + vgeg vector_bool_long_long + vgeg vector_unsigned_long_long + + vgef vector_float + vgeg vector_double + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vscef, D = 3))] + unsafe fn vscef(a: vector_unsigned_int, b: vector_unsigned_int, c: *mut u32) { + static_assert_uimm_bits!(D, 2); + let value = simd_extract(a, D); + let offset: u32 = simd_extract(b, D); + let ptr = c.byte_add(offset as usize); + ptr.write(value); + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vsceg, D = 1))] + unsafe fn vsceg( + a: vector_unsigned_long_long, + b: vector_unsigned_long_long, + c: *mut u64, + ) { + static_assert_uimm_bits!(D, 1); + let value = simd_extract(a, D); + let offset: u64 = simd_extract(b, D); + let ptr = c.byte_add(offset as usize); + ptr.write(value); + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorScatterElement { + type Element; + type Offset; + unsafe fn vec_scatter_element(self, b: Self::Offset, c: *mut Self::Element); + } + + macro_rules! impl_vec_scatter_element { + ($($instr:ident $ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorScatterElement for $ty { + type Element = l_t_t!($ty); + type Offset = t_u!($ty); + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_scatter_element(self, b: Self::Offset, c: *mut Self::Element) { + $instr::(transmute(self), b, c.cast()) + } + } + )* + } + } + + impl_vec_scatter_element! { + vscef vector_signed_int + vscef vector_bool_int + vscef vector_unsigned_int + + vsceg vector_signed_long_long + vsceg vector_bool_long_long + vsceg vector_unsigned_long_long + + vscef vector_float + vsceg vector_double + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSel: Sized { + unsafe fn vec_sel(self, b: Self, c: Mask) -> Self; + } + + macro_rules! impl_vec_sel { + ($($ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorSel for $ty { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_sel(self, b: Self, c: t_u!($ty)) -> Self { + let b = simd_and(b, transmute(c)); + let a = simd_and(self, simd_xor(transmute(c), transmute(vector_signed_char([!0; 16])))); + simd_or(a, b) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorSel for $ty { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_sel(self, b: Self, c: t_b!($ty)) -> Self { + // defer to the implementation with an unsigned mask + self.vec_sel(b, transmute::<_, t_u!($ty)>(c)) + } + } + )* + } + } + + impl_vec_sel! 
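+
+    // vec_sel is a pure bitwise select: for each bit, take `b` where the
+    // mask bit is set and `a` where it is clear, i.e. per word:
+    //
+    //     fn sel(a: u32, b: u32, c: u32) -> u32 {
+    //         (a & !c) | (b & c)
+    //     }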
{ + vector_signed_char + vector_signed_short + vector_signed_int + vector_signed_long_long + + vector_unsigned_char + vector_unsigned_short + vector_unsigned_int + vector_unsigned_long_long + + vector_bool_char + vector_bool_short + vector_bool_int + vector_bool_long_long + + vector_float + vector_double + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorFpTestDataClass { + type Result; + unsafe fn vec_fp_test_data_class(self) -> (Self::Result, i32); + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorFpTestDataClass for vector_float { + type Result = vector_bool_int; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_fp_test_data_class(self) -> (Self::Result, i32) { + let PackedTuple { x, y } = vftcisb(self, CLASS); + (x, y) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorFpTestDataClass for vector_double { + type Result = vector_bool_long_long; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_fp_test_data_class(self) -> (Self::Result, i32) { + let PackedTuple { x, y } = vftcidb(self, CLASS); + (x, y) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorCompare { + unsafe fn vec_all_lt(self, other: Self) -> i32; + unsafe fn vec_all_le(self, other: Self) -> i32; + unsafe fn vec_all_gt(self, other: Self) -> i32; + unsafe fn vec_all_ge(self, other: Self) -> i32; + } + + // NOTE: this implementation is currently non-optimal, but it does work for floats even with + // only `vector` enabled. + // + // - https://github.com/llvm/llvm-project/issues/129434 + // - https://github.com/llvm/llvm-project/issues/130424 + macro_rules! impl_vec_compare { + ($($ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorCompare for $ty { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_all_lt(self, other: Self) -> i32 { + simd_reduce_all(simd_lt::<_, t_b!($ty)>(self, other)) as i32 + } + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_all_le(self, other: Self) -> i32 { + simd_reduce_all(simd_le::<_, t_b!($ty)>(self, other)) as i32 + } + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_all_gt(self, other: Self) -> i32 { + simd_reduce_all(simd_gt::<_, t_b!($ty)>(self, other)) as i32 + } + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_all_ge(self, other: Self) -> i32 { + simd_reduce_all(simd_ge::<_, t_b!($ty)>(self, other)) as i32 + } + } + )* + } + } + + impl_vec_compare! { + vector_signed_char + vector_unsigned_char + + vector_signed_short + vector_unsigned_short + + vector_signed_int + vector_unsigned_int + vector_float + + vector_signed_long_long + vector_unsigned_long_long + vector_double + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorTestMask { + type Mask; + unsafe fn vec_test_mask(self, other: Self::Mask) -> i32; + } + + macro_rules! impl_vec_test_mask { + ($($instr:ident $ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorTestMask for $ty { + type Mask = t_u!($ty); + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_test_mask(self, other: Self::Mask) -> i32 { + vtm(transmute(self), transmute(other)) + } + } + )* + } + } + + impl_vec_test_mask! 
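+
+    // The vec_all_* predicates reduce an element-wise comparison to a single
+    // flag (simd_reduce_all above); a scalar model of vec_all_lt over four
+    // lanes:
+    //
+    //     fn all_lt(a: [i32; 4], b: [i32; 4]) -> i32 {
+    //         a.iter().zip(b.iter()).all(|(x, y)| x < y) as i32
+    //     }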
{ + vector_signed_char + vector_signed_short + vector_signed_int + vector_signed_long_long + + vector_unsigned_char + vector_unsigned_short + vector_unsigned_int + vector_unsigned_long_long + + vector_float + vector_double + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSearchString { + unsafe fn vec_search_string_cc( + self, + b: Self, + c: vector_unsigned_char, + ) -> (vector_unsigned_char, i32); + + unsafe fn vec_search_string_until_zero_cc( + self, + b: Self, + c: vector_unsigned_char, + ) -> (vector_unsigned_char, i32); + } + + macro_rules! impl_vec_search_string{ + ($($intr_s:ident $intr_sz:ident $ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorSearchString for $ty { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_search_string_cc(self, b: Self, c: vector_unsigned_char) -> (vector_unsigned_char, i32) { + let PackedTuple { x,y } = $intr_s(transmute(self), transmute(b), c); + (x, y) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_search_string_until_zero_cc(self, b: Self, c: vector_unsigned_char) -> (vector_unsigned_char, i32) { + let PackedTuple { x,y } = $intr_sz(transmute(self), transmute(b), c); + (x, y) + } + } + + )* + } + } + + impl_vec_search_string! { + vstrsb vstrszb vector_signed_char + vstrsb vstrszb vector_bool_char + vstrsb vstrszb vector_unsigned_char + + vstrsh vstrszh vector_signed_short + vstrsh vstrszh vector_bool_short + vstrsh vstrszh vector_unsigned_short + + vstrsf vstrszf vector_signed_int + vstrsf vstrszf vector_bool_int + vstrsf vstrszf vector_unsigned_int + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vcdgb))] + pub unsafe fn vcdgb(a: vector_signed_long_long) -> vector_double { + simd_as(a) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vcdlgb))] + pub unsafe fn vcdlgb(a: vector_unsigned_long_long) -> vector_double { + simd_as(a) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorDouble { + unsafe fn vec_double(self) -> vector_double; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorDouble for vector_signed_long_long { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_double(self) -> vector_double { + vcdgb(self) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorDouble for vector_unsigned_long_long { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_double(self) -> vector_double { + vcdlgb(self) + } + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr( + all(test, target_feature = "vector-enhancements-2"), + assert_instr(vcefb) + )] + pub unsafe fn vcefb(a: vector_signed_int) -> vector_float { + simd_as(a) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr( + all(test, target_feature = "vector-enhancements-2"), + assert_instr(vcelfb) + )] + pub unsafe fn vcelfb(a: vector_unsigned_int) -> vector_float { + simd_as(a) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorFloat { + unsafe fn vec_float(self) -> vector_float; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorFloat for vector_signed_int { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_float(self) -> vector_float { + vcefb(self) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorFloat for vector_unsigned_int { + #[inline] + 
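+
+    // simd_as performs the same lane-wise conversion as a scalar `as` cast,
+    // so vcdgb behaves like the model below (and vcdlgb, vcefb, vcelfb do
+    // the same for their element types):
+    //
+    //     fn double_lanes(a: [i64; 2]) -> [f64; 2] {
+    //         [a[0] as f64, a[1] as f64]
+    //     }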
#[target_feature(enable = "vector")] + unsafe fn vec_float(self) -> vector_float { + vcelfb(self) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorExtendSigned64 { + unsafe fn vec_extend_s64(self) -> vector_signed_long_long; + } + + #[inline] + #[target_feature(enable = "vector")] + // FIXME(llvm): https://github.com/llvm/llvm-project/issues/129899 + // #[cfg_attr(test, assert_instr(vsegb))] + pub unsafe fn vsegb(a: vector_signed_char) -> vector_signed_long_long { + simd_as(simd_shuffle::<_, _, i8x2>( + a, + a, + const { u32x2::from_array([7, 15]) }, + )) + } + + #[inline] + #[target_feature(enable = "vector")] + // FIXME(llvm): https://github.com/llvm/llvm-project/issues/129899 + // #[cfg_attr(test, assert_instr(vsegh))] + pub unsafe fn vsegh(a: vector_signed_short) -> vector_signed_long_long { + simd_as(simd_shuffle::<_, _, i16x2>( + a, + a, + const { u32x2::from_array([3, 7]) }, + )) + } + + #[inline] + #[target_feature(enable = "vector")] + // FIXME(llvm): https://github.com/llvm/llvm-project/issues/129899 + // #[cfg_attr(test, assert_instr(vsegf))] + pub unsafe fn vsegf(a: vector_signed_int) -> vector_signed_long_long { + simd_as(simd_shuffle::<_, _, i32x2>( + a, + a, + const { u32x2::from_array([1, 3]) }, + )) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorExtendSigned64 for vector_signed_char { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_extend_s64(self) -> vector_signed_long_long { + vsegb(self) + } + } + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorExtendSigned64 for vector_signed_short { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_extend_s64(self) -> vector_signed_long_long { + vsegh(self) + } + } + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorExtendSigned64 for vector_signed_int { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_extend_s64(self) -> vector_signed_long_long { + vsegf(self) + } + } + + // NOTE: VectorSigned and VectorUnsigned make strong safety assumptions around floats. + // This is what C provides, but even IBM does not clearly document these constraints. + // + // https://doc.rust-lang.org/std/intrinsics/simd/fn.simd_cast.html + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSigned { + type Result; + unsafe fn vec_signed(self) -> Self::Result; + } + + test_impl! { vcgsb (a: vector_float) -> vector_signed_int [simd_cast, "vector-enhancements-2" vcgsb] } + test_impl! { vcgdb (a: vector_double) -> vector_signed_long_long [simd_cast, vcgdb] } + + impl_vec_trait! { [VectorSigned vec_signed] vcgsb (vector_float) -> vector_signed_int } + impl_vec_trait! { [VectorSigned vec_signed] vcgdb (vector_double) -> vector_signed_long_long } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorUnsigned { + type Result; + unsafe fn vec_unsigned(self) -> Self::Result; + } + + test_impl! { vclgsb (a: vector_float) -> vector_unsigned_int [simd_cast, "vector-enhancements-2" vclgsb] } + test_impl! { vclgdb (a: vector_double) -> vector_unsigned_long_long [simd_cast, vclgdb] } + + impl_vec_trait! { [VectorUnsigned vec_unsigned] vclgsb (vector_float) -> vector_unsigned_int } + impl_vec_trait! { [VectorUnsigned vec_unsigned] vclgdb (vector_double) -> vector_unsigned_long_long } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorCopyUntilZero { + unsafe fn vec_cp_until_zero(self) -> Self; + } + + test_impl! 
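+
+// The shuffles in vsegb/vsegh/vsegf above pick the last (big-endian: least
+// significant) element of each doubleword before sign-extending, so vsegb
+// behaves like:
+//
+//     fn extend_s64_bytes(a: [i8; 16]) -> [i64; 2] {
+//         [a[7] as i64, a[15] as i64]
+//     }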
{ vec_vistrb (a: vector_unsigned_char) -> vector_unsigned_char [vistrb, vistrb] } + test_impl! { vec_vistrh (a: vector_unsigned_short) -> vector_unsigned_short [vistrh, vistrh] } + test_impl! { vec_vistrf (a: vector_unsigned_int) -> vector_unsigned_int [vistrf, vistrf] } + + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrb (vector_signed_char) } + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrb (vector_bool_char) } + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrb (vector_unsigned_char) } + + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrh (vector_signed_short) } + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrh (vector_bool_short) } + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrh (vector_unsigned_short) } + + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrf (vector_signed_int) } + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrf (vector_bool_int) } + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrf (vector_unsigned_int) } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorCopyUntilZeroCC: Sized { + unsafe fn vec_cp_until_zero_cc(self) -> (Self, i32); + } + + test_impl! { vec_vistrbs (a: vector_unsigned_char) -> PackedTuple [vistrbs, vistrbs] } + test_impl! { vec_vistrhs (a: vector_unsigned_short) -> PackedTuple [vistrhs, vistrhs] } + test_impl! { vec_vistrfs (a: vector_unsigned_int) -> PackedTuple [vistrfs, vistrfs] } + + macro_rules! impl_vec_copy_until_zero_cc { + ($($intr:ident $ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorCopyUntilZeroCC for $ty { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cp_until_zero_cc(self) -> (Self, i32) { + let PackedTuple { x,y } = $intr(transmute(self)); + (transmute(x), y) + } + } + + )* + } + } + + impl_vec_copy_until_zero_cc! { + vec_vistrbs vector_signed_char + vec_vistrbs vector_bool_char + vec_vistrbs vector_unsigned_char + + vec_vistrhs vector_signed_short + vec_vistrhs vector_bool_short + vec_vistrhs vector_unsigned_short + + vec_vistrfs vector_signed_int + vec_vistrfs vector_bool_int + vec_vistrfs vector_unsigned_int + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSrdb { + unsafe fn vec_srdb(self, b: Self) -> Self; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSld { + unsafe fn vec_sld(self, b: Self) -> Self; + + unsafe fn vec_sldw(self, b: Self) -> Self; + + unsafe fn vec_sldb(self, b: Self) -> Self; + } + + // FIXME(llvm) https://github.com/llvm/llvm-project/issues/129955 + // ideally we could implement this in terms of llvm.fshl.i128 + // #[link_name = "llvm.fshl.i128"] fn fshl_i128(a: u128, b: u128, c: u128) -> u128; + // transmute(fshl_i128(transmute(a), transmute(b), const { C * 8 } )) + + macro_rules! 
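+
+    // vec_cp_until_zero copies elements up to the first zero element and
+    // clears everything from there on; a scalar model for bytes (an
+    // assumption based on the VISTR semantics):
+    //
+    //     fn cp_until_zero(a: [u8; 16]) -> [u8; 16] {
+    //         let mut out = [0u8; 16];
+    //         for (i, &x) in a.iter().enumerate() {
+    //             if x == 0 { break; }
+    //             out[i] = x;
+    //         }
+    //         out
+    //     }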
impl_vec_sld { + ($($ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorSld for $ty { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_sld(self, b: Self) -> Self { + static_assert_uimm_bits!(C, 4); + transmute(vsldb(transmute(self), transmute(b), C)) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_sldw(self, b: Self) -> Self { + static_assert_uimm_bits!(C, 2); + transmute(vsldb(transmute(self), transmute(b), const { 4 * C })) + } + + #[inline] + #[target_feature(enable = "vector-enhancements-2")] + unsafe fn vec_sldb(self, b: Self) -> Self { + static_assert_uimm_bits!(C, 3); + transmute(vsld(transmute(self), transmute(b), C)) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorSrdb for $ty { + #[inline] + #[target_feature(enable = "vector-enhancements-2")] + unsafe fn vec_srdb(self, b: Self) -> Self { + static_assert_uimm_bits!(C, 3); + transmute(vsrd(transmute(self), transmute(b), C)) + } + } + )* + } + } + + impl_vec_sld! { + vector_signed_char + vector_bool_char + vector_unsigned_char + + vector_signed_short + vector_bool_short + vector_unsigned_short + + vector_signed_int + vector_bool_int + vector_unsigned_int + + vector_signed_long_long + vector_bool_long_long + vector_unsigned_long_long + + vector_float + vector_double + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorCompareRange: Sized { + type Result; + + unsafe fn vstrc(self, b: Self, c: Self) -> Self::Result; + unsafe fn vstrcz(self, b: Self, c: Self) -> Self::Result; + unsafe fn vstrcs(self, b: Self, c: Self) -> (Self::Result, i32); + unsafe fn vstrczs(self, b: Self, c: Self) -> (Self::Result, i32); + } + + const fn validate_compare_range_imm(imm: u32) { + if !matches!(imm, 0 | 4 | 8 | 12) { + panic!("IMM needs to be one of 0, 4, 8, 12"); + } + } + + macro_rules! impl_compare_range { + ($($ty:ident $vstrc:ident $vstrcs:ident $vstrcz:ident $vstrczs:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorCompareRange for $ty { + type Result = t_b!($ty); + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vstrc(self, b: Self, c: Self) -> Self::Result { + const { validate_compare_range_imm }; + $vstrc(self, b, c, IMM) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vstrcz(self, b: Self, c: Self) -> Self::Result { + const { validate_compare_range_imm }; + $vstrcz(self, b, c, IMM) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vstrcs(self, b: Self, c: Self) -> (Self::Result, i32) { + const { validate_compare_range_imm }; + let PackedTuple { x, y } = $vstrcs(self, b, c, IMM); + (x,y) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vstrczs(self, b: Self, c: Self) -> (Self::Result, i32) { + const { validate_compare_range_imm }; + let PackedTuple { x, y } = $vstrczs(self, b, c, IMM); + (x,y) + } + } + )* + } + } + + impl_compare_range! 
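+
+    // vec_sld::<C> above concatenates `a` and `b` and takes the 16 bytes
+    // starting at byte offset C (vec_sldw counts in 4-byte words); a scalar
+    // model:
+    //
+    //     fn sld<const C: usize>(a: [u8; 16], b: [u8; 16]) -> [u8; 16] {
+    //         let mut buf = [0u8; 32];
+    //         buf[..16].copy_from_slice(&a);
+    //         buf[16..].copy_from_slice(&b);
+    //         core::array::from_fn(|i| buf[C + i])
+    //     }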
{ + vector_unsigned_char vstrcb vstrcbs vstrczb vstrczbs + vector_unsigned_short vstrch vstrchs vstrczh vstrczhs + vector_unsigned_int vstrcf vstrcfs vstrczf vstrczfs + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorComparePredicate: Sized { + type Result; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cmpgt(self, other: Self) -> Self::Result { + simd_gt(self, other) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cmpge(self, other: Self) -> Self::Result { + simd_ge(self, other) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cmplt(self, other: Self) -> Self::Result { + simd_lt(self, other) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cmple(self, other: Self) -> Self::Result { + simd_le(self, other) + } + } + + macro_rules! impl_compare_predicate { + ($($ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorComparePredicate for $ty { + type Result = t_b!($ty); + } + )* + } + } + + impl_compare_predicate! { + vector_signed_char + vector_unsigned_char + + vector_signed_short + vector_unsigned_short + + vector_signed_int + vector_unsigned_int + vector_float + + vector_signed_long_long + vector_unsigned_long_long + vector_double + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorEquality: Sized { + type Result; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cmpeq(self, other: Self) -> Self::Result { + simd_eq(self, other) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cmpne(self, other: Self) -> Self::Result { + simd_ne(self, other) + } + } + + macro_rules! impl_compare_equality { + ($($ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorEquality for $ty { + type Result = t_b!($ty); + } + )* + } + } + + impl_compare_equality! { + vector_bool_char + vector_signed_char + vector_unsigned_char + + vector_bool_short + vector_signed_short + vector_unsigned_short + + vector_bool_int + vector_signed_int + vector_unsigned_int + vector_float + + vector_bool_long_long + vector_signed_long_long + vector_unsigned_long_long + vector_double + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorEqualityIdx: Sized { + type Result; + + unsafe fn vec_cmpeq_idx(self, other: Self) -> Self::Result; + unsafe fn vec_cmpne_idx(self, other: Self) -> Self::Result; + + unsafe fn vec_cmpeq_idx_cc(self, other: Self) -> (Self::Result, i32); + unsafe fn vec_cmpne_idx_cc(self, other: Self) -> (Self::Result, i32); + + unsafe fn vec_cmpeq_or_0_idx(self, other: Self) -> Self::Result; + unsafe fn vec_cmpne_or_0_idx(self, other: Self) -> Self::Result; + + unsafe fn vec_cmpeq_or_0_idx_cc(self, other: Self) -> (Self::Result, i32); + unsafe fn vec_cmpne_or_0_idx_cc(self, other: Self) -> (Self::Result, i32); + } + + macro_rules! 
impl_compare_equality_idx {
+        ($($ty:ident $ret:ident
+            $cmpeq:ident $cmpne:ident
+            $cmpeq_or_0:ident $cmpne_or_0:ident
+            $cmpeq_cc:ident $cmpne_cc:ident
+            $cmpeq_or_0_cc:ident $cmpne_or_0_cc:ident
+        )*) => {
+            $(
+                #[unstable(feature = "stdarch_s390x", issue = "135681")]
+                impl VectorEqualityIdx for $ty {
+                    type Result = $ret;
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_cmpeq_idx(self, other: Self) -> Self::Result {
+                        transmute($cmpeq(transmute(self), transmute(other)))
+                    }
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_cmpne_idx(self, other: Self) -> Self::Result {
+                        transmute($cmpne(transmute(self), transmute(other)))
+                    }
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_cmpeq_or_0_idx(self, other: Self) -> Self::Result {
+                        transmute($cmpeq_or_0(transmute(self), transmute(other)))
+                    }
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_cmpne_or_0_idx(self, other: Self) -> Self::Result {
+                        transmute($cmpne_or_0(transmute(self), transmute(other)))
+                    }
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_cmpeq_idx_cc(self, other: Self) -> (Self::Result, i32) {
+                        let PackedTuple { x, y } = $cmpeq_cc(transmute(self), transmute(other));
+                        (transmute(x), y)
+                    }
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_cmpne_idx_cc(self, other: Self) -> (Self::Result, i32) {
+                        let PackedTuple { x, y } = $cmpne_cc(transmute(self), transmute(other));
+                        (transmute(x), y)
+                    }
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_cmpeq_or_0_idx_cc(self, other: Self) -> (Self::Result, i32) {
+                        let PackedTuple { x, y } = $cmpeq_or_0_cc(transmute(self), transmute(other));
+                        (transmute(x), y)
+                    }
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_cmpne_or_0_idx_cc(self, other: Self) -> (Self::Result, i32) {
+                        let PackedTuple { x, y } = $cmpne_or_0_cc(transmute(self), transmute(other));
+                        (transmute(x), y)
+                    }
+                }
+            )*
+        }
+    }
+
+    impl_compare_equality_idx! {
+        vector_signed_char vector_signed_char vfeeb vfeneb vfeezb vfenezb vfeebs vfenebs vfeezbs vfenezbs
+        vector_bool_char vector_unsigned_char vfeeb vfeneb vfeezb vfenezb vfeebs vfenebs vfeezbs vfenezbs
+        vector_unsigned_char vector_unsigned_char vfeeb vfeneb vfeezb vfenezb vfeebs vfenebs vfeezbs vfenezbs
+        vector_signed_short vector_signed_short vfeeh vfeneh vfeezh vfenezh vfeehs vfenehs vfeezhs vfenezhs
+        vector_bool_short vector_unsigned_short vfeeh vfeneh vfeezh vfenezh vfeehs vfenehs vfeezhs vfenezhs
+        vector_unsigned_short vector_unsigned_short vfeeh vfeneh vfeezh vfenezh vfeehs vfenehs vfeezhs vfenezhs
+        vector_signed_int vector_signed_int vfeef vfenef vfeezf vfenezf vfeefs vfenefs vfeezfs vfenezfs
+        vector_bool_int vector_unsigned_int vfeef vfenef vfeezf vfenezf vfeefs vfenefs vfeezfs vfenezfs
+        vector_unsigned_int vector_unsigned_int vfeef vfenef vfeezf vfenezf vfeefs vfenefs vfeezfs vfenezfs
+    }
+}
+
+/// Load Count to Block Boundary
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(lcbb, BLOCK_BOUNDARY = 512))]
+unsafe fn __lcbb<const BLOCK_BOUNDARY: u16>(ptr: *const u8) -> u32 {
+    lcbb(ptr, const { validate_block_boundary(BLOCK_BOUNDARY) })
+}
+
+/// Vector element-wise addition.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_add<T: sealed::VectorAdd<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_add(b)
+}
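+
+// A scalar model of __lcbb above (assumed from the LCBB definition): the
+// number of bytes from `ptr` to the next BLOCK_BOUNDARY-aligned address,
+// capped at the 16-byte vector register size.
+//
+//     fn lcbb_model(addr: usize, boundary: usize) -> u32 {
+//         (boundary - addr % boundary).min(16) as u32
+//     }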
+
+/// Vector element-wise subtraction.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_sub<T: sealed::VectorSub<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_sub(b)
+}
+
+/// Vector element-wise multiplication.
+///
+/// ## Purpose
+/// Compute the products of corresponding elements of two vectors.
+///
+/// ## Result value
+/// Each element of r receives the product of the corresponding elements of a and b.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_mul<T: sealed::VectorMul>(a: T, b: T) -> T {
+    a.vec_mul(b)
+}
+
+/// Vector Count Leading Zeros
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cntlz<T: sealed::VectorCountBits>(a: T) -> T::Result {
+    a.vec_cntlz()
+}
+
+/// Vector Count Trailing Zeros
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cnttz<T: sealed::VectorCountBits>(a: T) -> T::Result {
+    a.vec_cnttz()
+}
+
+/// Vector Population Count
+///
+/// Computes the population count (number of set bits) in each element of the input.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_popcnt<T: sealed::VectorCountBits>(a: T) -> T::Result {
+    a.vec_popcnt()
+}
+
+/// Vector element-wise maximum.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_max<T: sealed::VectorMax<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_max(b)
+}
+
+/// Vector element-wise minimum.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_min<T: sealed::VectorMin<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_min(b)
+}
+
+/// Vector abs.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_abs<T: sealed::VectorAbs>(a: T) -> T {
+    a.vec_abs()
+}
+
+/// Vector negative abs.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_nabs<T: sealed::VectorNabs>(a: T) -> T {
+    a.vec_nabs()
+}
+
+/// Vector Negative Multiply Add
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_nmadd<T: sealed::VectorNmadd>(a: T, b: T, c: T) -> T {
+    a.vec_nmadd(b, c)
+}
+
+/// Vector Negative Multiply Subtract
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_nmsub<T: sealed::VectorNmadd>(a: T, b: T, c: T) -> T {
+    a.vec_nmsub(b, c)
+}
+
+/// Vector square root.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_sqrt<T: sealed::VectorSqrt>(a: T) -> T {
+    a.vec_sqrt()
+}
+
+/// Vector Splat
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_splat<T: sealed::VectorSplat, const IMM: u32>(a: T) -> T {
+    a.vec_splat::<IMM>()
+}
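+
+// vec_splat above broadcasts lane IMM of an existing vector, while
+// vec_splats below broadcasts a scalar value into every lane. Roughly:
+//
+//     fn splat_lane<const IMM: usize>(a: [i32; 4]) -> [i32; 4] { [a[IMM]; 4] }
+//     fn splats(x: i32) -> [i32; 4] { [x; 4] }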
+/// Vector splats.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_splats<T: sealed::VectorSplats<U>, U>(a: T) -> U {
+    a.vec_splats()
+}
+
+/// Vector and
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_and<T: sealed::VectorAnd<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_and(b)
+}
+
+/// Vector or
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_or<T: sealed::VectorOr<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_or(b)
+}
+
+/// Vector xor
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_xor<T: sealed::VectorXor<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_xor(b)
+}
+
+/// Vector nor
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_nor<T: sealed::VectorNor<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_nor(b)
+}
+
+/// Vector nand
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_nand<T: sealed::VectorNand<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_nand(b)
+}
+
+/// Vector xnor
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_eqv<T: sealed::VectorEqv<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_eqv(b)
+}
+
+/// Vector andc.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_andc<T: sealed::VectorAndc<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_andc(b)
+}
+
+/// Vector OR with Complement
+///
+/// ## Purpose
+/// Performs a bitwise OR of the first vector with the bitwise-complemented second vector.
+///
+/// ## Result value
+/// r is the bitwise OR of a and the bitwise complement of b.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_orc<T: sealed::VectorOrc<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_orc(b)
+}
+
+/// Vector floor.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_floor<T: sealed::VectorFloor>(a: T) -> T {
+    a.vec_floor()
+}
+
+/// Vector ceil.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_ceil<T: sealed::VectorCeil>(a: T) -> T {
+    a.vec_ceil()
+}
+
+/// Returns a vector containing the truncated values of the corresponding elements of the given vector.
+/// Each element of the result contains the value of the corresponding element of a, truncated to an integral value.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_trunc<T: sealed::VectorTrunc>(a: T) -> T {
+    a.vec_trunc()
+}
+
+/// Returns a vector containing the values of the corresponding elements of the given vector,
+/// rounded to the nearest representable floating-point integer using IEEE round-to-nearest rounding.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_round<T: sealed::VectorRound>(a: T) -> T {
+    a.vec_round()
+}
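+
+// Bitwise identities for the logical family above, per lane:
+// vec_nor = !(a | b), vec_nand = !(a & b), vec_eqv = !(a ^ b),
+// vec_andc = a & !b, vec_orc = a | !b.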
+/// Returns a vector by using the current rounding mode to round every
+/// floating-point element in the given vector to integer.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_roundc<T: sealed::VectorRoundc>(a: T) -> T {
+    a.vec_roundc()
+}
+
+/// Returns a vector containing the largest representable floating-point integral values less
+/// than or equal to the values of the corresponding elements of the given vector.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_roundm<T: sealed::VectorFloor>(a: T) -> T {
+    // the IBM docs note
+    //
+    // > vec_roundm provides the same functionality as vec_floor, except that vec_roundm would not trigger the IEEE-inexact exception.
+    //
+    // but in practice `vec_floor` also does not trigger that exception, so both are equivalent
+    a.vec_floor()
+}
+
+/// Returns a vector containing the smallest representable floating-point integral values greater
+/// than or equal to the values of the corresponding elements of the given vector.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_roundp<T: sealed::VectorCeil>(a: T) -> T {
+    // the IBM docs note
+    //
+    // > vec_roundp provides the same functionality as vec_ceil, except that vec_roundp would not trigger the IEEE-inexact exception.
+    //
+    // but in practice `vec_ceil` also does not trigger that exception, so both are equivalent
+    a.vec_ceil()
+}
+
+/// Returns a vector containing the truncated values of the corresponding elements of the given vector.
+/// Each element of the result contains the value of the corresponding element of a, truncated to an integral value.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_roundz<T: sealed::VectorTrunc>(a: T) -> T {
+    // the IBM docs note
+    //
+    // > vec_roundz provides the same functionality as vec_trunc, except that vec_roundz would not trigger the IEEE-inexact exception.
+    //
+    // but in practice `vec_trunc` also does not trigger that exception, so both are equivalent
+    a.vec_trunc()
+}
+
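A worked value makes the four rounding modes concrete. Sketch only, with illustrative sample values and a hypothetical helper name:

// Sketch: the rounding family applied to the same input.
// For an element holding 2.5: vec_round -> 2.0 (round-to-nearest, ties to
// even), vec_roundp -> 3.0, vec_roundm -> 2.0, vec_roundz -> 2.0.
// For -1.5 the results are -2.0, -1.0, -2.0 and -1.0 respectively.
unsafe fn all_roundings(v: vector_double) -> [vector_double; 4] {
    [vec_round(v), vec_roundp(v), vec_roundm(v), vec_roundz(v)]
}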
+/// Returns a vector by using the current rounding mode to round every floating-point element in the given vector to integer.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_rint<T: sealed::VectorRint>(a: T) -> T {
+    a.vec_rint()
+}
+
+/// Vector Average
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_avg<T: sealed::VectorAvg<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_avg(b)
+}
+
+/// Vector Shift Left
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_sl<T: sealed::VectorSl<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_sl(b)
+}
+
+/// Vector Shift Right
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_sr<T: sealed::VectorSr<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_sr(b)
+}
+
+/// Vector Shift Right Algebraic
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_sra<T: sealed::VectorSra<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_sra(b)
+}
+
+/// Vector Shift Left by Byte
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_slb<T: sealed::VectorSlb<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_slb(b)
+}
+
+/// Vector Shift Right by Byte
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_srb<T: sealed::VectorSrb<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_srb(b)
+}
+
+/// Vector Shift Right Algebraic by Byte
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_srab<T: sealed::VectorSrab<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_srab(b)
+}
+
+/// Vector Element Rotate Left
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_rl<T: sealed::VectorRl<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_rl(b)
+}
+
+/// Performs a left shift for a vector by a given number of bits. Each element of the result is obtained by shifting the corresponding
+/// element of a left by the number of bits specified by the last 3 bits of every byte of b. The bits that are shifted out are replaced by zeros.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_sll<T>(a: T, b: vector_unsigned_char) -> T
+where
+    T: sealed::VectorSll,
+{
+    a.vec_sll(b)
+}
+
+/// Performs a right shift for a vector by a given number of bits. Each element of the result is obtained by shifting the corresponding
+/// element of a right by the number of bits specified by the last 3 bits of every byte of b. The bits that are shifted out are replaced by zeros.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_srl<T>(a: T, b: vector_unsigned_char) -> T
+where
+    T: sealed::VectorSrl,
+{
+    a.vec_srl(b)
+}
+
+/// Performs an algebraic right shift for a vector by a given number of bits. Each element of the result is obtained by shifting the corresponding
+/// element of a right by the number of bits specified by the last 3 bits of every byte of b. The bits that are shifted out are replaced by copies of
+/// the most significant bit of the element of a.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_sral<T>(a: T, b: vector_unsigned_char) -> T
+where
+    T: sealed::VectorSral,
+{
+    a.vec_sral(b)
+}
+
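The distinction between the element-wise shifts and the whole-register shifts above is easy to miss, so here is a sketch (not part of the patch; `shift_flavors` is a hypothetical name, and the uniform count vector for `vec_sll` reflects the ISA's expectation that all byte lanes agree):

// Sketch: vec_sl shifts each element by the matching element of its second
// argument (taken modulo the element width), while vec_sll shifts the
// entire 128-bit register by 0..=7 bits.
unsafe fn shift_flavors(
    a: vector_unsigned_int,
) -> (vector_unsigned_int, vector_unsigned_int) {
    let per_element = vec_sl(a, vector_unsigned_int([1, 2, 3, 4]));
    let whole_register = vec_sll(a, vector_unsigned_char([3; 16]));
    (per_element, whole_register)
}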
+/// Rotates each element of a vector left by a given number of bits. Each element of the result is obtained by rotating the corresponding element
+/// of a left by the number of bits specified by b, modulo the number of bits in the element.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_rli<T: sealed::VectorRli>(a: T, bits: core::ffi::c_ulong) -> T {
+    a.vec_rli(bits)
+}
+
+/// Returns a vector with the elements of the input vector in reversed order.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_reve<T: sealed::VectorReve>(a: T) -> T {
+    a.vec_reve()
+}
+
+/// Returns a vector where each vector element contains the corresponding byte-reversed vector element of the input vector.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_revb<T: sealed::VectorRevb>(a: T) -> T {
+    a.vec_revb()
+}
+
+/// Merges the most significant ("high") halves of two vectors.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_mergeh<T: sealed::VectorMergeh>(a: T, b: T) -> T {
+    a.vec_mergeh(b)
+}
+
+/// Vector Pack
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_pack<T: sealed::VectorPack<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_pack(b)
+}
+
+/// Vector Pack Saturated
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_packs<T: sealed::VectorPacks<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_packs(b)
+}
+
+/// Vector Pack Saturated Condition Code
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_packs_cc<T: sealed::VectorPacksCC>(a: T, b: T, c: *mut i32) -> T::Result {
+    let (x, y) = a.vec_packs_cc(b);
+    unsafe { c.write(y) };
+    x
+}
+
+/// Vector Pack Saturated Unsigned
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_packsu<T: sealed::VectorPacksu<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_packsu(b)
+}
+
+/// Vector Pack Saturated Unsigned Condition Code
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_packsu_cc<T: sealed::VectorPacksuCC>(a: T, b: T, c: *mut i32) -> T::Result {
+    let (x, y) = a.vec_packsu_cc(b);
+    unsafe { c.write(y) };
+    x
+}
+
+/// Vector Unpack High
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_unpackh<T: sealed::VectorUnpackh>(a: T) -> <T as sealed::VectorUnpackh>::Result {
+    a.vec_unpackh()
+}
+
+/// Vector Unpack Low
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_unpackl<T: sealed::VectorUnpackl>(a: T) -> <T as sealed::VectorUnpackl>::Result {
+    a.vec_unpackl()
+}
+
+/// Merges the least significant ("low") halves of two vectors.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_mergel<T: sealed::VectorMergel>(a: T, b: T) -> T {
+    a.vec_mergel(b)
+}
+
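Packing narrows and can saturate; unpacking widens again. A sketch of the round trip (editorial, not part of the patch; names and sample values are illustrative):

// Sketch: vec_packs narrows i16 -> i8 with signed saturation, so a lane
// holding 300 becomes 127 and -300 becomes -128; vec_unpackh/vec_unpackl
// then sign-extend the high and low halves of the packed result back to i16.
unsafe fn pack_roundtrip(
    a: vector_signed_short,
    b: vector_signed_short,
) -> (vector_signed_short, vector_signed_short) {
    let packed: vector_signed_char = vec_packs(a, b);
    (vec_unpackh(packed), vec_unpackl(packed))
}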
+/// Generates byte masks for elements in the return vector. For each bit in a, if the bit is one, all bit positions
+/// in the corresponding byte element of d are set to ones. Otherwise, if the bit is zero, the corresponding byte element is set to zero.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vgbm, MASK = 0x00FF))]
+pub unsafe fn vec_genmask<const MASK: u16>() -> vector_unsigned_char {
+    vector_unsigned_char(const { genmask::<MASK>() })
+}
+
+/// Vector Generate Mask (Byte)
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vrepib, L = 3, H = 5))]
+pub unsafe fn vec_genmasks_8<const L: u32, const H: u32>() -> vector_unsigned_char {
+    vector_unsigned_char(const { [genmasks(u8::BITS, L, H) as u8; 16] })
+}
+
+/// Vector Generate Mask (Halfword)
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vrepih, L = 3, H = 5))]
+pub unsafe fn vec_genmasks_16<const L: u32, const H: u32>() -> vector_unsigned_short {
+    vector_unsigned_short(const { [genmasks(u16::BITS, L, H) as u16; 8] })
+}
+
+/// Vector Generate Mask (Word)
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vgmf, L = 3, H = 5))]
+pub unsafe fn vec_genmasks_32<const L: u32, const H: u32>() -> vector_unsigned_int {
+    vector_unsigned_int(const { [genmasks(u32::BITS, L, H) as u32; 4] })
+}
+
+/// Vector Generate Mask (Doubleword)
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vgmg, L = 3, H = 5))]
+pub unsafe fn vec_genmasks_64<const L: u32, const H: u32>() -> vector_unsigned_long_long {
+    vector_unsigned_long_long(const { [genmasks(u64::BITS, L, H); 2] })
+}
+
+/// Returns a vector that contains some elements of two vectors, in the order specified by a third vector.
+/// Each byte of the result is selected by using the least significant 5 bits of the corresponding byte of c as an index into the concatenated bytes of a and b.
+/// Note: The vector generate mask built-in function [`vec_genmask`] could help generate the mask c.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_perm<T: sealed::VectorPerm>(a: T, b: T, c: vector_unsigned_char) -> T {
+    a.vec_perm(b, c)
+}
+
+/// Vector Sum Across Quadword
+///
+/// Returns a vector containing the results of performing a sum across all the elements in each quadword of vector a,
+/// and the rightmost word or doubleword element of b. The result is an unsigned 128-bit integer.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_sum_u128<T: sealed::VectorSumU128>(a: T, b: T) -> vector_unsigned_char {
+    a.vec_sum_u128(b)
+}
+
+/// Vector Sum Across Doubleword
+///
+/// Returns a vector containing the results of performing a sum across all the elements in each doubleword of vector a,
+/// and the rightmost sub-element of the corresponding doubleword of b.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_sum2<T: sealed::VectorSum2>(a: T, b: T) -> vector_unsigned_long_long {
+    a.vec_sum2(b)
+}
+
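Worked examples of the mask generators may help; the bit numbering below follows the MSB-first convention of the underlying vgbm/vgm instructions (a sketch, not part of the patch):

// Sketch: vec_genmask expands one mask bit per byte, most significant bit
// first, so 0x00FF leaves bytes 0..8 zero and sets bytes 8..16 to 0xFF.
// vec_genmasks_32 sets the bit range L..=H (MSB-first) in every element:
// L = 3, H = 5 gives 0b0001_1100 << 24 = 0x1C00_0000 per word.
unsafe fn masks() -> (vector_unsigned_char, vector_unsigned_int) {
    (vec_genmask::<0x00FF>(), vec_genmasks_32::<3, 5>())
}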
+/// Vector Sum Across Word
+///
+/// Returns a vector containing the results of performing a sum across all the elements in each word of vector a,
+/// and the rightmost sub-element of the corresponding word of b.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_sum4<T: sealed::VectorSum4>(a: T, b: T) -> vector_unsigned_int {
+    a.vec_sum4(b)
+}
+
+/// Vector Addition unsigned 128-bits
+///
+/// Adds unsigned quadword values.
+///
+/// This function operates on the vectors as 128-bit unsigned integers. It returns the low 128 bits of a + b.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vaq))]
+pub unsafe fn vec_add_u128(
+    a: vector_unsigned_char,
+    b: vector_unsigned_char,
+) -> vector_unsigned_char {
+    let a: u128 = transmute(a);
+    let b: u128 = transmute(b);
+    transmute(a.wrapping_add(b))
+}
+
+/// Vector Subtract unsigned 128-bits
+///
+/// Subtracts unsigned quadword values.
+///
+/// This function operates on the vectors as 128-bit unsigned integers. It returns the low 128 bits of a - b.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vsq))]
+pub unsafe fn vec_sub_u128(
+    a: vector_unsigned_char,
+    b: vector_unsigned_char,
+) -> vector_unsigned_char {
+    let a: u128 = transmute(a);
+    let b: u128 = transmute(b);
+
+    transmute(a.wrapping_sub(b))
+}
+
+/// Vector Subtract Carryout
+///
+/// Returns a vector containing the borrow produced by subtracting each of the corresponding elements of b from a.
+///
+/// On each resulting element, the value is 0 if a borrow occurred, or 1 if no borrow occurred.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_subc<T: sealed::VectorSubc<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_subc(b)
+}
+
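Since the quadword operations treat the 16-byte register as a single 128-bit unsigned integer, ordinary u128 identities apply. A sketch (editorial; `roundtrip` is a hypothetical name):

// Sketch: vec_add_u128/vec_sub_u128 behave like wrapping u128 arithmetic,
// so subtracting b again round-trips back to a.
unsafe fn roundtrip(
    a: vector_unsigned_char,
    b: vector_unsigned_char,
) -> vector_unsigned_char {
    vec_sub_u128(vec_add_u128(a, b), b) // == a
}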
+/// Gets the carry bit of the 128-bit subtraction of two quadword values.
+/// This function operates on the vectors as 128-bit unsigned integers. It returns a vector containing the borrow produced by subtracting b from a, as unsigned 128-bit integers.
+/// If no borrow occurred, bit 127 of d is 1; otherwise it is set to 0. All other bits of d are 0.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vscbiq))]
+pub unsafe fn vec_subc_u128(
+    a: vector_unsigned_char,
+    b: vector_unsigned_char,
+) -> vector_unsigned_char {
+    // FIXME(llvm) sadly this does not work https://github.com/llvm/llvm-project/issues/129608
+    // let a: u128 = transmute(a);
+    // let b: u128 = transmute(b);
+    // transmute(!a.overflowing_sub(b).1 as u128)
+    transmute(vscbiq(transmute(a), transmute(b)))
+}
+
+/// Vector Add Compute Carryout unsigned 128-bits
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vaccq))]
+pub unsafe fn vec_addc_u128(
+    a: vector_unsigned_char,
+    b: vector_unsigned_char,
+) -> vector_unsigned_char {
+    let a: u128 = transmute(a);
+    let b: u128 = transmute(b);
+    transmute(a.overflowing_add(b).1 as u128)
+}
+
+/// Vector Add With Carry unsigned 128-bits
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vacq))]
+pub unsafe fn vec_adde_u128(
+    a: vector_unsigned_char,
+    b: vector_unsigned_char,
+    c: vector_unsigned_char,
+) -> vector_unsigned_char {
+    let a: u128 = transmute(a);
+    let b: u128 = transmute(b);
+    let c: u128 = transmute(c);
+    // FIXME(llvm) sadly this does not work
+    // let (d, _carry) = a.carrying_add(b, c & 1 != 0);
+    // transmute(d)
+    transmute(vacq(a, b, c))
+}
+
+/// Vector Add With Carry Compute Carry unsigned 128-bits
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vacccq))]
+pub unsafe fn vec_addec_u128(
+    a: vector_unsigned_char,
+    b: vector_unsigned_char,
+    c: vector_unsigned_char,
+) -> vector_unsigned_char {
+    let a: u128 = transmute(a);
+    let b: u128 = transmute(b);
+    let c: u128 = transmute(c);
+    let (_d, carry) = a.carrying_add(b, c & 1 != 0);
+    transmute(carry as u128)
+}
+
+/// Subtracts unsigned quadword values with carry bit from a previous operation.
+///
+/// This function operates on the vectors as 128-bit unsigned integers. It returns a vector containing the result of subtracting b from a,
+/// and the carryout bit from a previous operation.
+///
+/// Note: Only the borrow indication bit (bit 127) of c is used, and the other bits are ignored.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vsbiq))]
+pub unsafe fn vec_sube_u128(
+    a: vector_unsigned_char,
+    b: vector_unsigned_char,
+    c: vector_unsigned_char,
+) -> vector_unsigned_char {
+    transmute(vsbiq(transmute(a), transmute(b), transmute(c)))
+}
+
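The carry-producing and carry-consuming variants compose into wider arithmetic. A sketch of a 256-bit addition built from the quadword primitives (editorial; the `add256` helper is hypothetical):

// Sketch: a 256-bit addition. The low halves are combined twice, once for
// the sum and once for the carry, which then feeds the high-half addition;
// this mirrors how the vaq/vaccq/vacq instructions are meant to compose.
unsafe fn add256(
    a_lo: vector_unsigned_char, a_hi: vector_unsigned_char,
    b_lo: vector_unsigned_char, b_hi: vector_unsigned_char,
) -> (vector_unsigned_char, vector_unsigned_char) {
    let lo = vec_add_u128(a_lo, b_lo);
    let carry = vec_addc_u128(a_lo, b_lo); // 0 or 1 in the 128-bit lane
    let hi = vec_adde_u128(a_hi, b_hi, carry);
    (lo, hi)
}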
+/// Vector Subtract with Carryout, Carryout
+///
+/// Gets the carry bit of the 128-bit subtraction of two quadword values with carry bit from the previous operation.
+///
+/// It returns a vector containing the carryout produced from the result of subtracting b from a,
+/// and the carryout bit from a previous operation. If no borrow occurred, bit 127 of d is 1, otherwise 0.
+/// All other bits of d are 0.
+///
+/// Note: Only the borrow indication bit (bit 127) of c is used, and the other bits are ignored.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vsbcbiq))]
+pub unsafe fn vec_subec_u128(
+    a: vector_unsigned_char,
+    b: vector_unsigned_char,
+    c: vector_unsigned_char,
+) -> vector_unsigned_char {
+    transmute(vsbcbiq(transmute(a), transmute(b), transmute(c)))
+}
+
+/// Vector Splat Signed Byte
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vrepib, IMM = 42))]
+pub unsafe fn vec_splat_s8<const IMM: i8>() -> vector_signed_char {
+    vector_signed_char([IMM; 16])
+}
+
+/// Vector Splat Signed Halfword
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vrepih, IMM = 42))]
+pub unsafe fn vec_splat_s16<const IMM: i16>() -> vector_signed_short {
+    vector_signed_short([IMM as i16; 8])
+}
+
+/// Vector Splat Signed Word
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vrepif, IMM = 42))]
+pub unsafe fn vec_splat_s32<const IMM: i32>() -> vector_signed_int {
+    vector_signed_int([IMM as i32; 4])
+}
+
+/// Vector Splat Signed Doubleword
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vrepig, IMM = 42))]
+pub unsafe fn vec_splat_s64<const IMM: i64>() -> vector_signed_long_long {
+    vector_signed_long_long([IMM as i64; 2])
+}
+
+/// Vector Splat Unsigned Byte
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vrepib, IMM = 42))]
+pub unsafe fn vec_splat_u8<const IMM: u8>() -> vector_unsigned_char {
+    vector_unsigned_char([IMM; 16])
+}
+
+/// Vector Splat Unsigned Halfword
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vrepih, IMM = 42))]
+pub unsafe fn vec_splat_u16<const IMM: u16>() -> vector_unsigned_short {
+    vector_unsigned_short([IMM as u16; 8])
+}
+
+/// Vector Splat Unsigned Word
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vrepif, IMM = 42))]
+pub unsafe fn vec_splat_u32<const IMM: u32>() -> vector_unsigned_int {
+    vector_unsigned_int([IMM as u32; 4])
+}
+
+/// Vector Splat Unsigned Doubleword
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vrepig, IMM = 42))]
+pub unsafe fn vec_splat_u64<const IMM: u64>() -> vector_unsigned_long_long {
+    vector_unsigned_long_long([IMM as u64; 2])
+}
+
+macro_rules! vec_find_any {
+    ($($Trait:ident $fun:ident)*) => {
+        $(
+            #[inline]
+            #[target_feature(enable = "vector")]
+            #[unstable(feature = "stdarch_s390x", issue = "135681")]
+            pub unsafe fn $fun<T: sealed::$Trait<U>, U>(a: T, b: U) -> T::Result {
+                a.$fun(b)
+            }
+        )*
+    }
+}
+
+vec_find_any! {
+    VectorFindAnyEq vec_find_any_eq
+    VectorFindAnyNe vec_find_any_ne
+    VectorFindAnyEqIdx vec_find_any_eq_idx
+    VectorFindAnyNeIdx vec_find_any_ne_idx
+    VectorFindAnyEqOrZeroIdx vec_find_any_eq_or_0_idx
+    VectorFindAnyNeOrZeroIdx vec_find_any_ne_or_0_idx
+}
+
+macro_rules! vec_find_any_cc {
+    ($($Trait:ident $fun:ident)*) => {
+        $(
+            #[inline]
+            #[target_feature(enable = "vector")]
+            #[unstable(feature = "stdarch_s390x", issue = "135681")]
+            pub unsafe fn $fun<T: sealed::$Trait<U>, U>(a: T, b: U, c: *mut i32) -> T::Result {
+                let (x, y) = a.$fun(b);
+                unsafe { c.write(y) };
+                x
+            }
+        )*
+    }
+}
+
+vec_find_any_cc! {
+    VectorFindAnyEqCC vec_find_any_eq_cc
+    VectorFindAnyNeCC vec_find_any_ne_cc
+    VectorFindAnyEqIdxCC vec_find_any_eq_idx_cc
+    VectorFindAnyNeIdxCC vec_find_any_ne_idx_cc
+    VectorFindAnyEqOrZeroIdxCC vec_find_any_eq_or_0_idx_cc
+    VectorFindAnyNeOrZeroIdxCC vec_find_any_ne_or_0_idx_cc
+}
+
+/// Vector Load
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_xl<T: sealed::VectorLoad>(offset: isize, ptr: *const T::ElementType) -> T {
+    T::vec_xl(offset, ptr)
+}
+
+/// Vector Load Pair
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_load_pair<T: sealed::VectorLoadPair>(a: T::ElementType, b: T::ElementType) -> T {
+    T::vec_load_pair(a, b)
+}
+
+/// Vector Load to Block Boundary
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_load_bndry<T: sealed::VectorLoad, const BLOCK_BOUNDARY: u16>(
+    ptr: *const T::ElementType,
+) -> MaybeUninit<T> {
+    T::vec_load_bndry::<BLOCK_BOUNDARY>(ptr)
+}
+
+/// Vector Store
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_xst<T: sealed::VectorStore>(vector: T, offset: isize, ptr: *mut T::ElementType) {
+    vector.vec_xst(offset, ptr)
+}
+
+/// Vector Load with Length
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_load_len<T: sealed::VectorLoad>(
+    ptr: *const T::ElementType,
+    byte_count: u32,
+) -> T {
+    T::vec_load_len(ptr, byte_count)
+}
+
+/// Vector Store with Length
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_store_len<T: sealed::VectorStore>(
+    vector: T,
+    ptr: *mut T::ElementType,
+    byte_count: u32,
+) {
+    vector.vec_store_len(ptr, byte_count)
+}
+
+/// Vector Load Rightmost with Length
+#[inline]
+#[target_feature(enable = "vector-packed-decimal")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vlrlr))]
+pub unsafe fn vec_load_len_r(ptr: *const u8, byte_count: u32) -> vector_unsigned_char {
+    vlrl(byte_count, ptr)
+}
+
+/// Vector Store Rightmost with Length
+#[inline]
+#[target_feature(enable = "vector-packed-decimal")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vstrlr))]
+pub unsafe fn vec_store_len_r(vector: vector_unsigned_char, ptr: *mut u8, byte_count: u32) {
+    vstrl(vector, byte_count, ptr)
+}
+
+/// Vector Multiply Add
+#[inline]
+#[target_feature(enable = "vector-packed-decimal")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_madd<T: sealed::VectorMadd>(a: T, b: T, c: T) -> T {
+    a.vec_madd(b, c)
+}
+
+/// Vector Multiply Subtract
+#[inline]
+#[target_feature(enable = "vector-packed-decimal")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_msub<T: sealed::VectorMsub>(a: T, b: T, c: T) -> T {
+    a.vec_msub(b, c)
+}
+
+/// Vector Multiply and Add Even
+#[inline]
+#[target_feature(enable = "vector-packed-decimal")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_meadd<T: sealed::VectorMeadd>(a: T, b: T, c: T::Result) -> T::Result {
+    a.vec_meadd(b, c)
+}
+
+/// Vector Multiply and Add Odd
+#[inline]
+#[target_feature(enable = "vector-packed-decimal")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_moadd(a: T, b: T, c: T::Result) -> T::Result { + a.vec_moadd(b, c) +} + +/// Vector Multiply and Add High +#[inline] +#[target_feature(enable = "vector-packed-decimal")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_mhadd(a: T, b: T, c: T::Result) -> T::Result { + a.vec_mhadd(b, c) +} + +/// Vector Multiply and Add Low +#[inline] +#[target_feature(enable = "vector-packed-decimal")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_mladd(a: T, b: T, c: T::Result) -> T::Result { + a.vec_mladd(b, c) +} + +/// Vector Checksum +#[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_splat(a: T) -> T -where - T: sealed::VectorSplat, -{ - a.vec_splat::() +#[cfg_attr(test, assert_instr(vcksm))] +pub unsafe fn vec_checksum(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int { + vcksm(a, b) } -/// Vector splats. +/// Vector Multiply Even #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_splats(a: T) -> U -where - T: sealed::VectorSplats, -{ - a.vec_splats() +pub unsafe fn vec_mule, U>(a: T, b: T) -> U { + a.vec_mule(b) } -/// Vector and +/// Vector Multiply Odd #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_and(a: T, b: U) -> >::Result -where - T: sealed::VectorAnd, -{ - a.vec_and(b) +pub unsafe fn vec_mulo, U>(a: T, b: T) -> U { + a.vec_mulo(b) } -/// Vector or +/// Vector Multiply High #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_or(a: T, b: U) -> >::Result -where - T: sealed::VectorOr, -{ - a.vec_or(b) +pub unsafe fn vec_mulh, U>(a: T, b: T) -> U { + a.vec_mulh(b) } -/// Vector xor +/// Vector Galois Field Multiply Sum #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_xor(a: T, b: U) -> >::Result -where - T: sealed::VectorXor, -{ - a.vec_xor(b) +pub unsafe fn vec_gfmsum, U>(a: T, b: T) -> U { + a.vec_gfmsum(b) } -/// Vector nor +/// Vector Galois Field Multiply Sum #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_nor(a: T, b: U) -> >::Result -where - T: sealed::VectorNor, -{ - a.vec_nor(b) +pub unsafe fn vec_gfmsum_accum( + a: T, + b: T, + c: T::Result, +) -> T::Result { + a.vec_gfmsum_accum(b, c) } -/// Vector nand +/// Vector Galois Field Multiply Sum 128-bits #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_nand(a: T, b: U) -> >::Result -where - T: sealed::VectorNand, -{ - a.vec_nand(b) +#[cfg_attr(test, assert_instr(vgfmg))] +pub unsafe fn vec_gfmsum_128( + a: vector_unsigned_long_long, + b: vector_unsigned_long_long, +) -> vector_unsigned_char { + transmute(vgfmg(a, b)) } -/// Vector xnor +/// Vector Galois Field Multiply Sum and Accumulate 128-bits #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_eqv(a: T, b: U) -> >::Result -where - T: sealed::VectorEqv, -{ - a.vec_eqv(b) +#[cfg_attr(test, assert_instr(vgfmag))] +pub unsafe fn vec_gfmsum_accum_128( + a: vector_unsigned_long_long, + b: vector_unsigned_long_long, + c: vector_unsigned_char, +) -> 
vector_unsigned_char { + transmute(vgfmag(a, b, transmute(c))) } -/// Vector andc. +/// Vector Bit Permute +#[inline] +#[target_feature(enable = "vector-enhancements-1")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vbperm))] +pub unsafe fn vec_bperm_u128( + a: vector_unsigned_char, + b: vector_unsigned_char, +) -> vector_unsigned_long_long { + vbperm(a, b) +} + +/// Vector Gather Element #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_andc(a: T, b: U) -> >::Result -where - T: sealed::VectorAndc, -{ - a.vec_andc(b) +pub unsafe fn vec_gather_element( + a: T, + b: T::Offset, + c: *const T::Element, +) -> T { + a.vec_gather_element::(b, c) } -/// Vector OR with Complement -/// -/// ## Purpose -/// Performs a bitwise OR of the first vector with the bitwise-complemented second vector. +/// Vector Select +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_sel, U>(a: T, b: T, c: U) -> T { + a.vec_sel(b, c) +} + +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_ZERO_P: u32 = 1 << 11; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_ZERO_N: u32 = 1 << 10; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_ZERO: u32 = __VEC_CLASS_FP_ZERO_P | __VEC_CLASS_FP_ZERO_N; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_NORMAL_P: u32 = 1 << 9; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_NORMAL_N: u32 = 1 << 8; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_NORMAL: u32 = __VEC_CLASS_FP_NORMAL_P | __VEC_CLASS_FP_NORMAL_N; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_SUBNORMAL_P: u32 = 1 << 7; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_SUBNORMAL_N: u32 = 1 << 6; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_SUBNORMAL: u32 = __VEC_CLASS_FP_SUBNORMAL_P | __VEC_CLASS_FP_SUBNORMAL_N; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_INFINITY_P: u32 = 1 << 5; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_INFINITY_N: u32 = 1 << 4; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_INFINITY: u32 = __VEC_CLASS_FP_INFINITY_P | __VEC_CLASS_FP_INFINITY_N; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_QNAN_P: u32 = 1 << 3; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_QNAN_N: u32 = 1 << 2; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_QNAN: u32 = __VEC_CLASS_FP_QNAN_P | __VEC_CLASS_FP_QNAN_N; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_SNAN_P: u32 = 1 << 1; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_SNAN_N: u32 = 1 << 0; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_SNAN: u32 = __VEC_CLASS_FP_SNAN_P | __VEC_CLASS_FP_SNAN_N; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_NAN: u32 = __VEC_CLASS_FP_QNAN | __VEC_CLASS_FP_SNAN; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_NOT_NORMAL: u32 = + 
__VEC_CLASS_FP_NAN | __VEC_CLASS_FP_SUBNORMAL | __VEC_CLASS_FP_ZERO | __VEC_CLASS_FP_INFINITY; + +/// Vector Floating-Point Test Data Class /// -/// ## Result value -/// r is the bitwise OR of a and the bitwise complement of b. +/// You can use the `__VEC_CLASS_FP_*` constants as the argument for this operand #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_orc(a: T, b: U) -> >::Result -where - T: sealed::VectorOrc, -{ - a.vec_orc(b) +pub unsafe fn vec_fp_test_data_class( + a: T, + c: *mut i32, +) -> T::Result { + let (x, y) = a.vec_fp_test_data_class::(); + c.write(y); + x } -/// Vector floor. #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_floor(a: T) -> T -where - T: sealed::VectorFloor, -{ - a.vec_floor() +pub unsafe fn vec_all_nan(a: T) -> i32 { + i32::from(a.vec_fp_test_data_class::<__VEC_CLASS_FP_NAN>().1 == 0) } -/// Vector ceil. #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_ceil(a: T) -> T -where - T: sealed::VectorCeil, -{ - a.vec_ceil() +pub unsafe fn vec_all_numeric(a: T) -> i32 { + i32::from(a.vec_fp_test_data_class::<__VEC_CLASS_FP_NAN>().1 == 3) } -/// Returns a vector containing the truncated values of the corresponding elements of the given vector. -/// Each element of the result contains the value of the corresponding element of a, truncated to an integral value. #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_trunc(a: T) -> T -where - T: sealed::VectorTrunc, -{ - a.vec_trunc() +pub unsafe fn vec_any_nan(a: T) -> i32 { + i32::from(a.vec_fp_test_data_class::<__VEC_CLASS_FP_NAN>().1 != 3) } -/// Returns a vector containing the rounded values to the nearest representable floating-point integer, -/// using IEEE round-to-nearest rounding, of the corresponding elements of the given vector #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_round(a: T) -> T -where - T: sealed::VectorRound, -{ - a.vec_round() +pub unsafe fn vec_any_numeric(a: T) -> i32 { + i32::from(a.vec_fp_test_data_class::<__VEC_CLASS_FP_NAN>().1 != 0) } -/// Returns a vector by using the current rounding mode to round every -/// floating-point element in the given vector to integer. +/// Vector Test under Mask #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_roundc(a: T) -> T -where - T: sealed::VectorRoundc, -{ - a.vec_roundc() +pub unsafe fn vec_test_mask(a: T, b: T::Mask) -> i32 { + // I can't find much information about this, but this might just be a check for whether the + // bitwise and of a and b is non-zero? + a.vec_test_mask(b) } -/// Returns a vector containing the largest representable floating-point integral values less -/// than or equal to the values of the corresponding elements of the given vector. +/// Vector Search String #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_roundm(a: T) -> T -where - T: sealed::VectorFloor, -{ - // the IBM docs note - // - // > vec_roundm provides the same functionality as vec_floor, except that vec_roundz would not trigger the IEEE-inexact exception. 
- // - // but in practice `vec_floor` also does not trigger that exception, so both are equivalent - a.vec_floor() +pub unsafe fn vec_search_string_cc( + a: T, + b: T, + c: vector_unsigned_char, + d: *mut i32, +) -> vector_unsigned_char { + let (x, y) = a.vec_search_string_cc(b, c); + unsafe { d.write(y) }; + x } -/// Returns a vector containing the smallest representable floating-point integral values greater -/// than or equal to the values of the corresponding elements of the given vector. +/// Vector Search String Until Zero #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_roundp(a: T) -> T -where - T: sealed::VectorCeil, -{ - // the IBM docs note - // - // > vec_roundp provides the same functionality as vec_ceil, except that vec_roundz would not trigger the IEEE-inexact exception. - // - // but in practice `vec_ceil` also does not trigger that exception, so both are equivalent - a.vec_ceil() +pub unsafe fn vec_search_string_until_zero_cc( + a: T, + b: T, + c: vector_unsigned_char, + d: *mut i32, +) -> vector_unsigned_char { + let (x, y) = a.vec_search_string_until_zero_cc(b, c); + unsafe { d.write(y) }; + x } -/// Returns a vector containing the truncated values of the corresponding elements of the given vector. -/// Each element of the result contains the value of the corresponding element of a, truncated to an integral value. +/// Vector Convert from float (even elements) to double +#[inline] +#[target_feature(enable = "vector-enhancements-1")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +// FIXME: this emits `vflls` where `vldeb` is expected +// #[cfg_attr(all(test, target_feature = "vector-enhancements-1"), assert_instr(vldeb))] +pub unsafe fn vec_doublee(a: vector_float) -> vector_double { + let even = simd_shuffle::<_, _, f32x2>(a, a, const { u32x2::from_array([0, 2]) }); + simd_as(even) +} + +/// Vector Convert from double to float (even elements) +#[inline] +#[target_feature(enable = "vector-enhancements-1")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +// FIXME: the C version uses a shuffle mask with poison; we can't do that +// #[cfg_attr(all(test, target_feature = "vector-enhancements-1"), assert_instr(vledb))] +pub unsafe fn vec_floate(a: vector_double) -> vector_float { + let truncated: f32x2 = simd_as(a); + simd_shuffle( + truncated, + truncated, + const { u32x4::from_array([0, 0, 1, 1]) }, + ) +} + +/// Vector Convert from int to float #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_roundz(a: T) -> T -where - T: sealed::VectorTrunc, -{ - // the IBM docs note - // - // > vec_roundz provides the same functionality as vec_trunc, except that vec_roundz would not trigger the IEEE-inexact exception. - // - // but in practice `vec_trunc` also does not trigger that exception, so both are equivalent - a.vec_trunc() +pub unsafe fn vec_float(a: impl sealed::VectorFloat) -> vector_float { + a.vec_float() } -/// Returns a vector by using the current rounding mode to round every floating-point element in the given vector to integer. 
+/// Vector Convert from long long to double #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_rint(a: T) -> T -where - T: sealed::VectorRint, -{ - a.vec_rint() +pub unsafe fn vec_double(a: impl sealed::VectorDouble) -> vector_double { + a.vec_double() } -/// Vector Shift Left +/// Vector Sign Extend to Doubleword #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_sl(a: T, b: U) -> >::Result -where - T: sealed::VectorSl, -{ - a.vec_sl(b) +pub unsafe fn vec_extend_s64(a: impl sealed::VectorExtendSigned64) -> vector_signed_long_long { + a.vec_extend_s64() } -/// Vector Shift Right +/// Vector Convert floating point to signed +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_signed(a: T) -> T::Result { + a.vec_signed() +} + +/// Vector Convert floating point to unsigned +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_unsigned(a: T) -> T::Result { + a.vec_unsigned() +} + +/// Vector Copy Until Zero +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cp_until_zero(a: T) -> T { + a.vec_cp_until_zero() +} + +/// Vector Copy Until Zero +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cp_until_zero_cc(a: T, cc: *mut i32) -> T { + let (x, y) = a.vec_cp_until_zero_cc(); + unsafe { cc.write(y) }; + x +} + +/// Vector Multiply Sum Logical +#[inline] +#[target_feature(enable = "vector-enhancements-1")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr( + all(test, target_feature = "vector-enhancements-1"), + assert_instr(vmslg, D = 4) +)] +pub unsafe fn vec_msum_u128( + a: vector_unsigned_long_long, + b: vector_unsigned_long_long, + c: vector_unsigned_char, +) -> vector_unsigned_char { + const { + if !matches!(D, 0 | 4 | 8 | 12) { + panic!("D needs to be one of 0, 4, 8, 12"); + } + }; + transmute(vmslg(a, b, transmute(c), D)) +} + +/// Vector Shift Left Double by Byte +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_sld(a: T, b: T) -> T { + static_assert_uimm_bits!(C, 4); + a.vec_sld::(b) +} + +/// Vector Shift Left Double by Word +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_sldw(a: T, b: T) -> T { + static_assert_uimm_bits!(C, 2); + a.vec_sldw::(b) +} + +/// Vector Shift Left Double by Bit +#[inline] +#[target_feature(enable = "vector-enhancements-2")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_sldb(a: T, b: T) -> T { + static_assert_uimm_bits!(C, 3); + a.vec_sldb::(b) +} + +/// Vector Shift Right Double by Bit +#[inline] +#[target_feature(enable = "vector-enhancements-2")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_srdb(a: T, b: T) -> T { + static_assert_uimm_bits!(C, 3); + a.vec_srdb::(b) +} + +/// Vector Compare Ranges +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmprg(a: T, b: T, c: T) -> T::Result { + a.vstrc::<{ FindImm::Eq as u32 }>(b, c) +} + +/// Vector Compare Not in Ranges +#[inline] +#[target_feature(enable = "vector")] 
+#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmpnrg(a: T, b: T, c: T) -> T::Result { + a.vstrc::<{ FindImm::Ne as u32 }>(b, c) +} + +/// Vector Compare Ranges Index +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmprg_idx(a: T, b: T, c: T) -> T::Result { + a.vstrc::<{ FindImm::EqIdx as u32 }>(b, c) +} + +/// Vector Compare Not in Ranges Index +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmpnrg_idx(a: T, b: T, c: T) -> T::Result { + a.vstrc::<{ FindImm::NeIdx as u32 }>(b, c) +} + +/// Vector Compare Ranges with Condition Code +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmprg_cc( + a: T, + b: T, + c: T, + d: *mut i32, +) -> T::Result { + let (x, y) = a.vstrcs::<{ FindImm::Eq as u32 }>(b, c); + d.write(y); + x +} + +/// Vector Compare Not in Ranges with Condition Code +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmpnrg_cc( + a: T, + b: T, + c: T, + d: *mut i32, +) -> T::Result { + let (x, y) = a.vstrcs::<{ FindImm::Ne as u32 }>(b, c); + d.write(y); + x +} + +/// Vector Compare Ranges Index with Condition Code #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_sr(a: T, b: U) -> >::Result -where - T: sealed::VectorSr, -{ - a.vec_sr(b) +pub unsafe fn vec_cmprg_idx_cc( + a: T, + b: T, + c: T, + d: *mut i32, +) -> T::Result { + let (x, y) = a.vstrcs::<{ FindImm::EqIdx as u32 }>(b, c); + d.write(y); + x } -/// Vector Shift Right Algebraic +/// Vector Compare Not in Ranges Index with Condition Code #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_sra(a: T, b: U) -> >::Result -where - T: sealed::VectorSra, -{ - a.vec_sra(b) +pub unsafe fn vec_cmpnrg_idx_cc( + a: T, + b: T, + c: T, + d: *mut i32, +) -> T::Result { + let (x, y) = a.vstrcs::<{ FindImm::NeIdx as u32 }>(b, c); + d.write(y); + x } -/// Vector Shift Left by Byte -#[inline] +/// Vector Compare Ranges or Zero Index#[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_slb(a: T, b: U) -> >::Result -where - T: sealed::VectorSlb, -{ - a.vec_slb(b) +pub unsafe fn vec_cmprg_or_0_idx(a: T, b: T, c: T) -> T::Result { + a.vstrcz::<{ FindImm::EqIdx as u32 }>(b, c) } -/// Vector Shift Right by Byte +/// Vector Compare Not in Ranges or Zero Index #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_srb(a: T, b: U) -> >::Result -where - T: sealed::VectorSrb, -{ - a.vec_srb(b) +pub unsafe fn vec_cmpnrg_or_0_idx(a: T, b: T, c: T) -> T::Result { + a.vstrcz::<{ FindImm::NeIdx as u32 }>(b, c) } -/// Vector Shift Right Algebraic by Byte +/// Vector Compare Ranges or Zero Index with Condition Code #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_srab(a: T, b: U) -> >::Result -where - T: sealed::VectorSrab, -{ - a.vec_srab(b) +pub unsafe fn vec_cmprg_or_0_idx_cc( + a: T, + b: T, + c: T, + d: *mut i32, +) -> T::Result { + let (x, y) = a.vstrczs::<{ FindImm::EqIdx as u32 }>(b, c); + d.write(y); + x } -/// Vector Element Rotate Left +/// Vector Compare Not in Ranges 
or Zero Index with Condition Code #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_rl(a: T, b: U) -> >::Result -where - T: sealed::VectorRl, -{ - a.vec_rl(b) +pub unsafe fn vec_cmpnrg_or_0_idx_cc( + a: T, + b: T, + c: T, + d: *mut i32, +) -> T::Result { + let (x, y) = a.vstrczs::<{ FindImm::NeIdx as u32 }>(b, c); + d.write(y); + x } -/// Performs a left shift for a vector by a given number of bits. Each element of the result is obtained by shifting the corresponding -/// element of a left by the number of bits specified by the last 3 bits of every byte of b. The bits that are shifted out are replaced by zeros. +/// Vector Compare Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_sll(a: T, b: vector_unsigned_char) -> T -where - T: sealed::VectorSll, -{ - a.vec_sll(b) +pub unsafe fn vec_cmpeq(a: T, b: T) -> T::Result { + a.vec_cmpeq(b) } -/// Performs a right shift for a vector by a given number of bits. Each element of the result is obtained by shifting the corresponding -/// element of a right by the number of bits specified by the last 3 bits of every byte of b. The bits that are shifted out are replaced by zeros. +/// Vector Compare Not Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_srl(a: T, b: vector_unsigned_char) -> T -where - T: sealed::VectorSrl, -{ - a.vec_srl(b) +pub unsafe fn vec_cmpne(a: T, b: T) -> T::Result { + a.vec_cmpne(b) } -/// Performs an algebraic right shift for a vector by a given number of bits. Each element of the result is obtained by shifting the corresponding -/// element of a right by the number of bits specified by the last 3 bits of every byte of b. The bits that are shifted out are replaced by copies of -/// the most significant bit of the element of a. +/// Vector Compare Greater Than #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_sral(a: T, b: vector_unsigned_char) -> T -where - T: sealed::VectorSral, -{ - a.vec_sral(b) +pub unsafe fn vec_cmpgt(a: T, b: T) -> T::Result { + a.vec_cmpgt(b) } -/// Rotates each element of a vector left by a given number of bits. Each element of the result is obtained by rotating the corresponding element -/// of a left by the number of bits specified by b, modulo the number of bits in the element. +/// Vector Compare Greater Than or Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_rli(a: T, bits: core::ffi::c_ulong) -> T -where - T: sealed::VectorRli, -{ - a.vec_rli(bits) +pub unsafe fn vec_cmpge(a: T, b: T) -> T::Result { + a.vec_cmpge(b) } -/// Returns a vector with the elements of the input vector in reversed order. +/// Vector Compare Less #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_reve(a: T) -> T -where - T: sealed::VectorReve, -{ - a.vec_reve() +pub unsafe fn vec_cmplt(a: T, b: T) -> T::Result { + a.vec_cmplt(b) } -/// Returns a vector where each vector element contains the corresponding byte-reversed vector element of the input vector. 
+/// Vector Compare Less Than or Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_revb(a: T) -> T -where - T: sealed::VectorRevb, -{ - a.vec_revb() +pub unsafe fn vec_cmple(a: T, b: T) -> T::Result { + a.vec_cmple(b) } -/// Merges the most significant ("high") halves of two vectors. +/// Vector Compare Equal Index #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_mergeh(a: T, b: T) -> T -where - T: sealed::VectorMergeh, -{ - a.vec_mergeh(b) +pub unsafe fn vec_cmpeq_idx(a: T, b: T) -> T::Result { + a.vec_cmpeq_idx(b) } - -/// Merges the least significant ("low") halves of two vectors. +/// Vector Compare Not Equal Index #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_mergel(a: T, b: T) -> T -where - T: sealed::VectorMergel, -{ - a.vec_mergel(b) +pub unsafe fn vec_cmpne_idx(a: T, b: T) -> T::Result { + a.vec_cmpne_idx(b) } - -/// Generates byte masks for elements in the return vector. For each bit in a, if the bit is one, all bit positions -/// in the corresponding byte element of d are set to ones. Otherwise, if the bit is zero, the corresponding byte element is set to zero. +/// Vector Compare Equal Index with Condition Code #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vgbm, MASK = 0x00FF))] -pub unsafe fn vec_genmask() -> vector_unsigned_char { - vector_unsigned_char(const { genmask::() }) +pub unsafe fn vec_cmpeq_idx_cc( + a: T, + b: T, + cc: *mut i32, +) -> T::Result { + let (x, y) = a.vec_cmpeq_idx_cc(b); + unsafe { cc.write(y) }; + x } - -/// Vector Generate Mask (Byte) +/// Vector Compare Not Equal Index with Condition Code #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vrepib, L = 3, H = 5))] -pub unsafe fn vec_genmasks_8() -> vector_unsigned_char { - vector_unsigned_char(const { [genmasks(u8::BITS, L, H) as u8; 16] }) +pub unsafe fn vec_cmpne_idx_cc( + a: T, + b: T, + cc: *mut i32, +) -> T::Result { + let (x, y) = a.vec_cmpne_idx_cc(b); + unsafe { cc.write(y) }; + x } - -/// Vector Generate Mask (Halfword) +/// Vector Compare Equal or Zero Index #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vrepih, L = 3, H = 5))] -pub unsafe fn vec_genmasks_16() -> vector_unsigned_short { - vector_unsigned_short(const { [genmasks(u16::BITS, L, H) as u16; 8] }) +pub unsafe fn vec_cmpeq_or_0_idx(a: T, b: T) -> T::Result { + a.vec_cmpeq_or_0_idx(b) } - -/// Vector Generate Mask (Word) +/// Vector Compare Not Equal or Zero Index #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vgmf, L = 3, H = 5))] -pub unsafe fn vec_genmasks_32() -> vector_unsigned_int { - vector_unsigned_int(const { [genmasks(u32::BITS, L, H) as u32; 4] }) +pub unsafe fn vec_cmpne_or_0_idx(a: T, b: T) -> T::Result { + a.vec_cmpne_or_0_idx(b) } - -/// Vector Generate Mask (Doubleword) +/// Vector Compare Equal or Zero Index with Condition Code #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vgmg, L = 3, H = 5))] -pub unsafe fn vec_genmasks_64() -> vector_unsigned_long_long { - 
vector_unsigned_long_long(const { [genmasks(u64::BITS, L, H); 2] }) +pub unsafe fn vec_cmpeq_or_0_idx_cc( + a: T, + b: T, + cc: *mut i32, +) -> T::Result { + let (x, y) = a.vec_cmpeq_or_0_idx_cc(b); + unsafe { cc.write(y) }; + x } - -/// Returns a vector that contains some elements of two vectors, in the order specified by a third vector. -/// Each byte of the result is selected by using the least significant 5 bits of the corresponding byte of c as an index into the concatenated bytes of a and b. -/// Note: The vector generate mask built-in function [`vec_genmask`] could help generate the mask c. +/// Vector Compare Not Equal or Zero Index with Condition Code #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_perm(a: T, b: T, c: vector_unsigned_char) -> T { - a.vec_perm(b, c) +pub unsafe fn vec_cmpne_or_0_idx_cc( + a: T, + b: T, + cc: *mut i32, +) -> T::Result { + let (x, y) = a.vec_cmpne_or_0_idx_cc(b); + unsafe { cc.write(y) }; + x } -/// Vector Sum Across Quadword -/// -/// Returns a vector containing the results of performing a sum across all the elements in each of the quadword of vector a, -/// and the rightmost word or doubleword element of the b. The result is an unsigned 128-bit integer. +/// All Elements Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_sum_u128(a: T, b: T) -> vector_unsigned_char { - a.vec_sum_u128(b) +pub unsafe fn vec_all_eq(a: T, b: T) -> i32 { + simd_reduce_all(vec_cmpeq(a, b)) as i32 as i32 } -/// Vector Sum Across Doubleword -/// -/// Returns a vector containing the results of performing a sum across all the elements in each of the doubleword of vector a, -/// and the rightmost sub-element of the corresponding doubleword of b. +/// All Elements Not Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_sum2(a: T, b: T) -> vector_unsigned_long_long { - a.vec_sum2(b) +pub unsafe fn vec_all_ne(a: T, b: T) -> i32 { + simd_reduce_all(vec_cmpne(a, b)) as i32 } -/// Vector Sum Across Word -/// -/// Returns a vector containing the results of performing a sum across all the elements in each of the word of vector a, -/// and the rightmost sub-element of the corresponding word of b. +/// Any Element Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_sum4(a: T, b: T) -> vector_unsigned_int { - a.vec_sum4(b) +pub unsafe fn vec_any_eq(a: T, b: T) -> i32 { + simd_reduce_any(vec_cmpeq(a, b)) as i32 } -/// Vector Subtract unsigned 128-bits -/// -/// Subtracts unsigned quadword values. -/// -/// This function operates on the vectors as 128-bit unsigned integers. It returns low 128 bits of a - b. +/// Any Element Not Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vsq))] -pub unsafe fn vec_sub_u128( - a: vector_unsigned_char, - b: vector_unsigned_char, -) -> vector_unsigned_char { - let a: u128 = transmute(a); - let b: u128 = transmute(b); - - transmute(a.wrapping_sub(b)) +pub unsafe fn vec_any_ne(a: T, b: T) -> i32 { + simd_reduce_any(vec_cmpne(a, b)) as i32 } -/// Vector Subtract Carryout -/// -/// Returns a vector containing the borrow produced by subtracting each of corresponding elements of b from a. 
-/// -/// On each resulting element, the value is 0 if a borrow occurred, or 1 if no borrow occurred. +/// All Elements Less Than #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_subc(a: T, b: U) -> >::Result -where - T: sealed::VectorSubc, -{ - a.vec_subc(b) +pub unsafe fn vec_all_lt(a: T, b: T) -> i32 { + a.vec_all_lt(b) } -/// Gets the carry bit of the 128-bit subtraction of two quadword values. -/// This function operates on the vectors as 128-bit unsigned integers. It returns a vector containing the borrow produced by subtracting b from a, as unsigned 128-bits integers. -/// If no borrow occurred, the bit 127 of d is 1; otherwise it is set to 0. All other bits of d are 0. +/// All Elements Less Than or Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vscbiq))] -pub unsafe fn vec_subc_u128( - a: vector_unsigned_char, - b: vector_unsigned_char, -) -> vector_unsigned_char { - transmute(vscbiq(transmute(a), transmute(b))) +pub unsafe fn vec_all_le(a: T, b: T) -> i32 { + a.vec_all_le(b) } -/// Subtracts unsigned quadword values with carry bit from a previous operation. -/// -/// This function operates on the vectors as 128-bit unsigned integers. It returns a vector containing the result of subtracting of b from a, -/// and the carryout bit from a previous operation. -/// -/// Note: Only the borrow indication bit (127-bit) of c is used, and the other bits are ignored. +/// All Elements Greater Than #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vsbiq))] -pub unsafe fn vec_sube_u128( - a: vector_unsigned_char, - b: vector_unsigned_char, - c: vector_unsigned_char, -) -> vector_unsigned_char { - transmute(vsbiq(transmute(a), transmute(b), transmute(c))) +pub unsafe fn vec_all_gt(a: T, b: T) -> i32 { + a.vec_all_gt(b) } -/// Vector Subtract with Carryout, Carryout -/// -/// Gets the carry bit of the 128-bit subtraction of two quadword values with carry bit from the previous operation. -/// -/// It returns a vector containing the carryout produced from the result of subtracting of b from a, -/// and the carryout bit from a previous operation. If no borrow occurred, the 127-bit of d is 1, otherwise 0. -/// All other bits of d are 0. -/// -/// Note: Only the borrow indication bit (127-bit) of c is used, and the other bits are ignored. 
+/// All Elements Greater Than or Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vsbcbiq))] -pub unsafe fn vec_subec_u128( - a: vector_unsigned_char, - b: vector_unsigned_char, - c: vector_unsigned_char, -) -> vector_unsigned_char { - transmute(vsbcbiq(transmute(a), transmute(b), transmute(c))) +pub unsafe fn vec_all_ge(a: T, b: T) -> i32 { + a.vec_all_ge(b) } -/// Vector Splat Signed Byte +/// All Elements Not Less Than #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vrepib, IMM = 42))] -pub unsafe fn vec_splat_i8() -> vector_signed_char { - vector_signed_char([IMM; 16]) +pub unsafe fn vec_all_nlt(a: T, b: T) -> i32 { + vec_all_ge(a, b) } -/// Vector Splat Signed Halfword +/// All Elements Not Less Than or Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vrepih, IMM = 42))] -pub unsafe fn vec_splat_i16() -> vector_signed_short { - vector_signed_short([IMM as i16; 8]) +pub unsafe fn vec_all_nle(a: T, b: T) -> i32 { + vec_all_gt(a, b) } -/// Vector Splat Signed Word +/// All Elements Not Greater Than #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vrepif, IMM = 42))] -pub unsafe fn vec_splat_i32() -> vector_signed_int { - vector_signed_int([IMM as i32; 4]) +pub unsafe fn vec_all_ngt(a: T, b: T) -> i32 { + vec_all_le(a, b) } -/// Vector Splat Signed Doubleword +/// All Elements Not Greater Than or Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vrepig, IMM = 42))] -pub unsafe fn vec_splat_i64() -> vector_signed_long_long { - vector_signed_long_long([IMM as i64; 2]) +pub unsafe fn vec_all_nge(a: T, b: T) -> i32 { + vec_all_lt(a, b) } -/// Vector Splat Unsigned Byte +/// Any Elements Less Than #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vrepib, IMM = 42))] -pub unsafe fn vec_splat_u8() -> vector_unsigned_char { - vector_unsigned_char([IMM; 16]) +pub unsafe fn vec_any_lt(a: T, b: T) -> i32 { + !vec_all_ge(a, b) } -/// Vector Splat Unsigned Halfword +/// Any Elements Less Than or Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vrepih, IMM = 42))] -pub unsafe fn vec_splat_u16() -> vector_unsigned_short { - vector_unsigned_short([IMM as u16; 8]) +pub unsafe fn vec_any_le(a: T, b: T) -> i32 { + !vec_all_gt(a, b) } -/// Vector Splat Unsigned Word +/// Any Elements Greater Than #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vrepif, IMM = 42))] -pub unsafe fn vec_splat_u32() -> vector_unsigned_int { - vector_unsigned_int([IMM as u32; 4]) +pub unsafe fn vec_any_gt(a: T, b: T) -> i32 { + !vec_all_le(a, b) } -/// Vector Splat Unsigned Doubleword +/// Any Elements Greater Than or Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vrepig, IMM = 42))] -pub unsafe fn vec_splat_u64() -> vector_unsigned_long_long { - vector_unsigned_long_long([IMM as u64; 2]) +pub unsafe fn vec_any_ge(a: T, b: T) -> i32 { + 
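// "any a >= b" holds exactly when "all a < b" fails (a De Morgan dual, as + // with the other vec_any_* helpers around it; our gloss, not part of the + // original patch), hence: + 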
!vec_all_lt(a, b) } -macro_rules! vec_find_any { - ($($Trait:ident $fun:ident)*) => { - $( - #[inline] - #[target_feature(enable = "vector")] - #[unstable(feature = "stdarch_s390x", issue = "135681")] - pub unsafe fn $fun<T, U>(a: T, b: U) -> <T as sealed::$Trait<U>>::Result - where - T: sealed::$Trait<U>, - { - a.$fun(b) - } - )* - } +/// Any Elements Not Less Than +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_any_nlt(a: T, b: T) -> i32 { + vec_any_ge(a, b) } -vec_find_any! { - VectorFindAnyEq vec_find_any_eq - VectorFindAnyNe vec_find_any_ne - VectorFindAnyEqIdx vec_find_any_eq_idx - VectorFindAnyNeIdx vec_find_any_ne_idx - VectorFindAnyEqOrZeroIdx vec_find_any_eq_or_0_idx - VectorFindAnyNeOrZeroIdx vec_find_any_ne_or_0_idx +/// Any Elements Not Less Than or Equal +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_any_nle(a: T, b: T) -> i32 { + vec_any_gt(a, b) } -macro_rules! vec_find_any_cc { - ($($Trait:ident $fun:ident)*) => { - $( - #[inline] - #[target_feature(enable = "vector")] - #[unstable(feature = "stdarch_s390x", issue = "135681")] - pub unsafe fn $fun<T, U>(a: T, b: U, c: *mut i32) -> <T as sealed::$Trait<U>>::Result - where - T: sealed::$Trait<U>, - { - a.$fun(b, c) - } - )* - } +/// Any Elements Not Greater Than +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_any_ngt(a: T, b: T) -> i32 { + vec_any_le(a, b) } -vec_find_any_cc! { - VectorFindAnyEqCC vec_find_any_eq_cc - VectorFindAnyNeCC vec_find_any_ne_cc - VectorFindAnyEqIdxCC vec_find_any_eq_idx_cc - VectorFindAnyNeIdxCC vec_find_any_ne_idx_cc - VectorFindAnyEqOrZeroIdxCC vec_find_any_eq_or_0_idx_cc - VectorFindAnyNeOrZeroIdxCC vec_find_any_ne_or_0_idx_cc +/// Any Elements Not Greater Than or Equal +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_any_nge(a: T, b: T) -> i32 { + vec_any_lt(a, b) } #[cfg(test)] @@ -2699,6 +5625,11 @@ mod tests { assert_eq!(ShuffleMask::<4>::merge_high().0, [0, 4, 1, 5]); } + #[test] + fn pack_mask() { + assert_eq!(ShuffleMask::<4>::pack().0, [1, 3, 5, 7]); + } + #[test] fn test_vec_mask() { assert_eq!( @@ -3075,11 +6006,6 @@ mod tests { [0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 16], [-4, -2, -1, -8] } - test_vec_2! { test_vec_rl, vec_rl, u32x4, - [0x12345678, 0x9ABCDEF0, 0x0F0F0F0F, 0x12345678], - [4, 8, 12, 68], - [0x23456781, 0xBCDEF09A, 0xF0F0F0F0, 0x23456781] } - test_vec_1!
{ test_vec_reve_f32, vec_reve, f32x4, [0.1, 0.5, 0.6, 0.9], [0.9, 0.6, 0.5, 0.1] @@ -3378,4 +6304,844 @@ mod tests { assert_eq!(c, 3); assert_eq!(d.as_array(), &[0, 16, 0, 0]); } + + #[simd_test(enable = "vector")] + fn test_vector_load() { + let expected = [0xAAAA_AAAA, 0xBBBB_BBBB, 0xCCCC_CCCC, 0xDDDD_DDDD]; + + let source: [u32; 8] = [ + 0xAAAA_AAAA, + 0xBBBB_BBBB, + 0xCCCC_CCCC, + 0xDDDD_DDDD, + 0, + 0, + 0, + 0, + ]; + assert_eq!( + unsafe { vec_xl::<vector_unsigned_int>(0, source.as_ptr()) }.as_array(), + &expected + ); + + // offset is in bytes + let source: [u32; 8] = [ + 0x0000_AAAA, + 0xAAAA_BBBB, + 0xBBBB_CCCC, + 0xCCCC_DDDD, + 0xDDDD_0000, + 0, + 0, + 0, + ]; + assert_eq!( + unsafe { vec_xl::<vector_unsigned_int>(2, source.as_ptr()) }.as_array(), + &expected + ); + } + + #[simd_test(enable = "vector")] + fn test_vector_store() { + let vec = vector_unsigned_int([0xAAAA_AAAA, 0xBBBB_BBBB, 0xCCCC_CCCC, 0xDDDD_DDDD]); + + let mut dest = [0u32; 8]; + unsafe { vec_xst(vec, 0, dest.as_mut_ptr()) }; + assert_eq!( + dest, + [ + 0xAAAA_AAAA, + 0xBBBB_BBBB, + 0xCCCC_CCCC, + 0xDDDD_DDDD, + 0, + 0, + 0, + 0 + ] + ); + + // offset is in bytes + let mut dest = [0u32; 8]; + unsafe { vec_xst(vec, 2, dest.as_mut_ptr()) }; + assert_eq!( + dest, + [ + 0x0000_AAAA, + 0xAAAA_BBBB, + 0xBBBB_CCCC, + 0xCCCC_DDDD, + 0xDDDD_0000, + 0, + 0, + 0, + ] + ); + } + + #[simd_test(enable = "vector")] + fn test_vector_lcbb() { + #[repr(align(64))] + struct Align64<T>(T); + + static ARRAY: Align64<[u8; 128]> = Align64([0; 128]); + + assert_eq!(unsafe { __lcbb::<64>(ARRAY.0[64..].as_ptr()) }, 16); + assert_eq!(unsafe { __lcbb::<64>(ARRAY.0[63..].as_ptr()) }, 1); + assert_eq!(unsafe { __lcbb::<64>(ARRAY.0[56..].as_ptr()) }, 8); + assert_eq!(unsafe { __lcbb::<64>(ARRAY.0[48..].as_ptr()) }, 16); + } + + test_vec_2! { test_vec_pack, vec_pack, i16x8, i16x8 -> i8x16, + [0, 1, -1, 42, 32767, -32768, 30000, -30000], + [32767, -32768, 12345, -12345, 0, 1, -1, 42], + [0, 1, -1, 42, -1, 0, 48, -48, -1, 0, 57, -57, 0, 1, -1, 42] + } + + test_vec_2! { test_vec_packs, vec_packs, i16x8, i16x8 -> i8x16, + [0, 1, -1, 42, 32767, -32768, 30000, -30000], + [32767, -32768, 12345, -12345, 0, 1, -1, 42], + [0, 1, -1, 42, 127, -128, 127, -128, 127, -128, 127, -128, 0, 1, -1, 42] + } + + test_vec_2! { test_vec_packsu_signed, vec_packsu, i16x8, i16x8 -> u8x16, + [0, 1, -1, 42, 32767, -32768, 30000, -30000], + [32767, -32768, 12345, -12345, 0, 1, -1, 42], + [0, 1, 0, 42, 255, 0, 255, 0, 255, 0, 255, 0, 0, 1, 0, 42] + } + + test_vec_2! { test_vec_packsu_unsigned, vec_packsu, u16x8, u16x8 -> u8x16, + [65535, 32768, 1234, 5678, 16, 8, 4, 2], + [30000, 25000, 20000, 15000, 31, 63, 127, 255], + [255, 255, 255, 255, 16, 8, 4, 2, 255, 255, 255, 255, 31, 63, 127, 255] + } + + test_vec_2! { test_vec_rl, vec_rl, u32x4, + [0x12345678, 0x9ABCDEF0, 0x0F0F0F0F, 0x12345678], + [4, 8, 12, 68], + [0x23456781, 0xBCDEF09A, 0xF0F0F0F0, 0x23456781] + } + + test_vec_1! { test_vec_unpackh_i, vec_unpackh, i16x8 -> i32x4, + [0x1234, -2, 0x0F0F, -32768, 0, 0, 0, 0], + [0x1234, -2, 0x0F0F, -32768] + } + + test_vec_1! { test_vec_unpackh_u, vec_unpackh, u16x8 -> u32x4, + [0x1234, 0xFFFF, 0x0F0F, 0x8000, 0, 0, 0, 0], + [0x1234, 0xFFFF, 0x0F0F, 0x8000] + } + + test_vec_1! { test_vec_unpackl_i, vec_unpackl, i16x8 -> i32x4, + [0, 0, 0, 0, 0x1234, -2, 0x0F0F, -32768], + [0x1234, -2, 0x0F0F, -32768] + } + + test_vec_1! { test_vec_unpackl_u, vec_unpackl, u16x8 -> u32x4, + [0, 0, 0, 0, 0x1234, 0xFFFF, 0x0F0F, 0x8000], + [0x1234, 0xFFFF, 0x0F0F, 0x8000] + } + + test_vec_2!
{ test_vec_avg, vec_avg, u32x4, + [2, 1, u32::MAX, 0], + [4, 2, 2, 0], + [3, (1u32 + 2).div_ceil(2), (u32::MAX as u64 + 2u64).div_ceil(2) as u32, 0] + } + + test_vec_2! { test_vec_checksum, vec_checksum, u32x4, + [1, 2, 3, u32::MAX], + [5, 6, 7, 8], + [0, 12, 0, 0] + } + + test_vec_2! { test_vec_add_u128, vec_add_u128, u8x16, + [0x01, 0x05, 0x0F, 0x1A, 0x2F, 0x3F, 0x50, 0x65, + 0x7A, 0x8F, 0x9A, 0xAD, 0xB0, 0xC3, 0xD5, 0xE8], + [0xF0, 0xEF, 0xC3, 0xB1, 0x92, 0x71, 0x5A, 0x43, + 0x3B, 0x29, 0x13, 0x04, 0xD7, 0xA1, 0x8C, 0x76], + [0xF1, 0xF4, 0xD2, 0xCB, 0xC1, 0xB0, 0xAA, 0xA8, 0xB5, 0xB8, 0xAD, 0xB2, 0x88, 0x65, 0x62, 0x5E] + } + + #[simd_test(enable = "vector")] + fn test_vec_addc_u128() { + unsafe { + let a = u128::MAX; + let b = 1u128; + + let d: u128 = transmute(vec_addc_u128(transmute(a), transmute(b))); + assert!(a.checked_add(b).is_none()); + assert_eq!(d, 1); + + let a = 1u128; + let b = 1u128; + + let d: u128 = transmute(vec_addc_u128(transmute(a), transmute(b))); + assert!(a.checked_add(b).is_some()); + assert_eq!(d, 0); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_subc_u128() { + unsafe { + let a = 0u128; + let b = 1u128; + + let d: u128 = transmute(vec_subc_u128(transmute(a), transmute(b))); + assert!(a.checked_sub(b).is_none()); + assert_eq!(d, 0); + + let a = 1u128; + let b = 1u128; + + let d: u128 = transmute(vec_subc_u128(transmute(a), transmute(b))); + assert!(a.checked_sub(b).is_some()); + assert_eq!(d, 1); + } + } + + test_vec_2! { test_vec_mule_u, vec_mule, u16x8, u16x8 -> u32x4, + [0xFFFF, 0, 2, 0, 2, 0, 1, 0], + [0xFFFF, 0, 4, 0, 0xFFFF, 0, 2, 0], + [0xFFFE_0001, 8, 0x0001_FFFE, 2] + } + + test_vec_2! { test_vec_mule_i, vec_mule, i16x8, i16x8 -> i32x4, + [i16::MIN, 0, -2, 0, 2, 0, 1, 0], + [i16::MIN, 0, 4, 0, i16::MAX, 0, 2, 0], + [0x4000_0000, -8, 0xFFFE, 2] + } + + test_vec_2! { test_vec_mulo_u, vec_mulo, u16x8, u16x8 -> u32x4, + [0, 0xFFFF, 0, 2, 0, 2, 0, 1], + [0, 0xFFFF, 0, 4, 0, 0xFFFF, 0, 2], + [0xFFFE_0001, 8, 0x0001_FFFE, 2] + } + + test_vec_2! { test_vec_mulo_i, vec_mulo, i16x8, i16x8 -> i32x4, + [0, i16::MIN, 0, -2, 0, 2, 0, 1], + [0, i16::MIN, 0, 4, 0, i16::MAX, 0, 2], + [0x4000_0000, -8, 0xFFFE, 2] + } + + test_vec_2! { test_vec_mulh_u, vec_mulh, u32x4, u32x4 -> u32x4, + [u32::MAX, 2, 2, 1], + [u32::MAX, 4, u32::MAX, 2], + [u32::MAX - 1, 0, 1, 0] + } + + test_vec_2! { test_vec_mulh_i, vec_mulh, i32x4, i32x4 -> i32x4, + [i32::MIN, -2, 2, 1], + [i32::MIN, 4, i32::MAX, 2], + [0x4000_0000, -1, 0, 0] + } + + test_vec_2! { test_vec_gfmsum_1, vec_gfmsum, u16x8, u16x8 -> u32x4, + [0x1234, 0x5678, 0x9ABC, 0xDEF0, 0x1357, 0x2468, 0xACE0, 0xBDF0], + [0xFFFF, 0x0001, 0x8000, 0x7FFF, 0xAAAA, 0x5555, 0x1234, 0x5678], + [0xE13A794, 0x68764A50, 0x94AA3E, 0x2C93F300] + } + + test_vec_2! 
{ test_vec_gfmsum_2, vec_gfmsum, u16x8, u16x8 -> u32x4, + [0x0000, 0xFFFF, 0xAAAA, 0x5555, 0x1234, 0x5678, 0x9ABC, 0xDEF0], + [0xFFFF, 0x0000, 0x5555, 0xAAAA, 0x0001, 0x8000, 0x7FFF, 0x1357], + [0, 0, 0x2B3C1234, 0x3781D244] + } + + #[simd_test(enable = "vector")] + fn test_vec_gfmsum_128() { + let a = vector_unsigned_long_long([1, 2]); + let b = vector_unsigned_long_long([3, 4]); + + let d: u128 = unsafe { transmute(vec_gfmsum_128(a, b)) }; + assert_eq!(d, 11); + + let a = vector_unsigned_long_long([0x0101010101010101, 0x0202020202020202]); + let b = vector_unsigned_long_long([0x0404040404040404, 0x0505050505050505]); + + let d: u128 = unsafe { transmute(vec_gfmsum_128(a, b)) }; + assert_eq!(d, 0xE000E000E000E000E000E000E000E); + } + + #[simd_test(enable = "vector-enhancements-1")] + fn test_vec_bperm_u128() { + let a = vector_unsigned_char([65, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]); + let b = vector_unsigned_char([ + 0, 0, 0, 0, 1, 1, 1, 1, 128, 128, 128, 128, 255, 255, 255, 255, + ]); + let d = unsafe { vec_bperm_u128(a, b) }; + assert_eq!(d.as_array(), &[0xF00, 0]); + } + + #[simd_test(enable = "vector")] + fn test_vec_sel() { + let a = vector_signed_int([1, 2, 3, 4]); + let b = vector_signed_int([5, 6, 7, 8]); + + let e = vector_unsigned_int([9, 10, 11, 12]); + let f = vector_unsigned_int([9, 9, 11, 11]); + + let c: vector_bool_int = unsafe { simd_eq(e, f) }; + assert_eq!(c.as_array(), &[!0, 0, !0, 0]); + let d: vector_signed_int = unsafe { vec_sel(a, b, c) }; + assert_eq!(d.as_array(), &[5, 2, 7, 4]); + } + + #[simd_test(enable = "vector")] + fn test_vec_gather_element() { + let a1: [u32; 10] = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]; + let a2: [u32; 10] = [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]; + + let v1 = vector_unsigned_int([1, 2, 3, 4]); + let v2 = vector_unsigned_int([1, 2, 3, 4]); + + let sizeof_int = core::mem::size_of::<u32>() as u32; + let v3 = vector_unsigned_int([ + 5 * sizeof_int, + 8 * sizeof_int, + 9 * sizeof_int, + 6 * sizeof_int, + ]); + + unsafe { + let d1 = vec_gather_element::<_, 0>(v1, v3, a1.as_ptr()); + assert_eq!(d1.as_array(), &[15, 2, 3, 4]); + let d2 = vec_gather_element::<_, 0>(v2, v3, a2.as_ptr()); + assert_eq!(d2.as_array(), &[25, 2, 3, 4]); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_fp_test_data_class() { + let mut cc = 42; + + let v1 = vector_double([0.0, f64::NAN]); + let v2 = vector_double([f64::INFINITY, 1.0]); + let v3 = vector_double([1.0, 2.0]); + + unsafe { + let d = vec_fp_test_data_class::<_, __VEC_CLASS_FP_ZERO>(v1, &mut cc); + assert_eq!(cc, 1); + assert_eq!(d.as_array(), &[!0, 0]); + + let d = vec_fp_test_data_class::<_, __VEC_CLASS_FP_NAN>(v1, &mut cc); + assert_eq!(cc, 1); + assert_eq!(d.as_array(), &[0, !0]); + + let d = vec_fp_test_data_class::<_, __VEC_CLASS_FP_INFINITY>(v2, &mut cc); + assert_eq!(cc, 1); + assert_eq!(d.as_array(), &[!0, 0]); + + let d = vec_fp_test_data_class::<_, __VEC_CLASS_FP_INFINITY_N>(v2, &mut cc); + assert_eq!(cc, 3); + assert_eq!(d.as_array(), &[0, 0]); + + let d = vec_fp_test_data_class::<_, __VEC_CLASS_FP_NORMAL>(v2, &mut cc); + assert_eq!(cc, 1); + assert_eq!(d.as_array(), &[0, !0]); + + let d = vec_fp_test_data_class::<_, __VEC_CLASS_FP_NORMAL>(v3, &mut cc); + assert_eq!(cc, 0); + assert_eq!(d.as_array(), &[!0, !0]); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_fp_any_all_nan_numeric() { + unsafe { + assert_eq!( + vec_all_nan(vector_double([f64::NAN, f64::NAN])), + i32::from(true) + ); + assert_eq!( + vec_all_nan(vector_double([f64::NAN, 1.0])), + i32::from(false) + ); + 
assert_eq!(vec_all_nan(vector_double([0.0, 1.0])), i32::from(false)); + + assert_eq!( + vec_any_nan(vector_double([f64::NAN, f64::NAN])), + i32::from(true) + ); + assert_eq!(vec_any_nan(vector_double([f64::NAN, 1.0])), i32::from(true)); + assert_eq!(vec_any_nan(vector_double([0.0, 1.0])), i32::from(false)); + + assert_eq!( + vec_all_numeric(vector_double([f64::NAN, f64::NAN])), + i32::from(false) + ); + assert_eq!( + vec_all_numeric(vector_double([f64::NAN, 1.0])), + i32::from(false) + ); + assert_eq!(vec_all_numeric(vector_double([0.0, 1.0])), i32::from(true)); + + assert_eq!( + vec_any_numeric(vector_double([f64::NAN, f64::NAN])), + i32::from(false) + ); + assert_eq!( + vec_any_numeric(vector_double([f64::NAN, 1.0])), + i32::from(true) + ); + assert_eq!(vec_any_numeric(vector_double([0.0, 1.0])), i32::from(true)); + + // "numeric" means "not NaN". infinities are numeric + assert_eq!( + vec_all_numeric(vector_double([f64::INFINITY, f64::NEG_INFINITY])), + i32::from(true) + ); + assert_eq!( + vec_any_numeric(vector_double([f64::INFINITY, f64::NEG_INFINITY])), + i32::from(true) + ); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_test_mask() { + unsafe { + let v = vector_unsigned_long_long([0xFF00FF00FF00FF00; 2]); + let m = vector_unsigned_long_long([0x0000FF000000FF00; 2]); + assert_eq!(vec_test_mask(v, m), 3); + + let v = vector_unsigned_long_long([u64::MAX; 2]); + let m = vector_unsigned_long_long([0; 2]); + assert_eq!(vec_test_mask(v, m), 0); + + let v = vector_unsigned_long_long([0; 2]); + let m = vector_unsigned_long_long([u64::MAX; 2]); + assert_eq!(vec_test_mask(v, m), 0); + + let v = vector_unsigned_long_long([0xAAAAAAAAAAAAAAAA; 2]); + let m = vector_unsigned_long_long([0xAAAAAAAAAAAAAAAA; 2]); + assert_eq!(vec_test_mask(v, m), 3); + } + } + + #[simd_test(enable = "vector-enhancements-2")] + fn test_vec_search_string_cc() { + unsafe { + let b = vector_unsigned_char(*b"ABCD------------"); + let c = vector_unsigned_char([4; 16]); + let mut d = 0i32; + + let haystack = vector_unsigned_char(*b"__ABCD__________"); + let result = vec_search_string_cc(haystack, b, c, &mut d); + assert_eq!(result.as_array()[7], 2); + assert_eq!(d, 2); + + let haystack = vector_unsigned_char(*b"___ABCD_________"); + let result = vec_search_string_cc(haystack, b, c, &mut d); + assert_eq!(result.as_array()[7], 3); + assert_eq!(d, 2); + + let haystack = vector_unsigned_char(*b"________________"); + let result = vec_search_string_cc(haystack, b, c, &mut d); + assert_eq!(result.as_array()[7], 16); + assert_eq!(d, 0); + + let haystack = vector_unsigned_char(*b"______\0_________"); + let result = vec_search_string_cc(haystack, b, c, &mut d); + assert_eq!(result.as_array()[7], 16); + assert_eq!(d, 0); + + let haystack = vector_unsigned_char(*b"______\0__ABCD___"); + let result = vec_search_string_cc(haystack, b, c, &mut d); + assert_eq!(result.as_array()[7], 9); + assert_eq!(d, 2); + } + } + + #[simd_test(enable = "vector-enhancements-2")] + fn test_vec_search_string_until_zero_cc() { + unsafe { + let b = vector_unsigned_char(*b"ABCD\0\0\0\0\0\0\0\0\0\0\0\0"); + let c = vector_unsigned_char([16; 16]); + let mut d = 0i32; + + let haystack = vector_unsigned_char(*b"__ABCD__________"); + let result = vec_search_string_until_zero_cc(haystack, b, c, &mut d); + assert_eq!(result.as_array()[7], 2); + assert_eq!(d, 2); + + let haystack = vector_unsigned_char(*b"___ABCD_________"); + let result = vec_search_string_until_zero_cc(haystack, b, c, &mut d); + assert_eq!(result.as_array()[7], 3); + assert_eq!(d, 2); + 
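// Condition-code reading of these cases (our summary, inferred from the + // asserted values; see the z/Architecture VSTRS description for the + // authoritative table): cc = 2 reports a full match, cc = 0 no match, and + // cc = 1, in this until-zero variant, a zero byte that ends the search.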
+ let haystack = vector_unsigned_char(*b"________________"); + let result = vec_search_string_until_zero_cc(haystack, b, c, &mut d); + assert_eq!(result.as_array()[7], 16); + assert_eq!(d, 0); + + let haystack = vector_unsigned_char(*b"______\0_________"); + let result = vec_search_string_until_zero_cc(haystack, b, c, &mut d); + assert_eq!(result.as_array()[7], 16); + assert_eq!(d, 1); + + let haystack = vector_unsigned_char(*b"______\0__ABCD___"); + let result = vec_search_string_until_zero_cc(haystack, b, c, &mut d); + assert_eq!(result.as_array()[7], 16); + assert_eq!(d, 1); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_doublee() { + unsafe { + let v = vector_float([1.0, 2.0, 3.0, 4.0]); + assert_eq!(vec_doublee(v).as_array(), &[1.0, 3.0]); + + let v = vector_float([f32::NAN, 2.0, f32::INFINITY, 4.0]); + let d = vec_doublee(v); + assert!(d.as_array()[0].is_nan()); + assert_eq!(d.as_array()[1], f64::INFINITY); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_floate() { + // NOTE: indices 1 and 3 can have an arbitrary value. With the C version + // these are poison values; our version initializes the memory, but its + // value still should not be relied upon by application code. + unsafe { + let v = vector_double([1.0, 2.0]); + let d = vec_floate(v); + assert_eq!(d.as_array()[0], 1.0); + assert_eq!(d.as_array()[2], 2.0); + + let v = vector_double([f64::NAN, f64::INFINITY]); + let d = vec_floate(v); + assert!(d.as_array()[0].is_nan()); + assert_eq!(d.as_array()[2], f32::INFINITY); + + let v = vector_double([f64::MIN, f64::MAX]); + let d = vec_floate(v); + assert_eq!(d.as_array()[0], f64::MIN as f32); + assert_eq!(d.as_array()[2], f64::MAX as f32); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_extend_s64() { + unsafe { + let v = vector_signed_char([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + assert_eq!(vec_extend_s64(v).as_array(), &[7, 15]); + + let v = vector_signed_short([0, 1, 2, 3, 4, 5, 6, 7]); + assert_eq!(vec_extend_s64(v).as_array(), &[3, 7]); + + let v = vector_signed_int([0, 1, 2, 3]); + assert_eq!(vec_extend_s64(v).as_array(), &[1, 3]); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_signed() { + unsafe { + let v = vector_float([1.0, 2.5, -2.5, -0.0]); + assert_eq!(vec_signed(v).as_array(), &[1, 2, -2, 0]); + + let v = vector_double([2.5, -2.5]); + assert_eq!(vec_signed(v).as_array(), &[2, -2]); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_unsigned() { + // NOTE: converting a negative floating point value is UB!
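+ // Hence the all-non-negative inputs below; e.g. a hypothetical + // vec_unsigned(vector_float([-1.0, 0.0, 0.0, 0.0])) would have no defined + // result and must not be added to this test.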
+ unsafe { + let v = vector_float([1.0, 2.5, 3.5, 0.0]); + assert_eq!(vec_unsigned(v).as_array(), &[1, 2, 3, 0]); + + let v = vector_double([2.5, 3.5]); + assert_eq!(vec_unsigned(v).as_array(), &[2, 3]); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_cp_until_zero() { + unsafe { + let v = vector_signed_int([1, 2, 3, 4]); + let d = vec_cp_until_zero(v); + assert_eq!(d.as_array(), &[1, 2, 3, 4]); + + let v = vector_signed_int([1, 2, 0, 4]); + let d = vec_cp_until_zero(v); + assert_eq!(d.as_array(), &[1, 2, 0, 0]); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_cp_until_zero_cc() { + let mut cc = 0; + unsafe { + let v = vector_signed_int([1, 2, 3, 4]); + let d = vec_cp_until_zero_cc(v, &mut cc); + assert_eq!(d.as_array(), &[1, 2, 3, 4]); + assert_eq!(cc, 3); + + let v = vector_signed_int([1, 2, 0, 4]); + let d = vec_cp_until_zero_cc(v, &mut cc); + assert_eq!(d.as_array(), &[1, 2, 0, 0]); + assert_eq!(cc, 0); + } + } + + #[simd_test(enable = "vector-enhancements-1")] + fn test_vec_msum_u128() { + let a = vector_unsigned_long_long([1, 2]); + let b = vector_unsigned_long_long([3, 4]); + + unsafe { + let c: vector_unsigned_char = transmute(100u128); + + let d: u128 = transmute(vec_msum_u128::<0>(a, b, c)); + assert_eq!(d, (1 * 3) + (2 * 4) + 100); + + let d: u128 = transmute(vec_msum_u128::<4>(a, b, c)); + assert_eq!(d, (1 * 3) + (2 * 4) * 2 + 100); + + let d: u128 = transmute(vec_msum_u128::<8>(a, b, c)); + assert_eq!(d, (1 * 3) * 2 + (2 * 4) + 100); + + let d: u128 = transmute(vec_msum_u128::<12>(a, b, c)); + assert_eq!(d, (1 * 3) * 2 + (2 * 4) * 2 + 100); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_sld() { + let a = vector_unsigned_long_long([0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA]); + let b = vector_unsigned_long_long([0xBBBBBBBBBBBBBBBB, 0xBBBBBBBBBBBBBBBB]); + + unsafe { + let d = vec_sld::<_, 4>(a, b); + assert_eq!(d.as_array(), &[0xAAAAAAAAAAAAAAAA, 0xAAAAAAAABBBBBBBB]); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_sldw() { + let a = vector_unsigned_long_long([0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA]); + let b = vector_unsigned_long_long([0xBBBBBBBBBBBBBBBB, 0xBBBBBBBBBBBBBBBB]); + + unsafe { + let d = vec_sldw::<_, 1>(a, b); + assert_eq!(d.as_array(), &[0xAAAAAAAAAAAAAAAA, 0xAAAAAAAABBBBBBBB]); + } + } + + #[simd_test(enable = "vector-enhancements-2")] + fn test_vec_sldb() { + let a = vector_unsigned_long_long([0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA]); + let b = vector_unsigned_long_long([0xBBBBBBBBBBBBBBBB, 0xBBBBBBBBBBBBBBBB]); + + unsafe { + let d = vec_sldb::<_, 4>(a, b); + assert_eq!(d.as_array(), &[0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAB]); + } + } + + #[simd_test(enable = "vector-enhancements-2")] + fn test_vec_srdb() { + let a = vector_unsigned_long_long([0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA]); + let b = vector_unsigned_long_long([0xBBBBBBBBBBBBBBBB, 0xBBBBBBBBBBBBBBBB]); + + unsafe { + let d = vec_srdb::<_, 4>(a, b); + assert_eq!(d.as_array(), &[0xABBBBBBBBBBBBBBB, 0xBBBBBBBBBBBBBBBB]); + } + } + + const GT: u32 = 0x20000000; + const LT: u32 = 0x40000000; + const EQ: u32 = 0x80000000; + + #[simd_test(enable = "vector")] + fn test_vec_cmprg() { + let a = vector_unsigned_int([11, 22, 33, 44]); + let b = vector_unsigned_int([10, 20, 30, 40]); + + let c = vector_unsigned_int([GT, LT, GT, LT]); + let d = unsafe { vec_cmprg(a, b, c) }; + assert_eq!(d.as_array(), &[!0, 0, !0, 0]); + + let c = vector_unsigned_int([GT, LT, 0, 0]); + let d = unsafe { vec_cmprg(a, b, c) }; + assert_eq!(d.as_array(), &[!0, 0, 0, 0]); + + let a = 
vector_unsigned_int([11, 22, 33, 30]); + let b = vector_unsigned_int([10, 20, 30, 30]); + + let c = vector_unsigned_int([GT, LT, EQ, EQ]); + let d = unsafe { vec_cmprg(a, b, c) }; + assert_eq!(d.as_array(), &[!0, 0, 0, !0]); + } + + #[simd_test(enable = "vector")] + fn test_vec_cmpnrg() { + let a = vector_unsigned_int([11, 22, 33, 44]); + let b = vector_unsigned_int([10, 20, 30, 40]); + + let c = vector_unsigned_int([GT, LT, GT, LT]); + let d = unsafe { vec_cmpnrg(a, b, c) }; + assert_eq!(d.as_array(), &[0, !0, 0, !0]); + + let c = vector_unsigned_int([GT, LT, 0, 0]); + let d = unsafe { vec_cmpnrg(a, b, c) }; + assert_eq!(d.as_array(), &[0, !0, !0, !0]); + + let a = vector_unsigned_int([11, 22, 33, 30]); + let b = vector_unsigned_int([10, 20, 30, 30]); + + let c = vector_unsigned_int([GT, LT, EQ, EQ]); + let d = unsafe { vec_cmpnrg(a, b, c) }; + assert_eq!(d.as_array(), &[0, !0, !0, 0]); + } + + #[simd_test(enable = "vector")] + fn test_vec_cmprg_idx() { + let a = vector_unsigned_int([1, 11, 22, 33]); + let b = vector_unsigned_int([10, 20, 30, 40]); + + let c = vector_unsigned_int([GT, LT, GT, LT]); + let d = unsafe { vec_cmprg_idx(a, b, c) }; + assert_eq!(d.as_array(), &[0, 4, 0, 0]); + } + + #[simd_test(enable = "vector")] + fn test_vec_cmpnrg_idx() { + let a = vector_unsigned_int([1, 11, 22, 33]); + let b = vector_unsigned_int([10, 20, 30, 40]); + + let c = vector_unsigned_int([GT, LT, GT, LT]); + let d = unsafe { vec_cmpnrg_idx(a, b, c) }; + assert_eq!(d.as_array(), &[0, 0, 0, 0]); + } + + #[simd_test(enable = "vector")] + fn test_vec_cmprg_or_0_idx() { + let a = vector_unsigned_int([1, 0, 22, 33]); + let b = vector_unsigned_int([10, 20, 30, 40]); + + let c = vector_unsigned_int([GT, LT, GT, LT]); + let d = unsafe { vec_cmprg_or_0_idx(a, b, c) }; + assert_eq!(d.as_array(), &[0, 4, 0, 0]); + } + + #[simd_test(enable = "vector")] + fn test_vec_cmpnrg_or_0_idx() { + let a = vector_unsigned_int([11, 33, 0, 22]); + let b = vector_unsigned_int([10, 20, 30, 40]); + + let c = vector_unsigned_int([GT, LT, GT, LT]); + let d = unsafe { vec_cmpnrg_or_0_idx(a, b, c) }; + assert_eq!(d.as_array(), &[0, 8, 0, 0]); + } + + test_vec_2! { test_vec_cmpgt, vec_cmpgt, f32x4, f32x4 -> i32x4, + [1.0, f32::NAN, f32::NAN, 3.14], + [2.0, f32::NAN, 5.0, 2.0], + [0, 0, 0, !0] + } + + test_vec_2! { test_vec_cmpge, vec_cmpge, f32x4, f32x4 -> i32x4, + [1.0, f32::NAN, f32::NAN, 3.14], + [1.0, f32::NAN, 5.0, 2.0], + [!0, 0, 0, !0] + } + + test_vec_2! { test_vec_cmplt, vec_cmplt, f32x4, f32x4 -> i32x4, + [1.0, f32::NAN, f32::NAN, 2.0], + [2.0, f32::NAN, 5.0, 2.0], + [!0, 0, 0, 0] + } + + test_vec_2! { test_vec_cmple, vec_cmple, f32x4, f32x4 -> i32x4, + [1.0, f32::NAN, f32::NAN, 2.0], + [1.0, f32::NAN, 5.0, 3.14], + [!0, 0, 0, !0] + } + + test_vec_2! { test_vec_cmpeq, vec_cmpeq, f32x4, f32x4 -> i32x4, + [1.0, f32::NAN, f32::NAN, 2.0], + [1.0, f32::NAN, 5.0, 3.14], + [!0, 0, 0, 0] + } + + test_vec_2! 
{ test_vec_cmpne, vec_cmpne, f32x4, f32x4 -> i32x4, + [1.0, f32::NAN, f32::NAN, 2.0], + [1.0, f32::NAN, 5.0, 3.14], + [0, !0, !0, !0] + } + + #[simd_test(enable = "vector")] + fn test_vec_meadd() { + let a = vector_unsigned_short([1, 0, 2, 0, 3, 0, 4, 0]); + let b = vector_unsigned_short([5, 0, 6, 0, 7, 0, 8, 0]); + let c = vector_unsigned_int([2, 2, 2, 2]); + + let d = unsafe { vec_meadd(a, b, c) }; + assert_eq!(d.as_array(), &[7, 14, 23, 34]); + + let a = vector_signed_short([1, 0, 2, 0, 3, 0, 4, 0]); + let b = vector_signed_short([5, 0, 6, 0, 7, 0, 8, 0]); + let c = vector_signed_int([2, -2, 2, -2]); + + let d = unsafe { vec_meadd(a, b, c) }; + assert_eq!(d.as_array(), &[7, 10, 23, 30]); + } + + #[simd_test(enable = "vector")] + fn test_vec_moadd() { + let a = vector_unsigned_short([0, 1, 0, 2, 0, 3, 0, 4]); + let b = vector_unsigned_short([0, 5, 0, 6, 0, 7, 0, 8]); + let c = vector_unsigned_int([2, 2, 2, 2]); + + let d = unsafe { vec_moadd(a, b, c) }; + assert_eq!(d.as_array(), &[7, 14, 23, 34]); + + let a = vector_signed_short([0, 1, 0, 2, 0, 3, 0, 4]); + let b = vector_signed_short([0, 5, 0, 6, 0, 7, 0, 8]); + let c = vector_signed_int([2, -2, 2, -2]); + + let d = unsafe { vec_moadd(a, b, c) }; + assert_eq!(d.as_array(), &[7, 10, 23, 30]); + } + + #[simd_test(enable = "vector")] + fn test_vec_mhadd() { + let a = vector_unsigned_int([1, 2, 3, 4]); + let b = vector_unsigned_int([5, 6, 7, 8]); + let c = vector_unsigned_int([u32::MAX; 4]); + + let d = unsafe { vec_mhadd(a, b, c) }; + assert_eq!(d.as_array(), &[1, 1, 1, 1]); + + let a = vector_signed_int([-1, -2, -3, -4]); + let b = vector_signed_int([5, 6, 7, 8]); + let c = vector_signed_int([i32::MIN; 4]); + + let d = unsafe { vec_mhadd(a, b, c) }; + assert_eq!(d.as_array(), &[-1, -1, -1, -1]); + } + + #[simd_test(enable = "vector")] + fn test_vec_mladd() { + let a = vector_unsigned_int([1, 2, 3, 4]); + let b = vector_unsigned_int([5, 6, 7, 8]); + let c = vector_unsigned_int([2, 2, 2, 2]); + + let d = unsafe { vec_mladd(a, b, c) }; + assert_eq!(d.as_array(), &[7, 14, 23, 34]); + + let a = vector_signed_int([-1, -2, -3, -4]); + let b = vector_signed_int([5, 6, 7, 8]); + let c = vector_signed_int([2, 2, 2, 2]); + + let d = unsafe { vec_mladd(a, b, c) }; + assert_eq!(d.as_array(), &[-3, -10, -19, -30]); + } } diff --git a/crates/core_arch/src/simd.rs b/crates/core_arch/src/simd.rs index 29e4b3e7a2..25834943f0 100644 --- a/crates/core_arch/src/simd.rs +++ b/crates/core_arch/src/simd.rs @@ -5,7 +5,7 @@ macro_rules! simd_ty { ($id:ident [$elem_type:ty ; $len:literal]: $($param_name:ident),*) => { #[repr(simd)] - #[derive(Copy, Clone, Debug, PartialEq)] + #[derive(Copy, Clone)] pub(crate) struct $id([$elem_type; $len]); #[allow(clippy::use_self)] @@ -38,13 +38,32 @@ macro_rules! simd_ty { /// Use for testing only. // FIXME: Workaround rust@60637 #[inline(always)] - pub(crate) fn extract(self, index: usize) -> $elem_type { - assert!(index < $len); - // Now that we know this is in-bounds, use pointer arithmetic to access the right element. - let self_ptr = &self as *const Self as *const $elem_type; - unsafe { - self_ptr.add(index).read() - } + pub(crate) fn extract(&self, index: usize) -> $elem_type { + self.as_array()[index] + } + + #[inline] + pub(crate) fn as_array(&self) -> &[$elem_type; $len] { + let simd_ptr: *const Self = self; + let array_ptr: *const [$elem_type; $len] = simd_ptr.cast(); + // SAFETY: We can always read the prefix of a simd type as an array. 
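+ // (Concretely, for e.g. u32x4 the element array is the entire 16-byte + // value; for padded widths it is a leading prefix.)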
+ // There might be more padding afterwards for some widths, but + // that's not a problem for reading less than that. + unsafe { &*array_ptr } + } + } + + impl core::cmp::PartialEq for $id { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.as_array() == other.as_array() + } + } + + impl core::fmt::Debug for $id { + #[inline] + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + debug_simd_finish(f, stringify!($id), self.as_array()) } } } @@ -53,7 +72,7 @@ macro_rules! simd_ty { macro_rules! simd_m_ty { ($id:ident [$elem_type:ident ; $len:literal]: $($param_name:ident),*) => { #[repr(simd)] - #[derive(Copy, Clone, Debug, PartialEq)] + #[derive(Copy, Clone)] pub(crate) struct $id([$elem_type; $len]); #[allow(clippy::use_self)] @@ -79,6 +98,30 @@ macro_rules! simd_m_ty { // a simd type with exactly one element. unsafe { simd_shuffle!(one, one, [0; $len]) } } + + #[inline] + pub(crate) fn as_array(&self) -> &[$elem_type; $len] { + let simd_ptr: *const Self = self; + let array_ptr: *const [$elem_type; $len] = simd_ptr.cast(); + // SAFETY: We can always read the prefix of a simd type as an array. + // There might be more padding afterwards for some widths, but + // that's not a problem for reading less than that. + unsafe { &*array_ptr } + } + } + + impl core::cmp::PartialEq for $id { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.as_array() == other.as_array() + } + } + + impl core::fmt::Debug for $id { + #[inline] + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + debug_simd_finish(f, stringify!($id), self.as_array()) + } } } } @@ -968,7 +1011,7 @@ simd_ty!( pub(crate) fn debug_simd_finish<T: crate::fmt::Debug, const N: usize>( formatter: &mut crate::fmt::Formatter<'_>, type_name: &str, - array: [T; N], + array: &[T; N], ) -> crate::fmt::Result { crate::fmt::Formatter::debug_tuple_fields_finish( formatter, diff --git a/crates/core_arch/src/wasm32/simd128.rs b/crates/core_arch/src/wasm32/simd128.rs index a774b68cfd..1a1e7dc780 100644 --- a/crates/core_arch/src/wasm32/simd128.rs +++ b/crates/core_arch/src/wasm32/simd128.rs @@ -2318,7 +2318,25 @@ pub fn u8x16_narrow_i16x8(a: v128, b: v128) -> v128 { #[doc(alias("i8x16.shl"))] #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn i8x16_shl(a: v128, amt: u32) -> v128 { - unsafe { simd_shl(a.as_i8x16(), simd::i8x16::splat(amt as i8)).v128() } + // SAFETY: the safety of this intrinsic relies on the fact that the + // shift amount for each lane is less than the number of bits in the input + // lane. In this case the input has 8-bit lanes but the shift amount above + // is `u32`, so a mask is required to discard all the upper bits of `amt` to + // ensure that the safety condition is met. + // + // Note that this mask is not a deviation from the native WebAssembly + // instruction, which defines the operation as performing the same mask + // implicitly. The explicit mask is nonetheless required since the operation + // must have defined semantics in LLVM, not just in WebAssembly. + // + // Finally note that this mask operation is not actually emitted into the + // final binary itself. LLVM understands that the wasm operation implicitly + // masks, so it knows this mask operation is redundant. + // + // Basically the extra mask here is required as a bridge from the documented + // semantics through LLVM back out to WebAssembly. Both ends have the + // documented semantics, and the mask is required by LLVM in the middle.
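+ // + // Illustration (ours, not part of the upstream comment): with 8-bit lanes + // only the low three bits of `amt` are used, so i8x16_shl(v, 9) behaves + // like i8x16_shl(v, 9 & 0x7), i.e. i8x16_shl(v, 1).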
+ unsafe { simd_shl(a.as_i8x16(), simd::i8x16::splat((amt & 0x7) as i8)).v128() } } #[stable(feature = "wasm_simd", since = "1.54.0")] @@ -2335,7 +2353,9 @@ pub use i8x16_shl as u8x16_shl; #[doc(alias("i8x16.shr_s"))] #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn i8x16_shr(a: v128, amt: u32) -> v128 { - unsafe { simd_shr(a.as_i8x16(), simd::i8x16::splat(amt as i8)).v128() } + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shr(a.as_i8x16(), simd::i8x16::splat((amt & 0x7) as i8)).v128() } } /// Shifts each lane to the right by the specified number of bits, shifting in @@ -2349,7 +2369,9 @@ pub fn i8x16_shr(a: v128, amt: u32) -> v128 { #[doc(alias("i8x16.shr_u"))] #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn u8x16_shr(a: v128, amt: u32) -> v128 { - unsafe { simd_shr(a.as_u8x16(), simd::u8x16::splat(amt as u8)).v128() } + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shr(a.as_u8x16(), simd::u8x16::splat((amt & 0x7) as u8)).v128() } } /// Adds two 128-bit vectors as if they were two packed sixteen 8-bit integers. @@ -2686,7 +2708,9 @@ pub use i16x8_extend_high_u8x16 as u16x8_extend_high_u8x16; #[doc(alias("i16x8.shl"))] #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn i16x8_shl(a: v128, amt: u32) -> v128 { - unsafe { simd_shl(a.as_i16x8(), simd::i16x8::splat(amt as i16)).v128() } + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shl(a.as_i16x8(), simd::i16x8::splat((amt & 0xf) as i16)).v128() } } #[stable(feature = "wasm_simd", since = "1.54.0")] @@ -2703,7 +2727,9 @@ pub use i16x8_shl as u16x8_shl; #[doc(alias("i16x8.shr_s"))] #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn i16x8_shr(a: v128, amt: u32) -> v128 { - unsafe { simd_shr(a.as_i16x8(), simd::i16x8::splat(amt as i16)).v128() } + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shr(a.as_i16x8(), simd::i16x8::splat((amt & 0xf) as i16)).v128() } } /// Shifts each lane to the right by the specified number of bits, shifting in @@ -2717,7 +2743,9 @@ pub fn i16x8_shr(a: v128, amt: u32) -> v128 { #[doc(alias("i16x8.shr_u"))] #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn u16x8_shr(a: v128, amt: u32) -> v128 { - unsafe { simd_shr(a.as_u16x8(), simd::u16x8::splat(amt as u16)).v128() } + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shr(a.as_u16x8(), simd::u16x8::splat((amt & 0xf) as u16)).v128() } } /// Adds two 128-bit vectors as if they were two packed eight 16-bit integers. @@ -3136,7 +3164,9 @@ pub use i32x4_extend_high_u16x8 as u32x4_extend_high_u16x8; #[doc(alias("i32x4.shl"))] #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn i32x4_shl(a: v128, amt: u32) -> v128 { - unsafe { simd_shl(a.as_i32x4(), simd::i32x4::splat(amt as i32)).v128() } + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. 
+ unsafe { simd_shl(a.as_i32x4(), simd::i32x4::splat((amt & 0x1f) as i32)).v128() } } #[stable(feature = "wasm_simd", since = "1.54.0")] @@ -3153,7 +3183,9 @@ pub use i32x4_shl as u32x4_shl; #[doc(alias("i32x4.shr_s"))] #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn i32x4_shr(a: v128, amt: u32) -> v128 { - unsafe { simd_shr(a.as_i32x4(), simd::i32x4::splat(amt as i32)).v128() } + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shr(a.as_i32x4(), simd::i32x4::splat((amt & 0x1f) as i32)).v128() } } /// Shifts each lane to the right by the specified number of bits, shifting in @@ -3167,7 +3199,9 @@ pub fn i32x4_shr(a: v128, amt: u32) -> v128 { #[doc(alias("i32x4.shr_u"))] #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn u32x4_shr(a: v128, amt: u32) -> v128 { - unsafe { simd_shr(a.as_u32x4(), simd::u32x4::splat(amt)).v128() } + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shr(a.as_u32x4(), simd::u32x4::splat(amt & 0x1f)).v128() } } /// Adds two 128-bit vectors as if they were two packed four 32-bit integers. @@ -3502,7 +3536,9 @@ pub use i64x2_extend_high_u32x4 as u64x2_extend_high_u32x4; #[doc(alias("i64x2.shl"))] #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn i64x2_shl(a: v128, amt: u32) -> v128 { - unsafe { simd_shl(a.as_i64x2(), simd::i64x2::splat(amt as i64)).v128() } + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shl(a.as_i64x2(), simd::i64x2::splat((amt & 0x3f) as i64)).v128() } } #[stable(feature = "wasm_simd", since = "1.54.0")] @@ -3519,7 +3555,9 @@ pub use i64x2_shl as u64x2_shl; #[doc(alias("i64x2.shr_s"))] #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn i64x2_shr(a: v128, amt: u32) -> v128 { - unsafe { simd_shr(a.as_i64x2(), simd::i64x2::splat(amt as i64)).v128() } + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shr(a.as_i64x2(), simd::i64x2::splat((amt & 0x3f) as i64)).v128() } } /// Shifts each lane to the right by the specified number of bits, shifting in @@ -3533,7 +3571,9 @@ pub fn i64x2_shr(a: v128, amt: u32) -> v128 { #[doc(alias("i64x2.shr_u"))] #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn u64x2_shr(a: v128, amt: u32) -> v128 { - unsafe { simd_shr(a.as_u64x2(), simd::u64x2::splat(amt as u64)).v128() } + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shr(a.as_u64x2(), simd::u64x2::splat((amt & 0x3f) as u64)).v128() } } /// Adds two 128-bit vectors as if they were two packed two 64-bit integers. 
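The masks added throughout the simd128.rs hunks above all follow one rule: the shift amount is reduced modulo the lane width. A minimal standalone sketch of that rule (ours, for illustration only; the helper name is invented and nothing like it is part of this patch):

// Reduce a shift amount modulo the lane width, as the intrinsics above do.
// `lane_bits` is one of 8, 16, 32, or 64, so `lane_bits - 1` is exactly the
// mask (0x7, 0xf, 0x1f, 0x3f) used in the hunks above.
fn wasm_shift_mask(lane_bits: u32, amt: u32) -> u32 {
    debug_assert!(lane_bits.is_power_of_two());
    amt & (lane_bits - 1)
}

fn main() {
    assert_eq!(wasm_shift_mask(8, 9), 1); // i8x16_shl(v, 9) acts like i8x16_shl(v, 1)
    assert_eq!(wasm_shift_mask(16, 16), 0); // shifting by the full lane width is a no-op
    assert_eq!(wasm_shift_mask(64, 65), 1); // u64x2_shr(v, 65) acts like u64x2_shr(v, 1)
}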
diff --git a/crates/intrinsic-test/missing_aarch64.txt b/crates/intrinsic-test/missing_aarch64.txt index 2f625d3e21..bbcfc40c69 100644 --- a/crates/intrinsic-test/missing_aarch64.txt +++ b/crates/intrinsic-test/missing_aarch64.txt @@ -30,6 +30,32 @@ vrnd32x_f64 vrnd32z_f64 vrnd64x_f64 vrnd64z_f64 +vluti2_lane_p16 +vluti2_lane_p8 +vluti2_lane_s16 +vluti2_lane_s8 +vluti2_lane_u16 +vluti2_lane_u8 +vluti2q_lane_p16 +vluti2q_lane_p8 +vluti2q_lane_s16 +vluti2q_lane_s8 +vluti2q_lane_u16 +vluti2q_lane_u8 +vluti4q_lane_f16_x2 +vluti4q_lane_p16_x2 +vluti4q_lane_p8 +vluti4q_lane_s16_x2 +vluti4q_lane_s8 +vluti4q_lane_u16_x2 +vluti4q_lane_u8 +vluti4q_laneq_f16_x2 +vluti4q_laneq_p16_x2 +vluti4q_laneq_p8 +vluti4q_laneq_s16_x2 +vluti4q_laneq_s8 +vluti4q_laneq_u16_x2 +vluti4q_laneq_u8 # Broken in Clang vcvth_s16_f16 diff --git a/crates/intrinsic-test/src/main.rs b/crates/intrinsic-test/src/main.rs index 0fa0268053..2d04f1cf34 100644 --- a/crates/intrinsic-test/src/main.rs +++ b/crates/intrinsic-test/src/main.rs @@ -240,7 +240,7 @@ fn compile_c( let arch_flags = if target.contains("v7") { "-march=armv8.6-a+crypto+crc+dotprod+fp16" } else { - "-march=armv8.6-a+crypto+sha3+crc+dotprod+fp16+faminmax" + "-march=armv8.6-a+crypto+sha3+crc+dotprod+fp16+faminmax+lut" }; let intrinsic_name = &intrinsic.name; diff --git a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml index 1356c609fe..41fdfc5382 100644 --- a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml +++ b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml @@ -89,6 +89,10 @@ aarch64-crc-stable: &aarch64-crc-stable neon-unstable-f16: &neon-unstable-f16 FnCall: [unstable, ['feature = "stdarch_neon_f16"', 'issue = "136306"']] +# #[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +neon-unstable-feat-lut: &neon-unstable-feat-lut + FnCall: [unstable, ['feature = "stdarch_neon_feat_lut"', 'issue = "138050"']] + intrinsics: - name: "vaddd_{type}" doc: Add @@ -14013,3 +14017,271 @@ intrinsics: links: - link: "llvm.aarch64.neon.famin.{neon_type}" arch: aarch64,arm64ec + + - name: "vluti2{neon_type[0].lane_nox}" + doc: "Lookup table read with 2-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [target_feature, ['enable = {type[4]}']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 1']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [int8x8_t, uint8x8_t, int8x16_t, 'LANE >= 0 && LANE <= 1', '"neon,lut"'] + - [int8x16_t, uint8x8_t, int8x16_t, 'LANE >= 0 && LANE <= 1', '"neon,lut"'] + - [int16x4_t, uint8x8_t, int16x8_t, 'LANE >= 0 && LANE <= 3', '"neon,lut"'] + - [int16x8_t, uint8x8_t, int16x8_t, 'LANE >= 0 && LANE <= 3', '"neon,lut"'] + compose: + - FnCall: ['static_assert!', ['{type[3]}']] + - LLVMLink: + name: "vluti2{neon_type[0].lane_nox}" + arguments: + - 'a: {neon_type[0]}' + - 'b: {neon_type[1]}' + - 'n: i32' + links: + - link: "llvm.aarch64.neon.vluti2.lane.{neon_type[2]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ['_vluti2{neon_type[0].lane_nox}', [a, b, LANE]] + + - name: "vluti2{neon_type[0].lane_nox}" + doc: "Lookup table read with 2-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [target_feature, ['enable = "neon,lut"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 1']]}]] + - 
*neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [uint8x8_t, uint8x8_t, uint8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x8_t'] + - [uint8x16_t, uint8x8_t, uint8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x16_t'] + - [poly8x8_t, uint8x8_t, poly8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x8_t'] + - [poly8x16_t, uint8x8_t, poly8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x16_t'] + - [uint16x4_t, uint8x8_t, uint16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x4_t'] + - [uint16x8_t, uint8x8_t, uint16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x8_t'] + - [poly16x4_t, uint8x8_t, poly16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x4_t'] + - [poly16x8_t, uint8x8_t, poly16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x8_t'] + compose: + - FnCall: ['static_assert!', ['{type[3]}']] + - FnCall: + - transmute + - - FnCall: + - 'vluti2{neon_type[4].lane_nox}::<LANE>' + - - FnCall: [transmute, [a]] + - b + + - name: "vluti4{neon_type[0].lane_nox}" + doc: "Lookup table read with 4-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = {type[3]}']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [int8x16_t, uint8x8_t, 'LANE == 0', '"neon,lut"'] + compose: + - FnCall: ['static_assert!', ['{type[2]}']] + - LLVMLink: + name: "vluti4{neon_type[0].lane_nox}" + arguments: + - 'a: {neon_type[0]}' + - 'b: {neon_type[1]}' + - 'n: i32' + links: + - link: "llvm.aarch64.neon.vluti4q.lane.{neon_type[1]}" + arch: aarch64,arm64ec + - FnCall: ['_vluti4{neon_type[0].lane_nox}', [a, b, LANE]] + + - name: "vluti4{neon_type[0].lane_nox}" + doc: "Lookup table read with 4-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,lut"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [uint8x16_t, uint8x8_t, 'LANE == 0', int8x16_t] + - [poly8x16_t, uint8x8_t, 'LANE == 0', int8x16_t] + compose: + - FnCall: ['static_assert!', ['{type[2]}']] + - FnCall: + - transmute + - - FnCall: + - 'vluti4{neon_type[3].lane_nox}::<LANE>' + - - FnCall: [transmute, [a]] + - b + + - name: "vluti4{neon_type[0].laneq_nox}" + doc: "Lookup table read with 4-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,lut"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [int8x16_t, uint8x16_t, 'LANE >= 0 && LANE <= 1'] + compose: + - FnCall: ['static_assert!', ['{type[2]}']] + - LLVMLink: + name: "vluti4{neon_type[0].laneq_nox}" + arguments: + - 'a: {neon_type[0]}' + - 'b: {neon_type[1]}' + - 'n: i32' + links: + - link: "llvm.aarch64.neon.vluti4q.laneq.{neon_type[1]}" + arch: aarch64,arm64ec + - FnCall: ['_vluti4{neon_type[0].laneq_nox}', [a, b, LANE]] + + - name: "vluti4{neon_type[0].laneq_nox}" + doc: "Lookup table read with 4-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + 
return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,lut"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [uint8x16_t, uint8x16_t, 'LANE >= 0 && LANE <= 1', int8x16_t] + - [poly8x16_t, uint8x16_t, 'LANE >= 0 && LANE <= 1', int8x16_t] + compose: + - FnCall: ['static_assert!', ['{type[2]}']] + - FnCall: + - transmute + - - FnCall: + - 'vluti4{neon_type[3].laneq_nox}::' + - - FnCall: [transmute, [a]] + - b + + - name: "vluti4q_lane_{neon_type[0]}_x2" + doc: "Lookup table read with 4-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [target_feature, ['enable = {type[4]}']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [int16x8x2_t, uint8x8_t, int16x8_t, 'LANE >= 0 && LANE <= 1', '"neon,lut"'] + compose: + - FnCall: ['static_assert!', ['{type[3]}']] + - LLVMLink: + name: "vluti4q_lane_{neon_type[0]}_x2" + arguments: + - 'a: {neon_type[2]}' + - 'a: {neon_type[2]}' + - 'b: {neon_type[1]}' + - 'n: i32' + links: + - link: "llvm.aarch64.neon.vluti4q.lane.x2.{neon_type[2]}" + arch: aarch64,arm64ec + - FnCall: ['_vluti4q_lane_{neon_type[0]}_x2', ['a.0', 'a.1', b, LANE]] + + - name: "vluti4q_lane_{neon_type[0]}_x2" + doc: "Lookup table read with 4-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [target_feature, ['enable = {type[4]}']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [uint16x8x2_t, uint8x8_t, uint16x8_t, 'LANE >= 0 && LANE <= 1', '"neon,lut"', int16x8x2_t] + - [poly16x8x2_t, uint8x8_t, poly16x8_t, 'LANE >= 0 && LANE <= 1', '"neon,lut"', int16x8x2_t] + - [float16x8x2_t, uint8x8_t, float16x8_t, 'LANE >= 0 && LANE <= 1', '"neon,lut,fp16"', int16x8x2_t] + compose: + - FnCall: ['static_assert!', ['{type[3]}']] + - FnCall: + - transmute + - - FnCall: + - 'vluti4q_lane_{neon_type[5]}_x2::' + - - FnCall: [transmute, [a]] + - b + + - name: "vluti4q_laneq_{neon_type[0]}_x2" + doc: "Lookup table read with 4-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [target_feature, ['enable = {type[4]}']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 3']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [int16x8x2_t, uint8x16_t, int16x8_t, 'LANE >= 0 && LANE <= 3', '"neon,lut"'] + compose: + - FnCall: ['static_assert!', ['{type[3]}']] + - LLVMLink: + name: "vluti4{neon_type[0].lane_nox}" + arguments: + - 'a: {neon_type[2]}' + - 'b: {neon_type[2]}' + - 'c: {neon_type[1]}' + - 'n: i32' + links: + - link: "llvm.aarch64.neon.vluti4q.laneq.x2.{neon_type[2]}" + arch: aarch64,arm64ec + - FnCall: ['_vluti4q_laneq_{neon_type[0]}_x2', ['a.0', 'a.1', b, LANE]] + + - name: "vluti4q_laneq_{neon_type[0]}_x2" + doc: "Lookup table read with 4-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + 
return_type: "{neon_type[2]}" + attr: + - FnCall: [target_feature, ['enable = {type[4]}']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 3']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [uint16x8x2_t, uint8x16_t, uint16x8_t, 'LANE >= 0 && LANE <= 3', '"neon,lut"', int16x8x2_t] + - [poly16x8x2_t, uint8x16_t, poly16x8_t, 'LANE >= 0 && LANE <= 3', '"neon,lut"', int16x8x2_t] + - [float16x8x2_t, uint8x16_t, float16x8_t, 'LANE >= 0 && LANE <= 3', '"neon,lut,fp16"', int16x8x2_t] + compose: + - FnCall: ['static_assert!', ['{type[3]}']] + - FnCall: + - transmute + - - FnCall: + - 'vluti4q_laneq_{neon_type[5]}_x2::' + - - FnCall: [transmute, [a]] + - b diff --git a/intrinsics_data/arm_intrinsics.json b/intrinsics_data/arm_intrinsics.json index 32b6120d75..9d58aad49c 100644 --- a/intrinsics_data/arm_intrinsics.json +++ b/intrinsics_data/arm_intrinsics.json @@ -118843,5 +118843,915 @@ "FAMAX" ] ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2_lane_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2q_lane_u8", + "arguments": [ + "uint8x16_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2_lane_s8", + "arguments": [ + "int8x8_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2q_lane_s8", + "arguments": [ + "int8x16_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2_lane_p8", + "arguments": [ + "poly8x8_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2q_lane_p8", + "arguments": [ + "poly8x16_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm" + }, + "lane": 
{ + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2_lane_u16", + "arguments": [ + "uint16x4_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2q_lane_u16", + "arguments": [ + "uint16x8_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2_lane_s16", + "arguments": [ + "int16x4_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2q_lane_s16", + "arguments": [ + "int16x8_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2_lane_p16", + "arguments": [ + "poly16x4_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2q_lane_p16", + "arguments": [ + "poly16x8_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_lane_u8", + "arguments": [ + "uint8x16_t vn", + "uint8x8_t vm", + "const int index" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn.16B" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 0 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_laneq_u8", + "arguments": [ + "uint8x16_t vn", + "uint8x16_t vm", + "const int index" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn.16B" + }, + "vm": { + 
"register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_lane_s8", + "arguments": [ + "int8x16_t vn", + "uint8x8_t vm", + "const int index" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn.16B" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 0 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_laneq_s8", + "arguments": [ + "int8x16_t vn", + "uint8x16_t vm", + "const int index" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn.16B" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_lane_p8", + "arguments": [ + "poly8x16_t vn", + "uint8x8_t vm", + "const int index" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn.16B" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 0 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_laneq_p8", + "arguments": [ + "poly8x16_t vn", + "uint8x16_t vm", + "const int index" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn.16B" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_lane_u16_x2", + "arguments": [ + "uint16x8x2_t vn", + "uint8x8_t vm", + "const int index" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn1.8H" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_laneq_u16_x2", + "arguments": [ + "uint16x8x2_t vn", + "uint8x16_t vm", + "const int index" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn1.8H" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_lane_s16_x2", + "arguments": [ + "int16x8x2_t vn", + "uint8x8_t vm", + "const int index" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn1.8H" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_laneq_s16_x2", + "arguments": [ + "int16x8x2_t vn", + "uint8x16_t vm", + "const int index" + ], + "return_type": { + 
"value": "int16x8_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn1.8H" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_lane_f16_x2", + "arguments": [ + "float16x8x2_t vn", + "uint8x8_t vm", + "const int index" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn1.8H" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_laneq_f16_x2", + "arguments": [ + "float16x8x2_t vn", + "uint8x16_t vm", + "const int index" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn1.8H" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_lane_p16_x2", + "arguments": [ + "poly16x8x2_t vn", + "uint8x8_t vm", + "const int index" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn1.8H" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_laneq_p16_x2", + "arguments": [ + "poly16x8x2_t vn", + "uint8x16_t vm", + "const int index" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn1.8H" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] } ]