diff --git a/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile b/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile index 70ec5feecd..fe5fe474ca 100644 --- a/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile +++ b/ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile @@ -8,8 +8,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ wget \ xz-utils -RUN wget https://downloadmirror.intel.com/843185/sde-external-9.48.0-2024-11-25-lin.tar.xz -RUN tar -xJf sde-external-9.48.0-2024-11-25-lin.tar.xz -ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/sde-external-9.48.0-2024-11-25-lin/sde64 \ +RUN wget http://ci-mirrors.rust-lang.org/stdarch/sde-external-9.48.0-2024-11-25-lin.tar.xz -O sde.tar.xz +RUN mkdir intel-sde +RUN tar -xJf sde.tar.xz --strip-components=1 -C intel-sde +ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \ -cpuid-in /checkout/ci/docker/x86_64-unknown-linux-gnu-emulated/cpuid.def \ -rtm-mode full -tsx --" diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs index 485aa80cfe..56e522a316 100644 --- a/crates/core_arch/src/aarch64/neon/generated.rs +++ b/crates/core_arch/src/aarch64/neon/generated.rs @@ -12826,6 +12826,403 @@ pub unsafe fn vld4q_u64(a: *const u64) -> uint64x2x4_t { ret_val.3 = unsafe { simd_shuffle!(ret_val.3, ret_val.3, [1, 0]) }; ret_val }
+#[doc = "Lookup table read with 2-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 1))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti2_lane_s8<const LANE: i32>(a: int8x8_t, b: uint8x8_t) -> int8x16_t { + static_assert!(LANE >= 0 && LANE <= 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vluti2.lane.v16i8.v8i8" + )] + fn _vluti2_lane_s8(a: int8x8_t, b: uint8x8_t, n: i32) -> int8x16_t; + } + _vluti2_lane_s8(a, b, LANE) +}
+#[doc = "Lookup table read with 2-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 1))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti2q_lane_s8<const LANE: i32>(a: int8x16_t, b: uint8x8_t) -> int8x16_t { + static_assert!(LANE >= 0 && LANE <= 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vluti2.lane.v16i8.v16i8" + )] + fn _vluti2q_lane_s8(a: int8x16_t, b: uint8x8_t, n: i32) -> int8x16_t; + } + _vluti2q_lane_s8(a, b, LANE) +}
+#[doc = "Lookup table read with 2-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 1))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti2_lane_s16<const LANE: i32>(a: int16x4_t, b: uint8x8_t) -> int16x8_t { + static_assert!(LANE >= 0 
&& LANE <= 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vluti2.lane.v8i16.v4i16" + )] + fn _vluti2_lane_s16(a: int16x4_t, b: uint8x8_t, n: i32) -> int16x8_t; + } + _vluti2_lane_s16(a, b, LANE) +}
+#[doc = "Lookup table read with 2-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_s16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 1))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti2q_lane_s16<const LANE: i32>(a: int16x8_t, b: uint8x8_t) -> int16x8_t { + static_assert!(LANE >= 0 && LANE <= 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vluti2.lane.v8i16.v8i16" + )] + fn _vluti2q_lane_s16(a: int16x8_t, b: uint8x8_t, n: i32) -> int16x8_t; + } + _vluti2q_lane_s16(a, b, LANE) +}
+#[doc = "Lookup table read with 2-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 1))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti2_lane_u8<const LANE: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x16_t { + static_assert!(LANE >= 0 && LANE <= 1); + transmute(vluti2_lane_s8::<LANE>(transmute(a), b)) +}
+#[doc = "Lookup table read with 2-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 1))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti2q_lane_u8<const LANE: i32>(a: uint8x16_t, b: uint8x8_t) -> uint8x16_t { + static_assert!(LANE >= 0 && LANE <= 1); + transmute(vluti2q_lane_s8::<LANE>(transmute(a), b)) +}
+#[doc = "Lookup table read with 2-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 1))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti2_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint8x8_t) -> uint16x8_t { + static_assert!(LANE >= 0 && LANE <= 3); + transmute(vluti2_lane_s16::<LANE>(transmute(a), b)) +}
+#[doc = "Lookup table read with 2-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_u16)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 1))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti2q_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t { + static_assert!(LANE >= 0 && LANE <= 3); + transmute(vluti2q_lane_s16::<LANE>(transmute(a), b)) +}
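As orientation for the family above, a minimal usage sketch (illustrative only, not part of the patch; the wrapper name and the constant 0 are assumptions): `LANE` selects which segment of the index vector supplies the sixteen 2-bit indices into the table vector.

    // Hypothetical caller of the new intrinsic; requires a CPU with the `lut` feature.
    #[target_feature(enable = "neon,lut")]
    unsafe fn lut2_demo(table: int8x8_t, indices: uint8x8_t) -> int8x16_t {
        // Sixteen 2-bit indices are read from the segment of `indices`
        // chosen by the const LANE argument (here segment 0).
        vluti2_lane_s8::<0>(table, indices)
    }

+#[doc = "Lookup table read with 2-bit indices"] 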
+#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 1))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti2_lane_p8(a: poly8x8_t, b: uint8x8_t) -> poly8x16_t { + static_assert!(LANE >= 0 && LANE <= 1); + transmute(vluti2_lane_s8::(transmute(a), b)) +} +#[doc = "Lookup table read with 2-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 1))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti2q_lane_p8(a: poly8x16_t, b: uint8x8_t) -> poly8x16_t { + static_assert!(LANE >= 0 && LANE <= 1); + transmute(vluti2q_lane_s8::(transmute(a), b)) +} +#[doc = "Lookup table read with 2-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 1))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti2_lane_p16(a: poly16x4_t, b: uint8x8_t) -> poly16x8_t { + static_assert!(LANE >= 0 && LANE <= 3); + transmute(vluti2_lane_s16::(transmute(a), b)) +} +#[doc = "Lookup table read with 2-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti2q_lane_p16)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 1))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti2q_lane_p16(a: poly16x8_t, b: uint8x8_t) -> poly16x8_t { + static_assert!(LANE >= 0 && LANE <= 3); + transmute(vluti2q_lane_s16::(transmute(a), b)) +} +#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut,fp16")] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_lane_f16_x2(a: float16x8x2_t, b: uint8x8_t) -> float16x8_t { + static_assert!(LANE >= 0 && LANE <= 1); + transmute(vluti4q_lane_s16_x2::(transmute(a), b)) +} +#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_u16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon instrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_lane_u16_x2(a: uint16x8x2_t, b: uint8x8_t) -> uint16x8_t { + static_assert!(LANE >= 0 && LANE <= 1); + 
transmute(vluti4q_lane_s16_x2::<LANE>(transmute(a), b)) +}
+#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_p16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_lane_p16_x2<const LANE: i32>(a: poly16x8x2_t, b: uint8x8_t) -> poly16x8_t { + static_assert!(LANE >= 0 && LANE <= 1); + transmute(vluti4q_lane_s16_x2::<LANE>(transmute(a), b)) +}
+#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_s16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_lane_s16_x2<const LANE: i32>(a: int16x8x2_t, b: uint8x8_t) -> int16x8_t { + static_assert!(LANE >= 0 && LANE <= 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vluti4q.lane.x2.v8i16" + )] + fn _vluti4q_lane_s16_x2(a: int16x8_t, b: int16x8_t, c: uint8x8_t, n: i32) -> int16x8_t; + } + _vluti4q_lane_s16_x2(a.0, a.1, b, LANE) +}
+#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_lane_s8<const LANE: i32>(a: int8x16_t, b: uint8x8_t) -> int8x16_t { + static_assert!(LANE == 0); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vluti4q.lane.v8i8" + )] + fn _vluti4q_lane_s8(a: int8x16_t, b: uint8x8_t, n: i32) -> int8x16_t; + } + _vluti4q_lane_s8(a, b, LANE) +}
+#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_lane_u8<const LANE: i32>(a: uint8x16_t, b: uint8x8_t) -> uint8x16_t { + static_assert!(LANE == 0); + transmute(vluti4q_lane_s8::<LANE>(transmute(a), b)) +}
+#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_lane_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_lane_p8<const LANE: i32>(a: poly8x16_t, b: uint8x8_t) -> poly8x16_t { + static_assert!(LANE == 0); + transmute(vluti4q_lane_s8::<LANE>(transmute(a), b)) +}
+#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's 
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_f16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut,fp16")] +#[cfg_attr(test, assert_instr(nop, LANE = 3))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_laneq_f16_x2<const LANE: i32>( + a: float16x8x2_t, + b: uint8x16_t, +) -> float16x8_t { + static_assert!(LANE >= 0 && LANE <= 3); + transmute(vluti4q_laneq_s16_x2::<LANE>(transmute(a), b)) +}
+#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_u16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 3))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_laneq_u16_x2<const LANE: i32>(a: uint16x8x2_t, b: uint8x16_t) -> uint16x8_t { + static_assert!(LANE >= 0 && LANE <= 3); + transmute(vluti4q_laneq_s16_x2::<LANE>(transmute(a), b)) +}
+#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_p16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 3))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_laneq_p16_x2<const LANE: i32>(a: poly16x8x2_t, b: uint8x16_t) -> poly16x8_t { + static_assert!(LANE >= 0 && LANE <= 3); + transmute(vluti4q_laneq_s16_x2::<LANE>(transmute(a), b)) +}
+#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_s16_x2)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 3))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_laneq_s16_x2<const LANE: i32>(a: int16x8x2_t, b: uint8x16_t) -> int16x8_t { + static_assert!(LANE >= 0 && LANE <= 3); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vluti4q.laneq.x2.v8i16" + )] + fn _vluti4q_laneq_s16_x2(a: int16x8_t, b: int16x8_t, c: uint8x16_t, n: i32) -> int16x8_t; + } + _vluti4q_laneq_s16_x2(a.0, a.1, b, LANE) +}
+#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_s8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_laneq_s8<const LANE: i32>(a: int8x16_t, b: uint8x16_t) -> int8x16_t { + static_assert!(LANE >= 0 && LANE <= 1); + unsafe extern "unadjusted" { + #[cfg_attr( + any(target_arch = "aarch64", target_arch = "arm64ec"), + link_name = "llvm.aarch64.neon.vluti4q.laneq.v16i8" + )] + fn _vluti4q_laneq_s8(a: int8x16_t, b: uint8x16_t, n: i32) -> int8x16_t; + } + _vluti4q_laneq_s8(a, b, LANE) +}
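For the two-register `_x2` forms, a similar hedged sketch (the helper name and constant are illustrative assumptions): the table is a pair of vectors and each result element is selected by a 4-bit index.

    // Hypothetical caller: a 16-entry table spread over two i16x8 vectors.
    #[target_feature(enable = "neon,lut")]
    unsafe fn lut4_demo(table: int16x8x2_t, indices: uint8x8_t) -> int16x8_t {
        // 4-bit indices come from the segment of `indices` chosen by LANE.
        vluti4q_lane_s16_x2::<0>(table, indices)
    }

+#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's 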
documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_u8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_laneq_u8<const LANE: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert!(LANE >= 0 && LANE <= 1); + transmute(vluti4q_laneq_s8::<LANE>(transmute(a), b)) +}
+#[doc = "Lookup table read with 4-bit indices"] +#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vluti4q_laneq_p8)"] +#[doc = "## Safety"] +#[doc = " * Neon intrinsic unsafe"] +#[inline] +#[target_feature(enable = "neon,lut")] +#[cfg_attr(test, assert_instr(nop, LANE = 0))] +#[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vluti4q_laneq_p8<const LANE: i32>(a: poly8x16_t, b: uint8x16_t) -> poly8x16_t { + static_assert!(LANE >= 0 && LANE <= 1); + transmute(vluti4q_laneq_s8::<LANE>(transmute(a), b)) +} #[doc = "Maximum (vector)"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmax_f64)"] #[inline]
diff --git a/crates/core_arch/src/lib.rs b/crates/core_arch/src/lib.rs index 16e52d2d9d..b8f4c2625e 100644 --- a/crates/core_arch/src/lib.rs +++ b/crates/core_arch/src/lib.rs @@ -38,7 +38,8 @@ x86_amx_intrinsics, f16, keylocker_x86, - aarch64_unstable_target_feature + aarch64_unstable_target_feature, + bigint_helper_methods )] #![cfg_attr(test, feature(test, abi_vectorcall, stdarch_internal))] #![deny(clippy::missing_inline_in_public_items)]
diff --git a/crates/core_arch/src/macros.rs b/crates/core_arch/src/macros.rs index f59e278bb7..e00b433536 100644 --- a/crates/core_arch/src/macros.rs +++ b/crates/core_arch/src/macros.rs @@ -132,7 +132,7 @@ macro_rules! types { impl crate::fmt::Debug for $name { #[inline] fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) -> crate::fmt::Result { - crate::core_arch::simd::debug_simd_finish(f, stringify!($name), self.0) + crate::core_arch::simd::debug_simd_finish(f, stringify!($name), self.as_array()) } } )*);
diff --git a/crates/core_arch/src/s390x/macros.rs b/crates/core_arch/src/s390x/macros.rs index 2c40fedae2..4f0f84ec91 100644 --- a/crates/core_arch/src/s390x/macros.rs +++ b/crates/core_arch/src/s390x/macros.rs @@ -250,6 +250,19 @@ macro_rules! l_t_t { u8 }; + (vector_bool_long_long) => { + u64 + }; + (vector_bool_int) => { + u32 + }; + (vector_bool_short) => { + u16 + }; + (vector_bool_char) => { + u8 + }; + (vector_float) => { f32 }; @@ -338,6 +351,9 @@ macro_rules! t_u { (vector_bool_int) => { vector_unsigned_int }; + (vector_bool_long_long) => { + vector_unsigned_long_long + }; (vector_unsigned_char) => { vector_unsigned_char }; @@ -380,6 +396,9 @@ macro_rules! 
t_b { (vector_bool_int) => { vector_bool_int }; + (vector_bool_long_long) => { + vector_bool_long_long + }; (vector_signed_char) => { vector_bool_char };
diff --git a/crates/core_arch/src/s390x/vector.rs b/crates/core_arch/src/s390x/vector.rs index 795e426384..5103273ed9 100644 --- a/crates/core_arch/src/s390x/vector.rs +++ b/crates/core_arch/src/s390x/vector.rs @@ -7,7 +7,7 @@ #![allow(non_camel_case_types)] -use crate::{core_arch::simd::*, intrinsics::simd::*, mem::transmute}; +use crate::{core_arch::simd::*, intrinsics::simd::*, mem::MaybeUninit, mem::transmute}; #[cfg(test)] use stdarch_test::assert_instr; @@ -97,6 +97,10 @@ unsafe extern "unadjusted" { #[link_name = "llvm.s390.vsrlb"] fn vsrlb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; #[link_name = "llvm.s390.vslb"] fn vslb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; + #[link_name = "llvm.s390.vsldb"] fn vsldb(a: i8x16, b: i8x16, c: u32) -> i8x16; + #[link_name = "llvm.s390.vsld"] fn vsld(a: i8x16, b: i8x16, c: u32) -> i8x16; + #[link_name = "llvm.s390.vsrd"] fn vsrd(a: i8x16, b: i8x16, c: u32) -> i8x16; + #[link_name = "llvm.fshl.v16i8"] fn fshlb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char; #[link_name = "llvm.fshl.v8i16"] fn fshlh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short; #[link_name = "llvm.fshl.v4i32"] fn fshlf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int; @@ -122,6 +126,8 @@ unsafe extern "unadjusted" { #[link_name = "llvm.s390.vsbiq"] fn vsbiq(a: u128, b: u128, c: u128) -> u128; #[link_name = "llvm.s390.vsbcbiq"] fn vsbcbiq(a: u128, b: u128, c: u128) -> u128; + #[link_name = "llvm.s390.vacq"] fn vacq(a: u128, b: u128, c: u128) -> u128; + #[link_name = "llvm.s390.vscbib"] fn vscbib(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; #[link_name = "llvm.s390.vscbih"] fn vscbih(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short; #[link_name = "llvm.s390.vscbif"] fn vscbif(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int; @@ -142,6 +148,189 @@ unsafe extern "unadjusted" { #[link_name = "llvm.s390.vfaezbs"] fn vfaezbs(a: vector_signed_char, b: vector_signed_char, c: i32) -> PackedTuple<vector_signed_char, i32>; #[link_name = "llvm.s390.vfaezhs"] fn vfaezhs(a: vector_signed_short, b: vector_signed_short, c: i32) -> PackedTuple<vector_signed_short, i32>; #[link_name = "llvm.s390.vfaezfs"] fn vfaezfs(a: vector_signed_int, b: vector_signed_int, c: i32) -> PackedTuple<vector_signed_int, i32>;
+ + #[link_name = "llvm.s390.vll"] fn vll(a: u32, b: *const u8) -> vector_signed_char; + #[link_name = "llvm.s390.vstl"] fn vstl(a: vector_signed_char, b: u32, c: *mut u8); + + #[link_name = "llvm.s390.vlrl"] fn vlrl(a: u32, b: *const u8) -> vector_unsigned_char; + #[link_name = "llvm.s390.vstrl"] fn vstrl(a: vector_unsigned_char, b: u32, c: *mut u8); + + #[link_name = "llvm.s390.lcbb"] fn lcbb(a: *const u8, b: u32) -> u32; + #[link_name = "llvm.s390.vlbb"] fn vlbb(a: *const u8, b: u32) -> MaybeUninit<vector_signed_char>;
+ + #[link_name = "llvm.s390.vpksh"] fn vpksh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_char; + #[link_name = "llvm.s390.vpksf"] fn vpksf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_short; + #[link_name = "llvm.s390.vpksg"] fn vpksg(a: vector_signed_long_long, b: vector_signed_long_long) -> vector_signed_int;
+ + #[link_name = "llvm.s390.vpklsh"] fn vpklsh(a: vector_signed_short, b: 
vector_signed_short) -> vector_unsigned_char; + #[link_name = "llvm.s390.vpklsf"] fn vpklsf(a: vector_signed_int, b: vector_signed_int) -> vector_unsigned_short; + #[link_name = "llvm.s390.vpklsg"] fn vpklsg(a: vector_signed_long_long, b: vector_signed_long_long) -> vector_unsigned_int;
+ + #[link_name = "llvm.s390.vpkshs"] fn vpkshs(a: vector_signed_short, b: vector_signed_short) -> PackedTuple<vector_signed_char, i32>; + #[link_name = "llvm.s390.vpksfs"] fn vpksfs(a: vector_signed_int, b: vector_signed_int) -> PackedTuple<vector_signed_short, i32>; + #[link_name = "llvm.s390.vpksgs"] fn vpksgs(a: vector_signed_long_long, b: vector_signed_long_long) -> PackedTuple<vector_signed_int, i32>;
+ + #[link_name = "llvm.s390.vpklshs"] fn vpklshs(a: vector_unsigned_short, b: vector_unsigned_short) -> PackedTuple<vector_unsigned_char, i32>; + #[link_name = "llvm.s390.vpklsfs"] fn vpklsfs(a: vector_unsigned_int, b: vector_unsigned_int) -> PackedTuple<vector_unsigned_short, i32>; + #[link_name = "llvm.s390.vpklsgs"] fn vpklsgs(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> PackedTuple<vector_unsigned_int, i32>;
+ + #[link_name = "llvm.s390.vuplbw"] fn vuplbw(a: vector_signed_char) -> vector_signed_short; + #[link_name = "llvm.s390.vuplhw"] fn vuplhw(a: vector_signed_short) -> vector_signed_int; + #[link_name = "llvm.s390.vuplfw"] fn vuplfw(a: vector_signed_int) -> vector_signed_long_long; + #[link_name = "llvm.s390.vupllb"] fn vupllb(a: vector_unsigned_char) -> vector_unsigned_short; + #[link_name = "llvm.s390.vupllh"] fn vupllh(a: vector_unsigned_short) -> vector_unsigned_int; + #[link_name = "llvm.s390.vupllf"] fn vupllf(a: vector_unsigned_int) -> vector_unsigned_long_long;
+ + #[link_name = "llvm.s390.vavgb"] fn vavgb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; + #[link_name = "llvm.s390.vavgh"] fn vavgh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short; + #[link_name = "llvm.s390.vavgf"] fn vavgf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; + #[link_name = "llvm.s390.vavgg"] fn vavgg(a: vector_signed_long_long, b: vector_signed_long_long) -> vector_signed_long_long;
+ + #[link_name = "llvm.s390.vavglb"] fn vavglb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.s390.vavglh"] fn vavglh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.s390.vavglf"] fn vavglf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int; + #[link_name = "llvm.s390.vavglg"] fn vavglg(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> vector_unsigned_long_long;
+ + #[link_name = "llvm.s390.vcksm"] fn vcksm(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int;
+ + #[link_name = "llvm.s390.vmeb"] fn vmeb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short; + #[link_name = "llvm.s390.vmeh"] fn vmeh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int; + #[link_name = "llvm.s390.vmef"] fn vmef(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long;
+ + #[link_name = "llvm.s390.vmleb"] fn vmleb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short; + #[link_name = "llvm.s390.vmleh"] fn vmleh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int; + #[link_name = "llvm.s390.vmlef"] fn vmlef(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long;
+ + #[link_name = "llvm.s390.vmob"] fn vmob(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short; + #[link_name = "llvm.s390.vmoh"] fn vmoh(a: vector_signed_short, b: 
vector_signed_short) -> vector_signed_int; + #[link_name = "llvm.s390.vmof"] fn vmof(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long; + + #[link_name = "llvm.s390.vmlob"] fn vmlob(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short; + #[link_name = "llvm.s390.vmloh"] fn vmloh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int; + #[link_name = "llvm.s390.vmlof"] fn vmlof(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long; + + #[link_name = "llvm.s390.vmhb"] fn vmhb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char; + #[link_name = "llvm.s390.vmhh"] fn vmhh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short; + #[link_name = "llvm.s390.vmhf"] fn vmhf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int; + + #[link_name = "llvm.s390.vmlhb"] fn vmlhb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.s390.vmlhh"] fn vmlhh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.s390.vmlhf"] fn vmlhf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int; + + #[link_name = "llvm.s390.vmaeb"] fn vmaeb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_short) -> vector_signed_short; + #[link_name = "llvm.s390.vmaeh"] fn vmaeh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_int) -> vector_signed_int; + #[link_name = "llvm.s390.vmaef"] fn vmaef(a: vector_signed_int, b: vector_signed_int, c: vector_signed_long_long) -> vector_signed_long_long; + + #[link_name = "llvm.s390.vmaleb"] fn vmaleb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.s390.vmaleh"] fn vmaleh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int) -> vector_unsigned_int; + #[link_name = "llvm.s390.vmalef"] fn vmalef(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_long_long) -> vector_unsigned_long_long; + + #[link_name = "llvm.s390.vmaob"] fn vmaob(a: vector_signed_char, b: vector_signed_char, c: vector_signed_short) -> vector_signed_short; + #[link_name = "llvm.s390.vmaoh"] fn vmaoh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_int) -> vector_signed_int; + #[link_name = "llvm.s390.vmaof"] fn vmaof(a: vector_signed_int, b: vector_signed_int, c: vector_signed_long_long) -> vector_signed_long_long; + + #[link_name = "llvm.s390.vmalob"] fn vmalob(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.s390.vmaloh"] fn vmaloh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int) -> vector_unsigned_int; + #[link_name = "llvm.s390.vmalof"] fn vmalof(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_long_long) -> vector_unsigned_long_long; + + #[link_name = "llvm.s390.vmahb"] fn vmahb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char) -> vector_signed_char; + #[link_name = "llvm.s390.vmahh"] fn vmahh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short) -> vector_signed_short; + #[link_name = "llvm.s390.vmahf"] fn vmahf(a: vector_signed_int, b: vector_signed_int, c: vector_signed_int) -> vector_signed_int; + + #[link_name = "llvm.s390.vmalhb"] fn vmalhb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.s390.vmalhh"] fn 
vmalhh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.s390.vmalhf"] fn vmalhf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int;
+ + #[link_name = "llvm.s390.vmalb"] fn vmalb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char) -> vector_signed_char; + #[link_name = "llvm.s390.vmalh"] fn vmalh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short) -> vector_signed_short; + #[link_name = "llvm.s390.vmalf"] fn vmalf(a: vector_signed_int, b: vector_signed_int, c: vector_signed_int) -> vector_signed_int;
+ + #[link_name = "llvm.s390.vmallb"] fn vmallb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.s390.vmallh"] fn vmallh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.s390.vmallf"] fn vmallf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int;
+ + #[link_name = "llvm.s390.vgfmb"] fn vgfmb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short; + #[link_name = "llvm.s390.vgfmh"] fn vgfmh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int; + #[link_name = "llvm.s390.vgfmf"] fn vgfmf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long; + #[link_name = "llvm.s390.vgfmg"] fn vgfmg(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> u128;
+ + #[link_name = "llvm.s390.vgfmab"] fn vgfmab(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_short) -> vector_unsigned_short; + #[link_name = "llvm.s390.vgfmah"] fn vgfmah(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int) -> vector_unsigned_int; + #[link_name = "llvm.s390.vgfmaf"] fn vgfmaf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_long_long) -> vector_unsigned_long_long; + #[link_name = "llvm.s390.vgfmag"] fn vgfmag(a: vector_unsigned_long_long, b: vector_unsigned_long_long, c: u128) -> u128;
+ + #[link_name = "llvm.s390.vbperm"] fn vbperm(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_long_long;
+ + #[link_name = "llvm.s390.vftcisb"] fn vftcisb(a: vector_float, b: u32) -> PackedTuple<vector_bool_int, i32>; + #[link_name = "llvm.s390.vftcidb"] fn vftcidb(a: vector_double, b: u32) -> PackedTuple<vector_bool_long_long, i32>;
+ + #[link_name = "llvm.s390.vtm"] fn vtm(a: i8x16, b: i8x16) -> i32;
+ + #[link_name = "llvm.s390.vstrsb"] fn vstrsb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32>; + #[link_name = "llvm.s390.vstrsh"] fn vstrsh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32>; + #[link_name = "llvm.s390.vstrsf"] fn vstrsf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32>;
+ + #[link_name = "llvm.s390.vstrszb"] fn vstrszb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32>; + #[link_name = "llvm.s390.vstrszh"] fn vstrszh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32>; + #[link_name = "llvm.s390.vstrszf"] fn vstrszf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32>;
+ + #[link_name = "llvm.s390.vistrb"] fn vistrb(a: vector_unsigned_char) -> vector_unsigned_char; + #[link_name = "llvm.s390.vistrh"] fn vistrh(a: vector_unsigned_short) -> 
vector_unsigned_short; + #[link_name = "llvm.s390.vistrf"] fn vistrf(a: vector_unsigned_int) -> vector_unsigned_int;
+ + #[link_name = "llvm.s390.vistrbs"] fn vistrbs(a: vector_unsigned_char) -> PackedTuple<vector_unsigned_char, i32>; + #[link_name = "llvm.s390.vistrhs"] fn vistrhs(a: vector_unsigned_short) -> PackedTuple<vector_unsigned_short, i32>; + #[link_name = "llvm.s390.vistrfs"] fn vistrfs(a: vector_unsigned_int) -> PackedTuple<vector_unsigned_int, i32>;
+ + #[link_name = "llvm.s390.vmslg"] fn vmslg(a: vector_unsigned_long_long, b: vector_unsigned_long_long, c: u128, d: u32) -> u128;
+ + #[link_name = "llvm.s390.vstrcb"] fn vstrcb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char, d: u32) -> vector_bool_char; + #[link_name = "llvm.s390.vstrch"] fn vstrch(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short, d: u32) -> vector_bool_short; + #[link_name = "llvm.s390.vstrcf"] fn vstrcf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int, d: u32) -> vector_bool_int;
+ + #[link_name = "llvm.s390.vstrcbs"] fn vstrcbs(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char, d: u32) -> PackedTuple<vector_bool_char, i32>; + #[link_name = "llvm.s390.vstrchs"] fn vstrchs(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short, d: u32) -> PackedTuple<vector_bool_short, i32>; + #[link_name = "llvm.s390.vstrcfs"] fn vstrcfs(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int, d: u32) -> PackedTuple<vector_bool_int, i32>;
+ + #[link_name = "llvm.s390.vstrczb"] fn vstrczb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char, d: u32) -> vector_bool_char; + #[link_name = "llvm.s390.vstrczh"] fn vstrczh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short, d: u32) -> vector_bool_short; + #[link_name = "llvm.s390.vstrczf"] fn vstrczf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int, d: u32) -> vector_bool_int;
+ + #[link_name = "llvm.s390.vstrczbs"] fn vstrczbs(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char, d: u32) -> PackedTuple<vector_bool_char, i32>; + #[link_name = "llvm.s390.vstrczhs"] fn vstrczhs(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short, d: u32) -> PackedTuple<vector_bool_short, i32>; + #[link_name = "llvm.s390.vstrczfs"] fn vstrczfs(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int, d: u32) -> PackedTuple<vector_bool_int, i32>;
+ + #[link_name = "llvm.s390.vfeeb"] fn vfeeb(a: i8x16, b: i8x16) -> i8x16; + #[link_name = "llvm.s390.vfeeh"] fn vfeeh(a: i16x8, b: i16x8) -> i16x8; + #[link_name = "llvm.s390.vfeef"] fn vfeef(a: i32x4, b: i32x4) -> i32x4;
+ + #[link_name = "llvm.s390.vfeezb"] fn vfeezb(a: i8x16, b: i8x16) -> i8x16; + #[link_name = "llvm.s390.vfeezh"] fn vfeezh(a: i16x8, b: i16x8) -> i16x8; + #[link_name = "llvm.s390.vfeezf"] fn vfeezf(a: i32x4, b: i32x4) -> i32x4;
+ + #[link_name = "llvm.s390.vfeebs"] fn vfeebs(a: i8x16, b: i8x16) -> PackedTuple<i8x16, i32>; + #[link_name = "llvm.s390.vfeehs"] fn vfeehs(a: i16x8, b: i16x8) -> PackedTuple<i16x8, i32>; + #[link_name = "llvm.s390.vfeefs"] fn vfeefs(a: i32x4, b: i32x4) -> PackedTuple<i32x4, i32>;
+ + #[link_name = "llvm.s390.vfeezbs"] fn vfeezbs(a: i8x16, b: i8x16) -> PackedTuple<i8x16, i32>; + #[link_name = "llvm.s390.vfeezhs"] fn vfeezhs(a: i16x8, b: i16x8) -> PackedTuple<i16x8, i32>; + #[link_name = "llvm.s390.vfeezfs"] fn vfeezfs(a: i32x4, b: i32x4) -> PackedTuple<i32x4, i32>;
+ + #[link_name = "llvm.s390.vfeneb"] fn vfeneb(a: i8x16, b: i8x16) -> i8x16; + #[link_name = "llvm.s390.vfeneh"] fn vfeneh(a: i16x8, b: i16x8) -> i16x8; + #[link_name = "llvm.s390.vfenef"] fn vfenef(a: i32x4, b: i32x4) -> i32x4;
+ + #[link_name = "llvm.s390.vfenezb"] fn 
vfenezb(a: i8x16, b: i8x16) -> i8x16; + #[link_name = "llvm.s390.vfenezh"] fn vfenezh(a: i16x8, b: i16x8) -> i16x8; + #[link_name = "llvm.s390.vfenezf"] fn vfenezf(a: i32x4, b: i32x4) -> i32x4;
+ + #[link_name = "llvm.s390.vfenebs"] fn vfenebs(a: i8x16, b: i8x16) -> PackedTuple<i8x16, i32>; + #[link_name = "llvm.s390.vfenehs"] fn vfenehs(a: i16x8, b: i16x8) -> PackedTuple<i16x8, i32>; + #[link_name = "llvm.s390.vfenefs"] fn vfenefs(a: i32x4, b: i32x4) -> PackedTuple<i32x4, i32>;
+ + #[link_name = "llvm.s390.vfenezbs"] fn vfenezbs(a: i8x16, b: i8x16) -> PackedTuple<i8x16, i32>; + #[link_name = "llvm.s390.vfenezhs"] fn vfenezhs(a: i16x8, b: i16x8) -> PackedTuple<i16x8, i32>; + #[link_name = "llvm.s390.vfenezfs"] fn vfenezfs(a: i32x4, b: i32x4) -> PackedTuple<i32x4, i32>; }
impl_from! { i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 } @@ -194,6 +383,39 @@ impl<const N: usize> ShuffleMask<N> { } ShuffleMask(mask) } + + const fn pack() -> Self { + let mut mask = [0; N]; + let mut i = 1; + let mut index = 0; + while index < N { + mask[index] = i as u32; + + i += 2; + index += 1; + } + ShuffleMask(mask) + } + + const fn unpack_low() -> Self { + let mut mask = [0; N]; + let mut i = 0; + while i < N { + mask[i] = (N + i) as u32; + i += 1; + } + ShuffleMask(mask) + } + + const fn unpack_high() -> Self { + let mut mask = [0; N]; + let mut i = 0; + while i < N { + mask[i] = i as u32; + i += 1; + } + ShuffleMask(mask) + } } const fn genmask() -> [u8; 16] { @@ -229,6 +451,23 @@ const fn genmasks(bit_width: u32, a: u8, b: u8) -> u64 { ((1u64.wrapping_shl(a as u32 + 1)) - 1) & !((1u64.wrapping_shl(b as u32)) - 1) }
+const fn validate_block_boundary(block_boundary: u16) -> u32 { + assert!( + block_boundary.is_power_of_two() && block_boundary >= 64 && block_boundary <= 4096, + "block boundary must be a constant power of 2 from 64 to 4096", + ); + + // so that 64 is encoded as 0, 128 as 1, etc. + block_boundary as u32 >> 7 +}
+ +enum FindImm { + Eq = 4, + Ne = 12, + EqIdx = 0, + NeIdx = 8, +} + #[macro_use] mod sealed { use super::*; @@ -552,6 +791,88 @@ mod sealed { impl_vec_trait! { [VectorNabs vec_nabs] vec_nabs_f32 (vector_float) } impl_vec_trait! 
{ [VectorNabs vec_nabs] vec_nabs_f64 (vector_double) }
+ #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorNmsub { + unsafe fn vec_nmsub(self, b: Self, c: Self) -> Self; + }
+ + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr( + all(test, target_feature = "vector-enhancements-2"), + assert_instr(vfnmssb) + )] + unsafe fn vec_nmsub_f32(a: vector_float, b: vector_float, c: vector_float) -> vector_float { + simd_neg(simd_fma(a, b, simd_neg(c))) + }
+ + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorNmsub for vector_float { + #[target_feature(enable = "vector")] + unsafe fn vec_nmsub(self, b: Self, c: Self) -> Self { + vec_nmsub_f32(self, b, c) + } + }
+ + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr( + all(test, target_feature = "vector-enhancements-2"), + assert_instr(vfnmsdb) + )] + unsafe fn vec_nmsub_f64(a: vector_double, b: vector_double, c: vector_double) -> vector_double { + simd_neg(simd_fma(a, b, simd_neg(c))) + }
+ + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorNmsub for vector_double { + #[target_feature(enable = "vector")] + unsafe fn vec_nmsub(self, b: Self, c: Self) -> Self { + vec_nmsub_f64(self, b, c) + } + }
+ + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorNmadd { + unsafe fn vec_nmadd(self, b: Self, c: Self) -> Self; + }
+ + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr( + all(test, target_feature = "vector-enhancements-2"), + assert_instr(vfnmasb) + )] + unsafe fn vec_nmadd_f32(a: vector_float, b: vector_float, c: vector_float) -> vector_float { + simd_neg(simd_fma(a, b, c)) + }
+ + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorNmadd for vector_float { + #[target_feature(enable = "vector")] + unsafe fn vec_nmadd(self, b: Self, c: Self) -> Self { + vec_nmadd_f32(self, b, c) + } + }
+ + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr( + all(test, target_feature = "vector-enhancements-2"), + assert_instr(vfnmadb) + )] + unsafe fn vec_nmadd_f64(a: vector_double, b: vector_double, c: vector_double) -> vector_double { + simd_neg(simd_fma(a, b, c)) + }
+ + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorNmadd for vector_double { + #[target_feature(enable = "vector")] + unsafe fn vec_nmadd(self, b: Self, c: Self) -> Self { + vec_nmadd_f64(self, b, c) + } + } + #[unstable(feature = "stdarch_s390x", issue = "135681")] pub trait VectorSplat { unsafe fn vec_splat(self) -> Self; @@ -871,7 +1192,8 @@ mod sealed { test_impl! { vec_roundc_f32 (a: vector_float) -> vector_float [nearbyint_v4f32, "vector-enhancements-1" vfisb] } test_impl! { vec_roundc_f64 (a: vector_double) -> vector_double [nearbyint_v2f64, vfidb] } - // FIXME(llvm) roundeven does not yet lower to vfidb (but should in the future) + // FIXME(llvm) LLVM trunk already lowers roundeven to vfidb, but Rust does not use it yet; + // use https://godbolt.org/z/cWq95fexe to check, and enable the instruction test when it works test_impl! { vec_round_f32 (a: vector_float) -> vector_float [roundeven_v4f32, _] } test_impl! { vec_round_f64 (a: vector_double) -> vector_double [roundeven_v2f64, _] } @@ -1212,6 +1534,21 @@ mod sealed { unsafe fn vec_reve(self) -> Self; }
+ #[repr(simd)] + struct ReverseMask<const N: usize>([u32; N]);
+ + impl<const N: usize> ReverseMask<N> { + const fn new() -> Self { + let mut index = [0; N]; + let mut i = 0; + while i < N { + index[i] = (N - i - 1) as u32; + i += 1; + } + ReverseMask(index) + } + } + macro_rules! 
impl_reve { ($($ty:ident, $fun:ident, $instr:ident),*) => { $( @@ -1592,10 +1929,9 @@ mod sealed { type Result = $r; #[inline] #[target_feature(enable = "vector")] - unsafe fn $m(self, b: Self, c: *mut i32) -> Self::Result { + unsafe fn $m(self, b: Self) -> (Self::Result, i32) { let PackedTuple { x, y } = $fun::<{ FindImm::$imm as i32 }>(transmute(self), transmute(b)); - c.write(y); - transmute(x) + (transmute(x), y) } } )* @@ -1622,10 +1958,9 @@ mod sealed { type Result = t_b!($ty); #[inline] #[target_feature(enable = "vector")] - unsafe fn $m(self, b: Self, c: *mut i32) -> Self::Result { + unsafe fn $m(self, b: Self) -> (Self::Result, i32) { let PackedTuple { x, y } = $fun::<{ FindImm::$imm as i32 }>(transmute(self), transmute(b)); - c.write(y); - transmute(x) + (transmute(x), y) } } )* @@ -1688,13 +2023,6 @@ mod sealed { }; } - enum FindImm { - Eq = 4, - Ne = 12, - EqIdx = 0, - NeIdx = 8, - } - #[unstable(feature = "stdarch_s390x", issue = "135681")] pub trait VectorFindAnyEq<Other> { type Result; @@ -1784,19 +2112,19 @@ mod sealed { } vfaes_wrapper! { - vfaebs vector_signed_char - vfaehs vector_signed_short - vfaefs vector_signed_int + vfaebs vector_signed_char + vfaehs vector_signed_short + vfaefs vector_signed_int - vfaezbs vector_signed_char - vfaezhs vector_signed_short - vfaezfs vector_signed_int + vfaezbs vector_signed_char + vfaezhs vector_signed_short + vfaezfs vector_signed_int }
#[unstable(feature = "stdarch_s390x", issue = "135681")] pub trait VectorFindAnyEqCC<Other> { type Result; - unsafe fn vec_find_any_eq_cc(self, other: Other, c: *mut i32) -> Self::Result; + unsafe fn vec_find_any_eq_cc(self, other: Other) -> (Self::Result, i32); } impl_vfae! { [cc VectorFindAnyEqCC vec_find_any_eq_cc] Eq vfaebs vfaehs vfaefs }
#[unstable(feature = "stdarch_s390x", issue = "135681")] pub trait VectorFindAnyNeCC<Other> { type Result; - unsafe fn vec_find_any_ne_cc(self, other: Other, c: *mut i32) -> Self::Result; + unsafe fn vec_find_any_ne_cc(self, other: Other) -> (Self::Result, i32); } impl_vfae! { [cc VectorFindAnyNeCC vec_find_any_ne_cc] Ne vfaebs vfaehs vfaefs }
#[unstable(feature = "stdarch_s390x", issue = "135681")] pub trait VectorFindAnyEqIdxCC<Other> { type Result; - unsafe fn vec_find_any_eq_idx_cc(self, other: Other, c: *mut i32) -> Self::Result; + unsafe fn vec_find_any_eq_idx_cc(self, other: Other) -> (Self::Result, i32); } impl_vfae! { [idx_cc VectorFindAnyEqIdxCC vec_find_any_eq_idx_cc] EqIdx vfaebs vfaehs vfaefs }
#[unstable(feature = "stdarch_s390x", issue = "135681")] pub trait VectorFindAnyNeIdxCC<Other> { type Result; - unsafe fn vec_find_any_ne_idx_cc(self, other: Other, c: *mut i32) -> Self::Result; + unsafe fn vec_find_any_ne_idx_cc(self, other: Other) -> (Self::Result, i32); } impl_vfae! { [idx_cc VectorFindAnyNeIdxCC vec_find_any_ne_idx_cc] NeIdx vfaebs vfaehs vfaefs }
#[unstable(feature = "stdarch_s390x", issue = "135681")] pub trait VectorFindAnyEqOrZeroIdxCC<Other> { type Result; - unsafe fn vec_find_any_eq_or_0_idx_cc(self, other: Other, c: *mut i32) -> Self::Result; + unsafe fn vec_find_any_eq_or_0_idx_cc(self, other: Other) -> (Self::Result, i32); } impl_vfae! 
{ [idx_cc VectorFindAnyEqOrZeroIdxCC vec_find_any_eq_or_0_idx_cc] EqIdx vfaezbs vfaezhs vfaezfs } @@ -1836,843 +2164,3441 @@ mod sealed { #[unstable(feature = "stdarch_s390x", issue = "135681")] pub trait VectorFindAnyNeOrZeroIdxCC<Other> { type Result; - unsafe fn vec_find_any_ne_or_0_idx_cc(self, other: Other, c: *mut i32) -> Self::Result; + unsafe fn vec_find_any_ne_or_0_idx_cc(self, other: Other) -> (Self::Result, i32); } impl_vfae! { [idx_cc VectorFindAnyNeOrZeroIdxCC vec_find_any_ne_or_0_idx_cc] NeIdx vfaezbs vfaezhs vfaezfs } -}
-/// Vector element-wise addition. -#[inline] -#[target_feature(enable = "vector")] -#[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_add<T, U>(a: T, b: U) -> <T as sealed::VectorAdd<U>>::Result -where - T: sealed::VectorAdd<U>, -{ - a.vec_add(b) -} + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vl))] + unsafe fn test_vector_load(offset: isize, ptr: *const i32) -> vector_signed_int { + ptr.byte_offset(offset) + .cast::<vector_signed_int>() + .read_unaligned() + }
-/// Vector element-wise subtraction. -#[inline] -#[target_feature(enable = "vector")] -#[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_sub<T, U>(a: T, b: U) -> <T as sealed::VectorSub<U>>::Result -where - T: sealed::VectorSub<U>, -{ - a.vec_sub(b) -} + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vst))] + unsafe fn test_vector_store(vector: vector_signed_int, offset: isize, ptr: *mut i32) { + ptr.byte_offset(offset) + .cast::<vector_signed_int>() + .write_unaligned(vector) + }
-/// Vector element-wise multiplication. -/// -/// ## Purpose -/// Compute the products of corresponding elements of two vectors. -/// -/// ## Result value -/// Each element of r receives the product of the corresponding elements of a and b. -#[inline] -#[target_feature(enable = "vector")] -#[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_mul<T>(a: T, b: T) -> T -where - T: sealed::VectorMul, -{ - a.vec_mul(b) -} + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorLoad: Sized { + type ElementType;
-/// Vector Count Leading Zeros -#[inline] -#[target_feature(enable = "vector")] -#[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_cntlz<T>(a: T) -> <T as sealed::CountBits>::Result -where - T: sealed::CountBits, -{ - a.vec_cntlz() -} + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_xl(offset: isize, ptr: *const Self::ElementType) -> Self { + ptr.byte_offset(offset).cast::<Self>().read_unaligned() + }
-/// Vector Count Trailing Zeros -#[inline] -#[target_feature(enable = "vector")] -#[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_cnttz<T>(a: T) -> <T as sealed::CountBits>::Result -where - T: sealed::CountBits, -{ - a.vec_cnttz() -} + unsafe fn vec_load_len(ptr: *const Self::ElementType, byte_count: u32) -> Self;
-/// Vector Population Count -/// -/// Computes the population count (number of set bits) in each element of the input. -#[inline] -#[target_feature(enable = "vector")] -#[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_popcnt<T>(a: T) -> <T as sealed::CountBits>::Result -where - T: sealed::CountBits, -{ - a.vec_popcnt() -} + unsafe fn vec_load_bndry<const BLOCK_BOUNDARY: u16>( + ptr: *const Self::ElementType, + ) -> MaybeUninit<Self>; + }
-/// Vector element-wise maximum. 
-#[inline] -#[target_feature(enable = "vector")] -#[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_max<T, U>(a: T, b: U) -> <T as sealed::VectorMax<U>>::Result -where - T: sealed::VectorMax<U>, -{ - a.vec_max(b) -} + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorStore: Sized { + type ElementType;
-/// Vector element-wise minimum. -#[inline] -#[target_feature(enable = "vector")] -#[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_min<T, U>(a: T, b: U) -> <T as sealed::VectorMin<U>>::Result -where - T: sealed::VectorMin<U>, -{ - a.vec_min(b) -} + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_xst(self, offset: isize, ptr: *mut Self::ElementType) { + ptr.byte_offset(offset).cast::<Self>().write_unaligned(self) + }
-/// Vector abs. -#[inline] -#[target_feature(enable = "vector")] -#[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_abs<T>(a: T) -> T -where - T: sealed::VectorAbs, -{ - a.vec_abs() -} + unsafe fn vec_store_len(self, ptr: *mut Self::ElementType, byte_count: u32); + }
-/// Vector negative abs. -#[inline] -#[target_feature(enable = "vector")] -#[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_nabs<T>(a: T) -> T -where - T: sealed::VectorNabs, -{ - a.vec_nabs() -} + macro_rules! impl_load_store { + ($($ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorLoad for t_t_l!($ty) { + type ElementType = $ty;
-/// Vector square root. -#[inline] -#[target_feature(enable = "vector")] -#[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_sqrt<T>(a: T) -> T -where - T: sealed::VectorSqrt, -{ - a.vec_sqrt() -} + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_load_len(ptr: *const Self::ElementType, byte_count: u32) -> Self { + transmute(vll(byte_count, ptr.cast())) + }
-/// Vector Splat -#[inline] + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_load_bndry<const BLOCK_BOUNDARY: u16>(ptr: *const Self::ElementType) -> MaybeUninit<Self> { + transmute(vlbb(ptr.cast(), const { validate_block_boundary(BLOCK_BOUNDARY) })) + } + + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorStore for t_t_l!($ty) { + type ElementType = $ty; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_store_len(self, ptr: *mut Self::ElementType, byte_count: u32) { + vstl(transmute(self), byte_count, ptr.cast()) + } + } + )* + } + }
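Before the macro is instantiated below, a usage sketch of the trait surface it implements (hedged; the round-trip function is an illustrative assumption, not part of the patch):

    // Hypothetical round-trip through the unaligned load/store methods.
    #[target_feature(enable = "vector")]
    unsafe fn xl_xst_demo(ptr: *mut i32) -> vector_signed_int {
        // vec_xl reads 16 bytes at `ptr` plus a 4-byte offset, with no alignment requirement.
        let v = vector_signed_int::vec_xl(4, ptr);
        // vec_xst writes the same 16 bytes back.
        v.vec_xst(4, ptr);
        v
    }

+ + impl_load_store! 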
{ i8 u8 i16 u16 i32 u32 i64 u64 f32 f64 }
+ + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vll))] + unsafe fn test_vec_load_len(ptr: *const i32, byte_count: u32) -> vector_signed_int { + vector_signed_int::vec_load_len(ptr, byte_count) + }
+ + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vlbb))] + unsafe fn test_vec_load_bndry(ptr: *const i32) -> MaybeUninit<vector_signed_int> { + vector_signed_int::vec_load_bndry::<512>(ptr) + }
+ + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vst))] + unsafe fn test_vec_store_len(vector: vector_signed_int, ptr: *mut i32, byte_count: u32) { + vector.vec_store_len(ptr, byte_count) + }
+ + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorLoadPair: Sized { + type ElementType; + + unsafe fn vec_load_pair(a: Self::ElementType, b: Self::ElementType) -> Self; + }
+ + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorLoadPair for vector_signed_long_long { + type ElementType = i64; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_load_pair(a: i64, b: i64) -> Self { + vector_signed_long_long([a, b]) + } + }
+ + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorLoadPair for vector_unsigned_long_long { + type ElementType = u64; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_load_pair(a: u64, b: u64) -> Self { + vector_unsigned_long_long([a, b]) + } + }
+ + #[inline] + #[target_feature(enable = "vector")] + unsafe fn pack<T, const N: usize>(a: T, b: T) -> T { + simd_shuffle(a, b, const { ShuffleMask::<N>::pack() }) + }
+ + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vpkh))] + unsafe fn vpkh(a: i16x8, b: i16x8) -> i8x16 { + let a: i8x16 = transmute(a); + let b: i8x16 = transmute(b); + simd_shuffle(a, b, const { ShuffleMask::<16>::pack() }) + } + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vpkf))] + unsafe fn vpkf(a: i32x4, b: i32x4) -> i16x8 { + let a: i16x8 = transmute(a); + let b: i16x8 = transmute(b); + simd_shuffle(a, b, const { ShuffleMask::<8>::pack() }) + } + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vpkg))] + unsafe fn vpkg(a: i64x2, b: i64x2) -> i32x4 { + let a: i32x4 = transmute(a); + let b: i32x4 = transmute(b); + simd_shuffle(a, b, const { ShuffleMask::<4>::pack() }) + }
+ + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorPack<Other> { + type Result; + unsafe fn vec_pack(self, b: Other) -> Self::Result; + }
+ + impl_vec_trait! { [VectorPack vec_pack]+ vpkh (vector_signed_short, vector_signed_short) -> vector_signed_char } + impl_vec_trait! { [VectorPack vec_pack]+ vpkh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_char } + impl_vec_trait! { [VectorPack vec_pack]+ vpkh (vector_bool_short, vector_bool_short) -> vector_bool_char } + impl_vec_trait! { [VectorPack vec_pack]+ vpkf (vector_signed_int, vector_signed_int) -> vector_signed_short } + impl_vec_trait! { [VectorPack vec_pack]+ vpkf (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_short } + impl_vec_trait! { [VectorPack vec_pack]+ vpkf (vector_bool_int, vector_bool_int) -> vector_bool_short } + impl_vec_trait! { [VectorPack vec_pack]+ vpkg (vector_signed_long_long, vector_signed_long_long) -> vector_signed_int } + impl_vec_trait! { [VectorPack vec_pack]+ vpkg (vector_unsigned_long_long, vector_unsigned_long_long) -> vector_unsigned_int } + impl_vec_trait! 
{ [VectorPack vec_pack]+ vpkg (vector_bool_long_long, vector_bool_long_long) -> vector_bool_int }
+ + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorPacks<Other> { + type Result; + unsafe fn vec_packs(self, b: Other) -> Self::Result; + }
+ + impl_vec_trait! { [VectorPacks vec_packs] vpksh (vector_signed_short, vector_signed_short) -> vector_signed_char } + impl_vec_trait! { [VectorPacks vec_packs] vpklsh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_char } + impl_vec_trait! { [VectorPacks vec_packs] vpksf (vector_signed_int, vector_signed_int) -> vector_signed_short } + impl_vec_trait! { [VectorPacks vec_packs] vpklsf (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_short } + impl_vec_trait! { [VectorPacks vec_packs] vpksg (vector_signed_long_long, vector_signed_long_long) -> vector_signed_int } + impl_vec_trait! { [VectorPacks vec_packs] vpklsg (vector_unsigned_long_long, vector_unsigned_long_long) -> vector_unsigned_int }
+ + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorPacksu<Other> { + type Result; + unsafe fn vec_packsu(self, b: Other) -> Self::Result; + }
+ + unsafe fn simd_smax<T>(a: T, b: T) -> T { + simd_select::<T, T>(simd_gt::<T, T>(a, b), a, b) + }
+ + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vpklsh))] + unsafe fn vpacksuh(a: vector_signed_short, b: vector_signed_short) -> vector_unsigned_char { + vpklsh( + simd_smax(a, vector_signed_short([0; 8])), + simd_smax(b, vector_signed_short([0; 8])), + ) + } + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vpklsf))] + unsafe fn vpacksuf(a: vector_signed_int, b: vector_signed_int) -> vector_unsigned_short { + vpklsf( + simd_smax(a, vector_signed_int([0; 4])), + simd_smax(b, vector_signed_int([0; 4])), + ) + } + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vpklsg))] + unsafe fn vpacksug( + a: vector_signed_long_long, + b: vector_signed_long_long, + ) -> vector_unsigned_int { + vpklsg( + simd_smax(a, vector_signed_long_long([0; 2])), + simd_smax(b, vector_signed_long_long([0; 2])), + ) + }
+ + impl_vec_trait! { [VectorPacksu vec_packsu] vpacksuh (vector_signed_short, vector_signed_short) -> vector_unsigned_char } + impl_vec_trait! { [VectorPacksu vec_packsu] vpklsh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_char } + impl_vec_trait! { [VectorPacksu vec_packsu] vpacksuf (vector_signed_int, vector_signed_int) -> vector_unsigned_short } + impl_vec_trait! { [VectorPacksu vec_packsu] vpklsf (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_short } + impl_vec_trait! { [VectorPacksu vec_packsu] vpacksug (vector_signed_long_long, vector_signed_long_long) -> vector_unsigned_int } + impl_vec_trait! { [VectorPacksu vec_packsu] vpklsg (vector_unsigned_long_long, vector_unsigned_long_long) -> vector_unsigned_int }
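A worked sketch of the signed-to-unsigned saturating pack defined above (values are illustrative): `simd_smax` first clamps negative lanes to zero, then the logical pack saturates anything above the unsigned maximum.

    // Hypothetical: lanes [-5, 300, 7, ...] clamp to [0, 300, 7, ...],
    // then pack with unsigned saturation to u8 lanes [0, 255, 7, ...].
    #[target_feature(enable = "vector")]
    unsafe fn packsu_demo(a: vector_signed_short, b: vector_signed_short) -> vector_unsigned_char {
        a.vec_packsu(b)
    }

+ + macro_rules! 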
+    macro_rules! impl_vector_packs_cc {
+        ($($intr:ident $ty:ident $outty:ident)*) => {
+            $(
+                #[inline]
+                #[target_feature(enable = "vector")]
+                #[cfg_attr(test, assert_instr($intr))]
+                unsafe fn $intr(
+                    a: $ty,
+                    b: $ty,
+                ) -> ($outty, i32) {
+                    let PackedTuple { x, y } = super::$intr(a, b);
+                    (x, y)
+                }
+
+                #[unstable(feature = "stdarch_s390x", issue = "135681")]
+                impl VectorPacksCC for $ty {
+                    type Result = $outty;
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_packs_cc(self, b: Self) -> (Self::Result, i32) {
+                        $intr(self, b)
+                    }
+                }
+            )*
+        }
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorPacksCC {
+        type Result;
+        unsafe fn vec_packs_cc(self, b: Self) -> (Self::Result, i32);
+    }
+
+    impl_vector_packs_cc! {
+        vpkshs vector_signed_short vector_signed_char
+        vpklshs vector_unsigned_short vector_unsigned_char
+        vpksfs vector_signed_int vector_signed_short
+        vpklsfs vector_unsigned_int vector_unsigned_short
+        vpksgs vector_signed_long_long vector_signed_int
+        vpklsgs vector_unsigned_long_long vector_unsigned_int
+    }
+
+    macro_rules! impl_vector_packsu_cc {
+        ($($intr:ident $ty:ident $outty:ident)*) => {
+            $(
+                #[unstable(feature = "stdarch_s390x", issue = "135681")]
+                impl VectorPacksuCC for $ty {
+                    type Result = $outty;
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_packsu_cc(self, b: Self) -> (Self::Result, i32) {
+                        $intr(self, b)
+                    }
+                }
+            )*
+        }
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorPacksuCC {
+        type Result;
+        unsafe fn vec_packsu_cc(self, b: Self) -> (Self::Result, i32);
+    }
+
+    impl_vector_packsu_cc! {
+        vpklshs vector_unsigned_short vector_unsigned_char
+        vpklsfs vector_unsigned_int vector_unsigned_short
+        vpklsgs vector_unsigned_long_long vector_unsigned_int
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    pub trait VectorMadd {
+        unsafe fn vec_madd(self, b: Self, c: Self) -> Self;
+        unsafe fn vec_msub(self, b: Self, c: Self) -> Self;
+    }
+
+    test_impl! { vfmasb (a: vector_float, b: vector_float, c: vector_float) -> vector_float [simd_fma, "vector-enhancements-1" vfmasb] }
+    test_impl! { vfmadb (a: vector_double, b: vector_double, c: vector_double) -> vector_double [simd_fma, vfmadb] }
+
+    #[inline]
+    unsafe fn simd_fms<T>(a: T, b: T, c: T) -> T {
+        simd_fma(a, b, simd_neg(c))
+    }
+
+    test_impl! { vfmssb (a: vector_float, b: vector_float, c: vector_float) -> vector_float [simd_fms, "vector-enhancements-1" vfmssb] }
+    test_impl! { vfmsdb (a: vector_double, b: vector_double, c: vector_double) -> vector_double [simd_fms, vfmsdb] }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    impl VectorMadd for vector_float {
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_madd(self, b: Self, c: Self) -> Self {
+            vfmasb(self, b, c)
+        }
+
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_msub(self, b: Self, c: Self) -> Self {
+            vfmssb(self, b, c)
+        }
+    }
+
+    #[unstable(feature = "stdarch_s390x", issue = "135681")]
+    impl VectorMadd for vector_double {
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_madd(self, b: Self, c: Self) -> Self {
+            vfmadb(self, b, c)
+        }
+
+        #[inline]
+        #[target_feature(enable = "vector")]
+        unsafe fn vec_msub(self, b: Self, c: Self) -> Self {
+            vfmsdb(self, b, c)
+        }
+    }
+
+    macro_rules!
impl_vec_unpack { + ($mask:ident $instr:ident $src:ident $shuffled:ident $dst:ident $width:literal) => { + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr($instr))] + unsafe fn $instr(a: $src) -> $dst { + simd_as(simd_shuffle::<_, _, $shuffled>( + a, + a, + const { ShuffleMask::<$width>::$mask() }, + )) + } + }; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorUnpackh { + type Result; + unsafe fn vec_unpackh(self) -> Self::Result; + } + + impl_vec_unpack!(unpack_high vuphb vector_signed_char i8x8 vector_signed_short 8); + impl_vec_unpack!(unpack_high vuphh vector_signed_short i16x4 vector_signed_int 4); + impl_vec_unpack!(unpack_high vuphf vector_signed_int i32x2 vector_signed_long_long 2); + + impl_vec_unpack!(unpack_high vuplhb vector_unsigned_char u8x8 vector_unsigned_short 8); + impl_vec_unpack!(unpack_high vuplhh vector_unsigned_short u16x4 vector_unsigned_int 4); + impl_vec_unpack!(unpack_high vuplhf vector_unsigned_int u32x2 vector_unsigned_long_long 2); + + impl_vec_trait! {[VectorUnpackh vec_unpackh] vuphb (vector_signed_char) -> vector_signed_short} + impl_vec_trait! {[VectorUnpackh vec_unpackh] vuphh (vector_signed_short) -> vector_signed_int} + impl_vec_trait! {[VectorUnpackh vec_unpackh] vuphf (vector_signed_int) -> vector_signed_long_long} + + impl_vec_trait! {[VectorUnpackh vec_unpackh] vuplhb (vector_unsigned_char) -> vector_unsigned_short} + impl_vec_trait! {[VectorUnpackh vec_unpackh] vuplhh (vector_unsigned_short) -> vector_unsigned_int} + impl_vec_trait! {[VectorUnpackh vec_unpackh] vuplhf (vector_unsigned_int) -> vector_unsigned_long_long} + + impl_vec_trait! {[VectorUnpackh vec_unpackh]+ vuplhb (vector_bool_char) -> vector_bool_short} + impl_vec_trait! {[VectorUnpackh vec_unpackh]+ vuplhh (vector_bool_short) -> vector_bool_int} + impl_vec_trait! {[VectorUnpackh vec_unpackh]+ vuplhf (vector_bool_int) -> vector_bool_long_long} + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorUnpackl { + type Result; + unsafe fn vec_unpackl(self) -> Self::Result; + } + + // FIXME(llvm): a shuffle + simd_as does not currently optimize into a single instruction like + // unpachk above. Tracked in https://github.com/llvm/llvm-project/issues/129576. + + impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplbw (vector_signed_char) -> vector_signed_short} + impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplhw (vector_signed_short) -> vector_signed_int} + impl_vec_trait! {[VectorUnpackl vec_unpackl] vuplfw (vector_signed_int) -> vector_signed_long_long} + + impl_vec_trait! {[VectorUnpackl vec_unpackl] vupllb (vector_unsigned_char) -> vector_unsigned_short} + impl_vec_trait! {[VectorUnpackl vec_unpackl] vupllh (vector_unsigned_short) -> vector_unsigned_int} + impl_vec_trait! {[VectorUnpackl vec_unpackl] vupllf (vector_unsigned_int) -> vector_unsigned_long_long} + + impl_vec_trait! {[VectorUnpackl vec_unpackl]+ vupllb (vector_bool_char) -> vector_bool_short} + impl_vec_trait! {[VectorUnpackl vec_unpackl]+ vupllh (vector_bool_short) -> vector_bool_int} + impl_vec_trait! {[VectorUnpackl vec_unpackl]+ vupllf (vector_bool_int) -> vector_bool_long_long} + + test_impl! { vec_vavgb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char [ vavgb, vavgb ] } + test_impl! { vec_vavgh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short [ vavgh, vavgh ] } + test_impl! { vec_vavgf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int [ vavgf, vavgf ] } + test_impl! 
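+
+    // Big-endian note (assumption): the "high" half of a vector is its
+    // lowest-indexed half, so per lane the signed vec_unpackh above behaves
+    // like the sketch below, and vec_unpackl does the same with `a[i + 8]`:
+    //
+    //     fn unpackh_i8(a: [i8; 16]) -> [i16; 8] {
+    //         core::array::from_fn(|i| a[i] as i16)
+    //     }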
{ vec_vavgg(a: vector_signed_long_long, b: vector_signed_long_long) -> vector_signed_long_long [ vavgg, vavgg ] } + + test_impl! { vec_vavglb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char [ vavglb, vavglb ] } + test_impl! { vec_vavglh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short [ vavglh, vavglh ] } + test_impl! { vec_vavglf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int [ vavglf, vavglf ] } + test_impl! { vec_vavglg(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> vector_unsigned_long_long [ vavglg, vavglg ] } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorAvg { + type Result; + unsafe fn vec_avg(self, b: Other) -> Self::Result; + } + + impl_vec_trait! { [VectorAvg vec_avg] 2 (vec_vavglb, vec_vavgb, vec_vavglh, vec_vavgh, vec_vavglf, vec_vavgf, vec_vavglg, vec_vavgg) } + + macro_rules! impl_mul { + ([$Trait:ident $m:ident] $fun:ident ($a:ty, $b:ty) -> $r:ty) => { + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl $Trait<$r> for $a { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn $m(self, b: $b) -> $r { + $fun(transmute(self), transmute(b)) + } + } + }; + ([$Trait:ident $m:ident] $fun:ident ($a:ty, $b:ty, $c:ty) -> $r:ty) => { + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl $Trait for $a { + type Result = $r; + #[inline] + #[target_feature(enable = "vector")] + unsafe fn $m(self, b: $b, c: $c) -> $r { + $fun(self, b, c) + } + } + }; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorMule { + unsafe fn vec_mule(self, b: Self) -> Result; + } + + // FIXME(llvm) sadly this does not yet work https://github.com/llvm/llvm-project/issues/129705 + // #[target_feature(enable = "vector")] + // #[cfg_attr(test, assert_instr(vmleh))] + // unsafe fn vec_vmleh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int { + // let even_a: vector_unsigned_int = simd_as(simd_shuffle::<_, _, u16x4>( + // a, + // a, + // const { ShuffleMask([0, 2, 4, 6]) }, + // )); + // + // let even_b: vector_unsigned_int = simd_as(simd_shuffle::<_, _, u16x4>( + // b, + // b, + // const { ShuffleMask([0, 2, 4, 6]) }, + // )); + // + // simd_mul(even_a, even_b) + // } + + test_impl! { vec_vmeb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short [ vmeb, vmeb ] } + test_impl! { vec_vmeh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int[ vmeh, vmeh ] } + test_impl! { vec_vmef(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long [ vmef, vmef ] } + + test_impl! { vec_vmleb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short [ vmleb, vmleb ] } + test_impl! { vec_vmleh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int[ vmleh, vmleh ] } + test_impl! 
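+
+    // Scalar model of one vec_avg lane (assumed from the VAVG definition):
+    // the sum is computed in double width, incremented, then halved.
+    //
+    //     fn avg_i32(a: i32, b: i32) -> i32 {
+    //         (((a as i64) + (b as i64) + 1) >> 1) as i32
+    //     }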
{ vec_vmlef(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long [ vmlef, vmlef ] } + + impl_mul!([VectorMule vec_mule] vec_vmeb (vector_signed_char, vector_signed_char) -> vector_signed_short ); + impl_mul!([VectorMule vec_mule] vec_vmeh (vector_signed_short, vector_signed_short) -> vector_signed_int); + impl_mul!([VectorMule vec_mule] vec_vmef (vector_signed_int, vector_signed_int) -> vector_signed_long_long ); + + impl_mul!([VectorMule vec_mule] vec_vmleb (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_short ); + impl_mul!([VectorMule vec_mule] vec_vmleh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int); + impl_mul!([VectorMule vec_mule] vec_vmlef (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long ); + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorMulo { + unsafe fn vec_mulo(self, b: Self) -> Result; + } + + test_impl! { vec_vmob(a: vector_signed_char, b: vector_signed_char) -> vector_signed_short [ vmob, vmob ] } + test_impl! { vec_vmoh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_int[ vmoh, vmoh ] } + test_impl! { vec_vmof(a: vector_signed_int, b: vector_signed_int) -> vector_signed_long_long [ vmof, vmof ] } + + test_impl! { vec_vmlob(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short [ vmlob, vmlob ] } + test_impl! { vec_vmloh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int[ vmloh, vmloh ] } + test_impl! { vec_vmlof(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long [ vmlof, vmlof ] } + + impl_mul!([VectorMulo vec_mulo] vec_vmob (vector_signed_char, vector_signed_char) -> vector_signed_short ); + impl_mul!([VectorMulo vec_mulo] vec_vmoh (vector_signed_short, vector_signed_short) -> vector_signed_int); + impl_mul!([VectorMulo vec_mulo] vec_vmof (vector_signed_int, vector_signed_int) -> vector_signed_long_long ); + + impl_mul!([VectorMulo vec_mulo] vec_vmlob (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_short ); + impl_mul!([VectorMulo vec_mulo] vec_vmloh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int); + impl_mul!([VectorMulo vec_mulo] vec_vmlof (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long ); + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorMulh { + unsafe fn vec_mulh(self, b: Self) -> Result; + } + + test_impl! { vec_vmhb(a: vector_signed_char, b: vector_signed_char) -> vector_signed_char [ vmhb, vmhb ] } + test_impl! { vec_vmhh(a: vector_signed_short, b: vector_signed_short) -> vector_signed_short [ vmhh, vmhh ] } + test_impl! { vec_vmhf(a: vector_signed_int, b: vector_signed_int) -> vector_signed_int [ vmhf, vmhf ] } + + test_impl! { vec_vmlhb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_char [ vmlhb, vmlhb ] } + test_impl! { vec_vmlhh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short [ vmlhh, vmlhh ] } + test_impl! 
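+
+    // Scalar model of the even/odd widening multiplies: vec_mule multiplies
+    // the even-indexed lanes into double-width results, and vec_mulo does
+    // the same with index 2 * i + 1:
+    //
+    //     fn mule_i16(a: [i16; 8], b: [i16; 8]) -> [i32; 4] {
+    //         core::array::from_fn(|i| a[2 * i] as i32 * b[2 * i] as i32)
+    //     }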
{ vec_vmlhf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int [ vmlhf, vmlhf ] } + + impl_mul!([VectorMulh vec_mulh] vec_vmhb (vector_signed_char, vector_signed_char) -> vector_signed_char); + impl_mul!([VectorMulh vec_mulh] vec_vmhh (vector_signed_short, vector_signed_short) -> vector_signed_short); + impl_mul!([VectorMulh vec_mulh] vec_vmhf (vector_signed_int, vector_signed_int) -> vector_signed_int); + + impl_mul!([VectorMulh vec_mulh] vec_vmlhb (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char); + impl_mul!([VectorMulh vec_mulh] vec_vmlhh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short); + impl_mul!([VectorMulh vec_mulh] vec_vmlhf (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int); + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorMeadd { + type Result; + unsafe fn vec_meadd(self, b: Self, c: Self::Result) -> Self::Result; + } + + test_impl! { vec_vmaeb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_short) -> vector_signed_short [ vmaeb, vmaeb ] } + test_impl! { vec_vmaeh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_int) -> vector_signed_int[ vmaeh, vmaeh ] } + test_impl! { vec_vmaef(a: vector_signed_int, b: vector_signed_int, c: vector_signed_long_long) -> vector_signed_long_long [ vmaef, vmaef ] } + + test_impl! { vec_vmaleb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_short) -> vector_unsigned_short [ vmaleb, vmaleb ] } + test_impl! { vec_vmaleh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int) -> vector_unsigned_int[ vmaleh, vmaleh ] } + test_impl! { vec_vmalef(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_long_long) -> vector_unsigned_long_long [ vmalef, vmalef ] } + + impl_mul!([VectorMeadd vec_meadd] vec_vmaeb (vector_signed_char, vector_signed_char, vector_signed_short) -> vector_signed_short ); + impl_mul!([VectorMeadd vec_meadd] vec_vmaeh (vector_signed_short, vector_signed_short, vector_signed_int) -> vector_signed_int); + impl_mul!([VectorMeadd vec_meadd] vec_vmaef (vector_signed_int, vector_signed_int, vector_signed_long_long) -> vector_signed_long_long ); + + impl_mul!([VectorMeadd vec_meadd] vec_vmaleb (vector_unsigned_char, vector_unsigned_char, vector_unsigned_short) -> vector_unsigned_short ); + impl_mul!([VectorMeadd vec_meadd] vec_vmaleh (vector_unsigned_short, vector_unsigned_short, vector_unsigned_int) -> vector_unsigned_int); + impl_mul!([VectorMeadd vec_meadd] vec_vmalef (vector_unsigned_int, vector_unsigned_int, vector_unsigned_long_long) -> vector_unsigned_long_long ); + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorMoadd { + type Result; + unsafe fn vec_moadd(self, b: Self, c: Self::Result) -> Self::Result; + } + + test_impl! { vec_vmaob(a: vector_signed_char, b: vector_signed_char, c: vector_signed_short) -> vector_signed_short [ vmaob, vmaob ] } + test_impl! { vec_vmaoh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_int) -> vector_signed_int[ vmaoh, vmaoh ] } + test_impl! { vec_vmaof(a: vector_signed_int, b: vector_signed_int, c: vector_signed_long_long) -> vector_signed_long_long [ vmaof, vmaof ] } + + test_impl! { vec_vmalob(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_short) -> vector_unsigned_short [ vmalob, vmalob ] } + test_impl! 
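+
+    // Scalar model of one vec_meadd lane (assumption): the even-lane
+    // widening product plus the accumulator, with wrapping addition;
+    // vec_moadd uses the odd lanes instead.
+    //
+    //     fn meadd_i16(a: [i16; 8], b: [i16; 8], c: [i32; 4]) -> [i32; 4] {
+    //         core::array::from_fn(|i| (a[2 * i] as i32 * b[2 * i] as i32).wrapping_add(c[i]))
+    //     }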
{ vec_vmaloh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int) -> vector_unsigned_int[ vmaloh, vmaloh ] } + test_impl! { vec_vmalof(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_long_long) -> vector_unsigned_long_long [ vmalof, vmalof ] } + + impl_mul!([VectorMoadd vec_moadd] vec_vmaob (vector_signed_char, vector_signed_char, vector_signed_short) -> vector_signed_short ); + impl_mul!([VectorMoadd vec_moadd] vec_vmaoh (vector_signed_short, vector_signed_short, vector_signed_int) -> vector_signed_int); + impl_mul!([VectorMoadd vec_moadd] vec_vmaof (vector_signed_int, vector_signed_int, vector_signed_long_long) -> vector_signed_long_long ); + + impl_mul!([VectorMoadd vec_moadd] vec_vmalob (vector_unsigned_char, vector_unsigned_char, vector_unsigned_short) -> vector_unsigned_short ); + impl_mul!([VectorMoadd vec_moadd] vec_vmaloh (vector_unsigned_short, vector_unsigned_short, vector_unsigned_int) -> vector_unsigned_int); + impl_mul!([VectorMoadd vec_moadd] vec_vmalof (vector_unsigned_int, vector_unsigned_int, vector_unsigned_long_long) -> vector_unsigned_long_long ); + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorMhadd { + type Result; + unsafe fn vec_mhadd(self, b: Self, c: Self::Result) -> Self::Result; + } + + test_impl! { vec_vmahb(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char) -> vector_signed_char [ vmahb, vmahb ] } + test_impl! { vec_vmahh(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short) -> vector_signed_short[ vmahh, vmahh ] } + test_impl! { vec_vmahf(a: vector_signed_int, b: vector_signed_int, c: vector_signed_int) -> vector_signed_int [ vmahf, vmahf ] } + + test_impl! { vec_vmalhb(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char [ vmalhb, vmalhb ] } + test_impl! { vec_vmalhh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short[ vmalhh, vmalhh ] } + test_impl! { vec_vmalhf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int [ vmalhf, vmalhf ] } + + impl_mul!([VectorMhadd vec_mhadd] vec_vmahb (vector_signed_char, vector_signed_char, vector_signed_char) -> vector_signed_char ); + impl_mul!([VectorMhadd vec_mhadd] vec_vmahh (vector_signed_short, vector_signed_short, vector_signed_short) -> vector_signed_short); + impl_mul!([VectorMhadd vec_mhadd] vec_vmahf (vector_signed_int, vector_signed_int, vector_signed_int) -> vector_signed_int ); + + impl_mul!([VectorMhadd vec_mhadd] vec_vmalhb (vector_unsigned_char, vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char ); + impl_mul!([VectorMhadd vec_mhadd] vec_vmalhh (vector_unsigned_short, vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short); + impl_mul!([VectorMhadd vec_mhadd] vec_vmalhf (vector_unsigned_int, vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int ); + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorMladd { + type Result; + unsafe fn vec_mladd(self, b: Self, c: Self::Result) -> Self::Result; + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn simd_mladd(a: T, b: T, c: T) -> T { + simd_add(simd_mul(a, b), c) + } + + test_impl! { vec_vmal_ib(a: vector_signed_char, b: vector_signed_char, c: vector_signed_char) -> vector_signed_char [simd_mladd, vmalb ] } + test_impl! 
{ vec_vmal_ih(a: vector_signed_short, b: vector_signed_short, c: vector_signed_short) -> vector_signed_short[simd_mladd, vmalh ] } + test_impl! { vec_vmal_if(a: vector_signed_int, b: vector_signed_int, c: vector_signed_int) -> vector_signed_int [simd_mladd, vmalf ] } + + test_impl! { vec_vmal_ub(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_char) -> vector_unsigned_char [simd_mladd, vmalb ] } + test_impl! { vec_vmal_uh(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_short) -> vector_unsigned_short[simd_mladd, vmalh ] } + test_impl! { vec_vmal_uf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_int) -> vector_unsigned_int [simd_mladd, vmalf ] } + + impl_mul!([VectorMladd vec_mladd] vec_vmal_ib (vector_signed_char, vector_signed_char, vector_signed_char) -> vector_signed_char ); + impl_mul!([VectorMladd vec_mladd] vec_vmal_ih (vector_signed_short, vector_signed_short, vector_signed_short) -> vector_signed_short); + impl_mul!([VectorMladd vec_mladd] vec_vmal_if (vector_signed_int, vector_signed_int, vector_signed_int) -> vector_signed_int ); + + impl_mul!([VectorMladd vec_mladd] vec_vmal_ub (vector_unsigned_char, vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_char ); + impl_mul!([VectorMladd vec_mladd] vec_vmal_uh (vector_unsigned_short, vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_short); + impl_mul!([VectorMladd vec_mladd] vec_vmal_uf (vector_unsigned_int, vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_int ); + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorGfmsum { + unsafe fn vec_gfmsum(self, b: Self) -> Result; + } + + test_impl! { vec_vgfmb(a: vector_unsigned_char, b: vector_unsigned_char) -> vector_unsigned_short [ vgfmb, vgfmb ] } + test_impl! { vec_vgfmh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_int[ vgfmh, vgfmh] } + test_impl! { vec_vgfmf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_long_long [ vgfmf, vgfmf ] } + + impl_mul!([VectorGfmsum vec_gfmsum] vec_vgfmb (vector_unsigned_char, vector_unsigned_char) -> vector_unsigned_short ); + impl_mul!([VectorGfmsum vec_gfmsum] vec_vgfmh (vector_unsigned_short, vector_unsigned_short) -> vector_unsigned_int); + impl_mul!([VectorGfmsum vec_gfmsum] vec_vgfmf (vector_unsigned_int, vector_unsigned_int) -> vector_unsigned_long_long ); + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorGfmsumAccum { + type Result; + unsafe fn vec_gfmsum_accum(self, b: Self, c: Self::Result) -> Self::Result; + } + + test_impl! { vec_vgfmab(a: vector_unsigned_char, b: vector_unsigned_char, c: vector_unsigned_short) -> vector_unsigned_short [ vgfmab, vgfmab ] } + test_impl! { vec_vgfmah(a: vector_unsigned_short, b: vector_unsigned_short, c: vector_unsigned_int) -> vector_unsigned_int[ vgfmah, vgfmah] } + test_impl! 
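+
+    // vec_mladd keeps only the low half of the product (simd_mul followed by
+    // simd_add above), so each lane is plain wrapping arithmetic in the
+    // element type:
+    //
+    //     fn mladd_i16(a: i16, b: i16, c: i16) -> i16 {
+    //         a.wrapping_mul(b).wrapping_add(c)
+    //     }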
{ vec_vgfmaf(a: vector_unsigned_int, b: vector_unsigned_int, c: vector_unsigned_long_long) -> vector_unsigned_long_long [ vgfmaf, vgfmaf ] } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorGfmsumAccum for vector_unsigned_char { + type Result = vector_unsigned_short; + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_gfmsum_accum(self, b: Self, c: Self::Result) -> Self::Result { + vec_vgfmab(self, b, c) + } + } + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorGfmsumAccum for vector_unsigned_short { + type Result = vector_unsigned_int; + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_gfmsum_accum(self, b: Self, c: Self::Result) -> Self::Result { + vec_vgfmah(self, b, c) + } + } + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorGfmsumAccum for vector_unsigned_int { + type Result = vector_unsigned_long_long; + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_gfmsum_accum(self, b: Self, c: Self::Result) -> Self::Result { + vec_vgfmaf(self, b, c) + } + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vgef, D = 3))] + unsafe fn vgef( + a: vector_unsigned_int, + b: vector_unsigned_int, + c: *const u32, + ) -> vector_unsigned_int { + static_assert_uimm_bits!(D, 2); + let offset: u32 = simd_extract(b, D); + let ptr = c.byte_add(offset as usize); + let value = ptr.read(); + simd_insert(a, D, value) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vgeg, D = 1))] + unsafe fn vgeg( + a: vector_unsigned_long_long, + b: vector_unsigned_long_long, + c: *const u64, + ) -> vector_unsigned_long_long { + static_assert_uimm_bits!(D, 1); + let offset: u64 = simd_extract(b, D); + let ptr = c.byte_add(offset as usize); + let value = ptr.read(); + simd_insert(a, D, value) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorGatherElement { + type Element; + type Offset; + unsafe fn vec_gather_element( + self, + b: Self::Offset, + c: *const Self::Element, + ) -> Self; + } + + macro_rules! impl_vec_gather_element { + ($($instr:ident $ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorGatherElement for $ty { + type Element = l_t_t!($ty); + type Offset = t_u!($ty); + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_gather_element(self, b: Self::Offset, c: *const Self::Element) -> Self { + transmute($instr::(transmute(self), b, c.cast())) + } + } + )* + } + } + + impl_vec_gather_element! 
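+
+    // Note that the offsets in `b` are byte offsets from `c`, not element
+    // indices (see the `byte_add` in the vgef/vgeg lowering above); a scalar
+    // model of one gathered lane:
+    //
+    //     unsafe fn gather_lane<const D: usize>(mut a: [u32; 4], b: [u32; 4], c: *const u32) -> [u32; 4] {
+    //         a[D] = unsafe { c.byte_add(b[D] as usize).read() };
+    //         a
+    //     }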
{ + vgef vector_signed_int + vgef vector_bool_int + vgef vector_unsigned_int + + vgeg vector_signed_long_long + vgeg vector_bool_long_long + vgeg vector_unsigned_long_long + + vgef vector_float + vgeg vector_double + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vscef, D = 3))] + unsafe fn vscef(a: vector_unsigned_int, b: vector_unsigned_int, c: *mut u32) { + static_assert_uimm_bits!(D, 2); + let value = simd_extract(a, D); + let offset: u32 = simd_extract(b, D); + let ptr = c.byte_add(offset as usize); + ptr.write(value); + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vsceg, D = 1))] + unsafe fn vsceg( + a: vector_unsigned_long_long, + b: vector_unsigned_long_long, + c: *mut u64, + ) { + static_assert_uimm_bits!(D, 1); + let value = simd_extract(a, D); + let offset: u64 = simd_extract(b, D); + let ptr = c.byte_add(offset as usize); + ptr.write(value); + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorScatterElement { + type Element; + type Offset; + unsafe fn vec_scatter_element(self, b: Self::Offset, c: *mut Self::Element); + } + + macro_rules! impl_vec_scatter_element { + ($($instr:ident $ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorScatterElement for $ty { + type Element = l_t_t!($ty); + type Offset = t_u!($ty); + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_scatter_element(self, b: Self::Offset, c: *mut Self::Element) { + $instr::(transmute(self), b, c.cast()) + } + } + )* + } + } + + impl_vec_scatter_element! { + vscef vector_signed_int + vscef vector_bool_int + vscef vector_unsigned_int + + vsceg vector_signed_long_long + vsceg vector_bool_long_long + vsceg vector_unsigned_long_long + + vscef vector_float + vsceg vector_double + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSel: Sized { + unsafe fn vec_sel(self, b: Self, c: Mask) -> Self; + } + + macro_rules! impl_vec_sel { + ($($ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorSel for $ty { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_sel(self, b: Self, c: t_u!($ty)) -> Self { + let b = simd_and(b, transmute(c)); + let a = simd_and(self, simd_xor(transmute(c), transmute(vector_signed_char([!0; 16])))); + simd_or(a, b) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorSel for $ty { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_sel(self, b: Self, c: t_b!($ty)) -> Self { + // defer to the implementation with an unsigned mask + self.vec_sel(b, transmute::<_, t_u!($ty)>(c)) + } + } + )* + } + } + + impl_vec_sel! 
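+
+    // vec_sel is a pure bitwise select: for each bit, take `b` where the
+    // mask bit is set and `a` where it is clear, i.e. per word:
+    //
+    //     fn sel(a: u32, b: u32, c: u32) -> u32 {
+    //         (a & !c) | (b & c)
+    //     }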
{ + vector_signed_char + vector_signed_short + vector_signed_int + vector_signed_long_long + + vector_unsigned_char + vector_unsigned_short + vector_unsigned_int + vector_unsigned_long_long + + vector_bool_char + vector_bool_short + vector_bool_int + vector_bool_long_long + + vector_float + vector_double + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorFpTestDataClass { + type Result; + unsafe fn vec_fp_test_data_class(self) -> (Self::Result, i32); + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorFpTestDataClass for vector_float { + type Result = vector_bool_int; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_fp_test_data_class(self) -> (Self::Result, i32) { + let PackedTuple { x, y } = vftcisb(self, CLASS); + (x, y) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorFpTestDataClass for vector_double { + type Result = vector_bool_long_long; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_fp_test_data_class(self) -> (Self::Result, i32) { + let PackedTuple { x, y } = vftcidb(self, CLASS); + (x, y) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorCompare { + unsafe fn vec_all_lt(self, other: Self) -> i32; + unsafe fn vec_all_le(self, other: Self) -> i32; + unsafe fn vec_all_gt(self, other: Self) -> i32; + unsafe fn vec_all_ge(self, other: Self) -> i32; + } + + // NOTE: this implementation is currently non-optimal, but it does work for floats even with + // only `vector` enabled. + // + // - https://github.com/llvm/llvm-project/issues/129434 + // - https://github.com/llvm/llvm-project/issues/130424 + macro_rules! impl_vec_compare { + ($($ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorCompare for $ty { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_all_lt(self, other: Self) -> i32 { + simd_reduce_all(simd_lt::<_, t_b!($ty)>(self, other)) as i32 + } + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_all_le(self, other: Self) -> i32 { + simd_reduce_all(simd_le::<_, t_b!($ty)>(self, other)) as i32 + } + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_all_gt(self, other: Self) -> i32 { + simd_reduce_all(simd_gt::<_, t_b!($ty)>(self, other)) as i32 + } + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_all_ge(self, other: Self) -> i32 { + simd_reduce_all(simd_ge::<_, t_b!($ty)>(self, other)) as i32 + } + } + )* + } + } + + impl_vec_compare! { + vector_signed_char + vector_unsigned_char + + vector_signed_short + vector_unsigned_short + + vector_signed_int + vector_unsigned_int + vector_float + + vector_signed_long_long + vector_unsigned_long_long + vector_double + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorTestMask { + type Mask; + unsafe fn vec_test_mask(self, other: Self::Mask) -> i32; + } + + macro_rules! impl_vec_test_mask { + ($($instr:ident $ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorTestMask for $ty { + type Mask = t_u!($ty); + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_test_mask(self, other: Self::Mask) -> i32 { + vtm(transmute(self), transmute(other)) + } + } + )* + } + } + + impl_vec_test_mask! 
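+
+    // The vec_all_* predicates reduce an element-wise comparison to a single
+    // flag (simd_reduce_all above); a scalar model of vec_all_lt over four
+    // lanes:
+    //
+    //     fn all_lt(a: [i32; 4], b: [i32; 4]) -> i32 {
+    //         a.iter().zip(b.iter()).all(|(x, y)| x < y) as i32
+    //     }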
{ + vector_signed_char + vector_signed_short + vector_signed_int + vector_signed_long_long + + vector_unsigned_char + vector_unsigned_short + vector_unsigned_int + vector_unsigned_long_long + + vector_float + vector_double + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSearchString { + unsafe fn vec_search_string_cc( + self, + b: Self, + c: vector_unsigned_char, + ) -> (vector_unsigned_char, i32); + + unsafe fn vec_search_string_until_zero_cc( + self, + b: Self, + c: vector_unsigned_char, + ) -> (vector_unsigned_char, i32); + } + + macro_rules! impl_vec_search_string{ + ($($intr_s:ident $intr_sz:ident $ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorSearchString for $ty { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_search_string_cc(self, b: Self, c: vector_unsigned_char) -> (vector_unsigned_char, i32) { + let PackedTuple { x,y } = $intr_s(transmute(self), transmute(b), c); + (x, y) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_search_string_until_zero_cc(self, b: Self, c: vector_unsigned_char) -> (vector_unsigned_char, i32) { + let PackedTuple { x,y } = $intr_sz(transmute(self), transmute(b), c); + (x, y) + } + } + + )* + } + } + + impl_vec_search_string! { + vstrsb vstrszb vector_signed_char + vstrsb vstrszb vector_bool_char + vstrsb vstrszb vector_unsigned_char + + vstrsh vstrszh vector_signed_short + vstrsh vstrszh vector_bool_short + vstrsh vstrszh vector_unsigned_short + + vstrsf vstrszf vector_signed_int + vstrsf vstrszf vector_bool_int + vstrsf vstrszf vector_unsigned_int + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vcdgb))] + pub unsafe fn vcdgb(a: vector_signed_long_long) -> vector_double { + simd_as(a) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr(test, assert_instr(vcdlgb))] + pub unsafe fn vcdlgb(a: vector_unsigned_long_long) -> vector_double { + simd_as(a) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorDouble { + unsafe fn vec_double(self) -> vector_double; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorDouble for vector_signed_long_long { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_double(self) -> vector_double { + vcdgb(self) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorDouble for vector_unsigned_long_long { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_double(self) -> vector_double { + vcdlgb(self) + } + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr( + all(test, target_feature = "vector-enhancements-2"), + assert_instr(vcefb) + )] + pub unsafe fn vcefb(a: vector_signed_int) -> vector_float { + simd_as(a) + } + + #[inline] + #[target_feature(enable = "vector")] + #[cfg_attr( + all(test, target_feature = "vector-enhancements-2"), + assert_instr(vcelfb) + )] + pub unsafe fn vcelfb(a: vector_unsigned_int) -> vector_float { + simd_as(a) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorFloat { + unsafe fn vec_float(self) -> vector_float; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorFloat for vector_signed_int { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_float(self) -> vector_float { + vcefb(self) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorFloat for vector_unsigned_int { + #[inline] + 
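+
+    // simd_as performs the same lane-wise conversion as a scalar `as` cast,
+    // so vcdgb behaves like the model below (and vcdlgb, vcefb, vcelfb do
+    // the same for their element types):
+    //
+    //     fn double_lanes(a: [i64; 2]) -> [f64; 2] {
+    //         [a[0] as f64, a[1] as f64]
+    //     }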
#[target_feature(enable = "vector")] + unsafe fn vec_float(self) -> vector_float { + vcelfb(self) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorExtendSigned64 { + unsafe fn vec_extend_s64(self) -> vector_signed_long_long; + } + + #[inline] + #[target_feature(enable = "vector")] + // FIXME(llvm): https://github.com/llvm/llvm-project/issues/129899 + // #[cfg_attr(test, assert_instr(vsegb))] + pub unsafe fn vsegb(a: vector_signed_char) -> vector_signed_long_long { + simd_as(simd_shuffle::<_, _, i8x2>( + a, + a, + const { u32x2::from_array([7, 15]) }, + )) + } + + #[inline] + #[target_feature(enable = "vector")] + // FIXME(llvm): https://github.com/llvm/llvm-project/issues/129899 + // #[cfg_attr(test, assert_instr(vsegh))] + pub unsafe fn vsegh(a: vector_signed_short) -> vector_signed_long_long { + simd_as(simd_shuffle::<_, _, i16x2>( + a, + a, + const { u32x2::from_array([3, 7]) }, + )) + } + + #[inline] + #[target_feature(enable = "vector")] + // FIXME(llvm): https://github.com/llvm/llvm-project/issues/129899 + // #[cfg_attr(test, assert_instr(vsegf))] + pub unsafe fn vsegf(a: vector_signed_int) -> vector_signed_long_long { + simd_as(simd_shuffle::<_, _, i32x2>( + a, + a, + const { u32x2::from_array([1, 3]) }, + )) + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorExtendSigned64 for vector_signed_char { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_extend_s64(self) -> vector_signed_long_long { + vsegb(self) + } + } + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorExtendSigned64 for vector_signed_short { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_extend_s64(self) -> vector_signed_long_long { + vsegh(self) + } + } + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorExtendSigned64 for vector_signed_int { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_extend_s64(self) -> vector_signed_long_long { + vsegf(self) + } + } + + // NOTE: VectorSigned and VectorUnsigned make strong safety assumptions around floats. + // This is what C provides, but even IBM does not clearly document these constraints. + // + // https://doc.rust-lang.org/std/intrinsics/simd/fn.simd_cast.html + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSigned { + type Result; + unsafe fn vec_signed(self) -> Self::Result; + } + + test_impl! { vcgsb (a: vector_float) -> vector_signed_int [simd_cast, "vector-enhancements-2" vcgsb] } + test_impl! { vcgdb (a: vector_double) -> vector_signed_long_long [simd_cast, vcgdb] } + + impl_vec_trait! { [VectorSigned vec_signed] vcgsb (vector_float) -> vector_signed_int } + impl_vec_trait! { [VectorSigned vec_signed] vcgdb (vector_double) -> vector_signed_long_long } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorUnsigned { + type Result; + unsafe fn vec_unsigned(self) -> Self::Result; + } + + test_impl! { vclgsb (a: vector_float) -> vector_unsigned_int [simd_cast, "vector-enhancements-2" vclgsb] } + test_impl! { vclgdb (a: vector_double) -> vector_unsigned_long_long [simd_cast, vclgdb] } + + impl_vec_trait! { [VectorUnsigned vec_unsigned] vclgsb (vector_float) -> vector_unsigned_int } + impl_vec_trait! { [VectorUnsigned vec_unsigned] vclgdb (vector_double) -> vector_unsigned_long_long } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorCopyUntilZero { + unsafe fn vec_cp_until_zero(self) -> Self; + } + + test_impl! 
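+
+// The shuffles in vsegb/vsegh/vsegf above pick the last (big-endian: least
+// significant) element of each doubleword before sign-extending, so vsegb
+// behaves like:
+//
+//     fn extend_s64_bytes(a: [i8; 16]) -> [i64; 2] {
+//         [a[7] as i64, a[15] as i64]
+//     }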
{ vec_vistrb (a: vector_unsigned_char) -> vector_unsigned_char [vistrb, vistrb] } + test_impl! { vec_vistrh (a: vector_unsigned_short) -> vector_unsigned_short [vistrh, vistrh] } + test_impl! { vec_vistrf (a: vector_unsigned_int) -> vector_unsigned_int [vistrf, vistrf] } + + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrb (vector_signed_char) } + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrb (vector_bool_char) } + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrb (vector_unsigned_char) } + + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrh (vector_signed_short) } + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrh (vector_bool_short) } + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrh (vector_unsigned_short) } + + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrf (vector_signed_int) } + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrf (vector_bool_int) } + impl_vec_trait! { [VectorCopyUntilZero vec_cp_until_zero]+ vec_vistrf (vector_unsigned_int) } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorCopyUntilZeroCC: Sized { + unsafe fn vec_cp_until_zero_cc(self) -> (Self, i32); + } + + test_impl! { vec_vistrbs (a: vector_unsigned_char) -> PackedTuple [vistrbs, vistrbs] } + test_impl! { vec_vistrhs (a: vector_unsigned_short) -> PackedTuple [vistrhs, vistrhs] } + test_impl! { vec_vistrfs (a: vector_unsigned_int) -> PackedTuple [vistrfs, vistrfs] } + + macro_rules! impl_vec_copy_until_zero_cc { + ($($intr:ident $ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorCopyUntilZeroCC for $ty { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cp_until_zero_cc(self) -> (Self, i32) { + let PackedTuple { x,y } = $intr(transmute(self)); + (transmute(x), y) + } + } + + )* + } + } + + impl_vec_copy_until_zero_cc! { + vec_vistrbs vector_signed_char + vec_vistrbs vector_bool_char + vec_vistrbs vector_unsigned_char + + vec_vistrhs vector_signed_short + vec_vistrhs vector_bool_short + vec_vistrhs vector_unsigned_short + + vec_vistrfs vector_signed_int + vec_vistrfs vector_bool_int + vec_vistrfs vector_unsigned_int + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSrdb { + unsafe fn vec_srdb(self, b: Self) -> Self; + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorSld { + unsafe fn vec_sld(self, b: Self) -> Self; + + unsafe fn vec_sldw(self, b: Self) -> Self; + + unsafe fn vec_sldb(self, b: Self) -> Self; + } + + // FIXME(llvm) https://github.com/llvm/llvm-project/issues/129955 + // ideally we could implement this in terms of llvm.fshl.i128 + // #[link_name = "llvm.fshl.i128"] fn fshl_i128(a: u128, b: u128, c: u128) -> u128; + // transmute(fshl_i128(transmute(a), transmute(b), const { C * 8 } )) + + macro_rules! 
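+
+    // vec_cp_until_zero copies elements up to the first zero element and
+    // clears everything from there on; a scalar model for bytes (an
+    // assumption based on the VISTR semantics):
+    //
+    //     fn cp_until_zero(a: [u8; 16]) -> [u8; 16] {
+    //         let mut out = [0u8; 16];
+    //         for (i, &x) in a.iter().enumerate() {
+    //             if x == 0 { break; }
+    //             out[i] = x;
+    //         }
+    //         out
+    //     }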
impl_vec_sld { + ($($ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorSld for $ty { + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_sld(self, b: Self) -> Self { + static_assert_uimm_bits!(C, 4); + transmute(vsldb(transmute(self), transmute(b), C)) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_sldw(self, b: Self) -> Self { + static_assert_uimm_bits!(C, 2); + transmute(vsldb(transmute(self), transmute(b), const { 4 * C })) + } + + #[inline] + #[target_feature(enable = "vector-enhancements-2")] + unsafe fn vec_sldb(self, b: Self) -> Self { + static_assert_uimm_bits!(C, 3); + transmute(vsld(transmute(self), transmute(b), C)) + } + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorSrdb for $ty { + #[inline] + #[target_feature(enable = "vector-enhancements-2")] + unsafe fn vec_srdb(self, b: Self) -> Self { + static_assert_uimm_bits!(C, 3); + transmute(vsrd(transmute(self), transmute(b), C)) + } + } + )* + } + } + + impl_vec_sld! { + vector_signed_char + vector_bool_char + vector_unsigned_char + + vector_signed_short + vector_bool_short + vector_unsigned_short + + vector_signed_int + vector_bool_int + vector_unsigned_int + + vector_signed_long_long + vector_bool_long_long + vector_unsigned_long_long + + vector_float + vector_double + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorCompareRange: Sized { + type Result; + + unsafe fn vstrc(self, b: Self, c: Self) -> Self::Result; + unsafe fn vstrcz(self, b: Self, c: Self) -> Self::Result; + unsafe fn vstrcs(self, b: Self, c: Self) -> (Self::Result, i32); + unsafe fn vstrczs(self, b: Self, c: Self) -> (Self::Result, i32); + } + + const fn validate_compare_range_imm(imm: u32) { + if !matches!(imm, 0 | 4 | 8 | 12) { + panic!("IMM needs to be one of 0, 4, 8, 12"); + } + } + + macro_rules! impl_compare_range { + ($($ty:ident $vstrc:ident $vstrcs:ident $vstrcz:ident $vstrczs:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorCompareRange for $ty { + type Result = t_b!($ty); + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vstrc(self, b: Self, c: Self) -> Self::Result { + const { validate_compare_range_imm }; + $vstrc(self, b, c, IMM) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vstrcz(self, b: Self, c: Self) -> Self::Result { + const { validate_compare_range_imm }; + $vstrcz(self, b, c, IMM) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vstrcs(self, b: Self, c: Self) -> (Self::Result, i32) { + const { validate_compare_range_imm }; + let PackedTuple { x, y } = $vstrcs(self, b, c, IMM); + (x,y) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vstrczs(self, b: Self, c: Self) -> (Self::Result, i32) { + const { validate_compare_range_imm }; + let PackedTuple { x, y } = $vstrczs(self, b, c, IMM); + (x,y) + } + } + )* + } + } + + impl_compare_range! 
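+
+    // vec_sld::<C> above concatenates `a` and `b` and takes the 16 bytes
+    // starting at byte offset C (vec_sldw counts in 4-byte words); a scalar
+    // model:
+    //
+    //     fn sld<const C: usize>(a: [u8; 16], b: [u8; 16]) -> [u8; 16] {
+    //         let mut buf = [0u8; 32];
+    //         buf[..16].copy_from_slice(&a);
+    //         buf[16..].copy_from_slice(&b);
+    //         core::array::from_fn(|i| buf[C + i])
+    //     }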
{ + vector_unsigned_char vstrcb vstrcbs vstrczb vstrczbs + vector_unsigned_short vstrch vstrchs vstrczh vstrczhs + vector_unsigned_int vstrcf vstrcfs vstrczf vstrczfs + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorComparePredicate: Sized { + type Result; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cmpgt(self, other: Self) -> Self::Result { + simd_gt(self, other) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cmpge(self, other: Self) -> Self::Result { + simd_ge(self, other) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cmplt(self, other: Self) -> Self::Result { + simd_lt(self, other) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cmple(self, other: Self) -> Self::Result { + simd_le(self, other) + } + } + + macro_rules! impl_compare_predicate { + ($($ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorComparePredicate for $ty { + type Result = t_b!($ty); + } + )* + } + } + + impl_compare_predicate! { + vector_signed_char + vector_unsigned_char + + vector_signed_short + vector_unsigned_short + + vector_signed_int + vector_unsigned_int + vector_float + + vector_signed_long_long + vector_unsigned_long_long + vector_double + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorEquality: Sized { + type Result; + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cmpeq(self, other: Self) -> Self::Result { + simd_eq(self, other) + } + + #[inline] + #[target_feature(enable = "vector")] + unsafe fn vec_cmpne(self, other: Self) -> Self::Result { + simd_ne(self, other) + } + } + + macro_rules! impl_compare_equality { + ($($ty:ident)*) => { + $( + #[unstable(feature = "stdarch_s390x", issue = "135681")] + impl VectorEquality for $ty { + type Result = t_b!($ty); + } + )* + } + } + + impl_compare_equality! { + vector_bool_char + vector_signed_char + vector_unsigned_char + + vector_bool_short + vector_signed_short + vector_unsigned_short + + vector_bool_int + vector_signed_int + vector_unsigned_int + vector_float + + vector_bool_long_long + vector_signed_long_long + vector_unsigned_long_long + vector_double + } + + #[unstable(feature = "stdarch_s390x", issue = "135681")] + pub trait VectorEqualityIdx: Sized { + type Result; + + unsafe fn vec_cmpeq_idx(self, other: Self) -> Self::Result; + unsafe fn vec_cmpne_idx(self, other: Self) -> Self::Result; + + unsafe fn vec_cmpeq_idx_cc(self, other: Self) -> (Self::Result, i32); + unsafe fn vec_cmpne_idx_cc(self, other: Self) -> (Self::Result, i32); + + unsafe fn vec_cmpeq_or_0_idx(self, other: Self) -> Self::Result; + unsafe fn vec_cmpne_or_0_idx(self, other: Self) -> Self::Result; + + unsafe fn vec_cmpeq_or_0_idx_cc(self, other: Self) -> (Self::Result, i32); + unsafe fn vec_cmpne_or_0_idx_cc(self, other: Self) -> (Self::Result, i32); + } + + macro_rules! 
impl_compare_equality_idx {
+        ($($ty:ident $ret:ident
+            $cmpeq:ident $cmpne:ident
+            $cmpeq_or_0:ident $cmpne_or_0:ident
+            $cmpeq_cc:ident $cmpne_cc:ident
+            $cmpeq_or_0_cc:ident $cmpne_or_0_cc:ident
+        )*) => {
+            $(
+                #[unstable(feature = "stdarch_s390x", issue = "135681")]
+                impl VectorEqualityIdx for $ty {
+                    type Result = $ret;
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_cmpeq_idx(self, other: Self) -> Self::Result {
+                        transmute($cmpeq(transmute(self), transmute(other)))
+                    }
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_cmpne_idx(self, other: Self) -> Self::Result {
+                        transmute($cmpne(transmute(self), transmute(other)))
+                    }
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_cmpeq_or_0_idx(self, other: Self) -> Self::Result {
+                        transmute($cmpeq_or_0(transmute(self), transmute(other)))
+                    }
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_cmpne_or_0_idx(self, other: Self) -> Self::Result {
+                        transmute($cmpne_or_0(transmute(self), transmute(other)))
+                    }
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_cmpeq_idx_cc(self, other: Self) -> (Self::Result, i32) {
+                        let PackedTuple { x, y } = $cmpeq_cc(transmute(self), transmute(other));
+                        (transmute(x), y)
+                    }
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_cmpne_idx_cc(self, other: Self) -> (Self::Result, i32) {
+                        let PackedTuple { x, y } = $cmpne_cc(transmute(self), transmute(other));
+                        (transmute(x), y)
+                    }
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_cmpeq_or_0_idx_cc(self, other: Self) -> (Self::Result, i32) {
+                        let PackedTuple { x, y } = $cmpeq_or_0_cc(transmute(self), transmute(other));
+                        (transmute(x), y)
+                    }
+
+                    #[inline]
+                    #[target_feature(enable = "vector")]
+                    unsafe fn vec_cmpne_or_0_idx_cc(self, other: Self) -> (Self::Result, i32) {
+                        let PackedTuple { x, y } = $cmpne_or_0_cc(transmute(self), transmute(other));
+                        (transmute(x), y)
+                    }
+                }
+            )*
+        }
+    }
+
+    impl_compare_equality_idx! {
+        vector_signed_char vector_signed_char vfeeb vfeneb vfeezb vfenezb vfeebs vfenebs vfeezbs vfenezbs
+        vector_bool_char vector_unsigned_char vfeeb vfeneb vfeezb vfenezb vfeebs vfenebs vfeezbs vfenezbs
+        vector_unsigned_char vector_unsigned_char vfeeb vfeneb vfeezb vfenezb vfeebs vfenebs vfeezbs vfenezbs
+        vector_signed_short vector_signed_short vfeeh vfeneh vfeezh vfenezh vfeehs vfenehs vfeezhs vfenezhs
+        vector_bool_short vector_unsigned_short vfeeh vfeneh vfeezh vfenezh vfeehs vfenehs vfeezhs vfenezhs
+        vector_unsigned_short vector_unsigned_short vfeeh vfeneh vfeezh vfenezh vfeehs vfenehs vfeezhs vfenezhs
+        vector_signed_int vector_signed_int vfeef vfenef vfeezf vfenezf vfeefs vfenefs vfeezfs vfenezfs
+        vector_bool_int vector_unsigned_int vfeef vfenef vfeezf vfenezf vfeefs vfenefs vfeezfs vfenezfs
+        vector_unsigned_int vector_unsigned_int vfeef vfenef vfeezf vfenezf vfeefs vfenefs vfeezfs vfenezfs
+    }
+}
+
+/// Load Count to Block Boundary
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(lcbb, BLOCK_BOUNDARY = 512))]
+unsafe fn __lcbb<const BLOCK_BOUNDARY: u16>(ptr: *const u8) -> u32 {
+    lcbb(ptr, const { validate_block_boundary(BLOCK_BOUNDARY) })
+}
+
+/// Vector element-wise addition.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_add<T: sealed::VectorAdd<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_add(b)
+}
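+
+// A scalar model of __lcbb above (assumed from the LCBB definition): the
+// number of bytes from `ptr` to the next BLOCK_BOUNDARY-aligned address,
+// capped at the 16-byte vector register size.
+//
+//     fn lcbb_model(addr: usize, boundary: usize) -> u32 {
+//         (boundary - addr % boundary).min(16) as u32
+//     }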
+
+/// Vector element-wise subtraction.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_sub<T: sealed::VectorSub<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_sub(b)
+}
+
+/// Vector element-wise multiplication.
+///
+/// ## Purpose
+/// Compute the products of corresponding elements of two vectors.
+///
+/// ## Result value
+/// Each element of r receives the product of the corresponding elements of a and b.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_mul<T: sealed::VectorMul>(a: T, b: T) -> T {
+    a.vec_mul(b)
+}
+
+/// Vector Count Leading Zeros
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cntlz<T: sealed::VectorCountBits>(a: T) -> T::Result {
+    a.vec_cntlz()
+}
+
+/// Vector Count Trailing Zeros
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_cnttz<T: sealed::VectorCountBits>(a: T) -> T::Result {
+    a.vec_cnttz()
+}
+
+/// Vector Population Count
+///
+/// Computes the population count (number of set bits) in each element of the input.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_popcnt<T: sealed::VectorCountBits>(a: T) -> T::Result {
+    a.vec_popcnt()
+}
+
+/// Vector element-wise maximum.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_max<T: sealed::VectorMax<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_max(b)
+}
+
+/// Vector element-wise minimum.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_min<T: sealed::VectorMin<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_min(b)
+}
+
+/// Vector abs.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_abs<T: sealed::VectorAbs>(a: T) -> T {
+    a.vec_abs()
+}
+
+/// Vector negative abs.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_nabs<T: sealed::VectorNabs>(a: T) -> T {
+    a.vec_nabs()
+}
+
+/// Vector Negative Multiply Add
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_nmadd<T: sealed::VectorNmadd>(a: T, b: T, c: T) -> T {
+    a.vec_nmadd(b, c)
+}
+
+/// Vector Negative Multiply Subtract
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_nmsub<T: sealed::VectorNmadd>(a: T, b: T, c: T) -> T {
+    a.vec_nmsub(b, c)
+}
+
+/// Vector square root.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_sqrt<T: sealed::VectorSqrt>(a: T) -> T {
+    a.vec_sqrt()
+}
+
+/// Vector Splat
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_splat<T: sealed::VectorSplat, const IMM: u32>(a: T) -> T {
+    a.vec_splat::<IMM>()
+}
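+
+// vec_splat above broadcasts lane IMM of an existing vector, while
+// vec_splats below broadcasts a scalar value into every lane. Roughly:
+//
+//     fn splat_lane<const IMM: usize>(a: [i32; 4]) -> [i32; 4] { [a[IMM]; 4] }
+//     fn splats(x: i32) -> [i32; 4] { [x; 4] }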
+/// Vector splats.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_splats<T: sealed::VectorSplats<U>, U>(a: T) -> U {
+    a.vec_splats()
+}
+
+/// Vector and
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_and<T: sealed::VectorAnd<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_and(b)
+}
+
+/// Vector or
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_or<T: sealed::VectorOr<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_or(b)
+}
+
+/// Vector xor
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_xor<T: sealed::VectorXor<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_xor(b)
+}
+
+/// Vector nor
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_nor<T: sealed::VectorNor<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_nor(b)
+}
+
+/// Vector nand
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_nand<T: sealed::VectorNand<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_nand(b)
+}
+
+/// Vector xnor
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_eqv<T: sealed::VectorEqv<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_eqv(b)
+}
+
+/// Vector andc.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_andc<T: sealed::VectorAndc<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_andc(b)
+}
+
+/// Vector OR with Complement
+///
+/// ## Purpose
+/// Performs a bitwise OR of the first vector with the bitwise-complemented second vector.
+///
+/// ## Result value
+/// r is the bitwise OR of a and the bitwise complement of b.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_orc<T: sealed::VectorOrc<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_orc(b)
+}
+
+/// Vector floor.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_floor<T: sealed::VectorFloor>(a: T) -> T {
+    a.vec_floor()
+}
+
+/// Vector ceil.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_ceil<T: sealed::VectorCeil>(a: T) -> T {
+    a.vec_ceil()
+}
+
+/// Returns a vector containing the truncated values of the corresponding elements of the given vector.
+/// Each element of the result contains the value of the corresponding element of a, truncated to an integral value.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_trunc<T: sealed::VectorTrunc>(a: T) -> T {
+    a.vec_trunc()
+}
+
+/// Returns a vector containing the values of the corresponding elements of the given vector,
+/// rounded to the nearest representable floating-point integer using IEEE round-to-nearest rounding.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_round<T: sealed::VectorRound>(a: T) -> T {
+    a.vec_round()
+}
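+
+// Bitwise identities for the logical family above, per lane:
+// vec_nor = !(a | b), vec_nand = !(a & b), vec_eqv = !(a ^ b),
+// vec_andc = a & !b, vec_orc = a | !b.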
+/// Returns a vector by using the current rounding mode to round every
+/// floating-point element in the given vector to integer.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_roundc<T: sealed::VectorRoundc>(a: T) -> T {
+    a.vec_roundc()
+}
+
+/// Returns a vector containing the largest representable floating-point integral values less
+/// than or equal to the values of the corresponding elements of the given vector.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_roundm<T: sealed::VectorFloor>(a: T) -> T {
+    // the IBM docs note
+    //
+    // > vec_roundm provides the same functionality as vec_floor, except that vec_roundm would not trigger the IEEE-inexact exception.
+    //
+    // but in practice `vec_floor` also does not trigger that exception, so both are equivalent
+    a.vec_floor()
+}
+
+/// Returns a vector containing the smallest representable floating-point integral values greater
+/// than or equal to the values of the corresponding elements of the given vector.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_roundp<T: sealed::VectorCeil>(a: T) -> T {
+    // the IBM docs note
+    //
+    // > vec_roundp provides the same functionality as vec_ceil, except that vec_roundp would not trigger the IEEE-inexact exception.
+    //
+    // but in practice `vec_ceil` also does not trigger that exception, so both are equivalent
+    a.vec_ceil()
+}
+
+/// Returns a vector containing the truncated values of the corresponding elements of the given vector.
+/// Each element of the result contains the value of the corresponding element of a, truncated to an integral value.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_roundz<T: sealed::VectorTrunc>(a: T) -> T {
+    // the IBM docs note
+    //
+    // > vec_roundz provides the same functionality as vec_trunc, except that vec_roundz would not trigger the IEEE-inexact exception.
+    //
+    // but in practice `vec_trunc` also does not trigger that exception, so both are equivalent
+    a.vec_trunc()
+}
+
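A worked value makes the four rounding modes concrete. Sketch only, with illustrative sample values and a hypothetical helper name:

// Sketch: the rounding family applied to the same input.
// For an element holding 2.5: vec_round -> 2.0 (round-to-nearest, ties to
// even), vec_roundp -> 3.0, vec_roundm -> 2.0, vec_roundz -> 2.0.
// For -1.5 the results are -2.0, -1.0, -2.0 and -1.0 respectively.
unsafe fn all_roundings(v: vector_double) -> [vector_double; 4] {
    [vec_round(v), vec_roundp(v), vec_roundm(v), vec_roundz(v)]
}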
+/// Returns a vector by using the current rounding mode to round every floating-point element in the given vector to integer.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_rint<T: sealed::VectorRint>(a: T) -> T {
+    a.vec_rint()
+}
+
+/// Vector Average
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_avg<T: sealed::VectorAvg<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_avg(b)
+}
+
+/// Vector Shift Left
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_sl<T: sealed::VectorSl<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_sl(b)
+}
+
+/// Vector Shift Right
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_sr<T: sealed::VectorSr<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_sr(b)
+}
+
+/// Vector Shift Right Algebraic
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_sra<T: sealed::VectorSra<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_sra(b)
+}
+
+/// Vector Shift Left by Byte
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_slb<T: sealed::VectorSlb<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_slb(b)
+}
+
+/// Vector Shift Right by Byte
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_srb<T: sealed::VectorSrb<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_srb(b)
+}
+
+/// Vector Shift Right Algebraic by Byte
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_srab<T: sealed::VectorSrab<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_srab(b)
+}
+
+/// Vector Element Rotate Left
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_rl<T: sealed::VectorRl<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_rl(b)
+}
+
+/// Performs a left shift for a vector by a given number of bits. Each element of the result is obtained by shifting the corresponding
+/// element of a left by the number of bits specified by the last 3 bits of every byte of b. The bits that are shifted out are replaced by zeros.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_sll<T>(a: T, b: vector_unsigned_char) -> T
+where
+    T: sealed::VectorSll,
+{
+    a.vec_sll(b)
+}
+
+/// Performs a right shift for a vector by a given number of bits. Each element of the result is obtained by shifting the corresponding
+/// element of a right by the number of bits specified by the last 3 bits of every byte of b. The bits that are shifted out are replaced by zeros.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_srl<T>(a: T, b: vector_unsigned_char) -> T
+where
+    T: sealed::VectorSrl,
+{
+    a.vec_srl(b)
+}
+
+/// Performs an algebraic right shift for a vector by a given number of bits. Each element of the result is obtained by shifting the corresponding
+/// element of a right by the number of bits specified by the last 3 bits of every byte of b. The bits that are shifted out are replaced by copies of
+/// the most significant bit of the element of a.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_sral<T>(a: T, b: vector_unsigned_char) -> T
+where
+    T: sealed::VectorSral,
+{
+    a.vec_sral(b)
+}
+
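The distinction between the element-wise shifts and the whole-register shifts above is easy to miss, so here is a sketch (not part of the patch; `shift_flavors` is a hypothetical name, and the uniform count vector for `vec_sll` reflects the ISA's expectation that all byte lanes agree):

// Sketch: vec_sl shifts each element by the matching element of its second
// argument (taken modulo the element width), while vec_sll shifts the
// entire 128-bit register by 0..=7 bits.
unsafe fn shift_flavors(
    a: vector_unsigned_int,
) -> (vector_unsigned_int, vector_unsigned_int) {
    let per_element = vec_sl(a, vector_unsigned_int([1, 2, 3, 4]));
    let whole_register = vec_sll(a, vector_unsigned_char([3; 16]));
    (per_element, whole_register)
}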
+/// Rotates each element of a vector left by a given number of bits. Each element of the result is obtained by rotating the corresponding element
+/// of a left by the number of bits specified by b, modulo the number of bits in the element.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_rli<T: sealed::VectorRli>(a: T, bits: core::ffi::c_ulong) -> T {
+    a.vec_rli(bits)
+}
+
+/// Returns a vector with the elements of the input vector in reversed order.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_reve<T: sealed::VectorReve>(a: T) -> T {
+    a.vec_reve()
+}
+
+/// Returns a vector where each vector element contains the corresponding byte-reversed vector element of the input vector.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_revb<T: sealed::VectorRevb>(a: T) -> T {
+    a.vec_revb()
+}
+
+/// Merges the most significant ("high") halves of two vectors.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_mergeh<T: sealed::VectorMergeh>(a: T, b: T) -> T {
+    a.vec_mergeh(b)
+}
+
+/// Vector Pack
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_pack<T: sealed::VectorPack<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_pack(b)
+}
+
+/// Vector Pack Saturated
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_packs<T: sealed::VectorPacks<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_packs(b)
+}
+
+/// Vector Pack Saturated Condition Code
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_packs_cc<T: sealed::VectorPacksCC>(a: T, b: T, c: *mut i32) -> T::Result {
+    let (x, y) = a.vec_packs_cc(b);
+    unsafe { c.write(y) };
+    x
+}
+
+/// Vector Pack Saturated Unsigned
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_packsu<T: sealed::VectorPacksu<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_packsu(b)
+}
+
+/// Vector Pack Saturated Unsigned Condition Code
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_packsu_cc<T: sealed::VectorPacksuCC>(a: T, b: T, c: *mut i32) -> T::Result {
+    let (x, y) = a.vec_packsu_cc(b);
+    unsafe { c.write(y) };
+    x
+}
+
+/// Vector Unpack High
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_unpackh<T: sealed::VectorUnpackh>(a: T) -> <T as sealed::VectorUnpackh>::Result {
+    a.vec_unpackh()
+}
+
+/// Vector Unpack Low
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_unpackl<T: sealed::VectorUnpackl>(a: T) -> <T as sealed::VectorUnpackl>::Result {
+    a.vec_unpackl()
+}
+
+/// Merges the least significant ("low") halves of two vectors.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_mergel<T: sealed::VectorMergel>(a: T, b: T) -> T {
+    a.vec_mergel(b)
+}
+
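Packing narrows and can saturate; unpacking widens again. A sketch of the round trip (editorial, not part of the patch; names and sample values are illustrative):

// Sketch: vec_packs narrows i16 -> i8 with signed saturation, so a lane
// holding 300 becomes 127 and -300 becomes -128; vec_unpackh/vec_unpackl
// then sign-extend the high and low halves of the packed result back to i16.
unsafe fn pack_roundtrip(
    a: vector_signed_short,
    b: vector_signed_short,
) -> (vector_signed_short, vector_signed_short) {
    let packed: vector_signed_char = vec_packs(a, b);
    (vec_unpackh(packed), vec_unpackl(packed))
}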
+/// Generates byte masks for elements in the return vector. For each bit in a, if the bit is one, all bit positions
+/// in the corresponding byte element of d are set to ones. Otherwise, if the bit is zero, the corresponding byte element is set to zero.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vgbm, MASK = 0x00FF))]
+pub unsafe fn vec_genmask<const MASK: u16>() -> vector_unsigned_char {
+    vector_unsigned_char(const { genmask::<MASK>() })
+}
+
+/// Vector Generate Mask (Byte)
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vrepib, L = 3, H = 5))]
+pub unsafe fn vec_genmasks_8<const L: u32, const H: u32>() -> vector_unsigned_char {
+    vector_unsigned_char(const { [genmasks(u8::BITS, L, H) as u8; 16] })
+}
+
+/// Vector Generate Mask (Halfword)
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vrepih, L = 3, H = 5))]
+pub unsafe fn vec_genmasks_16<const L: u32, const H: u32>() -> vector_unsigned_short {
+    vector_unsigned_short(const { [genmasks(u16::BITS, L, H) as u16; 8] })
+}
+
+/// Vector Generate Mask (Word)
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vgmf, L = 3, H = 5))]
+pub unsafe fn vec_genmasks_32<const L: u32, const H: u32>() -> vector_unsigned_int {
+    vector_unsigned_int(const { [genmasks(u32::BITS, L, H) as u32; 4] })
+}
+
+/// Vector Generate Mask (Doubleword)
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vgmg, L = 3, H = 5))]
+pub unsafe fn vec_genmasks_64<const L: u32, const H: u32>() -> vector_unsigned_long_long {
+    vector_unsigned_long_long(const { [genmasks(u64::BITS, L, H); 2] })
+}
+
+/// Returns a vector that contains some elements of two vectors, in the order specified by a third vector.
+/// Each byte of the result is selected by using the least significant 5 bits of the corresponding byte of c as an index into the concatenated bytes of a and b.
+/// Note: The vector generate mask built-in function [`vec_genmask`] could help generate the mask c.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_perm<T: sealed::VectorPerm>(a: T, b: T, c: vector_unsigned_char) -> T {
+    a.vec_perm(b, c)
+}
+
+/// Vector Sum Across Quadword
+///
+/// Returns a vector containing the results of performing a sum across all the elements in each quadword of vector a,
+/// and the rightmost word or doubleword element of b. The result is an unsigned 128-bit integer.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_sum_u128<T: sealed::VectorSumU128>(a: T, b: T) -> vector_unsigned_char {
+    a.vec_sum_u128(b)
+}
+
+/// Vector Sum Across Doubleword
+///
+/// Returns a vector containing the results of performing a sum across all the elements in each doubleword of vector a,
+/// and the rightmost sub-element of the corresponding doubleword of b.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_sum2<T: sealed::VectorSum2>(a: T, b: T) -> vector_unsigned_long_long {
+    a.vec_sum2(b)
+}
+
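Worked examples of the mask generators may help; the bit numbering below follows the MSB-first convention of the underlying vgbm/vgm instructions (a sketch, not part of the patch):

// Sketch: vec_genmask expands one mask bit per byte, most significant bit
// first, so 0x00FF leaves bytes 0..8 zero and sets bytes 8..16 to 0xFF.
// vec_genmasks_32 sets the bit range L..=H (MSB-first) in every element:
// L = 3, H = 5 gives 0b0001_1100 << 24 = 0x1C00_0000 per word.
unsafe fn masks() -> (vector_unsigned_char, vector_unsigned_int) {
    (vec_genmask::<0x00FF>(), vec_genmasks_32::<3, 5>())
}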
+/// Vector Sum Across Word
+///
+/// Returns a vector containing the results of performing a sum across all the elements in each word of vector a,
+/// and the rightmost sub-element of the corresponding word of b.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_sum4<T: sealed::VectorSum4>(a: T, b: T) -> vector_unsigned_int {
+    a.vec_sum4(b)
+}
+
+/// Vector Addition unsigned 128-bits
+///
+/// Adds unsigned quadword values.
+///
+/// This function operates on the vectors as 128-bit unsigned integers. It returns the low 128 bits of a + b.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vaq))]
+pub unsafe fn vec_add_u128(
+    a: vector_unsigned_char,
+    b: vector_unsigned_char,
+) -> vector_unsigned_char {
+    let a: u128 = transmute(a);
+    let b: u128 = transmute(b);
+    transmute(a.wrapping_add(b))
+}
+
+/// Vector Subtract unsigned 128-bits
+///
+/// Subtracts unsigned quadword values.
+///
+/// This function operates on the vectors as 128-bit unsigned integers. It returns the low 128 bits of a - b.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vsq))]
+pub unsafe fn vec_sub_u128(
+    a: vector_unsigned_char,
+    b: vector_unsigned_char,
+) -> vector_unsigned_char {
+    let a: u128 = transmute(a);
+    let b: u128 = transmute(b);
+
+    transmute(a.wrapping_sub(b))
+}
+
+/// Vector Subtract Carryout
+///
+/// Returns a vector containing the borrow produced by subtracting each of the corresponding elements of b from a.
+///
+/// On each resulting element, the value is 0 if a borrow occurred, or 1 if no borrow occurred.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_subc<T: sealed::VectorSubc<U>, U>(a: T, b: U) -> T::Result {
+    a.vec_subc(b)
+}
+
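Since the quadword operations treat the 16-byte register as a single 128-bit unsigned integer, ordinary u128 identities apply. A sketch (editorial; `roundtrip` is a hypothetical name):

// Sketch: vec_add_u128/vec_sub_u128 behave like wrapping u128 arithmetic,
// so subtracting b again round-trips back to a.
unsafe fn roundtrip(
    a: vector_unsigned_char,
    b: vector_unsigned_char,
) -> vector_unsigned_char {
    vec_sub_u128(vec_add_u128(a, b), b) // == a
}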
+/// Gets the carry bit of the 128-bit subtraction of two quadword values.
+/// This function operates on the vectors as 128-bit unsigned integers. It returns a vector containing the borrow produced by subtracting b from a, as unsigned 128-bit integers.
+/// If no borrow occurred, bit 127 of d is 1; otherwise it is set to 0. All other bits of d are 0.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vscbiq))]
+pub unsafe fn vec_subc_u128(
+    a: vector_unsigned_char,
+    b: vector_unsigned_char,
+) -> vector_unsigned_char {
+    // FIXME(llvm) sadly this does not work https://github.com/llvm/llvm-project/issues/129608
+    // let a: u128 = transmute(a);
+    // let b: u128 = transmute(b);
+    // transmute(!a.overflowing_sub(b).1 as u128)
+    transmute(vscbiq(transmute(a), transmute(b)))
+}
+
+/// Vector Add Compute Carryout unsigned 128-bits
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vaccq))]
+pub unsafe fn vec_addc_u128(
+    a: vector_unsigned_char,
+    b: vector_unsigned_char,
+) -> vector_unsigned_char {
+    let a: u128 = transmute(a);
+    let b: u128 = transmute(b);
+    transmute(a.overflowing_add(b).1 as u128)
+}
+
+/// Vector Add With Carry unsigned 128-bits
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vacq))]
+pub unsafe fn vec_adde_u128(
+    a: vector_unsigned_char,
+    b: vector_unsigned_char,
+    c: vector_unsigned_char,
+) -> vector_unsigned_char {
+    let a: u128 = transmute(a);
+    let b: u128 = transmute(b);
+    let c: u128 = transmute(c);
+    // FIXME(llvm) sadly this does not work
+    // let (d, _carry) = a.carrying_add(b, c & 1 != 0);
+    // transmute(d)
+    transmute(vacq(a, b, c))
+}
+
+/// Vector Add With Carry Compute Carry unsigned 128-bits
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vacccq))]
+pub unsafe fn vec_addec_u128(
+    a: vector_unsigned_char,
+    b: vector_unsigned_char,
+    c: vector_unsigned_char,
+) -> vector_unsigned_char {
+    let a: u128 = transmute(a);
+    let b: u128 = transmute(b);
+    let c: u128 = transmute(c);
+    let (_d, carry) = a.carrying_add(b, c & 1 != 0);
+    transmute(carry as u128)
+}
+
+/// Subtracts unsigned quadword values with carry bit from a previous operation.
+///
+/// This function operates on the vectors as 128-bit unsigned integers. It returns a vector containing the result of subtracting b from a,
+/// and the carryout bit from a previous operation.
+///
+/// Note: Only the borrow indication bit (bit 127) of c is used, and the other bits are ignored.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vsbiq))]
+pub unsafe fn vec_sube_u128(
+    a: vector_unsigned_char,
+    b: vector_unsigned_char,
+    c: vector_unsigned_char,
+) -> vector_unsigned_char {
+    transmute(vsbiq(transmute(a), transmute(b), transmute(c)))
+}
+
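The carry-producing and carry-consuming variants compose into wider arithmetic. A sketch of a 256-bit addition built from the quadword primitives (editorial; the `add256` helper is hypothetical):

// Sketch: a 256-bit addition. The low halves are combined twice, once for
// the sum and once for the carry, which then feeds the high-half addition;
// this mirrors how the vaq/vaccq/vacq instructions are meant to compose.
unsafe fn add256(
    a_lo: vector_unsigned_char, a_hi: vector_unsigned_char,
    b_lo: vector_unsigned_char, b_hi: vector_unsigned_char,
) -> (vector_unsigned_char, vector_unsigned_char) {
    let lo = vec_add_u128(a_lo, b_lo);
    let carry = vec_addc_u128(a_lo, b_lo); // 0 or 1 in the 128-bit lane
    let hi = vec_adde_u128(a_hi, b_hi, carry);
    (lo, hi)
}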
+/// Vector Subtract with Carryout, Carryout
+///
+/// Gets the carry bit of the 128-bit subtraction of two quadword values with carry bit from the previous operation.
+///
+/// It returns a vector containing the carryout produced from the result of subtracting b from a,
+/// and the carryout bit from a previous operation. If no borrow occurred, bit 127 of d is 1, otherwise 0.
+/// All other bits of d are 0.
+///
+/// Note: Only the borrow indication bit (bit 127) of c is used, and the other bits are ignored.
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vsbcbiq))]
+pub unsafe fn vec_subec_u128(
+    a: vector_unsigned_char,
+    b: vector_unsigned_char,
+    c: vector_unsigned_char,
+) -> vector_unsigned_char {
+    transmute(vsbcbiq(transmute(a), transmute(b), transmute(c)))
+}
+
+/// Vector Splat Signed Byte
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vrepib, IMM = 42))]
+pub unsafe fn vec_splat_s8<const IMM: i8>() -> vector_signed_char {
+    vector_signed_char([IMM; 16])
+}
+
+/// Vector Splat Signed Halfword
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vrepih, IMM = 42))]
+pub unsafe fn vec_splat_s16<const IMM: i16>() -> vector_signed_short {
+    vector_signed_short([IMM as i16; 8])
+}
+
+/// Vector Splat Signed Word
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vrepif, IMM = 42))]
+pub unsafe fn vec_splat_s32<const IMM: i32>() -> vector_signed_int {
+    vector_signed_int([IMM as i32; 4])
+}
+
+/// Vector Splat Signed Doubleword
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vrepig, IMM = 42))]
+pub unsafe fn vec_splat_s64<const IMM: i64>() -> vector_signed_long_long {
+    vector_signed_long_long([IMM as i64; 2])
+}
+
+/// Vector Splat Unsigned Byte
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vrepib, IMM = 42))]
+pub unsafe fn vec_splat_u8<const IMM: u8>() -> vector_unsigned_char {
+    vector_unsigned_char([IMM; 16])
+}
+
+/// Vector Splat Unsigned Halfword
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vrepih, IMM = 42))]
+pub unsafe fn vec_splat_u16<const IMM: u16>() -> vector_unsigned_short {
+    vector_unsigned_short([IMM as u16; 8])
+}
+
+/// Vector Splat Unsigned Word
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vrepif, IMM = 42))]
+pub unsafe fn vec_splat_u32<const IMM: u32>() -> vector_unsigned_int {
+    vector_unsigned_int([IMM as u32; 4])
+}
+
+/// Vector Splat Unsigned Doubleword
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vrepig, IMM = 42))]
+pub unsafe fn vec_splat_u64<const IMM: u64>() -> vector_unsigned_long_long {
+    vector_unsigned_long_long([IMM as u64; 2])
+}
+
+macro_rules! vec_find_any {
+    ($($Trait:ident $fun:ident)*) => {
+        $(
+            #[inline]
+            #[target_feature(enable = "vector")]
+            #[unstable(feature = "stdarch_s390x", issue = "135681")]
+            pub unsafe fn $fun<T: sealed::$Trait<U>, U>(a: T, b: U) -> T::Result {
+                a.$fun(b)
+            }
+        )*
+    }
+}
+
+vec_find_any! {
+    VectorFindAnyEq vec_find_any_eq
+    VectorFindAnyNe vec_find_any_ne
+    VectorFindAnyEqIdx vec_find_any_eq_idx
+    VectorFindAnyNeIdx vec_find_any_ne_idx
+    VectorFindAnyEqOrZeroIdx vec_find_any_eq_or_0_idx
+    VectorFindAnyNeOrZeroIdx vec_find_any_ne_or_0_idx
+}
+
+macro_rules! vec_find_any_cc {
+    ($($Trait:ident $fun:ident)*) => {
+        $(
+            #[inline]
+            #[target_feature(enable = "vector")]
+            #[unstable(feature = "stdarch_s390x", issue = "135681")]
+            pub unsafe fn $fun<T: sealed::$Trait<U>, U>(a: T, b: U, c: *mut i32) -> T::Result {
+                let (x, y) = a.$fun(b);
+                unsafe { c.write(y) };
+                x
+            }
+        )*
+    }
+}
+
+vec_find_any_cc! {
+    VectorFindAnyEqCC vec_find_any_eq_cc
+    VectorFindAnyNeCC vec_find_any_ne_cc
+    VectorFindAnyEqIdxCC vec_find_any_eq_idx_cc
+    VectorFindAnyNeIdxCC vec_find_any_ne_idx_cc
+    VectorFindAnyEqOrZeroIdxCC vec_find_any_eq_or_0_idx_cc
+    VectorFindAnyNeOrZeroIdxCC vec_find_any_ne_or_0_idx_cc
+}
+
+/// Vector Load
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_xl<T: sealed::VectorLoad>(offset: isize, ptr: *const T::ElementType) -> T {
+    T::vec_xl(offset, ptr)
+}
+
+/// Vector Load Pair
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_load_pair<T: sealed::VectorLoadPair>(a: T::ElementType, b: T::ElementType) -> T {
+    T::vec_load_pair(a, b)
+}
+
+/// Vector Load to Block Boundary
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_load_bndry<T: sealed::VectorLoad, const BLOCK_BOUNDARY: u16>(
+    ptr: *const T::ElementType,
+) -> MaybeUninit<T> {
+    T::vec_load_bndry::<BLOCK_BOUNDARY>(ptr)
+}
+
+/// Vector Store
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_xst<T: sealed::VectorStore>(vector: T, offset: isize, ptr: *mut T::ElementType) {
+    vector.vec_xst(offset, ptr)
+}
+
+/// Vector Load with Length
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_load_len<T: sealed::VectorLoad>(
+    ptr: *const T::ElementType,
+    byte_count: u32,
+) -> T {
+    T::vec_load_len(ptr, byte_count)
+}
+
+/// Vector Store with Length
+#[inline]
+#[target_feature(enable = "vector")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_store_len<T: sealed::VectorStore>(
+    vector: T,
+    ptr: *mut T::ElementType,
+    byte_count: u32,
+) {
+    vector.vec_store_len(ptr, byte_count)
+}
+
+/// Vector Load Rightmost with Length
+#[inline]
+#[target_feature(enable = "vector-packed-decimal")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vlrlr))]
+pub unsafe fn vec_load_len_r(ptr: *const u8, byte_count: u32) -> vector_unsigned_char {
+    vlrl(byte_count, ptr)
+}
+
+/// Vector Store Rightmost with Length
+#[inline]
+#[target_feature(enable = "vector-packed-decimal")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+#[cfg_attr(test, assert_instr(vstrlr))]
+pub unsafe fn vec_store_len_r(vector: vector_unsigned_char, ptr: *mut u8, byte_count: u32) {
+    vstrl(vector, byte_count, ptr)
+}
+
+/// Vector Multiply Add
+#[inline]
+#[target_feature(enable = "vector-packed-decimal")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_madd<T: sealed::VectorMadd>(a: T, b: T, c: T) -> T {
+    a.vec_madd(b, c)
+}
+
+/// Vector Multiply Subtract
+#[inline]
+#[target_feature(enable = "vector-packed-decimal")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_msub<T: sealed::VectorMsub>(a: T, b: T, c: T) -> T {
+    a.vec_msub(b, c)
+}
+
+/// Vector Multiply and Add Even
+#[inline]
+#[target_feature(enable = "vector-packed-decimal")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")]
+pub unsafe fn vec_meadd<T: sealed::VectorMeadd>(a: T, b: T, c: T::Result) -> T::Result {
+    a.vec_meadd(b, c)
+}
+
+/// Vector Multiply and Add Odd
+#[inline]
+#[target_feature(enable = "vector-packed-decimal")]
+#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_moadd(a: T, b: T, c: T::Result) -> T::Result { + a.vec_moadd(b, c) +} + +/// Vector Multiply and Add High +#[inline] +#[target_feature(enable = "vector-packed-decimal")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_mhadd(a: T, b: T, c: T::Result) -> T::Result { + a.vec_mhadd(b, c) +} + +/// Vector Multiply and Add Low +#[inline] +#[target_feature(enable = "vector-packed-decimal")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_mladd(a: T, b: T, c: T::Result) -> T::Result { + a.vec_mladd(b, c) +} + +/// Vector Checksum +#[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_splat(a: T) -> T -where - T: sealed::VectorSplat, -{ - a.vec_splat::() +#[cfg_attr(test, assert_instr(vcksm))] +pub unsafe fn vec_checksum(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int { + vcksm(a, b) } -/// Vector splats. +/// Vector Multiply Even #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_splats(a: T) -> U -where - T: sealed::VectorSplats, -{ - a.vec_splats() +pub unsafe fn vec_mule, U>(a: T, b: T) -> U { + a.vec_mule(b) } -/// Vector and +/// Vector Multiply Odd #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_and(a: T, b: U) -> >::Result -where - T: sealed::VectorAnd, -{ - a.vec_and(b) +pub unsafe fn vec_mulo, U>(a: T, b: T) -> U { + a.vec_mulo(b) } -/// Vector or +/// Vector Multiply High #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_or(a: T, b: U) -> >::Result -where - T: sealed::VectorOr, -{ - a.vec_or(b) +pub unsafe fn vec_mulh, U>(a: T, b: T) -> U { + a.vec_mulh(b) } -/// Vector xor +/// Vector Galois Field Multiply Sum #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_xor(a: T, b: U) -> >::Result -where - T: sealed::VectorXor, -{ - a.vec_xor(b) +pub unsafe fn vec_gfmsum, U>(a: T, b: T) -> U { + a.vec_gfmsum(b) } -/// Vector nor +/// Vector Galois Field Multiply Sum #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_nor(a: T, b: U) -> >::Result -where - T: sealed::VectorNor, -{ - a.vec_nor(b) +pub unsafe fn vec_gfmsum_accum( + a: T, + b: T, + c: T::Result, +) -> T::Result { + a.vec_gfmsum_accum(b, c) } -/// Vector nand +/// Vector Galois Field Multiply Sum 128-bits #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_nand(a: T, b: U) -> >::Result -where - T: sealed::VectorNand, -{ - a.vec_nand(b) +#[cfg_attr(test, assert_instr(vgfmg))] +pub unsafe fn vec_gfmsum_128( + a: vector_unsigned_long_long, + b: vector_unsigned_long_long, +) -> vector_unsigned_char { + transmute(vgfmg(a, b)) } -/// Vector xnor +/// Vector Galois Field Multiply Sum and Accumulate 128-bits #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_eqv(a: T, b: U) -> >::Result -where - T: sealed::VectorEqv, -{ - a.vec_eqv(b) +#[cfg_attr(test, assert_instr(vgfmag))] +pub unsafe fn vec_gfmsum_accum_128( + a: vector_unsigned_long_long, + b: vector_unsigned_long_long, + c: vector_unsigned_char, +) -> 
vector_unsigned_char { + transmute(vgfmag(a, b, transmute(c))) } -/// Vector andc. +/// Vector Bit Permute +#[inline] +#[target_feature(enable = "vector-enhancements-1")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr(test, assert_instr(vbperm))] +pub unsafe fn vec_bperm_u128( + a: vector_unsigned_char, + b: vector_unsigned_char, +) -> vector_unsigned_long_long { + vbperm(a, b) +} + +/// Vector Gather Element #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_andc(a: T, b: U) -> >::Result -where - T: sealed::VectorAndc, -{ - a.vec_andc(b) +pub unsafe fn vec_gather_element( + a: T, + b: T::Offset, + c: *const T::Element, +) -> T { + a.vec_gather_element::(b, c) } -/// Vector OR with Complement -/// -/// ## Purpose -/// Performs a bitwise OR of the first vector with the bitwise-complemented second vector. +/// Vector Select +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_sel, U>(a: T, b: T, c: U) -> T { + a.vec_sel(b, c) +} + +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_ZERO_P: u32 = 1 << 11; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_ZERO_N: u32 = 1 << 10; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_ZERO: u32 = __VEC_CLASS_FP_ZERO_P | __VEC_CLASS_FP_ZERO_N; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_NORMAL_P: u32 = 1 << 9; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_NORMAL_N: u32 = 1 << 8; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_NORMAL: u32 = __VEC_CLASS_FP_NORMAL_P | __VEC_CLASS_FP_NORMAL_N; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_SUBNORMAL_P: u32 = 1 << 7; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_SUBNORMAL_N: u32 = 1 << 6; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_SUBNORMAL: u32 = __VEC_CLASS_FP_SUBNORMAL_P | __VEC_CLASS_FP_SUBNORMAL_N; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_INFINITY_P: u32 = 1 << 5; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_INFINITY_N: u32 = 1 << 4; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_INFINITY: u32 = __VEC_CLASS_FP_INFINITY_P | __VEC_CLASS_FP_INFINITY_N; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_QNAN_P: u32 = 1 << 3; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_QNAN_N: u32 = 1 << 2; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_QNAN: u32 = __VEC_CLASS_FP_QNAN_P | __VEC_CLASS_FP_QNAN_N; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_SNAN_P: u32 = 1 << 1; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_SNAN_N: u32 = 1 << 0; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_SNAN: u32 = __VEC_CLASS_FP_SNAN_P | __VEC_CLASS_FP_SNAN_N; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_NAN: u32 = __VEC_CLASS_FP_QNAN | __VEC_CLASS_FP_SNAN; +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub const __VEC_CLASS_FP_NOT_NORMAL: u32 = + 
__VEC_CLASS_FP_NAN | __VEC_CLASS_FP_SUBNORMAL | __VEC_CLASS_FP_ZERO | __VEC_CLASS_FP_INFINITY; + +/// Vector Floating-Point Test Data Class /// -/// ## Result value -/// r is the bitwise OR of a and the bitwise complement of b. +/// You can use the `__VEC_CLASS_FP_*` constants as the argument for this operand #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_orc(a: T, b: U) -> >::Result -where - T: sealed::VectorOrc, -{ - a.vec_orc(b) +pub unsafe fn vec_fp_test_data_class( + a: T, + c: *mut i32, +) -> T::Result { + let (x, y) = a.vec_fp_test_data_class::(); + c.write(y); + x } -/// Vector floor. #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_floor(a: T) -> T -where - T: sealed::VectorFloor, -{ - a.vec_floor() +pub unsafe fn vec_all_nan(a: T) -> i32 { + i32::from(a.vec_fp_test_data_class::<__VEC_CLASS_FP_NAN>().1 == 0) } -/// Vector ceil. #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_ceil(a: T) -> T -where - T: sealed::VectorCeil, -{ - a.vec_ceil() +pub unsafe fn vec_all_numeric(a: T) -> i32 { + i32::from(a.vec_fp_test_data_class::<__VEC_CLASS_FP_NAN>().1 == 3) } -/// Returns a vector containing the truncated values of the corresponding elements of the given vector. -/// Each element of the result contains the value of the corresponding element of a, truncated to an integral value. #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_trunc(a: T) -> T -where - T: sealed::VectorTrunc, -{ - a.vec_trunc() +pub unsafe fn vec_any_nan(a: T) -> i32 { + i32::from(a.vec_fp_test_data_class::<__VEC_CLASS_FP_NAN>().1 != 3) } -/// Returns a vector containing the rounded values to the nearest representable floating-point integer, -/// using IEEE round-to-nearest rounding, of the corresponding elements of the given vector #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_round(a: T) -> T -where - T: sealed::VectorRound, -{ - a.vec_round() +pub unsafe fn vec_any_numeric(a: T) -> i32 { + i32::from(a.vec_fp_test_data_class::<__VEC_CLASS_FP_NAN>().1 != 0) } -/// Returns a vector by using the current rounding mode to round every -/// floating-point element in the given vector to integer. +/// Vector Test under Mask #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_roundc(a: T) -> T -where - T: sealed::VectorRoundc, -{ - a.vec_roundc() +pub unsafe fn vec_test_mask(a: T, b: T::Mask) -> i32 { + // I can't find much information about this, but this might just be a check for whether the + // bitwise and of a and b is non-zero? + a.vec_test_mask(b) } -/// Returns a vector containing the largest representable floating-point integral values less -/// than or equal to the values of the corresponding elements of the given vector. +/// Vector Search String #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_roundm(a: T) -> T -where - T: sealed::VectorFloor, -{ - // the IBM docs note - // - // > vec_roundm provides the same functionality as vec_floor, except that vec_roundz would not trigger the IEEE-inexact exception. 
- // - // but in practice `vec_floor` also does not trigger that exception, so both are equivalent - a.vec_floor() +pub unsafe fn vec_search_string_cc( + a: T, + b: T, + c: vector_unsigned_char, + d: *mut i32, +) -> vector_unsigned_char { + let (x, y) = a.vec_search_string_cc(b, c); + unsafe { d.write(y) }; + x } -/// Returns a vector containing the smallest representable floating-point integral values greater -/// than or equal to the values of the corresponding elements of the given vector. +/// Vector Search String Until Zero #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_roundp(a: T) -> T -where - T: sealed::VectorCeil, -{ - // the IBM docs note - // - // > vec_roundp provides the same functionality as vec_ceil, except that vec_roundz would not trigger the IEEE-inexact exception. - // - // but in practice `vec_ceil` also does not trigger that exception, so both are equivalent - a.vec_ceil() +pub unsafe fn vec_search_string_until_zero_cc( + a: T, + b: T, + c: vector_unsigned_char, + d: *mut i32, +) -> vector_unsigned_char { + let (x, y) = a.vec_search_string_until_zero_cc(b, c); + unsafe { d.write(y) }; + x } -/// Returns a vector containing the truncated values of the corresponding elements of the given vector. -/// Each element of the result contains the value of the corresponding element of a, truncated to an integral value. +/// Vector Convert from float (even elements) to double +#[inline] +#[target_feature(enable = "vector-enhancements-1")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +// FIXME: this emits `vflls` where `vldeb` is expected +// #[cfg_attr(all(test, target_feature = "vector-enhancements-1"), assert_instr(vldeb))] +pub unsafe fn vec_doublee(a: vector_float) -> vector_double { + let even = simd_shuffle::<_, _, f32x2>(a, a, const { u32x2::from_array([0, 2]) }); + simd_as(even) +} + +/// Vector Convert from double to float (even elements) +#[inline] +#[target_feature(enable = "vector-enhancements-1")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +// FIXME: the C version uses a shuffle mask with poison; we can't do that +// #[cfg_attr(all(test, target_feature = "vector-enhancements-1"), assert_instr(vledb))] +pub unsafe fn vec_floate(a: vector_double) -> vector_float { + let truncated: f32x2 = simd_as(a); + simd_shuffle( + truncated, + truncated, + const { u32x4::from_array([0, 0, 1, 1]) }, + ) +} + +/// Vector Convert from int to float #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_roundz(a: T) -> T -where - T: sealed::VectorTrunc, -{ - // the IBM docs note - // - // > vec_roundz provides the same functionality as vec_trunc, except that vec_roundz would not trigger the IEEE-inexact exception. - // - // but in practice `vec_trunc` also does not trigger that exception, so both are equivalent - a.vec_trunc() +pub unsafe fn vec_float(a: impl sealed::VectorFloat) -> vector_float { + a.vec_float() } -/// Returns a vector by using the current rounding mode to round every floating-point element in the given vector to integer. 
+/// Vector Convert from long long to double #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_rint(a: T) -> T -where - T: sealed::VectorRint, -{ - a.vec_rint() +pub unsafe fn vec_double(a: impl sealed::VectorDouble) -> vector_double { + a.vec_double() } -/// Vector Shift Left +/// Vector Sign Extend to Doubleword #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_sl(a: T, b: U) -> >::Result -where - T: sealed::VectorSl, -{ - a.vec_sl(b) +pub unsafe fn vec_extend_s64(a: impl sealed::VectorExtendSigned64) -> vector_signed_long_long { + a.vec_extend_s64() } -/// Vector Shift Right +/// Vector Convert floating point to signed +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_signed(a: T) -> T::Result { + a.vec_signed() +} + +/// Vector Convert floating point to unsigned +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_unsigned(a: T) -> T::Result { + a.vec_unsigned() +} + +/// Vector Copy Until Zero +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cp_until_zero(a: T) -> T { + a.vec_cp_until_zero() +} + +/// Vector Copy Until Zero +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cp_until_zero_cc(a: T, cc: *mut i32) -> T { + let (x, y) = a.vec_cp_until_zero_cc(); + unsafe { cc.write(y) }; + x +} + +/// Vector Multiply Sum Logical +#[inline] +#[target_feature(enable = "vector-enhancements-1")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +#[cfg_attr( + all(test, target_feature = "vector-enhancements-1"), + assert_instr(vmslg, D = 4) +)] +pub unsafe fn vec_msum_u128( + a: vector_unsigned_long_long, + b: vector_unsigned_long_long, + c: vector_unsigned_char, +) -> vector_unsigned_char { + const { + if !matches!(D, 0 | 4 | 8 | 12) { + panic!("D needs to be one of 0, 4, 8, 12"); + } + }; + transmute(vmslg(a, b, transmute(c), D)) +} + +/// Vector Shift Left Double by Byte +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_sld(a: T, b: T) -> T { + static_assert_uimm_bits!(C, 4); + a.vec_sld::(b) +} + +/// Vector Shift Left Double by Word +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_sldw(a: T, b: T) -> T { + static_assert_uimm_bits!(C, 2); + a.vec_sldw::(b) +} + +/// Vector Shift Left Double by Bit +#[inline] +#[target_feature(enable = "vector-enhancements-2")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_sldb(a: T, b: T) -> T { + static_assert_uimm_bits!(C, 3); + a.vec_sldb::(b) +} + +/// Vector Shift Right Double by Bit +#[inline] +#[target_feature(enable = "vector-enhancements-2")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_srdb(a: T, b: T) -> T { + static_assert_uimm_bits!(C, 3); + a.vec_srdb::(b) +} + +/// Vector Compare Ranges +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmprg(a: T, b: T, c: T) -> T::Result { + a.vstrc::<{ FindImm::Eq as u32 }>(b, c) +} + +/// Vector Compare Not in Ranges +#[inline] +#[target_feature(enable = "vector")] 
+#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmpnrg(a: T, b: T, c: T) -> T::Result { + a.vstrc::<{ FindImm::Ne as u32 }>(b, c) +} + +/// Vector Compare Ranges Index +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmprg_idx(a: T, b: T, c: T) -> T::Result { + a.vstrc::<{ FindImm::EqIdx as u32 }>(b, c) +} + +/// Vector Compare Not in Ranges Index +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmpnrg_idx(a: T, b: T, c: T) -> T::Result { + a.vstrc::<{ FindImm::NeIdx as u32 }>(b, c) +} + +/// Vector Compare Ranges with Condition Code +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmprg_cc( + a: T, + b: T, + c: T, + d: *mut i32, +) -> T::Result { + let (x, y) = a.vstrcs::<{ FindImm::Eq as u32 }>(b, c); + d.write(y); + x +} + +/// Vector Compare Not in Ranges with Condition Code +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_cmpnrg_cc( + a: T, + b: T, + c: T, + d: *mut i32, +) -> T::Result { + let (x, y) = a.vstrcs::<{ FindImm::Ne as u32 }>(b, c); + d.write(y); + x +} + +/// Vector Compare Ranges Index with Condition Code #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_sr(a: T, b: U) -> >::Result -where - T: sealed::VectorSr, -{ - a.vec_sr(b) +pub unsafe fn vec_cmprg_idx_cc( + a: T, + b: T, + c: T, + d: *mut i32, +) -> T::Result { + let (x, y) = a.vstrcs::<{ FindImm::EqIdx as u32 }>(b, c); + d.write(y); + x } -/// Vector Shift Right Algebraic +/// Vector Compare Not in Ranges Index with Condition Code #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_sra(a: T, b: U) -> >::Result -where - T: sealed::VectorSra, -{ - a.vec_sra(b) +pub unsafe fn vec_cmpnrg_idx_cc( + a: T, + b: T, + c: T, + d: *mut i32, +) -> T::Result { + let (x, y) = a.vstrcs::<{ FindImm::NeIdx as u32 }>(b, c); + d.write(y); + x } -/// Vector Shift Left by Byte -#[inline] +/// Vector Compare Ranges or Zero Index#[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_slb(a: T, b: U) -> >::Result -where - T: sealed::VectorSlb, -{ - a.vec_slb(b) +pub unsafe fn vec_cmprg_or_0_idx(a: T, b: T, c: T) -> T::Result { + a.vstrcz::<{ FindImm::EqIdx as u32 }>(b, c) } -/// Vector Shift Right by Byte +/// Vector Compare Not in Ranges or Zero Index #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_srb(a: T, b: U) -> >::Result -where - T: sealed::VectorSrb, -{ - a.vec_srb(b) +pub unsafe fn vec_cmpnrg_or_0_idx(a: T, b: T, c: T) -> T::Result { + a.vstrcz::<{ FindImm::NeIdx as u32 }>(b, c) } -/// Vector Shift Right Algebraic by Byte +/// Vector Compare Ranges or Zero Index with Condition Code #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_srab(a: T, b: U) -> >::Result -where - T: sealed::VectorSrab, -{ - a.vec_srab(b) +pub unsafe fn vec_cmprg_or_0_idx_cc( + a: T, + b: T, + c: T, + d: *mut i32, +) -> T::Result { + let (x, y) = a.vstrczs::<{ FindImm::EqIdx as u32 }>(b, c); + d.write(y); + x } -/// Vector Element Rotate Left +/// Vector Compare Not in Ranges 
or Zero Index with Condition Code #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_rl(a: T, b: U) -> >::Result -where - T: sealed::VectorRl, -{ - a.vec_rl(b) +pub unsafe fn vec_cmpnrg_or_0_idx_cc( + a: T, + b: T, + c: T, + d: *mut i32, +) -> T::Result { + let (x, y) = a.vstrczs::<{ FindImm::NeIdx as u32 }>(b, c); + d.write(y); + x } -/// Performs a left shift for a vector by a given number of bits. Each element of the result is obtained by shifting the corresponding -/// element of a left by the number of bits specified by the last 3 bits of every byte of b. The bits that are shifted out are replaced by zeros. +/// Vector Compare Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_sll(a: T, b: vector_unsigned_char) -> T -where - T: sealed::VectorSll, -{ - a.vec_sll(b) +pub unsafe fn vec_cmpeq(a: T, b: T) -> T::Result { + a.vec_cmpeq(b) } -/// Performs a right shift for a vector by a given number of bits. Each element of the result is obtained by shifting the corresponding -/// element of a right by the number of bits specified by the last 3 bits of every byte of b. The bits that are shifted out are replaced by zeros. +/// Vector Compare Not Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_srl(a: T, b: vector_unsigned_char) -> T -where - T: sealed::VectorSrl, -{ - a.vec_srl(b) +pub unsafe fn vec_cmpne(a: T, b: T) -> T::Result { + a.vec_cmpne(b) } -/// Performs an algebraic right shift for a vector by a given number of bits. Each element of the result is obtained by shifting the corresponding -/// element of a right by the number of bits specified by the last 3 bits of every byte of b. The bits that are shifted out are replaced by copies of -/// the most significant bit of the element of a. +/// Vector Compare Greater Than #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_sral(a: T, b: vector_unsigned_char) -> T -where - T: sealed::VectorSral, -{ - a.vec_sral(b) +pub unsafe fn vec_cmpgt(a: T, b: T) -> T::Result { + a.vec_cmpgt(b) } -/// Rotates each element of a vector left by a given number of bits. Each element of the result is obtained by rotating the corresponding element -/// of a left by the number of bits specified by b, modulo the number of bits in the element. +/// Vector Compare Greater Than or Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_rli(a: T, bits: core::ffi::c_ulong) -> T -where - T: sealed::VectorRli, -{ - a.vec_rli(bits) +pub unsafe fn vec_cmpge(a: T, b: T) -> T::Result { + a.vec_cmpge(b) } -/// Returns a vector with the elements of the input vector in reversed order. +/// Vector Compare Less #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_reve(a: T) -> T -where - T: sealed::VectorReve, -{ - a.vec_reve() +pub unsafe fn vec_cmplt(a: T, b: T) -> T::Result { + a.vec_cmplt(b) } -/// Returns a vector where each vector element contains the corresponding byte-reversed vector element of the input vector. 
+/// Vector Compare Less Than or Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_revb(a: T) -> T -where - T: sealed::VectorRevb, -{ - a.vec_revb() +pub unsafe fn vec_cmple(a: T, b: T) -> T::Result { + a.vec_cmple(b) } -/// Merges the most significant ("high") halves of two vectors. +/// Vector Compare Equal Index #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_mergeh(a: T, b: T) -> T -where - T: sealed::VectorMergeh, -{ - a.vec_mergeh(b) +pub unsafe fn vec_cmpeq_idx(a: T, b: T) -> T::Result { + a.vec_cmpeq_idx(b) } - -/// Merges the least significant ("low") halves of two vectors. +/// Vector Compare Not Equal Index #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_mergel(a: T, b: T) -> T -where - T: sealed::VectorMergel, -{ - a.vec_mergel(b) +pub unsafe fn vec_cmpne_idx(a: T, b: T) -> T::Result { + a.vec_cmpne_idx(b) } - -/// Generates byte masks for elements in the return vector. For each bit in a, if the bit is one, all bit positions -/// in the corresponding byte element of d are set to ones. Otherwise, if the bit is zero, the corresponding byte element is set to zero. +/// Vector Compare Equal Index with Condition Code #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vgbm, MASK = 0x00FF))] -pub unsafe fn vec_genmask() -> vector_unsigned_char { - vector_unsigned_char(const { genmask::() }) +pub unsafe fn vec_cmpeq_idx_cc( + a: T, + b: T, + cc: *mut i32, +) -> T::Result { + let (x, y) = a.vec_cmpeq_idx_cc(b); + unsafe { cc.write(y) }; + x } - -/// Vector Generate Mask (Byte) +/// Vector Compare Not Equal Index with Condition Code #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vrepib, L = 3, H = 5))] -pub unsafe fn vec_genmasks_8() -> vector_unsigned_char { - vector_unsigned_char(const { [genmasks(u8::BITS, L, H) as u8; 16] }) +pub unsafe fn vec_cmpne_idx_cc( + a: T, + b: T, + cc: *mut i32, +) -> T::Result { + let (x, y) = a.vec_cmpne_idx_cc(b); + unsafe { cc.write(y) }; + x } - -/// Vector Generate Mask (Halfword) +/// Vector Compare Equal or Zero Index #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vrepih, L = 3, H = 5))] -pub unsafe fn vec_genmasks_16() -> vector_unsigned_short { - vector_unsigned_short(const { [genmasks(u16::BITS, L, H) as u16; 8] }) +pub unsafe fn vec_cmpeq_or_0_idx(a: T, b: T) -> T::Result { + a.vec_cmpeq_or_0_idx(b) } - -/// Vector Generate Mask (Word) +/// Vector Compare Not Equal or Zero Index #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vgmf, L = 3, H = 5))] -pub unsafe fn vec_genmasks_32() -> vector_unsigned_int { - vector_unsigned_int(const { [genmasks(u32::BITS, L, H) as u32; 4] }) +pub unsafe fn vec_cmpne_or_0_idx(a: T, b: T) -> T::Result { + a.vec_cmpne_or_0_idx(b) } - -/// Vector Generate Mask (Doubleword) +/// Vector Compare Equal or Zero Index with Condition Code #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vgmg, L = 3, H = 5))] -pub unsafe fn vec_genmasks_64() -> vector_unsigned_long_long { - 
vector_unsigned_long_long(const { [genmasks(u64::BITS, L, H); 2] }) +pub unsafe fn vec_cmpeq_or_0_idx_cc( + a: T, + b: T, + cc: *mut i32, +) -> T::Result { + let (x, y) = a.vec_cmpeq_or_0_idx_cc(b); + unsafe { cc.write(y) }; + x } - -/// Returns a vector that contains some elements of two vectors, in the order specified by a third vector. -/// Each byte of the result is selected by using the least significant 5 bits of the corresponding byte of c as an index into the concatenated bytes of a and b. -/// Note: The vector generate mask built-in function [`vec_genmask`] could help generate the mask c. +/// Vector Compare Not Equal or Zero Index with Condition Code #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_perm(a: T, b: T, c: vector_unsigned_char) -> T { - a.vec_perm(b, c) +pub unsafe fn vec_cmpne_or_0_idx_cc( + a: T, + b: T, + cc: *mut i32, +) -> T::Result { + let (x, y) = a.vec_cmpne_or_0_idx_cc(b); + unsafe { cc.write(y) }; + x } -/// Vector Sum Across Quadword -/// -/// Returns a vector containing the results of performing a sum across all the elements in each of the quadword of vector a, -/// and the rightmost word or doubleword element of the b. The result is an unsigned 128-bit integer. +/// All Elements Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_sum_u128(a: T, b: T) -> vector_unsigned_char { - a.vec_sum_u128(b) +pub unsafe fn vec_all_eq(a: T, b: T) -> i32 { + simd_reduce_all(vec_cmpeq(a, b)) as i32 as i32 } -/// Vector Sum Across Doubleword -/// -/// Returns a vector containing the results of performing a sum across all the elements in each of the doubleword of vector a, -/// and the rightmost sub-element of the corresponding doubleword of b. +/// All Elements Not Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_sum2(a: T, b: T) -> vector_unsigned_long_long { - a.vec_sum2(b) +pub unsafe fn vec_all_ne(a: T, b: T) -> i32 { + simd_reduce_all(vec_cmpne(a, b)) as i32 } -/// Vector Sum Across Word -/// -/// Returns a vector containing the results of performing a sum across all the elements in each of the word of vector a, -/// and the rightmost sub-element of the corresponding word of b. +/// Any Element Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_sum4(a: T, b: T) -> vector_unsigned_int { - a.vec_sum4(b) +pub unsafe fn vec_any_eq(a: T, b: T) -> i32 { + simd_reduce_any(vec_cmpeq(a, b)) as i32 } -/// Vector Subtract unsigned 128-bits -/// -/// Subtracts unsigned quadword values. -/// -/// This function operates on the vectors as 128-bit unsigned integers. It returns low 128 bits of a - b. +/// Any Element Not Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vsq))] -pub unsafe fn vec_sub_u128( - a: vector_unsigned_char, - b: vector_unsigned_char, -) -> vector_unsigned_char { - let a: u128 = transmute(a); - let b: u128 = transmute(b); - - transmute(a.wrapping_sub(b)) +pub unsafe fn vec_any_ne(a: T, b: T) -> i32 { + simd_reduce_any(vec_cmpne(a, b)) as i32 } -/// Vector Subtract Carryout -/// -/// Returns a vector containing the borrow produced by subtracting each of corresponding elements of b from a. 
-/// -/// On each resulting element, the value is 0 if a borrow occurred, or 1 if no borrow occurred. +/// All Elements Less Than #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -pub unsafe fn vec_subc(a: T, b: U) -> >::Result -where - T: sealed::VectorSubc, -{ - a.vec_subc(b) +pub unsafe fn vec_all_lt(a: T, b: T) -> i32 { + a.vec_all_lt(b) } -/// Gets the carry bit of the 128-bit subtraction of two quadword values. -/// This function operates on the vectors as 128-bit unsigned integers. It returns a vector containing the borrow produced by subtracting b from a, as unsigned 128-bits integers. -/// If no borrow occurred, the bit 127 of d is 1; otherwise it is set to 0. All other bits of d are 0. +/// All Elements Less Than or Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vscbiq))] -pub unsafe fn vec_subc_u128( - a: vector_unsigned_char, - b: vector_unsigned_char, -) -> vector_unsigned_char { - transmute(vscbiq(transmute(a), transmute(b))) +pub unsafe fn vec_all_le(a: T, b: T) -> i32 { + a.vec_all_le(b) } -/// Subtracts unsigned quadword values with carry bit from a previous operation. -/// -/// This function operates on the vectors as 128-bit unsigned integers. It returns a vector containing the result of subtracting of b from a, -/// and the carryout bit from a previous operation. -/// -/// Note: Only the borrow indication bit (127-bit) of c is used, and the other bits are ignored. +/// All Elements Greater Than #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vsbiq))] -pub unsafe fn vec_sube_u128( - a: vector_unsigned_char, - b: vector_unsigned_char, - c: vector_unsigned_char, -) -> vector_unsigned_char { - transmute(vsbiq(transmute(a), transmute(b), transmute(c))) +pub unsafe fn vec_all_gt(a: T, b: T) -> i32 { + a.vec_all_gt(b) } -/// Vector Subtract with Carryout, Carryout -/// -/// Gets the carry bit of the 128-bit subtraction of two quadword values with carry bit from the previous operation. -/// -/// It returns a vector containing the carryout produced from the result of subtracting of b from a, -/// and the carryout bit from a previous operation. If no borrow occurred, the 127-bit of d is 1, otherwise 0. -/// All other bits of d are 0. -/// -/// Note: Only the borrow indication bit (127-bit) of c is used, and the other bits are ignored. 
+/// All Elements Greater Than or Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vsbcbiq))] -pub unsafe fn vec_subec_u128( - a: vector_unsigned_char, - b: vector_unsigned_char, - c: vector_unsigned_char, -) -> vector_unsigned_char { - transmute(vsbcbiq(transmute(a), transmute(b), transmute(c))) +pub unsafe fn vec_all_ge(a: T, b: T) -> i32 { + a.vec_all_ge(b) } -/// Vector Splat Signed Byte +/// All Elements Not Less Than #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vrepib, IMM = 42))] -pub unsafe fn vec_splat_i8() -> vector_signed_char { - vector_signed_char([IMM; 16]) +pub unsafe fn vec_all_nlt(a: T, b: T) -> i32 { + vec_all_ge(a, b) } -/// Vector Splat Signed Halfword +/// All Elements Not Less Than or Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vrepih, IMM = 42))] -pub unsafe fn vec_splat_i16() -> vector_signed_short { - vector_signed_short([IMM as i16; 8]) +pub unsafe fn vec_all_nle(a: T, b: T) -> i32 { + vec_all_gt(a, b) } -/// Vector Splat Signed Word +/// All Elements Not Greater Than #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vrepif, IMM = 42))] -pub unsafe fn vec_splat_i32() -> vector_signed_int { - vector_signed_int([IMM as i32; 4]) +pub unsafe fn vec_all_ngt(a: T, b: T) -> i32 { + vec_all_le(a, b) } -/// Vector Splat Signed Doubleword +/// All Elements Not Greater Than or Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vrepig, IMM = 42))] -pub unsafe fn vec_splat_i64() -> vector_signed_long_long { - vector_signed_long_long([IMM as i64; 2]) +pub unsafe fn vec_all_nge(a: T, b: T) -> i32 { + vec_all_lt(a, b) } -/// Vector Splat Unsigned Byte +/// Any Elements Less Than #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vrepib, IMM = 42))] -pub unsafe fn vec_splat_u8() -> vector_unsigned_char { - vector_unsigned_char([IMM; 16]) +pub unsafe fn vec_any_lt(a: T, b: T) -> i32 { + !vec_all_ge(a, b) } -/// Vector Splat Unsigned Halfword +/// Any Elements Less Than or Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vrepih, IMM = 42))] -pub unsafe fn vec_splat_u16() -> vector_unsigned_short { - vector_unsigned_short([IMM as u16; 8]) +pub unsafe fn vec_any_le(a: T, b: T) -> i32 { + !vec_all_gt(a, b) } -/// Vector Splat Unsigned Word +/// Any Elements Greater Than #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vrepif, IMM = 42))] -pub unsafe fn vec_splat_u32() -> vector_unsigned_int { - vector_unsigned_int([IMM as u32; 4]) +pub unsafe fn vec_any_gt(a: T, b: T) -> i32 { + !vec_all_le(a, b) } -/// Vector Splat Unsigned Doubleword +/// Any Elements Greater Than or Equal #[inline] #[target_feature(enable = "vector")] #[unstable(feature = "stdarch_s390x", issue = "135681")] -#[cfg_attr(test, assert_instr(vrepig, IMM = 42))] -pub unsafe fn vec_splat_u64() -> vector_unsigned_long_long { - vector_unsigned_long_long([IMM as u64; 2]) +pub unsafe fn vec_any_ge(a: T, b: T) -> i32 { + 
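// "any a >= b" holds exactly when "all a < b" fails (a De Morgan dual, as + // with the other vec_any_* helpers around it; our gloss, not part of the + // original patch), hence: + 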
!vec_all_lt(a, b) } -macro_rules! vec_find_any { - ($($Trait:ident $fun:ident)*) => { - $( - #[inline] - #[target_feature(enable = "vector")] - #[unstable(feature = "stdarch_s390x", issue = "135681")] - pub unsafe fn $fun<T, U>(a: T, b: U) -> <T as sealed::$Trait<U>>::Result - where - T: sealed::$Trait<U>, - { - a.$fun(b) - } - )* - } +/// Any Elements Not Less Than +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_any_nlt(a: T, b: T) -> i32 { + vec_any_ge(a, b) } -vec_find_any! { - VectorFindAnyEq vec_find_any_eq - VectorFindAnyNe vec_find_any_ne - VectorFindAnyEqIdx vec_find_any_eq_idx - VectorFindAnyNeIdx vec_find_any_ne_idx - VectorFindAnyEqOrZeroIdx vec_find_any_eq_or_0_idx - VectorFindAnyNeOrZeroIdx vec_find_any_ne_or_0_idx +/// Any Elements Not Less Than or Equal +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_any_nle(a: T, b: T) -> i32 { + vec_any_gt(a, b) } -macro_rules! vec_find_any_cc { - ($($Trait:ident $fun:ident)*) => { - $( - #[inline] - #[target_feature(enable = "vector")] - #[unstable(feature = "stdarch_s390x", issue = "135681")] - pub unsafe fn $fun<T, U>(a: T, b: U, c: *mut i32) -> <T as sealed::$Trait<U>>::Result - where - T: sealed::$Trait<U>, - { - a.$fun(b, c) - } - )* - } +/// Any Elements Not Greater Than +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_any_ngt(a: T, b: T) -> i32 { + vec_any_le(a, b) } -vec_find_any_cc! { - VectorFindAnyEqCC vec_find_any_eq_cc - VectorFindAnyNeCC vec_find_any_ne_cc - VectorFindAnyEqIdxCC vec_find_any_eq_idx_cc - VectorFindAnyNeIdxCC vec_find_any_ne_idx_cc - VectorFindAnyEqOrZeroIdxCC vec_find_any_eq_or_0_idx_cc - VectorFindAnyNeOrZeroIdxCC vec_find_any_ne_or_0_idx_cc +/// Any Elements Not Greater Than or Equal +#[inline] +#[target_feature(enable = "vector")] +#[unstable(feature = "stdarch_s390x", issue = "135681")] +pub unsafe fn vec_any_nge(a: T, b: T) -> i32 { + vec_any_lt(a, b) } #[cfg(test)] @@ -2699,6 +5625,11 @@ mod tests { assert_eq!(ShuffleMask::<4>::merge_high().0, [0, 4, 1, 5]); } + #[test] + fn pack_mask() { + assert_eq!(ShuffleMask::<4>::pack().0, [1, 3, 5, 7]); + } + #[test] fn test_vec_mask() { assert_eq!( @@ -3075,11 +6006,6 @@ mod tests { [0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 16], [-4, -2, -1, -8] } - test_vec_2! { test_vec_rl, vec_rl, u32x4, - [0x12345678, 0x9ABCDEF0, 0x0F0F0F0F, 0x12345678], - [4, 8, 12, 68], - [0x23456781, 0xBCDEF09A, 0xF0F0F0F0, 0x23456781] } - test_vec_1!
{ test_vec_reve_f32, vec_reve, f32x4, [0.1, 0.5, 0.6, 0.9], [0.9, 0.6, 0.5, 0.1] @@ -3378,4 +6304,844 @@ mod tests { assert_eq!(c, 3); assert_eq!(d.as_array(), &[0, 16, 0, 0]); } + + #[simd_test(enable = "vector")] + fn test_vector_load() { + let expected = [0xAAAA_AAAA, 0xBBBB_BBBB, 0xCCCC_CCCC, 0xDDDD_DDDD]; + + let source: [u32; 8] = [ + 0xAAAA_AAAA, + 0xBBBB_BBBB, + 0xCCCC_CCCC, + 0xDDDD_DDDD, + 0, + 0, + 0, + 0, + ]; + assert_eq!( + unsafe { vec_xl::<vector_unsigned_int>(0, source.as_ptr()) }.as_array(), + &expected + ); + + // offset is in bytes + let source: [u32; 8] = [ + 0x0000_AAAA, + 0xAAAA_BBBB, + 0xBBBB_CCCC, + 0xCCCC_DDDD, + 0xDDDD_0000, + 0, + 0, + 0, + ]; + assert_eq!( + unsafe { vec_xl::<vector_unsigned_int>(2, source.as_ptr()) }.as_array(), + &expected + ); + } + + #[simd_test(enable = "vector")] + fn test_vector_store() { + let vec = vector_unsigned_int([0xAAAA_AAAA, 0xBBBB_BBBB, 0xCCCC_CCCC, 0xDDDD_DDDD]); + + let mut dest = [0u32; 8]; + unsafe { vec_xst(vec, 0, dest.as_mut_ptr()) }; + assert_eq!( + dest, + [ + 0xAAAA_AAAA, + 0xBBBB_BBBB, + 0xCCCC_CCCC, + 0xDDDD_DDDD, + 0, + 0, + 0, + 0 + ] + ); + + // offset is in bytes + let mut dest = [0u32; 8]; + unsafe { vec_xst(vec, 2, dest.as_mut_ptr()) }; + assert_eq!( + dest, + [ + 0x0000_AAAA, + 0xAAAA_BBBB, + 0xBBBB_CCCC, + 0xCCCC_DDDD, + 0xDDDD_0000, + 0, + 0, + 0, + ] + ); + } + + #[simd_test(enable = "vector")] + fn test_vector_lcbb() { + #[repr(align(64))] + struct Align64<T>(T); + + static ARRAY: Align64<[u8; 128]> = Align64([0; 128]); + + assert_eq!(unsafe { __lcbb::<64>(ARRAY.0[64..].as_ptr()) }, 16); + assert_eq!(unsafe { __lcbb::<64>(ARRAY.0[63..].as_ptr()) }, 1); + assert_eq!(unsafe { __lcbb::<64>(ARRAY.0[56..].as_ptr()) }, 8); + assert_eq!(unsafe { __lcbb::<64>(ARRAY.0[48..].as_ptr()) }, 16); + } + + test_vec_2! { test_vec_pack, vec_pack, i16x8, i16x8 -> i8x16, + [0, 1, -1, 42, 32767, -32768, 30000, -30000], + [32767, -32768, 12345, -12345, 0, 1, -1, 42], + [0, 1, -1, 42, -1, 0, 48, -48, -1, 0, 57, -57, 0, 1, -1, 42] + } + + test_vec_2! { test_vec_packs, vec_packs, i16x8, i16x8 -> i8x16, + [0, 1, -1, 42, 32767, -32768, 30000, -30000], + [32767, -32768, 12345, -12345, 0, 1, -1, 42], + [0, 1, -1, 42, 127, -128, 127, -128, 127, -128, 127, -128, 0, 1, -1, 42] + } + + test_vec_2! { test_vec_packsu_signed, vec_packsu, i16x8, i16x8 -> u8x16, + [0, 1, -1, 42, 32767, -32768, 30000, -30000], + [32767, -32768, 12345, -12345, 0, 1, -1, 42], + [0, 1, 0, 42, 255, 0, 255, 0, 255, 0, 255, 0, 0, 1, 0, 42] + } + + test_vec_2! { test_vec_packsu_unsigned, vec_packsu, u16x8, u16x8 -> u8x16, + [65535, 32768, 1234, 5678, 16, 8, 4, 2], + [30000, 25000, 20000, 15000, 31, 63, 127, 255], + [255, 255, 255, 255, 16, 8, 4, 2, 255, 255, 255, 255, 31, 63, 127, 255] + } + + test_vec_2! { test_vec_rl, vec_rl, u32x4, + [0x12345678, 0x9ABCDEF0, 0x0F0F0F0F, 0x12345678], + [4, 8, 12, 68], + [0x23456781, 0xBCDEF09A, 0xF0F0F0F0, 0x23456781] + } + + test_vec_1! { test_vec_unpackh_i, vec_unpackh, i16x8 -> i32x4, + [0x1234, -2, 0x0F0F, -32768, 0, 0, 0, 0], + [0x1234, -2, 0x0F0F, -32768] + } + + test_vec_1! { test_vec_unpackh_u, vec_unpackh, u16x8 -> u32x4, + [0x1234, 0xFFFF, 0x0F0F, 0x8000, 0, 0, 0, 0], + [0x1234, 0xFFFF, 0x0F0F, 0x8000] + } + + test_vec_1! { test_vec_unpackl_i, vec_unpackl, i16x8 -> i32x4, + [0, 0, 0, 0, 0x1234, -2, 0x0F0F, -32768], + [0x1234, -2, 0x0F0F, -32768] + } + + test_vec_1! { test_vec_unpackl_u, vec_unpackl, u16x8 -> u32x4, + [0, 0, 0, 0, 0x1234, 0xFFFF, 0x0F0F, 0x8000], + [0x1234, 0xFFFF, 0x0F0F, 0x8000] + } + + test_vec_2!
{ test_vec_avg, vec_avg, u32x4, + [2, 1, u32::MAX, 0], + [4, 2, 2, 0], + [3, (1u32 + 2).div_ceil(2), (u32::MAX as u64 + 2u64).div_ceil(2) as u32, 0] + } + + test_vec_2! { test_vec_checksum, vec_checksum, u32x4, + [1, 2, 3, u32::MAX], + [5, 6, 7, 8], + [0, 12, 0, 0] + } + + test_vec_2! { test_vec_add_u128, vec_add_u128, u8x16, + [0x01, 0x05, 0x0F, 0x1A, 0x2F, 0x3F, 0x50, 0x65, + 0x7A, 0x8F, 0x9A, 0xAD, 0xB0, 0xC3, 0xD5, 0xE8], + [0xF0, 0xEF, 0xC3, 0xB1, 0x92, 0x71, 0x5A, 0x43, + 0x3B, 0x29, 0x13, 0x04, 0xD7, 0xA1, 0x8C, 0x76], + [0xF1, 0xF4, 0xD2, 0xCB, 0xC1, 0xB0, 0xAA, 0xA8, 0xB5, 0xB8, 0xAD, 0xB2, 0x88, 0x65, 0x62, 0x5E] + } + + #[simd_test(enable = "vector")] + fn test_vec_addc_u128() { + unsafe { + let a = u128::MAX; + let b = 1u128; + + let d: u128 = transmute(vec_addc_u128(transmute(a), transmute(b))); + assert!(a.checked_add(b).is_none()); + assert_eq!(d, 1); + + let a = 1u128; + let b = 1u128; + + let d: u128 = transmute(vec_addc_u128(transmute(a), transmute(b))); + assert!(a.checked_add(b).is_some()); + assert_eq!(d, 0); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_subc_u128() { + unsafe { + let a = 0u128; + let b = 1u128; + + let d: u128 = transmute(vec_subc_u128(transmute(a), transmute(b))); + assert!(a.checked_sub(b).is_none()); + assert_eq!(d, 0); + + let a = 1u128; + let b = 1u128; + + let d: u128 = transmute(vec_subc_u128(transmute(a), transmute(b))); + assert!(a.checked_sub(b).is_some()); + assert_eq!(d, 1); + } + } + + test_vec_2! { test_vec_mule_u, vec_mule, u16x8, u16x8 -> u32x4, + [0xFFFF, 0, 2, 0, 2, 0, 1, 0], + [0xFFFF, 0, 4, 0, 0xFFFF, 0, 2, 0], + [0xFFFE_0001, 8, 0x0001_FFFE, 2] + } + + test_vec_2! { test_vec_mule_i, vec_mule, i16x8, i16x8 -> i32x4, + [i16::MIN, 0, -2, 0, 2, 0, 1, 0], + [i16::MIN, 0, 4, 0, i16::MAX, 0, 2, 0], + [0x4000_0000, -8, 0xFFFE, 2] + } + + test_vec_2! { test_vec_mulo_u, vec_mulo, u16x8, u16x8 -> u32x4, + [0, 0xFFFF, 0, 2, 0, 2, 0, 1], + [0, 0xFFFF, 0, 4, 0, 0xFFFF, 0, 2], + [0xFFFE_0001, 8, 0x0001_FFFE, 2] + } + + test_vec_2! { test_vec_mulo_i, vec_mulo, i16x8, i16x8 -> i32x4, + [0, i16::MIN, 0, -2, 0, 2, 0, 1], + [0, i16::MIN, 0, 4, 0, i16::MAX, 0, 2], + [0x4000_0000, -8, 0xFFFE, 2] + } + + test_vec_2! { test_vec_mulh_u, vec_mulh, u32x4, u32x4 -> u32x4, + [u32::MAX, 2, 2, 1], + [u32::MAX, 4, u32::MAX, 2], + [u32::MAX - 1, 0, 1, 0] + } + + test_vec_2! { test_vec_mulh_i, vec_mulh, i32x4, i32x4 -> i32x4, + [i32::MIN, -2, 2, 1], + [i32::MIN, 4, i32::MAX, 2], + [0x4000_0000, -1, 0, 0] + } + + test_vec_2! { test_vec_gfmsum_1, vec_gfmsum, u16x8, u16x8 -> u32x4, + [0x1234, 0x5678, 0x9ABC, 0xDEF0, 0x1357, 0x2468, 0xACE0, 0xBDF0], + [0xFFFF, 0x0001, 0x8000, 0x7FFF, 0xAAAA, 0x5555, 0x1234, 0x5678], + [0xE13A794, 0x68764A50, 0x94AA3E, 0x2C93F300] + } + + test_vec_2! 
{ test_vec_gfmsum_2, vec_gfmsum, u16x8, u16x8 -> u32x4, + [0x0000, 0xFFFF, 0xAAAA, 0x5555, 0x1234, 0x5678, 0x9ABC, 0xDEF0], + [0xFFFF, 0x0000, 0x5555, 0xAAAA, 0x0001, 0x8000, 0x7FFF, 0x1357], + [0, 0, 0x2B3C1234, 0x3781D244] + } + + #[simd_test(enable = "vector")] + fn test_vec_gfmsum_128() { + let a = vector_unsigned_long_long([1, 2]); + let b = vector_unsigned_long_long([3, 4]); + + let d: u128 = unsafe { transmute(vec_gfmsum_128(a, b)) }; + assert_eq!(d, 11); + + let a = vector_unsigned_long_long([0x0101010101010101, 0x0202020202020202]); + let b = vector_unsigned_long_long([0x0404040404040404, 0x0505050505050505]); + + let d: u128 = unsafe { transmute(vec_gfmsum_128(a, b)) }; + assert_eq!(d, 0xE000E000E000E000E000E000E000E); + } + + #[simd_test(enable = "vector-enhancements-1")] + fn test_vec_bperm_u128() { + let a = vector_unsigned_char([65, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]); + let b = vector_unsigned_char([ + 0, 0, 0, 0, 1, 1, 1, 1, 128, 128, 128, 128, 255, 255, 255, 255, + ]); + let d = unsafe { vec_bperm_u128(a, b) }; + assert_eq!(d.as_array(), &[0xF00, 0]); + } + + #[simd_test(enable = "vector")] + fn test_vec_sel() { + let a = vector_signed_int([1, 2, 3, 4]); + let b = vector_signed_int([5, 6, 7, 8]); + + let e = vector_unsigned_int([9, 10, 11, 12]); + let f = vector_unsigned_int([9, 9, 11, 11]); + + let c: vector_bool_int = unsafe { simd_eq(e, f) }; + assert_eq!(c.as_array(), &[!0, 0, !0, 0]); + let d: vector_signed_int = unsafe { vec_sel(a, b, c) }; + assert_eq!(d.as_array(), &[5, 2, 7, 4]); + } + + #[simd_test(enable = "vector")] + fn test_vec_gather_element() { + let a1: [u32; 10] = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]; + let a2: [u32; 10] = [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]; + + let v1 = vector_unsigned_int([1, 2, 3, 4]); + let v2 = vector_unsigned_int([1, 2, 3, 4]); + + let sizeof_int = core::mem::size_of::<u32>() as u32; + let v3 = vector_unsigned_int([ + 5 * sizeof_int, + 8 * sizeof_int, + 9 * sizeof_int, + 6 * sizeof_int, + ]); + + unsafe { + let d1 = vec_gather_element::<_, 0>(v1, v3, a1.as_ptr()); + assert_eq!(d1.as_array(), &[15, 2, 3, 4]); + let d2 = vec_gather_element::<_, 0>(v2, v3, a2.as_ptr()); + assert_eq!(d2.as_array(), &[25, 2, 3, 4]); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_fp_test_data_class() { + let mut cc = 42; + + let v1 = vector_double([0.0, f64::NAN]); + let v2 = vector_double([f64::INFINITY, 1.0]); + let v3 = vector_double([1.0, 2.0]); + + unsafe { + let d = vec_fp_test_data_class::<_, __VEC_CLASS_FP_ZERO>(v1, &mut cc); + assert_eq!(cc, 1); + assert_eq!(d.as_array(), &[!0, 0]); + + let d = vec_fp_test_data_class::<_, __VEC_CLASS_FP_NAN>(v1, &mut cc); + assert_eq!(cc, 1); + assert_eq!(d.as_array(), &[0, !0]); + + let d = vec_fp_test_data_class::<_, __VEC_CLASS_FP_INFINITY>(v2, &mut cc); + assert_eq!(cc, 1); + assert_eq!(d.as_array(), &[!0, 0]); + + let d = vec_fp_test_data_class::<_, __VEC_CLASS_FP_INFINITY_N>(v2, &mut cc); + assert_eq!(cc, 3); + assert_eq!(d.as_array(), &[0, 0]); + + let d = vec_fp_test_data_class::<_, __VEC_CLASS_FP_NORMAL>(v2, &mut cc); + assert_eq!(cc, 1); + assert_eq!(d.as_array(), &[0, !0]); + + let d = vec_fp_test_data_class::<_, __VEC_CLASS_FP_NORMAL>(v3, &mut cc); + assert_eq!(cc, 0); + assert_eq!(d.as_array(), &[!0, !0]); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_fp_any_all_nan_numeric() { + unsafe { + assert_eq!( + vec_all_nan(vector_double([f64::NAN, f64::NAN])), + i32::from(true) + ); + assert_eq!( + vec_all_nan(vector_double([f64::NAN, 1.0])), + i32::from(false) + ); + 
assert_eq!(vec_all_nan(vector_double([0.0, 1.0])), i32::from(false)); + + assert_eq!( + vec_any_nan(vector_double([f64::NAN, f64::NAN])), + i32::from(true) + ); + assert_eq!(vec_any_nan(vector_double([f64::NAN, 1.0])), i32::from(true)); + assert_eq!(vec_any_nan(vector_double([0.0, 1.0])), i32::from(false)); + + assert_eq!( + vec_all_numeric(vector_double([f64::NAN, f64::NAN])), + i32::from(false) + ); + assert_eq!( + vec_all_numeric(vector_double([f64::NAN, 1.0])), + i32::from(false) + ); + assert_eq!(vec_all_numeric(vector_double([0.0, 1.0])), i32::from(true)); + + assert_eq!( + vec_any_numeric(vector_double([f64::NAN, f64::NAN])), + i32::from(false) + ); + assert_eq!( + vec_any_numeric(vector_double([f64::NAN, 1.0])), + i32::from(true) + ); + assert_eq!(vec_any_numeric(vector_double([0.0, 1.0])), i32::from(true)); + + // "numeric" means "not NaN". infinities are numeric + assert_eq!( + vec_all_numeric(vector_double([f64::INFINITY, f64::NEG_INFINITY])), + i32::from(true) + ); + assert_eq!( + vec_any_numeric(vector_double([f64::INFINITY, f64::NEG_INFINITY])), + i32::from(true) + ); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_test_mask() { + unsafe { + let v = vector_unsigned_long_long([0xFF00FF00FF00FF00; 2]); + let m = vector_unsigned_long_long([0x0000FF000000FF00; 2]); + assert_eq!(vec_test_mask(v, m), 3); + + let v = vector_unsigned_long_long([u64::MAX; 2]); + let m = vector_unsigned_long_long([0; 2]); + assert_eq!(vec_test_mask(v, m), 0); + + let v = vector_unsigned_long_long([0; 2]); + let m = vector_unsigned_long_long([u64::MAX; 2]); + assert_eq!(vec_test_mask(v, m), 0); + + let v = vector_unsigned_long_long([0xAAAAAAAAAAAAAAAA; 2]); + let m = vector_unsigned_long_long([0xAAAAAAAAAAAAAAAA; 2]); + assert_eq!(vec_test_mask(v, m), 3); + } + } + + #[simd_test(enable = "vector-enhancements-2")] + fn test_vec_search_string_cc() { + unsafe { + let b = vector_unsigned_char(*b"ABCD------------"); + let c = vector_unsigned_char([4; 16]); + let mut d = 0i32; + + let haystack = vector_unsigned_char(*b"__ABCD__________"); + let result = vec_search_string_cc(haystack, b, c, &mut d); + assert_eq!(result.as_array()[7], 2); + assert_eq!(d, 2); + + let haystack = vector_unsigned_char(*b"___ABCD_________"); + let result = vec_search_string_cc(haystack, b, c, &mut d); + assert_eq!(result.as_array()[7], 3); + assert_eq!(d, 2); + + let haystack = vector_unsigned_char(*b"________________"); + let result = vec_search_string_cc(haystack, b, c, &mut d); + assert_eq!(result.as_array()[7], 16); + assert_eq!(d, 0); + + let haystack = vector_unsigned_char(*b"______\0_________"); + let result = vec_search_string_cc(haystack, b, c, &mut d); + assert_eq!(result.as_array()[7], 16); + assert_eq!(d, 0); + + let haystack = vector_unsigned_char(*b"______\0__ABCD___"); + let result = vec_search_string_cc(haystack, b, c, &mut d); + assert_eq!(result.as_array()[7], 9); + assert_eq!(d, 2); + } + } + + #[simd_test(enable = "vector-enhancements-2")] + fn test_vec_search_string_until_zero_cc() { + unsafe { + let b = vector_unsigned_char(*b"ABCD\0\0\0\0\0\0\0\0\0\0\0\0"); + let c = vector_unsigned_char([16; 16]); + let mut d = 0i32; + + let haystack = vector_unsigned_char(*b"__ABCD__________"); + let result = vec_search_string_until_zero_cc(haystack, b, c, &mut d); + assert_eq!(result.as_array()[7], 2); + assert_eq!(d, 2); + + let haystack = vector_unsigned_char(*b"___ABCD_________"); + let result = vec_search_string_until_zero_cc(haystack, b, c, &mut d); + assert_eq!(result.as_array()[7], 3); + assert_eq!(d, 2); + 
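// Condition-code reading of these cases (our summary, inferred from the + // asserted values; see the z/Architecture VSTRS description for the + // authoritative table): cc = 2 reports a full match, cc = 0 no match, and + // cc = 1, in this until-zero variant, a zero byte that ends the search.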
+ let haystack = vector_unsigned_char(*b"________________"); + let result = vec_search_string_until_zero_cc(haystack, b, c, &mut d); + assert_eq!(result.as_array()[7], 16); + assert_eq!(d, 0); + + let haystack = vector_unsigned_char(*b"______\0_________"); + let result = vec_search_string_until_zero_cc(haystack, b, c, &mut d); + assert_eq!(result.as_array()[7], 16); + assert_eq!(d, 1); + + let haystack = vector_unsigned_char(*b"______\0__ABCD___"); + let result = vec_search_string_until_zero_cc(haystack, b, c, &mut d); + assert_eq!(result.as_array()[7], 16); + assert_eq!(d, 1); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_doublee() { + unsafe { + let v = vector_float([1.0, 2.0, 3.0, 4.0]); + assert_eq!(vec_doublee(v).as_array(), &[1.0, 3.0]); + + let v = vector_float([f32::NAN, 2.0, f32::INFINITY, 4.0]); + let d = vec_doublee(v); + assert!(d.as_array()[0].is_nan()); + assert_eq!(d.as_array()[1], f64::INFINITY); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_floate() { + // NOTE: indices 1 and 3 can have an arbitrary value. With the C version + // these are poison values; our version initializes the memory, but its + // value still should not be relied upon by application code. + unsafe { + let v = vector_double([1.0, 2.0]); + let d = vec_floate(v); + assert_eq!(d.as_array()[0], 1.0); + assert_eq!(d.as_array()[2], 2.0); + + let v = vector_double([f64::NAN, f64::INFINITY]); + let d = vec_floate(v); + assert!(d.as_array()[0].is_nan()); + assert_eq!(d.as_array()[2], f32::INFINITY); + + let v = vector_double([f64::MIN, f64::MAX]); + let d = vec_floate(v); + assert_eq!(d.as_array()[0], f64::MIN as f32); + assert_eq!(d.as_array()[2], f64::MAX as f32); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_extend_s64() { + unsafe { + let v = vector_signed_char([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); + assert_eq!(vec_extend_s64(v).as_array(), &[7, 15]); + + let v = vector_signed_short([0, 1, 2, 3, 4, 5, 6, 7]); + assert_eq!(vec_extend_s64(v).as_array(), &[3, 7]); + + let v = vector_signed_int([0, 1, 2, 3]); + assert_eq!(vec_extend_s64(v).as_array(), &[1, 3]); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_signed() { + unsafe { + let v = vector_float([1.0, 2.5, -2.5, -0.0]); + assert_eq!(vec_signed(v).as_array(), &[1, 2, -2, 0]); + + let v = vector_double([2.5, -2.5]); + assert_eq!(vec_signed(v).as_array(), &[2, -2]); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_unsigned() { + // NOTE: converting a negative floating point value is UB!
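+ // Hence the all-non-negative inputs below; e.g. a hypothetical + // vec_unsigned(vector_float([-1.0, 0.0, 0.0, 0.0])) would have no defined + // result and must not be added to this test.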
+ unsafe { + let v = vector_float([1.0, 2.5, 3.5, 0.0]); + assert_eq!(vec_unsigned(v).as_array(), &[1, 2, 3, 0]); + + let v = vector_double([2.5, 3.5]); + assert_eq!(vec_unsigned(v).as_array(), &[2, 3]); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_cp_until_zero() { + unsafe { + let v = vector_signed_int([1, 2, 3, 4]); + let d = vec_cp_until_zero(v); + assert_eq!(d.as_array(), &[1, 2, 3, 4]); + + let v = vector_signed_int([1, 2, 0, 4]); + let d = vec_cp_until_zero(v); + assert_eq!(d.as_array(), &[1, 2, 0, 0]); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_cp_until_zero_cc() { + let mut cc = 0; + unsafe { + let v = vector_signed_int([1, 2, 3, 4]); + let d = vec_cp_until_zero_cc(v, &mut cc); + assert_eq!(d.as_array(), &[1, 2, 3, 4]); + assert_eq!(cc, 3); + + let v = vector_signed_int([1, 2, 0, 4]); + let d = vec_cp_until_zero_cc(v, &mut cc); + assert_eq!(d.as_array(), &[1, 2, 0, 0]); + assert_eq!(cc, 0); + } + } + + #[simd_test(enable = "vector-enhancements-1")] + fn test_vec_msum_u128() { + let a = vector_unsigned_long_long([1, 2]); + let b = vector_unsigned_long_long([3, 4]); + + unsafe { + let c: vector_unsigned_char = transmute(100u128); + + let d: u128 = transmute(vec_msum_u128::<0>(a, b, c)); + assert_eq!(d, (1 * 3) + (2 * 4) + 100); + + let d: u128 = transmute(vec_msum_u128::<4>(a, b, c)); + assert_eq!(d, (1 * 3) + (2 * 4) * 2 + 100); + + let d: u128 = transmute(vec_msum_u128::<8>(a, b, c)); + assert_eq!(d, (1 * 3) * 2 + (2 * 4) + 100); + + let d: u128 = transmute(vec_msum_u128::<12>(a, b, c)); + assert_eq!(d, (1 * 3) * 2 + (2 * 4) * 2 + 100); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_sld() { + let a = vector_unsigned_long_long([0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA]); + let b = vector_unsigned_long_long([0xBBBBBBBBBBBBBBBB, 0xBBBBBBBBBBBBBBBB]); + + unsafe { + let d = vec_sld::<_, 4>(a, b); + assert_eq!(d.as_array(), &[0xAAAAAAAAAAAAAAAA, 0xAAAAAAAABBBBBBBB]); + } + } + + #[simd_test(enable = "vector")] + fn test_vec_sldw() { + let a = vector_unsigned_long_long([0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA]); + let b = vector_unsigned_long_long([0xBBBBBBBBBBBBBBBB, 0xBBBBBBBBBBBBBBBB]); + + unsafe { + let d = vec_sldw::<_, 1>(a, b); + assert_eq!(d.as_array(), &[0xAAAAAAAAAAAAAAAA, 0xAAAAAAAABBBBBBBB]); + } + } + + #[simd_test(enable = "vector-enhancements-2")] + fn test_vec_sldb() { + let a = vector_unsigned_long_long([0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA]); + let b = vector_unsigned_long_long([0xBBBBBBBBBBBBBBBB, 0xBBBBBBBBBBBBBBBB]); + + unsafe { + let d = vec_sldb::<_, 4>(a, b); + assert_eq!(d.as_array(), &[0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAB]); + } + } + + #[simd_test(enable = "vector-enhancements-2")] + fn test_vec_srdb() { + let a = vector_unsigned_long_long([0xAAAAAAAAAAAAAAAA, 0xAAAAAAAAAAAAAAAA]); + let b = vector_unsigned_long_long([0xBBBBBBBBBBBBBBBB, 0xBBBBBBBBBBBBBBBB]); + + unsafe { + let d = vec_srdb::<_, 4>(a, b); + assert_eq!(d.as_array(), &[0xABBBBBBBBBBBBBBB, 0xBBBBBBBBBBBBBBBB]); + } + } + + const GT: u32 = 0x20000000; + const LT: u32 = 0x40000000; + const EQ: u32 = 0x80000000; + + #[simd_test(enable = "vector")] + fn test_vec_cmprg() { + let a = vector_unsigned_int([11, 22, 33, 44]); + let b = vector_unsigned_int([10, 20, 30, 40]); + + let c = vector_unsigned_int([GT, LT, GT, LT]); + let d = unsafe { vec_cmprg(a, b, c) }; + assert_eq!(d.as_array(), &[!0, 0, !0, 0]); + + let c = vector_unsigned_int([GT, LT, 0, 0]); + let d = unsafe { vec_cmprg(a, b, c) }; + assert_eq!(d.as_array(), &[!0, 0, 0, 0]); + + let a = 
vector_unsigned_int([11, 22, 33, 30]); + let b = vector_unsigned_int([10, 20, 30, 30]); + + let c = vector_unsigned_int([GT, LT, EQ, EQ]); + let d = unsafe { vec_cmprg(a, b, c) }; + assert_eq!(d.as_array(), &[!0, 0, 0, !0]); + } + + #[simd_test(enable = "vector")] + fn test_vec_cmpnrg() { + let a = vector_unsigned_int([11, 22, 33, 44]); + let b = vector_unsigned_int([10, 20, 30, 40]); + + let c = vector_unsigned_int([GT, LT, GT, LT]); + let d = unsafe { vec_cmpnrg(a, b, c) }; + assert_eq!(d.as_array(), &[0, !0, 0, !0]); + + let c = vector_unsigned_int([GT, LT, 0, 0]); + let d = unsafe { vec_cmpnrg(a, b, c) }; + assert_eq!(d.as_array(), &[0, !0, !0, !0]); + + let a = vector_unsigned_int([11, 22, 33, 30]); + let b = vector_unsigned_int([10, 20, 30, 30]); + + let c = vector_unsigned_int([GT, LT, EQ, EQ]); + let d = unsafe { vec_cmpnrg(a, b, c) }; + assert_eq!(d.as_array(), &[0, !0, !0, 0]); + } + + #[simd_test(enable = "vector")] + fn test_vec_cmprg_idx() { + let a = vector_unsigned_int([1, 11, 22, 33]); + let b = vector_unsigned_int([10, 20, 30, 40]); + + let c = vector_unsigned_int([GT, LT, GT, LT]); + let d = unsafe { vec_cmprg_idx(a, b, c) }; + assert_eq!(d.as_array(), &[0, 4, 0, 0]); + } + + #[simd_test(enable = "vector")] + fn test_vec_cmpnrg_idx() { + let a = vector_unsigned_int([1, 11, 22, 33]); + let b = vector_unsigned_int([10, 20, 30, 40]); + + let c = vector_unsigned_int([GT, LT, GT, LT]); + let d = unsafe { vec_cmpnrg_idx(a, b, c) }; + assert_eq!(d.as_array(), &[0, 0, 0, 0]); + } + + #[simd_test(enable = "vector")] + fn test_vec_cmprg_or_0_idx() { + let a = vector_unsigned_int([1, 0, 22, 33]); + let b = vector_unsigned_int([10, 20, 30, 40]); + + let c = vector_unsigned_int([GT, LT, GT, LT]); + let d = unsafe { vec_cmprg_or_0_idx(a, b, c) }; + assert_eq!(d.as_array(), &[0, 4, 0, 0]); + } + + #[simd_test(enable = "vector")] + fn test_vec_cmpnrg_or_0_idx() { + let a = vector_unsigned_int([11, 33, 0, 22]); + let b = vector_unsigned_int([10, 20, 30, 40]); + + let c = vector_unsigned_int([GT, LT, GT, LT]); + let d = unsafe { vec_cmpnrg_or_0_idx(a, b, c) }; + assert_eq!(d.as_array(), &[0, 8, 0, 0]); + } + + test_vec_2! { test_vec_cmpgt, vec_cmpgt, f32x4, f32x4 -> i32x4, + [1.0, f32::NAN, f32::NAN, 3.14], + [2.0, f32::NAN, 5.0, 2.0], + [0, 0, 0, !0] + } + + test_vec_2! { test_vec_cmpge, vec_cmpge, f32x4, f32x4 -> i32x4, + [1.0, f32::NAN, f32::NAN, 3.14], + [1.0, f32::NAN, 5.0, 2.0], + [!0, 0, 0, !0] + } + + test_vec_2! { test_vec_cmplt, vec_cmplt, f32x4, f32x4 -> i32x4, + [1.0, f32::NAN, f32::NAN, 2.0], + [2.0, f32::NAN, 5.0, 2.0], + [!0, 0, 0, 0] + } + + test_vec_2! { test_vec_cmple, vec_cmple, f32x4, f32x4 -> i32x4, + [1.0, f32::NAN, f32::NAN, 2.0], + [1.0, f32::NAN, 5.0, 3.14], + [!0, 0, 0, !0] + } + + test_vec_2! { test_vec_cmpeq, vec_cmpeq, f32x4, f32x4 -> i32x4, + [1.0, f32::NAN, f32::NAN, 2.0], + [1.0, f32::NAN, 5.0, 3.14], + [!0, 0, 0, 0] + } + + test_vec_2! 
{ test_vec_cmpne, vec_cmpne, f32x4, f32x4 -> i32x4, + [1.0, f32::NAN, f32::NAN, 2.0], + [1.0, f32::NAN, 5.0, 3.14], + [0, !0, !0, !0] + } + + #[simd_test(enable = "vector")] + fn test_vec_meadd() { + let a = vector_unsigned_short([1, 0, 2, 0, 3, 0, 4, 0]); + let b = vector_unsigned_short([5, 0, 6, 0, 7, 0, 8, 0]); + let c = vector_unsigned_int([2, 2, 2, 2]); + + let d = unsafe { vec_meadd(a, b, c) }; + assert_eq!(d.as_array(), &[7, 14, 23, 34]); + + let a = vector_signed_short([1, 0, 2, 0, 3, 0, 4, 0]); + let b = vector_signed_short([5, 0, 6, 0, 7, 0, 8, 0]); + let c = vector_signed_int([2, -2, 2, -2]); + + let d = unsafe { vec_meadd(a, b, c) }; + assert_eq!(d.as_array(), &[7, 10, 23, 30]); + } + + #[simd_test(enable = "vector")] + fn test_vec_moadd() { + let a = vector_unsigned_short([0, 1, 0, 2, 0, 3, 0, 4]); + let b = vector_unsigned_short([0, 5, 0, 6, 0, 7, 0, 8]); + let c = vector_unsigned_int([2, 2, 2, 2]); + + let d = unsafe { vec_moadd(a, b, c) }; + assert_eq!(d.as_array(), &[7, 14, 23, 34]); + + let a = vector_signed_short([0, 1, 0, 2, 0, 3, 0, 4]); + let b = vector_signed_short([0, 5, 0, 6, 0, 7, 0, 8]); + let c = vector_signed_int([2, -2, 2, -2]); + + let d = unsafe { vec_moadd(a, b, c) }; + assert_eq!(d.as_array(), &[7, 10, 23, 30]); + } + + #[simd_test(enable = "vector")] + fn test_vec_mhadd() { + let a = vector_unsigned_int([1, 2, 3, 4]); + let b = vector_unsigned_int([5, 6, 7, 8]); + let c = vector_unsigned_int([u32::MAX; 4]); + + let d = unsafe { vec_mhadd(a, b, c) }; + assert_eq!(d.as_array(), &[1, 1, 1, 1]); + + let a = vector_signed_int([-1, -2, -3, -4]); + let b = vector_signed_int([5, 6, 7, 8]); + let c = vector_signed_int([i32::MIN; 4]); + + let d = unsafe { vec_mhadd(a, b, c) }; + assert_eq!(d.as_array(), &[-1, -1, -1, -1]); + } + + #[simd_test(enable = "vector")] + fn test_vec_mladd() { + let a = vector_unsigned_int([1, 2, 3, 4]); + let b = vector_unsigned_int([5, 6, 7, 8]); + let c = vector_unsigned_int([2, 2, 2, 2]); + + let d = unsafe { vec_mladd(a, b, c) }; + assert_eq!(d.as_array(), &[7, 14, 23, 34]); + + let a = vector_signed_int([-1, -2, -3, -4]); + let b = vector_signed_int([5, 6, 7, 8]); + let c = vector_signed_int([2, 2, 2, 2]); + + let d = unsafe { vec_mladd(a, b, c) }; + assert_eq!(d.as_array(), &[-3, -10, -19, -30]); + } } diff --git a/crates/core_arch/src/simd.rs b/crates/core_arch/src/simd.rs index 29e4b3e7a2..25834943f0 100644 --- a/crates/core_arch/src/simd.rs +++ b/crates/core_arch/src/simd.rs @@ -5,7 +5,7 @@ macro_rules! simd_ty { ($id:ident [$elem_type:ty ; $len:literal]: $($param_name:ident),*) => { #[repr(simd)] - #[derive(Copy, Clone, Debug, PartialEq)] + #[derive(Copy, Clone)] pub(crate) struct $id([$elem_type; $len]); #[allow(clippy::use_self)] @@ -38,13 +38,32 @@ macro_rules! simd_ty { /// Use for testing only. // FIXME: Workaround rust@60637 #[inline(always)] - pub(crate) fn extract(self, index: usize) -> $elem_type { - assert!(index < $len); - // Now that we know this is in-bounds, use pointer arithmetic to access the right element. - let self_ptr = &self as *const Self as *const $elem_type; - unsafe { - self_ptr.add(index).read() - } + pub(crate) fn extract(&self, index: usize) -> $elem_type { + self.as_array()[index] + } + + #[inline] + pub(crate) fn as_array(&self) -> &[$elem_type; $len] { + let simd_ptr: *const Self = self; + let array_ptr: *const [$elem_type; $len] = simd_ptr.cast(); + // SAFETY: We can always read the prefix of a simd type as an array. 
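+ // (Concretely, for e.g. u32x4 the element array is the entire 16-byte + // value; for padded widths it is a leading prefix.)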
+ // There might be more padding afterwards for some widths, but + // that's not a problem for reading less than that. + unsafe { &*array_ptr } + } + } + + impl core::cmp::PartialEq for $id { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.as_array() == other.as_array() + } + } + + impl core::fmt::Debug for $id { + #[inline] + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + debug_simd_finish(f, stringify!($id), self.as_array()) } } } @@ -53,7 +72,7 @@ macro_rules! simd_ty { macro_rules! simd_m_ty { ($id:ident [$elem_type:ident ; $len:literal]: $($param_name:ident),*) => { #[repr(simd)] - #[derive(Copy, Clone, Debug, PartialEq)] + #[derive(Copy, Clone)] pub(crate) struct $id([$elem_type; $len]); #[allow(clippy::use_self)] @@ -79,6 +98,30 @@ macro_rules! simd_m_ty { // a simd type with exactly one element. unsafe { simd_shuffle!(one, one, [0; $len]) } } + + #[inline] + pub(crate) fn as_array(&self) -> &[$elem_type; $len] { + let simd_ptr: *const Self = self; + let array_ptr: *const [$elem_type; $len] = simd_ptr.cast(); + // SAFETY: We can always read the prefix of a simd type as an array. + // There might be more padding afterwards for some widths, but + // that's not a problem for reading less than that. + unsafe { &*array_ptr } + } + } + + impl core::cmp::PartialEq for $id { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.as_array() == other.as_array() + } + } + + impl core::fmt::Debug for $id { + #[inline] + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + debug_simd_finish(f, stringify!($id), self.as_array()) + } } } } @@ -968,7 +1011,7 @@ simd_ty!( pub(crate) fn debug_simd_finish<T: crate::fmt::Debug, const N: usize>( formatter: &mut crate::fmt::Formatter<'_>, type_name: &str, - array: [T; N], + array: &[T; N], ) -> crate::fmt::Result { crate::fmt::Formatter::debug_tuple_fields_finish( formatter, diff --git a/crates/core_arch/src/wasm32/simd128.rs b/crates/core_arch/src/wasm32/simd128.rs index a774b68cfd..1a1e7dc780 100644 --- a/crates/core_arch/src/wasm32/simd128.rs +++ b/crates/core_arch/src/wasm32/simd128.rs @@ -2318,7 +2318,25 @@ pub fn u8x16_narrow_i16x8(a: v128, b: v128) -> v128 { #[doc(alias("i8x16.shl"))] #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn i8x16_shl(a: v128, amt: u32) -> v128 { - unsafe { simd_shl(a.as_i8x16(), simd::i8x16::splat(amt as i8)).v128() } + // SAFETY: the safety of this intrinsic relies on the fact that the + // shift amount for each lane is less than the number of bits in the input + // lane. In this case the input has 8-bit lanes but the shift amount above + // is `u32`, so a mask is required to discard all the upper bits of `amt` to + // ensure that the safety condition is met. + // + // Note that this mask is not a deviation from the native WebAssembly + // instruction, which defines the operation as performing the same mask + // implicitly. The explicit mask is nonetheless required since the operation + // must have defined semantics in LLVM, not just in WebAssembly. + // + // Finally note that this mask operation is not actually emitted into the + // final binary itself. LLVM understands that the wasm operation implicitly + // masks, so it knows this mask operation is redundant. + // + // Basically the extra mask here is required as a bridge from the documented + // semantics through LLVM back out to WebAssembly. Both ends have the + // documented semantics, and the mask is required by LLVM in the middle.
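+ // + // Illustration (ours, not part of the upstream comment): with 8-bit lanes + // only the low three bits of `amt` are used, so i8x16_shl(v, 9) behaves + // like i8x16_shl(v, 9 & 0x7), i.e. i8x16_shl(v, 1).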
+ unsafe { simd_shl(a.as_i8x16(), simd::i8x16::splat((amt & 0x7) as i8)).v128() } } #[stable(feature = "wasm_simd", since = "1.54.0")] @@ -2335,7 +2353,9 @@ pub use i8x16_shl as u8x16_shl; #[doc(alias("i8x16.shr_s"))] #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn i8x16_shr(a: v128, amt: u32) -> v128 { - unsafe { simd_shr(a.as_i8x16(), simd::i8x16::splat(amt as i8)).v128() } + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shr(a.as_i8x16(), simd::i8x16::splat((amt & 0x7) as i8)).v128() } } /// Shifts each lane to the right by the specified number of bits, shifting in @@ -2349,7 +2369,9 @@ pub fn i8x16_shr(a: v128, amt: u32) -> v128 { #[doc(alias("i8x16.shr_u"))] #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn u8x16_shr(a: v128, amt: u32) -> v128 { - unsafe { simd_shr(a.as_u8x16(), simd::u8x16::splat(amt as u8)).v128() } + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shr(a.as_u8x16(), simd::u8x16::splat((amt & 0x7) as u8)).v128() } } /// Adds two 128-bit vectors as if they were two packed sixteen 8-bit integers. @@ -2686,7 +2708,9 @@ pub use i16x8_extend_high_u8x16 as u16x8_extend_high_u8x16; #[doc(alias("i16x8.shl"))] #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn i16x8_shl(a: v128, amt: u32) -> v128 { - unsafe { simd_shl(a.as_i16x8(), simd::i16x8::splat(amt as i16)).v128() } + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shl(a.as_i16x8(), simd::i16x8::splat((amt & 0xf) as i16)).v128() } } #[stable(feature = "wasm_simd", since = "1.54.0")] @@ -2703,7 +2727,9 @@ pub use i16x8_shl as u16x8_shl; #[doc(alias("i16x8.shr_s"))] #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn i16x8_shr(a: v128, amt: u32) -> v128 { - unsafe { simd_shr(a.as_i16x8(), simd::i16x8::splat(amt as i16)).v128() } + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shr(a.as_i16x8(), simd::i16x8::splat((amt & 0xf) as i16)).v128() } } /// Shifts each lane to the right by the specified number of bits, shifting in @@ -2717,7 +2743,9 @@ pub fn i16x8_shr(a: v128, amt: u32) -> v128 { #[doc(alias("i16x8.shr_u"))] #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn u16x8_shr(a: v128, amt: u32) -> v128 { - unsafe { simd_shr(a.as_u16x8(), simd::u16x8::splat(amt as u16)).v128() } + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shr(a.as_u16x8(), simd::u16x8::splat((amt & 0xf) as u16)).v128() } } /// Adds two 128-bit vectors as if they were two packed eight 16-bit integers. @@ -3136,7 +3164,9 @@ pub use i32x4_extend_high_u16x8 as u32x4_extend_high_u16x8; #[doc(alias("i32x4.shl"))] #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn i32x4_shl(a: v128, amt: u32) -> v128 { - unsafe { simd_shl(a.as_i32x4(), simd::i32x4::splat(amt as i32)).v128() } + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. 
+ unsafe { simd_shl(a.as_i32x4(), simd::i32x4::splat((amt & 0x1f) as i32)).v128() } } #[stable(feature = "wasm_simd", since = "1.54.0")] @@ -3153,7 +3183,9 @@ pub use i32x4_shl as u32x4_shl; #[doc(alias("i32x4.shr_s"))] #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn i32x4_shr(a: v128, amt: u32) -> v128 { - unsafe { simd_shr(a.as_i32x4(), simd::i32x4::splat(amt as i32)).v128() } + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shr(a.as_i32x4(), simd::i32x4::splat((amt & 0x1f) as i32)).v128() } } /// Shifts each lane to the right by the specified number of bits, shifting in @@ -3167,7 +3199,9 @@ pub fn i32x4_shr(a: v128, amt: u32) -> v128 { #[doc(alias("i32x4.shr_u"))] #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn u32x4_shr(a: v128, amt: u32) -> v128 { - unsafe { simd_shr(a.as_u32x4(), simd::u32x4::splat(amt)).v128() } + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shr(a.as_u32x4(), simd::u32x4::splat(amt & 0x1f)).v128() } } /// Adds two 128-bit vectors as if they were two packed four 32-bit integers. @@ -3502,7 +3536,9 @@ pub use i64x2_extend_high_u32x4 as u64x2_extend_high_u32x4; #[doc(alias("i64x2.shl"))] #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn i64x2_shl(a: v128, amt: u32) -> v128 { - unsafe { simd_shl(a.as_i64x2(), simd::i64x2::splat(amt as i64)).v128() } + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shl(a.as_i64x2(), simd::i64x2::splat((amt & 0x3f) as i64)).v128() } } #[stable(feature = "wasm_simd", since = "1.54.0")] @@ -3519,7 +3555,9 @@ pub use i64x2_shl as u64x2_shl; #[doc(alias("i64x2.shr_s"))] #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn i64x2_shr(a: v128, amt: u32) -> v128 { - unsafe { simd_shr(a.as_i64x2(), simd::i64x2::splat(amt as i64)).v128() } + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shr(a.as_i64x2(), simd::i64x2::splat((amt & 0x3f) as i64)).v128() } } /// Shifts each lane to the right by the specified number of bits, shifting in @@ -3533,7 +3571,9 @@ pub fn i64x2_shr(a: v128, amt: u32) -> v128 { #[doc(alias("i64x2.shr_u"))] #[stable(feature = "wasm_simd", since = "1.54.0")] pub fn u64x2_shr(a: v128, amt: u32) -> v128 { - unsafe { simd_shr(a.as_u64x2(), simd::u64x2::splat(amt as u64)).v128() } + // SAFETY: see i8x16_shl for more documentation why this is unsafe, + // essentially the shift amount must be valid hence the mask. + unsafe { simd_shr(a.as_u64x2(), simd::u64x2::splat((amt & 0x3f) as u64)).v128() } } /// Adds two 128-bit vectors as if they were two packed two 64-bit integers. 
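The masks added throughout the simd128.rs hunks above all follow one rule: the shift amount is reduced modulo the lane width. A minimal standalone sketch of that rule (ours, for illustration only; the helper name is invented and nothing like it is part of this patch):

// Reduce a shift amount modulo the lane width, as the intrinsics above do.
// `lane_bits` is one of 8, 16, 32, or 64, so `lane_bits - 1` is exactly the
// mask (0x7, 0xf, 0x1f, 0x3f) used in the hunks above.
fn wasm_shift_mask(lane_bits: u32, amt: u32) -> u32 {
    debug_assert!(lane_bits.is_power_of_two());
    amt & (lane_bits - 1)
}

fn main() {
    assert_eq!(wasm_shift_mask(8, 9), 1); // i8x16_shl(v, 9) acts like i8x16_shl(v, 1)
    assert_eq!(wasm_shift_mask(16, 16), 0); // shifting by the full lane width is a no-op
    assert_eq!(wasm_shift_mask(64, 65), 1); // u64x2_shr(v, 65) acts like u64x2_shr(v, 1)
}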
diff --git a/crates/intrinsic-test/missing_aarch64.txt b/crates/intrinsic-test/missing_aarch64.txt index 2f625d3e21..bbcfc40c69 100644 --- a/crates/intrinsic-test/missing_aarch64.txt +++ b/crates/intrinsic-test/missing_aarch64.txt @@ -30,6 +30,32 @@ vrnd32x_f64 vrnd32z_f64 vrnd64x_f64 vrnd64z_f64 +vluti2_lane_p16 +vluti2_lane_p8 +vluti2_lane_s16 +vluti2_lane_s8 +vluti2_lane_u16 +vluti2_lane_u8 +vluti2q_lane_p16 +vluti2q_lane_p8 +vluti2q_lane_s16 +vluti2q_lane_s8 +vluti2q_lane_u16 +vluti2q_lane_u8 +vluti4q_lane_f16_x2 +vluti4q_lane_p16_x2 +vluti4q_lane_p8 +vluti4q_lane_s16_x2 +vluti4q_lane_s8 +vluti4q_lane_u16_x2 +vluti4q_lane_u8 +vluti4q_laneq_f16_x2 +vluti4q_laneq_p16_x2 +vluti4q_laneq_p8 +vluti4q_laneq_s16_x2 +vluti4q_laneq_s8 +vluti4q_laneq_u16_x2 +vluti4q_laneq_u8 # Broken in Clang vcvth_s16_f16 diff --git a/crates/intrinsic-test/src/main.rs b/crates/intrinsic-test/src/main.rs index 0fa0268053..2d04f1cf34 100644 --- a/crates/intrinsic-test/src/main.rs +++ b/crates/intrinsic-test/src/main.rs @@ -240,7 +240,7 @@ fn compile_c( let arch_flags = if target.contains("v7") { "-march=armv8.6-a+crypto+crc+dotprod+fp16" } else { - "-march=armv8.6-a+crypto+sha3+crc+dotprod+fp16+faminmax" + "-march=armv8.6-a+crypto+sha3+crc+dotprod+fp16+faminmax+lut" }; let intrinsic_name = &intrinsic.name; diff --git a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml index 1356c609fe..41fdfc5382 100644 --- a/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml +++ b/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml @@ -89,6 +89,10 @@ aarch64-crc-stable: &aarch64-crc-stable neon-unstable-f16: &neon-unstable-f16 FnCall: [unstable, ['feature = "stdarch_neon_f16"', 'issue = "136306"']] +# #[unstable(feature = "stdarch_neon_feat_lut", issue = "138050")] +neon-unstable-feat-lut: &neon-unstable-feat-lut + FnCall: [unstable, ['feature = "stdarch_neon_feat_lut"', 'issue = "138050"']] + intrinsics: - name: "vaddd_{type}" doc: Add @@ -14013,3 +14017,271 @@ intrinsics: links: - link: "llvm.aarch64.neon.famin.{neon_type}" arch: aarch64,arm64ec + + - name: "vluti2{neon_type[0].lane_nox}" + doc: "Lookup table read with 2-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [target_feature, ['enable = {type[4]}']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 1']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [int8x8_t, uint8x8_t, int8x16_t, 'LANE >= 0 && LANE <= 1', '"neon,lut"'] + - [int8x16_t, uint8x8_t, int8x16_t, 'LANE >= 0 && LANE <= 1', '"neon,lut"'] + - [int16x4_t, uint8x8_t, int16x8_t, 'LANE >= 0 && LANE <= 3', '"neon,lut"'] + - [int16x8_t, uint8x8_t, int16x8_t, 'LANE >= 0 && LANE <= 3', '"neon,lut"'] + compose: + - FnCall: ['static_assert!', ['{type[3]}']] + - LLVMLink: + name: "vluti2{neon_type[0].lane_nox}" + arguments: + - 'a: {neon_type[0]}' + - 'b: {neon_type[1]}' + - 'n: i32' + links: + - link: "llvm.aarch64.neon.vluti2.lane.{neon_type[2]}.{neon_type[0]}" + arch: aarch64,arm64ec + - FnCall: ['_vluti2{neon_type[0].lane_nox}', [a, b, LANE]] + + - name: "vluti2{neon_type[0].lane_nox}" + doc: "Lookup table read with 2-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [target_feature, ['enable = "neon,lut"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 1']]}]] + - 
*neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [uint8x8_t, uint8x8_t, uint8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x8_t'] + - [uint8x16_t, uint8x8_t, uint8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x16_t'] + - [poly8x8_t, uint8x8_t, poly8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x8_t'] + - [poly8x16_t, uint8x8_t, poly8x16_t, 'LANE >= 0 && LANE <= 1', 'int8x16_t'] + - [uint16x4_t, uint8x8_t, uint16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x4_t'] + - [uint16x8_t, uint8x8_t, uint16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x8_t'] + - [poly16x4_t, uint8x8_t, poly16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x4_t'] + - [poly16x8_t, uint8x8_t, poly16x8_t, 'LANE >= 0 && LANE <= 3', 'int16x8_t'] + compose: + - FnCall: ['static_assert!', ['{type[3]}']] + - FnCall: + - transmute + - - FnCall: + - 'vluti2{neon_type[4].lane_nox}::<LANE>' + - - FnCall: [transmute, [a]] + - b + + - name: "vluti4{neon_type[0].lane_nox}" + doc: "Lookup table read with 4-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = {type[3]}']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [int8x16_t, uint8x8_t, 'LANE == 0', '"neon,lut"'] + compose: + - FnCall: ['static_assert!', ['{type[2]}']] + - LLVMLink: + name: "vluti4{neon_type[0].lane_nox}" + arguments: + - 'a: {neon_type[0]}' + - 'b: {neon_type[1]}' + - 'n: i32' + links: + - link: "llvm.aarch64.neon.vluti4q.lane.{neon_type[1]}" + arch: aarch64,arm64ec + - FnCall: ['_vluti4{neon_type[0].lane_nox}', [a, b, LANE]] + + - name: "vluti4{neon_type[0].lane_nox}" + doc: "Lookup table read with 4-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,lut"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [uint8x16_t, uint8x8_t, 'LANE == 0', int8x16_t] + - [poly8x16_t, uint8x8_t, 'LANE == 0', int8x16_t] + compose: + - FnCall: ['static_assert!', ['{type[2]}']] + - FnCall: + - transmute + - - FnCall: + - 'vluti4{neon_type[3].lane_nox}::<LANE>' + - - FnCall: [transmute, [a]] + - b + + - name: "vluti4{neon_type[0].laneq_nox}" + doc: "Lookup table read with 4-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,lut"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [int8x16_t, uint8x16_t, 'LANE >= 0 && LANE <= 1'] + compose: + - FnCall: ['static_assert!', ['{type[2]}']] + - LLVMLink: + name: "vluti4{neon_type[0].laneq_nox}" + arguments: + - 'a: {neon_type[0]}' + - 'b: {neon_type[1]}' + - 'n: i32' + links: + - link: "llvm.aarch64.neon.vluti4q.laneq.{neon_type[1]}" + arch: aarch64,arm64ec + - FnCall: ['_vluti4{neon_type[0].laneq_nox}', [a, b, LANE]] + + - name: "vluti4{neon_type[0].laneq_nox}" + doc: "Lookup table read with 4-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + 
return_type: "{neon_type[0]}" + attr: + - FnCall: [target_feature, ['enable = "neon,lut"']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [uint8x16_t, uint8x16_t, 'LANE >= 0 && LANE <= 1', int8x16_t] + - [poly8x16_t, uint8x16_t, 'LANE >= 0 && LANE <= 1', int8x16_t] + compose: + - FnCall: ['static_assert!', ['{type[2]}']] + - FnCall: + - transmute + - - FnCall: + - 'vluti4{neon_type[3].laneq_nox}::' + - - FnCall: [transmute, [a]] + - b + + - name: "vluti4q_lane_{neon_type[0]}_x2" + doc: "Lookup table read with 4-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [target_feature, ['enable = {type[4]}']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [int16x8x2_t, uint8x8_t, int16x8_t, 'LANE >= 0 && LANE <= 1', '"neon,lut"'] + compose: + - FnCall: ['static_assert!', ['{type[3]}']] + - LLVMLink: + name: "vluti4q_lane_{neon_type[0]}_x2" + arguments: + - 'a: {neon_type[2]}' + - 'a: {neon_type[2]}' + - 'b: {neon_type[1]}' + - 'n: i32' + links: + - link: "llvm.aarch64.neon.vluti4q.lane.x2.{neon_type[2]}" + arch: aarch64,arm64ec + - FnCall: ['_vluti4q_lane_{neon_type[0]}_x2', ['a.0', 'a.1', b, LANE]] + + - name: "vluti4q_lane_{neon_type[0]}_x2" + doc: "Lookup table read with 4-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [target_feature, ['enable = {type[4]}']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 0']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [uint16x8x2_t, uint8x8_t, uint16x8_t, 'LANE >= 0 && LANE <= 1', '"neon,lut"', int16x8x2_t] + - [poly16x8x2_t, uint8x8_t, poly16x8_t, 'LANE >= 0 && LANE <= 1', '"neon,lut"', int16x8x2_t] + - [float16x8x2_t, uint8x8_t, float16x8_t, 'LANE >= 0 && LANE <= 1', '"neon,lut,fp16"', int16x8x2_t] + compose: + - FnCall: ['static_assert!', ['{type[3]}']] + - FnCall: + - transmute + - - FnCall: + - 'vluti4q_lane_{neon_type[5]}_x2::' + - - FnCall: [transmute, [a]] + - b + + - name: "vluti4q_laneq_{neon_type[0]}_x2" + doc: "Lookup table read with 4-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + return_type: "{neon_type[2]}" + attr: + - FnCall: [target_feature, ['enable = {type[4]}']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 3']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [int16x8x2_t, uint8x16_t, int16x8_t, 'LANE >= 0 && LANE <= 3', '"neon,lut"'] + compose: + - FnCall: ['static_assert!', ['{type[3]}']] + - LLVMLink: + name: "vluti4{neon_type[0].lane_nox}" + arguments: + - 'a: {neon_type[2]}' + - 'b: {neon_type[2]}' + - 'c: {neon_type[1]}' + - 'n: i32' + links: + - link: "llvm.aarch64.neon.vluti4q.laneq.x2.{neon_type[2]}" + arch: aarch64,arm64ec + - FnCall: ['_vluti4q_laneq_{neon_type[0]}_x2', ['a.0', 'a.1', b, LANE]] + + - name: "vluti4q_laneq_{neon_type[0]}_x2" + doc: "Lookup table read with 4-bit indices" + arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"] + 
return_type: "{neon_type[2]}" + attr: + - FnCall: [target_feature, ['enable = {type[4]}']] + - FnCall: [cfg_attr, [test, {FnCall: [assert_instr, [nop, 'LANE = 3']]}]] + - *neon-unstable-feat-lut + - FnCall: [rustc_legacy_const_generics, ['2']] + static_defs: ["const LANE: i32"] + safety: + unsafe: [neon] + types: + - [uint16x8x2_t, uint8x16_t, uint16x8_t, 'LANE >= 0 && LANE <= 3', '"neon,lut"', int16x8x2_t] + - [poly16x8x2_t, uint8x16_t, poly16x8_t, 'LANE >= 0 && LANE <= 3', '"neon,lut"', int16x8x2_t] + - [float16x8x2_t, uint8x16_t, float16x8_t, 'LANE >= 0 && LANE <= 3', '"neon,lut,fp16"', int16x8x2_t] + compose: + - FnCall: ['static_assert!', ['{type[3]}']] + - FnCall: + - transmute + - - FnCall: + - 'vluti4q_laneq_{neon_type[5]}_x2::' + - - FnCall: [transmute, [a]] + - b diff --git a/intrinsics_data/arm_intrinsics.json b/intrinsics_data/arm_intrinsics.json index 32b6120d75..9d58aad49c 100644 --- a/intrinsics_data/arm_intrinsics.json +++ b/intrinsics_data/arm_intrinsics.json @@ -118843,5 +118843,915 @@ "FAMAX" ] ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2_lane_u8", + "arguments": [ + "uint8x8_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2q_lane_u8", + "arguments": [ + "uint8x16_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2_lane_s8", + "arguments": [ + "int8x8_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2q_lane_s8", + "arguments": [ + "int8x16_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2_lane_p8", + "arguments": [ + "poly8x8_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2q_lane_p8", + "arguments": [ + "poly8x16_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.16B" + }, + "b": { + "register": "Vm" + }, + "lane": 
{ + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2_lane_u16", + "arguments": [ + "uint16x4_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2q_lane_u16", + "arguments": [ + "uint16x8_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2_lane_s16", + "arguments": [ + "int16x4_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2q_lane_s16", + "arguments": [ + "int16x8_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2_lane_p16", + "arguments": [ + "poly16x4_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti2q_lane_p16", + "arguments": [ + "poly16x8_t a", + "uint8x8_t b", + "const int lane" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "a": { + "register": "Vn.8H" + }, + "b": { + "register": "Vm" + }, + "lane": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI2" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_lane_u8", + "arguments": [ + "uint8x16_t vn", + "uint8x8_t vm", + "const int index" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn.16B" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 0 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_laneq_u8", + "arguments": [ + "uint8x16_t vn", + "uint8x16_t vm", + "const int index" + ], + "return_type": { + "value": "uint8x16_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn.16B" + }, + "vm": { + 
"register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_lane_s8", + "arguments": [ + "int8x16_t vn", + "uint8x8_t vm", + "const int index" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn.16B" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 0 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_laneq_s8", + "arguments": [ + "int8x16_t vn", + "uint8x16_t vm", + "const int index" + ], + "return_type": { + "value": "int8x16_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn.16B" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_lane_p8", + "arguments": [ + "poly8x16_t vn", + "uint8x8_t vm", + "const int index" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn.16B" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 0 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_laneq_p8", + "arguments": [ + "poly8x16_t vn", + "uint8x16_t vm", + "const int index" + ], + "return_type": { + "value": "poly8x16_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn.16B" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.16B" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_lane_u16_x2", + "arguments": [ + "uint16x8x2_t vn", + "uint8x8_t vm", + "const int index" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn1.8H" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_laneq_u16_x2", + "arguments": [ + "uint16x8x2_t vn", + "uint8x16_t vm", + "const int index" + ], + "return_type": { + "value": "uint16x8_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn1.8H" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_lane_s16_x2", + "arguments": [ + "int16x8x2_t vn", + "uint8x8_t vm", + "const int index" + ], + "return_type": { + "value": "int16x8_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn1.8H" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_laneq_s16_x2", + "arguments": [ + "int16x8x2_t vn", + "uint8x16_t vm", + "const int index" + ], + "return_type": { + 
"value": "int16x8_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn1.8H" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_lane_f16_x2", + "arguments": [ + "float16x8x2_t vn", + "uint8x8_t vm", + "const int index" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn1.8H" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_laneq_f16_x2", + "arguments": [ + "float16x8x2_t vn", + "uint8x16_t vm", + "const int index" + ], + "return_type": { + "value": "float16x8_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn1.8H" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_lane_p16_x2", + "arguments": [ + "poly16x8x2_t vn", + "uint8x8_t vm", + "const int index" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn1.8H" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 1 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] + }, + { + "SIMD_ISA": "Neon", + "name": "vluti4q_laneq_p16_x2", + "arguments": [ + "poly16x8x2_t vn", + "uint8x16_t vm", + "const int index" + ], + "return_type": { + "value": "poly16x8_t" + }, + "Arguments_Preparation": { + "vn": { + "register": "Vn1.8H" + }, + "vm": { + "register": "Vm" + }, + "index": { + "minimum": 0, + "maximum": 3 + }, + "r": { + "register": "Vd.8H" + } + }, + "Architectures": [ + "A64" + ], + "instructions": [ + [ + "LUTI4" + ] + ] } ]