From 0e9a0e1c380b043a8517ab882e45f6b797dd514a Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 19 Dec 2024 17:07:07 -0800 Subject: [PATCH] pulley: Finish `simd` proposal implementation This commit fills out the final and miscellaneous set of opcodes for Pulley to have a complete implementation of the `simd` proposal for WebAssembly. All spec tests are now enabled and the Pulley-specific exceptions for `*.wast` tests are all gone. Closes #9783 --- .../codegen/src/isa/pulley_shared/lower.isle | 20 +++++ .../filetests/runtests/simd-arithmetic.clif | 4 + .../filetests/runtests/simd-avg-round.clif | 4 + .../filetests/runtests/simd-band-splat.clif | 4 + .../filetests/runtests/simd-bitcast.clif | 4 + .../runtests/simd-bitselect-to-vselect.clif | 4 + .../filetests/runtests/simd-bitselect.clif | 8 ++ .../filetests/runtests/simd-bor-splat.clif | 4 + .../filetests/runtests/simd-bxor-splat.clif | 4 + .../filetests/runtests/simd-ceil.clif | 4 + .../runtests/simd-fcvt-to-sint-sat.clif | 4 + .../runtests/simd-fcvt-to-uint-sat.clif | 4 + .../filetests/runtests/simd-iabs.clif | 4 + .../filetests/runtests/simd-iadd-splat.clif | 4 + .../runtests/simd-iadd-swiden-high.clif | 4 + .../runtests/simd-iadd-swiden-low.clif | 4 + .../runtests/simd-iadd-swiden-mix.clif | 4 + .../runtests/simd-iadd-uwiden-high.clif | 4 + .../runtests/simd-iadd-uwiden-low.clif | 4 + .../runtests/simd-iadd-uwiden-mix.clif | 4 + .../filetests/runtests/simd-ifma.clif | 4 + .../runtests/simd-insert-extract-lane.clif | 4 + .../filetests/runtests/simd-ishl.clif | 4 + .../filetests/runtests/simd-isub-splat.clif | 4 + .../runtests/simd-isub-swiden-high.clif | 4 + .../runtests/simd-isub-swiden-low.clif | 4 + .../runtests/simd-isub-uwiden-high.clif | 4 + .../runtests/simd-isub-uwiden-low.clif | 4 + .../filetests/runtests/simd-lane-access.clif | 4 + .../filetests/runtests/simd-logical.clif | 4 + .../filetests/runtests/simd-make-vectors.clif | 4 + .../filetests/runtests/simd-nearest.clif | 4 + 
.../filetests/runtests/simd-popcnt.clif | 4 + .../filetests/runtests/simd-saddsat.clif | 4 + .../filetests/runtests/simd-select.clif | 4 + .../filetests/runtests/simd-shuffle.clif | 4 + .../runtests/simd-sqmulroundsat.clif | 4 + .../filetests/runtests/simd-sqrt.clif | 4 + .../filetests/runtests/simd-sshr.clif | 4 + .../filetests/runtests/simd-ssubsat.clif | 4 + .../filetests/runtests/simd-swizzle.clif | 4 + .../filetests/runtests/simd-trunc.clif | 4 + .../filetests/runtests/simd-uaddsat.clif | 4 + .../filetests/runtests/simd-ushr.clif | 4 + .../filetests/runtests/simd-usubsat.clif | 4 + .../filetests/runtests/simd-vconst-large.clif | 4 + .../filetests/runtests/simd-vconst.clif | 4 + .../simd-wideningpairwisedotproducts.clif | 4 + .../filetests/runtests/simd_compare_zero.clif | 4 + crates/wast-util/src/lib.rs | 23 ------ pulley/build.rs | 21 +++++ pulley/src/interp.rs | 77 +++++++++++++++++++ pulley/src/lib.rs | 20 +++++ 53 files changed, 334 insertions(+), 23 deletions(-) create mode 100644 pulley/build.rs diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle index 0bff08619940..c7c849f55ab3 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower.isle +++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle @@ -1041,6 +1041,9 @@ (rule 1 (lower (has_type $F64 (select c a b))) (pulley_fselect64 (emit_cond (lower_cond c)) a b)) +(rule 2 (lower (has_type (ty_vec128 _) (select c a b))) + (pulley_vselect (emit_cond (lower_cond c)) a b)) + ;; Helper to emit a conditional into a register itself. 
(decl emit_cond (Cond) XReg) (rule (emit_cond (Cond.If32 reg)) reg) @@ -1213,6 +1216,18 @@ (rule (lower (has_type $I64 (fcvt_to_sint_sat val @ (value_type $F64)))) (pulley_x64_from_f64_s_sat val)) +(rule (lower (has_type $I32X4 (fcvt_to_sint_sat val @ (value_type $F32X4)))) + (pulley_vi32x4_from_f32x4_s val)) + +(rule (lower (has_type $I32X4 (fcvt_to_uint_sat val @ (value_type $F32X4)))) + (pulley_vi32x4_from_f32x4_u val)) + +(rule (lower (has_type $I64X2 (fcvt_to_sint_sat val @ (value_type $F64X2)))) + (pulley_vi64x2_from_f64x2_s val)) + +(rule (lower (has_type $I64X2 (fcvt_to_uint_sat val @ (value_type $F64X2)))) + (pulley_vi64x2_from_f64x2_u val)) + ;;;; Rules for `fdemote` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type $F32 (fdemote val @ (value_type $F64)))) @@ -1429,12 +1444,17 @@ (rule (lower (snarrow a @ (value_type $I16X8) b)) (pulley_vnarrow16x8_s a b)) (rule (lower (snarrow a @ (value_type $I32X4) b)) (pulley_vnarrow32x4_s a b)) +(rule (lower (snarrow a @ (value_type $I64X2) b)) (pulley_vnarrow64x2_s a b)) ;;;; Rules for `unarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (unarrow a @ (value_type $I16X8) b)) (pulley_vnarrow16x8_u a b)) (rule (lower (unarrow a @ (value_type $I32X4) b)) (pulley_vnarrow32x4_u a b)) +;;;; Rules for `uunarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (uunarrow a @ (value_type $I64X2) b)) (pulley_vunarrow64x2_u a b)) + ;;;; Rules for `fvpromote_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (fvpromote_low a @ (value_type $F32X4))) (pulley_vfpromotelow a)) diff --git a/cranelift/filetests/filetests/runtests/simd-arithmetic.clif b/cranelift/filetests/filetests/runtests/simd-arithmetic.clif index 317f0ea932cf..ccbcd1b97be0 100644 --- a/cranelift/filetests/filetests/runtests/simd-arithmetic.clif +++ b/cranelift/filetests/filetests/runtests/simd-arithmetic.clif @@ -9,6 +9,10 @@ target x86_64 sse42 has_avx set 
enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %sadd_sat_i8x16(i8x16, i8x16) -> i8x16 { diff --git a/cranelift/filetests/filetests/runtests/simd-avg-round.clif b/cranelift/filetests/filetests/runtests/simd-avg-round.clif index 4b0b91368f4e..151d1ba6e1e0 100644 --- a/cranelift/filetests/filetests/runtests/simd-avg-round.clif +++ b/cranelift/filetests/filetests/runtests/simd-avg-round.clif @@ -6,6 +6,10 @@ target x86_64 skylake set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %average_rounding_i8x16(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-band-splat.clif b/cranelift/filetests/filetests/runtests/simd-band-splat.clif index 6c36ccb8d97e..7061ee6886c8 100644 --- a/cranelift/filetests/filetests/runtests/simd-band-splat.clif +++ b/cranelift/filetests/filetests/runtests/simd-band-splat.clif @@ -9,6 +9,10 @@ target x86_64 sse42 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %band_splat_const_i8x16(i8x16) -> i8x16 { block0(v0: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-bitcast.clif b/cranelift/filetests/filetests/runtests/simd-bitcast.clif index 03bfbd9f4b85..5f5b67ab073c 100644 --- a/cranelift/filetests/filetests/runtests/simd-bitcast.clif +++ b/cranelift/filetests/filetests/runtests/simd-bitcast.clif @@ -7,6 +7,10 @@ target s390x set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %bitcast_if32x4(i32x4) -> f32x4 { block0(v0: i32x4): diff --git 
a/cranelift/filetests/filetests/runtests/simd-bitselect-to-vselect.clif b/cranelift/filetests/filetests/runtests/simd-bitselect-to-vselect.clif index 778275189d23..469891ea1938 100644 --- a/cranelift/filetests/filetests/runtests/simd-bitselect-to-vselect.clif +++ b/cranelift/filetests/filetests/runtests/simd-bitselect-to-vselect.clif @@ -7,6 +7,10 @@ target x86_64 skylake set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %mask_from_icmp(i32x4, i32x4) -> i32x4 { block0(v0: i32x4, v1: i32x4): diff --git a/cranelift/filetests/filetests/runtests/simd-bitselect.clif b/cranelift/filetests/filetests/runtests/simd-bitselect.clif index b2dc28a54110..2c8bf659d0ef 100644 --- a/cranelift/filetests/filetests/runtests/simd-bitselect.clif +++ b/cranelift/filetests/filetests/runtests/simd-bitselect.clif @@ -7,6 +7,10 @@ set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb set enable_multi_ret_implicit_sret=false +target pulley32 +target pulley32be +target pulley64 +target pulley64be set opt_level=speed target aarch64 @@ -16,6 +20,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %bitselect_i64x2(i64x2, i64x2, i64x2) -> i64x2 { block0(v0: i64x2, v1: i64x2, v2: i64x2): diff --git a/cranelift/filetests/filetests/runtests/simd-bor-splat.clif b/cranelift/filetests/filetests/runtests/simd-bor-splat.clif index 9b60d0dff4d1..d30e3f2fbabc 100644 --- a/cranelift/filetests/filetests/runtests/simd-bor-splat.clif +++ b/cranelift/filetests/filetests/runtests/simd-bor-splat.clif @@ -8,6 +8,10 @@ target x86_64 sse42 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target 
pulley64 +target pulley64be function %bor_splat_const_i8x16(i8x16) -> i8x16 { block0(v0: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-bxor-splat.clif b/cranelift/filetests/filetests/runtests/simd-bxor-splat.clif index 47fb134c328c..74962eefdc59 100644 --- a/cranelift/filetests/filetests/runtests/simd-bxor-splat.clif +++ b/cranelift/filetests/filetests/runtests/simd-bxor-splat.clif @@ -9,6 +9,10 @@ target x86_64 sse42 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %bxor_splat_const_i8x16(i8x16) -> i8x16 { block0(v0: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-ceil.clif b/cranelift/filetests/filetests/runtests/simd-ceil.clif index 3c6532b44819..b19c4bfe74b6 100644 --- a/cranelift/filetests/filetests/runtests/simd-ceil.clif +++ b/cranelift/filetests/filetests/runtests/simd-ceil.clif @@ -9,6 +9,10 @@ target s390x set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %ceil_f32x4(f32x4) -> f32x4 { block0(v0: f32x4): diff --git a/cranelift/filetests/filetests/runtests/simd-fcvt-to-sint-sat.clif b/cranelift/filetests/filetests/runtests/simd-fcvt-to-sint-sat.clif index bee6edf0bc43..dddfbb9b8fc4 100644 --- a/cranelift/filetests/filetests/runtests/simd-fcvt-to-sint-sat.clif +++ b/cranelift/filetests/filetests/runtests/simd-fcvt-to-sint-sat.clif @@ -9,6 +9,10 @@ target x86_64 sse42 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %fcvt_to_sint_sat(f32x4) -> i32x4 { block0(v0:f32x4): diff --git a/cranelift/filetests/filetests/runtests/simd-fcvt-to-uint-sat.clif b/cranelift/filetests/filetests/runtests/simd-fcvt-to-uint-sat.clif index d812f7302508..4268cadc0d2b 
100644 --- a/cranelift/filetests/filetests/runtests/simd-fcvt-to-uint-sat.clif +++ b/cranelift/filetests/filetests/runtests/simd-fcvt-to-uint-sat.clif @@ -9,6 +9,10 @@ target x86_64 sse42 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %fcvt_to_uint_sat(f32x4) -> i32x4 { block0(v0:f32x4): diff --git a/cranelift/filetests/filetests/runtests/simd-iabs.clif b/cranelift/filetests/filetests/runtests/simd-iabs.clif index 7e79fe779e3b..fc749809b7f6 100644 --- a/cranelift/filetests/filetests/runtests/simd-iabs.clif +++ b/cranelift/filetests/filetests/runtests/simd-iabs.clif @@ -10,6 +10,10 @@ target x86_64 sse42 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %iabs_i8x16(i8x16) -> i8x16 { block0(v0: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-iadd-splat.clif b/cranelift/filetests/filetests/runtests/simd-iadd-splat.clif index bd74203a5fab..5fe6ecccb622 100644 --- a/cranelift/filetests/filetests/runtests/simd-iadd-splat.clif +++ b/cranelift/filetests/filetests/runtests/simd-iadd-splat.clif @@ -8,6 +8,10 @@ target x86_64 sse42 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %iadd_splat_const_i8x16(i8x16) -> i8x16 { block0(v0: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-iadd-swiden-high.clif b/cranelift/filetests/filetests/runtests/simd-iadd-swiden-high.clif index b20724615e16..87eed50d7f19 100644 --- a/cranelift/filetests/filetests/runtests/simd-iadd-swiden-high.clif +++ b/cranelift/filetests/filetests/runtests/simd-iadd-swiden-high.clif @@ -8,6 +8,10 @@ target x86_64 sse41 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target 
riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %iadd_swidenhigh_i32x4(i32x4, i32x4) -> i64x2 { diff --git a/cranelift/filetests/filetests/runtests/simd-iadd-swiden-low.clif b/cranelift/filetests/filetests/runtests/simd-iadd-swiden-low.clif index d183f50c5bb3..6c10fd0efb5f 100644 --- a/cranelift/filetests/filetests/runtests/simd-iadd-swiden-low.clif +++ b/cranelift/filetests/filetests/runtests/simd-iadd-swiden-low.clif @@ -8,6 +8,10 @@ target x86_64 sse41 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %iadd_swidenlow_i32x4(i32x4, i32x4) -> i64x2 { diff --git a/cranelift/filetests/filetests/runtests/simd-iadd-swiden-mix.clif b/cranelift/filetests/filetests/runtests/simd-iadd-swiden-mix.clif index 5b652253498e..2353c428f5b9 100644 --- a/cranelift/filetests/filetests/runtests/simd-iadd-swiden-mix.clif +++ b/cranelift/filetests/filetests/runtests/simd-iadd-swiden-mix.clif @@ -8,6 +8,10 @@ target x86_64 sse41 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %iadd_swiden_high_low_i32x4(i32x4, i32x4) -> i64x2 { diff --git a/cranelift/filetests/filetests/runtests/simd-iadd-uwiden-high.clif b/cranelift/filetests/filetests/runtests/simd-iadd-uwiden-high.clif index af041d56cc51..edd864bebf2f 100644 --- a/cranelift/filetests/filetests/runtests/simd-iadd-uwiden-high.clif +++ b/cranelift/filetests/filetests/runtests/simd-iadd-uwiden-high.clif @@ -8,6 +8,10 @@ target x86_64 sse41 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %iadd_uwidenhigh_i32x4(i32x4, i32x4) -> i64x2 { diff --git 
a/cranelift/filetests/filetests/runtests/simd-iadd-uwiden-low.clif b/cranelift/filetests/filetests/runtests/simd-iadd-uwiden-low.clif index fec2ff0125f0..9bf4384093b1 100644 --- a/cranelift/filetests/filetests/runtests/simd-iadd-uwiden-low.clif +++ b/cranelift/filetests/filetests/runtests/simd-iadd-uwiden-low.clif @@ -8,6 +8,10 @@ target x86_64 sse41 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %iadd_uwidenlow_i32x4(i32x4, i32x4) -> i64x2 { diff --git a/cranelift/filetests/filetests/runtests/simd-iadd-uwiden-mix.clif b/cranelift/filetests/filetests/runtests/simd-iadd-uwiden-mix.clif index bdf89e287b48..e1ea2d8ee137 100644 --- a/cranelift/filetests/filetests/runtests/simd-iadd-uwiden-mix.clif +++ b/cranelift/filetests/filetests/runtests/simd-iadd-uwiden-mix.clif @@ -8,6 +8,10 @@ target x86_64 sse41 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %iadd_uwiden_high_low_i32x4(i32x4, i32x4) -> i64x2 { diff --git a/cranelift/filetests/filetests/runtests/simd-ifma.clif b/cranelift/filetests/filetests/runtests/simd-ifma.clif index 007818fa33b3..c67298288038 100644 --- a/cranelift/filetests/filetests/runtests/simd-ifma.clif +++ b/cranelift/filetests/filetests/runtests/simd-ifma.clif @@ -6,6 +6,10 @@ target x86_64 skylake set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be ;; These tests test integer fused multiply add/subtract instructions. 
diff --git a/cranelift/filetests/filetests/runtests/simd-insert-extract-lane.clif b/cranelift/filetests/filetests/runtests/simd-insert-extract-lane.clif index 7127556e7bf1..fdf965ad8de4 100644 --- a/cranelift/filetests/filetests/runtests/simd-insert-extract-lane.clif +++ b/cranelift/filetests/filetests/runtests/simd-insert-extract-lane.clif @@ -8,6 +8,10 @@ target x86_64 sse42 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %insertlane_preserves_upper_bits(f64) -> i64 fast { block0(v5: f64): diff --git a/cranelift/filetests/filetests/runtests/simd-ishl.clif b/cranelift/filetests/filetests/runtests/simd-ishl.clif index b73f6aab8486..0812738cc3fb 100644 --- a/cranelift/filetests/filetests/runtests/simd-ishl.clif +++ b/cranelift/filetests/filetests/runtests/simd-ishl.clif @@ -7,6 +7,10 @@ target x86_64 skylake set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %ishl_i8x16(i8x16, i32) -> i8x16 { diff --git a/cranelift/filetests/filetests/runtests/simd-isub-splat.clif b/cranelift/filetests/filetests/runtests/simd-isub-splat.clif index c4048c771be1..d4b78ff087fd 100644 --- a/cranelift/filetests/filetests/runtests/simd-isub-splat.clif +++ b/cranelift/filetests/filetests/runtests/simd-isub-splat.clif @@ -8,6 +8,10 @@ target x86_64 sse42 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %isub_splat_reverse_i8x16(i8x16, i8) -> i8x16 { diff --git a/cranelift/filetests/filetests/runtests/simd-isub-swiden-high.clif b/cranelift/filetests/filetests/runtests/simd-isub-swiden-high.clif index 6fbadc62c0f7..831b4ca24b80 100644 --- a/cranelift/filetests/filetests/runtests/simd-isub-swiden-high.clif +++ 
b/cranelift/filetests/filetests/runtests/simd-isub-swiden-high.clif @@ -8,6 +8,10 @@ target x86_64 sse41 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %isub_swidenhigh_i32x4(i32x4, i32x4) -> i64x2 { diff --git a/cranelift/filetests/filetests/runtests/simd-isub-swiden-low.clif b/cranelift/filetests/filetests/runtests/simd-isub-swiden-low.clif index 13dd57de0721..1993f15c08ba 100644 --- a/cranelift/filetests/filetests/runtests/simd-isub-swiden-low.clif +++ b/cranelift/filetests/filetests/runtests/simd-isub-swiden-low.clif @@ -8,6 +8,10 @@ target x86_64 sse41 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %isub_swidenlow_i32x4(i32x4, i32x4) -> i64x2 { diff --git a/cranelift/filetests/filetests/runtests/simd-isub-uwiden-high.clif b/cranelift/filetests/filetests/runtests/simd-isub-uwiden-high.clif index ca02cf4462e2..9cae46a07d64 100644 --- a/cranelift/filetests/filetests/runtests/simd-isub-uwiden-high.clif +++ b/cranelift/filetests/filetests/runtests/simd-isub-uwiden-high.clif @@ -8,6 +8,10 @@ target x86_64 sse41 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %isub_uwidenhigh_i32x4(i32x4, i32x4) -> i64x2 { diff --git a/cranelift/filetests/filetests/runtests/simd-isub-uwiden-low.clif b/cranelift/filetests/filetests/runtests/simd-isub-uwiden-low.clif index 58d333c8e0b7..11f83f5c99e8 100644 --- a/cranelift/filetests/filetests/runtests/simd-isub-uwiden-low.clif +++ b/cranelift/filetests/filetests/runtests/simd-isub-uwiden-low.clif @@ -8,6 +8,10 @@ target x86_64 sse41 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 
+target pulley32be +target pulley64 +target pulley64be function %isub_uwidenlow_i32x4(i32x4, i32x4) -> i64x2 { diff --git a/cranelift/filetests/filetests/runtests/simd-lane-access.clif b/cranelift/filetests/filetests/runtests/simd-lane-access.clif index de096a7b9e4f..29618eca7dd4 100644 --- a/cranelift/filetests/filetests/runtests/simd-lane-access.clif +++ b/cranelift/filetests/filetests/runtests/simd-lane-access.clif @@ -6,6 +6,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be ;; shuffle diff --git a/cranelift/filetests/filetests/runtests/simd-logical.clif b/cranelift/filetests/filetests/runtests/simd-logical.clif index 0ef00e610dd0..9a4c3b8bdae2 100644 --- a/cranelift/filetests/filetests/runtests/simd-logical.clif +++ b/cranelift/filetests/filetests/runtests/simd-logical.clif @@ -6,6 +6,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %bnot() -> i32 { block0: diff --git a/cranelift/filetests/filetests/runtests/simd-make-vectors.clif b/cranelift/filetests/filetests/runtests/simd-make-vectors.clif index f5b5e30ecfdd..8a6d074c3d06 100644 --- a/cranelift/filetests/filetests/runtests/simd-make-vectors.clif +++ b/cranelift/filetests/filetests/runtests/simd-make-vectors.clif @@ -8,6 +8,10 @@ target x86_64 sse42 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %i64x2_make0() -> i64x2 { block0: diff --git a/cranelift/filetests/filetests/runtests/simd-nearest.clif b/cranelift/filetests/filetests/runtests/simd-nearest.clif index 6f1385063ca0..7df9c4d8af12 100644 --- 
a/cranelift/filetests/filetests/runtests/simd-nearest.clif +++ b/cranelift/filetests/filetests/runtests/simd-nearest.clif @@ -9,6 +9,10 @@ target s390x set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %nearest_f32x4(f32x4) -> f32x4 { block0(v0: f32x4): diff --git a/cranelift/filetests/filetests/runtests/simd-popcnt.clif b/cranelift/filetests/filetests/runtests/simd-popcnt.clif index ed613d7a8dc6..c518a43e5831 100644 --- a/cranelift/filetests/filetests/runtests/simd-popcnt.clif +++ b/cranelift/filetests/filetests/runtests/simd-popcnt.clif @@ -9,6 +9,10 @@ target x86_64 sse42 has_avx has_avx512vl has_avx512bitalg set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %popcnt_i8x16(i8x16) -> i8x16 { block0(v0: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-saddsat.clif b/cranelift/filetests/filetests/runtests/simd-saddsat.clif index afd4d2798b20..5582dd7cc237 100644 --- a/cranelift/filetests/filetests/runtests/simd-saddsat.clif +++ b/cranelift/filetests/filetests/runtests/simd-saddsat.clif @@ -7,6 +7,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %saddsat_i8x16(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-select.clif b/cranelift/filetests/filetests/runtests/simd-select.clif index f5c4eea08994..89a2983a04ae 100644 --- a/cranelift/filetests/filetests/runtests/simd-select.clif +++ b/cranelift/filetests/filetests/runtests/simd-select.clif @@ -9,6 +9,10 @@ target x86_64 sse42 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb 
+target pulley32 +target pulley32be +target pulley64 +target pulley64be function %select_i64x2(i64, i64x2, i64x2) -> i64x2 { block0(v0: i64, v1: i64x2, v2: i64x2): diff --git a/cranelift/filetests/filetests/runtests/simd-shuffle.clif b/cranelift/filetests/filetests/runtests/simd-shuffle.clif index 5322a3dac223..72a48857ba3a 100644 --- a/cranelift/filetests/filetests/runtests/simd-shuffle.clif +++ b/cranelift/filetests/filetests/runtests/simd-shuffle.clif @@ -11,6 +11,10 @@ target x86_64 sse42 has_avx has_avx512vl has_avx512vbmi set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %shuffle_i8x16(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif b/cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif index cdabcf34f5bd..e0c7c0f6529f 100644 --- a/cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif +++ b/cranelift/filetests/filetests/runtests/simd-sqmulroundsat.clif @@ -9,6 +9,10 @@ target x86_64 sse41 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %sqmulrs_i16x8(i16x8, i16x8) -> i16x8 { block0(v0: i16x8, v1: i16x8): diff --git a/cranelift/filetests/filetests/runtests/simd-sqrt.clif b/cranelift/filetests/filetests/runtests/simd-sqrt.clif index 5addd29f4fc6..0cc2f2e467ba 100644 --- a/cranelift/filetests/filetests/runtests/simd-sqrt.clif +++ b/cranelift/filetests/filetests/runtests/simd-sqrt.clif @@ -9,6 +9,10 @@ target x86_64 sse42 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %sqrt_f32x4(f32x4) -> f32x4 { diff --git a/cranelift/filetests/filetests/runtests/simd-sshr.clif 
b/cranelift/filetests/filetests/runtests/simd-sshr.clif index e654a2d820d5..b14e4d62155c 100644 --- a/cranelift/filetests/filetests/runtests/simd-sshr.clif +++ b/cranelift/filetests/filetests/runtests/simd-sshr.clif @@ -12,6 +12,10 @@ target x86_64 sse42 has_avx has_avx2 has_avx512f has_avx512vl set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %sshr_i8x16(i8x16, i32) -> i8x16 { diff --git a/cranelift/filetests/filetests/runtests/simd-ssubsat.clif b/cranelift/filetests/filetests/runtests/simd-ssubsat.clif index 8b048572e5ab..c4e20dc67893 100644 --- a/cranelift/filetests/filetests/runtests/simd-ssubsat.clif +++ b/cranelift/filetests/filetests/runtests/simd-ssubsat.clif @@ -7,6 +7,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %ssubsat_i8x16(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-swizzle.clif b/cranelift/filetests/filetests/runtests/simd-swizzle.clif index 16e12f4bced6..82dc6dafa020 100644 --- a/cranelift/filetests/filetests/runtests/simd-swizzle.clif +++ b/cranelift/filetests/filetests/runtests/simd-swizzle.clif @@ -9,6 +9,10 @@ target x86_64 sse41 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %swizzle_i8x16(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-trunc.clif b/cranelift/filetests/filetests/runtests/simd-trunc.clif index c941a48b66b9..01d0c18a3f3c 100644 --- a/cranelift/filetests/filetests/runtests/simd-trunc.clif +++ b/cranelift/filetests/filetests/runtests/simd-trunc.clif @@ -9,6 +9,10 @@ 
target s390x set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %trunc_f32x4(f32x4) -> f32x4 { block0(v0: f32x4): diff --git a/cranelift/filetests/filetests/runtests/simd-uaddsat.clif b/cranelift/filetests/filetests/runtests/simd-uaddsat.clif index 3e67f176f3b9..c0432842cb84 100644 --- a/cranelift/filetests/filetests/runtests/simd-uaddsat.clif +++ b/cranelift/filetests/filetests/runtests/simd-uaddsat.clif @@ -7,6 +7,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %uaddsat_i8x16(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): diff --git a/cranelift/filetests/filetests/runtests/simd-ushr.clif b/cranelift/filetests/filetests/runtests/simd-ushr.clif index 86b5335a92de..15db013b6f6b 100644 --- a/cranelift/filetests/filetests/runtests/simd-ushr.clif +++ b/cranelift/filetests/filetests/runtests/simd-ushr.clif @@ -6,6 +6,10 @@ target x86_64 skylake set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %ushr_i8x16(i8x16, i32) -> i8x16 { diff --git a/cranelift/filetests/filetests/runtests/simd-usubsat.clif b/cranelift/filetests/filetests/runtests/simd-usubsat.clif index 844e7eaa2424..735d004e45dd 100644 --- a/cranelift/filetests/filetests/runtests/simd-usubsat.clif +++ b/cranelift/filetests/filetests/runtests/simd-usubsat.clif @@ -7,6 +7,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %usubsat_i8x16(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): diff --git 
a/cranelift/filetests/filetests/runtests/simd-vconst-large.clif b/cranelift/filetests/filetests/runtests/simd-vconst-large.clif index c2f9049056ff..b81a47e86ff0 100644 --- a/cranelift/filetests/filetests/runtests/simd-vconst-large.clif +++ b/cranelift/filetests/filetests/runtests/simd-vconst-large.clif @@ -6,6 +6,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be ;; This tests that vconst correctly loads large offsets into the constant pool diff --git a/cranelift/filetests/filetests/runtests/simd-vconst.clif b/cranelift/filetests/filetests/runtests/simd-vconst.clif index a7014dca797a..6bedc1b095ee 100644 --- a/cranelift/filetests/filetests/runtests/simd-vconst.clif +++ b/cranelift/filetests/filetests/runtests/simd-vconst.clif @@ -6,6 +6,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %vconst_zeroes_i8x16() -> i8x16 { diff --git a/cranelift/filetests/filetests/runtests/simd-wideningpairwisedotproducts.clif b/cranelift/filetests/filetests/runtests/simd-wideningpairwisedotproducts.clif index 78d98910ee5c..559a9a062da9 100644 --- a/cranelift/filetests/filetests/runtests/simd-wideningpairwisedotproducts.clif +++ b/cranelift/filetests/filetests/runtests/simd-wideningpairwisedotproducts.clif @@ -6,6 +6,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 has_avx set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %wpdps(i16x8, i16x8) -> i32x4 { block0(v0: i16x8, v1: i16x8): diff --git a/cranelift/filetests/filetests/runtests/simd_compare_zero.clif b/cranelift/filetests/filetests/runtests/simd_compare_zero.clif 
index e8b7b64636eb..4406faf8e7f6 100644 --- a/cranelift/filetests/filetests/runtests/simd_compare_zero.clif +++ b/cranelift/filetests/filetests/runtests/simd_compare_zero.clif @@ -4,6 +4,10 @@ target s390x set enable_multi_ret_implicit_sret target riscv64 has_v target riscv64 has_v has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %simd_icmp_eq_i8(i8x16) -> i8x16 { block0(v0: i8x16): diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs index a72a05b2a8ec..f537fd5053b5 100644 --- a/crates/wast-util/src/lib.rs +++ b/crates/wast-util/src/lib.rs @@ -384,29 +384,6 @@ impl WastTest { return true; } - // Pulley supports a mishmash of proposals at this time as it's in an - // interim state. It doesn't support all of the MVP but it supports - // enough to pass some GC tests for example. This means that - // `Compiler::should_fail` is pretty liberal (the check above). To - // handle this there's an extra check here for an exhaustive list of - // unsupported tests on Pulley. This list will get burned down as - // features in Pulley are implemented. - if config.compiler == Compiler::CraneliftPulley { - let unsupported = [ - "misc_testsuite/simd/v128-select.wast", - "spec_testsuite/proposals/relaxed-simd/i32x4_relaxed_trunc.wast", - "spec_testsuite/proposals/memory64/i32x4_relaxed_trunc.wast", - "spec_testsuite/simd_i32x4_trunc_sat_f32x4.wast", - "spec_testsuite/simd_i32x4_trunc_sat_f64x2.wast", - "spec_testsuite/simd_load.wast", - "spec_testsuite/simd_splat.wast", - ]; - - if unsupported.iter().any(|part| self.path.ends_with(part)) { - return true; - } - } - // Disable spec tests for proposals that Winch does not implement yet. 
if config.compiler == Compiler::Winch {
             let unsupported = [
diff --git a/pulley/build.rs b/pulley/build.rs
new file mode 100644
index 000000000000..052b611abe6b
--- /dev/null
+++ b/pulley/build.rs
@@ -0,0 +1,21 @@
+fn main() { // NOTE(review): the whole body below is commented out, so this build script currently does nothing; confirm it is meant to ship.
+    // let opt_level = std::env::var("OPT_LEVEL").unwrap();
+    // let target = std::env::var("TARGET").unwrap();
+    // dbg!(&opt_level);
+    // dbg!(&target);
+
+    // if let Ok(n) = opt_level.parse::() {
+    //     if n >= 2 {
+    //         match target.as_str() {
+    //             "x86_64-unknown-linux-gnu" => {
+    //                 println!("cargo:rustc-cfg=pulley_assume_llvm_makes_tail_calls");
+    //             }
+    //             _ => panic!(),
+    //         }
+    //     } else {
+    //         // panic!()
+    //     }
+    // } else {
+    //     panic!()
+    // }
+}
diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs
index dbad29f7d2e1..fcc1952c3e05 100644
--- a/pulley/src/interp.rs
+++ b/pulley/src/interp.rs
@@ -3770,6 +3770,30 @@ impl ExtendedOpVisitor for Interpreter<'_> {
         ControlFlow::Continue(())
     }
 
+    fn vi32x4_from_f32x4_s(&mut self, dst: VReg, src: VReg) -> ControlFlow {
+        let a = self.state[src].get_f32x4();
+        self.state[dst].set_i32x4(a.map(|f| f as i32)); // per-lane `as` cast: Rust saturates at the i32 bounds and maps NaN to 0
+        ControlFlow::Continue(())
+    }
+
+    fn vi32x4_from_f32x4_u(&mut self, dst: VReg, src: VReg) -> ControlFlow {
+        let a = self.state[src].get_f32x4();
+        self.state[dst].set_u32x4(a.map(|f| f as u32)); // per-lane `as` cast: negatives and NaN become 0, large values become u32::MAX
+        ControlFlow::Continue(())
+    }
+
+    fn vi64x2_from_f64x2_s(&mut self, dst: VReg, src: VReg) -> ControlFlow {
+        let a = self.state[src].get_f64x2();
+        self.state[dst].set_i64x2(a.map(|f| f as i64)); // per-lane `as` cast: Rust saturates at the i64 bounds and maps NaN to 0
+        ControlFlow::Continue(())
+    }
+
+    fn vi64x2_from_f64x2_u(&mut self, dst: VReg, src: VReg) -> ControlFlow {
+        let a = self.state[src].get_f64x2();
+        self.state[dst].set_u64x2(a.map(|f| f as u64)); // per-lane `as` cast: negatives and NaN become 0, large values become u64::MAX
+        ControlFlow::Continue(())
+    }
+
     fn vwidenlow8x16_s(&mut self, dst: VReg, src: VReg) -> ControlFlow {
         let a = *self.state[src].get_i8x16().first_chunk().unwrap();
         self.state[dst].set_i16x8(a.map(|i| i.into()));
@@ -3894,6 +3918,43 @@ impl ExtendedOpVisitor for Interpreter<'_> {
         ControlFlow::Continue(())
     }
 
+    fn vnarrow64x2_s(&mut self, operands: BinaryOperands) -> ControlFlow {
+        let a = self.state[operands.src1].get_i64x2();
+        let b = self.state[operands.src2].get_i64x2();
+        let mut result = [0; 4];
+        for (i, d) in a.iter().chain(&b).zip(&mut result) { // lanes of src1 fill the first outputs, lanes of src2 the rest
+            *d = (*i)
+                .try_into()
+                .unwrap_or(if *i < 0 { i32::MIN } else { i32::MAX }); // value does not fit in i32: clamp by sign
+        }
+        self.state[operands.dst].set_i32x4(result);
+        ControlFlow::Continue(())
+    }
+
+    fn vnarrow64x2_u(&mut self, operands: BinaryOperands) -> ControlFlow {
+        let a = self.state[operands.src1].get_i64x2(); // inputs are read as signed lanes
+        let b = self.state[operands.src2].get_i64x2();
+        let mut result = [0; 4];
+        for (i, d) in a.iter().chain(&b).zip(&mut result) { // lanes of src1 fill the first outputs, lanes of src2 the rest
+            *d = (*i)
+                .try_into()
+                .unwrap_or(if *i < 0 { u32::MIN } else { u32::MAX }); // does not fit in u32: negatives become 0, the rest u32::MAX
+        }
+        self.state[operands.dst].set_u32x4(result);
+        ControlFlow::Continue(())
+    }
+
+    fn vunarrow64x2_u(&mut self, operands: BinaryOperands) -> ControlFlow {
+        let a = self.state[operands.src1].get_u64x2(); // inputs are read as unsigned lanes
+        let b = self.state[operands.src2].get_u64x2();
+        let mut result = [0; 4];
+        for (i, d) in a.iter().chain(&b).zip(&mut result) { // lanes of src1 fill the first outputs, lanes of src2 the rest
+            *d = (*i).try_into().unwrap_or(u32::MAX); // an unsigned input can only fail by being too large
+        }
+        self.state[operands.dst].set_u32x4(result);
+        ControlFlow::Continue(())
+    }
+
     fn vfpromotelow(&mut self, dst: VReg, src: VReg) -> ControlFlow {
         let a = self.state[src].get_f32x4();
         self.state[dst].set_f64x2([a[0].into(), a[1].into()]);
@@ -4836,6 +4897,22 @@ impl ExtendedOpVisitor for Interpreter<'_> {
         ControlFlow::Continue(())
     }
 
+    fn vselect(
+        &mut self,
+        dst: VReg,
+        cond: XReg,
+        if_nonzero: VReg,
+        if_zero: VReg,
+    ) -> ControlFlow {
+        let result = if self.state[cond].get_u32() != 0 { // cond is read through get_u32(); any non-zero value selects if_nonzero
+            self.state[if_nonzero]
+        } else {
+            self.state[if_zero]
+        };
+        self.state[dst] = result; // the chosen register value is assigned as a whole; there is no per-lane selection here
+        ControlFlow::Continue(())
+    }
+
     fn xadd128(
         &mut self,
         dst_lo: XReg,
diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs
index 39acfe45500e..f14c3f979ba5 100644
--- a/pulley/src/lib.rs
+++ b/pulley/src/lib.rs
@@ -1029,6 +1029,14 @@ macro_rules! 
for_each_extended_op { vf64x2_from_i64x2_s = VF64x2FromI64x2S { dst: VReg, src: VReg }; /// Int-to-float conversion (same as `f64_from_x64_u`) vf64x2_from_i64x2_u = VF64x2FromI64x2U { dst: VReg, src: VReg }; + /// Float-to-int conversion (same as `x32_from_f32_s`) + vi32x4_from_f32x4_s = VI32x4FromF32x4S { dst: VReg, src: VReg }; + /// Float-to-int conversion (same as `x32_from_f32_u`) + vi32x4_from_f32x4_u = VI32x4FromF32x4U { dst: VReg, src: VReg }; + /// Float-to-int conversion (same as `x64_from_f64_s`) + vi64x2_from_f64x2_s = VI64x2FromF64x2S { dst: VReg, src: VReg }; + /// Float-to-int conversion (same as `x64_from_f64_u`) + vi64x2_from_f64x2_u = VI64x2FromF64x2U { dst: VReg, src: VReg }; /// Widens the low lanes of the input vector, as signed, to twice /// the width. @@ -1079,6 +1087,15 @@ macro_rules! for_each_extended_op { /// Narrows the two 32x4 vectors, assuming all input lanes are /// signed, to half the width. Narrowing is unsigned and saturating. vnarrow32x4_u = Vnarrow32x4U { operands: BinaryOperands }; + /// Narrows the two 64x2 vectors, assuming all input lanes are + /// signed, to half the width. Narrowing is signed and saturating. + vnarrow64x2_s = Vnarrow64x2S { operands: BinaryOperands }; + /// Narrows the two 64x2 vectors, assuming all input lanes are + /// signed, to half the width. Narrowing is unsigned and saturating. + vnarrow64x2_u = Vnarrow64x2U { operands: BinaryOperands }; + /// Narrows the two 64x2 vectors, assuming all input lanes are + /// unsigned, to half the width. Narrowing is unsigned and saturating. + vunarrow64x2_u = Vunarrow64x2U { operands: BinaryOperands }; /// Promotes the low two lanes of the f32x4 input to f64x2. vfpromotelow = VFpromoteLow { dst: VReg, src: VReg }; /// Demotes the two f64x2 lanes to f32x2 and then extends with two @@ -1289,6 +1306,9 @@ macro_rules! for_each_extended_op { /// `dst = ieee_fma(a, b, c)` vfma64x2 = Vfma64x2 { dst: VReg, a: VReg, b: VReg, c: VReg }; + /// `dst = low32(cond) ? 
if_nonzero : if_zero` + vselect = Vselect { dst: VReg, cond: XReg, if_nonzero: VReg, if_zero: VReg }; + /// `dst_hi:dst_lo = lhs_hi:lhs_lo + rhs_hi:rhs_lo` xadd128 = Xadd128 { dst_lo: XReg,