diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle
index 783477331c0e..777af4fea7e5 100644
--- a/cranelift/codegen/src/isa/pulley_shared/lower.isle
+++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle
@@ -510,6 +510,47 @@
 (rule (lower_icmp128_hi (IntCC.SignedLessThan) a b) (pulley_xslt64 a b))
 (rule (lower_icmp128_hi (IntCC.UnsignedLessThan) a b) (pulley_xult64 a b))
 
+;; vector comparisons
+
+(rule 1 (lower (icmp cc a @ (value_type (ty_vec128 ty)) b))
+        (lower_vcmp ty cc a b))
+
+(decl lower_vcmp (Type IntCC Value Value) VReg)
+(rule (lower_vcmp $I8X16 (IntCC.Equal) a b) (pulley_veq8x16 a b))
+(rule (lower_vcmp $I8X16 (IntCC.NotEqual) a b) (pulley_vneq8x16 a b))
+(rule (lower_vcmp $I8X16 (IntCC.SignedLessThan) a b) (pulley_vslt8x16 a b))
+(rule (lower_vcmp $I8X16 (IntCC.SignedLessThanOrEqual) a b) (pulley_vslteq8x16 a b))
+(rule (lower_vcmp $I8X16 (IntCC.UnsignedLessThan) a b) (pulley_vult8x16 a b))
+(rule (lower_vcmp $I8X16 (IntCC.UnsignedLessThanOrEqual) a b) (pulley_vulteq8x16 a b))
+(rule (lower_vcmp $I16X8 (IntCC.Equal) a b) (pulley_veq16x8 a b))
+(rule (lower_vcmp $I16X8 (IntCC.NotEqual) a b) (pulley_vneq16x8 a b))
+(rule (lower_vcmp $I16X8 (IntCC.SignedLessThan) a b) (pulley_vslt16x8 a b))
+(rule (lower_vcmp $I16X8 (IntCC.SignedLessThanOrEqual) a b) (pulley_vslteq16x8 a b))
+(rule (lower_vcmp $I16X8 (IntCC.UnsignedLessThan) a b) (pulley_vult16x8 a b))
+(rule (lower_vcmp $I16X8 (IntCC.UnsignedLessThanOrEqual) a b) (pulley_vulteq16x8 a b))
+(rule (lower_vcmp $I32X4 (IntCC.Equal) a b) (pulley_veq32x4 a b))
+(rule (lower_vcmp $I32X4 (IntCC.NotEqual) a b) (pulley_vneq32x4 a b))
+(rule (lower_vcmp $I32X4 (IntCC.SignedLessThan) a b) (pulley_vslt32x4 a b))
+(rule (lower_vcmp $I32X4 (IntCC.SignedLessThanOrEqual) a b) (pulley_vslteq32x4 a b))
+(rule (lower_vcmp $I32X4 (IntCC.UnsignedLessThan) a b) (pulley_vult32x4 a b))
+(rule (lower_vcmp $I32X4 (IntCC.UnsignedLessThanOrEqual) a b) (pulley_vulteq32x4 a b))
+(rule (lower_vcmp $I64X2 (IntCC.Equal) a b) (pulley_veq64x2 a b))
+(rule (lower_vcmp $I64X2 (IntCC.NotEqual) a b) (pulley_vneq64x2 a b))
+(rule (lower_vcmp $I64X2 (IntCC.SignedLessThan) a b) (pulley_vslt64x2 a b))
+(rule (lower_vcmp $I64X2 (IntCC.SignedLessThanOrEqual) a b) (pulley_vslteq64x2 a b))
+(rule (lower_vcmp $I64X2 (IntCC.UnsignedLessThan) a b) (pulley_vult64x2 a b))
+(rule (lower_vcmp $I64X2 (IntCC.UnsignedLessThanOrEqual) a b) (pulley_vulteq64x2 a b))
+
+;; Swap the operand order of comparisons that pulley doesn't support.
+(rule (lower_vcmp ty cc @ (IntCC.SignedGreaterThan) a b)
+      (lower_vcmp ty (intcc_swap_args cc) b a))
+(rule (lower_vcmp ty cc @ (IntCC.SignedGreaterThanOrEqual) a b)
+      (lower_vcmp ty (intcc_swap_args cc) b a))
+(rule (lower_vcmp ty cc @ (IntCC.UnsignedGreaterThan) a b)
+      (lower_vcmp ty (intcc_swap_args cc) b a))
+(rule (lower_vcmp ty cc @ (IntCC.UnsignedGreaterThanOrEqual) a b)
+      (lower_vcmp ty (intcc_swap_args cc) b a))
+
 ;;;; Rules for `fcmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (fcmp cc a b @ (value_type (ty_scalar_float ty))))
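A note on the swap rules above: pulley only provides equality and less-than flavors of each vector comparison, so the four greater-than conditions are lowered via the identity `a > b ⇔ b < a` (and `a >= b ⇔ b <= a`), swapping the operands and flipping the condition with `intcc_swap_args`. A minimal standalone Rust sketch of the same rewrite, with hypothetical names and a single `i8` lane standing in for a whole vector:

```rust
#[derive(Clone, Copy)]
enum Cmp {
    Slt, Sle, Ult, Ule, Sgt, Sge, Ugt, Uge,
}

fn lane_cmp_i8(cc: Cmp, a: i8, b: i8) -> bool {
    match cc {
        Cmp::Slt => a < b,
        Cmp::Sle => a <= b,
        Cmp::Ult => (a as u8) < (b as u8),
        Cmp::Ule => (a as u8) <= (b as u8),
        // The four conditions with no direct instruction: swap the
        // arguments and use the mirrored condition, as the ISLE
        // rules do via `intcc_swap_args`.
        Cmp::Sgt => lane_cmp_i8(Cmp::Slt, b, a),
        Cmp::Sge => lane_cmp_i8(Cmp::Sle, b, a),
        Cmp::Ugt => lane_cmp_i8(Cmp::Ult, b, a),
        Cmp::Uge => lane_cmp_i8(Cmp::Ule, b, a),
    }
}

fn main() {
    assert!(lane_cmp_i8(Cmp::Sgt, 1, -1)); // 1 > -1 when signed
    assert!(!lane_cmp_i8(Cmp::Ugt, 1, -1)); // but -1 is 255 unsigned
    assert!(lane_cmp_i8(Cmp::Uge, -1, 1)); // 255 >= 1 unsigned
    assert!(!lane_cmp_i8(Cmp::Sge, -1, 1)); // -1 < 1 signed
    println!("swapped lowerings agree with direct comparisons");
}
```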
diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-eq.clif b/cranelift/filetests/filetests/runtests/simd-icmp-eq.clif
index 148e8064cb3c..cfcfbfa967d1 100644
--- a/cranelift/filetests/filetests/runtests/simd-icmp-eq.clif
+++ b/cranelift/filetests/filetests/runtests/simd-icmp-eq.clif
@@ -7,6 +7,10 @@ target s390x
 set enable_multi_ret_implicit_sret
 target riscv64 has_v
 target riscv64 has_v has_c has_zcb
+target pulley32
+target pulley32be
+target pulley64
+target pulley64be
 
 function %simd_icmp_eq_i8(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-ne.clif b/cranelift/filetests/filetests/runtests/simd-icmp-ne.clif
index 163d92b0ea36..0299ea597923 100644
--- a/cranelift/filetests/filetests/runtests/simd-icmp-ne.clif
+++ b/cranelift/filetests/filetests/runtests/simd-icmp-ne.clif
@@ -9,6 +9,10 @@ target s390x
 set enable_multi_ret_implicit_sret
 target riscv64 has_v
 target riscv64 has_v has_c has_zcb
+target pulley32
+target pulley32be
+target pulley64
+target pulley64be
 
 function %simd_icmp_ne_i8(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-sge.clif b/cranelift/filetests/filetests/runtests/simd-icmp-sge.clif
index 0b3da8df6a49..778adbe41eaa 100644
--- a/cranelift/filetests/filetests/runtests/simd-icmp-sge.clif
+++ b/cranelift/filetests/filetests/runtests/simd-icmp-sge.clif
@@ -9,6 +9,10 @@ target s390x
 set enable_multi_ret_implicit_sret
 target riscv64 has_v
 target riscv64 has_v has_c has_zcb
+target pulley32
+target pulley32be
+target pulley64
+target pulley64be
 
 function %simd_icmp_sge_i8(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-sgt.clif b/cranelift/filetests/filetests/runtests/simd-icmp-sgt.clif
index 889debf367c0..b15aed5e8f59 100644
--- a/cranelift/filetests/filetests/runtests/simd-icmp-sgt.clif
+++ b/cranelift/filetests/filetests/runtests/simd-icmp-sgt.clif
@@ -9,6 +9,10 @@ target s390x
 set enable_multi_ret_implicit_sret
 target riscv64 has_v
 target riscv64 has_v has_c has_zcb
+target pulley32
+target pulley32be
+target pulley64
+target pulley64be
 
 function %simd_icmp_sgt_i8(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-sle.clif b/cranelift/filetests/filetests/runtests/simd-icmp-sle.clif
index 74fe6d99764e..fd87e7ed1da5 100644
--- a/cranelift/filetests/filetests/runtests/simd-icmp-sle.clif
+++ b/cranelift/filetests/filetests/runtests/simd-icmp-sle.clif
@@ -9,6 +9,10 @@ target s390x
 set enable_multi_ret_implicit_sret
 target riscv64 has_v
 target riscv64 has_v has_c has_zcb
+target pulley32
+target pulley32be
+target pulley64
+target pulley64be
 
 function %simd_icmp_sle_i8(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-slt.clif b/cranelift/filetests/filetests/runtests/simd-icmp-slt.clif
index fef0a5772333..0c9d26b69aad 100644
--- a/cranelift/filetests/filetests/runtests/simd-icmp-slt.clif
+++ b/cranelift/filetests/filetests/runtests/simd-icmp-slt.clif
@@ -9,6 +9,10 @@ target s390x
 set enable_multi_ret_implicit_sret
 target riscv64 has_v
 target riscv64 has_v has_c has_zcb
+target pulley32
+target pulley32be
+target pulley64
+target pulley64be
 
 function %simd_icmp_slt_i8(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-uge.clif b/cranelift/filetests/filetests/runtests/simd-icmp-uge.clif
index 931cabad9fa1..7ec57db07b7a 100644
--- a/cranelift/filetests/filetests/runtests/simd-icmp-uge.clif
+++ b/cranelift/filetests/filetests/runtests/simd-icmp-uge.clif
@@ -9,6 +9,10 @@ target s390x
 set enable_multi_ret_implicit_sret
 target riscv64 has_v
 target riscv64 has_v has_c has_zcb
+target pulley32
+target pulley32be
+target pulley64
+target pulley64be
 
 function %simd_icmp_uge_i8(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-ugt.clif b/cranelift/filetests/filetests/runtests/simd-icmp-ugt.clif
index f41b10cc0a0c..1925a1369368 100644
--- a/cranelift/filetests/filetests/runtests/simd-icmp-ugt.clif
+++ b/cranelift/filetests/filetests/runtests/simd-icmp-ugt.clif
@@ -9,6 +9,10 @@ target s390x
 set enable_multi_ret_implicit_sret
 target riscv64 has_v
 target riscv64 has_v has_c has_zcb
+target pulley32
+target pulley32be
+target pulley64
+target pulley64be
 
 function %simd_icmp_ugt_i8(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-ule.clif b/cranelift/filetests/filetests/runtests/simd-icmp-ule.clif
index 47ed26a9314f..1e10e44d02fb 100644
--- a/cranelift/filetests/filetests/runtests/simd-icmp-ule.clif
+++ b/cranelift/filetests/filetests/runtests/simd-icmp-ule.clif
@@ -9,6 +9,10 @@ target s390x
 set enable_multi_ret_implicit_sret
 target riscv64 has_v
 target riscv64 has_v has_c has_zcb
+target pulley32
+target pulley32be
+target pulley64
+target pulley64be
 
 function %simd_icmp_ule_i8(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
diff --git a/cranelift/filetests/filetests/runtests/simd-icmp-ult.clif b/cranelift/filetests/filetests/runtests/simd-icmp-ult.clif
index 033d1aa1622b..81b3b878cda3 100644
--- a/cranelift/filetests/filetests/runtests/simd-icmp-ult.clif
+++ b/cranelift/filetests/filetests/runtests/simd-icmp-ult.clif
@@ -9,6 +9,10 @@ target s390x
 set enable_multi_ret_implicit_sret
 target riscv64 has_v
 target riscv64 has_v has_c has_zcb
+target pulley32
+target pulley32be
+target pulley64
+target pulley64be
 
 function %simd_icmp_ult_i8(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs
index 6a9217a46a81..445cafa9b71e 100644
--- a/crates/wast-util/src/lib.rs
+++ b/crates/wast-util/src/lib.rs
@@ -402,7 +402,6 @@ impl WastTest {
         if config.compiler == Compiler::CraneliftPulley {
             let unsupported = [
                 "misc_testsuite/simd/canonicalize-nan.wast",
-                "misc_testsuite/simd/issue6725-no-egraph-panic.wast",
                 "misc_testsuite/simd/issue_3327_bnot_lowering.wast",
                 "misc_testsuite/simd/v128-select.wast",
                 "spec_testsuite/proposals/annotations/simd_lane.wast",
@@ -410,13 +409,11 @@ impl WastTest {
                 "spec_testsuite/proposals/relaxed-simd/i32x4_relaxed_trunc.wast",
                 "spec_testsuite/proposals/relaxed-simd/i8x16_relaxed_swizzle.wast",
                 "spec_testsuite/proposals/relaxed-simd/relaxed_dot_product.wast",
-                "spec_testsuite/proposals/relaxed-simd/relaxed_laneselect.wast",
                 "spec_testsuite/proposals/relaxed-simd/relaxed_madd_nmadd.wast",
                 "spec_testsuite/proposals/relaxed-simd/relaxed_min_max.wast",
                 "spec_testsuite/proposals/memory64/simd_lane.wast",
                 "spec_testsuite/proposals/memory64/relaxed_min_max.wast",
                 "spec_testsuite/proposals/memory64/relaxed_madd_nmadd.wast",
-                "spec_testsuite/proposals/memory64/relaxed_laneselect.wast",
                 "spec_testsuite/proposals/memory64/relaxed_dot_product.wast",
                 "spec_testsuite/proposals/memory64/i16x8_relaxed_q15mulr_s.wast",
                 "spec_testsuite/proposals/memory64/i32x4_relaxed_trunc.wast",
@@ -433,23 +430,19 @@
                 "spec_testsuite/simd_f64x2_rounding.wast",
                 "spec_testsuite/simd_i16x8_arith.wast",
                 "spec_testsuite/simd_i16x8_arith2.wast",
-                "spec_testsuite/simd_i16x8_cmp.wast",
                 "spec_testsuite/simd_i16x8_extadd_pairwise_i8x16.wast",
                 "spec_testsuite/simd_i16x8_q15mulr_sat_s.wast",
                 "spec_testsuite/simd_i16x8_sat_arith.wast",
                 "spec_testsuite/simd_i32x4_arith.wast",
                 "spec_testsuite/simd_i32x4_arith2.wast",
-                "spec_testsuite/simd_i32x4_cmp.wast",
                 "spec_testsuite/simd_i32x4_dot_i16x8.wast",
                 "spec_testsuite/simd_i32x4_extadd_pairwise_i16x8.wast",
                 "spec_testsuite/simd_i32x4_trunc_sat_f32x4.wast",
                 "spec_testsuite/simd_i32x4_trunc_sat_f64x2.wast",
                 "spec_testsuite/simd_i64x2_arith.wast",
                 "spec_testsuite/simd_i64x2_arith2.wast",
-                "spec_testsuite/simd_i64x2_cmp.wast",
                 "spec_testsuite/simd_i8x16_arith.wast",
                 "spec_testsuite/simd_i8x16_arith2.wast",
-                "spec_testsuite/simd_i8x16_cmp.wast",
                 "spec_testsuite/simd_i8x16_sat_arith.wast",
                 "spec_testsuite/simd_lane.wast",
                 "spec_testsuite/simd_load.wast",
- "spec_testsuite/simd_i32x4_cmp.wast", "spec_testsuite/simd_i32x4_dot_i16x8.wast", "spec_testsuite/simd_i32x4_extadd_pairwise_i16x8.wast", "spec_testsuite/simd_i32x4_trunc_sat_f32x4.wast", "spec_testsuite/simd_i32x4_trunc_sat_f64x2.wast", "spec_testsuite/simd_i64x2_arith.wast", "spec_testsuite/simd_i64x2_arith2.wast", - "spec_testsuite/simd_i64x2_cmp.wast", "spec_testsuite/simd_i8x16_arith.wast", "spec_testsuite/simd_i8x16_arith2.wast", - "spec_testsuite/simd_i8x16_cmp.wast", "spec_testsuite/simd_i8x16_sat_arith.wast", "spec_testsuite/simd_lane.wast", "spec_testsuite/simd_load.wast", diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs index 08056630b026..aac67a879df7 100644 --- a/pulley/src/interp.rs +++ b/pulley/src/interp.rs @@ -3417,4 +3417,268 @@ impl ExtendedOpVisitor for Interpreter<'_> { self.state[operands.dst].set_f64x2(a); ControlFlow::Continue(()) } + + fn veq8x16(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u8x16(); + let b = self.state[operands.src2].get_u8x16(); + let mut c = [0; 16]; + for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { + *c = if a == b { u8::MAX } else { 0 }; + } + self.state[operands.dst].set_u8x16(c); + ControlFlow::Continue(()) + } + + fn vneq8x16(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u8x16(); + let b = self.state[operands.src2].get_u8x16(); + let mut c = [0; 16]; + for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { + *c = if a != b { u8::MAX } else { 0 }; + } + self.state[operands.dst].set_u8x16(c); + ControlFlow::Continue(()) + } + + fn vslt8x16(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_i8x16(); + let b = self.state[operands.src2].get_i8x16(); + let mut c = [0; 16]; + for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { + *c = if a < b { u8::MAX } else { 0 }; + } + self.state[operands.dst].set_u8x16(c); + ControlFlow::Continue(()) + } + + fn vslteq8x16(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_i8x16(); + let b = self.state[operands.src2].get_i8x16(); + let mut c = [0; 16]; + for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { + *c = if a <= b { u8::MAX } else { 0 }; + } + self.state[operands.dst].set_u8x16(c); + ControlFlow::Continue(()) + } + + fn vult8x16(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u8x16(); + let b = self.state[operands.src2].get_u8x16(); + let mut c = [0; 16]; + for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { + *c = if a < b { u8::MAX } else { 0 }; + } + self.state[operands.dst].set_u8x16(c); + ControlFlow::Continue(()) + } + + fn vulteq8x16(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u8x16(); + let b = self.state[operands.src2].get_u8x16(); + let mut c = [0; 16]; + for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { + *c = if a <= b { u8::MAX } else { 0 }; + } + self.state[operands.dst].set_u8x16(c); + ControlFlow::Continue(()) + } + + fn veq16x8(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u16x8(); + let b = self.state[operands.src2].get_u16x8(); + let mut c = [0; 8]; + for ((a, b), c) in a.iter().zip(&b).zip(&mut c) { + *c = if a == b { u16::MAX } else { 0 }; + } + self.state[operands.dst].set_u16x8(c); + ControlFlow::Continue(()) + } + + fn vneq16x8(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u16x8(); + let b = 
+
+    fn veq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_u16x8();
+        let b = self.state[operands.src2].get_u16x8();
+        let mut c = [0; 8];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a == b { u16::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u16x8(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vneq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_u16x8();
+        let b = self.state[operands.src2].get_u16x8();
+        let mut c = [0; 8];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a != b { u16::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u16x8(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vslt16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_i16x8();
+        let b = self.state[operands.src2].get_i16x8();
+        let mut c = [0; 8];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a < b { u16::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u16x8(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vslteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_i16x8();
+        let b = self.state[operands.src2].get_i16x8();
+        let mut c = [0; 8];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a <= b { u16::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u16x8(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vult16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_u16x8();
+        let b = self.state[operands.src2].get_u16x8();
+        let mut c = [0; 8];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a < b { u16::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u16x8(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vulteq16x8(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_u16x8();
+        let b = self.state[operands.src2].get_u16x8();
+        let mut c = [0; 8];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a <= b { u16::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u16x8(c);
+        ControlFlow::Continue(())
+    }
+
+    fn veq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_u32x4();
+        let b = self.state[operands.src2].get_u32x4();
+        let mut c = [0; 4];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a == b { u32::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u32x4(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vneq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_u32x4();
+        let b = self.state[operands.src2].get_u32x4();
+        let mut c = [0; 4];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a != b { u32::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u32x4(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vslt32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_i32x4();
+        let b = self.state[operands.src2].get_i32x4();
+        let mut c = [0; 4];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a < b { u32::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u32x4(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vslteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_i32x4();
+        let b = self.state[operands.src2].get_i32x4();
+        let mut c = [0; 4];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a <= b { u32::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u32x4(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vult32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_u32x4();
+        let b = self.state[operands.src2].get_u32x4();
+        let mut c = [0; 4];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a < b { u32::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u32x4(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vulteq32x4(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_u32x4();
+        let b = self.state[operands.src2].get_u32x4();
+        let mut c = [0; 4];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a <= b { u32::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u32x4(c);
+        ControlFlow::Continue(())
+    }
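All 24 handlers share one shape: read two lane arrays, compare pairwise, write an all-ones/all-zeros mask. The patch spells each handler out by hand; as a purely hypothetical alternative, a `macro_rules!` table could stamp them out from a few rows. A standalone sketch of that factoring, reduced to free functions over plain arrays so it compiles on its own:

```rust
// Hypothetical macro generating one lane-wise comparison function
// per (name, lane type, mask type, operator) row. The real handlers
// instead live on the interpreter and go through `self.state`.
macro_rules! lane_cmp_fns {
    ($($name:ident: [$lane:ty; $n:literal] => [$mask:ty; $n2:literal], $op:tt;)*) => {$(
        fn $name(a: [$lane; $n], b: [$lane; $n]) -> [$mask; $n2] {
            let mut c = [0 as $mask; $n2];
            for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
                *c = if a $op b { <$mask>::MAX } else { 0 };
            }
            c
        }
    )*};
}

lane_cmp_fns! {
    veq8x16:  [u8; 16] => [u8; 16], ==;
    vslt8x16: [i8; 16] => [u8; 16], <;
    vult8x16: [u8; 16] => [u8; 16], <;
    veq64x2:  [u64; 2] => [u64; 2], ==;
}

fn main() {
    assert_eq!(veq8x16([1; 16], [1; 16]), [0xff; 16]);
    assert_eq!(vslt8x16([-1; 16], [0; 16]), [0xff; 16]); // signed: -1 < 0
    assert_eq!(vult8x16([0xff; 16], [0; 16]), [0x00; 16]); // unsigned: 255 !< 0
    assert_eq!(veq64x2([1, 2], [1, 3]), [u64::MAX, 0]);
}
```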
+
+    fn veq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_u64x2();
+        let b = self.state[operands.src2].get_u64x2();
+        let mut c = [0; 2];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a == b { u64::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u64x2(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vneq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_u64x2();
+        let b = self.state[operands.src2].get_u64x2();
+        let mut c = [0; 2];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a != b { u64::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u64x2(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vslt64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_i64x2();
+        let b = self.state[operands.src2].get_i64x2();
+        let mut c = [0; 2];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a < b { u64::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u64x2(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vslteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_i64x2();
+        let b = self.state[operands.src2].get_i64x2();
+        let mut c = [0; 2];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a <= b { u64::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u64x2(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vult64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_u64x2();
+        let b = self.state[operands.src2].get_u64x2();
+        let mut c = [0; 2];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a < b { u64::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u64x2(c);
+        ControlFlow::Continue(())
+    }
+
+    fn vulteq64x2(&mut self, operands: BinaryOperands<VReg>) -> ControlFlow<Done> {
+        let a = self.state[operands.src1].get_u64x2();
+        let b = self.state[operands.src2].get_u64x2();
+        let mut c = [0; 2];
+        for ((a, b), c) in a.iter().zip(&b).zip(&mut c) {
+            *c = if a <= b { u64::MAX } else { 0 };
+        }
+        self.state[operands.dst].set_u64x2(c);
+        ControlFlow::Continue(())
+    }
 }
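The final hunk below registers the opcodes in `for_each_extended_op!`, pulley's single listing of extended ops; that one table is reused to generate, among other things, the opcode enum and the visitor trait whose methods the interpreter implements above. A toy, hypothetical reduction of the "one macro lists every op" pattern, not pulley's actual macros:

```rust
// The op table is a macro that takes another macro's name and feeds
// it the full list, so adding an op in one place obliges every
// consumer (enum, decoder, interpreter, ...) to handle it.
macro_rules! for_each_op {
    ($mac:ident) => {
        $mac! {
            veq8x16;
            vslt8x16;
        }
    };
}

macro_rules! define_enum {
    ($($op:ident;)*) => {
        #[allow(non_camel_case_types)]
        enum Op { $($op,)* }
    };
}

macro_rules! define_visitor {
    ($($op:ident;)*) => {
        trait OpVisitor {
            $(fn $op(&mut self);)*
            fn visit(&mut self, op: &Op) {
                match op {
                    $(Op::$op => self.$op(),)*
                }
            }
        }
    };
}

for_each_op!(define_enum);
for_each_op!(define_visitor);

struct Printer;

impl OpVisitor for Printer {
    fn veq8x16(&mut self) { println!("veq8x16"); }
    fn vslt8x16(&mut self) { println!("vslt8x16"); }
}

fn main() {
    let mut p = Printer;
    p.visit(&Op::veq8x16);
    p.visit(&Op::vslt8x16);
}
```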
diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs
index ad0b0bb269c5..3ee27c5bde1f 100644
--- a/pulley/src/lib.rs
+++ b/pulley/src/lib.rs
@@ -869,6 +869,55 @@ macro_rules! for_each_extended_op {
         /// `dst = src1; dst[lane] = src2`
         vinsertf32 = VInsertF32 { operands: BinaryOperands, lane: u8 };
         /// `dst = src1; dst[lane] = src2`
         vinsertf64 = VInsertF64 { operands: BinaryOperands, lane: u8 };
+
+        /// `dst = src1 == src2`
+        veq8x16 = Veq8x16 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 != src2`
+        vneq8x16 = Vneq8x16 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 < src2` (signed)
+        vslt8x16 = Vslt8x16 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 <= src2` (signed)
+        vslteq8x16 = Vslteq8x16 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 < src2` (unsigned)
+        vult8x16 = Vult8x16 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 <= src2` (unsigned)
+        vulteq8x16 = Vulteq8x16 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 == src2`
+        veq16x8 = Veq16x8 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 != src2`
+        vneq16x8 = Vneq16x8 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 < src2` (signed)
+        vslt16x8 = Vslt16x8 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 <= src2` (signed)
+        vslteq16x8 = Vslteq16x8 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 < src2` (unsigned)
+        vult16x8 = Vult16x8 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 <= src2` (unsigned)
+        vulteq16x8 = Vulteq16x8 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 == src2`
+        veq32x4 = Veq32x4 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 != src2`
+        vneq32x4 = Vneq32x4 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 < src2` (signed)
+        vslt32x4 = Vslt32x4 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 <= src2` (signed)
+        vslteq32x4 = Vslteq32x4 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 < src2` (unsigned)
+        vult32x4 = Vult32x4 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 <= src2` (unsigned)
+        vulteq32x4 = Vulteq32x4 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 == src2`
+        veq64x2 = Veq64x2 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 != src2`
+        vneq64x2 = Vneq64x2 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 < src2` (signed)
+        vslt64x2 = Vslt64x2 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 <= src2` (signed)
+        vslteq64x2 = Vslteq64x2 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 < src2` (unsigned)
+        vult64x2 = Vult64x2 { operands: BinaryOperands<VReg> };
+        /// `dst = src1 <= src2` (unsigned)
+        vulteq64x2 = Vulteq64x2 { operands: BinaryOperands<VReg> };
     }
 };
 }
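A closing note on the doc comments: each comparison writes all-ones or all-zeros lanes rather than a single boolean, which is the shape a later bitwise select can consume directly. A short standalone illustration of a mask being used that way (plain Rust, not pulley code):

```rust
fn main() {
    // An all-ones mask lane selects bits from `a`, an all-zeros lane
    // from `b` -- the standard SIMD "bitselect" trick that the
    // comparison results above are shaped for.
    let mask: [u8; 4] = [0xff, 0x00, 0xff, 0x00]; // e.g. a veq result
    let a: [u8; 4] = [1, 2, 3, 4];
    let b: [u8; 4] = [10, 20, 30, 40];
    let mut out = [0u8; 4];
    for i in 0..4 {
        out[i] = (a[i] & mask[i]) | (b[i] & !mask[i]);
    }
    assert_eq!(out, [1, 20, 3, 40]);
    println!("{out:?}");
}
```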