From 4c2a056b56e4f25b58c0e7fd8a9be9a552094461 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 13 Dec 2024 14:11:54 -0800 Subject: [PATCH] pulley: Lower `umulhi` and `smulhi` in CLIF This is not directly reachable from wasm but can be created through optimizations. --- .../codegen/src/isa/pulley_shared/lower.isle | 28 +++++++++++++++++++ .../filetests/filetests/runtests/smulhi.clif | 4 +++ .../filetests/filetests/runtests/umulhi.clif | 4 +++ pulley/src/interp.rs | 16 +++++++++++ pulley/src/lib.rs | 5 ++++ 5 files changed, 57 insertions(+) diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle index 612d181a148f..703fef501f6c 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower.isle +++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle @@ -167,6 +167,34 @@ (rule (lower (has_type $I32 (imul a b))) (pulley_xmul32 a b)) (rule (lower (has_type $I64 (imul a b))) (pulley_xmul64 a b)) +;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I8 (umulhi a b))) + (pulley_xshr32_u (pulley_xmul32 (zext32 a) (zext32 b)) (pulley_xconst8 8))) + +(rule (lower (has_type $I16 (umulhi a b))) + (pulley_xshr32_u (pulley_xmul32 (zext32 a) (zext32 b)) (pulley_xconst8 16))) + +(rule (lower (has_type $I32 (umulhi a b))) + (pulley_xshr64_u (pulley_xmul64 (zext64 a) (zext64 b)) (pulley_xconst8 32))) + +(rule (lower (has_type $I64 (umulhi a b))) + (pulley_xmulhi64_u a b)) + +;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (has_type $I8 (smulhi a b))) + (pulley_xshr32_s (pulley_xmul32 (sext32 a) (sext32 b)) (pulley_xconst8 8))) + +(rule (lower (has_type $I16 (smulhi a b))) + (pulley_xshr32_s (pulley_xmul32 (sext32 a) (sext32 b)) (pulley_xconst8 16))) + +(rule (lower (has_type $I32 (smulhi a b))) + (pulley_xshr64_s (pulley_xmul64 (sext64 a) (sext64 b)) (pulley_xconst8 32))) + +(rule (lower (has_type $I64 (smulhi a b))) + (pulley_xmulhi64_s a b)) + ;;;; Rules for `sdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule 0 (lower (has_type (fits_in_32 _) (sdiv a b))) diff --git a/cranelift/filetests/filetests/runtests/smulhi.clif b/cranelift/filetests/filetests/runtests/smulhi.clif index 7cc05a1c0f18..b5d0790be358 100644 --- a/cranelift/filetests/filetests/runtests/smulhi.clif +++ b/cranelift/filetests/filetests/runtests/smulhi.clif @@ -8,6 +8,10 @@ target x86_64 has_sse3 has_ssse3 has_sse41 target x86_64 has_sse3 has_ssse3 has_sse41 has_avx target riscv64 target riscv64 has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %smulhi_i8(i8, i8) -> i8 { diff --git a/cranelift/filetests/filetests/runtests/umulhi.clif b/cranelift/filetests/filetests/runtests/umulhi.clif index 2d6a1d5c3774..6aca3d24004d 100644 --- a/cranelift/filetests/filetests/runtests/umulhi.clif +++ b/cranelift/filetests/filetests/runtests/umulhi.clif @@ -7,6 +7,10 @@ target x86_64 has_bmi2 target s390x target riscv64 target riscv64 has_c has_zcb +target pulley32 +target pulley32be +target pulley64 +target pulley64be function %umulhi_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs index f918c5782e28..3496867653a2 100644 --- a/pulley/src/interp.rs +++ b/pulley/src/interp.rs @@ -1308,6 +1308,22 @@ impl OpVisitor for Interpreter<'_> { ControlFlow::Continue(()) } + fn xmulhi64_s(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_i64(); + let b = self.state[operands.src2].get_i64(); + let result = ((i128::from(a) * i128::from(b)) >> 64) as i64; + self.state[operands.dst].set_i64(result); + ControlFlow::Continue(()) + } + + fn xmulhi64_u(&mut self, operands: BinaryOperands) -> ControlFlow { + let a = self.state[operands.src1].get_u64(); + let b = self.state[operands.src2].get_u64(); + let result = ((u128::from(a) * u128::from(b)) >> 64) as u64; + self.state[operands.dst].set_u64(result); + ControlFlow::Continue(()) + } + fn xshl32(&mut self, operands: BinaryOperands) -> ControlFlow { let a = self.state[operands.src1].get_u32(); let b = self.state[operands.src2].get_u32(); diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs index fe6fdc45e724..facda91c3f9c 100644 --- a/pulley/src/lib.rs +++ b/pulley/src/lib.rs @@ -190,6 +190,11 @@ macro_rules! for_each_op { /// `dst = src1 * src2` xmul64 = XMul64 { operands: BinaryOperands }; + /// `dst = high64(src1 * src2)` (signed) + xmulhi64_s = XMulHi64S { operands: BinaryOperands }; + /// `dst = high64(src1 * src2)` (unsigned) + xmulhi64_u = XMulHi64U { operands: BinaryOperands }; + /// `low32(dst) = trailing_zeros(low32(src))` xctz32 = Xctz32 { dst: XReg, src: XReg }; /// `dst = trailing_zeros(src)`