From 70dd41ec62260f4e26403806a9317db35ca7795e Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 19 Dec 2024 13:45:19 -0800 Subject: [PATCH] pulley: Add macro `CallN` instructions This commit adds new macro instructions to assist with speeding up calls between functions. Pulley's previous `Call` instruction was similar to native call instructions where arguments/results are implicitly in the right location according to the ABI, but movement between registers is more expensive with Pulley than with native architectures. The `CallN` instructions here enable listing a few arguments (only integer registers) in the opcode itself. This removes the need for individual `xmov` instructions into individual registers; instead, the moves can all be done within the opcode handlers. This additionally allows an argument that is passed twice to a function to reside in only one register. Finally, parallel copies between these registers are supported because the interpreter loads all registers and then stores all registers. These new instructions participate in register allocation differently from before: the first few arguments are allowed to be in any register and no longer use `reg_fixed_use`. All other arguments (and all float arguments, for example) continue to use `reg_fixed_use`. Locally, sightglass reports this change speeding up `pulldown-cmark` by 2-10%. On a `fib(N)` micro-benchmark it didn't help as much as I had hoped it would. 
--- cranelift/codegen/meta/src/pulley.rs | 3 +- .../codegen/src/isa/pulley_shared/abi.rs | 33 +++++++-- .../codegen/src/isa/pulley_shared/inst.isle | 3 +- .../src/isa/pulley_shared/inst/args.rs | 13 ++++ .../src/isa/pulley_shared/inst/emit.rs | 32 +++++++-- .../codegen/src/isa/pulley_shared/inst/mod.rs | 24 ++++++- .../src/isa/pulley_shared/lower/isle.rs | 6 +- cranelift/codegen/src/isa/s390x/inst/emit.rs | 17 +++-- cranelift/codegen/src/machinst/buffer.rs | 6 +- .../filetests/isa/pulley32/call.clif | 58 +++++++-------- .../filetests/isa/pulley32/extend.clif | 40 +++++------ .../filetests/isa/pulley64/call.clif | 70 +++++++------------ .../filetests/isa/pulley64/extend.clif | 52 +++++++------- pulley/src/interp.rs | 59 ++++++++++++++++ pulley/src/lib.rs | 16 +++++ 15 files changed, 288 insertions(+), 144 deletions(-) diff --git a/cranelift/codegen/meta/src/pulley.rs b/cranelift/codegen/meta/src/pulley.rs index fb7b2affb074..6fb8316b9270 100644 --- a/cranelift/codegen/meta/src/pulley.rs +++ b/cranelift/codegen/meta/src/pulley.rs @@ -89,7 +89,8 @@ impl Inst<'_> { match self.name { // Skip instructions related to control-flow as those require // special handling with `MachBuffer`. - "Jump" | "Call" | "CallIndirect" => true, + "Jump" => true, + n if n.starts_with("Call") => true, // Skip special instructions not used in Cranelift. 
"XPush32Many" | "XPush64Many" | "XPop32Many" | "XPop64Many" => true, diff --git a/cranelift/codegen/src/isa/pulley_shared/abi.rs b/cranelift/codegen/src/isa/pulley_shared/abi.rs index 5c72b5311fcd..b56af685cb8a 100644 --- a/cranelift/codegen/src/isa/pulley_shared/abi.rs +++ b/cranelift/codegen/src/isa/pulley_shared/abi.rs @@ -441,15 +441,40 @@ where fn gen_call( dest: &CallDest, _tmp: Writable, - info: CallInfo<()>, + mut info: CallInfo<()>, ) -> SmallVec<[Self::I; 2]> { match dest { // "near" calls are pulley->pulley calls so they use a normal "call" // opcode - CallDest::ExtName(name, RelocDistance::Near) => smallvec![Inst::Call { - info: Box::new(info.map(|()| name.clone())) + CallDest::ExtName(name, RelocDistance::Near) => { + // The first four integer arguments to a call can be handled via + // special pulley call instructions. Assert here that + // `info.uses` is sorted in order and then take out x0-x3 if + // they're present and move them from `info.uses` to + // `info.dest.args` to be handled differently during register + // allocation. + let mut args = SmallVec::new(); + assert!(info + .uses + .iter() + .filter_map(|arg| XReg::new(arg.preg)) + .is_sorted()); + info.uses.retain(|arg| { + if arg.preg != x0() && arg.preg != x1() && arg.preg != x2() && arg.preg != x3() + { + return true; + } + args.push(XReg::new(arg.vreg).unwrap()); + false + }); + smallvec![Inst::Call { + info: Box::new(info.map(|()| PulleyCall { + name: name.clone(), + args, + })) + } + .into()] } - .into()], // "far" calls are pulley->host calls so they use a different opcode // which is lowered with a special relocation in the backend. 
CallDest::ExtName(name, RelocDistance::Far) => smallvec![Inst::IndirectCallHost { diff --git a/cranelift/codegen/src/isa/pulley_shared/inst.isle b/cranelift/codegen/src/isa/pulley_shared/inst.isle index 30c98eb5819a..34742042671b 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst.isle +++ b/cranelift/codegen/src/isa/pulley_shared/inst.isle @@ -58,7 +58,7 @@ ;; An indirect call out to a host-defined function. The host function ;; pointer is the first "argument" of this function call. - (IndirectCallHost (info BoxCallInfo)) + (IndirectCallHost (info BoxCallIndirectHostInfo)) ;; Unconditional jumps. (Jump (label MachLabel)) @@ -154,6 +154,7 @@ (type BoxReturnCallInfo (primitive BoxReturnCallInfo)) (type BoxReturnCallIndInfo (primitive BoxReturnCallIndInfo)) (type XRegSet (primitive XRegSet)) +(type BoxCallIndirectHostInfo (primitive BoxCallIndirectHostInfo)) ;;;; Address Modes ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/args.rs b/cranelift/codegen/src/isa/pulley_shared/inst/args.rs index cb7496336341..6c68a356e1d3 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/args.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/args.rs @@ -1,6 +1,7 @@ //! Pulley instruction arguments. use super::*; +use crate::ir::ExternalName; use crate::machinst::abi::StackAMode; use pulley_interpreter::encode; use pulley_interpreter::regs::Reg as _; @@ -565,3 +566,15 @@ impl fmt::Display for Cond { } } } + +/// Payload of `CallInfo` for call instructions +#[derive(Clone, Debug)] +pub struct PulleyCall { + /// The external name that's being called, or the Cranelift-generated + /// function that's being invoked. + pub name: ExternalName, + /// Arguments tracked in this call invocation which aren't assigned fixed + /// registers. This tracks up to 4 registers and all remaining registers + /// will be present and tracked in `CallInfo` fields. 
+ pub args: SmallVec<[XReg; 4]>, +} diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs index 9140fa8fb60c..9090e3585f2e 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs @@ -172,16 +172,36 @@ fn pulley_emit

( Inst::LoadExtName { .. } => todo!(), Inst::Call { info } => { - sink.put1(pulley_interpreter::Opcode::Call as u8); - sink.add_reloc( + let offset = sink.cur_offset(); + + // If arguments happen to already be in the right register for the + // ABI then remove them from this list. Otherwise emit the + // appropriate `Call` instruction depending on how many arguments we + // have that aren't already in their correct register according to + // ABI conventions. + let mut args = &info.dest.args[..]; + while !args.is_empty() && args.last().copied() == XReg::new(x_reg(args.len() - 1)) { + args = &args[..args.len() - 1]; + } + match args { + [] => enc::call(sink, 0), + [x0] => enc::call1(sink, x0, 0), + [x0, x1] => enc::call2(sink, x0, x1, 0), + [x0, x1, x2] => enc::call3(sink, x0, x1, x2, 0), + [x0, x1, x2, x3] => enc::call4(sink, x0, x1, x2, x3, 0), + _ => unreachable!(), + } + let end = sink.cur_offset(); + sink.add_reloc_at_offset( + end - 4, // TODO: is it actually okay to reuse this reloc here? Reloc::X86CallPCRel4, - &info.dest, + &info.dest.name, // This addend adjusts for the difference between the start of - // the instruction and the beginning of the immediate field. - -1, + // the instruction and the beginning of the immediate offset + // field which is always the final 4 bytes of the instruction. 
+ -i64::from(end - offset - 4), ); - sink.put4(0); if let Some(s) = state.take_stack_map() { let offset = sink.cur_offset(); sink.push_user_stack_map(state, offset, s); diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs index ec95bdbe53e5..67e3bf075971 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs @@ -151,7 +151,29 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) { collector.reg_def(dst); } - Inst::Call { info } | Inst::IndirectCallHost { info } => { + Inst::Call { info } => { + let CallInfo { + uses, defs, dest, .. + } = &mut **info; + + // Pulley supports having the first few integer arguments in any + // register, so flag that with `reg_use` here. + let PulleyCall { args, .. } = dest; + for arg in args { + collector.reg_use(arg); + } + + // Remaining arguments (and return values) are all in fixed + // registers according to Pulley's ABI, however. + for CallArgPair { vreg, preg } in uses { + collector.reg_fixed_use(vreg, *preg); + } + for CallRetPair { vreg, preg } in defs { + collector.reg_fixed_def(vreg, *preg); + } + collector.reg_clobbers(info.clobbers); + } + Inst::IndirectCallHost { info } => { let CallInfo { uses, defs, .. 
} = &mut **info; for CallArgPair { vreg, preg } in uses { collector.reg_fixed_use(vreg, *preg); diff --git a/cranelift/codegen/src/isa/pulley_shared/lower/isle.rs b/cranelift/codegen/src/isa/pulley_shared/lower/isle.rs index ed77a698b0f7..f344d33436df 100644 --- a/cranelift/codegen/src/isa/pulley_shared/lower/isle.rs +++ b/cranelift/codegen/src/isa/pulley_shared/lower/isle.rs @@ -10,7 +10,8 @@ use crate::ir::{condcodes::*, immediates::*, types::*, *}; use crate::isa::pulley_shared::{ abi::*, inst::{ - FReg, OperandSize, ReturnCallInfo, VReg, WritableFReg, WritableVReg, WritableXReg, XReg, + FReg, OperandSize, PulleyCall, ReturnCallInfo, VReg, WritableFReg, WritableVReg, + WritableXReg, XReg, }, lower::{regs, Cond}, *, @@ -26,8 +27,9 @@ use regalloc2::PReg; type Unit = (); type VecArgPair = Vec; type VecRetPair = Vec; -type BoxCallInfo = Box>; +type BoxCallInfo = Box>; type BoxCallIndInfo = Box>; +type BoxCallIndirectHostInfo = Box>; type BoxReturnCallInfo = Box>; type BoxReturnCallIndInfo = Box>; type BoxExternalName = Box; diff --git a/cranelift/codegen/src/isa/s390x/inst/emit.rs b/cranelift/codegen/src/isa/s390x/inst/emit.rs index 24bd4cb0fcfc..53d383ea535a 100644 --- a/cranelift/codegen/src/isa/s390x/inst/emit.rs +++ b/cranelift/codegen/src/isa/s390x/inst/emit.rs @@ -220,7 +220,13 @@ pub fn mem_emit( &MemArg::Symbol { ref name, offset, .. } => { - sink.add_reloc_at_offset(2, Reloc::S390xPCRel32Dbl, &**name, (offset + 2).into()); + let reloc_offset = sink.cur_offset() + 2; + sink.add_reloc_at_offset( + reloc_offset, + Reloc::S390xPCRel32Dbl, + &**name, + (offset + 2).into(), + ); put(sink, &enc_ril_b(opcode_ril.unwrap(), rd, 0)); } _ => unreachable!(), @@ -3198,7 +3204,8 @@ impl Inst { // Add relocation for target function. This has to be done *before* // the S390xTlsGdCall relocation if any, to ensure linker relaxation // works correctly. 
- sink.add_reloc_at_offset(2, Reloc::S390xPLTRel32Dbl, &info.dest, 2); + let offset = sink.cur_offset() + 2; + sink.add_reloc_at_offset(offset, Reloc::S390xPLTRel32Dbl, &info.dest, 2); if let Some(s) = state.take_stack_map() { let offset = sink.cur_offset() + 6; @@ -3232,7 +3239,8 @@ impl Inst { } let opcode = 0xc04; // BCRL - sink.add_reloc_at_offset(2, Reloc::S390xPLTRel32Dbl, &info.dest, 2); + let offset = sink.cur_offset() + 2; + sink.add_reloc_at_offset(offset, Reloc::S390xPLTRel32Dbl, &info.dest, 2); put(sink, &enc_ril_c(opcode, 15, 0)); sink.add_call_site(); } @@ -3257,7 +3265,8 @@ impl Inst { // *before* the S390xTlsGdCall, to ensure linker relaxation // works correctly. let dest = ExternalName::LibCall(LibCall::ElfTlsGetOffset); - sink.add_reloc_at_offset(2, Reloc::S390xPLTRel32Dbl, &dest, 2); + let offset = sink.cur_offset() + 2; + sink.add_reloc_at_offset(offset, Reloc::S390xPLTRel32Dbl, &dest, 2); match &**symbol { SymbolReloc::TlsGd { name } => sink.add_reloc(Reloc::S390xTlsGdCall, name, 0), _ => unreachable!(), diff --git a/cranelift/codegen/src/machinst/buffer.rs b/cranelift/codegen/src/machinst/buffer.rs index 93c11658e34a..518b4cdbe389 100644 --- a/cranelift/codegen/src/machinst/buffer.rs +++ b/cranelift/codegen/src/machinst/buffer.rs @@ -1536,7 +1536,7 @@ impl MachBuffer { } } - /// Add an external relocation at the given offset from current offset. + /// Add an external relocation at the given offset. pub fn add_reloc_at_offset + Clone>( &mut self, offset: CodeOffset, @@ -1579,7 +1579,7 @@ impl MachBuffer { // when a relocation can't otherwise be resolved later, so it shouldn't // actually result in any memory unsafety or anything like that. 
self.relocs.push(MachReloc { - offset: self.data.len() as CodeOffset + offset, + offset, kind, target, addend, @@ -1593,7 +1593,7 @@ impl MachBuffer { target: &T, addend: Addend, ) { - self.add_reloc_at_offset(0, kind, target, addend); + self.add_reloc_at_offset(self.data.len() as CodeOffset, kind, target, addend); } /// Add a trap record at the current offset. diff --git a/cranelift/filetests/filetests/isa/pulley32/call.clif b/cranelift/filetests/filetests/isa/pulley32/call.clif index 6c8a95bce988..d2d9a29f232c 100644 --- a/cranelift/filetests/filetests/isa/pulley32/call.clif +++ b/cranelift/filetests/filetests/isa/pulley32/call.clif @@ -15,16 +15,16 @@ block0: ; VCode: ; push_frame ; block0: -; xconst8 x0, 0 -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; xconst8 x2, 0 +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xconst8 x0, 1 ; pop_frame ; ret ; ; Disassembled: ; push_frame -; xconst8 x0, 0 -; call 0x0 // target = 0x4 +; xconst8 x2, 0 +; call1 x2, 0x0 // target = 0x4 ; xconst8 x0, 1 ; pop_frame ; ret @@ -42,16 +42,16 @@ block0: ; VCode: ; push_frame ; block0: -; xconst8 x0, 0 -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; xconst8 x2, 0 +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: 
PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xconst8 x0, 1 ; pop_frame ; ret ; ; Disassembled: ; push_frame -; xconst8 x0, 0 -; call 0x0 // target = 0x4 +; xconst8 x2, 0 +; call1 x2, 0x0 // target = 0x4 ; xconst8 x0, 1 ; pop_frame ; ret @@ -71,21 +71,21 @@ block0: ; VCode: ; push_frame ; block0: -; xconst8 x0, 0 -; xconst8 x1, 1 -; xconst8 x2, 2 -; xconst8 x3, 3 -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; xconst8 x3, 0 +; xconst8 x4, 1 +; xconst8 x5, 2 +; xconst8 x6, 3 +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p3i), XReg(p4i), XReg(p5i), XReg(p6i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; ; Disassembled: ; push_frame -; xconst8 x0, 0 -; xconst8 x1, 1 -; xconst8 x2, 2 -; xconst8 x3, 3 -; call 0x0 // target = 0xd +; xconst8 x3, 0 +; xconst8 x4, 1 +; xconst8 x5, 2 +; xconst8 x6, 3 +; call4 x3, x4, x5, x6, 0x0 // target = 0xd ; pop_frame ; ret @@ -103,7 +103,7 @@ block0: ; VCode: ; push_frame ; block0: -; call CallInfo { dest: TestCase(%g), uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }], clobbers: PRegSet { bits: [65520, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, 
preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }], clobbers: PRegSet { bits: [65520, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xadd64 x4, x0, x2 ; xadd64 x3, x1, x3 ; xadd64 x0, x4, x3 @@ -138,10 +138,6 @@ block0: ; xstore64 OutgoingArg(24), x15 // flags = notrap aligned ; xstore64 OutgoingArg(32), x15 // flags = notrap aligned ; xstore64 OutgoingArg(40), x15 // flags = notrap aligned -; xmov x0, x15 -; xmov x1, x15 -; xmov x2, x15 -; xmov x3, x15 ; xmov x4, x15 ; xmov x5, x15 ; xmov x6, x15 @@ -153,7 +149,7 @@ block0: ; xmov x12, x15 ; xmov x13, x15 ; xmov x14, x15 -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }, CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }, CallArgPair { vreg: p15i, preg: p15i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p15i), XReg(p15i), XReg(p15i), XReg(p15i)] }, uses: [CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, 
CallArgPair { vreg: p14i, preg: p14i }, CallArgPair { vreg: p15i, preg: p15i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame_restore 48, {} ; ret ; @@ -166,10 +162,6 @@ block0: ; xstore64le_offset8 sp, 24, x15 ; xstore64le_offset8 sp, 32, x15 ; xstore64le_offset8 sp, 40, x15 -; xmov x0, x15 -; xmov x1, x15 -; xmov x2, x15 -; xmov x3, x15 ; xmov x4, x15 ; xmov x5, x15 ; xmov x6, x15 @@ -181,7 +173,7 @@ block0: ; xmov x12, x15 ; xmov x13, x15 ; xmov x14, x15 -; call 0x0 // target = 0x51 +; call4 x15, x15, x15, x15, 0x0 // target = 0x45 ; pop_frame_restore 48, ; ret @@ -224,8 +216,8 @@ block0: ; VCode: ; push_frame_save 112, {x17, x18, x20, x21, x22, x23, x29} ; block0: -; x0 = load_addr OutgoingArg(0) -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }, CallRetPair { vreg: Writable { reg: p4i }, preg: p4i }, CallRetPair { vreg: Writable { reg: p5i }, preg: p5i }, CallRetPair { vreg: Writable { reg: p6i }, preg: p6i }, CallRetPair { vreg: Writable { reg: p7i }, preg: p7i }, CallRetPair { vreg: Writable { reg: p8i }, preg: p8i }, CallRetPair { vreg: Writable { reg: p9i }, preg: p9i }, CallRetPair { vreg: Writable { reg: p10i }, preg: p10i }, CallRetPair { vreg: Writable { reg: p11i }, preg: p11i }, CallRetPair { vreg: Writable { reg: p12i }, preg: p12i }, CallRetPair { vreg: Writable { reg: p13i }, preg: p13i }, CallRetPair { vreg: Writable { reg: p14i }, preg: p14i }, CallRetPair { vreg: Writable { reg: p15i }, preg: p15i }], clobbers: PRegSet { bits: [0, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; x12 = load_addr OutgoingArg(0) +; call CallInfo { dest: PulleyCall { name: 
TestCase(%g), args: [XReg(p12i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }, CallRetPair { vreg: Writable { reg: p4i }, preg: p4i }, CallRetPair { vreg: Writable { reg: p5i }, preg: p5i }, CallRetPair { vreg: Writable { reg: p6i }, preg: p6i }, CallRetPair { vreg: Writable { reg: p7i }, preg: p7i }, CallRetPair { vreg: Writable { reg: p8i }, preg: p8i }, CallRetPair { vreg: Writable { reg: p9i }, preg: p9i }, CallRetPair { vreg: Writable { reg: p10i }, preg: p10i }, CallRetPair { vreg: Writable { reg: p11i }, preg: p11i }, CallRetPair { vreg: Writable { reg: p12i }, preg: p12i }, CallRetPair { vreg: Writable { reg: p13i }, preg: p13i }, CallRetPair { vreg: Writable { reg: p14i }, preg: p14i }, CallRetPair { vreg: Writable { reg: p15i }, preg: p15i }], clobbers: PRegSet { bits: [0, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xmov x20, x13 ; xmov x22, x11 ; x29 = xload64 OutgoingArg(0) // flags = notrap aligned @@ -263,8 +255,8 @@ block0: ; ; Disassembled: ; push_frame_save 112, x17, x18, x20, x21, x22, x23, x29 -; xmov x0, sp -; call 0x0 // target = 0xc +; xmov x12, sp +; call1 x12, 0x0 // target = 0xc ; xmov x20, x13 ; xmov x22, x11 ; xload64le_offset8 x29, sp, 0 diff --git a/cranelift/filetests/filetests/isa/pulley32/extend.clif b/cranelift/filetests/filetests/isa/pulley32/extend.clif index 4af13e4b19c8..d82485e18853 100644 --- a/cranelift/filetests/filetests/isa/pulley32/extend.clif +++ b/cranelift/filetests/filetests/isa/pulley32/extend.clif @@ -11,15 +11,15 @@ block0(v0: i8): ; VCode: ; push_frame ; block0: -; zext8 x0, x0 -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, 
callee_pop_size: 0 } +; zext8 x2, x0 +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; ; Disassembled: ; push_frame -; zext8 x0, x0 -; call 0x0 // target = 0x4 +; zext8 x2, x0 +; call1 x2, 0x0 // target = 0x4 ; pop_frame ; ret @@ -33,15 +33,15 @@ block0(v0: i16): ; VCode: ; push_frame ; block0: -; zext16 x0, x0 -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; zext16 x2, x0 +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; ; Disassembled: ; push_frame -; zext16 x0, x0 -; call 0x0 // target = 0x4 +; zext16 x2, x0 +; call1 x2, 0x0 // target = 0x4 ; pop_frame ; ret @@ -55,7 +55,7 @@ block0(v0: i32): ; VCode: ; push_frame ; block0: -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; @@ -75,7 +75,7 @@ block0(v0: i64): ; VCode: ; push_frame ; block0: -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], 
defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; @@ -95,15 +95,15 @@ block0(v0: i8): ; VCode: ; push_frame ; block0: -; sext8 x0, x0 -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; sext8 x2, x0 +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; ; Disassembled: ; push_frame -; sext8 x0, x0 -; call 0x0 // target = 0x4 +; sext8 x2, x0 +; call1 x2, 0x0 // target = 0x4 ; pop_frame ; ret @@ -117,15 +117,15 @@ block0(v0: i16): ; VCode: ; push_frame ; block0: -; sext16 x0, x0 -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; sext16 x2, x0 +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; ; Disassembled: ; push_frame -; sext16 x0, x0 -; call 0x0 // target = 0x4 +; sext16 x2, x0 +; call1 x2, 0x0 // target = 0x4 ; pop_frame ; ret @@ -139,7 +139,7 @@ block0(v0: i32): ; VCode: ; push_frame ; block0: -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, 
caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; @@ -159,7 +159,7 @@ block0(v0: i64): ; VCode: ; push_frame ; block0: -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; diff --git a/cranelift/filetests/filetests/isa/pulley64/call.clif b/cranelift/filetests/filetests/isa/pulley64/call.clif index 711216049cdd..e00fd590985e 100644 --- a/cranelift/filetests/filetests/isa/pulley64/call.clif +++ b/cranelift/filetests/filetests/isa/pulley64/call.clif @@ -15,16 +15,16 @@ block0: ; VCode: ; push_frame ; block0: -; xconst8 x0, 0 -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; xconst8 x2, 0 +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xconst8 x0, 1 ; pop_frame ; ret ; ; Disassembled: ; push_frame -; xconst8 x0, 0 -; call 0x0 // target = 0x4 +; xconst8 x2, 0 +; call1 x2, 0x0 // target = 0x4 ; xconst8 x0, 1 ; pop_frame ; ret @@ -42,16 +42,16 @@ block0: ; VCode: ; push_frame ; block0: -; xconst8 x0, 0 -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, 
caller_conv: Fast, callee_pop_size: 0 } +; xconst8 x2, 0 +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }], clobbers: PRegSet { bits: [65534, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xconst8 x0, 1 ; pop_frame ; ret ; ; Disassembled: ; push_frame -; xconst8 x0, 0 -; call 0x0 // target = 0x4 +; xconst8 x2, 0 +; call1 x2, 0x0 // target = 0x4 ; xconst8 x0, 1 ; pop_frame ; ret @@ -71,21 +71,21 @@ block0: ; VCode: ; push_frame ; block0: -; xconst8 x0, 0 -; xconst8 x1, 1 -; xconst8 x2, 2 -; xconst8 x3, 3 -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; xconst8 x3, 0 +; xconst8 x4, 1 +; xconst8 x5, 2 +; xconst8 x6, 3 +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p3i), XReg(p4i), XReg(p5i), XReg(p6i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; ; Disassembled: ; push_frame -; xconst8 x0, 0 -; xconst8 x1, 1 -; xconst8 x2, 2 -; xconst8 x3, 3 -; call 0x0 // target = 0xd +; xconst8 x3, 0 +; xconst8 x4, 1 +; xconst8 x5, 2 +; xconst8 x6, 3 +; call4 x3, x4, x5, x6, 0x0 // target = 0xd ; pop_frame ; ret @@ -103,7 +103,7 @@ block0: ; VCode: ; push_frame ; block0: -; call CallInfo { dest: TestCase(%g), uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }], clobbers: PRegSet { bits: [65520, 65535, 4294967295, 0] }, callee_conv: Fast, 
caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }], clobbers: PRegSet { bits: [65520, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xadd64 x4, x0, x2 ; xadd64 x3, x1, x3 ; xadd64 x0, x4, x3 @@ -138,10 +138,6 @@ block0: ; xstore64 OutgoingArg(24), x15 // flags = notrap aligned ; xstore64 OutgoingArg(32), x15 // flags = notrap aligned ; xstore64 OutgoingArg(40), x15 // flags = notrap aligned -; xmov x0, x15 -; xmov x1, x15 -; xmov x2, x15 -; xmov x3, x15 ; xmov x4, x15 ; xmov x5, x15 ; xmov x6, x15 @@ -153,7 +149,7 @@ block0: ; xmov x12, x15 ; xmov x13, x15 ; xmov x14, x15 -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }, CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }, CallArgPair { vreg: p15i, preg: p15i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p15i), XReg(p15i), XReg(p15i), XReg(p15i)] }, uses: [CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair 
{ vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }, CallArgPair { vreg: p15i, preg: p15i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame_restore 48, {} ; ret ; @@ -166,10 +162,6 @@ block0: ; xstore64le_offset8 sp, 24, x15 ; xstore64le_offset8 sp, 32, x15 ; xstore64le_offset8 sp, 40, x15 -; xmov x0, x15 -; xmov x1, x15 -; xmov x2, x15 -; xmov x3, x15 ; xmov x4, x15 ; xmov x5, x15 ; xmov x6, x15 @@ -181,7 +173,7 @@ block0: ; xmov x12, x15 ; xmov x13, x15 ; xmov x14, x15 -; call 0x0 // target = 0x51 +; call4 x15, x15, x15, x15, 0x0 // target = 0x45 ; pop_frame_restore 48, ; ret @@ -224,8 +216,8 @@ block0: ; VCode: ; push_frame_save 112, {x17, x18, x20, x21, x22, x23, x29} ; block0: -; x0 = load_addr OutgoingArg(0) -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }, CallRetPair { vreg: Writable { reg: p4i }, preg: p4i }, CallRetPair { vreg: Writable { reg: p5i }, preg: p5i }, CallRetPair { vreg: Writable { reg: p6i }, preg: p6i }, CallRetPair { vreg: Writable { reg: p7i }, preg: p7i }, CallRetPair { vreg: Writable { reg: p8i }, preg: p8i }, CallRetPair { vreg: Writable { reg: p9i }, preg: p9i }, CallRetPair { vreg: Writable { reg: p10i }, preg: p10i }, CallRetPair { vreg: Writable { reg: p11i }, preg: p11i }, CallRetPair { vreg: Writable { reg: p12i }, preg: p12i }, CallRetPair { vreg: Writable { reg: p13i }, preg: p13i }, CallRetPair { vreg: Writable { reg: p14i }, preg: p14i }, CallRetPair { vreg: Writable 
{ reg: p15i }, preg: p15i }], clobbers: PRegSet { bits: [0, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; x12 = load_addr OutgoingArg(0) +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p12i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }, CallRetPair { vreg: Writable { reg: p4i }, preg: p4i }, CallRetPair { vreg: Writable { reg: p5i }, preg: p5i }, CallRetPair { vreg: Writable { reg: p6i }, preg: p6i }, CallRetPair { vreg: Writable { reg: p7i }, preg: p7i }, CallRetPair { vreg: Writable { reg: p8i }, preg: p8i }, CallRetPair { vreg: Writable { reg: p9i }, preg: p9i }, CallRetPair { vreg: Writable { reg: p10i }, preg: p10i }, CallRetPair { vreg: Writable { reg: p11i }, preg: p11i }, CallRetPair { vreg: Writable { reg: p12i }, preg: p12i }, CallRetPair { vreg: Writable { reg: p13i }, preg: p13i }, CallRetPair { vreg: Writable { reg: p14i }, preg: p14i }, CallRetPair { vreg: Writable { reg: p15i }, preg: p15i }], clobbers: PRegSet { bits: [0, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; xmov x20, x13 ; xmov x22, x11 ; x29 = xload64 OutgoingArg(0) // flags = notrap aligned @@ -263,8 +255,8 @@ block0: ; ; Disassembled: ; push_frame_save 112, x17, x18, x20, x21, x22, x23, x29 -; xmov x0, sp -; call 0x0 // target = 0xc +; xmov x12, sp +; call1 x12, 0x0 // target = 0xc ; xmov x20, x13 ; xmov x22, x11 ; xload64le_offset8 x29, sp, 0 @@ -350,10 +342,6 @@ block0: ; xstore64 OutgoingArg(40), x15 // flags = notrap aligned ; xstore64 OutgoingArg(48), x15 // flags = notrap aligned ; xstore64 OutgoingArg(56), x15 // flags = notrap aligned -; xmov x0, x15 -; xmov x1, x15 -; xmov x2, x15 -; xmov x3, x15 ; xmov x4, x15 ; xmov x5, x15 ; xmov x6, x15 @@ -365,7 +353,7 @@ block0: ; xmov x12, 
x15 ; xmov x13, x15 ; xmov x14, x15 -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }, CallArgPair { vreg: p1i, preg: p1i }, CallArgPair { vreg: p2i, preg: p2i }, CallArgPair { vreg: p3i, preg: p3i }, CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }, CallArgPair { vreg: p15i, preg: p15i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p15i), XReg(p15i), XReg(p15i), XReg(p15i)] }, uses: [CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }, CallArgPair { vreg: p15i, preg: p15i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame_restore 64, {} ; ret ; @@ -380,10 +368,6 @@ block0: ; xstore64le_offset8 sp, 40, x15 ; xstore64le_offset8 sp, 48, x15 ; xstore64le_offset8 sp, 56, x15 -; xmov x0, x15 -; xmov x1, x15 -; xmov x2, x15 -; xmov x3, x15 ; xmov x4, x15 ; xmov x5, x15 ; xmov x6, x15 @@ -395,7 +379,7 @@ block0: ; xmov x12, x15 ; xmov x13, x15 ; xmov x14, x15 -; call 0x0 // target = 0x59 +; call4 x15, x15, x15, x15, 0x0 // target = 0x4d ; pop_frame_restore 
64, ; ret diff --git a/cranelift/filetests/filetests/isa/pulley64/extend.clif b/cranelift/filetests/filetests/isa/pulley64/extend.clif index 22b0e46dbf22..0efbfb6a9a5a 100644 --- a/cranelift/filetests/filetests/isa/pulley64/extend.clif +++ b/cranelift/filetests/filetests/isa/pulley64/extend.clif @@ -11,15 +11,15 @@ block0(v0: i8): ; VCode: ; push_frame ; block0: -; zext8 x0, x0 -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; zext8 x2, x0 +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; ; Disassembled: ; push_frame -; zext8 x0, x0 -; call 0x0 // target = 0x4 +; zext8 x2, x0 +; call1 x2, 0x0 // target = 0x4 ; pop_frame ; ret @@ -33,15 +33,15 @@ block0(v0: i16): ; VCode: ; push_frame ; block0: -; zext16 x0, x0 -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; zext16 x2, x0 +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; ; Disassembled: ; push_frame -; zext16 x0, x0 -; call 0x0 // target = 0x4 +; zext16 x2, x0 +; call1 x2, 0x0 // target = 0x4 ; pop_frame ; ret @@ -55,15 +55,15 @@ block0(v0: i32): ; VCode: ; push_frame ; block0: -; zext32 x0, x0 -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; zext32 x2, x0 
+; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; ; Disassembled: ; push_frame -; zext32 x0, x0 -; call 0x0 // target = 0x4 +; zext32 x2, x0 +; call1 x2, 0x0 // target = 0x4 ; pop_frame ; ret @@ -77,7 +77,7 @@ block0(v0: i64): ; VCode: ; push_frame ; block0: -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; @@ -97,15 +97,15 @@ block0(v0: i8): ; VCode: ; push_frame ; block0: -; sext8 x0, x0 -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; sext8 x2, x0 +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; ; Disassembled: ; push_frame -; sext8 x0, x0 -; call 0x0 // target = 0x4 +; sext8 x2, x0 +; call1 x2, 0x0 // target = 0x4 ; pop_frame ; ret @@ -119,15 +119,15 @@ block0(v0: i16): ; VCode: ; push_frame ; block0: -; sext16 x0, x0 -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; sext16 x2, x0 +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: 
[], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; ; Disassembled: ; push_frame -; sext16 x0, x0 -; call 0x0 // target = 0x4 +; sext16 x2, x0 +; call1 x2, 0x0 // target = 0x4 ; pop_frame ; ret @@ -141,15 +141,15 @@ block0(v0: i32): ; VCode: ; push_frame ; block0: -; sext32 x0, x0 -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; sext32 x2, x0 +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p2i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; ; Disassembled: ; push_frame -; sext32 x0, x0 -; call 0x0 // target = 0x4 +; sext32 x2, x0 +; call1 x2, 0x0 // target = 0x4 ; pop_frame ; ret @@ -163,7 +163,7 @@ block0(v0: i64): ; VCode: ; push_frame ; block0: -; call CallInfo { dest: TestCase(%g), uses: [CallArgPair { vreg: p0i, preg: p0i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } +; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p0i)] }, uses: [], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 } ; pop_frame ; ret ; diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs index cd1dbd1b4e15..faa15aa520e1 100644 --- a/pulley/src/interp.rs +++ b/pulley/src/interp.rs @@ -1065,6 +1065,65 @@ impl OpVisitor for Interpreter<'_> { ControlFlow::Continue(()) } + fn call1(&mut self, arg1: XReg, offset: PcRelOffset) -> ControlFlow { + let return_addr = self.pc.as_ptr(); + self.state.lr = return_addr.as_ptr(); + self.state[XReg::x0] = self.state[arg1]; + self.pc_rel_jump::(offset); + ControlFlow::Continue(()) + } + + 
fn call2(&mut self, arg1: XReg, arg2: XReg, offset: PcRelOffset) -> ControlFlow { + let return_addr = self.pc.as_ptr(); + self.state.lr = return_addr.as_ptr(); + let (x0, x1) = (self.state[arg1], self.state[arg2]); + self.state[XReg::x0] = x0; + self.state[XReg::x1] = x1; + self.pc_rel_jump::(offset); + ControlFlow::Continue(()) + } + + fn call3( + &mut self, + arg1: XReg, + arg2: XReg, + arg3: XReg, + offset: PcRelOffset, + ) -> ControlFlow { + let return_addr = self.pc.as_ptr(); + self.state.lr = return_addr.as_ptr(); + let (x0, x1, x2) = (self.state[arg1], self.state[arg2], self.state[arg3]); + self.state[XReg::x0] = x0; + self.state[XReg::x1] = x1; + self.state[XReg::x2] = x2; + self.pc_rel_jump::(offset); + ControlFlow::Continue(()) + } + + fn call4( + &mut self, + arg1: XReg, + arg2: XReg, + arg3: XReg, + arg4: XReg, + offset: PcRelOffset, + ) -> ControlFlow { + let return_addr = self.pc.as_ptr(); + self.state.lr = return_addr.as_ptr(); + let (x0, x1, x2, x3) = ( + self.state[arg1], + self.state[arg2], + self.state[arg3], + self.state[arg4], + ); + self.state[XReg::x0] = x0; + self.state[XReg::x1] = x1; + self.state[XReg::x2] = x2; + self.state[XReg::x3] = x3; + self.pc_rel_jump::(offset); + ControlFlow::Continue(()) + } + fn call_indirect(&mut self, dst: XReg) -> ControlFlow { let return_addr = self.pc.as_ptr(); self.state.lr = return_addr.as_ptr(); diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs index db951407b614..c2d5ab2f09d7 100644 --- a/pulley/src/lib.rs +++ b/pulley/src/lib.rs @@ -92,7 +92,23 @@ macro_rules! for_each_op { /// Transfer control to the PC at the given offset and set the `lr` /// register to the PC just after this instruction. + /// + /// This instruction generally assumes that the Pulley ABI is being + /// respected where arguments are in argument registers (starting at + /// x0 for integer arguments) and results are in result registers. + /// This instruction itself assumes that all arguments are already in + /// their registers. 
Subsequent instructions below enable moving + /// arguments into the correct registers as part of the same call + /// instruction. call = Call { offset: PcRelOffset }; + /// Like `call`, but also `x0 = arg1` + call1 = Call1 { arg1: XReg, offset: PcRelOffset }; + /// Like `call`, but also `x0, x1 = arg1, arg2` + call2 = Call2 { arg1: XReg, arg2: XReg, offset: PcRelOffset }; + /// Like `call`, but also `x0, x1, x2 = arg1, arg2, arg3` + call3 = Call3 { arg1: XReg, arg2: XReg, arg3: XReg, offset: PcRelOffset }; + /// Like `call`, but also `x0, x1, x2, x3 = arg1, arg2, arg3, arg4` + call4 = Call4 { arg1: XReg, arg2: XReg, arg3: XReg, arg4: XReg, offset: PcRelOffset }; /// Transfer control to the PC in `reg` and set `lr` to the PC just /// after this instruction.