Skip to content

Commit

Permalink
pulley: Implement return_call instructions
Browse files Browse the repository at this point in the history
This commit fleshes out the Cranelift lowerings of tail calls which gets
the wasm tail call proposal itself working on Pulley. Most of the bits
and pieces here were copied over from the riscv64 backend and then
edited to suit Pulley.
  • Loading branch information
alexcrichton committed Dec 16, 2024
1 parent 6ed283f commit f236dd8
Show file tree
Hide file tree
Showing 10 changed files with 236 additions and 12 deletions.
41 changes: 37 additions & 4 deletions cranelift/codegen/src/isa/pulley_shared/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -611,12 +611,45 @@ where
P: PulleyTargetKind,
{
pub fn emit_return_call(
self,
_ctx: &mut Lower<InstAndKind<P>>,
_args: isle::ValueSlice,
mut self,
ctx: &mut Lower<InstAndKind<P>>,
args: isle::ValueSlice,
_backend: &PulleyBackend<P>,
) {
todo!()
let new_stack_arg_size =
u32::try_from(self.sig(ctx.sigs()).sized_stack_arg_space()).unwrap();

ctx.abi_mut().accumulate_tail_args_size(new_stack_arg_size);

// Put all arguments in registers and stack slots (within that newly
// allocated stack space).
self.emit_args(ctx, args);
self.emit_stack_ret_arg_for_tail_call(ctx);

let dest = self.dest().clone();
let uses = self.take_uses();

match dest {
CallDest::ExtName(name, RelocDistance::Near) => {
let info = Box::new(ReturnCallInfo {
dest: name,
uses,
new_stack_arg_size,
});
ctx.emit(Inst::ReturnCall { info }.into());
}
CallDest::ExtName(_name, RelocDistance::Far) => {
unimplemented!("return-call of a host function")
}
CallDest::Reg(callee) => {
let info = Box::new(ReturnCallInfo {
dest: XReg::new(callee).unwrap(),
uses,
new_stack_arg_size,
});
ctx.emit(Inst::ReturnIndirectCall { info }.into());
}
}
}
}

Expand Down
8 changes: 8 additions & 0 deletions cranelift/codegen/src/isa/pulley_shared/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,12 @@
;; An indirect call to an unknown callee.
(IndirectCall (info BoxCallIndInfo))

;; A direct return-call macro instruction.
(ReturnCall (info BoxReturnCallInfo))

;; An indirect return-call macro instruction.
(ReturnIndirectCall (info BoxReturnCallIndInfo))

;; An indirect call out to a host-defined function. The host function
;; pointer is the first "argument" of this function call.
(IndirectCallHost (info BoxCallInfo))
Expand Down Expand Up @@ -125,6 +131,8 @@

(type BoxCallInfo (primitive BoxCallInfo))
(type BoxCallIndInfo (primitive BoxCallIndInfo))
(type BoxReturnCallInfo (primitive BoxReturnCallInfo))
(type BoxReturnCallIndInfo (primitive BoxReturnCallIndInfo))

;;;; Address Modes ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down
127 changes: 127 additions & 0 deletions cranelift/codegen/src/isa/pulley_shared/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,29 @@ fn pulley_emit<P>(
}
}

Inst::ReturnCall { info } => {
emit_return_call_common_sequence(sink, emit_info, state, &info);

// Emit an unconditional jump which is quite similar to `Inst::Call`
// except that a `jump` opcode is used instead of a `call` opcode.
sink.put1(pulley_interpreter::Opcode::Jump as u8);
sink.add_reloc(Reloc::X86CallPCRel4, &info.dest, -1);
sink.put4(0);

// Islands were manually handled in
// `emit_return_call_common_sequence`.
*start_offset = sink.cur_offset();
}

Inst::ReturnIndirectCall { info } => {
emit_return_call_common_sequence(sink, emit_info, state, &info);
enc::xjump(sink, info.dest);

// Islands were manually handled in
// `emit_return_call_common_sequence`.
*start_offset = sink.cur_offset();
}

Inst::IndirectCallHost { info } => {
// Emit a relocation to fill in the actual immediate argument here
// in `call_indirect_host`.
Expand Down Expand Up @@ -496,3 +519,107 @@ fn pulley_emit<P>(
}
}
}

fn emit_return_call_common_sequence<T, P>(
sink: &mut MachBuffer<InstAndKind<P>>,
emit_info: &EmitInfo,
state: &mut EmitState<P>,
info: &ReturnCallInfo<T>,
) where
P: PulleyTargetKind,
{
// The return call sequence can potentially emit a lot of instructions, so
// lets emit an island here if we need it.
//
// It is difficult to calculate exactly how many instructions are going to
// be emitted, so we calculate it by emitting it into a disposable buffer,
// and then checking how many instructions were actually emitted.
let mut buffer = MachBuffer::new();
let mut fake_emit_state = state.clone();

return_call_emit_impl(&mut buffer, emit_info, &mut fake_emit_state, info);

// Finalize the buffer and get the number of bytes emitted.
let buffer = buffer.finish(&Default::default(), &mut Default::default());
let length = buffer.data().len() as u32;

// And now emit the island inline with this instruction.
if sink.island_needed(length) {
let jump_around_label = sink.get_label();
<InstAndKind<P>>::gen_jump(jump_around_label).emit(sink, emit_info, state);
sink.emit_island(length + 4, &mut state.ctrl_plane);
sink.bind_label(jump_around_label, &mut state.ctrl_plane);
}

// Now that we're done, emit the *actual* return sequence.
return_call_emit_impl(sink, emit_info, state, info);
}

/// This should not be called directly, Instead prefer to call [emit_return_call_common_sequence].
fn return_call_emit_impl<T, P>(
sink: &mut MachBuffer<InstAndKind<P>>,
emit_info: &EmitInfo,
state: &mut EmitState<P>,
info: &ReturnCallInfo<T>,
) where
P: PulleyTargetKind,
{
let sp_to_fp_offset = {
let frame_layout = state.frame_layout();
i64::from(
frame_layout.clobber_size
+ frame_layout.fixed_frame_storage_size
+ frame_layout.outgoing_args_size,
)
};

// Restore all clobbered registers before leaving the function.
let mut clobber_offset = sp_to_fp_offset - 8;
for reg in state.frame_layout().clobbered_callee_saves.clone() {
let rreg = reg.to_reg();
let ty = match rreg.class() {
RegClass::Int => I64,
RegClass::Float => F64,
RegClass::Vector => unimplemented!("Vector Clobber Restores"),
};

<InstAndKind<P>>::from(Inst::gen_load(
reg.map(Reg::from),
Amode::SpOffset {
offset: clobber_offset.try_into().unwrap(),
},
ty,
MemFlags::trusted(),
))
.emit(sink, emit_info, state);

clobber_offset -= 8
}

// Restore the link register and frame pointer using a `pop_frame`
// instruction. This will move `sp` to the current frame pointer and then
// restore the old lr/fp, so this restores all of sp/fp/lr in one
// instruction.
let setup_area_size = i64::from(state.frame_layout().setup_area_size);
assert!(setup_area_size > 0, "must have frame pointers enabled");
<InstAndKind<P>>::from(RawInst::PopFrame).emit(sink, emit_info, state);

// Now that `sp` is restored to what it was on function entry it may need to
// be adjusted if the stack arguments of our own function differ from the
// stack arguments of the callee. Perform any necessary adjustment here.
//
// Note that this means that there's a brief window where stack arguments
// might be below `sp` in the case that the callee has more stack arguments
// than ourselves. That's in theory ok though as we're inventing the pulley
// ABI and nothing like async signals are happening that we have to worry
// about.
let incoming_args_diff =
i64::from(state.frame_layout().tail_args_size - info.new_stack_arg_size);

if incoming_args_diff != 0 {
let amt = i32::try_from(incoming_args_diff).unwrap();
for inst in PulleyMachineDeps::<P>::gen_sp_reg_adjust(amt) {
<InstAndKind<P>>::from(inst).emit(sink, emit_info, state);
}
}
}
36 changes: 36 additions & 0 deletions cranelift/codegen/src/isa/pulley_shared/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,20 @@ mod generated {
include!(concat!(env!("OUT_DIR"), "/pulley_inst_gen.rs"));
}

/// Out-of-line data for return-calls, to keep the size of `Inst` down.
#[derive(Clone, Debug)]
pub struct ReturnCallInfo<T> {
/// Where this call is going.
pub dest: T,

/// The size of the argument area for this return-call, potentially smaller
/// than that of the caller, but never larger.
pub new_stack_arg_size: u32,

/// The in-register arguments and their constraints.
pub uses: CallArgList,
}

impl Inst {
/// Generic constructor for a load (zero-extending where appropriate).
pub fn gen_load(dst: Writable<Reg>, mem: Amode, ty: Type, flags: MemFlags) -> Inst {
Expand Down Expand Up @@ -154,6 +168,18 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
}
collector.reg_clobbers(info.clobbers);
}
Inst::ReturnCall { info } => {
for CallArgPair { vreg, preg } in &mut info.uses {
collector.reg_fixed_use(vreg, *preg);
}
}
Inst::ReturnIndirectCall { info } => {
collector.reg_use(&mut info.dest);

for CallArgPair { vreg, preg } in &mut info.uses {
collector.reg_fixed_use(vreg, *preg);
}
}

Inst::Jump { .. } => {}

Expand Down Expand Up @@ -381,6 +407,7 @@ where
Inst::Jump { .. } => MachTerminator::Uncond,
Inst::BrIf { .. } => MachTerminator::Cond,
Inst::BrTable { .. } => MachTerminator::Indirect,
Inst::ReturnCall { .. } | Inst::ReturnIndirectCall { .. } => MachTerminator::Indirect,
_ => MachTerminator::None,
}
}
Expand Down Expand Up @@ -574,6 +601,15 @@ impl Inst {
format!("indirect_call {callee}, {info:?}")
}

Inst::ReturnCall { info } => {
format!("return_call {info:?}")
}

Inst::ReturnIndirectCall { info } => {
let callee = format_reg(*info.dest);
format!("return_indirect_call {callee}, {info:?}")
}

Inst::IndirectCallHost { info } => {
format!("indirect_call_host {info:?}")
}
Expand Down
6 changes: 5 additions & 1 deletion cranelift/codegen/src/isa/pulley_shared/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ use inst::InstAndKind;
use crate::ir::{condcodes::*, immediates::*, types::*, *};
use crate::isa::pulley_shared::{
abi::*,
inst::{FReg, OperandSize, VReg, WritableFReg, WritableVReg, WritableXReg, XReg},
inst::{
FReg, OperandSize, ReturnCallInfo, VReg, WritableFReg, WritableVReg, WritableXReg, XReg,
},
lower::{regs, Cond},
*,
};
Expand All @@ -25,6 +27,8 @@ type VecArgPair = Vec<ArgPair>;
type VecRetPair = Vec<RetPair>;
type BoxCallInfo = Box<CallInfo<ExternalName>>;
type BoxCallIndInfo = Box<CallInfo<XReg>>;
type BoxReturnCallInfo = Box<ReturnCallInfo<ExternalName>>;
type BoxReturnCallIndInfo = Box<ReturnCallInfo<XReg>>;
type BoxExternalName = Box<ExternalName>;

pub(crate) struct PulleyIsleContext<'a, 'b, I, B>
Expand Down
6 changes: 1 addition & 5 deletions crates/wasmtime/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1963,12 +1963,8 @@ impl Config {
// Pulley at this time fundamentally doesn't support the
// `threads` proposal, notably shared memory, because Rust can't
// safely implement loads/stores in the face of shared memory.
//
// Additionally pulley currently panics on tail-call generation
// in Cranelift ABI call which will get implemented in the
// future but is listed here for now as unsupported.
if self.compiler_target().is_pulley() {
return WasmFeatures::TAIL_CALL | WasmFeatures::THREADS;
return WasmFeatures::THREADS;
}

// Other Cranelift backends are either 100% missing or complete
Expand Down
11 changes: 10 additions & 1 deletion crates/wast-util/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ impl Compiler {
// support at this time (pulley is a work-in-progress) and so
// individual tests are listed below as "should fail" even if
// they're not covered in this list.
if config.tail_call() || config.wide_arithmetic() {
if config.wide_arithmetic() {
return true;
}
}
Expand Down Expand Up @@ -424,6 +424,15 @@ impl WastTest {
"spec_testsuite/proposals/relaxed-simd/relaxed_laneselect.wast",
"spec_testsuite/proposals/relaxed-simd/relaxed_madd_nmadd.wast",
"spec_testsuite/proposals/relaxed-simd/relaxed_min_max.wast",
"spec_testsuite/proposals/memory64/simd_lane.wast",
"spec_testsuite/proposals/memory64/simd_memory-multi.wast",
"spec_testsuite/proposals/memory64/relaxed_min_max.wast",
"spec_testsuite/proposals/memory64/relaxed_madd_nmadd.wast",
"spec_testsuite/proposals/memory64/relaxed_laneselect.wast",
"spec_testsuite/proposals/memory64/relaxed_dot_product.wast",
"spec_testsuite/proposals/memory64/i16x8_relaxed_q15mulr_s.wast",
"spec_testsuite/proposals/memory64/i32x4_relaxed_trunc.wast",
"spec_testsuite/proposals/memory64/i8x16_relaxed_swizzle.wast",
"spec_testsuite/simd_align.wast",
"spec_testsuite/simd_bitwise.wast",
"spec_testsuite/simd_boolean.wast",
Expand Down
7 changes: 7 additions & 0 deletions pulley/src/interp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1066,6 +1066,13 @@ impl OpVisitor for Interpreter<'_> {
ControlFlow::Continue(())
}

fn xjump(&mut self, reg: XReg) -> ControlFlow<Done> {
unsafe {
self.pc = UnsafeBytecodeStream::new(NonNull::new_unchecked(self.state[reg].get_ptr()));
}
ControlFlow::Continue(())
}

fn br_if32(&mut self, cond: XReg, offset: PcRelOffset) -> ControlFlow<Done> {
let cond = self.state[cond].get_u32();
if cond != 0 {
Expand Down
4 changes: 4 additions & 0 deletions pulley/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ macro_rules! for_each_op {
/// Unconditionally transfer control to the PC at the given offset.
jump = Jump { offset: PcRelOffset };

/// Unconditionally transfer control to the PC at specified
/// register.
xjump = XJump { reg: XReg };

/// Conditionally transfer control to the given PC offset if
/// `low32(cond)` contains a non-zero value.
br_if32 = BrIf { cond: XReg, offset: PcRelOffset };
Expand Down
2 changes: 1 addition & 1 deletion tests/misc_testsuite/tail-call/loop-across-modules.wast
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,5 @@
(start $start)
)

(assert_return (invoke $B "g" (i32.const 100000000))
(assert_return (invoke $B "g" (i32.const 100000))
(i32.const 42))

0 comments on commit f236dd8

Please sign in to comment.