Skip to content

Commit

Permalink
pulley: Add offset8 integer loads/stores
Browse files Browse the repository at this point in the history
This commit extends the set of opcodes for loads/stores from memory with
integer registers. Previously the only addressing mode supported was a
base register plus a 32-bit signed immediate. This immediate frequently
doesn't need 32 bits though and can often fit in a much smaller range.
Looking at `spidermonkey.cwasm` a large number of loads/stores can fit
within an unsigned 8-bit integer instead so this commit adds an
`offset8` mode in addition to the preexisting `offset32` mode.
Empirically this commit shrinks `spidermonkey.cwasm` for pulley64 from
33M to 31M.

This notably, at this time, does not extend general addressing modes in
Pulley nor does it extend all loads/stores. For example
float/vector/big-endian loads and stores all continue to only support a
32-bit signed offset from the base pointer. This is done under the
assumption that integer loads/stores dominate both
performance and code size, but this is not empirically proven just yet.

Additionally at this time the choice is being made to add an
opcode-per-addressing-mode rather than having a single load opcode take
a general addressing mode. The assumption here is that decoding a fully
general addressing mode and processing it is probably slower at runtime
than specializing opcodes per addressing mode. This is currently an
unproven assumption however and the cost of this is increased complexity
in the Cranelift backend as it has to have many branches for all
loads/stores supported.
  • Loading branch information
alexcrichton committed Dec 18, 2024
1 parent ab325dc commit d95dcd1
Show file tree
Hide file tree
Showing 12 changed files with 694 additions and 152 deletions.
114 changes: 84 additions & 30 deletions cranelift/codegen/src/isa/pulley_shared/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,26 @@ where
}
}

/// A statically known displacement applied to a base pointer.
///
/// VCode always carries this displacement as an `i32`; the conversion into
/// this enum happens just before lowering and is used to determine which
/// instruction to emit.
enum Offset {
    /// The displacement fits in an unsigned 8-bit value.
    U8(u8),
    /// The displacement is kept as a signed 32-bit value.
    I32(i32),
}

impl From<i32> for Offset {
    /// Chooses the narrowest variant able to hold `i`.
    ///
    /// Values in `0..=255` become [`Offset::U8`]; every other value,
    /// including all negative ones, stays an [`Offset::I32`].
    fn from(i: i32) -> Offset {
        match u8::try_from(i) {
            Ok(narrow) => Offset::U8(narrow),
            Err(_) => Offset::I32(i),
        }
    }
}

fn pulley_emit<P>(
inst: &Inst,
sink: &mut MachBuffer<InstAndKind<P>>,
Expand Down Expand Up @@ -315,24 +335,40 @@ fn pulley_emit<P>(
let endian = emit_info.endianness(*flags);
match *ty {
I8 => match ext {
X::None | X::Zero32 => enc::xload8_u32_offset32(sink, dst, r, x),
X::Zero64 => enc::xload8_u64_offset32(sink, dst, r, x),
X::Sign32 => enc::xload8_s32_offset32(sink, dst, r, x),
X::Sign64 => enc::xload8_s64_offset32(sink, dst, r, x),
X::None | X::Zero32 => match x.into() {
Offset::I32(x) => enc::xload8_u32_offset32(sink, dst, r, x),
Offset::U8(x) => enc::xload8_u32_offset8(sink, dst, r, x),
},
X::Zero64 => match x.into() {
Offset::I32(x) => enc::xload8_u64_offset32(sink, dst, r, x),
Offset::U8(x) => enc::xload8_u64_offset8(sink, dst, r, x),
},
X::Sign32 => match x.into() {
Offset::I32(x) => enc::xload8_s32_offset32(sink, dst, r, x),
Offset::U8(x) => enc::xload8_s32_offset8(sink, dst, r, x),
},
X::Sign64 => match x.into() {
Offset::I32(x) => enc::xload8_s64_offset32(sink, dst, r, x),
Offset::U8(x) => enc::xload8_s64_offset8(sink, dst, r, x),
},
},
I16 => match (ext, endian) {
(X::None | X::Zero32, E::Little) => {
enc::xload16le_u32_offset32(sink, dst, r, x);
}
(X::Sign32, E::Little) => {
enc::xload16le_s32_offset32(sink, dst, r, x);
}
(X::Zero64, E::Little) => {
enc::xload16le_u64_offset32(sink, dst, r, x);
}
(X::Sign64, E::Little) => {
enc::xload16le_s64_offset32(sink, dst, r, x);
}
(X::None | X::Zero32, E::Little) => match x.into() {
Offset::I32(x) => enc::xload16le_u32_offset32(sink, dst, r, x),
Offset::U8(x) => enc::xload16le_u32_offset8(sink, dst, r, x),
},
(X::Sign32, E::Little) => match x.into() {
Offset::I32(x) => enc::xload16le_s32_offset32(sink, dst, r, x),
Offset::U8(x) => enc::xload16le_s32_offset8(sink, dst, r, x),
},
(X::Zero64, E::Little) => match x.into() {
Offset::I32(x) => enc::xload16le_u64_offset32(sink, dst, r, x),
Offset::U8(x) => enc::xload16le_u64_offset8(sink, dst, r, x),
},
(X::Sign64, E::Little) => match x.into() {
Offset::I32(x) => enc::xload16le_s64_offset32(sink, dst, r, x),
Offset::U8(x) => enc::xload16le_s64_offset8(sink, dst, r, x),
},
(X::None | X::Zero32 | X::Zero64, E::Big) => {
enc::xload16be_u64_offset32(sink, dst, r, x);
}
Expand All @@ -341,15 +377,18 @@ fn pulley_emit<P>(
}
},
I32 => match (ext, endian) {
(X::None | X::Zero32 | X::Sign32, E::Little) => {
enc::xload32le_offset32(sink, dst, r, x);
}
(X::Zero64, E::Little) => {
enc::xload32le_u64_offset32(sink, dst, r, x);
}
(X::Sign64, E::Little) => {
enc::xload32le_s64_offset32(sink, dst, r, x);
}
(X::None | X::Zero32 | X::Sign32, E::Little) => match x.into() {
Offset::I32(x) => enc::xload32le_offset32(sink, dst, r, x),
Offset::U8(x) => enc::xload32le_offset8(sink, dst, r, x),
},
(X::Zero64, E::Little) => match x.into() {
Offset::I32(x) => enc::xload32le_u64_offset32(sink, dst, r, x),
Offset::U8(x) => enc::xload32le_u64_offset8(sink, dst, r, x),
},
(X::Sign64, E::Little) => match x.into() {
Offset::I32(x) => enc::xload32le_s64_offset32(sink, dst, r, x),
Offset::U8(x) => enc::xload32le_s64_offset8(sink, dst, r, x),
},
(X::None | X::Zero32 | X::Zero64, E::Big) => {
enc::xload32be_u64_offset32(sink, dst, r, x);
}
Expand All @@ -358,7 +397,10 @@ fn pulley_emit<P>(
}
},
I64 => match endian {
E::Little => enc::xload64le_offset32(sink, dst, r, x),
E::Little => match x.into() {
Offset::I32(x) => enc::xload64le_offset32(sink, dst, r, x),
Offset::U8(x) => enc::xload64le_offset8(sink, dst, r, x),
},
E::Big => enc::xload64be_offset32(sink, dst, r, x),
},
_ => unimplemented!("xload ty={ty:?}"),
Expand Down Expand Up @@ -422,17 +464,29 @@ fn pulley_emit<P>(
let x = mem.get_offset_with_state(state);
let endian = emit_info.endianness(*flags);
match *ty {
I8 => enc::xstore8_offset32(sink, r, x, src),
I8 => match x.into() {
Offset::I32(x) => enc::xstore8_offset32(sink, r, x, src),
Offset::U8(x) => enc::xstore8_offset8(sink, r, x, src),
},
I16 => match endian {
E::Little => enc::xstore16le_offset32(sink, r, x, src),
E::Little => match x.into() {
Offset::I32(x) => enc::xstore16le_offset32(sink, r, x, src),
Offset::U8(x) => enc::xstore16le_offset8(sink, r, x, src),
},
E::Big => enc::xstore16be_offset32(sink, r, x, src),
},
I32 => match endian {
E::Little => enc::xstore32le_offset32(sink, r, x, src),
E::Little => match x.into() {
Offset::I32(x) => enc::xstore32le_offset32(sink, r, x, src),
Offset::U8(x) => enc::xstore32le_offset8(sink, r, x, src),
},
E::Big => enc::xstore32be_offset32(sink, r, x, src),
},
I64 => match endian {
E::Little => enc::xstore64le_offset32(sink, r, x, src),
E::Little => match x.into() {
Offset::I32(x) => enc::xstore64le_offset32(sink, r, x, src),
Offset::U8(x) => enc::xstore64le_offset8(sink, r, x, src),
},
E::Big => enc::xstore64be_offset32(sink, r, x, src),
},
_ => unimplemented!("xstore ty={ty:?}"),
Expand Down
54 changes: 27 additions & 27 deletions cranelift/filetests/filetests/isa/pulley32/call.clif
Original file line number Diff line number Diff line change
Expand Up @@ -163,12 +163,12 @@ block0:
; push_frame
; stack_alloc32 48
; xconst8 x15, 0
; xstore64le_offset32 sp, 0, x15
; xstore64le_offset32 sp, 8, x15
; xstore64le_offset32 sp, 16, x15
; xstore64le_offset32 sp, 24, x15
; xstore64le_offset32 sp, 32, x15
; xstore64le_offset32 sp, 40, x15
; xstore64le_offset8 sp, 0, x15
; xstore64le_offset8 sp, 8, x15
; xstore64le_offset8 sp, 16, x15
; xstore64le_offset8 sp, 24, x15
; xstore64le_offset8 sp, 32, x15
; xstore64le_offset8 sp, 40, x15
; xmov x0, x15
; xmov x1, x15
; xmov x2, x15
Expand All @@ -184,7 +184,7 @@ block0:
; xmov x12, x15
; xmov x13, x15
; xmov x14, x15
; call 0x0 // target = 0x60
; call 0x0 // target = 0x4e
; stack_free32 48
; pop_frame
; ret
Expand Down Expand Up @@ -284,22 +284,22 @@ block0:
; Disassembled:
; push_frame
; stack_alloc32 112
; xstore64le_offset32 sp, 104, x17
; xstore64le_offset32 sp, 96, x18
; xstore64le_offset32 sp, 88, x20
; xstore64le_offset32 sp, 80, x21
; xstore64le_offset32 sp, 72, x22
; xstore64le_offset32 sp, 64, x23
; xstore64le_offset32 sp, 56, x29
; xstore64le_offset8 sp, 104, x17
; xstore64le_offset8 sp, 96, x18
; xstore64le_offset8 sp, 88, x20
; xstore64le_offset8 sp, 80, x21
; xstore64le_offset8 sp, 72, x22
; xstore64le_offset8 sp, 64, x23
; xstore64le_offset8 sp, 56, x29
; xmov x0, sp
; call 0x0 // target = 0x3a
; call 0x0 // target = 0x25
; xmov x20, x13
; xmov x22, x11
; xload64le_offset32 x29, sp, 0
; xload64le_offset32 x11, sp, 8
; xload64le_offset32 x13, sp, 16
; xload64le_offset32 x21, sp, 24
; xload64le_offset32 x23, sp, 32
; xload64le_offset8 x29, sp, 0
; xload64le_offset8 x11, sp, 8
; xload64le_offset8 x13, sp, 16
; xload64le_offset8 x21, sp, 24
; xload64le_offset8 x23, sp, 32
; xadd64 x18, x0, x1
; xadd64 x17, x2, x3
; xadd64 x5, x4, x5
Expand All @@ -325,13 +325,13 @@ block0:
; xadd64 x14, x0, x14
; xadd64 x13, x13, x13
; xadd64 x0, x14, x13
; xload64le_offset32 x17, sp, 104
; xload64le_offset32 x18, sp, 96
; xload64le_offset32 x20, sp, 88
; xload64le_offset32 x21, sp, 80
; xload64le_offset32 x22, sp, 72
; xload64le_offset32 x23, sp, 64
; xload64le_offset32 x29, sp, 56
; xload64le_offset8 x17, sp, 104
; xload64le_offset8 x18, sp, 96
; xload64le_offset8 x20, sp, 88
; xload64le_offset8 x21, sp, 80
; xload64le_offset8 x22, sp, 72
; xload64le_offset8 x23, sp, 64
; xload64le_offset8 x29, sp, 56
; stack_free32 112
; pop_frame
; ret
Expand Down
8 changes: 4 additions & 4 deletions cranelift/filetests/filetests/isa/pulley32/load.clif
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ block0(v0: i32):
; ret
;
; Disassembled:
; xload32le_offset32 x0, x0, 0
; xload32le_offset8 x0, x0, 0
; ret

function %load_i64(i32) -> i64 {
Expand All @@ -28,7 +28,7 @@ block0(v0: i32):
; ret
;
; Disassembled:
; xload64le_offset32 x0, x0, 0
; xload64le_offset8 x0, x0, 0
; ret

function %load_i32_with_offset(i32) -> i32 {
Expand All @@ -43,7 +43,7 @@ block0(v0: i32):
; ret
;
; Disassembled:
; xload32le_offset32 x0, x0, 4
; xload32le_offset8 x0, x0, 4
; ret

function %load_i64_with_offset(i32) -> i64 {
Expand All @@ -58,6 +58,6 @@ block0(v0: i32):
; ret
;
; Disassembled:
; xload64le_offset32 x0, x0, 8
; xload64le_offset8 x0, x0, 8
; ret

8 changes: 4 additions & 4 deletions cranelift/filetests/filetests/isa/pulley32/store.clif
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ block0(v0: i32, v1: i32):
; ret
;
; Disassembled:
; xstore32le_offset32 x1, 0, x0
; xstore32le_offset8 x1, 0, x0
; ret

function %store_i64(i64, i32) {
Expand All @@ -28,7 +28,7 @@ block0(v0: i64, v1: i32):
; ret
;
; Disassembled:
; xstore64le_offset32 x1, 0, x0
; xstore64le_offset8 x1, 0, x0
; ret

function %store_i32_with_offset(i32, i32) {
Expand All @@ -43,7 +43,7 @@ block0(v0: i32, v1: i32):
; ret
;
; Disassembled:
; xstore32le_offset32 x1, 4, x0
; xstore32le_offset8 x1, 4, x0
; ret

function %store_i64_with_offset(i64, i32) {
Expand All @@ -58,6 +58,6 @@ block0(v0: i64, v1: i32):
; ret
;
; Disassembled:
; xstore64le_offset32 x1, 8, x0
; xstore64le_offset8 x1, 8, x0
; ret

Loading

0 comments on commit d95dcd1

Please sign in to comment.