From d95dcd1da01566c9e96d34abdc75a1ec1fb6816d Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 17 Dec 2024 15:56:05 -0800 Subject: [PATCH] pulley: Add offset8 integer loads/stores This commit extends the set of opcodes for loads/stores from memory with integer registers. Previously the only addressing mode supported was a base register plus a 32-bit signed immediate. This immediate frequently doesn't need 32-bits though and can often fit in a much smaller range. Looking at `spidermonkey.cwasm` a large number of loads/stores can fit within an unsigned 8-bit integer instead so this commit adds an `offset8` mode in addition to the preexisting `offset32` mode. Empirically this commit shrinks `spidermonkey.cwasm` for pulley64 from 33M to 31M. This notably, at this time, does not extend general addressing modes in Pulley nor does it extend all loads/stores. For example float/vector/big-endian loads and stores all continue to only support a 32-bit signed offset from the base pointer. This is done under the assumption that integer loads/stores dominate both performance/code-size, but this is not empirically proven just yet. Additionally at this time the choice is being made to add an opcode-per-addressing-mode rather than having a single load opcode take a general addressing mode. The assumption here is that decoding a fully general addressing mode and processing it is probably slower at runtime than specializing opcodes per addressing mode. This is currently an unproven assumption however and the cost of this is increased complexity in the Cranelift backend as it has to have many branches for all loads/stores supported. 
--- .../src/isa/pulley_shared/inst/emit.rs | 114 +++-- .../filetests/isa/pulley32/call.clif | 54 +-- .../filetests/isa/pulley32/load.clif | 8 +- .../filetests/isa/pulley32/store.clif | 8 +- .../filetests/isa/pulley64/call.clif | 72 ++-- .../filetests/isa/pulley64/load.clif | 407 +++++++++++++++++- .../filetests/isa/pulley64/store.clif | 8 +- pulley/src/interp.rs | 64 +++ pulley/src/lib.rs | 35 ++ tests/disas/pulley/call.wat | 4 +- tests/disas/pulley/epoch-simple.wat | 16 +- tests/disas/pulley/memory-inbounds.wat | 56 +-- 12 files changed, 694 insertions(+), 152 deletions(-) diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs index 642662eb8e43..59aebec1e49b 100644 --- a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs +++ b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs @@ -119,6 +119,26 @@ where } } +/// Representation of a static offset from a pointer. +/// +/// In VCode this is always represented as an `i32` and then just before +/// lowering this is used to determine which instruction to emit. +enum Offset { + /// An unsigned 8-bit offset. + U8(u8), + /// A signed 32-bit offset. + I32(i32), +} + +impl From for Offset { + fn from(i: i32) -> Offset { + if let Ok(i) = i.try_into() { + return Offset::U8(i); + } + Offset::I32(i) + } +} + fn pulley_emit

( inst: &Inst, sink: &mut MachBuffer>, @@ -315,24 +335,40 @@ fn pulley_emit

( let endian = emit_info.endianness(*flags); match *ty { I8 => match ext { - X::None | X::Zero32 => enc::xload8_u32_offset32(sink, dst, r, x), - X::Zero64 => enc::xload8_u64_offset32(sink, dst, r, x), - X::Sign32 => enc::xload8_s32_offset32(sink, dst, r, x), - X::Sign64 => enc::xload8_s64_offset32(sink, dst, r, x), + X::None | X::Zero32 => match x.into() { + Offset::I32(x) => enc::xload8_u32_offset32(sink, dst, r, x), + Offset::U8(x) => enc::xload8_u32_offset8(sink, dst, r, x), + }, + X::Zero64 => match x.into() { + Offset::I32(x) => enc::xload8_u64_offset32(sink, dst, r, x), + Offset::U8(x) => enc::xload8_u64_offset8(sink, dst, r, x), + }, + X::Sign32 => match x.into() { + Offset::I32(x) => enc::xload8_s32_offset32(sink, dst, r, x), + Offset::U8(x) => enc::xload8_s32_offset8(sink, dst, r, x), + }, + X::Sign64 => match x.into() { + Offset::I32(x) => enc::xload8_s64_offset32(sink, dst, r, x), + Offset::U8(x) => enc::xload8_s64_offset8(sink, dst, r, x), + }, }, I16 => match (ext, endian) { - (X::None | X::Zero32, E::Little) => { - enc::xload16le_u32_offset32(sink, dst, r, x); - } - (X::Sign32, E::Little) => { - enc::xload16le_s32_offset32(sink, dst, r, x); - } - (X::Zero64, E::Little) => { - enc::xload16le_u64_offset32(sink, dst, r, x); - } - (X::Sign64, E::Little) => { - enc::xload16le_s64_offset32(sink, dst, r, x); - } + (X::None | X::Zero32, E::Little) => match x.into() { + Offset::I32(x) => enc::xload16le_u32_offset32(sink, dst, r, x), + Offset::U8(x) => enc::xload16le_u32_offset8(sink, dst, r, x), + }, + (X::Sign32, E::Little) => match x.into() { + Offset::I32(x) => enc::xload16le_s32_offset32(sink, dst, r, x), + Offset::U8(x) => enc::xload16le_s32_offset8(sink, dst, r, x), + }, + (X::Zero64, E::Little) => match x.into() { + Offset::I32(x) => enc::xload16le_u64_offset32(sink, dst, r, x), + Offset::U8(x) => enc::xload16le_u64_offset8(sink, dst, r, x), + }, + (X::Sign64, E::Little) => match x.into() { + Offset::I32(x) => enc::xload16le_s64_offset32(sink, dst, r, 
x), + Offset::U8(x) => enc::xload16le_s64_offset8(sink, dst, r, x), + }, (X::None | X::Zero32 | X::Zero64, E::Big) => { enc::xload16be_u64_offset32(sink, dst, r, x); } @@ -341,15 +377,18 @@ fn pulley_emit

( } }, I32 => match (ext, endian) { - (X::None | X::Zero32 | X::Sign32, E::Little) => { - enc::xload32le_offset32(sink, dst, r, x); - } - (X::Zero64, E::Little) => { - enc::xload32le_u64_offset32(sink, dst, r, x); - } - (X::Sign64, E::Little) => { - enc::xload32le_s64_offset32(sink, dst, r, x); - } + (X::None | X::Zero32 | X::Sign32, E::Little) => match x.into() { + Offset::I32(x) => enc::xload32le_offset32(sink, dst, r, x), + Offset::U8(x) => enc::xload32le_offset8(sink, dst, r, x), + }, + (X::Zero64, E::Little) => match x.into() { + Offset::I32(x) => enc::xload32le_u64_offset32(sink, dst, r, x), + Offset::U8(x) => enc::xload32le_u64_offset8(sink, dst, r, x), + }, + (X::Sign64, E::Little) => match x.into() { + Offset::I32(x) => enc::xload32le_s64_offset32(sink, dst, r, x), + Offset::U8(x) => enc::xload32le_s64_offset8(sink, dst, r, x), + }, (X::None | X::Zero32 | X::Zero64, E::Big) => { enc::xload32be_u64_offset32(sink, dst, r, x); } @@ -358,7 +397,10 @@ fn pulley_emit

( } }, I64 => match endian { - E::Little => enc::xload64le_offset32(sink, dst, r, x), + E::Little => match x.into() { + Offset::I32(x) => enc::xload64le_offset32(sink, dst, r, x), + Offset::U8(x) => enc::xload64le_offset8(sink, dst, r, x), + }, E::Big => enc::xload64be_offset32(sink, dst, r, x), }, _ => unimplemented!("xload ty={ty:?}"), @@ -422,17 +464,29 @@ fn pulley_emit

( let x = mem.get_offset_with_state(state); let endian = emit_info.endianness(*flags); match *ty { - I8 => enc::xstore8_offset32(sink, r, x, src), + I8 => match x.into() { + Offset::I32(x) => enc::xstore8_offset32(sink, r, x, src), + Offset::U8(x) => enc::xstore8_offset8(sink, r, x, src), + }, I16 => match endian { - E::Little => enc::xstore16le_offset32(sink, r, x, src), + E::Little => match x.into() { + Offset::I32(x) => enc::xstore16le_offset32(sink, r, x, src), + Offset::U8(x) => enc::xstore16le_offset8(sink, r, x, src), + }, E::Big => enc::xstore16be_offset32(sink, r, x, src), }, I32 => match endian { - E::Little => enc::xstore32le_offset32(sink, r, x, src), + E::Little => match x.into() { + Offset::I32(x) => enc::xstore32le_offset32(sink, r, x, src), + Offset::U8(x) => enc::xstore32le_offset8(sink, r, x, src), + }, E::Big => enc::xstore32be_offset32(sink, r, x, src), }, I64 => match endian { - E::Little => enc::xstore64le_offset32(sink, r, x, src), + E::Little => match x.into() { + Offset::I32(x) => enc::xstore64le_offset32(sink, r, x, src), + Offset::U8(x) => enc::xstore64le_offset8(sink, r, x, src), + }, E::Big => enc::xstore64be_offset32(sink, r, x, src), }, _ => unimplemented!("xstore ty={ty:?}"), diff --git a/cranelift/filetests/filetests/isa/pulley32/call.clif b/cranelift/filetests/filetests/isa/pulley32/call.clif index d2f30e8e9a68..7e6b5c3fb4e9 100644 --- a/cranelift/filetests/filetests/isa/pulley32/call.clif +++ b/cranelift/filetests/filetests/isa/pulley32/call.clif @@ -163,12 +163,12 @@ block0: ; push_frame ; stack_alloc32 48 ; xconst8 x15, 0 -; xstore64le_offset32 sp, 0, x15 -; xstore64le_offset32 sp, 8, x15 -; xstore64le_offset32 sp, 16, x15 -; xstore64le_offset32 sp, 24, x15 -; xstore64le_offset32 sp, 32, x15 -; xstore64le_offset32 sp, 40, x15 +; xstore64le_offset8 sp, 0, x15 +; xstore64le_offset8 sp, 8, x15 +; xstore64le_offset8 sp, 16, x15 +; xstore64le_offset8 sp, 24, x15 +; xstore64le_offset8 sp, 32, x15 +; xstore64le_offset8 sp, 40, x15 ; 
xmov x0, x15 ; xmov x1, x15 ; xmov x2, x15 @@ -184,7 +184,7 @@ block0: ; xmov x12, x15 ; xmov x13, x15 ; xmov x14, x15 -; call 0x0 // target = 0x60 +; call 0x0 // target = 0x4e ; stack_free32 48 ; pop_frame ; ret @@ -284,22 +284,22 @@ block0: ; Disassembled: ; push_frame ; stack_alloc32 112 -; xstore64le_offset32 sp, 104, x17 -; xstore64le_offset32 sp, 96, x18 -; xstore64le_offset32 sp, 88, x20 -; xstore64le_offset32 sp, 80, x21 -; xstore64le_offset32 sp, 72, x22 -; xstore64le_offset32 sp, 64, x23 -; xstore64le_offset32 sp, 56, x29 +; xstore64le_offset8 sp, 104, x17 +; xstore64le_offset8 sp, 96, x18 +; xstore64le_offset8 sp, 88, x20 +; xstore64le_offset8 sp, 80, x21 +; xstore64le_offset8 sp, 72, x22 +; xstore64le_offset8 sp, 64, x23 +; xstore64le_offset8 sp, 56, x29 ; xmov x0, sp -; call 0x0 // target = 0x3a +; call 0x0 // target = 0x25 ; xmov x20, x13 ; xmov x22, x11 -; xload64le_offset32 x29, sp, 0 -; xload64le_offset32 x11, sp, 8 -; xload64le_offset32 x13, sp, 16 -; xload64le_offset32 x21, sp, 24 -; xload64le_offset32 x23, sp, 32 +; xload64le_offset8 x29, sp, 0 +; xload64le_offset8 x11, sp, 8 +; xload64le_offset8 x13, sp, 16 +; xload64le_offset8 x21, sp, 24 +; xload64le_offset8 x23, sp, 32 ; xadd64 x18, x0, x1 ; xadd64 x17, x2, x3 ; xadd64 x5, x4, x5 @@ -325,13 +325,13 @@ block0: ; xadd64 x14, x0, x14 ; xadd64 x13, x13, x13 ; xadd64 x0, x14, x13 -; xload64le_offset32 x17, sp, 104 -; xload64le_offset32 x18, sp, 96 -; xload64le_offset32 x20, sp, 88 -; xload64le_offset32 x21, sp, 80 -; xload64le_offset32 x22, sp, 72 -; xload64le_offset32 x23, sp, 64 -; xload64le_offset32 x29, sp, 56 +; xload64le_offset8 x17, sp, 104 +; xload64le_offset8 x18, sp, 96 +; xload64le_offset8 x20, sp, 88 +; xload64le_offset8 x21, sp, 80 +; xload64le_offset8 x22, sp, 72 +; xload64le_offset8 x23, sp, 64 +; xload64le_offset8 x29, sp, 56 ; stack_free32 112 ; pop_frame ; ret diff --git a/cranelift/filetests/filetests/isa/pulley32/load.clif b/cranelift/filetests/filetests/isa/pulley32/load.clif 
index 82cc4c52aac8..3fe058b2e6ba 100644 --- a/cranelift/filetests/filetests/isa/pulley32/load.clif +++ b/cranelift/filetests/filetests/isa/pulley32/load.clif @@ -13,7 +13,7 @@ block0(v0: i32): ; ret ; ; Disassembled: -; xload32le_offset32 x0, x0, 0 +; xload32le_offset8 x0, x0, 0 ; ret function %load_i64(i32) -> i64 { @@ -28,7 +28,7 @@ block0(v0: i32): ; ret ; ; Disassembled: -; xload64le_offset32 x0, x0, 0 +; xload64le_offset8 x0, x0, 0 ; ret function %load_i32_with_offset(i32) -> i32 { @@ -43,7 +43,7 @@ block0(v0: i32): ; ret ; ; Disassembled: -; xload32le_offset32 x0, x0, 4 +; xload32le_offset8 x0, x0, 4 ; ret function %load_i64_with_offset(i32) -> i64 { @@ -58,6 +58,6 @@ block0(v0: i32): ; ret ; ; Disassembled: -; xload64le_offset32 x0, x0, 8 +; xload64le_offset8 x0, x0, 8 ; ret diff --git a/cranelift/filetests/filetests/isa/pulley32/store.clif b/cranelift/filetests/filetests/isa/pulley32/store.clif index 5f87a2c2491d..90168ed0564f 100644 --- a/cranelift/filetests/filetests/isa/pulley32/store.clif +++ b/cranelift/filetests/filetests/isa/pulley32/store.clif @@ -13,7 +13,7 @@ block0(v0: i32, v1: i32): ; ret ; ; Disassembled: -; xstore32le_offset32 x1, 0, x0 +; xstore32le_offset8 x1, 0, x0 ; ret function %store_i64(i64, i32) { @@ -28,7 +28,7 @@ block0(v0: i64, v1: i32): ; ret ; ; Disassembled: -; xstore64le_offset32 x1, 0, x0 +; xstore64le_offset8 x1, 0, x0 ; ret function %store_i32_with_offset(i32, i32) { @@ -43,7 +43,7 @@ block0(v0: i32, v1: i32): ; ret ; ; Disassembled: -; xstore32le_offset32 x1, 4, x0 +; xstore32le_offset8 x1, 4, x0 ; ret function %store_i64_with_offset(i64, i32) { @@ -58,6 +58,6 @@ block0(v0: i64, v1: i32): ; ret ; ; Disassembled: -; xstore64le_offset32 x1, 8, x0 +; xstore64le_offset8 x1, 8, x0 ; ret diff --git a/cranelift/filetests/filetests/isa/pulley64/call.clif b/cranelift/filetests/filetests/isa/pulley64/call.clif index 13169a80a3e0..e876894e5e16 100644 --- a/cranelift/filetests/filetests/isa/pulley64/call.clif +++ 
b/cranelift/filetests/filetests/isa/pulley64/call.clif @@ -163,12 +163,12 @@ block0: ; push_frame ; stack_alloc32 48 ; xconst8 x15, 0 -; xstore64le_offset32 sp, 0, x15 -; xstore64le_offset32 sp, 8, x15 -; xstore64le_offset32 sp, 16, x15 -; xstore64le_offset32 sp, 24, x15 -; xstore64le_offset32 sp, 32, x15 -; xstore64le_offset32 sp, 40, x15 +; xstore64le_offset8 sp, 0, x15 +; xstore64le_offset8 sp, 8, x15 +; xstore64le_offset8 sp, 16, x15 +; xstore64le_offset8 sp, 24, x15 +; xstore64le_offset8 sp, 32, x15 +; xstore64le_offset8 sp, 40, x15 ; xmov x0, x15 ; xmov x1, x15 ; xmov x2, x15 @@ -184,7 +184,7 @@ block0: ; xmov x12, x15 ; xmov x13, x15 ; xmov x14, x15 -; call 0x0 // target = 0x60 +; call 0x0 // target = 0x4e ; stack_free32 48 ; pop_frame ; ret @@ -284,22 +284,22 @@ block0: ; Disassembled: ; push_frame ; stack_alloc32 112 -; xstore64le_offset32 sp, 104, x17 -; xstore64le_offset32 sp, 96, x18 -; xstore64le_offset32 sp, 88, x20 -; xstore64le_offset32 sp, 80, x21 -; xstore64le_offset32 sp, 72, x22 -; xstore64le_offset32 sp, 64, x23 -; xstore64le_offset32 sp, 56, x29 +; xstore64le_offset8 sp, 104, x17 +; xstore64le_offset8 sp, 96, x18 +; xstore64le_offset8 sp, 88, x20 +; xstore64le_offset8 sp, 80, x21 +; xstore64le_offset8 sp, 72, x22 +; xstore64le_offset8 sp, 64, x23 +; xstore64le_offset8 sp, 56, x29 ; xmov x0, sp -; call 0x0 // target = 0x3a +; call 0x0 // target = 0x25 ; xmov x20, x13 ; xmov x22, x11 -; xload64le_offset32 x29, sp, 0 -; xload64le_offset32 x11, sp, 8 -; xload64le_offset32 x13, sp, 16 -; xload64le_offset32 x21, sp, 24 -; xload64le_offset32 x23, sp, 32 +; xload64le_offset8 x29, sp, 0 +; xload64le_offset8 x11, sp, 8 +; xload64le_offset8 x13, sp, 16 +; xload64le_offset8 x21, sp, 24 +; xload64le_offset8 x23, sp, 32 ; xadd64 x18, x0, x1 ; xadd64 x17, x2, x3 ; xadd64 x5, x4, x5 @@ -325,13 +325,13 @@ block0: ; xadd64 x14, x0, x14 ; xadd64 x13, x13, x13 ; xadd64 x0, x14, x13 -; xload64le_offset32 x17, sp, 104 -; xload64le_offset32 x18, sp, 96 -; 
xload64le_offset32 x20, sp, 88 -; xload64le_offset32 x21, sp, 80 -; xload64le_offset32 x22, sp, 72 -; xload64le_offset32 x23, sp, 64 -; xload64le_offset32 x29, sp, 56 +; xload64le_offset8 x17, sp, 104 +; xload64le_offset8 x18, sp, 96 +; xload64le_offset8 x20, sp, 88 +; xload64le_offset8 x21, sp, 80 +; xload64le_offset8 x22, sp, 72 +; xload64le_offset8 x23, sp, 64 +; xload64le_offset8 x29, sp, 56 ; stack_free32 112 ; pop_frame ; ret @@ -411,14 +411,14 @@ block0: ; push_frame ; stack_alloc32 64 ; xconst8 x15, 0 -; xstore64le_offset32 sp, 0, x15 -; xstore64le_offset32 sp, 8, x15 -; xstore64le_offset32 sp, 16, x15 -; xstore64le_offset32 sp, 24, x15 -; xstore64le_offset32 sp, 32, x15 -; xstore64le_offset32 sp, 40, x15 -; xstore64le_offset32 sp, 48, x15 -; xstore64le_offset32 sp, 56, x15 +; xstore64le_offset8 sp, 0, x15 +; xstore64le_offset8 sp, 8, x15 +; xstore64le_offset8 sp, 16, x15 +; xstore64le_offset8 sp, 24, x15 +; xstore64le_offset8 sp, 32, x15 +; xstore64le_offset8 sp, 40, x15 +; xstore64le_offset8 sp, 48, x15 +; xstore64le_offset8 sp, 56, x15 ; xmov x0, x15 ; xmov x1, x15 ; xmov x2, x15 @@ -434,7 +434,7 @@ block0: ; xmov x12, x15 ; xmov x13, x15 ; xmov x14, x15 -; call 0x0 // target = 0x6e +; call 0x0 // target = 0x56 ; stack_free32 64 ; pop_frame ; ret diff --git a/cranelift/filetests/filetests/isa/pulley64/load.clif b/cranelift/filetests/filetests/isa/pulley64/load.clif index e91b1fb5d39f..aa6d826f212e 100644 --- a/cranelift/filetests/filetests/isa/pulley64/load.clif +++ b/cranelift/filetests/filetests/isa/pulley64/load.clif @@ -1,6 +1,96 @@ test compile precise-output target pulley64 +function %load_i8(i64) -> i8 { +block0(v0: i64): + v1 = load.i8 v0 + return v1 +} + +; VCode: +; block0: +; x0 = xload8 x0+0 // flags = +; ret +; +; Disassembled: +; xload8_u32_offset8 x0, x0, 0 +; ret + +function %load_i8_s32(i64) -> i32 { +block0(v0: i64): + v1 = sload8.i32 v0 + return v1 +} + +; VCode: +; block0: +; x0 = xload8_s32 x0+0 // flags = +; ret +; +; Disassembled: 
+; xload8_s32_offset8 x0, x0, 0 +; ret + +function %load_i8_u32(i64) -> i32 { +block0(v0: i64): + v1 = uload8.i32 v0 + return v1 +} + +; VCode: +; block0: +; x0 = xload8_u32 x0+0 // flags = +; ret +; +; Disassembled: +; xload8_u32_offset8 x0, x0, 0 +; ret + +function %load_i16(i64) -> i16 { +block0(v0: i64): + v1 = load.i16 v0 + return v1 +} + +; VCode: +; block0: +; x0 = xload16 x0+0 // flags = +; ret +; +; Disassembled: +; xload16le_u32_offset8 x0, x0, 0 +; ret + +function %load_i16_s32(i64) -> i32 { +block0(v0: i64): + v1 = sload16.i32 v0 + return v1 +} + +; VCode: +; block0: +; x0 = xload16_s32 x0+0 // flags = +; ret +; +; Disassembled: +; xload16le_s32_offset8 x0, x0, 0 +; ret + +function %load_i16_u32(i64) -> i32 { +block0(v0: i64): + v1 = uload16.i32 v0 + return v1 +} + +; VCode: +; block0: +; x0 = xload16_u32 x0+0 // flags = +; ret +; +; Disassembled: +; xload16le_u32_offset8 x0, x0, 0 +; ret + function %load_i32(i64) -> i32 { block0(v0: i64): v1 = load.i32 v0 @@ -13,7 +103,37 @@ block0(v0: i64): ; ret ; ; Disassembled: -; xload32le_offset32 x0, x0, 0 +; xload32le_offset8 x0, x0, 0 +; ret + +function %load_i32_s64(i64) -> i64 { +block0(v0: i64): + v1 = sload32.i64 v0 + return v1 +} + +; VCode: +; block0: +; x0 = xload32_s64 x0+0 // flags = +; ret +; +; Disassembled: +; xload32le_s64_offset8 x0, x0, 0 +; ret + +function %load_i32_u64(i64) -> i64 { +block0(v0: i64): + v1 = uload32.i64 v0 + return v1 +} + +; VCode: +; block0: +; x0 = xload32_u64 x0+0 // flags = +; ret +; +; Disassembled: +; xload32le_u64_offset8 x0, x0, 0 ; ret function %load_i64(i64) -> i64 { @@ -28,10 +148,100 @@ block0(v0: i64): ; ret ; ; Disassembled: -; xload64le_offset32 x0, x0, 0 +; xload64le_offset8 x0, x0, 0 +; ret + +function %load_i8_offset(i64) -> i8 { +block0(v0: i64): + v1 = load.i8 v0+4 + return v1 +} + +; VCode: +; block0: +; x0 = xload8 x0+4 // flags = +; ret +; +; Disassembled: +; xload8_u32_offset8 x0, x0, 4 +; ret + +function %load_i8_s32_offset(i64) -> i32 { +block0(v0: 
i64): + v1 = sload8.i32 v0+4 + return v1 +} + +; VCode: +; block0: +; x0 = xload8_s32 x0+4 // flags = +; ret +; +; Disassembled: +; xload8_s32_offset8 x0, x0, 4 +; ret + +function %load_i8_u32_offset(i64) -> i32 { +block0(v0: i64): + v1 = uload8.i32 v0+4 + return v1 +} + +; VCode: +; block0: +; x0 = xload8_u32 x0+4 // flags = +; ret +; +; Disassembled: +; xload8_u32_offset8 x0, x0, 4 +; ret + +function %load_i16_offset(i64) -> i16 { +block0(v0: i64): + v1 = load.i16 v0+4 + return v1 +} + +; VCode: +; block0: +; x0 = xload16 x0+4 // flags = +; ret +; +; Disassembled: +; xload16le_u32_offset8 x0, x0, 4 +; ret + +function %load_i16_s32_offset(i64) -> i32 { +block0(v0: i64): + v1 = sload16.i32 v0+4 + return v1 +} + +; VCode: +; block0: +; x0 = xload16_s32 x0+4 // flags = +; ret +; +; Disassembled: +; xload16le_s32_offset8 x0, x0, 4 ; ret -function %load_i32_with_offset(i64) -> i32 { +function %load_i16_u32_offset(i64) -> i32 { +block0(v0: i64): + v1 = uload16.i32 v0+4 + return v1 +} + +; VCode: +; block0: +; x0 = xload16_u32 x0+4 // flags = +; ret +; +; Disassembled: +; xload16le_u32_offset8 x0, x0, 4 +; ret + +function %load_i32_offset(i64) -> i32 { block0(v0: i64): v1 = load.i32 v0+4 return v1 @@ -43,24 +253,203 @@ block0(v0: i64): ; ret ; ; Disassembled: -; xload32le_offset32 x0, x0, 4 +; xload32le_offset8 x0, x0, 4 +; ret + +function %load_i32_s64_offset(i64) -> i64 { +block0(v0: i64): + v1 = sload32.i64 v0+4 + return v1 +} + +; VCode: +; block0: +; x0 = xload32_s64 x0+4 // flags = +; ret +; +; Disassembled: +; xload32le_s64_offset8 x0, x0, 4 +; ret + +function %load_i32_u64_offset(i64) -> i64 { +block0(v0: i64): + v1 = uload32.i64 v0+4 + return v1 +} + +; VCode: +; block0: +; x0 = xload32_u64 x0+4 // flags = +; ret +; +; Disassembled: +; xload32le_u64_offset8 x0, x0, 4 +; ret + +function %load_i64_offset(i64) -> i64 { +block0(v0: i64): + v1 = load.i64 v0+65536 + return v1 +} + +; VCode: +; block0: +; x0 = xload64 x0+65536 // flags = +; ret +; +; Disassembled: +; 
xload64le_offset32 x0, x0, 65536 +; ret + +function %load_i8_big_offset(i64) -> i8 { +block0(v0: i64): + v1 = load.i8 v0+65536 + return v1 +} + +; VCode: +; block0: +; x0 = xload8 x0+65536 // flags = +; ret +; +; Disassembled: +; xload8_u32_offset32 x0, x0, 65536 +; ret + +function %load_i8_s32_big_offset(i64) -> i32 { +block0(v0: i64): + v1 = sload8.i32 v0+65536 + return v1 +} + +; VCode: +; block0: +; x0 = xload8_s32 x0+65536 // flags = +; ret +; +; Disassembled: +; xload8_s32_offset32 x0, x0, 65536 +; ret + +function %load_i8_u32_big_offset(i64) -> i32 { +block0(v0: i64): + v1 = uload8.i32 v0+65536 + return v1 +} + +; VCode: +; block0: +; x0 = xload8_u32 x0+65536 // flags = +; ret +; +; Disassembled: +; xload8_u32_offset32 x0, x0, 65536 +; ret + +function %load_i16_big_offset(i64) -> i16 { +block0(v0: i64): + v1 = load.i16 v0+65536 + return v1 +} + +; VCode: +; block0: +; x0 = xload16 x0+65536 // flags = +; ret +; +; Disassembled: +; xload16le_u32_offset32 x0, x0, 65536 +; ret + +function %load_i16_s32_big_offset(i64) -> i32 { +block0(v0: i64): + v1 = sload16.i32 v0+65536 + return v1 +} + +; VCode: +; block0: +; x0 = xload16_s32 x0+65536 // flags = +; ret +; +; Disassembled: +; xload16le_s32_offset32 x0, x0, 65536 ; ret -function %load_i64_with_offset(i64) -> i64 { +function %load_i16_u32_big_offset(i64) -> i32 { block0(v0: i64): - v1 = load.i64 v0+8 + v1 = uload16.i32 v0+65536 return v1 } ; VCode: ; block0: -; x0 = xload64 x0+8 // flags = +; x0 = xload16_u32 x0+65536 // flags = ; ret ; ; Disassembled: -; xload64le_offset32 x0, x0, 8 +; xload16le_u32_offset32 x0, x0, 65536 ; ret +function %load_i32_big_offset(i64) -> i32 { +block0(v0: i64): + v1 = load.i32 v0+65536 + return v1 +} + +; VCode: +; block0: +; x0 = xload32 x0+65536 // flags = +; ret +; +; Disassembled: +; xload32le_offset32 x0, x0, 65536 +; ret + +function %load_i32_s64_big_offset(i64) -> i64 { +block0(v0: i64): + v1 = sload32.i64 v0+65536 + return v1 +} + +; VCode: +; block0: +; x0 = xload32_s64 
x0+65536 // flags = +; ret +; +; Disassembled: +; xload32le_s64_offset32 x0, x0, 65536 +; ret + +function %load_i32_u64_big_offset(i64) -> i64 { +block0(v0: i64): + v1 = uload32.i64 v0+65536 + return v1 +} + +; VCode: +; block0: +; x0 = xload32_u64 x0+65536 // flags = +; ret +; +; Disassembled: +; xload32le_u64_offset32 x0, x0, 65536 +; ret + +function %load_i64_big_offset(i64) -> i64 { +block0(v0: i64): + v1 = load.i64 v0+65536 + return v1 +} + +; VCode: +; block0: +; x0 = xload64 x0+65536 // flags = +; ret +; +; Disassembled: +; xload64le_offset32 x0, x0, 65536 +; ret function %load_i64_with_add_and_offset(i64) -> i64 { block0(v0: i64): @@ -75,6 +464,6 @@ block0(v0: i64): ; ret ; ; Disassembled: -; xload64le_offset32 x0, x0, 18 +; xload64le_offset8 x0, x0, 18 ; ret diff --git a/cranelift/filetests/filetests/isa/pulley64/store.clif b/cranelift/filetests/filetests/isa/pulley64/store.clif index 67cdf9763aa4..a6cb23589b7f 100644 --- a/cranelift/filetests/filetests/isa/pulley64/store.clif +++ b/cranelift/filetests/filetests/isa/pulley64/store.clif @@ -13,7 +13,7 @@ block0(v0: i32, v1: i64): ; ret ; ; Disassembled: -; xstore32le_offset32 x1, 0, x0 +; xstore32le_offset8 x1, 0, x0 ; ret function %store_i64(i64, i64) { @@ -28,7 +28,7 @@ block0(v0: i64, v1: i64): ; ret ; ; Disassembled: -; xstore64le_offset32 x1, 0, x0 +; xstore64le_offset8 x1, 0, x0 ; ret function %store_i32_with_offset(i32, i64) { @@ -43,7 +43,7 @@ block0(v0: i32, v1: i64): ; ret ; ; Disassembled: -; xstore32le_offset32 x1, 4, x0 +; xstore32le_offset8 x1, 4, x0 ; ret function %store_i64_with_offset(i64, i64) { @@ -58,6 +58,6 @@ block0(v0: i64, v1: i64): ; ret ; ; Disassembled: -; xstore64le_offset32 x1, 8, x0 +; xstore64le_offset8 x1, 8, x0 ; ret diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs index 043806f0f363..03a9367ba505 100644 --- a/pulley/src/interp.rs +++ b/pulley/src/interp.rs @@ -1457,6 +1457,70 @@ impl OpVisitor for Interpreter<'_> { ControlFlow::Continue(()) } + fn 
xload8_u32_offset8(&mut self, dst: XReg, ptr: XReg, offset: u8) -> ControlFlow { + self.xload8_u32_offset32(dst, ptr, offset.into()) + } + + fn xload8_s32_offset8(&mut self, dst: XReg, ptr: XReg, offset: u8) -> ControlFlow { + self.xload8_s32_offset32(dst, ptr, offset.into()) + } + + fn xload16le_u32_offset8(&mut self, dst: XReg, ptr: XReg, offset: u8) -> ControlFlow { + self.xload16le_u32_offset32(dst, ptr, offset.into()) + } + + fn xload16le_s32_offset8(&mut self, dst: XReg, ptr: XReg, offset: u8) -> ControlFlow { + self.xload16le_s32_offset32(dst, ptr, offset.into()) + } + + fn xload32le_offset8(&mut self, dst: XReg, ptr: XReg, offset: u8) -> ControlFlow { + self.xload32le_offset32(dst, ptr, offset.into()) + } + + fn xload8_u64_offset8(&mut self, dst: XReg, ptr: XReg, offset: u8) -> ControlFlow { + self.xload8_u64_offset32(dst, ptr, offset.into()) + } + + fn xload8_s64_offset8(&mut self, dst: XReg, ptr: XReg, offset: u8) -> ControlFlow { + self.xload8_s64_offset32(dst, ptr, offset.into()) + } + + fn xload16le_u64_offset8(&mut self, dst: XReg, ptr: XReg, offset: u8) -> ControlFlow { + self.xload16le_u64_offset32(dst, ptr, offset.into()) + } + + fn xload16le_s64_offset8(&mut self, dst: XReg, ptr: XReg, offset: u8) -> ControlFlow { + self.xload16le_s64_offset32(dst, ptr, offset.into()) + } + + fn xload32le_u64_offset8(&mut self, dst: XReg, ptr: XReg, offset: u8) -> ControlFlow { + self.xload32le_u64_offset32(dst, ptr, offset.into()) + } + + fn xload32le_s64_offset8(&mut self, dst: XReg, ptr: XReg, offset: u8) -> ControlFlow { + self.xload32le_s64_offset32(dst, ptr, offset.into()) + } + + fn xload64le_offset8(&mut self, dst: XReg, ptr: XReg, offset: u8) -> ControlFlow { + self.xload64le_offset32(dst, ptr, offset.into()) + } + + fn xstore8_offset8(&mut self, ptr: XReg, offset: u8, src: XReg) -> ControlFlow { + self.xstore8_offset32(ptr, offset.into(), src) + } + + fn xstore16le_offset8(&mut self, ptr: XReg, offset: u8, src: XReg) -> ControlFlow { + 
self.xstore16le_offset32(ptr, offset.into(), src) + } + + fn xstore32le_offset8(&mut self, ptr: XReg, offset: u8, src: XReg) -> ControlFlow { + self.xstore32le_offset32(ptr, offset.into(), src) + } + + fn xstore64le_offset8(&mut self, ptr: XReg, offset: u8, src: XReg) -> ControlFlow { + self.xstore64le_offset32(ptr, offset.into(), src) + } + fn xload8_u32_offset32(&mut self, dst: XReg, ptr: XReg, offset: i32) -> ControlFlow { let val = unsafe { self.load::(ptr, offset) }; self.state[dst].set_u32(val.into()); diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs index ffb1c6940f11..aaccb54d6c00 100644 --- a/pulley/src/lib.rs +++ b/pulley/src/lib.rs @@ -299,6 +299,41 @@ macro_rules! for_each_op { /// `*(ptr + offset) = low64(src)` xstore64le_offset32 = XStore64LeOffset32 { ptr: XReg, offset: i32, src: XReg }; + /// `low32(dst) = zext(*(ptr + offset))` + xload8_u32_offset8 = XLoad8U32Offset8 { dst: XReg, ptr: XReg, offset: u8 }; + /// `low32(dst) = sext(*(ptr + offset))` + xload8_s32_offset8 = XLoad8S32Offset8 { dst: XReg, ptr: XReg, offset: u8 }; + /// `low32(dst) = zext(*(ptr + offset))` + xload16le_u32_offset8 = XLoad16LeU32Offset8 { dst: XReg, ptr: XReg, offset: u8 }; + /// `low32(dst) = sext(*(ptr + offset))` + xload16le_s32_offset8 = XLoad16LeS32Offset8 { dst: XReg, ptr: XReg, offset: u8 }; + /// `low32(dst) = *(ptr + offset)` + xload32le_offset8 = XLoad32LeOffset8 { dst: XReg, ptr: XReg, offset: u8 }; + + /// `dst = zext(*(ptr + offset))` + xload8_u64_offset8 = XLoad8U64Offset8 { dst: XReg, ptr: XReg, offset: u8 }; + /// `dst = sext(*(ptr + offset))` + xload8_s64_offset8 = XLoad8S64Offset8 { dst: XReg, ptr: XReg, offset: u8 }; + /// `dst = zext(*(ptr + offset))` + xload16le_u64_offset8 = XLoad16LeU64Offset8 { dst: XReg, ptr: XReg, offset: u8 }; + /// `dst = sext(*(ptr + offset))` + xload16le_s64_offset8 = XLoad16LeS64Offset8 { dst: XReg, ptr: XReg, offset: u8 }; + /// `dst = zext(*(ptr + offset))` + xload32le_u64_offset8 = XLoad32LeU64Offset8 { dst: XReg, ptr: 
XReg, offset: u8 }; + /// `dst = sext(*(ptr + offset))` + xload32le_s64_offset8 = XLoad32LeS64Offset8 { dst: XReg, ptr: XReg, offset: u8 }; + /// `dst = *(ptr + offset)` + xload64le_offset8 = XLoad64LeOffset8 { dst: XReg, ptr: XReg, offset: u8 }; + + /// `*(ptr + offset) = low8(src)` + xstore8_offset8 = XStore8Offset8 { ptr: XReg, offset: u8, src: XReg }; + /// `*(ptr + offset) = low16(src)` + xstore16le_offset8 = XStore16LeOffset8 { ptr: XReg, offset: u8, src: XReg }; + /// `*(ptr + offset) = low32(src)` + xstore32le_offset8 = XStore32LeOffset8 { ptr: XReg, offset: u8, src: XReg }; + /// `*(ptr + offset) = low64(src)` + xstore64le_offset8 = XStore64LeOffset8 { ptr: XReg, offset: u8, src: XReg }; + /// `push lr; push fp; fp = sp` push_frame = PushFrame ; /// `sp = fp; pop fp; pop lr` diff --git a/tests/disas/pulley/call.wat b/tests/disas/pulley/call.wat index 57f6f28d4349..05340bd9d936 100644 --- a/tests/disas/pulley/call.wat +++ b/tests/disas/pulley/call.wat @@ -7,9 +7,9 @@ ) ;; wasm[0]::function[1]: ;; push_frame -;; xload32le_offset32 x3, x0, 44 +;; xload32le_offset8 x3, x0, 44 ;; xmov x6, x0 -;; xload32le_offset32 x0, x6, 52 +;; xload32le_offset8 x0, x6, 52 ;; xmov x1, x6 ;; call_indirect x3 ;; pop_frame diff --git a/tests/disas/pulley/epoch-simple.wat b/tests/disas/pulley/epoch-simple.wat index 8a138229344c..763aaad534aa 100644 --- a/tests/disas/pulley/epoch-simple.wat +++ b/tests/disas/pulley/epoch-simple.wat @@ -7,12 +7,12 @@ ) ;; wasm[0]::function[0]: ;; push_frame -;; xload64le_offset32 x6, x0, 8 -;; xload64le_offset32 x7, x0, 32 -;; xload64le_offset32 x7, x7, 0 -;; xload64le_offset32 x6, x6, 8 -;; br_if_xulteq64 x6, x7, 0x9 // target = 0x26 -;; 24: pop_frame +;; xload64le_offset8 x6, x0, 8 +;; xload64le_offset8 x7, x0, 32 +;; xload64le_offset8 x7, x7, 0 +;; xload64le_offset8 x6, x6, 8 +;; br_if_xulteq64 x6, x7, 0x9 // target = 0x1a +;; 18: pop_frame ;; ret -;; 26: call 0xbf // target = 0xe5 -;; 2b: jump 0xfffffffffffffff9 // target = 0x24 +;; 1a: call 
0xa4 // target = 0xbe +;; 1f: jump 0xfffffffffffffff9 // target = 0x18 diff --git a/tests/disas/pulley/memory-inbounds.wat b/tests/disas/pulley/memory-inbounds.wat index a6cea4efc631..f4a7d6fbffa2 100644 --- a/tests/disas/pulley/memory-inbounds.wat +++ b/tests/disas/pulley/memory-inbounds.wat @@ -20,99 +20,99 @@ ;; wasm[0]::function[0]::offset0: ;; push_frame -;; xload64le_offset32 x3, x0, 96 -;; xload32le_offset32 x0, x3, 0 +;; xload64le_offset8 x3, x0, 96 +;; xload32le_offset8 x0, x3, 0 ;; pop_frame ;; ret ;; ;; wasm[0]::function[1]::offset100: ;; push_frame -;; xload64le_offset32 x3, x0, 96 -;; xload32le_offset32 x0, x3, 100 +;; xload64le_offset8 x3, x0, 96 +;; xload32le_offset8 x0, x3, 100 ;; pop_frame ;; ret ;; ;; wasm[0]::function[2]::offset_mixed: ;; push_frame -;; xload64le_offset32 x3, x0, 96 -;; xload32le_offset32 x0, x3, 200 +;; xload64le_offset8 x3, x0, 96 +;; xload32le_offset8 x0, x3, 200 ;; pop_frame ;; ret ;; ;; wasm[0]::function[3]::offset_just_ok: ;; push_frame -;; xload64le_offset32 x3, x0, 96 +;; xload64le_offset8 x3, x0, 96 ;; xload32le_offset32 x0, x3, 65532 ;; pop_frame ;; ret ;; ;; wasm[0]::function[4]::offset_just_bad: ;; push_frame -;; xload64le_offset32 x6, x0, 104 +;; xload64le_offset8 x6, x0, 104 ;; xsub64_u8 x6, x6, 4 ;; xconst32 x7, 65533 -;; br_if_xult64 x6, x7, 0x17 // target = 0x29 -;; 19: xload64le_offset32 x7, x0, 96 +;; br_if_xult64 x6, x7, 0x14 // target = 0x23 +;; 16: xload64le_offset8 x7, x0, 96 ;; xload32le_offset32 x0, x7, 65533 ;; pop_frame ;; ret -;; 29: trap +;; 23: trap ;; ;; wasm[0]::function[5]::offset_just_ok_v2: ;; push_frame -;; xload64le_offset32 x3, x0, 96 +;; xload64le_offset8 x3, x0, 96 ;; xload32le_offset32 x0, x3, 65532 ;; pop_frame ;; ret ;; ;; wasm[0]::function[6]::offset_just_bad_v2: ;; push_frame -;; xload64le_offset32 x6, x0, 104 +;; xload64le_offset8 x6, x0, 104 ;; xsub64_u32 x6, x6, 65536 ;; xconst8 x7, 0 -;; br_if_xeq64 x6, x7, 0x17 // target = 0x29 -;; 19: xload64le_offset32 x7, x0, 96 +;; br_if_xeq64 
x6, x7, 0x14 // target = 0x23 +;; 16: xload64le_offset8 x7, x0, 96 ;; xload32le_offset32 x0, x7, 65533 ;; pop_frame ;; ret -;; 29: trap +;; 23: trap ;; ;; wasm[0]::function[7]::maybe_inbounds: ;; push_frame -;; xload64le_offset32 x6, x0, 104 +;; xload64le_offset8 x6, x0, 104 ;; xsub64_u8 x6, x6, 4 ;; xconst32 x7, 131068 -;; br_if_xult64 x6, x7, 0x17 // target = 0x29 -;; 19: xload64le_offset32 x7, x0, 96 +;; br_if_xult64 x6, x7, 0x14 // target = 0x23 +;; 16: xload64le_offset8 x7, x0, 96 ;; xload32le_offset32 x0, x7, 131068 ;; pop_frame ;; ret -;; 29: trap +;; 23: trap ;; ;; wasm[0]::function[8]::maybe_inbounds_v2: ;; push_frame ;; xconst8 x7, 0 ;; xconst32 x8, 131072 ;; xadd64_uoverflow_trap x7, x7, x8 -;; xload64le_offset32 x8, x0, 104 -;; br_if_xult64 x8, x7, 0x17 // target = 0x2d -;; 1d: xload64le_offset32 x8, x0, 96 +;; xload64le_offset8 x8, x0, 104 +;; br_if_xult64 x8, x7, 0x14 // target = 0x27 +;; 1a: xload64le_offset8 x8, x0, 96 ;; xload32le_offset32 x0, x8, 131068 ;; pop_frame ;; ret -;; 2d: trap +;; 27: trap ;; ;; wasm[0]::function[9]::never_inbounds: ;; push_frame -;; xload64le_offset32 x6, x0, 104 +;; xload64le_offset8 x6, x0, 104 ;; xsub64_u8 x6, x6, 4 ;; xconst32 x7, 131069 -;; br_if_xult64 x6, x7, 0x17 // target = 0x29 -;; 19: xload64le_offset32 x7, x0, 96 +;; br_if_xult64 x6, x7, 0x14 // target = 0x23 +;; 16: xload64le_offset8 x7, x0, 96 ;; xload32le_offset32 x0, x7, 131069 ;; pop_frame ;; ret -;; 29: trap +;; 23: trap ;; ;; wasm[0]::function[10]::never_inbounds_v2: ;; push_frame