diff --git a/cranelift/codegen/src/isa/pulley_shared/inst.isle b/cranelift/codegen/src/isa/pulley_shared/inst.isle
index ab0d39a96a16..2f2539917a92 100644
--- a/cranelift/codegen/src/isa/pulley_shared/inst.isle
+++ b/cranelift/codegen/src/isa/pulley_shared/inst.isle
@@ -78,7 +78,7 @@
;; control behavior such as endianness.
(XLoad (dst WritableXReg) (mem Amode) (ty Type) (flags MemFlags) (ext ExtKind))
(FLoad (dst WritableFReg) (mem Amode) (ty Type) (flags MemFlags))
- (VLoad (dst WritableVReg) (mem Amode) (ty Type) (flags MemFlags))
+ (VLoad (dst WritableVReg) (mem Amode) (ty Type) (flags MemFlags) (ext VExtKind))
;; Stores.
(XStore (mem Amode) (src XReg) (ty Type) (flags MemFlags))
@@ -148,6 +148,8 @@
(type ExtKind (enum None Sign32 Sign64 Zero32 Zero64))
+(type VExtKind (enum None S8x8 U8x8 S16x4 U16x4 S32x2 U32x2))
+
;;;; Newtypes for Different Register Classes ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(type XReg (primitive XReg))
@@ -422,10 +424,10 @@
(rule (pulley_fstore amode src ty flags)
(SideEffectNoResult.Inst (MInst.FStore amode src ty flags)))
-(decl pulley_vload (Amode Type MemFlags) VReg)
-(rule (pulley_vload amode ty flags)
+(decl pulley_vload (Amode Type MemFlags VExtKind) VReg)
+(rule (pulley_vload amode ty flags ext)
(let ((dst WritableVReg (temp_writable_vreg))
- (_ Unit (emit (MInst.VLoad dst amode ty flags))))
+ (_ Unit (emit (MInst.VLoad dst amode ty flags ext))))
dst))
(decl pulley_vstore (Amode VReg Type MemFlags) SideEffectNoResult)
diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs
index 5b0c435e83b7..642662eb8e43 100644
--- a/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs
+++ b/cranelift/codegen/src/isa/pulley_shared/inst/emit.rs
@@ -393,13 +393,22 @@ fn pulley_emit
(
mem,
ty,
flags,
+ ext,
} => {
let r = mem.get_base_register().unwrap();
let x = mem.get_offset_with_state(state);
let endian = emit_info.endianness(*flags);
assert_eq!(endian, Endianness::Little);
assert_eq!(ty.bytes(), 16);
- enc::vload128le_offset32(sink, dst, r, x);
+ match ext {
+ VExtKind::None => enc::vload128le_offset32(sink, dst, r, x),
+ VExtKind::S8x8 => enc::vload8x8_s_offset32(sink, dst, r, x),
+ VExtKind::U8x8 => enc::vload8x8_u_offset32(sink, dst, r, x),
+ VExtKind::S16x4 => enc::vload16x4le_s_offset32(sink, dst, r, x),
+ VExtKind::U16x4 => enc::vload16x4le_u_offset32(sink, dst, r, x),
+ VExtKind::S32x2 => enc::vload32x2le_s_offset32(sink, dst, r, x),
+ VExtKind::U32x2 => enc::vload32x2le_u_offset32(sink, dst, r, x),
+ }
}
Inst::XStore {
diff --git a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs
index bc79c4b322b8..9805d58996a2 100644
--- a/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs
+++ b/cranelift/codegen/src/isa/pulley_shared/inst/mod.rs
@@ -25,6 +25,7 @@ pub use self::emit::*;
pub use crate::isa::pulley_shared::lower::isle::generated_code::MInst as Inst;
pub use crate::isa::pulley_shared::lower::isle::generated_code::RawInst;
+pub use crate::isa::pulley_shared::lower::isle::generated_code::VExtKind;
impl From for Inst {
fn from(raw: RawInst) -> Inst {
@@ -65,6 +66,7 @@ impl Inst {
mem,
ty,
flags,
+ ext: VExtKind::None,
}
} else if ty.is_int() {
Inst::XLoad {
@@ -242,6 +244,7 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
mem,
ty: _,
flags: _,
+ ext: _,
} => {
collector.reg_def(dst);
mem.get_operands(collector);
@@ -687,11 +690,12 @@ impl Inst {
mem,
ty,
flags,
+ ext,
} => {
let dst = format_reg(*dst.to_reg());
let ty = ty.bits();
let mem = mem.to_string();
- format!("{dst} = vload{ty} {mem} // flags ={flags}")
+ format!("{dst} = vload{ty}_{ext:?} {mem} // flags ={flags}")
}
Inst::VStore {
diff --git a/cranelift/codegen/src/isa/pulley_shared/lower.isle b/cranelift/codegen/src/isa/pulley_shared/lower.isle
index 0ae6935ef11c..84108d07c638 100644
--- a/cranelift/codegen/src/isa/pulley_shared/lower.isle
+++ b/cranelift/codegen/src/isa/pulley_shared/lower.isle
@@ -504,7 +504,25 @@
(pulley_xload (amode addr offset) $I32 flags (ExtKind.Sign64)))
(rule 2 (lower (has_type (ty_vec128 ty) (load flags addr offset)))
- (pulley_vload (amode addr offset) ty flags))
+ (pulley_vload (amode addr offset) ty flags (VExtKind.None)))
+
+(rule (lower (has_type ty (sload8x8 flags addr offset)))
+ (pulley_vload (amode addr offset) ty flags (VExtKind.S8x8)))
+
+(rule (lower (has_type ty (uload8x8 flags addr offset)))
+ (pulley_vload (amode addr offset) ty flags (VExtKind.U8x8)))
+
+(rule (lower (has_type ty (sload16x4 flags addr offset)))
+ (pulley_vload (amode addr offset) ty flags (VExtKind.S16x4)))
+
+(rule (lower (has_type ty (uload16x4 flags addr offset)))
+ (pulley_vload (amode addr offset) ty flags (VExtKind.U16x4)))
+
+(rule (lower (has_type ty (sload32x2 flags addr offset)))
+ (pulley_vload (amode addr offset) ty flags (VExtKind.S32x2)))
+
+(rule (lower (has_type ty (uload32x2 flags addr offset)))
+ (pulley_vload (amode addr offset) ty flags (VExtKind.U32x2)))
;;;; Rules for `store` and friends ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/crates/wast-util/src/lib.rs b/crates/wast-util/src/lib.rs
index 39fb051330cb..ce3647c92d09 100644
--- a/crates/wast-util/src/lib.rs
+++ b/crates/wast-util/src/lib.rs
@@ -431,7 +431,6 @@ impl WastTest {
"spec_testsuite/proposals/memory64/i16x8_relaxed_q15mulr_s.wast",
"spec_testsuite/proposals/memory64/i32x4_relaxed_trunc.wast",
"spec_testsuite/proposals/memory64/i8x16_relaxed_swizzle.wast",
- "spec_testsuite/simd_align.wast",
"spec_testsuite/simd_bitwise.wast",
"spec_testsuite/simd_boolean.wast",
"spec_testsuite/simd_conversions.wast",
diff --git a/pulley/src/interp.rs b/pulley/src/interp.rs
index 54f8ada170d8..20ce218a9139 100644
--- a/pulley/src/interp.rs
+++ b/pulley/src/interp.rs
@@ -2754,6 +2754,42 @@ impl OpVisitor for Interpreter<'_> {
self.state[dst].set_f64x2([val; 2]);
ControlFlow::Continue(())
}
+
+ fn vload8x8_s_offset32(&mut self, dst: VReg, ptr: XReg, offset: i32) -> ControlFlow {
+ let val = unsafe { self.load::<[i8; 8]>(ptr, offset) };
+ self.state[dst].set_i16x8(val.map(|i| i.into()));
+ ControlFlow::Continue(())
+ }
+
+ fn vload8x8_u_offset32(&mut self, dst: VReg, ptr: XReg, offset: i32) -> ControlFlow {
+ let val = unsafe { self.load::<[u8; 8]>(ptr, offset) };
+ self.state[dst].set_u16x8(val.map(|i| i.into()));
+ ControlFlow::Continue(())
+ }
+
+ fn vload16x4le_s_offset32(&mut self, dst: VReg, ptr: XReg, offset: i32) -> ControlFlow {
+ let val = unsafe { self.load::<[i16; 4]>(ptr, offset) };
+ self.state[dst].set_i32x4(val.map(|i| i16::from_le(i).into()));
+ ControlFlow::Continue(())
+ }
+
+ fn vload16x4le_u_offset32(&mut self, dst: VReg, ptr: XReg, offset: i32) -> ControlFlow {
+ let val = unsafe { self.load::<[u16; 4]>(ptr, offset) };
+ self.state[dst].set_u32x4(val.map(|i| u16::from_le(i).into()));
+ ControlFlow::Continue(())
+ }
+
+ fn vload32x2le_s_offset32(&mut self, dst: VReg, ptr: XReg, offset: i32) -> ControlFlow {
+ let val = unsafe { self.load::<[i32; 2]>(ptr, offset) };
+ self.state[dst].set_i64x2(val.map(|i| i32::from_le(i).into()));
+ ControlFlow::Continue(())
+ }
+
+ fn vload32x2le_u_offset32(&mut self, dst: VReg, ptr: XReg, offset: i32) -> ControlFlow {
+ let val = unsafe { self.load::<[u32; 2]>(ptr, offset) };
+ self.state[dst].set_u64x2(val.map(|i| u32::from_le(i).into()));
+ ControlFlow::Continue(())
+ }
}
impl ExtendedOpVisitor for Interpreter<'_> {
diff --git a/pulley/src/lib.rs b/pulley/src/lib.rs
index 415a6ea89de7..219dc99971ea 100644
--- a/pulley/src/lib.rs
+++ b/pulley/src/lib.rs
@@ -631,6 +631,19 @@ macro_rules! for_each_op {
vsplatf32 = VSplatF32 { dst: VReg, src: FReg };
/// `dst = splat(src)`
vsplatf64 = VSplatF64 { dst: VReg, src: FReg };
+
+ /// Load the 64-bit source as i8x8 and sign-extend each lane to i16x8.
+ vload8x8_s_offset32 = VLoad8x8SOffset32 { dst: VReg, ptr: XReg, offset: i32 };
+ /// Load the 64-bit source as u8x8 and zero-extend each lane to i16x8.
+ vload8x8_u_offset32 = VLoad8x8UOffset32 { dst: VReg, ptr: XReg, offset: i32 };
+ /// Load the 64-bit source as little-endian i16x4 and sign-extend each lane to i32x4.
+ vload16x4le_s_offset32 = VLoad16x4LeSOffset32 { dst: VReg, ptr: XReg, offset: i32 };
+ /// Load the 64-bit source as little-endian u16x4 and zero-extend each lane to i32x4.
+ vload16x4le_u_offset32 = VLoad16x4LeUOffset32 { dst: VReg, ptr: XReg, offset: i32 };
+ /// Load the 64-bit source as little-endian i32x2 and sign-extend each lane to i64x2.
+ vload32x2le_s_offset32 = VLoad32x2LeSOffset32 { dst: VReg, ptr: XReg, offset: i32 };
+ /// Load the 64-bit source as little-endian u32x2 and zero-extend each lane to i64x2.
+ vload32x2le_u_offset32 = VLoad32x2LeUOffset32 { dst: VReg, ptr: XReg, offset: i32 };
}
};
}