Skip to content

Commit

Permalink
pulley: Get simd_align.wast test passing
Browse files Browse the repository at this point in the history
Fill out vector load-and-extend instructions.
  • Loading branch information
alexcrichton committed Dec 17, 2024
1 parent 035c862 commit 13781e9
Show file tree
Hide file tree
Showing 7 changed files with 98 additions and 8 deletions.
10 changes: 6 additions & 4 deletions cranelift/codegen/src/isa/pulley_shared/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
;; control behavior such as endianness.
(XLoad (dst WritableXReg) (mem Amode) (ty Type) (flags MemFlags) (ext ExtKind))
(FLoad (dst WritableFReg) (mem Amode) (ty Type) (flags MemFlags))
(VLoad (dst WritableVReg) (mem Amode) (ty Type) (flags MemFlags))
(VLoad (dst WritableVReg) (mem Amode) (ty Type) (flags MemFlags) (ext VExtKind))

;; Stores.
(XStore (mem Amode) (src XReg) (ty Type) (flags MemFlags))
Expand Down Expand Up @@ -140,6 +140,8 @@

;; Extension mode carried in the `ext` field of `MInst.XLoad`.
;; NOTE(review): variant names suggest sign-/zero-extension of the loaded value
;; to 32 or 64 bits, with `None` meaning no extension -- confirm against the
;; `XLoad` emission code.
(type ExtKind (enum None Sign32 Sign64 Zero32 Zero64))

;; Extension mode carried in the `ext` field of `MInst.VLoad`.
;;
;; `None` is emitted as a plain 128-bit little-endian vector load. Every other
;; variant selects the load-and-extend opcode of the same shape and signedness,
;; e.g. `S8x8` is emitted as `vload8x8_s_offset32` and `U32x2` as
;; `vload32x2le_u_offset32`.
(type VExtKind (enum None S8x8 U8x8 S16x4 U16x4 S32x2 U32x2))

;;;; Newtypes for Different Register Classes ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(type XReg (primitive XReg))
Expand Down Expand Up @@ -414,10 +416,10 @@
;; Build an `MInst.FStore` from the given address mode, source register, type
;; and memory flags, wrapped as a side-effecting instruction that produces no
;; result value.
(rule (pulley_fstore amode src ty flags)
(SideEffectNoResult.Inst (MInst.FStore amode src ty flags)))

(decl pulley_vload (Amode Type MemFlags) VReg)
(rule (pulley_vload amode ty flags)
(decl pulley_vload (Amode Type MemFlags VExtKind) VReg)
(rule (pulley_vload amode ty flags ext)
(let ((dst WritableVReg (temp_writable_vreg))
(_ Unit (emit (MInst.VLoad dst amode ty flags))))
(_ Unit (emit (MInst.VLoad dst amode ty flags ext))))
dst))

(decl pulley_vstore (Amode VReg Type MemFlags) SideEffectNoResult)
Expand Down
11 changes: 10 additions & 1 deletion cranelift/codegen/src/isa/pulley_shared/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -370,13 +370,22 @@ fn pulley_emit<P>(
mem,
ty,
flags,
ext,
} => {
let r = mem.get_base_register().unwrap();
let x = mem.get_offset_with_state(state);
let endian = emit_info.endianness(*flags);
assert_eq!(endian, Endianness::Little);
assert_eq!(ty.bytes(), 16);
enc::vload128le_offset32(sink, dst, r, x);
match ext {
VExtKind::None => enc::vload128le_offset32(sink, dst, r, x),
VExtKind::S8x8 => enc::vload8x8_s_offset32(sink, dst, r, x),
VExtKind::U8x8 => enc::vload8x8_u_offset32(sink, dst, r, x),
VExtKind::S16x4 => enc::vload16x4le_s_offset32(sink, dst, r, x),
VExtKind::U16x4 => enc::vload16x4le_u_offset32(sink, dst, r, x),
VExtKind::S32x2 => enc::vload32x2le_s_offset32(sink, dst, r, x),
VExtKind::U32x2 => enc::vload32x2le_u_offset32(sink, dst, r, x),
}
}

Inst::XStore {
Expand Down
6 changes: 5 additions & 1 deletion cranelift/codegen/src/isa/pulley_shared/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ pub use self::emit::*;

pub use crate::isa::pulley_shared::lower::isle::generated_code::MInst as Inst;
pub use crate::isa::pulley_shared::lower::isle::generated_code::RawInst;
pub use crate::isa::pulley_shared::lower::isle::generated_code::VExtKind;

impl From<RawInst> for Inst {
fn from(raw: RawInst) -> Inst {
Expand All @@ -51,6 +52,7 @@ impl Inst {
mem,
ty,
flags,
ext: VExtKind::None,
}
} else if ty.is_int() {
Inst::XLoad {
Expand Down Expand Up @@ -216,6 +218,7 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
mem,
ty: _,
flags: _,
ext: _,
} => {
collector.reg_def(dst);
mem.get_operands(collector);
Expand Down Expand Up @@ -651,11 +654,12 @@ impl Inst {
mem,
ty,
flags,
ext,
} => {
let dst = format_reg(*dst.to_reg());
let ty = ty.bits();
let mem = mem.to_string();
format!("{dst} = vload{ty} {mem} // flags ={flags}")
format!("{dst} = vload{ty}_{ext:?} {mem} // flags ={flags}")
}

Inst::VStore {
Expand Down
20 changes: 19 additions & 1 deletion cranelift/codegen/src/isa/pulley_shared/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,25 @@
(pulley_xload (amode addr offset) $I32 flags (ExtKind.Sign64)))

(rule 2 (lower (has_type (ty_vec128 ty) (load flags addr offset)))
(pulley_vload (amode addr offset) ty flags))
(pulley_vload (amode addr offset) ty flags (VExtKind.None)))

;; Vector load-and-extend lowerings. Each CLIF `{s,u}loadNxM` opcode becomes a
;; single `pulley_vload` whose `VExtKind` names the same lane shape and
;; signedness; the instruction's result type `ty` is passed through unchanged.

;; `sload8x8` -> `VExtKind.S8x8`.
(rule (lower (has_type ty (sload8x8 flags addr offset)))
(pulley_vload (amode addr offset) ty flags (VExtKind.S8x8)))

;; `uload8x8` -> `VExtKind.U8x8`.
(rule (lower (has_type ty (uload8x8 flags addr offset)))
(pulley_vload (amode addr offset) ty flags (VExtKind.U8x8)))

;; `sload16x4` -> `VExtKind.S16x4`.
(rule (lower (has_type ty (sload16x4 flags addr offset)))
(pulley_vload (amode addr offset) ty flags (VExtKind.S16x4)))

;; `uload16x4` -> `VExtKind.U16x4`.
(rule (lower (has_type ty (uload16x4 flags addr offset)))
(pulley_vload (amode addr offset) ty flags (VExtKind.U16x4)))

;; `sload32x2` -> `VExtKind.S32x2`.
(rule (lower (has_type ty (sload32x2 flags addr offset)))
(pulley_vload (amode addr offset) ty flags (VExtKind.S32x2)))

;; `uload32x2` -> `VExtKind.U32x2`.
(rule (lower (has_type ty (uload32x2 flags addr offset)))
(pulley_vload (amode addr offset) ty flags (VExtKind.U32x2)))

;;;; Rules for `store` and friends ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down
10 changes: 9 additions & 1 deletion crates/wast-util/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,15 @@ impl WastTest {
"spec_testsuite/proposals/relaxed-simd/relaxed_laneselect.wast",
"spec_testsuite/proposals/relaxed-simd/relaxed_madd_nmadd.wast",
"spec_testsuite/proposals/relaxed-simd/relaxed_min_max.wast",
"spec_testsuite/simd_align.wast",
"spec_testsuite/proposals/memory64/simd_lane.wast",
"spec_testsuite/proposals/memory64/simd_memory-multi.wast",
"spec_testsuite/proposals/memory64/relaxed_min_max.wast",
"spec_testsuite/proposals/memory64/relaxed_madd_nmadd.wast",
"spec_testsuite/proposals/memory64/relaxed_laneselect.wast",
"spec_testsuite/proposals/memory64/relaxed_dot_product.wast",
"spec_testsuite/proposals/memory64/i16x8_relaxed_q15mulr_s.wast",
"spec_testsuite/proposals/memory64/i32x4_relaxed_trunc.wast",
"spec_testsuite/proposals/memory64/i8x16_relaxed_swizzle.wast",
"spec_testsuite/simd_bitwise.wast",
"spec_testsuite/simd_boolean.wast",
"spec_testsuite/simd_conversions.wast",
Expand Down
36 changes: 36 additions & 0 deletions pulley/src/interp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2731,6 +2731,42 @@ impl OpVisitor for Interpreter<'_> {
self.state[dst].set_f64x2([val; 2]);
ControlFlow::Continue(())
}

/// Loads eight `i8` lanes and widens each one, with sign extension, into the
/// `i16x8` view of `dst`.
fn vload8x8_s_offset32(&mut self, dst: VReg, ptr: XReg, offset: i32) -> ControlFlow<Done> {
    // NOTE(review): soundness rests on the contract of `self.load`, which is
    // not visible here -- confirm that `ptr`/`offset` validity is guaranteed.
    let narrow = unsafe { self.load::<[i8; 8]>(ptr, offset) };
    // `i16::from(i8)` is a lossless, sign-preserving widening.
    let wide = narrow.map(i16::from);
    self.state[dst].set_i16x8(wide);
    ControlFlow::Continue(())
}

/// Loads eight `u8` lanes and widens each one, with zero extension, into the
/// `u16x8` view of `dst`.
fn vload8x8_u_offset32(&mut self, dst: VReg, ptr: XReg, offset: i32) -> ControlFlow<Done> {
    // NOTE(review): soundness rests on the contract of `self.load`, which is
    // not visible here -- confirm that `ptr`/`offset` validity is guaranteed.
    let narrow = unsafe { self.load::<[u8; 8]>(ptr, offset) };
    // `u16::from(u8)` is a lossless zero-extending widening.
    let wide = narrow.map(u16::from);
    self.state[dst].set_u16x8(wide);
    ControlFlow::Continue(())
}

/// Loads four little-endian `i16` lanes and widens each one, with sign
/// extension, into the `i32x4` view of `dst`.
fn vload16x4le_s_offset32(&mut self, dst: VReg, ptr: XReg, offset: i32) -> ControlFlow<Done> {
    // NOTE(review): soundness rests on the contract of `self.load`, which is
    // not visible here -- confirm that `ptr`/`offset` validity is guaranteed.
    let raw = unsafe { self.load::<[i16; 4]>(ptr, offset) };
    // Convert each lane from little-endian to native order before widening.
    let wide = raw.map(|lane| i32::from(i16::from_le(lane)));
    self.state[dst].set_i32x4(wide);
    ControlFlow::Continue(())
}

/// Loads four little-endian `u16` lanes and widens each one, with zero
/// extension, into the `u32x4` view of `dst`.
fn vload16x4le_u_offset32(&mut self, dst: VReg, ptr: XReg, offset: i32) -> ControlFlow<Done> {
    // NOTE(review): soundness rests on the contract of `self.load`, which is
    // not visible here -- confirm that `ptr`/`offset` validity is guaranteed.
    let raw = unsafe { self.load::<[u16; 4]>(ptr, offset) };
    // Convert each lane from little-endian to native order before widening.
    let wide = raw.map(|lane| u32::from(u16::from_le(lane)));
    self.state[dst].set_u32x4(wide);
    ControlFlow::Continue(())
}

/// Loads two little-endian `i32` lanes and widens each one, with sign
/// extension, into the `i64x2` view of `dst`.
fn vload32x2le_s_offset32(&mut self, dst: VReg, ptr: XReg, offset: i32) -> ControlFlow<Done> {
    // NOTE(review): soundness rests on the contract of `self.load`, which is
    // not visible here -- confirm that `ptr`/`offset` validity is guaranteed.
    let raw = unsafe { self.load::<[i32; 2]>(ptr, offset) };
    // Convert each lane from little-endian to native order before widening.
    let wide = raw.map(|lane| i64::from(i32::from_le(lane)));
    self.state[dst].set_i64x2(wide);
    ControlFlow::Continue(())
}

/// Loads two little-endian `u32` lanes and widens each one, with zero
/// extension, into the `u64x2` view of `dst`.
fn vload32x2le_u_offset32(&mut self, dst: VReg, ptr: XReg, offset: i32) -> ControlFlow<Done> {
    // NOTE(review): soundness rests on the contract of `self.load`, which is
    // not visible here -- confirm that `ptr`/`offset` validity is guaranteed.
    let raw = unsafe { self.load::<[u32; 2]>(ptr, offset) };
    // Convert each lane from little-endian to native order before widening.
    let wide = raw.map(|lane| u64::from(u32::from_le(lane)));
    self.state[dst].set_u64x2(wide);
    ControlFlow::Continue(())
}
}

impl ExtendedOpVisitor for Interpreter<'_> {
Expand Down
13 changes: 13 additions & 0 deletions pulley/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,19 @@ macro_rules! for_each_op {
vsplatf32 = VSplatF32 { dst: VReg, src: FReg };
/// `dst = splat(src)`
vsplatf64 = VSplatF64 { dst: VReg, src: FReg };

/// Load the 64-bit source as i8x8 and sign-extend each lane to i16x8.
vload8x8_s_offset32 = VLoad8x8SOffset32 { dst: VReg, ptr: XReg, offset: i32 };
/// Load the 64-bit source as u8x8 and zero-extend each lane to i16x8.
vload8x8_u_offset32 = VLoad8x8UOffset32 { dst: VReg, ptr: XReg, offset: i32 };
/// Load the 64-bit source as little-endian i16x4 and sign-extend each lane to i32x4.
vload16x4le_s_offset32 = VLoad16x4LeSOffset32 { dst: VReg, ptr: XReg, offset: i32 };
/// Load the 64-bit source as little-endian u16x4 and zero-extend each lane to i32x4.
vload16x4le_u_offset32 = VLoad16x4LeUOffset32 { dst: VReg, ptr: XReg, offset: i32 };
/// Load the 64-bit source as little-endian i32x2 and sign-extend each lane to i64x2.
vload32x2le_s_offset32 = VLoad32x2LeSOffset32 { dst: VReg, ptr: XReg, offset: i32 };
/// Load the 64-bit source as little-endian u32x2 and zero-extend each lane to i64x2.
vload32x2le_u_offset32 = VLoad32x2LeUOffset32 { dst: VReg, ptr: XReg, offset: i32 };
}
};
}
Expand Down

0 comments on commit 13781e9

Please sign in to comment.