Skip to content

Commit f8f79ba

Browse files
authored
Merge pull request #2075 from akirilov-arm/simd_fp_arith
AArch64: Implement SIMD floating-point arithmetic
2 parents 71025e3 + adf25d2 commit f8f79ba

File tree

5 files changed

+248
-38
lines changed

5 files changed

+248
-38
lines changed

build.rs

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -185,7 +185,11 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
185185
("simd", "simd_bitwise") => return false,
186186
("simd", "simd_bit_shift") => return false,
187187
("simd", "simd_boolean") => return false,
188+
("simd", "simd_f32x4") => return false,
189+
("simd", "simd_f32x4_arith") => return false,
188190
("simd", "simd_f32x4_cmp") => return false,
191+
("simd", "simd_f64x2") => return false,
192+
("simd", "simd_f64x2_arith") => return false,
189193
("simd", "simd_f64x2_cmp") => return false,
190194
("simd", "simd_i8x16_arith") => return false,
191195
("simd", "simd_i8x16_arith2") => return false,

cranelift/codegen/src/isa/aarch64/inst/emit.rs

Lines changed: 42 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1123,6 +1123,18 @@ impl MachInstEmit for Inst {
11231123
VecMisc2::Not => (0b1, 0b00101, 0b00),
11241124
VecMisc2::Neg => (0b1, 0b01011, enc_size),
11251125
VecMisc2::Abs => (0b0, 0b01011, enc_size),
1126+
VecMisc2::Fabs => {
1127+
debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1128+
(0b0, 0b01111, enc_size)
1129+
}
1130+
VecMisc2::Fneg => {
1131+
debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1132+
(0b1, 0b01111, enc_size)
1133+
}
1134+
VecMisc2::Fsqrt => {
1135+
debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
1136+
(0b1, 0b11111, enc_size)
1137+
}
11261138
};
11271139
sink.put4(enc_vec_rr_misc(u, size, bits_12_16, rd, rn));
11281140
}
@@ -1363,9 +1375,22 @@ impl MachInstEmit for Inst {
13631375
VectorSize::Size64x2 => 0b11,
13641376
_ => 0,
13651377
};
1366-
let enc_size_for_fcmp = match size {
1367-
VectorSize::Size32x4 => 0b0,
1368-
VectorSize::Size64x2 => 0b1,
1378+
let is_float = match alu_op {
1379+
VecALUOp::Fcmeq
1380+
| VecALUOp::Fcmgt
1381+
| VecALUOp::Fcmge
1382+
| VecALUOp::Fadd
1383+
| VecALUOp::Fsub
1384+
| VecALUOp::Fdiv
1385+
| VecALUOp::Fmax
1386+
| VecALUOp::Fmin
1387+
| VecALUOp::Fmul => true,
1388+
_ => false,
1389+
};
1390+
let enc_float_size = match (is_float, size) {
1391+
(true, VectorSize::Size32x4) => 0b0,
1392+
(true, VectorSize::Size64x2) => 0b1,
1393+
(true, _) => unimplemented!(),
13691394
_ => 0,
13701395
};
13711396

@@ -1379,9 +1404,9 @@ impl MachInstEmit for Inst {
13791404
VecALUOp::Cmgt => (0b010_01110_00_1 | enc_size << 1, 0b001101),
13801405
VecALUOp::Cmhi => (0b011_01110_00_1 | enc_size << 1, 0b001101),
13811406
VecALUOp::Cmhs => (0b011_01110_00_1 | enc_size << 1, 0b001111),
1382-
VecALUOp::Fcmeq => (0b010_01110_00_1 | enc_size_for_fcmp << 1, 0b111001),
1383-
VecALUOp::Fcmgt => (0b011_01110_10_1 | enc_size_for_fcmp << 1, 0b111001),
1384-
VecALUOp::Fcmge => (0b011_01110_00_1 | enc_size_for_fcmp << 1, 0b111001),
1407+
VecALUOp::Fcmeq => (0b010_01110_00_1, 0b111001),
1408+
VecALUOp::Fcmgt => (0b011_01110_10_1, 0b111001),
1409+
VecALUOp::Fcmge => (0b011_01110_00_1, 0b111001),
13851410
// The following logical instructions operate on bytes, so are not encoded differently
13861411
// for the different vector types.
13871412
VecALUOp::And => (0b010_01110_00_1, 0b000111),
@@ -1403,6 +1428,17 @@ impl MachInstEmit for Inst {
14031428
VecALUOp::Umax => (0b011_01110_00_1 | enc_size << 1, 0b011001),
14041429
VecALUOp::Smax => (0b010_01110_00_1 | enc_size << 1, 0b011001),
14051430
VecALUOp::Urhadd => (0b011_01110_00_1 | enc_size << 1, 0b000101),
1431+
VecALUOp::Fadd => (0b010_01110_00_1, 0b110101),
1432+
VecALUOp::Fsub => (0b010_01110_10_1, 0b110101),
1433+
VecALUOp::Fdiv => (0b011_01110_00_1, 0b111111),
1434+
VecALUOp::Fmax => (0b010_01110_00_1, 0b111101),
1435+
VecALUOp::Fmin => (0b010_01110_10_1, 0b111101),
1436+
VecALUOp::Fmul => (0b011_01110_00_1, 0b110111),
1437+
};
1438+
let top11 = if is_float {
1439+
top11 | enc_float_size << 1
1440+
} else {
1441+
top11
14061442
};
14071443
sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
14081444
}

cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs

Lines changed: 105 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2953,6 +2953,78 @@ fn test_aarch64_binemit() {
29532953
"urhadd v8.4s, v12.4s, v14.4s",
29542954
));
29552955

2956+
insns.push((
2957+
Inst::VecRRR {
2958+
alu_op: VecALUOp::Fadd,
2959+
rd: writable_vreg(31),
2960+
rn: vreg(0),
2961+
rm: vreg(16),
2962+
size: VectorSize::Size32x4,
2963+
},
2964+
"1FD4304E",
2965+
"fadd v31.4s, v0.4s, v16.4s",
2966+
));
2967+
2968+
insns.push((
2969+
Inst::VecRRR {
2970+
alu_op: VecALUOp::Fsub,
2971+
rd: writable_vreg(8),
2972+
rn: vreg(7),
2973+
rm: vreg(15),
2974+
size: VectorSize::Size64x2,
2975+
},
2976+
"E8D4EF4E",
2977+
"fsub v8.2d, v7.2d, v15.2d",
2978+
));
2979+
2980+
insns.push((
2981+
Inst::VecRRR {
2982+
alu_op: VecALUOp::Fdiv,
2983+
rd: writable_vreg(1),
2984+
rn: vreg(3),
2985+
rm: vreg(4),
2986+
size: VectorSize::Size32x4,
2987+
},
2988+
"61FC246E",
2989+
"fdiv v1.4s, v3.4s, v4.4s",
2990+
));
2991+
2992+
insns.push((
2993+
Inst::VecRRR {
2994+
alu_op: VecALUOp::Fmax,
2995+
rd: writable_vreg(31),
2996+
rn: vreg(16),
2997+
rm: vreg(0),
2998+
size: VectorSize::Size64x2,
2999+
},
3000+
"1FF6604E",
3001+
"fmax v31.2d, v16.2d, v0.2d",
3002+
));
3003+
3004+
insns.push((
3005+
Inst::VecRRR {
3006+
alu_op: VecALUOp::Fmin,
3007+
rd: writable_vreg(5),
3008+
rn: vreg(19),
3009+
rm: vreg(26),
3010+
size: VectorSize::Size32x4,
3011+
},
3012+
"65F6BA4E",
3013+
"fmin v5.4s, v19.4s, v26.4s",
3014+
));
3015+
3016+
insns.push((
3017+
Inst::VecRRR {
3018+
alu_op: VecALUOp::Fmul,
3019+
rd: writable_vreg(2),
3020+
rn: vreg(0),
3021+
rm: vreg(5),
3022+
size: VectorSize::Size64x2,
3023+
},
3024+
"02DC656E",
3025+
"fmul v2.2d, v0.2d, v5.2d",
3026+
));
3027+
29563028
insns.push((
29573029
Inst::VecMisc {
29583030
op: VecMisc2::Not,
@@ -3052,6 +3124,39 @@ fn test_aarch64_binemit() {
30523124
"abs v1.2d, v10.2d",
30533125
));
30543126

3127+
insns.push((
3128+
Inst::VecMisc {
3129+
op: VecMisc2::Fabs,
3130+
rd: writable_vreg(15),
3131+
rn: vreg(16),
3132+
size: VectorSize::Size32x4,
3133+
},
3134+
"0FFAA04E",
3135+
"fabs v15.4s, v16.4s",
3136+
));
3137+
3138+
insns.push((
3139+
Inst::VecMisc {
3140+
op: VecMisc2::Fneg,
3141+
rd: writable_vreg(31),
3142+
rn: vreg(0),
3143+
size: VectorSize::Size32x4,
3144+
},
3145+
"1FF8A06E",
3146+
"fneg v31.4s, v0.4s",
3147+
));
3148+
3149+
insns.push((
3150+
Inst::VecMisc {
3151+
op: VecMisc2::Fsqrt,
3152+
rd: writable_vreg(7),
3153+
rn: vreg(18),
3154+
size: VectorSize::Size64x2,
3155+
},
3156+
"47FAE16E",
3157+
"fsqrt v7.2d, v18.2d",
3158+
));
3159+
30553160
insns.push((
30563161
Inst::VecLanes {
30573162
op: VecLanesOp::Uminv,

cranelift/codegen/src/isa/aarch64/inst/mod.rs

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -271,6 +271,18 @@ pub enum VecALUOp {
271271
Smax,
272272
/// Unsigned rounding halving add
273273
Urhadd,
274+
/// Floating-point add
275+
Fadd,
276+
/// Floating-point subtract
277+
Fsub,
278+
/// Floating-point divide
279+
Fdiv,
280+
/// Floating-point maximum
281+
Fmax,
282+
/// Floating-point minimum
283+
Fmin,
284+
/// Floating-point multiply
285+
Fmul,
274286
}
275287

276288
/// A Vector miscellaneous operation with two registers.
@@ -282,6 +294,12 @@ pub enum VecMisc2 {
282294
Neg,
283295
/// Absolute value
284296
Abs,
297+
/// Floating-point absolute value
298+
Fabs,
299+
/// Floating-point negate
300+
Fneg,
301+
/// Floating-point square root
302+
Fsqrt,
285303
}
286304

287305
/// An operation across the lanes of vectors.
@@ -2810,6 +2828,12 @@ impl Inst {
28102828
VecALUOp::Umax => ("umax", size),
28112829
VecALUOp::Smax => ("smax", size),
28122830
VecALUOp::Urhadd => ("urhadd", size),
2831+
VecALUOp::Fadd => ("fadd", size),
2832+
VecALUOp::Fsub => ("fsub", size),
2833+
VecALUOp::Fdiv => ("fdiv", size),
2834+
VecALUOp::Fmax => ("fmax", size),
2835+
VecALUOp::Fmin => ("fmin", size),
2836+
VecALUOp::Fmul => ("fmul", size),
28132837
};
28142838
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
28152839
let rn = show_vreg_vector(rn, mb_rru, size);
@@ -2821,6 +2845,9 @@ impl Inst {
28212845
VecMisc2::Not => ("mvn", VectorSize::Size8x16),
28222846
VecMisc2::Neg => ("neg", size),
28232847
VecMisc2::Abs => ("abs", size),
2848+
VecMisc2::Fabs => ("fabs", size),
2849+
VecMisc2::Fneg => ("fneg", size),
2850+
VecMisc2::Fsqrt => ("fsqrt", size),
28242851
};
28252852

28262853
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);

cranelift/codegen/src/isa/aarch64/lower_inst.rs

Lines changed: 70 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -1802,46 +1802,84 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
18021802
}
18031803

18041804
Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv | Opcode::Fmin | Opcode::Fmax => {
1805-
let bits = ty_bits(ctx.output_ty(insn, 0));
1806-
let fpu_op = match (op, bits) {
1807-
(Opcode::Fadd, 32) => FPUOp2::Add32,
1808-
(Opcode::Fadd, 64) => FPUOp2::Add64,
1809-
(Opcode::Fsub, 32) => FPUOp2::Sub32,
1810-
(Opcode::Fsub, 64) => FPUOp2::Sub64,
1811-
(Opcode::Fmul, 32) => FPUOp2::Mul32,
1812-
(Opcode::Fmul, 64) => FPUOp2::Mul64,
1813-
(Opcode::Fdiv, 32) => FPUOp2::Div32,
1814-
(Opcode::Fdiv, 64) => FPUOp2::Div64,
1815-
(Opcode::Fmin, 32) => FPUOp2::Min32,
1816-
(Opcode::Fmin, 64) => FPUOp2::Min64,
1817-
(Opcode::Fmax, 32) => FPUOp2::Max32,
1818-
(Opcode::Fmax, 64) => FPUOp2::Max64,
1819-
_ => panic!("Unknown op/bits combination"),
1820-
};
1805+
let ty = ty.unwrap();
1806+
let bits = ty_bits(ty);
18211807
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
18221808
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
18231809
let rd = get_output_reg(ctx, outputs[0]);
1824-
ctx.emit(Inst::FpuRRR { fpu_op, rd, rn, rm });
1810+
if bits < 128 {
1811+
let fpu_op = match (op, bits) {
1812+
(Opcode::Fadd, 32) => FPUOp2::Add32,
1813+
(Opcode::Fadd, 64) => FPUOp2::Add64,
1814+
(Opcode::Fsub, 32) => FPUOp2::Sub32,
1815+
(Opcode::Fsub, 64) => FPUOp2::Sub64,
1816+
(Opcode::Fmul, 32) => FPUOp2::Mul32,
1817+
(Opcode::Fmul, 64) => FPUOp2::Mul64,
1818+
(Opcode::Fdiv, 32) => FPUOp2::Div32,
1819+
(Opcode::Fdiv, 64) => FPUOp2::Div64,
1820+
(Opcode::Fmin, 32) => FPUOp2::Min32,
1821+
(Opcode::Fmin, 64) => FPUOp2::Min64,
1822+
(Opcode::Fmax, 32) => FPUOp2::Max32,
1823+
(Opcode::Fmax, 64) => FPUOp2::Max64,
1824+
_ => panic!("Unknown op/bits combination"),
1825+
};
1826+
ctx.emit(Inst::FpuRRR { fpu_op, rd, rn, rm });
1827+
} else {
1828+
let alu_op = match op {
1829+
Opcode::Fadd => VecALUOp::Fadd,
1830+
Opcode::Fsub => VecALUOp::Fsub,
1831+
Opcode::Fdiv => VecALUOp::Fdiv,
1832+
Opcode::Fmax => VecALUOp::Fmax,
1833+
Opcode::Fmin => VecALUOp::Fmin,
1834+
Opcode::Fmul => VecALUOp::Fmul,
1835+
_ => unreachable!(),
1836+
};
1837+
1838+
ctx.emit(Inst::VecRRR {
1839+
rd,
1840+
rn,
1841+
rm,
1842+
alu_op,
1843+
size: VectorSize::from_ty(ty),
1844+
});
1845+
}
18251846
}
18261847

18271848
Opcode::Sqrt | Opcode::Fneg | Opcode::Fabs | Opcode::Fpromote | Opcode::Fdemote => {
1828-
let bits = ty_bits(ctx.output_ty(insn, 0));
1829-
let fpu_op = match (op, bits) {
1830-
(Opcode::Sqrt, 32) => FPUOp1::Sqrt32,
1831-
(Opcode::Sqrt, 64) => FPUOp1::Sqrt64,
1832-
(Opcode::Fneg, 32) => FPUOp1::Neg32,
1833-
(Opcode::Fneg, 64) => FPUOp1::Neg64,
1834-
(Opcode::Fabs, 32) => FPUOp1::Abs32,
1835-
(Opcode::Fabs, 64) => FPUOp1::Abs64,
1836-
(Opcode::Fpromote, 32) => panic!("Cannot promote to 32 bits"),
1837-
(Opcode::Fpromote, 64) => FPUOp1::Cvt32To64,
1838-
(Opcode::Fdemote, 32) => FPUOp1::Cvt64To32,
1839-
(Opcode::Fdemote, 64) => panic!("Cannot demote to 64 bits"),
1840-
_ => panic!("Unknown op/bits combination"),
1841-
};
1849+
let ty = ty.unwrap();
1850+
let bits = ty_bits(ty);
18421851
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
18431852
let rd = get_output_reg(ctx, outputs[0]);
1844-
ctx.emit(Inst::FpuRR { fpu_op, rd, rn });
1853+
if bits < 128 {
1854+
let fpu_op = match (op, bits) {
1855+
(Opcode::Sqrt, 32) => FPUOp1::Sqrt32,
1856+
(Opcode::Sqrt, 64) => FPUOp1::Sqrt64,
1857+
(Opcode::Fneg, 32) => FPUOp1::Neg32,
1858+
(Opcode::Fneg, 64) => FPUOp1::Neg64,
1859+
(Opcode::Fabs, 32) => FPUOp1::Abs32,
1860+
(Opcode::Fabs, 64) => FPUOp1::Abs64,
1861+
(Opcode::Fpromote, 32) => panic!("Cannot promote to 32 bits"),
1862+
(Opcode::Fpromote, 64) => FPUOp1::Cvt32To64,
1863+
(Opcode::Fdemote, 32) => FPUOp1::Cvt64To32,
1864+
(Opcode::Fdemote, 64) => panic!("Cannot demote to 64 bits"),
1865+
_ => panic!("Unknown op/bits combination"),
1866+
};
1867+
ctx.emit(Inst::FpuRR { fpu_op, rd, rn });
1868+
} else {
1869+
let op = match op {
1870+
Opcode::Fabs => VecMisc2::Fabs,
1871+
Opcode::Fneg => VecMisc2::Fneg,
1872+
Opcode::Sqrt => VecMisc2::Fsqrt,
1873+
_ => unimplemented!(),
1874+
};
1875+
1876+
ctx.emit(Inst::VecMisc {
1877+
op,
1878+
rd,
1879+
rn,
1880+
size: VectorSize::from_ty(ty),
1881+
});
1882+
}
18451883
}
18461884

18471885
Opcode::Ceil | Opcode::Floor | Opcode::Trunc | Opcode::Nearest => {

0 commit comments

Comments
 (0)