Skip to content

Commit 0166246

Browse files
Add Sve.ScatterXBitYNarrowing() on Arm64 (#104720)
* Add Sve.ScatterXBitYNarrowing() on Arm64. Includes:
  - Sve.Scatter16BitNarrowing()
  - Sve.Scatter8BitNarrowing()
  - Sve.Scatter32BitNarrowing()
  - Sve.Scatter16BitWithByteOffsetsNarrowing()
  - Sve.Scatter8BitWithByteOffsetsNarrowing()
  - Sve.Scatter32BitWithByteOffsetsNarrowing()
* Remove duplicate API calls
* Remove duplicate tests
1 parent f38e661 commit 0166246

File tree

8 files changed

+517
-24
lines changed

8 files changed

+517
-24
lines changed

src/coreclr/jit/gentree.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26671,6 +26671,7 @@ bool GenTreeHWIntrinsic::OperIsMemoryStore(GenTree** pAddr) const
2667126671
case NI_SSE2_MaskMove:
2667226672
addr = Op(3);
2667326673
break;
26674+
2667426675
#elif defined(TARGET_ARM64)
2667526676
case NI_Sve_StoreAndZip:
2667626677
case NI_Sve_StoreAndZipx2:
@@ -26682,9 +26683,14 @@ bool GenTreeHWIntrinsic::OperIsMemoryStore(GenTree** pAddr) const
2668226683
break;
2668326684

2668426685
case NI_Sve_Scatter:
26686+
case NI_Sve_Scatter16BitNarrowing:
26687+
case NI_Sve_Scatter16BitWithByteOffsetsNarrowing:
26688+
case NI_Sve_Scatter32BitNarrowing:
26689+
case NI_Sve_Scatter32BitWithByteOffsetsNarrowing:
26690+
case NI_Sve_Scatter8BitNarrowing:
26691+
case NI_Sve_Scatter8BitWithByteOffsetsNarrowing:
2668526692
addr = Op(2);
2668626693
break;
26687-
2668826694
#endif // TARGET_ARM64
2668926695

2669026696
default:

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2053,27 +2053,35 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
20532053
break;
20542054

20552055
case NI_Sve_Scatter:
2056+
case NI_Sve_Scatter16BitNarrowing:
2057+
case NI_Sve_Scatter16BitWithByteOffsetsNarrowing:
2058+
case NI_Sve_Scatter32BitNarrowing:
2059+
case NI_Sve_Scatter32BitWithByteOffsetsNarrowing:
2060+
case NI_Sve_Scatter8BitNarrowing:
2061+
case NI_Sve_Scatter8BitWithByteOffsetsNarrowing:
20562062
{
20572063
if (!varTypeIsSIMD(intrin.op2->gtType))
20582064
{
20592065
// Scatter(Vector<T1> mask, T1* address, Vector<T2> indices, Vector<T> data)
20602066
assert(intrin.numOperands == 4);
2061-
emitAttr baseSize = emitActualTypeSize(intrin.baseType);
2067+
emitAttr baseSize = emitActualTypeSize(intrin.baseType);
2068+
insScalableOpts sopt;
20622069

20632070
if (baseSize == EA_8BYTE)
20642071
{
20652072
// Index is multiplied by 8
2066-
GetEmitter()->emitIns_R_R_R_R(ins, emitSize, op4Reg, op1Reg, op2Reg, op3Reg, opt,
2067-
INS_SCALABLE_OPTS_LSL_N);
2073+
sopt = (ins == INS_sve_st1b) ? INS_SCALABLE_OPTS_NONE : INS_SCALABLE_OPTS_LSL_N;
2074+
GetEmitter()->emitIns_R_R_R_R(ins, emitSize, op4Reg, op1Reg, op2Reg, op3Reg, opt, sopt);
20682075
}
20692076
else
20702077
{
20712078
// Index is sign or zero extended to 64bits, then multiplied by 4
20722079
assert(baseSize == EA_4BYTE);
2073-
opt = varTypeIsUnsigned(node->GetAuxiliaryType()) ? INS_OPTS_SCALABLE_S_UXTW
2074-
: INS_OPTS_SCALABLE_S_SXTW;
2075-
GetEmitter()->emitIns_R_R_R_R(ins, emitSize, op4Reg, op1Reg, op2Reg, op3Reg, opt,
2076-
INS_SCALABLE_OPTS_MOD_N);
2080+
opt = varTypeIsUnsigned(node->GetAuxiliaryType()) ? INS_OPTS_SCALABLE_S_UXTW
2081+
: INS_OPTS_SCALABLE_S_SXTW;
2082+
sopt = (ins == INS_sve_st1b) ? INS_SCALABLE_OPTS_NONE : INS_SCALABLE_OPTS_MOD_N;
2083+
2084+
GetEmitter()->emitIns_R_R_R_R(ins, emitSize, op4Reg, op1Reg, op2Reg, op3Reg, opt, sopt);
20772085
}
20782086
}
20792087
else

src/coreclr/jit/hwintrinsiclistarm64sve.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,12 @@ HARDWARE_INTRINSIC(Sve, SaturatingIncrementBy8BitElementCount,
225225
HARDWARE_INTRINSIC(Sve, SaturatingIncrementByActiveElementCount, -1, 2, true, {INS_invalid, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_sve_sqincp, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg|HW_Flag_HasRMWSemantics)
226226
HARDWARE_INTRINSIC(Sve, Scale, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fscale, INS_sve_fscale}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
227227
HARDWARE_INTRINSIC(Sve, Scatter, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1w, INS_sve_st1w, INS_sve_st1d, INS_sve_st1d, INS_sve_st1w, INS_sve_st1d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
228+
HARDWARE_INTRINSIC(Sve, Scatter16BitNarrowing, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1h, INS_sve_st1h, INS_sve_st1h, INS_sve_st1h, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
229+
HARDWARE_INTRINSIC(Sve, Scatter16BitWithByteOffsetsNarrowing, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1h, INS_sve_st1h, INS_sve_st1h, INS_sve_st1h, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
230+
HARDWARE_INTRINSIC(Sve, Scatter32BitNarrowing, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1w, INS_sve_st1w, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
231+
HARDWARE_INTRINSIC(Sve, Scatter32BitWithByteOffsetsNarrowing, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1w, INS_sve_st1w, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
232+
HARDWARE_INTRINSIC(Sve, Scatter8BitNarrowing, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
233+
HARDWARE_INTRINSIC(Sve, Scatter8BitWithByteOffsetsNarrowing, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_sve_st1b, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
228234
HARDWARE_INTRINSIC(Sve, ShiftLeftLogical, -1, -1, false, {INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_sve_lsl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
229235
HARDWARE_INTRINSIC(Sve, ShiftRightArithmetic, -1, -1, false, {INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_sve_asr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics)
230236
HARDWARE_INTRINSIC(Sve, ShiftRightArithmeticForDivide, -1, -1, false, {INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_sve_asrd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_ShiftRightByImmediate, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand)

0 commit comments

Comments
 (0)