Skip to content

Commit 1ba2974

Browse files
ARM64-SVE: Add MultiplyAddRotateComplexBySelectedScalar (#105002)
1 parent 27776e2 commit 1ba2974

File tree

15 files changed

+872
-28
lines changed

15 files changed

+872
-28
lines changed

src/coreclr/jit/codegenarm64test.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6592,11 +6592,11 @@ void CodeGen::genArm64EmitterUnitTestsSve()
65926592
// IF_SVE_GV_3A
65936593
theEmitter->emitIns_R_R_R_I_I(INS_sve_fcmla, EA_SCALABLE, REG_V0, REG_V1, REG_V0, 0, 0,
65946594
INS_OPTS_SCALABLE_S); // FCMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
6595-
theEmitter->emitIns_R_R_R_I_I(INS_sve_fcmla, EA_SCALABLE, REG_V2, REG_V3, REG_V5, 1, 90,
6595+
theEmitter->emitIns_R_R_R_I_I(INS_sve_fcmla, EA_SCALABLE, REG_V2, REG_V3, REG_V5, 1, 1,
65966596
INS_OPTS_SCALABLE_S); // FCMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
6597-
theEmitter->emitIns_R_R_R_I_I(INS_sve_fcmla, EA_SCALABLE, REG_V4, REG_V5, REG_V10, 0, 180,
6597+
theEmitter->emitIns_R_R_R_I_I(INS_sve_fcmla, EA_SCALABLE, REG_V4, REG_V5, REG_V10, 0, 2,
65986598
INS_OPTS_SCALABLE_S); // FCMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
6599-
theEmitter->emitIns_R_R_R_I_I(INS_sve_fcmla, EA_SCALABLE, REG_V6, REG_V7, REG_V15, 1, 270,
6599+
theEmitter->emitIns_R_R_R_I_I(INS_sve_fcmla, EA_SCALABLE, REG_V6, REG_V7, REG_V15, 1, 3,
66006600
INS_OPTS_SCALABLE_S); // FCMLA <Zda>.S, <Zn>.S, <Zm>.S[<imm>], <const>
66016601

66026602
// IF_SVE_GX_3A

src/coreclr/jit/emitarm64sve.cpp

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5825,14 +5825,13 @@ void emitter::emitInsSve_R_R_R_I_I(instruction ins,
58255825

58265826
case INS_sve_fcmla:
58275827
assert(opt == INS_OPTS_SCALABLE_S);
5828-
assert(isVectorRegister(reg1)); // ddddd
5829-
assert(isVectorRegister(reg2)); // nnnnn
5830-
assert(isLowVectorRegister(reg3)); // mmmm
5831-
assert(isValidUimm<1>(imm1)); // i
5832-
assert(isValidRot(imm2)); // rr
5828+
assert(isVectorRegister(reg1)); // ddddd
5829+
assert(isVectorRegister(reg2)); // nnnnn
5830+
assert(isLowVectorRegister(reg3)); // mmmm
5831+
assert(isValidUimm<1>(imm1)); // i
5832+
assert(emitIsValidEncodedRotationImm0_to_270(imm2)); // rr
58335833

5834-
// Convert imm2 from rotation value (0-270) to bitwise representation (0-3)
5835-
imm = (imm1 << 2) | emitEncodeRotationImm0_to_270(imm2);
5834+
imm = (imm1 << 2) | imm2;
58365835
fmt = IF_SVE_GV_3A;
58375836
break;
58385837

src/coreclr/jit/hwintrinsic.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -963,9 +963,7 @@ static void ValidateHWIntrinsicInfo(CORINFO_InstructionSet isa, NamedIntrinsic n
963963
if (info.numArgs != -1)
964964
{
965965
// We should only have an expected number of arguments
966-
#if defined(TARGET_ARM64)
967-
assert((info.numArgs >= 0) && (info.numArgs <= 4));
968-
#elif defined(TARGET_XARCH)
966+
#if defined(TARGET_ARM64) || defined(TARGET_XARCH)
969967
assert((info.numArgs >= 0) && (info.numArgs <= 5));
970968
#else
971969
unreached();

src/coreclr/jit/hwintrinsic.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1085,6 +1085,14 @@ struct HWIntrinsicInfo
10851085
break;
10861086
}
10871087

1088+
case NI_Sve_MultiplyAddRotateComplexBySelectedScalar:
1089+
{
1090+
assert(sig->numArgs == 5);
1091+
*imm1Pos = 0;
1092+
*imm2Pos = 1;
1093+
break;
1094+
}
1095+
10881096
default:
10891097
{
10901098
assert(sig->numArgs > 0);
@@ -1105,6 +1113,7 @@ struct HWIntrinsic final
11051113
, op2(nullptr)
11061114
, op3(nullptr)
11071115
, op4(nullptr)
1116+
, op5(nullptr)
11081117
, numOperands(0)
11091118
, baseType(TYP_UNDEF)
11101119
{
@@ -1134,6 +1143,7 @@ struct HWIntrinsic final
11341143
GenTree* op2;
11351144
GenTree* op3;
11361145
GenTree* op4;
1146+
GenTree* op5;
11371147
size_t numOperands;
11381148
var_types baseType;
11391149

@@ -1144,6 +1154,9 @@ struct HWIntrinsic final
11441154

11451155
switch (numOperands)
11461156
{
1157+
case 5:
1158+
op5 = node->Op(5);
1159+
FALLTHROUGH;
11471160
case 4:
11481161
op4 = node->Op(4);
11491162
FALLTHROUGH;

src/coreclr/jit/hwintrinsicarm64.cpp

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,27 @@ void HWIntrinsicInfo::lookupImmBounds(
514514
immUpperBound = 3;
515515
break;
516516

517+
case NI_Sve_MultiplyAddRotateComplexBySelectedScalar:
518+
// rotation comes after index in the intrinsic's signature,
519+
// but flip the order here so we check the larger range first.
520+
// This conforms to the existing logic in LinearScan::BuildHWIntrinsic
521+
// when determining if we need an internal register for the jump table.
522+
// This flipped ordering is reflected in HWIntrinsicInfo::GetImmOpsPositions.
523+
if (immNumber == 1)
524+
{
525+
// Bounds for rotation
526+
immLowerBound = 0;
527+
immUpperBound = 3;
528+
}
529+
else
530+
{
531+
// Bounds for index
532+
assert(immNumber == 2);
533+
immLowerBound = 0;
534+
immUpperBound = 1;
535+
}
536+
break;
537+
517538
case NI_Sve_TrigonometricMultiplyAddCoefficient:
518539
immLowerBound = 0;
519540
immUpperBound = 7;
@@ -3004,6 +3025,51 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
30043025
break;
30053026
}
30063027

3028+
case NI_Sve_MultiplyAddRotateComplexBySelectedScalar:
3029+
{
3030+
assert(sig->numArgs == 5);
3031+
assert(!isScalar);
3032+
3033+
CORINFO_ARG_LIST_HANDLE arg1 = sig->args;
3034+
CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1);
3035+
CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2);
3036+
CORINFO_ARG_LIST_HANDLE arg4 = info.compCompHnd->getArgNext(arg3);
3037+
CORINFO_ARG_LIST_HANDLE arg5 = info.compCompHnd->getArgNext(arg4);
3038+
var_types argType = TYP_UNKNOWN;
3039+
CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE;
3040+
3041+
int imm1LowerBound, imm1UpperBound; // Range for rotation
3042+
int imm2LowerBound, imm2UpperBound; // Range for index
3043+
HWIntrinsicInfo::lookupImmBounds(intrinsic, simdSize, simdBaseType, 1, &imm1LowerBound, &imm1UpperBound);
3044+
HWIntrinsicInfo::lookupImmBounds(intrinsic, simdSize, simdBaseType, 2, &imm2LowerBound, &imm2UpperBound);
3045+
3046+
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg5, &argClass)));
3047+
GenTree* op5 = getArgForHWIntrinsic(argType, argClass);
3048+
assert(HWIntrinsicInfo::isImmOp(intrinsic, op5));
3049+
op5 = addRangeCheckIfNeeded(intrinsic, op5, mustExpand, imm1LowerBound, imm1UpperBound);
3050+
3051+
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg4, &argClass)));
3052+
op4 = getArgForHWIntrinsic(argType, argClass);
3053+
assert(HWIntrinsicInfo::isImmOp(intrinsic, op4));
3054+
op4 = addRangeCheckIfNeeded(intrinsic, op4, mustExpand, imm2LowerBound, imm2UpperBound);
3055+
3056+
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass)));
3057+
op3 = getArgForHWIntrinsic(argType, argClass);
3058+
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
3059+
op2 = getArgForHWIntrinsic(argType, argClass);
3060+
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass)));
3061+
op1 = getArgForHWIntrinsic(argType, argClass);
3062+
3063+
SetOpLclRelatedToSIMDIntrinsic(op1);
3064+
SetOpLclRelatedToSIMDIntrinsic(op2);
3065+
SetOpLclRelatedToSIMDIntrinsic(op3);
3066+
SetOpLclRelatedToSIMDIntrinsic(op4);
3067+
SetOpLclRelatedToSIMDIntrinsic(op5);
3068+
retNode = new (this, GT_HWINTRINSIC) GenTreeHWIntrinsic(retType, getAllocator(CMK_ASTNode), intrinsic,
3069+
simdBaseJitType, simdSize, op1, op2, op3, op4, op5);
3070+
break;
3071+
}
3072+
30073073
default:
30083074
{
30093075
return nullptr;

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,9 +265,15 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
265265
regNumber op2Reg = REG_NA;
266266
regNumber op3Reg = REG_NA;
267267
regNumber op4Reg = REG_NA;
268+
regNumber op5Reg = REG_NA;
268269

269270
switch (intrin.numOperands)
270271
{
272+
case 5:
273+
assert(intrin.op5 != nullptr);
274+
op5Reg = intrin.op5->GetRegNum();
275+
FALLTHROUGH;
276+
271277
case 4:
272278
assert(intrin.op4 != nullptr);
273279
op4Reg = intrin.op4->GetRegNum();
@@ -2425,6 +2431,60 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
24252431
break;
24262432
}
24272433

2434+
case NI_Sve_MultiplyAddRotateComplexBySelectedScalar:
2435+
{
2436+
assert(isRMW);
2437+
assert(hasImmediateOperand);
2438+
2439+
if (targetReg != op1Reg)
2440+
{
2441+
assert(targetReg != op2Reg);
2442+
assert(targetReg != op3Reg);
2443+
GetEmitter()->emitInsSve_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg);
2444+
}
2445+
2446+
// If both immediates are constant, we don't need a jump table
2447+
if (intrin.op4->IsCnsIntOrI() && intrin.op5->IsCnsIntOrI())
2448+
{
2449+
assert(intrin.op4->isContainedIntOrIImmed() && intrin.op5->isContainedIntOrIImmed());
2450+
GetEmitter()->emitInsSve_R_R_R_I_I(ins, emitSize, targetReg, op2Reg, op3Reg,
2451+
intrin.op4->AsIntCon()->gtIconVal,
2452+
intrin.op5->AsIntCon()->gtIconVal, opt);
2453+
}
2454+
else
2455+
{
2456+
// Use the helper to generate a table. The table can only use a single lookup value, so the
2457+
// two immediates, index (0 to 1, in op4Reg) and rotation (0 to 3, in op5Reg), must be
2458+
// combined into a single value (0 to 7), encoded as (rotation << 1) | index.
2459+
assert(!intrin.op4->isContainedIntOrIImmed() && !intrin.op5->isContainedIntOrIImmed());
2460+
emitAttr scalarSize = emitActualTypeSize(node->GetSimdBaseType());
2461+
2462+
// Combine the two immediates into op4Reg
2463+
// Shift rotation left by one bit so that it does not overlap the single index bit
2464+
GetEmitter()->emitIns_R_R_I(INS_lsl, scalarSize, op5Reg, op5Reg, 1);
2465+
// Combine the two values by ORing
2466+
GetEmitter()->emitIns_R_R_R(INS_orr, scalarSize, op4Reg, op4Reg, op5Reg);
2467+
2468+
// Generate the table using the combined immediate
2469+
HWIntrinsicImmOpHelper helper(this, op4Reg, 0, 7, node);
2470+
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
2471+
{
2472+
// Extract index and rotation from the immediate
2473+
const int value = helper.ImmValue();
2474+
const ssize_t index = value & 1;
2475+
const ssize_t rotation = value >> 1;
2476+
GetEmitter()->emitInsSve_R_R_R_I_I(ins, emitSize, targetReg, op2Reg, op3Reg, index, rotation,
2477+
opt);
2478+
}
2479+
2480+
// Restore the original values in op4Reg and op5Reg
2481+
GetEmitter()->emitIns_R_R_I(INS_and, scalarSize, op4Reg, op4Reg, 1);
2482+
GetEmitter()->emitIns_R_R_I(INS_lsr, scalarSize, op5Reg, op5Reg, 1);
2483+
}
2484+
2485+
break;
2486+
}
2487+
24282488
default:
24292489
unreached();
24302490
}

src/coreclr/jit/hwintrinsiclistarm64sve.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ HARDWARE_INTRINSIC(Sve, MinNumberAcross,
191191
HARDWARE_INTRINSIC(Sve, Multiply, -1, 2, true, {INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
192192
HARDWARE_INTRINSIC(Sve, MultiplyAdd, -1, -1, false, {INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen)
193193
HARDWARE_INTRINSIC(Sve, MultiplyAddRotateComplex, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcmla, INS_sve_fcmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand)
194+
HARDWARE_INTRINSIC(Sve, MultiplyAddRotateComplexBySelectedScalar, -1, 5, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcmla, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport)
194195
HARDWARE_INTRINSIC(Sve, MultiplyBySelectedScalar, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation)
195196
HARDWARE_INTRINSIC(Sve, MultiplyExtended, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmulx, INS_sve_fmulx}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
196197
HARDWARE_INTRINSIC(Sve, MultiplySubtract, -1, -1, false, {INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen)

src/coreclr/jit/lowerarmarch.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3698,6 +3698,18 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
36983698
}
36993699
break;
37003700

3701+
case NI_Sve_MultiplyAddRotateComplexBySelectedScalar:
3702+
assert(hasImmediateOperand);
3703+
assert(varTypeIsIntegral(intrin.op4));
3704+
assert(varTypeIsIntegral(intrin.op5));
3705+
// Can only avoid generating a table if both immediates are constant.
3706+
if (intrin.op4->IsCnsIntOrI() && intrin.op5->IsCnsIntOrI())
3707+
{
3708+
MakeSrcContained(node, intrin.op4);
3709+
MakeSrcContained(node, intrin.op5);
3710+
}
3711+
break;
3712+
37013713
default:
37023714
unreached();
37033715
}

0 commit comments

Comments
 (0)