Skip to content

Commit 2cc7fea

Browse files
authored
ARM64 - Consolidate 'msub' and 'madd' logic (#68363)
* Changed GT_MADD to not emit msub * Minor changes * Minor changes * Minor changes * Minor changes * Minor changes * Minor changes * Minor changes * Minor changes * Minor changes * Minor changes * Added break * Added lowering pass * Updates to lowering * Removing morph transformation * Minor format * Removed GT_MSUB. Using simple containment to emit msub. * Removed GT_MSUB case * Removed extra code * Updated comment * Formatting * Minor change * Combining GT_MADD changes * Added extra set flag checks * Added extra set flag checks * Creating helper functions * Fixing build * Fixing build * Fixing build and better codegen * Should produce zero diffs * Formatting * Update lowerarmarch.cpp
1 parent 844b298 commit 2cc7fea

File tree

7 files changed

+225
-149
lines changed

7 files changed

+225
-149
lines changed

src/coreclr/jit/codegen.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1379,8 +1379,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
13791379
#endif
13801380
#if defined(TARGET_ARM64)
13811381
void genCodeForJumpCompare(GenTreeOp* tree);
1382-
void genCodeForMadd(GenTreeOp* tree);
1383-
void genCodeForMsub(GenTreeOp* tree);
13841382
void genCodeForBfiz(GenTreeOp* tree);
13851383
void genCodeForAddEx(GenTreeOp* tree);
13861384
void genCodeForCond(GenTreeOp* tree);

src/coreclr/jit/codegenarm64.cpp

Lines changed: 51 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -2377,20 +2377,60 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
23772377

23782378
// Generate code for ADD, SUB, MUL, DIV, UDIV, AND, AND_NOT, OR and XOR
23792379
// This method is expected to have called genConsumeOperands() before calling it.
2380-
void CodeGen::genCodeForBinary(GenTreeOp* treeNode)
2380+
void CodeGen::genCodeForBinary(GenTreeOp* tree)
23812381
{
2382-
const genTreeOps oper = treeNode->OperGet();
2383-
regNumber targetReg = treeNode->GetRegNum();
2384-
var_types targetType = treeNode->TypeGet();
2382+
const genTreeOps oper = tree->OperGet();
2383+
regNumber targetReg = tree->GetRegNum();
2384+
var_types targetType = tree->TypeGet();
23852385
emitter* emit = GetEmitter();
23862386

2387-
assert(treeNode->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_DIV, GT_UDIV, GT_AND, GT_AND_NOT, GT_OR, GT_XOR));
2387+
assert(tree->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_DIV, GT_UDIV, GT_AND, GT_AND_NOT, GT_OR, GT_XOR));
23882388

2389-
GenTree* op1 = treeNode->gtGetOp1();
2390-
GenTree* op2 = treeNode->gtGetOp2();
2391-
instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
2389+
GenTree* op1 = tree->gtGetOp1();
2390+
GenTree* op2 = tree->gtGetOp2();
23922391

2393-
if ((treeNode->gtFlags & GTF_SET_FLAGS) != 0)
2392+
// Handles combined operations: 'madd', 'msub'
2393+
if (op2->OperIs(GT_MUL) && op2->isContained())
2394+
{
2395+
// In the future, we might consider enabling this for floating-point "unsafe" math.
2396+
assert(varTypeIsIntegral(tree));
2397+
assert(!(tree->gtFlags & GTF_SET_FLAGS));
2398+
2399+
GenTree* a = op1;
2400+
GenTree* b = op2->gtGetOp1();
2401+
GenTree* c = op2->gtGetOp2();
2402+
2403+
instruction ins;
2404+
switch (oper)
2405+
{
2406+
case GT_ADD:
2407+
{
2408+
// d = a + b * c
2409+
// madd: d, b, c, a
2410+
ins = INS_madd;
2411+
break;
2412+
}
2413+
2414+
case GT_SUB:
2415+
{
2416+
// d = a - b * c
2417+
// msub: d, b, c, a
2418+
ins = INS_msub;
2419+
break;
2420+
}
2421+
2422+
default:
2423+
unreached();
2424+
}
2425+
2426+
emit->emitIns_R_R_R_R(ins, emitActualTypeSize(tree), targetReg, b->GetRegNum(), c->GetRegNum(), a->GetRegNum());
2427+
genProduceReg(tree);
2428+
return;
2429+
}
2430+
2431+
instruction ins = genGetInsForOper(tree->OperGet(), targetType);
2432+
2433+
if ((tree->gtFlags & GTF_SET_FLAGS) != 0)
23942434
{
23952435
switch (oper)
23962436
{
@@ -2414,10 +2454,10 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode)
24142454
// The arithmetic node must be sitting in a register (since it's not contained)
24152455
assert(targetReg != REG_NA);
24162456

2417-
regNumber r = emit->emitInsTernary(ins, emitActualTypeSize(treeNode), treeNode, op1, op2);
2457+
regNumber r = emit->emitInsTernary(ins, emitActualTypeSize(tree), tree, op1, op2);
24182458
assert(r == targetReg);
24192459

2420-
genProduceReg(treeNode);
2460+
genProduceReg(tree);
24212461
}
24222462

24232463
//------------------------------------------------------------------------
@@ -10062,76 +10102,6 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind)
1006210102
}
1006310103
}
1006410104

10065-
//-----------------------------------------------------------------------------------
10066-
// genCodeForMadd: Emit a madd (Multiply-Add) instruction
10067-
//
10068-
// Arguments:
10069-
// tree - GT_MADD tree where op1 or op2 is GT_ADD
10070-
//
10071-
void CodeGen::genCodeForMadd(GenTreeOp* tree)
10072-
{
10073-
assert(tree->OperIs(GT_MADD) && varTypeIsIntegral(tree) && !(tree->gtFlags & GTF_SET_FLAGS));
10074-
genConsumeOperands(tree);
10075-
10076-
GenTree* a;
10077-
GenTree* b;
10078-
GenTree* c;
10079-
if (tree->gtGetOp1()->OperIs(GT_MUL) && tree->gtGetOp1()->isContained())
10080-
{
10081-
a = tree->gtGetOp1()->gtGetOp1();
10082-
b = tree->gtGetOp1()->gtGetOp2();
10083-
c = tree->gtGetOp2();
10084-
}
10085-
else
10086-
{
10087-
assert(tree->gtGetOp2()->OperIs(GT_MUL) && tree->gtGetOp2()->isContained());
10088-
a = tree->gtGetOp2()->gtGetOp1();
10089-
b = tree->gtGetOp2()->gtGetOp2();
10090-
c = tree->gtGetOp1();
10091-
}
10092-
10093-
bool useMsub = false;
10094-
if (a->OperIs(GT_NEG) && a->isContained())
10095-
{
10096-
a = a->gtGetOp1();
10097-
useMsub = true;
10098-
}
10099-
if (b->OperIs(GT_NEG) && b->isContained())
10100-
{
10101-
b = b->gtGetOp1();
10102-
useMsub = !useMsub; // it's either "a * -b" or "-a * -b" which is the same as "a * b"
10103-
}
10104-
10105-
GetEmitter()->emitIns_R_R_R_R(useMsub ? INS_msub : INS_madd, emitActualTypeSize(tree), tree->GetRegNum(),
10106-
a->GetRegNum(), b->GetRegNum(), c->GetRegNum());
10107-
genProduceReg(tree);
10108-
}
10109-
10110-
//-----------------------------------------------------------------------------------
10111-
// genCodeForMsub: Emit a msub (Multiply-Subtract) instruction
10112-
//
10113-
// Arguments:
10114-
// tree - GT_MSUB tree where op2 is GT_MUL
10115-
//
10116-
void CodeGen::genCodeForMsub(GenTreeOp* tree)
10117-
{
10118-
assert(tree->OperIs(GT_MSUB) && varTypeIsIntegral(tree) && !(tree->gtFlags & GTF_SET_FLAGS));
10119-
genConsumeOperands(tree);
10120-
10121-
assert(tree->gtGetOp2()->OperIs(GT_MUL));
10122-
assert(tree->gtGetOp2()->isContained());
10123-
10124-
GenTree* a = tree->gtGetOp1();
10125-
GenTree* b = tree->gtGetOp2()->gtGetOp1();
10126-
GenTree* c = tree->gtGetOp2()->gtGetOp2();
10127-
10128-
// d = a - b * c
10129-
// MSUB d, b, c, a
10130-
GetEmitter()->emitIns_R_R_R_R(INS_msub, emitActualTypeSize(tree), tree->GetRegNum(), b->GetRegNum(), c->GetRegNum(),
10131-
a->GetRegNum());
10132-
genProduceReg(tree);
10133-
}
10134-
1013510105
//------------------------------------------------------------------------
1013610106
// genCodeForBfiz: Generates the code sequence for a GenTree node that
1013710107
// represents a bitfield insert in zero with sign/zero extension.

src/coreclr/jit/codegenarmarch.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -302,14 +302,6 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
302302
break;
303303

304304
#ifdef TARGET_ARM64
305-
case GT_MADD:
306-
genCodeForMadd(treeNode->AsOp());
307-
break;
308-
309-
case GT_MSUB:
310-
genCodeForMsub(treeNode->AsOp());
311-
break;
312-
313305
case GT_INC_SATURATE:
314306
genCodeForIncSaturate(treeNode);
315307
break;

src/coreclr/jit/gtlist.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -217,10 +217,6 @@ GTNODE(MUL_LONG , GenTreeOp ,1,GTK_BINOP|DBK_NOTHIR)
217217
GTNODE(AND_NOT , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR)
218218

219219
#ifdef TARGET_ARM64
220-
GTNODE(MADD , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Generates the Multiply-Add instruction. In the future, we might consider
221-
// enabling it for both armarch and xarch for floating-point MADD "unsafe" math.
222-
GTNODE(MSUB , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Generates the Multiply-Subtract instruction. In the future, we might consider
223-
// enabling it for both armarch and xarch for floating-point MSUB "unsafe" math.
224220
GTNODE(ADDEX, GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Add with sign/zero extension.
225221
GTNODE(BFIZ , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Bitfield Insert in Zero.
226222
GTNODE(CSNEG_MI , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Conditional select, negate, minus result

src/coreclr/jit/lower.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3010,7 +3010,10 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp)
30103010
#endif // FEATURE_HW_INTRINSICS
30113011
)
30123012
#else // TARGET_ARM64
3013-
op1->OperIs(GT_AND, GT_ADD, GT_SUB)
3013+
op1->OperIs(GT_AND, GT_ADD, GT_SUB) &&
3014+
// This happens in order to emit ARM64 'madd' and 'msub' instructions.
3015+
// We cannot combine 'adds'/'subs' and 'mul'.
3016+
!(op1->gtGetOp2()->OperIs(GT_MUL) && op1->gtGetOp2()->isContained())
30143017
#endif
30153018
)
30163019
{
@@ -5443,10 +5446,22 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node)
54435446
#endif // TARGET_XARCH
54445447
}
54455448

5449+
#ifdef TARGET_ARM64
5450+
if (node->OperIs(GT_ADD))
5451+
{
5452+
GenTree* next = LowerAddForPossibleContainment(node);
5453+
if (next != nullptr)
5454+
{
5455+
return next;
5456+
}
5457+
}
5458+
#endif // TARGET_ARM64
5459+
54465460
if (node->OperIs(GT_ADD))
54475461
{
54485462
ContainCheckBinary(node);
54495463
}
5464+
54505465
return nullptr;
54515466
}
54525467

src/coreclr/jit/lower.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,7 @@ class Lowering final : public Phase
355355
bool IsValidConstForMovImm(GenTreeHWIntrinsic* node);
356356
void LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node);
357357
GenTree* LowerModPow2(GenTree* node);
358+
GenTree* LowerAddForPossibleContainment(GenTreeOp* node);
358359
#endif // !TARGET_XARCH && !TARGET_ARM64
359360

360361
union VectorConstant {
@@ -572,6 +573,10 @@ class Lowering final : public Phase
572573
return m_lsra->isContainableMemoryOp(node);
573574
}
574575

576+
#ifdef TARGET_ARM64
577+
bool IsContainableBinaryOp(GenTree* parentNode, GenTree* childNode) const;
578+
#endif // TARGET_ARM64
579+
575580
#ifdef FEATURE_HW_INTRINSICS
576581
// Tries to get a containable node for a given HWIntrinsic
577582
bool TryGetContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode,

0 commit comments

Comments
 (0)