Skip to content

Commit 1d9e50c

Browse files
authored
[RyuJIT] Add "rorx" instruction (BMI2) and emit it instead of "rol" when possible (#41772)
* Use rorx instead of rol when possible
1 parent f4094bf commit 1d9e50c

File tree

3 files changed

+23
-4
lines changed

3 files changed

+23
-4
lines changed

src/coreclr/src/jit/codegenxarch.cpp

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4080,10 +4080,11 @@ void CodeGen::genCodeForShift(GenTree* tree)
40804080

40814081
if (shiftBy->isContainedIntOrIImmed())
40824082
{
4083+
emitAttr size = emitTypeSize(tree);
4084+
40834085
// Optimize "X<<1" to "lea [reg+reg]" or "add reg, reg"
40844086
if (tree->OperIs(GT_LSH) && !tree->gtOverflowEx() && !tree->gtSetFlags() && shiftBy->IsIntegralConst(1))
40854087
{
4086-
emitAttr size = emitTypeSize(tree);
40874088
if (tree->GetRegNum() == operandReg)
40884089
{
40894090
GetEmitter()->emitIns_R_R(INS_add, size, tree->GetRegNum(), operandReg);
@@ -4095,16 +4096,29 @@ void CodeGen::genCodeForShift(GenTree* tree)
40954096
}
40964097
else
40974098
{
4099+
int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue();
4100+
4101+
#if defined(TARGET_64BIT)
4102+
// Try to emit rorx (when BMI2 is available) instead of mov+rol or mov+ror;
4103+
// this only makes sense for 64-bit integers.
4104+
if ((genActualType(targetType) == TYP_LONG) && (tree->GetRegNum() != operandReg) &&
4105+
compiler->compOpportunisticallyDependsOn(InstructionSet_BMI2) && tree->OperIs(GT_ROL, GT_ROR) &&
4106+
(shiftByValue > 0) && (shiftByValue < 64))
4107+
{
4108+
const int value = tree->OperIs(GT_ROL) ? (64 - shiftByValue) : shiftByValue;
4109+
GetEmitter()->emitIns_R_R_I(INS_rorx, size, tree->GetRegNum(), operandReg, value);
4110+
genProduceReg(tree);
4111+
return;
4112+
}
4113+
#endif
40984114
// First, move the operand to the destination register and
40994115
// later on perform the shift in-place.
41004116
// (LSRA will try to avoid this situation through preferencing.)
41014117
if (tree->GetRegNum() != operandReg)
41024118
{
41034119
inst_RV_RV(INS_mov, tree->GetRegNum(), operandReg, targetType);
41044120
}
4105-
4106-
int shiftByValue = (int)shiftBy->AsIntConCommon()->IconValue();
4107-
inst_RV_SH(ins, emitTypeSize(tree), tree->GetRegNum(), shiftByValue);
4121+
inst_RV_SH(ins, size, tree->GetRegNum(), shiftByValue);
41084122
}
41094123
}
41104124
else

src/coreclr/src/jit/emitxarch.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -524,6 +524,7 @@ bool TakesRexWPrefix(instruction ins, emitAttr attr)
524524
case INS_mulx:
525525
case INS_pdep:
526526
case INS_pext:
527+
case INS_rorx:
527528
return true;
528529
default:
529530
return false;
@@ -758,6 +759,7 @@ unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, c
758759
{
759760
switch (ins)
760761
{
762+
case INS_rorx:
761763
case INS_pdep:
762764
case INS_mulx:
763765
{
@@ -1242,6 +1244,7 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
12421244
case INS_pextrq:
12431245
case INS_pextrw:
12441246
case INS_pextrw_sse41:
1247+
case INS_rorx:
12451248
{
12461249
// These SSE instructions write to a general purpose integer register.
12471250
return false;
@@ -14944,6 +14947,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
1494414947
case INS_tzcnt:
1494514948
case INS_popcnt:
1494614949
case INS_crc32:
14950+
case INS_rorx:
1494714951
case INS_pdep:
1494814952
case INS_pext:
1494914953
case INS_addsubps:

src/coreclr/src/jit/instrsxarch.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -594,6 +594,7 @@ INST3(blsr, "blsr", IUM_WR, BAD_CODE, BAD_CODE,
594594
INST3(bextr, "bextr", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF7), INS_Flags_IsDstDstSrcAVXInstruction) // Bit Field Extract
595595

596596
// BMI2
597+
INST3(rorx, "rorx", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xF0), INS_FLAGS_None) // Rotate Right by Immediate
597598
INST3(pdep, "pdep", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Deposit
598599
INST3(pext, "pext", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Extract
599600
INST3(bzhi, "bzhi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Zero High Bits Starting with Specified Bit Position

0 commit comments

Comments
 (0)