Skip to content

Commit 949c3ec

Browse files
authored
JIT: Allow strength reducing to GCD of IVs (#110222)
This adds support for strength reduction to create a new primary IV that is the GCD of several IVs found in the loop. When the same index is used to access arrays of different sizes we will often see the IV being multiplied by different values; however, it is usually still profitable to strength reduce to the GCD of the step values and then "recover" the final IV by scaling.

Example:

```csharp
public static void Foo()
{
    string puzzle = "003020600900305001001806400008102900700000008006708200002609500800203009005010300";
    int[] board = new int[81];

    for (int i = 0; i < puzzle.Length; i++)
    {
        board[i] = puzzle[i] - '0';
    }
}
```

Codegen diff for loop:

```diff
  xor ecx, ecx
+ mov edx, 81
  G_M24659_IG03:
- mov edx, ecx
- movzx r8, word ptr [rbx+2*rdx+0x10]
+ movzx r8, word ptr [rbx+rcx+0x10]
  add r8d, -48
- mov dword ptr [rax+4*rdx+0x10], r8d
- inc ecx
- cmp ecx, 81
- jl SHORT G_M24659_IG03
- ;; size=24 bbWeight=3.96 PerfScore 19.80
+ mov dword ptr [rax+2*rcx+0x10], r8d
+ add rcx, 2
+ dec edx
+ jne SHORT G_M24659_IG03
+ ;; size=23 bbWeight=3.96 PerfScore 18.81
```

A similar diff in ``System.Linq.Enumerable+EnumerableSorter`2[System.__Canon,System.Decimal]:ComputeKeys(System.__Canon[],int)``:

```diff
+ xor edx, edx
  G_M57524_IG05:
- mov edx, r15d
- mov r8, gword ptr [rbx+8*rdx+0x10]
+ mov r8, gword ptr [rbx+rdx+0x10]
  vmovups xmm0, xmmword ptr [r8+0x20]
  vmovups xmmword ptr [rsp+0x28], xmm0
- shl rdx, 4
  vmovups xmm0, xmmword ptr [rsp+0x28]
- vmovups xmmword ptr [r14+rdx+0x10], xmm0
- inc r15d
- cmp r13d, r15d
- jg SHORT G_M57524_IG05
- ;; size=45 bbWeight=75.73 PerfScore 1079.10
+ vmovups xmmword ptr [r14+2*rdx+0x10], xmm0
+ add rdx, 8
+ dec r13d
+ jne SHORT G_M57524_IG05
+ ;; size=39 bbWeight=75.73 PerfScore 1022.31
```

Fix #102068
Fix #105241
1 parent bc23f63 commit 949c3ec

File tree

4 files changed

+239
-11
lines changed

4 files changed

+239
-11
lines changed

src/coreclr/jit/gentree.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2140,6 +2140,11 @@ struct GenTree
21402140
gtFlags &= ~GTF_MUL_64RSLT;
21412141
}
21422142

2143+
bool IsPartOfAddressMode()
2144+
{
2145+
return OperIs(GT_ADD, GT_MUL, GT_LSH) && ((gtFlags & GTF_ADDRMODE_NO_CSE) != 0);
2146+
}
2147+
21432148
void SetAllEffectsFlags(GenTree* source)
21442149
{
21452150
SetAllEffectsFlags(source->gtFlags & GTF_ALL_EFFECT);

src/coreclr/jit/inductionvariableopts.cpp

Lines changed: 232 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -938,7 +938,7 @@ bool Compiler::optWidenPrimaryIV(FlowGraphNaturalLoop* loop,
938938
GenTree* initVal;
939939
if (initToConstant)
940940
{
941-
initVal = gtNewIconNode((int64_t)(uint32_t)startConstant, TYP_LONG);
941+
initVal = gtNewLconNode((int64_t)(uint32_t)startConstant);
942942
}
943943
else
944944
{
@@ -1376,6 +1376,16 @@ class StrengthReductionContext
13761376
void AdvanceCursors(ArrayStack<CursorInfo>* cursors, ArrayStack<CursorInfo>* nextCursors);
13771377
void ExpandStoredCursors(ArrayStack<CursorInfo>* cursors, ArrayStack<CursorInfo>* otherCursors);
13781378
bool CheckAdvancedCursors(ArrayStack<CursorInfo>* cursors, ScevAddRec** nextIV);
1379+
ScevAddRec* ComputeRephrasableIV(ScevAddRec* iv1,
1380+
bool allowRephrasingByScalingIV1,
1381+
ScevAddRec* iv2,
1382+
bool allowRephrasingByScalingIV2);
1383+
template <typename T>
1384+
ScevAddRec* ComputeRephrasableIVByScaling(ScevAddRec* iv1,
1385+
bool allowRephrasingByScalingIV1,
1386+
ScevAddRec* iv2,
1387+
bool allowRephrasingByScalingIV2);
1388+
GenTree* RephraseIV(ScevAddRec* iv, ScevAddRec* sourceIV, GenTree* sourceTree);
13791389
bool StaysWithinManagedObject(ArrayStack<CursorInfo>* cursors, ScevAddRec* addRec);
13801390
bool TryReplaceUsesWithNewPrimaryIV(ArrayStack<CursorInfo>* cursors, ScevAddRec* iv);
13811391
BasicBlock* FindUpdateInsertionPoint(ArrayStack<CursorInfo>* cursors, Statement** afterStmt);
@@ -1509,6 +1519,10 @@ bool StrengthReductionContext::TryStrengthReduce()
15091519
break;
15101520
}
15111521

1522+
JITDUMP(" Next IV is: ");
1523+
DBEXEC(VERBOSE, nextIV->Dump(m_comp));
1524+
JITDUMP("\n");
1525+
15121526
assert(nextIV != nullptr);
15131527

15141528
if (varTypeIsGC(nextIV->Type) && !StaysWithinManagedObject(nextCursors, nextIV))
@@ -1950,6 +1964,30 @@ void StrengthReductionContext::ExpandStoredCursors(ArrayStack<CursorInfo>* curso
19501964
}
19511965
}
19521966

1967+
//------------------------------------------------------------------------
// Gcd: Compute the greatest common divisor of two values using the
// Euclidean algorithm.
//
// Type parameters:
//   T - Integral type of the operands
//
// Parameters:
//   a - First value
//   b - Second value
//
// Returns:
//   Greatest common divisor. The sign of the result is whatever falls out of
//   the truncating remainder (for example Gcd(4, -6) == -2), so callers that
//   care must handle negative results. Gcd(0, 0) == 0.
//
// Remarks:
//   For signed T the expression "min % -1" overflows and is undefined
//   behavior, even though the mathematical remainder is 0. A divisor of -1
//   is therefore handled without evaluating the remainder operator; the
//   result for every other input is unchanged.
//
template <typename T>
static T Gcd(T a, T b)
{
    // Only true for signed T; for unsigned T the cast yields the maximum
    // value, which must go through the regular remainder computation.
    const bool isSignedType = static_cast<T>(-1) < static_cast<T>(0);

    while (a != 0)
    {
        // Anything modulo -1 is 0; skipping the operator avoids the
        // undefined "min % -1" case.
        const bool divisorIsMinusOne = isSignedType && (a == static_cast<T>(-1));

        T remainder = divisorIsMinusOne ? static_cast<T>(0) : static_cast<T>(b % a);

        b = a;
        a = remainder;
    }

    return b;
}
1990+
19531991
//------------------------------------------------------------------------
19541992
// CheckAdvancedCursors: Check whether the specified advanced cursors still
19551993
// represent a valid set of cursors to introduce a new primary IV for.
@@ -1963,22 +2001,38 @@ void StrengthReductionContext::ExpandStoredCursors(ArrayStack<CursorInfo>* curso
19632001
// True if all cursors still represent a common derived IV and would be
19642002
// replaceable by a new primary IV computing it.
19652003
//
1966-
// Remarks:
1967-
// This function may remove cursors from m_cursors1 and m_cursors2 if it
1968-
// decides to no longer consider some cursors for strength reduction.
1969-
//
19702004
bool StrengthReductionContext::CheckAdvancedCursors(ArrayStack<CursorInfo>* cursors, ScevAddRec** nextIV)
19712005
{
1972-
*nextIV = nullptr;
2006+
*nextIV = nullptr;
2007+
bool allowRephrasingNextIV = true;
19732008

19742009
for (int i = 0; i < cursors->Height(); i++)
19752010
{
19762011
CursorInfo& cursor = cursors->BottomRef(i);
19772012

1978-
if ((cursor.IV != nullptr) && ((*nextIV == nullptr) || Scev::Equals(cursor.IV, *nextIV)))
2013+
if (cursor.IV != nullptr)
19792014
{
1980-
*nextIV = cursor.IV;
1981-
continue;
2015+
bool allowRephrasingViaScaling = true;
2016+
#ifdef TARGET_ARM64
2017+
// On arm64 we break address modes if we have to scale, so disallow that.
2018+
allowRephrasingViaScaling = !cursor.Tree->IsPartOfAddressMode();
2019+
#endif
2020+
2021+
if (*nextIV == nullptr)
2022+
{
2023+
*nextIV = cursor.IV;
2024+
allowRephrasingNextIV = allowRephrasingViaScaling;
2025+
continue;
2026+
}
2027+
2028+
ScevAddRec* rephrasableAddRec =
2029+
ComputeRephrasableIV(cursor.IV, allowRephrasingViaScaling, *nextIV, allowRephrasingNextIV);
2030+
if (rephrasableAddRec != nullptr)
2031+
{
2032+
*nextIV = rephrasableAddRec;
2033+
allowRephrasingNextIV &= allowRephrasingViaScaling;
2034+
continue;
2035+
}
19822036
}
19832037

19842038
JITDUMP(" [%d] does not match; will not advance\n", i);
@@ -1988,6 +2042,174 @@ bool StrengthReductionContext::CheckAdvancedCursors(ArrayStack<CursorInfo>* curs
19882042
return *nextIV != nullptr;
19892043
}
19902044

2045+
//------------------------------------------------------------------------
2046+
// ComputeRephrasableIVWByScaling:
2047+
// Compute an IV that both "iv1" and "iv2" can be rephrased in terms of via
2048+
// scaling, assuming their step values do not match.
2049+
//
2050+
// Parameters:
2051+
// iv1 - First IV
2052+
// iv2 - Second IV
2053+
//
2054+
// Returns:
2055+
// The IV, or nullptr if no IV could be computed.
2056+
//
2057+
template <typename T>
2058+
ScevAddRec* StrengthReductionContext::ComputeRephrasableIVByScaling(ScevAddRec* iv1,
2059+
bool allowRephrasingByScalingIV1,
2060+
ScevAddRec* iv2,
2061+
bool allowRephrasingByScalingIV2)
2062+
{
2063+
// To rephrase the IVs we will need to scale them up. This requires the
2064+
// start value to be 0 since that starting value will be scaled too.
2065+
int64_t start;
2066+
if (!iv1->Start->GetConstantValue(m_comp, &start) || ((T)start != 0) ||
2067+
!iv2->Start->GetConstantValue(m_comp, &start) || ((T)start != 0))
2068+
{
2069+
return nullptr;
2070+
}
2071+
2072+
int64_t iv1Step;
2073+
int64_t iv2Step;
2074+
if (!iv1->Step->GetConstantValue(m_comp, &iv1Step) || !iv2->Step->GetConstantValue(m_comp, &iv2Step))
2075+
{
2076+
return nullptr;
2077+
}
2078+
2079+
T gcd = Gcd((T)iv1Step, (T)iv2Step);
2080+
2081+
if ((!allowRephrasingByScalingIV1 && (gcd != (T)iv1Step)) || (!allowRephrasingByScalingIV2 && (gcd != (T)iv2Step)))
2082+
{
2083+
return nullptr;
2084+
}
2085+
2086+
// Commonly one step value divides the other.
2087+
if (gcd == (T)iv1Step)
2088+
{
2089+
return iv1;
2090+
}
2091+
if (gcd == (T)iv2Step)
2092+
{
2093+
return iv2;
2094+
}
2095+
if ((gcd == 1) || (gcd == -1))
2096+
{
2097+
return nullptr;
2098+
}
2099+
2100+
return m_scevContext.NewAddRec(iv1->Start, m_scevContext.NewConstant(iv1->Type, gcd));
2101+
}
2102+
2103+
//------------------------------------------------------------------------
2104+
// ComputeRephrasableIV:
2105+
// Compute an IV that both "iv1" and "iv2" can be rephrased in terms of.
2106+
//
2107+
// Parameters:
2108+
// iv1 - First IV
2109+
// allowRephrasingByScalingIV1 - Whether we should allow rephrasing IV1 by scaling.
2110+
// iv2 - Second IV
2111+
// allowRephrasingByScalingIV2 - Whether we should allow rephrasing IV2 by scaling.
2112+
//
2113+
// Returns:
2114+
// The IV, or nullptr if no IV could be computed.
2115+
//
2116+
ScevAddRec* StrengthReductionContext::ComputeRephrasableIV(ScevAddRec* iv1,
2117+
bool allowRephrasingByScalingIV1,
2118+
ScevAddRec* iv2,
2119+
bool allowRephrasingByScalingIV2)
2120+
{
2121+
if (!Scev::Equals(iv1->Start, iv2->Start))
2122+
{
2123+
return nullptr;
2124+
}
2125+
2126+
if (Scev::Equals(iv1->Step, iv2->Step))
2127+
{
2128+
return iv1;
2129+
}
2130+
2131+
// Steps are not equal. However, if they have gcd > 1 it is still expected
2132+
// to be profitable to rewrite in terms of such a new IV.
2133+
if (iv1->Type == TYP_INT)
2134+
{
2135+
return ComputeRephrasableIVByScaling<int32_t>(iv1, allowRephrasingByScalingIV1, iv2,
2136+
allowRephrasingByScalingIV2);
2137+
}
2138+
2139+
if (iv1->Type == TYP_LONG)
2140+
{
2141+
return ComputeRephrasableIVByScaling<int64_t>(iv1, allowRephrasingByScalingIV1, iv2,
2142+
allowRephrasingByScalingIV2);
2143+
}
2144+
2145+
return nullptr;
2146+
}
2147+
2148+
//------------------------------------------------------------------------
2149+
// RephraseIV:
2150+
// Given an IV and a source IV with a tree that computes that source IV,
2151+
// compute a tree that calculates "iv" based on the source IV. Requires the
2152+
// source IV to have been computed via ComputeRephrasableIV.
2153+
//
2154+
// Parameters:
2155+
// iv - IV to rephrase in terms of the source IV
2156+
// sourceIV - Source IV
2157+
// sourceTree - Tree computing the source IV
2158+
//
2159+
// Returns:
2160+
// A tree computing "iv" via "sourceTree".
2161+
//
2162+
GenTree* StrengthReductionContext::RephraseIV(ScevAddRec* iv, ScevAddRec* sourceIV, GenTree* sourceTree)
2163+
{
2164+
assert(Scev::Equals(iv->Start, sourceIV->Start));
2165+
2166+
if (Scev::Equals(iv->Step, sourceIV->Step))
2167+
{
2168+
return sourceTree;
2169+
}
2170+
2171+
int64_t ivStep = 0;
2172+
int64_t sourceIVStep = 0;
2173+
if (!iv->Step->GetConstantValue(m_comp, &ivStep) || !sourceIV->Step->GetConstantValue(m_comp, &sourceIVStep))
2174+
{
2175+
unreached();
2176+
}
2177+
2178+
assert(iv->Type == sourceIV->Type);
2179+
2180+
if (iv->Type == TYP_INT)
2181+
{
2182+
assert((int32_t)ivStep % (int32_t)sourceIVStep == 0);
2183+
int32_t scale = (int32_t)ivStep / (int32_t)sourceIVStep;
2184+
if (isPow2(scale))
2185+
{
2186+
return m_comp->gtNewOperNode(GT_LSH, TYP_INT, sourceTree,
2187+
m_comp->gtNewIconNode(BitOperations::Log2((uint32_t)scale)));
2188+
}
2189+
else
2190+
{
2191+
return m_comp->gtNewOperNode(GT_MUL, TYP_INT, sourceTree, m_comp->gtNewIconNode(scale));
2192+
}
2193+
}
2194+
2195+
if (iv->Type == TYP_LONG)
2196+
{
2197+
assert(ivStep % sourceIVStep == 0);
2198+
int64_t scale = ivStep / sourceIVStep;
2199+
if (isPow2(scale))
2200+
{
2201+
return m_comp->gtNewOperNode(GT_LSH, TYP_LONG, sourceTree,
2202+
m_comp->gtNewLconNode(BitOperations::Log2((uint64_t)scale)));
2203+
}
2204+
else
2205+
{
2206+
return m_comp->gtNewOperNode(GT_MUL, TYP_LONG, sourceTree, m_comp->gtNewLconNode(scale));
2207+
}
2208+
}
2209+
2210+
unreached();
2211+
}
2212+
19912213
//------------------------------------------------------------------------
19922214
// StaysWithinManagedObject: Check whether the specified GC-pointer add-rec can
19932215
// be guaranteed to be inside the same managed object for the whole loop.
@@ -2211,6 +2433,7 @@ bool StrengthReductionContext::TryReplaceUsesWithNewPrimaryIV(ArrayStack<CursorI
22112433
{
22122434
CursorInfo& cursor = cursors->BottomRef(i);
22132435
GenTree* newUse = m_comp->gtNewLclVarNode(newPrimaryIV, iv->Type);
2436+
newUse = RephraseIV(cursor.IV, iv, newUse);
22142437

22152438
JITDUMP(" Replacing use [%06u] with [%06u]. Before:\n", Compiler::dspTreeID(cursor.Tree),
22162439
Compiler::dspTreeID(newUse));

src/coreclr/jit/morph.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3402,7 +3402,7 @@ void Compiler::fgMoveOpsLeft(GenTree* tree)
34023402
}
34033403

34043404
// Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators
3405-
if (((oper == GT_ADD) || (oper == GT_MUL)) && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0))
3405+
if (tree->IsPartOfAddressMode())
34063406
{
34073407
return;
34083408
}

src/coreclr/jit/optcse.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1825,7 +1825,7 @@ bool CSE_HeuristicCommon::CanConsiderTree(GenTree* tree, bool isReturn)
18251825
case GT_ADD: // Check for ADDRMODE flag on these Binary Operators
18261826
case GT_MUL:
18271827
case GT_LSH:
1828-
if ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0)
1828+
if (tree->IsPartOfAddressMode())
18291829
{
18301830
return false;
18311831
}

0 commit comments

Comments
 (0)