Skip to content

Commit a1bf79e

Browse files
Cloning improvements (#66257)
* Loop cloning improvements Fix various comments * Remove loop cloning var initialization condition Assume that any pre-existing initialization is ok. Check it against zero if necessary. Const inits remain as before. Lots of diffs due to more cloning for cases of `for (i = expression...` where `expression` is not just a constant or local var. * Feedback
1 parent a1f26fb commit a1bf79e

File tree

6 files changed

+118
-180
lines changed

6 files changed

+118
-180
lines changed

src/coreclr/jit/compiler.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,6 @@ unsigned totalLoopCount; // counts the total number of natural loops
319319
unsigned totalUnnatLoopCount; // counts the total number of (not-necessarily natural) loops
320320
unsigned totalUnnatLoopOverflows; // # of methods that identified more unnatural loops than we can represent
321321
unsigned iterLoopCount; // counts the # of loops with an iterator (for like)
322-
unsigned simpleTestLoopCount; // counts the # of loops with an iterator and a simple loop condition (iter < const)
323322
unsigned constIterLoopCount; // counts the # of loops with a constant iterator (for like)
324323
bool hasMethodLoops; // flag to keep track if we already counted a method as having loops
325324
unsigned loopsThisMethod; // counts the number of loops in the current method
@@ -1556,7 +1555,6 @@ void Compiler::compShutdown()
15561555
fprintf(fout, "Total number of 'unnatural' loops is %5u\n", totalUnnatLoopCount);
15571556
fprintf(fout, "# of methods overflowing unnat loop limit is %5u\n", totalUnnatLoopOverflows);
15581557
fprintf(fout, "Total number of loops with an iterator is %5u\n", iterLoopCount);
1559-
fprintf(fout, "Total number of loops with a simple iterator is %5u\n", simpleTestLoopCount);
15601558
fprintf(fout, "Total number of loops with a constant iterator is %5u\n", constIterLoopCount);
15611559

15621560
fprintf(fout, "--------------------------------------------------\n");

src/coreclr/jit/compiler.h

Lines changed: 19 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2498,13 +2498,13 @@ enum LoopFlags : unsigned short
24982498

24992499
// LPFLG_UNUSED = 0x0001,
25002500
// LPFLG_UNUSED = 0x0002,
2501-
LPFLG_ITER = 0x0004, // loop of form: for (i = icon or lclVar; test_condition(); i++)
2501+
LPFLG_ITER = 0x0004, // loop of form: for (i = icon or expression; test_condition(); i++)
25022502
// LPFLG_UNUSED = 0x0008,
25032503

25042504
LPFLG_CONTAINS_CALL = 0x0010, // If executing the loop body *may* execute a call
2505-
LPFLG_VAR_INIT = 0x0020, // iterator is initialized with a local var (var # found in lpVarInit)
2506-
LPFLG_CONST_INIT = 0x0040, // iterator is initialized with a constant (found in lpConstInit)
2507-
LPFLG_SIMD_LIMIT = 0x0080, // iterator is compared with vector element count (found in lpConstLimit)
2505+
// LPFLG_UNUSED = 0x0020,
2506+
LPFLG_CONST_INIT = 0x0040, // iterator is initialized with a constant (found in lpConstInit)
2507+
LPFLG_SIMD_LIMIT = 0x0080, // iterator is compared with vector element count (found in lpConstLimit)
25082508

25092509
LPFLG_VAR_LIMIT = 0x0100, // iterator is compared with a local var (var # found in lpVarLimit)
25102510
LPFLG_CONST_LIMIT = 0x0200, // iterator is compared with a constant (found in lpConstLimit)
@@ -6929,16 +6929,11 @@ class Compiler
69296929

69306930
var_types lpIterOperType() const; // For overflow instructions
69316931

6932-
// Set to the block where we found the initialization for LPFLG_CONST_INIT or LPFLG_VAR_INIT loops.
6932+
// Set to the block where we found the initialization for LPFLG_CONST_INIT loops.
69336933
// Initially, this will be 'head', but 'head' might change if we insert a loop pre-header block.
69346934
BasicBlock* lpInitBlock;
69356935

6936-
union {
6937-
int lpConstInit; // initial constant value of iterator
6938-
// : Valid if LPFLG_CONST_INIT
6939-
unsigned lpVarInit; // initial local var number to which we initialize the iterator
6940-
// : Valid if LPFLG_VAR_INIT
6941-
};
6936+
int lpConstInit; // initial constant value of iterator : Valid if LPFLG_CONST_INIT
69426937

69436938
// The following is for LPFLG_ITER loops only (i.e. the loop condition is "i RELOP const or var")
69446939

@@ -12068,21 +12063,19 @@ extern Histogram bbOneBBSizeTable;
1206812063

1206912064
#if COUNT_LOOPS
1207012065

12071-
extern unsigned totalLoopMethods; // counts the total number of methods that have natural loops
12072-
extern unsigned maxLoopsPerMethod; // counts the maximum number of loops a method has
12073-
extern unsigned totalLoopOverflows; // # of methods that identified more loops than we can represent
12074-
extern unsigned totalLoopCount; // counts the total number of natural loops
12075-
extern unsigned totalUnnatLoopCount; // counts the total number of (not-necessarily natural) loops
12076-
extern unsigned totalUnnatLoopOverflows; // # of methods that identified more unnatural loops than we can represent
12077-
extern unsigned iterLoopCount; // counts the # of loops with an iterator (for like)
12078-
extern unsigned simpleTestLoopCount; // counts the # of loops with an iterator and a simple loop condition (iter <
12079-
// const)
12080-
extern unsigned constIterLoopCount; // counts the # of loops with a constant iterator (for like)
12081-
extern bool hasMethodLoops; // flag to keep track if we already counted a method as having loops
12082-
extern unsigned loopsThisMethod; // counts the number of loops in the current method
12083-
extern bool loopOverflowThisMethod; // True if we exceeded the max # of loops in the method.
12084-
extern Histogram loopCountTable; // Histogram of loop counts
12085-
extern Histogram loopExitCountTable; // Histogram of loop exit counts
12066+
extern unsigned totalLoopMethods; // counts the total number of methods that have natural loops
12067+
extern unsigned maxLoopsPerMethod; // counts the maximum number of loops a method has
12068+
extern unsigned totalLoopOverflows; // # of methods that identified more loops than we can represent
12069+
extern unsigned totalLoopCount; // counts the total number of natural loops
12070+
extern unsigned totalUnnatLoopCount; // counts the total number of (not-necessarily natural) loops
12071+
extern unsigned totalUnnatLoopOverflows; // # of methods that identified more unnatural loops than we can represent
12072+
extern unsigned iterLoopCount; // counts the # of loops with an iterator (for like)
12073+
extern unsigned constIterLoopCount; // counts the # of loops with a constant iterator (for like)
12074+
extern bool hasMethodLoops; // flag to keep track if we already counted a method as having loops
12075+
extern unsigned loopsThisMethod; // counts the number of loops in the current method
12076+
extern bool loopOverflowThisMethod; // True if we exceeded the max # of loops in the method.
12077+
extern Histogram loopCountTable; // Histogram of loop counts
12078+
extern Histogram loopExitCountTable; // Histogram of loop exit counts
1208612079

1208712080
#endif // COUNT_LOOPS
1208812081

src/coreclr/jit/fgdiagnostic.cpp

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3781,13 +3781,10 @@ void Compiler::fgDebugCheckLoopTable()
37813781
}
37823782

37833783
// Check the flags.
3784-
// Note that the various init/limit flags are only used when LPFLG_ITER is set, but they are set first,
3784+
// Note that the various limit flags are only used when LPFLG_ITER is set, but they are set first,
37853785
// separately, and only if everything works out is LPFLG_ITER set. If LPFLG_ITER is NOT set, the
37863786
// individual flags are not un-set (arguably, they should be).
37873787

3788-
// Only one of the `init` flags can be set.
3789-
assert(genCountBits((unsigned)(loop.lpFlags & (LPFLG_VAR_INIT | LPFLG_CONST_INIT))) <= 1);
3790-
37913788
// Only one of the `limit` flags can be set. (Note that LPFLG_SIMD_LIMIT is a "sub-flag" that can be
37923789
// set when LPFLG_CONST_LIMIT is set.)
37933790
assert(genCountBits((unsigned)(loop.lpFlags & (LPFLG_VAR_LIMIT | LPFLG_CONST_LIMIT | LPFLG_ARRLEN_LIMIT))) <=
@@ -3799,14 +3796,9 @@ void Compiler::fgDebugCheckLoopTable()
37993796
assert(loop.lpFlags & LPFLG_CONST_LIMIT);
38003797
}
38013798

3802-
if (loop.lpFlags & (LPFLG_CONST_INIT | LPFLG_VAR_INIT))
3799+
if (loop.lpFlags & LPFLG_CONST_INIT)
38033800
{
38043801
assert(loop.lpInitBlock != nullptr);
3805-
3806-
if (loop.lpFlags & LPFLG_VAR_INIT)
3807-
{
3808-
assert(loop.lpVarInit < lvaCount);
3809-
}
38103802
}
38113803

38123804
if (loop.lpFlags & LPFLG_ITER)

src/coreclr/jit/loopcloning.cpp

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1038,17 +1038,18 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext
10381038
if (loop->lpFlags & LPFLG_CONST_INIT)
10391039
{
10401040
// Only allowing non-negative const init at this time.
1041-
// REVIEW: why?
1041+
// This is because the variable initialized with this constant will be used as an array index,
1042+
// and array indices must be non-negative.
10421043
if (loop->lpConstInit < 0)
10431044
{
10441045
JITDUMP("> Init %d is invalid\n", loop->lpConstInit);
10451046
return false;
10461047
}
10471048
}
1048-
else if (loop->lpFlags & LPFLG_VAR_INIT)
1049+
else
10491050
{
1050-
// initVar >= 0
1051-
const unsigned initLcl = loop->lpVarInit;
1051+
// iterVar >= 0
1052+
const unsigned initLcl = loop->lpIterVar();
10521053
if (!genActualTypeIsInt(lvaGetDesc(initLcl)))
10531054
{
10541055
JITDUMP("> Init var V%02u not compatible with TYP_INT\n", initLcl);
@@ -1059,11 +1060,6 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext
10591060
LC_Expr(LC_Ident(0, LC_Ident::Const)));
10601061
context->EnsureConditions(loopNum)->Push(geZero);
10611062
}
1062-
else
1063-
{
1064-
JITDUMP("> Not variable init\n");
1065-
return false;
1066-
}
10671063

10681064
// Limit Conditions
10691065
LC_Ident ident;
@@ -1096,7 +1092,7 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext
10961092
ArrIndex* index = new (getAllocator(CMK_LoopClone)) ArrIndex(getAllocator(CMK_LoopClone));
10971093
if (!loop->lpArrLenLimit(this, index))
10981094
{
1099-
JITDUMP("> ArrLen not matching");
1095+
JITDUMP("> ArrLen not matching\n");
11001096
return false;
11011097
}
11021098
ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen));
@@ -1823,8 +1819,7 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
18231819

18241820
// We're going to transform this loop:
18251821
//
1826-
// H --> E (or, H conditionally branches around the loop and has fall-through to F == T == E)
1827-
// F
1822+
// H --> E (or, H conditionally branches around the loop and has fall-through to T == E)
18281823
// T
18291824
// E
18301825
// B ?-> T
@@ -1833,14 +1828,12 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
18331828
// to this pair of loops:
18341829
//
18351830
// H ?-> H3 (all loop failure conditions branch to new slow path loop head)
1836-
// H2--> E (Optional; if E == T == F, let H fall through to F/T/E)
1837-
// F
1831+
// H2--> E (Optional; if T == E, let H fall through to T/E)
18381832
// T
18391833
// E
18401834
// B ?-> T
18411835
// X2--> X
1842-
// H3 --> E2 (aka slowHead. Or, H3 falls through to F2 == T2 == E2)
1843-
// F2
1836+
// H3 --> E2 (aka slowHead. Or, H3 falls through to T2 == E2)
18441837
// T2
18451838
// E2
18461839
// B2 ?-> T2
@@ -1908,7 +1901,7 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
19081901
BasicBlock* slowHeadPrev = newPred;
19091902

19101903
// Now we'll make "h2", after "h" to go to "e" -- unless the loop is a do-while,
1911-
// so that "h" already falls through to "e" (e == t == f).
1904+
// so that "h" already falls through to "e" (e == t).
19121905
// It might look like this code is unreachable, since "h" must be a BBJ_ALWAYS, but
19131906
// later we will change "h" to a BBJ_COND along with a set of loop conditions.
19141907
// TODO: it still might be unreachable, since cloning currently is restricted to "do-while" loop forms.
@@ -2100,7 +2093,7 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
21002093

21012094
BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, slowHead, h);
21022095

2103-
// Add the fall-through path pred (either to F/T/E for fall-through from conditions to fast path,
2096+
// Add the fall-through path pred (either to T/E for fall-through from conditions to fast path,
21042097
// or H2 if branch to E of fast path).
21052098
assert(condLast->bbJumpKind == BBJ_COND);
21062099
JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", condLast->bbNum, condLast->bbNext->bbNum);

src/coreclr/jit/loopcloning.h

Lines changed: 14 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -138,45 +138,37 @@ exception occurs.
138138
1. Loop detection has completed and the loop table is populated.
139139
140140
2. The loops that will be considered are the ones with the LPFLG_ITER flag:
141-
"for (i = icon or lclVar; test_condition(); i++)"
141+
"for ( ; test_condition(); i++)"
142142
143143
Limitations
144144
145-
1. For array based optimizations the loop choice condition is checked
146-
before the loop body. This implies that the loop initializer statement
147-
has not executed at the time of the check. So any loop cloning condition
148-
involving the initial value of the loop counter cannot be condition checked
149-
as it hasn't been assigned yet at the time of condition checking. Therefore
150-
the initial value has to be statically known. This can be fixed with further
151-
effort.
152-
153-
2. Loops containing nested exception handling regions are not cloned. (Cloning them
145+
1. Loops containing nested exception handling regions are not cloned. (Cloning them
154146
would require creating new exception handling regions for the cloned loop, which
155147
is "hard".) There are a few other EH-related edge conditions that also cause us to
156148
reject cloning.
157149
158-
3. If the loop contains RETURN blocks, and cloning those would push us over the maximum
150+
2. If the loop contains RETURN blocks, and cloning those would push us over the maximum
159151
number of allowed RETURN blocks in the function (either due to GC info encoding limitations
160152
or otherwise), we reject cloning.
161153
162-
4. Loop increment must be `i += 1`
154+
3. Loop increment must be `i += 1`
163155
164-
5. Loop test must be `i < x` where `x` is a constant, a variable, or `a.Length` for array `a`
156+
4. Loop test must be `i < x` or `i <= x` where `x` is a constant, a variable, or `a.Length` for array `a`
165157
166-
(There is some implementation support for decrementing loops, but it is incomplete.
167-
There is some implementation support for `i <= x` conditions, but it is incomplete
168-
(Compiler::optDeriveLoopCloningConditions() only handles GT_LT conditions))
158+
(There is some implementation support for decrementing loops, but it is incomplete.)
169159
170-
6. Loop must have been converted to a do-while form.
160+
5. Loop must have been converted to a do-while form.
171161
172-
7. There are a few other loop well-formedness conditions.
162+
6. There are a few other loop well-formedness conditions.
173163
174-
8. Multi-dimensional (non-jagged) loop index checking is only partially implemented.
164+
7. Multi-dimensional (non-jagged) loop index checking is only partially implemented.
175165
176-
9. Constant initializations and constant limits must be non-negative (REVIEW: why? The
177-
implementation does use `unsigned` to represent them.)
166+
8. Constant initializations and constant limits must be non-negative. This is because the
167+
iterator variable will be used as an array index, and array indices must be non-negative.
168+
For non-constant (or not found) iterator variable `i` initialization, we add a dynamic check that
169+
`i >= 0`. Constant initializations can be checked statically.
178170
179-
10. The cloned loop (the slow path) is not added to the loop table, meaning certain
171+
9. The cloned loop (the slow path) is not added to the loop table, meaning certain
180172
downstream optimization passes do not see them. See
181173
https://github.com/dotnet/runtime/issues/43713.
182174

0 commit comments

Comments
 (0)