diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 6b7dc5ddfd8089..2a2dc567e0b098 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -2233,7 +2233,9 @@ class FlowGraphNaturalLoop
     bool HasDef(unsigned lclNum);
 
     bool CanDuplicate(INDEBUG(const char** reason));
+    bool CanDuplicateWithEH(INDEBUG(const char** reason));
     void Duplicate(BasicBlock** insertAfter, BlockToBlockMap* map, weight_t weightScale);
+    void DuplicateWithEH(BasicBlock** insertAfter, BlockToBlockMap* map, weight_t weightScale);
 
     bool MayExecuteBlockMultipleTimesPerIteration(BasicBlock* block);
 
@@ -2557,6 +2559,29 @@ struct RelopImplicationInfo
     bool reverseSense = false;
 };
 
+//------------------------------------------------------------------------
+// CloneTryInfo
+//
+// Describes information needed to clone a try region, and information
+// produced by cloning that region
+//
+struct CloneTryInfo
+{
+    CloneTryInfo(Compiler* comp);
+
+    // bbID based traits and vector
+    //
+    BitVecTraits Traits;
+    BitVec       Visited;
+
+    BlockToBlockMap*             Map                       = nullptr; // [in, out] original block -> cloned block
+    jitstd::vector<BasicBlock*>* BlocksToClone             = nullptr; // [optional] receives the blocks to clone
+    weight_t                     ProfileScale              = 0.0;     // weight scale applied to cloned blocks
+    unsigned                     EHIndexShift              = 0;       // [out] number of EH regions added
+    bool                         AddEdges                  = false;   // set cloned block targets and flow edges
+    bool                         ScaleOriginalBlockProfile = false;   // scale originals by (1 - ProfileScale)
+};
+
 /*
 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
@@ -3001,7 +3026,7 @@ class Compiler
 
     void fgRemoveEHTableEntry(unsigned XTnum);
 
-    EHblkDsc* fgAddEHTableEntry(unsigned XTnum);
+    EHblkDsc* fgTryAddEHTableEntries(unsigned XTnum, unsigned count = 1, bool deferAdding = false);
 
     void fgSortEHTable();
 
@@ -5359,6 +5384,10 @@ class Compiler
 
     PhaseStatus fgCloneFinally();
 
+    bool fgCanCloneTryRegion(BasicBlock* tryEntry);
+
+    BasicBlock* fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, BasicBlock** insertAfter = nullptr);
+
     void fgUpdateACDsBeforeEHTableEntryRemoval(unsigned XTnum);
 
     void fgCleanupContinuation(BasicBlock* continuation);
@@ -12263,6 +12292,13 @@ class EHClauses
         assert((m_begin != nullptr) || (m_begin == m_end));
     }
 
+    EHClauses(Compiler* comp, EHblkDsc* begin)
+        : m_begin(begin)
+        , m_end(comp->compHndBBtab + comp->compHndBBtabCount)
+    {
+        assert((m_begin != nullptr) || (m_begin == m_end));
+    }
+
     iterator begin() const
     {
         return iterator(m_begin);
diff --git a/src/coreclr/jit/compmemkind.h b/src/coreclr/jit/compmemkind.h
index 959176dcc965ad..6de55d070a6e9e 100644
--- a/src/coreclr/jit/compmemkind.h
+++ b/src/coreclr/jit/compmemkind.h
@@ -65,6 +65,7 @@ CompMemKindMacro(EarlyProp)
 CompMemKindMacro(ZeroInit)
 CompMemKindMacro(Pgo)
 CompMemKindMacro(MaskConversionOpt)
+CompMemKindMacro(TryRegionClone)
 //clang-format on
 
 #undef CompMemKindMacro
diff --git a/src/coreclr/jit/fgehopt.cpp b/src/coreclr/jit/fgehopt.cpp
index bb59ddccf6ecc4..5ed56c2beea0a4 100644
--- a/src/coreclr/jit/fgehopt.cpp
+++ b/src/coreclr/jit/fgehopt.cpp
@@ -2461,3 +2461,698 @@ PhaseStatus Compiler::fgTailMergeThrows()
     fgModified = false;
     return PhaseStatus::MODIFIED_EVERYTHING;
 }
+
+//------------------------------------------------------------------------
+// fgCloneTryRegion: clone a try region
+//
+// Arguments:
+//    tryEntry     -- try entry block
+//    info         -- [in, out] information about the cloning
+//    insertAfter  -- [in, out] pointer to block to insert new blocks after
+//
+// Returns:
+//    If insertAfter == nullptr, check if cloning is possible
+//      return nullptr if not, tryEntry if so
+//    else
+//      Return the cloned try entry, or nullptr if cloning failed
+//         cloned blocks will be created and scaled by profile weight
+//         and if info.AddEdges is true have proper bbkinds and flow edges
+//      info data will be updated:
+//         Map will be modified to contain keys and values for the blocks cloned
+//         Visited will include bits (by bbID) for each original block that was cloned
+//         EHIndexShift will describe number of EH regions added
+//      insertAfter will point at the lexically last block cloned
+//
+// Notes:
+//   * if insertAfter is non null, info.Map must also be non null
+//
+//   * If info.Map is not nullptr, it is not modified unless cloning succeeds
+//     When cloning succeeds, entries for the try blocks and related blocks
+//     (handler, filter, callfinally) will be updated; other map entries will
+//     be left as they were
+//
+//   * the insertion point must be lexically after the original try region
+//     and be a block in the enclosing region for the original try.
+//
+//   * If cloning and adding edges,
+//     The new try region entry will not be reachable by any uncloned block.
+//     The new try region exits will target the same uncloned blocks as the original,
+//     or as directed by pre-existing map entries.
+//
+BasicBlock* Compiler::fgCloneTryRegion(BasicBlock* tryEntry, CloneTryInfo& info, BasicBlock** insertAfter)
+{
+    assert(bbIsTryBeg(tryEntry));
+    bool const deferCloning = (insertAfter == nullptr);
+    assert(deferCloning || ((*insertAfter != nullptr) && (info.Map != nullptr)));
+    INDEBUG(const char* msg = deferCloning ? "Checking if it is possible to clone" : "Cloning";)
+    JITDUMP("%s the try region EH#%02u headed by " FMT_BB "\n", msg, tryEntry->getTryIndex(), tryEntry->bbNum);
+
+    // Determine the extent of cloning.
+    //
+    // We need to clone the entire try region plus any
+    // enclosed regions and any enclosing mutual protect regions,
+    // plus all the associated handlers and filters and any
+    // regions they enclose, plus any callfinallies that follow.
+    //
+    // This is necessary because try regions can't have multiple entries, or
+    // share parts in any meaningful way.
+    //
+    CompAllocator        alloc = getAllocator(CMK_TryRegionClone);
+    ArrayStack<unsigned> regionsToProcess(alloc);
+    unsigned const       tryIndex              = tryEntry->getTryIndex();
+    unsigned             numberOfBlocksToClone = 0;
+
+    // Track blocks to clone for caller, or if we are cloning and
+    // caller doesn't care.
+    //
+    jitstd::vector<BasicBlock*>* blocks = info.BlocksToClone;
+    if (!deferCloning && (blocks == nullptr))
+    {
+        blocks = new (alloc) jitstd::vector<BasicBlock*>(alloc);
+    }
+
+    unsigned               regionCount = 0;
+    BitVecTraits* const    traits      = &info.Traits;
+    BitVec&                visited     = info.Visited;
+    BlockToBlockMap* const map         = info.Map;
+
+    auto addBlockToClone = [=, &blocks, &visited, &numberOfBlocksToClone](BasicBlock* block, const char* msg) {
+        if (!BitVecOps::TryAddElemD(traits, visited, block->bbID))
+        {
+            return false;
+        }
+
+        JITDUMP(" %s block " FMT_BB "\n", msg, block->bbNum);
+
+        numberOfBlocksToClone++;
+
+        if (blocks != nullptr)
+        {
+            blocks->push_back(block);
+        }
+        return true;
+    };
+
+    JITDUMP("==> try EH#%02u\n", tryIndex);
+    regionsToProcess.Push(tryIndex);
+
+    // Walk through each try region
+    //
+    while (regionsToProcess.Height() > 0)
+    {
+        regionCount++;
+        unsigned const  regionIndex = regionsToProcess.Pop();
+        EHblkDsc* const ebd         = ehGetDsc(regionIndex);
+        JITDUMP("== processing try EH#%02u\n", regionIndex);
+
+        // Walk the try region
+        //
+        BasicBlock* const firstTryBlock = ebd->ebdTryBeg;
+        BasicBlock* const lastTryBlock  = ebd->ebdTryLast;
+
+        if (BitVecOps::IsMember(traits, visited, firstTryBlock->bbID))
+        {
+            JITDUMP("already walked try region for EH#%02u\n", regionIndex);
+            assert(BitVecOps::IsMember(traits, visited, lastTryBlock->bbID));
+        }
+        else
+        {
+            JITDUMP("walking try region for EH#%02u\n", regionIndex);
+            for (BasicBlock* const block : Blocks(firstTryBlock, lastTryBlock))
+            {
+                bool added = addBlockToClone(block, "try region");
+                if (bbIsTryBeg(block) && (block != ebd->ebdTryBeg))
+                {
+                    assert(added);
+                    JITDUMP("==> found try EH#%02u nested in try EH#%02u region at " FMT_BB "\n", block->getTryIndex(),
+                            regionIndex, block->bbNum);
+                    regionsToProcess.Push(block->getTryIndex());
+                }
+            }
+        }
+
+        // Walk the callfinally region
+        //
+        if (ebd->HasFinallyHandler())
+        {
+            BasicBlock* firstCallFinallyRangeBlock = nullptr;
+            BasicBlock* lastCallFinallyRangeBlock  = nullptr;
+            ehGetCallFinallyBlockRange(regionIndex, &firstCallFinallyRangeBlock, &lastCallFinallyRangeBlock);
+
+            // Note this range is potentially quite broad...
+            // Instead perhaps just walk preds of the handler?
+            //
+            JITDUMP("walking callfinally region for EH#%02u [" FMT_BB " ... " FMT_BB "]\n", regionIndex,
+                    firstCallFinallyRangeBlock->bbNum, lastCallFinallyRangeBlock->bbNum);
+
+            for (BasicBlock* const block : Blocks(firstCallFinallyRangeBlock, lastCallFinallyRangeBlock))
+            {
+                if (block->KindIs(BBJ_CALLFINALLY) && block->TargetIs(ebd->ebdHndBeg))
+                {
+                    addBlockToClone(block, "callfinally");
+                }
+                else if (block->KindIs(BBJ_CALLFINALLYRET) && block->Prev()->TargetIs(ebd->ebdHndBeg))
+                {
+                    addBlockToClone(block, "callfinallyret");
+
+#if defined(FEATURE_EH_WINDOWS_X86)
+
+                    // For non-funclet X86 we must also clone the next block after the callfinallyret.
+                    // (it will contain an END_LFIN)
+                    //
+                    if (!UsesFunclets())
+                    {
+                        addBlockToClone(block->GetTarget(), "lfin-continuation");
+                    }
+#endif
+                }
+            }
+        }
+
+        // Walk the filter region
+        //
+        if (ebd->HasFilter())
+        {
+            BasicBlock* const firstFltBlock = ebd->ebdFilter;
+            BasicBlock* const lastFltBlock  = ebd->BBFilterLast();
+
+            if (BitVecOps::IsMember(traits, visited, firstFltBlock->bbID))
+            {
+                JITDUMP("already walked filter region for EH#%02u\n", regionIndex);
+                assert(BitVecOps::IsMember(traits, visited, lastFltBlock->bbID));
+            }
+            else
+            {
+                JITDUMP("walking filter region for EH#%02u\n", regionIndex);
+                for (BasicBlock* const block : Blocks(firstFltBlock, lastFltBlock))
+                {
+                    // A filter cannot enclose another EH region
+                    //
+                    assert(!bbIsTryBeg(block));
+                    addBlockToClone(block, "filter region");
+                }
+            }
+        }
+
+        // Walk the handler region
+        //
+        BasicBlock* const firstHndBlock = ebd->ebdHndBeg;
+        BasicBlock* const lastHndBlock  = ebd->ebdHndLast;
+
+        if (BitVecOps::IsMember(traits, visited, firstHndBlock->bbID))
+        {
+            JITDUMP("already walked handler region for EH#%02u\n", regionIndex);
+            assert(BitVecOps::IsMember(traits, visited, lastHndBlock->bbID));
+        }
+        else
+        {
+            JITDUMP("walking handler region for EH#%02u\n", regionIndex);
+            for (BasicBlock* const block : Blocks(firstHndBlock, lastHndBlock))
+            {
+                bool added = addBlockToClone(block, "handler region");
+                if (bbIsTryBeg(block))
+                {
+                    assert(added);
+                    JITDUMP("==> found try entry for EH#%02u nested in handler at " FMT_BB "\n", block->getTryIndex(),
+                            block->bbNum);
+                    regionsToProcess.Push(block->getTryIndex());
+                }
+            }
+        }
+
+        // If there is an enclosing mutual-protect region, process it as well
+        //
+        unsigned const enclosingTryIndex = ebd->ebdEnclosingTryIndex;
+        if (enclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX)
+        {
+            EHblkDsc* const enclosingTryEbd = ehGetDsc(enclosingTryIndex);
+
+            if (EHblkDsc::ebdIsSameTry(ebd, enclosingTryEbd))
+            {
+                JITDUMP("==> found mutual-protect try EH#%02u for EH#%02u\n", enclosingTryIndex, regionIndex);
+                regionsToProcess.Push(enclosingTryIndex);
+            }
+        }
+
+        JITDUMP("<== finished try EH#%02u\n", regionIndex);
+    }
+
+    // Find the outermost mutual-protect try region that begins at tryEntry
+    //
+    EHblkDsc* const tryEbd            = ehGetDsc(tryIndex);
+    unsigned        outermostTryIndex = tryIndex;
+    unsigned        enclosingTryIndex = EHblkDsc::NO_ENCLOSING_INDEX;
+    {
+        EHblkDsc* outermostEbd = ehGetDsc(outermostTryIndex);
+        while (true)
+        {
+            enclosingTryIndex = outermostEbd->ebdEnclosingTryIndex;
+            if (enclosingTryIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+            {
+                break;
+            }
+            outermostEbd = ehGetDsc(enclosingTryIndex);
+            if (!EHblkDsc::ebdIsSameILTry(outermostEbd, tryEbd))
+            {
+                break;
+            }
+            outermostTryIndex = enclosingTryIndex;
+        }
+    }
+
+    unsigned enclosingHndIndex = EHblkDsc::NO_ENCLOSING_INDEX;
+    if (tryEntry->hasHndIndex())
+    {
+        enclosingHndIndex = tryEntry->getHndIndex();
+    }
+
+    // Now blocks contains an entry for each block to clone.
+    //
+    JITDUMP("Will need to clone %u EH regions (outermost: EH#%02u) and %u blocks\n", regionCount, outermostTryIndex,
+            numberOfBlocksToClone);
+
+    // Allocate the new EH clauses. First, find the enclosing EH clause, if any...
+    // we will want to allocate the new clauses just "before" this point.
+    //
+    // If the region we're cloning is not enclosed, we put it at the end of the table;
+    // this is cheaper than any other insertion point, as no existing regions get renumbered.
+    //
+    unsigned insertBeforeIndex = enclosingTryIndex;
+    if (insertBeforeIndex == EHblkDsc::NO_ENCLOSING_INDEX)
+    {
+        JITDUMP("Cloned EH clauses will go at the end of the EH table\n");
+        insertBeforeIndex = compHndBBtabCount;
+    }
+    else
+    {
+        JITDUMP("Cloned EH clauses will go before enclosing region EH#%02u\n", enclosingTryIndex);
+    }
+
+    // Once we call fgTryAddEHTableEntries with deferAdding = false,
+    // all the EH indices at or above insertBeforeIndex will shift,
+    // and the EH table may reallocate.
+    //
+    EHblkDsc* const clonedOutermostEbd =
+        fgTryAddEHTableEntries(insertBeforeIndex, regionCount, /* deferAdding */ deferCloning);
+
+    if (clonedOutermostEbd == nullptr)
+    {
+        JITDUMP("fgCloneTryRegion: unable to expand EH table\n");
+        return nullptr;
+    }
+
+    if (deferCloning)
+    {
+        JITDUMP("fgCloneTryRegion: cloning is possible\n");
+        return tryEntry;
+    }
+
+    // None of the EH regions we're cloning should have been renumbered,
+    // though their clauses may have been moved to a new table.
+    //
+    EHblkDsc* const oldTryEbd = ehGetDsc(outermostTryIndex);
+    assert(oldTryEbd->ebdTryBeg == tryEntry);
+
+    // Callers will see enclosing EH region indices shift by this much
+    //
+    info.EHIndexShift = regionCount;
+
+    // The EH table now looks like the following, for a middle insertion:
+    //
+    // ===================
+    // EH 0                     -- unrelated regions
+    // ...
+    // ---------------
+    // EH x                     -- innermost region to clone
+    // ...
+    // EH x + regionCount - 1   -- outermost region to clone
+    // ---------------
+    // ---------------
+    // EH x + regionCount       -- innermost cloned region
+    // ...
+    // EH x + 2*regionCount - 1 -- outermost cloned region
+    // ---------------
+    // ...
+    // EH k                     -- enclosing try / hnd regions (if any), or other regions
+    //
+    // ===================
+    //
+    // And like this, for an end insertion:
+    //
+    // ===================
+    // EH 0                     -- unrelated regions
+    // ...
+    // ---------------
+    // EH x                     -- innermost region to clone
+    // ...
+    // EH x + regionCount - 1   -- outermost region to clone
+    // ---------------
+    // ...
+    // EH k                     -- unrelated regions
+    // ...
+    // ---------------
+    // EH c                     -- innermost cloned region
+    // ...
+    // EH c + regionCount - 1   -- outermost cloned region
+    // ---------------
+    // ===================
+    //
+    // So the cloned clauses will have higher indices, and each cloned clause
+    // should be the same distance from its original, but that distance
+    // depends on the kind of insertion.
+    //
+    // Compute that distance as `indexShift`.
+    //
+    unsigned const clonedOutermostRegionIndex = ehGetIndex(clonedOutermostEbd);
+    assert(clonedOutermostRegionIndex > outermostTryIndex);
+    unsigned const indexShift = clonedOutermostRegionIndex - outermostTryIndex;
+
+    // Copy over the EH table entries and adjust their enclosing indices.
+    // We will adjust the block references below.
+    //
+    unsigned const clonedLowestRegionIndex = clonedOutermostRegionIndex - regionCount + 1;
+    JITDUMP("New EH regions are EH#%02u ... EH#%02u\n", clonedLowestRegionIndex, clonedOutermostRegionIndex);
+    for (unsigned XTnum = clonedLowestRegionIndex; XTnum <= clonedOutermostRegionIndex; XTnum++)
+    {
+        unsigned originalXTnum = XTnum - indexShift;
+        compHndBBtab[XTnum]    = compHndBBtab[originalXTnum];
+        EHblkDsc* const ebd    = &compHndBBtab[XTnum];
+
+        // Note the outermost region enclosing indices stay the same, because the original
+        // clause entries got adjusted when we inserted the new clauses.
+        //
+        if (ebd->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX)
+        {
+            if (XTnum < clonedOutermostRegionIndex)
+            {
+                ebd->ebdEnclosingTryIndex += (unsigned short)indexShift;
+            }
+            JITDUMP("EH#%02u now enclosed in try EH#%02u\n", XTnum, ebd->ebdEnclosingTryIndex);
+        }
+        else
+        {
+            JITDUMP("EH#%02u not enclosed in any try\n", XTnum);
+        }
+
+        if (ebd->ebdEnclosingHndIndex != EHblkDsc::NO_ENCLOSING_INDEX)
+        {
+            if (XTnum < clonedOutermostRegionIndex)
+            {
+                ebd->ebdEnclosingHndIndex += (unsigned short)indexShift;
+            }
+            JITDUMP("EH#%02u now enclosed in handler EH#%02u\n", XTnum, ebd->ebdEnclosingHndIndex);
+        }
+        else
+        {
+            JITDUMP("EH#%02u not enclosed in any handler\n", XTnum);
+        }
+    }
+
+    // Clone the blocks.
+    //
+    // All blocks are initially put into the enclosing EH region, and it is not
+    // extended to cover them all. The step below puts the blocks into the
+    // appropriate cloned region and fixes up region extents.
+    //
+    JITDUMP("Cloning blocks for try...\n");
+    for (BasicBlock* const block : *blocks)
+    {
+        BasicBlock* const newBlock = fgNewBBafter(BBJ_ALWAYS, *insertAfter, /* extendRegion */ false);
+        JITDUMP("Adding " FMT_BB " (copy of " FMT_BB ") after " FMT_BB "\n", newBlock->bbNum, block->bbNum,
+                (*insertAfter)->bbNum);
+        map->Set(block, newBlock, BlockToBlockMap::SetKind::Overwrite);
+        BasicBlock::CloneBlockState(this, newBlock, block);
+        newBlock->scaleBBWeight(info.ProfileScale);
+
+        if (info.ScaleOriginalBlockProfile)
+        {
+            weight_t originalScale = max(0.0, 1.0 - info.ProfileScale);
+            block->scaleBBWeight(originalScale);
+        }
+
+        *insertAfter = newBlock;
+    }
+    JITDUMP("Done cloning blocks for try...\n");
+
+    // Update the cloned block regions and impacted EH clauses
+    //
+    // Here we are assuming that the cloned try is always placed lexically *after* the
+    // original, so that if the original try ended at the same point as an enclosing try,
+    // the new end point of the enclosing try is in the cloned try.
+    //
+    JITDUMP("Fixing region indices...\n");
+    for (BasicBlock* const block : *blocks)
+    {
+        BasicBlock* newBlock = nullptr;
+        bool        found    = map->Lookup(block, &newBlock);
+        assert(found);
+
+        // Update block references in the EH table
+        //
+        // `region` is the index of a cloned EH clause that may still refer to `block`.
+        // Update these block references and those of enclosing regions to refer to `newBlock`.
+        //
+        auto updateBlockReferences = [=](unsigned region) {
+            while (true)
+            {
+                EHblkDsc* const ebd = ehGetDsc(region);
+
+                if (ebd->ebdTryBeg == block)
+                {
+                    ebd->ebdTryBeg = newBlock;
+                    JITDUMP("Try begin for EH#%02u is " FMT_BB "\n", region, newBlock->bbNum);
+                }
+
+                if (ebd->ebdTryLast == block)
+                {
+                    fgSetTryEnd(ebd, newBlock);
+                }
+
+                if (ebd->ebdHndBeg == block)
+                {
+                    ebd->ebdHndBeg = newBlock;
+                    JITDUMP("Handler begin for EH#%02u is " FMT_BB "\n", region, newBlock->bbNum);
+                }
+
+                if (ebd->ebdHndLast == block)
+                {
+                    fgSetHndEnd(ebd, newBlock);
+                }
+
+                if (ebd->HasFilter() && (ebd->ebdFilter == block))
+                {
+                    ebd->ebdFilter = newBlock;
+                    JITDUMP("Filter begin for EH#%02u is " FMT_BB "\n", region, newBlock->bbNum);
+                }
+
+                bool inTry = false;
+                region     = ehGetEnclosingRegionIndex(region, &inTry);
+
+                if (region == EHblkDsc::NO_ENCLOSING_INDEX)
+                {
+                    break;
+                }
+            }
+        };
+
+        // Fix the EH regions for each cloned block, and the block
+        // references in the EH table entries.
+        //
+        // If the block's try index was outside of the original try region
+        // (say a handler for the try) then it is already properly adjusted.
+        //
+        if (block->hasTryIndex())
+        {
+            const unsigned originalTryIndex = block->getTryIndex();
+            unsigned       cloneTryIndex    = originalTryIndex;
+
+            if (originalTryIndex <= outermostTryIndex)
+            {
+                cloneTryIndex += indexShift;
+            }
+
+            EHblkDsc* const originalEbd = ehGetDsc(originalTryIndex);
+            EHblkDsc* const clonedEbd   = ehGetDsc(cloneTryIndex);
+            newBlock->setTryIndex(cloneTryIndex);
+            updateBlockReferences(cloneTryIndex);
+        }
+
+        if (block->hasHndIndex())
+        {
+            const unsigned originalHndIndex = block->getHndIndex();
+            unsigned       cloneHndIndex    = originalHndIndex;
+            // As with the try index, only shift handler indices that refer to cloned regions.
+            cloneHndIndex += (originalHndIndex <= outermostTryIndex) ? indexShift : 0;
+            EHblkDsc* const originalEbd = ehGetDsc(originalHndIndex);
+            EHblkDsc* const clonedEbd   = ehGetDsc(cloneHndIndex);
+            newBlock->setHndIndex(cloneHndIndex);
+            updateBlockReferences(cloneHndIndex);
+
+            // Handler and filter entries also have an
+            // additional artificial reference count.
+            //
+            if (bbIsHandlerBeg(newBlock))
+            {
+                newBlock->bbRefs++;
+            }
+        }
+    }
+    JITDUMP("Done fixing region indices\n");
+
+    // Redirect any branches within the newly-cloned blocks or
+    // from cloned blocks to non-cloned blocks
+    //
+    if (info.AddEdges)
+    {
+        JITDUMP("Adding edges in the newly cloned try\n");
+        for (BasicBlock* const block : BlockToBlockMap::KeyIteration(map))
+        {
+            BasicBlock* newBlock = (*map)[block];
+            // Jump kind/target should not be set yet
+            assert(newBlock->KindIs(BBJ_ALWAYS));
+            assert(!newBlock->HasInitializedTarget());
+            optSetMappedBlockTargets(block, newBlock, map);
+        }
+    }
+    else
+    {
+        JITDUMP("Not adding edges in the newly cloned try\n");
+    }
+
+    // If the original regions had any ACDs, create equivalent
+    // ones for the cloned regions
+    //
+    if (fgHasAddCodeDscMap())
+    {
+        AddCodeDscMap* const    acdMap = fgGetAddCodeDscMap();
+        ArrayStack<AddCodeDsc*> cloned(getAllocator(CMK_TryRegionClone));
+
+        assert(clonedLowestRegionIndex >= indexShift);
+        assert(clonedOutermostRegionIndex >= indexShift);
+
+        unsigned const originalLowestRegionIndex    = clonedLowestRegionIndex - indexShift;
+        unsigned const originalOutermostRegionIndex = clonedOutermostRegionIndex - indexShift;
+
+        for (AddCodeDsc* const add : AddCodeDscMap::ValueIteration(acdMap))
+        {
+            bool needsCloningForTry = false;
+            bool needsCloningForHnd = false;
+            bool inTry              = add->acdTryIndex > 0;
+            bool inHnd              = add->acdHndIndex > 0;
+
+            // acd region numbers are shifted up by one so
+            // that a value of zero means "not in an EH region"
+            //
+            if (inTry)
+            {
+                unsigned const trueAcdTryIndex = add->acdTryIndex - 1;
+
+                if ((trueAcdTryIndex >= originalLowestRegionIndex) && (trueAcdTryIndex <= originalOutermostRegionIndex))
+                {
+                    needsCloningForTry = true;
+                }
+            }
+
+            if (inHnd)
+            {
+                unsigned const trueAcdHndIndex = add->acdHndIndex - 1;
+
+                if ((trueAcdHndIndex >= originalLowestRegionIndex) && (trueAcdHndIndex <= originalOutermostRegionIndex))
+                {
+                    needsCloningForHnd = true;
+                }
+            }
+
+            if (!needsCloningForTry && !needsCloningForHnd)
+            {
+                continue;
+            }
+
+            JITDUMP("Will need to clone: ");
+            JITDUMPEXEC(add->Dump());
+
+            AddCodeDsc* clone = new (this, CMK_Unknown) AddCodeDsc;
+            clone->acdDstBlk  = nullptr;
+
+            if (needsCloningForTry)
+            {
+                clone->acdTryIndex = (unsigned short)(add->acdTryIndex + indexShift);
+            }
+            else if (inTry)
+            {
+                clone->acdTryIndex = add->acdTryIndex;
+            }
+            else
+            {
+                clone->acdTryIndex = 0;
+            }
+
+            if (needsCloningForHnd)
+            {
+                clone->acdHndIndex = (unsigned short)(add->acdHndIndex + indexShift);
+            }
+            else if (inHnd)
+            {
+                clone->acdHndIndex = add->acdHndIndex;
+            }
+            else
+            {
+                clone->acdHndIndex = 0;
+            }
+
+            clone->acdKeyDsg = add->acdKeyDsg;
+            clone->acdKind   = add->acdKind;
+            clone->acdUsed   = false;
+
+#if !FEATURE_FIXED_OUT_ARGS
+            clone->acdStkLvl     = 0;
+            clone->acdStkLvlInit = false;
+#endif // !FEATURE_FIXED_OUT_ARGS
+            INDEBUG(clone->acdNum = acdCount++);
+            cloned.Push(clone);
+        }
+
+        while (cloned.Height() > 0)
+        {
+            AddCodeDsc* const clone = cloned.Pop();
+            AddCodeDscKey     key(clone);
+            acdMap->Set(key, clone);
+            JITDUMP("Added clone: ");
+            JITDUMPEXEC(clone->Dump());
+        }
+    }
+
+    BasicBlock* const clonedTryEntry = (*map)[tryEntry];
+    JITDUMP("Done cloning, cloned try entry is " FMT_BB "\n", clonedTryEntry->bbNum);
+    return clonedTryEntry;
+}
+
+//------------------------------------------------------------------------
+// fgCanCloneTryRegion: see if a try region can be cloned
+//
+// Arguments:
+//    tryEntry - try entry block
+//
+// Returns:
+//    true if try region is clonable
+//
+bool Compiler::fgCanCloneTryRegion(BasicBlock* tryEntry)
+{
+    assert(bbIsTryBeg(tryEntry));
+
+    CloneTryInfo      info(this);
+    BasicBlock* const result = fgCloneTryRegion(tryEntry, info);
+    return result != nullptr;
+}
+
+//------------------------------------------------------------------------
+// CloneTryInfo::CloneTryInfo: construct an object for cloning a try region
+//
+// Arguments:
+//    comp -- compiler instance; supplies the bbID-based bit vector traits
+//
+CloneTryInfo::CloneTryInfo(Compiler* comp)
+    : Traits(comp->compBasicBlockID, comp)
+    , Visited(BitVecOps::MakeEmpty(&Traits))
+{
+}
diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp
index 52df8bab32b4f8..91d62de8316b75 100644
--- a/src/coreclr/jit/flowgraph.cpp
+++ b/src/coreclr/jit/flowgraph.cpp
@@ -1415,10 +1415,15 @@ void Compiler::fgAddSyncMethodEnterExit()
     // Add the new EH region at the end, since it is the least nested,
     // and thus should be last.
 
-    EHblkDsc* newEntry;
-    unsigned  XTnew = compHndBBtabCount;
+    EHblkDsc* newEntry = nullptr;
+    unsigned  XTnew    = compHndBBtabCount;
 
-    newEntry = fgAddEHTableEntry(XTnew);
+    newEntry = fgTryAddEHTableEntries(XTnew);
+
+    if (newEntry == nullptr)
+    {
+        IMPL_LIMITATION("too many exception clauses");
+    }
 
     // Initialize the new entry
 
@@ -5955,7 +5960,8 @@ bool FlowGraphNaturalLoop::HasDef(unsigned lclNum)
 //   True if the loop can be duplicated.
 //
 // Remarks:
-//   We currently do not support duplicating loops with EH constructs in them.
+//   Does not support duplicating loops with EH constructs in them.
+//   (see CanDuplicateWithEH)
 //
 bool FlowGraphNaturalLoop::CanDuplicate(INDEBUG(const char** reason))
 {
@@ -6036,6 +6042,315 @@ void FlowGraphNaturalLoop::Duplicate(BasicBlock** insertAfter, BlockToBlockMap*
     });
 }
 
+//------------------------------------------------------------------------
+// CanDuplicateWithEH: Check if this loop (possibly containing try entries)
+//   can be duplicated.
+//
+// Parameters:
+//   reason - If this function returns false, the reason why.
+//
+// Returns:
+//   True if the loop can be duplicated.
+//
+// Notes:
+//   Extends CanDuplicate to cover loops with try region entries.
+//
+bool FlowGraphNaturalLoop::CanDuplicateWithEH(INDEBUG(const char** reason))
+{
+#ifdef DEBUG
+    const char* localReason;
+    if (reason == nullptr)
+    {
+        reason = &localReason;
+    }
+#endif
+
+    Compiler*         comp   = m_dfsTree->GetCompiler();
+    BasicBlock* const header = GetHeader();
+
+    ArrayStack<BasicBlock*> tryRegionsToClone(comp->getAllocator(CMK_TryRegionClone));
+
+    BasicBlockVisit result = VisitLoopBlocks([=, &tryRegionsToClone](BasicBlock* block) {
+        const bool inSameRegionAsHeader = BasicBlock::sameEHRegion(block, header);
+
+        if (inSameRegionAsHeader)
+        {
+            return BasicBlockVisit::Continue;
+        }
+
+        if (comp->bbIsTryBeg(block))
+        {
+            // Check if this is an "outermost" try within the loop.
+            // If so, we have more checking to do later on.
+            //
+            const bool headerInTry         = header->hasTryIndex();
+            unsigned   blockIndex          = block->getTryIndex();
+            unsigned   outermostBlockIndex = comp->ehTrueEnclosingTryIndexIL(blockIndex);
+
+            if ((headerInTry && (outermostBlockIndex == header->getTryIndex())) ||
+                (!headerInTry && (outermostBlockIndex == EHblkDsc::NO_ENCLOSING_INDEX)))
+            {
+                tryRegionsToClone.Push(block);
+            }
+        }
+
+        return BasicBlockVisit::Continue;
+    });
+
+    // Check any enclosed try regions to make sure they can be cloned
+    // (note this is potentially misleading with multiple trys as
+    // we are considering cloning each in isolation).
+    //
+    const unsigned numberOfTryRegions = tryRegionsToClone.Height();
+    if ((result != BasicBlockVisit::Abort) && (numberOfTryRegions > 0))
+    {
+        // Possibly limit to just 1 region.
+        //
+        JITDUMP(FMT_LP " contains %u top-level try region%s\n", GetIndex(), numberOfTryRegions,
+                numberOfTryRegions > 1 ? "s" : "");
+
+        while (tryRegionsToClone.Height() > 0)
+        {
+            BasicBlock* const tryEntry    = tryRegionsToClone.Pop();
+            bool const        canCloneTry = comp->fgCanCloneTryRegion(tryEntry);
+
+            if (!canCloneTry)
+            {
+                INDEBUG(*reason = "Loop contains uncloneable try region");
+                result = BasicBlockVisit::Abort;
+                break;
+            }
+        }
+    }
+
+    return result != BasicBlockVisit::Abort;
+}
+
+//------------------------------------------------------------------------
+// DuplicateWithEH: Duplicate the blocks of this loop, inserting them after `insertAfter`,
+//   and also fully clone any try regions
+//
+// Parameters:
+//   insertAfter - [in, out] Block to insert duplicated blocks after; updated to last block inserted.
+//   map         - A map that will have mappings from loop blocks to duplicated blocks added to it.
+//   weightScale - Factor to scale weight of new blocks by
+//
+// Notes:
+//   Extends Duplicate to cover loops with try region entries.
+//
+void FlowGraphNaturalLoop::DuplicateWithEH(BasicBlock** insertAfter, BlockToBlockMap* map, weight_t weightScale)
+{
+    assert(CanDuplicateWithEH(nullptr));
+
+    Compiler* const   comp           = m_dfsTree->GetCompiler();
+    bool              clonedTry      = false;
+    BasicBlock* const insertionPoint = *insertAfter;
+
+    // If the insertion point is within an EH region, remember where all the
+    // enclosing EH regions currently end, so we can properly extend them
+    // when we're done cloning.
+    //
+    struct RegionEnd
+    {
+        RegionEnd(unsigned regionIndex, BasicBlock* block, bool isTryEnd)
+            : m_regionIndex(regionIndex)
+            , m_block(block)
+            , m_isTryEnd(isTryEnd)
+        {
+        }
+        unsigned    m_regionIndex;
+        BasicBlock* m_block;
+        bool        m_isTryEnd;
+    };
+
+    ArrayStack<RegionEnd> regionEnds(comp->getAllocator(CMK_TryRegionClone));
+
+    // Record enclosing EH region block references,
+    // so we can keep track of what the "before" picture looked like.
+    //
+    if (insertionPoint->hasTryIndex() || insertionPoint->hasHndIndex())
+    {
+        bool     inTry  = false;
+        unsigned region = comp->ehGetMostNestedRegionIndex(insertionPoint, &inTry);
+
+        if (region != 0)
+        {
+            // Convert to true region index
+            region--;
+
+            while (true)
+            {
+                EHblkDsc* const ebd = comp->ehGetDsc(region);
+
+                if (inTry)
+                {
+                    JITDUMP("Noting that enclosing try EH#%02u ends at " FMT_BB "\n", region, ebd->ebdTryLast->bbNum);
+                    regionEnds.Emplace(region, ebd->ebdTryLast, true);
+                }
+                else
+                {
+                    JITDUMP("Noting that enclsoing handler EH#%02u ends at " FMT_BB "\n", region,
+                            ebd->ebdHndLast->bbNum);
+                    regionEnds.Emplace(region, ebd->ebdHndLast, false);
+                }
+
+                region = comp->ehGetEnclosingRegionIndex(region, &inTry);
+
+                if (region == EHblkDsc::NO_ENCLOSING_INDEX)
+                {
+                    break;
+                }
+            }
+        }
+    }
+
+    // Keep track of how much the EH indices change because of EH region cloning.
+    //
+    unsigned ehIndexShift = 0;
+
+    // Keep track of which blocks were handled by EH region cloning
+    //
+    BitVecTraits traits(comp->compBasicBlockID, comp);
+    BitVec       visited(BitVecOps::MakeEmpty(&traits));
+
+    VisitLoopBlocksLexical([=, &traits, &visited, &clonedTry, &ehIndexShift](BasicBlock* blk) {
+        // Try cloning may have already handled this block
+        //
+        if (BitVecOps::IsMember(&traits, visited, blk->bbID))
+        {
+            return BasicBlockVisit::Continue;
+        }
+
+        // If this is a try region entry, clone the entire region now.
+        // Defer adding edges and extending EH regions until later.
+        //
+        // Updates map, and insertAfter.
+        //
+        if (comp->bbIsTryBeg(blk))
+        {
+            CloneTryInfo info(comp);
+            info.Map          = map;
+            info.AddEdges     = false;
+            info.ProfileScale = weightScale;
+
+            BasicBlock* const clonedBlock = comp->fgCloneTryRegion(blk, info, insertAfter);
+
+            assert(clonedBlock != nullptr);
+            BitVecOps::UnionD(&traits, visited, info.Visited);
+            ehIndexShift += info.EHIndexShift;
+            clonedTry = true;
+            return BasicBlockVisit::Continue;
+        }
+        else
+        {
+            // We're not expecting to find enclosed EH regions
+            //
+            assert(!comp->bbIsTryBeg(blk));
+            assert(!comp->bbIsHandlerBeg(blk));
+            assert(!BitVecOps::IsMember(&traits, visited, blk->bbID));
+        }
+
+        // `blk` was not in loop-enclosed try region or companion region.
+        //
+        // Initialize newBlk as BBJ_ALWAYS without jump target; these are fixed up subsequently.
+        //
+        // CloneBlockState puts newBlk in the proper EH region. We will fix enclosing region extents
+        // once cloning is done.
+        //
+        BasicBlock* newBlk = comp->fgNewBBafter(BBJ_ALWAYS, *insertAfter, /* extendRegion */ false);
+        JITDUMP("Adding " FMT_BB " (copy of " FMT_BB ") after " FMT_BB "\n", newBlk->bbNum, blk->bbNum,
+                (*insertAfter)->bbNum);
+        BasicBlock::CloneBlockState(comp, newBlk, blk);
+
+        assert(newBlk->bbRefs == 0);
+        newBlk->scaleBBWeight(weightScale);
+        map->Set(blk, newBlk, BlockToBlockMap::Overwrite);
+        *insertAfter = newBlk;
+
+        return BasicBlockVisit::Continue;
+    });
+
+    // Note the EH table may have grown, if we cloned try regions. If there was
+    // an enclosing EH entry, then its EH table entries will have shifted to
+    // higher index values.
+    //
+    // Update the enclosing EH region ends to reflect the new blocks we added.
+    // (here we assume cloned blocks are placed lexically after their originals, so if a
+    // region-ending block was cloned, the new region end is the last block cloned).
+    //
+    // Note we don't consult the block references in EH table here, since they
+    // may reflect interim updates to region endpoints (by fgCloneTryRegion). Otherwise
+    // we could simply call ehUpdateLastBlocks.
+    //
+    BasicBlock* const lastClonedBlock = *insertAfter;
+
+    while (regionEnds.Height() > 0)
+    {
+        RegionEnd       r   = regionEnds.Pop();
+        EHblkDsc* const ebd = comp->ehGetDsc(r.m_regionIndex + ehIndexShift);
+
+        if (r.m_block == insertionPoint)
+        {
+            if (r.m_isTryEnd)
+            {
+                comp->fgSetTryEnd(ebd, lastClonedBlock);
+            }
+            else
+            {
+                comp->fgSetHndEnd(ebd, lastClonedBlock);
+            }
+        }
+        else
+        {
+            if (r.m_isTryEnd)
+            {
+                comp->fgSetTryEnd(ebd, r.m_block);
+            }
+            else
+            {
+                comp->fgSetHndEnd(ebd, r.m_block);
+            }
+        }
+    }
+
+    // Now go through the new blocks, remapping their jump targets within the loop
+    // and updating the preds lists.
+    //
+    VisitLoopBlocks([=](BasicBlock* blk) {
+        BasicBlock* newBlk = nullptr;
+        bool        b      = map->Lookup(blk, &newBlk);
+        assert(b && newBlk != nullptr);
+
+        JITDUMP("Updating targets: " FMT_BB " mapped to " FMT_BB "\n", blk->bbNum, newBlk->bbNum);
+
+        // Jump target should not be set yet
+        assert(!newBlk->HasInitializedTarget());
+
+        // Redirect the new block according to "map".
+        // optSetMappedBlockTargets will set newBlk's successors, and add pred edges for the successors.
+        comp->optSetMappedBlockTargets(blk, newBlk, map);
+
+        return BasicBlockVisit::Continue;
+    });
+
+    // If we cloned any EH regions, we may have some non-loop blocks to process as well.
+    //
+    if (clonedTry)
+    {
+        for (BasicBlock* const blk : BlockToBlockMap::KeyIteration(map))
+        {
+            if (!ContainsBlock(blk))
+            {
+                BasicBlock* newBlk = nullptr;
+                bool        b      = map->Lookup(blk, &newBlk);
+                assert(b && newBlk != nullptr);
+                assert(!newBlk->HasInitializedTarget());
+                comp->optSetMappedBlockTargets(blk, newBlk, map);
+            }
+        }
+    }
+}
+
 //------------------------------------------------------------------------
 // MayExecuteBlockMultipleTimesPerIteration:
 //   Check if the loop may execute a particular loop block multiple times for
diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h
index 7eb36d7be33406..73fd6451c18183 100644
--- a/src/coreclr/jit/jitconfigvalues.h
+++ b/src/coreclr/jit/jitconfigvalues.h
@@ -59,6 +59,7 @@ CONFIG_INTEGER(JitBreakMorphTree, "JitBreakMorphTree", 0xffffffff)
 CONFIG_INTEGER(JitBreakOnBadCode, "JitBreakOnBadCode", 0)
 CONFIG_INTEGER(JitBreakOnMinOpts, "JITBreakOnMinOpts", 0) // Halt if jit switches to MinOpts
 CONFIG_INTEGER(JitCloneLoops, "JitCloneLoops", 1)         // If 0, don't clone. Otherwise clone loops for optimizations.
+CONFIG_INTEGER(JitCloneLoopsWithEH, "JitCloneLoopsWithEH", 0) // If 0, don't clone loops containing EH regions CONFIG_INTEGER(JitCloneLoopsWithGdvTests, "JitCloneLoopsWithGdvTests", 1) // If 0, don't clone loops based on // invariant type/method address tests RELEASE_CONFIG_INTEGER(JitCloneLoopsSizeLimit, "JitCloneLoopsSizeLimit", 400) // limit cloning to loops with less @@ -95,6 +96,8 @@ CONFIG_INTEGER(JitUnrollLoopMaxIterationCount, "JitUnrollLoopMaxIterationCount", DEFAULT_UNROLL_LOOP_MAX_ITERATION_COUNT) +CONFIG_INTEGER(JitUnrollLoopsWithEH, "JitUnrollLoopsWithEH", 0) // If 0, don't unroll loops containing EH regions + CONFIG_INTEGER(JitDirectAlloc, "JitDirectAlloc", 0) CONFIG_INTEGER(JitDoubleAlign, "JitDoubleAlign", 1) CONFIG_INTEGER(JitEmitPrintRefRegs, "JitEmitPrintRefRegs", 0) diff --git a/src/coreclr/jit/jiteh.cpp b/src/coreclr/jit/jiteh.cpp index 96bc754d317166..8327f14f633239 100644 --- a/src/coreclr/jit/jiteh.cpp +++ b/src/coreclr/jit/jiteh.cpp @@ -1463,7 +1463,7 @@ void Compiler::fgAllocEHTable() // the maximum number of clauses we will need might be very large. We allocate // twice the number of EH clauses in the IL, which should be good in practice. // In extreme cases, we might need to abandon this and reallocate. See - // fgAddEHTableEntry() for more details. + // fgTryAddEHTableEntries() for more details. #ifdef DEBUG compHndBBtabAllocCount = info.compXcptnsCount; // force the resizing code to hit more frequently in DEBUG @@ -1676,20 +1676,61 @@ void Compiler::fgRemoveEHTableEntry(unsigned XTnum) } } -/***************************************************************************** - * - * Add a single exception table entry at index 'XTnum', [0 <= XTnum <= compHndBBtabCount]. - * If 'XTnum' is compHndBBtabCount, then add the entry at the end. - * Note that this changes the size of the exception table. - * All the blocks referring to the various index values are updated. - * The table entry itself is not filled in. 
- * Returns a pointer to the new entry. - */ -EHblkDsc* Compiler::fgAddEHTableEntry(unsigned XTnum) +//------------------------------------------------------------------------ +// fgTryAddEHTableEntries: try to add new EH table entries +// +// Arguments: +// XTnum -- new entries will be added before this entry +// (use compHndBBtabCount to add at end) +// count -- number of entries to add +// deferAdding -- if true, don't actually add new entries, just check +// if they can be added; return nullptr if not. +// +// Returns: +// A pointer to the new entry with the highest index, or +// nullptr if the table cannot be expanded to hold the new entries +// +// Notes: +// +// Note that changes the size of the exception table. +// All the blocks referring to the various index values are updated. +// The new table entries are not filled in. +// +// Note mid-table insertions can be expensive as they must walk +// all blocks to update block EH region indices. +// +// If there are active ACDs, these are updated as needed. Callers who +// are making room for cloned EH must take pains to find and clone these +// as well... +// +EHblkDsc* Compiler::fgTryAddEHTableEntries(unsigned XTnum, unsigned count, bool deferAdding) { - assert(UsesFunclets()); + bool reallocate = false; + bool const insert = (XTnum != compHndBBtabCount); + unsigned const newCount = compHndBBtabCount + count; + + if (newCount > MAX_XCPTN_INDEX) + { + // We have run out of indices. Fail. + // + return nullptr; + } + + if (deferAdding) + { + // We can add count entries... 
+ // + return compHndBBtab; + } + + if (newCount > compHndBBtabAllocCount) + { + // We need to reallocate the table + // + reallocate = true; + } - if (XTnum != compHndBBtabCount) + if (insert) { // Update all enclosing links that will get invalidated by inserting an entry at 'XTnum' @@ -1698,12 +1739,12 @@ EHblkDsc* Compiler::fgAddEHTableEntry(unsigned XTnum) if ((xtab->ebdEnclosingTryIndex != EHblkDsc::NO_ENCLOSING_INDEX) && (xtab->ebdEnclosingTryIndex >= XTnum)) { // Update the enclosing scope link - xtab->ebdEnclosingTryIndex++; + xtab->ebdEnclosingTryIndex += (unsigned short)count; } if ((xtab->ebdEnclosingHndIndex != EHblkDsc::NO_ENCLOSING_INDEX) && (xtab->ebdEnclosingHndIndex >= XTnum)) { // Update the enclosing scope link - xtab->ebdEnclosingHndIndex++; + xtab->ebdEnclosingHndIndex += (unsigned short)count; } } @@ -1713,31 +1754,68 @@ EHblkDsc* Compiler::fgAddEHTableEntry(unsigned XTnum) { if (blk->hasTryIndex() && (blk->getTryIndex() >= XTnum)) { - blk->setTryIndex(blk->getTryIndex() + 1); + blk->setTryIndex(blk->getTryIndex() + count); } if (blk->hasHndIndex() && (blk->getHndIndex() >= XTnum)) { - blk->setHndIndex(blk->getHndIndex() + 1); + blk->setHndIndex(blk->getHndIndex() + count); } } - } - // Increase the number of entries in the EH table by one + // Update impacted ACDs + // + if (fgHasAddCodeDscMap()) + { + AddCodeDscMap* const map = fgGetAddCodeDscMap(); + ArrayStack modified(getAllocator(CMK_Unknown)); - if (compHndBBtabCount == compHndBBtabAllocCount) - { - // We need to reallocate the table + for (AddCodeDsc* const add : AddCodeDscMap::ValueIteration(map)) + { + bool isModified = false; + AddCodeDscKey oldKey(add); + + if (add->acdTryIndex > XTnum) + { + add->acdTryIndex += (unsigned short)count; + isModified = true; + } - if (compHndBBtabAllocCount == MAX_XCPTN_INDEX) - { // We're already at the max size for indices to be unsigned short - IMPL_LIMITATION("too many exception clauses"); + if (add->acdHndIndex > XTnum) + { + isModified = true; + 
add->acdHndIndex += (unsigned short)count; + } + + if (isModified) + { + add->UpdateKeyDesignator(this); + bool const removed = map->Remove(oldKey); + assert(removed); + modified.Push(add); + } + } + + while (modified.Height() > 0) + { + AddCodeDsc* const add = modified.Pop(); + AddCodeDscKey newKey(add); + JITDUMP("ACD%u updated\n", add->acdNum); + map->Set(newKey, add); + JITDUMPEXEC(add->Dump()); + } } + } - // Double the table size. For stress, we could use +1. Note that if the table isn't allocated + // If necessary, increase the number of entries in the EH table + // + if (reallocate) + { + // Roughly double the table size. Note that if the table isn't allocated // yet, such as when we add an EH region for synchronized methods that don't already have one, // we start at zero, so we need to make sure the new table has at least one entry. - unsigned newHndBBtabAllocCount = max(1u, compHndBBtabAllocCount * 2); + // + unsigned newHndBBtabAllocCount = max(1u, compHndBBtabAllocCount + newCount); noway_assert(compHndBBtabAllocCount < newHndBBtabAllocCount); // check for overflow if (newHndBBtabAllocCount > MAX_XCPTN_INDEX) @@ -1745,21 +1823,21 @@ EHblkDsc* Compiler::fgAddEHTableEntry(unsigned XTnum) newHndBBtabAllocCount = MAX_XCPTN_INDEX; // increase to the maximum size we allow } - JITDUMP("*********** fgAddEHTableEntry: increasing EH table size from %d to %d\n", compHndBBtabAllocCount, + JITDUMP("*********** fgTryAddEHTableEntries: increasing EH table size from %d to %d\n", compHndBBtabAllocCount, newHndBBtabAllocCount); compHndBBtabAllocCount = newHndBBtabAllocCount; EHblkDsc* newTable = new (this, CMK_BasicBlock) EHblkDsc[compHndBBtabAllocCount]; - // Move over the stuff before the new entry + // Move over the stuff before the new entries memcpy_s(newTable, compHndBBtabAllocCount * sizeof(*compHndBBtab), compHndBBtab, XTnum * sizeof(*compHndBBtab)); if (XTnum != compHndBBtabCount) { // Move over the stuff after the new entry - memcpy_s(newTable + XTnum + 1, 
(compHndBBtabAllocCount - XTnum - 1) * sizeof(*compHndBBtab), + memcpy_s(newTable + XTnum + count, (compHndBBtabAllocCount - XTnum - 1) * sizeof(*compHndBBtab), compHndBBtab + XTnum, (compHndBBtabCount - XTnum) * sizeof(*compHndBBtab)); } @@ -1770,18 +1848,18 @@ EHblkDsc* Compiler::fgAddEHTableEntry(unsigned XTnum) } else if (XTnum != compHndBBtabCount) { - // Leave the elements before the new element alone. Move the ones after it, to make space. + // Leave the elements before the new elements alone. Move the ones after it, to make space. EHblkDsc* HBtab = compHndBBtab + XTnum; - memmove_s(HBtab + 1, (compHndBBtabAllocCount - XTnum - 1) * sizeof(*compHndBBtab), HBtab, + memmove_s(HBtab + count, (compHndBBtabAllocCount - XTnum - 1) * sizeof(*compHndBBtab), HBtab, (compHndBBtabCount - XTnum) * sizeof(*compHndBBtab)); } // Now the entry is there, but not filled in - - compHndBBtabCount++; - return compHndBBtab + XTnum; + // + compHndBBtabCount = newCount; + return compHndBBtab + XTnum + (count - 1); } /***************************************************************************** diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index e7f34ac90dd188..21c7f9ac789d9a 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -1888,8 +1888,19 @@ bool Compiler::optIsLoopClonable(FlowGraphNaturalLoop* loop, LoopCloneContext* c return false; } + bool cloneLoopsWithEH = false; + INDEBUG(cloneLoopsWithEH = (JitConfig.JitCloneLoopsWithEH() > 0);) INDEBUG(const char* reason); - if (!loop->CanDuplicate(INDEBUG(&reason))) + + if (cloneLoopsWithEH) + { + if (!loop->CanDuplicateWithEH(INDEBUG(&reason))) + { + JITDUMP("Loop cloning: rejecting loop " FMT_LP ": %s\n", loop->GetIndex(), reason); + return false; + } + } + else if (!loop->CanDuplicate(INDEBUG(&reason))) { JITDUMP("Loop cloning: rejecting loop " FMT_LP ": %s\n", loop->GetIndex(), reason); return false; @@ -2031,6 +2042,9 @@ void 
Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex } #endif + bool cloneLoopsWithEH = false; + INDEBUG(cloneLoopsWithEH = (JitConfig.JitCloneLoopsWithEH() > 0);) + // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks). unsigned depth = loop->GetDepth(); weight_t ambientWeight = 1; @@ -2094,19 +2108,57 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex // loop itself. All failed conditions will branch to the slow preheader. // The slow preheader will unconditionally branch to the slow loop header. // This puts the slow loop in the canonical loop form. + // + // The slow preheader needs to go in the same EH region as the preheader. + // JITDUMP("Create unique preheader for slow path loop\n"); - BasicBlock* slowPreheader = fgNewBBafter(BBJ_ALWAYS, newPred, /*extendRegion*/ true); + const bool extendRegion = BasicBlock::sameEHRegion(bottom, preheader); + BasicBlock* slowPreheader = fgNewBBafter(BBJ_ALWAYS, newPred, extendRegion); JITDUMP("Adding " FMT_BB " after " FMT_BB "\n", slowPreheader->bbNum, newPred->bbNum); slowPreheader->bbWeight = newPred->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight; slowPreheader->CopyFlags(newPred, (BBF_PROF_WEIGHT | BBF_RUN_RARELY)); slowPreheader->scaleBBWeight(LoopCloneContext::slowPathWeightScaleFactor); + + // If we didn't extend the region above (because the last loop + // block was in some enclosed EH region), put the slow preheader + // into the appropriate region, and make appropriate extent updates. 
+ // + if (!extendRegion) + { + slowPreheader->copyEHRegion(preheader); + bool isTry = false; + unsigned enclosingRegion = ehGetMostNestedRegionIndex(slowPreheader, &isTry); + + if (enclosingRegion != 0) + { + EHblkDsc* const ebd = ehGetDsc(enclosingRegion - 1); + for (EHblkDsc* const HBtab : EHClauses(this, ebd)) + { + if (HBtab->ebdTryLast == bottom) + { + fgSetTryEnd(HBtab, slowPreheader); + } + if (HBtab->ebdHndLast == bottom) + { + fgSetHndEnd(HBtab, slowPreheader); + } + } + } + } newPred = slowPreheader; // Now we'll clone the blocks of the loop body. These cloned blocks will be the slow path. BlockToBlockMap* blockMap = new (getAllocator(CMK_LoopClone)) BlockToBlockMap(getAllocator(CMK_LoopClone)); - loop->Duplicate(&newPred, blockMap, LoopCloneContext::slowPathWeightScaleFactor); + if (cloneLoopsWithEH) + { + loop->DuplicateWithEH(&newPred, blockMap, LoopCloneContext::slowPathWeightScaleFactor); + } + else + { + loop->Duplicate(&newPred, blockMap, LoopCloneContext::slowPathWeightScaleFactor); + } // Scale old blocks to the fast path weight. 
loop->VisitLoopBlocks([=](BasicBlock* block) { diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 3c819ceb293dd7..c2eae010428a01 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -586,6 +586,8 @@ void Compiler::optSetMappedBlockTargets(BasicBlock* blk, BasicBlock* newBlk, Blo case BBJ_CALLFINALLY: case BBJ_CALLFINALLYRET: case BBJ_LEAVE: + case BBJ_EHCATCHRET: + case BBJ_EHFILTERRET: { FlowEdge* newEdge; @@ -707,16 +709,6 @@ void Compiler::optSetMappedBlockTargets(BasicBlock* blk, BasicBlock* newBlk, Blo break; } - case BBJ_EHCATCHRET: - case BBJ_EHFILTERRET: - { - // newBlk's jump target should not need to be redirected - assert(!redirectMap->Lookup(blk->GetTarget(), &newTarget)); - FlowEdge* newEdge = fgAddRefPred(newBlk->GetTarget(), newBlk); - newBlk->SetKindAndTargetEdge(blk->GetKind(), newEdge); - break; - } - default: // blk doesn't have a jump destination assert(blk->NumSucc() == 0); @@ -1514,8 +1506,19 @@ bool Compiler::optTryUnrollLoop(FlowGraphNaturalLoop* loop, bool* changedIR) } // clang-format on + bool unrollLoopsWithEH = false; + INDEBUG(unrollLoopsWithEH = (JitConfig.JitUnrollLoopsWithEH() > 0);) INDEBUG(const char* reason); - if (!loop->CanDuplicate(INDEBUG(&reason))) + + if (unrollLoopsWithEH) + { + if (!loop->CanDuplicateWithEH(INDEBUG(&reason))) + { + JITDUMP("Failed to unroll loop " FMT_LP ": %s\n", loop->GetIndex(), reason); + return false; + } + } + else if (!loop->CanDuplicate(INDEBUG(&reason))) { JITDUMP("Failed to unroll loop " FMT_LP ": %s\n", loop->GetIndex(), reason); return false; @@ -1526,6 +1529,7 @@ bool Compiler::optTryUnrollLoop(FlowGraphNaturalLoop* loop, bool* changedIR) *changedIR = true; // Heuristic: Estimated cost in code size of the unrolled loop. + // TODO: duplication cost is higher if there is EH... 
ClrSafeInt loopCostSz; // Cost is size of one iteration @@ -1622,7 +1626,15 @@ bool Compiler::optTryUnrollLoop(FlowGraphNaturalLoop* loop, bool* changedIR) // and we might not have upscaled at all, if we had profile data. // weight_t scaleWeight = 1.0 / BB_LOOP_WEIGHT_SCALE; - loop->Duplicate(&insertAfter, &blockMap, scaleWeight); + + if (unrollLoopsWithEH) + { + loop->DuplicateWithEH(&insertAfter, &blockMap, scaleWeight); + } + else + { + loop->Duplicate(&insertAfter, &blockMap, scaleWeight); + } // Replace all uses of the loop iterator with the current value. loop->VisitLoopBlocks([=, &blockMap](BasicBlock* block) { diff --git a/src/tests/JIT/opt/Cloning/loops_with_eh.cs b/src/tests/JIT/opt/Cloning/loops_with_eh.cs new file mode 100644 index 00000000000000..ca0328e78bdaf4 --- /dev/null +++ b/src/tests/JIT/opt/Cloning/loops_with_eh.cs @@ -0,0 +1,941 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+
+using System;
+using System.Runtime.CompilerServices;
+using Xunit;
+
+
+// Cheat codes
+// (data[i] == i and n == data[20] == 20, so every Test_* below is arranged to evaluate to 100)
+// L - loop
+// TC - try catch (catch exits loop)
+// TfC - try filter catch
+// TF - try finally
+// x - has padding between loop head and try entry
+// c - catch continues loop
+// m - multiple try exits (TF will remain a try finally)
+// g - giant finally (TF will remain try finally)
+// p - regions are serial, not nested
+//
+// x: we currently cannot clone loops where the try is the first thing
+// as the header and preheader are different regions
+
+public class LoopsWithEH
+{
+    static int[] data;
+    static int n;
+
+    static LoopsWithEH()
+    {
+        data = new int[100];
+        for (int i = 0; i < data.Length; i++)
+        {
+            data[i] = i;
+        }
+
+        n = data[20];
+    }
+
+    [Fact]
+    public static int Test_LTC() => Sum_LTC(data, n) - 90;
+
+    public static int Sum_LTC(int[] data, int n)
+    {
+        int sum = 0;
+        for (int i = 0; i < n; i++)
+        {
+            try
+            {
+                sum += data[i];
+            }
+            catch (Exception)
+            {
+                return -1;
+            }
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_LTfC() => Sum_LTfC(data, n) - 90;
+
+    public static int Sum_LTfC(int[] data, int n)
+    {
+        int sum = 0;
+        for (int i = 0; i < n; i++)
+        {
+            try
+            {
+                sum += data[i];
+            }
+            catch (Exception) when (n > 0)
+            {
+                return -1;
+            }
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_LxTC() => Sum_LxTC(data, n) - 110;
+
+    public static int Sum_LxTC(int[] data, int n)
+    {
+        int sum = 0;
+        for (int i = 0; i < n; i++)
+        {
+            sum += 1;
+            try
+            {
+                sum += data[i];
+            }
+            catch (Exception)
+            {
+                return -1;
+            }
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_LxTCc() => Sum_LxTCc(data, n) - 110;
+
+    public static int Sum_LxTCc(int[] data, int n)
+    {
+        int sum = 0;
+        for (int i = 0; i < n; i++)
+        {
+            sum += 1;
+            try
+            {
+                sum += data[i];
+            }
+            catch (Exception)
+            {
+                sum += 1;
+            }
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_LxTfC() => Sum_LxTfC(data, n) - 110;
+
+    public static int Sum_LxTfC(int[] data, int n)
+    {
+        int sum = 0;
+        for (int i = 0; i < n; i++)
+        {
+            sum += 1;
+            try
+            {
+                sum += data[i];
+            }
+            catch (Exception) when (n > 0)
+            {
+                return -1;
+            }
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_LxTfCc() => Sum_LxTfCc(data, n) - 110;
+
+    public static int Sum_LxTfCc(int[] data, int n)
+    {
+        int sum = 0;
+        for (int i = 0; i < n; i++)
+        {
+            sum += 1;
+            try
+            {
+                sum += data[i];
+            }
+            catch (Exception) when (n > 0)
+            {
+                sum += 1;
+            }
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_LxTCC() => Sum_LxTCC(data, n) - 110;
+
+    public static int Sum_LxTCC(int[] data, int n)
+    {
+        int sum = 0;
+        for (int i = 0; i < n; i++)
+        {
+            sum += 1;
+            try
+            {
+                sum += data[i];
+            }
+            catch (IndexOutOfRangeException)
+            {
+                return -1;
+            }
+            catch(Exception)
+            {
+                return -2;
+            }
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_LxTCcC() => Sum_LxTCcC(data, n) - 110;
+
+    public static int Sum_LxTCcC(int[] data, int n)
+    {
+        int sum = 0;
+        for (int i = 0; i < n; i++)
+        {
+            sum += 1;
+            try
+            {
+                sum += data[i];
+            }
+            catch (IndexOutOfRangeException)
+            {
+                sum +=1;
+            }
+            catch (Exception)
+            {
+                return -2;
+            }
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_LxTCCc() => Sum_LxTCCc(data, n) - 110;
+
+    public static int Sum_LxTCCc(int[] data, int n)
+    {
+        int sum = 0;
+        for (int i = 0; i < n; i++)
+        {
+            sum += 1;
+            try
+            {
+                sum += data[i];
+            }
+            catch (IndexOutOfRangeException)
+            {
+                return -1;
+            }
+            catch (Exception)
+            {
+                sum += 2;
+            }
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_LxTCcCc() => Sum_LxTCcCc(data, n) - 110;
+
+    public static int Sum_LxTCcCc(int[] data, int n)
+    {
+        int sum = 0;
+        for (int i = 0; i < n; i++)
+        {
+            sum += 1;
+            try
+            {
+                sum += data[i];
+            }
+            catch (IndexOutOfRangeException)
+            {
+                sum += 1;
+            }
+            catch (Exception)
+            {
+                sum += 2;
+            }
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_LxTCpTC() => Sum_LxTCpTC(data, n) - 300;
+
+    public static int Sum_LxTCpTC(int[] data, int n)
+    {
+        int sum = 0;
+        for (int i = 0; i < n; i++)
+        {
+            sum += 1;
+            try
+            {
+                sum += data[i];
+            }
+            catch (Exception)
+            {
+                return -1;
+            }
+
+            try
+            {
+                sum += data[i];
+            }
+            catch (Exception)
+            {
+                return -2;
+            }
+
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_LxTCcpTC() => Sum_LxTCcpTC(data, n) - 300;
+
+    public static int Sum_LxTCcpTC(int[] data, int n)
+    {
+        int sum = 0;
+        for (int i = 0; i < n; i++)
+        {
+            sum += 1;
+            try
+            {
+                sum += data[i];
+            }
+            catch (Exception)
+            {
+                sum += 1;
+            }
+
+            try
+            {
+                sum += data[i];
+            }
+            catch (Exception)
+            {
+                return -2;
+            }
+
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_LxTCpTCc() => Sum_LxTCpTCc(data, n) - 300;
+
+    public static int Sum_LxTCpTCc(int[] data, int n)
+    {
+        int sum = 0;
+        for (int i = 0; i < n; i++)
+        {
+            sum += 1;
+            try
+            {
+                sum += data[i];
+            }
+            catch (Exception)
+            {
+                return -1;
+            }
+
+            try
+            {
+                sum += data[i];
+            }
+            catch (Exception)
+            {
+                sum += 1;
+            }
+
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_LxTCcpTCc() => Sum_LxTCcpTCc(data, n) - 300;
+
+    public static int Sum_LxTCcpTCc(int[] data, int n)
+    {
+        int sum = 0;
+        for (int i = 0; i < n; i++)
+        {
+            sum += 1;
+            try
+            {
+                sum += data[i];
+            }
+            catch (Exception)
+            {
+                sum += 2;
+            }
+
+            try
+            {
+                sum += data[i];
+            }
+            catch (Exception)
+            {
+                sum += 1;
+            }
+
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_LxTF() => Sum_LxTF(data, n) - 130;
+
+    public static int Sum_LxTF(int[] data, int n)
+    {
+        int sum = 0;
+        for (int i = 0; i < n; i++)
+        {
+            sum += 1;
+            try
+            {
+                sum += data[i];
+            }
+            finally
+            {
+                sum += 1;
+            }
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_LxTFm() => Sum_LxTFm(data, n) - 1;
+
+    public static int Sum_LxTFm(int[] data, int n)
+    {
+        int sum = 0;
+        for (int i = 0; i < n; i++)
+        {
+            sum += 1;
+            try
+            {
+                sum += data[i];
+
+                if (sum > 100) return 101;
+            }
+            finally
+            {
+                sum += 1;
+            }
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_LxTFg() => Sum_LxTFg(data, n) - 1;
+
+    public static int Sum_LxTFg(int[] data, int n)
+    {
+        int sum = 0;
+        for (int i = 0; i < n; i++)
+        {
+            sum += 1;
+            try
+            {
+                sum += data[i];
+
+                if (sum > 100) return 101;
+            }
+            finally
+            {
+                sum += 1; sum *= 4; sum -= 1; sum /= 4;
+                sum += 1; sum *= 4; sum -= 1; sum /= 4;
+                sum += 1; sum *= 4; sum -= 1; sum /= 4;
+                sum += 1; sum *= 4; sum -= 1; sum /= 4;
+                sum += 1; sum *= 4; sum -= 1; sum /= 4;
+                sum += 1; sum *= 4; sum -= 1; sum /= 4;
+                sum += 1; sum *= 4; sum -= 1; sum /= 4;
+                sum += 1; sum *= 4; sum -= 1; sum /= 4;
+            }
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_TCLxTC() => Sum_TCLxTC(data, n) - 110;
+
+    public static int Sum_TCLxTC(int[] data, int n)
+    {
+        int sum = 0;
+        try
+        {
+            for (int i = 0; i < n; i++)
+            {
+                sum += 1;
+                try
+                {
+                    sum += data[i];
+                }
+                catch (Exception)
+                {
+                    return -1;
+                }
+            }
+        }
+        catch (Exception)
+        {
+            return -1;
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_TCLxTCc() => Sum_TCLxTCc(data, n) - 110;
+
+    public static int Sum_TCLxTCc(int[] data, int n)
+    {
+        int sum = 0;
+        try
+        {
+            for (int i = 0; i < n; i++)
+            {
+                sum += 1;
+                try
+                {
+                    sum += data[i];
+                }
+                catch (Exception)
+                {
+                    sum += 1;
+                }
+            }
+        }
+        catch (Exception)
+        {
+            return -1;
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_TCLxTfC() => Sum_TCLxTfC(data, n) - 110;
+
+    public static int Sum_TCLxTfC(int[] data, int n)
+    {
+        int sum = 0;
+        try
+        {
+            for (int i = 0; i < n; i++)
+            {
+                sum += 1;
+                try
+                {
+                    sum += data[i];
+                }
+                catch (Exception) when (n > 0)
+                {
+                    return -1;
+                }
+            }
+        }
+        catch (Exception)
+        {
+            return -1;
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_TfCLxTC() => Sum_TfCLxTC(data, n) - 110;
+
+    public static int Sum_TfCLxTC(int[] data, int n)
+    {
+        int sum = 0;
+        try
+        {
+            for (int i = 0; i < n; i++)
+            {
+                sum += 1;
+                try
+                {
+                    sum += data[i];
+                }
+                catch (Exception)
+                {
+                    return -1;
+                }
+            }
+        }
+        catch (Exception) when (n > 0)
+        {
+            return -1;
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_TfCLxTCc() => Sum_TfCLxTCc(data, n) - 110;
+
+    public static int Sum_TfCLxTCc(int[] data, int n)
+    {
+        int sum = 0;
+        try
+        {
+            for (int i = 0; i < n; i++)
+            {
+                sum += 1;
+                try
+                {
+                    sum += data[i];
+                }
+                catch (Exception)
+                {
+                    sum += 1;
+                }
+            }
+        }
+        catch (Exception) when (n > 0)
+        {
+            return -1;
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_TfCLxTfC() => Sum_TfCLxTfC(data, n) - 110;
+
+    public static int Sum_TfCLxTfC(int[] data, int n)
+    {
+        int sum = 0;
+        try
+        {
+            for (int i = 0; i < n; i++)
+            {
+                sum += 1;
+                try
+                {
+                    sum += data[i];
+                }
+                catch (Exception) when (n > 0)
+                {
+                    return -1;
+                }
+            }
+        }
+        catch (Exception) when (n > 0)
+        {
+            return -1;
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_TCLxTF() => Sum_TCLxTF(data, n) - 130;
+
+    public static int Sum_TCLxTF(int[] data, int n)
+    {
+        int sum = 0;
+        try
+        {
+            for (int i = 0; i < n; i++)
+            {
+                sum += 1;
+                try
+                {
+                    sum += data[i];
+                }
+                finally
+                {
+                    sum += 1;
+                }
+            }
+        }
+        catch (Exception)
+        {
+            return -1;
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_LxTCTF() => Sum_LxTCTF(data, n) - 130;
+
+    public static int Sum_LxTCTF(int[] data, int n)
+    {
+        int sum = 0;
+
+        for (int i = 0; i < n; i++)
+        {
+            sum += 1;
+            try
+            {
+                try
+                {
+                    sum += data[i];
+                }
+                finally
+                {
+                    sum += 1;
+                }
+            }
+            catch (Exception)
+            {
+                return -1;
+            }
+        }
+
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_LxTCcTF() => Sum_LxTCcTF(data, n) - 130;
+
+    public static int Sum_LxTCcTF(int[] data, int n)
+    {
+        int sum = 0;
+
+        for (int i = 0; i < n; i++)
+        {
+            sum += 1;
+            try
+            {
+                try
+                {
+                    sum += data[i];
+                }
+                finally
+                {
+                    sum += 1;
+                }
+            }
+            catch (Exception)
+            {
+                sum += 2;
+            }
+        }
+
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_LxTFTC() => Sum_LxTFTC(data, n) - 130;
+
+    public static int Sum_LxTFTC(int[] data, int n)
+    {
+        int sum = 0;
+
+        for (int i = 0; i < n; i++)
+        {
+            sum += 1;
+            try
+            {
+                try
+                {
+                    sum += data[i];
+                }
+                catch (Exception)
+                {
+                    return -1;
+                }
+            }
+            finally
+            {
+                sum += 1;
+            }
+        }
+
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_LxTFTCc() => Sum_LxTFTCc(data, n) - 130;
+
+    public static int Sum_LxTFTCc(int[] data, int n)
+    {
+        int sum = 0;
+
+        for (int i = 0; i < n; i++)
+        {
+            sum += 1;
+            try
+            {
+                try
+                {
+                    sum += data[i];
+                }
+                catch (Exception)
+                {
+                    sum += 2;
+                }
+            }
+            finally
+            {
+                sum += 1;
+            }
+        }
+
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_LxTFTF() => Sum_LxTFTF(data, n) - 110;
+
+    public static int Sum_LxTFTF(int[] data, int n)
+    {
+        int sum = 0;
+
+        for (int i = 0; i < n; i++)
+        {
+            sum += 1;
+            try
+            {
+                try
+                {
+                    sum += data[i];
+                }
+                finally
+                {
+                    sum += -1;
+                }
+            }
+            finally
+            {
+                sum += 1;
+            }
+        }
+
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_LxTFxTF() => Sum_TFLxTF(data, n) - 131; // NOTE(review): name looks like a typo for Test_TFLxTF; kept so the test identity is unchanged
+
+    public static int Sum_TFLxTF(int[] data, int n)
+    {
+        int sum = 0;
+        try
+        {
+            for (int i = 0; i < n; i++)
+            {
+                sum += 1;
+                try
+                {
+                    sum += data[i];
+                }
+                finally
+                {
+                    sum += 1;
+                }
+            }
+        }
+        finally
+        {
+            sum += 1;
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_TFTFLxTF() => Sum_TFTFLxTF(data, n) - 132;
+
+    public static int Sum_TFTFLxTF(int[] data, int n)
+    {
+        int sum = 0;
+        try
+        {
+            try
+            {
+                for (int i = 0; i < n; i++)
+                {
+                    sum += 1;
+                    try
+                    {
+                        sum += data[i];
+                    }
+                    finally
+                    {
+                        sum += 1;
+                    }
+                }
+            }
+            finally
+            {
+                sum += 1;
+            }
+        }
+        finally
+        {
+            sum += 1;
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_TCTFLxTF() => Sum_TCTFLxTF(data, n) - 131;
+
+    public static int Sum_TCTFLxTF(int[] data, int n)
+    {
+        int sum = 0;
+        try
+        {
+            try
+            {
+                for (int i = 0; i < n; i++)
+                {
+                    sum += 1;
+                    try
+                    {
+                        sum += data[i];
+                    }
+                    finally
+                    {
+                        sum += 1;
+                    }
+                }
+            }
+            finally
+            {
+                sum += 1;
+            }
+        }
+        catch(Exception)
+        {
+            return -1;
+        }
+        return sum;
+    }
+
+    [Fact]
+    public static int Test_TFTCLxTF() => Sum_TFTCLxTF(data, n) - 131;
+
+    public static int Sum_TFTCLxTF(int[] data, int n)
+    {
+        int sum = 0;
+        try
+        {
+            try
+            {
+                for (int i = 0; i < n; i++)
+                {
+                    sum += 1;
+                    try
+                    {
+                        sum += data[i];
+                    }
+                    finally
+                    {
+                        sum += 1;
+                    }
+                }
+            }
+            catch (Exception)
+            {
+                return -1;
+            }
+        }
+        finally
+        {
+            sum += 1;
+        }
+        return sum;
+    }
+}
+
diff --git a/src/tests/JIT/opt/Cloning/loops_with_eh.csproj b/src/tests/JIT/opt/Cloning/loops_with_eh.csproj new file mode 100644 index 00000000000000..f071d60bc864f0 --- /dev/null +++ b/src/tests/JIT/opt/Cloning/loops_with_eh.csproj @@ -0,0 +1,17 @@ + + + + true + + + None + True + + + + true + + + + +