JIT: Don't reorder handler blocks (#112292)
Splitting this out of #112004 to simplify diff triage. The cost of executing a handler region should be dominated by the runtime overhead of exception handling, so I don't think we're losing anything meaningful by not reordering handler blocks. In the case of finally regions, if they are sufficiently hot, the JIT should have already copied them into the main method region.
amanasifkhalid authored Feb 10, 2025
1 parent 7122090 commit 9b77dd4
Showing 3 changed files with 75 additions and 111 deletions.
20 changes: 15 additions & 5 deletions src/coreclr/jit/compiler.hpp
@@ -5062,12 +5062,22 @@ void Compiler::fgVisitBlocksInLoopAwareRPO(FlowGraphDfsTree* dfsTree, FlowGraphN
         }
     };
 
-    LoopAwareVisitor visitor(dfsTree, loops, func);
-
-    for (unsigned i = dfsTree->GetPostOrderCount(); i != 0; i--)
+    if (loops->NumLoops() == 0)
     {
-        BasicBlock* const block = dfsTree->GetPostOrder(i - 1);
-        visitor.VisitBlock(block);
+        for (unsigned i = dfsTree->GetPostOrderCount(); i != 0; i--)
+        {
+            BasicBlock* const block = dfsTree->GetPostOrder(i - 1);
+            func(block);
+        }
+    }
+    else
+    {
+        LoopAwareVisitor visitor(dfsTree, loops, func);
+        for (unsigned i = dfsTree->GetPostOrderCount(); i != 0; i--)
+        {
+            BasicBlock* const block = dfsTree->GetPostOrder(i - 1);
+            visitor.VisitBlock(block);
+        }
     }
 }

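A note on the hunk above: with zero loops, a loop-aware RPO walk degenerates to a plain RPO walk, so calling func directly is equivalent and skips the visitor bookkeeping. The sketch below is illustrative only -- invented names, not the JIT's implementation, and it assumes non-nested loops -- but it models what "loop-aware" means here: blocks are emitted in RPO, except that reaching a loop header immediately flushes the rest of that loop's blocks, keeping the loop body compact in the visitation order.

// Illustrative sketch only; all names are invented and loops are assumed non-nested.
#include <cstdio>
#include <vector>

struct Block
{
    int id;
    int loop; // index of the containing loop, or -1 if not in a loop
};

// rpo: blocks in reverse post-order; headerOf[l]: block id of loop l's header.
static void VisitLoopAwareRPO(const std::vector<Block>& rpo, const std::vector<int>& headerOf)
{
    std::vector<bool> visited(rpo.size(), false);
    for (size_t i = 0; i < rpo.size(); i++)
    {
        if (visited[i])
        {
            continue;
        }
        std::printf("BB%02d\n", rpo[i].id);
        visited[i] = true;

        // On reaching a loop header, flush the loop's remaining members so no
        // non-loop block ends up interleaved with the loop body.
        const int l = rpo[i].loop;
        if ((l != -1) && (headerOf[l] == rpo[i].id))
        {
            for (size_t j = i + 1; j < rpo.size(); j++)
            {
                if (!visited[j] && (rpo[j].loop == l))
                {
                    std::printf("BB%02d\n", rpo[j].id);
                    visited[j] = true;
                }
            }
        }
    }
}

int main()
{
    // BB02 and BB04 belong to loop 0 (header BB02); BB03 is interleaved between them in RPO.
    const std::vector<Block> rpo      = {{1, -1}, {2, 0}, {3, -1}, {4, 0}, {5, -1}};
    const std::vector<int>   headerOf = {2};
    VisitLoopAwareRPO(rpo, headerOf); // prints BB01, BB02, BB04, BB03, BB05
}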
155 changes: 57 additions & 98 deletions src/coreclr/jit/fgopt.cpp
@@ -4663,120 +4663,86 @@ void Compiler::fgDoReversePostOrderLayout()
     }
 #endif // DEBUG
 
-    // If LSRA didn't create any new blocks, we can reuse its loop-aware RPO traversal,
-    // which is cached in Compiler::fgBBs.
-    // If the cache isn't available, we need to recompute the loop-aware RPO.
+    // If LSRA didn't create any new blocks, we can reuse its flowgraph annotations.
     //
-    BasicBlock** rpoSequence = fgBBs;
-
-    if (rpoSequence == nullptr)
+    if (m_dfsTree == nullptr)
     {
-        assert(m_dfsTree == nullptr);
-        m_dfsTree                          = fgComputeDfs</* useProfile */ true>();
-        FlowGraphNaturalLoops* const loops = FlowGraphNaturalLoops::Find(m_dfsTree);
-        rpoSequence                        = new (this, CMK_BasicBlock) BasicBlock*[m_dfsTree->GetPostOrderCount()];
-        unsigned index                     = 0;
-        auto addToSequence = [rpoSequence, &index](BasicBlock* block) {
-            rpoSequence[index++] = block;
-        };
-
-        fgVisitBlocksInLoopAwareRPO(m_dfsTree, loops, addToSequence);
+        m_dfsTree = fgComputeDfs</* useProfile */ true>();
+        m_loops   = FlowGraphNaturalLoops::Find(m_dfsTree);
     }
     else
    {
-        assert(m_dfsTree != nullptr);
+        assert(m_loops != nullptr);
    }
 
-    // Fast path: We don't have any EH regions, so just reorder the blocks
-    //
-    if (compHndBBtabCount == 0)
-    {
-        for (unsigned i = 1; i < m_dfsTree->GetPostOrderCount(); i++)
+    BasicBlock** const rpoSequence = new (this, CMK_BasicBlock) BasicBlock*[m_dfsTree->GetPostOrderCount()];
+    unsigned           numBlocks   = 0;
+    auto addToSequence = [rpoSequence, &numBlocks](BasicBlock* block) {
+        // Exclude handler regions from being reordered.
+        //
+        if (!block->hasHndIndex())
         {
-            BasicBlock* const block       = rpoSequence[i - 1];
-            BasicBlock* const blockToMove = rpoSequence[i];
-
-            if (!block->NextIs(blockToMove))
-            {
-                fgUnlinkBlock(blockToMove);
-                fgInsertBBafter(block, blockToMove);
-            }
+            rpoSequence[numBlocks++] = block;
         }
+    };
 
-        fgMoveHotJumps</* hasEH */ false>();
-
-        return;
-    }
+    fgVisitBlocksInLoopAwareRPO(m_dfsTree, m_loops, addToSequence);
 
-    // The RPO will break up call-finally pairs, so save them before re-ordering
+    // Reorder blocks.
     //
-    struct CallFinallyPair
+    for (unsigned i = 1; i < numBlocks; i++)
     {
-        BasicBlock* callFinally;
-        BasicBlock* callFinallyRet;
+        BasicBlock*       block       = rpoSequence[i - 1];
+        BasicBlock* const blockToMove = rpoSequence[i];
 
-        // Constructor provided so we can call ArrayStack::Emplace
-        //
-        CallFinallyPair(BasicBlock* first, BasicBlock* second)
-            : callFinally(first)
-            , callFinallyRet(second)
+        if (block->NextIs(blockToMove))
         {
+            continue;
         }
-    };
 
-    ArrayStack<CallFinallyPair> callFinallyPairs(getAllocator());
-
-    for (EHblkDsc* const HBtab : EHClauses(this))
-    {
-        if (HBtab->HasFinallyHandler())
+        // Only reorder blocks within the same try region. We don't want to make them non-contiguous.
+        //
+        if (!BasicBlock::sameTryRegion(block, blockToMove))
         {
-            for (BasicBlock* const pred : HBtab->ebdHndBeg->PredBlocks())
-            {
-                assert(pred->KindIs(BBJ_CALLFINALLY));
-                if (pred->isBBCallFinallyPair())
-                {
-                    callFinallyPairs.Emplace(pred, pred->Next());
-                }
-            }
+            continue;
         }
-    }
 
-    // Reorder blocks
-    //
-    for (unsigned i = 1; i < m_dfsTree->GetPostOrderCount(); i++)
-    {
-        BasicBlock* const block       = rpoSequence[i - 1];
-        BasicBlock* const blockToMove = rpoSequence[i];
+        // Don't move call-finally pair tails independently.
+        // When we encounter the head, we will move the entire pair.
+        //
+        if (blockToMove->isBBCallFinallyPairTail())
+        {
+            continue;
+        }
 
-        // Only reorder blocks within the same EH region -- we don't want to make them non-contiguous
+        // Don't break up call-finally pairs by inserting something in the middle.
         //
-        if (BasicBlock::sameEHRegion(block, blockToMove))
+        if (block->isBBCallFinallyPair())
        {
-            // Don't reorder EH regions with filter handlers -- we want the filter to come first
-            //
-            if (block->hasHndIndex() && ehGetDsc(block->getHndIndex())->HasFilter())
-            {
-                continue;
-            }
+            block = block->Next();
        }
 
-        if (!block->NextIs(blockToMove))
-        {
-            fgUnlinkBlock(blockToMove);
-            fgInsertBBafter(block, blockToMove);
-        }
+        if (blockToMove->isBBCallFinallyPair())
+        {
+            BasicBlock* const callFinallyRet = blockToMove->Next();
+            fgUnlinkRange(blockToMove, callFinallyRet);
+            fgMoveBlocksAfter(blockToMove, callFinallyRet, block);
+        }
+        else
+        {
+            fgUnlinkBlock(blockToMove);
+            fgInsertBBafter(block, blockToMove);
+        }
     }
 
-    // Fix up call-finally pairs
-    //
-    for (int i = 0; i < callFinallyPairs.Height(); i++)
+    if (compHndBBtabCount == 0)
     {
-        const CallFinallyPair& pair = callFinallyPairs.BottomRef(i);
-        fgUnlinkBlock(pair.callFinallyRet);
-        fgInsertBBafter(pair.callFinally, pair.callFinallyRet);
+        fgMoveHotJumps</* hasEH */ false>();
+    }
+    else
+    {
+        fgMoveHotJumps</* hasEH */ true>();
     }
-
-    fgMoveHotJumps</* hasEH */ true>();
 }
 
 //-----------------------------------------------------------------------------
@@ -5133,14 +5099,6 @@ void Compiler::ThreeOptLayout::ConsiderEdge(FlowEdge* edge)
         return;
     }
 
-    // Don't waste time reordering within handler regions.
-    // Note that if a finally region is sufficiently hot,
-    // we should have cloned it into the main method body already.
-    if (srcBlk->hasHndIndex() || dstBlk->hasHndIndex())
-    {
-        return;
-    }
-
     // For backward jumps, we will consider partitioning before 'srcBlk'.
     // If 'srcBlk' is a BBJ_CALLFINALLYRET, this partition will split up a call-finally pair.
     // Thus, don't consider edges out of BBJ_CALLFINALLYRET blocks.
@@ -5256,7 +5214,8 @@ void Compiler::ThreeOptLayout::Run()
     // Initialize the current block order
     for (BasicBlock* const block : compiler->Blocks(compiler->fgFirstBB, finalBlock))
     {
-        if (!compiler->m_dfsTree->Contains(block))
+        // Exclude unreachable blocks and handler blocks from being reordered
+        if (!compiler->m_dfsTree->Contains(block) || block->hasHndIndex())
         {
             continue;
         }
@@ -5289,14 +5248,14 @@ void Compiler::ThreeOptLayout::Run()
             continue;
         }
 
-        // Only reorder within EH regions to maintain contiguity.
-        if (!BasicBlock::sameEHRegion(block, next))
+        // Only reorder within try regions to maintain contiguity.
+        if (!BasicBlock::sameTryRegion(block, next))
         {
             continue;
         }
 
-        // Don't move the entry of an EH region.
-        if (compiler->bbIsTryBeg(next) || compiler->bbIsHandlerBeg(next))
+        // Don't move the entry of a try region.
+        if (compiler->bbIsTryBeg(next))
         {
             continue;
         }
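A note on the call-finally handling in the first fgopt.cpp hunk: rather than recording pairs up front and re-attaching each BBJ_CALLFINALLYRET tail afterward, the new loop moves a pair as a single range via fgUnlinkRange and fgMoveBlocksAfter. The following sketch uses simplified stand-ins (not the JIT's BasicBlock list) to show the doubly-linked-list surgery that lets a two-block range move as one unit:

// Illustrative only: simplified stand-ins for the block list, not JIT types.
#include <cassert>
#include <cstdio>

struct Node
{
    int   id;
    Node* prev = nullptr;
    Node* next = nullptr;
};

// Unlink the inclusive range [first, last] and reinsert it after 'insertAfter',
// mirroring the unlink-range + move-after steps above so a pair never splits.
static void MoveRangeAfter(Node* first, Node* last, Node* insertAfter)
{
    assert((first->prev != nullptr) && (insertAfter != last));

    // Unlink [first, last].
    first->prev->next = last->next;
    if (last->next != nullptr)
    {
        last->next->prev = first->prev;
    }

    // Splice the range back in after 'insertAfter'.
    last->next = insertAfter->next;
    if (insertAfter->next != nullptr)
    {
        insertAfter->next->prev = last;
    }
    insertAfter->next = first;
    first->prev       = insertAfter;
}

int main()
{
    Node a{1}, b{2}, c{3}, d{4};
    a.next = &b; b.prev = &a; b.next = &c; c.prev = &b; c.next = &d; d.prev = &c;

    MoveRangeAfter(&b, &c, &d); // move the pair [b, c] after d
    for (Node* n = &a; n != nullptr; n = n->next)
    {
        std::printf("BB%02d\n", n->id); // prints BB01, BB04, BB02, BB03
    }
}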
11 changes: 3 additions & 8 deletions src/coreclr/jit/lsra.cpp
@@ -972,7 +972,7 @@ void LinearScan::setBlockSequence()
     FlowGraphDfsTree* const dfsTree = compiler->m_dfsTree;
     blockSequence = new (compiler, CMK_LSRA) BasicBlock*[compiler->fgBBcount];
 
-    if (compiler->opts.OptimizationEnabled() && dfsTree->HasCycle())
+    if (compiler->opts.OptimizationEnabled())
     {
         // Ensure loop bodies are compact in the visitation order.
         compiler->m_loops = FlowGraphNaturalLoops::Find(dfsTree);
@@ -1333,15 +1333,10 @@
     compiler->compLSRADone = true;
 
     // If edge resolution didn't create new blocks,
-    // cache the block sequence so it can be used as an initial layout during block reordering.
-    if (compiler->fgBBcount == bbSeqCount)
-    {
-        compiler->fgBBs = blockSequence;
-    }
-    else
+    // we can reuse the current flowgraph annotations during block layout.
+    if (compiler->fgBBcount != bbSeqCount)
     {
         assert(compiler->fgBBcount > bbSeqCount);
-        compiler->fgBBs = nullptr;
         compiler->fgInvalidateDfsTree();
     }
 
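The lsra.cpp changes replace the fgBBs block-sequence cache with a simpler contract: if edge resolution added blocks, the DFS tree (and with it the loop annotations) is invalidated; otherwise downstream layout reuses it as-is. A generic sketch of that reuse-or-invalidate pattern, with hypothetical types that are not the JIT's caching scheme:

// Hypothetical types for illustration; not the JIT's actual caching scheme.
#include <cstdio>

struct Analysis
{
    unsigned blockCount; // number of blocks this analysis was computed over
};

struct FlowGraph
{
    unsigned  blockCount = 0;
    Analysis* cached     = nullptr;

    // Consumers call this instead of trusting the cache blindly: a stale
    // analysis (computed over fewer blocks) is dropped and rebuilt on demand.
    Analysis* GetAnalysis()
    {
        if ((cached != nullptr) && (cached->blockCount != blockCount))
        {
            delete cached; // invalidate: the graph changed since it was computed
            cached = nullptr;
        }
        if (cached == nullptr)
        {
            cached = new Analysis{blockCount}; // recompute lazily
        }
        return cached;
    }
};

int main()
{
    FlowGraph g;
    g.blockCount = 10;
    std::printf("%u\n", g.GetAnalysis()->blockCount); // 10
    g.blockCount = 12; // "edge resolution" created new blocks
    std::printf("%u\n", g.GetAnalysis()->blockCount); // 12 (recomputed)
    delete g.cached;
}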
