Skip to content

Commit

Permalink
JIT: Optimize struct parameter register accesses in the backend (#110819
Browse files Browse the repository at this point in the history
)

This PR adds an optimization in lowering that utilizes the new
parameter-register-to-local mappings added in #110795. The optimization
detects IR that is going to result in stack spills/loads and instead
replaces it with scalar locals that will be able to stay in registers.

Physical promotion benefits especially from this as it creates the kind
of IR that the optimization ends up kicking in for. The heuristics of
physical promotion are updated to account for the fact that the backend
is now able to do this optimization, making physical promotion more
likely to promote struct parameters.
  • Loading branch information
jakobbotsch authored Feb 11, 2025
1 parent 8ae8594 commit 8a22b87
Show file tree
Hide file tree
Showing 13 changed files with 469 additions and 41 deletions.
2 changes: 2 additions & 0 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,8 @@ class CodeGen final : public CodeGenInterface
void genPopFltRegs(regMaskTP regMask);
regMaskTP genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskCalleeSavedFloat);

regMaskTP genPrespilledUnmappedRegs();

regMaskTP genJmpCallArgMask();

void genFreeLclFrame(unsigned frameSize,
Expand Down
24 changes: 24 additions & 0 deletions src/coreclr/jit/codegenarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2102,6 +2102,30 @@ regMaskTP CodeGen::genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskC
}
}

//-----------------------------------------------------------------------------------
// genPrespilledUnmappedRegs: Compute the prespilled registers that no
// parameter register to local mapping refers to.
//
// Returns:
//   The prespill register mask with the register of every entry in
//   compiler->m_paramRegLocalMappings removed. These registers can be used
//   safely in prolog as they won't be needed after prespilling.
//
regMaskTP CodeGen::genPrespilledUnmappedRegs()
{
    regMaskTP unmappedRegs = regSet.rsMaskPreSpillRegs(false);

    // With no mapping table there is nothing to exclude.
    if (compiler->m_paramRegLocalMappings == nullptr)
    {
        return unmappedRegs;
    }

    // The table is not modified while we walk it, so read its size once.
    const int numMappings = compiler->m_paramRegLocalMappings->Height();
    for (int index = 0; index < numMappings; index++)
    {
        const ParameterRegisterLocalMapping& entry = compiler->m_paramRegLocalMappings->BottomRef(index);

        // Drop the register backing this mapping from the result.
        unmappedRegs &= ~entry.RegisterSegment->GetRegisterMask();
    }

    return unmappedRegs;
}

//-----------------------------------------------------------------------------------
// instGen_MemoryBarrier: Emit a MemoryBarrier instruction
//
Expand Down
31 changes: 28 additions & 3 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3411,6 +3411,12 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed)
// top of the underlying registers.
RegGraph graph(compiler);

// Add everything to the graph, or spill directly to stack when needed.
// Note that some registers may be homed in multiple (stack) places.
// In particular, if there is a mapping to a local that does not share its
// (stack) home with the parameter local, we will home the register both
// into the parameter local's stack home (if it is used) and into the
// mapping target.
for (unsigned lclNum = 0; lclNum < compiler->info.compArgsCount; lclNum++)
{
LclVarDsc* lclDsc = compiler->lvaGetDesc(lclNum);
Expand All @@ -3426,11 +3432,26 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed)
const ParameterRegisterLocalMapping* mapping =
compiler->FindParameterRegisterLocalMappingByRegister(segment.GetRegister());

bool spillToBaseLocal = true;
if (mapping != nullptr)
{
genSpillOrAddRegisterParam(mapping->LclNum, mapping->Offset, lclNum, segment, &graph);

// If home is shared with base local, then skip spilling to the
// base local.
if (lclDsc->lvPromoted)
{
spillToBaseLocal = false;
}
}
else

#ifdef TARGET_ARM
// For arm32 the spills to the base local happen as part of
// prespilling sometimes, so skip it in that case.
spillToBaseLocal &= (regSet.rsMaskPreSpillRegs(false) & segment.GetRegisterMask()) == 0;
#endif

if (spillToBaseLocal)
{
genSpillOrAddRegisterParam(lclNum, segment.Offset, lclNum, segment, &graph);
}
Expand Down Expand Up @@ -3915,7 +3936,7 @@ void CodeGen::genCheckUseBlockInit()
// must force spill R4/R5/R6 so that we can use them during
// zero-initialization process.
//
int forceSpillRegCount = genCountBits(maskCalleeRegArgMask & ~regSet.rsMaskPreSpillRegs(false)) - 1;
int forceSpillRegCount = genCountBits(maskCalleeRegArgMask & ~genPrespilledUnmappedRegs()) - 1;
if (forceSpillRegCount > 0)
regSet.rsSetRegsModified(RBM_R4);
if (forceSpillRegCount > 1)
Expand Down Expand Up @@ -5347,7 +5368,7 @@ void CodeGen::genFnProlog()
// These registers will be available to use for the initReg. We just remove
// all of these registers from the rsCalleeRegArgMaskLiveIn.
//
intRegState.rsCalleeRegArgMaskLiveIn &= ~regSet.rsMaskPreSpillRegs(false);
intRegState.rsCalleeRegArgMaskLiveIn &= ~genPrespilledUnmappedRegs();
#endif

/* Choose the register to use for zero initialization */
Expand Down Expand Up @@ -5751,6 +5772,10 @@ void CodeGen::genFnProlog()
#else
genEnregisterOSRArgsAndLocals();
#endif
// OSR functions take no parameters in registers. Ensure no mappings
// are present.
// assert((compiler->m_paramRegLocalMappings == nullptr) || compiler->m_paramRegLocalMappings->Empty());

compiler->lvaUpdateArgsWithInitialReg();
}
else
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10419,7 +10419,7 @@ JITDBGAPI void __cdecl dVN(ValueNum vn)
cVN(JitTls::GetCompiler(), vn);
}

JITDBGAPI void __cdecl dRegMask(regMaskTP mask)
JITDBGAPI void __cdecl dRegMask(const regMaskTP& mask)
{
static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
printf("===================================================================== dRegMask %u\n", sequenceNumber++);
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -4202,7 +4202,7 @@ class Compiler

#ifdef DEBUG
void lvaDumpRegLocation(unsigned lclNum);
void lvaDumpFrameLocation(unsigned lclNum);
void lvaDumpFrameLocation(unsigned lclNum, int minLength);
void lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t refCntWtdWidth = 6);
void lvaTableDump(FrameLayoutState curState = NO_FRAME_LAYOUT); // NO_FRAME_LAYOUT means use the current frame
// layout state defined by lvaDoneFrameLayout
Expand Down
5 changes: 3 additions & 2 deletions src/coreclr/jit/compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4260,8 +4260,9 @@ inline void Compiler::CLR_API_Leave(API_ICorJitInfo_Names ename)
bool Compiler::fgVarIsNeverZeroInitializedInProlog(unsigned varNum)
{
LclVarDsc* varDsc = lvaGetDesc(varNum);
bool result = varDsc->lvIsParam || lvaIsOSRLocal(varNum) || (varNum == lvaGSSecurityCookie) ||
(varNum == lvaInlinedPInvokeFrameVar) || (varNum == lvaStubArgumentVar) || (varNum == lvaRetAddrVar);
bool result = varDsc->lvIsParam || varDsc->lvIsParamRegTarget || lvaIsOSRLocal(varNum) ||
(varNum == lvaGSSecurityCookie) || (varNum == lvaInlinedPInvokeFrameVar) ||
(varNum == lvaStubArgumentVar) || (varNum == lvaRetAddrVar);

#ifdef TARGET_ARM64
result = result || (varNum == lvaFfrRegister);
Expand Down
20 changes: 15 additions & 5 deletions src/coreclr/jit/lclvars.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4939,8 +4939,8 @@ void Compiler::lvaComputeRefCounts(bool isRecompute, bool setSlotNumbers)
// that was set by past phases.
if (!isRecompute)
{
varDsc->lvSingleDef = varDsc->lvIsParam;
varDsc->lvSingleDefRegCandidate = varDsc->lvIsParam;
varDsc->lvSingleDef = varDsc->lvIsParam || varDsc->lvIsParamRegTarget;
varDsc->lvSingleDefRegCandidate = varDsc->lvIsParam || varDsc->lvIsParamRegTarget;

varDsc->lvAllDefsAreNoGc = (varDsc->lvImplicitlyReferenced == false);
}
Expand Down Expand Up @@ -5033,6 +5033,11 @@ void Compiler::lvaComputeRefCounts(bool isRecompute, bool setSlotNumbers)
varDsc->incRefCnts(BB_UNITY_WEIGHT, this);
}
}
else if (varDsc->lvIsParamRegTarget && (varDsc->lvRefCnt() > 0))
{
varDsc->incRefCnts(BB_UNITY_WEIGHT, this);
varDsc->incRefCnts(BB_UNITY_WEIGHT, this);
}

// If we have JMP, all arguments must have a location
// even if we don't use them inside the method
Expand Down Expand Up @@ -7370,7 +7375,7 @@ void Compiler::lvaDumpRegLocation(unsigned lclNum)
* in its home location.
*/

void Compiler::lvaDumpFrameLocation(unsigned lclNum)
void Compiler::lvaDumpFrameLocation(unsigned lclNum, int minLength)
{
int offset;
regNumber baseReg;
Expand All @@ -7383,7 +7388,12 @@ void Compiler::lvaDumpFrameLocation(unsigned lclNum)
baseReg = EBPbased ? REG_FPBASE : REG_SPBASE;
#endif

printf("[%2s%1s0x%02X] ", getRegName(baseReg), (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset));
int printed =
printf("[%2s%1s0x%02X] ", getRegName(baseReg), (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset));
if ((printed >= 0) && (printed < minLength))
{
printf("%*s", minLength - printed, "");
}
}

/*****************************************************************************
Expand Down Expand Up @@ -7474,7 +7484,7 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t r
// location. Otherwise, it's always on the stack.
if (lvaDoneFrameLayout != NO_FRAME_LAYOUT)
{
lvaDumpFrameLocation(lclNum);
lvaDumpFrameLocation(lclNum, (int)strlen("zero-ref "));
}
}
}
Expand Down
Loading

0 comments on commit 8a22b87

Please sign in to comment.