From bf5e96ffb3801b31c2027d93d056d01f46c46799 Mon Sep 17 00:00:00 2001 From: Carol Eidt Date: Thu, 5 Dec 2019 09:38:13 -0800 Subject: [PATCH 01/17] Combine free and busy allocation Perform register allocation for a given RefPosition with a single traversal of the registers, whether free or busy. --- src/coreclr/src/jit/lsra.cpp | 2295 +++++++++-------- src/coreclr/src/jit/lsra.h | 185 +- src/coreclr/src/jit/lsraarm64.cpp | 4 +- src/coreclr/src/jit/lsrabuild.cpp | 87 +- src/coreclr/src/jit/lsraxarch.cpp | 26 +- src/coreclr/src/jit/target.h | 2 +- .../JitBlue/GitHub_13735/GitHub_13735.cs | 62 + .../JitBlue/GitHub_13735/GitHub_13735.csproj | 12 + 8 files changed, 1551 insertions(+), 1122 deletions(-) create mode 100644 src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_13735/GitHub_13735.cs create mode 100644 src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_13735/GitHub_13735.csproj diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp index bd94968c20376e..734fadd8f86fdc 100644 --- a/src/coreclr/src/jit/lsra.cpp +++ b/src/coreclr/src/jit/lsra.cpp @@ -273,6 +273,75 @@ regMaskTP LinearScan::allSIMDRegs() return availableFloatRegs; } +void LinearScan::updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPosition) +{ + LsraLocation nextLocation; + + if (nextRefPosition == nullptr) + { + nextLocation = MaxLocation; + } + else + { + nextLocation = nextRefPosition->nodeLocation; + } + nextFixedRef[regRecord->regNum] = nextLocation; +} + +void LinearScan::clearNextIntervalRef(regNumber reg, var_types regType) +{ + nextIntervalRef[reg] = MaxLocation; +#ifdef TARGET_ARM + if (regType == TYP_DOUBLE) + { + assert(genIsValidDoubleReg(reg)); + regNumber otherReg = REG_NEXT(reg); + nextIntervalRef[otherReg] = MaxLocation; + } +#endif +} + +void LinearScan::clearSpillCost(regNumber reg, var_types regType) +{ + spillCost[reg] = 0; +#ifdef TARGET_ARM + if (regType == TYP_DOUBLE) + { + assert(genIsValidDoubleReg(reg)); + regNumber otherReg = REG_NEXT(reg); + spillCost[otherReg] = 0; + } +#endif +} + +void LinearScan::updateNextIntervalRef(regNumber reg, Interval* interval) +{ + LsraLocation nextRefLocation = interval->getNextRefLocation(); + nextIntervalRef[reg] = nextRefLocation; +#ifdef TARGET_ARM + if (interval->registerType == TYP_DOUBLE) + { + regNumber otherReg = REG_NEXT(reg); + nextIntervalRef[otherReg] = nextRefLocation; + } +#endif +} + +void LinearScan::updateSpillCost(regNumber reg, Interval* interval) +{ + // An interval can have no recentRefPosition if this is the initial assignment + // of a parameter to its home register. + unsigned int weight = (interval->recentRefPosition != nullptr) ? getWeight(interval->recentRefPosition) : 0; + spillCost[reg] = weight; +#ifdef TARGET_ARM + if (interval->registerType == TYP_DOUBLE) + { + regNumber otherReg = REG_NEXT(reg); + spillCost[otherReg] = weight; + } +#endif +} + //------------------------------------------------------------------------ // internalFloatRegCandidates: Return the set of registers that are appropriate // for use as internal float registers. 
@@ -299,18 +368,12 @@ regMaskTP LinearScan::internalFloatRegCandidates() } } -/***************************************************************************** - * Inline functions for RegRecord - *****************************************************************************/ - -bool RegRecord::isFree() +bool LinearScan::isFree(RegRecord* regRecord) { - return ((assignedInterval == nullptr || !assignedInterval->isActive) && !isBusyUntilNextKill); + return ((regRecord->assignedInterval == nullptr || !regRecord->assignedInterval->isActive) && + !isRegBusy(regRecord->regNum, regRecord->registerType)); } -/***************************************************************************** - * Inline functions for LinearScan - *****************************************************************************/ RegRecord* LinearScan::getRegisterRecord(regNumber regNum) { assert((unsigned)regNum < ArrLen(physRegs)); @@ -424,13 +487,14 @@ regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask) // Assumptions: // 'refPosition is non-null. -bool RegRecord::conflictingFixedRegReference(RefPosition* refPosition) +bool LinearScan::conflictingFixedRegReference(regNumber regNum, RefPosition* refPosition) { // Is this a fixed reference of this register? If so, there is no conflict. if (refPosition->isFixedRefOfRegMask(genRegMask(regNum))) { return false; } + // Otherwise, check for conflicts. // There is a conflict if: // 1. There is a recent RefPosition on this RegRecord that is at this location, @@ -439,13 +503,14 @@ bool RegRecord::conflictingFixedRegReference(RefPosition* refPosition) // if refPosition is a delayed use (i.e. must be kept live through the next/def location). LsraLocation refLocation = refPosition->nodeLocation; - if (recentRefPosition != nullptr && recentRefPosition->refType != RefTypeKill && - recentRefPosition->nodeLocation == refLocation && - (!isBusyUntilNextKill || assignedInterval != refPosition->getInterval())) + RegRecord* regRecord = getRegisterRecord(regNum); + if (isRegInUse(regNum, refPosition->getInterval()->registerType) && + (regRecord->assignedInterval != refPosition->getInterval())) { return true; } - LsraLocation nextPhysRefLocation = getNextRefLocation(); + + LsraLocation nextPhysRefLocation = nextFixedRef[regNum]; if (nextPhysRefLocation == refLocation || (refPosition->delayRegFree && nextPhysRefLocation == (refLocation + 1))) { return true; @@ -637,6 +702,7 @@ LinearScan::LinearScan(Compiler* theCompiler) // Get the value of the environment variable that controls stress for register allocation lsraStressMask = JitConfig.JitStressRegs(); + #if 0 if (lsraStressMask != 0) { @@ -705,7 +771,7 @@ LinearScan::LinearScan(Compiler* theCompiler) #ifdef TARGET_ARM64 availableIntRegs = (RBM_ALLINT & ~(RBM_PR | RBM_FP | RBM_LR) & ~compiler->codeGen->regSet.rsMaskResvd); #else - availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd); + availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd); #endif #if ETW_EBP_FRAMED @@ -2580,101 +2646,40 @@ bool LinearScan::registerIsAvailable(RegRecord* physRegRecord, LsraLocation* nextRefLocationPtr, RegisterType regType) { - LsraLocation nextRefLocation = MaxLocation; - if (physRegRecord->isBusyUntilNextKill) + regNumber reg = physRegRecord->regNum; + LsraLocation nextRefLocation = Min(nextFixedRef[reg], nextIntervalRef[reg]); + assert(!isRegBusy(reg, regType) && !isRegInUse(reg, regType)); + bool isAvailable = ((m_AvailableRegs & genRegMask(reg)) != RBM_NONE); + if (!isAvailable) { - return false; 
- } - - RefPosition* nextPhysReference = physRegRecord->getNextRefPosition(); - if (nextPhysReference != nullptr) - { - nextRefLocation = nextPhysReference->nodeLocation; - } - else if (!physRegRecord->isCalleeSave) - { - nextRefLocation = MaxLocation - 1; - } - *nextRefLocationPtr = nextRefLocation; - - Interval* assignedInterval = physRegRecord->assignedInterval; - - if (assignedInterval != nullptr) - { - RefPosition* recentReference = assignedInterval->recentRefPosition; - - // The only case where we have an assignedInterval, but recentReference is null - // is where this interval is live at procedure entry (i.e. an arg register), in which - // case it's still live and its assigned register is not available - // (Note that the ParamDef will be recorded as a recentReference when we encounter - // it, but we will be allocating registers, potentially to other incoming parameters, - // as we process the ParamDefs.) - - if (recentReference == nullptr) - { - return false; - } - - // Is this a copyReg/moveReg? It is if the register assignment doesn't match. - // (the recentReference may not be a copyReg/moveReg, because we could have seen another - // reference since the copyReg/moveReg) - - if (!assignedInterval->isAssignedTo(physRegRecord->regNum)) - { - // If the recentReference is for a different register, it can be reassigned, but - // otherwise don't reassign it if it's still in use. - // (Note that it is unlikely that we have a recent copy or move to a different register, - // where this physRegRecord is still pointing at an earlier copy or move, but it is possible, - // especially in stress modes.) - if ((recentReference->registerAssignment == genRegMask(physRegRecord->regNum)) && - copyOrMoveRegInUse(recentReference, currentLoc)) - { - return false; - } - } - else if (!assignedInterval->isActive && assignedInterval->isConstant) - { - // Treat this as unassigned, i.e. do nothing. - // TODO-CQ: Consider adjusting the heuristics (probably in the caller of this method) - // to avoid reusing these registers. - } - // If this interval isn't active, it's available if it isn't referenced - // at this location (or the previous location, if the recent RefPosition - // is a delayRegFree). - else if (!assignedInterval->isActive && - (recentReference->refType == RefTypeExpUse || recentReference->getRefEndLocation() < currentLoc)) - { - // This interval must have a next reference (otherwise it wouldn't be assigned to this register) - RefPosition* nextReference = recentReference->nextRefPosition; - if (nextReference != nullptr) - { - if (nextReference->nodeLocation < nextRefLocation) - { - *nextRefLocationPtr = nextReference->nodeLocation; - } - } - else - { - assert(recentReference->copyReg && - (recentReference->registerAssignment != genRegMask(physRegRecord->regNum))); - } - } - else + assert(physRegRecord->assignedInterval != nullptr); + if (!physRegRecord->assignedInterval->isActive) { - return false; + // This must be in use in the current location. 
+ RefPosition* recentRef = physRegRecord->assignedInterval->recentRefPosition; + assert((recentRef->nodeLocation == currentLoc) || + ((recentRef->nodeLocation == (currentLoc - 1)) && recentRef->delayRegFree)); } } + *nextRefLocationPtr = nextRefLocation; #ifdef TARGET_ARM if (regType == TYP_DOUBLE) { // Recurse, but check the other half this time (TYP_FLOAT) - if (!registerIsAvailable(findAnotherHalfRegRec(physRegRecord), currentLoc, nextRefLocationPtr, TYP_FLOAT)) + RegRecord* secondHalfRegRec = getSecondHalfRegRec(physRegRecord); + assert(!isRegBusy(secondHalfRegRec->regNum, TYP_FLOAT) && !isRegInUse(reg, regType)); + if (!registerIsAvailable(secondHalfRegRec, currentLoc, nextRefLocationPtr, TYP_FLOAT)) return false; + // The above will overwrite the value in *nextRefLocationPtr. We want to keep the nearest location. + if (*nextRefLocationPtr > nextRefLocation) + { + *nextRefLocationPtr = nextRefLocation; + } } #endif // TARGET_ARM - return true; + return isAvailable; } //------------------------------------------------------------------------ @@ -2719,7 +2724,13 @@ RegisterType LinearScan::getRegisterType(Interval* currentInterval, RefPosition* // bool LinearScan::isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPosition) { - if ((physRegRecord->assignedInterval == nullptr) || !physRegRecord->assignedInterval->isConstant) + if ((physRegRecord->assignedInterval == nullptr) || !physRegRecord->assignedInterval->isConstant || + (refPosition->refType != RefTypeDef)) + { + return false; + } + Interval* interval = refPosition->getInterval(); + if (!interval->isConstant || !isRegConstant(physRegRecord->regNum, interval->registerType)) { return false; } @@ -2771,46 +2782,48 @@ bool LinearScan::isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPo } //------------------------------------------------------------------------ -// tryAllocateFreeReg: Find a free register that satisfies the requirements for refPosition, -// and takes into account the preferences for the given Interval +// allocateReg: Find a register that satisfies the requirements for refPosition, +// taking into account the preferences for the given Interval, +// and possibly spilling a lower weight Interval. // // Arguments: // currentInterval: The interval for the current allocation // refPosition: The RefPosition of the current Interval for which a register is being allocated -// // Return Value: -// The regNumber, if any, allocated to the RefPositon. Returns REG_NA if no free register is found. +// The regNumber, if any, allocated to the RefPosition. +// Returns REG_NA only if 'refPosition->RegOptional()' is true, and there are +// no free register or registers with lower-weight Intervals that can be spilled. // // Notes: -// TODO-CQ: Consider whether we need to use a different order for tree temps than for vars, as -// reg predict does - -static const regNumber lsraRegOrder[] = {REG_VAR_ORDER}; -const unsigned lsraRegOrderSize = ArrLen(lsraRegOrder); -static const regNumber lsraRegOrderFlt[] = {REG_VAR_ORDER_FLT}; -const unsigned lsraRegOrderFltSize = ArrLen(lsraRegOrderFlt); +// This method will prefer to allocate a free register, but if none are available, or if +// this is not a last use and all availalbe registers will be killed prior to the next use, +// it will look for a lower-weight Interval to spill. +// Weight and farthest distance of next reference are used to determine whether an Interval +// currently occupying a register should be spilled. 
It will be spilled either:
+//    - At its most recent RefPosition, if that is within the current block, OR
+//    - At the boundary between the previous block and this one
+//
+// To select a ref position for spilling:
+//    - If refPosition->RegOptional() == false
+//         The RefPosition chosen for spilling will be the lowest weight
+//         of all, and if there is more than one ref position with the
+//         same lowest weight, among them chooses the one with the farthest
+//         distance to its next reference.
+//
+//    - If refPosition->RegOptional() == true
+//         The ref position chosen for spilling will not only be the lowest weight
+//         of all, but will also have a weight lower than 'refPosition'. If there is
+//         no such ref position, no register will be allocated.
+//
-regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition* refPosition)
+regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPosition)
 {
     regNumber foundReg = REG_NA;
 
-    RegisterType regType = getRegisterType(currentInterval, refPosition);
-    const regNumber* regOrder;
-    unsigned         regOrderSize;
-    if (useFloatReg(regType))
-    {
-        regOrder     = lsraRegOrderFlt;
-        regOrderSize = lsraRegOrderFltSize;
-    }
-    else
-    {
-        regOrder     = lsraRegOrder;
-        regOrderSize = lsraRegOrderSize;
-    }
+    RegisterType regType = getRegisterType(currentInterval, refPosition);
 
     LsraLocation currentLocation = refPosition->nodeLocation;
     RefPosition* nextRefPos      = refPosition->nextRefPosition;
-    LsraLocation nextLocation    = (nextRefPos == nullptr) ? currentLocation : nextRefPos->nodeLocation;
     regMaskTP    candidates      = refPosition->registerAssignment;
     regMaskTP    preferences     = currentInterval->registerPreferences;
@@ -2909,9 +2922,15 @@ regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition*
             regMaskTP newRelatedPreferences = thisRelatedPreferences & relatedPreferences;
             if (newRelatedPreferences != RBM_NONE && (!avoidByteRegs || thisRelatedPreferences != RBM_BYTE_REGS))
             {
+                // TODO-CQ: The following isFree() check doesn't account for the possibility that there's an
+                // assignedInterval whose recentRefPosition was delayFree. It also fails to account for
+                // the TYP_DOUBLE case on ARM. It would be better to replace the call to isFree with
+                // isRegAvailable(genRegNumFromMask(newRelatedPreferences), regType)), but this is retained
+                // to achieve zero diffs.
+                //
                 bool thisIsSingleReg = isSingleRegister(newRelatedPreferences);
                 if (!thisIsSingleReg || (finalRelatedInterval->isLocalVar &&
-                                         getRegisterRecord(genRegNumFromMask(newRelatedPreferences))->isFree()))
+                                         isFree(getRegisterRecord(genRegNumFromMask(newRelatedPreferences)))))
                 {
                     relatedPreferences = newRelatedPreferences;
                     // If this Interval has a downstream def without a single-register preference, continue to iterate.
@@ -2995,329 +3014,594 @@ regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition*
     rangeEndLocation          = rangeEndRefPosition->getRefEndLocation();
     LsraLocation lastLocation = lastRefPosition->getRefEndLocation();
     regNumber    prevReg      = REG_NA;
+    RegRecord*   prevRegRec   = nullptr;
+
+    //-------------------------------------------------------------------------
+    // Register Selection
+
+    RegRecord* availablePhysRegRecord = nullptr;
+
+    // Each register will receive a score which takes into account the scoring criteria below.
+    // These were selected on the assumption that they will have an impact on the "goodness"
+    // of a register selection, and have been tuned to a certain extent by observing the impact
+    // of the ordering on asmDiffs.
However, there is much more room for tuning, + // and perhaps additional criteria. + // + enum RegisterScore + { + // These are the original criteria for comparing registers that are free. + VALUE_AVAILABLE = 0x8000, // It is a constant value that is already in an acceptable register. + COVERS = 0x4000, // It is in the interval's preference set and it covers the current range. + OWN_PREFERENCE = 0x2000, // It is in the preference set of this interval. + COVERS_RELATED = 0x1000, // It is in the preference set of the related interval and covers its entire lifetime. + RELATED_PREFERENCE = 0x0800, // It is in the preference set of the related interval. + CALLER_CALLEE = 0x0400, // It is in the right "set" for the interval (caller or callee-save). + UNASSIGNED = 0x0200, // It is not currently assigned to any (active or inactive) interval + COVERS_FULL = 0x0080, + BEST_FIT = 0x0040, + IS_PREV_REG = 0x0020, + REG_ORDER = 0x0010, + + FREE = 0x008, // It is not currently assigned to an *active* interval + + // These are the original criteria for comparing registers that are in use. + SPILL_COST_THIS = 0x004, // Its spill cost is lower than 'thisSpillCost' + SPILL_COST_OTHERS = 0x002, // It has a lower spill cost than the best candidate thus far + FAR_NEXT_REF = 0x001, // It has a farther next reference than the best candidate thus far. + }; + + int bestScore = 0; + unsigned int bestSpillWeight; + LsraLocation bestLocation = MinLocation; + LsraLocation farRefLocation = MinLocation; + LsraLocation nextPhysRefLocation = MaxLocation; - if (currentInterval->assignedReg) + // Handle the common case where there is only one candidate - + // avoid looping over all the other registers + bool useAssignedReg = false; + if (refPosition->isFixedRegRef && (candidates == refPosition->registerAssignment)) + { + foundReg = genRegNumFromMask(candidates); + + // Skip searching the registers, and just define the info for the single candidate. + candidates &= ~refPosition->registerAssignment; + availablePhysRegRecord = getRegisterRecord(foundReg); + bool isAvailable = isRegAvailable(foundReg, currentInterval->registerType); + if ((currentInterval->assignedReg == availablePhysRegRecord) && + ((availablePhysRegRecord->assignedInterval == currentInterval) || isAvailable)) + { + useAssignedReg = true; + } + else + { + if (isAvailable) + { + bestScore |= FREE; + if (isMatchingConstant(availablePhysRegRecord, refPosition)) + { + bestScore |= VALUE_AVAILABLE; + } + else if ((availablePhysRegRecord->assignedInterval == nullptr) || + (availablePhysRegRecord->assignedInterval->getNextRefLocation() > lastLocation)) + { + bestScore |= UNASSIGNED; + } + } + } + } + else if (currentInterval->assignedReg != nullptr) { - bool useAssignedReg = false; // This was an interval that was previously allocated to the given // physical register, and we should try to allocate it to that register // again, if possible and reasonable. // Use it preemptively (i.e. before checking other available regs) // only if it is preferred and available. - RegRecord* regRec = currentInterval->assignedReg; - prevReg = regRec->regNum; - regMaskTP prevRegBit = genRegMask(prevReg); + availablePhysRegRecord = currentInterval->assignedReg; + foundReg = availablePhysRegRecord->regNum; + regMaskTP assignedRegBit = genRegMask(foundReg); // Is it in the preferred set of regs? - if ((prevRegBit & preferences) != RBM_NONE) + if ((assignedRegBit & preferences) != RBM_NONE) { // Is it currently available? 
LsraLocation nextPhysRefLoc; - if (registerIsAvailable(regRec, currentLocation, &nextPhysRefLoc, currentInterval->registerType)) + if (availablePhysRegRecord->assignedInterval == currentInterval) { - // If the register is next referenced at this location, only use it if - // this has a fixed reg requirement (i.e. this is the reference that caused - // the FixedReg ref to be created) - - if (!regRec->conflictingFixedRegReference(refPosition)) - { - useAssignedReg = true; - } + assert(!isRegBusy(foundReg, currentInterval->registerType) && + !isRegInUse(foundReg, currentInterval->registerType) && + !conflictingFixedRegReference(foundReg, refPosition)); + useAssignedReg = true; + } + else if (isRegBusy(foundReg, currentInterval->registerType) || + isRegInUse(foundReg, currentInterval->registerType)) + { + candidates &= ~assignedRegBit; + } + else if (registerIsAvailable(availablePhysRegRecord, currentLocation, &nextPhysRefLoc, + currentInterval->registerType)) + { + useAssignedReg = !conflictingFixedRegReference(foundReg, refPosition); } } - if (useAssignedReg) + else if (!refPosition->copyReg && (availablePhysRegRecord->assignedInterval == currentInterval)) { - regNumber foundReg = prevReg; - assignPhysReg(regRec, currentInterval); - refPosition->registerAssignment = genRegMask(foundReg); - return foundReg; + useAssignedReg = true; } - else + if (!useAssignedReg && !refPosition->copyReg) { - // Don't keep trying to allocate to this register + prevReg = foundReg; + // Don't keep trying to allocate to this register. Note, though, that we keep in in the candidates + // (unless removed above because it's in use or busy) in case we can't find anything better. + if (!currentInterval->isActive && (availablePhysRegRecord->assignedInterval == currentInterval)) + { + unassignPhysReg(availablePhysRegRecord, nullptr); + } + availablePhysRegRecord = nullptr; currentInterval->assignedReg = nullptr; + foundReg = REG_NA; } } - - //------------------------------------------------------------------------- - // Register Selection - - RegRecord* availablePhysRegInterval = nullptr; - bool unassignInterval = false; - - // Each register will receive a score which is the sum of the scoring criteria below. - // These were selected on the assumption that they will have an impact on the "goodness" - // of a register selection, and have been tuned to a certain extent by observing the impact - // of the ordering on asmDiffs. However, there is probably much more room for tuning, - // and perhaps additional criteria. - // - // These are FLAGS (bits) so that we can easily order them and add them together. - // If the scores are equal, but one covers more of the current interval's range, - // then it wins. Otherwise, the one encountered earlier in the regOrder wins. - - enum RegisterScore + if (useAssignedReg) { - VALUE_AVAILABLE = 0x40, // It is a constant value that is already in an acceptable register. - COVERS = 0x20, // It is in the interval's preference set and it covers the entire lifetime. - OWN_PREFERENCE = 0x10, // It is in the preference set of this interval. - COVERS_RELATED = 0x08, // It is in the preference set of the related interval and covers the entire lifetime. - RELATED_PREFERENCE = 0x04, // It is in the preference set of the related interval. - CALLER_CALLEE = 0x02, // It is in the right "set" for the interval (caller or callee-save). - UNASSIGNED = 0x01, // It is not currently assigned to an inactive interval. 
- }; - - int bestScore = 0; + assignPhysReg(availablePhysRegRecord, currentInterval); + refPosition->registerAssignment = genRegMask(foundReg); + return foundReg; + } // Compute the best possible score so we can stop looping early if we find it. // TODO-Throughput: At some point we may want to short-circuit the computation of each score, but // probably not until we've tuned the order of these criteria. At that point, // we'll need to avoid the short-circuit if we've got a stress option to reverse // the selection. - int bestPossibleScore = COVERS + UNASSIGNED + OWN_PREFERENCE + CALLER_CALLEE; + int bestPossibleScore = + COVERS + UNASSIGNED + OWN_PREFERENCE + CALLER_CALLEE + FREE + SPILL_COST_THIS + SPILL_COST_OTHERS; + if (currentInterval->isConstant) + { + bestPossibleScore |= VALUE_AVAILABLE; + } if (relatedPreferences != RBM_NONE) { bestPossibleScore |= RELATED_PREFERENCE + COVERS_RELATED; } - LsraLocation bestLocation = MinLocation; - // In non-debug builds, this will simply get optimized away bool reverseSelect = false; #ifdef DEBUG reverseSelect = doReverseSelect(); #endif // DEBUG - // An optimization for the common case where there is only one candidate - - // avoid looping over all the other registers - - regNumber singleReg = REG_NA; - - if (genMaxOneBit(candidates)) - { - regOrderSize = 1; - singleReg = genRegNumFromMask(candidates); - regOrder = &singleReg; - } - - for (unsigned i = 0; i < regOrderSize && (candidates != RBM_NONE); i++) - { - regNumber regNum = regOrder[i]; - regMaskTP candidateBit = genRegMask(regNum); - - if (!(candidates & candidateBit)) - { - continue; - } - + // The spill weight for 'refPosition' (the one we're allocating now). + unsigned int thisSpillWeight = refPosition->IsActualRef() ? getWeight(refPosition) : 0; + // 'bestSpillWeight' is the spill weight for the best candidate we've found so far. + // If allocating a reg is optional, we will consider those ref positions + // whose weight is less than 'refPosition' for spilling. + // If allocating a reg is a must, we start off with max weight so that the first spill + // candidate will be selected based on farthest distance alone. + // Since we start off with 'farRefLocation' initialized to 'MinLocation', the first available + // RefPosition will be selected as spill candidate and its weight as the bestSpillWeight. + bestSpillWeight = refPosition->RegOptional() ? thisSpillWeight : BB_MAX_WEIGHT; + + // Eliminate candidates that are in-use or busy. + // Note that a fixed reference will appear to be in use due to the RefTypeFixedReg before it, + // but we ignore those. + candidates &= ~regsBusyUntilKill; + candidates &= ~regsInUseThisLocation; + if (refPosition->isFixedRegRef && candidates != RBM_NONE) + { + assert(genMaxOneBit(refPosition->registerAssignment)); + candidates |= refPosition->registerAssignment; + } + while (candidates != RBM_NONE) + { + regMaskTP candidateBit = genFindLowestBit(candidates); candidates &= ~candidateBit; + regNumber regNum = genRegNumFromMask(candidateBit); + + // The spill weight for the register we're currently evaluating. + unsigned int currentSpillWeight = BB_ZERO_WEIGHT; RegRecord* physRegRecord = getRegisterRecord(regNum); - int score = 0; - LsraLocation nextPhysRefLocation = MaxLocation; + int score = 0; + bool isBetter = false; // By chance, is this register already holding this interval, as a copyReg or having // been restored as inactive after a kill? 
if (physRegRecord->assignedInterval == currentInterval) { - availablePhysRegInterval = physRegRecord; - unassignInterval = false; + availablePhysRegRecord = physRegRecord; + foundReg = regNum; break; } - - // Find the next RefPosition of the physical register - if (!registerIsAvailable(physRegRecord, currentLocation, &nextPhysRefLocation, regType)) + bool isFixedRef = refPosition->isFixedRefOfRegMask(candidateBit); + if (isFixedRef) { - continue; + nextPhysRefLocation = Min(nextFixedRef[regNum], nextIntervalRef[regNum]); } - - // If the register is next referenced at this location, only use it if - // this has a fixed reg requirement (i.e. this is the reference that caused - // the FixedReg ref to be created) - - if (physRegRecord->conflictingFixedRegReference(refPosition)) + else if (((nextFixedRef[regNum] == refPosition->nodeLocation) || + (refPosition->delayRegFree && nextFixedRef[regNum] == (refPosition->nodeLocation + 1)))) { continue; } - // If this is a definition of a constant interval, check to see if its value is already in this register. - if (currentInterval->isConstant && RefTypeIsDef(refPosition->refType) && - isMatchingConstant(physRegRecord, refPosition)) - { - score |= VALUE_AVAILABLE; - } - - // If the nextPhysRefLocation is a fixedRef for the rangeEndRefPosition, increment it so that - // we don't think it isn't covering the live range. - // This doesn't handle the case where earlier RefPositions for this Interval are also - // FixedRefs of this regNum, but at least those are only interesting in the case where those - // are "local last uses" of the Interval - otherwise the liveRange would interfere with the reg. - if (nextPhysRefLocation == rangeEndLocation && rangeEndRefPosition->isFixedRefOfReg(regNum)) - { - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_INCREMENT_RANGE_END, currentInterval)); - nextPhysRefLocation++; - } + int comparisonScore = bestScore; - if ((candidateBit & preferences) != RBM_NONE) + // Find the next RefPosition of the physical register + if (isFixedRef || registerIsAvailable(physRegRecord, currentLocation, &nextPhysRefLocation, regType)) { - score |= OWN_PREFERENCE; - if (nextPhysRefLocation > rangeEndLocation) + // If this is a definition of a constant interval, check to see if its value is already in this register. + if (currentInterval->isConstant && RefTypeIsDef(refPosition->refType) && + isMatchingConstant(physRegRecord, refPosition)) { - score |= COVERS; + score |= VALUE_AVAILABLE; } - } - if ((candidateBit & relatedPreferences) != RBM_NONE) - { - score |= RELATED_PREFERENCE; - if (nextPhysRefLocation > relatedLastLocation) + else if (bestScore & VALUE_AVAILABLE) { - score |= COVERS_RELATED; + continue; } - } - // If we had a fixed-reg def of a reg that will be killed before the use, prefer it to any other registers - // with the same score. (Note that we haven't changed the original registerAssignment on the RefPosition). - // Overload the RELATED_PREFERENCE value. - else if (candidateBit == refPosition->registerAssignment) - { - score |= RELATED_PREFERENCE; - } - - if ((candidateBit & callerCalleePrefs) != RBM_NONE) - { - score |= CALLER_CALLEE; - } - - // The register is considered unassigned if it has no assignedInterval, OR - // if its next reference is beyond the range of this interval. 
- if (!isAssigned(physRegRecord, lastLocation ARM_ARG(currentInterval->registerType))) - { - score |= UNASSIGNED; - } - - bool foundBetterCandidate = false; - - if (score > bestScore) - { - foundBetterCandidate = true; - } - else if (score == bestScore) - { - // Prefer a register that covers the range. - if (bestLocation <= lastLocation) + // If the nextPhysRefLocation is a fixedRef for the rangeEndRefPosition, increment it so that + // we don't think it isn't covering the live range. + // This doesn't handle the case where earlier RefPositions for this Interval are also + // FixedRefs of this regNum, but at least those are only interesting in the case where those + // are "local last uses" of the Interval - otherwise the liveRange would interfere with the reg. + if (nextPhysRefLocation == rangeEndLocation && rangeEndRefPosition->isFixedRefOfReg(regNum)) { - if (nextPhysRefLocation > bestLocation) - { - foundBetterCandidate = true; - } + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_INCREMENT_RANGE_END, currentInterval)); + nextPhysRefLocation++; } - // If both cover the range, prefer a register that is killed sooner (leaving the longer range register - // available). If both cover the range and also getting killed at the same location, prefer the one which - // is same as previous assignment. - else if (nextPhysRefLocation > lastLocation) + + if ((candidateBit & preferences) != RBM_NONE) { - if (nextPhysRefLocation < bestLocation) + score |= OWN_PREFERENCE; + if (nextPhysRefLocation > rangeEndLocation) { - foundBetterCandidate = true; + score |= COVERS; } - else if (nextPhysRefLocation == bestLocation && prevReg == regNum) + else if ((bestScore & ~(COVERS - 1)) > score) { - foundBetterCandidate = true; + continue; } } - } - -#ifdef DEBUG - if (doReverseSelect() && bestScore != 0) - { - foundBetterCandidate = !foundBetterCandidate; - } -#endif // DEBUG + else if ((bestScore & ~(OWN_PREFERENCE - 1)) > score) + { + continue; + } - if (foundBetterCandidate) - { - bestLocation = nextPhysRefLocation; - availablePhysRegInterval = physRegRecord; - unassignInterval = true; - bestScore = score; - } + if ((candidateBit & relatedPreferences) != RBM_NONE) + { + score |= RELATED_PREFERENCE; + if (nextPhysRefLocation > relatedInterval->lastRefPosition->nodeLocation) + { + score |= COVERS_RELATED; + } + } + else if (candidateBit == refPosition->registerAssignment) + { + // If we had a fixed-reg def of a reg that will be killed before the use, prefer it to any other + // registers with the same score. (Note that we haven't changed the original registerAssignment + // on the RefPosition). + // Overload the RELATED_PREFERENCE value. + score |= RELATED_PREFERENCE; + } - // there is no way we can get a better score so break out - if (!reverseSelect && score == bestPossibleScore && bestLocation == rangeEndLocation + 1) - { - break; - } - } + if ((candidateBit & callerCalleePrefs) != RBM_NONE) + { + score |= CALLER_CALLEE; + } - if (availablePhysRegInterval != nullptr) - { - if (unassignInterval && isAssigned(availablePhysRegInterval ARM_ARG(currentInterval->registerType))) - { - Interval* const intervalToUnassign = availablePhysRegInterval->assignedInterval; - unassignPhysReg(availablePhysRegInterval ARM_ARG(currentInterval->registerType)); + // The register is considered unassigned if it has no assignedInterval, OR + // if its next reference is beyond the range of this interval. 
- if ((bestScore & VALUE_AVAILABLE) != 0 && intervalToUnassign != nullptr) + // Unfortunately, we can't just look at the nextIntervalLoc, because there are + // cases where it's not strictly assigned, but neither is it unassigned (cases + // where it's inactive but we don't update nextIntervalRef). + // TODO-Cleanup: See whether we can unify this. + LsraLocation nextIntervalLoc = getNextIntervalRefLocation(regNum ARM_ARG(currentInterval->registerType)); + if (nextIntervalLoc == MaxLocation) { - assert(intervalToUnassign->isConstant); - refPosition->treeNode->SetReuseRegVal(); + if ((physRegRecord->assignedInterval == nullptr) || + (physRegRecord->assignedInterval->getNextRefLocation() > lastLocation)) + { + score |= UNASSIGNED; + } } - // If we considered this "unassigned" because this interval's lifetime ends before - // the next ref, remember it. - else if ((bestScore & UNASSIGNED) != 0 && intervalToUnassign != nullptr) + else if (nextIntervalLoc > lastLocation) { - updatePreviousInterval(availablePhysRegInterval, intervalToUnassign, intervalToUnassign->registerType); + score |= UNASSIGNED; } - } - else - { - assert((bestScore & VALUE_AVAILABLE) == 0); - } - assignPhysReg(availablePhysRegInterval, currentInterval); - foundReg = availablePhysRegInterval->regNum; - regMaskTP foundRegMask = genRegMask(foundReg); - refPosition->registerAssignment = foundRegMask; - } - return foundReg; -} + // Does this cover the full range of the interval? + if (nextPhysRefLocation > lastLocation) + { + score |= COVERS_FULL; + } -//------------------------------------------------------------------------ -// canSpillReg: Determine whether we can spill physRegRecord -// -// Arguments: + int bestPartialScore = (bestScore & ~(COVERS_FULL - 1)); + if (bestPartialScore > score) + { + continue; + } + else if ((score > bestPartialScore) || (bestScore < FREE)) + { + // At this point if we have a higher score, it is by definition the BEST_FIT. + score |= BEST_FIT; + } + else if (nextPhysRefLocation == bestLocation) + { + assert((comparisonScore & BEST_FIT) != 0); + score |= BEST_FIT; + } + else if ((score & COVERS_FULL) != 0) + { + // All things equal, if both cover the full range, pick the one that's killed soonest. + if (nextPhysRefLocation < bestLocation) + { + score |= BEST_FIT; + comparisonScore &= ~BEST_FIT; + } + } + else + { + // Neither cover the full range, so the BEST_FIT is the one that's killed later. + if (nextPhysRefLocation > bestLocation) + { + score |= BEST_FIT; + comparisonScore &= ~BEST_FIT; + } + else + { + assert((comparisonScore & BEST_FIT) != 0); + } + } + + // Oddly, the previous heuristics only considered this if both covered the range. + if ((prevReg == regNum) && ((score & COVERS_FULL) != 0)) + { + score |= IS_PREV_REG; + } + + if ((availablePhysRegRecord == nullptr) || (physRegRecord->regOrder < availablePhysRegRecord->regOrder)) + { + score |= REG_ORDER; + comparisonScore &= ~REG_ORDER; + } + + score |= FREE; + } + else + { + assert(isAssigned(physRegRecord ARM_ARG(regType))); + + // We would have to spill this register. + if (comparisonScore >= FREE) + { + continue; + } + + // We can't spill a register that's next used at this location, unless it's regOptional. 
+ if ((nextPhysRefLocation == refPosition->nodeLocation) || + (refPosition->delayRegFree && nextPhysRefLocation == (refPosition->nodeLocation + 1))) + { + if (!physRegRecord->assignedInterval->getNextRefPosition()->RegOptional()) + { + continue; + } + } + + // Can and should the interval in this register be spilled for this one, + // if we don't find a better alternative? + if (!isSpillCandidate(currentInterval, refPosition, physRegRecord)) + { + continue; + } + + currentSpillWeight = spillCost[regNum]; +#ifdef TARGET_ARM + if (currentInterval->registerType == TYP_DOUBLE) + { + currentSpillWeight = max(currentSpillWeight, spillCost[REG_NEXT(regNum)]); + } +#endif + if (currentSpillWeight >= thisSpillWeight) + { + if (refPosition->RegOptional() || ((comparisonScore & SPILL_COST_THIS) != 0)) + { + continue; + } + } + else + { + score |= SPILL_COST_THIS; + } + + if (currentSpillWeight < bestSpillWeight) + { + score |= SPILL_COST_OTHERS; + comparisonScore &= ~SPILL_COST_OTHERS; + } + else if (currentSpillWeight == bestSpillWeight) + { + // All else is equal so finally consider: + // - farthest reference location. + // - To duplicate previous behavior: + // - IF this is a higher register number than the current best: + // - pick it if the recentAssignedRef is a reload of a reg-optional + // - otherwise, select the lower register number, to match old behavior of 'allocateBusyReg', + // which traversed in regNumber order. + if (nextPhysRefLocation < farRefLocation) + { + continue; + } + bool isBetter = (availablePhysRegRecord == nullptr) || (nextPhysRefLocation > farRefLocation); + if (!isBetter) + { + RefPosition* recentAssignedRef = physRegRecord->assignedInterval->recentRefPosition; + if (regNum > foundReg) + { + isBetter = (recentAssignedRef != nullptr) && recentAssignedRef->reload && + recentAssignedRef->RegOptional(); + } + else + { + RefPosition* otherRecentAssignedRef = + availablePhysRegRecord->assignedInterval->recentRefPosition; + isBetter = !((otherRecentAssignedRef != nullptr) && otherRecentAssignedRef->reload && + otherRecentAssignedRef->RegOptional()); + } + } + if (isBetter) + { + score |= (FAR_NEXT_REF | SPILL_COST_OTHERS); + comparisonScore &= ~FAR_NEXT_REF; + } + } + } + + bool foundBetterCandidate = (score > comparisonScore); + +#ifdef DEBUG + if (doReverseSelect() && bestScore != 0) + { + foundBetterCandidate = !foundBetterCandidate; + } +#endif // DEBUG + + if (foundBetterCandidate) + { + bestSpillWeight = currentSpillWeight; + bestLocation = nextPhysRefLocation; + farRefLocation = nextPhysRefLocation; + availablePhysRegRecord = physRegRecord; + foundReg = regNum; + bestScore = score; + // These are relative scorings, so we set them on whatever's best so far. + if (score & FREE) + { + bestScore |= (REG_ORDER | BEST_FIT); + } + else + { + bestScore |= (FAR_NEXT_REF | SPILL_COST_OTHERS); + } + } + + // If there is no way we can get a better score, break out + if (!reverseSelect && (score == bestPossibleScore) && (bestLocation == lastLocation + 1) && + (bestSpillWeight == 0)) + { + break; + } + } + + if (availablePhysRegRecord != nullptr) + { + Interval* assignedInterval = availablePhysRegRecord->assignedInterval; + if ((assignedInterval != currentInterval) && isAssigned(availablePhysRegRecord ARM_ARG(regType))) + { + if ((bestScore & FREE) == 0) + { +// We're spilling. 
+#ifdef TARGET_ARM + if (currentInterval->registerType == TYP_DOUBLE) + { + assert(genIsValidDoubleReg(availablePhysRegRecord->regNum)); + unassignDoublePhysReg(availablePhysRegRecord); + } + else if (assignedInterval->registerType == TYP_DOUBLE) + { + // Make sure we spill both halves of the double register. + assert(genIsValidDoubleReg(assignedInterval->assignedReg->regNum)); + unassignPhysReg(assignedInterval->assignedReg, assignedInterval->recentRefPosition); + } + else +#endif + { + unassignPhysReg(availablePhysRegRecord, assignedInterval->recentRefPosition); + } + } + else + { + unassignPhysReg(availablePhysRegRecord ARM_ARG(currentInterval->registerType)); + + if ((bestScore & VALUE_AVAILABLE) != 0 && assignedInterval != nullptr) + { + assert(assignedInterval->isConstant); + refPosition->treeNode->SetReuseRegVal(); + } + // If we considered this "unassigned" because this interval's lifetime ends before + // the next ref, remember it. + else if ((bestScore & UNASSIGNED) != 0 && assignedInterval != nullptr) + { + updatePreviousInterval(availablePhysRegRecord, assignedInterval, assignedInterval->registerType); + } + else + { + assert((bestScore & VALUE_AVAILABLE) == 0); + } + } + } + assignPhysReg(availablePhysRegRecord, currentInterval); + regMaskTP foundRegMask = genRegMask(foundReg); + refPosition->registerAssignment = foundRegMask; + } + + return foundReg; +} + +//------------------------------------------------------------------------ +// canSpillReg: Determine whether we can spill physRegRecord +// +// Arguments: // physRegRecord - reg to spill // refLocation - Location of RefPosition where this register will be spilled -// recentAssignedRefWeight - Weight of recent assigned RefPosition which will be determined in this function -// farthestRefPosWeight - Current farthestRefPosWeight at allocateBusyReg() // // Return Value: // True - if we can spill physRegRecord // False - otherwise // -// Note: This helper is designed to be used only from allocateBusyReg() and canSpillDoubleReg() -// -bool LinearScan::canSpillReg(RegRecord* physRegRecord, - LsraLocation refLocation, - BasicBlock::weight_t* recentAssignedRefWeight) +bool LinearScan::canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation) { assert(physRegRecord->assignedInterval != nullptr); RefPosition* recentAssignedRef = physRegRecord->assignedInterval->recentRefPosition; if (recentAssignedRef != nullptr) { - if (isRefPositionActive(recentAssignedRef, refLocation)) - { - // We can't spill a register that's active at the current location - return false; - } - - // We don't prefer to spill a register if the weight of recentAssignedRef > weight - // of the spill candidate found so far. We would consider spilling a greater weight - // ref position only if the refPosition being allocated must need a reg. - *recentAssignedRefWeight = getWeight(recentAssignedRef); + // We can't spill a register that's active at the current location. + // We should already have determined this with isRegBusy before calling this method. + assert(!isRefPositionActive(recentAssignedRef, refLocation)); + return true; } - return true; + // recentAssignedRef can only be null if this is a parameter that has not yet been + // moved to a register (or stack), in which case we can't spill it yet. 
+ assert(physRegRecord->assignedInterval->getLocalVar(compiler)->lvIsParam); + return false; +} + +//------------------------------------------------------------------------ +// getSpillWeight: Get the weight associated with spilling the given register +// +// Arguments: +// physRegRecord - reg to spill +// +// Return Value: +// The weight associated with the location at which we will spill. +// +// Note: This helper is designed to be used only from allocateReg() and getDoubleSpillWeight() +// +unsigned LinearScan::getSpillWeight(RegRecord* physRegRecord) +{ + assert(physRegRecord->assignedInterval != nullptr); + RefPosition* recentAssignedRef = physRegRecord->assignedInterval->recentRefPosition; + unsigned weight = BB_ZERO_WEIGHT; + + // We shouldn't call this method if there is no recentAssignedRef. + assert(recentAssignedRef != nullptr); + // We shouldn't call this method if the register is active at this location. + assert(!isRefPositionActive(recentAssignedRef, currentLoc)); + weight = getWeight(recentAssignedRef); + return weight; } #ifdef TARGET_ARM @@ -3327,48 +3611,31 @@ bool LinearScan::canSpillReg(RegRecord* physRegRecord, // Arguments: // physRegRecord - reg to spill (must be a valid double register) // refLocation - Location of RefPosition where this register will be spilled -// recentAssignedRefWeight - Weight of recent assigned RefPosition which will be determined in this function // // Return Value: // True - if we can spill physRegRecord // False - otherwise // -// Notes: -// This helper is designed to be used only from allocateBusyReg() and canSpillDoubleReg(). -// The recentAssignedRefWeight is not updated if either register cannot be spilled. -// -bool LinearScan::canSpillDoubleReg(RegRecord* physRegRecord, - LsraLocation refLocation, - BasicBlock::weight_t* recentAssignedRefWeight) +bool LinearScan::canSpillDoubleReg(RegRecord* physRegRecord, LsraLocation refLocation) { assert(genIsValidDoubleReg(physRegRecord->regNum)); bool retVal = true; BasicBlock::weight_t weight = BB_ZERO_WEIGHT; BasicBlock::weight_t weight2 = BB_ZERO_WEIGHT; - RegRecord* physRegRecord2 = findAnotherHalfRegRec(physRegRecord); + RegRecord* physRegRecord2 = getSecondHalfRegRec(physRegRecord); - if ((physRegRecord->assignedInterval != nullptr) && !canSpillReg(physRegRecord, refLocation, &weight)) + if ((physRegRecord->assignedInterval != nullptr) && !canSpillReg(physRegRecord, refLocation)) { return false; } - if (physRegRecord2->assignedInterval != nullptr) + if ((physRegRecord2->assignedInterval != nullptr) && !canSpillReg(physRegRecord2, refLocation)) { - if (!canSpillReg(physRegRecord2, refLocation, &weight2)) - { - return false; - } - if (weight2 > weight) - { - weight = weight2; - } + return false; } - *recentAssignedRefWeight = weight; return true; } -#endif -#ifdef TARGET_ARM //------------------------------------------------------------------------ // unassignDoublePhysReg: unassign a double register (pair) // @@ -3383,7 +3650,7 @@ void LinearScan::unassignDoublePhysReg(RegRecord* doubleRegRecord) assert(genIsValidDoubleReg(doubleRegRecord->regNum)); RegRecord* doubleRegRecordLo = doubleRegRecord; - RegRecord* doubleRegRecordHi = findAnotherHalfRegRec(doubleRegRecordLo); + RegRecord* doubleRegRecordHi = getSecondHalfRegRec(doubleRegRecordLo); // For a double register, we has following four cases. 
// Case 1: doubleRegRecLo is assigned to TYP_DOUBLE interval // Case 2: doubleRegRecLo and doubleRegRecHi are assigned to different TYP_FLOAT intervals @@ -3454,67 +3721,57 @@ bool LinearScan::isRefPositionActive(RefPosition* refPosition, LsraLocation refL // False - otherwise // // Notes: -// This helper is designed to be used only from allocateBusyReg(), where: -// - This register was *not* found when looking for a free register, and -// - The caller must have already checked for the case where 'refPosition' is a fixed ref -// (asserted at the beginning of this method). +// This helper is designed to be used only from allocateReg(). +// The caller must have already checked for the case where 'refPosition' is a fixed ref. +// (asserted at the beginning of this method). // bool LinearScan::isRegInUse(RegRecord* regRec, RefPosition* refPosition) { // We shouldn't reach this check if 'refPosition' is a FixedReg of this register. assert(!refPosition->isFixedRefOfReg(regRec->regNum)); + // We shouldn't call this method if there's no currently assigned Interval. + RegisterType regType = refPosition->getInterval()->registerType; + if (!isRegInUse(regRec->regNum, regType)) + { + return false; + } + + // We should never spill a register that's occupied by an Interval with its next use at the current + // location. + // TODO: We should have checked this already? Interval* assignedInterval = regRec->assignedInterval; - if (assignedInterval != nullptr) +#ifdef TARGET_ARM + if (regType == TYP_DOUBLE) { - if (!assignedInterval->isActive) + RegRecord* otherRegRecord = getSecondHalfRegRec(regRec); + if (assignedInterval != nullptr) { - // This can only happen if we have a recentRefPosition active at this location that hasn't yet been freed. - CLANG_FORMAT_COMMENT_ANCHOR; - - if (isRefPositionActive(assignedInterval->recentRefPosition, refPosition->nodeLocation)) - { - return true; - } - else + if (assignedInterval->isActive && (nextIntervalRef[regRec->regNum] <= refPosition->getRangeEndLocation())) { -#ifdef TARGET_ARM - // In the case of TYP_DOUBLE, we may have the case where 'assignedInterval' is inactive, - // but the other half register is active. If so, it must be have an active recentRefPosition, - // as above. - if (refPosition->getInterval()->registerType == TYP_DOUBLE) - { - RegRecord* otherHalfRegRec = findAnotherHalfRegRec(regRec); - if (!otherHalfRegRec->assignedInterval->isActive) - { - if (isRefPositionActive(otherHalfRegRec->assignedInterval->recentRefPosition, - refPosition->nodeLocation)) - { - return true; - } - else - { - assert(!"Unexpected inactive assigned interval in isRegInUse"); - return true; - } - } - } - else -#endif + RefPosition* nextAssignedRef = assignedInterval->getNextRefPosition(); + if (!nextAssignedRef->RegOptional()) { - assert(!"Unexpected inactive assigned interval in isRegInUse"); return true; } } + if (otherRegRecord == nullptr) + { + return false; + } + } + else + { + assert(otherRegRecord->assignedInterval != nullptr); } + assignedInterval = otherRegRecord->assignedInterval; + regRec = otherRegRecord; + } +#endif + assert(assignedInterval != nullptr); + if (assignedInterval->isActive && (nextIntervalRef[regRec->regNum] <= refPosition->getRangeEndLocation())) + { RefPosition* nextAssignedRef = assignedInterval->getNextRefPosition(); - - // We should never spill a register that's occupied by an Interval with its next use at the current - // location. 
- // Normally this won't occur (unless we actually had more uses in a single node than there are registers), - // because we'll always find something with a later nextLocation, but it can happen in stress when - // we have LSRA_SELECT_NEAREST. - if ((nextAssignedRef != nullptr) && isRefPositionActive(nextAssignedRef, refPosition->nodeLocation) && - !nextAssignedRef->RegOptional()) + if (!nextAssignedRef->RegOptional()) { return true; } @@ -3529,28 +3786,41 @@ bool LinearScan::isRegInUse(RegRecord* regRec, RefPosition* refPosition) // current The interval for the current allocation // refPosition The RefPosition of the current Interval for which a register is being allocated // physRegRecord The RegRecord for the register we're considering for spill -// nextLocation An out (reference) parameter in which the next use location of the -// given RegRecord will be returned. // // Return Value: // True iff the given register can be spilled to accommodate the given RefPosition. // -bool LinearScan::isSpillCandidate(Interval* current, - RefPosition* refPosition, - RegRecord* physRegRecord, - LsraLocation& nextLocation) +bool LinearScan::isSpillCandidate(Interval* current, RefPosition* refPosition, RegRecord* physRegRecord) { regMaskTP candidateBit = genRegMask(physRegRecord->regNum); LsraLocation refLocation = refPosition->nodeLocation; - if (physRegRecord->isBusyUntilNextKill) + // We shouldn't be calling this if we haven't already determined that the register is not + // busy until the next kill. + assert(!isRegBusy(physRegRecord->regNum, current->registerType)); + // We should already have determined that the register isn't actively in use. + assert(!isRegInUse(physRegRecord->regNum, current->registerType)); + // We shouldn't be calling this if 'refPosition' is a fixed reference to this register. + assert(!refPosition->isFixedRefOfRegMask(candidateBit)); + // We shouldn't be calling this if there is a fixed reference at the same location + // (and it's not due to this reference), as checked above. + assert(!conflictingFixedRegReference(physRegRecord->regNum, refPosition)); + + bool canSpill; +#ifdef TARGET_ARM + if (current->registerType == TYP_DOUBLE) { - return false; + canSpill = canSpillDoubleReg(physRegRecord, refLocation); } - Interval* assignedInterval = physRegRecord->assignedInterval; - if (assignedInterval != nullptr) + else +#endif // TARGET_ARM + { + canSpill = canSpillReg(physRegRecord, refLocation); + } + if (!canSpill) { - nextLocation = assignedInterval->getNextRefLocation(); + return false; } + Interval* assignedInterval = physRegRecord->assignedInterval; #ifdef TARGET_ARM RegRecord* physRegRecord2 = nullptr; Interval* assignedInterval2 = nullptr; @@ -3559,376 +3829,17 @@ bool LinearScan::isSpillCandidate(Interval* current, if (current->registerType == TYP_DOUBLE) { assert(genIsValidDoubleReg(physRegRecord->regNum)); - physRegRecord2 = findAnotherHalfRegRec(physRegRecord); - if (physRegRecord2->isBusyUntilNextKill) - { - return false; - } + physRegRecord2 = getSecondHalfRegRec(physRegRecord); assignedInterval2 = physRegRecord2->assignedInterval; - if ((assignedInterval2 != nullptr) && (assignedInterval2->getNextRefLocation() > nextLocation)) - { - nextLocation = assignedInterval2->getNextRefLocation(); - } } #endif - // If there is a fixed reference at the same location (and it's not due to this reference), - // don't use it. 
- if (physRegRecord->conflictingFixedRegReference(refPosition)) - { - return false; - } - - if (refPosition->isFixedRefOfRegMask(candidateBit)) - { - // Either: - // - there is a fixed reference due to this node, OR - // - or there is a fixed use fed by a def at this node, OR - // - or we have restricted the set of registers for stress. - // In any case, we must use this register as it's the only candidate - // TODO-CQ: At the time we allocate a register to a fixed-reg def, if it's not going - // to remain live until the use, we should set the candidates to allRegs(regType) - // to avoid a spill - codegen can then insert the copy. - // If this is marked as allocateIfProfitable, the caller will compare the weights - // of this RefPosition and the RefPosition to which it is currently assigned. - assert(refPosition->isFixedRegRef || - (refPosition->nextRefPosition != nullptr && refPosition->nextRefPosition->isFixedRegRef) || - candidatesAreStressLimited()); - return true; - } + // TODO: Delete; This is a duplicate of above. + assert(!isRegInUse(physRegRecord, refPosition)); - // If this register is not assigned to an interval, either - // - it has a FixedReg reference at the current location that is not this reference, OR - // - this is the special case of a fixed loReg, where this interval has a use at the same location - // In either case, we cannot use it - CLANG_FORMAT_COMMENT_ANCHOR; - -#ifdef TARGET_ARM - if (assignedInterval == nullptr && assignedInterval2 == nullptr) -#else - if (assignedInterval == nullptr) -#endif - { - RefPosition* nextPhysRegPosition = physRegRecord->getNextRefPosition(); - assert((nextPhysRegPosition != nullptr) && (nextPhysRegPosition->nodeLocation == refLocation) && - (candidateBit != refPosition->registerAssignment)); - return false; - } - - if (isRegInUse(physRegRecord, refPosition)) - { - return false; - } - -#ifdef TARGET_ARM - if (current->registerType == TYP_DOUBLE) - { - if (isRegInUse(physRegRecord2, refPosition)) - { - return false; - } - } -#endif return true; } -//------------------------------------------------------------------------ -// allocateBusyReg: Find a busy register that satisfies the requirements for refPosition, -// and that can be spilled. -// -// Arguments: -// current The interval for the current allocation -// refPosition The RefPosition of the current Interval for which a register is being allocated -// allocateIfProfitable If true, a reg may not be allocated if all other ref positions currently -// occupying registers are more important than the 'refPosition'. -// -// Return Value: -// The regNumber allocated to the RefPositon. Returns REG_NA if no free register is found. -// -// Note: Currently this routine uses weight and farthest distance of next reference -// to select a ref position for spilling. -// a) if allocateIfProfitable = false -// The ref position chosen for spilling will be the lowest weight -// of all and if there is is more than one ref position with the -// same lowest weight, among them choses the one with farthest -// distance to its next reference. -// -// b) if allocateIfProfitable = true -// The ref position chosen for spilling will not only be lowest weight -// of all but also has a weight lower than 'refPosition'. If there is -// no such ref position, reg will not be allocated. 
-// -regNumber LinearScan::allocateBusyReg(Interval* current, RefPosition* refPosition, bool allocateIfProfitable) -{ - regNumber foundReg = REG_NA; - - RegisterType regType = getRegisterType(current, refPosition); - regMaskTP candidates = refPosition->registerAssignment; - regMaskTP preferences = (current->registerPreferences & candidates); - if (preferences == RBM_NONE) - { - preferences = candidates; - } - if (candidates == RBM_NONE) - { - // This assumes only integer and floating point register types - // if we target a processor with additional register types, - // this would have to change - candidates = allRegs(regType); - } - -#ifdef DEBUG - candidates = stressLimitRegs(refPosition, candidates); -#endif // DEBUG - - // TODO-CQ: Determine whether/how to take preferences into account in addition to - // prefering the one with the furthest ref position when considering - // a candidate to spill - RegRecord* farthestRefPhysRegRecord = nullptr; -#ifdef TARGET_ARM - RegRecord* farthestRefPhysRegRecord2 = nullptr; -#endif - LsraLocation farthestLocation = MinLocation; - LsraLocation refLocation = refPosition->nodeLocation; - BasicBlock::weight_t farthestRefPosWeight; - if (allocateIfProfitable) - { - // If allocating a reg is optional, we will consider those ref positions - // whose weight is less than 'refPosition' for spilling. - farthestRefPosWeight = getWeight(refPosition); - } - else - { - // If allocating a reg is a must, we start off with max weight so - // that the first spill candidate will be selected based on - // farthest distance alone. Since we start off with farthestLocation - // initialized to MinLocation, the first available ref position - // will be selected as spill candidate and its weight as the - // fathestRefPosWeight. - farthestRefPosWeight = FloatingPointUtils::infinite_float(); - } - - for (regNumber regNum : Registers(regType)) - { - regMaskTP candidateBit = genRegMask(regNum); - if (!(candidates & candidateBit)) - { - continue; - } - RegRecord* physRegRecord = getRegisterRecord(regNum); - RegRecord* physRegRecord2 = nullptr; // only used for TARGET_ARM - LsraLocation nextLocation = MinLocation; - LsraLocation physRegNextLocation; - if (!isSpillCandidate(current, refPosition, physRegRecord, nextLocation)) - { - assert(candidates != candidateBit); - continue; - } - - // We've passed the preliminary checks for a spill candidate. - // Now, if we have a recentAssignedRef, check that it is going to be OK to spill it. - Interval* assignedInterval = physRegRecord->assignedInterval; - BasicBlock::weight_t recentAssignedRefWeight = BB_ZERO_WEIGHT; - RefPosition* recentAssignedRef = nullptr; - RefPosition* recentAssignedRef2 = nullptr; -#ifdef TARGET_ARM - if (current->registerType == TYP_DOUBLE) - { - recentAssignedRef = (assignedInterval == nullptr) ? nullptr : assignedInterval->recentRefPosition; - physRegRecord2 = findAnotherHalfRegRec(physRegRecord); - Interval* assignedInterval2 = physRegRecord2->assignedInterval; - recentAssignedRef2 = (assignedInterval2 == nullptr) ? 
nullptr : assignedInterval2->recentRefPosition; - if (!canSpillDoubleReg(physRegRecord, refLocation, &recentAssignedRefWeight)) - { - continue; - } - } - else -#endif - { - recentAssignedRef = assignedInterval->recentRefPosition; - if (!canSpillReg(physRegRecord, refLocation, &recentAssignedRefWeight)) - { - continue; - } - } - if (recentAssignedRefWeight > farthestRefPosWeight) - { - continue; - } - - physRegNextLocation = physRegRecord->getNextRefLocation(); - if (nextLocation > physRegNextLocation) - { - nextLocation = physRegNextLocation; - } - - bool isBetterLocation; - -#ifdef DEBUG - if (doSelectNearest() && farthestRefPhysRegRecord != nullptr) - { - isBetterLocation = (nextLocation <= farthestLocation); - } - else -#endif - // This if-stmt is associated with the above else - if (recentAssignedRefWeight < farthestRefPosWeight) - { - isBetterLocation = true; - } - else - { - // This would mean the weight of spill ref position we found so far is equal - // to the weight of the ref position that is being evaluated. In this case - // we prefer to spill ref position whose distance to its next reference is - // the farthest. - assert(recentAssignedRefWeight == farthestRefPosWeight); - - // If allocateIfProfitable=true, the first spill candidate selected - // will be based on weight alone. After we have found a spill - // candidate whose weight is less than the 'refPosition', we will - // consider farthest distance when there is a tie in weights. - // This is to ensure that we don't spill a ref position whose - // weight is equal to weight of 'refPosition'. - if (allocateIfProfitable && farthestRefPhysRegRecord == nullptr) - { - isBetterLocation = false; - } - else - { - isBetterLocation = (nextLocation > farthestLocation); - - if (nextLocation > farthestLocation) - { - isBetterLocation = true; - } - else if (nextLocation == farthestLocation) - { - // Both weight and distance are equal. - // Prefer that ref position which is marked both reload and - // allocate if profitable. These ref positions don't need - // need to be spilled as they are already in memory and - // codegen considers them as contained memory operands. - CLANG_FORMAT_COMMENT_ANCHOR; -#ifdef TARGET_ARM - // TODO-CQ-ARM: Just conservatively "and" two conditions. We may implement a better condition later. - isBetterLocation = true; - if (recentAssignedRef != nullptr) - isBetterLocation &= (recentAssignedRef->reload && recentAssignedRef->RegOptional()); - - if (recentAssignedRef2 != nullptr) - isBetterLocation &= (recentAssignedRef2->reload && recentAssignedRef2->RegOptional()); -#else - isBetterLocation = - (recentAssignedRef != nullptr) && recentAssignedRef->reload && recentAssignedRef->RegOptional(); -#endif - } - else - { - isBetterLocation = false; - } - } - } - - if (isBetterLocation) - { - farthestLocation = nextLocation; - farthestRefPhysRegRecord = physRegRecord; -#ifdef TARGET_ARM - farthestRefPhysRegRecord2 = physRegRecord2; -#endif - farthestRefPosWeight = recentAssignedRefWeight; - } - } - -#if DEBUG - if (allocateIfProfitable) - { - // There may not be a spill candidate or if one is found - // its weight must be less than the weight of 'refPosition' - assert((farthestRefPhysRegRecord == nullptr) || (farthestRefPosWeight < getWeight(refPosition))); - } - else - { - // Must have found a spill candidate. 
- assert(farthestRefPhysRegRecord != nullptr); - - if (farthestLocation == refLocation) - { - // This must be a RefPosition that is constrained to use a single register, either directly, - // or at the use, or by stress. - bool isConstrained = (refPosition->isFixedRegRef || (refPosition->nextRefPosition != nullptr && - refPosition->nextRefPosition->isFixedRegRef) || - candidatesAreStressLimited()); - if (!isConstrained) - { -#ifdef TARGET_ARM - Interval* assignedInterval = - (farthestRefPhysRegRecord == nullptr) ? nullptr : farthestRefPhysRegRecord->assignedInterval; - Interval* assignedInterval2 = - (farthestRefPhysRegRecord2 == nullptr) ? nullptr : farthestRefPhysRegRecord2->assignedInterval; - RefPosition* nextRefPosition = - (assignedInterval == nullptr) ? nullptr : assignedInterval->getNextRefPosition(); - RefPosition* nextRefPosition2 = - (assignedInterval2 == nullptr) ? nullptr : assignedInterval2->getNextRefPosition(); - if (nextRefPosition != nullptr) - { - if (nextRefPosition2 != nullptr) - { - assert(nextRefPosition->RegOptional() || nextRefPosition2->RegOptional()); - } - else - { - assert(nextRefPosition->RegOptional()); - } - } - else - { - assert(nextRefPosition2 != nullptr && nextRefPosition2->RegOptional()); - } -#else // !TARGET_ARM - Interval* assignedInterval = farthestRefPhysRegRecord->assignedInterval; - RefPosition* nextRefPosition = assignedInterval->getNextRefPosition(); - assert(nextRefPosition->RegOptional()); -#endif // !TARGET_ARM - } - } - else - { - assert(farthestLocation > refLocation); - } - } -#endif // DEBUG - - if (farthestRefPhysRegRecord != nullptr) - { - foundReg = farthestRefPhysRegRecord->regNum; - -#ifdef TARGET_ARM - if (current->registerType == TYP_DOUBLE) - { - assert(genIsValidDoubleReg(foundReg)); - unassignDoublePhysReg(farthestRefPhysRegRecord); - } - else -#endif - { - unassignPhysReg(farthestRefPhysRegRecord, farthestRefPhysRegRecord->assignedInterval->recentRefPosition); - } - - assignPhysReg(farthestRefPhysRegRecord, current); - refPosition->registerAssignment = genRegMask(foundReg); - } - else - { - foundReg = REG_NA; - refPosition->registerAssignment = RBM_NONE; - } - - return foundReg; -} - // Grab a register to use to copy and then immediately use. // This is called only for localVar intervals that already have a register // assignment that is not compatible with the current RefPosition. @@ -3959,11 +3870,11 @@ regNumber LinearScan::assignCopyReg(RefPosition* refPosition) assert(oldRegRecord->regNum == oldPhysReg); currentInterval->isActive = false; - regNumber allocatedReg = tryAllocateFreeReg(currentInterval, refPosition); - if (allocatedReg == REG_NA) - { - allocatedReg = allocateBusyReg(currentInterval, refPosition, false); - } + // We *must* allocate a register, and it will be a copyReg. Set that field now, so that + // refPosition->RegOptional() will return false. 
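+    // Note that allocateReg() considers busy registers as well as free ones; since RegOptional()
+    // will now return false, it is expected to return a register here, spilling if necessary.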
+ refPosition->copyReg = true; + + regNumber allocatedReg = allocateReg(currentInterval, refPosition); // Now restore the old info currentInterval->relatedInterval = savedRelatedInterval; @@ -3971,7 +3882,6 @@ regNumber LinearScan::assignCopyReg(RefPosition* refPosition) currentInterval->assignedReg = oldRegRecord; currentInterval->isActive = true; - refPosition->copyReg = true; return allocatedReg; } @@ -3992,54 +3902,45 @@ regNumber LinearScan::assignCopyReg(RefPosition* refPosition) // bool LinearScan::isAssigned(RegRecord* regRec ARM_ARG(RegisterType newRegType)) { - return isAssigned(regRec, MaxLocation ARM_ARG(newRegType)); -} - -//------------------------------------------------------------------------ -// isAssigned: Check whether the given RegRecord has an assignedInterval -// that has a reference prior to the given location. -// -// Arguments: -// regRec - The RegRecord of interest -// lastLocation - The LsraLocation up to which we want to check -// newRegType - The `RegisterType` of interval we want to check -// (this is for the purposes of checking the other half of a TYP_DOUBLE RegRecord) -// -// Return value: -// Returns true if the given RegRecord (and its other half, if TYP_DOUBLE) has an assignedInterval -// that is referenced prior to the given location -// -// Notes: -// The register is not considered to be assigned if it has no assignedInterval, or that Interval's -// next reference is beyond lastLocation -// -bool LinearScan::isAssigned(RegRecord* regRec, LsraLocation lastLocation ARM_ARG(RegisterType newRegType)) -{ - Interval* assignedInterval = regRec->assignedInterval; - - if ((assignedInterval == nullptr) || assignedInterval->getNextRefLocation() > lastLocation) + if (regRec->assignedInterval != nullptr) { + return true; + } #ifdef TARGET_ARM - if (newRegType == TYP_DOUBLE) - { - RegRecord* anotherRegRec = findAnotherHalfRegRec(regRec); + if (newRegType == TYP_DOUBLE) + { + RegRecord* otherRegRecord = getSecondHalfRegRec(regRec); - if ((anotherRegRec->assignedInterval == nullptr) || - (anotherRegRec->assignedInterval->getNextRefLocation() > lastLocation)) - { - // In case the newRegType is a double register, - // the score would be set UNASSIGNED if another register is also not set. - return false; - } - } - else -#endif + if (otherRegRecord->assignedInterval != nullptr) { - return false; + return true; } } +#endif + return false; +} - return true; +//------------------------------------------------------------------------ +// isAssigned: Check whether the given RegRecord has an assignedInterval +// that has a reference prior to the given location. 
+// +// Arguments: +// regRec - The RegRecord of interest +// lastLocation - The LsraLocation up to which we want to check +// newRegType - The `RegisterType` of interval we want to check +// (this is for the purposes of checking the other half of a TYP_DOUBLE RegRecord) +// +// Return value: +// Returns true if the given RegRecord (and its other half, if TYP_DOUBLE) has an assignedInterval +// that is referenced prior to the given location +// +// Notes: +// The register is not considered to be assigned if it has no assignedInterval, or that Interval's +// next reference is beyond lastLocation +// +bool LinearScan::isAssigned(RegRecord* regRec, LsraLocation lastLocation ARM_ARG(RegisterType newRegType)) +{ + return getNextIntervalRefLocation(regRec->regNum ARM_ARG(newRegType)) > lastLocation; } // Check if the interval is already assigned and if it is then unassign the physical record @@ -4094,8 +3995,8 @@ void LinearScan::assignPhysReg(RegRecord* regRec, Interval* interval) regMaskTP assignedRegMask = genRegMask(regRec->regNum); compiler->codeGen->regSet.rsSetRegsModified(assignedRegMask DEBUGARG(true)); - checkAndAssignInterval(regRec, interval); interval->assignedReg = regRec; + checkAndAssignInterval(regRec, interval); interval->physReg = regRec->regNum; interval->isActive = true; @@ -4163,6 +4064,15 @@ void LinearScan::setIntervalAsSpilled(Interval* interval) // Now we need to mark the local as spilled also, even if the lower half is never spilled, // as this will use the upper part of its home location. interval = interval->relatedInterval; + // We'll now mark this as spilled, so it changes the spillCost for its recent ref, if any. + RefPosition* recentRefPos = interval->recentRefPosition; + if (!interval->isSpilled && interval->isActive && (recentRefPos != nullptr)) + { + VarSetOps::AddElemD(compiler, splitOrSpilledVars, interval->getVarIndex(compiler)); + interval->isSpilled = true; + regNumber reg = interval->recentRefPosition->assignedReg(); + spillCost[reg] = getSpillWeight(getRegisterRecord(reg)); + } } #endif if (interval->isLocalVar) @@ -4261,6 +4171,7 @@ void LinearScan::unassignPhysRegNoSpill(RegRecord* regRec) assert(assignedInterval != nullptr && assignedInterval->isActive); assignedInterval->isActive = false; unassignPhysReg(regRec, nullptr); + // makeRegAvailable(regRec->regNum, assignedInterval->registerType); assignedInterval->isActive = true; } @@ -4337,7 +4248,7 @@ void LinearScan::unassignPhysReg(RegRecord* regRec ARM_ARG(RegisterType newRegTy { if (newRegType == TYP_DOUBLE) { - anotherRegRec = findAnotherHalfRegRec(regRecToUnassign); + anotherRegRec = getSecondHalfRegRec(regRecToUnassign); } } #endif @@ -4379,7 +4290,8 @@ void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPositio regNumber thisRegNum = regRec->regNum; // Is assignedInterval actually still assigned to this register? 
- bool intervalIsAssigned = (assignedInterval->physReg == thisRegNum); + bool intervalIsAssigned = (assignedInterval->physReg == thisRegNum); + regNumber regToUnassign = thisRegNum; #ifdef TARGET_ARM RegRecord* anotherRegRec = nullptr; @@ -4388,28 +4300,42 @@ void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPositio if (assignedInterval->registerType == TYP_DOUBLE) { assert(isFloatRegType(regRec->registerType)); + RegRecord* doubleRegRec; + if (genIsValidDoubleReg(thisRegNum)) + { + anotherRegRec = getSecondHalfRegRec(regRec); + doubleRegRec = regRec; + } + else + { + regToUnassign = REG_PREV(thisRegNum); + anotherRegRec = getRegisterRecord(regToUnassign); + doubleRegRec = anotherRegRec; + } - anotherRegRec = findAnotherHalfRegRec(regRec); - - // Both two RegRecords should have been assigned to the same interval. + // Both RegRecords should have been assigned to the same interval. assert(assignedInterval == anotherRegRec->assignedInterval); if (!intervalIsAssigned && (assignedInterval->physReg == anotherRegRec->regNum)) { intervalIsAssigned = true; } - } -#endif // TARGET_ARM - checkAndClearInterval(regRec, spillRefPosition); + clearNextIntervalRef(regToUnassign, TYP_DOUBLE); + clearSpillCost(regToUnassign, TYP_DOUBLE); + checkAndClearInterval(doubleRegRec, spillRefPosition); -#ifdef TARGET_ARM - if (assignedInterval->registerType == TYP_DOUBLE) - { - // Both two RegRecords should have been unassigned together. + // Both RegRecords should have been unassigned together. assert(regRec->assignedInterval == nullptr); assert(anotherRegRec->assignedInterval == nullptr); } + else #endif // TARGET_ARM + { + clearNextIntervalRef(thisRegNum, assignedInterval->registerType); + clearSpillCost(thisRegNum, assignedInterval->registerType); + checkAndClearInterval(regRec, spillRefPosition); + } + makeRegAvailable(thisRegNum, assignedInterval->registerType); RefPosition* nextRefPosition = nullptr; if (spillRefPosition != nullptr) @@ -4505,6 +4431,14 @@ void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPositio { regRec->assignedInterval = regRec->previousInterval; regRec->previousInterval = nullptr; + if (regRec->assignedInterval->physReg != thisRegNum) + { + clearNextIntervalRef(thisRegNum, regRec->assignedInterval->registerType); + } + else + { + updateNextIntervalRef(thisRegNum, regRec->assignedInterval); + } #ifdef TARGET_ARM // Note: @@ -4588,6 +4522,7 @@ void LinearScan::spillGCRefs(RefPosition* killRefPosition) { INDEBUG(killedRegs = true); unassignPhysReg(regRecord, assignedInterval->recentRefPosition); + makeRegAvailable(nextReg, assignedInterval->registerType); } } INDEBUG(dumpLsraAllocationEvent(killedRegs ? LSRA_EVENT_DONE_KILL_GC_REFS : LSRA_EVENT_NO_GC_KILLS, nullptr, REG_NA, @@ -4876,11 +4811,13 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) if (!enregisterLocalVars) { // Just clear any constant registers and return. 
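+        // Also reset the per-register availability/constant state, and clear the
+        // next-interval-reference and spill-cost entries in the loop below.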
+ resetAvailableRegs(); for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg)) { RegRecord* physRegRecord = getRegisterRecord(reg); Interval* assignedInterval = physRegRecord->assignedInterval; - + clearNextIntervalRef(reg, physRegRecord->registerType); + clearSpillCost(reg, physRegRecord->registerType); if (assignedInterval != nullptr) { assert(assignedInterval->isConstant); @@ -5024,7 +4961,7 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) assert(targetReg != REG_STK); assert(interval->assignedReg != nullptr && interval->assignedReg->regNum == targetReg && interval->assignedReg->assignedInterval == interval); - liveRegs |= genRegMask(targetReg); + liveRegs |= getRegMask(targetReg, interval->registerType); continue; } } @@ -5052,10 +4989,11 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) // Keep the register assignment - if another var has it, it will get unassigned. // Otherwise, resolution will fix it up later, and it will be more // likely to match other assignments this way. + targetReg = interval->physReg; interval->isActive = true; - liveRegs |= genRegMask(interval->physReg); - INDEBUG(inactiveRegs |= genRegMask(interval->physReg)); - setVarReg(inVarToRegMap, varIndex, interval->physReg); + liveRegs |= getRegMask(targetReg, interval->registerType); + INDEBUG(inactiveRegs |= genRegMask(targetReg)); + setVarReg(inVarToRegMap, varIndex, targetReg); } else { @@ -5065,7 +5003,12 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) if (targetReg != REG_STK) { RegRecord* targetRegRecord = getRegisterRecord(targetReg); - liveRegs |= genRegMask(targetReg); + liveRegs |= getRegMask(targetReg, interval->registerType); + if (!allocationPassComplete) + { + updateNextIntervalRef(targetReg, interval); + updateSpillCost(targetReg, interval); + } if (!interval->isActive) { interval->isActive = true; @@ -5083,7 +5026,7 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) (targetRegRecord->assignedInterval->registerType == TYP_FLOAT))) { assert(genIsValidDoubleReg(targetReg)); - unassignIntervalBlockStart(findAnotherHalfRegRec(targetRegRecord), + unassignIntervalBlockStart(getSecondHalfRegRec(targetRegRecord), allocationPassComplete ? nullptr : inVarToRegMap); } #endif // TARGET_ARM @@ -5098,13 +5041,19 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) } } - // Unassign any registers that are no longer live. + // Unassign any registers that are no longer live, and set register state, if allocating. 
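+    // During the allocation pass, reset the register state and then mark the live-in
+    // registers computed above as in use.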
+ if (!allocationPassComplete) + { + resetRegState(); + setRegsInUse(liveRegs); + } for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg)) { + RegRecord* physRegRecord = getRegisterRecord(reg); if ((liveRegs & genRegMask(reg)) == 0) { - RegRecord* physRegRecord = getRegisterRecord(reg); - Interval* assignedInterval = physRegRecord->assignedInterval; + makeRegAvailable(reg, physRegRecord->registerType); + Interval* assignedInterval = physRegRecord->assignedInterval; if (assignedInterval != nullptr) { @@ -5142,6 +5091,7 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) // Skip next float register, because we already addressed a double register assert(genIsValidDoubleReg(reg)); reg = REG_NEXT(reg); + makeRegAvailable(reg, physRegRecord->registerType); } #endif // TARGET_ARM } @@ -5227,20 +5177,33 @@ void LinearScan::dumpRefPositions(const char* str) } #endif // DEBUG -bool LinearScan::registerIsFree(regNumber regNum, RegisterType regType) +//------------------------------------------------------------------------ +// LinearScan::makeRegisterInactive: Make the interval currently assigned to +// a register inactive. +// +// Arguments: +// physRegRecord - the RegRecord for the register +// +// Return Value: +// None. +// +// Notes: +// It may be that the RegRecord has already been freed, e.g. due to a kill, +// or it may be that the register was a copyReg, so is not the assigned register +// of the Interval currently occupying the register, in which case this method has no effect. +// +void LinearScan::makeRegisterInactive(RegRecord* physRegRecord) { - RegRecord* physRegRecord = getRegisterRecord(regNum); - - bool isFree = physRegRecord->isFree(); - -#ifdef TARGET_ARM - if (isFree && regType == TYP_DOUBLE) + Interval* assignedInterval = physRegRecord->assignedInterval; + // It may have already been freed by a "Kill" + if ((assignedInterval != nullptr) && (assignedInterval->physReg == physRegRecord->regNum)) { - isFree = getSecondHalfRegRec(physRegRecord)->isFree(); + assignedInterval->isActive = false; + if (assignedInterval->isConstant) + { + clearNextIntervalRef(physRegRecord->regNum, assignedInterval->registerType); + } } -#endif // TARGET_ARM - - return isFree; } //------------------------------------------------------------------------ @@ -5263,31 +5226,36 @@ bool LinearScan::registerIsFree(regNumber regNum, RegisterType regType) // defs remain), it will remain assigned to the physRegRecord. However, since // it is marked inactive, the register will be available, albeit less desirable // to allocate. +// void LinearScan::freeRegister(RegRecord* physRegRecord) { Interval* assignedInterval = physRegRecord->assignedInterval; - // It may have already been freed by a "Kill" + makeRegAvailable(physRegRecord->regNum, physRegRecord->registerType); + clearSpillCost(physRegRecord->regNum, physRegRecord->registerType); + makeRegisterInactive(physRegRecord); + if (assignedInterval != nullptr) { - assignedInterval->isActive = false; - // If this is a constant node, that we may encounter again (e.g. constant), - // don't unassign it until we need the register. - if (!assignedInterval->isConstant) - { - RefPosition* nextRefPosition = assignedInterval->getNextRefPosition(); - // Unassign the register only if there are no more RefPositions, or the next - // one is a def. 
Note that the latter condition doesn't actually ensure that - // there aren't subsequent uses that could be reached by a def in the assigned - // register, but is merely a heuristic to avoid tying up the register (or using - // it when it's non-optimal). A better alternative would be to use SSA, so that - // we wouldn't unnecessarily link separate live ranges to the same register. - if (nextRefPosition == nullptr || RefTypeIsDef(nextRefPosition->refType)) - { + // TODO: Under the following conditions we should be just putting it in regsToMakeInactive + // not regsToFree. + // + // We don't unassign in the following conditions: + // - If this is a constant node, that we may encounter again, OR + // - If its recent RefPosition is not a last-use and its next RefPosition is non-null. + // - If there are no more RefPositions, or the next + // one is a def. Note that the latter condition doesn't actually ensure that + // there aren't subsequent uses that could be reached by a value in the assigned + // register, but is merely a heuristic to avoid tying up the register (or using + // it when it's non-optimal). A better alternative would be to use SSA, so that + // we wouldn't unnecessarily link separate live ranges to the same register. + // + RefPosition* nextRefPosition = assignedInterval->getNextRefPosition(); + if (!assignedInterval->isConstant && (nextRefPosition == nullptr || RefTypeIsDef(nextRefPosition->refType))) + { #ifdef TARGET_ARM - assert((assignedInterval->registerType != TYP_DOUBLE) || genIsValidDoubleReg(physRegRecord->regNum)); + assert((assignedInterval->registerType != TYP_DOUBLE) || genIsValidDoubleReg(physRegRecord->regNum)); #endif // TARGET_ARM - unassignPhysReg(physRegRecord, nullptr); - } + unassignPhysReg(physRegRecord, nullptr); } } } @@ -5306,11 +5274,20 @@ void LinearScan::freeRegisters(regMaskTP regsToFree) } INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FREE_REGS)); + makeRegsAvailable(regsToFree); while (regsToFree != RBM_NONE) { regMaskTP nextRegBit = genFindLowestBit(regsToFree); regsToFree &= ~nextRegBit; - regNumber nextReg = genRegNumFromMask(nextRegBit); + regNumber nextReg = genRegNumFromMask(nextRegBit); + RegRecord* regRecord = getRegisterRecord(nextReg); +#ifdef TARGET_ARM + if (regRecord->assignedInterval != nullptr && (regRecord->assignedInterval->registerType == TYP_DOUBLE)) + { + assert(genIsValidDoubleReg(nextReg)); + regsToFree &= ~(nextRegBit << 1); + } +#endif freeRegister(getRegisterRecord(nextReg)); } } @@ -5353,9 +5330,33 @@ void LinearScan::allocateRegisters() #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE } + resetRegState(); for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg)) { - getRegisterRecord(reg)->recentRefPosition = nullptr; + RegRecord* physRegRecord = getRegisterRecord(reg); + physRegRecord->recentRefPosition = nullptr; + updateNextFixedRef(physRegRecord, physRegRecord->firstRefPosition); + + // Is this an incoming arg register? (Note that we don't, currently, consider reassigning + // an incoming arg register as having spill cost.) 
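+        // If it is, record its next reference location and spill cost for this register and mark
+        // the register as in use; otherwise, clear those entries.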
+ Interval* interval = physRegRecord->assignedInterval; + if (interval != nullptr) + { +#ifdef TARGET_ARM + if ((interval->registerType != TYP_DOUBLE) || genIsValidDoubleReg(reg)) +#endif // TARGET_ARM + { + updateNextIntervalRef(reg, interval); + updateSpillCost(reg, interval); + setRegInUse(reg, interval->registerType); + INDEBUG(registersToDump |= getRegMask(reg, interval->registerType)); + } + } + else + { + clearNextIntervalRef(reg, physRegRecord->registerType); + clearSpillCost(reg, physRegRecord->registerType); + } } #ifdef DEBUG @@ -5378,9 +5379,14 @@ void LinearScan::allocateRegisters() BasicBlock* currentBlock = nullptr; - LsraLocation prevLocation = MinLocation; - regMaskTP regsToFree = RBM_NONE; - regMaskTP delayRegsToFree = RBM_NONE; + LsraLocation prevLocation = MinLocation; + regMaskTP regsToFree = RBM_NONE; + regMaskTP delayRegsToFree = RBM_NONE; + regMaskTP regsToMakeInactive = RBM_NONE; + regMaskTP delayRegsToMakeInactive = RBM_NONE; + regMaskTP copyRegsToFree = RBM_NONE; + regsInUseThisLocation = RBM_NONE; + regsInUseNextLocation = RBM_NONE; // This is the most recent RefPosition for which a register was allocated // - currently only used for DEBUG but maintained in non-debug, for clarity of code @@ -5394,6 +5400,29 @@ void LinearScan::allocateRegisters() RefPosition* currentRefPosition = &refPositionIterator; RefPosition* nextRefPosition = currentRefPosition->nextRefPosition; + // TODO: Can we combine this with the freeing of registers below? It might + // mess with the dump, since this was previously being done before the call below + // to dumpRegRecords. + regMaskTP tempRegsToMakeInactive = (regsToMakeInactive | delayRegsToMakeInactive); + while (tempRegsToMakeInactive != RBM_NONE) + { + regMaskTP nextRegBit = genFindLowestBit(tempRegsToMakeInactive); + tempRegsToMakeInactive &= ~nextRegBit; + regNumber nextReg = genRegNumFromMask(nextRegBit); + RegRecord* regRecord = getRegisterRecord(nextReg); + clearSpillCost(regRecord->regNum, regRecord->registerType); + makeRegisterInactive(regRecord); + } + if (currentRefPosition->nodeLocation > prevLocation) + { + makeRegsAvailable(regsToMakeInactive); + // TODO: Clean this up. We need to make the delayRegs inactive as well, but don't want + // to mark them as free yet. + regsToMakeInactive |= delayRegsToMakeInactive; + regsToMakeInactive = delayRegsToMakeInactive; + delayRegsToMakeInactive = RBM_NONE; + } + #ifdef DEBUG // Set the activeRefPosition to null until we're done with any boundary handling. activeRefPosition = nullptr; @@ -5437,14 +5466,15 @@ void LinearScan::allocateRegisters() LsraLocation currentLocation = currentRefPosition->nodeLocation; - if ((regsToFree | delayRegsToFree) != RBM_NONE) + // Free at a new location. + if (currentLocation > prevLocation) { - // Free at a new location, or at a basic block boundary - if (refType == RefTypeBB) - { - assert(currentLocation > prevLocation); - } - if (currentLocation > prevLocation) + // CopyRegs are simply made available - we don't want to make the associated interval inactive. 
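+            // The registers recorded as in use at the next location (delayed frees) now become
+            // the in-use set for the location being processed.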
+ makeRegsAvailable(copyRegsToFree); + copyRegsToFree = RBM_NONE; + regsInUseThisLocation = regsInUseNextLocation; + regsInUseNextLocation = RBM_NONE; + if ((regsToFree | delayRegsToFree) != RBM_NONE) { freeRegisters(regsToFree); if ((currentLocation > (prevLocation + 1)) && (delayRegsToFree != RBM_NONE)) @@ -5454,10 +5484,109 @@ void LinearScan::allocateRegisters() assert(!"Found a delayRegFree associated with Location with no reference"); // However, to be cautious for the Release build case, we will free them. freeRegisters(delayRegsToFree); - delayRegsToFree = RBM_NONE; + delayRegsToFree = RBM_NONE; + regsInUseThisLocation = RBM_NONE; } regsToFree = delayRegsToFree; delayRegsToFree = RBM_NONE; + +#ifdef DEBUG + // Validate the current state just after we've freed the registers. This ensures that any pending + // freed registers will have had their state updated to reflect the intervals they were holding. + for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg)) + { + regMaskTP regMask = genRegMask(reg); + // If this isn't available or if it's still waiting to be freed (i.e. it was in + // delayRegsToFree and so now it's in regsToFree), then skip it. + if ((regMask & (availableIntRegs | availableFloatRegs) & ~regsToFree) == RBM_NONE) + { + continue; + } + RegRecord* physRegRecord = getRegisterRecord(reg); + Interval* assignedInterval = physRegRecord->assignedInterval; + if (assignedInterval != nullptr) + { + bool isAssignedReg = (assignedInterval->physReg == reg); + RefPosition* recentRefPosition = assignedInterval->recentRefPosition; + // If we have a copyReg or a moveReg, we might have assigned this register to an Interval, + // but that isn't considered its assignedReg. + if (recentRefPosition != nullptr) + { + if (recentRefPosition->refType == RefTypeExpUse) + { + // We don't update anything on these, as they're just placeholders to extend the + // lifetime. + continue; + } + // For copyReg or moveReg, we don't have anything further to assert. + if (recentRefPosition->copyReg || recentRefPosition->moveReg) + { + continue; + } + assert(assignedInterval->isConstant == isRegConstant(reg, assignedInterval->registerType)); + if (assignedInterval->isActive) + { + // If this is not the register most recently allocated, it must be from a copyReg, + // or it was placed there by the inVarToRegMap. In either case it must be a lclVar. + + if (!isAssignedToInterval(assignedInterval, physRegRecord)) + { + assert(assignedInterval->isLocalVar); + // We'd like to assert that this was either set by the inVarToRegMap, or by + // a copyReg, but we can't traverse backward to check for a copyReg, because + // we only have recentRefPosition, and there may be a previous RefPosition + // at the same Location with a copyReg. 
+ } + if (isAssignedReg) + { + assert(nextIntervalRef[reg] == assignedInterval->getNextRefLocation()); + if (assignedInterval->isActive) + { + assert(!isRegAvailable(reg, assignedInterval->registerType)); + assert((recentRefPosition == nullptr) || + (spillCost[reg] == getSpillWeight(physRegRecord))); + } + else + { + assert(isRegAvailable(reg, assignedInterval->registerType)); + assert(spillCost[reg] == 0); + } + } + else + { + assert((nextIntervalRef[reg] == MaxLocation) || + isRegBusy(reg, assignedInterval->registerType)); + } + } + else if ((assignedInterval->physReg == reg) && !assignedInterval->isConstant) + // else if (!assignedInterval->isConstant) + { + assert(nextIntervalRef[reg] == assignedInterval->getNextRefLocation()); + } + else + { + assert(nextIntervalRef[reg] == MaxLocation); + } + } + } + else + { + assert(isRegAvailable(reg, physRegRecord->registerType)); + assert(!isRegConstant(reg, physRegRecord->registerType)); + assert(nextIntervalRef[reg] == MaxLocation); + assert(spillCost[reg] == 0); + } + LsraLocation thisNextFixedRef = physRegRecord->getNextRefLocation(); + assert(nextFixedRef[reg] == thisNextFixedRef); +#ifdef TARGET_ARM + // If this is occupied by a double interval, skip the corresponding float reg. + if ((assignedInterval != nullptr) && (assignedInterval->registerType == TYP_DOUBLE)) + { + reg = REG_NEXT(reg); + } +#endif + } +#endif // DEBUG } } prevLocation = currentLocation; @@ -5495,9 +5624,11 @@ void LinearScan::allocateRegisters() { // Free any delayed regs (now in regsToFree) before processing the block boundary freeRegisters(regsToFree); - regsToFree = RBM_NONE; - handledBlockEnd = true; - curBBStartLocation = currentRefPosition->nodeLocation; + regsToFree = RBM_NONE; + regsInUseThisLocation = RBM_NONE; + regsInUseNextLocation = RBM_NONE; + handledBlockEnd = true; + curBBStartLocation = currentRefPosition->nodeLocation; if (currentBlock == nullptr) { currentBlock = startBlockSequence(); @@ -5523,42 +5654,49 @@ void LinearScan::allocateRegisters() continue; } - // If this is a FixedReg, disassociate any inactive constant interval from this register. - // Otherwise, do nothing. - if (refType == RefTypeFixedReg) + if (currentRefPosition->isPhysRegRef) { RegRecord* regRecord = currentRefPosition->getReg(); Interval* assignedInterval = regRecord->assignedInterval; - if (assignedInterval != nullptr && !assignedInterval->isActive && assignedInterval->isConstant) + updateNextFixedRef(regRecord, currentRefPosition->nextRefPosition); + + // If this is a FixedReg, disassociate any inactive constant interval from this register. + // Otherwise, do nothing. 
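+                // (The nextFixedRef entry for this register was already updated above.)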
+ if (refType == RefTypeFixedReg) { - regRecord->assignedInterval = nullptr; + if (assignedInterval != nullptr && !assignedInterval->isActive && assignedInterval->isConstant) + { + regRecord->assignedInterval = nullptr; + spillCost[regRecord->regNum] = 0; #ifdef TARGET_ARM - // Update overlapping floating point register for TYP_DOUBLE - if (assignedInterval->registerType == TYP_DOUBLE) - { - regRecord = findAnotherHalfRegRec(regRecord); - assert(regRecord->assignedInterval == assignedInterval); - regRecord->assignedInterval = nullptr; + // Update overlapping floating point register for TYP_DOUBLE + if (assignedInterval->registerType == TYP_DOUBLE) + { + RegRecord* otherRegRecord = findAnotherHalfRegRec(regRecord); + assert(otherRegRecord->assignedInterval == assignedInterval); + otherRegRecord->assignedInterval = nullptr; + spillCost[otherRegRecord->regNum] = 0; + } +#endif // TARGET_ARM } -#endif + regsInUseThisLocation |= currentRefPosition->registerAssignment; + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition->assignedReg())); + continue; } - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition->assignedReg())); - continue; - } - if (refType == RefTypeKill) - { - RegRecord* currentReg = currentRefPosition->getReg(); - Interval* assignedInterval = currentReg->assignedInterval; - - if (assignedInterval != nullptr) + if (refType == RefTypeKill) { - unassignPhysReg(currentReg, assignedInterval->recentRefPosition); + if (assignedInterval != nullptr) + { + unassignPhysReg(regRecord, assignedInterval->recentRefPosition); + clearConstantReg(regRecord->regNum, assignedInterval->registerType); + makeRegAvailable(regRecord->regNum, assignedInterval->registerType); + } + clearRegBusyUntilKill(regRecord->regNum); + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum)); + continue; } - currentReg->isBusyUntilNextKill = false; - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, currentReg->regNum)); - continue; } // If this is an exposed use, do nothing - this is merely a placeholder to attempt to @@ -5568,6 +5706,11 @@ void LinearScan::allocateRegisters() if (refType == RefTypeExpUse) { INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_EXP_USE)); + currentInterval = currentRefPosition->getInterval(); + if (currentInterval->physReg != REG_NA) + { + updateNextIntervalRef(currentInterval->physReg, currentInterval); + } continue; } @@ -5576,6 +5719,7 @@ void LinearScan::allocateRegisters() assert(currentRefPosition->isIntervalRef()); currentInterval = currentRefPosition->getInterval(); assert(currentInterval != nullptr); + assert(currentRefPosition->isFixedRegRef == isSingleRegister(currentRefPosition->registerAssignment)); assignedRegister = currentInterval->physReg; // Identify the special cases where we decide up-front not to allocate @@ -5620,6 +5764,12 @@ void LinearScan::allocateRegisters() INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_ENTRY_REG_ALLOCATED, currentInterval)); didDump = true; setIntervalAsSpilled(currentInterval); + if (assignedRegister != REG_NA) + { + clearNextIntervalRef(assignedRegister, currentInterval->registerType); + clearSpillCost(assignedRegister, currentInterval->registerType); + makeRegAvailable(assignedRegister, currentInterval->registerType); + } } } #ifdef FEATURE_SIMD @@ -5704,7 +5854,7 @@ void LinearScan::allocateRegisters() // as special. 
if (srcInterval->isActive && genRegMask(srcInterval->physReg) == currentRefPosition->registerAssignment && - currentInterval->getNextRefLocation() == physRegRecord->getNextRefLocation()) + currentInterval->getNextRefLocation() == nextFixedRef[srcInterval->physReg]) { assert(physRegRecord->regNum == srcInterval->physReg); @@ -5720,7 +5870,7 @@ void LinearScan::allocateRegisters() // special putarg_reg doesn't get spilled and re-allocated prior to // its use at the call node. This is ensured by marking physical reg // record as busy until next kill. - physRegRecord->isBusyUntilNextKill = true; + setRegBusyUntilKill(srcInterval->physReg, srcInterval->registerType); } else { @@ -5762,7 +5912,10 @@ void LinearScan::allocateRegisters() else { currentInterval->isActive = true; + setRegInUse(assignedRegister, currentInterval->registerType); + updateSpillCost(assignedRegister, currentInterval); } + updateNextIntervalRef(assignedRegister, currentInterval); } assert(currentInterval->assignedReg != nullptr && currentInterval->assignedReg->regNum == assignedRegister && @@ -5792,18 +5945,20 @@ void LinearScan::allocateRegisters() // Will the assigned register cover the lifetime? If not, does it at least // meet the preferences for the next RefPosition? - RegRecord* physRegRecord = getRegisterRecord(currentInterval->physReg); - RefPosition* nextPhysRegRefPos = physRegRecord->getNextRefPosition(); - if (nextPhysRegRefPos != nullptr && - nextPhysRegRefPos->nodeLocation <= currentInterval->lastRefPosition->nodeLocation) + LsraLocation nextPhysRegLocation = nextFixedRef[assignedRegister]; + if (nextPhysRegLocation <= currentInterval->lastRefPosition->nodeLocation) { // Check to see if the existing assignment matches the preferences (e.g. callee save registers) // and ensure that the next use of this localVar does not occur after the nextPhysRegRefPos // There must be a next RefPosition, because we know that the Interval extends beyond the // nextPhysRegRefPos. assert(nextRefPosition != nullptr); - if (!matchesPreferences || nextPhysRegRefPos->nodeLocation < nextRefPosition->nodeLocation || - physRegRecord->conflictingFixedRegReference(nextRefPosition)) + if (!matchesPreferences || nextPhysRegLocation < nextRefPosition->nodeLocation) + { + keepAssignment = false; + } + else if ((nextRefPosition->registerAssignment != assignedRegBit) && + (nextPhysRegLocation <= nextRefPosition->getRefEndLocation())) { keepAssignment = false; } @@ -5821,7 +5976,9 @@ void LinearScan::allocateRegisters() if (keepAssignment == false) { + RegRecord* physRegRecord = getRegisterRecord(currentInterval->physReg); currentRefPosition->registerAssignment = allRegs(currentInterval->registerType); + currentRefPosition->isFixedRegRef = false; unassignPhysRegNoSpill(physRegRecord); // If the preferences are currently set to just this register, reset them to allRegs @@ -5847,18 +6004,21 @@ void LinearScan::allocateRegisters() if (assignedRegister != REG_NA) { RegRecord* physRegRecord = getRegisterRecord(assignedRegister); - - // If there is a conflicting fixed reference, insert a copy. - if (physRegRecord->conflictingFixedRegReference(currentRefPosition)) + assert((assignedRegBit == currentRefPosition->registerAssignment) || + (physRegRecord->assignedInterval == currentInterval) || + !isRegInUse(assignedRegister, currentInterval->registerType)); + if (conflictingFixedRegReference(assignedRegister, currentRefPosition)) { // We may have already reassigned the register to the conflicting reference. // If not, we need to unassign this interval. 
if (physRegRecord->assignedInterval == currentInterval) { unassignPhysRegNoSpill(physRegRecord); + physRegRecord->assignedInterval = nullptr; } currentRefPosition->moveReg = true; assignedRegister = REG_NA; + currentRefPosition->registerAssignment &= ~assignedRegBit; setIntervalAsSplit(currentInterval); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_MOVE_REG, currentInterval, assignedRegister)); } @@ -5907,37 +6067,52 @@ void LinearScan::allocateRegisters() } } } + regMaskTP copyRegMask = getRegMask(copyReg, currentInterval->registerType); + regMaskTP assignedRegMask = getRegMask(assignedRegister, currentInterval->registerType); + regsInUseThisLocation |= copyRegMask | assignedRegMask; if (currentRefPosition->lastUse) - { - assert(currentRefPosition->isIntervalRef()); - unassign = true; - } - if (unassign) { if (currentRefPosition->delayRegFree) { INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED, currentInterval, assignedRegister)); - delayRegsToFree |= (genRegMask(assignedRegister) | currentRefPosition->registerAssignment); + delayRegsToFree |= copyRegMask | assignedRegMask; + regsInUseNextLocation |= copyRegMask | assignedRegMask; } else { INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE, currentInterval, assignedRegister)); - regsToFree |= (genRegMask(assignedRegister) | currentRefPosition->registerAssignment); + regsToFree |= copyRegMask | assignedRegMask; + } + } + else + { + copyRegsToFree |= copyRegMask; + if (currentRefPosition->delayRegFree) + { + regsInUseNextLocation |= copyRegMask | assignedRegMask; } } + // If this is a tree temp (non-localVar) interval, we will need an explicit move. + // Note: In theory a moveReg should cause the Interval to now have the new reg as its + // assigned register. However, that's not currently how this works. + // If we ever actually move lclVar intervals instead of copying, this will need to change. if (!currentInterval->isLocalVar) { currentRefPosition->moveReg = true; currentRefPosition->copyReg = false; } + clearNextIntervalRef(copyReg, currentInterval->registerType); + clearSpillCost(copyReg, currentInterval->registerType); + updateNextIntervalRef(assignedRegister, currentInterval); + updateSpillCost(assignedRegister, currentInterval); continue; } else { INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NEEDS_NEW_REG, nullptr, assignedRegister)); - regsToFree |= genRegMask(assignedRegister); + regsToFree |= getRegMask(assignedRegister, currentInterval->registerType); // We want a new register, but we don't want this to be considered a spill. assignedRegister = REG_NA; if (physRegRecord->assignedInterval == currentInterval) @@ -5950,21 +6125,19 @@ void LinearScan::allocateRegisters() if (assignedRegister == REG_NA) { - bool allocateReg = true; - if (currentRefPosition->RegOptional()) { - // We can avoid allocating a register if it is a the last use requiring a reload. + // We can avoid allocating a register if it is a last use requiring a reload. if (currentRefPosition->lastUse && currentRefPosition->reload) { - allocateReg = false; + allocate = false; } else if (currentInterval->isWriteThru) { // Don't allocate if the next reference is in a cold block. 
if (nextRefPosition == nullptr || (nextRefPosition->nodeLocation >= firstColdLoc)) { - allocateReg = false; + allocate = false; } } @@ -5974,71 +6147,43 @@ void LinearScan::allocateRegisters() if ((currentRefPosition->refType == RefTypeUpperVectorRestore) && (currentInterval->physReg == REG_NA)) { assert(currentRefPosition->regOptional); - allocateReg = false; + allocate = false; } #endif #ifdef DEBUG // Under stress mode, don't allocate registers to RegOptional RefPositions. - if (allocateReg && regOptionalNoAlloc()) + if (allocate && regOptionalNoAlloc()) { - allocateReg = false; + allocate = false; } #endif } - if (allocateReg) + if (allocate) { - // Try to allocate a register - assignedRegister = tryAllocateFreeReg(currentInterval, currentRefPosition); + // Allocate a register, if we must, or if it is profitable to do so. + // If we have a fixed reg requirement, and the interval is inactive in another register, + // unassign that register. + if (currentRefPosition->isFixedRegRef && !currentInterval->isActive && + (currentInterval->assignedReg != nullptr) && + (currentInterval->assignedReg->assignedInterval == currentInterval) && + (genRegMask(currentInterval->assignedReg->regNum) != currentRefPosition->registerAssignment)) + { + unassignPhysReg(currentInterval->assignedReg, nullptr); + } + assignedRegister = allocateReg(currentInterval, currentRefPosition); } - // If no register was found, and if the currentRefPosition must have a register, - // then find a register to spill + // If no register was found, this RefPosition must not require a register. if (assignedRegister == REG_NA) { - bool isAllocatable = currentRefPosition->IsActualRef(); -#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE && defined(TARGET_ARM64) - if (currentInterval->isUpperVector) - { - // On Arm64, we can't save the upper half to memory without a register. - isAllocatable = true; - assert(!currentRefPosition->RegOptional()); - } -#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE && TARGET_ARM64 - if (isAllocatable) - { - if (allocateReg) - { - assignedRegister = - allocateBusyReg(currentInterval, currentRefPosition, currentRefPosition->RegOptional()); - } - - if (assignedRegister != REG_NA) - { - INDEBUG( - dumpLsraAllocationEvent(LSRA_EVENT_ALLOC_SPILLED_REG, currentInterval, assignedRegister)); - } - else - { - // This can happen only for those ref positions that are to be allocated - // only if profitable. 
- noway_assert(currentRefPosition->RegOptional()); - - currentRefPosition->registerAssignment = RBM_NONE; - currentRefPosition->reload = false; - setIntervalAsSpilled(currentInterval); - - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval)); - } - } - else - { - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval)); - currentRefPosition->registerAssignment = RBM_NONE; - currentInterval->isActive = false; - setIntervalAsSpilled(currentInterval); - } + assert(currentRefPosition->RegOptional()); + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval)); + currentRefPosition->registerAssignment = RBM_NONE; + currentRefPosition->reload = false; + currentInterval->isActive = false; + setIntervalAsSpilled(currentInterval); } #ifdef DEBUG else @@ -6072,12 +6217,23 @@ void LinearScan::allocateRegisters() } // If we allocated a register, record it - if (currentInterval != nullptr && assignedRegister != REG_NA) + assert(currentInterval != nullptr); + if (assignedRegister != REG_NA) { - assignedRegBit = genRegMask(assignedRegister); + assignedRegBit = genRegMask(assignedRegister); + regMaskTP regMask = getRegMask(assignedRegister, currentInterval->registerType); + // if (!RefTypeIsDef(refType)) + { + regsInUseThisLocation |= regMask; + if (currentRefPosition->delayRegFree) + { + regsInUseNextLocation |= regMask; + } + } currentRefPosition->registerAssignment = assignedRegBit; - currentInterval->physReg = assignedRegister; - regsToFree &= ~assignedRegBit; // we'll set it again later if it's dead + + currentInterval->physReg = assignedRegister; + regsToFree &= ~regMask; // we'll set it again later if it's dead // If this interval is dead, free the register. // The interval could be dead if this is a user variable, or if the @@ -6085,9 +6241,10 @@ void LinearScan::allocateRegisters() // is not used, etc. // If this is an UpperVector we'll neither free it nor preference it // (it will be freed when it is used). + bool stillInReg = true; + bool unassign = false; if (!currentInterval->IsUpperVector()) { - bool unassign = false; if (currentInterval->isWriteThru) { if (currentRefPosition->refType == RefTypeDef) @@ -6105,44 +6262,57 @@ void LinearScan::allocateRegisters() if (currentRefPosition->lastUse || currentRefPosition->nextRefPosition == nullptr) { assert(currentRefPosition->isIntervalRef()); - - if (refType != RefTypeExpUse && currentRefPosition->nextRefPosition == nullptr) + // If this isn't a final use, we'll mark the register as available, but keep the association. + if ((refType != RefTypeExpUse) && (currentRefPosition->nextRefPosition == nullptr)) { unassign = true; } else { + if (currentRefPosition->delayRegFree) + { + delayRegsToMakeInactive |= regMask; + } + else + { + regsToMakeInactive |= regMask; + } + // TODO-Cleanup: this makes things consistent with previous, and will enable preferences + // to be propagated, but it seems less than ideal. currentInterval->isActive = false; } + // Update the register preferences for the relatedInterval, if this is 'preferencedToDef'. + // Don't propagate to subsequent relatedIntervals; that will happen as they are allocated, and we + // don't know yet whether the register will be retained. 
+ if (currentInterval->relatedInterval != nullptr) + { + currentInterval->relatedInterval->updateRegisterPreferences(assignedRegBit); + } } + if (unassign) { if (currentRefPosition->delayRegFree) { - delayRegsToFree |= assignedRegBit; + delayRegsToFree |= regMask; INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED)); } else { - regsToFree |= assignedRegBit; + regsToFree |= regMask; INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE)); } } - - // Update the register preferences for the relatedInterval, if this is 'preferencedToDef'. - // Don't propagate to subsequent relatedIntervals; that will happen as they are allocated, and we - // don't know yet whether the register will be retained. - if ((currentRefPosition->lastUse || nextRefPosition == nullptr) && - (currentInterval->relatedInterval != nullptr)) - { - currentInterval->relatedInterval->updateRegisterPreferences(assignedRegBit); - } } - - lastAllocatedRefPosition = currentRefPosition; + if (!unassign) + { + updateNextIntervalRef(assignedRegister, currentInterval); + updateSpillCost(assignedRegister, currentInterval); + } } + lastAllocatedRefPosition = currentRefPosition; } #ifdef JIT32_GCENCODER @@ -6284,20 +6454,47 @@ void LinearScan::updateAssignedInterval(RegRecord* reg, Interval* interval, Regi #ifdef TARGET_ARM // Update overlapping floating point register for TYP_DOUBLE. Interval* oldAssignedInterval = reg->assignedInterval; + regNumber doubleReg = REG_NA; if (regType == TYP_DOUBLE) { - RegRecord* anotherHalfReg = findAnotherHalfRegRec(reg); - + doubleReg = reg->regNum; + assert(genIsValidDoubleReg(doubleReg)); + RegRecord* anotherHalfReg = getSecondHalfRegRec(reg); anotherHalfReg->assignedInterval = interval; } else if ((oldAssignedInterval != nullptr) && (oldAssignedInterval->registerType == TYP_DOUBLE)) { - RegRecord* anotherHalfReg = findAnotherHalfRegRec(reg); - + RegRecord* anotherHalfReg = findAnotherHalfRegRec(reg); + doubleReg = genIsValidDoubleReg(reg->regNum) ? 
reg->regNum : anotherHalfReg->regNum; anotherHalfReg->assignedInterval = nullptr; } + if (doubleReg != REG_NA) + { + clearNextIntervalRef(doubleReg, TYP_DOUBLE); + clearSpillCost(doubleReg, TYP_DOUBLE); + clearConstantReg(doubleReg, TYP_DOUBLE); + } #endif reg->assignedInterval = interval; + if (interval != nullptr) + { + setRegInUse(reg->regNum, interval->registerType); + if (interval->isConstant) + { + setConstantReg(reg->regNum, interval->registerType); + } + else + { + clearConstantReg(reg->regNum, interval->registerType); + } + updateNextIntervalRef(reg->regNum, interval); + updateSpillCost(reg->regNum, interval); + } + else + { + clearNextIntervalRef(reg->regNum, reg->registerType); + clearSpillCost(reg->regNum, reg->registerType); + } } //----------------------------------------------------------------------------- @@ -10018,7 +10215,7 @@ void LinearScan::dumpLsraAllocationEvent(LsraDumpEvent event, } if ((interval != nullptr) && (reg != REG_NA) && (reg != REG_STK)) { - registersToDump |= genRegMask(reg); + registersToDump |= getRegMask(reg, interval->registerType); dumpRegRecordTitleIfNeeded(); } @@ -10074,16 +10271,10 @@ void LinearScan::dumpLsraAllocationEvent(LsraDumpEvent event, break; // Restoring the previous register - case LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL: - assert(interval != nullptr); - dumpRefPositionShort(activeRefPosition, currentBlock); - printf("SRstr %-4s ", getRegName(reg)); - dumpRegRecords(); - break; - case LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL: + case LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL_AFTER_SPILL: assert(interval != nullptr); - if (activeRefPosition == nullptr) + if ((activeRefPosition == nullptr) || (activeRefPosition->refType == RefTypeBB)) { printf(emptyRefPositionFormat, ""); } @@ -10091,7 +10282,7 @@ void LinearScan::dumpLsraAllocationEvent(LsraDumpEvent event, { dumpRefPositionShort(activeRefPosition, currentBlock); } - printf("Restr %-4s ", getRegName(reg)); + printf((event == LSRA_EVENT_RESTORE_PREVIOUS_INTERVAL) ? 
"Restr %-4s " : "SRstr %-4s ", getRegName(reg)); dumpRegRecords(); break; @@ -10166,11 +10357,6 @@ void LinearScan::dumpLsraAllocationEvent(LsraDumpEvent event, printf("Reuse %-4s ", getRegName(reg)); break; - case LSRA_EVENT_ALLOC_SPILLED_REG: - dumpRefPositionShort(activeRefPosition, currentBlock); - printf("Steal %-4s ", getRegName(reg)); - break; - case LSRA_EVENT_NO_ENTRY_REG_ALLOCATED: assert(interval != nullptr && interval->isLocalVar); dumpRefPositionShort(activeRefPosition, currentBlock); @@ -10412,12 +10598,12 @@ void LinearScan::dumpRegRecords() { static char columnFormatArray[18]; - for (int regNumIndex = 0; regNumIndex <= lastUsedRegNumIndex; regNumIndex++) + for (regNumber regNum = REG_FIRST; regNum <= (regNumber)lastUsedRegNumIndex; regNum = REG_NEXT(regNum)) { - if (shouldDumpReg((regNumber)regNumIndex)) + if (shouldDumpReg(regNum)) { printf("%s", columnSeparator); - RegRecord& regRecord = physRegs[regNumIndex]; + RegRecord& regRecord = physRegs[regNum]; Interval* interval = regRecord.assignedInterval; if (interval != nullptr) { @@ -10431,7 +10617,7 @@ void LinearScan::dumpRegRecords() #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE printf("%c", activeChar); } - else if (regRecord.isBusyUntilNextKill) + else if ((genRegMask(regNum) & regsBusyUntilKill) != RBM_NONE) { printf(columnFormatArray, "Busy"); } @@ -10957,8 +11143,11 @@ void LinearScan::verifyFinalAllocation() } else { - interval->physReg = regNum; - interval->assignedReg = regRecord; + if (!currentRefPosition->copyReg) + { + interval->physReg = regNum; + interval->assignedReg = regRecord; + } regRecord->assignedInterval = interval; } } diff --git a/src/coreclr/src/jit/lsra.h b/src/coreclr/src/jit/lsra.h index 0d443f900c6cd8..7ddf45fc1c5d08 100644 --- a/src/coreclr/src/jit/lsra.h +++ b/src/coreclr/src/jit/lsra.h @@ -471,12 +471,11 @@ class RegRecord : public Referenceable public: RegRecord() { - assignedInterval = nullptr; - previousInterval = nullptr; - regNum = REG_NA; - isCalleeSave = false; - registerType = IntRegisterType; - isBusyUntilNextKill = false; + assignedInterval = nullptr; + previousInterval = nullptr; + regNum = REG_NA; + isCalleeSave = false; + registerType = IntRegisterType; } void init(regNumber reg) @@ -511,8 +510,6 @@ class RegRecord : public Referenceable void tinyDump(); #endif // DEBUG - bool isFree(); - // RefPosition * getNextRefPosition(); // LsraLocation getNextRefLocation(); @@ -528,15 +525,10 @@ class RegRecord : public Referenceable // assignedInterval becomes inactive. Interval* previousInterval; - regNumber regNum; - bool isCalleeSave; - RegisterType registerType; - // This register must be considered busy until the next time it is explicitly killed. - // This is used so that putarg_reg can avoid killing its lclVar source, while avoiding - // the problem with the reg becoming free if the last-use is encountered before the call. 
- bool isBusyUntilNextKill; - - bool conflictingFixedRegReference(RefPosition* refPosition); + regNumber regNum; + bool isCalleeSave; + RegisterType registerType; + unsigned char regOrder; }; inline bool leafInRange(GenTree* leaf, int lower, int upper) @@ -976,9 +968,7 @@ class LinearScan : public LinearScanInterface bool isSecondHalfReg(RegRecord* regRec, Interval* interval); RegRecord* getSecondHalfRegRec(RegRecord* regRec); RegRecord* findAnotherHalfRegRec(RegRecord* regRec); - bool canSpillDoubleReg(RegRecord* physRegRecord, - LsraLocation refLocation, - BasicBlock::weight_t* recentAssignedRefWeight); + bool canSpillDoubleReg(RegRecord* physRegRecord, LsraLocation refLocation); void unassignDoublePhysReg(RegRecord* doubleRegRecord); #endif void updateAssignedInterval(RegRecord* reg, Interval* interval, RegisterType regType); @@ -986,7 +976,8 @@ class LinearScan : public LinearScanInterface bool canRestorePreviousInterval(RegRecord* regRec, Interval* assignedInterval); bool isAssignedToInterval(Interval* interval, RegRecord* regRec); bool isRefPositionActive(RefPosition* refPosition, LsraLocation refLocation); - bool canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation, BasicBlock::weight_t* recentAssignedRefWeight); + bool canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation); + unsigned getSpillWeight(RegRecord* physRegRecord); bool isRegInUse(RegRecord* regRec, RefPosition* refPosition); // insert refpositions representing prolog zero-inits which will be added later @@ -1056,11 +1047,11 @@ class LinearScan : public LinearScanInterface regMaskTP allSIMDRegs(); regMaskTP internalFloatRegCandidates(); - bool registerIsFree(regNumber regNum, RegisterType regType); bool registerIsAvailable(RegRecord* physRegRecord, LsraLocation currentLoc, LsraLocation* nextRefLocationPtr, RegisterType regType); + void makeRegisterInactive(RegRecord* physRegRecord); void freeRegister(RegRecord* physRegRecord); void freeRegisters(regMaskTP regsToFree); @@ -1143,15 +1134,11 @@ class LinearScan : public LinearScanInterface * Register management ****************************************************************************/ RegisterType getRegisterType(Interval* currentInterval, RefPosition* refPosition); - regNumber tryAllocateFreeReg(Interval* current, RefPosition* refPosition); - regNumber allocateBusyReg(Interval* current, RefPosition* refPosition, bool allocateIfProfitable); + regNumber allocateReg(Interval* current, RefPosition* refPosition); regNumber assignCopyReg(RefPosition* refPosition); bool isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPosition); - bool isSpillCandidate(Interval* current, - RefPosition* refPosition, - RegRecord* physRegRecord, - LsraLocation& nextLocation); + bool isSpillCandidate(Interval* current, RefPosition* refPosition, RegRecord* physRegRecord); void checkAndAssignInterval(RegRecord* regRec, Interval* interval); void assignPhysReg(RegRecord* regRec, Interval* interval); void assignPhysReg(regNumber reg, Interval* interval) @@ -1316,8 +1303,7 @@ class LinearScan : public LinearScanInterface // Allocation decisions LSRA_EVENT_FIXED_REG, LSRA_EVENT_EXP_USE, LSRA_EVENT_ZERO_REF, LSRA_EVENT_NO_ENTRY_REG_ALLOCATED, LSRA_EVENT_KEPT_ALLOCATION, LSRA_EVENT_COPY_REG, LSRA_EVENT_MOVE_REG, LSRA_EVENT_ALLOC_REG, - LSRA_EVENT_ALLOC_SPILLED_REG, LSRA_EVENT_NO_REG_ALLOCATED, LSRA_EVENT_RELOAD, LSRA_EVENT_SPECIAL_PUTARG, - LSRA_EVENT_REUSE_REG, + LSRA_EVENT_NO_REG_ALLOCATED, LSRA_EVENT_RELOAD, LSRA_EVENT_SPECIAL_PUTARG, LSRA_EVENT_REUSE_REG, }; void 
dumpLsraAllocationEvent(LsraDumpEvent event, Interval* interval = nullptr, @@ -1488,6 +1474,141 @@ class LinearScan : public LinearScanInterface VARSET_TP largeVectorCalleeSaveCandidateVars; #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE + //----------------------------------------------------------------------- + // Register status + //----------------------------------------------------------------------- + + regMaskTP m_AvailableRegs; + regNumber getRegForType(regNumber reg, var_types regType) + { +#ifdef TARGET_ARM + if ((regType == TYP_DOUBLE) && !genIsValidDoubleReg(reg)) + { + reg = REG_PREV(reg); + } +#endif // TARGET_ARM + return reg; + } + + regMaskTP getRegMask(regNumber reg, var_types regType) + { + reg = getRegForType(reg, regType); + regMaskTP regMask = genRegMask(reg); +#ifdef TARGET_ARM + if (regType == TYP_DOUBLE) + { + assert(genIsValidDoubleReg(reg)); + regMask |= (regMask << 1); + } +#endif // TARGET_ARM + return regMask; + } + + void resetAvailableRegs() + { + m_AvailableRegs = (availableIntRegs | availableFloatRegs); + m_RegistersWithConstants = RBM_NONE; + } + + bool isRegAvailable(regNumber reg, var_types regType) + { + regMaskTP regMask = getRegMask(reg, regType); + return (m_AvailableRegs & regMask) == regMask; + } + void setRegsInUse(regMaskTP regMask) + { + m_AvailableRegs &= ~regMask; + } + void setRegInUse(regNumber reg, var_types regType) + { + regMaskTP regMask = getRegMask(reg, regType); + setRegsInUse(regMask); + } + void makeRegsAvailable(regMaskTP regMask) + { + m_AvailableRegs |= regMask; + } + void makeRegAvailable(regNumber reg, var_types regType) + { + regMaskTP regMask = getRegMask(reg, regType); + makeRegsAvailable(regMask); + } + + void clearNextIntervalRef(regNumber reg, var_types regType); + void updateNextIntervalRef(regNumber reg, Interval* interval); + LsraLocation getNextIntervalRefLocation(regNumber reg ARM_ARG(var_types theRegType)) + { + LsraLocation nextLocation = nextIntervalRef[reg]; +#ifdef TARGET_ARM + if (theRegType == TYP_DOUBLE) + { + nextLocation = Min(nextLocation, nextIntervalRef[REG_NEXT(reg)]); + } +#endif + return nextLocation; + } + + void clearSpillCost(regNumber reg, var_types regType); + void updateSpillCost(regNumber reg, Interval* interval); + + regMaskTP m_RegistersWithConstants; + void clearConstantReg(regNumber reg, var_types regType) + { + m_RegistersWithConstants &= ~getRegMask(reg, regType); + } + void setConstantReg(regNumber reg, var_types regType) + { + m_RegistersWithConstants |= getRegMask(reg, regType); + } + bool isRegConstant(regNumber reg, var_types regType) + { + reg = getRegForType(reg, regType); + regMaskTP regMask = getRegMask(reg, regType); + return (m_RegistersWithConstants & regMask) == regMask; + } + + LsraLocation nextFixedRef[REG_COUNT]; + void updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPosition); + + LsraLocation nextIntervalRef[REG_COUNT]; + unsigned int spillCost[REG_COUNT]; + + regMaskTP regsBusyUntilKill; + regMaskTP regsInUseThisLocation; + regMaskTP regsInUseNextLocation; + bool isRegBusy(regNumber reg, var_types regType) + { + regMaskTP regMask = getRegMask(reg, regType); + return (regsBusyUntilKill & regMask) != RBM_NONE; + } + void setRegBusyUntilKill(regNumber reg, var_types regType) + { + regsBusyUntilKill |= getRegMask(reg, regType); + } + void clearRegBusyUntilKill(regNumber reg) + { + regsBusyUntilKill &= ~genRegMask(reg); + } + + bool isRegInUse(regNumber reg, var_types regType) + { + regMaskTP regMask = getRegMask(reg, regType); + return (regsInUseThisLocation & 
regMask) != RBM_NONE; + } + + void resetRegState() + { + resetAvailableRegs(); + regsBusyUntilKill = RBM_NONE; + } + + bool conflictingFixedRegReference(regNumber regNum, RefPosition* refPosition); + + // This method should not be used and is here to retain old behavior. + // It should be replaced by isRegAvailable(). + // See comment in allocateReg(); + bool isFree(RegRecord* regRecord); + //----------------------------------------------------------------------- // Build methods //----------------------------------------------------------------------- @@ -1551,7 +1672,7 @@ class LinearScan : public LinearScanInterface int BuildSimple(GenTree* tree); int BuildOperandUses(GenTree* node, regMaskTP candidates = RBM_NONE); - int BuildDelayFreeUses(GenTree* node, regMaskTP candidates = RBM_NONE); + int BuildDelayFreeUses(GenTree* node, GenTree* rmwNode = nullptr, regMaskTP candidates = RBM_NONE); int BuildIndirUses(GenTreeIndir* indirTree, regMaskTP candidates = RBM_NONE); int BuildAddrUses(GenTree* addr, regMaskTP candidates = RBM_NONE); void HandleFloatVarArgs(GenTreeCall* call, GenTree* argNode, bool* callHasFloatRegArgs); @@ -1728,7 +1849,7 @@ class Interval : public Referenceable // True if this interval is defined by a putArg, whose source is a non-last-use lclVar. // During allocation, this flag will be cleared if the source is not already in the required register. // Othewise, we will leave the register allocated to the lclVar, but mark the RegRecord as - // isBusyUntilNextKill, so that it won't be reused if the lclVar goes dead before the call. + // isBusyUntilKill, so that it won't be reused if the lclVar goes dead before the call. bool isSpecialPutArg : 1; // True if this interval interferes with a call. diff --git a/src/coreclr/src/jit/lsraarm64.cpp b/src/coreclr/src/jit/lsraarm64.cpp index 9c67a05cc55657..9463b1f4820870 100644 --- a/src/coreclr/src/jit/lsraarm64.cpp +++ b/src/coreclr/src/jit/lsraarm64.cpp @@ -1098,8 +1098,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) { if (isRMW) { - srcCount += BuildDelayFreeUses(intrin.op2); - srcCount += BuildDelayFreeUses(intrin.op3, RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS); + srcCount += BuildDelayFreeUses(intrin.op2, nullptr); + srcCount += BuildDelayFreeUses(intrin.op3, nullptr, RBM_ASIMD_INDEXED_H_ELEMENT_ALLOWED_REGS); } else { diff --git a/src/coreclr/src/jit/lsrabuild.cpp b/src/coreclr/src/jit/lsrabuild.cpp index bdcd8dcd3999cb..b7be02c7d895fb 100644 --- a/src/coreclr/src/jit/lsrabuild.cpp +++ b/src/coreclr/src/jit/lsrabuild.cpp @@ -314,6 +314,7 @@ void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* de // This is case #2. Use the useRegAssignment INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE2, interval)); defRefPosition->registerAssignment = useRegAssignment; + defRefPosition->isFixedRegRef = useRefPosition->isFixedRegRef; return; } } @@ -327,6 +328,7 @@ void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* de // This is case #3. INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE3, interval)); defRefPosition->registerAssignment = useRegAssignment; + defRefPosition->isFixedRegRef = useRefPosition->isFixedRegRef; return; } if (useRegRecord != nullptr && !defRegConflict && canChangeUseAssignment) @@ -334,6 +336,7 @@ void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* de // This is case #4. 
INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE4, interval)); useRefPosition->registerAssignment = defRegAssignment; + useRefPosition->isFixedRegRef = defRefPosition->isFixedRegRef; return; } if (defRegRecord != nullptr && useRegRecord != nullptr) @@ -345,6 +348,7 @@ void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* de (getRegisterType(interval, useRefPosition) == regType)); regMaskTP candidates = allRegs(regType); defRefPosition->registerAssignment = candidates; + defRefPosition->isFixedRegRef = false; return; } INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE6, interval)); @@ -420,6 +424,7 @@ void LinearScan::checkConflictingDefUse(RefPosition* useRP) if (!isSingleRegister(newAssignment) || !theInterval->hasInterferingUses) { defRP->registerAssignment = newAssignment; + defRP->isFixedRegRef = isSingleRegister(newAssignment); } } else @@ -1794,13 +1799,27 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, BasicBlock* block, Lsra //------------------------------------------------------------------------ // buildPhysRegRecords: Make an interval for each physical register // +static const regNumber lsraRegOrder[] = {REG_VAR_ORDER}; +const unsigned lsraRegOrderSize = ArrLen(lsraRegOrder); +static const regNumber lsraRegOrderFlt[] = {REG_VAR_ORDER_FLT}; +const unsigned lsraRegOrderFltSize = ArrLen(lsraRegOrderFlt); + void LinearScan::buildPhysRegRecords() { RegisterType regType = IntRegisterType; - for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg)) + for (unsigned int i = 0; i < lsraRegOrderSize; i++) + { + regNumber reg = lsraRegOrder[i]; + RegRecord* curr = &physRegs[reg]; + curr->init(reg); + curr->regOrder = (unsigned char)i; + } + for (unsigned int i = 0; i < lsraRegOrderFltSize; i++) { + regNumber reg = lsraRegOrderFlt[i]; RegRecord* curr = &physRegs[reg]; curr->init(reg); + curr->regOrder = (unsigned char)i; } } @@ -2139,6 +2158,7 @@ void LinearScan::buildIntervals() assert(inArgReg < REG_COUNT); mask = genRegMask(inArgReg); assignPhysReg(inArgReg, interval); + INDEBUG(registersToDump |= getRegMask(inArgReg, interval->registerType)); } RefPosition* pos = newRefPosition(interval, MinLocation, RefTypeParamDef, nullptr, mask); pos->setRegOptional(true); @@ -3029,59 +3049,84 @@ void LinearScan::setDelayFree(RefPosition* use) //------------------------------------------------------------------------ // BuildDelayFreeUses: Build Use RefPositions for an operand that might be contained, -// and which need to be marked delayRegFree +// and which may need to be marked delayRegFree // // Arguments: -// node - The node of interest +// node - The node of interest +// rmwNode - The node that has RMW semantics (if applicable) +// candidates - The set of candidates for the uses // // Return Value: // The number of source registers used by the *parent* of this node. // -int LinearScan::BuildDelayFreeUses(GenTree* node, regMaskTP candidates) +int LinearScan::BuildDelayFreeUses(GenTree* node, GenTree* rmwNode, regMaskTP candidates) { - RefPosition* use; + RefPosition* use = nullptr; + Interval* rmwInterval = nullptr; + bool rmwIsLastUse = false; + GenTree* addr = nullptr; + if ((rmwNode != nullptr) && isCandidateLocalRef(rmwNode)) + { + rmwInterval = getIntervalForLocalVarNode(rmwNode->AsLclVar()); + // Note: we don't handle multi-reg vars here. It's not clear that there are any cases + // where we'd encounter a multi-reg var in an RMW context. 
+ rmwIsLastUse = rmwNode->AsLclVar()->IsLastUse(0); + } if (!node->isContained()) { use = BuildUse(node, candidates); - setDelayFree(use); - return 1; } - if (node->OperIsHWIntrinsic()) + else if (node->OperIsHWIntrinsic()) { use = BuildUse(node->gtGetOp1(), candidates); - setDelayFree(use); - return 1; } - if (!node->OperIsIndir()) + else if (!node->OperIsIndir()) { return 0; } - GenTreeIndir* indirTree = node->AsIndir(); - GenTree* addr = indirTree->gtOp1; - if (!addr->isContained()) + else { - use = BuildUse(addr, candidates); - setDelayFree(use); - return 1; + GenTreeIndir* indirTree = node->AsIndir(); + addr = indirTree->gtOp1; + if (!addr->isContained()) + { + use = BuildUse(addr, candidates); + } + else if (!addr->OperIs(GT_LEA)) + { + return 0; + } } - if (!addr->OperIs(GT_LEA)) + if (use != nullptr) { - return 0; + if ((use->getInterval() != rmwInterval) || (!rmwIsLastUse && !use->lastUse)) + { + setDelayFree(use); + } + return 1; } + // If we reach here we have a contained LEA in 'addr'. + GenTreeAddrMode* const addrMode = addr->AsAddrMode(); unsigned srcCount = 0; if ((addrMode->Base() != nullptr) && !addrMode->Base()->isContained()) { use = BuildUse(addrMode->Base(), candidates); - setDelayFree(use); + if ((use->getInterval() != rmwInterval) || (!rmwIsLastUse && !use->lastUse)) + { + setDelayFree(use); + } srcCount++; } if ((addrMode->Index() != nullptr) && !addrMode->Index()->isContained()) { use = BuildUse(addrMode->Index(), candidates); - setDelayFree(use); + if ((use->getInterval() != rmwInterval) || (!rmwIsLastUse && !use->lastUse)) + { + setDelayFree(use); + } srcCount++; } return srcCount; diff --git a/src/coreclr/src/jit/lsraxarch.cpp b/src/coreclr/src/jit/lsraxarch.cpp index db787c55fdf282..133bcf28acf5a5 100644 --- a/src/coreclr/src/jit/lsraxarch.cpp +++ b/src/coreclr/src/jit/lsraxarch.cpp @@ -876,7 +876,7 @@ int LinearScan::BuildRMWUses(GenTreeOp* node, regMaskTP candidates) } else if (delayUseOperand == op1) { - srcCount += BuildDelayFreeUses(op1, op1Candidates); + srcCount += BuildDelayFreeUses(op1, op2, op1Candidates); } else { @@ -893,7 +893,7 @@ int LinearScan::BuildRMWUses(GenTreeOp* node, regMaskTP candidates) } else if (delayUseOperand == op2) { - srcCount += BuildDelayFreeUses(op2, op2Candidates); + srcCount += BuildDelayFreeUses(op2, op1, op2Candidates); } else { @@ -987,7 +987,7 @@ int LinearScan::BuildShiftRotate(GenTree* tree) { if (!shiftBy->isContained()) { - srcCount += BuildDelayFreeUses(shiftBy, RBM_RCX); + srcCount += BuildDelayFreeUses(shiftBy, source, RBM_RCX); buildKillPositionsForNode(tree, currentLoc + 1, RBM_RCX); } BuildDef(tree, dstCandidates); @@ -1778,7 +1778,7 @@ int LinearScan::BuildModDiv(GenTree* tree) srcCount = 1; } - srcCount += BuildDelayFreeUses(op2, allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX)); + srcCount += BuildDelayFreeUses(op2, op1, allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX)); buildInternalRegisterUses(); @@ -2341,8 +2341,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) tgtPrefUse = BuildUse(op1); srcCount += 1; - srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2); - srcCount += BuildDelayFreeUses(op3, RBM_XMM0); + srcCount += op2->isContained() ? BuildOperandUses(op2) : BuildDelayFreeUses(op2, op1); + srcCount += BuildDelayFreeUses(op3, op1, RBM_XMM0); buildUses = false; } @@ -2378,7 +2378,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) tgtPrefUse = BuildUse(op1); srcCount += 1; - srcCount += BuildDelayFreeUses(op2, varTypeIsByte(baseType) ? 
allByteRegs() : RBM_NONE); + srcCount += BuildDelayFreeUses(op2, op1, varTypeIsByte(baseType) ? allByteRegs() : RBM_NONE); buildUses = false; break; @@ -2395,7 +2395,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) { // op3 reg should be different from target reg to // store the lower half result after executing the instruction - srcCount += BuildDelayFreeUses(op3); + srcCount += BuildDelayFreeUses(op3, op1); // Need a internal register different from the dst to take the lower half result buildInternalIntRegisterDefForNode(intrinsicTree); setInternalRegsDelayFree = true; @@ -2431,7 +2431,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) srcCount += 1; srcCount += BuildOperandUses(op2); - srcCount += BuildDelayFreeUses(op3); + srcCount += BuildDelayFreeUses(op3, op1); } else if (op1->isContained()) { @@ -2440,7 +2440,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) tgtPrefUse = BuildUse(op3); srcCount += BuildOperandUses(op1); - srcCount += BuildDelayFreeUses(op2); + srcCount += BuildDelayFreeUses(op2, op1); srcCount += 1; } else @@ -2452,7 +2452,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) if (copiesUpperBits) { - srcCount += BuildDelayFreeUses(op2); + srcCount += BuildDelayFreeUses(op2, op1); } else { @@ -2460,7 +2460,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) srcCount += 1; } - srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3); + srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1); } buildUses = false; @@ -2475,7 +2475,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) // Any pair of the index, mask, or destination registers should be different srcCount += BuildOperandUses(op1); - srcCount += BuildDelayFreeUses(op2); + srcCount += BuildDelayFreeUses(op2, op1); // op3 should always be contained assert(op3->isContained()); diff --git a/src/coreclr/src/jit/target.h b/src/coreclr/src/jit/target.h index a2b5f93387662e..5d3e7ad96d14c3 100644 --- a/src/coreclr/src/jit/target.h +++ b/src/coreclr/src/jit/target.h @@ -1288,7 +1288,7 @@ typedef unsigned char regNumberSmall; REG_R6, REG_R7, REG_R8, REG_R9, REG_R10, \ REG_R11, REG_R13, REG_R14, \ REG_R12, REG_R15, REG_IP0, REG_IP1, \ - REG_CALLEE_SAVED_ORDER + REG_CALLEE_SAVED_ORDER, REG_LR #define REG_VAR_ORDER_FLT REG_V16, REG_V17, REG_V18, REG_V19, \ REG_V20, REG_V21, REG_V22, REG_V23, \ diff --git a/src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_13735/GitHub_13735.cs b/src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_13735/GitHub_13735.cs new file mode 100644 index 00000000000000..1a586001776eba --- /dev/null +++ b/src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_13735/GitHub_13735.cs @@ -0,0 +1,62 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ + +using System; +using System.Runtime.CompilerServices; + + +class GitHub_13735 +{ + [MethodImpl(MethodImplOptions.NoInlining)] + static int GetRandom() { + return 0; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void Print(int x) { + // Console.WriteLine(x); + } + + static void SampleA() + { + var a = GetRandom(); + var b = GetRandom(); + var c = GetRandom(); + var d = GetRandom(); + var e = GetRandom(); + var f = GetRandom(); + var g = GetRandom(); + var h = GetRandom(); + for (var x = 0; x < 100; ++x) + { + var xa = GetRandom(); + var xb = GetRandom(); + var xc = GetRandom(); + var xd = GetRandom(); + var xe = GetRandom(); + var xf = GetRandom(); + Print(xa); + Print(xb); + Print(xc); + Print(xd); + Print(xe); + Print(xf); + } + Print(a); + Print(b); + Print(c); + Print(d); + Print(e); + Print(f); + Print(g); + Print(h); + } + + public static int Main() + { + SampleA(); + return 100; + } +} diff --git a/src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_13735/GitHub_13735.csproj b/src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_13735/GitHub_13735.csproj new file mode 100644 index 00000000000000..986494e092b5aa --- /dev/null +++ b/src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_13735/GitHub_13735.csproj @@ -0,0 +1,12 @@ + + + Exe + + + + True + + + + + From f7a64bb6d2e33b4abeeed61c2447e6ac9987a448 Mon Sep 17 00:00:00 2001 From: Carol Eidt Date: Fri, 16 Oct 2020 11:49:57 -0700 Subject: [PATCH 02/17] More refactoring --- src/coreclr/src/jit/lsra.cpp | 302 +++++++++++++++++------------------ src/coreclr/src/jit/lsra.h | 16 ++ 2 files changed, 165 insertions(+), 153 deletions(-) diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp index 734fadd8f86fdc..52639cf2b3f0e2 100644 --- a/src/coreclr/src/jit/lsra.cpp +++ b/src/coreclr/src/jit/lsra.cpp @@ -288,6 +288,25 @@ void LinearScan::updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPo nextFixedRef[regRecord->regNum] = nextLocation; } +regMaskTP LinearScan::getMatchingConstants(regMaskTP mask, Interval* currentInterval, RefPosition* refPosition) +{ + assert(currentInterval->isConstant && RefTypeIsDef(refPosition->refType)); + regMaskTP candidates = (mask & m_RegistersWithConstants); + regMaskTP result = RBM_NONE; + while (candidates != RBM_NONE) + { + regMaskTP candidateBit = genFindLowestBit(candidates); + candidates &= ~candidateBit; + regNumber regNum = genRegNumFromMask(candidateBit); + RegRecord* physRegRecord = getRegisterRecord(regNum); + if (isMatchingConstant(physRegRecord, refPosition)) + { + result |= candidateBit; + } + } + return result; +} + void LinearScan::clearNextIntervalRef(regNumber reg, var_types regType) { nextIntervalRef[reg] = MaxLocation; @@ -3013,8 +3032,7 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos // position, which is one location past the use (getRefEndLocation() takes care of this). rangeEndLocation = rangeEndRefPosition->getRefEndLocation(); LsraLocation lastLocation = lastRefPosition->getRefEndLocation(); - regNumber prevReg = REG_NA; - RegRecord* prevRegRec = nullptr; + RegRecord* prevRegRec = currentInterval->assignedReg; //------------------------------------------------------------------------- // Register Selection @@ -3029,20 +3047,21 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos // enum RegisterScore { + FREE = 0x8000, // It is not currently assigned to an *active* interval + // These are the original criteria for comparing registers that are free. 
- VALUE_AVAILABLE = 0x8000, // It is a constant value that is already in an acceptable register. - COVERS = 0x4000, // It is in the interval's preference set and it covers the current range. - OWN_PREFERENCE = 0x2000, // It is in the preference set of this interval. - COVERS_RELATED = 0x1000, // It is in the preference set of the related interval and covers its entire lifetime. - RELATED_PREFERENCE = 0x0800, // It is in the preference set of the related interval. - CALLER_CALLEE = 0x0400, // It is in the right "set" for the interval (caller or callee-save). - UNASSIGNED = 0x0200, // It is not currently assigned to any (active or inactive) interval - COVERS_FULL = 0x0080, - BEST_FIT = 0x0040, - IS_PREV_REG = 0x0020, - REG_ORDER = 0x0010, - - FREE = 0x008, // It is not currently assigned to an *active* interval + VALUE_AVAILABLE = 0x4000, // It is a constant value that is already in an acceptable register. + THIS_ASSIGNED = 0x2000, // It is in the interval's preference set and it is already assigned to this interval. + COVERS = 0x1000, // It is in the interval's preference set and it covers the current range. + OWN_PREFERENCE = 0x0800, // It is in the preference set of this interval. + COVERS_RELATED = 0x0400, // It is in the preference set of the related interval and covers its entire lifetime. + RELATED_PREFERENCE = 0x0200, // It is in the preference set of the related interval. + CALLER_CALLEE = 0x0100, // It is in the right "set" for the interval (caller or callee-save). + UNASSIGNED = 0x0080, // It is not currently assigned to any (active or inactive) interval + COVERS_FULL = 0x0040, + BEST_FIT = 0x0020, + IS_PREV_REG = 0x0010, + REG_ORDER = 0x0008, // These are the original criteria for comparing registers that are in use. SPILL_COST_THIS = 0x004, // Its spill cost is lower than 'thisSpillCost' @@ -3056,97 +3075,64 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos LsraLocation farRefLocation = MinLocation; LsraLocation nextPhysRefLocation = MaxLocation; - // Handle the common case where there is only one candidate - - // avoid looping over all the other registers - bool useAssignedReg = false; - if (refPosition->isFixedRegRef && (candidates == refPosition->registerAssignment)) - { - foundReg = genRegNumFromMask(candidates); + // Eliminate candidates that are in-use or busy. + // Note that a fixed reference will appear to be in use due to the RefTypeFixedReg before it, + // but we ignore those. + regMaskTP busyRegs = regsBusyUntilKill | regsInUseThisLocation; + candidates &= ~busyRegs; - // Skip searching the registers, and just define the info for the single candidate. - candidates &= ~refPosition->registerAssignment; - availablePhysRegRecord = getRegisterRecord(foundReg); - bool isAvailable = isRegAvailable(foundReg, currentInterval->registerType); - if ((currentInterval->assignedReg == availablePhysRegRecord) && - ((availablePhysRegRecord->assignedInterval == currentInterval) || isAvailable)) + // Also eliminate as busy any register with a conflicting fixed reference at this or + // the next location. + // Note that this will eliminate the fixedReg, if any, but we'll add it back below. 
+ regMaskTP checkConflictMask = candidates; + while (checkConflictMask != RBM_NONE) + { + regMaskTP checkConflictBit = genFindLowestBit(checkConflictMask); + checkConflictMask &= ~checkConflictBit; + regNumber checkConflictReg = genRegNumFromMask(checkConflictBit); + LsraLocation nextPhysRefLocation = nextFixedRef[checkConflictReg]; + if ((nextPhysRefLocation == currentLocation) || + (refPosition->delayRegFree && nextPhysRefLocation == (currentLocation + 1))) { - useAssignedReg = true; + candidates &= ~checkConflictBit; } - else + // By chance, is this register already holding this interval, as a copyReg or having + // been restored as inactive after a kill? + else if (getRegisterRecord(checkConflictReg)->assignedInterval == currentInterval) { - if (isAvailable) - { - bestScore |= FREE; - if (isMatchingConstant(availablePhysRegRecord, refPosition)) - { - bestScore |= VALUE_AVAILABLE; - } - else if ((availablePhysRegRecord->assignedInterval == nullptr) || - (availablePhysRegRecord->assignedInterval->getNextRefLocation() > lastLocation)) - { - bestScore |= UNASSIGNED; - } - } + candidates = checkConflictBit; + break; + } } - else if (currentInterval->assignedReg != nullptr) + + // Add back the fixedReg, if any. + regMaskTP fixedRegMask = RBM_NONE; + if (refPosition->isFixedRegRef) { - // This was an interval that was previously allocated to the given - // physical register, and we should try to allocate it to that register - // again, if possible and reasonable. - // Use it preemptively (i.e. before checking other available regs) - // only if it is preferred and available. + assert(genMaxOneBit(refPosition->registerAssignment)); + candidates |= refPosition->registerAssignment; + } - availablePhysRegRecord = currentInterval->assignedReg; - foundReg = availablePhysRegRecord->regNum; - regMaskTP assignedRegBit = genRegMask(foundReg); + // At this point we have determined all the candidates that can be considered, + // and we'll begin to apply the heuristics in order. - // Is it in the preferred set of regs? - if ((assignedRegBit & preferences) != RBM_NONE) - { - // Is it currently available? - LsraLocation nextPhysRefLoc; - if (availablePhysRegRecord->assignedInterval == currentInterval) - { - assert(!isRegBusy(foundReg, currentInterval->registerType) && - !isRegInUse(foundReg, currentInterval->registerType) && - !conflictingFixedRegReference(foundReg, refPosition)); - useAssignedReg = true; - } - else if (isRegBusy(foundReg, currentInterval->registerType) || - isRegInUse(foundReg, currentInterval->registerType)) - { - candidates &= ~assignedRegBit; - } - else if (registerIsAvailable(availablePhysRegRecord, currentLocation, &nextPhysRefLoc, - currentInterval->registerType)) - { - useAssignedReg = !conflictingFixedRegReference(foundReg, refPosition); - } - } - else if (!refPosition->copyReg && (availablePhysRegRecord->assignedInterval == currentInterval)) - { - useAssignedReg = true; - } - if (!useAssignedReg && !refPosition->copyReg) - { - prevReg = foundReg; - // Don't keep trying to allocate to this register. Note, though, that we keep in in the candidates - // (unless removed above because it's in use or busy) in case we can't find anything better. - if (!currentInterval->isActive && (availablePhysRegRecord->assignedInterval == currentInterval)) - { - unassignPhysReg(availablePhysRegRecord, nullptr); - } - availablePhysRegRecord = nullptr; - currentInterval->assignedReg = nullptr; - foundReg = REG_NA; - } + // Apply the FREE heuristic.
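As an aside, a minimal standalone sketch of the mask arithmetic behind this FREE heuristic (simplified names, not the JIT's declarations, and assuming an ARM double candidate is represented by the mask bit of its even-numbered half):

// Illustrative sketch only: compute which candidate registers are currently free.
// One bit per register; for a double, the odd half (the next bit up) must also be free.
typedef unsigned long long RegMaskSketch;

RegMaskSketch freeCandidatesSketch(RegMaskSketch candidates, RegMaskSketch availableRegs, bool isDouble)
{
    RegMaskSketch result = candidates & availableRegs;
    if (isDouble)
    {
        // Shifting right by one aligns each register's "next" bit with its own position,
        // so this keeps only candidates whose odd half is also available.
        result &= (availableRegs >> 1);
    }
    return result;
}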
+ regMaskTP freeCandidates = applyFreeHeuristic(candidates, currentInterval->registerType); + if (candidates == RBM_NONE) + { + assert(refPosition->RegOptional()); + return REG_NA; } - if (useAssignedReg) + + // Apply the VALUE_AVAILABLE (matching constant) heuristic. + regMaskTP matchingConstants = RBM_NONE; + if (freeCandidates != RBM_NONE) { - assignPhysReg(availablePhysRegRecord, currentInterval); - refPosition->registerAssignment = genRegMask(foundReg); - return foundReg; + candidates = freeCandidates; + if (currentInterval->isConstant && RefTypeIsDef(refPosition->refType)) + { + matchingConstants = getMatchingConstants(candidates, currentInterval, refPosition); + } } // Compute the best possible score so we can stop looping early if we find it. @@ -3154,8 +3140,8 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos // probably not until we've tuned the order of these criteria. At that point, // we'll need to avoid the short-circuit if we've got a stress option to reverse // the selection. - int bestPossibleScore = - COVERS + UNASSIGNED + OWN_PREFERENCE + CALLER_CALLEE + FREE + SPILL_COST_THIS + SPILL_COST_OTHERS; + int bestPossibleScore = COVERS + UNASSIGNED + OWN_PREFERENCE + THIS_ASSIGNED + CALLER_CALLEE + FREE + + SPILL_COST_THIS + SPILL_COST_OTHERS; if (currentInterval->isConstant) { bestPossibleScore |= VALUE_AVAILABLE; @@ -3182,16 +3168,6 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos // RefPosition will be selected as spill candidate and its weight as the bestSpillWeight. bestSpillWeight = refPosition->RegOptional() ? thisSpillWeight : BB_MAX_WEIGHT; - // Eliminate candidates that are in-use or busy. - // Note that a fixed reference will appear to be in use due to the RefTypeFixedReg before it, - // but we ignore those. - candidates &= ~regsBusyUntilKill; - candidates &= ~regsInUseThisLocation; - if (refPosition->isFixedRegRef && candidates != RBM_NONE) - { - assert(genMaxOneBit(refPosition->registerAssignment)); - candidates |= refPosition->registerAssignment; - } while (candidates != RBM_NONE) { regMaskTP candidateBit = genFindLowestBit(candidates); @@ -3206,30 +3182,22 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos int score = 0; bool isBetter = false; - // By chance, is this register already holding this interval, as a copyReg or having - // been restored as inactive after a kill? 
- if (physRegRecord->assignedInterval == currentInterval) - { - availablePhysRegRecord = physRegRecord; - foundReg = regNum; - break; - } - bool isFixedRef = refPosition->isFixedRefOfRegMask(candidateBit); - if (isFixedRef) - { - nextPhysRefLocation = Min(nextFixedRef[regNum], nextIntervalRef[regNum]); - } - else if (((nextFixedRef[regNum] == refPosition->nodeLocation) || - (refPosition->delayRegFree && nextFixedRef[regNum] == (refPosition->nodeLocation + 1)))) - { - continue; - } - int comparisonScore = bestScore; // Find the next RefPosition of the physical register - if (isFixedRef || registerIsAvailable(physRegRecord, currentLocation, &nextPhysRefLocation, regType)) + nextPhysRefLocation = Min(nextFixedRef[regNum], nextIntervalRef[regNum]); +#ifdef TARGET_ARM + if (currentInterval->registerType == TYP_DOUBLE) + { + LsraLocation otherNextPhysRefLocation = Min(nextFixedRef[regNum + 1], nextIntervalRef[regNum + 1]); + nextPhysRefLocation = Min(nextPhysRefLocation, otherNextPhysRefLocation); + } +#endif // TARGET_ARM + + if ((freeCandidates & candidateBit) != RBM_NONE) { + score |= FREE; + // If this is a definition of a constant interval, check to see if its value is already in this register. if (currentInterval->isConstant && RefTypeIsDef(refPosition->refType) && isMatchingConstant(physRegRecord, refPosition)) @@ -3255,6 +3223,12 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos if ((candidateBit & preferences) != RBM_NONE) { score |= OWN_PREFERENCE; + + if (prevRegRec == physRegRecord) + { + score |= THIS_ASSIGNED; + } + if (nextPhysRefLocation > rangeEndLocation) { score |= COVERS; @@ -3301,13 +3275,11 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos LsraLocation nextIntervalLoc = getNextIntervalRefLocation(regNum ARM_ARG(currentInterval->registerType)); if (nextIntervalLoc == MaxLocation) { - if ((physRegRecord->assignedInterval == nullptr) || - (physRegRecord->assignedInterval->getNextRefLocation() > lastLocation)) - { - score |= UNASSIGNED; - } + assert((physRegRecord->assignedInterval == nullptr) || + (physRegRecord->assignedInterval->physReg != regNum) || + (physRegRecord->assignedInterval->getNextRefLocation() > lastLocation)); } - else if (nextIntervalLoc > lastLocation) + if (nextIntervalLoc > lastLocation) { score |= UNASSIGNED; } @@ -3357,7 +3329,7 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos } // Oddly, the previous heuristics only considered this if both covered the range. - if ((prevReg == regNum) && ((score & COVERS_FULL) != 0)) + if ((prevRegRec != nullptr) && (prevRegRec->regNum == regNum) && ((score & COVERS_FULL) != 0)) { score |= IS_PREV_REG; } @@ -3367,8 +3339,6 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos score |= REG_ORDER; comparisonScore &= ~REG_ORDER; } - - score |= FREE; } else { @@ -3381,20 +3351,24 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos } // We can't spill a register that's next used at this location, unless it's regOptional. - if ((nextPhysRefLocation == refPosition->nodeLocation) || - (refPosition->delayRegFree && nextPhysRefLocation == (refPosition->nodeLocation + 1))) + // TODO: Make this cheaper? 
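For intuition only, here is a hedged sketch (simplified names, not the allocator's real data structures) of the busy-register path described above: among the registers whose intervals may be spilled, prefer the lowest spill cost and, on a tie, the farthest next reference.

// Illustrative sketch only: pick a spill candidate from a mask of busy registers.
#include <climits>

int pickSpillCandidateSketch(unsigned long long busyCandidates,
                             const unsigned spillCost[], // per-register weight of spilling its interval
                             const unsigned nextRef[])   // per-register location of its next reference
{
    int      bestReg  = -1;
    unsigned bestCost = UINT_MAX;
    unsigned bestNext = 0;
    for (int reg = 0; reg < 64; reg++)
    {
        if ((busyCandidates & (1ULL << reg)) == 0)
        {
            continue;
        }
        // Cheapest interval to spill wins; a farther next reference breaks ties.
        bool better = (spillCost[reg] < bestCost) ||
                      ((spillCost[reg] == bestCost) && (nextRef[reg] > bestNext));
        if (better)
        {
            bestReg  = reg;
            bestCost = spillCost[reg];
            bestNext = nextRef[reg];
        }
    }
    return bestReg; // -1 if there was no candidate
}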
+ if (!refPosition->isFixedRegRef) { - if (!physRegRecord->assignedInterval->getNextRefPosition()->RegOptional()) + if ((nextPhysRefLocation == refPosition->nodeLocation) || + (refPosition->delayRegFree && nextPhysRefLocation == (refPosition->nodeLocation + 1))) { - continue; + if (!physRegRecord->assignedInterval->getNextRefPosition()->RegOptional()) + { + continue; + } } - } - // Can and should the interval in this register be spilled for this one, - // if we don't find a better alternative? - if (!isSpillCandidate(currentInterval, refPosition, physRegRecord)) - { - continue; + // Can and should the interval in this register be spilled for this one, + // if we don't find a better alternative? + if (!isSpillCandidate(currentInterval, refPosition, physRegRecord)) + { + continue; + } } currentSpillWeight = spillCost[regNum]; @@ -3495,12 +3469,29 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos } } - if (availablePhysRegRecord != nullptr) + if ((prevRegRec != nullptr) && (foundReg != prevRegRec->regNum) && !refPosition->copyReg) + { + if ((prevRegRec->assignedInterval == currentInterval)) + { + unassignPhysReg(prevRegRec, nullptr); + } + currentInterval->assignedReg = nullptr; + } + + if (foundReg == REG_NA) + { + assert(refPosition->RegOptional()); + return REG_NA; + } + + // FOUND: + availablePhysRegRecord = getRegisterRecord(foundReg); + regMaskTP foundRegBit = genRegMask(foundReg); { Interval* assignedInterval = availablePhysRegRecord->assignedInterval; if ((assignedInterval != currentInterval) && isAssigned(availablePhysRegRecord ARM_ARG(regType))) { - if ((bestScore & FREE) == 0) + if ((foundRegBit & freeCandidates) == RBM_NONE) { // We're spilling. #ifdef TARGET_ARM @@ -3523,16 +3514,21 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos } else { + // If we considered this "unassigned" because this interval's lifetime ends before + // the next ref, remember it. + // For historical reasons (due to former short-circuiting of this case), if we're reassigning + // the current interval to a previous assignment, we don't remember the previous interval. + // Note that we need to compute this condition before calling unassignPhysReg, which wil reset + // assignedInterval->physReg. + bool wasAssigned = (((bestScore & UNASSIGNED) != 0) && ((bestScore & THIS_ASSIGNED) == 0) && + (assignedInterval != nullptr) && (assignedInterval->physReg == foundReg)); unassignPhysReg(availablePhysRegRecord ARM_ARG(currentInterval->registerType)); - - if ((bestScore & VALUE_AVAILABLE) != 0 && assignedInterval != nullptr) + if ((matchingConstants & foundRegBit) != RBM_NONE) { assert(assignedInterval->isConstant); refPosition->treeNode->SetReuseRegVal(); } - // If we considered this "unassigned" because this interval's lifetime ends before - // the next ref, remember it. 
- else if ((bestScore & UNASSIGNED) != 0 && assignedInterval != nullptr) + else if (wasAssigned) { updatePreviousInterval(availablePhysRegRecord, assignedInterval, assignedInterval->registerType); } diff --git a/src/coreclr/src/jit/lsra.h b/src/coreclr/src/jit/lsra.h index 7ddf45fc1c5d08..ad961c0a4aca4b 100644 --- a/src/coreclr/src/jit/lsra.h +++ b/src/coreclr/src/jit/lsra.h @@ -1134,6 +1134,21 @@ class LinearScan : public LinearScanInterface * Register management ****************************************************************************/ RegisterType getRegisterType(Interval* currentInterval, RefPosition* refPosition); + + regMaskTP applyFreeHeuristic(regMaskTP candidates, var_types regType) + { + regMaskTP result = candidates & m_AvailableRegs; +#ifdef TARGET_ARM + // For TYP_DOUBLE on ARM, we can only use register for which the odd half is + // also available. + if (regType == TYP_DOUBLE) + { + result &= (m_AvailableRegs << 1); + } +#endif // TARGET_ARM + return result; + } + regNumber allocateReg(Interval* current, RefPosition* refPosition); regNumber assignCopyReg(RefPosition* refPosition); @@ -1566,6 +1581,7 @@ class LinearScan : public LinearScanInterface regMaskTP regMask = getRegMask(reg, regType); return (m_RegistersWithConstants & regMask) == regMask; } + regMaskTP getMatchingConstants(regMaskTP mask, Interval* currentInterval, RefPosition* refPosition); LsraLocation nextFixedRef[REG_COUNT]; void updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPosition); From 54dabe2691526eefdbfd8c4880ee5f5e6b890efa Mon Sep 17 00:00:00 2001 From: Carol Eidt Date: Fri, 6 Nov 2020 10:57:45 -0800 Subject: [PATCH 03/17] More refactoring to separate evaluation of heuristics. --- src/coreclr/src/jit/lsra.cpp | 864 +++++++++++++++++------------- src/coreclr/src/jit/lsra.h | 1 + src/coreclr/src/jit/lsrabuild.cpp | 8 +- 3 files changed, 488 insertions(+), 385 deletions(-) diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp index 52639cf2b3f0e2..ea7c4cc67b3e14 100644 --- a/src/coreclr/src/jit/lsra.cpp +++ b/src/coreclr/src/jit/lsra.cpp @@ -280,10 +280,12 @@ void LinearScan::updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPo if (nextRefPosition == nullptr) { nextLocation = MaxLocation; + fixedRegs &= ~genRegMask(regRecord->regNum); } else { nextLocation = nextRefPosition->nodeLocation; + fixedRegs |= genRegMask(regRecord->regNum); } nextFixedRef[regRecord->regNum] = nextLocation; } @@ -3047,502 +3049,617 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos // enum RegisterScore { - FREE = 0x8000, // It is not currently assigned to an *active* interval + FREE = 0x10000, // It is not currently assigned to an *active* interval // These are the original criteria for comparing registers that are free. - VALUE_AVAILABLE = 0x4000, // It is a constant value that is already in an acceptable register. - THIS_ASSIGNED = 0x2000, // It is in the interval's preference set and it is already assigned to this interval. - COVERS = 0x1000, // It is in the interval's preference set and it covers the current range. - OWN_PREFERENCE = 0x0800, // It is in the preference set of this interval. - COVERS_RELATED = 0x0400, // It is in the preference set of the related interval and covers its entire lifetime. - RELATED_PREFERENCE = 0x0200, // It is in the preference set of the related interval. - CALLER_CALLEE = 0x0100, // It is in the right "set" for the interval (caller or callee-save).
- UNASSIGNED = 0x0080, // It is not currently assigned to any (active or inactive) interval - COVERS_FULL = 0x0040, - BEST_FIT = 0x0020, - IS_PREV_REG = 0x0010, - REG_ORDER = 0x0008, + VALUE_AVAILABLE = 0x8000, // It is a constant value that is already in an acceptable register. + THIS_ASSIGNED = 0x4000, // It is in the interval's preference set and it is already assigned to this interval. + COVERS = 0x2000, // It is in the interval's preference set and it covers the current range. + OWN_PREFERENCE = 0x1000, // It is in the preference set of this interval. + COVERS_RELATED = 0x0800, // It is in the preference set of the related interval and covers its entire lifetime. + RELATED_PREFERENCE = 0x0400, // It is in the preference set of the related interval. + CALLER_CALLEE = 0x0200, // It is in the right "set" for the interval (caller or callee-save). + UNASSIGNED = 0x0100, // It is not currently assigned to any (active or inactive) interval + COVERS_FULL = 0x0080, + BEST_FIT = 0x0040, + IS_PREV_REG = 0x0020, + REG_ORDER = 0x0010, // These are the original criteria for comparing registers that are in use. - SPILL_COST_THIS = 0x004, // Its spill cost is lower than 'thisSpillCost' - SPILL_COST_OTHERS = 0x002, // It has a lower spill cost than the best candidate thus far - FAR_NEXT_REF = 0x001, // It has a farther next reference than the best candidate thus far. + SPILL_COST = 0x0008, // It has the lowest cost of all the candidates. + FAR_NEXT_REF = 0x0004, // It has a farther next reference than the best candidate thus far. + PREV_REG_OPT = 0x0002, // The previous RefPosition of its current assigned interval is RegOptional. + REG_NUM = 0x0001, // It has a lower register number. }; int bestScore = 0; - unsigned int bestSpillWeight; LsraLocation bestLocation = MinLocation; LsraLocation farRefLocation = MinLocation; - LsraLocation nextPhysRefLocation = MaxLocation; - // Eliminate candidates that are in-use or busy. - // Note that a fixed reference will appear to be in use due to the RefTypeFixedReg before it, - // but we ignore those. - regMaskTP busyRegs = regsBusyUntilKill | regsInUseThisLocation; - candidates &= ~busyRegs; - - // Also eliminate as busy any register with a conflicting fixed reference at this or - // the next location. - // Note that this will eliminate the fixedReg, if any, but we'll add it back below. - regMaskTP checkConflictMask = candidates; - while (checkConflictMask != RBM_NONE) - { - regMaskTP checkConflictBit = genFindLowestBit(checkConflictMask); - checkConflictMask &= ~checkConflictBit; - regNumber checkConflictReg = genRegNumFromMask(checkConflictBit); - LsraLocation nextPhysRefLocation = nextFixedRef[checkConflictReg]; - if ((nextPhysRefLocation == currentLocation) || - (refPosition->delayRegFree && nextPhysRefLocation == (currentLocation + 1))) - { - candidates &= ~checkConflictBit; - } - // By chance, is this register already holding this interval, as a copyReg or having - // been restored as inactive after a kill? - else if (getRegisterRecord(checkConflictReg)->assignedInterval == currentInterval) - { - candidates = checkConflictBit; - break; - } - } + // These are used in the post-selection updates, and must be set for any selection. + regMaskTP freeCandidates = RBM_NONE; + regMaskTP matchingConstants = RBM_NONE; + + // These are set prior to their use in the associated heuristics. + unsigned int thisSpillWeight = 0; + regMaskTP unassignedSet = RBM_NONE; - // Add back the fixedReg, if any. 
+ // We'll set this to short-circuit remaining heuristics when we have a single candidate. + bool found = false; + + // Is this a fixedReg? regMaskTP fixedRegMask = RBM_NONE; if (refPosition->isFixedRegRef) { assert(genMaxOneBit(refPosition->registerAssignment)); - candidates |= refPosition->registerAssignment; + fixedRegMask = refPosition->registerAssignment; + if (candidates == refPosition->registerAssignment) + { + found = true; + if (nextIntervalRef[genRegNumFromMask(candidates)] > lastLocation) + { + unassignedSet = candidates; + } + } } - // At this pointwe have determined all the candidates that can be considered, - // and we'll being to apply the heuristics in order. - - // Apply the FREE heuristic. - regMaskTP freeCandidates = applyFreeHeuristic(candidates, currentInterval->registerType); - if (candidates == RBM_NONE) + // Eliminate candidates that are in-use or busy. + if (!found) { - assert(refPosition->RegOptional()); - return REG_NA; - } + regMaskTP busyRegs = regsBusyUntilKill | regsInUseThisLocation; + candidates &= ~busyRegs; - // Apply the VALUE_AVAILABLE (matching constant) heuristic. - regMaskTP matchingConstants = RBM_NONE; - if (freeCandidates != RBM_NONE) - { - candidates = freeCandidates; - if (currentInterval->isConstant && RefTypeIsDef(refPosition->refType)) + // Also eliminate as busy any register with a conflicting fixed reference at this or + // the next location. + // Note that this will eliminate the fixedReg, if any, but we'll add it back below. + regMaskTP checkConflictMask = candidates & fixedRegs; + while (checkConflictMask != RBM_NONE) { - matchingConstants = getMatchingConstants(candidates, currentInterval, refPosition); + regMaskTP checkConflictBit = genFindLowestBit(checkConflictMask); + checkConflictMask &= ~checkConflictBit; + regNumber checkConflictReg = genRegNumFromMask(checkConflictBit); + LsraLocation checkConflictLocation = nextFixedRef[checkConflictReg]; + + if ((checkConflictLocation == currentLocation) || + (refPosition->delayRegFree && (checkConflictLocation == (currentLocation + 1)))) + { + candidates &= ~checkConflictBit; + } } + candidates |= fixedRegMask; + found = isSingleRegister(candidates); } - // Compute the best possible score so we can stop looping early if we find it. - // TODO-Throughput: At some point we may want to short-circuit the computation of each score, but - // probably not until we've tuned the order of these criteria. At that point, - // we'll need to avoid the short-circuit if we've got a stress option to reverse - // the selection. - int bestPossibleScore = COVERS + UNASSIGNED + OWN_PREFERENCE + THIS_ASSIGNED + CALLER_CALLEE + FREE + - SPILL_COST_THIS + SPILL_COST_OTHERS; - if (currentInterval->isConstant) + // By chance, is prevRegRec already holding this interval, as a copyReg or having + // been restored as inactive after a kill? + regMaskTP prevRegBit = RBM_NONE; + if (!found && (prevRegRec != nullptr)) { - bestPossibleScore |= VALUE_AVAILABLE; + prevRegBit = genRegMask(prevRegRec->regNum); + if ((prevRegRec->assignedInterval == currentInterval) && ((candidates & prevRegBit) != RBM_NONE)) + { + candidates = prevRegBit; + found = true; + } } - if (relatedPreferences != RBM_NONE) + + if (!found && (candidates == RBM_NONE)) { - bestPossibleScore |= RELATED_PREFERENCE + COVERS_RELATED; + assert(refPosition->RegOptional()); + currentInterval->assignedReg = nullptr; + return REG_NA; } + // TODO-Cleanup: Previously, the "reverseSelect" stress mode modified some of the heuristics. 
+ // that needs to be re-engineered with this refactoring. // In non-debug builds, this will simply get optimized away bool reverseSelect = false; #ifdef DEBUG reverseSelect = doReverseSelect(); #endif // DEBUG - // The spill weight for 'refPosition' (the one we're allocating now). - unsigned int thisSpillWeight = refPosition->IsActualRef() ? getWeight(refPosition) : 0; - // 'bestSpillWeight' is the spill weight for the best candidate we've found so far. - // If allocating a reg is optional, we will consider those ref positions - // whose weight is less than 'refPosition' for spilling. - // If allocating a reg is a must, we start off with max weight so that the first spill - // candidate will be selected based on farthest distance alone. - // Since we start off with 'farRefLocation' initialized to 'MinLocation', the first available - // RefPosition will be selected as spill candidate and its weight as the bestSpillWeight. - bestSpillWeight = refPosition->RegOptional() ? thisSpillWeight : BB_MAX_WEIGHT; + // At this point we have determined all the candidates that CAN be considered, + // and we'll begin to apply the heuristics in order. - while (candidates != RBM_NONE) + // Apply the FREE heuristic. + freeCandidates = applyFreeHeuristic(candidates, currentInterval->registerType); + if (freeCandidates == RBM_NONE) { - regMaskTP candidateBit = genFindLowestBit(candidates); - candidates &= ~candidateBit; - regNumber regNum = genRegNumFromMask(candidateBit); - - // The spill weight for the register we're currently evaluating. - unsigned int currentSpillWeight = BB_ZERO_WEIGHT; - - RegRecord* physRegRecord = getRegisterRecord(regNum); - - int score = 0; - bool isBetter = false; - - int comparisonScore = bestScore; - - // Find the next RefPosition of the physical register - nextPhysRefLocation = Min(nextFixedRef[regNum], nextIntervalRef[regNum]); -#ifdef TARGET_ARM - if (currentInterval->registerType == TYP_DOUBLE) + // We won't spill if this refPosition is not an actual ref. + if (!refPosition->IsActualRef()) { - LsraLocation otherNextPhysRefLocation = Min(nextFixedRef[regNum + 1], nextIntervalRef[regNum + 1]); - nextPhysRefLocation = Min(nextPhysRefLocation, otherNextPhysRefLocation); + currentInterval->assignedReg = nullptr; + return REG_NA; } -#endif // TARGET_ARM + } + else if (!found) + { + bestScore |= FREE; + candidates = freeCandidates; + found = isSingleRegister(candidates); + } - if ((freeCandidates & candidateBit) != RBM_NONE) + // Apply the VALUE_AVAILABLE (matching constant) heuristic. Only applies if we have freeCandidates. + // Note that we always need to define the 'matchingConstants' set. + if (freeCandidates != RBM_NONE) + { + if (currentInterval->isConstant && RefTypeIsDef(refPosition->refType)) { - score |= FREE; - - // If this is a definition of a constant interval, check to see if its value is already in this register. - if (currentInterval->isConstant && RefTypeIsDef(refPosition->refType) && - isMatchingConstant(physRegRecord, refPosition)) - { - score |= VALUE_AVAILABLE; - } - else if (bestScore & VALUE_AVAILABLE) + matchingConstants = getMatchingConstants(candidates, currentInterval, refPosition); + if (matchingConstants != RBM_NONE) { - continue; + bestScore |= VALUE_AVAILABLE; + candidates = matchingConstants; + found = isSingleRegister(candidates); } + } + } - // If the nextPhysRefLocation is a fixedRef for the rangeEndRefPosition, increment it so that - // we don't think it isn't covering the live range. 
- // This doesn't handle the case where earlier RefPositions for this Interval are also - // FixedRefs of this regNum, but at least those are only interesting in the case where those - // are "local last uses" of the Interval - otherwise the liveRange would interfere with the reg. - if (nextPhysRefLocation == rangeEndLocation && rangeEndRefPosition->isFixedRefOfReg(regNum)) + // Apply the THIS_ASSIGNED heuristic. Only applies if we have freeCandidates. + if (!found && (prevRegRec != nullptr) && (freeCandidates != RBM_NONE)) + { + regMaskTP freePrefCandidates = (candidates & preferences & freeCandidates); + if (freePrefCandidates != RBM_NONE) + { + regMaskTP prevRegMask = genRegMask(prevRegRec->regNum); + if ((freePrefCandidates & prevRegMask) != RBM_NONE) { - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_INCREMENT_RANGE_END, currentInterval)); - nextPhysRefLocation++; + bestScore |= THIS_ASSIGNED; + candidates = prevRegMask; + found = true; } + } + } - if ((candidateBit & preferences) != RBM_NONE) - { - score |= OWN_PREFERENCE; + // Apply the COVERS heuristic. Only applies if we have freeCandidates in the preference set. + // We will also compute the OWN_PREFERENCE, COVERS_RELATED, COVERS_FULL and UNASSIGNED sets, as they all + // require similar computation. + regMaskTP preferenceSet = RBM_NONE; + regMaskTP coversRelatedSet = RBM_NONE; + regMaskTP coversFullSet = RBM_NONE; + if (freeCandidates != RBM_NONE) + { + preferenceSet = (candidates & preferences); + regMaskTP coversSet = RBM_NONE; + regMaskTP coversCandidates = (preferenceSet == RBM_NONE) ? candidates : preferenceSet; + for (; coversCandidates != RBM_NONE; ) + { + regMaskTP coversCandidateBit = genFindLowestBit(coversCandidates); + coversCandidates &= ~coversCandidateBit; + regNumber coversCandidateRegNum = genRegNumFromMask(coversCandidateBit); - if (prevRegRec == physRegRecord) + // If we have a single candidate we don't need to compute the preference-related sets, but we + // do need to compute the unassignedSet. + if (!found) + { + // Find the next RefPosition of the register. + LsraLocation nextIntervalLocation = nextIntervalRef[coversCandidateRegNum]; + LsraLocation coversCandidateLocation = Min(nextFixedRef[coversCandidateRegNum], nextIntervalLocation); +#ifdef TARGET_ARM + if (currentInterval->registerType == TYP_DOUBLE) { - score |= THIS_ASSIGNED; + LsraLocation otherNextPhysRefLocation = Min(nextFixedRef[coversCandidateRegNum + 1], nextIntervalRef[coversCandidateRegNum + 1]); + coversCandidateLocation = Min(coversCandidateLocation, otherNextPhysRefLocation); } - - if (nextPhysRefLocation > rangeEndLocation) +#endif // TARGET_ARM + // If the nextPhysRefLocation is a fixedRef for the rangeEndRefPosition, increment it so that + // we don't think it isn't covering the live range. + // This doesn't handle the case where earlier RefPositions for this Interval are also + // FixedRefs of this regNum, but at least those are only interesting in the case where those + // are "local last uses" of the Interval - otherwise the liveRange would interfere with the reg. 
+ if (coversCandidateLocation == rangeEndLocation && rangeEndRefPosition->isFixedRefOfReg(coversCandidateRegNum)) { - score |= COVERS; + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_INCREMENT_RANGE_END, currentInterval)); + coversCandidateLocation++; } - else if ((bestScore & ~(COVERS - 1)) > score) + if (coversCandidateLocation > rangeEndLocation) { - continue; + coversSet |= coversCandidateBit; } - } - else if ((bestScore & ~(OWN_PREFERENCE - 1)) > score) - { - continue; - } - - if ((candidateBit & relatedPreferences) != RBM_NONE) - { - score |= RELATED_PREFERENCE; - if (nextPhysRefLocation > relatedInterval->lastRefPosition->nodeLocation) + if ((coversCandidateBit & relatedPreferences) != RBM_NONE) + { + if (coversCandidateLocation > relatedInterval->lastRefPosition->nodeLocation) + { + coversRelatedSet |= coversCandidateBit; + } + } + else if (coversCandidateBit == refPosition->registerAssignment) { - score |= COVERS_RELATED; + // If we had a fixed-reg def of a reg that will be killed before the use, prefer it to any other + // registers with the same score. (Note that we haven't changed the original registerAssignment + // on the RefPosition). + // Overload the RELATED_PREFERENCE value. + // TODO-CQ: Consider if this should be split out. + coversRelatedSet |= coversCandidateBit; + } + // Does this cover the full range of the interval? + if (coversCandidateLocation > lastLocation) + { + coversFullSet |= coversCandidateBit; } } - else if (candidateBit == refPosition->registerAssignment) + // The register is considered unassigned if it has no assignedInterval, OR + // if its next reference is beyond the range of this interval. + if (nextIntervalRef[coversCandidateRegNum] > lastLocation) { - // If we had a fixed-reg def of a reg that will be killed before the use, prefer it to any other - // registers with the same score. (Note that we haven't changed the original registerAssignment - // on the RefPosition). - // Overload the RELATED_PREFERENCE value. - score |= RELATED_PREFERENCE; + unassignedSet |= coversCandidateBit; } + } + if ((coversSet & preferences) != RBM_NONE) + { + bestScore |= COVERS; + candidates = coversSet & preferences; + found = isSingleRegister(candidates); + } + } - if ((candidateBit & callerCalleePrefs) != RBM_NONE) - { - score |= CALLER_CALLEE; - } + // Apply the OWN_PREFERENCE heuristic. Only applies if we have freeCandidates. + if (!found && (preferenceSet != RBM_NONE)) + { + preferenceSet = candidates & preferenceSet; + if (preferenceSet != RBM_NONE) + { + bestScore |= OWN_PREFERENCE; + candidates = preferenceSet; + found = isSingleRegister(candidates); + } + } - // The register is considered unassigned if it has no assignedInterval, OR - // if its next reference is beyond the range of this interval. + // Apply the COVERS_RELATED heuristic. Only applies if we have freeCandidates. + if (!found && (coversRelatedSet != RBM_NONE)) + { + coversRelatedSet = candidates & coversRelatedSet; + if (coversRelatedSet != RBM_NONE) + { + bestScore |= COVERS_RELATED; + candidates = coversRelatedSet; + found = isSingleRegister(candidates); + } + } - // Unfortunately, we can't just look at the nextIntervalLoc, because there are - // cases where it's not strictly assigned, but neither is it unassigned (cases - // where it's inactive but we don't update nextIntervalRef). - // TODO-Cleanup: See whether we can unify this. 
- LsraLocation nextIntervalLoc = getNextIntervalRefLocation(regNum ARM_ARG(currentInterval->registerType)); - if (nextIntervalLoc == MaxLocation) - { - assert((physRegRecord->assignedInterval == nullptr) || - (physRegRecord->assignedInterval->physReg != regNum) || - (physRegRecord->assignedInterval->getNextRefLocation() > lastLocation)); - } - if (nextIntervalLoc > lastLocation) - { - score |= UNASSIGNED; - } + // Apply the RELATED_PREFERENCE heuristic. Only applies if we have freeCandidates. + if (!found && (freeCandidates != RBM_NONE)) + { + regMaskTP relatedPreferenceSet = candidates & relatedPreferences; + if (relatedPreferenceSet != RBM_NONE) + { + bestScore |= RELATED_PREFERENCE; + candidates = relatedPreferenceSet; + found = isSingleRegister(candidates); + } + } - // Does this cover the full range of the interval? - if (nextPhysRefLocation > lastLocation) - { - score |= COVERS_FULL; - } + // Apply the CALLER_CALLEE heuristic. Only applies if we have freeCandidates. + if (!found && (freeCandidates != RBM_NONE)) + { + regMaskTP callerCalleeSet = candidates & callerCalleePrefs; + if (callerCalleeSet != RBM_NONE) + { + bestScore |= CALLER_CALLEE; + candidates = callerCalleeSet; + found = isSingleRegister(candidates); + } + } - int bestPartialScore = (bestScore & ~(COVERS_FULL - 1)); - if (bestPartialScore > score) - { - continue; - } - else if ((score > bestPartialScore) || (bestScore < FREE)) - { - // At this point if we have a higher score, it is by definition the BEST_FIT. - score |= BEST_FIT; - } - else if (nextPhysRefLocation == bestLocation) + // Apply the UNASSIGNED heuristic. Only applies if we have freeCandidates. + if (!found && (freeCandidates != RBM_NONE)) + { + unassignedSet &= candidates; + if (unassignedSet != RBM_NONE) + { + bestScore |= UNASSIGNED; + candidates = unassignedSet; + found = isSingleRegister(candidates); + } + } + + // Apply the COVERS_FULL heuristic. Only applies if we have freeCandidates. + if (!found && (coversFullSet != RBM_NONE)) + { + coversFullSet &= candidates; + if (coversFullSet != RBM_NONE) + { + bestScore |= COVERS_FULL; + candidates = coversFullSet; + found = isSingleRegister(candidates); + } + } + + // Apply the BEST_FIT heuristic. Only applies if we have freeCandidates. + if (!found && (freeCandidates != RBM_NONE)) + { + regMaskTP bestFitSet = RBM_NONE; + // If the best score includes COVERS_FULL, pick the one that's killed soonest. + // If none cover the full range, the BEST_FIT is the one that's killed later. + bool earliestIsBest = ((bestScore & COVERS_FULL) != 0); + LsraLocation bestFitLocation = earliestIsBest ? MaxLocation : MinLocation; + for (regMaskTP bestFitCandidates = candidates; bestFitCandidates != RBM_NONE; ) + { + regMaskTP bestFitCandidateBit = genFindLowestBit(bestFitCandidates); + bestFitCandidates &= ~bestFitCandidateBit; + regNumber bestFitCandidateRegNum = genRegNumFromMask(bestFitCandidateBit); + + // Find the next RefPosition of the register. + LsraLocation nextIntervalLocation = nextIntervalRef[bestFitCandidateRegNum]; + LsraLocation nextPhysRefLocation = Min(nextFixedRef[bestFitCandidateRegNum], nextIntervalLocation); + // If the nextPhysRefLocation is a fixedRef for the rangeEndRefPosition, increment it so that + // we don't think it isn't covering the live range. 
+ // This doesn't handle the case where earlier RefPositions for this Interval are also + // FixedRefs of this regNum, but at least those are only interesting in the case where those + // are "local last uses" of the Interval - otherwise the liveRange would interfere with the reg. + // TODO: This duplicates code in an earlier loop, and is basically here to duplicate previous + // behavior; see if we can avoid this. + if (nextPhysRefLocation == rangeEndLocation && rangeEndRefPosition->isFixedRefOfReg(bestFitCandidateRegNum)) { - assert((comparisonScore & BEST_FIT) != 0); - score |= BEST_FIT; + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_INCREMENT_RANGE_END, currentInterval)); + nextPhysRefLocation++; } - else if ((score & COVERS_FULL) != 0) + + if (nextPhysRefLocation == bestFitLocation) { - // All things equal, if both cover the full range, pick the one that's killed soonest. - if (nextPhysRefLocation < bestLocation) - { - score |= BEST_FIT; - comparisonScore &= ~BEST_FIT; - } + bestFitSet |= bestFitCandidateBit; } else { - // Neither cover the full range, so the BEST_FIT is the one that's killed later. - if (nextPhysRefLocation > bestLocation) + bool isBetter = false; + if (nextPhysRefLocation > lastLocation) { - score |= BEST_FIT; - comparisonScore &= ~BEST_FIT; + // This covers the full range; favor it if the other doesn't, or if it's a closer match. + if ((bestFitLocation <= lastLocation) || (nextPhysRefLocation < bestFitLocation)) + { + isBetter = true; + } } else { - assert((comparisonScore & BEST_FIT) != 0); + // This doesn't cover the full range; favor it if the other doesn't either, but this ends later. + if ((bestFitLocation <= lastLocation) && (nextPhysRefLocation > bestFitLocation)) + { + isBetter = true; + } + } + if (isBetter) + { + bestFitSet = bestFitCandidateBit; + bestFitLocation = nextPhysRefLocation; } } + } + assert(bestFitSet != RBM_NONE); + bestScore |= BEST_FIT; + candidates = bestFitSet; + found = isSingleRegister(candidates); + } - // Oddly, the previous heuristics only considered this if both covered the range. - if ((prevRegRec != nullptr) && (prevRegRec->regNum == regNum) && ((score & COVERS_FULL) != 0)) - { - score |= IS_PREV_REG; - } + // Apply the IS_PREV_REG heuristic. Only applies if we have freeCandidates. + // Oddly, the previous heuristics only considered this if it covered the range. + if ((prevRegRec != nullptr) && ((bestScore & COVERS_FULL) != 0)) + { + prevRegBit &= candidates; + if (prevRegBit != RBM_NONE) + { + bestScore |= IS_PREV_REG; + candidates = prevRegBit; + found = true; + } + } - if ((availablePhysRegRecord == nullptr) || (physRegRecord->regOrder < availablePhysRegRecord->regOrder)) + // Apply the REG_ORDER heuristic. Only applies if we have freeCandidates. + if (!found && (freeCandidates != RBM_NONE)) + { + // This will always result in a single candidate. That is, it is the tie-breaker + // for free candidates, and doesn't make sense as anything other than the last + // heuristic for free registers. 
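// The loop that follows keeps the candidate whose regOrder value is smallest.
// Below is a self-contained sketch of that tie-breaker (illustrative only: a
// plain uint64_t mask and an order table indexed by register number stand in
// for regMaskTP and RegRecord::regOrder).

#include <climits>
#include <cstdint>

// Return the single candidate bit whose register has the smallest order value,
// or 0 if the candidate mask is empty.
uint64_t pickByRegOrder(uint64_t candidates, const unsigned char* regOrder)
{
    unsigned lowestOrder    = UINT_MAX;
    uint64_t lowestOrderBit = 0;
    for (unsigned regNum = 0; regNum < 64; regNum++)
    {
        uint64_t candidateBit = uint64_t(1) << regNum;
        if ((candidates & candidateBit) == 0)
        {
            continue;
        }
        if (regOrder[regNum] < lowestOrder)
        {
            lowestOrder    = regOrder[regNum];
            lowestOrderBit = candidateBit;
        }
    }
    return lowestOrderBit;
}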
+ unsigned lowestRegOrder = MAXUINT; + regMaskTP lowestRegOrderBit = RBM_NONE; + for (regMaskTP regOrderCandidates = candidates; regOrderCandidates != RBM_NONE; ) + { + regMaskTP regOrderCandidateBit = genFindLowestBit(regOrderCandidates); + regOrderCandidates &= ~regOrderCandidateBit; + regNumber regOrderCandidateRegNum = genRegNumFromMask(regOrderCandidateBit); + unsigned thisRegOrder = getRegisterRecord(regOrderCandidateRegNum)->regOrder; + if (thisRegOrder < lowestRegOrder) { - score |= REG_ORDER; - comparisonScore &= ~REG_ORDER; + lowestRegOrder = thisRegOrder; + lowestRegOrderBit = regOrderCandidateBit; } } - else + assert(lowestRegOrderBit != RBM_NONE); + bestScore |= REG_ORDER; + candidates = lowestRegOrderBit; + found = true; + } + + // Apply the SPILL_COST heuristic and eliminate regs that can't be spilled. + if (!found) + { + // The spill weight for 'refPosition' (the one we're allocating now). + unsigned int thisSpillWeight = getWeight(refPosition); + // The spill weight for the best candidate we've found so far. + unsigned int bestSpillWeight = BB_MAX_WEIGHT; + // True if we found registers with lower spill weight than this refPosition. + bool foundLowerSpillWeight = false; + // The set of registers with the lowest spill weight. + regMaskTP lowestCostSpillSet = RBM_NONE; + + for (regMaskTP spillCandidates = candidates; spillCandidates != RBM_NONE; ) { - assert(isAssigned(physRegRecord ARM_ARG(regType))); + regMaskTP spillCandidateBit = genFindLowestBit(spillCandidates); + spillCandidates &= ~spillCandidateBit; + regNumber spillCandidateRegNum = genRegNumFromMask(spillCandidateBit); + RegRecord* spillCandidateRegRecord = &physRegs[spillCandidateRegNum]; - // We would have to spill this register. - if (comparisonScore >= FREE) + // Can and should the interval in this register be spilled for this one, + // if we don't find a better alternative? + if ((getNextIntervalRefLocation(spillCandidateRegNum ARM_ARG(regType)) == currentLocation) && + !spillCandidateRegRecord->assignedInterval->getNextRefPosition()->RegOptional()) { continue; } - - // We can't spill a register that's next used at this location, unless it's regOptional. - // TODO: Make this cheaper? - if (!refPosition->isFixedRegRef) + if (!isSpillCandidate(currentInterval, refPosition, spillCandidateRegRecord)) { - if ((nextPhysRefLocation == refPosition->nodeLocation) || - (refPosition->delayRegFree && nextPhysRefLocation == (refPosition->nodeLocation + 1))) - { - if (!physRegRecord->assignedInterval->getNextRefPosition()->RegOptional()) - { - continue; - } - } - - // Can and should the interval in this register be spilled for this one, - // if we don't find a better alternative? 
- if (!isSpillCandidate(currentInterval, refPosition, physRegRecord)) - { - continue; - } + continue; } - currentSpillWeight = spillCost[regNum]; + unsigned currentSpillWeight = spillCost[spillCandidateRegNum]; #ifdef TARGET_ARM if (currentInterval->registerType == TYP_DOUBLE) { - currentSpillWeight = max(currentSpillWeight, spillCost[REG_NEXT(regNum)]); + currentSpillWeight = max(currentSpillWeight, spillCost[REG_NEXT(spillCandidateRegNum)]); } #endif - if (currentSpillWeight >= thisSpillWeight) - { - if (refPosition->RegOptional() || ((comparisonScore & SPILL_COST_THIS) != 0)) - { - continue; - } - } - else - { - score |= SPILL_COST_THIS; - } - if (currentSpillWeight < bestSpillWeight) { - score |= SPILL_COST_OTHERS; - comparisonScore &= ~SPILL_COST_OTHERS; + bestSpillWeight = currentSpillWeight; + lowestCostSpillSet = spillCandidateBit; } else if (currentSpillWeight == bestSpillWeight) { - // All else is equal so finally consider: - // - farthest reference location. - // - To duplicate previous behavior: - // - IF this is a higher register number than the current best: - // - pick it if the recentAssignedRef is a reload of a reg-optional - // - otherwise, select the lower register number, to match old behavior of 'allocateBusyReg', - // which traversed in regNumber order. - if (nextPhysRefLocation < farRefLocation) - { - continue; - } - bool isBetter = (availablePhysRegRecord == nullptr) || (nextPhysRefLocation > farRefLocation); - if (!isBetter) - { - RefPosition* recentAssignedRef = physRegRecord->assignedInterval->recentRefPosition; - if (regNum > foundReg) - { - isBetter = (recentAssignedRef != nullptr) && recentAssignedRef->reload && - recentAssignedRef->RegOptional(); - } - else - { - RefPosition* otherRecentAssignedRef = - availablePhysRegRecord->assignedInterval->recentRefPosition; - isBetter = !((otherRecentAssignedRef != nullptr) && otherRecentAssignedRef->reload && - otherRecentAssignedRef->RegOptional()); - } - } - if (isBetter) - { - score |= (FAR_NEXT_REF | SPILL_COST_OTHERS); - comparisonScore &= ~FAR_NEXT_REF; - } + lowestCostSpillSet |= spillCandidateBit; } } - bool foundBetterCandidate = (score > comparisonScore); - -#ifdef DEBUG - if (doReverseSelect() && bestScore != 0) + // We won't spill if this refPosition is RegOptional() and we have no candidates + // with a lower spill cost. + if ((bestSpillWeight >= thisSpillWeight) && refPosition->RegOptional()) { - foundBetterCandidate = !foundBetterCandidate; + currentInterval->assignedReg = nullptr; + return REG_NA; } -#endif // DEBUG + // We must have at least one with the lowest spill cost. + assert(lowestCostSpillSet != RBM_NONE); + bestScore |= SPILL_COST; + candidates = lowestCostSpillSet; + found = isSingleRegister(candidates); + } - if (foundBetterCandidate) + // Apply the FAR_NEXT_REF heuristic. + if (!found) + { + LsraLocation farthestLocation = MinLocation; + regMaskTP farthestSet = RBM_NONE; + for (regMaskTP farthestCandidates = candidates; farthestCandidates != RBM_NONE; ) { - bestSpillWeight = currentSpillWeight; - bestLocation = nextPhysRefLocation; - farRefLocation = nextPhysRefLocation; - availablePhysRegRecord = physRegRecord; - foundReg = regNum; - bestScore = score; - // These are relative scorings, so we set them on whatever's best so far. - if (score & FREE) + regMaskTP farthestCandidateBit = genFindLowestBit(farthestCandidates); + farthestCandidates &= ~farthestCandidateBit; + regNumber farthestCandidateRegNum = genRegNumFromMask(farthestCandidateBit); + + // Find the next RefPosition of the register. 
+ LsraLocation nextIntervalLocation = nextIntervalRef[farthestCandidateRegNum]; + LsraLocation nextPhysRefLocation = Min(nextFixedRef[farthestCandidateRegNum], nextIntervalLocation); + if (nextPhysRefLocation == farthestLocation) { - bestScore |= (REG_ORDER | BEST_FIT); + farthestSet |= farthestCandidateBit; } - else + else if (nextPhysRefLocation > farthestLocation) { - bestScore |= (FAR_NEXT_REF | SPILL_COST_OTHERS); + farthestSet = farthestCandidateBit; + farthestLocation = nextPhysRefLocation; } } - - // If there is no way we can get a better score, break out - if (!reverseSelect && (score == bestPossibleScore) && (bestLocation == lastLocation + 1) && - (bestSpillWeight == 0)) - { - break; - } + // We must have at least one with the lowest spill cost. + assert(farthestSet != RBM_NONE); + bestScore |= FAR_NEXT_REF; + candidates = farthestSet; + found = isSingleRegister(candidates); } - if ((prevRegRec != nullptr) && (foundReg != prevRegRec->regNum) && !refPosition->copyReg) + // Apply the PREV_REG_OPT heuristic. + if (!found) { - if ((prevRegRec->assignedInterval == currentInterval)) + regMaskTP prevRegOptSet = RBM_NONE; + for (regMaskTP prevRegOptCandidates = candidates; prevRegOptCandidates != RBM_NONE; ) + { + regMaskTP prevRegOptCandidateBit = genFindLowestBit(prevRegOptCandidates); + prevRegOptCandidates &= ~prevRegOptCandidateBit; + regNumber prevRegOptCandidateRegNum = genRegNumFromMask(prevRegOptCandidateBit); + Interval* assignedInterval = physRegs[prevRegOptCandidateRegNum].assignedInterval; + // The assigned should be non-null, and should have a recentRefPosition, however since + // this is a heuristic, we don't want a fatal error, so we just assert (not noway_assert). + if ((assignedInterval != nullptr) && (assignedInterval->recentRefPosition != nullptr)) + { + if (assignedInterval->recentRefPosition->reload && assignedInterval->recentRefPosition->RegOptional()) + { + prevRegOptSet |= prevRegOptCandidateBit; + } + } + else + { + assert(!"Spill candidate has no assignedInterval recentRefPosition"); + } + } + if (prevRegOptSet != RBM_NONE) { - unassignPhysReg(prevRegRec, nullptr); + bestScore |= PREV_REG_OPT; + candidates = prevRegOptSet; + found = isSingleRegister(candidates); } - currentInterval->assignedReg = nullptr; } - if (foundReg == REG_NA) + // Apply the REG_NUM heuristic. + if (!found) { - assert(refPosition->RegOptional()); - return REG_NA; + candidates = genFindLowestBit(candidates); } - // FOUND: + assert(isSingleRegister(candidates)); + regMaskTP foundRegBit = candidates; + foundReg = genRegNumFromMask(foundRegBit); availablePhysRegRecord = getRegisterRecord(foundReg); - regMaskTP foundRegBit = genRegMask(foundReg); + Interval* assignedInterval = availablePhysRegRecord->assignedInterval; + if ((assignedInterval != currentInterval) && isAssigned(availablePhysRegRecord ARM_ARG(regType))) { - Interval* assignedInterval = availablePhysRegRecord->assignedInterval; - if ((assignedInterval != currentInterval) && isAssigned(availablePhysRegRecord ARM_ARG(regType))) + if ((foundRegBit & freeCandidates) == RBM_NONE) { - if ((foundRegBit & freeCandidates) == RBM_NONE) - { -// We're spilling. + // We're spilling. + CLANG_FORMAT_COMMENT_ANCHOR; + #ifdef TARGET_ARM - if (currentInterval->registerType == TYP_DOUBLE) - { - assert(genIsValidDoubleReg(availablePhysRegRecord->regNum)); - unassignDoublePhysReg(availablePhysRegRecord); - } - else if (assignedInterval->registerType == TYP_DOUBLE) - { - // Make sure we spill both halves of the double register. 
- assert(genIsValidDoubleReg(assignedInterval->assignedReg->regNum)); - unassignPhysReg(assignedInterval->assignedReg, assignedInterval->recentRefPosition); - } - else + if (currentInterval->registerType == TYP_DOUBLE) + { + assert(genIsValidDoubleReg(availablePhysRegRecord->regNum)); + unassignDoublePhysReg(availablePhysRegRecord); + } + else if (assignedInterval->registerType == TYP_DOUBLE) + { + // Make sure we spill both halves of the double register. + assert(genIsValidDoubleReg(assignedInterval->assignedReg->regNum)); + unassignPhysReg(assignedInterval->assignedReg, assignedInterval->recentRefPosition); + } + else #endif - { - unassignPhysReg(availablePhysRegRecord, assignedInterval->recentRefPosition); - } + { + unassignPhysReg(availablePhysRegRecord, assignedInterval->recentRefPosition); + } + } + else + { + // If we considered this "unassigned" because this interval's lifetime ends before + // the next ref, remember it. + // For historical reasons (due to former short-circuiting of this case), if we're reassigning + // the current interval to a previous assignment, we don't remember the previous interval. + // Note that we need to compute this condition before calling unassignPhysReg, which wil reset + // assignedInterval->physReg. + bool wasAssigned = (((foundRegBit & unassignedSet) != RBM_NONE) && ((bestScore & THIS_ASSIGNED) == 0) && + (assignedInterval != nullptr) && (assignedInterval->physReg == foundReg)); + unassignPhysReg(availablePhysRegRecord ARM_ARG(currentInterval->registerType)); + if ((matchingConstants & foundRegBit) != RBM_NONE) + { + assert(assignedInterval->isConstant); + refPosition->treeNode->SetReuseRegVal(); + } + else if (wasAssigned) + { + updatePreviousInterval(availablePhysRegRecord, assignedInterval, assignedInterval->registerType); } else { - // If we considered this "unassigned" because this interval's lifetime ends before - // the next ref, remember it. - // For historical reasons (due to former short-circuiting of this case), if we're reassigning - // the current interval to a previous assignment, we don't remember the previous interval. - // Note that we need to compute this condition before calling unassignPhysReg, which wil reset - // assignedInterval->physReg. - bool wasAssigned = (((bestScore & UNASSIGNED) != 0) && ((bestScore & THIS_ASSIGNED) == 0) && - (assignedInterval != nullptr) && (assignedInterval->physReg == foundReg)); - unassignPhysReg(availablePhysRegRecord ARM_ARG(currentInterval->registerType)); - if ((matchingConstants & foundRegBit) != RBM_NONE) - { - assert(assignedInterval->isConstant); - refPosition->treeNode->SetReuseRegVal(); - } - else if (wasAssigned) - { - updatePreviousInterval(availablePhysRegRecord, assignedInterval, assignedInterval->registerType); - } - else - { - assert((bestScore & VALUE_AVAILABLE) == 0); - } + assert((bestScore & VALUE_AVAILABLE) == 0); } } - assignPhysReg(availablePhysRegRecord, currentInterval); - regMaskTP foundRegMask = genRegMask(foundReg); - refPosition->registerAssignment = foundRegMask; } - + assignPhysReg(availablePhysRegRecord, currentInterval); + refPosition->registerAssignment = foundRegBit; return foundReg; } @@ -3816,23 +3933,6 @@ bool LinearScan::isSpillCandidate(Interval* current, RefPosition* refPosition, R { return false; } - Interval* assignedInterval = physRegRecord->assignedInterval; -#ifdef TARGET_ARM - RegRecord* physRegRecord2 = nullptr; - Interval* assignedInterval2 = nullptr; - - // For ARM32, a double occupies a consecutive even/odd pair of float registers. 
- if (current->registerType == TYP_DOUBLE) - { - assert(genIsValidDoubleReg(physRegRecord->regNum)); - physRegRecord2 = getSecondHalfRegRec(physRegRecord); - assignedInterval2 = physRegRecord2->assignedInterval; - } -#endif - - // TODO: Delete; This is a duplicate of above. - assert(!isRegInUse(physRegRecord, refPosition)); - return true; } diff --git a/src/coreclr/src/jit/lsra.h b/src/coreclr/src/jit/lsra.h index ad961c0a4aca4b..6c3e8de4624a8d 100644 --- a/src/coreclr/src/jit/lsra.h +++ b/src/coreclr/src/jit/lsra.h @@ -1583,6 +1583,7 @@ class LinearScan : public LinearScanInterface } regMaskTP getMatchingConstants(regMaskTP mask, Interval* currentInterval, RefPosition* refPosition); + regMaskTP fixedRegs; LsraLocation nextFixedRef[REG_COUNT]; void updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPosition); diff --git a/src/coreclr/src/jit/lsrabuild.cpp b/src/coreclr/src/jit/lsrabuild.cpp index b7be02c7d895fb..5476d9c4d80d1b 100644 --- a/src/coreclr/src/jit/lsrabuild.cpp +++ b/src/coreclr/src/jit/lsrabuild.cpp @@ -1806,19 +1806,21 @@ const unsigned lsraRegOrderFltSize = ArrLen(lsraRegOrderFlt); void LinearScan::buildPhysRegRecords() { - RegisterType regType = IntRegisterType; + for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg)) + { + RegRecord* curr = &physRegs[reg]; + curr->init(reg); + } for (unsigned int i = 0; i < lsraRegOrderSize; i++) { regNumber reg = lsraRegOrder[i]; RegRecord* curr = &physRegs[reg]; - curr->init(reg); curr->regOrder = (unsigned char)i; } for (unsigned int i = 0; i < lsraRegOrderFltSize; i++) { regNumber reg = lsraRegOrderFlt[i]; RegRecord* curr = &physRegs[reg]; - curr->init(reg); curr->regOrder = (unsigned char)i; } } From 7a5640e3cc7bddd6a137c1d067dac8a224c43c3c Mon Sep 17 00:00:00 2001 From: Carol Eidt Date: Mon, 16 Nov 2020 09:57:34 -0800 Subject: [PATCH 04/17] More refactoring to simplify configuration. Fix one source of arm32 double diffs. --- src/coreclr/src/jit/lsra.cpp | 281 ++++++++++++------------------ src/coreclr/src/jit/lsra.h | 97 +++++++++-- src/coreclr/src/jit/lsrabuild.cpp | 4 +- 3 files changed, 200 insertions(+), 182 deletions(-) diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp index ea7c4cc67b3e14..710bc1564e195c 100644 --- a/src/coreclr/src/jit/lsra.cpp +++ b/src/coreclr/src/jit/lsra.cpp @@ -3049,14 +3049,14 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos // enum RegisterScore { - FREE = 0x10000, // It is not currently assigned to an *active* interval + FREE = 0x10000, // It is not currently assigned to an *active* interval // These are the original criteria for comparing registers that are free. - VALUE_AVAILABLE = 0x8000, // It is a constant value that is already in an acceptable register. - THIS_ASSIGNED = 0x4000, // It is in the interval's preference set and it is already assigned to this interval. - COVERS = 0x2000, // It is in the interval's preference set and it covers the current range. - OWN_PREFERENCE = 0x1000, // It is in the preference set of this interval. - COVERS_RELATED = 0x0800, // It is in the preference set of the related interval and covers its entire lifetime. + VALUE_AVAILABLE = 0x8000, // It is a constant value that is already in an acceptable register. + THIS_ASSIGNED = 0x4000, // It is in the interval's preference set and it is already assigned to this interval. + COVERS = 0x2000, // It is in the interval's preference set and it covers the current range. 
+ OWN_PREFERENCE = 0x1000, // It is in the preference set of this interval. + COVERS_RELATED = 0x0800, // It is in the preference set of the related interval and covers its entire lifetime. RELATED_PREFERENCE = 0x0400, // It is in the preference set of the related interval. CALLER_CALLEE = 0x0200, // It is in the right "set" for the interval (caller or callee-save). UNASSIGNED = 0x0100, // It is not currently assigned to any (active or inactive) interval @@ -3066,23 +3066,22 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos REG_ORDER = 0x0010, // These are the original criteria for comparing registers that are in use. - SPILL_COST = 0x0008, // It has the lowest cost of all the candidates. - FAR_NEXT_REF = 0x0004, // It has a farther next reference than the best candidate thus far. - PREV_REG_OPT = 0x0002, // The previous RefPosition of its current assigned interval is RegOptional. - REG_NUM = 0x0001, // It has a lower register number. + SPILL_COST = 0x0008, // It has the lowest cost of all the candidates. + FAR_NEXT_REF = 0x0004, // It has a farther next reference than the best candidate thus far. + PREV_REG_OPT = 0x0002, // The previous RefPosition of its current assigned interval is RegOptional. + REG_NUM = 0x0001, // It has a lower register number. }; - int bestScore = 0; - LsraLocation bestLocation = MinLocation; - LsraLocation farRefLocation = MinLocation; + LsraLocation bestLocation = MinLocation; + LsraLocation farRefLocation = MinLocation; // These are used in the post-selection updates, and must be set for any selection. - regMaskTP freeCandidates = RBM_NONE; - regMaskTP matchingConstants = RBM_NONE; + regMaskTP freeCandidates = RBM_NONE; + regMaskTP matchingConstants = RBM_NONE; + regMaskTP unassignedSet = RBM_NONE; - // These are set prior to their use in the associated heuristics. + // These must be set prior to their use in the associated heuristics. unsigned int thisSpillWeight = 0; - regMaskTP unassignedSet = RBM_NONE; // We'll set this to short-circuit remaining heuristics when we have a single candidate. bool found = false; @@ -3117,7 +3116,7 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos { regMaskTP checkConflictBit = genFindLowestBit(checkConflictMask); checkConflictMask &= ~checkConflictBit; - regNumber checkConflictReg = genRegNumFromMask(checkConflictBit); + regNumber checkConflictReg = genRegNumFromMask(checkConflictBit); LsraLocation checkConflictLocation = nextFixedRef[checkConflictReg]; if ((checkConflictLocation == currentLocation) || @@ -3132,6 +3131,8 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos // By chance, is prevRegRec already holding this interval, as a copyReg or having // been restored as inactive after a kill? + // NOTE: this is not currently considered one of the selection criteria - it always wins + // if it is the assignedInterval of 'prevRegRec'. regMaskTP prevRegBit = RBM_NONE; if (!found && (prevRegRec != nullptr)) { @@ -3139,7 +3140,7 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos if ((prevRegRec->assignedInterval == currentInterval) && ((candidates & prevRegBit) != RBM_NONE)) { candidates = prevRegBit; - found = true; + found = true; } } @@ -3150,8 +3151,8 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos return REG_NA; } - // TODO-Cleanup: Previously, the "reverseSelect" stress mode modified some of the heuristics. 
- // that needs to be re-engineered with this refactoring. + // TODO-Cleanup: Previously, the "reverseSelect" stress mode reversed the order of the heuristics. + // It needs to be re-engineered with this refactoring. // In non-debug builds, this will simply get optimized away bool reverseSelect = false; #ifdef DEBUG @@ -3160,9 +3161,16 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos // At this point we have determined all the candidates that CAN be considered, // and we'll begin to apply the heuristics in order. + registerSelector selector; + selector.candidates = candidates; + selector.score = 0; +#ifdef TARGET_ARM + selector.regType = regType; +#endif // TARGET_ARM + + freeCandidates = getFreeCandidates(candidates, regType); // Apply the FREE heuristic. - freeCandidates = applyFreeHeuristic(candidates, currentInterval->registerType); if (freeCandidates == RBM_NONE) { // We won't spill if this refPosition is not an actual ref. @@ -3174,9 +3182,7 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos } else if (!found) { - bestScore |= FREE; - candidates = freeCandidates; - found = isSingleRegister(candidates); + found = selector.applySelection(FREE, freeCandidates); } // Apply the VALUE_AVAILABLE (matching constant) heuristic. Only applies if we have freeCandidates. @@ -3185,44 +3191,28 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos { if (currentInterval->isConstant && RefTypeIsDef(refPosition->refType)) { - matchingConstants = getMatchingConstants(candidates, currentInterval, refPosition); - if (matchingConstants != RBM_NONE) - { - bestScore |= VALUE_AVAILABLE; - candidates = matchingConstants; - found = isSingleRegister(candidates); - } + matchingConstants = getMatchingConstants(selector.candidates, currentInterval, refPosition); + found = selector.applySelection(VALUE_AVAILABLE, matchingConstants); } } // Apply the THIS_ASSIGNED heuristic. Only applies if we have freeCandidates. if (!found && (prevRegRec != nullptr) && (freeCandidates != RBM_NONE)) { - regMaskTP freePrefCandidates = (candidates & preferences & freeCandidates); - if (freePrefCandidates != RBM_NONE) - { - regMaskTP prevRegMask = genRegMask(prevRegRec->regNum); - if ((freePrefCandidates & prevRegMask) != RBM_NONE) - { - bestScore |= THIS_ASSIGNED; - candidates = prevRegMask; - found = true; - } - } + found = selector.applySelection(THIS_ASSIGNED, freeCandidates & preferences & prevRegBit); } - // Apply the COVERS heuristic. Only applies if we have freeCandidates in the preference set. - // We will also compute the OWN_PREFERENCE, COVERS_RELATED, COVERS_FULL and UNASSIGNED sets, as they all - // require similar computation. - regMaskTP preferenceSet = RBM_NONE; + // Compute the sets for COVERS, OWN_PREFERENCE, COVERS_RELATED, COVERS_FULL and UNASSIGNED together, + // as they all require similar computation. + regMaskTP coversSet = RBM_NONE; + regMaskTP preferenceSet = RBM_NONE; regMaskTP coversRelatedSet = RBM_NONE; - regMaskTP coversFullSet = RBM_NONE; + regMaskTP coversFullSet = RBM_NONE; if (freeCandidates != RBM_NONE) { - preferenceSet = (candidates & preferences); - regMaskTP coversSet = RBM_NONE; - regMaskTP coversCandidates = (preferenceSet == RBM_NONE) ? candidates : preferenceSet; - for (; coversCandidates != RBM_NONE; ) + preferenceSet = (selector.candidates & preferences); + regMaskTP coversCandidates = (preferenceSet == RBM_NONE) ? 
selector.candidates : preferenceSet; + for (; coversCandidates != RBM_NONE;) { regMaskTP coversCandidateBit = genFindLowestBit(coversCandidates); coversCandidates &= ~coversCandidateBit; @@ -3233,12 +3223,13 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos if (!found) { // Find the next RefPosition of the register. - LsraLocation nextIntervalLocation = nextIntervalRef[coversCandidateRegNum]; + LsraLocation nextIntervalLocation = nextIntervalRef[coversCandidateRegNum]; LsraLocation coversCandidateLocation = Min(nextFixedRef[coversCandidateRegNum], nextIntervalLocation); #ifdef TARGET_ARM if (currentInterval->registerType == TYP_DOUBLE) { - LsraLocation otherNextPhysRefLocation = Min(nextFixedRef[coversCandidateRegNum + 1], nextIntervalRef[coversCandidateRegNum + 1]); + LsraLocation otherNextPhysRefLocation = + Min(nextFixedRef[coversCandidateRegNum + 1], nextIntervalRef[coversCandidateRegNum + 1]); coversCandidateLocation = Min(coversCandidateLocation, otherNextPhysRefLocation); } #endif // TARGET_ARM @@ -3247,7 +3238,8 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos // This doesn't handle the case where earlier RefPositions for this Interval are also // FixedRefs of this regNum, but at least those are only interesting in the case where those // are "local last uses" of the Interval - otherwise the liveRange would interfere with the reg. - if (coversCandidateLocation == rangeEndLocation && rangeEndRefPosition->isFixedRefOfReg(coversCandidateRegNum)) + if (coversCandidateLocation == rangeEndLocation && + rangeEndRefPosition->isFixedRefOfReg(coversCandidateRegNum)) { INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_INCREMENT_RANGE_END, currentInterval)); coversCandidateLocation++; @@ -3285,84 +3277,52 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos unassignedSet |= coversCandidateBit; } } - if ((coversSet & preferences) != RBM_NONE) - { - bestScore |= COVERS; - candidates = coversSet & preferences; - found = isSingleRegister(candidates); - } + } + + // Apply the COVERS heuristic. Only applies if we have freeCandidates. + if (!found) + { + found = selector.applySelection(COVERS, coversSet & preferenceSet); } // Apply the OWN_PREFERENCE heuristic. Only applies if we have freeCandidates. - if (!found && (preferenceSet != RBM_NONE)) + // Note that 'preferenceSet' already includes only freeCandidates. + if (!found) { - preferenceSet = candidates & preferenceSet; - if (preferenceSet != RBM_NONE) - { - bestScore |= OWN_PREFERENCE; - candidates = preferenceSet; - found = isSingleRegister(candidates); - } + assert((preferenceSet & freeCandidates) == preferenceSet); + found = selector.applySelection(OWN_PREFERENCE, preferenceSet); } // Apply the COVERS_RELATED heuristic. Only applies if we have freeCandidates. - if (!found && (coversRelatedSet != RBM_NONE)) + if (!found) { - coversRelatedSet = candidates & coversRelatedSet; - if (coversRelatedSet != RBM_NONE) - { - bestScore |= COVERS_RELATED; - candidates = coversRelatedSet; - found = isSingleRegister(candidates); - } + assert((coversRelatedSet & freeCandidates) == coversRelatedSet); + found = selector.applySelection(COVERS_RELATED, coversRelatedSet); } // Apply the RELATED_PREFERENCE heuristic. Only applies if we have freeCandidates. 
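// Each conversion in this hunk replaces an explicit "narrow the mask,
// accumulate the score, check for a single survivor" block with a call to
// selector.applySelection. A self-contained sketch of that pattern
// (illustrative only: 'MiniSelector' uses uint64_t masks and a plain int score
// as stand-ins for the registerSelector introduced by this change).

#include <cstdint>

struct MiniSelector
{
    uint64_t candidates;
    int      score = 0;

    static bool isSingleBit(uint64_t mask)
    {
        return (mask != 0) && ((mask & (mask - 1)) == 0);
    }

    // Narrow the candidate set by a heuristic's mask; if the intersection is
    // empty the heuristic simply does not apply. Returns true once exactly one
    // candidate remains.
    bool applySelection(int selectionScore, uint64_t selectionCandidates)
    {
        uint64_t newCandidates = candidates & selectionCandidates;
        if (newCandidates != 0)
        {
            score += selectionScore;
            candidates = newCandidates;
            return isSingleBit(candidates);
        }
        return false;
    }
};

// Usage, with made-up scores and masks:
//   MiniSelector selector{0xF0};                        // four candidates
//   bool found = selector.applySelection(0x100, 0x30);  // narrows to 0x30
//   if (!found)
//   {
//       found = selector.applySelection(0x080, 0x20);   // narrows to 0x20: found
//   }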
- if (!found && (freeCandidates != RBM_NONE)) + if (!found) { - regMaskTP relatedPreferenceSet = candidates & relatedPreferences; - if (relatedPreferenceSet != RBM_NONE) - { - bestScore |= RELATED_PREFERENCE; - candidates = relatedPreferenceSet; - found = isSingleRegister(candidates); - } + found = selector.applySelection(RELATED_PREFERENCE, relatedPreferences & freeCandidates); } // Apply the CALLER_CALLEE heuristic. Only applies if we have freeCandidates. - if (!found && (freeCandidates != RBM_NONE)) + if (!found) { - regMaskTP callerCalleeSet = candidates & callerCalleePrefs; - if (callerCalleeSet != RBM_NONE) - { - bestScore |= CALLER_CALLEE; - candidates = callerCalleeSet; - found = isSingleRegister(candidates); - } + found = selector.applySelection(CALLER_CALLEE, callerCalleePrefs & freeCandidates); } - // Apply the UNASSIGNED heuristic. Only applies if we have freeCandidates. - if (!found && (freeCandidates != RBM_NONE)) + // Apply the UNASSIGNED heuristic. + if (!found) { - unassignedSet &= candidates; - if (unassignedSet != RBM_NONE) - { - bestScore |= UNASSIGNED; - candidates = unassignedSet; - found = isSingleRegister(candidates); - } + found = selector.applySelection(UNASSIGNED, unassignedSet); } - // Apply the COVERS_FULL heuristic. Only applies if we have freeCandidates. - if (!found && (coversFullSet != RBM_NONE)) + // Apply the COVERS_FULL heuristic. + if (!found) { - coversFullSet &= candidates; - if (coversFullSet != RBM_NONE) - { - bestScore |= COVERS_FULL; - candidates = coversFullSet; - found = isSingleRegister(candidates); - } + assert((coversFullSet & freeCandidates) == coversFullSet); + found = selector.applySelection(COVERS_FULL, coversFullSet); } // Apply the BEST_FIT heuristic. Only applies if we have freeCandidates. @@ -3371,17 +3331,18 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos regMaskTP bestFitSet = RBM_NONE; // If the best score includes COVERS_FULL, pick the one that's killed soonest. // If none cover the full range, the BEST_FIT is the one that's killed later. - bool earliestIsBest = ((bestScore & COVERS_FULL) != 0); + bool earliestIsBest = ((selector.score & COVERS_FULL) != 0); LsraLocation bestFitLocation = earliestIsBest ? MaxLocation : MinLocation; - for (regMaskTP bestFitCandidates = candidates; bestFitCandidates != RBM_NONE; ) + for (regMaskTP bestFitCandidates = selector.candidates; bestFitCandidates != RBM_NONE;) { regMaskTP bestFitCandidateBit = genFindLowestBit(bestFitCandidates); bestFitCandidates &= ~bestFitCandidateBit; regNumber bestFitCandidateRegNum = genRegNumFromMask(bestFitCandidateBit); // Find the next RefPosition of the register. - LsraLocation nextIntervalLocation = nextIntervalRef[bestFitCandidateRegNum]; - LsraLocation nextPhysRefLocation = Min(nextFixedRef[bestFitCandidateRegNum], nextIntervalLocation); + LsraLocation nextIntervalLocation = getNextIntervalRef(bestFitCandidateRegNum, regType); + LsraLocation nextPhysRefLocation = getNextFixedRef(bestFitCandidateRegNum, regType); + nextPhysRefLocation = Min(nextPhysRefLocation, nextIntervalLocation); // If the nextPhysRefLocation is a fixedRef for the rangeEndRefPosition, increment it so that // we don't think it isn't covering the live range. 
// This doesn't handle the case where earlier RefPositions for this Interval are also @@ -3420,28 +3381,20 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos } if (isBetter) { - bestFitSet = bestFitCandidateBit; + bestFitSet = bestFitCandidateBit; bestFitLocation = nextPhysRefLocation; } } } assert(bestFitSet != RBM_NONE); - bestScore |= BEST_FIT; - candidates = bestFitSet; - found = isSingleRegister(candidates); + found = selector.applySelection(BEST_FIT, bestFitSet); } // Apply the IS_PREV_REG heuristic. Only applies if we have freeCandidates. // Oddly, the previous heuristics only considered this if it covered the range. - if ((prevRegRec != nullptr) && ((bestScore & COVERS_FULL) != 0)) + if ((prevRegRec != nullptr) && ((selector.score & COVERS_FULL) != 0)) { - prevRegBit &= candidates; - if (prevRegBit != RBM_NONE) - { - bestScore |= IS_PREV_REG; - candidates = prevRegBit; - found = true; - } + found = selector.applySingleRegSelection(IS_PREV_REG, prevRegBit); } // Apply the REG_ORDER heuristic. Only applies if we have freeCandidates. @@ -3450,26 +3403,26 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos // This will always result in a single candidate. That is, it is the tie-breaker // for free candidates, and doesn't make sense as anything other than the last // heuristic for free registers. - unsigned lowestRegOrder = MAXUINT; + unsigned lowestRegOrder = MAXUINT; regMaskTP lowestRegOrderBit = RBM_NONE; - for (regMaskTP regOrderCandidates = candidates; regOrderCandidates != RBM_NONE; ) + for (regMaskTP regOrderCandidates = selector.candidates; regOrderCandidates != RBM_NONE;) { regMaskTP regOrderCandidateBit = genFindLowestBit(regOrderCandidates); regOrderCandidates &= ~regOrderCandidateBit; regNumber regOrderCandidateRegNum = genRegNumFromMask(regOrderCandidateBit); - unsigned thisRegOrder = getRegisterRecord(regOrderCandidateRegNum)->regOrder; + unsigned thisRegOrder = getRegisterRecord(regOrderCandidateRegNum)->regOrder; if (thisRegOrder < lowestRegOrder) { - lowestRegOrder = thisRegOrder; + lowestRegOrder = thisRegOrder; lowestRegOrderBit = regOrderCandidateBit; } } assert(lowestRegOrderBit != RBM_NONE); - bestScore |= REG_ORDER; - candidates = lowestRegOrderBit; - found = true; + found = selector.applySingleRegSelection(REG_ORDER, lowestRegOrderBit); } + // The set of registers with the lowest spill weight. + regMaskTP lowestCostSpillSet = RBM_NONE; // Apply the SPILL_COST heuristic and eliminate regs that can't be spilled. if (!found) { @@ -3479,14 +3432,12 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos unsigned int bestSpillWeight = BB_MAX_WEIGHT; // True if we found registers with lower spill weight than this refPosition. bool foundLowerSpillWeight = false; - // The set of registers with the lowest spill weight. 
- regMaskTP lowestCostSpillSet = RBM_NONE; - for (regMaskTP spillCandidates = candidates; spillCandidates != RBM_NONE; ) + for (regMaskTP spillCandidates = selector.candidates; spillCandidates != RBM_NONE;) { regMaskTP spillCandidateBit = genFindLowestBit(spillCandidates); spillCandidates &= ~spillCandidateBit; - regNumber spillCandidateRegNum = genRegNumFromMask(spillCandidateBit); + regNumber spillCandidateRegNum = genRegNumFromMask(spillCandidateBit); RegRecord* spillCandidateRegRecord = &physRegs[spillCandidateRegNum]; // Can and should the interval in this register be spilled for this one, @@ -3510,7 +3461,7 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos #endif if (currentSpillWeight < bestSpillWeight) { - bestSpillWeight = currentSpillWeight; + bestSpillWeight = currentSpillWeight; lowestCostSpillSet = spillCandidateBit; } else if (currentSpillWeight == bestSpillWeight) @@ -3526,19 +3477,21 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos currentInterval->assignedReg = nullptr; return REG_NA; } + } + + if (!found) + { // We must have at least one with the lowest spill cost. assert(lowestCostSpillSet != RBM_NONE); - bestScore |= SPILL_COST; - candidates = lowestCostSpillSet; - found = isSingleRegister(candidates); + found = selector.applySelection(SPILL_COST, lowestCostSpillSet); } // Apply the FAR_NEXT_REF heuristic. if (!found) { LsraLocation farthestLocation = MinLocation; - regMaskTP farthestSet = RBM_NONE; - for (regMaskTP farthestCandidates = candidates; farthestCandidates != RBM_NONE; ) + regMaskTP farthestSet = RBM_NONE; + for (regMaskTP farthestCandidates = selector.candidates; farthestCandidates != RBM_NONE;) { regMaskTP farthestCandidateBit = genFindLowestBit(farthestCandidates); farthestCandidates &= ~farthestCandidateBit; @@ -3546,41 +3499,42 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos // Find the next RefPosition of the register. LsraLocation nextIntervalLocation = nextIntervalRef[farthestCandidateRegNum]; - LsraLocation nextPhysRefLocation = Min(nextFixedRef[farthestCandidateRegNum], nextIntervalLocation); + LsraLocation nextPhysRefLocation = Min(nextFixedRef[farthestCandidateRegNum], nextIntervalLocation); if (nextPhysRefLocation == farthestLocation) { farthestSet |= farthestCandidateBit; } else if (nextPhysRefLocation > farthestLocation) { - farthestSet = farthestCandidateBit; + farthestSet = farthestCandidateBit; farthestLocation = nextPhysRefLocation; } } // We must have at least one with the lowest spill cost. assert(farthestSet != RBM_NONE); - bestScore |= FAR_NEXT_REF; - candidates = farthestSet; - found = isSingleRegister(candidates); + found = selector.applySelection(FAR_NEXT_REF, farthestSet); } // Apply the PREV_REG_OPT heuristic. 
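// The PREV_REG_OPT scan that follows looks for occupants whose most recent
// reference was a reg-optional reload; such an occupant is cheap to displace
// because its value can be used from memory instead. A minimal sketch of the
// test (illustrative only: 'OccupantInfo' is a stand-in for the assigned
// interval's recent RefPosition, not a real JIT type).

#include <cstdint>

struct OccupantInfo
{
    bool hasRecentRef;        // the register's assigned interval has a recent reference
    bool recentIsReload;      // that reference is a reload
    bool recentIsRegOptional; // and it was marked reg-optional
};

// Collect the candidates whose occupant satisfies the PREV_REG_OPT condition.
uint64_t prevRegOptSet(uint64_t candidates, const OccupantInfo* occupants /* indexed by register number */)
{
    uint64_t result = 0;
    for (unsigned regNum = 0; regNum < 64; regNum++)
    {
        uint64_t candidateBit = uint64_t(1) << regNum;
        if ((candidates & candidateBit) == 0)
        {
            continue;
        }
        const OccupantInfo& occupant = occupants[regNum];
        if (occupant.hasRecentRef && occupant.recentIsReload && occupant.recentIsRegOptional)
        {
            result |= candidateBit;
        }
    }
    return result;
}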
if (!found) { regMaskTP prevRegOptSet = RBM_NONE; - for (regMaskTP prevRegOptCandidates = candidates; prevRegOptCandidates != RBM_NONE; ) + for (regMaskTP prevRegOptCandidates = selector.candidates; prevRegOptCandidates != RBM_NONE;) { regMaskTP prevRegOptCandidateBit = genFindLowestBit(prevRegOptCandidates); prevRegOptCandidates &= ~prevRegOptCandidateBit; regNumber prevRegOptCandidateRegNum = genRegNumFromMask(prevRegOptCandidateBit); - Interval* assignedInterval = physRegs[prevRegOptCandidateRegNum].assignedInterval; + Interval* assignedInterval = physRegs[prevRegOptCandidateRegNum].assignedInterval; // The assigned should be non-null, and should have a recentRefPosition, however since // this is a heuristic, we don't want a fatal error, so we just assert (not noway_assert). if ((assignedInterval != nullptr) && (assignedInterval->recentRefPosition != nullptr)) { if (assignedInterval->recentRefPosition->reload && assignedInterval->recentRefPosition->RegOptional()) { - prevRegOptSet |= prevRegOptCandidateBit; + // TODO-Cleanup: Previously, we always used the highest regNum with a previous regOptional + // RefPosition, which is not really consistent with the way other selection criteria are applied. + // should probably be: prevRegOptSet |= prevRegOptCandidateBit; + prevRegOptSet = prevRegOptCandidateBit; } } else @@ -3588,24 +3542,19 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos assert(!"Spill candidate has no assignedInterval recentRefPosition"); } } - if (prevRegOptSet != RBM_NONE) - { - bestScore |= PREV_REG_OPT; - candidates = prevRegOptSet; - found = isSingleRegister(candidates); - } + found = selector.applySelection(PREV_REG_OPT, prevRegOptSet); } // Apply the REG_NUM heuristic. if (!found) { - candidates = genFindLowestBit(candidates); + found = selector.applySingleRegSelection(REG_NUM, genFindLowestBit(selector.candidates)); } - assert(isSingleRegister(candidates)); - regMaskTP foundRegBit = candidates; - foundReg = genRegNumFromMask(foundRegBit); - availablePhysRegRecord = getRegisterRecord(foundReg); + assert(found && isSingleRegister(selector.candidates)); + regMaskTP foundRegBit = selector.candidates; + foundReg = genRegNumFromMask(foundRegBit); + availablePhysRegRecord = getRegisterRecord(foundReg); Interval* assignedInterval = availablePhysRegRecord->assignedInterval; if ((assignedInterval != currentInterval) && isAssigned(availablePhysRegRecord ARM_ARG(regType))) { @@ -3640,7 +3589,9 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos // the current interval to a previous assignment, we don't remember the previous interval. // Note that we need to compute this condition before calling unassignPhysReg, which wil reset // assignedInterval->physReg. 
- bool wasAssigned = (((foundRegBit & unassignedSet) != RBM_NONE) && ((bestScore & THIS_ASSIGNED) == 0) && + regMaskTP freePrefCandidates = (selector.candidates & preferences & freeCandidates); + bool wasThisAssigned = ((prevRegBit & preferences) == foundRegBit); + bool wasAssigned = (((foundRegBit & unassignedSet) != RBM_NONE) && !wasThisAssigned && (assignedInterval != nullptr) && (assignedInterval->physReg == foundReg)); unassignPhysReg(availablePhysRegRecord ARM_ARG(currentInterval->registerType)); if ((matchingConstants & foundRegBit) != RBM_NONE) @@ -3654,7 +3605,7 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos } else { - assert((bestScore & VALUE_AVAILABLE) == 0); + assert((selector.score & VALUE_AVAILABLE) == 0); } } } diff --git a/src/coreclr/src/jit/lsra.h b/src/coreclr/src/jit/lsra.h index 6c3e8de4624a8d..505460942c2022 100644 --- a/src/coreclr/src/jit/lsra.h +++ b/src/coreclr/src/jit/lsra.h @@ -1135,20 +1135,6 @@ class LinearScan : public LinearScanInterface ****************************************************************************/ RegisterType getRegisterType(Interval* currentInterval, RefPosition* refPosition); - regMaskTP applyFreeHeuristic(regMaskTP candidates, var_types regType) - { - regMaskTP result = candidates & m_AvailableRegs; -#ifdef TARGET_ARM - // For TYP_DOUBLE on ARM, we can only use register for which the odd half is - // also available. - if (regType == TYP_DOUBLE) - { - result &= (m_AvailableRegs << 1); - } -#endif // TARGET_ARM - return result; - } - regNumber allocateReg(Interval* current, RefPosition* refPosition); regNumber assignCopyReg(RefPosition* refPosition); @@ -1178,6 +1164,59 @@ class LinearScan : public LinearScanInterface void spillGCRefs(RefPosition* killRefPosition); + /***************************************************************************** + * Register selection + ****************************************************************************/ + regMaskTP getFreeCandidates(regMaskTP candidates, var_types regType) + { + regMaskTP result = candidates & m_AvailableRegs; +#ifdef TARGET_ARM + // For TYP_DOUBLE on ARM, we can only use register for which the odd half is + // also available. + if (regType == TYP_DOUBLE) + { + result &= (m_AvailableRegs >> 1); + } +#endif // TARGET_ARM + return result; + } + + struct registerSelector + { + regMaskTP candidates; + int score; +#ifdef TARGET_ARM + var_types regType; +#endif // TARGET_ARM + + // Apply a simple mask-based selection heuristic, and return 'true' if we now have a single candidate. + bool applySelection(int selectionScore, regMaskTP selectionCandidates) + { + regMaskTP newCandidates = candidates & selectionCandidates; + if (newCandidates != RBM_NONE) + { + score += selectionScore; + candidates = newCandidates; + return isSingleRegister(candidates); + } + return false; + } + + // Select a single register, if it is in the candidate set. + // Return true if so. 
+ bool applySingleRegSelection(int selectionScore, regMaskTP selectionCandidate) + { + assert(isSingleRegister(selectionCandidate)); + regMaskTP newCandidates = candidates & selectionCandidate; + if (newCandidates != RBM_NONE) + { + candidates = newCandidates; + return true; + } + return false; + } + }; + /***************************************************************************** * For Resolution phase ****************************************************************************/ @@ -1583,11 +1622,39 @@ class LinearScan : public LinearScanInterface } regMaskTP getMatchingConstants(regMaskTP mask, Interval* currentInterval, RefPosition* refPosition); - regMaskTP fixedRegs; + regMaskTP fixedRegs; LsraLocation nextFixedRef[REG_COUNT]; void updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPosition); + LsraLocation getNextFixedRef(regNumber regNum, var_types regType) + { + LsraLocation loc = nextFixedRef[regNum]; +#ifdef TARGET_ARM + if (regType == TYP_DOUBLE) + { + // The following is what you'd *expect* this to be (or at least some form of combining + // the two), but the existing heuristics always use the location of the odd (second) half. + // loc = Min(loc, nextFixedRef[regNum + 1]); + loc = nextFixedRef[regNum + 1]; + } +#endif + return loc; + } LsraLocation nextIntervalRef[REG_COUNT]; + LsraLocation getNextIntervalRef(regNumber regNum, var_types regType) + { + LsraLocation loc = nextIntervalRef[regNum]; +#ifdef TARGET_ARM + if (regType == TYP_DOUBLE) + { + // The following is what you'd *expect* this to be (or at least some form of combining + // the two), but the existing heuristics always use the location of the odd (second) half. + // loc = Min(loc, nextIntervalRef[regNum + 1]); + loc = nextIntervalRef[regNum + 1]; + } +#endif + return loc; + } unsigned int spillCost[REG_COUNT]; regMaskTP regsBusyUntilKill; diff --git a/src/coreclr/src/jit/lsrabuild.cpp b/src/coreclr/src/jit/lsrabuild.cpp index 5476d9c4d80d1b..15821dc785d12a 100644 --- a/src/coreclr/src/jit/lsrabuild.cpp +++ b/src/coreclr/src/jit/lsrabuild.cpp @@ -1815,13 +1815,13 @@ void LinearScan::buildPhysRegRecords() { regNumber reg = lsraRegOrder[i]; RegRecord* curr = &physRegs[reg]; - curr->regOrder = (unsigned char)i; + curr->regOrder = (unsigned char)i; } for (unsigned int i = 0; i < lsraRegOrderFltSize; i++) { regNumber reg = lsraRegOrderFlt[i]; RegRecord* curr = &physRegs[reg]; - curr->regOrder = (unsigned char)i; + curr->regOrder = (unsigned char)i; } } From be484b4209acdf4eec1907046f63d2494f031993 Mon Sep 17 00:00:00 2001 From: Carol Eidt Date: Mon, 23 Nov 2020 11:37:42 -0800 Subject: [PATCH 05/17] Delete unused `registerIsAvailable` method Fix `nextPhysRefLocation` computation for Arm32 --- src/coreclr/src/jit/lsra.cpp | 50 +++--------------------------------- src/coreclr/src/jit/lsra.h | 4 --- 2 files changed, 3 insertions(+), 51 deletions(-) diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp index 710bc1564e195c..ea262abe09f7ba 100644 --- a/src/coreclr/src/jit/lsra.cpp +++ b/src/coreclr/src/jit/lsra.cpp @@ -2658,51 +2658,6 @@ bool copyOrMoveRegInUse(RefPosition* ref, LsraLocation loc) return false; } -// Determine whether the register represented by "physRegRecord" is available at least -// at the "currentLoc", and if so, return the next location at which it is in use in -// "nextRefLocationPtr" -// -bool LinearScan::registerIsAvailable(RegRecord* physRegRecord, - LsraLocation currentLoc, - LsraLocation* nextRefLocationPtr, - RegisterType regType) -{ - regNumber reg = 
physRegRecord->regNum; - LsraLocation nextRefLocation = Min(nextFixedRef[reg], nextIntervalRef[reg]); - assert(!isRegBusy(reg, regType) && !isRegInUse(reg, regType)); - bool isAvailable = ((m_AvailableRegs & genRegMask(reg)) != RBM_NONE); - if (!isAvailable) - { - assert(physRegRecord->assignedInterval != nullptr); - if (!physRegRecord->assignedInterval->isActive) - { - // This must be in use in the current location. - RefPosition* recentRef = physRegRecord->assignedInterval->recentRefPosition; - assert((recentRef->nodeLocation == currentLoc) || - ((recentRef->nodeLocation == (currentLoc - 1)) && recentRef->delayRegFree)); - } - } - *nextRefLocationPtr = nextRefLocation; - -#ifdef TARGET_ARM - if (regType == TYP_DOUBLE) - { - // Recurse, but check the other half this time (TYP_FLOAT) - RegRecord* secondHalfRegRec = getSecondHalfRegRec(physRegRecord); - assert(!isRegBusy(secondHalfRegRec->regNum, TYP_FLOAT) && !isRegInUse(reg, regType)); - if (!registerIsAvailable(secondHalfRegRec, currentLoc, nextRefLocationPtr, TYP_FLOAT)) - return false; - // The above will overwrite the value in *nextRefLocationPtr. We want to keep the nearest location. - if (*nextRefLocationPtr > nextRefLocation) - { - *nextRefLocationPtr = nextRefLocation; - } - } -#endif // TARGET_ARM - - return isAvailable; -} - //------------------------------------------------------------------------ // getRegisterType: Get the RegisterType to use for the given RefPosition // @@ -3223,8 +3178,9 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos if (!found) { // Find the next RefPosition of the register. - LsraLocation nextIntervalLocation = nextIntervalRef[coversCandidateRegNum]; - LsraLocation coversCandidateLocation = Min(nextFixedRef[coversCandidateRegNum], nextIntervalLocation); + LsraLocation nextIntervalLocation = getNextIntervalRef(coversCandidateRegNum, regType); + LsraLocation nextPhysRefLocation = getNextFixedRef(coversCandidateRegNum, regType); + LsraLocation coversCandidateLocation = Min(nextPhysRefLocation, nextIntervalLocation); #ifdef TARGET_ARM if (currentInterval->registerType == TYP_DOUBLE) { diff --git a/src/coreclr/src/jit/lsra.h b/src/coreclr/src/jit/lsra.h index 505460942c2022..265bd690ade583 100644 --- a/src/coreclr/src/jit/lsra.h +++ b/src/coreclr/src/jit/lsra.h @@ -1047,10 +1047,6 @@ class LinearScan : public LinearScanInterface regMaskTP allSIMDRegs(); regMaskTP internalFloatRegCandidates(); - bool registerIsAvailable(RegRecord* physRegRecord, - LsraLocation currentLoc, - LsraLocation* nextRefLocationPtr, - RegisterType regType); void makeRegisterInactive(RegRecord* physRegRecord); void freeRegister(RegRecord* physRegRecord); void freeRegisters(regMaskTP regsToFree); From 04db361df8f35b438e60f9a946ead285af9cd2c3 Mon Sep 17 00:00:00 2001 From: Carol Eidt Date: Tue, 24 Nov 2020 07:29:46 -0800 Subject: [PATCH 06/17] Use `UNIT_MAX` not `MAXUINT` --- src/coreclr/src/jit/lsra.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp index ea262abe09f7ba..75bf04502b2cdb 100644 --- a/src/coreclr/src/jit/lsra.cpp +++ b/src/coreclr/src/jit/lsra.cpp @@ -3359,7 +3359,7 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos // This will always result in a single candidate. That is, it is the tie-breaker // for free candidates, and doesn't make sense as anything other than the last // heuristic for free registers. 
- unsigned lowestRegOrder = MAXUINT; + unsigned lowestRegOrder = UINT_MAX; regMaskTP lowestRegOrderBit = RBM_NONE; for (regMaskTP regOrderCandidates = selector.candidates; regOrderCandidates != RBM_NONE;) { From 81116e94250ff17880735506b465bd07fa210ffc Mon Sep 17 00:00:00 2001 From: Carol Eidt Date: Tue, 24 Nov 2020 17:05:02 -0800 Subject: [PATCH 07/17] clear constantReg when interval is moved --- src/coreclr/src/jit/lsra.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp index 75bf04502b2cdb..46a81af0a1c18b 100644 --- a/src/coreclr/src/jit/lsra.cpp +++ b/src/coreclr/src/jit/lsra.cpp @@ -4174,7 +4174,6 @@ void LinearScan::unassignPhysRegNoSpill(RegRecord* regRec) assert(assignedInterval != nullptr && assignedInterval->isActive); assignedInterval->isActive = false; unassignPhysReg(regRec, nullptr); - // makeRegAvailable(regRec->regNum, assignedInterval->registerType); assignedInterval->isActive = true; } @@ -6018,6 +6017,7 @@ void LinearScan::allocateRegisters() { unassignPhysRegNoSpill(physRegRecord); physRegRecord->assignedInterval = nullptr; + clearConstantReg(assignedRegister, currentInterval->registerType); } currentRefPosition->moveReg = true; assignedRegister = REG_NA; From d0d40153cf81e3fe779e3d30c8b792c88af81660 Mon Sep 17 00:00:00 2001 From: Carol Eidt Date: Wed, 25 Nov 2020 10:01:46 -0800 Subject: [PATCH 08/17] Fix merge issues --- src/coreclr/src/jit/lsra.cpp | 16 ++++++++-------- src/coreclr/src/jit/lsra.h | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp index 46a81af0a1c18b..3781886929c7c8 100644 --- a/src/coreclr/src/jit/lsra.cpp +++ b/src/coreclr/src/jit/lsra.cpp @@ -352,13 +352,13 @@ void LinearScan::updateSpillCost(regNumber reg, Interval* interval) { // An interval can have no recentRefPosition if this is the initial assignment // of a parameter to its home register. - unsigned int weight = (interval->recentRefPosition != nullptr) ? getWeight(interval->recentRefPosition) : 0; - spillCost[reg] = weight; + float cost = (interval->recentRefPosition != nullptr) ? getWeight(interval->recentRefPosition) : 0; + spillCost[reg] = cost; #ifdef TARGET_ARM if (interval->registerType == TYP_DOUBLE) { regNumber otherReg = REG_NEXT(reg); - spillCost[otherReg] = weight; + spillCost[otherReg] = cost; } #endif } @@ -3383,9 +3383,9 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos if (!found) { // The spill weight for 'refPosition' (the one we're allocating now). - unsigned int thisSpillWeight = getWeight(refPosition); + float thisSpillWeight = getWeight(refPosition); // The spill weight for the best candidate we've found so far. - unsigned int bestSpillWeight = BB_MAX_WEIGHT; + float bestSpillWeight = BB_MAX_WEIGHT; // True if we found registers with lower spill weight than this refPosition. 
bool foundLowerSpillWeight = false; @@ -3408,7 +3408,7 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos continue; } - unsigned currentSpillWeight = spillCost[spillCandidateRegNum]; + float currentSpillWeight = spillCost[spillCandidateRegNum]; #ifdef TARGET_ARM if (currentInterval->registerType == TYP_DOUBLE) { @@ -3610,11 +3610,11 @@ bool LinearScan::canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation) // // Note: This helper is designed to be used only from allocateReg() and getDoubleSpillWeight() // -unsigned LinearScan::getSpillWeight(RegRecord* physRegRecord) +float LinearScan::getSpillWeight(RegRecord* physRegRecord) { assert(physRegRecord->assignedInterval != nullptr); RefPosition* recentAssignedRef = physRegRecord->assignedInterval->recentRefPosition; - unsigned weight = BB_ZERO_WEIGHT; + float weight = BB_ZERO_WEIGHT; // We shouldn't call this method if there is no recentAssignedRef. assert(recentAssignedRef != nullptr); diff --git a/src/coreclr/src/jit/lsra.h b/src/coreclr/src/jit/lsra.h index 265bd690ade583..9545ba2a39fcc9 100644 --- a/src/coreclr/src/jit/lsra.h +++ b/src/coreclr/src/jit/lsra.h @@ -977,7 +977,7 @@ class LinearScan : public LinearScanInterface bool isAssignedToInterval(Interval* interval, RegRecord* regRec); bool isRefPositionActive(RefPosition* refPosition, LsraLocation refLocation); bool canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation); - unsigned getSpillWeight(RegRecord* physRegRecord); + float getSpillWeight(RegRecord* physRegRecord); bool isRegInUse(RegRecord* regRec, RefPosition* refPosition); // insert refpositions representing prolog zero-inits which will be added later @@ -1651,7 +1651,7 @@ class LinearScan : public LinearScanInterface #endif return loc; } - unsigned int spillCost[REG_COUNT]; + float spillCost[REG_COUNT]; regMaskTP regsBusyUntilKill; regMaskTP regsInUseThisLocation; From a9820c8d6826c187b420c33aca3f9b3d62a2361b Mon Sep 17 00:00:00 2001 From: Carol Eidt Date: Mon, 30 Nov 2020 08:23:51 -0800 Subject: [PATCH 09/17] Fix weight --- src/coreclr/src/jit/lsra.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp index 3781886929c7c8..488a60bc9fa53c 100644 --- a/src/coreclr/src/jit/lsra.cpp +++ b/src/coreclr/src/jit/lsra.cpp @@ -3385,7 +3385,7 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos // The spill weight for 'refPosition' (the one we're allocating now). float thisSpillWeight = getWeight(refPosition); // The spill weight for the best candidate we've found so far. - float bestSpillWeight = BB_MAX_WEIGHT; + float bestSpillWeight = FloatingPointUtils::infinite_float(); // True if we found registers with lower spill weight than this refPosition. bool foundLowerSpillWeight = false; From 68c914cbf89949d1b7e5db3fb722025ef83733b5 Mon Sep 17 00:00:00 2001 From: Carol Eidt Date: Tue, 1 Dec 2020 11:01:42 -0800 Subject: [PATCH 10/17] Fix spill cost update for UpperVectors --- src/coreclr/src/jit/lsra.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp index 488a60bc9fa53c..8e6da9aba1c071 100644 --- a/src/coreclr/src/jit/lsra.cpp +++ b/src/coreclr/src/jit/lsra.cpp @@ -4067,13 +4067,13 @@ void LinearScan::setIntervalAsSpilled(Interval* interval) // Now we need to mark the local as spilled also, even if the lower half is never spilled, // as this will use the upper part of its home location. 
interval = interval->relatedInterval; - // We'll now mark this as spilled, so it changes the spillCost for its recent ref, if any. + // We'll now mark this as spilled, so it changes the spillCost. RefPosition* recentRefPos = interval->recentRefPosition; if (!interval->isSpilled && interval->isActive && (recentRefPos != nullptr)) { VarSetOps::AddElemD(compiler, splitOrSpilledVars, interval->getVarIndex(compiler)); interval->isSpilled = true; - regNumber reg = interval->recentRefPosition->assignedReg(); + regNumber reg = interval->physReg; spillCost[reg] = getSpillWeight(getRegisterRecord(reg)); } } From 499514936cf65bcf79326e45b2ce48d57a3fa20f Mon Sep 17 00:00:00 2001 From: Carol Eidt Date: Tue, 1 Dec 2020 14:33:00 -0800 Subject: [PATCH 11/17] Fix a bug in reload of multireg call on Arm64 --- src/coreclr/src/jit/codegenarmarch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/codegenarmarch.cpp b/src/coreclr/src/jit/codegenarmarch.cpp index d51353f48aa30b..6fef0bbea88bb2 100644 --- a/src/coreclr/src/jit/codegenarmarch.cpp +++ b/src/coreclr/src/jit/codegenarmarch.cpp @@ -1405,7 +1405,7 @@ void CodeGen::genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode) for (int i = regCount - 1; i >= 0; --i) { var_types type = op1->gtSkipReloadOrCopy()->GetRegTypeByIndex(i); - regNumber reg = op1->GetRegByIndex(i); + regNumber reg = actualOp1->GetRegByIndex(i); if (op1->IsCopyOrReload()) { // GT_COPY/GT_RELOAD will have valid reg for those positions From 52735ccd703dc477c086d42ef6d69d95fc23ca4a Mon Sep 17 00:00:00 2001 From: Carol Eidt Date: Thu, 3 Dec 2020 12:54:14 -0800 Subject: [PATCH 12/17] FixedReg fixes --- src/coreclr/src/jit/lsra.cpp | 6 +++--- src/coreclr/src/jit/lsrabuild.cpp | 4 ---- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp index 8e6da9aba1c071..f73c0f1bc83149 100644 --- a/src/coreclr/src/jit/lsra.cpp +++ b/src/coreclr/src/jit/lsra.cpp @@ -2827,8 +2827,9 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos // If there is another fixed reference to this register before the use, change the candidates // on this RefPosition to include that of nextRefPos. - if (currFixedRegRefPosition->nextRefPosition != nullptr && - currFixedRegRefPosition->nextRefPosition->nodeLocation <= nextRefPos->getRefEndLocation()) + RefPosition* nextFixedRegRefPosition = defRegRecord->getNextRefPosition(); + if (nextFixedRegRefPosition != nullptr && + nextFixedRegRefPosition->nodeLocation <= nextRefPos->getRefEndLocation()) { candidates |= nextRefPos->registerAssignment; if (preferences == refPosition->registerAssignment) @@ -5721,7 +5722,6 @@ void LinearScan::allocateRegisters() assert(currentRefPosition->isIntervalRef()); currentInterval = currentRefPosition->getInterval(); assert(currentInterval != nullptr); - assert(currentRefPosition->isFixedRegRef == isSingleRegister(currentRefPosition->registerAssignment)); assignedRegister = currentInterval->physReg; // Identify the special cases where we decide up-front not to allocate diff --git a/src/coreclr/src/jit/lsrabuild.cpp b/src/coreclr/src/jit/lsrabuild.cpp index 15821dc785d12a..32c72e287b24e5 100644 --- a/src/coreclr/src/jit/lsrabuild.cpp +++ b/src/coreclr/src/jit/lsrabuild.cpp @@ -314,7 +314,6 @@ void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* de // This is case #2. 
Use the useRegAssignment INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE2, interval)); defRefPosition->registerAssignment = useRegAssignment; - defRefPosition->isFixedRegRef = useRefPosition->isFixedRegRef; return; } } @@ -328,7 +327,6 @@ void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* de // This is case #3. INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE3, interval)); defRefPosition->registerAssignment = useRegAssignment; - defRefPosition->isFixedRegRef = useRefPosition->isFixedRegRef; return; } if (useRegRecord != nullptr && !defRegConflict && canChangeUseAssignment) @@ -336,7 +334,6 @@ void LinearScan::resolveConflictingDefAndUse(Interval* interval, RefPosition* de // This is case #4. INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_DEFUSE_CASE4, interval)); useRefPosition->registerAssignment = defRegAssignment; - useRefPosition->isFixedRegRef = defRefPosition->isFixedRegRef; return; } if (defRegRecord != nullptr && useRegRecord != nullptr) @@ -424,7 +421,6 @@ void LinearScan::checkConflictingDefUse(RefPosition* useRP) if (!isSingleRegister(newAssignment) || !theInterval->hasInterferingUses) { defRP->registerAssignment = newAssignment; - defRP->isFixedRegRef = isSingleRegister(newAssignment); } } else From c7b88736ccb2e6e09b30e8ae6cbbc4a28664ea56 Mon Sep 17 00:00:00 2001 From: Carol Eidt Date: Thu, 3 Dec 2020 18:10:08 -0800 Subject: [PATCH 13/17] Review feedback and other cleanup --- src/coreclr/src/jit/lsra.cpp | 156 ++++++----------------------------- src/coreclr/src/jit/lsra.h | 23 +----- 2 files changed, 25 insertions(+), 154 deletions(-) diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp index f73c0f1bc83149..54f2fdab204df5 100644 --- a/src/coreclr/src/jit/lsra.cpp +++ b/src/coreclr/src/jit/lsra.cpp @@ -3182,14 +3182,7 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos LsraLocation nextIntervalLocation = getNextIntervalRef(coversCandidateRegNum, regType); LsraLocation nextPhysRefLocation = getNextFixedRef(coversCandidateRegNum, regType); LsraLocation coversCandidateLocation = Min(nextPhysRefLocation, nextIntervalLocation); -#ifdef TARGET_ARM - if (currentInterval->registerType == TYP_DOUBLE) - { - LsraLocation otherNextPhysRefLocation = - Min(nextFixedRef[coversCandidateRegNum + 1], nextIntervalRef[coversCandidateRegNum + 1]); - coversCandidateLocation = Min(coversCandidateLocation, otherNextPhysRefLocation); - } -#endif // TARGET_ARM + // If the nextPhysRefLocation is a fixedRef for the rangeEndRefPosition, increment it so that // we don't think it isn't covering the live range. // This doesn't handle the case where earlier RefPositions for this Interval are also @@ -3399,7 +3392,7 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos // Can and should the interval in this register be spilled for this one, // if we don't find a better alternative? 
- if ((getNextIntervalRefLocation(spillCandidateRegNum ARM_ARG(regType)) == currentLocation) && + if ((getNextIntervalRef(spillCandidateRegNum, regType) == currentLocation) && !spillCandidateRegRecord->assignedInterval->getNextRefPosition()->RegOptional()) { continue; @@ -3640,10 +3633,6 @@ float LinearScan::getSpillWeight(RegRecord* physRegRecord) bool LinearScan::canSpillDoubleReg(RegRecord* physRegRecord, LsraLocation refLocation) { assert(genIsValidDoubleReg(physRegRecord->regNum)); - bool retVal = true; - BasicBlock::weight_t weight = BB_ZERO_WEIGHT; - BasicBlock::weight_t weight2 = BB_ZERO_WEIGHT; - RegRecord* physRegRecord2 = getSecondHalfRegRec(physRegRecord); if ((physRegRecord->assignedInterval != nullptr) && !canSpillReg(physRegRecord, refLocation)) @@ -3730,76 +3719,6 @@ bool LinearScan::isRefPositionActive(RefPosition* refPosition, LsraLocation refL ((refPosition->nodeLocation + 1 == refLocation) && refPosition->delayRegFree)); } -//---------------------------------------------------------------------------------------- -// isRegInUse: Test whether regRec is being used at the refPosition -// -// Arguments: -// regRec - A register to be tested -// refPosition - RefPosition where regRec is tested -// -// Return Value: -// True - if regRec is being used -// False - otherwise -// -// Notes: -// This helper is designed to be used only from allocateReg(). -// The caller must have already checked for the case where 'refPosition' is a fixed ref. -// (asserted at the beginning of this method). -// -bool LinearScan::isRegInUse(RegRecord* regRec, RefPosition* refPosition) -{ - // We shouldn't reach this check if 'refPosition' is a FixedReg of this register. - assert(!refPosition->isFixedRefOfReg(regRec->regNum)); - // We shouldn't call this method if there's no currently assigned Interval. - RegisterType regType = refPosition->getInterval()->registerType; - if (!isRegInUse(regRec->regNum, regType)) - { - return false; - } - - // We should never spill a register that's occupied by an Interval with its next use at the current - // location. - // TODO: We should have checked this already? - Interval* assignedInterval = regRec->assignedInterval; -#ifdef TARGET_ARM - if (regType == TYP_DOUBLE) - { - RegRecord* otherRegRecord = getSecondHalfRegRec(regRec); - if (assignedInterval != nullptr) - { - if (assignedInterval->isActive && (nextIntervalRef[regRec->regNum] <= refPosition->getRangeEndLocation())) - { - RefPosition* nextAssignedRef = assignedInterval->getNextRefPosition(); - if (!nextAssignedRef->RegOptional()) - { - return true; - } - } - if (otherRegRecord == nullptr) - { - return false; - } - } - else - { - assert(otherRegRecord->assignedInterval != nullptr); - } - assignedInterval = otherRegRecord->assignedInterval; - regRec = otherRegRecord; - } -#endif - assert(assignedInterval != nullptr); - if (assignedInterval->isActive && (nextIntervalRef[regRec->regNum] <= refPosition->getRangeEndLocation())) - { - RefPosition* nextAssignedRef = assignedInterval->getNextRefPosition(); - if (!nextAssignedRef->RegOptional()) - { - return true; - } - } - return false; -} - //------------------------------------------------------------------------ // isSpillCandidate: Determine if a register is a spill candidate for a given RefPosition. 
// @@ -3890,9 +3809,7 @@ regNumber LinearScan::assignCopyReg(RefPosition* refPosition) } //------------------------------------------------------------------------ -// isAssigned: This is the function to check if the given RegRecord has an assignedInterval -// regardless of lastLocation. -// So it would be call isAssigned() with Maxlocation value. +// isAssigned: This is the function to check if the given RegRecord has an assignedInterval. // // Arguments: // regRec - The RegRecord to check that it is assigned. @@ -3901,9 +3818,6 @@ regNumber LinearScan::assignCopyReg(RefPosition* refPosition) // Return Value: // Returns true if the given RegRecord has an assignedInterval. // -// Notes: -// There is the case to check if the RegRecord has an assignedInterval regardless of Lastlocation. -// bool LinearScan::isAssigned(RegRecord* regRec ARM_ARG(RegisterType newRegType)) { if (regRec->assignedInterval != nullptr) @@ -3925,30 +3839,13 @@ bool LinearScan::isAssigned(RegRecord* regRec ARM_ARG(RegisterType newRegType)) } //------------------------------------------------------------------------ -// isAssigned: Check whether the given RegRecord has an assignedInterval -// that has a reference prior to the given location. +// checkAndAssignInterval: Check if the interval is already assigned and +// if it is then unassign the physical record +// and set the assignedInterval to 'interval' // // Arguments: // regRec - The RegRecord of interest -// lastLocation - The LsraLocation up to which we want to check -// newRegType - The `RegisterType` of interval we want to check -// (this is for the purposes of checking the other half of a TYP_DOUBLE RegRecord) -// -// Return value: -// Returns true if the given RegRecord (and its other half, if TYP_DOUBLE) has an assignedInterval -// that is referenced prior to the given location -// -// Notes: -// The register is not considered to be assigned if it has no assignedInterval, or that Interval's -// next reference is beyond lastLocation -// -bool LinearScan::isAssigned(RegRecord* regRec, LsraLocation lastLocation ARM_ARG(RegisterType newRegType)) -{ - return getNextIntervalRefLocation(regRec->regNum ARM_ARG(newRegType)) > lastLocation; -} - -// Check if the interval is already assigned and if it is then unassign the physical record -// then set the assignedInterval to 'interval' +// interval - The Interval that we're going to assign to 'regRec' // void LinearScan::checkAndAssignInterval(RegRecord* regRec, Interval* interval) { @@ -4338,7 +4235,7 @@ void LinearScan::unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPositio clearSpillCost(thisRegNum, assignedInterval->registerType); checkAndClearInterval(regRec, spillRefPosition); } - makeRegAvailable(thisRegNum, assignedInterval->registerType); + makeRegAvailable(regToUnassign, assignedInterval->registerType); RefPosition* nextRefPosition = nullptr; if (spillRefPosition != nullptr) @@ -5102,8 +4999,7 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) #ifdef TARGET_ARM else { - RegRecord* physRegRecord = getRegisterRecord(reg); - Interval* assignedInterval = physRegRecord->assignedInterval; + Interval* assignedInterval = physRegRecord->assignedInterval; if (assignedInterval != nullptr && assignedInterval->registerType == TYP_DOUBLE) { @@ -5549,11 +5445,6 @@ void LinearScan::allocateRegisters() assert((recentRefPosition == nullptr) || (spillCost[reg] == getSpillWeight(physRegRecord))); } - else - { - assert(isRegAvailable(reg, assignedInterval->registerType)); - assert(spillCost[reg] == 
0); - } } else { @@ -5561,14 +5452,18 @@ void LinearScan::allocateRegisters() isRegBusy(reg, assignedInterval->registerType)); } } - else if ((assignedInterval->physReg == reg) && !assignedInterval->isConstant) - // else if (!assignedInterval->isConstant) - { - assert(nextIntervalRef[reg] == assignedInterval->getNextRefLocation()); - } else { - assert(nextIntervalRef[reg] == MaxLocation); + if ((assignedInterval->physReg == reg) && !assignedInterval->isConstant) + { + assert(nextIntervalRef[reg] == assignedInterval->getNextRefLocation()); + } + else + { + assert(nextIntervalRef[reg] == MaxLocation); + assert(isRegAvailable(reg, assignedInterval->registerType)); + assert(spillCost[reg] == 0); + } } } } @@ -6220,18 +6115,14 @@ void LinearScan::allocateRegisters() } // If we allocated a register, record it - assert(currentInterval != nullptr); if (assignedRegister != REG_NA) { assignedRegBit = genRegMask(assignedRegister); regMaskTP regMask = getRegMask(assignedRegister, currentInterval->registerType); - // if (!RefTypeIsDef(refType)) + regsInUseThisLocation |= regMask; + if (currentRefPosition->delayRegFree) { - regsInUseThisLocation |= regMask; - if (currentRefPosition->delayRegFree) - { - regsInUseNextLocation |= regMask; - } + regsInUseNextLocation |= regMask; } currentRefPosition->registerAssignment = assignedRegBit; @@ -6460,8 +6351,7 @@ void LinearScan::updateAssignedInterval(RegRecord* reg, Interval* interval, Regi regNumber doubleReg = REG_NA; if (regType == TYP_DOUBLE) { - doubleReg = reg->regNum; - assert(genIsValidDoubleReg(doubleReg)); + doubleReg = reg->regNum; RegRecord* anotherHalfReg = getSecondHalfRegRec(reg); anotherHalfReg->assignedInterval = interval; } diff --git a/src/coreclr/src/jit/lsra.h b/src/coreclr/src/jit/lsra.h index 9545ba2a39fcc9..a89a008dd60261 100644 --- a/src/coreclr/src/jit/lsra.h +++ b/src/coreclr/src/jit/lsra.h @@ -978,7 +978,6 @@ class LinearScan : public LinearScanInterface bool isRefPositionActive(RefPosition* refPosition, LsraLocation refLocation); bool canSpillReg(RegRecord* physRegRecord, LsraLocation refLocation); float getSpillWeight(RegRecord* physRegRecord); - bool isRegInUse(RegRecord* regRec, RefPosition* refPosition); // insert refpositions representing prolog zero-inits which will be added later void insertZeroInitRefPositions(); @@ -1144,7 +1143,6 @@ class LinearScan : public LinearScanInterface } bool isAssigned(RegRecord* regRec ARM_ARG(RegisterType newRegType)); - bool isAssigned(RegRecord* regRec, LsraLocation lastLocation ARM_ARG(RegisterType newRegType)); void checkAndClearInterval(RegRecord* regRec, RefPosition* spillRefPosition); void unassignPhysReg(RegRecord* regRec ARM_ARG(RegisterType newRegType)); void unassignPhysReg(RegRecord* regRec, RefPosition* spillRefPosition); @@ -1586,17 +1584,6 @@ class LinearScan : public LinearScanInterface void clearNextIntervalRef(regNumber reg, var_types regType); void updateNextIntervalRef(regNumber reg, Interval* interval); - LsraLocation getNextIntervalRefLocation(regNumber reg ARM_ARG(var_types theRegType)) - { - LsraLocation nextLocation = nextIntervalRef[reg]; -#ifdef TARGET_ARM - if (theRegType == TYP_DOUBLE) - { - nextLocation = Min(nextLocation, nextIntervalRef[REG_NEXT(reg)]); - } -#endif - return nextLocation; - } void clearSpillCost(regNumber reg, var_types regType); void updateSpillCost(regNumber reg, Interval* interval); @@ -1627,10 +1614,7 @@ class LinearScan : public LinearScanInterface #ifdef TARGET_ARM if (regType == TYP_DOUBLE) { - // The following is what you'd *expect* this 
to be (or at least some form of combining - // the two), but the existing heuristics always use the location of the odd (second) half. - // loc = Min(loc, nextFixedRef[regNum + 1]); - loc = nextFixedRef[regNum + 1]; + loc = Min(loc, nextFixedRef[regNum + 1]); } #endif return loc; @@ -1643,10 +1627,7 @@ class LinearScan : public LinearScanInterface #ifdef TARGET_ARM if (regType == TYP_DOUBLE) { - // The following is what you'd *expect* this to be (or at least some form of combining - // the two), but the existing heuristics always use the location of the odd (second) half. - // loc = Min(loc, nextIntervalRef[regNum + 1]); - loc = nextIntervalRef[regNum + 1]; + loc = Min(loc, nextIntervalRef[regNum + 1]); } #endif return loc; From 4a55595d8bd118e088485a6dc60e8d84e46de216 Mon Sep 17 00:00:00 2001 From: Carol Eidt Date: Fri, 4 Dec 2020 09:50:02 -0800 Subject: [PATCH 14/17] Call clearConstanReg before nulling out the interval, so that we can correctly handling doubles on ARM --- src/coreclr/src/jit/lsra.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp index 54f2fdab204df5..727279ecc816a9 100644 --- a/src/coreclr/src/jit/lsra.cpp +++ b/src/coreclr/src/jit/lsra.cpp @@ -5565,6 +5565,7 @@ void LinearScan::allocateRegisters() { if (assignedInterval != nullptr && !assignedInterval->isActive && assignedInterval->isConstant) { + clearConstantReg(regRecord->regNum, assignedInterval->registerType); regRecord->assignedInterval = nullptr; spillCost[regRecord->regNum] = 0; From 080b9e4482235e1bdf0711dd21bd8d4a05055165 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 4 Dec 2020 15:45:21 -0800 Subject: [PATCH 15/17] Minor cleanup --- src/coreclr/src/jit/lsra.cpp | 85 ++++++++++--------- src/coreclr/src/jit/lsrabuild.cpp | 7 +- .../JitBlue/GitHub_13735/GitHub_13735.cs | 4 +- 3 files changed, 49 insertions(+), 47 deletions(-) diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp index 727279ecc816a9..76d069d8024aad 100644 --- a/src/coreclr/src/jit/lsra.cpp +++ b/src/coreclr/src/jit/lsra.cpp @@ -518,8 +518,7 @@ bool LinearScan::conflictingFixedRegReference(regNumber regNum, RefPosition* ref // Otherwise, check for conflicts. // There is a conflict if: - // 1. There is a recent RefPosition on this RegRecord that is at this location, - // except in the case where it is a special "putarg" that is associated with this interval, OR + // 1. There is a recent RefPosition on this RegRecord that is at this location, OR // 2. There is an upcoming RefPosition at this location, or at the next location // if refPosition is a delayed use (i.e. must be kept live through the next/def location). 
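The getNextFixedRef/getNextIntervalRef changes in the lsra.h hunk above (patch 13) now fold the odd half of an ARM double register into the query with Min(), instead of always using the second half's location. A minimal standalone sketch of that lookup pattern, assuming hypothetical names (LocationT, kMaxLocation, nextFixedRefLoc) rather than the JIT's actual declarations:

    #include <algorithm>
    #include <cstdint>

    using LocationT = uint32_t;
    const LocationT kMaxLocation = UINT32_MAX;  // "no upcoming fixed reference"

    // One next-fixed-reference slot per single (float) register half.
    LocationT nextFixedRefLoc[2] = {12, 7};

    // For a double register built from two consecutive float halves, the next
    // fixed reference is the earlier of the two halves' next references.
    LocationT nextFixedRefForDouble(int evenHalf)
    {
        return std::min(nextFixedRefLoc[evenHalf], nextFixedRefLoc[evenHalf + 1]);
    }

With the values above, nextFixedRefForDouble(0) returns 7: the double register is treated as needed at the earlier of its two halves' fixed references, so a candidate is ruled out (or retained) based on whichever half is constrained first.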
@@ -2714,46 +2713,49 @@ bool LinearScan::isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPo GenTree* otherTreeNode = physRegRecord->assignedInterval->firstRefPosition->treeNode; noway_assert(otherTreeNode != nullptr); - if (refPosition->treeNode->OperGet() == otherTreeNode->OperGet()) + if (refPosition->treeNode->OperGet() != otherTreeNode->OperGet()) { - switch (otherTreeNode->OperGet()) + return false; + } + + switch (otherTreeNode->OperGet()) + { + case GT_CNS_INT: { - case GT_CNS_INT: + ssize_t v1 = refPosition->treeNode->AsIntCon()->IconValue(); + ssize_t v2 = otherTreeNode->AsIntCon()->IconValue(); + if ((v1 == v2) && (varTypeGCtype(refPosition->treeNode) == varTypeGCtype(otherTreeNode) || v1 == 0)) { - ssize_t v1 = refPosition->treeNode->AsIntCon()->IconValue(); - ssize_t v2 = otherTreeNode->AsIntCon()->IconValue(); - if ((v1 == v2) && (varTypeGCtype(refPosition->treeNode) == varTypeGCtype(otherTreeNode) || v1 == 0)) - { #ifdef TARGET_64BIT - // If the constant is negative, only reuse registers of the same type. - // This is because, on a 64-bit system, we do not sign-extend immediates in registers to - // 64-bits unless they are actually longs, as this requires a longer instruction. - // This doesn't apply to a 32-bit system, on which long values occupy multiple registers. - // (We could sign-extend, but we would have to always sign-extend, because if we reuse more - // than once, we won't have access to the instruction that originally defines the constant). - if ((refPosition->treeNode->TypeGet() == otherTreeNode->TypeGet()) || (v1 >= 0)) + // If the constant is negative, only reuse registers of the same type. + // This is because, on a 64-bit system, we do not sign-extend immediates in registers to + // 64-bits unless they are actually longs, as this requires a longer instruction. + // This doesn't apply to a 32-bit system, on which long values occupy multiple registers. + // (We could sign-extend, but we would have to always sign-extend, because if we reuse more + // than once, we won't have access to the instruction that originally defines the constant). + if ((refPosition->treeNode->TypeGet() == otherTreeNode->TypeGet()) || (v1 >= 0)) #endif // TARGET_64BIT - { - return true; - } - } - break; - } - case GT_CNS_DBL: - { - // For floating point constants, the values must be identical, not simply compare - // equal. So we compare the bits. - if (refPosition->treeNode->AsDblCon()->isBitwiseEqual(otherTreeNode->AsDblCon()) && - (refPosition->treeNode->TypeGet() == otherTreeNode->TypeGet())) { return true; } - break; } - default: - break; + break; + } + case GT_CNS_DBL: + { + // For floating point constants, the values must be identical, not simply compare + // equal. So we compare the bits. + if (refPosition->treeNode->AsDblCon()->isBitwiseEqual(otherTreeNode->AsDblCon()) && + (refPosition->treeNode->TypeGet() == otherTreeNode->TypeGet())) + { + return true; + } + break; } + default: + break; } + return false; } @@ -2771,8 +2773,7 @@ bool LinearScan::isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPo // no free register or registers with lower-weight Intervals that can be spilled. // // Notes: -// This method will prefer to allocate a free register, but if none are available, or if -// this is not a last use and all availalbe registers will be killed prior to the next use, +// This method will prefer to allocate a free register, but if none are available, // it will look for a lower-weight Interval to spill. 
// Weight and farthest distance of next reference are used to determine whether an Interval // currently occupying a register should be spilled. It will be spilled either: @@ -3008,7 +3009,7 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos FREE = 0x10000, // It is not currently assigned to an *active* interval // These are the original criteria for comparing registers that are free. - VALUE_AVAILABLE = 0x8000, // It is a constant value that is already in an acceptable register. + CONST_AVAILABLE = 0x8000, // It is a constant value that is already in an acceptable register. THIS_ASSIGNED = 0x4000, // It is in the interval's preference set and it is already assigned to this interval. COVERS = 0x2000, // It is in the interval's preference set and it covers the current range. OWN_PREFERENCE = 0x1000, // It is in the preference set of this interval. @@ -3016,15 +3017,17 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos RELATED_PREFERENCE = 0x0400, // It is in the preference set of the related interval. CALLER_CALLEE = 0x0200, // It is in the right "set" for the interval (caller or callee-save). UNASSIGNED = 0x0100, // It is not currently assigned to any (active or inactive) interval - COVERS_FULL = 0x0080, - BEST_FIT = 0x0040, - IS_PREV_REG = 0x0020, - REG_ORDER = 0x0010, + COVERS_FULL = 0x0080, // It covers the full range of the interval from current position to the end. + BEST_FIT = 0x0040, // The available range is the closest match to the full range of the interval. + IS_PREV_REG = 0x0020, // This register was previously assigned to the interval. + REG_ORDER = 0x0010, // Tie-breaker // These are the original criteria for comparing registers that are in use. SPILL_COST = 0x0008, // It has the lowest cost of all the candidates. FAR_NEXT_REF = 0x0004, // It has a farther next reference than the best candidate thus far. PREV_REG_OPT = 0x0002, // The previous RefPosition of its current assigned interval is RegOptional. + + // TODO-CQ: Consider using REG_ORDER as a tie-breaker even for busy registers. REG_NUM = 0x0001, // It has a lower register number. }; @@ -3141,14 +3144,14 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos found = selector.applySelection(FREE, freeCandidates); } - // Apply the VALUE_AVAILABLE (matching constant) heuristic. Only applies if we have freeCandidates. + // Apply the CONST_AVAILABLE (matching constant) heuristic. Only applies if we have freeCandidates. // Note that we always need to define the 'matchingConstants' set. 
if (freeCandidates != RBM_NONE) { if (currentInterval->isConstant && RefTypeIsDef(refPosition->refType)) { matchingConstants = getMatchingConstants(selector.candidates, currentInterval, refPosition); - found = selector.applySelection(VALUE_AVAILABLE, matchingConstants); + found = selector.applySelection(CONST_AVAILABLE, matchingConstants); } } @@ -3555,7 +3558,7 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos } else { - assert((selector.score & VALUE_AVAILABLE) == 0); + assert((selector.score & CONST_AVAILABLE) == 0); } } } diff --git a/src/coreclr/src/jit/lsrabuild.cpp b/src/coreclr/src/jit/lsrabuild.cpp index 32c72e287b24e5..a0cbf2d5f24a9d 100644 --- a/src/coreclr/src/jit/lsrabuild.cpp +++ b/src/coreclr/src/jit/lsrabuild.cpp @@ -1792,14 +1792,14 @@ void LinearScan::buildRefPositionsForNode(GenTree* tree, BasicBlock* block, Lsra JITDUMP("\n"); } -//------------------------------------------------------------------------ -// buildPhysRegRecords: Make an interval for each physical register -// static const regNumber lsraRegOrder[] = {REG_VAR_ORDER}; const unsigned lsraRegOrderSize = ArrLen(lsraRegOrder); static const regNumber lsraRegOrderFlt[] = {REG_VAR_ORDER_FLT}; const unsigned lsraRegOrderFltSize = ArrLen(lsraRegOrderFlt); +//------------------------------------------------------------------------ +// buildPhysRegRecords: Make an interval for each physical register +// void LinearScan::buildPhysRegRecords() { for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg)) @@ -3068,6 +3068,7 @@ int LinearScan::BuildDelayFreeUses(GenTree* node, GenTree* rmwNode, regMaskTP ca rmwInterval = getIntervalForLocalVarNode(rmwNode->AsLclVar()); // Note: we don't handle multi-reg vars here. It's not clear that there are any cases // where we'd encounter a multi-reg var in an RMW context. + assert(!rmwNode->AsLclVar()->IsMultiReg()); rmwIsLastUse = rmwNode->AsLclVar()->IsLastUse(0); } if (!node->isContained()) diff --git a/src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_13735/GitHub_13735.cs b/src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_13735/GitHub_13735.cs index 1a586001776eba..53081061bb0775 100644 --- a/src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_13735/GitHub_13735.cs +++ b/src/coreclr/tests/src/JIT/Regression/JitBlue/GitHub_13735/GitHub_13735.cs @@ -15,9 +15,7 @@ static int GetRandom() { } [MethodImpl(MethodImplOptions.NoInlining)] - static void Print(int x) { - // Console.WriteLine(x); - } + static void Print(int x) { } static void SampleA() { From e3bfebee50bf33607b57b38a063b2de9b00d93f4 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 4 Dec 2020 17:55:00 -0800 Subject: [PATCH 16/17] Formatting changes --- src/coreclr/src/jit/lsra.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp index 76d069d8024aad..2bf7c3b61b6dd1 100644 --- a/src/coreclr/src/jit/lsra.cpp +++ b/src/coreclr/src/jit/lsra.cpp @@ -3028,7 +3028,7 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos PREV_REG_OPT = 0x0002, // The previous RefPosition of its current assigned interval is RegOptional. // TODO-CQ: Consider using REG_ORDER as a tie-breaker even for busy registers. - REG_NUM = 0x0001, // It has a lower register number. + REG_NUM = 0x0001, // It has a lower register number. 
}; LsraLocation bestLocation = MinLocation; From 6e6365e60397a895528d9bae6560481a76f81bab Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 4 Dec 2020 17:41:37 -0800 Subject: [PATCH 17/17] Other minor pending fixes --- src/coreclr/src/jit/lsra.cpp | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp index 2bf7c3b61b6dd1..83f010226e3d3e 100644 --- a/src/coreclr/src/jit/lsra.cpp +++ b/src/coreclr/src/jit/lsra.cpp @@ -492,10 +492,11 @@ regMaskTP LinearScan::stressLimitRegs(RefPosition* refPosition, regMaskTP mask) #endif // DEBUG //------------------------------------------------------------------------ -// conflictingFixedRegReference: Determine whether the current RegRecord has a +// conflictingFixedRegReference: Determine whether the 'reg' has a // fixed register use that conflicts with 'refPosition' // // Arguments: +// regNum - The register of interest // refPosition - The RefPosition of interest // // Return Value: @@ -2770,14 +2771,14 @@ bool LinearScan::isMatchingConstant(RegRecord* physRegRecord, RefPosition* refPo // Return Value: // The regNumber, if any, allocated to the RefPosition. // Returns REG_NA only if 'refPosition->RegOptional()' is true, and there are -// no free register or registers with lower-weight Intervals that can be spilled. +// no free registers and no registers containing lower-weight Intervals that can be spilled. // // Notes: // This method will prefer to allocate a free register, but if none are available, // it will look for a lower-weight Interval to spill. // Weight and farthest distance of next reference are used to determine whether an Interval // currently occupying a register should be spilled. It will be spilled either: -// - At it most recent RefPosition, if that is within the current block, OR +// - At its most recent RefPosition, if that is within the current block, OR // - At the boundary between the previous block and this one // // To select a ref position for spilling. @@ -3232,13 +3233,13 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos } } - // Apply the COVERS heuristic. Only applies if we have freeCandidates. + // Apply the COVERS heuristic. if (!found) { found = selector.applySelection(COVERS, coversSet & preferenceSet); } - // Apply the OWN_PREFERENCE heuristic. Only applies if we have freeCandidates. + // Apply the OWN_PREFERENCE heuristic. // Note that 'preferenceSet' already includes only freeCandidates. if (!found) { @@ -3246,20 +3247,20 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos found = selector.applySelection(OWN_PREFERENCE, preferenceSet); } - // Apply the COVERS_RELATED heuristic. Only applies if we have freeCandidates. + // Apply the COVERS_RELATED heuristic. if (!found) { assert((coversRelatedSet & freeCandidates) == coversRelatedSet); found = selector.applySelection(COVERS_RELATED, coversRelatedSet); } - // Apply the RELATED_PREFERENCE heuristic. Only applies if we have freeCandidates. + // Apply the RELATED_PREFERENCE heuristic. if (!found) { found = selector.applySelection(RELATED_PREFERENCE, relatedPreferences & freeCandidates); } - // Apply the CALLER_CALLEE heuristic. Only applies if we have freeCandidates. + // Apply the CALLER_CALLEE heuristic. 
if (!found) { found = selector.applySelection(CALLER_CALLEE, callerCalleePrefs & freeCandidates); @@ -3343,7 +3344,7 @@ regNumber LinearScan::allocateReg(Interval* currentInterval, RefPosition* refPos found = selector.applySelection(BEST_FIT, bestFitSet); } - // Apply the IS_PREV_REG heuristic. Only applies if we have freeCandidates. + // Apply the IS_PREV_REG heuristic. TODO: Check if Only applies if we have freeCandidates. // Oddly, the previous heuristics only considered this if it covered the range. if ((prevRegRec != nullptr) && ((selector.score & COVERS_FULL) != 0)) { @@ -5442,12 +5443,9 @@ void LinearScan::allocateRegisters() if (isAssignedReg) { assert(nextIntervalRef[reg] == assignedInterval->getNextRefLocation()); - if (assignedInterval->isActive) - { - assert(!isRegAvailable(reg, assignedInterval->registerType)); - assert((recentRefPosition == nullptr) || - (spillCost[reg] == getSpillWeight(physRegRecord))); - } + assert(!isRegAvailable(reg, assignedInterval->registerType)); + assert((recentRefPosition == nullptr) || + (spillCost[reg] == getSpillWeight(physRegRecord))); } else {
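Taken together, these patches leave allocateReg() applying its heuristics in a fixed priority order, each one intersecting the remaining candidate set and stopping as soon as a single register survives. A minimal sketch of that narrowing loop, loosely modeled on applySelection but using invented names and masks (RegMask, applyScore, the example bit values) that are not the JIT's actual code:

    #include <cinttypes>
    #include <cstdint>
    #include <cstdio>

    using RegMask = uint32_t;

    // Intersect the candidate set with the registers preferred by one heuristic.
    // Keep the intersection only if it is non-empty; report "found" once exactly
    // one candidate remains.
    bool applyScore(RegMask& candidates, RegMask preferred)
    {
        RegMask overlap = candidates & preferred;
        if (overlap != 0)
        {
            candidates = overlap;
        }
        return (candidates & (candidates - 1)) == 0;
    }

    int main()
    {
        RegMask candidates = 0xE;                  // r1, r2, r3 allocatable
        bool found = applyScore(candidates, 0x6);  // e.g. "covers the range": r1, r2
        if (!found)
        {
            found = applyScore(candidates, 0x4);   // e.g. "matches a preference": r2
        }
        printf("selected mask: 0x%" PRIx32 "\n", candidates);  // 0x4 -> r2
        return 0;
    }

The lowest-priority scores then act purely as tie-breakers, which is why the comment on REG_ORDER earlier in this series notes that it always results in a single candidate and only makes sense as the last heuristic for free registers.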