Move the "do not zero-extend setcc" optimization to lower (#53778)
* Strongly type StoreInd lowering

* Improve clarity of code through the use of helpers

* Move the "do not zero-extend setcc" optimization to lowering (see the sketch after this list)

  It is XARCH-specific, and moving it eliminates questionable code that tried to compensate for CSE changing the store.

* Delete now unnecessary copying of the relop type
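The optimization being moved, in miniature: on xarch, a byte-sized store whose value is a compare (or GT_SETCC) result can retype that value to TYP_BYTE, so codegen can write the setcc result to memory directly instead of first zero-extending it into a full-width register. The sketch below models just that check using hypothetical toy types (ToyNode, ToyOper, LowerToyStoreIndir); the real code operates on GenTree/GenTreeStoreInd, as the diff shows.

#include <cassert>
#include <cstdio>

// Hypothetical stand-ins for the JIT's node types; illustrative only.
enum ToyType { TYP_INT, TYP_BYTE };
enum ToyOper { OP_STOREIND, OP_CMP, OP_SETCC };

struct ToyNode
{
    ToyOper  oper;
    ToyType  type;
    ToyNode* value; // the stored value, akin to GenTreeStoreInd::Data()

    bool OperIsCompare() const { return oper == OP_CMP; }
};

// Models the check this commit adds to LowerStoreIndir: a byte-sized store
// consuming a compare/setcc result retypes the producer to byte, removing
// the need to zero-extend the flags byte before storing it.
void LowerToyStoreIndir(ToyNode* store)
{
    assert(store->oper == OP_STOREIND);
    if ((store->type == TYP_BYTE) && (store->value->OperIsCompare() || store->value->oper == OP_SETCC))
    {
        store->value->type = TYP_BYTE;
    }
}

int main()
{
    ToyNode cmp   = {OP_CMP, TYP_INT, nullptr};
    ToyNode store = {OP_STOREIND, TYP_BYTE, &cmp};
    LowerToyStoreIndir(&store);
    std::printf("compare retyped to TYP_BYTE: %s\n", (cmp.type == TYP_BYTE) ? "yes" : "no");
    return 0;
}

On x64 the intended effect (illustrative instruction sequences, not taken from this commit) is turning "setg al; movzx eax, al; mov [mem], al" into just "setg al; mov [mem], al".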
SingleAccretion authored Jul 13, 2021
1 parent c786e4f commit d021b56
Showing 5 changed files with 33 additions and 44 deletions.
src/coreclr/jit/lower.cpp (11 additions & 12 deletions)

@@ -116,7 +116,7 @@ GenTree* Lowering::LowerNode(GenTree* node)
             break;

         case GT_STOREIND:
-            LowerStoreIndirCommon(node->AsIndir());
+            LowerStoreIndirCommon(node->AsStoreInd());
             break;

         case GT_ADD:
@@ -3558,7 +3558,7 @@ void Lowering::LowerStoreSingleRegCallStruct(GenTreeBlk* store)
     {
         store->ChangeType(regType);
         store->SetOper(GT_STOREIND);
-        LowerStoreIndirCommon(store);
+        LowerStoreIndirCommon(store->AsStoreInd());
         return;
     }
     else
@@ -4100,7 +4100,7 @@ void Lowering::InsertPInvokeMethodProlog()
         // The init routine sets InlinedCallFrame's m_pNext, so we just set the thead's top-of-stack
         GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame);
         firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd));
-        ContainCheckStoreIndir(frameUpd->AsIndir());
+        ContainCheckStoreIndir(frameUpd->AsStoreInd());
         DISPTREERANGE(firstBlockRange, frameUpd);
     }
 #endif // TARGET_64BIT
@@ -4163,7 +4163,7 @@ void Lowering::InsertPInvokeMethodEpilog(BasicBlock* returnBB DEBUGARG(GenTree*
     {
         GenTree* frameUpd = CreateFrameLinkUpdate(PopFrame);
         returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd));
-        ContainCheckStoreIndir(frameUpd->AsIndir());
+        ContainCheckStoreIndir(frameUpd->AsStoreInd());
     }
 }

@@ -4325,7 +4325,7 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call)
         // Stubs do this once per stub, not once per call.
         GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame);
         BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, frameUpd));
-        ContainCheckStoreIndir(frameUpd->AsIndir());
+        ContainCheckStoreIndir(frameUpd->AsStoreInd());
     }
 #endif // TARGET_64BIT

@@ -4335,7 +4335,7 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call)
     // [tcb + offsetOfGcState] = 0
     GenTree* storeGCState = SetGCState(0);
     BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, storeGCState));
-    ContainCheckStoreIndir(storeGCState->AsIndir());
+    ContainCheckStoreIndir(storeGCState->AsStoreInd());

     // Indicate that codegen has switched this thread to preemptive GC.
     // This tree node doesn't generate any code, but impacts LSRA and gc reporting.
@@ -4381,7 +4381,7 @@ void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call)

     GenTree* tree = SetGCState(1);
     BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree));
-    ContainCheckStoreIndir(tree->AsIndir());
+    ContainCheckStoreIndir(tree->AsStoreInd());

     tree = CreateReturnTrapSeq();
     BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree));
@@ -4396,7 +4396,7 @@ void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call)
     {
         tree = CreateFrameLinkUpdate(PopFrame);
         BlockRange().InsertBefore(insertionPoint, LIR::SeqTree(comp, tree));
-        ContainCheckStoreIndir(tree->AsIndir());
+        ContainCheckStoreIndir(tree->AsStoreInd());
     }
 #else
     const CORINFO_EE_INFO::InlinedCallFrameInfo& callFrameInfo = comp->eeGetEEInfo()->inlinedCallFrameInfo;
@@ -6421,7 +6421,7 @@ void Lowering::ContainCheckNode(GenTree* node)
             ContainCheckReturnTrap(node->AsOp());
             break;
         case GT_STOREIND:
-            ContainCheckStoreIndir(node->AsIndir());
+            ContainCheckStoreIndir(node->AsStoreInd());
             break;
         case GT_IND:
             ContainCheckIndir(node->AsIndir());
@@ -6604,9 +6604,8 @@ void Lowering::ContainCheckBitCast(GenTree* node)
 // Arguments:
 //    ind - the store indirection node we are lowering.
 //
-void Lowering::LowerStoreIndirCommon(GenTreeIndir* ind)
+void Lowering::LowerStoreIndirCommon(GenTreeStoreInd* ind)
 {
-    assert(ind->OperIs(GT_STOREIND));
     assert(ind->TypeGet() != TYP_STRUCT);
     TryCreateAddrMode(ind->Addr(), true);
     if (!comp->codeGen->gcInfo.gcIsWriteBarrierStoreIndNode(ind))
@@ -6806,6 +6805,6 @@ bool Lowering::TryTransformStoreObjAsStoreInd(GenTreeBlk* blkNode)
     {
         assert(src->TypeIs(regType) || src->IsCnsIntOrI() || src->IsCall());
     }
-    LowerStoreIndirCommon(blkNode);
+    LowerStoreIndirCommon(blkNode->AsStoreInd());
     return true;
 }
src/coreclr/jit/lower.h (3 additions & 3 deletions)

@@ -89,7 +89,7 @@ class Lowering final : public Phase
     void ContainCheckBitCast(GenTree* node);
     void ContainCheckCallOperands(GenTreeCall* call);
     void ContainCheckIndir(GenTreeIndir* indirNode);
-    void ContainCheckStoreIndir(GenTreeIndir* indirNode);
+    void ContainCheckStoreIndir(GenTreeStoreInd* indirNode);
     void ContainCheckMul(GenTreeOp* node);
     void ContainCheckShiftRotate(GenTreeOp* node);
     void ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc) const;
@@ -292,9 +292,9 @@
 #endif // defined(TARGET_XARCH)

     // Per tree node member functions
-    void LowerStoreIndirCommon(GenTreeIndir* ind);
+    void LowerStoreIndirCommon(GenTreeStoreInd* ind);
     void LowerIndir(GenTreeIndir* ind);
-    void LowerStoreIndir(GenTreeIndir* node);
+    void LowerStoreIndir(GenTreeStoreInd* node);
     GenTree* LowerAdd(GenTreeOp* node);
     bool LowerUnsignedDivOrMod(GenTreeOp* divMod);
     GenTree* LowerConstIntDivOrMod(GenTree* node);
src/coreclr/jit/lowerarmarch.cpp (4 additions & 4 deletions)

@@ -218,7 +218,7 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
 // Return Value:
 //    None.
 //
-void Lowering::LowerStoreIndir(GenTreeIndir* node)
+void Lowering::LowerStoreIndir(GenTreeStoreInd* node)
 {
     ContainCheckStoreIndir(node);
 }
@@ -1376,11 +1376,11 @@ void Lowering::ContainCheckCallOperands(GenTreeCall* call)
 // Arguments:
 //    node - pointer to the node
 //
-void Lowering::ContainCheckStoreIndir(GenTreeIndir* node)
+void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node)
 {
 #ifdef TARGET_ARM64
-    GenTree* src = node->AsOp()->gtOp2;
-    if (!varTypeIsFloating(src->TypeGet()) && src->IsIntegralConst(0))
+    GenTree* src = node->Data();
+    if (src->IsIntegralConst(0))
     {
         // an integer zero for 'src' can be contained.
         MakeSrcContained(node, src);
src/coreclr/jit/lowerxarch.cpp (15 additions & 7 deletions)

@@ -110,11 +110,11 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
 // Return Value:
 //    None.
 //
-void Lowering::LowerStoreIndir(GenTreeIndir* node)
+void Lowering::LowerStoreIndir(GenTreeStoreInd* node)
 {
     // Mark all GT_STOREIND nodes to indicate that it is not known
     // whether it represents a RMW memory op.
-    node->AsStoreInd()->SetRMWStatusDefault();
+    node->SetRMWStatusDefault();

     if (!varTypeIsFloating(node))
     {
@@ -130,10 +130,10 @@ void Lowering::LowerStoreIndir(GenTreeIndir* node)
             return;
         }
     }
-    else if (node->AsStoreInd()->Data()->OperIs(GT_CNS_DBL))
+    else if (node->Data()->IsCnsFltOrDbl())
     {
         // Optimize *x = DCON to *x = ICON which is slightly faster on xarch
-        GenTree* data = node->AsStoreInd()->Data();
+        GenTree* data = node->Data();
         double dblCns = data->AsDblCon()->gtDconVal;
         ssize_t intCns = 0;
         var_types type = TYP_UNKNOWN;
@@ -162,6 +162,13 @@ void Lowering::LowerStoreIndir(GenTreeIndir* node)
             node->ChangeType(type);
         }
     }
+
+    // Optimization: do not unnecessarily zero-extend the result of setcc.
+    if (varTypeIsByte(node) && (node->Data()->OperIsCompare() || node->Data()->OperIs(GT_SETCC)))
+    {
+        node->Data()->ChangeType(TYP_BYTE);
+    }
+
     ContainCheckStoreIndir(node);
 }

@@ -4588,17 +4595,18 @@ void Lowering::ContainCheckIndir(GenTreeIndir* node)
 // Arguments:
 //    node - pointer to the node
 //
-void Lowering::ContainCheckStoreIndir(GenTreeIndir* node)
+void Lowering::ContainCheckStoreIndir(GenTreeStoreInd* node)
 {
     // If the source is a containable immediate, make it contained, unless it is
     // an int-size or larger store of zero to memory, because we can generate smaller code
     // by zeroing a register and then storing it.
-    GenTree* src = node->AsOp()->gtOp2;
+    GenTree* src = node->Data();
     if (IsContainableImmed(node, src) &&
-        (!src->IsIntegralConst(0) || varTypeIsSmall(node) || node->gtGetOp1()->OperGet() == GT_CLS_VAR_ADDR))
+        (!src->IsIntegralConst(0) || varTypeIsSmall(node) || node->Addr()->OperIs(GT_CLS_VAR_ADDR)))
     {
         MakeSrcContained(node, src);
     }
+
     ContainCheckIndir(node);
 }

src/coreclr/jit/morph.cpp (0 additions & 18 deletions)

@@ -12087,23 +12087,8 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac)
                 tree->AsOp()->gtOp2 = op2 = op2->AsCast()->CastOp();
             }
         }
-        else if (op2->OperIsCompare() && varTypeIsByte(effectiveOp1->TypeGet()))
-        {
-            /* We don't need to zero extend the setcc instruction */
-            op2->gtType = TYP_BYTE;
-        }
     }
-    // If we introduced a CSE we may need to undo the optimization above
-    // (i.e. " op2->gtType = TYP_BYTE;" which depends upon op1 being a GT_IND of a byte type)
-    // When we introduce the CSE we remove the GT_IND and subsitute a GT_LCL_VAR in it place.
-    else if (op2->OperIsCompare() && (op2->gtType == TYP_BYTE) && (op1->gtOper == GT_LCL_VAR))
-    {
-        unsigned varNum = op1->AsLclVarCommon()->GetLclNum();
-        LclVarDsc* varDsc = &lvaTable[varNum];
-
-        /* We again need to zero extend the setcc instruction */
-        op2->gtType = varDsc->TypeGet();
-    }
     fgAssignSetVarDef(tree);

     /* We can't CSE the LHS of an assignment */
@@ -12345,9 +12330,6 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac)
         gtReverseCond(op1);
     }

-    /* Propagate gtType of tree into op1 in case it is TYP_BYTE for setcc optimization */
-    op1->gtType = tree->gtType;
-
     noway_assert((op1->gtFlags & GTF_RELOP_JMP_USED) == 0);
     op1->gtFlags |= tree->gtFlags & (GTF_RELOP_JMP_USED | GTF_RELOP_QMARK | GTF_DONT_CSE);
