From 628ce55fecc9e7478194c354a9bb0f9259d30518 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 7 Jan 2020 09:37:20 -0800 Subject: [PATCH 1/2] Updating genCodeForBinary to be VEX aware --- src/coreclr/src/jit/codegen.h | 1 + src/coreclr/src/jit/codegenxarch.cpp | 12 ++ src/coreclr/src/jit/emitxarch.cpp | 4 +- src/coreclr/src/jit/emitxarch.h | 2 +- .../src/jit/hwintrinsiccodegenxarch.cpp | 120 +------------ src/coreclr/src/jit/instr.cpp | 166 +++++++++++++++++- src/coreclr/src/jit/lsraxarch.cpp | 13 ++ 7 files changed, 191 insertions(+), 127 deletions(-) diff --git a/src/coreclr/src/jit/codegen.h b/src/coreclr/src/jit/codegen.h index 4e87dc0f3586e5..e0d2b4da8ec31a 100644 --- a/src/coreclr/src/jit/codegen.h +++ b/src/coreclr/src/jit/codegen.h @@ -1336,6 +1336,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #if defined(_TARGET_XARCH_) void inst_RV_RV_IV(instruction ins, emitAttr size, regNumber reg1, regNumber reg2, unsigned ival); void inst_RV_TT_IV(instruction ins, emitAttr attr, regNumber reg1, GenTree* rmOp, int ival); + void inst_RV_RV_TT(instruction ins, emitAttr size, regNumber targetReg, regNumber op1Reg, GenTree* op2, bool isRMW); #endif void inst_RV_RR(instruction ins, emitAttr size, regNumber reg1, regNumber reg2); diff --git a/src/coreclr/src/jit/codegenxarch.cpp b/src/coreclr/src/jit/codegenxarch.cpp index 66bc4fe486a3b5..578353e27e6fca 100644 --- a/src/coreclr/src/jit/codegenxarch.cpp +++ b/src/coreclr/src/jit/codegenxarch.cpp @@ -909,6 +909,18 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode) regNumber op1reg = op1->isUsedFromReg() ? op1->GetRegNum() : REG_NA; regNumber op2reg = op2->isUsedFromReg() ? op2->GetRegNum() : REG_NA; + if (varTypeIsFloating(treeNode->TypeGet())) + { + // floating-point addition, subtraction, multiplication, and division + // all have RMW semantics if VEX support is not available + + bool isRMW = !compiler->canUseVexEncoding(); + inst_RV_RV_TT(ins, emitTypeSize(treeNode), targetReg, op1reg, op2, isRMW); + + genProduceReg(treeNode); + return; + } + GenTree* dst; GenTree* src; diff --git a/src/coreclr/src/jit/emitxarch.cpp b/src/coreclr/src/jit/emitxarch.cpp index 90b254f00d28fc..98bd91ef41bc87 100644 --- a/src/coreclr/src/jit/emitxarch.cpp +++ b/src/coreclr/src/jit/emitxarch.cpp @@ -214,7 +214,6 @@ bool emitter::AreUpper32BitsZero(regNumber reg) return false; } -#ifdef FEATURE_HW_INTRINSICS //------------------------------------------------------------------------ // IsDstSrcImmAvxInstruction: Checks if the instruction has a "reg, reg/mem, imm" or // "reg/mem, reg, imm" form for the legacy, VEX, and EVEX @@ -250,7 +249,6 @@ static bool IsDstSrcImmAvxInstruction(instruction ins) return false; } } -#endif // FEATURE_HW_INTRINSICS // ------------------------------------------------------------------- // Is4ByteSSEInstruction: Returns true if the SSE instruction is a 4-byte opcode. 
@@ -5629,7 +5627,6 @@ void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNu emitAdjustStackDepthPushPop(ins); } -#ifdef FEATURE_HW_INTRINSICS //------------------------------------------------------------------------ // emitIns_SIMD_R_R_I: emits the code for an instruction that takes a register operand, an immediate operand // and that returns a value in register @@ -5810,6 +5807,7 @@ void emitter::emitIns_SIMD_R_R_S( } } +#ifdef FEATURE_HW_INTRINSICS //------------------------------------------------------------------------ // emitIns_SIMD_R_R_A_I: emits the code for a SIMD instruction that takes a register operand, a GenTreeIndir address, // an immediate operand, and that returns a value in register diff --git a/src/coreclr/src/jit/emitxarch.h b/src/coreclr/src/jit/emitxarch.h index 005d72e2d5c30d..cd16b116cdc4e4 100644 --- a/src/coreclr/src/jit/emitxarch.h +++ b/src/coreclr/src/jit/emitxarch.h @@ -432,7 +432,6 @@ void emitIns_R_AX(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, void emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp); -#ifdef FEATURE_HW_INTRINSICS void emitIns_SIMD_R_R_I(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int ival); void emitIns_SIMD_R_R_A(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, GenTreeIndir* indir); @@ -443,6 +442,7 @@ void emitIns_SIMD_R_R_C( void emitIns_SIMD_R_R_R(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg); void emitIns_SIMD_R_R_S(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int varx, int offs); +#ifdef FEATURE_HW_INTRINSICS void emitIns_SIMD_R_R_A_I( instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, GenTreeIndir* indir, int ival); void emitIns_SIMD_R_R_AR_I( diff --git a/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp index 20935610618c54..1ba67be0bb2d9e 100644 --- a/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp @@ -600,11 +600,6 @@ void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins, e void CodeGen::genHWIntrinsic_R_R_RM( GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, GenTree* op2) { - emitter* emit = GetEmitter(); - - // TODO-XArch-CQ: Commutative operations can have op1 be contained - // TODO-XArch-CQ: Non-VEX encoded instructions can have both ops contained - assert(targetReg != REG_NA); assert(op1Reg != REG_NA); @@ -612,121 +607,10 @@ void CodeGen::genHWIntrinsic_R_R_RM( { assert(HWIntrinsicInfo::SupportsContainment(node->gtHWIntrinsicId)); assertIsContainableHWIntrinsicOp(compiler->m_pLowering, node, op2); - - TempDsc* tmpDsc = nullptr; - unsigned varNum = BAD_VAR_NUM; - unsigned offset = (unsigned)-1; - - if (op2->isUsedFromSpillTemp()) - { - assert(op2->IsRegOptional()); - - tmpDsc = getSpillTempDsc(op2); - varNum = tmpDsc->tdTempNum(); - offset = 0; - - regSet.tmpRlsTemp(tmpDsc); - } - else if (op2->isIndir() || op2->OperIsHWIntrinsic()) - { - GenTree* addr; - GenTreeIndir* memIndir = nullptr; - - if (op2->isIndir()) - { - memIndir = op2->AsIndir(); - addr = memIndir->Addr(); - } - else - { - assert(op2->AsHWIntrinsic()->OperIsMemoryLoad()); - assert(HWIntrinsicInfo::lookupNumArgs(op2->AsHWIntrinsic()) == 1); - addr = op2->gtGetOp1(); - } - - switch (addr->OperGet()) - { - case GT_LCL_VAR_ADDR: - { - varNum = 
addr->AsLclVarCommon()->GetLclNum(); - offset = 0; - break; - } - - case GT_CLS_VAR_ADDR: - { - emit->emitIns_SIMD_R_R_C(ins, attr, targetReg, op1Reg, addr->AsClsVar()->gtClsVarHnd, 0); - return; - } - - default: - { - GenTreeIndir load = indirForm(op2->TypeGet(), addr); - - if (memIndir == nullptr) - { - // This is the HW intrinsic load case. - // Until we improve the handling of addressing modes in the emitter, we'll create a - // temporary GT_IND to generate code with. - memIndir = &load; - } - emit->emitIns_SIMD_R_R_A(ins, attr, targetReg, op1Reg, memIndir); - return; - } - } - } - else - { - switch (op2->OperGet()) - { - case GT_LCL_FLD: - varNum = op2->AsLclFld()->GetLclNum(); - offset = op2->AsLclFld()->GetLclOffs(); - break; - - case GT_LCL_VAR: - { - assert(op2->IsRegOptional() || - !compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()); - varNum = op2->AsLclVar()->GetLclNum(); - offset = 0; - break; - } - - default: - unreached(); - break; - } - } - - // Ensure we got a good varNum and offset. - // We also need to check for `tmpDsc != nullptr` since spill temp numbers - // are negative and start with -1, which also happens to be BAD_VAR_NUM. - assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr)); - assert(offset != (unsigned)-1); - - emit->emitIns_SIMD_R_R_S(ins, attr, targetReg, op1Reg, varNum, offset); } - else - { - regNumber op2Reg = op2->GetRegNum(); - - if ((op1Reg != targetReg) && (op2Reg == targetReg) && node->isRMWHWIntrinsic(compiler)) - { - // We have "reg2 = reg1 op reg2" where "reg1 != reg2" on a RMW intrinsic. - // - // For non-commutative intrinsics, we should have ensured that op2 was marked - // delay free in order to prevent it from getting assigned the same register - // as target. However, for commutative intrinsics, we can just swap the operands - // in order to have "reg2 = reg2 op reg1" which will end up producing the right code. - noway_assert(node->OperIsCommutative()); - op2Reg = op1Reg; - op1Reg = targetReg; - } - - emit->emitIns_SIMD_R_R_R(ins, attr, targetReg, op1Reg, op2Reg); - } + bool isRMW = node->isRMWHWIntrinsic(compiler); + inst_RV_RV_TT(ins, attr, targetReg, op1Reg, op2, isRMW); } //------------------------------------------------------------------------ diff --git a/src/coreclr/src/jit/instr.cpp b/src/coreclr/src/jit/instr.cpp index 4fcb04d7ca81bd..8fb926277a6208 100644 --- a/src/coreclr/src/jit/instr.cpp +++ b/src/coreclr/src/jit/instr.cpp @@ -963,7 +963,6 @@ void CodeGen::inst_RV_RV_IV(instruction ins, emitAttr size, regNumber reg1, regN GetEmitter()->emitIns_R_R_I(ins, size, reg1, reg2, ival); } -#ifdef FEATURE_HW_INTRINSICS //------------------------------------------------------------------------ // inst_RV_TT_IV: Generates an instruction that takes 3 operands: // a register operand, an operand that may be memory or register and an immediate @@ -976,10 +975,6 @@ void CodeGen::inst_RV_RV_IV(instruction ins, emitAttr size, regNumber reg1, regN // rmOp -- The second operand, which may be a memory node or a node producing a register // ival -- The immediate operand // -// Notes: -// This isn't really specific to HW intrinsics, but depends on other methods that are -// only defined for FEATURE_HW_INTRINSICS, and is currently only used in that context. 
-// void CodeGen::inst_RV_TT_IV(instruction ins, emitAttr attr, regNumber reg1, GenTree* rmOp, int ival) { noway_assert(GetEmitter()->emitVerifyEncodable(ins, EA_SIZE(attr), reg1)); @@ -1012,9 +1007,13 @@ void CodeGen::inst_RV_TT_IV(instruction ins, emitAttr attr, regNumber reg1, GenT } else { +#if defined(FEATURE_HW_INTRINSICS) assert(rmOp->AsHWIntrinsic()->OperIsMemoryLoad()); assert(HWIntrinsicInfo::lookupNumArgs(rmOp->AsHWIntrinsic()) == 1); addr = rmOp->gtGetOp1(); +#else + unreached(); +#endif // FEATURE_HW_INTRINSICS } switch (addr->OperGet()) @@ -1053,9 +1052,11 @@ void CodeGen::inst_RV_TT_IV(instruction ins, emitAttr attr, regNumber reg1, GenT switch (rmOp->OperGet()) { case GT_LCL_FLD: + { varNum = rmOp->AsLclFld()->GetLclNum(); offset = rmOp->AsLclFld()->GetLclOffs(); break; + } case GT_LCL_VAR: { @@ -1067,8 +1068,10 @@ void CodeGen::inst_RV_TT_IV(instruction ins, emitAttr attr, regNumber reg1, GenT } default: + { unreached(); break; + } } } @@ -1086,8 +1089,161 @@ void CodeGen::inst_RV_TT_IV(instruction ins, emitAttr attr, regNumber reg1, GenT GetEmitter()->emitIns_SIMD_R_R_I(ins, attr, reg1, rmOpReg, ival); } } + +//------------------------------------------------------------------------ +// inst_RV_RV_TT: Generates an instruction that takes 2 operands: +// a register operand and an operand that may be in memory or register +// the result is returned in register +// +// Arguments: +// ins -- The instruction being emitted +// size -- The emit size attribute +// targetReg -- The target register +// op1Reg -- The first operand register +// op2 -- The second operand, which may be a memory node or a node producing a register +// isRMW -- true if the instruction is RMW; otherwise, false +// +void CodeGen::inst_RV_RV_TT( + instruction ins, emitAttr size, regNumber targetReg, regNumber op1Reg, GenTree* op2, bool isRMW) +{ + noway_assert(GetEmitter()->emitVerifyEncodable(ins, EA_SIZE(size), targetReg)); + + // TODO-XArch-CQ: Commutative operations can have op1 be contained + // TODO-XArch-CQ: Non-VEX encoded instructions can have both ops contained + + if (op2->isContained() || op2->isUsedFromSpillTemp()) + { + TempDsc* tmpDsc = nullptr; + unsigned varNum = BAD_VAR_NUM; + unsigned offset = (unsigned)-1; + + if (op2->isUsedFromSpillTemp()) + { + assert(op2->IsRegOptional()); + + tmpDsc = getSpillTempDsc(op2); + varNum = tmpDsc->tdTempNum(); + offset = 0; + + regSet.tmpRlsTemp(tmpDsc); + } + else if (op2->isIndir() || op2->OperIsHWIntrinsic()) + { + GenTree* addr; + GenTreeIndir* memIndir = nullptr; + + if (op2->isIndir()) + { + memIndir = op2->AsIndir(); + addr = memIndir->Addr(); + } + else + { +#if defined(FEATURE_HW_INTRINSICS) + assert(op2->AsHWIntrinsic()->OperIsMemoryLoad()); + assert(HWIntrinsicInfo::lookupNumArgs(op2->AsHWIntrinsic()) == 1); + addr = op2->gtGetOp1(); +#else + unreached(); #endif // FEATURE_HW_INTRINSICS + } + + switch (addr->OperGet()) + { + case GT_LCL_VAR_ADDR: + { + varNum = addr->AsLclVarCommon()->GetLclNum(); + offset = 0; + break; + } + + case GT_CLS_VAR_ADDR: + { + GetEmitter()->emitIns_SIMD_R_R_C(ins, size, targetReg, op1Reg, addr->AsClsVar()->gtClsVarHnd, 0); + return; + } + + default: + { + GenTreeIndir load = indirForm(op2->TypeGet(), addr); + + if (memIndir == nullptr) + { + // This is the HW intrinsic load case. + // Until we improve the handling of addressing modes in the emitter, we'll create a + // temporary GT_IND to generate code with. 
+ memIndir = &load; + } + GetEmitter()->emitIns_SIMD_R_R_A(ins, size, targetReg, op1Reg, memIndir); + return; + } + } + } + else + { + switch (op2->OperGet()) + { + case GT_LCL_FLD: + { + varNum = op2->AsLclFld()->GetLclNum(); + offset = op2->AsLclFld()->GetLclOffs(); + break; + } + + case GT_LCL_VAR: + { + assert(op2->IsRegOptional() || + !compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()); + varNum = op2->AsLclVar()->GetLclNum(); + offset = 0; + break; + } + case GT_CNS_DBL: + { + GenTreeDblCon* dblCns = op2->AsDblCon(); + CORINFO_FIELD_HANDLE cnsDblHnd = + GetEmitter()->emitFltOrDblConst(dblCns->gtDconVal, emitTypeSize(dblCns)); + GetEmitter()->emitIns_SIMD_R_R_C(ins, size, targetReg, op1Reg, cnsDblHnd, 0); + return; + } + + default: + { + unreached(); + break; + } + } + } + + // Ensure we got a good varNum and offset. + // We also need to check for `tmpDsc != nullptr` since spill temp numbers + // are negative and start with -1, which also happens to be BAD_VAR_NUM. + assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr)); + assert(offset != (unsigned)-1); + + GetEmitter()->emitIns_SIMD_R_R_S(ins, size, targetReg, op1Reg, varNum, offset); + } + else + { + regNumber op2Reg = op2->GetRegNum(); + + if ((op1Reg != targetReg) && (op2Reg == targetReg) && isRMW) + { + // We have "reg2 = reg1 op reg2" where "reg1 != reg2" on a RMW instruction. + // + // For non-commutative instructions, we should have ensured that op2 was marked + // delay free in order to prevent it from getting assigned the same register + // as target. However, for commutative instructions, we can just swap the operands + // in order to have "reg2 = reg2 op reg1" which will end up producing the right code. + + op2Reg = op1Reg; + op1Reg = targetReg; + } + + GetEmitter()->emitIns_SIMD_R_R_R(ins, size, targetReg, op1Reg, op2Reg); + } +} #endif // _TARGET_XARCH_ /***************************************************************************** diff --git a/src/coreclr/src/jit/lsraxarch.cpp b/src/coreclr/src/jit/lsraxarch.cpp index 2c0cd97373e6c7..ac406c7bc60345 100644 --- a/src/coreclr/src/jit/lsraxarch.cpp +++ b/src/coreclr/src/jit/lsraxarch.cpp @@ -787,9 +787,22 @@ bool LinearScan::isRMWRegOper(GenTree* tree) #endif return false; + case GT_ADD: + case GT_SUB: + case GT_DIV: + { + return !varTypeIsFloating(tree->TypeGet()) || !compiler->canUseVexEncoding(); + } + // x86/x64 does support a three op multiply when op2|op1 is a contained immediate case GT_MUL: + { + if (varTypeIsFloating(tree->TypeGet())) + { + return !compiler->canUseVexEncoding(); + } return (!tree->gtGetOp2()->isContainedIntOrIImmed() && !tree->gtGetOp1()->isContainedIntOrIImmed()); + } #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: From dedbaf59aa3859aaf4949bf12aff49fb9d8298a3 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Wed, 8 Jan 2020 10:48:51 -0800 Subject: [PATCH 2/2] Resolving some PR feedback --- src/coreclr/src/jit/instr.cpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/coreclr/src/jit/instr.cpp b/src/coreclr/src/jit/instr.cpp index 8fb926277a6208..40005b944268e2 100644 --- a/src/coreclr/src/jit/instr.cpp +++ b/src/coreclr/src/jit/instr.cpp @@ -1052,11 +1052,9 @@ void CodeGen::inst_RV_TT_IV(instruction ins, emitAttr attr, regNumber reg1, GenT switch (rmOp->OperGet()) { case GT_LCL_FLD: - { varNum = rmOp->AsLclFld()->GetLclNum(); offset = rmOp->AsLclFld()->GetLclOffs(); break; - } case GT_LCL_VAR: { @@ -1068,10 +1066,7 @@ void CodeGen::inst_RV_TT_IV(instruction ins, emitAttr attr, regNumber reg1, 
GenT } default: - { unreached(); - break; - } } } @@ -1193,7 +1188,7 @@ void CodeGen::inst_RV_RV_TT( case GT_LCL_VAR: { assert(op2->IsRegOptional() || - !compiler->lvaTable[op2->AsLclVar()->GetLclNum()].lvIsRegCandidate()); + !compiler->lvaGetDesc(op2->AsLclVar()->GetLclNum())->lvIsRegCandidate()); varNum = op2->AsLclVar()->GetLclNum(); offset = 0; break; @@ -1211,7 +1206,6 @@ void CodeGen::inst_RV_RV_TT( default: { unreached(); - break; } } }
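
Background for the change above: the legacy SSE encodings of the scalar and packed floating-point add/sub/mul/div instructions are destructive two-operand forms (dst = dst op src), while their VEX counterparts (VADDPS, VMULSD, ...) take a separate destination (dst = src1 op src2). That is why genCodeForBinary now passes isRMW = !compiler->canUseVexEncoding() into the new inst_RV_RV_TT helper, which only has to handle the "destination aliases op2" shuffle when isRMW is true. The standalone C++ sketch below models just that register-selection decision; it is an illustration under assumed, made-up names (lowerBinary, Plan) and is not a real JIT or emitter entry point.

    // rmw_sketch.cpp - standalone illustration only, NOT JIT code.
    // Models how "dst = src1 <op> src2" is lowered when the instruction
    // has only a destructive (RMW) two-operand encoding versus a VEX
    // three-operand encoding.
    #include <cstdio>
    #include <string>
    #include <utility>

    struct Plan
    {
        std::string moves; // any register copy needed before the arithmetic
        std::string arith; // the arithmetic instruction itself
    };

    // Hypothetical helper: decide what to emit for dst = src1 op src2.
    Plan lowerBinary(const char* op, int dstReg, int src1Reg, int src2Reg, bool isRMW, bool commutative)
    {
        Plan p;

        if (!isRMW)
        {
            // VEX-style three-operand form: no copy needed, any dst works.
            p.arith = std::string("v") + op + " xmm" + std::to_string(dstReg) + ", xmm" +
                      std::to_string(src1Reg) + ", xmm" + std::to_string(src2Reg);
            return p;
        }

        if ((src1Reg != dstReg) && (src2Reg == dstReg))
        {
            // dst aliases src2. For a commutative op we can simply swap the
            // sources; for a non-commutative op the register allocator is
            // expected to have avoided this assignment (delay-free op2).
            if (!commutative)
            {
                std::fprintf(stderr, "unexpected register assignment\n");
            }
            std::swap(src1Reg, src2Reg);
        }

        if (src1Reg != dstReg)
        {
            // Copy the first source into the destination, then overwrite it.
            p.moves = "movaps xmm" + std::to_string(dstReg) + ", xmm" + std::to_string(src1Reg);
        }

        p.arith = std::string(op) + " xmm" + std::to_string(dstReg) + ", xmm" + std::to_string(src2Reg);
        return p;
    }

    int main()
    {
        Plan vex = lowerBinary("addps", 0, 1, 2, /* isRMW */ false, /* commutative */ true);
        Plan sse = lowerBinary("addps", 0, 1, 0, /* isRMW */ true, /* commutative */ true);

        std::printf("VEX: %s\n", vex.arith.c_str());
        std::printf("SSE: %s %s\n", sse.moves.c_str(), sse.arith.c_str());
        return 0;
    }

With this selection logic centralized, genCodeForBinary and genHWIntrinsic_R_R_RM share a single path through inst_RV_RV_TT, and the isRMWRegOper change in lsraxarch.cpp keeps the register allocator consistent with it: floating-point GT_ADD/GT_SUB/GT_MUL/GT_DIV only receive RMW constraints (op1 reused as the destination, op2 kept delay-free where needed) when VEX encoding is not available.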