From 09dc7e38315f1bc65a23e6d6862aa906c8b624d8 Mon Sep 17 00:00:00 2001 From: Will Smith Date: Mon, 14 Mar 2022 16:29:06 -0700 Subject: [PATCH 1/6] Emitting MSUB for ARM64 --- src/coreclr/jit/codegen.h | 1 + src/coreclr/jit/codegenarm64.cpp | 27 ++++++++++++++++++++++++++- src/coreclr/jit/codegenarmarch.cpp | 4 ++++ src/coreclr/jit/gtlist.h | 4 +++- src/coreclr/jit/lowerarmarch.cpp | 15 +++++++++++++++ 5 files changed, 49 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 7820c437429dad..49327411fb2bc2 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1336,6 +1336,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #ifdef TARGET_ARM64 void genCodeForJumpCompare(GenTreeOp* tree); void genCodeForMadd(GenTreeOp* tree); + void genCodeForMsub(GenTreeOp* tree); void genCodeForBfiz(GenTreeOp* tree); void genCodeForAddEx(GenTreeOp* tree); #endif // TARGET_ARM64 diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 0c41d89a860d54..92b23fe7630279 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10023,7 +10023,7 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) } //----------------------------------------------------------------------------------- -// genCodeForMadd: Emit a madd/msub (Multiply-Add) instruction +// genCodeForMadd: Emit a madd (Multiply-Add) instruction // // Arguments: // tree - GT_MADD tree where op1 or op2 is GT_ADD @@ -10067,6 +10067,31 @@ void CodeGen::genCodeForMadd(GenTreeOp* tree) genProduceReg(tree); } +//----------------------------------------------------------------------------------- +// genCodeForMsub: Emit a msub (Multiply-Subtract) instruction +// +// Arguments: +// tree - GT_MSUB tree where op2 is GT_SUB +// +void CodeGen::genCodeForMsub(GenTreeOp* tree) +{ + assert(tree->OperIs(GT_MSUB) && varTypeIsIntegral(tree) && !(tree->gtFlags & GTF_SET_FLAGS)); + genConsumeOperands(tree); + + assert(tree->gtGetOp2()->OperIs(GT_MUL)); + assert(tree->gtGetOp2()->isContained()); + + GenTree* a = tree->gtGetOp1(); + GenTree* b = tree->gtGetOp2()->gtGetOp1(); + GenTree* c = tree->gtGetOp2()->gtGetOp2(); + + // d = a - b * c + // MADD d, b, c, a + GetEmitter()->emitIns_R_R_R_R(INS_msub, emitActualTypeSize(tree), tree->GetRegNum(), + b->GetRegNum(), c->GetRegNum(), a->GetRegNum()); + genProduceReg(tree); +} + //------------------------------------------------------------------------ // genCodeForBfiz: Generates the code sequence for a GenTree node that // represents a bitfield insert in zero with sign/zero extension. diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 01fbaff7a8c4d7..ffabbe53950e3e 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -306,6 +306,10 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) genCodeForMadd(treeNode->AsOp()); break; + case GT_MSUB: + genCodeForMsub(treeNode->AsOp()); + break; + case GT_INC_SATURATE: genCodeForIncSaturate(treeNode); break; diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h index 1d0be8bd167664..54375f8b873217 100644 --- a/src/coreclr/jit/gtlist.h +++ b/src/coreclr/jit/gtlist.h @@ -220,8 +220,10 @@ GTNODE(MUL_LONG , GenTreeOp ,1,GTK_BINOP|DBK_NOTHIR) GTNODE(AND_NOT , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) #ifdef TARGET_ARM64 -GTNODE(MADD , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Generates the Multiply-Add instruction (madd/msub) In the future, we might consider +GTNODE(MADD , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Generates the Multiply-Add instruction. In the future, we might consider // enabling it for both armarch and xarch for floating-point MADD "unsafe" math. +GTNODE(MSUB , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Generates the Multiply-Subtract instruction. In the future, we might consider + // enabling it for both armarch and xarch for floating-point MSUB "unsafe" math. GTNODE(ADDEX, GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Add with sign/zero extension. GTNODE(BFIZ , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Bitfield Insert in Zero. #endif diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index e7d74f303da0bf..b74af233a05152 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -1673,6 +1673,21 @@ void Lowering::ContainCheckBinary(GenTreeOp* node) } } + // Find "a - b * c" in order to emit MSUB + if (comp->opts.OptimizationEnabled() && varTypeIsIntegral(node) && !node->isContained() && node->OperIs(GT_SUB) && + !node->gtOverflow() && op2->OperIs(GT_MUL) && !op2->isContained() && !op2->gtOverflow() && varTypeIsIntegral(op2)) + { + GenTree* a = op1; + GenTree* b = op2->gtGetOp1(); + GenTree* c = op2->gtGetOp2(); + + if (!a->isContained() && !b->isContained() && !c->isContained()) + { + node->ChangeOper(GT_MSUB); + MakeSrcContained(node, op2); + } + } + // Change ADD TO ADDEX for ADD(X, CAST(Y)) or ADD(CAST(X), Y) where CAST is int->long // or for ADD(LSH(X, CNS), X) or ADD(X, LSH(X, CNS)) where CNS is in the (0..typeWidth) range if (node->OperIs(GT_ADD) && !op1->isContained() && !op2->isContained() && varTypeIsIntegral(node) && From 02472549807cad6afc9803d4f96f98930ed87f85 Mon Sep 17 00:00:00 2001 From: Will Smith Date: Mon, 14 Mar 2022 16:52:51 -0700 Subject: [PATCH 2/6] Update codegenarm64.cpp --- src/coreclr/jit/codegenarm64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 92b23fe7630279..ad912f8e9aca3f 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10086,7 +10086,7 @@ void CodeGen::genCodeForMsub(GenTreeOp* tree) GenTree* c = tree->gtGetOp2()->gtGetOp2(); // d = a - b * c - // MADD d, b, c, a + // MSUB d, b, c, a GetEmitter()->emitIns_R_R_R_R(INS_msub, emitActualTypeSize(tree), tree->GetRegNum(), b->GetRegNum(), c->GetRegNum(), a->GetRegNum()); genProduceReg(tree); From ff8ee6f7e862b43065b64897a897e71f0f19910e Mon Sep 17 00:00:00 2001 From: Will Smith Date: Mon, 14 Mar 2022 16:56:08 -0700 Subject: [PATCH 3/6] Updated comments --- src/coreclr/jit/codegenarm64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index ad912f8e9aca3f..60a1670ed9eada 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10071,7 +10071,7 @@ void CodeGen::genCodeForMadd(GenTreeOp* tree) // genCodeForMsub: Emit a msub (Multiply-Subtract) instruction // // Arguments: -// tree - GT_MSUB tree where op2 is GT_SUB +// tree - GT_MSUB tree where op2 is GT_MUL // void CodeGen::genCodeForMsub(GenTreeOp* tree) { From 39d098c5a6d03cc658576d6459d1f4ed5e45d4a9 Mon Sep 17 00:00:00 2001 From: Will Smith Date: Mon, 14 Mar 2022 17:14:13 -0700 Subject: [PATCH 4/6] Formatting --- src/coreclr/jit/codegenarm64.cpp | 4 ++-- src/coreclr/jit/lowerarmarch.cpp | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 60a1670ed9eada..4f5e6a03bedf6e 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -10087,8 +10087,8 @@ void CodeGen::genCodeForMsub(GenTreeOp* tree) // d = a - b * c // MSUB d, b, c, a - GetEmitter()->emitIns_R_R_R_R(INS_msub, emitActualTypeSize(tree), tree->GetRegNum(), - b->GetRegNum(), c->GetRegNum(), a->GetRegNum()); + GetEmitter()->emitIns_R_R_R_R(INS_msub, emitActualTypeSize(tree), tree->GetRegNum(), b->GetRegNum(), c->GetRegNum(), + a->GetRegNum()); genProduceReg(tree); } diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index b74af233a05152..3c1d0ff280606b 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -1675,7 +1675,8 @@ void Lowering::ContainCheckBinary(GenTreeOp* node) // Find "a - b * c" in order to emit MSUB if (comp->opts.OptimizationEnabled() && varTypeIsIntegral(node) && !node->isContained() && node->OperIs(GT_SUB) && - !node->gtOverflow() && op2->OperIs(GT_MUL) && !op2->isContained() && !op2->gtOverflow() && varTypeIsIntegral(op2)) + !node->gtOverflow() && op2->OperIs(GT_MUL) && !op2->isContained() && !op2->gtOverflow() && + varTypeIsIntegral(op2)) { GenTree* a = op1; GenTree* b = op2->gtGetOp1(); From f006f8e37e763f0117453c79a4733039f1888a68 Mon Sep 17 00:00:00 2001 From: Will Smith Date: Tue, 15 Mar 2022 14:01:41 -0700 Subject: [PATCH 5/6] Combining some logic for GT_MADD and GT_MSUB --- src/coreclr/jit/lowerarmarch.cpp | 86 ++++++++++++++++---------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 3c1d0ff280606b..2f373998155d05 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -1633,59 +1633,59 @@ void Lowering::ContainCheckBinary(GenTreeOp* node) CheckImmedAndMakeContained(node, op2); #ifdef TARGET_ARM64 - // Find "a * b + c" or "c + a * b" in order to emit MADD/MSUB - if (comp->opts.OptimizationEnabled() && varTypeIsIntegral(node) && !node->isContained() && node->OperIs(GT_ADD) && - !node->gtOverflow() && (op1->OperIs(GT_MUL) || op2->OperIs(GT_MUL))) + if (comp->opts.OptimizationEnabled() && varTypeIsIntegral(node) && !node->isContained() && !node->gtOverflow()) { - GenTree* mul; - GenTree* c; - if (op1->OperIs(GT_MUL)) + // Find "a * b + c" or "c + a * b" in order to emit MADD/MSUB + if (node->OperIs(GT_ADD) && (op1->OperIs(GT_MUL) || op2->OperIs(GT_MUL))) { - mul = op1; - c = op2; - } - else - { - mul = op2; - c = op1; - } - - GenTree* a = mul->gtGetOp1(); - GenTree* b = mul->gtGetOp2(); - - if (!mul->isContained() && !mul->gtOverflow() && !a->isContained() && !b->isContained() && !c->isContained() && - varTypeIsIntegral(mul)) - { - if (a->OperIs(GT_NEG) && !a->gtGetOp1()->isContained() && !a->gtGetOp1()->IsRegOptional()) + GenTree* mul; + GenTree* c; + if (op1->OperIs(GT_MUL)) { - // "-a * b + c" to MSUB - MakeSrcContained(mul, a); + mul = op1; + c = op2; } - if (b->OperIs(GT_NEG) && !b->gtGetOp1()->isContained()) + else { - // "a * -b + c" to MSUB - MakeSrcContained(mul, b); + mul = op2; + c = op1; } - // If both 'a' and 'b' are GT_NEG - MADD will be emitted. - node->ChangeOper(GT_MADD); - MakeSrcContained(node, mul); - } - } + GenTree* a = mul->gtGetOp1(); + GenTree* b = mul->gtGetOp2(); - // Find "a - b * c" in order to emit MSUB - if (comp->opts.OptimizationEnabled() && varTypeIsIntegral(node) && !node->isContained() && node->OperIs(GT_SUB) && - !node->gtOverflow() && op2->OperIs(GT_MUL) && !op2->isContained() && !op2->gtOverflow() && - varTypeIsIntegral(op2)) - { - GenTree* a = op1; - GenTree* b = op2->gtGetOp1(); - GenTree* c = op2->gtGetOp2(); + if (!mul->isContained() && !mul->gtOverflow() && !a->isContained() && !b->isContained() && + !c->isContained() && varTypeIsIntegral(mul)) + { + if (a->OperIs(GT_NEG) && !a->gtGetOp1()->isContained() && !a->gtGetOp1()->IsRegOptional()) + { + // "-a * b + c" to MSUB + MakeSrcContained(mul, a); + } + if (b->OperIs(GT_NEG) && !b->gtGetOp1()->isContained()) + { + // "a * -b + c" to MSUB + MakeSrcContained(mul, b); + } + // If both 'a' and 'b' are GT_NEG - MADD will be emitted. - if (!a->isContained() && !b->isContained() && !c->isContained()) + node->ChangeOper(GT_MADD); + MakeSrcContained(node, mul); + } + } + // Find "a - b * c" in order to emit MSUB + else if (node->OperIs(GT_SUB) && op2->OperIs(GT_MUL) && !op2->isContained() && !op2->gtOverflow() && + varTypeIsIntegral(op2)) { - node->ChangeOper(GT_MSUB); - MakeSrcContained(node, op2); + GenTree* a = op1; + GenTree* b = op2->gtGetOp1(); + GenTree* c = op2->gtGetOp2(); + + if (!a->isContained() && !b->isContained() && !c->isContained()) + { + node->ChangeOper(GT_MSUB); + MakeSrcContained(node, op2); + } } } From 4655c4f167f39c4d931ee15165b0cf6da6454667 Mon Sep 17 00:00:00 2001 From: Will Smith Date: Sat, 9 Apr 2022 16:58:14 -0700 Subject: [PATCH 6/6] Can only use gtOverflow after checking to see if it's a valid operator that supports overflow --- src/coreclr/jit/lowerarmarch.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index d56e79e103f276..89b590adcab5f8 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -1609,10 +1609,10 @@ void Lowering::ContainCheckBinary(GenTreeOp* node) CheckImmedAndMakeContained(node, op2); #ifdef TARGET_ARM64 - if (comp->opts.OptimizationEnabled() && varTypeIsIntegral(node) && !node->isContained() && !node->gtOverflow()) + if (comp->opts.OptimizationEnabled() && varTypeIsIntegral(node) && !node->isContained()) { // Find "a * b + c" or "c + a * b" in order to emit MADD/MSUB - if (node->OperIs(GT_ADD) && (op1->OperIs(GT_MUL) || op2->OperIs(GT_MUL))) + if (node->OperIs(GT_ADD) && !node->gtOverflow() && (op1->OperIs(GT_MUL) || op2->OperIs(GT_MUL))) { GenTree* mul; GenTree* c; @@ -1650,18 +1650,18 @@ void Lowering::ContainCheckBinary(GenTreeOp* node) } } // Find "a - b * c" in order to emit MSUB - else if (node->OperIs(GT_SUB) && op2->OperIs(GT_MUL) && !op2->isContained() && !op2->gtOverflow() && - varTypeIsIntegral(op2)) + else if (node->OperIs(GT_SUB) && !node->gtOverflow() && op2->OperIs(GT_MUL) && !op2->isContained() && + !op2->gtOverflow() && varTypeIsIntegral(op2)) { - GenTree* a = op1; - GenTree* b = op2->gtGetOp1(); - GenTree* c = op2->gtGetOp2(); + GenTree* a = op1; + GenTree* b = op2->gtGetOp1(); + GenTree* c = op2->gtGetOp2(); - if (!a->isContained() && !b->isContained() && !c->isContained()) - { + if (!a->isContained() && !b->isContained() && !c->isContained()) + { node->ChangeOper(GT_MSUB); MakeSrcContained(node, op2); - } + } } }