-
Notifications
You must be signed in to change notification settings - Fork 12.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[GlobalIsel] Combine G_ADD and G_SUB with constants #97771
Conversation
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-globalisel Author: Thorsten Schütt (tschuett) ChangesFull diff: https://github.com/llvm/llvm-project/pull/97771.diff 5 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 37a56e12efcc3..b9286ddc0f7c0 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -886,6 +886,16 @@ class CombinerHelper {
bool matchShlOfVScale(const MachineOperand &MO, BuildFnTy &MatchInfo);
+ bool matchFoldAPlusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo);
+
+ bool matchFoldC2MinusAPlusC1(const MachineInstr &MI, BuildFnTy &MatchInfo);
+
+ bool matchFoldAMinusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo);
+
+ bool matchFoldC1Minus2MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo);
+
+ bool matchFoldAMinusC2PlusC2(const MachineInstr &MI, BuildFnTy &MatchInfo);
+
private:
/// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 3ef0636ebf1c7..f3c9db368eb4e 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1746,6 +1746,56 @@ def APlusBMinusCPlusA : GICombineRule<
(G_ADD $root, $A, $sub1)),
(apply (G_SUB $root, $B, $C))>;
+// fold (A+C1)-C2 -> A+(C1-C2)
+def APlusC1MinusC2: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_CONSTANT $c2, $imm2),
+ (G_CONSTANT $c1, $imm1),
+ (G_ADD $add, $A, $c1),
+ (G_SUB $root, $add, $c2):$root,
+ [{ return Helper.matchFoldAPlusC1MinusC2(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
+// fold C2-(A+C1) -> (C2-C1)-A
+def C2MinusAPlusC1: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_CONSTANT $c2, $imm2),
+ (G_CONSTANT $c1, $imm1),
+ (G_ADD $add, $A, $c1),
+ (G_SUB $root, $c2, $add):$root,
+ [{ return Helper.matchFoldC2MinusAPlusC1(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
+// fold (A-C1)-C2 -> A-(C1+C2)
+def AMinusC1MinusC2: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_CONSTANT $c2, $imm2),
+ (G_CONSTANT $c1, $imm1),
+ (G_SUB $sub1, $A, $c1),
+ (G_SUB $root, $sub1, $c2):$root,
+ [{ return Helper.matchFoldAMinusC1MinusC2(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
+// fold (C1-A)-C2 -> (C1-C2)-A
+def C1Minus2MinusC2: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_CONSTANT $c2, $imm2),
+ (G_CONSTANT $c1, $imm1),
+ (G_SUB $sub1, $c1, $A),
+ (G_SUB $root, $sub1, $c2):$root,
+ [{ return Helper.matchFoldC1Minus2MinusC2(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
+// fold ((A-C1)+C2) -> (A+(C2-C1))
+def AMinusC2PlusC2: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_CONSTANT $c2, $imm2),
+ (G_CONSTANT $c1, $imm1),
+ (G_SUB $sub, $A, $c1),
+ (G_ADD $root, $sub, $c2):$root,
+ [{ return Helper.matchFoldAMinusC2PlusC2(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
def integer_reassoc_combines: GICombineGroup<[
ZeroMinusAPlusB,
APlusZeroMinusB,
@@ -1754,7 +1804,12 @@ def integer_reassoc_combines: GICombineGroup<[
AMinusBPlusCMinusA,
AMinusBPlusBMinusC,
APlusBMinusAplusC,
- APlusBMinusCPlusA
+ APlusBMinusCPlusA,
+ APlusC1MinusC2,
+ C2MinusAPlusC1,
+ AMinusC1MinusC2,
+ C1Minus2MinusC2,
+ AMinusC2PlusC2
]>;
def freeze_of_non_undef_non_poison : GICombineRule<
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index c27b882f17003..bb02a705ba89a 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -7490,3 +7490,153 @@ bool CombinerHelper::matchNonNegZext(const MachineOperand &MO,
return false;
}
+
+bool CombinerHelper::matchFoldAPlusC1MinusC2(const MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ // fold (A+C1)-C2 -> A+(C1-C2)
+ const GSub *Sub = cast<GSub>(&MI);
+ GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getLHSReg()));
+
+ if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
+ return false;
+
+ // Cannot fail due to pattern.
+ std::optional<APInt> MaybeC2 = getIConstantVRegVal(Sub->getRHSReg(), MRI);
+ if (!MaybeC2)
+ return false;
+
+ // Cannot fail due to pattern.
+ std::optional<APInt> MaybeC1 = getIConstantVRegVal(Add->getRHSReg(), MRI);
+ if (!MaybeC1)
+ return false;
+
+ Register Dst = Sub->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto Const = B.buildConstant(DstTy, *MaybeC1 - *MaybeC2);
+ B.buildAdd(Dst, Add->getLHSReg(), Const);
+ };
+
+ return true;
+}
+
+bool CombinerHelper::matchFoldC2MinusAPlusC1(const MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ // fold C2-(A+C1) -> (C2-C1)-A
+ const GSub *Sub = cast<GSub>(&MI);
+ GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getRHSReg()));
+
+ if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
+ return false;
+
+ // Cannot fail due to pattern.
+ std::optional<APInt> MaybeC2 = getIConstantVRegVal(Sub->getLHSReg(), MRI);
+ if (!MaybeC2)
+ return false;
+
+ // Cannot fail due to pattern.
+ std::optional<APInt> MaybeC1 = getIConstantVRegVal(Add->getRHSReg(), MRI);
+ if (!MaybeC1)
+ return false;
+
+ Register Dst = Sub->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto Const = B.buildConstant(DstTy, *MaybeC2 - *MaybeC1);
+ B.buildSub(Dst, Const, Add->getLHSReg());
+ };
+
+ return true;
+}
+
+bool CombinerHelper::matchFoldAMinusC1MinusC2(const MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ // fold (A-C1)-C2 -> A-(C1+C2)
+ const GSub *Sub1 = cast<GSub>(&MI);
+ GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
+
+ if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
+ return false;
+
+ // Cannot fail due to pattern.
+ std::optional<APInt> MaybeC2 = getIConstantVRegVal(Sub1->getRHSReg(), MRI);
+ if (!MaybeC2)
+ return false;
+
+ // Cannot fail due to pattern.
+ std::optional<APInt> MaybeC1 = getIConstantVRegVal(Sub2->getRHSReg(), MRI);
+ if (!MaybeC1)
+ return false;
+
+ Register Dst = Sub1->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto Const = B.buildConstant(DstTy, *MaybeC1 + *MaybeC2);
+ B.buildSub(Dst, Sub2->getLHSReg(), Const);
+ };
+
+ return true;
+}
+
+bool CombinerHelper::matchFoldC1Minus2MinusC2(const MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ // fold (C1-A)-C2 -> (C1-C2)-A
+ const GSub *Sub1 = cast<GSub>(&MI);
+ GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
+
+ if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
+ return false;
+
+ // Cannot fail due to pattern.
+ std::optional<APInt> MaybeC2 = getIConstantVRegVal(Sub1->getRHSReg(), MRI);
+ if (!MaybeC2)
+ return false;
+
+ // Cannot fail due to pattern.
+ std::optional<APInt> MaybeC1 = getIConstantVRegVal(Sub2->getLHSReg(), MRI);
+ if (!MaybeC1)
+ return false;
+
+ Register Dst = Sub1->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto Const = B.buildConstant(DstTy, *MaybeC1 - *MaybeC2);
+ B.buildSub(Dst, Const, Sub2->getRHSReg());
+ };
+
+ return true;
+}
+
+bool CombinerHelper::matchFoldAMinusC2PlusC2(const MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ // fold ((A-C1)+C2) -> (A+(C2-C1))
+ const GAdd *Add = cast<GAdd>(&MI);
+ GSub *Sub = cast<GSub>(MRI.getVRegDef(Add->getLHSReg()));
+
+ if (!MRI.hasOneNonDBGUse(Sub->getReg(0)))
+ return false;
+
+ // Cannot fail due to pattern.
+ std::optional<APInt> MaybeC2 = getIConstantVRegVal(Add->getRHSReg(), MRI);
+ if (!MaybeC2)
+ return false;
+
+ // Cannot fail due to pattern.
+ std::optional<APInt> MaybeC1 = getIConstantVRegVal(Sub->getRHSReg(), MRI);
+ if (!MaybeC1)
+ return false;
+
+ Register Dst = Add->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto Const = B.buildConstant(DstTy, *MaybeC2 - *MaybeC1);
+ B.buildAdd(Dst, Sub->getLHSReg(), Const);
+ };
+
+ return true;
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-add-of-sub.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-add-of-sub.mir
index ac42d2da16d56..6bd1d996da85f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-add-of-sub.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-add-of-sub.mir
@@ -3,12 +3,12 @@
...
---
+# (x + y) - y -> x
name: simplify_to_x
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1
- ; (x + y) - y -> x
; CHECK-LABEL: name: simplify_to_x
; CHECK: liveins: $w0, $w1
; CHECK-NEXT: {{ $}}
@@ -23,12 +23,12 @@ body: |
RET_ReallyLR implicit $w0
...
---
+# (x + y) - x -> y
name: simplify_to_y
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1
- ; (x + y) - x -> y
; CHECK-LABEL: name: simplify_to_y
; CHECK: liveins: $w0, $w1
; CHECK-NEXT: {{ $}}
@@ -43,12 +43,12 @@ body: |
RET_ReallyLR implicit $w0
...
---
+# (x + 1) - 1 -> x
name: simplify_to_constant_x
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1
- ; (x + 1) - 1 -> x
; CHECK-LABEL: name: simplify_to_constant_x
; CHECK: liveins: $w0, $w1
; CHECK-NEXT: {{ $}}
@@ -64,12 +64,12 @@ body: |
RET_ReallyLR implicit $w0
...
---
+# (x + y) - x -> y
name: simplify_to_constant_y
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1
- ; (x + y) - x -> y
; CHECK-LABEL: name: simplify_to_constant_y
; CHECK: liveins: $w0, $w1
; CHECK-NEXT: {{ $}}
@@ -85,12 +85,12 @@ body: |
RET_ReallyLR implicit $w0
...
---
+# (x + y) - y -> x
name: vector_simplify_to_x
tracksRegLiveness: true
body: |
bb.0:
liveins: $d0, $d1
- ; (x + y) - y -> x
; CHECK-LABEL: name: vector_simplify_to_x
; CHECK: liveins: $d0, $d1
; CHECK-NEXT: {{ $}}
@@ -105,12 +105,12 @@ body: |
RET_ReallyLR implicit $d0
...
---
+# (x + 1) - 1 -> x
name: splat_simplify_to_x
tracksRegLiveness: true
body: |
bb.0:
liveins: $d0, $d1
- ; (x + 1) - 1 -> x
; CHECK-LABEL: name: splat_simplify_to_x
; CHECK: liveins: $d0, $d1
; CHECK-NEXT: {{ $}}
@@ -127,6 +127,7 @@ body: |
RET_ReallyLR implicit $d0
...
---
+# (x + y) - x -> y
name: unique_registers_no_fold
tracksRegLiveness: true
body: |
@@ -151,20 +152,18 @@ body: |
RET_ReallyLR implicit $w0
...
---
+# (x + y) - x -> y
name: unique_constants_no_fold
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1
- ; (x + y) - x -> y
; CHECK-LABEL: name: unique_constants_no_fold
; CHECK: liveins: $w0, $w1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: %x1:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: %x2:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: %y:_(s32) = COPY $w1
- ; CHECK-NEXT: %add:_(s32) = G_ADD %y, %x1
- ; CHECK-NEXT: %sub:_(s32) = G_SUB %add, %x2
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+ ; CHECK-NEXT: %sub:_(s32) = G_ADD %y, [[C]]
; CHECK-NEXT: $w0 = COPY %sub(s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%x1:_(s32) = G_CONSTANT i32 1
@@ -176,12 +175,12 @@ body: |
RET_ReallyLR implicit $w0
...
---
+# x - (y + x) -> 0 - y
name: simplify_to_neg_y
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1
- ; x - (y + x) -> 0 - y
; CHECK-LABEL: name: simplify_to_neg_y
; CHECK: liveins: $w0, $w1
; CHECK-NEXT: {{ $}}
@@ -198,12 +197,12 @@ body: |
RET_ReallyLR implicit $w0
...
---
+# y - (y + x) -> 0 - x
name: simplify_to_neg_x
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1
- ; y - (y + x) -> 0 - x
; CHECK-LABEL: name: simplify_to_neg_x
; CHECK: liveins: $w0, $w1
; CHECK-NEXT: {{ $}}
@@ -220,12 +219,12 @@ body: |
RET_ReallyLR implicit $w0
...
---
+# x - (y + x) -> 0 - y
name: simplify_to_neg_y_constant
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1
- ; x - (y + x) -> 0 - y
; CHECK-LABEL: name: simplify_to_neg_y_constant
; CHECK: liveins: $w0, $w1
; CHECK-NEXT: {{ $}}
@@ -243,12 +242,12 @@ body: |
RET_ReallyLR implicit $w0
...
---
+# y - (y + x) -> 0 - x
name: simplify_to_neg_x_constant
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1
- ; y - (y + x) -> 0 - x
; CHECK-LABEL: name: simplify_to_neg_x_constant
; CHECK: liveins: $w0, $w1
; CHECK-NEXT: {{ $}}
@@ -266,12 +265,12 @@ body: |
RET_ReallyLR implicit $w0
...
---
+# y - (y + x) -> 0 - x
name: vector_simplify_to_neg_x
tracksRegLiveness: true
body: |
bb.0:
liveins: $d0, $d1
- ; y - (y + x) -> 0 - x
; CHECK-LABEL: name: vector_simplify_to_neg_x
; CHECK: liveins: $d0, $d1
; CHECK-NEXT: {{ $}}
@@ -289,12 +288,12 @@ body: |
RET_ReallyLR implicit $d0
...
---
+# x - (y + x) -> 0 - y
name: vector_simplify_to_neg_y_constant
tracksRegLiveness: true
body: |
bb.0:
liveins: $d0, $d1
- ; x - (y + x) -> 0 - y
; CHECK-LABEL: name: vector_simplify_to_neg_y_constant
; CHECK: liveins: $d0, $d1
; CHECK-NEXT: {{ $}}
@@ -314,12 +313,12 @@ body: |
RET_ReallyLR implicit $d0
...
---
+# y - (y + x) -> 0 - x
name: unique_registers_neg_no_fold
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1, $w2
- ; y - (y + x) -> 0 - x
; CHECK-LABEL: name: unique_registers_neg_no_fold
; CHECK: liveins: $w0, $w1, $w2
; CHECK-NEXT: {{ $}}
@@ -339,20 +338,18 @@ body: |
RET_ReallyLR implicit $w0
...
---
+# x - (y + x) -> 0 - y
name: wrong_constant_neg_no_fold
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1
- ; x - (y + x) -> 0 - y
; CHECK-LABEL: name: wrong_constant_neg_no_fold
; CHECK: liveins: $w0, $w1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %x1:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: %x2:_(s32) = G_CONSTANT i32 2
; CHECK-NEXT: %y:_(s32) = COPY $w1
- ; CHECK-NEXT: %add:_(s32) = G_ADD %y, %x1
- ; CHECK-NEXT: %sub:_(s32) = G_SUB %x2, %add
+ ; CHECK-NEXT: %sub:_(s32) = G_SUB %x1, %y
; CHECK-NEXT: $w0 = COPY %sub(s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%x1:_(s32) = G_CONSTANT i32 1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir
index be33f9f7b284b..2f10a497fa74c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-integer.mir
@@ -250,3 +250,120 @@ body: |
%add:_(<2 x s64>) = G_ADD %a, %sub1
$q0 = COPY %add
RET_ReallyLR implicit $x0
+
+...
+---
+name: APlusC1MinusC2
+body: |
+ bb.0:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: APlusC1MinusC2
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %a:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -2
+ ; CHECK-NEXT: %sub:_(s64) = G_ADD %a, [[C]]
+ ; CHECK-NEXT: $x0 = COPY %sub(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %a:_(s64) = COPY $x0
+ %c1:_(s64) = G_CONSTANT i64 5
+ %c2:_(s64) = G_CONSTANT i64 7
+ %add:_(s64) = G_ADD %a, %c1
+ %sub:_(s64) = G_SUB %add, %c2
+ $x0 = COPY %sub
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: C2MinusAPlusC1
+body: |
+ bb.0:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: C2MinusAPlusC1
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %a:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+ ; CHECK-NEXT: %sub:_(s64) = G_SUB [[C]], %a
+ ; CHECK-NEXT: $x0 = COPY %sub(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %a:_(s64) = COPY $x0
+ %c1:_(s64) = G_CONSTANT i64 4
+ %c2:_(s64) = G_CONSTANT i64 9
+ %add:_(s64) = G_ADD %a, %c1
+ %sub:_(s64) = G_SUB %c2, %add
+ $x0 = COPY %sub
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: AMinusC1MinusC2
+body: |
+ bb.0:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: AMinusC1MinusC2
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %a:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 71
+ ; CHECK-NEXT: %sub:_(s64) = G_SUB %a, [[C]]
+ ; CHECK-NEXT: $x0 = COPY %sub(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %a:_(s64) = COPY $x0
+ %c1:_(s64) = G_CONSTANT i64 11
+ %c2:_(s64) = G_CONSTANT i64 60
+ %sub1:_(s64) = G_SUB %a, %c1
+ %sub:_(s64) = G_SUB %sub1, %c2
+ $x0 = COPY %sub
+ RET_ReallyLR implicit $x0
+
+...
+---
+name: C1Minus2MinusC2
+body: |
+ bb.0:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: C1Minus2MinusC2
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %a:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -49
+ ; CHECK-NEXT: %sub:_(s64) = G_SUB [[C]], %a
+ ; CHECK-NEXT: $x0 = COPY %sub(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %a:_(s64) = COPY $x0
+ %c1:_(s64) = G_CONSTANT i64 11
+ %c2:_(s64) = G_CONSTANT i64 60
+ %sub1:_(s64) = G_SUB %c1, %a
+ %sub:_(s64) = G_SUB %sub1, %c2
+ $x0 = COPY %sub
+ RET_ReallyLR implicit $x0
+
+
+...
+---
+name: AMinusC2PlusC2
+body: |
+ bb.0:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: AMinusC2PlusC2
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %a:_(s64) = COPY $x0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 43
+ ; CHECK-NEXT: %add:_(s64) = G_ADD %a, [[C]]
+ ; CHECK-NEXT: $x0 = COPY %add(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %a:_(s64) = COPY $x0
+ %c1:_(s64) = G_CONSTANT i64 13
+ %c2:_(s64) = G_CONSTANT i64 56
+ %sub:_(s64) = G_SUB %a, %c1
+ %add:_(s64) = G_ADD %sub, %c2
+ $x0 = COPY %add
+ RET_ReallyLR implicit $x0
+
|
MIR test is useful, but is it possible to add an IR test? |
Anything is possible, but honestly I prefer MIR files for combines. |
define i64 @AMinusC2PlusC2(i64 %a) { | ||
; CHECK-LABEL: AMinusC2PlusC2: | ||
; CHECK: // %bb.0: // %entry | ||
; CHECK-NEXT: sub x0, x0, #4 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This `sub` is an `add` in the MIR and in the comment.
ping |
1 similar comment
ping |
if (!MaybeC2) | ||
return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You are still checking it despite the comment that it cannot fail
if (!MaybeC1) | ||
return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ditto
if (!MaybeC2) | ||
return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ditto
if (!MaybeC1) | ||
return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ditto
if (!MaybeC2) | ||
return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ditto
if (!MaybeC2) | ||
return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ditto
if (!MaybeC1) | ||
return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ditto
if (!MaybeC2) | ||
return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ditto
if (!MaybeC1) | ||
return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ditto
The comment is for documentation. I have to check optionals. I will never read from an optional before checking. |
I say remove them, and/or introduce a cannot-fail API |
I can remove the comments. The alternative would be to pass the |
A null check that can never happen is misleading. Just don't check and let the assert in operator * do its job |
That's possibly less ugly than the verbose constant lookup helper |
I removed the comments. |
e49d8be
to
895eb8b
Compare
Ping. |
895eb8b
to
490ef26
Compare
They are running commercial static analyzers over the project. They will scream when they find unchecked access to optionals. This is the wrong approach. |
I can sympathize with both of your sides. If we can guarantee that the constant is there, why don't we just access it directly? What value does the check add? Unchecked optionals have an off vibe, but checks that never fire also have a compile-time impact. |
I found
which is also fallible and in an anonymous namespace. The other option is to change the signature of the match functions:
with the implication that C1 and C2 are G_CONSTANTs. I don't like because it is too low-level. |
This is a common pattern and it's good if we can wholesale remove unnecessary optional checks across multiple combines by adding an API to do exactly what we want. |
Thanks for chiming in. Surprise, I like |
If the instruction is known to be a G_CONSTANT it doesn't have to be fallible. MRI.getVRegDef() is guaranteed to return an instance of it and if it doesn't it should be an assert. I don't see why that's a problem to add? |
Firstly, please update the coding style section of the Combiner.rst with the outcome of the discussion.
Bugs will make it fail badly. In terms of compile-time, the next step is to hoist the oneuse checks into the MIR patterns. |
If it's expected that there is always a value in an optional or the compiler is in a bad internal state, then just bailing out and skipping the combine isn't solving anything. In fact I'd argue is actively harmful since it hides what should be a fatal error/assertion failure. So we should actually write code that expresses our intentions and preconditions, and using optional in this case doesn't express that accurately. |
This isn't specific to the combiner, it's a general principle. |
Fine. I thought something like, e.g., :
|
Perhaps instead of |
I am open to other names, but I like the pattern: |
I did a build of CTMark with -Os and I don't see any change. What's motivating your patches here? Is there some benchmark that's getting better? Our work should be targeting closing our gap to SelectionDAG so that we can enable it by default. |
https://github.com/llvm/llvm-project/blob/8cae9dcd4a45ac78b22c05eff96b0fee3e1c5e55/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L3870 |
That's true but implementing things that don't make any impact isn't going to get us closer at all. I'll do a build of the entire test suite to see what impact there is. Our geomean deficit on Os is around 0.4-0.5% which suggests there are still relevant optimizations we can do that will noticeably move the needle. I plan on starting work on that again soon but it's faster if we all align on the strategy instead of throwing darts at the wall. |
; CHECK-LABEL: name: unique_constants_no_fold | ||
; CHECK: liveins: $w0, $w1 | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: %x1:_(s32) = G_CONSTANT i32 1 | ||
; CHECK-NEXT: %x2:_(s32) = G_CONSTANT i32 2 | ||
; CHECK-NEXT: %y:_(s32) = COPY $w1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a hit.
You never gave me https://youtu.be/8427bl_7k1g?t=2147.
Review is very efficient and goal oriented. |
I put an assert into the combine to see when it fires. Essentially across the whole test suite I only see it hit in the MultiSource build variant of sqlite3 once, which saves 4 bytes across ~3.5k binaries. I will approve this but I really think it's not a good use of time to pursue this. If anything this might indicate we should remove these combines from SDAG for compile time reasons. |
* 'main' of https://github.com/llvm/llvm-project: (700 commits) [SandboxIR][NFC] SingleLLVMInstructionImpl class (llvm#102687) [ThinLTO]Clean up 'import-assume-unique-local' flag. (llvm#102424) [nsan] Make #include more conventional [SandboxIR][NFC] Use Tracker.emplaceIfTracking() [libc] Moved range_reduction_double ifdef statement (llvm#102659) [libc] Fix CFP long double and add tests (llvm#102660) [TargetLowering] Handle vector types in expandFixedPointMul (llvm#102635) [compiler-rt][NFC] Replace environment variable with %t (llvm#102197) [UnitTests] Convert a test to use opaque pointers (llvm#102668) [CodeGen][NFCI] Don't re-implement parts of ASTContext::getIntWidth (llvm#101765) [SandboxIR] Clean up tracking code with the help of emplaceIfTracking() (llvm#102406) [mlir][bazel] remove extra blanks in mlir-tblgen test [NVPTX][NFC] Update tests to use bfloat type (llvm#101493) [mlir] Add support for parsing nested PassPipelineOptions (llvm#101118) [mlir][bazel] add missing td dependency in mlir-tblgen test [flang][cuda] Fix lib dependency [libc] Clean up remaining use of *_WIDTH macros in printf (llvm#102679) [flang][cuda] Convert cuf.alloc for box to fir.alloca in device context (llvm#102662) [SandboxIR] Implement the InsertElementInst class (llvm#102404) [libc] Fix use of cpp::numeric_limits<...>::digits (llvm#102674) [mlir][ODS] Verify type constraints in Types and Attributes (llvm#102326) [LTO] enable `ObjCARCContractPass` only on optimized build (llvm#101114) [mlir][ODS] Consistent `cppType` / `cppClassName` usage (llvm#102657) [lldb] Move definition of SBSaveCoreOptions dtor out of header (llvm#102539) [libc] Use cpp::numeric_limits in preference to C23 <limits.h> macros (llvm#102665) [clang] Implement -fptrauth-auth-traps. 
(llvm#102417) [LLVM][rtsan] rtsan transform to preserve CFGAnalyses (llvm#102651) Revert "[AMDGPU] Move `AMDGPUAttributorPass` to full LTO post link stage (llvm#102086)" [RISCV][GISel] Add missing tests for G_CTLZ/CTTZ instruction selection. NFC Return available function types for BindingDecls. (llvm#102196) [clang] Wire -fptrauth-returns to "ptrauth-returns" fn attribute. (llvm#102416) [RISCV] Remove riscv-experimental-rv64-legal-i32. (llvm#102509) [RISCV] Move PseudoVSET(I)VLI expansion to use PseudoInstExpansion. (llvm#102496) [NVPTX] support switch statement with brx.idx (reland) (llvm#102550) [libc][newhdrgen]sorted function names in yaml (llvm#102544) [GlobalIsel] Combine G_ADD and G_SUB with constants (llvm#97771) Suppress spurious warnings due to R_RISCV_SET_ULEB128 [scudo] Separated committed and decommitted entries. (llvm#101409) [MIPS] Fix missing ANDI optimization (llvm#97689) [Clang] Add env var for nvptx-arch/amdgpu-arch timeout (llvm#102521) [asan] Switch allocator to dynamic base address (llvm#98511) [AMDGPU] Move `AMDGPUAttributorPass` to full LTO post link stage (llvm#102086) [libc][math][c23] Add fadd{l,f128} C23 math functions (llvm#102531) [mlir][bazel] revert bazel rule change for DLTITransformOps [msan] Support vst{2,3,4}_lane instructions (llvm#101215) Revert "[MLIR][DLTI][Transform] Introduce transform.dlti.query (llvm#101561)" [X86] pr57673.ll - generate MIR test checks [mlir][vector][test] Split tests from vector-transfer-flatten.mlir (llvm#102584) [mlir][bazel] add bazel rule for DLTITransformOps OpenMPOpt: Remove dead include [IR] Add method to GlobalVariable to change type of initializer. (llvm#102553) [flang][cuda] Force default allocator in device code (llvm#102238) [llvm] Construct SmallVector<SDValue> with ArrayRef (NFC) (llvm#102578) [MLIR][DLTI][Transform] Introduce transform.dlti.query (llvm#101561) [AMDGPU][AsmParser][NFC] Remove a misleading comment. 
(llvm#102604) [Arm][AArch64][Clang] Respect function's branch protection attributes. (llvm#101978) [mlir] Verifier: steal bit to track seen instead of set. (llvm#102626) [Clang] Fix Handling of Init Capture with Parameter Packs in LambdaScopeForCallOperatorInstantiationRAII (llvm#100766) [X86] Convert truncsat clamping patterns to use SDPatternMatch. NFC. [gn] Give two scripts argparse.RawDescriptionHelpFormatter [bazel] Add missing dep for the SPIRVToLLVM target [Clang] Simplify specifying passes via -Xoffload-linker (llvm#102483) [bazel] Port for d45de80 [SelectionDAG] Use unaligned store/load to move AVX registers onto stack for `insertelement` (llvm#82130) [Clang][OMPX] Add the code generation for multi-dim `num_teams` (llvm#101407) [ARM] Regenerate big-endian-vmov.ll. NFC [AMDGPU][AsmParser][NFCI] All NamedIntOperands to be of the i32 type. (llvm#102616) [libc][math][c23] Add totalorderl function. (llvm#102564) [mlir][spirv] Support `memref` in `convert-to-spirv` pass (llvm#102534) [MLIR][GPU-LLVM] Convert `gpu.func` to `llvm.func` (llvm#101664) Fix a unit test input file (llvm#102567) [llvm-readobj][COFF] Dump hybrid objects for ARM64X files. (llvm#102245) AMDGPU/NewPM: Port SIFixSGPRCopies to new pass manager (llvm#102614) [MemoryBuiltins] Simplify getCalledFunction() helper (NFC) [AArch64] Add invalid 1 x vscale costs for reductions and reduction-operations. 
(llvm#102105) [MemoryBuiltins] Handle allocator attributes on call-site LSV/test/AArch64: add missing lit.local.cfg; fix build (llvm#102607) Revert "Enable logf128 constant folding for hosts with 128bit floats (llvm#96287)" [RISCV] Add Syntacore SCR5 RV32/64 processors definition (llvm#102285) [InstCombine] Remove unnecessary RUN line from test (NFC) [flang][OpenMP] Handle multiple ranges in `num_teams` clause (llvm#102535) [mlir][vector] Add tests for scalable vectors in one-shot-bufferize.mlir (llvm#102361) [mlir][vector] Disable `vector.matrix_multiply` for scalable vectors (llvm#102573) [clang] Implement CWG2627 Bit-fields and narrowing conversions (llvm#78112) [NFC] Use references to avoid copying (llvm#99863) Revert "[mlir][ArmSME] Pattern to swap shape_cast(tranpose) with transpose(shape_cast) (llvm#100731)" (llvm#102457) [IRBuilder] Generate nuw GEPs for struct member accesses (llvm#99538) [bazel] Port for 9b06e25 [CodeGen][NewPM] Improve start/stop pass error message CodeGenPassBuilder (llvm#102591) [AArch64] Implement TRBMPAM_EL1 system register (llvm#102485) [InstCombine] Fixing wrong select folding in vectors with undef elements (llvm#102244) [AArch64] Sink operands to fmuladd. (llvm#102297) LSV: document hang reported in llvm#37865 (llvm#102479) Enable logf128 constant folding for hosts with 128bit floats (llvm#96287) [RISCV][clang] Remove bfloat base type in non-zvfbfmin vcreate (llvm#102146) [RISCV][clang] Add missing `zvfbfmin` to `vget_v` intrinsic (llvm#102149) [mlir][vector] Add mask elimination transform (llvm#99314) [Clang][Interp] Fix display of syntactically-invalid note for member function calls (llvm#102170) [bazel] Port for 3fffa6d [DebugInfo][RemoveDIs] Use iterator-inserters in clang (llvm#102006) ... Signed-off-by: Edwiin Kusuma Jaya <kutemeikito0905@gmail.com>
No description provided.