diff --git a/L/LLVM/LLVM_full@11.0.1/bundled/patches/0704-D97435-AArch64-movaddrreg.patch b/L/LLVM/LLVM_full@11.0.1/bundled/patches/0704-D97435-AArch64-movaddrreg.patch
new file mode 100644
index 00000000000..d33658f4937
--- /dev/null
+++ b/L/LLVM/LLVM_full@11.0.1/bundled/patches/0704-D97435-AArch64-movaddrreg.patch
@@ -0,0 +1,484 @@
+From 4103660e5362b2ab73256740c28e4f0e042f96d4 Mon Sep 17 00:00:00 2001
+From: Keno Fischer
+Date: Mon, 1 Mar 2021 16:37:03 -0500
+Subject: [PATCH 1/4] [Aarch64] Correct register class for pseudo instructions
+
+This constrains the Mov* and similar pseudo instructions to take
+GPR64common register classes rather than GPR64. GPR64 includes XZR,
+which is invalid here, because these pseudo instructions expand
+into an adrp/add pair sharing a destination register. XZR is invalid
+on add and attempting to encode it will instead increment the stack
+pointer causing crashes (downstream report at [1]). The test case
+there reproduces on LLVM11, but I do not have a test case that
+reaches this code path on main, since it is being masked by
+improved dead code elimination introduced in D91513. Nevertheless,
+this seems like a good thing to fix in case there are other cases
+that dead code elimination doesn't clean up (e.g. if `optnone` is
+used and the optimization is skipped).
+
+I think it would be worth auditing uses of GPR64 in pseudo
+instructions to see if there are any similar issues, but I do not
+have a high enough view of the backend or knowledge of the
+AArch64 architecture to do this quickly.
+
+[1] https://github.com/JuliaLang/julia/issues/39818
+
+Reviewed By: t.p.northover
+
+Differential Revision: https://reviews.llvm.org/D97435
+---
+ .../AArch64/AArch64ExpandPseudoInsts.cpp    |   1 +
+ llvm/lib/Target/AArch64/AArch64InstrInfo.td |  32 +-
+ .../GlobalISel/select-blockaddress.mir      |   5 +-
+ .../select-jump-table-brjt-constrain.mir    |   2 +-
+ .../GlobalISel/select-jump-table-brjt.mir   |   2 +-
+ .../CodeGen/AArch64/GlobalISel/select.mir   | 312 ------------------
+ 6 files changed, 22 insertions(+), 332 deletions(-)
+ delete mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/select.mir
+
+diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+index 9e65ad2e18f9..3497a15a7574 100644
+--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
++++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+@@ -844,6 +844,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
+   case AArch64::MOVaddrEXT: {
+     // Expand into ADRP + ADD.
+     Register DstReg = MI.getOperand(0).getReg();
++    assert(DstReg != AArch64::XZR);
+     MachineInstrBuilder MIB1 =
+         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
+             .add(MI.getOperand(1));
+diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+index f4a5f639e497..9e69934a97e2 100644
+--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
++++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+@@ -630,40 +630,40 @@ let isReMaterializable = 1, isCodeGenOnly = 1 in {
+ // removed, along with the AArch64Wrapper node.
+ + let AddedComplexity = 10 in +-def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr), +- [(set GPR64:$dst, (AArch64LOADgot tglobaladdr:$addr))]>, ++def LOADgot : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr), ++ [(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>, + Sched<[WriteLDAdr]>; + + // The MOVaddr instruction should match only when the add is not folded + // into a load or store address. + def MOVaddr +- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), +- [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi), ++ : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), ++ [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi), + tglobaladdr:$low))]>, + Sched<[WriteAdrAdr]>; + def MOVaddrJT +- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), +- [(set GPR64:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi), ++ : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), ++ [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi), + tjumptable:$low))]>, + Sched<[WriteAdrAdr]>; + def MOVaddrCP +- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), +- [(set GPR64:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi), ++ : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), ++ [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi), + tconstpool:$low))]>, + Sched<[WriteAdrAdr]>; + def MOVaddrBA +- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), +- [(set GPR64:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi), ++ : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), ++ [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi), + tblockaddress:$low))]>, + Sched<[WriteAdrAdr]>; + def MOVaddrTLS +- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), +- [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi), ++ : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), ++ [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi), + tglobaltlsaddr:$low))]>, + Sched<[WriteAdrAdr]>; + def MOVaddrEXT +- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low), +- [(set GPR64:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi), ++ : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low), ++ [(set GPR64common:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi), + texternalsym:$low))]>, + Sched<[WriteAdrAdr]>; + // Normally AArch64addlow either gets folded into a following ldr/str, +@@ -671,8 +671,8 @@ def MOVaddrEXT + // might appear without either of them, so allow lowering it into a plain + // add. 
+ def ADDlowTLS +- : Pseudo<(outs GPR64:$dst), (ins GPR64:$src, i64imm:$low), +- [(set GPR64:$dst, (AArch64addlow GPR64:$src, ++ : Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low), ++ [(set GPR64sp:$dst, (AArch64addlow GPR64sp:$src, + tglobaltlsaddr:$low))]>, + Sched<[WriteAdr]>; + +diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir +index bd5ee80d5841..10732660d34a 100644 +--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir ++++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-blockaddress.mir +@@ -30,9 +30,10 @@ registers: + body: | + ; CHECK-LABEL: name: test_blockaddress + ; CHECK: bb.0 (%ir-block.0): +- ; CHECK: [[MOVaddrBA:%[0-9]+]]:gpr64 = MOVaddrBA target-flags(aarch64-page) blockaddress(@test_blockaddress, %ir-block.block), target-flags(aarch64-pageoff, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block) ++ ; CHECK: [[MOVaddrBA:%[0-9]+]]:gpr64common = MOVaddrBA target-flags(aarch64-page) blockaddress(@test_blockaddress, %ir-block.block), target-flags(aarch64-pageoff, aarch64-nc) blockaddress(@test_blockaddress, %ir-block.block) + ; CHECK: [[MOVaddr:%[0-9]+]]:gpr64common = MOVaddr target-flags(aarch64-page) @addr, target-flags(aarch64-pageoff, aarch64-nc) @addr +- ; CHECK: STRXui [[MOVaddrBA]], [[MOVaddr]], 0 :: (store 8 into @addr) ++ ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY [[MOVaddrBA]] ++ ; CHECK: STRXui [[COPY]], [[MOVaddr]], 0 :: (store 8 into @addr) + ; CHECK: BR [[MOVaddrBA]] + ; CHECK: bb.1.block (address-taken): + ; CHECK: RET_ReallyLR +diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt-constrain.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt-constrain.mir +index 082bf43061da..6f3540a0768b 100644 +--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt-constrain.mir ++++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt-constrain.mir +@@ -30,7 +30,7 @@ body: | + ; CHECK: Bcc 8, %bb.3, implicit $nzcv + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000) +- ; CHECK: [[MOVaddrJT:%[0-9]+]]:gpr64 = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0 ++ ; CHECK: [[MOVaddrJT:%[0-9]+]]:gpr64common = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0 + ; CHECK: early-clobber %6:gpr64, early-clobber %7:gpr64sp = JumpTableDest32 [[MOVaddrJT]], [[SUBREG_TO_REG]], %jump-table.0 + ; CHECK: BR %6 + ; CHECK: bb.2: +diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir +index ae7d90769f99..80631eadb5e8 100644 +--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir ++++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-jump-table-brjt.mir +@@ -65,7 +65,7 @@ body: | + ; CHECK: bb.1.entry: + ; CHECK: successors: %bb.3(0x2aaaaaab), %bb.4(0x2aaaaaab), %bb.2(0x2aaaaaab) + ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr +- ; CHECK: [[MOVaddrJT:%[0-9]+]]:gpr64 = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0 ++ ; CHECK: [[MOVaddrJT:%[0-9]+]]:gpr64common = MOVaddrJT target-flags(aarch64-page) %jump-table.0, target-flags(aarch64-pageoff, aarch64-nc) %jump-table.0 + ; CHECK: early-clobber %18:gpr64, early-clobber %19:gpr64sp = JumpTableDest32 [[MOVaddrJT]], [[SUBREG_TO_REG]], %jump-table.0 + ; CHECK: BR %18 + ; CHECK: bb.2.sw.bb: 
+diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir +deleted file mode 100644 +index 112aee8d552c..000000000000 +--- a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir ++++ /dev/null +@@ -1,312 +0,0 @@ +-# RUN: llc -O0 -mtriple=aarch64-apple-ios -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=IOS +-# RUN: llc -O0 -mtriple=aarch64-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LINUX-DEFAULT +-# RUN: llc -O0 -mtriple=aarch64-linux-gnu -relocation-model=pic -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=LINUX-PIC +- +---- | +- target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +- +- define void @frame_index() { +- %ptr0 = alloca i64 +- ret void +- } +- +- define i8* @ptr_mask(i8* %in) { ret i8* undef } +- +- @var_local = global i8 0 +- define i8* @global_local() { ret i8* undef } +- +- @var_got = external global i8 +- define i8* @global_got() { ret i8* undef } +- +- define void @icmp() { ret void } +- define void @fcmp() { ret void } +- +- define void @phi() { ret void } +- +- define void @select() { ret void } +-... +- +---- +-# CHECK-LABEL: name: frame_index +-name: frame_index +-legalized: true +-regBankSelected: true +- +-# CHECK: registers: +-# CHECK-NEXT: - { id: 0, class: gpr64sp, preferred-register: '' } +-registers: +- - { id: 0, class: gpr } +- +-stack: +- - { id: 0, name: ptr0, offset: 0, size: 8, alignment: 8 } +- +-# CHECK: body: +-# CHECK: %0:gpr64sp = ADDXri %stack.0.ptr0, 0, 0 +-body: | +- bb.0: +- %0(p0) = G_FRAME_INDEX %stack.0.ptr0 +- $x0 = COPY %0(p0) +-... +- +---- +- +---- +-# CHECK-LABEL: name: ptr_mask +-name: ptr_mask +-legalized: true +-regBankSelected: true +- +-# CHECK: body: +-# CHECK: %2:gpr64sp = ANDXri %0, 8060 +-body: | +- bb.0: +- liveins: $x0 +- %0:gpr(p0) = COPY $x0 +- %const:gpr(s64) = G_CONSTANT i64 -8 +- %1:gpr(p0) = G_PTRMASK %0, %const +- $x0 = COPY %1(p0) +-... +- +---- +-# Global defined in the same linkage unit so no GOT is needed +-# CHECK-LABEL: name: global_local +-name: global_local +-legalized: true +-regBankSelected: true +-registers: +- - { id: 0, class: gpr } +- +-# CHECK: body: +-# IOS: %0:gpr64 = MOVaddr target-flags(aarch64-page) @var_local, target-flags(aarch64-pageoff, aarch64-nc) @var_local +-# LINUX-DEFAULT: %0:gpr64 = MOVaddr target-flags(aarch64-page) @var_local, target-flags(aarch64-pageoff, aarch64-nc) @var_local +-# LINUX-PIC: %0:gpr64 = LOADgot target-flags(aarch64-got) @var_local +-body: | +- bb.0: +- %0(p0) = G_GLOBAL_VALUE @var_local +- $x0 = COPY %0(p0) +-... +- +---- +-# CHECK-LABEL: name: global_got +-name: global_got +-legalized: true +-regBankSelected: true +-registers: +- - { id: 0, class: gpr } +- +-# CHECK: body: +-# IOS: %0:gpr64 = LOADgot target-flags(aarch64-got) @var_got +-# LINUX-DEFAULT: %0:gpr64 = MOVaddr target-flags(aarch64-page) @var_got, target-flags(aarch64-pageoff, aarch64-nc) @var_got +-# LINUX-PIC: %0:gpr64 = LOADgot target-flags(aarch64-got) @var_got +-body: | +- bb.0: +- %0(p0) = G_GLOBAL_VALUE @var_got +- $x0 = COPY %0(p0) +-... 
+- +---- +-# CHECK-LABEL: name: icmp +-name: icmp +-legalized: true +-regBankSelected: true +- +-# CHECK: registers: +-# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +-# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +-# CHECK-NEXT: - { id: 2, class: gpr64, preferred-register: '' } +-# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' } +-# CHECK-NEXT: - { id: 4, class: gpr64, preferred-register: '' } +-# CHECK-NEXT: - { id: 5, class: gpr32, preferred-register: '' } +-registers: +- - { id: 0, class: gpr } +- - { id: 1, class: gpr } +- - { id: 2, class: gpr } +- - { id: 3, class: gpr } +- - { id: 4, class: gpr } +- - { id: 5, class: gpr } +- - { id: 6, class: gpr } +- - { id: 7, class: gpr } +- - { id: 8, class: gpr } +- - { id: 9, class: gpr } +- - { id: 10, class: gpr } +- - { id: 11, class: gpr } +- +-# CHECK: body: +-# CHECK: SUBSWrr %0, %0, implicit-def $nzcv +-# CHECK: %1:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv +- +-# CHECK: SUBSXrr %2, %2, implicit-def $nzcv +-# CHECK: %3:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv +- +-# CHECK: SUBSXrr %4, %4, implicit-def $nzcv +-# CHECK: %5:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv +- +-body: | +- bb.0: +- liveins: $w0, $x0 +- +- %0(s32) = COPY $w0 +- %1(s32) = G_ICMP intpred(eq), %0, %0 +- %6(s1) = G_TRUNC %1(s32) +- %9(s32) = G_ANYEXT %6 +- $w0 = COPY %9(s32) +- +- %2(s64) = COPY $x0 +- %3(s32) = G_ICMP intpred(uge), %2, %2 +- %7(s1) = G_TRUNC %3(s32) +- %10(s32) = G_ANYEXT %7 +- $w0 = COPY %10(s32) +- +- %4(p0) = COPY $x0 +- %5(s32) = G_ICMP intpred(ne), %4, %4 +- %8(s1) = G_TRUNC %5(s32) +- %11(s32) = G_ANYEXT %8 +- $w0 = COPY %11(s32) +-... +- +---- +-# CHECK-LABEL: name: fcmp +-name: fcmp +-legalized: true +-regBankSelected: true +- +-# CHECK: registers: +-# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } +-# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +-# CHECK-NEXT: - { id: 2, class: fpr64, preferred-register: '' } +-# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' } +-# CHECK-NEXT: - { id: 4, class: gpr32, preferred-register: '' } +-# CHECK-NEXT: - { id: 5, class: gpr32, preferred-register: '' } +-registers: +- - { id: 0, class: fpr } +- - { id: 1, class: gpr } +- - { id: 2, class: fpr } +- - { id: 3, class: gpr } +- - { id: 4, class: gpr } +- - { id: 5, class: gpr } +- - { id: 6, class: gpr } +- - { id: 7, class: gpr } +- +-# CHECK: body: +-# CHECK: FCMPSrr %0, %0, implicit-def $nzcv +-# CHECK: [[TST_MI:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 5, implicit $nzcv +-# CHECK: [[TST_GT:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv +-# CHECK: %1:gpr32 = ORRWrr [[TST_MI]], [[TST_GT]] +- +-# CHECK: FCMPDrr %2, %2, implicit-def $nzcv +-# CHECK: %3:gpr32 = CSINCWr $wzr, $wzr, 4, implicit $nzcv +- +-body: | +- bb.0: +- liveins: $w0, $x0 +- +- %0(s32) = COPY $s0 +- %1(s32) = G_FCMP floatpred(one), %0, %0 +- %4(s1) = G_TRUNC %1(s32) +- %6(s32) = G_ANYEXT %4 +- $w0 = COPY %6(s32) +- +- %2(s64) = COPY $d0 +- %3(s32) = G_FCMP floatpred(uge), %2, %2 +- %5(s1) = G_TRUNC %3(s32) +- %7(s32) = G_ANYEXT %5 +- $w0 = COPY %7(s32) +- +-... 
+- +---- +-# CHECK-LABEL: name: phi +-name: phi +-legalized: true +-regBankSelected: true +-tracksRegLiveness: true +- +-# CHECK: registers: +-# CHECK-NEXT: - { id: 0, class: fpr32, preferred-register: '' } +-# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +-# CHECK-NEXT: - { id: 2, class: fpr32, preferred-register: '' } +-registers: +- - { id: 0, class: fpr } +- - { id: 1, class: gpr } +- - { id: 2, class: fpr } +- +-# CHECK: body: +-# CHECK: bb.1: +-# CHECK: %2:fpr32 = PHI %0, %bb.0, %2, %bb.1 +- +-body: | +- bb.0: +- liveins: $s0, $w0 +- successors: %bb.1 +- %0(s32) = COPY $s0 +- %3:gpr(s32) = COPY $w0 +- %1(s1) = G_TRUNC %3 +- +- bb.1: +- successors: %bb.1, %bb.2 +- %2(s32) = PHI %0, %bb.0, %2, %bb.1 +- G_BRCOND %1, %bb.1 +- +- bb.2: +- $s0 = COPY %2 +- RET_ReallyLR implicit $s0 +-... +- +---- +-# CHECK-LABEL: name: select +-name: select +-legalized: true +-regBankSelected: true +-tracksRegLiveness: true +- +-# CHECK: registers: +-# CHECK-NEXT: - { id: 0, class: gpr32, preferred-register: '' } +-# CHECK-NEXT: - { id: 1, class: gpr32, preferred-register: '' } +-# CHECK-NEXT: - { id: 2, class: gpr32, preferred-register: '' } +-# CHECK-NEXT: - { id: 3, class: gpr32, preferred-register: '' } +-# CHECK-NEXT: - { id: 4, class: gpr64, preferred-register: '' } +-# CHECK-NEXT: - { id: 5, class: gpr64, preferred-register: '' } +-# CHECK-NEXT: - { id: 6, class: gpr64, preferred-register: '' } +-# CHECK-NEXT: - { id: 7, class: gpr64, preferred-register: '' } +-# CHECK-NEXT: - { id: 8, class: gpr64, preferred-register: '' } +-# CHECK-NEXT: - { id: 9, class: gpr64, preferred-register: '' } +-registers: +- - { id: 0, class: gpr } +- - { id: 1, class: gpr } +- - { id: 2, class: gpr } +- - { id: 3, class: gpr } +- - { id: 4, class: gpr } +- - { id: 5, class: gpr } +- - { id: 6, class: gpr } +- - { id: 7, class: gpr } +- - { id: 8, class: gpr } +- - { id: 9, class: gpr } +- +-# CHECK: body: +-# CHECK: $wzr = ANDSWri %10, 0, implicit-def $nzcv +-# CHECK: %3:gpr32 = CSELWr %1, %2, 1, implicit $nzcv +-# CHECK: $wzr = ANDSWri %10, 0, implicit-def $nzcv +-# CHECK: %6:gpr64 = CSELXr %4, %5, 1, implicit $nzcv +-# CHECK: $wzr = ANDSWri %10, 0, implicit-def $nzcv +-# CHECK: %9:gpr64 = CSELXr %7, %8, 1, implicit $nzcv +-body: | +- bb.0: +- liveins: $w0, $w1, $w2 +- %10:gpr(s32) = COPY $w0 +- %0(s1) = G_TRUNC %10 +- +- %1(s32) = COPY $w1 +- %2(s32) = COPY $w2 +- %3(s32) = G_SELECT %0, %1, %2 +- $w0 = COPY %3(s32) +- +- %4(s64) = COPY $x0 +- %5(s64) = COPY $x1 +- %6(s64) = G_SELECT %0, %4, %5 +- $x0 = COPY %6(s64) +- +- %7(p0) = COPY $x0 +- %8(p0) = COPY $x1 +- %9(p0) = G_SELECT %0, %7, %8 +- $x0 = COPY %9(p0) +-... +-- +2.25.1 + diff --git a/L/LLVM/LLVM_full@11.0.1/bundled/patches/0705-D97571-AArch64-loh.patch b/L/LLVM/LLVM_full@11.0.1/bundled/patches/0705-D97571-AArch64-loh.patch new file mode 100644 index 00000000000..352a9504652 --- /dev/null +++ b/L/LLVM/LLVM_full@11.0.1/bundled/patches/0705-D97571-AArch64-loh.patch @@ -0,0 +1,101 @@ +From 84eff6b2defb7f2d847d5bac165d6a44472b9ddf Mon Sep 17 00:00:00 2001 +From: Keno Fischer +Date: Mon, 1 Mar 2021 16:38:19 -0500 +Subject: [PATCH 2/4] [AArch64] Fix emitting an AdrpAddLdr LOH when there's a + potential clobber of the def of the adrp before the ldr. + +Apparently this pass used to have liveness analysis but it was removed for scompile time reasons. This workaround prevents the LOH from being emitted unless the ADD and LDR are adjacent. 
+
+Fixes https://github.com/JuliaLang/julia/issues/39820
+
+Reviewed By: loladiro, qcolombet
+
+Differential Revision: https://reviews.llvm.org/D97571
+---
+ llvm/lib/Target/AArch64/AArch64CollectLOH.cpp | 26 +++++++++++++++-
+ .../AArch64/loh-adrp-add-ldr-clobber.mir      | 30 +++++++++++++++++++
+ 2 files changed, 55 insertions(+), 1 deletion(-)
+ create mode 100644 llvm/test/CodeGen/AArch64/loh-adrp-add-ldr-clobber.mir
+
+diff --git a/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp b/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
+index efdb1131abc9..ac243347b24d 100644
+--- a/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
++++ b/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
+@@ -419,13 +419,37 @@ static void handleADRP(const MachineInstr &MI, AArch64FunctionInfo &AFI,
+         ++NumADRPToLDR;
+       }
+       break;
+-    case MCLOH_AdrpAddLdr:
++    case MCLOH_AdrpAddLdr: {
++      // There is a possibility that the linker may try to rewrite:
++      // adrp x0, @sym@PAGE
++      // add x1, x0, @sym@PAGEOFF
++      // [x0 = some other def]
++      // ldr x2, [x1]
++      // ...into...
++      // adrp x0, @sym
++      // nop
++      // [x0 = some other def]
++      // ldr x2, [x0]
++      // ...if the offset to the symbol won't fit within a literal load.
++      // This causes the load to use the result of the adrp, which in this
++      // case has already been clobbered.
++      // FIXME: Implement proper liveness tracking for all registers. For now,
++      // don't emit the LOH if there are any instructions between the add and
++      // the ldr.
++      MachineInstr *AddMI = const_cast<MachineInstr *>(Info.MI1);
++      const MachineInstr *LdrMI = Info.MI0;
++      auto AddIt = MachineBasicBlock::iterator(AddMI);
++      auto EndIt = AddMI->getParent()->end();
++      if (AddMI->getIterator() == EndIt || LdrMI != &*next_nodbg(AddIt, EndIt))
++        break;
++
+       LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAddLdr:\n"
+                         << '\t' << MI << '\t' << *Info.MI1 << '\t'
+                         << *Info.MI0);
+       AFI.addLOHDirective(MCLOH_AdrpAddLdr, {&MI, Info.MI1, Info.MI0});
+       ++NumADDToLDR;
+       break;
++    }
+     case MCLOH_AdrpAddStr:
+       if (Info.MI1 != nullptr) {
+         LLVM_DEBUG(dbgs() << "Adding MCLOH_AdrpAddStr:\n"
+diff --git a/llvm/test/CodeGen/AArch64/loh-adrp-add-ldr-clobber.mir b/llvm/test/CodeGen/AArch64/loh-adrp-add-ldr-clobber.mir
+new file mode 100644
+index 000000000000..e1e893c6383a
+--- /dev/null
++++ b/llvm/test/CodeGen/AArch64/loh-adrp-add-ldr-clobber.mir
+@@ -0,0 +1,30 @@
++# RUN: llc -o /dev/null %s -mtriple=aarch64-apple-ios -run-pass=aarch64-collect-loh -debug-only=aarch64-collect-loh 2>&1 | FileCheck %s
++--- |
++  @sym2 = local_unnamed_addr global [10000000 x i32] zeroinitializer, align 8
++  @sym = local_unnamed_addr global i32 zeroinitializer, align 8
++
++  define i32 @main() {
++    ret i32 0
++  }
++
++...
++---
++name: main
++alignment: 4
++tracksRegLiveness: true
++liveins:
++  - { reg: '$x22', virtual-reg: '' }
++  - { reg: '$x21', virtual-reg: '' }
++body: |
++  bb.0:
++    liveins: $x21, $x22
++    ; Check we don't emit an loh here because there's a clobbering def of x8 before the ldr.
++    ; CHECK-LABEL: main
++    ; CHECK-NOT: MCLOH_AdrpAddLdr
++    renamable $x8 = ADRP target-flags(aarch64-page) @sym
++    renamable $x9 = ADDXri killed renamable $x8, target-flags(aarch64-pageoff, aarch64-nc) @sym, 0
++    renamable $x8 = ADDXri killed renamable $x22, 1, 0
++    $x9 = LDRXui $x9, 0
++    RET undef $lr
++
++...
+-- 
+2.25.1
+
diff --git a/L/LLVM/LLVM_full@11.0.1/bundled/patches/0706-aarch64-addrspace.patch b/L/LLVM/LLVM_full@11.0.1/bundled/patches/0706-aarch64-addrspace.patch
new file mode 100644
index 00000000000..f046a5000ed
--- /dev/null
+++ b/L/LLVM/LLVM_full@11.0.1/bundled/patches/0706-aarch64-addrspace.patch
@@ -0,0 +1,31 @@
+From a1178fdd072b7addcf56c2f6e2298165263040bb Mon Sep 17 00:00:00 2001
+From: Tim Northover
+Date: Thu, 25 Feb 2021 10:13:59 +0000
+Subject: [PATCH 3/4] AArch64: relax address-space assertion in FastISel.
+
+Some people are using alternative address spaces to track GC data, but
+otherwise they behave exactly the same. This is the only place in the backend
+we even try to care about it so it's really not achieving anything.
+---
+ llvm/lib/Target/AArch64/AArch64FastISel.cpp | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+index 0f63f4ca62e5..cf3ebed6ef19 100644
+--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
++++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+@@ -526,10 +526,7 @@ unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
+   MVT VT = CEVT.getSimpleVT();
+   // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
+   // 'null' pointers need to have a somewhat special treatment.
+-  if (const auto *CPN = dyn_cast<ConstantPointerNull>(C)) {
+-    (void)CPN;
+-    assert(CPN->getType()->getPointerAddressSpace() == 0 &&
+-           "Unexpected address space");
++  if (isa<ConstantPointerNull>(C)) {
+     assert(VT == MVT::i64 && "Expected 64-bit pointers");
+     return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
+   }
+-- 
+2.25.1
+
diff --git a/L/LLVM/LLVM_full@11.0.1/bundled/patches/0707-PR49357-AArch64-FastIsel-bug.patch b/L/LLVM/LLVM_full@11.0.1/bundled/patches/0707-PR49357-AArch64-FastIsel-bug.patch
new file mode 100644
index 00000000000..5ab5e96c37a
--- /dev/null
+++ b/L/LLVM/LLVM_full@11.0.1/bundled/patches/0707-PR49357-AArch64-FastIsel-bug.patch
@@ -0,0 +1,50 @@
+From d82434785ecab16223a0bd4ec8ede020bf244003 Mon Sep 17 00:00:00 2001
+From: Keno Fischer
+Date: Mon, 1 Mar 2021 16:42:05 -0500
+Subject: [PATCH 4/4] AArch64: Remove Bad optimization
+
+Removes the code responsible for causing https://bugs.llvm.org/show_bug.cgi?id=49357.
+A fix is in progress upstream, but I don't think it's easy, so this
+fixes the bug in the meantime. The optimization it does is minor.
+---
+ llvm/lib/Target/AArch64/AArch64FastISel.cpp | 24 ------------------------
+ 1 file changed, 24 deletions(-)
+
+diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+index cf3ebed6ef19..6908a51c47d6 100644
+--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
++++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+@@ -4600,30 +4600,6 @@ bool AArch64FastISel::selectIntExt(const Instruction *I) {
+ 
+   // Try to optimize already sign-/zero-extended values from function arguments.
+   bool IsZExt = isa<ZExtInst>(I);
+-  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
+-    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
+-      if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
+-        unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
+-        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+-                TII.get(AArch64::SUBREG_TO_REG), ResultReg)
+-            .addImm(0)
+-            .addReg(SrcReg, getKillRegState(SrcIsKill))
+-            .addImm(AArch64::sub_32);
+-        SrcReg = ResultReg;
+-      }
+-      // Conservatively clear all kill flags from all uses, because we are
+-      // replacing a sign-/zero-extend instruction at IR level with a nop at MI
+-      // level. The result of the instruction at IR level might have been
+-      // trivially dead, which is now not longer true.
+-      unsigned UseReg = lookUpRegForValue(I);
+-      if (UseReg)
+-        MRI.clearKillFlags(UseReg);
+-
+-      updateValueMap(I, SrcReg);
+-      return true;
+-    }
+-  }
+-
+   unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
+   if (!ResultReg)
+     return false;
+-- 
+2.25.1
+