Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AArch64][SME] Remove unused ZA lazy-save #81648

Merged
merged 23 commits on Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
Revert "fixup: lower to STORETPIDR2 pseudo"
This reverts commit 19a7169.
  • Loading branch information
SamTebbs33 committed Jun 14, 2024
commit de64455ac9b3a16b17d10ceaa05f33d670d1732c
23 changes: 1 addition & 22 deletions llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1166,30 +1166,9 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
default:
break;

case AArch64::STORETPIDR2: {
Register BufferAddr = MI.getOperand(0).getReg();
auto TPIDR2Object = MI.getOperand(1).getReg();
unsigned Offset = MI.getOperand(2).getImm();
// Store the buffer pointer to the TPIDR2 stack object.
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AArch64::STRXui))
.addReg(BufferAddr)
.addUse(TPIDR2Object)
.addImm(0 + Offset);
// Set the reserved bytes (10-15) to zero
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AArch64::STRHHui))
.addReg(AArch64::WZR)
.addUse(TPIDR2Object)
.addImm(5 + Offset);
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AArch64::STRWui))
.addReg(AArch64::WZR)
.addUse(TPIDR2Object)
.addImm(3 + Offset);
MI.eraseFromParent();
return true;
}

case AArch64::STACKALLOC: {
Register Dest = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
Register SPCopy = MI.getOperand(2).getReg();
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::SUBXrs), Dest)
.addReg(SPCopy)
Expand Down
26 changes: 22 additions & 4 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3036,12 +3036,30 @@ AArch64TargetLowering::EmitExpandZABuffer(MachineInstr &MI,
.addReg(SPCopy);
MFI.CreateVariableSizedObject(Align(16), nullptr);

// expand pseudo in expand pass or remove pseudo and remove stack object

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit:

Suggested change (replace the first line with the second):
case AArch64::ExpandZABuffer:
case AArch64::AllocateZABuffer:

unsigned TPIDR2Object = TPIDR2->FrameIndex;

auto MI2 = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STORETPIDR2))
.addReg(BufferAddr)
.addFrameIndex(TPIDR2Object)
.addImm(0);
Register Zero32 = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
MachineInstrBuilder Wzr =
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), Zero32)
.addReg(AArch64::WZR);

// Store the buffer pointer to the TPIDR2 stack object.
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRXui))
.addReg(BufferAddr)
.addFrameIndex(TPIDR2Object)
.addImm(0);
// Set the reserved bytes (10-15) to zero
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRHHui))
.addReg(Wzr.getReg(0))
.addFrameIndex(TPIDR2Object)
.addImm(5);
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRWui))
.addReg(Wzr.getReg(0))
.addFrameIndex(TPIDR2Object)
.addImm(3);

BB->remove_instr(&MI);
return BB;
}
Expand Down
1 change: 0 additions & 1 deletion llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3640,7 +3640,6 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
case AArch64::LDRDui:
case AArch64::STRXui:
case AArch64::STRDui:
case AArch64::STORETPIDR2:
Scale = TypeSize::getFixed(8);
Width = TypeSize::getFixed(8);
MinOffset = 0;
Expand Down
3 changes: 0 additions & 3 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1021,9 +1021,6 @@ include "SMEInstrFormats.td"
//===----------------------------------------------------------------------===//

let hasSideEffects = 1, isCodeGenOnly = 1 in {

def STORETPIDR2 : Pseudo<(outs), (ins GPR64:$addr, GPR64sp:$frameindex, i32imm:$offset), []>, Sched<[]>;

let Defs = [SP] in {

def STACKALLOC : Pseudo<(outs GPR64:$addr), (ins GPR64:$size, GPR64:$sp), []>, Sched<[]>;
Expand Down
44 changes: 20 additions & 24 deletions llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
Original file line number Diff line number Diff line change
Expand Up @@ -241,10 +241,9 @@ define double @za_new_caller_to_za_shared_callee(double %x) nounwind noinline o
; CHECK-COMMON-NEXT: mov x9, sp
; CHECK-COMMON-NEXT: sub x8, x9, x8
; CHECK-COMMON-NEXT: mov sp, x8
; CHECK-COMMON-NEXT: sub x9, x29, #16
; CHECK-COMMON-NEXT: str x8, [x9]
; CHECK-COMMON-NEXT: strh wzr, [x9, #10]
; CHECK-COMMON-NEXT: str wzr, [x9, #12]
; CHECK-COMMON-NEXT: stur x8, [x29, #-16]
; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT: cbz x8, .LBB6_2
; CHECK-COMMON-NEXT: b .LBB6_1
Expand Down Expand Up @@ -282,10 +281,9 @@ define double @za_shared_caller_to_za_none_callee(double %x) nounwind noinline
; CHECK-COMMON-NEXT: mov x9, sp
; CHECK-COMMON-NEXT: sub x8, x9, x8
; CHECK-COMMON-NEXT: mov sp, x8
; CHECK-COMMON-NEXT: sub x9, x29, #16
; CHECK-COMMON-NEXT: str x8, [x9]
; CHECK-COMMON-NEXT: strh wzr, [x9, #10]
; CHECK-COMMON-NEXT: str wzr, [x9, #12]
; CHECK-COMMON-NEXT: stur x8, [x29, #-16]
; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
; CHECK-COMMON-NEXT: sub x8, x29, #16
Expand Down Expand Up @@ -327,14 +325,13 @@ define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_inout_za" nounwind {
; CHECK-COMMON-NEXT: mul x8, x8, x8
; CHECK-COMMON-NEXT: sub x8, x9, x8
; CHECK-COMMON-NEXT: mov sp, x8
; CHECK-COMMON-NEXT: rdsvl x9, #1
; CHECK-COMMON-NEXT: sub x10, x29, #16
; CHECK-COMMON-NEXT: sub x11, x29, #16
; CHECK-COMMON-NEXT: str x8, [x11]
; CHECK-COMMON-NEXT: strh wzr, [x11, #10]
; CHECK-COMMON-NEXT: str wzr, [x11, #12]
; CHECK-COMMON-NEXT: sturh w9, [x29, #-8]
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x10
; CHECK-COMMON-NEXT: stur x8, [x29, #-16]
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: sub x9, x29, #16
; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x9
; CHECK-COMMON-NEXT: bl __addtf3
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
Expand Down Expand Up @@ -395,14 +392,13 @@ define double @frem_call_za(double %a, double %b) "aarch64_inout_za" nounwind {
; CHECK-COMMON-NEXT: mul x8, x8, x8
; CHECK-COMMON-NEXT: sub x8, x9, x8
; CHECK-COMMON-NEXT: mov sp, x8
; CHECK-COMMON-NEXT: rdsvl x9, #1
; CHECK-COMMON-NEXT: sub x10, x29, #16
; CHECK-COMMON-NEXT: sub x11, x29, #16
; CHECK-COMMON-NEXT: str x8, [x11]
; CHECK-COMMON-NEXT: strh wzr, [x11, #10]
; CHECK-COMMON-NEXT: str wzr, [x11, #12]
; CHECK-COMMON-NEXT: sturh w9, [x29, #-8]
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x10
; CHECK-COMMON-NEXT: stur x8, [x29, #-16]
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: sub x9, x29, #16
; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x9
; CHECK-COMMON-NEXT: bl fmod
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
Expand Down
30 changes: 14 additions & 16 deletions llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,13 @@ define void @disable_tailcallopt() "aarch64_inout_za" nounwind {
; CHECK-NEXT: mul x8, x8, x8
; CHECK-NEXT: sub x8, x9, x8
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: rdsvl x9, #1
; CHECK-NEXT: sub x10, x29, #16
; CHECK-NEXT: sub x11, x29, #16
; CHECK-NEXT: str x8, [x11]
; CHECK-NEXT: strh wzr, [x11, #10]
; CHECK-NEXT: str wzr, [x11, #12]
; CHECK-NEXT: sturh w9, [x29, #-8]
; CHECK-NEXT: msr TPIDR2_EL0, x10
; CHECK-NEXT: stur x8, [x29, #-16]
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: sub x9, x29, #16
; CHECK-NEXT: sturh wzr, [x29, #-6]
; CHECK-NEXT: stur wzr, [x29, #-4]
; CHECK-NEXT: sturh w8, [x29, #-8]
; CHECK-NEXT: msr TPIDR2_EL0, x9
; CHECK-NEXT: bl private_za_callee
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
Expand Down Expand Up @@ -51,14 +50,13 @@ define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_inout_za" nounwind {
; CHECK-NEXT: mul x8, x8, x8
; CHECK-NEXT: sub x8, x9, x8
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: rdsvl x9, #1
; CHECK-NEXT: sub x10, x29, #16
; CHECK-NEXT: sub x11, x29, #16
; CHECK-NEXT: str x8, [x11]
; CHECK-NEXT: strh wzr, [x11, #10]
; CHECK-NEXT: str wzr, [x11, #12]
; CHECK-NEXT: sturh w9, [x29, #-8]
; CHECK-NEXT: msr TPIDR2_EL0, x10
; CHECK-NEXT: stur x8, [x29, #-16]
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: sub x9, x29, #16
; CHECK-NEXT: sturh wzr, [x29, #-6]
; CHECK-NEXT: stur wzr, [x29, #-4]
; CHECK-NEXT: sturh w8, [x29, #-8]
; CHECK-NEXT: msr TPIDR2_EL0, x9
; CHECK-NEXT: bl __addtf3
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
Expand Down
35 changes: 30 additions & 5 deletions llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,43 @@ entry:
define float @multi_bb_stpidr2_save_required(i32 %a, float %b, float %c) "aarch64_inout_za" {
; CHECK-LABEL: multi_bb_stpidr2_save_required:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: mov x29, sp
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa w29, 16
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: mul x8, x8, x8
; CHECK-NEXT: sub x8, x9, x8
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: sub x9, x29, #16
; CHECK-NEXT: str x8, [x9]
; CHECK-NEXT: strh wzr, [x9, #10]
; CHECK-NEXT: str wzr, [x9, #12]
; CHECK-NEXT: cbz w0, .LBB1_2
; CHECK-NEXT: // %bb.1: // %use_b
; CHECK-NEXT: fmov s1, #4.00000000
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
; CHECK-NEXT: b .LBB1_5
; CHECK-NEXT: .LBB1_2: // %use_c
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: fmov s0, s1
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: sturh w8, [x29, #-8]
; CHECK-NEXT: msr TPIDR2_EL0, x9
; CHECK-NEXT: bl cosf
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
; CHECK-NEXT: cbnz x8, .LBB1_4
; CHECK-NEXT: // %bb.3: // %use_c
; CHECK-NEXT: bl __arm_tpidr2_restore
; CHECK-NEXT: .LBB1_4: // %use_c
; CHECK-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEXT: .LBB1_5: // %exit
; CHECK-NEXT: mov sp, x29
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
%cmp = icmp ne i32 %a, 0
br i1 %cmp, label %use_b, label %use_c
Expand Down
15 changes: 7 additions & 8 deletions llvm/test/CodeGen/AArch64/sme-zt0-state.ll
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,12 @@ define void @za_zt0_shared_caller_no_state_callee() "aarch64_inout_za" "aarch64_
; CHECK-NEXT: mul x8, x8, x8
; CHECK-NEXT: sub x8, x9, x8
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: stur x8, [x29, #-16]
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: sub x9, x29, #16
; CHECK-NEXT: sub x19, x29, #80
; CHECK-NEXT: str x8, [x9]
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: strh wzr, [x9, #10]
; CHECK-NEXT: str wzr, [x9, #12]
; CHECK-NEXT: sturh wzr, [x29, #-6]
; CHECK-NEXT: stur wzr, [x29, #-4]
; CHECK-NEXT: sturh w8, [x29, #-8]
; CHECK-NEXT: msr TPIDR2_EL0, x9
; CHECK-NEXT: str zt0, [x19]
Expand Down Expand Up @@ -182,10 +182,9 @@ define void @new_za_zt0_caller() "aarch64_new_za" "aarch64_new_zt0" nounwind {
; CHECK-NEXT: mul x8, x8, x8
; CHECK-NEXT: sub x8, x9, x8
; CHECK-NEXT: mov sp, x8
; CHECK-NEXT: sub x9, x29, #16
; CHECK-NEXT: str x8, [x9]
; CHECK-NEXT: strh wzr, [x9, #10]
; CHECK-NEXT: str wzr, [x9, #12]
; CHECK-NEXT: stur x8, [x29, #-16]
; CHECK-NEXT: sturh wzr, [x29, #-6]
; CHECK-NEXT: stur wzr, [x29, #-4]
; CHECK-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEXT: cbz x8, .LBB7_2
; CHECK-NEXT: // %bb.1: // %save.za
Expand Down