diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 03f0778bae59d5e..69199e81c1c6dee 100644 --- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -1121,6 +1121,22 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, default: break; + case AArch64::STACKALLOC: { + // Dest = SPCopy - <size operand>; then SP = Dest. + Register Dest = MI.getOperand(0).getReg(); + Register SPCopy = MI.getOperand(2).getReg(); + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::SUBXrs), Dest) + .addReg(SPCopy) + .add(MI.getOperand(1)) + .addImm(0); + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri)) + .addReg(AArch64::SP, RegState::Define) + .addReg(Dest) + .addImm(0) + .addImm(0); + MI.eraseFromParent(); + return true; + } case AArch64::BSPv8i8: case AArch64::BSPv16i8: { Register DstReg = MI.getOperand(0).getReg(); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 82f9190983c37e6..39d5a26ae485e0e 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2959,22 +2959,24 @@ AArch64TargetLowering::EmitExpandZABuffer(MachineInstr &MI, BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::RDSVLI_XI), RDSVL) .addImm(1); - Register SP = MRI.createVirtualRegister(&AArch64::GPR64RegClass); - BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), SP) - .addReg(AArch64::SP); - - // Allocate a lazy-save buffer object of size SVL.B * SVL.B (worst-case) - Register MSub = MRI.createVirtualRegister(&AArch64::GPR64RegClass); - BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::MSUBXrrr), MSub) + // Allocate the ZA buffer + Register BufferSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::MADDXrrr), BufferSize) .addReg(RDSVL) .addReg(RDSVL) - 
.addReg(SP); - BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), AArch64::SP) - .addReg(MSub); + .addReg(AArch64::XZR); + Register BufferAddr = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + Register SPCopy = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), SPCopy) + .addReg(AArch64::SP); + BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STACKALLOC), BufferAddr) + .addReg(BufferSize) + .addReg(SPCopy); + MFI.CreateVariableSizedObject(Align(16), nullptr); + + // TODO: expand STACKALLOC in ExpandPseudo, or remove it and the stack object. - // Allocate an additional TPIDR2 object on the stack (16 bytes) unsigned TPIDR2Object = TPIDR2->FrameIndex; - MFI.CreateVariableSizedObject(Align(16), nullptr); Register Zero32 = MRI.createVirtualRegister(&AArch64::GPR32RegClass); MachineInstrBuilder Wzr = @@ -2983,7 +2985,7 @@ AArch64TargetLowering::EmitExpandZABuffer(MachineInstr &MI, // Store the buffer pointer to the TPIDR2 stack object. BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STRXui)) - .addReg(MSub) + .addReg(BufferAddr) .addFrameIndex(TPIDR2Object) .addImm(0); // Set the reserved bytes (10-15) to zero diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index b1f514f75207f00..2ad25e041edb162 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -980,7 +980,11 @@ include "SMEInstrFormats.td" //===----------------------------------------------------------------------===// let hasSideEffects = 1, isCodeGenOnly = 1 in { -let Defs = [SP], Uses = [SP] in { +let Defs = [SP] in { + +def STACKALLOC : Pseudo<(outs GPR64:$addr), (ins GPR64:$size, GPR64:$sp), []>, Sched<[]>; + +let Uses = [SP] in { // We set Sched to empty list because we expect these instructions to simply get // removed in most cases. 
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), @@ -991,6 +995,7 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), Sched<[]>; } +} let Defs = [SP, NZCV], Uses = [SP] in { // Probed stack allocation of a constant size, used in function prologues when diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll index 694bc6d0bd93776..eb2e346873b94a3 100644 --- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll +++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll @@ -218,8 +218,9 @@ define double @za_new_caller_to_za_shared_callee(double %x) nounwind noinline o ; CHECK-COMMON-NEXT: mov x29, sp ; CHECK-COMMON-NEXT: sub sp, sp, #16 ; CHECK-COMMON-NEXT: rdsvl x8, #1 +; CHECK-COMMON-NEXT: mul x8, x8, x8 ; CHECK-COMMON-NEXT: mov x9, sp -; CHECK-COMMON-NEXT: msub x8, x8, x8, x9 +; CHECK-COMMON-NEXT: sub x8, x9, x8 ; CHECK-COMMON-NEXT: mov sp, x8 ; CHECK-COMMON-NEXT: stur x8, [x29, #-16] ; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6] @@ -255,8 +256,9 @@ define double @za_shared_caller_to_za_none_callee(double %x) nounwind noinline ; CHECK-COMMON-NEXT: mov x29, sp ; CHECK-COMMON-NEXT: sub sp, sp, #16 ; CHECK-COMMON-NEXT: rdsvl x8, #1 +; CHECK-COMMON-NEXT: mul x8, x8, x8 ; CHECK-COMMON-NEXT: mov x9, sp -; CHECK-COMMON-NEXT: msub x8, x8, x8, x9 +; CHECK-COMMON-NEXT: sub x8, x9, x8 ; CHECK-COMMON-NEXT: mov sp, x8 ; CHECK-COMMON-NEXT: stur x8, [x29, #-16] ; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6] @@ -297,7 +299,8 @@ define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_inout_za" nounwind { ; CHECK-COMMON-NEXT: sub sp, sp, #16 ; CHECK-COMMON-NEXT: rdsvl x8, #1 ; CHECK-COMMON-NEXT: mov x9, sp -; CHECK-COMMON-NEXT: msub x8, x8, x8, x9 +; CHECK-COMMON-NEXT: mul x8, x8, x8 +; CHECK-COMMON-NEXT: sub x8, x9, x8 ; CHECK-COMMON-NEXT: mov sp, x8 ; CHECK-COMMON-NEXT: stur x8, [x29, #-16] ; CHECK-COMMON-NEXT: rdsvl x8, #1 @@ -360,7 +363,8 @@ define double @frem_call_za(double 
%a, double %b) "aarch64_inout_za" nounwind { ; CHECK-COMMON-NEXT: sub sp, sp, #16 ; CHECK-COMMON-NEXT: rdsvl x8, #1 ; CHECK-COMMON-NEXT: mov x9, sp -; CHECK-COMMON-NEXT: msub x8, x8, x8, x9 +; CHECK-COMMON-NEXT: mul x8, x8, x8 +; CHECK-COMMON-NEXT: sub x8, x9, x8 ; CHECK-COMMON-NEXT: mov sp, x8 ; CHECK-COMMON-NEXT: stur x8, [x29, #-16] ; CHECK-COMMON-NEXT: rdsvl x8, #1 diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll index 4eb21ed70dd0fb1..9d24708577c134a 100644 --- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll +++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll @@ -13,7 +13,8 @@ define void @test_lazy_save_1_callee() nounwind "aarch64_inout_za" { ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: rdsvl x8, #1 ; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: msub x8, x8, x8, x9 +; CHECK-NEXT: mul x8, x8, x8 +; CHECK-NEXT: sub x8, x9, x8 ; CHECK-NEXT: mov sp, x8 ; CHECK-NEXT: stur x8, [x29, #-16] ; CHECK-NEXT: rdsvl x8, #1 @@ -48,7 +49,8 @@ define void @test_lazy_save_2_callees() nounwind "aarch64_inout_za" { ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: rdsvl x8, #1 ; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: msub x8, x8, x8, x9 +; CHECK-NEXT: mul x8, x8, x8 +; CHECK-NEXT: sub x8, x9, x8 ; CHECK-NEXT: mov sp, x8 ; CHECK-NEXT: rdsvl x19, #1 ; CHECK-NEXT: sub x20, x29, #16 @@ -95,7 +97,8 @@ define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_inou ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: rdsvl x8, #1 ; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: msub x8, x8, x8, x9 +; CHECK-NEXT: mul x8, x8, x8 +; CHECK-NEXT: sub x8, x9, x8 ; CHECK-NEXT: mov sp, x8 ; CHECK-NEXT: stur x8, [x29, #-16] ; CHECK-NEXT: rdsvl x8, #1 @@ -134,7 +137,8 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: rdsvl x8, #1 ; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: msub x8, x8, x8, x9 +; CHECK-NEXT: mul x8, x8, x8 +; CHECK-NEXT: sub x8, x9, x8 ; CHECK-NEXT: mov 
sp, x8 ; CHECK-NEXT: stur x8, [x29, #-80] ; CHECK-NEXT: rdsvl x8, #1 diff --git a/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll b/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll index 46672c364b73d82..03b49c39a4539e6 100644 --- a/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll +++ b/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll @@ -12,7 +12,8 @@ define void @disable_tailcallopt() "aarch64_inout_za" nounwind { ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: rdsvl x8, #1 ; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: msub x8, x8, x8, x9 +; CHECK-NEXT: mul x8, x8, x8 +; CHECK-NEXT: sub x8, x9, x8 ; CHECK-NEXT: mov sp, x8 ; CHECK-NEXT: stur x8, [x29, #-16] ; CHECK-NEXT: rdsvl x8, #1 @@ -46,7 +47,8 @@ define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_inout_za" nounwind { ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: rdsvl x8, #1 ; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: msub x8, x8, x8, x9 +; CHECK-NEXT: mul x8, x8, x8 +; CHECK-NEXT: sub x8, x9, x8 ; CHECK-NEXT: mov sp, x8 ; CHECK-NEXT: stur x8, [x29, #-16] ; CHECK-NEXT: rdsvl x8, #1 diff --git a/llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll b/llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll index aaf11bf2ba64a6b..26bacd72ffa47eb 100644 --- a/llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll +++ b/llvm/test/CodeGen/AArch64/sme-za-lazy-save-buffer.ll @@ -21,7 +21,8 @@ define float @multi_bb_stpidr2_save_required(i32 %a, float %b, float %c) "aarch6 ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: rdsvl x8, #1 ; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: msub x8, x8, x8, x9 +; CHECK-NEXT: mul x8, x8, x8 +; CHECK-NEXT: sub x8, x9, x8 ; CHECK-NEXT: mov sp, x8 ; CHECK-NEXT: stur x8, [x29, #-16] ; CHECK-NEXT: sturh wzr, [x29, #-6] diff --git a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll index cbbfb4a7ca7a687..f810054eac8315c 100644 --- a/llvm/test/CodeGen/AArch64/sme-zt0-state.ll +++ b/llvm/test/CodeGen/AArch64/sme-zt0-state.ll @@ -39,7 +39,8 @@ define void 
@za_zt0_shared_caller_no_state_callee() "aarch64_inout_za" "aarch64_ ; CHECK-NEXT: sub sp, sp, #80 ; CHECK-NEXT: rdsvl x8, #1 ; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: msub x8, x8, x8, x9 +; CHECK-NEXT: mul x8, x8, x8 +; CHECK-NEXT: sub x8, x9, x8 ; CHECK-NEXT: mov sp, x8 ; CHECK-NEXT: stur x8, [x29, #-16] ; CHECK-NEXT: rdsvl x8, #1 @@ -178,7 +179,8 @@ define void @new_za_zt0_caller() "aarch64_new_za" "aarch64_new_zt0" nounwind { ; CHECK-NEXT: sub sp, sp, #80 ; CHECK-NEXT: rdsvl x8, #1 ; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: msub x8, x8, x8, x9 +; CHECK-NEXT: mul x8, x8, x8 +; CHECK-NEXT: sub x8, x9, x8 ; CHECK-NEXT: mov sp, x8 ; CHECK-NEXT: stur x8, [x29, #-16] ; CHECK-NEXT: sturh wzr, [x29, #-6]