Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARC] Allow overaligned allocas #107223

Merged
merged 8 commits into from
Nov 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions compiler-rt/test/asan/TestCases/alloca_vla_interact.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
//
// REQUIRES: stable-runtime

// See https://github.com/llvm/llvm-project/issues/110956
// XFAIL: target=sparc{{.*}}

// This testcase checks correct interaction between VLAs and allocas.

#include <assert.h>
Expand Down
42 changes: 3 additions & 39 deletions llvm/lib/Target/Sparc/SparcFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ DisableLeafProc("disable-sparc-leaf-proc",
SparcFrameLowering::SparcFrameLowering(const SparcSubtarget &ST)
: TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
ST.is64Bit() ? Align(16) : Align(8), 0,
ST.is64Bit() ? Align(16) : Align(8)) {}
ST.is64Bit() ? Align(16) : Align(8),
/*StackRealignable=*/false) {}

void SparcFrameLowering::emitSPAdjustment(MachineFunction &MF,
MachineBasicBlock &MBB,
Expand Down Expand Up @@ -97,12 +98,6 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF,
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
DebugLoc dl;
bool NeedsStackRealignment = RegInfo.shouldRealignStack(MF);
s-barannikov marked this conversation as resolved.
Show resolved Hide resolved

if (NeedsStackRealignment && !RegInfo.canRealignStack(MF))
report_fatal_error("Function \"" + Twine(MF.getName()) + "\" required "
"stack re-alignment, but LLVM couldn't handle it "
"(probably because it has a dynamic alloca).");

// Get the number of bytes to allocate from the FrameInfo
int NumBytes = (int) MFI.getStackSize();
Expand Down Expand Up @@ -168,31 +163,6 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF,
MCCFIInstruction::createRegister(nullptr, regOutRA, regInRA));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);

if (NeedsStackRealignment) {
int64_t Bias = Subtarget.getStackPointerBias();
unsigned regUnbiased;
if (Bias) {
// This clobbers G1 which we always know is available here.
regUnbiased = SP::G1;
// add %o6, BIAS, %g1
BuildMI(MBB, MBBI, dl, TII.get(SP::ADDri), regUnbiased)
.addReg(SP::O6).addImm(Bias);
} else
regUnbiased = SP::O6;

// andn %regUnbiased, MaxAlign-1, %regUnbiased
Align MaxAlign = MFI.getMaxAlign();
BuildMI(MBB, MBBI, dl, TII.get(SP::ANDNri), regUnbiased)
.addReg(regUnbiased)
.addImm(MaxAlign.value() - 1U);

if (Bias) {
// add %g1, -BIAS, %o6
BuildMI(MBB, MBBI, dl, TII.get(SP::ADDri), SP::O6)
.addReg(regUnbiased).addImm(-Bias);
}
}
}

MachineBasicBlock::iterator SparcFrameLowering::
Expand Down Expand Up @@ -257,8 +227,7 @@ bool SparcFrameLowering::hasFP(const MachineFunction &MF) const {

const MachineFrameInfo &MFI = MF.getFrameInfo();
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
MFI.isFrameAddressTaken();
MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken();
}

StackOffset
Expand All @@ -284,11 +253,6 @@ SparcFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
} else if (isFixed) {
// Otherwise, argument access should always use %fp.
UseFP = true;
} else if (RegInfo->hasStackRealignment(MF)) {
// If there is dynamic stack realignment, all local object
// references need to be via %sp, to take account of the
// re-alignment.
UseFP = false;
} else {
// Finally, default to using %fp.
UseFP = true;
Expand Down
53 changes: 30 additions & 23 deletions llvm/lib/Target/Sparc/SparcISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2762,22 +2762,16 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {

static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
const SparcSubtarget *Subtarget) {
SDValue Chain = Op.getOperand(0); // Legalize the chain.
SDValue Size = Op.getOperand(1); // Legalize the size.
MaybeAlign Alignment =
cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
Align StackAlign = Subtarget->getFrameLowering()->getStackAlign();
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
SDValue Alignment = Op.getOperand(2);
MaybeAlign MaybeAlignment =
cast<ConstantSDNode>(Alignment)->getMaybeAlignValue();
EVT VT = Size->getValueType(0);
SDLoc dl(Op);

// TODO: implement over-aligned alloca. (Note: this also implies
// supporting overaligned function frames combined with dynamic
// allocations at all, which currently isn't supported.)
if (Alignment && *Alignment > StackAlign) {
const MachineFunction &MF = DAG.getMachineFunction();
report_fatal_error("Function \"" + Twine(MF.getName()) + "\": "
"over-aligned dynamic alloca not supported.");
}
unsigned SPReg = SP::O6;
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);

// The resultant pointer needs to be above the register spill area
// at the bottom of the stack.
Expand Down Expand Up @@ -2811,16 +2805,29 @@ static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
regSpillArea = 96;
s-barannikov marked this conversation as resolved.
Show resolved Hide resolved
}

unsigned SPReg = SP::O6;
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
SDValue NewSP = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
Chain = DAG.getCopyToReg(SP.getValue(1), dl, SPReg, NewSP); // Output chain

regSpillArea += Subtarget->getStackPointerBias();

SDValue NewVal = DAG.getNode(ISD::ADD, dl, VT, NewSP,
DAG.getConstant(regSpillArea, dl, VT));
SDValue Ops[2] = { NewVal, Chain };
int64_t Bias = Subtarget->getStackPointerBias();

// Debias and increment SP past the reserved spill area.
// We need the SP to point to the first usable region before calculating
// anything to prevent any of the pointers from becoming out of alignment when
// we rebias the SP later on.
SDValue StartOfUsableStack = DAG.getNode(
ISD::ADD, dl, VT, SP, DAG.getConstant(regSpillArea + Bias, dl, VT));
SDValue AllocatedPtr =
DAG.getNode(ISD::SUB, dl, VT, StartOfUsableStack, Size);

bool IsOveraligned = MaybeAlignment.has_value();
SDValue AlignedPtr =
IsOveraligned
? DAG.getNode(ISD::AND, dl, VT, AllocatedPtr,
DAG.getConstant(-MaybeAlignment->value(), dl, VT))
: AllocatedPtr;

// Now that we are done, restore the bias and reserved spill area.
SDValue NewSP = DAG.getNode(ISD::SUB, dl, VT, AlignedPtr,
DAG.getConstant(regSpillArea + Bias, dl, VT));
Chain = DAG.getCopyToReg(SP.getValue(1), dl, SPReg, NewSP);
SDValue Ops[2] = {AlignedPtr, Chain};
return DAG.getMergeValues(Ops, dl);
}

Expand Down
23 changes: 0 additions & 23 deletions llvm/lib/Target/Sparc/SparcRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,26 +226,3 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Returns the register that serves as the frame register for MF.
// The answer is unconditional: SP::I6 is returned and MF is not consulted.
// NOTE(review): SP::I6 is presumably %i6, i.e. %fp -- confirm against the
// SPARC register definitions.
Register SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return SP::I6;
}

// SPARC itself never needs the stack to be realigned. This hook exists only
// because LLVM currently implements overaligned stack objects on top of its
// stack realignment support; if that dependency ever goes away, this override
// can probably be removed.
//
// Returns true only when the generic TargetRegisterInfo check passes and the
// function has a reserved call frame.
bool SparcRegisterInfo::canRealignStack(const MachineFunction &MF) const {
  // The frame pointer register is permanently fixed on SPARC, so there is no
  // need to worry about reserving it here. [Even when this frame has no frame
  // pointer, the register still cannot be used for anything else, or register
  // window traps will be SADNESS.]
  //
  // With a reserved call frame, locals can be addressed through SP. Without
  // one a base pointer would be required, and those aren't implemented for
  // SPARC at the moment.
  return TargetRegisterInfo::canRealignStack(MF) &&
         getFrameLowering(MF)->hasReservedCallFrame(MF);
}
3 changes: 0 additions & 3 deletions llvm/lib/Target/Sparc/SparcRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,6 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
RegScavenger *RS = nullptr) const override;

Register getFrameRegister(const MachineFunction &MF) const override;

bool canRealignStack(const MachineFunction &MF) const override;

};

} // end namespace llvm
Expand Down
3 changes: 0 additions & 3 deletions llvm/test/CodeGen/Generic/ForceStackAlign.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,6 @@
; CHECK-LABEL: @f
; CHECK-LABEL: @g

; Stack realignment not supported.
; XFAIL: target=sparc{{.*}}

; NVPTX can only select dynamic_stackalloc on sm_52+ and with ptx73+
; XFAIL: target=nvptx{{.*}}

Expand Down
14 changes: 3 additions & 11 deletions llvm/test/CodeGen/SPARC/2013-05-17-CallFrame.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,10 @@
; (this should ideally be doing "add 4+7; and -8", instead of
; "add 7; and -8; add 8"; see comments in LowerDYNAMIC_STACKALLOC)

define void @variable_alloca_with_adj_call_stack(i32 %num) {
define void @variable_alloca_with_adj_call_stack(i32 %num) nounwind {
; V8-LABEL: variable_alloca_with_adj_call_stack:
; V8: .cfi_startproc
; V8-NEXT: ! %bb.0: ! %entry
; V8: ! %bb.0: ! %entry
; V8-NEXT: save %sp, -96, %sp
; V8-NEXT: .cfi_def_cfa_register %fp
; V8-NEXT: .cfi_window_save
; V8-NEXT: .cfi_register %o7, %i7
; V8-NEXT: add %i0, 7, %i0
; V8-NEXT: and %i0, -8, %i0
; V8-NEXT: sub %sp, %i0, %i0
Expand All @@ -34,12 +30,8 @@ define void @variable_alloca_with_adj_call_stack(i32 %num) {
; V8-NEXT: restore
;
; SPARC64-LABEL: variable_alloca_with_adj_call_stack:
; SPARC64: .cfi_startproc
; SPARC64-NEXT: ! %bb.0: ! %entry
; SPARC64: ! %bb.0: ! %entry
; SPARC64-NEXT: save %sp, -128, %sp
; SPARC64-NEXT: .cfi_def_cfa_register %fp
; SPARC64-NEXT: .cfi_window_save
; SPARC64-NEXT: .cfi_register %o7, %i7
; SPARC64-NEXT: srl %i0, 0, %i0
; SPARC64-NEXT: add %i0, 15, %i0
; SPARC64-NEXT: sethi 4194303, %i1
Expand Down
93 changes: 93 additions & 0 deletions llvm/test/CodeGen/SPARC/alloca-align.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -march=sparc < %s | FileCheck %s --check-prefixes=CHECK32
; RUN: llc -march=sparcv9 < %s | FileCheck %s --check-prefixes=CHECK64

;; A fixed-size alloca that requests 64-byte alignment, next to a
;; variable-sized alloca (sized by %num) that only requests align 4.
;; Both resulting pointers are passed to @foo.
define void @variable_alloca_with_overalignment(i32 %num) nounwind {
; CHECK32-LABEL: variable_alloca_with_overalignment:
; CHECK32: ! %bb.0:
; CHECK32-NEXT: save %sp, -96, %sp
; CHECK32-NEXT: add %sp, 80, %i1
; CHECK32-NEXT: and %i1, -64, %o0
; CHECK32-NEXT: add %o0, -96, %sp
; CHECK32-NEXT: add %i0, 7, %i0
; CHECK32-NEXT: and %i0, -8, %i0
; CHECK32-NEXT: sub %sp, %i0, %i0
; CHECK32-NEXT: add %i0, -8, %sp
; CHECK32-NEXT: call foo
; CHECK32-NEXT: add %i0, 88, %o1
; CHECK32-NEXT: ret
; CHECK32-NEXT: restore
;
; CHECK64-LABEL: variable_alloca_with_overalignment:
; CHECK64: ! %bb.0:
; CHECK64-NEXT: save %sp, -128, %sp
; CHECK64-NEXT: add %sp, 2159, %i1
; CHECK64-NEXT: and %i1, -64, %o0
; CHECK64-NEXT: add %o0, -2175, %sp
; CHECK64-NEXT: srl %i0, 0, %i0
; CHECK64-NEXT: add %i0, 15, %i0
; CHECK64-NEXT: sethi 4194303, %i1
; CHECK64-NEXT: or %i1, 1008, %i1
; CHECK64-NEXT: sethi 0, %i2
; CHECK64-NEXT: or %i2, 1, %i2
; CHECK64-NEXT: sllx %i2, 32, %i2
; CHECK64-NEXT: or %i2, %i1, %i1
; CHECK64-NEXT: and %i0, %i1, %i0
; CHECK64-NEXT: sub %sp, %i0, %i0
; CHECK64-NEXT: add %i0, 2175, %o1
; CHECK64-NEXT: mov %i0, %sp
; CHECK64-NEXT: call foo
; CHECK64-NEXT: add %sp, -48, %sp
; CHECK64-NEXT: add %sp, 48, %sp
; CHECK64-NEXT: ret
; CHECK64-NEXT: restore
%aligned = alloca i32, align 64
%var_size = alloca i8, i32 %num, align 4
call void @foo(ptr %aligned, ptr %var_size)
ret void
}

;; Same idea as variable_alloca_with_overalignment, but here the
;; variable-sized alloca itself requests 64-byte alignment. @foo receives
;; a null first argument and the overaligned pointer as its second.
define void @variable_alloca_with_overalignment_2(i32 %num) nounwind {
; CHECK32-LABEL: variable_alloca_with_overalignment_2:
; CHECK32: ! %bb.0:
; CHECK32-NEXT: save %sp, -96, %sp
; CHECK32-NEXT: add %i0, 7, %i0
; CHECK32-NEXT: and %i0, -8, %i0
; CHECK32-NEXT: sub %sp, %i0, %i0
; CHECK32-NEXT: add %i0, 88, %i0
; CHECK32-NEXT: and %i0, -64, %o1
; CHECK32-NEXT: add %o1, -96, %sp
; CHECK32-NEXT: call foo
; CHECK32-NEXT: mov %g0, %o0
; CHECK32-NEXT: ret
; CHECK32-NEXT: restore
;
; CHECK64-LABEL: variable_alloca_with_overalignment_2:
; CHECK64: ! %bb.0:
; CHECK64-NEXT: save %sp, -128, %sp
; CHECK64-NEXT: srl %i0, 0, %i0
; CHECK64-NEXT: add %i0, 15, %i0
; CHECK64-NEXT: sethi 4194303, %i1
; CHECK64-NEXT: or %i1, 1008, %i1
; CHECK64-NEXT: sethi 0, %i2
; CHECK64-NEXT: or %i2, 1, %i2
; CHECK64-NEXT: sllx %i2, 32, %i2
; CHECK64-NEXT: or %i2, %i1, %i1
; CHECK64-NEXT: and %i0, %i1, %i0
; CHECK64-NEXT: sub %sp, %i0, %i0
; CHECK64-NEXT: add %i0, 2175, %i0
; CHECK64-NEXT: and %i0, -64, %o1
; CHECK64-NEXT: add %o1, -2175, %sp
; CHECK64-NEXT: add %sp, -48, %sp
; CHECK64-NEXT: call foo
; CHECK64-NEXT: mov %g0, %o0
; CHECK64-NEXT: add %sp, 48, %sp
; CHECK64-NEXT: ret
; CHECK64-NEXT: restore
%var_size = alloca i8, i32 %num, align 64
call void @foo(ptr null, ptr %var_size)
ret void
}

declare void @foo(ptr, ptr);
23 changes: 0 additions & 23 deletions llvm/test/CodeGen/SPARC/fail-alloca-align.ll

This file was deleted.

16 changes: 4 additions & 12 deletions llvm/test/CodeGen/SPARC/fp128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -54,18 +54,10 @@ entry:

; CHECK-LABEL: f128_spill_large:
; CHECK: sethi 4, %g1
; CHECK: sethi 4, %g1
; CHECK-NEXT: add %g1, %sp, %g1
; CHECK-NEXT: std %f{{.+}}, [%g1]
; CHECK: sethi 4, %g1
; CHECK-NEXT: add %g1, %sp, %g1
; CHECK-NEXT: std %f{{.+}}, [%g1+8]
; CHECK: sethi 4, %g1
; CHECK-NEXT: add %g1, %sp, %g1
; CHECK-NEXT: ldd [%g1], %f{{.+}}
; CHECK: sethi 4, %g1
; CHECK-NEXT: add %g1, %sp, %g1
; CHECK-NEXT: ldd [%g1+8], %f{{.+}}
; CHECK: std %f{{.+}}, [%fp+-16]
; CHECK-NEXT: std %f{{.+}}, [%fp+-8]
; CHECK: ldd [%fp+-16], %f{{.+}}
; CHECK-NEXT: ldd [%fp+-8], %f{{.+}}

define void @f128_spill_large(ptr noalias sret(<251 x fp128>) %scalar.result, ptr byval(<251 x fp128>) %a) {
entry:
Expand Down
Loading
Loading