Revert "[ARM] musttail fixes"
committed by accident, see #104795

This reverts commit a2088a2.
kiran-isaac committed Aug 27, 2024
1 parent bc4bedd commit c50d11e
Showing 10 changed files with 191 additions and 661 deletions.
2 changes: 1 addition & 1 deletion clang/lib/CodeGen/CGCall.cpp
@@ -5086,7 +5086,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
RawAddress SRetAlloca = RawAddress::invalid();
llvm::Value *UnusedReturnSizePtr = nullptr;
if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) {
if ((IsVirtualFunctionPointerThunk || IsMustTail) && RetAI.isIndirect()) {
if (IsVirtualFunctionPointerThunk && RetAI.isIndirect()) {
SRetPtr = makeNaturalAddressForPointer(CurFn->arg_begin() +
IRFunctionArgs.getSRetArgNo(),
RetTy, CharUnits::fromQuantity(1));
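For context on the CGCall.cpp hunk above: when a call marked musttail returns its value indirectly (sret), the caller is expected to forward its own incoming sret pointer instead of allocating a fresh return slot, and the reverted condition drops the IsMustTail case from that path. A minimal sketch of the kind of source that exercises this, using Clang's [[clang::musttail]] attribute (the struct and function names are invented for illustration):

    // Illustrative only: a struct large enough to be returned indirectly
    // (sret) under the ARM AAPCS, and a guaranteed tail call that must write
    // its result straight into the storage the caller was handed.
    struct Big { int words[16]; };

    Big produce(int seed);

    Big forward(int seed) {
      // With [[clang::musttail]], CodeGen may not create a temporary return
      // slot; the callee reuses the caller's sret pointer.
      [[clang::musttail]] return produce(seed + 1);
    }
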
2 changes: 0 additions & 2 deletions llvm/include/llvm/CodeGen/CallingConvLower.h
@@ -540,8 +540,6 @@ class CCState {
});
}

void dump() const;

private:
/// MarkAllocated - Mark a register and all of its aliases as allocated.
void MarkAllocated(MCPhysReg Reg);
61 changes: 0 additions & 61 deletions llvm/lib/CodeGen/CallingConvLower.cpp
@@ -290,64 +290,3 @@ bool CCState::resultsCompatible(CallingConv::ID CalleeCC,
return std::equal(RVLocs1.begin(), RVLocs1.end(), RVLocs2.begin(),
RVLocs2.end(), AreCompatible);
}

void CCState::dump() const {
dbgs() << "CCState:\n";
for (const CCValAssign &Loc : Locs) {
if (Loc.isRegLoc()) {
dbgs() << " Reg " << TRI.getName(Loc.getLocReg());
} else if (Loc.isMemLoc()) {
dbgs() << " Mem " << Loc.getLocMemOffset();
} else {
assert(Loc.isPendingLoc());
dbgs() << " Pend " << Loc.getExtraInfo();
}

dbgs() << " ValVT:" << Loc.getValVT();
dbgs() << " LocVT:" << Loc.getLocVT();

if (Loc.needsCustom())
dbgs() << " custom";

switch (Loc.getLocInfo()) {
case CCValAssign::Full:
dbgs() << " Full";
break;
case CCValAssign::SExt:
dbgs() << " SExt";
break;
case CCValAssign::ZExt:
dbgs() << " ZExt";
break;
case CCValAssign::AExt:
dbgs() << " AExt";
break;
case CCValAssign::SExtUpper:
dbgs() << " SExtUpper";
break;
case CCValAssign::ZExtUpper:
dbgs() << " ZExtUpper";
break;
case CCValAssign::AExtUpper:
dbgs() << " AExtUpper";
break;
case CCValAssign::BCvt:
dbgs() << " BCvt";
break;
case CCValAssign::Trunc:
dbgs() << " Trunc";
break;
case CCValAssign::VExt:
dbgs() << " VExt";
break;
case CCValAssign::FPExt:
dbgs() << " FPExt";
break;
case CCValAssign::Indirect:
dbgs() << " Indirect";
break;
}

dbgs() << "\n";
}
}
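The CCState::dump() helper removed above was a debugging aid; where it exists (that is, in the pre-revert tree), a target's call-lowering code could print the computed argument assignments under LLVM_DEBUG. A rough sketch of such a call site, assuming CCInfo is an already-analyzed CCState:

    // Sketch only: relies on CCState::dump(), which this revert removes.
    LLVM_DEBUG({
      dbgs() << "Outgoing argument assignments:\n";
      CCInfo.dump();
    });
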
141 changes: 99 additions & 42 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -2407,8 +2407,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
isTailCall = false;

// For both the non-secure calls and the returns from a CMSE entry function,
// the function needs to do some extra work after the call, or before the
// return, respectively, thus it cannot end with a tail call
if (isCmseNSCall || AFI->isCmseNSEntryFunction())
isTailCall = false;

@@ -2960,6 +2960,50 @@ void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
Size = std::max<int>(Size - Excess, 0);
}

/// MatchingStackOffset - Return true if the given stack call argument is
/// already available in the same position (relatively) of the caller's
/// incoming argument stack.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
const TargetInstrInfo *TII) {
unsigned Bytes = Arg.getValueSizeInBits() / 8;
int FI = std::numeric_limits<int>::max();
if (Arg.getOpcode() == ISD::CopyFromReg) {
Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
if (!VR.isVirtual())
return false;
MachineInstr *Def = MRI->getVRegDef(VR);
if (!Def)
return false;
if (!Flags.isByVal()) {
if (!TII->isLoadFromStackSlot(*Def, FI))
return false;
} else {
return false;
}
} else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
if (Flags.isByVal())
// ByVal argument is passed in as a pointer but it's now being
// dereferenced. e.g.
// define @foo(%struct.X* %A) {
// tail call @bar(%struct.X* byval %A)
// }
return false;
SDValue Ptr = Ld->getBasePtr();
FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
if (!FINode)
return false;
FI = FINode->getIndex();
} else
return false;

assert(FI != std::numeric_limits<int>::max());
if (!MFI.isFixedObjectIndex(FI))
return false;
return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
}

/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function. Note that this function also
@@ -3001,10 +3045,8 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
for (const CCValAssign &AL : ArgLocs)
if (AL.isRegLoc())
AddressRegisters.erase(AL.getLocReg());
if (AddressRegisters.empty()) {
LLVM_DEBUG(dbgs() << "false (no space for target address)\n");
if (AddressRegisters.empty())
return false;
}
}

// Look for obvious safe cases to perform tail call optimization that do not
@@ -3013,26 +3055,18 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
// Exception-handling functions need a special set of instructions to indicate
// a return to the hardware. Tail-calling another function would probably
// break this.
if (CallerF.hasFnAttribute("interrupt")) {
LLVM_DEBUG(dbgs() << "false (interrupt attribute)\n");
if (CallerF.hasFnAttribute("interrupt"))
return false;
}

if (canGuaranteeTCO(CalleeCC,
getTargetMachine().Options.GuaranteedTailCallOpt)) {
LLVM_DEBUG(dbgs() << (CalleeCC == CallerCC ? "true" : "false")
<< " (guaranteed tail-call CC)\n");
if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
return CalleeCC == CallerCC;
}

// Also avoid sibcall optimization if only one of caller or callee uses
// struct return semantics.
// Also avoid sibcall optimization if either caller or callee uses struct
// return semantics.
bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
bool isCallerStructRet = MF.getFunction().hasStructRetAttr();
if (isCalleeStructRet != isCallerStructRet) {
LLVM_DEBUG(dbgs() << "false (struct-ret)\n");
if (isCalleeStructRet || isCallerStructRet)
return false;
}

// Externally-defined functions with weak linkage should not be
// tail-called on ARM when the OS does not support dynamic
@@ -3045,11 +3079,8 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
const GlobalValue *GV = G->getGlobal();
const Triple &TT = getTargetMachine().getTargetTriple();
if (GV->hasExternalWeakLinkage() &&
(!TT.isOSWindows() || TT.isOSBinFormatELF() ||
TT.isOSBinFormatMachO())) {
LLVM_DEBUG(dbgs() << "false (external weak linkage)\n");
(!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
return false;
}
}

// Check that the call results are passed in the same way.
@@ -3058,44 +3089,70 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
getEffectiveCallingConv(CalleeCC, isVarArg),
getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
CCAssignFnForReturn(CalleeCC, isVarArg),
CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) {
LLVM_DEBUG(dbgs() << "false (incompatible results)\n");
CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
return false;
}
// The callee has to preserve all registers the caller needs to preserve.
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
if (CalleeCC != CallerCC) {
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) {
LLVM_DEBUG(dbgs() << "false (not all registers preserved)\n");
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
return false;
}
}

// If Caller's vararg argument has been split between registers and
// If Caller's vararg or byval argument has been split between registers and
// stack, do not perform tail call, since part of the argument is in caller's
// local frame.
const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
if (CLI.IsVarArg && AFI_Caller->getArgRegsSaveSize()) {
LLVM_DEBUG(dbgs() << "false (vararg arg reg save area)\n");
if (AFI_Caller->getArgRegsSaveSize())
return false;
}

// If the callee takes no arguments then go on to check the results of the
// call.
const MachineRegisterInfo &MRI = MF.getRegInfo();
if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) {
LLVM_DEBUG(dbgs() << "false (parameters in CSRs do not match)\n");
return false;
}
if (!Outs.empty()) {
if (CCInfo.getStackSize()) {
// Check if the arguments are already laid out in the right way as
// the caller's fixed stack objects.
MachineFrameInfo &MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
i != e;
++i, ++realArgIdx) {
CCValAssign &VA = ArgLocs[i];
EVT RegVT = VA.getLocVT();
SDValue Arg = OutVals[realArgIdx];
ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
if (VA.getLocInfo() == CCValAssign::Indirect)
return false;
if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
// f64 and vector types are split into multiple registers or
// register/stack-slot combinations. The types will not match
// the registers; give up on memory f64 refs until we figure
// out what to do about this.
if (!VA.isRegLoc())
return false;
if (!ArgLocs[++i].isRegLoc())
return false;
if (RegVT == MVT::v2f64) {
if (!ArgLocs[++i].isRegLoc())
return false;
if (!ArgLocs[++i].isRegLoc())
return false;
}
} else if (!VA.isRegLoc()) {
if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
MFI, MRI, TII))
return false;
}
}
}

// If the stack arguments for this call do not fit into our own save area then
// the call cannot be made tail.
if (CCInfo.getStackSize() > AFI_Caller->getArgumentStackSize())
return false;
const MachineRegisterInfo &MRI = MF.getRegInfo();
if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
return false;
}

LLVM_DEBUG(dbgs() << "true\n");
return true;
}

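The restored MatchingStackOffset path above only matters once a call passes arguments on the stack: a sibling call may reuse the caller's fixed incoming argument slots, but only if each outgoing stack argument already sits at the same offset with the same size. A hedged C++ illustration of the pattern being checked (names are invented; under the AAPCS the fifth and later integer arguments are passed on the stack):

    int consume(int a, int b, int c, int d, int e, int f);

    // 'forward' receives e and f on the stack and hands them to 'consume'
    // unchanged, so the outgoing stack arguments can live in the caller's own
    // incoming slots, which is the situation MatchingStackOffset recognizes
    // when deciding whether the call may become a tail call.
    int forward(int a, int b, int c, int d, int e, int f) {
      return consume(a, b, c, d, e, f);
    }
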
16 changes: 11 additions & 5 deletions llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll
@@ -12,11 +12,17 @@ define void @check227(
; arg1 --> SP+188

entry:
; CHECK: sub sp, sp, #12
; CHECK: stm sp, {r1, r2, r3}
; CHECK: ldr r0, [sp, #200]
; CHECK: add sp, sp, #12
; CHECK: b useInt

;CHECK: sub sp, sp, #12
;CHECK: push {r11, lr}
;CHECK: sub sp, sp, #4
;CHECK: add r0, sp, #12
;CHECK: stm r0, {r1, r2, r3}
;CHECK: ldr r0, [sp, #212]
;CHECK: bl useInt
;CHECK: add sp, sp, #4
;CHECK: pop {r11, lr}
;CHECK: add sp, sp, #12

%0 = ptrtoint ptr %arg1 to i32
tail call void @useInt(i32 %0)
13 changes: 8 additions & 5 deletions llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll
@@ -7,11 +7,14 @@
define void @foo(ptr byval(%struct4bytes) %p0, ; --> R0
ptr byval(%struct20bytes) %p1 ; --> R1,R2,R3, [SP+0 .. SP+8)
) {
;CHECK: sub sp, sp, #16
;CHECK: stm sp, {r0, r1, r2, r3}
;CHECK: add r0, sp, #4
;CHECK: add sp, sp, #16
;CHECK: b useInt
;CHECK: sub sp, sp, #16
;CHECK: push {r11, lr}
;CHECK: add r12, sp, #8
;CHECK: stm r12, {r0, r1, r2, r3}
;CHECK: add r0, sp, #12
;CHECK: bl useInt
;CHECK: pop {r11, lr}
;CHECK: add sp, sp, #16

%1 = ptrtoint ptr %p1 to i32
tail call void @useInt(i32 %1)
22 changes: 0 additions & 22 deletions llvm/test/CodeGen/ARM/fp-arg-shuffle.ll
@@ -3,28 +3,6 @@
; CHECK: function1
; CHECK-NOT: vmov
define double @function1(double %a, double %b, double %c, double %d, double %e, double %f) nounwind noinline ssp {
; CHECK-LABEL: function1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r11, lr}
; CHECK-NEXT: push {r4, r5, r11, lr}
; CHECK-NEXT: vldr d16, [sp, #40]
; CHECK-NEXT: vldr d17, [sp, #32]
; CHECK-NEXT: vmov r12, lr, d16
; CHECK-NEXT: vldr d16, [sp, #16]
; CHECK-NEXT: vmov r4, r5, d17
; CHECK-NEXT: vldr d17, [sp, #24]
; CHECK-NEXT: str r3, [sp, #36]
; CHECK-NEXT: str r2, [sp, #32]
; CHECK-NEXT: str r1, [sp, #44]
; CHECK-NEXT: str r0, [sp, #40]
; CHECK-NEXT: vstr d17, [sp, #16]
; CHECK-NEXT: vstr d16, [sp, #24]
; CHECK-NEXT: mov r0, r12
; CHECK-NEXT: mov r1, lr
; CHECK-NEXT: mov r2, r4
; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: pop {r4, r5, r11, lr}
; CHECK-NEXT: b function2
entry:
%call = tail call double @function2(double %f, double %e, double %d, double %c, double %b, double %a) nounwind
ret double %call

