Skip to content

Commit

Permalink
Re-land: [ARM] Fix frame chains with M-profile PACBTI (llvm#110285)
Browse files Browse the repository at this point in the history
When using AAPCS-compliant frame chains with PACBTI return address
signing, there ware a number of bugs in the generation of the frame
pointer and function prologues. The most obvious was that we sometimes
would modify r11 before pushing it to the stack, so it wasn't preserved
as required by the PCS. We also sometimes did not push R11 and LR
adjacent to one another on the stack, or used R11 as a frame pointer
without pointing it at the saved value of R11, both of which are
required to have an AAPCS compliant frame chain.

The original work of this patch was done by James Westwood, reviewed as
 llvm#82801 and llvm#81249, with some tidy-ups done by Mark Murray and myself.
  • Loading branch information
ostannard committed Oct 24, 2024
1 parent 3a2c957 commit 493529f
Show file tree
Hide file tree
Showing 6 changed files with 281 additions and 57 deletions.
5 changes: 4 additions & 1 deletion llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,12 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_iOS_SaveList;

if (PushPopSplit == ARMSubtarget::SplitR7)
return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_SaveList
return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_R7_SaveList
: CSR_ATPCS_SplitPush_SaveList;

if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
return CSR_AAPCS_SplitPush_R11_SaveList;

return CSR_AAPCS_SaveList;
}

Expand Down
19 changes: 11 additions & 8 deletions llvm/lib/Target/ARM/ARMCallingConv.td
Original file line number Diff line number Diff line change
Expand Up @@ -301,14 +301,17 @@ def CSR_ATPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush,
def CSR_ATPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush,
R10)>;

// When enforcing an AAPCS compliant frame chain, R11 is used as the frame
// pointer even for Thumb targets, where split pushes are necessary.
// This AAPCS alternative makes sure the frame index slots match the push
// order in that case.
def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R11,
R7, R6, R5, R4,
R10, R9, R8,
(sequence "D%u", 15, 8))>;
// Sometimes we need to split the push of the callee-saved GPRs into two
// regions, to ensure that the frame chain record is set up correctly. These
// list the callee-saved registers in the order they end up on the stack, which
// depends on whether the frame pointer is r7 or r11.
def CSR_AAPCS_SplitPush_R11 : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4,
LR, R11,
(sequence "D%u", 15, 8))>;
def CSR_AAPCS_SplitPush_R7 : CalleeSavedRegs<(add LR, R11,
R7, R6, R5, R4,
R10, R9, R8,
(sequence "D%u", 15, 8))>;

// Constructors and destructors return 'this' in the ARM C++ ABI; since 'this'
// and the pointer return value are both passed in R0 in these cases, this can
Expand Down
145 changes: 97 additions & 48 deletions llvm/lib/Target/ARM/ARMFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,11 @@ SpillArea getSpillArea(Register Reg,
// push {r0-r10, r12} GPRCS1
// vpush {r8-d15} DPRCS1
// push {r11, lr} GPRCS2
//
// SplitR11AAPCSSignRA:
// push {r0-r10, r12} GPRSC1
// push {r11, lr} GPRCS2
// vpush {r8-d15} DPRCS1

// If FPCXTNS is spilled (for CMSE secure entryfunctions), it is always at
// the top of the stack frame.
Expand Down Expand Up @@ -246,7 +251,8 @@ SpillArea getSpillArea(Register Reg,
return SpillArea::GPRCS1;

case ARM::LR:
if (Variation == ARMSubtarget::SplitR11WindowsSEH)
if (Variation == ARMSubtarget::SplitR11WindowsSEH ||
Variation == ARMSubtarget::SplitR11AAPCSSignRA)
return SpillArea::GPRCS2;
else
return SpillArea::GPRCS1;
Expand Down Expand Up @@ -863,6 +869,9 @@ static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
// This is a conservative estimation: Assume the frame pointer being r7 and
// pc("r15") up to r8 getting spilled before (= 8 registers).
int MaxRegBytes = 8 * 4;
if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
// Here, r11 can be stored below all of r4-r15.
MaxRegBytes = 11 * 4;
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
// Here, r11 can be stored below all of r4-r15 plus d8-d15.
MaxRegBytes = 11 * 4 + 8 * 8;
Expand Down Expand Up @@ -935,17 +944,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
}

// Determine spill area sizes, and some important frame indices.
SpillArea FramePtrSpillArea = SpillArea::GPRCS1;
bool BeforeFPPush = true;
for (const CalleeSavedInfo &I : CSI) {
Register Reg = I.getReg();
int FI = I.getFrameIdx();

if (Reg == FramePtr)
SpillArea Area = getSpillArea(Reg, PushPopSplit,
AFI->getNumAlignedDPRCS2Regs(), RegInfo);

if (Reg == FramePtr) {
FramePtrSpillFI = FI;
FramePtrSpillArea = Area;
}
if (Reg == ARM::D8)
D8SpillFI = FI;

switch (getSpillArea(Reg, PushPopSplit, AFI->getNumAlignedDPRCS2Regs(),
RegInfo)) {
switch (Area) {
case SpillArea::FPCXT:
FPCXTSaveSize += 4;
break;
Expand All @@ -972,21 +987,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// Move past FPCXT area.
if (FPCXTSaveSize > 0) {
LastPush = MBBI++;
DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true);
DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, BeforeFPPush);
}

// Allocate the vararg register save area.
if (ArgRegsSaveSize) {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
MachineInstr::FrameSetup);
LastPush = std::prev(MBBI);
DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true);
DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, BeforeFPPush);
}

// Move past area 1.
if (GPRCS1Size > 0) {
GPRCS1Push = LastPush = MBBI++;
DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, BeforeFPPush);
if (FramePtrSpillArea == SpillArea::GPRCS1)
BeforeFPPush = false;
}

// Determine starting offsets of spill areas. These offsets are all positive
Expand All @@ -1010,21 +1027,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
} else {
DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
}
int FramePtrOffsetInPush = 0;
if (HasFP) {
// Offset from the CFA to the saved frame pointer, will be negative.
[[maybe_unused]] int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
LLVM_DEBUG(dbgs() << "FramePtrSpillFI: " << FramePtrSpillFI
<< ", FPOffset: " << FPOffset << "\n");
assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
"Max FP estimation is wrong");
// Offset from the top of the GPRCS1 area to the saved frame pointer, will
// be negative.
FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
LLVM_DEBUG(dbgs() << "FramePtrOffsetInPush=" << FramePtrOffsetInPush
<< ", FramePtrSpillOffset="
<< (MFI.getObjectOffset(FramePtrSpillFI) + NumBytes)
<< "\n");
AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
NumBytes);
}
Expand All @@ -1036,7 +1045,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// after DPRCS1.
if (GPRCS2Size > 0 && PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
GPRCS2Push = LastPush = MBBI++;
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush);
if (FramePtrSpillArea == SpillArea::GPRCS2)
BeforeFPPush = false;
}

// Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
Expand All @@ -1049,7 +1060,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
else {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
MachineInstr::FrameSetup);
DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize, BeforeFPPush);
}
}

Expand All @@ -1058,7 +1069,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// Since vpush register list cannot have gaps, there may be multiple vpush
// instructions in the prologue.
while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI),
BeforeFPPush);
LastPush = MBBI++;
}
}
Expand All @@ -1077,7 +1089,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// Move GPRCS2, if using using SplitR11WindowsSEH.
if (GPRCS2Size > 0 && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
GPRCS2Push = LastPush = MBBI++;
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush);
if (FramePtrSpillArea == SpillArea::GPRCS2)
BeforeFPPush = false;
}

bool NeedsWinCFIStackAlloc = NeedsWinCFI;
Expand Down Expand Up @@ -1178,28 +1192,51 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// into spill area 1, including the FP in R11. In either case, it
// is in area one and the adjustment needs to take place just after
// that push.
// FIXME: The above is not necessary true when PACBTI is enabled.
// AAPCS requires use of R11, and PACBTI gets in the way of regular pushes,
// so FP ends up on area two.
MachineBasicBlock::iterator AfterPush;
if (HasFP) {
AfterPush = std::next(GPRCS1Push);
unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
int FPOffset = PushSize + FramePtrOffsetInPush;
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
AfterPush = std::next(GPRCS2Push);
emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
} else {
emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
FramePtr, ARM::SP, FPOffset,
MachineInstr::FrameSetup);
MachineBasicBlock::iterator FPPushInst;
// Offset from SP immediately after the push which saved the FP to the FP
// save slot.
int64_t FPOffsetAfterPush;
switch (FramePtrSpillArea) {
case SpillArea::GPRCS1:
FPPushInst = GPRCS1Push;
FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
ArgRegsSaveSize + FPCXTSaveSize +
sizeOfSPAdjustment(*FPPushInst);
LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS1, offset "
<< FPOffsetAfterPush << " after that push\n");
break;
case SpillArea::GPRCS2:
FPPushInst = GPRCS2Push;
FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
sizeOfSPAdjustment(*FPPushInst);
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
FPOffsetAfterPush += DPRCSSize + DPRGapSize;
LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS2, offset "
<< FPOffsetAfterPush << " after that push\n");
break;
default:
llvm_unreachable("frame pointer in unknown spill area");
break;
}
AfterPush = std::next(FPPushInst);
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
assert(FPOffsetAfterPush == 0);

// Emit the MOV or ADD to set up the frame pointer register.
emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
FramePtr, ARM::SP, FPOffsetAfterPush,
MachineInstr::FrameSetup);

if (!NeedsWinCFI) {
if (FramePtrOffsetInPush + PushSize != 0) {
// Emit DWARF info to find the CFA using the frame pointer from this
// point onward.
if (FPOffsetAfterPush != 0) {
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
nullptr, MRI->getDwarfRegNum(FramePtr, true),
FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
-MFI.getObjectOffset(FramePtrSpillFI)));
BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
Expand Down Expand Up @@ -1712,7 +1749,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
!isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
(PushPopSplit != ARMSubtarget::SplitR11WindowsSEH &&
PushPopSplit != ARMSubtarget::SplitR11AAPCSSignRA)) {
Reg = ARM::PC;
// Fold the return instruction into the LDM.
DeleteRet = true;
Expand Down Expand Up @@ -2945,18 +2983,29 @@ bool ARMFrameLowering::assignCalleeSavedSpillSlots(
const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
if (AFI.shouldSignReturnAddress()) {
// The order of register must match the order we push them, because the
// PEI assigns frame indices in that order. When compiling for return
// address sign and authenication, we use split push, therefore the orders
// we want are:
// LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
CSI.insert(find_if(CSI,
[=](const auto &CS) {
Register Reg = CS.getReg();
return Reg == ARM::R10 || Reg == ARM::R11 ||
Reg == ARM::R8 || Reg == ARM::R9 ||
ARM::DPRRegClass.contains(Reg);
}),
CalleeSavedInfo(ARM::R12));
// PEI assigns frame indices in that order. That order depends on the
// PushPopSplitVariation, there are only two cases which we use with return
// address signing:
switch (STI.getPushPopSplitVariation(MF)) {
case ARMSubtarget::SplitR7:
// LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
CSI.insert(find_if(CSI,
[=](const auto &CS) {
Register Reg = CS.getReg();
return Reg == ARM::R10 || Reg == ARM::R11 ||
Reg == ARM::R8 || Reg == ARM::R9 ||
ARM::DPRRegClass.contains(Reg);
}),
CalleeSavedInfo(ARM::R12));
break;
case ARMSubtarget::SplitR11AAPCSSignRA:
// With SplitR11AAPCSSignRA, R12 will always be the highest-addressed CSR
// on the stack.
CSI.insert(CSI.begin(), CalleeSavedInfo(ARM::R12));
break;
default:
llvm_unreachable("Unexpected CSR split with return address signing");
}
}

return false;
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/ARM/ARMSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -514,5 +514,12 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const {
F.needsUnwindTableEntry() &&
(MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF)))
return SplitR11WindowsSEH;

// Returns R11SplitAAPCSBranchSigning if R11 and lr are not adjacent to each
// other in the list of callee saved registers in a frame, and branch
// signing is enabled.
if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
getFramePointerReg() == ARM::R11)
return SplitR11AAPCSSignRA;
return NoSplit;
}
12 changes: 12 additions & 0 deletions llvm/lib/Target/ARM/ARMSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,18 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
/// vpush {d8-d15}
/// push {r11, lr}
SplitR11WindowsSEH,

/// When generating AAPCS-compilant frame chains, R11 is the frame pointer,
/// and must be pushed adjacent to the return address (LR). Normally this
/// isn't a problem, because the only register between them is r12, which is
/// the intra-procedure-call scratch register, so doesn't need to be saved.
/// However, when PACBTI is in use, r12 contains the authentication code, so
/// does need to be saved. This means that we need a separate push for R11
/// and LR.
/// push {r0-r10, r12}
/// push {r11, lr}
/// vpush {d8-d15}
SplitR11AAPCSSignRA,
};

protected:
Expand Down
Loading

0 comments on commit 493529f

Please sign in to comment.