diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 3f28ce8ca4b559..aad305cce03961 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -116,9 +116,12 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_iOS_SaveList; if (PushPopSplit == ARMSubtarget::SplitR7) - return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_SaveList + return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_R7_SaveList : CSR_ATPCS_SplitPush_SaveList; + if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA) + return CSR_AAPCS_SplitPush_R11_SaveList; + return CSR_AAPCS_SaveList; } diff --git a/llvm/lib/Target/ARM/ARMCallingConv.td b/llvm/lib/Target/ARM/ARMCallingConv.td index d14424c2decac3..27f175a7003366 100644 --- a/llvm/lib/Target/ARM/ARMCallingConv.td +++ b/llvm/lib/Target/ARM/ARMCallingConv.td @@ -301,14 +301,17 @@ def CSR_ATPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush, def CSR_ATPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush, R10)>; -// When enforcing an AAPCS compliant frame chain, R11 is used as the frame -// pointer even for Thumb targets, where split pushes are necessary. -// This AAPCS alternative makes sure the frame index slots match the push -// order in that case. -def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R11, - R7, R6, R5, R4, - R10, R9, R8, - (sequence "D%u", 15, 8))>; +// Sometimes we need to split the push of the callee-saved GPRs into two +// regions, to ensure that the frame chain record is set up correctly. These +// list the callee-saved registers in the order they end up on the stack, which +// depends on whether the frame pointer is r7 or r11. +def CSR_AAPCS_SplitPush_R11 : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4, + LR, R11, + (sequence "D%u", 15, 8))>; +def CSR_AAPCS_SplitPush_R7 : CalleeSavedRegs<(add LR, R11, + R7, R6, R5, R4, + R10, R9, R8, + (sequence "D%u", 15, 8))>; // Constructors and destructors return 'this' in the ARM C++ ABI; since 'this' // and the pointer return value are both passed in R0 in these cases, this can diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index e0703457aa8139..4f366dcffcd29a 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -199,6 +199,11 @@ SpillArea getSpillArea(Register Reg, // push {r0-r10, r12} GPRCS1 // vpush {r8-d15} DPRCS1 // push {r11, lr} GPRCS2 + // + // SplitR11AAPCSSignRA: + // push {r0-r10, r12} GPRSC1 + // push {r11, lr} GPRCS2 + // vpush {r8-d15} DPRCS1 // If FPCXTNS is spilled (for CMSE secure entryfunctions), it is always at // the top of the stack frame. @@ -246,7 +251,8 @@ SpillArea getSpillArea(Register Reg, return SpillArea::GPRCS1; case ARM::LR: - if (Variation == ARMSubtarget::SplitR11WindowsSEH) + if (Variation == ARMSubtarget::SplitR11WindowsSEH || + Variation == ARMSubtarget::SplitR11AAPCSSignRA) return SpillArea::GPRCS2; else return SpillArea::GPRCS1; @@ -863,6 +869,9 @@ static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI, // This is a conservative estimation: Assume the frame pointer being r7 and // pc("r15") up to r8 getting spilled before (= 8 registers). int MaxRegBytes = 8 * 4; + if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA) + // Here, r11 can be stored below all of r4-r15. + MaxRegBytes = 11 * 4; if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) { // Here, r11 can be stored below all of r4-r15 plus d8-d15. MaxRegBytes = 11 * 4 + 8 * 8; @@ -935,17 +944,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, } // Determine spill area sizes, and some important frame indices. + SpillArea FramePtrSpillArea = SpillArea::GPRCS1; + bool BeforeFPPush = true; for (const CalleeSavedInfo &I : CSI) { Register Reg = I.getReg(); int FI = I.getFrameIdx(); - if (Reg == FramePtr) + SpillArea Area = getSpillArea(Reg, PushPopSplit, + AFI->getNumAlignedDPRCS2Regs(), RegInfo); + + if (Reg == FramePtr) { FramePtrSpillFI = FI; + FramePtrSpillArea = Area; + } if (Reg == ARM::D8) D8SpillFI = FI; - switch (getSpillArea(Reg, PushPopSplit, AFI->getNumAlignedDPRCS2Regs(), - RegInfo)) { + switch (Area) { case SpillArea::FPCXT: FPCXTSaveSize += 4; break; @@ -972,7 +987,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // Move past FPCXT area. if (FPCXTSaveSize > 0) { LastPush = MBBI++; - DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true); + DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, BeforeFPPush); } // Allocate the vararg register save area. @@ -980,13 +995,15 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize, MachineInstr::FrameSetup); LastPush = std::prev(MBBI); - DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true); + DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, BeforeFPPush); } // Move past area 1. if (GPRCS1Size > 0) { GPRCS1Push = LastPush = MBBI++; - DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true); + DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, BeforeFPPush); + if (FramePtrSpillArea == SpillArea::GPRCS1) + BeforeFPPush = false; } // Determine starting offsets of spill areas. These offsets are all positive @@ -1010,7 +1027,6 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, } else { DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize; } - int FramePtrOffsetInPush = 0; if (HasFP) { // Offset from the CFA to the saved frame pointer, will be negative. [[maybe_unused]] int FPOffset = MFI.getObjectOffset(FramePtrSpillFI); @@ -1018,13 +1034,6 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, << ", FPOffset: " << FPOffset << "\n"); assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset && "Max FP estimation is wrong"); - // Offset from the top of the GPRCS1 area to the saved frame pointer, will - // be negative. - FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize; - LLVM_DEBUG(dbgs() << "FramePtrOffsetInPush=" << FramePtrOffsetInPush - << ", FramePtrSpillOffset=" - << (MFI.getObjectOffset(FramePtrSpillFI) + NumBytes) - << "\n"); AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) + NumBytes); } @@ -1036,7 +1045,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // after DPRCS1. if (GPRCS2Size > 0 && PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) { GPRCS2Push = LastPush = MBBI++; - DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size); + DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush); + if (FramePtrSpillArea == SpillArea::GPRCS2) + BeforeFPPush = false; } // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our @@ -1049,7 +1060,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, else { emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize, MachineInstr::FrameSetup); - DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize); + DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize, BeforeFPPush); } } @@ -1058,7 +1069,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // Since vpush register list cannot have gaps, there may be multiple vpush // instructions in the prologue. while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) { - DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI)); + DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI), + BeforeFPPush); LastPush = MBBI++; } } @@ -1077,7 +1089,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // Move GPRCS2, if using using SplitR11WindowsSEH. if (GPRCS2Size > 0 && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) { GPRCS2Push = LastPush = MBBI++; - DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size); + DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush); + if (FramePtrSpillArea == SpillArea::GPRCS2) + BeforeFPPush = false; } bool NeedsWinCFIStackAlloc = NeedsWinCFI; @@ -1178,28 +1192,51 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // into spill area 1, including the FP in R11. In either case, it // is in area one and the adjustment needs to take place just after // that push. - // FIXME: The above is not necessary true when PACBTI is enabled. - // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes, - // so FP ends up on area two. MachineBasicBlock::iterator AfterPush; if (HasFP) { - AfterPush = std::next(GPRCS1Push); - unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push); - int FPOffset = PushSize + FramePtrOffsetInPush; - if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) { - AfterPush = std::next(GPRCS2Push); - emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII, - FramePtr, ARM::SP, 0, MachineInstr::FrameSetup); - } else { - emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII, - FramePtr, ARM::SP, FPOffset, - MachineInstr::FrameSetup); + MachineBasicBlock::iterator FPPushInst; + // Offset from SP immediately after the push which saved the FP to the FP + // save slot. + int64_t FPOffsetAfterPush; + switch (FramePtrSpillArea) { + case SpillArea::GPRCS1: + FPPushInst = GPRCS1Push; + FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) + + ArgRegsSaveSize + FPCXTSaveSize + + sizeOfSPAdjustment(*FPPushInst); + LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS1, offset " + << FPOffsetAfterPush << " after that push\n"); + break; + case SpillArea::GPRCS2: + FPPushInst = GPRCS2Push; + FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) + + ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size + + sizeOfSPAdjustment(*FPPushInst); + if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) + FPOffsetAfterPush += DPRCSSize + DPRGapSize; + LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS2, offset " + << FPOffsetAfterPush << " after that push\n"); + break; + default: + llvm_unreachable("frame pointer in unknown spill area"); + break; } + AfterPush = std::next(FPPushInst); + if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) + assert(FPOffsetAfterPush == 0); + + // Emit the MOV or ADD to set up the frame pointer register. + emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII, + FramePtr, ARM::SP, FPOffsetAfterPush, + MachineInstr::FrameSetup); + if (!NeedsWinCFI) { - if (FramePtrOffsetInPush + PushSize != 0) { + // Emit DWARF info to find the CFA using the frame pointer from this + // point onward. + if (FPOffsetAfterPush != 0) { unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( nullptr, MRI->getDwarfRegNum(FramePtr, true), - FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush)); + -MFI.getObjectOffset(FramePtrSpillFI))); BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlags(MachineInstr::FrameSetup); @@ -1712,7 +1749,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt && !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 && STI.hasV5TOps() && MBB.succ_empty() && !hasPAC && - PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) { + (PushPopSplit != ARMSubtarget::SplitR11WindowsSEH && + PushPopSplit != ARMSubtarget::SplitR11AAPCSSignRA)) { Reg = ARM::PC; // Fold the return instruction into the LDM. DeleteRet = true; @@ -2945,18 +2983,29 @@ bool ARMFrameLowering::assignCalleeSavedSpillSlots( const auto &AFI = *MF.getInfo(); if (AFI.shouldSignReturnAddress()) { // The order of register must match the order we push them, because the - // PEI assigns frame indices in that order. When compiling for return - // address sign and authenication, we use split push, therefore the orders - // we want are: - // LR, R7, R6, R5, R4, , R11, R10, R9, R8, D15-D8 - CSI.insert(find_if(CSI, - [=](const auto &CS) { - Register Reg = CS.getReg(); - return Reg == ARM::R10 || Reg == ARM::R11 || - Reg == ARM::R8 || Reg == ARM::R9 || - ARM::DPRRegClass.contains(Reg); - }), - CalleeSavedInfo(ARM::R12)); + // PEI assigns frame indices in that order. That order depends on the + // PushPopSplitVariation, there are only two cases which we use with return + // address signing: + switch (STI.getPushPopSplitVariation(MF)) { + case ARMSubtarget::SplitR7: + // LR, R7, R6, R5, R4, , R11, R10, R9, R8, D15-D8 + CSI.insert(find_if(CSI, + [=](const auto &CS) { + Register Reg = CS.getReg(); + return Reg == ARM::R10 || Reg == ARM::R11 || + Reg == ARM::R8 || Reg == ARM::R9 || + ARM::DPRRegClass.contains(Reg); + }), + CalleeSavedInfo(ARM::R12)); + break; + case ARMSubtarget::SplitR11AAPCSSignRA: + // With SplitR11AAPCSSignRA, R12 will always be the highest-addressed CSR + // on the stack. + CSI.insert(CSI.begin(), CalleeSavedInfo(ARM::R12)); + break; + default: + llvm_unreachable("Unexpected CSR split with return address signing"); + } } return false; diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp index c4a782bc40910a..9adfb1fab5f084 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -514,5 +514,12 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const { F.needsUnwindTableEntry() && (MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF))) return SplitR11WindowsSEH; + + // Returns R11SplitAAPCSBranchSigning if R11 and lr are not adjacent to each + // other in the list of callee saved registers in a frame, and branch + // signing is enabled. + if (MF.getInfo()->shouldSignReturnAddress() && + getFramePointerReg() == ARM::R11) + return SplitR11AAPCSSignRA; return NoSplit; } diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index 7917ddc17bdb81..214c5f1b45e556 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -105,6 +105,18 @@ class ARMSubtarget : public ARMGenSubtargetInfo { /// vpush {d8-d15} /// push {r11, lr} SplitR11WindowsSEH, + + /// When generating AAPCS-compilant frame chains, R11 is the frame pointer, + /// and must be pushed adjacent to the return address (LR). Normally this + /// isn't a problem, because the only register between them is r12, which is + /// the intra-procedure-call scratch register, so doesn't need to be saved. + /// However, when PACBTI is in use, r12 contains the authentication code, so + /// does need to be saved. This means that we need a separate push for R11 + /// and LR. + /// push {r0-r10, r12} + /// push {r11, lr} + /// vpush {d8-d15} + SplitR11AAPCSSignRA, }; protected: diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll new file mode 100644 index 00000000000000..8bcf87130c5400 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-frame-chain.ll @@ -0,0 +1,150 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=thumbv8.1m.main-none-eabi < %s --force-dwarf-frame-section -frame-pointer=all -mattr=+aapcs-frame-chain | FileCheck %s + +; int test1() { +; return 0; +; } +define i32 @test1() "sign-return-address"="non-leaf" { +; CHECK-LABEL: test1: +; CHECK: .cfi_sections .debug_frame +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: @ %bb.0: @ %entry +; CHECK-NEXT: pac r12, lr, sp +; CHECK-NEXT: .save {ra_auth_code} +; CHECK-NEXT: str r12, [sp, #-4]! +; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: .cfi_offset ra_auth_code, -4 +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push.w {r11, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .cfi_offset lr, -8 +; CHECK-NEXT: .cfi_offset r11, -12 +; CHECK-NEXT: .setfp r11, sp +; CHECK-NEXT: mov r11, sp +; CHECK-NEXT: .cfi_def_cfa_register r11 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: pop.w {r11, lr} +; CHECK-NEXT: ldr r12, [sp], #4 +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr +entry: + ret i32 0 +} + +; void foo(int n) { +; int a[n]; +; bar(a); +; } +define dso_local void @test2(i32 noundef %n) "sign-return-address"="non-leaf" { +; CHECK-LABEL: test2: +; CHECK: .cfi_startproc +; CHECK-NEXT: @ %bb.0: @ %entry +; CHECK-NEXT: pac r12, lr, sp +; CHECK-NEXT: .save {r4, r7, ra_auth_code} +; CHECK-NEXT: push.w {r4, r7, r12} +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .cfi_offset ra_auth_code, -4 +; CHECK-NEXT: .cfi_offset r7, -8 +; CHECK-NEXT: .cfi_offset r4, -12 +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push.w {r11, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: .cfi_offset lr, -16 +; CHECK-NEXT: .cfi_offset r11, -20 +; CHECK-NEXT: .setfp r11, sp +; CHECK-NEXT: mov r11, sp +; CHECK-NEXT: .cfi_def_cfa_register r11 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: movs r1, #7 +; CHECK-NEXT: add.w r0, r1, r0, lsl #2 +; CHECK-NEXT: bic r0, r0, #7 +; CHECK-NEXT: sub.w r0, sp, r0 +; CHECK-NEXT: mov sp, r0 +; CHECK-NEXT: bl take_ptr +; CHECK-NEXT: mov sp, r11 +; CHECK-NEXT: pop.w {r11, lr} +; CHECK-NEXT: pop.w {r4, r7, r12} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr +entry: + %vla = alloca i32, i32 %n, align 4 + call void @take_ptr(ptr noundef nonnull %vla) + ret void +} + +; void test3(int c, float e, int z) { +; if (c) +; knr(); +; take_ptr(alloca(z)); +; if (e) +; knr(); +; } +define void @test3(i32 noundef %c, float noundef %e, i32 noundef %z) "sign-return-address"="non-leaf" { +; CHECK-LABEL: test3: +; CHECK: .cfi_startproc +; CHECK-NEXT: @ %bb.0: @ %entry +; CHECK-NEXT: pac r12, lr, sp +; CHECK-NEXT: .save {r4, r5, r6, r7, ra_auth_code} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r12} +; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: .cfi_offset ra_auth_code, -4 +; CHECK-NEXT: .cfi_offset r7, -8 +; CHECK-NEXT: .cfi_offset r6, -12 +; CHECK-NEXT: .cfi_offset r5, -16 +; CHECK-NEXT: .cfi_offset r4, -20 +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push.w {r11, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 28 +; CHECK-NEXT: .cfi_offset lr, -24 +; CHECK-NEXT: .cfi_offset r11, -28 +; CHECK-NEXT: .setfp r11, sp +; CHECK-NEXT: mov r11, sp +; CHECK-NEXT: .cfi_def_cfa_register r11 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r5, r2 +; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: it ne +; CHECK-NEXT: blne knr +; CHECK-NEXT: adds r0, r5, #7 +; CHECK-NEXT: bic r0, r0, #7 +; CHECK-NEXT: sub.w r0, sp, r0 +; CHECK-NEXT: mov sp, r0 +; CHECK-NEXT: bl take_ptr +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: movs r1, #0 +; CHECK-NEXT: bl __aeabi_fcmpeq +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it eq +; CHECK-NEXT: bleq knr +; CHECK-NEXT: mov sp, r11 +; CHECK-NEXT: pop.w {r11, lr} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r12} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr +entry: + %tobool.not = icmp eq i32 %c, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: ; preds = %entry + tail call void @knr() + br label %if.end + +if.end: ; preds = %if.then, %entry + %0 = alloca i8, i32 %z, align 8 + call void @take_ptr(ptr noundef nonnull %0) + %tobool1 = fcmp une float %e, 0.000000e+00 + br i1 %tobool1, label %if.then2, label %if.end3 + +if.then2: ; preds = %if.end + call void @knr() + br label %if.end3 + +if.end3: ; preds = %if.then2, %if.end + ret void +} + +declare void @knr(...) +declare void @take_ptr(ptr noundef)