Skip to content

Commit 9031940

Browse files
[NativeAOT/ARM64] Generate frames compatible with Apple compact unwinding (#111451)
* [NativeAOT/ARM64] Generate frames compatible with Apple compact unwinding (#107766)
* JIT/ARM64: Add the ability to generate frames compatible with the Apple compact unwinding format. For NativeAOT/ARM64/Apple API do the following:
  - Save callee-saved registers in the opposite order and in pairs.
  - Prefer saving FP/LR at the top of the frame. Heuristics are used to avoid worse code quality outside of the prolog/epilog due to the addressing range limits of the ARM64 instruction set.
  - Added an optimization to lvaFrameAddress that rewrites FP-x references to SP+y when possible. This allows efficient addressing using positive indexes when FP points to the top of the frame. It mimics a similar optimization on ARM32.
* ObjWriter: For Mach-O ARM64, try to convert the DWARF CFI unwinding codes into compact unwinding codes.
* Disable the lvaFrameAddress FP->SP optimization for OSR methods.
* Fix variable offsets used in emitGCvarLiveUpd by suppressing the FP-n => SP+m (for m = frameSize - n) optimization.
---------
Co-authored-by: Kunal Pathak <[email protected]>
1 parent 7d75878 commit 9031940

File tree

9 files changed

+337
-93
lines changed

9 files changed

+337
-93
lines changed

src/coreclr/jit/codegen.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -659,6 +659,7 @@ class CodeGen final : public CodeGenInterface
659659
virtual bool IsSaveFpLrWithAllCalleeSavedRegisters() const;
660660
bool genSaveFpLrWithAllCalleeSavedRegisters;
661661
bool genForceFuncletFrameType5;
662+
bool genReverseAndPairCalleeSavedRegisters;
662663
#endif // TARGET_ARM64
663664

664665
//-------------------------------------------------------------------------

src/coreclr/jit/codegenarm64.cpp

Lines changed: 38 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -845,12 +845,19 @@ void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, i
845845

846846
for (int i = 0; i < regStack.Height(); ++i)
847847
{
848-
RegPair regPair = regStack.Bottom(i);
848+
RegPair regPair = genReverseAndPairCalleeSavedRegisters ? regStack.Top(i) : regStack.Bottom(i);
849849
if (regPair.reg2 != REG_NA)
850850
{
851851
// We can use a STP instruction.
852-
genPrologSaveRegPair(regPair.reg1, regPair.reg2, spOffset, spDelta, regPair.useSaveNextPair, REG_IP0,
853-
nullptr);
852+
if (genReverseAndPairCalleeSavedRegisters)
853+
{
854+
genPrologSaveRegPair(regPair.reg2, regPair.reg1, spOffset, spDelta, false, REG_IP0, nullptr);
855+
}
856+
else
857+
{
858+
genPrologSaveRegPair(regPair.reg1, regPair.reg2, spOffset, spDelta, regPair.useSaveNextPair, REG_IP0,
859+
nullptr);
860+
}
854861

855862
spOffset += 2 * slotSize;
856863
}
@@ -926,8 +933,9 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe
926933

927934
// Save integer registers at higher addresses than floating-point registers.
928935

936+
regMaskTP maskSaveRegsFrame = regsToSaveMask & (RBM_FP | RBM_LR);
929937
regMaskTP maskSaveRegsFloat = regsToSaveMask & RBM_ALLFLOAT;
930-
regMaskTP maskSaveRegsInt = regsToSaveMask & ~maskSaveRegsFloat;
938+
regMaskTP maskSaveRegsInt = regsToSaveMask & ~maskSaveRegsFloat & ~maskSaveRegsFrame;
931939

932940
if (maskSaveRegsFloat != RBM_NONE)
933941
{
@@ -939,6 +947,13 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe
939947
if (maskSaveRegsInt != RBM_NONE)
940948
{
941949
genSaveCalleeSavedRegisterGroup(maskSaveRegsInt, spDelta, lowestCalleeSavedOffset);
950+
spDelta = 0;
951+
lowestCalleeSavedOffset += genCountBits(maskSaveRegsInt) * FPSAVE_REGSIZE_BYTES;
952+
}
953+
954+
if (maskSaveRegsFrame != RBM_NONE)
955+
{
956+
genPrologSaveRegPair(REG_FP, REG_LR, lowestCalleeSavedOffset, spDelta, false, REG_IP0, nullptr);
942957
// No need to update spDelta, lowestCalleeSavedOffset since they're not used after this.
943958
}
944959
}
@@ -970,13 +985,20 @@ void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta
970985
stackDelta = spDelta;
971986
}
972987

973-
RegPair regPair = regStack.Top(i);
988+
RegPair regPair = genReverseAndPairCalleeSavedRegisters ? regStack.Bottom(i) : regStack.Top(i);
974989
if (regPair.reg2 != REG_NA)
975990
{
976991
spOffset -= 2 * slotSize;
977992

978-
genEpilogRestoreRegPair(regPair.reg1, regPair.reg2, spOffset, stackDelta, regPair.useSaveNextPair, REG_IP1,
979-
nullptr);
993+
if (genReverseAndPairCalleeSavedRegisters)
994+
{
995+
genEpilogRestoreRegPair(regPair.reg2, regPair.reg1, spOffset, stackDelta, false, REG_IP1, nullptr);
996+
}
997+
else
998+
{
999+
genEpilogRestoreRegPair(regPair.reg1, regPair.reg2, spOffset, stackDelta, regPair.useSaveNextPair,
1000+
REG_IP1, nullptr);
1001+
}
9801002
}
9811003
else
9821004
{
@@ -1043,11 +1065,19 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in
10431065

10441066
// Save integer registers at higher addresses than floating-point registers.
10451067

1068+
regMaskTP maskRestoreRegsFrame = regsToRestoreMask & (RBM_FP | RBM_LR);
10461069
regMaskTP maskRestoreRegsFloat = regsToRestoreMask & RBM_ALLFLOAT;
1047-
regMaskTP maskRestoreRegsInt = regsToRestoreMask & ~maskRestoreRegsFloat;
1070+
regMaskTP maskRestoreRegsInt = regsToRestoreMask & ~maskRestoreRegsFloat & ~maskRestoreRegsFrame;
10481071

10491072
// Restore in the opposite order of saving.
10501073

1074+
if (maskRestoreRegsFrame != RBM_NONE)
1075+
{
1076+
int spFrameDelta = (maskRestoreRegsFloat != RBM_NONE || maskRestoreRegsInt != RBM_NONE) ? 0 : spDelta;
1077+
spOffset -= 2 * REGSIZE_BYTES;
1078+
genEpilogRestoreRegPair(REG_FP, REG_LR, spOffset, spFrameDelta, false, REG_IP1, nullptr);
1079+
}
1080+
10511081
if (maskRestoreRegsInt != RBM_NONE)
10521082
{
10531083
int spIntDelta = (maskRestoreRegsFloat != RBM_NONE) ? 0 : spDelta; // should we delay the SP adjustment?

src/coreclr/jit/codegencommon.cpp

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -255,6 +255,7 @@ CodeGen::CodeGen(Compiler* theCompiler)
255255
#ifdef TARGET_ARM64
256256
genSaveFpLrWithAllCalleeSavedRegisters = false;
257257
genForceFuncletFrameType5 = false;
258+
genReverseAndPairCalleeSavedRegisters = false;
258259
#endif // TARGET_ARM64
259260
}
260261

@@ -4812,6 +4813,29 @@ void CodeGen::genFinalizeFrame()
48124813
}
48134814
#endif // TARGET_ARM
48144815

4816+
#ifdef TARGET_ARM64
4817+
if (compiler->IsTargetAbi(CORINFO_NATIVEAOT_ABI) && TargetOS::IsApplePlatform)
4818+
{
4819+
JITDUMP("Setting genReverseAndPairCalleeSavedRegisters = true");
4820+
4821+
genReverseAndPairCalleeSavedRegisters = true;
4822+
4823+
// Make sure we push the registers in pairs if possible. If we only allocate a contiguous
4824+
// block of registers this should add at most one integer and at most one floating point
4825+
// register to the list. The stack has to be 16-byte aligned, so in worst case it results
4826+
// in allocating 16 bytes more space on stack if odd number of integer and odd number of
4827+
// FP registers were occupied. Same number of instructions will be generated, just the
4828+
// STR instructions are replaced with STP (store pair).
4829+
regMaskTP maskModifiedRegs = regSet.rsGetModifiedRegsMask();
4830+
regMaskTP maskPairRegs = ((maskModifiedRegs & (RBM_V8 | RBM_V10 | RBM_V12 | RBM_V14)).getLow() << 1) |
4831+
((maskModifiedRegs & (RBM_R19 | RBM_R21 | RBM_R23 | RBM_R25 | RBM_R27)).getLow() << 1);
4832+
if (maskPairRegs != RBM_NONE)
4833+
{
4834+
regSet.rsSetRegsModified(maskPairRegs);
4835+
}
4836+
}
4837+
#endif
4838+
48154839
#ifdef DEBUG
48164840
if (verbose)
48174841
{

src/coreclr/jit/compiler.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4324,6 +4324,8 @@ class Compiler
43244324

43254325
#ifdef TARGET_ARM
43264326
int lvaFrameAddress(int varNum, bool mustBeFPBased, regNumber* pBaseReg, int addrModeOffset, bool isFloatUsage);
4327+
#elif TARGET_ARM64
4328+
int lvaFrameAddress(int varNum, bool* pFPbased, bool suppressFPtoSPRewrite = false);
43274329
#else
43284330
int lvaFrameAddress(int varNum, bool* pFPbased);
43294331
#endif

src/coreclr/jit/compiler.hpp

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2639,6 +2639,9 @@ inline
26392639
int
26402640
Compiler::lvaFrameAddress(
26412641
int varNum, bool mustBeFPBased, regNumber* pBaseReg, int addrModeOffset, bool isFloatUsage)
2642+
#elif TARGET_ARM64
2643+
int
2644+
Compiler::lvaFrameAddress(int varNum, bool* pFPbased, bool suppressFPtoSPRewrite)
26422645
#else
26432646
int
26442647
Compiler::lvaFrameAddress(int varNum, bool* pFPbased)
@@ -2808,6 +2811,16 @@ inline
28082811
{
28092812
*pBaseReg = REG_SPBASE;
28102813
}
2814+
#elif defined(TARGET_ARM64)
2815+
if (FPbased && !suppressFPtoSPRewrite && !codeGen->isFramePointerRequired() && varOffset < 0 && !opts.IsOSR() &&
2816+
lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT && codeGen->IsSaveFpLrWithAllCalleeSavedRegisters())
2817+
{
2818+
int spVarOffset = varOffset + codeGen->genSPtoFPdelta();
2819+
JITDUMP("lvaFrameAddress optimization for V%02u: [FP-%d] -> [SP+%d]\n", varNum, -varOffset, spVarOffset);
2820+
FPbased = false;
2821+
varOffset = spVarOffset;
2822+
}
2823+
*pFPbased = FPbased;
28112824
#else
28122825
*pFPbased = FPbased;
28132826
#endif

src/coreclr/jit/emitarm64.cpp

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -12270,7 +12270,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
1227012270
int varNum = id->idAddr()->iiaLclVar.lvaVarNum();
1227112271
unsigned ofs = AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), TARGET_POINTER_SIZE);
1227212272
bool FPbased;
12273-
int adr = emitComp->lvaFrameAddress(varNum, &FPbased);
12273+
int adr = emitComp->lvaFrameAddress(varNum, &FPbased, true);
1227412274
if (id->idGCref() != GCT_NONE)
1227512275
{
1227612276
emitGCvarLiveUpd(adr + ofs, varNum, id->idGCref(), dst DEBUG_ARG(varNum));
@@ -12311,15 +12311,11 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
1231112311

1231212312
// If there are 2 GC vars in this instrDesc, get the 2nd variable
1231312313
// that should be tracked.
12314-
adr2 = emitComp->lvaFrameAddress(varNum2, &FPbased2);
12314+
adr2 = emitComp->lvaFrameAddress(varNum2, &FPbased2, true);
1231512315
ofs2Dist = EA_SIZE_IN_BYTES(size);
1231612316
#ifdef DEBUG
1231712317
assert(FPbased == FPbased2);
12318-
if (FPbased)
12319-
{
12320-
assert(id->idReg3() == REG_FP);
12321-
}
12322-
else
12318+
if (!FPbased)
1232312319
{
1232412320
assert(encodingZRtoSP(id->idReg3()) == REG_SP);
1232512321
}

0 commit comments

Comments
 (0)