Skip to content

[Arm64] Implement stack probe helper #43250

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
58c0c36
Copy CodeGen::genPushCalleeSavedRegisters to codegenxarch.cpp
echesakov Oct 7, 2020
49aaa0b
Remove code under #ifdef/#endif that never executes in codegenxarch.cpp
echesakov Oct 7, 2020
b669ba8
Move CodeGen::genPushCalleeSavedRegisters from codegencommon.cpp to c…
echesakov Oct 7, 2020
de9cfa0
Remove code under #ifdef/#endif that never executes in codegenarmarch…
echesakov Oct 7, 2020
40cac3c
Move call to genAllocLclFrame in CodeGen::genFnProlog in codegencommo…
echesakov Oct 7, 2020
537bac2
Define JIT_StackProbe helper on all platforms in jithelpers.h readyto…
echesakov Oct 9, 2020
f3f39b4
Implement Arm64 JIT_StackProbe helper in asmhelpers.asm
echesakov Oct 9, 2020
6abb3ce
Implement Arm64 JIT_StackProbe helper in asmhelpers.S
echesakov Oct 12, 2020
456ebb9
Display stack trace at stack overflow on Arm64 in excep.cpp
echesakov Oct 9, 2020
95a307c
Define REG/RBM_STACK_PROBE_HELPER_ARG, REG/RBM_STACK_PROBE_HELPER_CAL…
echesakov Oct 9, 2020
4b5468d
Remove genAllocLclFrame and use stack probing helper on Arm64
echesakov Oct 10, 2020
c80961e
Improve inlined stack probing instructions sequence in codegenarmarch…
echesakov Oct 12, 2020
eb3b64d
Increase the size of "very large frame" in compiler.h
echesakov Oct 29, 2020
62851fa
Remove assertion and add proper logic to ensure that when tempReg is …
echesakov Nov 2, 2020
2772cb2
Rename PAGE_SIZE->PROBE_PAGE_SIZE in src/coreclr/vm/arm64/asmhelpers.…
echesakov Jan 22, 2021
b9822cb
Remove artifact from having a stack probing loop in the past on XArch…
echesakov Jan 28, 2021
5f98ef8
Remove stack probing under sp on Arm in src/coreclr/jit/codegenarm.cp…
echesakov Jan 29, 2021
e255807
Remove maskArgRegsLiveIn argument from genAllocLclFrame in src/corecl…
echesakov Jan 29, 2021
16e4b75
Remove getVeryLargeFrameSize() in src/coreclr/jit/compiler.h
echesakov Jan 29, 2021
c9c2ab3
In AOT scenarios the VM reports to the JIT the minimum page size in s…
echesakov Jan 29, 2021
8aba2d5
Rename PAGE_SIZE->PROBE_PAGE_SIZE in src/coreclr/vm/arm/asmhelpers.S …
echesakov Jan 29, 2021
138faab
Add CodeGen::genEmitStackProbeHelperCall in src/coreclr/jit/codegen.h…
echesakov Feb 3, 2021
bcb4a74
Implement Arm64 Stack Probe in src/coreclr/jit/codegenarmarch.cpp src…
echesakov Feb 2, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions src/coreclr/inc/jithelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -350,11 +350,7 @@

JITHELPER(CORINFO_HELP_GVMLOOKUP_FOR_SLOT, NULL, CORINFO_HELP_SIG_NO_ALIGN_STUB)

#ifndef TARGET_ARM64
JITHELPER(CORINFO_HELP_STACK_PROBE, JIT_StackProbe, CORINFO_HELP_SIG_REG_ONLY)
#else
JITHELPER(CORINFO_HELP_STACK_PROBE, NULL, CORINFO_HELP_SIG_UNDEF)
#endif

JITHELPER(CORINFO_HELP_PATCHPOINT, JIT_Patchpoint, CORINFO_HELP_SIG_REG_ONLY)
JITHELPER(CORINFO_HELP_CLASSPROFILE, JIT_ClassProfile, CORINFO_HELP_SIG_REG_ONLY)
Expand Down
2 changes: 0 additions & 2 deletions src/coreclr/inc/readytorunhelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,7 @@ HELPER(READYTORUN_HELPER_ReversePInvokeExit, CORINFO_HELP_JIT_REVERSE_PIN
HELPER(READYTORUN_HELPER_MonitorEnter, CORINFO_HELP_MON_ENTER, )
HELPER(READYTORUN_HELPER_MonitorExit, CORINFO_HELP_MON_EXIT, )

#ifndef TARGET_ARM64
HELPER(READYTORUN_HELPER_StackProbe, CORINFO_HELP_STACK_PROBE, )
#endif

HELPER(READYTORUN_HELPER_GetCurrentManagedThreadId, CORINFO_HELP_GETCURRENTMANAGEDTHREADID, )

Expand Down
6 changes: 5 additions & 1 deletion src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,11 @@ class CodeGen final : public CodeGenInterface
void genPushCalleeSavedRegisters();
#endif

void genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn);
void genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed);

#ifdef TARGET_ARMARCH
void genEmitStackProbeHelperCall(int currentSpToFinalSp, regNumber initReg, bool* pInitRegZeroed);
#endif

#if defined(TARGET_ARM)

Expand Down
70 changes: 35 additions & 35 deletions src/coreclr/jit/codegenarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1827,12 +1827,11 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper)
// initReg - register to use as a scratch register.
// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if
// this call sets 'initReg' to a non-zero value.
// maskArgRegsLiveIn - incoming argument registers that are currently live.
//
// Return value:
// None
//
void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed)
{
assert(compiler->compGeneratingProlog);

Expand All @@ -1849,41 +1848,9 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni
{
GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, frameSize);
}
else if (frameSize < compiler->getVeryLargeFrameSize())
{
for (target_size_t probeOffset = pageSize; probeOffset <= frameSize; probeOffset += pageSize)
{
// Generate:
// movw initReg, -probeOffset
// ldr initReg, [SP + initReg]

instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)probeOffset);
GetEmitter()->emitIns_R_R_R(INS_ldr, EA_PTRSIZE, initReg, REG_SPBASE, initReg);
}

regSet.verifyRegUsed(initReg);
*pInitRegZeroed = false; // The initReg does not contain zero

instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, frameSize);
compiler->unwindPadding();
GetEmitter()->emitIns_R_R_R(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, initReg);
}
else
{
assert(frameSize >= compiler->getVeryLargeFrameSize());

genInstrWithConstant(INS_sub, EA_PTRSIZE, REG_STACK_PROBE_HELPER_ARG, REG_SPBASE, frameSize,
INS_FLAGS_DONT_CARE, REG_STACK_PROBE_HELPER_ARG);
regSet.verifyRegUsed(REG_STACK_PROBE_HELPER_ARG);
genEmitHelperCall(CORINFO_HELP_STACK_PROBE, 0, EA_UNKNOWN, REG_STACK_PROBE_HELPER_CALL_TARGET);
compiler->unwindPadding();
GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG);

if ((genRegMask(initReg) & (RBM_STACK_PROBE_HELPER_ARG | RBM_STACK_PROBE_HELPER_CALL_TARGET |
RBM_STACK_PROBE_HELPER_TRASH)) != RBM_NONE)
{
*pInitRegZeroed = false;
}
genEmitStackProbeHelperCall(frameSize, initReg, pInitRegZeroed);
}

compiler->unwindAllocStack(frameSize);
Expand All @@ -1895,4 +1862,37 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni
#endif // USING_SCOPE_INFO
}

void CodeGen::genEmitStackProbeHelperCall(int currentSpToFinalSp, regNumber initReg, bool* pInitRegZeroed)
{
// Generate the following code:
//
// movw r4, #currentSpToFinalSp
// sub r4, sp, r4
// bl CORINFO_HELP_STACK_PROBE
// mov sp, r4
//
// If frameSize can not be encoded by movw immediate this becomes:
//
// movw r4, #currentSpToFinalSpLo16
// movt r4, #currentSpToFinalSpHi16
// sub r4, sp, r4
// bl CORINFO_HELP_STACK_PROBE
// mov sp, r4

genInstrWithConstant(INS_sub, EA_PTRSIZE, REG_STACK_PROBE_HELPER_ARG, REG_SPBASE, currentSpToFinalSp,
INS_FLAGS_DONT_CARE, REG_STACK_PROBE_HELPER_ARG);
regSet.verifyRegUsed(REG_STACK_PROBE_HELPER_ARG);

genEmitHelperCall(CORINFO_HELP_STACK_PROBE, 0, EA_UNKNOWN, REG_STACK_PROBE_HELPER_CALL_TARGET);
compiler->unwindPadding();

GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG);

if ((genRegMask(initReg) &
(RBM_STACK_PROBE_HELPER_ARG | RBM_STACK_PROBE_HELPER_CALL_TARGET | RBM_STACK_PROBE_HELPER_TRASH)) != RBM_NONE)
{
*pInitRegZeroed = false;
}
}

#endif // TARGET_ARM
153 changes: 19 additions & 134 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4904,6 +4904,25 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper)

#endif // PROFILING_SUPPORTED

void CodeGen::genEmitStackProbeHelperCall(int currentSpToFinalSp, regNumber initReg, bool* pInitRegZeroed)
{
assert(compiler->compGeneratingProlog);

const bool reportUnwindData = false;
genInstrWithConstant(INS_sub, EA_PTRSIZE, REG_STACK_PROBE_HELPER_ARG, REG_SPBASE, currentSpToFinalSp,
REG_STACK_PROBE_HELPER_ARG, reportUnwindData);
regSet.verifyRegUsed(REG_STACK_PROBE_HELPER_ARG);

genEmitHelperCall(CORINFO_HELP_STACK_PROBE, 0, EA_UNKNOWN, REG_STACK_PROBE_HELPER_CALL_TARGET);
GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG);

if ((genRegMask(initReg) &
(RBM_STACK_PROBE_HELPER_ARG | RBM_STACK_PROBE_HELPER_CALL_TARGET | RBM_STACK_PROBE_HELPER_TRASH)) != RBM_NONE)
{
*pInitRegZeroed = false;
}
}

/*****************************************************************************
* Unit testing of the ARM64 emitter: generate a bunch of instructions into the prolog
* (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late
Expand Down Expand Up @@ -9588,138 +9607,4 @@ void CodeGen::genArm64EmitterUnitTests()
}
#endif // defined(DEBUG)

//------------------------------------------------------------------------
// genAllocLclFrame: Probe the stack.
//
// Notes:
// This only does the probing; allocating the frame is done when callee-saved registers are saved.
// This is done before anything has been pushed. The previous frame might have a large outgoing argument
// space that has been allocated, but the lowest addresses have not been touched. Our frame setup might
// not touch up to the first 504 bytes. This means we could miss a guard page. On Windows, however,
// there are always three guard pages, so we will not miss them all. On Linux, there is only one guard
// page by default, so we need to be more careful. We do an extra probe if we might not have probed
// recently enough. That is, if a call and prolog establishment might lead to missing a page. We do this
// on Windows as well just to be consistent, even though it should not be necessary.
//
// Three strategies are used, selected by 'frameSize':
// - frameSize == 0: nothing is emitted.
// - frameSize < page size: no per-page probe is emitted.
// - frameSize < compiler->getVeryLargeFrameSize(): one load per page is emitted (unrolled).
// - otherwise: a probing loop is emitted that uses 'initReg' and one additional register.
// In the last three cases a final probe at [sp - frameSize] is added when the last touched address
// plus STACK_PROBE_BOUNDARY_THRESHOLD_BYTES would exceed a page.
//
// Arguments:
// frameSize - the size of the stack frame being allocated.
// initReg - register to use as a scratch register.
// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if
// this call sets 'initReg' to a non-zero value. Otherwise, it is unchanged.
// maskArgRegsLiveIn - incoming argument registers that are currently live.
//
// Return value:
// None
//
void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
{
assert(compiler->compGeneratingProlog);

// Nothing to probe for an empty frame.
if (frameSize == 0)
{
return;
}

const target_size_t pageSize = compiler->eeGetPageSize();

// What offset from the final SP was the last probe? If we haven't probed almost a complete page, and
// if the next action on the stack might subtract from SP first, before touching the current SP, then
// we do one more probe at the very bottom. This can happen if we call a function on arm64 that does
// a "STP fp, lr, [sp-504]!", that is, pre-decrement SP then store. Note that we probe here for arm64,
// but we don't alter SP.
target_size_t lastTouchDelta = 0;

// initReg is overwritten below, so it must not be the register carrying the secret stub parameter.
assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg));

if (frameSize < pageSize)
{
// Less than one page: no probe here; the final check below decides whether one is needed.
lastTouchDelta = frameSize;
}
else if (frameSize < compiler->getVeryLargeFrameSize())
{
lastTouchDelta = frameSize;

// Unrolled probing: one load per page, at offsets pageSize, 2*pageSize, ... up to frameSize.
for (target_size_t probeOffset = pageSize; probeOffset <= frameSize; probeOffset += pageSize)
{
// Generate:
// movw initReg, -probeOffset
// ldr wzr, [sp + initReg]

instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)probeOffset);
GetEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, initReg);
regSet.verifyRegUsed(initReg);
*pInitRegZeroed = false; // The initReg does not contain zero

lastTouchDelta -= pageSize;
}

// After the loop, what remains is the distance from the last probe to the bottom of the frame.
assert(lastTouchDelta == frameSize % pageSize);
compiler->unwindPadding();
}
else
{
assert(frameSize >= compiler->getVeryLargeFrameSize());

// Emit the following sequence to 'tickle' the pages. Note it is important that stack pointer not change
// until this is complete since the tickles could cause a stack overflow, and we need to be able to crawl
// the stack afterward (which means the stack pointer needs to be known).

// Candidate registers for rLimit: integer registers that are either not callee-saved or already
// recorded as modified, excluding live incoming arguments and initReg.
regMaskTP availMask = RBM_ALLINT & (regSet.rsGetModifiedRegsMask() | ~RBM_INT_CALLEE_SAVED);
availMask &= ~maskArgRegsLiveIn; // Remove all of the incoming argument registers as they are currently live
availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg

regNumber rOffset = initReg;
regNumber rLimit;
regMaskTP tempMask;

// We pick the next lowest register number for rLimit
noway_assert(availMask != RBM_NONE);
tempMask = genFindLowestBit(availMask);
rLimit = genRegNumFromMask(tempMask);

// Generate:
//
// mov rOffset, -pageSize // On arm, this turns out to be "movw r1, 0xf000; sxth r1, r1".
// // We could save 4 bytes in the prolog by using "movs r1, 0" at the
// // runtime expense of running a useless first loop iteration.
// mov rLimit, -frameSize
// loop:
// ldr wzr, [sp + rOffset]
// sub rOffset, pageSize
// cmp rLimit, rOffset
// b.ls loop // If rLimit is lower or same, we need to probe this rOffset. Note
// // especially that if it is the same, we haven't probed this page.
//
// NOTE(review): the "On arm ... r1" remark above names 32-bit Arm instructions and registers;
// it looks carried over from the Arm32 implementation - confirm whether it applies to arm64.

noway_assert((ssize_t)(int)frameSize == (ssize_t)frameSize); // make sure framesize safely fits within an int

instGen_Set_Reg_To_Imm(EA_PTRSIZE, rOffset, -(ssize_t)pageSize);
instGen_Set_Reg_To_Imm(EA_PTRSIZE, rLimit, -(ssize_t)frameSize);

// There's a "virtual" label here. But we can't create a label in the prolog, so we use the magic
// `emitIns_J` with a negative `instrCount` to branch back a specific number of instructions.
// NOTE(review): the -4 below presumably corresponds to the ldr/sub/cmp/b.ls sequence; the count
// must be kept in sync if instructions are added to or removed from the loop body.

GetEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, rOffset);
GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, rOffset, rOffset, pageSize);
GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, rLimit, rOffset); // If equal, we need to probe again
GetEmitter()->emitIns_J(INS_bls, NULL, -4);

*pInitRegZeroed = false; // The initReg does not contain zero

compiler->unwindPadding();

lastTouchDelta = frameSize % pageSize;
}

// One more probe at the very bottom of the frame if the distance from the last touched address
// plus the threshold would exceed a page.
if (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize)
{
assert(lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES < 2 * pageSize);
instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)frameSize);
GetEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, initReg);
compiler->unwindPadding();

regSet.verifyRegUsed(initReg);
*pInitRegZeroed = false; // The initReg does not contain zero
}
}

#endif // TARGET_ARM64
Loading