@@ -3821,77 +3821,7 @@ void CodeGen::genPushCalleeSavedRegisters()
3821
3821
3822
3822
int totalFrameSize = genTotalFrameSize ();
3823
3823
3824
- bool useStackProbeHelper = false ;
3825
- const int pageSize = (int )compiler->eeGetPageSize ();
3826
-
3827
- const int currentSpToFinalSp = compiler->compLclFrameSize ;
3828
-
3829
- if (currentSpToFinalSp < compiler->getVeryLargeFrameSize ())
3830
- {
3831
- const regNumber tempReg = REG_SCRATCH;
3832
- bool tempRegWasModified = false ;
3833
-
3834
- constexpr int ldrLargestPositiveImmByteOffset = 0x8000 ;
3835
- const bool useLdrUnsignedImmediate = (pageSize < ldrLargestPositiveImmByteOffset / 2 );
3836
-
3837
- int currentSpToLastProbedLoc = 0 ;
3838
-
3839
- if (useLdrUnsignedImmediate)
3840
- {
3841
- for (int currentSpToTempReg = 0 ;
3842
- currentSpToFinalSp + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES - currentSpToLastProbedLoc > pageSize;)
3843
- {
3844
- const int currentSpToProbeLoc = min (currentSpToLastProbedLoc + pageSize, currentSpToFinalSp);
3845
-
3846
- if (currentSpToProbeLoc > currentSpToTempReg)
3847
- {
3848
- if (currentSpToFinalSp + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES - currentSpToProbeLoc > pageSize)
3849
- {
3850
- // At least one more probing beside the one at [sp, #currentSpToProbeLoc] are needed,
3851
- // so it is worthwhile to advance tempReg and emit two or more ldr xzr, [tempReg, #imm].
3852
- currentSpToTempReg = currentSpToTempReg + ldrLargestPositiveImmByteOffset;
3853
- GetEmitter ()->emitIns_R_R_I (INS_sub, EA_PTRSIZE, tempReg, REG_SPBASE, currentSpToTempReg);
3854
- tempRegWasModified = true ;
3855
- }
3856
- else
3857
- {
3858
- break ;
3859
- }
3860
- }
3861
-
3862
- const int probeLocToTempReg = currentSpToTempReg - currentSpToProbeLoc;
3863
- GetEmitter ()->emitIns_R_R_I (INS_ldr, EA_8BYTE, REG_ZR, tempReg, probeLocToTempReg);
3864
- currentSpToLastProbedLoc = currentSpToProbeLoc;
3865
- }
3866
- }
3867
-
3868
- while (currentSpToFinalSp + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES - currentSpToLastProbedLoc > pageSize)
3869
- {
3870
- const int currentSpToProbeLoc = min (currentSpToLastProbedLoc + pageSize, currentSpToFinalSp);
3871
-
3872
- // Emit mov tempReg, #imm followed by ldr wzr, [sp, tempReg].
3873
- genSetRegToIcon (tempReg, -currentSpToProbeLoc, TYP_I_IMPL);
3874
- tempRegWasModified = true ;
3875
- GetEmitter ()->emitIns_R_R_R (INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, tempReg);
3876
- currentSpToLastProbedLoc = currentSpToProbeLoc;
3877
- }
3878
-
3879
- if (tempRegWasModified)
3880
- {
3881
- regSet.verifyRegUsed (tempReg);
3882
-
3883
- if (initReg == tempReg)
3884
- {
3885
- *pInitRegZeroed = false ;
3886
- }
3887
- }
3888
-
3889
- compiler->unwindPadding ();
3890
- }
3891
- else
3892
- {
3893
- useStackProbeHelper = true ;
3894
- }
3824
+ const int probePageSize = (int )compiler->eeGetPageSize ();
3895
3825
3896
3826
int offset; // This will be the starting place for saving the callee-saved registers, in increasing order.
3897
3827
@@ -4177,6 +4107,51 @@ void CodeGen::genPushCalleeSavedRegisters()
4177
4107
// If we do establish the frame pointer, what is the amount we add to SP to do so?
4178
4108
unsigned offsetSpToSavedFp = 0 ;
4179
4109
4110
+ auto emitUnrolledStackProbeLoop = [this , probePageSize, initReg, pInitRegZeroed](int currentSpToFpLrLoc,
4111
+ int lastProbedLocToCurrentSp) {
4112
+ // Emits an unrolled stack probing loop and returns the remaining distance from the updated sp
4113
+ // to the location where fp,lr will be stored.
4114
+ // We cannot call a stack probe helper before storing the lr register on the stack since the call
4115
+ // would trash that register. Instead, for relatively small frames (smaller than
4116
+ // STACK_PROBE_HELPER_FRAME_SIZE_PAGES pages) the JIT emits an unrolled stack probing loop.
4117
+ // "stp fp, lr, [sp, #fpLrLoc]" also counts as a probe, hence currentSpToFpLrLoc is maintained below.
4118
+
4119
+ assert (currentSpToFpLrLoc < STACK_PROBE_HELPER_FRAME_SIZE_PAGES * probePageSize);
4120
+
4121
+ // Generate the following code:
4122
+ //
4123
+ // sub sp, sp, #probePageSize
4124
+ // ldr wzr, [sp,#probeLocToCurrentSp]
4125
+ // ...
4126
+ // sub sp, sp, #probePageSize
4127
+ // ldr wzr, [sp,#probeLocToCurrentSp]
4128
+ //
4129
+ // until sp is within probePageSize of the location
4130
+ // where the fp,lr register pair will be written.
4131
+
4132
+ int lastProbedLocToFpLrLoc = lastProbedLocToCurrentSp + currentSpToFpLrLoc; // last probe -> fp,lr slot
4133
+
4134
+ while (currentSpToFpLrLoc > probePageSize)
4135
+ {
4136
+ const int probeLocToFpLrLoc = lastProbedLocToFpLrLoc - probePageSize; // next probe is one page further
4137
+
4138
+ genStackPointerAdjustment (-probePageSize, initReg, pInitRegZeroed, /* reportUnwindData */ true );
4139
+ currentSpToFpLrLoc -= probePageSize;
4140
+
4141
+ const int probeLocToCurrentSp = probeLocToFpLrLoc - currentSpToFpLrLoc; // offset from the updated sp
4142
+
4143
+ GetEmitter ()->emitIns_R_R_I (INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, probeLocToCurrentSp);
4144
+ compiler->unwindNop (); // NOTE(review): presumably keeps unwind codes in step with the ldr above - confirm
4145
+
4146
+ lastProbedLocToFpLrLoc = probeLocToFpLrLoc;
4147
+ }
4148
+
4149
+ // The loop does not necessarily stop exactly at the location where the fp,lr register pair will be
4150
+ // written. Therefore, return the distance between the updated sp value and that location to
4151
+ // the caller of this lambda.
4152
+ return currentSpToFpLrLoc;
4153
+ };
4154
+
4180
4155
if (frameType == 1 )
4181
4156
{
4182
4157
assert (!genSaveFpLrWithAllCalleeSavedRegisters);
@@ -4197,6 +4172,13 @@ void CodeGen::genPushCalleeSavedRegisters()
4197
4172
assert ((remainingFrameSz % 16 ) == 0 ); // this is guaranteed to be 16-byte aligned because each component --
4198
4173
// totalFrameSize and calleeSaveSPDelta -- is 16-byte aligned.
4199
4174
4175
+ int lastProbedLocToCurrentSp = STACK_PROBE_BOUNDARY_THRESHOLD_BYTES;
4176
+
4177
+ if (compiler->info .compIsVarArgs || ((maskSaveRegsInt | maskSaveRegsFloat) != 0 ))
4178
+ {
4179
+ lastProbedLocToCurrentSp = 0 ;
4180
+ }
4181
+
4200
4182
if (compiler->lvaOutgoingArgSpaceSize > 504 )
4201
4183
{
4202
4184
// We can't do "stp fp,lr,[sp,#outsz]" because #outsz is too big.
@@ -4209,7 +4191,11 @@ void CodeGen::genPushCalleeSavedRegisters()
4209
4191
4210
4192
JITDUMP (" spAdjustment2=%d\n " , spAdjustment2);
4211
4193
4212
- genPrologSaveRegPair (REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false , initReg, pInitRegZeroed);
4194
+ int currentSpToFpLrLoc = spAdjustment2;
4195
+ currentSpToFpLrLoc = emitUnrolledStackProbeLoop (currentSpToFpLrLoc, lastProbedLocToCurrentSp);
4196
+
4197
+ genPrologSaveRegPair (REG_FP, REG_LR, alignmentAdjustment2, -currentSpToFpLrLoc, false , initReg,
4198
+ pInitRegZeroed);
4213
4199
offset += spAdjustment2;
4214
4200
4215
4201
// Now subtract off the #outsz (or the rest of the #outsz if it was unaligned, and the above "sub"
@@ -4227,17 +4213,28 @@ void CodeGen::genPushCalleeSavedRegisters()
4227
4213
4228
4214
JITDUMP (" spAdjustment3=%d\n " , spAdjustment3);
4229
4215
4230
- // We've already established the frame pointer, so no need to report the stack pointer change to unwind
4231
- // info.
4232
- genStackPointerAdjustment (-spAdjustment3, initReg, pInitRegZeroed, /* reportUnwindData */ false );
4216
+ if (spAdjustment3 >= probePageSize)
4217
+ {
4218
+ genEmitStackProbeHelperCall (spAdjustment3, initReg, pInitRegZeroed);
4219
+ }
4220
+ else
4221
+ {
4222
+ // We've already established the frame pointer, so no need to report the stack pointer change to unwind
4223
+ // info.
4224
+ genStackPointerAdjustment (-spAdjustment3, initReg, pInitRegZeroed, /* reportUnwindData */ false );
4225
+ }
4233
4226
offset += spAdjustment3;
4234
4227
}
4235
4228
else
4236
4229
{
4237
- genPrologSaveRegPair (REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize , -remainingFrameSz, false , initReg,
4230
+ int currentSpToFpLrLoc = remainingFrameSz - compiler->lvaOutgoingArgSpaceSize ;
4231
+ currentSpToFpLrLoc = emitUnrolledStackProbeLoop (currentSpToFpLrLoc, lastProbedLocToCurrentSp);
4232
+
4233
+ const int currentSpToFinalSp = currentSpToFpLrLoc + compiler->lvaOutgoingArgSpaceSize ;
4234
+ genPrologSaveRegPair (REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize , -currentSpToFinalSp, false , initReg,
4238
4235
pInitRegZeroed);
4239
- offset += remainingFrameSz;
4240
4236
4237
+ offset += remainingFrameSz;
4241
4238
offsetSpToSavedFp = compiler->lvaOutgoingArgSpaceSize ;
4242
4239
}
4243
4240
}
@@ -4266,26 +4263,14 @@ void CodeGen::genPushCalleeSavedRegisters()
4266
4263
4267
4264
JITDUMP (" remainingFrameSz=%d\n " , remainingFrameSz);
4268
4265
4269
- // We've already established the frame pointer, so no need to report the unwind info at this point.
4270
- const bool reportUnwindData = false ;
4271
-
4272
- if (useStackProbeHelper)
4266
+ if (remainingFrameSz >= probePageSize)
4273
4267
{
4274
- genInstrWithConstant (INS_sub, EA_PTRSIZE, REG_STACK_PROBE_HELPER_ARG, REG_SPBASE, remainingFrameSz,
4275
- REG_STACK_PROBE_HELPER_ARG, reportUnwindData);
4276
- regSet.verifyRegUsed (REG_STACK_PROBE_HELPER_ARG);
4277
- genEmitHelperCall (CORINFO_HELP_STACK_PROBE, 0 , EA_UNKNOWN, REG_STACK_PROBE_HELPER_CALL_TARGET);
4278
- GetEmitter ()->emitIns_R_R (INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG);
4279
-
4280
- if ((genRegMask (initReg) & (RBM_STACK_PROBE_HELPER_ARG | RBM_STACK_PROBE_HELPER_CALL_TARGET |
4281
- RBM_STACK_PROBE_HELPER_TRASH)) != RBM_NONE)
4282
- {
4283
- *pInitRegZeroed = false ;
4284
- }
4268
+ genEmitStackProbeHelperCall (remainingFrameSz, initReg, pInitRegZeroed);
4285
4269
}
4286
4270
else
4287
4271
{
4288
- genStackPointerAdjustment (-remainingFrameSz, initReg, pInitRegZeroed, reportUnwindData);
4272
+ // We've already established the frame pointer, so no need to report the unwind info at this point.
4273
+ genStackPointerAdjustment (-remainingFrameSz, initReg, pInitRegZeroed, /* reportUnwindData */ false );
4289
4274
}
4290
4275
4291
4276
offset += remainingFrameSz;
0 commit comments