Skip to content

Commit 47ec7f3

Browse files
committed
tweak the suspend loop implementation
1 parent ee00463 commit 47ec7f3

File tree

4 files changed

+77
-80
lines changed

4 files changed

+77
-80
lines changed

src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ GVAL_IMPL_INIT(PTR_VOID, g_RhpRethrow2Addr, PointerToRhpRethrow2);
9595
StackFrameIterator::StackFrameIterator(Thread * pThreadToWalk, PInvokeTransitionFrame* pInitialTransitionFrame)
9696
{
9797
STRESS_LOG0(LF_STACKWALK, LL_INFO10000, "----Init---- [ GC ]\n");
98-
ASSERT(!pThreadToWalk->DangerousCrossThreadIsHijacked());
98+
ASSERT(!pThreadToWalk->IsHijacked());
9999

100100
if (pInitialTransitionFrame == INTERRUPTED_THREAD_MARKER)
101101
{
@@ -1463,7 +1463,7 @@ void StackFrameIterator::NextInternal()
14631463
else
14641464
{
14651465
// if the thread is safe to walk, it better not have a hijack in place.
1466-
ASSERT((ThreadStore::GetCurrentThread() == m_pThread) || !m_pThread->DangerousCrossThreadIsHijacked());
1466+
ASSERT(!m_pThread->IsHijacked());
14671467

14681468
SetControlPC(dac_cast<PTR_VOID>(*(m_RegDisplay.GetAddrOfIP())));
14691469

src/coreclr/nativeaot/Runtime/thread.cpp

Lines changed: 4 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -901,55 +901,16 @@ void Thread::UnhijackWorker()
901901
m_uHijackedReturnValueFlags = 0;
902902
}
903903

904-
// @TODO: it would be very, very nice if we did not have to bleed knowledge of hijacking
905-
// and hijack state to other components in the runtime. For now, these are only used
906-
// when getting EH info during exception dispatch. We should find a better way to encapsulate
907-
// this.
908904
bool Thread::IsHijacked()
909905
{
910-
// Note: this operation is only valid from the current thread. If one thread invokes
911-
// this on another then it may be racing with other changes to the thread's hijack state.
912-
ASSERT(ThreadStore::GetCurrentThread() == this);
913-
914-
return m_pvHijackedReturnAddress != NULL;
915-
}
906+
ASSERT(((ThreadStore::GetCurrentThread() == this) && IsCurrentThreadInCooperativeMode()) ||
907+
ThreadStore::GetCurrentThread()->IsGCSpecial() ||
908+
ThreadStore::GetCurrentThread() == ThreadStore::GetSuspendingThread()
909+
);
916910

917-
//
918-
// WARNING: This method must ONLY be called during stackwalks when we believe that all threads are
919-
// synchronized and there is no other thread racing with us trying to apply hijacks.
920-
//
921-
bool Thread::DangerousCrossThreadIsHijacked()
922-
{
923-
// If we have a CachedTransitionFrame available, then we're in the proper state. Otherwise, this method
924-
// was called from an improper state.
925-
ASSERT(GetTransitionFrame() != NULL);
926911
return m_pvHijackedReturnAddress != NULL;
927912
}
928913

929-
void * Thread::GetHijackedReturnAddress()
930-
{
931-
// Note: this operation is only valid from the current thread. If one thread invokes
932-
// this on another then it may be racing with other changes to the thread's hijack state.
933-
ASSERT(IsHijacked());
934-
ASSERT(ThreadStore::GetCurrentThread() == this);
935-
936-
return m_pvHijackedReturnAddress;
937-
}
938-
939-
void * Thread::GetUnhijackedReturnAddress(void ** ppvReturnAddressLocation)
940-
{
941-
ASSERT(ThreadStore::GetCurrentThread() == this);
942-
943-
void * pvReturnAddress;
944-
if (m_ppvHijackedReturnAddressLocation == ppvReturnAddressLocation)
945-
pvReturnAddress = m_pvHijackedReturnAddress;
946-
else
947-
pvReturnAddress = *ppvReturnAddressLocation;
948-
949-
ASSERT(GetRuntimeInstance()->IsManaged(pvReturnAddress));
950-
return pvReturnAddress;
951-
}
952-
953914
void Thread::SetState(ThreadStateFlags flags)
954915
{
955916
PalInterlockedOr(&m_ThreadStateFlags, flags);

src/coreclr/nativeaot/Runtime/thread.h

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -138,9 +138,9 @@ class Thread : private ThreadBuffer
138138
#endif // FEATURE_GC_STRESS
139139

140140
#ifdef FEATURE_SUSPEND_REDIRECTION
141-
TSF_Redirected = 0x00000080, // Set to indicate the thread is redirected and will inevitably
141+
TSF_Redirected = 0x00000080, // Set to indicate the thread is redirected and will inevitably
142142
// suspend once resumed.
143-
// As an optimization, if we see this flag, we skip hijacking.
143+
// If we see this flag, we skip hijacking as an optimization.
144144
#endif //FEATURE_SUSPEND_REDIRECTION
145145
};
146146
private:
@@ -203,13 +203,11 @@ class Thread : private ThreadBuffer
203203

204204
void Hijack();
205205
void Unhijack();
206+
bool IsHijacked();
207+
206208
#ifdef FEATURE_GC_STRESS
207209
static void HijackForGcStress(PAL_LIMITED_CONTEXT * pSuspendCtx);
208210
#endif // FEATURE_GC_STRESS
209-
bool IsHijacked();
210-
void * GetHijackedReturnAddress();
211-
void * GetUnhijackedReturnAddress(void** ppvReturnAddressLocation);
212-
bool DangerousCrossThreadIsHijacked();
213211

214212
bool IsSuppressGcStressSet();
215213
void SetSuppressGcStress();
@@ -236,7 +234,7 @@ class Thread : private ThreadBuffer
236234
#endif // DACCESS_COMPILE
237235
#ifdef FEATURE_GC_STRESS
238236
void SetRandomSeed(uint32_t seed);
239-
uint32_t NextRand();
237+
uint32_t NextRand();
240238
bool IsRandInited();
241239
#endif // FEATURE_GC_STRESS
242240
PTR_ExInfo GetCurExInfo();

src/coreclr/nativeaot/Runtime/threadstore.cpp

Lines changed: 66 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,35 @@ void ThreadStore::UnlockThreadStore()
196196
m_Lock.ReleaseReadLock();
197197
}
198198

199+
// exponential spinwait with an approximate time limit for waiting in microsecond range.
200+
// when iteration == -1, only usecLimit is used
201+
void SpinWait(int iteration, int usecLimit)
202+
{
203+
LARGE_INTEGER li;
204+
PalQueryPerformanceCounter(&li);
205+
int64_t startTicks = li.QuadPart;
206+
207+
PalQueryPerformanceFrequency(&li);
208+
int64_t ticksPerSecond = li.QuadPart;
209+
int64_t endTicks = startTicks + (usecLimit * ticksPerSecond) / 1000000;
210+
211+
int l = min((unsigned)iteration, 30);
212+
for (int i = 0; i < l; i++)
213+
{
214+
for (int j = 0; j < (1 << i); j++)
215+
{
216+
System_YieldProcessor();
217+
}
218+
219+
PalQueryPerformanceCounter(&li);
220+
int64_t currentTicks = li.QuadPart;
221+
if (currentTicks > endTicks)
222+
{
223+
break;
224+
}
225+
}
226+
}
227+
199228
void ThreadStore::SuspendAllThreads(bool waitForGCEvent)
200229
{
201230
Thread * pThisThread = GetCurrentThreadIfAvailable();
@@ -220,50 +249,59 @@ void ThreadStore::SuspendAllThreads(bool waitForGCEvent)
220249
// reason for this is that we essentially implement Dekker's algorithm, which requires write ordering.
221250
PalFlushProcessWriteBuffers();
222251

223-
bool keepWaiting;
224-
YieldProcessorNormalizationInfo normalizationInfo;
225-
int waitCycles = 1;
226-
do
252+
int retries = 0;
253+
int prevRemaining = 0;
254+
int remaining = 0;
255+
bool observeOnly = false;
256+
257+
while(true)
227258
{
228-
keepWaiting = false;
259+
prevRemaining = remaining;
260+
remaining = 0;
261+
229262
FOREACH_THREAD(pTargetThread)
230263
{
231264
if (pTargetThread == pThisThread)
232265
continue;
233266

234267
if (!pTargetThread->CacheTransitionFrameForSuspend())
235268
{
236-
// We drive all threads to preemptive mode by hijacking them with return-address hijack.
237-
keepWaiting = true;
238-
pTargetThread->Hijack();
269+
remaining++;
270+
if (!observeOnly)
271+
{
272+
pTargetThread->Hijack();
273+
}
239274
}
240275
}
241276
END_FOREACH_THREAD
242277

243-
if (keepWaiting)
278+
if (!remaining)
279+
break;
280+
281+
// if we see progress or have just done a hijacking pass
282+
// do not hijack in the next iteration
283+
if (remaining < prevRemaining || !observeOnly)
244284
{
245-
if (PalSwitchToThread() == 0 && g_RhNumberOfProcessors > 1)
285+
// 5 usec delay, then check for more progress
286+
SpinWait(-1, 5);
287+
observeOnly = true;
288+
}
289+
else
290+
{
291+
SpinWait(retries++, 100);
292+
293+
// make sure our spining is not starving other threads, but not too often,
294+
// this can cause a 1-15 msec delay, depending on OS, and that is a lot while
295+
// very rarely needed, since threads are supposed to be releasing their CPUs
296+
if ((retries & 127) == 0)
246297
{
247-
// No threads are scheduled on this processor. Perhaps we're waiting for a thread
248-
// that's scheduled on another processor. If so, let's give it a little time
249-
// to make forward progress.
250-
// Note that we do not call Sleep, because the minimum granularity of Sleep is much
251-
// too long (we probably don't need a 15ms wait here). Instead, we'll just burn some
252-
// cycles.
253-
// @TODO: need tuning for spin
254-
// @TODO: need tuning for this whole loop as well.
255-
// we are likley too aggressive with interruptions which may result in longer pauses.
256-
YieldProcessorNormalizedForPreSkylakeCount(normalizationInfo, waitCycles);
257-
258-
// simplistic linear backoff for now
259-
// we could be catching threads in restartable sequences such as LL/SC style interlocked on ARM64
260-
// and forcing them to restart.
261-
// if interrupt mechanism is fast, eagerness could be hurting our overall progress.
262-
waitCycles += 10000;
298+
PalSwitchToThread();
263299
}
264-
}
265300

266-
} while (keepWaiting);
301+
observeOnly = false;
302+
// printf("RETRY: %i \n", retries);
303+
}
304+
}
267305

268306
#if defined(TARGET_ARM) || defined(TARGET_ARM64)
269307
// Flush the store buffers on all CPUs, to ensure that all changes made so far are seen

0 commit comments

Comments
 (0)