Skip to content

Commit 2369156

Browse files
authored
[NativeAOT] Follow up changes for the suspension loop routine. (#73741)
* add timing * do not hijack redirected threads * tweak the suspend loop implementation * remove instrumentation
1 parent a0993e3 commit 2369156

File tree

4 files changed

+93
-78
lines changed

4 files changed

+93
-78
lines changed

src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ GVAL_IMPL_INIT(PTR_VOID, g_RhpRethrow2Addr, PointerToRhpRethrow2);
9595
StackFrameIterator::StackFrameIterator(Thread * pThreadToWalk, PInvokeTransitionFrame* pInitialTransitionFrame)
9696
{
9797
STRESS_LOG0(LF_STACKWALK, LL_INFO10000, "----Init---- [ GC ]\n");
98-
ASSERT(!pThreadToWalk->DangerousCrossThreadIsHijacked());
98+
ASSERT(!pThreadToWalk->IsHijacked());
9999

100100
if (pInitialTransitionFrame == INTERRUPTED_THREAD_MARKER)
101101
{
@@ -1463,7 +1463,7 @@ void StackFrameIterator::NextInternal()
14631463
else
14641464
{
14651465
// if the thread is safe to walk, it better not have a hijack in place.
1466-
ASSERT((ThreadStore::GetCurrentThread() == m_pThread) || !m_pThread->DangerousCrossThreadIsHijacked());
1466+
ASSERT(!m_pThread->IsHijacked());
14671467

14681468
SetControlPC(dac_cast<PTR_VOID>(*(m_RegDisplay.GetAddrOfIP())));
14691469

src/coreclr/nativeaot/Runtime/thread.cpp

Lines changed: 18 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,10 @@ void Thread::WaitForGC(PInvokeTransitionFrame* pTransitionFrame)
7777
// set preemptive mode
7878
VolatileStoreWithoutBarrier(&m_pTransitionFrame, pTransitionFrame);
7979

80+
#ifdef FEATURE_SUSPEND_REDIRECTION
81+
ClearState(TSF_Redirected);
82+
#endif //FEATURE_SUSPEND_REDIRECTION
83+
8084
RedhawkGCInterface::WaitForGCCompletion();
8185

8286
// must be in cooperative mode when checking the trap flag
@@ -587,6 +591,14 @@ void Thread::Hijack()
587591
return;
588592
}
589593

594+
#ifdef FEATURE_SUSPEND_REDIRECTION
595+
// if the thread is redirected, leave it as-is.
596+
if (IsStateSet(TSF_Redirected))
597+
{
598+
return;
599+
}
600+
#endif //FEATURE_SUSPEND_REDIRECTION
601+
590602
// PalHijack will call HijackCallback or make the target thread call it.
591603
// It may also do nothing if the target thread is in inconvenient state.
592604
PalHijack(m_hPalThread, this);
@@ -806,6 +818,8 @@ bool Thread::Redirect()
806818
if (!PalSetThreadContext(m_hPalThread, redirectionContext))
807819
return false;
808820

821+
// the thread will now inevitably try to suspend
822+
SetState(TSF_Redirected);
809823
redirectionContext->SetIp(origIP);
810824

811825
STRESS_LOG2(LF_STACKWALK, LL_INFO10000, "InternalRedirect: TgtThread = %llx, IP = %p\n",
@@ -887,55 +901,16 @@ void Thread::UnhijackWorker()
887901
m_uHijackedReturnValueFlags = 0;
888902
}
889903

890-
// @TODO: it would be very, very nice if we did not have to bleed knowledge of hijacking
891-
// and hijack state to other components in the runtime. For now, these are only used
892-
// when getting EH info during exception dispatch. We should find a better way to encapsulate
893-
// this.
894904
bool Thread::IsHijacked()
895905
{
896-
// Note: this operation is only valid from the current thread. If one thread invokes
897-
// this on another then it may be racing with other changes to the thread's hijack state.
898-
ASSERT(ThreadStore::GetCurrentThread() == this);
899-
900-
return m_pvHijackedReturnAddress != NULL;
901-
}
906+
ASSERT(((ThreadStore::GetCurrentThread() == this) && IsCurrentThreadInCooperativeMode()) ||
907+
ThreadStore::GetCurrentThread()->IsGCSpecial() ||
908+
ThreadStore::GetCurrentThread() == ThreadStore::GetSuspendingThread()
909+
);
902910

903-
//
904-
// WARNING: This method must ONLY be called during stackwalks when we believe that all threads are
905-
// synchronized and there is no other thread racing with us trying to apply hijacks.
906-
//
907-
bool Thread::DangerousCrossThreadIsHijacked()
908-
{
909-
// If we have a CachedTransitionFrame available, then we're in the proper state. Otherwise, this method
910-
// was called from an improper state.
911-
ASSERT(GetTransitionFrame() != NULL);
912911
return m_pvHijackedReturnAddress != NULL;
913912
}
914913

915-
void * Thread::GetHijackedReturnAddress()
916-
{
917-
// Note: this operation is only valid from the current thread. If one thread invokes
918-
// this on another then it may be racing with other changes to the thread's hijack state.
919-
ASSERT(IsHijacked());
920-
ASSERT(ThreadStore::GetCurrentThread() == this);
921-
922-
return m_pvHijackedReturnAddress;
923-
}
924-
925-
void * Thread::GetUnhijackedReturnAddress(void ** ppvReturnAddressLocation)
926-
{
927-
ASSERT(ThreadStore::GetCurrentThread() == this);
928-
929-
void * pvReturnAddress;
930-
if (m_ppvHijackedReturnAddressLocation == ppvReturnAddressLocation)
931-
pvReturnAddress = m_pvHijackedReturnAddress;
932-
else
933-
pvReturnAddress = *ppvReturnAddressLocation;
934-
935-
ASSERT(GetRuntimeInstance()->IsManaged(pvReturnAddress));
936-
return pvReturnAddress;
937-
}
938-
939914
void Thread::SetState(ThreadStateFlags flags)
940915
{
941916
PalInterlockedOr(&m_ThreadStateFlags, flags);

src/coreclr/nativeaot/Runtime/thread.h

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,12 @@ class Thread : private ThreadBuffer
136136
#ifdef FEATURE_GC_STRESS
137137
TSF_IsRandSeedSet = 0x00000040, // set to indicate the random number generator for GCStress was inited
138138
#endif // FEATURE_GC_STRESS
139+
140+
#ifdef FEATURE_SUSPEND_REDIRECTION
141+
TSF_Redirected = 0x00000080, // Set to indicate the thread is redirected and will inevitably
142+
// suspend once resumed.
143+
// If we see this flag, we skip hijacking as an optimization.
144+
#endif //FEATURE_SUSPEND_REDIRECTION
139145
};
140146
private:
141147

@@ -197,13 +203,11 @@ class Thread : private ThreadBuffer
197203

198204
void Hijack();
199205
void Unhijack();
206+
bool IsHijacked();
207+
200208
#ifdef FEATURE_GC_STRESS
201209
static void HijackForGcStress(PAL_LIMITED_CONTEXT * pSuspendCtx);
202210
#endif // FEATURE_GC_STRESS
203-
bool IsHijacked();
204-
void * GetHijackedReturnAddress();
205-
void * GetUnhijackedReturnAddress(void** ppvReturnAddressLocation);
206-
bool DangerousCrossThreadIsHijacked();
207211

208212
bool IsSuppressGcStressSet();
209213
void SetSuppressGcStress();
@@ -230,7 +234,7 @@ class Thread : private ThreadBuffer
230234
#endif // DACCESS_COMPILE
231235
#ifdef FEATURE_GC_STRESS
232236
void SetRandomSeed(uint32_t seed);
233-
uint32_t NextRand();
237+
uint32_t NextRand();
234238
bool IsRandInited();
235239
#endif // FEATURE_GC_STRESS
236240
PTR_ExInfo GetCurExInfo();

src/coreclr/nativeaot/Runtime/threadstore.cpp

Lines changed: 64 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,35 @@ void ThreadStore::UnlockThreadStore()
196196
m_Lock.ReleaseReadLock();
197197
}
198198

199+
// exponential spinwait with an approximate time limit for waiting in the microsecond range.
200+
// when iteration == -1, only usecLimit is used
201+
void SpinWait(int iteration, int usecLimit)
202+
{
203+
LARGE_INTEGER li;
204+
PalQueryPerformanceCounter(&li);
205+
int64_t startTicks = li.QuadPart;
206+
207+
PalQueryPerformanceFrequency(&li);
208+
int64_t ticksPerSecond = li.QuadPart;
209+
int64_t endTicks = startTicks + (usecLimit * ticksPerSecond) / 1000000;
210+
211+
int l = min((unsigned)iteration, 30);
212+
for (int i = 0; i < l; i++)
213+
{
214+
for (int j = 0; j < (1 << i); j++)
215+
{
216+
System_YieldProcessor();
217+
}
218+
219+
PalQueryPerformanceCounter(&li);
220+
int64_t currentTicks = li.QuadPart;
221+
if (currentTicks > endTicks)
222+
{
223+
break;
224+
}
225+
}
226+
}
227+
199228
void ThreadStore::SuspendAllThreads(bool waitForGCEvent)
200229
{
201230
Thread * pThisThread = GetCurrentThreadIfAvailable();
@@ -216,50 +245,57 @@ void ThreadStore::SuspendAllThreads(bool waitForGCEvent)
216245
// reason for this is that we essentially implement Dekker's algorithm, which requires write ordering.
217246
PalFlushProcessWriteBuffers();
218247

219-
bool keepWaiting;
220-
YieldProcessorNormalizationInfo normalizationInfo;
221-
int waitCycles = 1;
222-
do
248+
int retries = 0;
249+
int prevRemaining = 0;
250+
int remaining = 0;
251+
bool observeOnly = false;
252+
253+
while(true)
223254
{
224-
keepWaiting = false;
255+
prevRemaining = remaining;
256+
remaining = 0;
257+
225258
FOREACH_THREAD(pTargetThread)
226259
{
227260
if (pTargetThread == pThisThread)
228261
continue;
229262

230263
if (!pTargetThread->CacheTransitionFrameForSuspend())
231264
{
232-
// We drive all threads to preemptive mode by hijacking them with return-address hijack.
233-
keepWaiting = true;
234-
pTargetThread->Hijack();
265+
remaining++;
266+
if (!observeOnly)
267+
{
268+
pTargetThread->Hijack();
269+
}
235270
}
236271
}
237272
END_FOREACH_THREAD
238273

239-
if (keepWaiting)
274+
if (!remaining)
275+
break;
276+
277+
// if we see progress or have just done a hijacking pass
278+
// do not hijack in the next iteration
279+
if (remaining < prevRemaining || !observeOnly)
240280
{
241-
if (PalSwitchToThread() == 0 && g_RhNumberOfProcessors > 1)
281+
// 5 usec delay, then check for more progress
282+
SpinWait(-1, 5);
283+
observeOnly = true;
284+
}
285+
else
286+
{
287+
SpinWait(retries++, 100);
288+
observeOnly = false;
289+
290+
// make sure our spinning is not starving other threads, but not too often,
291+
// this can cause a 1-15 msec delay, depending on OS, and that is a lot while
292+
// very rarely needed, since threads are supposed to be releasing their CPUs
293+
if ((retries & 127) == 0)
242294
{
243-
// No threads are scheduled on this processor. Perhaps we're waiting for a thread
244-
// that's scheduled on another processor. If so, let's give it a little time
245-
// to make forward progress.
246-
// Note that we do not call Sleep, because the minimum granularity of Sleep is much
247-
// too long (we probably don't need a 15ms wait here). Instead, we'll just burn some
248-
// cycles.
249-
// @TODO: need tuning for spin
250-
// @TODO: need tuning for this whole loop as well.
251-
// we are likely too aggressive with interruptions which may result in longer pauses.
252-
YieldProcessorNormalizedForPreSkylakeCount(normalizationInfo, waitCycles);
253-
254-
// simplistic linear backoff for now
255-
// we could be catching threads in restartable sequences such as LL/SC style interlocked on ARM64
256-
// and forcing them to restart.
257-
// if interrupt mechanism is fast, eagerness could be hurting our overall progress.
258-
waitCycles += 10000;
295+
PalSwitchToThread();
259296
}
260297
}
261-
262-
} while (keepWaiting);
298+
}
263299

264300
#if defined(TARGET_ARM) || defined(TARGET_ARM64)
265301
// Flush the store buffers on all CPUs, to ensure that all changes made so far are seen

0 commit comments

Comments
 (0)