diff --git a/FEXCore/Source/CMakeLists.txt b/FEXCore/Source/CMakeLists.txt
index 36b820dc25..4279591fcf 100644
--- a/FEXCore/Source/CMakeLists.txt
+++ b/FEXCore/Source/CMakeLists.txt
@@ -7,6 +7,7 @@ set (FEXCORE_BASE_SRCS
   Utils/FileLoading.cpp
   Utils/ForcedAssert.cpp
   Utils/LogManager.cpp
+  Utils/SpinWaitLock.cpp
   )
 
 if (NOT MINGW_BUILD)
diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp
index e13cbc28b1..f04f46d5d7 100644
--- a/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp
+++ b/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp
@@ -858,6 +858,7 @@ CPUBackend::CompiledCode Arm64JITCore::CompileCode(uint64_t Entry,
   // TODO: This needs to be a data RIP relocation once code caching works.
   // Current relocation code doesn't support this feature yet.
   JITBlockTail->RIP = Entry;
+  JITBlockTail->SpinLockFutex = 0;
 
   {
     // Store the RIP entries.
diff --git a/FEXCore/Source/Utils/ArchHelpers/Arm64.cpp b/FEXCore/Source/Utils/ArchHelpers/Arm64.cpp
index 5de71fa0bc..dc32a33705 100644
--- a/FEXCore/Source/Utils/ArchHelpers/Arm64.cpp
+++ b/FEXCore/Source/Utils/ArchHelpers/Arm64.cpp
@@ -1,4 +1,8 @@
 // SPDX-License-Identifier: MIT
+
+#include "Utils/SpinWaitLock.h"
+
+#include <FEXCore/Core/CPUBackend.h>
 #include
 #include
 #include
@@ -2044,9 +2048,11 @@ static uint64_t HandleAtomicLoadstoreExclusive(uintptr_t ProgramCounter, uint64_
   uint32_t Size = (Instr & 0xC000'0000) >> 30;
   uint32_t AddrReg = (Instr >> 5) & 0x1F;
   uint32_t DataReg = Instr & 0x1F;
-  if ((Instr & LDAXR_MASK) == LDAR_INST || // LDAR*
-      (Instr & LDAXR_MASK) == LDAPR_INST) { // LDAPR*
-    if (ParanoidTSO) {
+
+  // ParanoidTSO path doesn't modify any code.
+  if (ParanoidTSO) [[unlikely]] {
+    if ((Instr & LDAXR_MASK) == LDAR_INST || // LDAR*
+        (Instr & LDAXR_MASK) == LDAPR_INST) { // LDAPR*
       if (ArchHelpers::Arm64::HandleAtomicLoad(Instr, GPRs, 0)) {
         // Skip this instruction now
         return std::make_pair(true, 4);
@@ -2056,20 +2062,7 @@ static uint64_t HandleAtomicLoadstoreExclusive(uintptr_t ProgramCounter, uint64_
         return NotHandled;
       }
     }
-    else {
-      uint32_t LDR = 0b0011'1000'0111'1111'0110'1000'0000'0000;
-      LDR |= Size << 30;
-      LDR |= AddrReg << 5;
-      LDR |= DataReg;
-      PC[0] = LDR;
-      PC[1] = DMB_LD; // Back-patch the half-barrier.
-      ClearICache(&PC[-1], 16);
-      // With the instruction modified, now execute again.
-      return std::make_pair(true, 0);
-    }
-  }
-  else if ((Instr & LDAXR_MASK) == STLR_INST) { // STLR*
-    if (ParanoidTSO) {
+    else if ((Instr & LDAXR_MASK) == STLR_INST) { // STLR*
       if (ArchHelpers::Arm64::HandleAtomicStore(Instr, GPRs, 0)) {
         // Skip this instruction now
         return std::make_pair(true, 4);
@@ -2079,22 +2072,9 @@ static uint64_t HandleAtomicLoadstoreExclusive(uintptr_t ProgramCounter, uint64_
         return NotHandled;
       }
     }
-    else {
-      uint32_t STR = 0b0011'1000'0011'1111'0110'1000'0000'0000;
-      STR |= Size << 30;
-      STR |= AddrReg << 5;
-      STR |= DataReg;
-      PC[-1] = DMB; // Back-patch the half-barrier.
-      PC[0] = STR;
-      ClearICache(&PC[-1], 16);
-      // Back up one instruction and have another go
-      return std::make_pair(true, -4);
-    }
-  }
-  else if ((Instr & RCPC2_MASK) == LDAPUR_INST) { // LDAPUR*
-    // Extract the 9-bit offset from the instruction
-    int32_t Offset = static_cast<int32_t>(Instr) << 11 >> 23;
-    if (ParanoidTSO) {
+    else if ((Instr & RCPC2_MASK) == LDAPUR_INST) { // LDAPUR*
+      // Extract the 9-bit offset from the instruction
+      int32_t Offset = static_cast<int32_t>(Instr) << 11 >> 23;
       if (ArchHelpers::Arm64::HandleAtomicLoad(Instr, GPRs, Offset)) {
         // Skip this instruction now
         return std::make_pair(true, 4);
@@ -2104,23 +2084,9 @@ static uint64_t HandleAtomicLoadstoreExclusive(uintptr_t ProgramCounter, uint64_
         return NotHandled;
       }
     }
-    else {
-      uint32_t LDUR = 0b0011'1000'0100'0000'0000'0000'0000'0000;
-      LDUR |= Size << 30;
-      LDUR |= AddrReg << 5;
-      LDUR |= DataReg;
-      LDUR |= Instr & (0b1'1111'1111 << 12);
-      PC[0] = LDUR;
-      PC[1] = DMB_LD; // Back-patch the half-barrier.
-      ClearICache(&PC[-1], 16);
-      // With the instruction modified, now execute again.
-      return std::make_pair(true, 0);
-    }
-  }
-  else if ((Instr & RCPC2_MASK) == STLUR_INST) { // STLUR*
-    // Extract the 9-bit offset from the instruction
-    int32_t Offset = static_cast<int32_t>(Instr) << 11 >> 23;
-    if (ParanoidTSO) {
+    else if ((Instr & RCPC2_MASK) == STLUR_INST) { // STLUR*
+      // Extract the 9-bit offset from the instruction
+      int32_t Offset = static_cast<int32_t>(Instr) << 11 >> 23;
       if (ArchHelpers::Arm64::HandleAtomicStore(Instr, GPRs, Offset)) {
         // Skip this instruction now
         return std::make_pair(true, 4);
@@ -2130,18 +2096,64 @@ static uint64_t HandleAtomicLoadstoreExclusive(uintptr_t ProgramCounter, uint64_
         return NotHandled;
       }
     }
-    else {
-      uint32_t STUR = 0b0011'1000'0000'0000'0000'0000'0000'0000;
-      STUR |= Size << 30;
-      STUR |= AddrReg << 5;
-      STUR |= DataReg;
-      STUR |= Instr & (0b1'1111'1111 << 12);
-      PC[-1] = DMB; // Back-patch the half-barrier.
-      PC[0] = STUR;
-      ClearICache(&PC[-1], 16);
-      // Back up one instruction and have another go
-      return std::make_pair(true, -4);
-    }
+  }
+
+  const auto Frame = Thread->CurrentFrame;
+  const uint64_t BlockBegin = Frame->State.InlineJITBlockHeader;
+  auto InlineHeader = reinterpret_cast<const CPU::CPUBackend::JITCodeHeader *>(BlockBegin);
+  auto InlineTail = reinterpret_cast<CPU::CPUBackend::JITCodeTail *>(Frame->State.InlineJITBlockHeader + InlineHeader->OffsetToBlockTail);
+
+  // Lock code mutex during any SIGBUS handling that potentially changes code.
+  // Need to be careful to not read any code part-way through modification.
+  FEXCore::Utils::SpinWaitLock::UniqueSpinMutex lk(&InlineTail->SpinLockFutex);
+
+  if ((Instr & LDAXR_MASK) == LDAR_INST || // LDAR*
+      (Instr & LDAXR_MASK) == LDAPR_INST) { // LDAPR*
+    uint32_t LDR = 0b0011'1000'0111'1111'0110'1000'0000'0000;
+    LDR |= Size << 30;
+    LDR |= AddrReg << 5;
+    LDR |= DataReg;
+    PC[0] = LDR;
+    PC[1] = DMB_LD; // Back-patch the half-barrier.
+    ClearICache(&PC[-1], 16);
+    // With the instruction modified, now execute again.
+    return std::make_pair(true, 0);
+  }
+  else if ((Instr & LDAXR_MASK) == STLR_INST) { // STLR*
+    uint32_t STR = 0b0011'1000'0011'1111'0110'1000'0000'0000;
+    STR |= Size << 30;
+    STR |= AddrReg << 5;
+    STR |= DataReg;
+    PC[-1] = DMB; // Back-patch the half-barrier.
+    PC[0] = STR;
+    ClearICache(&PC[-1], 16);
+    // Back up one instruction and have another go
+    return std::make_pair(true, -4);
+  }
+  else if ((Instr & RCPC2_MASK) == LDAPUR_INST) { // LDAPUR*
+    // Copy the 9-bit offset field straight across from the instruction
+    uint32_t LDUR = 0b0011'1000'0100'0000'0000'0000'0000'0000;
+    LDUR |= Size << 30;
+    LDUR |= AddrReg << 5;
+    LDUR |= DataReg;
+    LDUR |= Instr & (0b1'1111'1111 << 12);
+    PC[0] = LDUR;
+    PC[1] = DMB_LD; // Back-patch the half-barrier.
+    ClearICache(&PC[-1], 16);
+    // With the instruction modified, now execute again.
+    return std::make_pair(true, 0);
+  }
+  else if ((Instr & RCPC2_MASK) == STLUR_INST) { // STLUR*
+    // Copy the 9-bit offset field straight across from the instruction
+    uint32_t STUR = 0b0011'1000'0000'0000'0000'0000'0000'0000;
+    STUR |= Size << 30;
+    STUR |= AddrReg << 5;
+    STUR |= DataReg;
+    STUR |= Instr & (0b1'1111'1111 << 12);
+    PC[-1] = DMB; // Back-patch the half-barrier.
+    PC[0] = STUR;
+    ClearICache(&PC[-1], 16);
+    // Back up one instruction and have another go
+    return std::make_pair(true, -4);
   }
   else if ((Instr & ArchHelpers::Arm64::LDAXP_MASK) == ArchHelpers::Arm64::LDAXP_INST) { // LDAXP
     //Should be compare and swap pair only. LDAXP not used elsewhere
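For reference, the `static_cast<int32_t>(Instr) << 11 >> 23` dance above recovers the signed 9-bit immediate that LDAPUR/STLUR encode in bits [20:12]: the left shift by 11 parks the immediate's sign bit in bit 31, and the arithmetic right shift by 23 drags the field back down with sign extension. A minimal standalone sketch of the round-trip (the `EncodeImm9` helper is hypothetical, written only for this illustration):

    #include <cassert>
    #include <cstdint>

    // Recover the signed imm9 field from bits [20:12], as the handler does.
    static int32_t ExtractImm9(uint32_t Instr) {
      return static_cast<int32_t>(Instr) << 11 >> 23;
    }

    // Hypothetical helper: place a signed 9-bit offset into bits [20:12].
    static uint32_t EncodeImm9(int32_t Offset) {
      return (static_cast<uint32_t>(Offset) & 0x1FF) << 12;
    }

    int main() {
      assert(ExtractImm9(EncodeImm9(255)) == 255);   // Most positive imm9
      assert(ExtractImm9(EncodeImm9(-256)) == -256); // Most negative imm9
      assert(ExtractImm9(EncodeImm9(-16)) == -16);   // Sign extension engaged
      return 0;
    }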
diff --git a/FEXCore/Source/Utils/SpinWaitLock.cpp b/FEXCore/Source/Utils/SpinWaitLock.cpp
new file mode 100644
index 0000000000..b2f011adec
--- /dev/null
+++ b/FEXCore/Source/Utils/SpinWaitLock.cpp
@@ -0,0 +1,27 @@
+#include "Utils/SpinWaitLock.h"
+
+namespace FEXCore::Utils::SpinWaitLock {
+#ifdef _M_ARM_64
+  constexpr uint64_t NanosecondsInSecond = 1'000'000'000ULL;
+
+  static uint32_t GetCycleCounterFrequency() {
+    uint64_t Result{};
+    __asm("mrs %[Res], CNTFRQ_EL0"
+      : [Res] "=r" (Result));
+    return Result;
+  }
+
+  static uint64_t CalculateCyclesPerNanosecond() {
+    // Snapdragon devices historically use a 19.2MHz cycle counter frequency.
+    // This means the number of nanoseconds per cycle ends up being 52.0833...
+    //
+    // ARMv8.6 and ARMv9.1 require the cycle counter frequency to be 1GHz.
+    // This means the number of nanoseconds per cycle ends up being 1.
+    uint64_t CounterFrequency = GetCycleCounterFrequency();
+    return NanosecondsInSecond / CounterFrequency;
+  }
+
+  uint32_t CycleCounterFrequency = GetCycleCounterFrequency();
+  uint64_t CyclesPerNanosecond = CalculateCyclesPerNanosecond();
+#endif
+}
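Worth spelling out: the quotient `NanosecondsInSecond / CounterFrequency` is nanoseconds-per-cycle, despite the `CyclesPerNanosecond` name, and the integer division truncates 52.0833... down to 52. Timeouts converted through it therefore come out marginally long rather than short, which is the safe direction for a wait. A quick standalone check of the arithmetic (a sketch, not part of the change):

    #include <cstdint>
    #include <cstdio>

    int main() {
      constexpr uint64_t NanosecondsInSecond = 1'000'000'000ULL;

      // Snapdragon-style 19.2MHz counter: 1e9 / 19.2e6 = 52.0833..., truncated to 52.
      std::printf("19.2MHz -> %llu ns per cycle\n",
                  static_cast<unsigned long long>(NanosecondsInSecond / 19'200'000ULL));

      // ARMv8.6 / ARMv9.1 mandate a 1GHz counter: exactly 1ns per cycle.
      std::printf("1GHz    -> %llu ns per cycle\n",
                  static_cast<unsigned long long>(NanosecondsInSecond / 1'000'000'000ULL));
      return 0;
    }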
diff --git a/FEXCore/Source/Utils/SpinWaitLock.h b/FEXCore/Source/Utils/SpinWaitLock.h
new file mode 100644
index 0000000000..d0a3510d08
--- /dev/null
+++ b/FEXCore/Source/Utils/SpinWaitLock.h
@@ -0,0 +1,301 @@
+#pragma once
+#include <atomic>
+#include <chrono>
+#include <cstdint>
+
+namespace FEXCore::Utils::SpinWaitLock {
+  /**
+   * @brief This provides routines to implement an "efficient spin-loop" using ARM's WFE and exclusive monitor interfaces.
+   *
+   * Spin-loops on mobile devices with a battery can be a bad idea as they burn a bunch of power. This attempts to mitigate some of the impact
+   * by putting the CPU into a lower-power state using WFE.
+   * On the platforms tested, WFE puts the CPU into a lower-power state for upwards of 52ns per WFE, which isn't a significant amount of time
+   * but should still have power savings. Ideally WFE would be able to keep the CPU in a lower-power state for longer. This also has the added
+   * benefit that the atomics aren't abusing the caches when spinning on a cacheline, which has knock-on power-saving benefits.
+   *
+   * FEAT_WFxT adds a new instruction with a timeout, but since the spurious wake-up is so aggressive it isn't worth using.
+   *
+   * It should be noted that this implementation has a few dozen cycles of start-up time, which means the overhead for invoking it is
+   * slightly higher than a true spin-loop. The hot loop body itself is only three instructions, so it is quite efficient.
+   *
+   * On non-ARM platforms it is a plain spin-loop, which is okay for debugging purposes only.
+   */
+#ifdef _M_ARM_64
+
+#define LOADEXCLUSIVE(LoadExclusiveOp, RegSize) \
+  /* Prime the exclusive monitor with the passed in address. */ \
+  #LoadExclusiveOp " %" #RegSize "[Result], [%[Futex]];"
+
+#define SPINLOOP_BODY(LoadAtomicOp, RegSize) \
+  /* WFE will wait for either the memory to change or spurious wake-up. */ \
+  "wfe;" \
+  /* Load with acquire to get the result of memory. */ \
+  #LoadAtomicOp " %" #RegSize "[Result], [%[Futex]]; "
+
+#define SPINLOOP_WFE_LDX_8BIT  LOADEXCLUSIVE(ldaxrb, w)
+#define SPINLOOP_WFE_LDX_16BIT LOADEXCLUSIVE(ldaxrh, w)
+#define SPINLOOP_WFE_LDX_32BIT LOADEXCLUSIVE(ldaxr, w)
+#define SPINLOOP_WFE_LDX_64BIT LOADEXCLUSIVE(ldaxr, x)
+
+#define SPINLOOP_8BIT  SPINLOOP_BODY(ldarb, w)
+#define SPINLOOP_16BIT SPINLOOP_BODY(ldarh, w)
+#define SPINLOOP_32BIT SPINLOOP_BODY(ldar, w)
+#define SPINLOOP_64BIT SPINLOOP_BODY(ldar, x)
+
+  extern uint32_t CycleCounterFrequency;
+  extern uint64_t CyclesPerNanosecond;
+
+  ///< Get the raw cycle counter, behind an ISB so the read isn't reordered.
+  /// `CNTVCTSS_EL0` (the self-synchronizing form) does the same thing without the ISB, but requires the FEAT_ECV feature.
+  static inline uint64_t GetCycleCounter() {
+    uint64_t Result{};
+    __asm volatile(R"(
+      isb;
+      mrs %[Res], CNTVCT_EL0;
+    )"
+    : [Res] "=r" (Result));
+    return Result;
+  }
+
+  ///< Converts nanoseconds to a number of cycle-counter ticks.
+  /// Note `CyclesPerNanosecond` holds nanoseconds-per-cycle, hence the division.
+  /// If the cycle counter runs at 1GHz then this is a direct 1:1 map.
+  static inline uint64_t ConvertNanosecondsToCycles(std::chrono::nanoseconds const &Nanoseconds) {
+    const auto NanosecondCount = Nanoseconds.count();
+    return NanosecondCount / CyclesPerNanosecond;
+  }
+
+  static inline uint8_t LoadExclusive(uint8_t *Futex) {
+    uint8_t Result{};
+    __asm volatile(SPINLOOP_WFE_LDX_8BIT
+      : [Result] "=r" (Result)
+      , [Futex] "+r" (Futex)
+      :: "memory");
+
+    return Result;
+  }
+
+  static inline uint16_t LoadExclusive(uint16_t *Futex) {
+    uint16_t Result{};
+    __asm volatile(SPINLOOP_WFE_LDX_16BIT
+      : [Result] "=r" (Result)
+      , [Futex] "+r" (Futex)
+      :: "memory");
+
+    return Result;
+  }
+
+  static inline uint32_t LoadExclusive(uint32_t *Futex) {
+    uint32_t Result{};
+    __asm volatile(SPINLOOP_WFE_LDX_32BIT
+      : [Result] "=r" (Result)
+      , [Futex] "+r" (Futex)
+      :: "memory");
+
+    return Result;
+  }
+
+  static inline uint64_t LoadExclusive(uint64_t *Futex) {
+    uint64_t Result{};
+    __asm volatile(SPINLOOP_WFE_LDX_64BIT
+      : [Result] "=r" (Result)
+      , [Futex] "+r" (Futex)
+      :: "memory");
+
+    return Result;
+  }
+
+  static inline uint8_t WFELoadAtomic(uint8_t *Futex) {
+    uint8_t Result{};
+    __asm volatile(SPINLOOP_8BIT
+      : [Result] "=r" (Result)
+      , [Futex] "+r" (Futex)
+      :: "memory");
+
+    return Result;
+  }
+
+  static inline uint16_t WFELoadAtomic(uint16_t *Futex) {
+    uint16_t Result{};
+    __asm volatile(SPINLOOP_16BIT
+      : [Result] "=r" (Result)
+      , [Futex] "+r" (Futex)
+      :: "memory");
+
+    return Result;
+  }
+
+  static inline uint32_t WFELoadAtomic(uint32_t *Futex) {
+    uint32_t Result{};
+    __asm volatile(SPINLOOP_32BIT
+      : [Result] "=r" (Result)
+      , [Futex] "+r" (Futex)
+      :: "memory");
+
+    return Result;
+  }
+
+  static inline uint64_t WFELoadAtomic(uint64_t *Futex) {
+    uint64_t Result{};
+    __asm volatile(SPINLOOP_64BIT
+      : [Result] "=r" (Result)
+      , [Futex] "+r" (Futex)
+      :: "memory");
+
+    return Result;
+  }
+
+  template<typename T, typename TT>
+  static inline void Wait(T *Futex, TT ExpectedValue) {
+    std::atomic<T> *AtomicFutex = reinterpret_cast<std::atomic<T>*>(Futex);
+    T Result = AtomicFutex->load();
+
+    // Early exit if possible.
+    if (Result == ExpectedValue) return;
+
+    do {
+      Result = LoadExclusive(Futex);
+      if (Result == ExpectedValue) return;
+      Result = WFELoadAtomic(Futex);
+    } while (Result != ExpectedValue);
+  }
+
+  template void Wait(uint8_t*, uint8_t);
+  template void Wait(uint16_t*, uint16_t);
+  template void Wait(uint32_t*, uint32_t);
+  template void Wait(uint64_t*, uint64_t);
+
+  template<typename T, typename TT>
+  static inline bool Wait(T *Futex, TT ExpectedValue, std::chrono::nanoseconds const &Timeout) {
+    std::atomic<T> *AtomicFutex = reinterpret_cast<std::atomic<T>*>(Futex);
+
+    T Result = AtomicFutex->load();
+
+    // Early exit if possible.
+    if (Result == ExpectedValue) return true;
+
+    const auto TimeoutCycles = ConvertNanosecondsToCycles(Timeout);
+    const auto Begin = GetCycleCounter();
+
+    do {
+      Result = LoadExclusive(Futex);
+      if (Result == ExpectedValue) return true;
+      Result = WFELoadAtomic(Futex);
+
+      const auto CurrentCycleCounter = GetCycleCounter();
+      if ((CurrentCycleCounter - Begin) >= TimeoutCycles) {
+        // Couldn't get value before timeout.
+        return false;
+      }
+    } while (Result != ExpectedValue);
+
+    // We got our result.
+    return true;
+  }
+
+  template bool Wait(uint8_t*, uint8_t, std::chrono::nanoseconds const &);
+  template bool Wait(uint16_t*, uint16_t, std::chrono::nanoseconds const &);
+  template bool Wait(uint32_t*, uint32_t, std::chrono::nanoseconds const &);
+  template bool Wait(uint64_t*, uint64_t, std::chrono::nanoseconds const &);
+
+#else
+  template<typename T, typename TT>
+  static inline void Wait(T *Futex, TT ExpectedValue) {
+    std::atomic<T> *AtomicFutex = reinterpret_cast<std::atomic<T>*>(Futex);
+    T Result = AtomicFutex->load();
+
+    // Early exit if possible.
+    if (Result == ExpectedValue) return;
+
+    do {
+      Result = AtomicFutex->load();
+    } while (Result != ExpectedValue);
+  }
+
+  template<typename T, typename TT>
+  static inline bool Wait(T *Futex, TT ExpectedValue, std::chrono::nanoseconds const &Timeout) {
+    std::atomic<T> *AtomicFutex = reinterpret_cast<std::atomic<T>*>(Futex);
+
+    T Result = AtomicFutex->load();
+
+    // Early exit if possible.
+    if (Result == ExpectedValue) return true;
+
+    const auto Begin = std::chrono::high_resolution_clock::now();
+
+    do {
+      Result = AtomicFutex->load();
+
+      const auto CurrentCycleCounter = std::chrono::high_resolution_clock::now();
+      if ((CurrentCycleCounter - Begin) >= Timeout) {
+        // Couldn't get value before timeout.
+        return false;
+      }
+    } while (Result != ExpectedValue);
+
+    // We got our result.
+    return true;
+  }
+#endif
+
+  template<typename T>
+  static inline void lock(T *Futex) {
+    std::atomic<T> *AtomicFutex = reinterpret_cast<std::atomic<T>*>(Futex);
+    T Expected{};
+    T Desired {1};
+
+    // Try to CAS immediately.
+    if (AtomicFutex->compare_exchange_strong(Expected, Desired)) return;
+
+    do {
+      // Wait until the futex is unlocked.
+      Wait(Futex, 0);
+      // compare_exchange_strong updates Expected on failure; reset it so a
+      // stale value can't spuriously "acquire" a lock another thread holds.
+      Expected = 0;
+    } while (!AtomicFutex->compare_exchange_strong(Expected, Desired));
+  }
+
+  template<typename T>
+  static inline bool try_lock(T *Futex) {
+    std::atomic<T> *AtomicFutex = reinterpret_cast<std::atomic<T>*>(Futex);
+    T Expected{};
+    T Desired {1};
+
+    // Try to CAS immediately.
+    return AtomicFutex->compare_exchange_strong(Expected, Desired);
+  }
+
+  template<typename T>
+  static inline void unlock(T *Futex) {
+    std::atomic<T> *AtomicFutex = reinterpret_cast<std::atomic<T>*>(Futex);
+    AtomicFutex->store(0);
+  }
+
+#undef LOADEXCLUSIVE
+#undef SPINLOOP_BODY
+#undef SPINLOOP_WFE_LDX_8BIT
+#undef SPINLOOP_WFE_LDX_16BIT
+#undef SPINLOOP_WFE_LDX_32BIT
+#undef SPINLOOP_WFE_LDX_64BIT
+#undef SPINLOOP_8BIT
+#undef SPINLOOP_16BIT
+#undef SPINLOOP_32BIT
+#undef SPINLOOP_64BIT
+
+  template<typename T>
+  class UniqueSpinMutex final {
+    public:
+      UniqueSpinMutex(T *Futex)
+        : Futex {Futex} {
+        FEXCore::Utils::SpinWaitLock::lock(Futex);
+      }
+
+      ~UniqueSpinMutex() {
+        FEXCore::Utils::SpinWaitLock::unlock(Futex);
+      }
+    private:
+      T *Futex;
+  };
+}
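A usage sketch for the interfaces above (a hypothetical call site, not part of this change): a 32-bit futex word embedded in shared state, guarded RAII-style with `UniqueSpinMutex`, plus a bounded wait on a separate word.

    #include "Utils/SpinWaitLock.h"

    #include <chrono>
    #include <cstdint>

    struct SharedState {
      uint32_t Futex{};  // 0 = unlocked, 1 = locked.
      uint32_t Value{};
    };

    void Publish(SharedState *State) {
      // Locks in the constructor, unlocks in the destructor.
      FEXCore::Utils::SpinWaitLock::UniqueSpinMutex lk(&State->Futex);
      State->Value = 42;
    }

    bool WaitForPublish(SharedState *State) {
      // WFE-backed on ARM64; returns false if Value doesn't become 42 within 1ms.
      return FEXCore::Utils::SpinWaitLock::Wait(&State->Value, 42u,
                                                std::chrono::milliseconds(1));
    }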
diff --git a/FEXCore/include/FEXCore/Core/CPUBackend.h b/FEXCore/include/FEXCore/Core/CPUBackend.h
index 4f9cab7b0f..dd34561cf6 100644
--- a/FEXCore/include/FEXCore/Core/CPUBackend.h
+++ b/FEXCore/include/FEXCore/Core/CPUBackend.h
@@ -98,6 +98,11 @@ namespace CPU {
 
       // Offset after this block to the start of the RIP entries.
       uint32_t OffsetToRIPEntries;
+
+      // Shared-code modification spin-loop futex.
+      uint32_t SpinLockFutex;
+
+      uint32_t _Pad;
     };
 
     // Entries that live after the JITCodeTail.
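The explicit `_Pad` keeps the tail's layout free of implicit padding, so the RIP entries that follow start at a predictable, 8-byte-aligned offset. A compile-time check in that spirit, against a sketch of the struct (only `OffsetToRIPEntries`, `SpinLockFutex`, and `_Pad` are confirmed by this hunk; the other fields are assumed for illustration):

    #include <cstdint>

    // Sketch of the tail layout; fields besides the three in the hunk are assumptions.
    struct JITCodeTailSketch {
      uint64_t RIP;                 // Assumed leading 64-bit field.
      uint32_t NumberOfRIPEntries;  // Assumed.
      uint32_t OffsetToRIPEntries;
      uint32_t SpinLockFutex;
      uint32_t _Pad;
    };

    // No implicit padding: the members account for every byte...
    static_assert(sizeof(JITCodeTailSketch) == 8 + 4 * 4, "unexpected implicit padding");
    // ...and the entries that follow the tail stay 8-byte aligned.
    static_assert(sizeof(JITCodeTailSketch) % alignof(uint64_t) == 0, "RIP entries would be misaligned");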
diff --git a/FEXCore/unittests/APITests/FutexSpinTest.cpp b/FEXCore/unittests/APITests/FutexSpinTest.cpp
new file mode 100644
index 0000000000..9871b318cc
--- /dev/null
+++ b/FEXCore/unittests/APITests/FutexSpinTest.cpp
@@ -0,0 +1,85 @@
+#include "Utils/SpinWaitLock.h"
+#include <catch2/catch_all.hpp>
+#include <atomic>
+#include <chrono>
+#include <thread>
+
+constexpr auto SleepAmount = std::chrono::milliseconds(250);
+
+TEST_CASE("FutexSpin-Timed-8bit") {
+  uint8_t Test{};
+
+  auto now = std::chrono::high_resolution_clock::now();
+  FEXCore::Utils::SpinWaitLock::Wait(&Test, 1, SleepAmount);
+  auto end = std::chrono::high_resolution_clock::now();
+  auto diff = end - now;
+
+  // The futex spinwait needs to have slept for at /least/ the amount specified. It will always run slightly late.
+  REQUIRE(std::chrono::duration_cast<std::chrono::nanoseconds>(diff) >= std::chrono::duration_cast<std::chrono::nanoseconds>(SleepAmount));
+}
+
+TEST_CASE("FutexSpin-Sleep-8bit") {
+  constexpr auto SleepAmount = std::chrono::seconds(1);
+
+  uint8_t Test{};
+  std::atomic<uint8_t> ActualSpinLoop{};
+  std::chrono::nanoseconds SleptAmount;
+
+  std::thread t([&Test, &SleptAmount, &ActualSpinLoop]() {
+    auto now = std::chrono::high_resolution_clock::now();
+    ActualSpinLoop.store(1);
+    FEXCore::Utils::SpinWaitLock::Wait(&Test, 1);
+    auto end = std::chrono::high_resolution_clock::now();
+    SleptAmount = end - now;
+  });
+
+  // Wait until the second thread lets us know it is about to start waiting.
+  while (ActualSpinLoop.load() == 0);
+
+  // Sleep this thread for the sleep amount.
+  std::this_thread::sleep_for(SleepAmount);
+
+  // Lock the futex; this sets it to 1, the value the waiter expects.
+  FEXCore::Utils::SpinWaitLock::lock(&Test);
+
+  // Wait for the thread to get done.
+  t.join();
+
+  // The futex spinwait needs to have slept for at /least/ the amount specified. It will always run slightly late.
+  REQUIRE(SleptAmount >= std::chrono::duration_cast<std::chrono::nanoseconds>(SleepAmount));
+}
+
+TEST_CASE("FutexSpin-Timed-16bit") {
+  uint16_t Test{};
+
+  auto now = std::chrono::high_resolution_clock::now();
+  FEXCore::Utils::SpinWaitLock::Wait(&Test, 1, SleepAmount);
+  auto end = std::chrono::high_resolution_clock::now();
+  auto diff = end - now;
+
+  // The futex spinwait needs to have slept for at /least/ the amount specified. It will always run slightly late.
+  REQUIRE(std::chrono::duration_cast<std::chrono::nanoseconds>(diff) >= std::chrono::duration_cast<std::chrono::nanoseconds>(SleepAmount));
+}
+
+TEST_CASE("FutexSpin-Timed-32bit") {
+  uint32_t Test{};
+
+  auto now = std::chrono::high_resolution_clock::now();
+  FEXCore::Utils::SpinWaitLock::Wait(&Test, 1, SleepAmount);
+  auto end = std::chrono::high_resolution_clock::now();
+  auto diff = end - now;
+
+  // The futex spinwait needs to have slept for at /least/ the amount specified. It will always run slightly late.
+  REQUIRE(std::chrono::duration_cast<std::chrono::nanoseconds>(diff) >= std::chrono::duration_cast<std::chrono::nanoseconds>(SleepAmount));
+}
+
+TEST_CASE("FutexSpin-Timed-64bit") {
+  uint64_t Test{};
+
+  auto now = std::chrono::high_resolution_clock::now();
+  FEXCore::Utils::SpinWaitLock::Wait(&Test, 1, SleepAmount);
+  auto end = std::chrono::high_resolution_clock::now();
+  auto diff = end - now;
+
+  // The futex spinwait needs to have slept for at /least/ the amount specified. It will always run slightly late.
+  REQUIRE(std::chrono::duration_cast<std::chrono::nanoseconds>(diff) >= std::chrono::duration_cast<std::chrono::nanoseconds>(SleepAmount));
+}