-
Notifications
You must be signed in to change notification settings - Fork 134
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4291 from Sonicadvance1/profile_stats
FEX: Implements new sampling based stats
- Loading branch information
Showing 20 changed files with 736 additions and 17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
// SPDX-License-Identifier: MIT | ||
#include "Common/Profiler.h" | ||
#include "git_version.h" | ||
|
||
#include <FEXCore/Debug/InternalThreadState.h> | ||
|
||
namespace FEX::Profiler { | ||
// Writes the stats header at the start of the backing allocation and resets the
// slot bookkeeping that depends on it.
//
// Does nothing when no backing allocation (`Base`) is present.
// On return `Head` points at the header, `Stats` points at the first slot that
// follows the header, and `RemainingSlots` covers every slot of `CurrentSize`.
//
// AppType: application type recorded in the header.
void StatAllocBase::SaveHeader(FEXCore::Profiler::AppType AppType) {
  if (!Base) {
    return;
  }

  Head = reinterpret_cast<FEXCore::Profiler::ThreadStatsHeader*>(Base);
  Head->Size.store(CurrentSize, std::memory_order_relaxed);
  Head->Version = FEXCore::Profiler::STATS_VERSION;

  // fex_version is a fixed-size field. Copy at most (capacity - 1) characters and always
  // terminate explicitly, so the field never ends up without a NUL terminator when the
  // version string is as long as, or longer than, the field.
  const std::string_view GitString = GIT_DESCRIBE_STRING;
  const auto CopiedLength = GitString.copy(Head->fex_version, sizeof(Head->fex_version) - 1);
  Head->fex_version[CopiedLength] = '\0';
  Head->app_type = AppType;

  // The slot array starts immediately after the header.
  Stats = reinterpret_cast<FEXCore::Profiler::ThreadStats*>(reinterpret_cast<uint64_t>(Base) + sizeof(FEXCore::Profiler::ThreadStatsHeader));

  RemainingSlots = TotalSlotsFromSize();
}
|
||
bool StatAllocBase::AllocateMoreSlots() { | ||
const auto OriginalSlotCount = TotalSlotsFromSize(); | ||
|
||
uint32_t NewSize = FrontendAllocateSlots(CurrentSize * 2); | ||
|
||
if (NewSize == CurrentSize) { | ||
return false; | ||
} | ||
|
||
CurrentSize = NewSize; | ||
Head->Size.store(CurrentSize, std::memory_order_relaxed); | ||
RemainingSlots = TotalSlotsFromSize() - OriginalSlotCount; | ||
|
||
return true; | ||
} | ||
|
||
// Claims a free stats slot for the thread `TID` and appends it to the singly-linked
// list of slots that starts at the header's `Head` offset.
//
// TID: thread ID stored in the slot. A slot whose TID is zero is treated as free, so
//      callers are expected to pass a non-zero value.
//
// Returns the claimed slot, or nullptr when no slot is available (growing the region
// failed, or no free slot could be located).
FEXCore::Profiler::ThreadStats* StatAllocBase::AllocateSlot(uint32_t TID) {
  if (!RemainingSlots && !AllocateMoreSlots()) {
    return nullptr;
  }

  // Find a free slot
  store_memory_barrier();
  FEXCore::Profiler::ThreadStats* AllocatedSlot {};
  const size_t TotalSlots = TotalSlotsFromSize();
  for (size_t i = 0; i < TotalSlots; ++i) {
    auto Candidate = &Stats[i];
    if (Candidate->TID.load(std::memory_order_relaxed) == 0) {
      AllocatedSlot = Candidate;
      break;
    }
  }

  if (!AllocatedSlot) {
    // The bookkeeping claimed a slot was free but none was found. Never hand out (and wipe)
    // a slot that is still in use; resync the counter so the next call attempts to grow.
    RemainingSlots = 0;
    return nullptr;
  }

  --RemainingSlots;

  // Slot might be reused, just zero it now.
  // The size must be that of a single slot: clearing with the header's size would clear the
  // wrong number of bytes and could spill into the neighbouring slot.
  memset(AllocatedSlot, 0, sizeof(FEXCore::Profiler::ThreadStats));

  // TID != 0 means slot is allocated.
  AllocatedSlot->TID.store(TID, std::memory_order_relaxed);

  // Setup singly-linked list: an empty list (head offset of zero) starts at this slot,
  // otherwise the slot is linked after the current tail.
  if (Head->Head.load(std::memory_order_relaxed) == 0) {
    Head->Head.store(OffsetFromStat(AllocatedSlot), std::memory_order_relaxed);
  } else {
    StatTail->Next.store(OffsetFromStat(AllocatedSlot), std::memory_order_relaxed);
  }

  // Update the tail.
  StatTail = AllocatedSlot;
  return AllocatedSlot;
}
|
||
// Releases a slot previously returned by AllocateSlot and unlinks it from the
// singly-linked list of slots.
//
// AllocatedSlot: slot to release; nullptr is accepted and ignored.
//
// The slot's own `Next` offset is left untouched.
void StatAllocBase::DeallocateSlot(FEXCore::Profiler::ThreadStats* AllocatedSlot) {
  if (!AllocatedSlot) {
    return;
  }

  // TID == 0 will signal the reader to ignore this slot & deallocate it!
  AllocatedSlot->TID.store(0, std::memory_order_relaxed);

  store_memory_barrier();

  const auto SlotOffset = OffsetFromStat(AllocatedSlot);
  const auto AllocatedSlotNext = AllocatedSlot->Next.load(std::memory_order_relaxed);

  const bool IsTail = AllocatedSlot == StatTail;

  // Update the linked list.
  if (Head->Head == SlotOffset) {
    // Removing the head: the list now starts at whatever followed this slot.
    Head->Head.store(AllocatedSlotNext, std::memory_order_relaxed);
    if (IsTail) {
      StatTail = nullptr;
    }
  } else {
    // Find the live predecessor, i.e. the allocated slot whose Next refers to this slot.
    const size_t TotalSlots = TotalSlotsFromSize();
    for (size_t i = 0; i < TotalSlots; ++i) {
      auto Slot = &Stats[i];

      // Freed slots keep their stale Next offset. Skip them, otherwise a previously freed
      // slot could be mistaken for the predecessor, leaving the real predecessor linked to
      // the slot being released and possibly making a free slot the tail.
      if (Slot->TID.load(std::memory_order_relaxed) == 0) {
        continue;
      }

      if (Slot->Next.load(std::memory_order_relaxed) == SlotOffset) {
        Slot->Next.store(AllocatedSlotNext, std::memory_order_relaxed);

        if (IsTail) {
          // This slot is now the tail.
          StatTail = Slot;
        }
        break;
      }
    }
  }

  ++RemainingSlots;
}
|
||
} // namespace FEX::Profiler |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
// SPDX-License-Identifier: MIT | ||
/* | ||
$info$ | ||
tags: Common|Profiler | ||
desc: Frontend profiler common code | ||
$end_info$ | ||
*/ | ||
#pragma once | ||
#include <FEXCore/Utils/Profiler.h> | ||
|
||
namespace FEXCore::Core { | ||
struct InternalThreadState; | ||
} | ||
|
||
#ifdef _M_ARM_64
// Store barrier: issues `dmb ishst`. The "memory" clobber additionally stops the
// compiler from moving memory accesses across this point.
static inline void store_memory_barrier() {
  asm volatile("dmb ishst;" ::: "memory");
}

#else
// Store barrier for the non-ARM64 build.
static inline void store_memory_barrier() {
  // x86 is strongly memory ordered with regular loadstores, so no fence instruction is needed.
  // The compiler is still free to reorder memory accesses around an empty function though, so
  // keep a compiler-only barrier here to match the "memory" clobber of the ARM64 variant.
  asm volatile("" ::: "memory");
}
#endif
|
||
namespace FEX::Profiler { | ||
class StatAllocBase { | ||
protected: | ||
FEXCore::Profiler::ThreadStats* AllocateSlot(uint32_t TID); | ||
void DeallocateSlot(FEXCore::Profiler::ThreadStats* AllocatedSlot); | ||
|
||
uint32_t OffsetFromStat(FEXCore::Profiler::ThreadStats* Stat) const { | ||
return reinterpret_cast<uint64_t>(Stat) - reinterpret_cast<uint64_t>(Base); | ||
} | ||
uint32_t TotalSlotsFromSize() const { | ||
return (CurrentSize - sizeof(FEXCore::Profiler::ThreadStatsHeader)) / sizeof(FEXCore::Profiler::ThreadStats) - 1; | ||
} | ||
static uint32_t TotalSlotsFromSize(uint32_t Size) { | ||
return (Size - sizeof(FEXCore::Profiler::ThreadStatsHeader)) / sizeof(FEXCore::Profiler::ThreadStats) - 1; | ||
} | ||
|
||
static uint32_t SlotIndexFromOffset(uint32_t Offset) { | ||
return (Offset - sizeof(FEXCore::Profiler::ThreadStatsHeader)) / sizeof(FEXCore::Profiler::ThreadStats); | ||
} | ||
|
||
void SaveHeader(FEXCore::Profiler::AppType AppType); | ||
|
||
void* Base; | ||
uint32_t CurrentSize {}; | ||
FEXCore::Profiler::ThreadStatsHeader* Head {}; | ||
FEXCore::Profiler::ThreadStats* Stats; | ||
FEXCore::Profiler::ThreadStats* StatTail {}; | ||
uint32_t RemainingSlots; | ||
|
||
// Limited to 4MB which should be a few hundred threads of tracking capability. | ||
// I (Sonicadvance1) wanted to reserve 128MB of VA space because it's cheap, but ran in to a bug when running WINE. | ||
// WINE allocates [0x7fff'fe00'0000, 0x7fff'ffff'0000) which /consistently/ overlaps with FEX's sigaltstack. | ||
// This only occurs when this stat allocation size is large as the top-down allocation pushes the alt-stack further. | ||
// Additionally, only occurs on 48-bit VA systems, as mmap on lesser VA will fail regardless. | ||
// TODO: Bump allocation size up once FEXCore's allocator can first use the 128TB of blocked VA space on 48-bit systems. | ||
constexpr static uint32_t MAX_STATS_SIZE = 4 * 1024 * 1024; | ||
|
||
private: | ||
virtual uint32_t FrontendAllocateSlots(uint32_t NewSize) = 0; | ||
bool AllocateMoreSlots(); | ||
}; | ||
|
||
} // namespace FEX::Profiler |
Oops, something went wrong.