From 591e0259099218189cc3119bdddf56bd1a5d620f Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Wed, 22 Jan 2025 13:28:59 -0800 Subject: [PATCH] FEX-Emu: Profile stats support This implements support for FEX-Emu's profiling stats interface behind a command line option that isn't enabled by default. The purpose of these statistics in FEX-Emu is to expose statistics that are high frequency and can't easily be collected in other ways due to overhead. The reason why these are getting exposed in MangoHud versus another path is to have that information readily available while running a game. It's sometimes difficult to understand why a game has stuttered in FEX, and being able to attribute the stutter to FEX-Emu overheads helps. This allows us to directly relate these stats to frame time drops in the game. The attached example is a good indicator for why a game is having low performance and how the stats look. We can see in the image that the game (Celeste) is only running at 55FPS, with a CPU core being pegged to 100%, and the FEX stats let us know that 33.7 million soft float operations are happening per second, ~600k per frame. The implementation is already in FEX-Emu upstream (https://github.com/FEX-Emu/FEX/pull/4291) and we aren't expecting the implementation to change heavily, potentially just adding additional sample events. We version these stats so that if they change in the future, the interface doesn't get broken on one side versus the other. 
--- README.md | 1 + data/MangoHud.conf | 4 + meson_options.txt | 1 + src/fex.cpp | 400 +++++++++++++++++++++++++++++++++++++++++ src/fex.h | 57 ++++++ src/hud_elements.cpp | 119 +++++++++++- src/hud_elements.h | 1 + src/meson.build | 9 +- src/overlay.cpp | 4 + src/overlay_params.cpp | 38 ++++ src/overlay_params.h | 19 ++ 11 files changed, 650 insertions(+), 3 deletions(-) create mode 100644 src/fex.cpp create mode 100644 src/fex.h diff --git a/README.md b/README.md index 8c8b727e76..9126d13b35 100644 --- a/README.md +++ b/README.md @@ -464,6 +464,7 @@ Parameters that are enabled by default have to be explicitly disabled. These (cu | `winesync` | Show wine sync method in use | | `present_mode` | Shows current vulkan [present mode](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPresentModeKHR.html) or vsync status in opengl | | `network` | Show network interfaces tx and rx kb/s. You can specify interface with `network=eth0` | +| `fex_stats` | Show FEX-Emu statistics. Default = `status+apptype+hotthreads+jitload+sigbus+smc+softfloat` | Example: `MANGOHUD_CONFIG=cpu_temp,gpu_temp,position=top-right,height=500,font_size=32` Because comma is also used as option delimiter and needs to be escaped for values with a backslash, you can use `+` like `MANGOHUD_CONFIG=fps_limit=60+30+0` instead. 
diff --git a/data/MangoHud.conf b/data/MangoHud.conf index 7244ff9b4a..064128d664 100644 --- a/data/MangoHud.conf +++ b/data/MangoHud.conf @@ -276,6 +276,10 @@ text_outline ### Disable / hide the hud by default # no_display +### Show FEX-Emu statistics +## Only useful for Arm64 devices running applications under emulation +# fex_stats + ### Hud position offset # offset_x=0 # offset_y=0 diff --git a/meson_options.txt b/meson_options.txt index 818c3b9079..2bdaed1c5e 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -13,3 +13,4 @@ option('mangohudctl', type: 'boolean', value : false) option('tests', type: 'feature', value: 'auto', description: 'Run tests') option('mangoplot', type: 'feature', value: 'enabled') option('dynamic_string_tokens', type: 'boolean', value: true, description: 'Use dynamic string tokens in LD_PRELOAD') +option('with_fex', type : 'boolean', value : false) diff --git a/src/fex.cpp b/src/fex.cpp new file mode 100644 index 0000000000..ffd4a14c47 --- /dev/null +++ b/src/fex.cpp @@ -0,0 +1,400 @@ +#include +#include +#include +#include +#include + +#include "fex.h" +#include "hud_elements.h" +#include "mesa/util/macros.h" + +namespace fex { +const char* fex_status = "Not Found!"; +std::string fex_version; +std::vector fex_load_data(200,0.f); + +fex_event_counts sigbus_counts; +fex_event_counts smc_counts; +fex_event_counts softfloat_counts; + +std::vector fex_max_thread_loads; + +constexpr static uint32_t MAXIMUM_THREAD_WAIT_TIME = 10; + +// FEX-Emu stats definitions +// Semantically these match upstream FEX-Emu. +constexpr uint32_t FEX_STATS_VERSION = 2; +enum class AppType : uint8_t { + LINUX_32, + LINUX_64, + WIN_ARM64EC, + WIN_WOW64, +}; + +// The profile statistics header that is at the base of the shared memory mapped from FEX. +// The version member is guaranteed to be first, to ensure that any version changes can be picked up immediately. 
+struct fex_stats_header { + uint8_t Version; + AppType app_type; + uint8_t _pad[2]; + char fex_version[48]; + // Atomic variables. std::atomic_ref isn't available until C++20, so need to use GCC builtin atomics to access. + uint32_t Head; + uint32_t Size; + uint32_t Pad; +}; + +// The thread-specific datapoints. If TID is zero then it is deallocated and happens to still be in the linked list. +struct fex_thread_stats { + // Atomic variables. + uint32_t Next; + uint32_t TID; + // Thread-specific stats. + uint64_t AccumulatedJITTime; + uint64_t AccumulatedSignalTime; + uint64_t AccumulatedSIGBUSCount; + uint64_t AccumulatedSMCEvents; + uint64_t AccumulatedFloatFallbackCount; +}; + +// Sampled stats information +struct fex_stats { + int pid {-1}; + int shm_fd {-1}; + bool first_sample = true; + uint32_t shm_size{}; + uint64_t cycle_counter_frequency{}; + size_t hardware_concurrency{}; + size_t page_size{}; + + void* shm_base{}; + fex_stats_header* head{}; + fex_thread_stats* stats{}; + + struct retained_stats { + std::chrono::time_point last_seen{}; + fex_thread_stats previous{}; + fex_thread_stats current{}; + }; + std::chrono::time_point previous_sample_period; + std::map sampled_stats; +}; + +fex_stats g_stats {}; + +const char* get_fex_app_type() { + if (!g_stats.head) { + return "Unknown"; + } + + // These are the only application types that FEX-Emu supports today. 
+ // Linux32: A 32-bit x86 Linux application + // Linux64: A 64-bit x86_64 Linux application + // arm64ec: A 64-bit x86_64 WINE application + // wow64: A 32-bit x86 WINE application + switch (g_stats.head->app_type) { + case AppType::LINUX_32: return "Linux32"; + case AppType::LINUX_64: return "Linux64"; + case AppType::WIN_ARM64EC: return "arm64ec"; + case AppType::WIN_WOW64: return "wow64"; + default: return "Unknown"; + } +} + +static fex_thread_stats *offset_to_stats(void* shm_base, uint32_t *offset) { + const auto ld = __atomic_load_n(offset, __ATOMIC_RELAXED); + if (ld == 0) return nullptr; + return reinterpret_cast(reinterpret_cast(shm_base) + ld); +} + +static fex_thread_stats *offset_to_stats(void* shm_base, uint32_t offset) { + if (offset == 0) return nullptr; + return reinterpret_cast(reinterpret_cast(shm_base) + offset); +} + +#ifdef __aarch64__ +static void memory_barrier() { + asm volatile("dmb ishst" ::: "memory"); +} +static uint64_t get_cycle_counter_frequency() { + uint64_t result; + asm ("mrs %[res], CNTFRQ_EL0;" + : [res] "=r" (result)); + return result; +} +bool is_fex_capable() { + // All aarch64 systems are fex capable. + return true; +} + +#elif defined(__x86_64__) || defined(__i386__) +static void memory_barrier() { + // Intentionally empty. +} +static void cpuid(uint32_t leaf, uint32_t &eax, uint32_t &ebx, uint32_t &ecx, uint32_t &edx) { + asm volatile("cpuid" + : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) + : "a"(leaf), "c"(0)); +} + +bool is_fex_capable() { + // FEX-Emu CPUID documentation: https://github.com/FEX-Emu/FEX/blob/main/docs/CPUID.md + const uint32_t HYPERVISOR_BIT = 1U << 31; + const char FEXHypervisorString[] = "FEXIFEXIEMU"; + char HypervisorString[4 * 3]; + + uint32_t eax, ebx, ecx, edx; + // Check that the hypervisor bit is set first. Not required, but good to do. + cpuid(1, eax, ebx, ecx, edx); + if ((ecx & HYPERVISOR_BIT) != HYPERVISOR_BIT) return false; + + // Once the hypervisor bit is set, query the hypervisor leaf. 
+ cpuid(0x4000'0000U, eax, ebx, ecx, edx); + if (eax == 0) return false; + + // If the hypervisor description matches FEX then we're good. + memcpy(&HypervisorString[0], &ebx, sizeof(uint32_t)); + memcpy(&HypervisorString[4], &ecx, sizeof(uint32_t)); + memcpy(&HypervisorString[8], &edx, sizeof(uint32_t)); + if (strncmp(HypervisorString, FEXHypervisorString, sizeof(HypervisorString)) != 0) return false; + + return true; +} + +static uint64_t get_cycle_counter_frequency() { + // In a FEX-Emu environment, the cycle counter frequency is exposed in CPUID leaf 0x15. + // This matches x86 Intel semantics on latest CPUs, see their documentation for the exact implementation details. + uint32_t eax, ebx, ecx, edx; + cpuid(0, eax, ebx, ecx, edx); + if (eax < 0x15) return 0; + + cpuid(0x15U, eax, ebx, ecx, edx); + + // Ignore scale in ebx + // Baseline clock is provided in ecx. + return ecx; +} +#endif + +static void destroy_shm() { + munmap(g_stats.shm_base, g_stats.shm_size); + close(g_stats.shm_fd); + g_stats.shm_fd = -1; + g_stats.shm_size = 0; + g_stats.shm_base = nullptr; + g_stats.head = nullptr; + g_stats.stats = nullptr; + g_stats.sampled_stats.clear(); +} + +static void init_shm(int pid) { + if (g_stats.shm_fd != -1) { + // Destroy first if the FD changed. + destroy_shm(); + } + + // Initialize global hardware stats. + g_stats.cycle_counter_frequency = get_cycle_counter_frequency(); + g_stats.hardware_concurrency = std::thread::hardware_concurrency(); + g_stats.page_size = sysconf(_SC_PAGESIZE); + if (g_stats.page_size <= 0) g_stats.page_size = 4096; + + // Try and open a FEX stats file that relates to the PID in focus. + // If this fails then it is non-fatal, just means FEX isn't creating stats for that process. 
+ std::string f = "fex-"; + f += std::to_string(pid); + f += "-stats"; + int fd {-1}; + struct stat buf{}; + uint64_t shm_size{}; + void* shm_base{MAP_FAILED}; + fex_stats_header *header{}; + + fd = shm_open(f.c_str(), O_RDONLY, 0); + if (fd == -1) { + goto err; + } + + if (fstat(fd, &buf) == -1) { + goto err; + } + + if (buf.st_size < static_cast(sizeof(fex_stats_header))) { + goto err; + } + + shm_size = ALIGN_POT(buf.st_size, g_stats.page_size); + + shm_base = mmap(nullptr, shm_size, PROT_READ, MAP_SHARED, fd, 0); + if (shm_base == MAP_FAILED) { + goto err; + } + + memory_barrier(); + header = reinterpret_cast(shm_base); + if (header->Version != FEX_STATS_VERSION) { + // If the version read doesn't match the implementation then we can't read. + fex_status = "version mismatch"; + goto err; + } + + // Cache off the information, we have successfully loaded the stats SHM. + g_stats.pid = pid; + g_stats.shm_fd = fd; + g_stats.shm_size = shm_size; + g_stats.shm_base = shm_base; + g_stats.head = header; + g_stats.stats = offset_to_stats(shm_base, &header->Head); + g_stats.previous_sample_period = std::chrono::steady_clock::now(); + g_stats.first_sample = true; + g_stats.sampled_stats.clear(); + fex_version = std::string {header->fex_version, strnlen(header->fex_version, sizeof(header->fex_version))}; + sigbus_counts.account_time(g_stats.previous_sample_period); + smc_counts.account_time(g_stats.previous_sample_period); + softfloat_counts.account_time(g_stats.previous_sample_period); + std::fill(fex_load_data.begin(), fex_load_data.end(), 0.0); + fex_max_thread_loads.clear(); + return; +err: + if (fd != -1) { + close(fd); + } + + if (shm_base != MAP_FAILED) { + munmap(shm_base, shm_size); + } +} + +static void check_shm_update_necessary() { + // If the SHM has changed size then we need to unmap and remap with the new size. + // Required since FEX may grow the SHM region to fit more threads, although previous thread data won't be invalidated. 
+ memory_barrier(); + auto new_shm_size = ALIGN_POT(__atomic_load_n(&g_stats.head->Size, __ATOMIC_RELAXED), g_stats.page_size); + if (g_stats.shm_size == new_shm_size) { + return; + } + + munmap(g_stats.shm_base, g_stats.shm_size); + g_stats.shm_size = new_shm_size; + g_stats.shm_base = mmap(nullptr, new_shm_size, PROT_READ, MAP_SHARED, g_stats.shm_fd, 0); + g_stats.head = reinterpret_cast(g_stats.shm_base); + g_stats.stats = offset_to_stats(g_stats.shm_base, &g_stats.head->Head); +} + +bool is_fex_pid_found() { + return g_stats.pid != -1; +} + +void update_fex_stats() { + auto gs_pid = HUDElements.g_gamescopePid > 0 ? HUDElements.g_gamescopePid : ::getpid(); + if (gs_pid < 1) { + // No PID yet. + return; + } + + if (g_stats.pid != gs_pid) { + // PID changed, likely gamescope changed focus. + init_shm(gs_pid); + } + + if (g_stats.pid == -1) { + // PID became invalid. Likely due to error reading SHM. + return; + } + + // Check if SHM changed first. + check_shm_update_necessary(); + + // Before reading stats, a memory barrier needs to be done. + // This ensures visibility of the stats before reading, as they use weak atomics for writes. + memory_barrier(); + + // Sample the stats and store them off. + // Sampling these quickly lets us become a loose sampling profiler, since FEX updates these constantly. 
+ uint32_t *header_offset_atomic = &g_stats.head->Head; + auto now = std::chrono::steady_clock::now(); + for (auto header_offset = __atomic_load_n(header_offset_atomic, __ATOMIC_RELAXED); + header_offset != 0; + header_offset = __atomic_load_n(header_offset_atomic, __ATOMIC_RELAXED)) { + if (header_offset >= g_stats.shm_size) break; + + fex_thread_stats *stat = offset_to_stats(g_stats.shm_base, header_offset); + const auto TID = __atomic_load_n(&stat->TID, __ATOMIC_RELAXED); + if (TID != 0) { + fex_stats::retained_stats &sampled_stats = g_stats.sampled_stats[TID]; + memcpy(&sampled_stats.current, stat, sizeof(fex_thread_stats)); + sampled_stats.last_seen = now; + } + + header_offset_atomic = &stat->Next; + } + + if (g_stats.first_sample) { + // Skip first sample, it'll look crazy. + g_stats.first_sample = false; + fex_status = "Accumulating"; + return; + } + + // Update the status with the FEX version. + fex_status = fex_version.c_str(); + + // Accumulate full JIT time + uint64_t total_jit_time{}; + uint64_t total_sigbus_events{}; + uint64_t total_smc_events{}; + uint64_t total_softfloat_events{}; + size_t threads_sampled{}; +#define accumulate(dest, name) dest += it->second.current.name - it->second.previous.name + std::vector hottest_threads{}; + for (auto it = g_stats.sampled_stats.begin(); it != g_stats.sampled_stats.end();) { + ++threads_sampled; + uint64_t total_time{}; + accumulate(total_time, AccumulatedJITTime); + accumulate(total_time, AccumulatedSignalTime); + accumulate(total_sigbus_events, AccumulatedSIGBUSCount); + accumulate(total_smc_events, AccumulatedSMCEvents); + accumulate(total_softfloat_events, AccumulatedFloatFallbackCount); + + memcpy(&it->second.previous, &it->second.current, sizeof(fex_thread_stats)); + + total_jit_time += total_time; + if ((now - it->second.last_seen) >= std::chrono::seconds(MAXIMUM_THREAD_WAIT_TIME)) { + it = g_stats.sampled_stats.erase(it); + continue; + } + hottest_threads.emplace_back(total_time); + ++it; + } + + 
std::sort(hottest_threads.begin(), hottest_threads.end(), std::greater()); + + // Calculate loads based on the sample period that occurred. + // FEX-Emu only counts cycles for the amount of time, so we need to calculate load based on the number of cycles that the sample period has. + const auto sample_period = now - g_stats.previous_sample_period; + + const double NanosecondsInSeconds = 1'000'000'000.0; + const double SamplePeriodNanoseconds = std::chrono::duration_cast(sample_period).count(); + const double MaximumCyclesInSecond = (double)g_stats.cycle_counter_frequency; + const double MaximumCyclesInSamplePeriod = MaximumCyclesInSecond * (SamplePeriodNanoseconds / NanosecondsInSeconds); + const double MaximumCoresThreadsPossible = std::min(g_stats.hardware_concurrency, threads_sampled); + + // Calculate the percentage of JIT time that could possibly exist inside the sample period. + double fex_load = ((double)total_jit_time / (MaximumCyclesInSamplePeriod * MaximumCoresThreadsPossible)) * 100.0; + size_t minimum_hot_threads = std::min(g_stats.hardware_concurrency, hottest_threads.size()); + // For the top thread-loads, we are only ever showing up to how many hardware threads are available. 
+ fex_max_thread_loads.resize(minimum_hot_threads); + for (size_t i = 0; i < minimum_hot_threads; ++i) { + fex_max_thread_loads[i] = ((double)hottest_threads[i] / MaximumCyclesInSamplePeriod) * 100.0; + } + + sigbus_counts.account(total_sigbus_events, now); + smc_counts.account(total_smc_events, now); + softfloat_counts.account(total_softfloat_events, now); + + g_stats.previous_sample_period = now; + + fex_load_data.push_back(fex_load); + fex_load_data.erase(fex_load_data.begin()); +} +} diff --git a/src/fex.h b/src/fex.h new file mode 100644 index 0000000000..213f284ffd --- /dev/null +++ b/src/fex.h @@ -0,0 +1,57 @@ +#pragma once +#ifdef HAVE_FEX +#ifndef MANGOHUD_FEX_H +#define MANGOHUD_FEX_H +#include +#include +#include + +namespace fex { +bool is_fex_capable(); +bool is_fex_pid_found(); +const char* get_fex_app_type(); + +extern const char* fex_status; +extern std::string fex_version; + +extern std::vector fex_load_data; + +struct fex_event_counts { + public: + void account(uint64_t total, std::chrono::time_point now) { + count = total; + last_sample_count += total; + + const auto diff = now - last_chrono; + if (diff >= std::chrono::seconds(1)) { + // Calculate the average over the last second. 
+ const double NanosecondsInSeconds = 1'000'000'000.0; + const auto diff_ns = std::chrono::duration_cast(diff).count(); + const double Percentage = (double)diff_ns / NanosecondsInSeconds; + average_sec = double(last_sample_count) * Percentage; + last_sample_count = 0; + last_chrono = now; + } + } + + void account_time(std::chrono::time_point now) { + last_chrono = now; + } + uint64_t Count() const { return count; } + double Avg() const { return average_sec; } + private: + uint64_t count{}; + uint64_t last_sample_count{}; + double average_sec{}; + std::chrono::time_point last_chrono{}; +}; +extern fex_event_counts sigbus_counts; +extern fex_event_counts smc_counts; +extern fex_event_counts softfloat_counts; + +extern std::vector fex_max_thread_loads; +void update_fex_stats(); +} + +#endif //MANGOHUD_FEX_H +#endif //HAVE_FEX diff --git a/src/hud_elements.cpp b/src/hud_elements.cpp index 47bc807739..e8209088e3 100644 --- a/src/hud_elements.cpp +++ b/src/hud_elements.cpp @@ -25,6 +25,7 @@ #endif #include "amdgpu.h" #include "fps_metrics.h" +#include "fex.h" #define CHAR_CELSIUS "\xe2\x84\x83" #define CHAR_FAHRENHEIT "\xe2\x84\x89" @@ -1534,6 +1535,118 @@ void HudElements::_display_session() { ImGui::PopFont(); } +void HudElements::fex_stats() +{ +#ifdef HAVE_FEX + if (!HUDElements.params->fex_stats.enabled) { + return; + } + + ImGui::PushFont(HUDElements.sw_stats->font1); + + if (HUDElements.params->fex_stats.status) { + ImguiNextColumnFirstItem(); + HUDElements.TextColored(HUDElements.colors.engine, "%s", "FEX"); + ImguiNextColumnOrNewRow(); + ImGui::Dummy(ImVec2(0.0f, real_font_size.y)); + ImguiNextColumnOrNewRow(); + right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%s", fex::fex_status); + } + + if (!fex::is_fex_pid_found()) { + ImGui::PopFont(); + return; + } + + if (HUDElements.params->fex_stats.app_type) { + ImguiNextColumnFirstItem(); + HUDElements.TextColored(HUDElements.colors.engine, "%s", "Type"); + ImguiNextColumnOrNewRow(); + 
ImGui::Dummy(ImVec2(0.0f, real_font_size.y)); + ImguiNextColumnOrNewRow(); + right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%s", fex::get_fex_app_type()); + } + + if (HUDElements.params->fex_stats.sigbus_counts) { + ImguiNextColumnFirstItem(); + HUDElements.TextColored(HUDElements.colors.engine, "%s", "SIGBUS"); + ImguiNextColumnOrNewRow(); + ImGui::Dummy(ImVec2(0.0f, real_font_size.y)); + ImguiNextColumnOrNewRow(); + right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%d - %.0f avg/s", fex::sigbus_counts.Count(), fex::sigbus_counts.Avg()); + } + + if (HUDElements.params->fex_stats.smc_counts) { + ImguiNextColumnFirstItem(); + HUDElements.TextColored(HUDElements.colors.engine, "%s", "SMC"); + ImguiNextColumnOrNewRow(); + ImGui::Dummy(ImVec2(0.0f, real_font_size.y)); + ImguiNextColumnOrNewRow(); + right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%d - %.0f avg/s", fex::smc_counts.Count(), fex::smc_counts.Avg()); + } + + if (HUDElements.params->fex_stats.softfloat_counts) { + ImguiNextColumnFirstItem(); + HUDElements.TextColored(HUDElements.colors.engine, "%s", "Softfloat"); + ImguiNextColumnOrNewRow(); + ImGui::Dummy(ImVec2(0.0f, real_font_size.y)); + ImguiNextColumnOrNewRow(); + right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%d - %.0f avg/s", fex::softfloat_counts.Count(), fex::softfloat_counts.Avg()); + } + + ImGui::PopFont(); + + ImguiNextColumnFirstItem(); + ImGui::Dummy(ImVec2(0.0f, real_font_size.y)); + + if (HUDElements.params->fex_stats.hot_threads) { + // Draw hot threads + bool Warning = false; + ImVec4 WarningColor; + + for (auto it : fex::fex_max_thread_loads){ + if (it >= 75.0) { + Warning = true; + WarningColor = ImVec4(1.0f, 0.0f, 0.0f, 1.0f); + } + else if (it >= 50.0) { + Warning = true; + WarningColor = ImVec4(1.0f, 1.0f, 0.0f, 1.0f); + } + } + + ImGui::PushFont(HUDElements.sw_stats->font1); + HUDElements.TextColored(HUDElements.colors.engine, "%s", "FEX JIT top 
loaded threads"); + ImGui::PopFont(); + + ImGui::PushStyleColor(ImGuiCol_FrameBg, ImVec4(0.0f, 0.0f, 0.0f, 0.0f)); + if (Warning) { + ImGui::PushStyleColor(ImGuiCol_PlotHistogram, WarningColor); + } + + ImGui::PlotHistogram("", fex::fex_max_thread_loads.data(), + fex::fex_max_thread_loads.size(), 0, + NULL, 0, 100, + ImVec2(ImGui::GetWindowContentRegionWidth(), 50)); + ImGui::PopStyleColor(1 + (Warning ? 1 : 0)); + } + + if (HUDElements.params->fex_stats.jit_load) { + ImGui::PushFont(HUDElements.sw_stats->font1); + HUDElements.TextColored(HUDElements.colors.engine, "%s", "FEX JIT Load"); + ImGui::PopFont(); + + ImGui::PushStyleColor(ImGuiCol_FrameBg, ImVec4(0.0f, 0.0f, 0.0f, 0.0f)); + + ImGui::PlotLines("", fex::fex_load_data.data(), + fex::fex_load_data.size(), 0, + NULL, 0, 100, + ImVec2(ImGui::GetWindowContentRegionWidth(), 50)); + ImGui::PopStyleColor(1); + } +#endif //HAVE_FEX +} + void HudElements::sort_elements(const std::pair& option) { const auto& param = option.first; const auto& value = option.second; @@ -1582,8 +1695,8 @@ void HudElements::sort_elements(const std::pair& optio {"winesync", {winesync}}, {"present_mode", {present_mode}}, {"network", {network}}, - {"display_server", {_display_session}} - + {"display_server", {_display_session}}, + {"fex_stats", {fex_stats}}, }; auto check_param = display_params.find(param); @@ -1712,6 +1825,8 @@ void HudElements::legacy_elements(){ ordered_functions.push_back({refresh_rate, "refresh_rate", value}); if (params->enabled[OVERLAY_PARAM_ENABLED_display_server]) ordered_functions.push_back({_display_session, "display_session", value}); + if (params->fex_stats.enabled) + ordered_functions.push_back({fex_stats, "fex_stats", value}); } void HudElements::update_exec(){ diff --git a/src/hud_elements.h b/src/hud_elements.h index 6da8df1aa1..2fa512cbb4 100644 --- a/src/hud_elements.h +++ b/src/hud_elements.h @@ -117,6 +117,7 @@ class HudElements{ static void present_mode(); static void network(); static void 
_display_session(); + static void fex_stats(); void convert_colors(const struct overlay_params& params); void convert_colors(bool do_conv, const struct overlay_params& params); diff --git a/src/meson.build b/src/meson.build index a05207910a..5713539bdf 100644 --- a/src/meson.build +++ b/src/meson.build @@ -93,9 +93,16 @@ if is_unixy 'control.cpp', 'device.cpp', 'net.cpp', - 'shell.cpp' + 'shell.cpp', ) + if get_option('with_fex') + pre_args += '-DHAVE_FEX' + vklayer_files += files( + 'fex.cpp', + ) + endif + opengl_files = files( 'gl/glad.c', 'gl/gl_renderer.cpp', diff --git a/src/overlay.cpp b/src/overlay.cpp index c9aa4f802a..a2b741fd10 100644 --- a/src/overlay.cpp +++ b/src/overlay.cpp @@ -25,6 +25,7 @@ #include "amdgpu.h" #include "fps_metrics.h" #include "net.h" +#include "fex.h" #ifdef __linux__ #include @@ -247,6 +248,9 @@ void update_hud_info_with_frametime(struct swapchain_stats& sw_stats, const stru #ifdef __linux__ if (gpus) gpus->update_throttling(); +#endif +#ifdef HAVE_FEX + fex::update_fex_stats(); #endif frametime = frametime_ms; fps = double(1000 / frametime_ms); diff --git a/src/overlay_params.cpp b/src/overlay_params.cpp index cf4446a87f..7863e2c5ef 100644 --- a/src/overlay_params.cpp +++ b/src/overlay_params.cpp @@ -29,6 +29,7 @@ #include "blacklist.h" #include "mesa/util/os_socket.h" #include "file_utils.h" +#include "fex.h" #if defined(HAVE_X11) || defined(HAVE_WAYLAND) #include @@ -418,6 +419,43 @@ parse_fps_metrics(const char *str){ return metrics; } +static overlay_params::fex_stats_options +parse_fex_stats(const char *str) { + overlay_params::fex_stats_options options { +#ifdef HAVE_FEX + .enabled = fex::is_fex_capable(), +#endif + }; + + auto tokens = str_tokenize(str); +#define option_check(str, option) do { \ + if (token == #str) options.option = true; \ + } while (0) + + // If we have any tokens then default disable. 
+ if (!tokens.empty()) { + options.status = false; + options.app_type = false; + options.hot_threads = false; + options.jit_load = false; + options.sigbus_counts = false; + options.smc_counts = false; + options.softfloat_counts = false; + } + + for (auto& token : tokens) { + option_check(status, status); + option_check(apptype, app_type); + option_check(hotthreads, hot_threads); + option_check(jitload, jit_load); + option_check(sigbus, sigbus_counts); + option_check(smc, smc_counts); + option_check(softfloat, softfloat_counts); + } + + return options; +} + #define parse_width(s) parse_unsigned(s) #define parse_height(s) parse_unsigned(s) #define parse_vsync(s) parse_unsigned(s) diff --git a/src/overlay_params.h b/src/overlay_params.h index 5ad56a8d5b..b6c59ea69a 100644 --- a/src/overlay_params.h +++ b/src/overlay_params.h @@ -200,6 +200,7 @@ typedef unsigned long KeySym; OVERLAY_PARAM_CUSTOM(fps_metrics) \ OVERLAY_PARAM_CUSTOM(network) \ OVERLAY_PARAM_CUSTOM(gpu_list) \ + OVERLAY_PARAM_CUSTOM(fex_stats) \ enum overlay_param_position { LAYER_POSITION_TOP_LEFT, @@ -327,6 +328,24 @@ struct overlay_params { std::vector fps_metrics; std::vector network; std::vector gpu_list; + + struct fex_stats_options { + bool enabled {false}; + + // Enabled Texts + bool status {true}; + bool app_type {true}; + + // Graphs + bool hot_threads {true}; + bool jit_load {true}; + + // Counts + bool sigbus_counts {true}; + bool smc_counts {true}; + bool softfloat_counts {true}; + }; + fex_stats_options fex_stats{}; }; const extern char *overlay_param_names[];