From d098545c20918099a638237d4c9a133ad32410fa Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Thu, 28 Dec 2023 18:28:02 -0800 Subject: [PATCH] FEXCore: Removes SRA option, it's now permanently enabled --- .../Source/Interface/Config/Config.json.in | 7 - FEXCore/Source/Interface/Context/Context.h | 2 - .../Core/ArchHelpers/Arm64Emitter.cpp | 8 - .../Interface/Core/ArchHelpers/Arm64Emitter.h | 1 - FEXCore/Source/Interface/Core/Core.cpp | 10 +- .../Interface/Core/Dispatcher/Dispatcher.cpp | 43 ++--- .../Interface/Core/Dispatcher/Dispatcher.h | 11 +- .../Source/Interface/Core/JIT/Arm64/JIT.cpp | 10 -- .../Interface/Core/JIT/Arm64/JITClass.h | 6 - .../Interface/Core/JIT/Arm64/MemoryOps.cpp | 166 +----------------- .../CodeObjectSerializationConfig.h | 7 +- .../ObjectCache/NamedRegionObjectHandler.cpp | 1 - FEXCore/Source/Interface/IR/IR.json | 4 +- FEXCore/Source/Interface/IR/PassManager.cpp | 6 +- FEXCore/Source/Interface/IR/PassManager.h | 4 +- FEXCore/Source/Interface/IR/Passes.h | 1 - .../IR/Passes/RegisterAllocationPass.cpp | 14 +- FEXCore/include/FEXCore/Core/CPUBackend.h | 1 - .../include/FEXCore/Core/SignalDelegator.h | 1 - .../LinuxSyscalls/SignalDelegator.cpp | 78 ++++---- 20 files changed, 70 insertions(+), 311 deletions(-) diff --git a/FEXCore/Source/Interface/Config/Config.json.in b/FEXCore/Source/Interface/Config/Config.json.in index d7a6f589a1..a598459c6e 100644 --- a/FEXCore/Source/Interface/Config/Config.json.in +++ b/FEXCore/Source/Interface/Config/Config.json.in @@ -260,13 +260,6 @@ "Disables optimizations passes for debugging." ] }, - "SRA": { - "Type": "bool", - "Default": "true", - "Desc": [ - "Set to false to disable Static Register Allocation" - ] - }, "GlobalJITNaming": { "Type": "bool", "Default": "false", diff --git a/FEXCore/Source/Interface/Context/Context.h b/FEXCore/Source/Interface/Context/Context.h index 66c296fa10..477433619e 100644 --- a/FEXCore/Source/Interface/Context/Context.h +++ b/FEXCore/Source/Interface/Context/Context.h @@ -210,7 +210,6 @@ namespace FEXCore::Context { FEX_CONFIG_OPT(ThunkHostLibsPath, THUNKHOSTLIBS); FEX_CONFIG_OPT(ThunkHostLibsPath32, THUNKHOSTLIBS32); FEX_CONFIG_OPT(ThunkConfigFile, THUNKCONFIG); - FEX_CONFIG_OPT(StaticRegisterAllocation, SRA); FEX_CONFIG_OPT(GlobalJITNaming, GLOBALJITNAMING); FEX_CONFIG_OPT(LibraryJITNaming, LIBRARYJITNAMING); FEX_CONFIG_OPT(BlockJITNaming, BLOCKJITNAMING); @@ -391,7 +390,6 @@ namespace FEXCore::Context { std::shared_mutex CustomIRMutex; std::atomic HasCustomIRHandlers{}; fextl::unordered_map> CustomIRHandlers; - FEXCore::CPU::DispatcherConfig DispatcherConfig; }; uint64_t HandleSyscall(FEXCore::HLE::SyscallHandler *Handler, FEXCore::Core::CpuStateFrame *Frame, FEXCore::HLE::SyscallArguments *Args); diff --git a/FEXCore/Source/Interface/Core/ArchHelpers/Arm64Emitter.cpp b/FEXCore/Source/Interface/Core/ArchHelpers/Arm64Emitter.cpp index ea13ac7702..873dcfa13a 100644 --- a/FEXCore/Source/Interface/Core/ArchHelpers/Arm64Emitter.cpp +++ b/FEXCore/Source/Interface/Core/ArchHelpers/Arm64Emitter.cpp @@ -611,10 +611,6 @@ void Arm64Emitter::SpillStaticRegs(FEXCore::ARMEmitter::Register TmpReg, bool FP mrs(TmpReg, ARMEmitter::SystemRegister::NZCV); str(TmpReg.W(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.flags[24])); - if (!StaticRegisterAllocation()) { - return; - } - // PF/AF are special, remove them from the mask uint32_t PFAFMask = ((1u << REG_PF.Idx()) | ((1u << REG_AF.Idx()))); unsigned PFAFSpillMask = GPRSpillMask & PFAFMask; @@ -728,10 +724,6 @@ void Arm64Emitter::FillStaticRegs(bool FPRs, uint32_t GPRFillMask, uint32_t FPRF ldr(TmpReg.W(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.flags[24])); msr(ARMEmitter::SystemRegister::NZCV, TmpReg); - if (!StaticRegisterAllocation()) { - return; - } - if (FPRs) { // Set up predicate registers. // We don't bother spilling these in SpillStaticRegs, diff --git a/FEXCore/Source/Interface/Core/ArchHelpers/Arm64Emitter.h b/FEXCore/Source/Interface/Core/ArchHelpers/Arm64Emitter.h index 859a3f192f..70b9bd92e5 100644 --- a/FEXCore/Source/Interface/Core/ArchHelpers/Arm64Emitter.h +++ b/FEXCore/Source/Interface/Core/ArchHelpers/Arm64Emitter.h @@ -238,7 +238,6 @@ class Arm64Emitter : public FEXCore::ARMEmitter::Emitter { FEX_CONFIG_OPT(Disassemble, DISASSEMBLE); #endif - FEX_CONFIG_OPT(StaticRegisterAllocation, SRA); }; } diff --git a/FEXCore/Source/Interface/Core/Core.cpp b/FEXCore/Source/Interface/Core/Core.cpp index e3bc6ddb48..c7ac67e9d1 100644 --- a/FEXCore/Source/Interface/Core/Core.cpp +++ b/FEXCore/Source/Interface/Core/Core.cpp @@ -268,12 +268,10 @@ namespace FEXCore::Context { return false; } - DispatcherConfig.StaticRegisterAllocation = Config.StaticRegisterAllocation && BackendFeatures.SupportsStaticRegisterAllocation; - Dispatcher = FEXCore::CPU::Dispatcher::Create(this, DispatcherConfig); + Dispatcher = FEXCore::CPU::Dispatcher::Create(this); // Set up the SignalDelegator config since core is initialized. FEXCore::SignalDelegator::SignalDelegatorConfig SignalConfig { - .StaticRegisterAllocation = DispatcherConfig.StaticRegisterAllocation, .SupportsAVX = HostFeatures.SupportsAVX, .DispatcherBegin = Dispatcher->Start, @@ -375,9 +373,7 @@ namespace FEXCore::Context { Thread->CTX = this; - bool DoSRA = DispatcherConfig.StaticRegisterAllocation; - - Thread->PassManager->AddDefaultPasses(this, Config.Core == FEXCore::Config::CONFIG_IRJIT, DoSRA); + Thread->PassManager->AddDefaultPasses(this, Config.Core == FEXCore::Config::CONFIG_IRJIT); Thread->PassManager->AddDefaultValidationPasses(); Thread->PassManager->RegisterSyscallHandler(SyscallHandler); @@ -385,7 +381,7 @@ namespace FEXCore::Context { // Create CPU backend switch (Config.Core) { case FEXCore::Config::CONFIG_IRJIT: - Thread->PassManager->InsertRegisterAllocationPass(DoSRA, HostFeatures.SupportsAVX); + Thread->PassManager->InsertRegisterAllocationPass(HostFeatures.SupportsAVX); Thread->CPUBackend = FEXCore::CPU::CreateArm64JITCore(this, Thread); break; case FEXCore::Config::CONFIG_CUSTOM: diff --git a/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.cpp b/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.cpp index b846b22215..a35b3bb93e 100644 --- a/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.cpp +++ b/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.cpp @@ -31,10 +31,9 @@ static void SleepThread(FEXCore::Context::ContextImpl *CTX, FEXCore::Core::CpuSt constexpr size_t MAX_DISPATCHER_CODE_SIZE = 4096 * 2; -Dispatcher::Dispatcher(FEXCore::Context::ContextImpl *ctx, const DispatcherConfig &config) +Dispatcher::Dispatcher(FEXCore::Context::ContextImpl *ctx) : Arm64Emitter(ctx, FEXCore::Allocator::VirtualAlloc(MAX_DISPATCHER_CODE_SIZE, true), MAX_DISPATCHER_CODE_SIZE) - , CTX {ctx} - , config {config} { + , CTX {ctx} { EmitDispatcher(); } @@ -79,9 +78,7 @@ void Dispatcher::EmitDispatcher() { AbsoluteLoopTopAddressFillSRA = GetCursorAddress(); - if (config.StaticRegisterAllocation) { - FillStaticRegs(); - } + FillStaticRegs(); // We want to ensure that we are 16 byte aligned at the top of this loop Align16B(); @@ -172,8 +169,7 @@ void Dispatcher::EmitDispatcher() { { ThreadStopHandlerAddressSpillSRA = GetCursorAddress(); - if (config.StaticRegisterAllocation) - SpillStaticRegs(TMP1); + SpillStaticRegs(TMP1); ThreadStopHandlerAddress = GetCursorAddress(); @@ -186,8 +182,7 @@ void Dispatcher::EmitDispatcher() { { ExitFunctionLinkerAddress = GetCursorAddress(); - if (config.StaticRegisterAllocation) - SpillStaticRegs(TMP1); + SpillStaticRegs(TMP1); ldr(ARMEmitter::XReg::x0, STATE, offsetof(FEXCore::Core::CPUState, DeferredSignalRefCount)); add(ARMEmitter::Size::i64Bit, ARMEmitter::XReg::x0, ARMEmitter::XReg::x0, 1); @@ -204,8 +199,7 @@ void Dispatcher::EmitDispatcher() { blr(ARMEmitter::Reg::r2); } - if (config.StaticRegisterAllocation) - FillStaticRegs(); + FillStaticRegs(); ldr(ARMEmitter::XReg::x1, STATE, offsetof(FEXCore::Core::CPUState, DeferredSignalRefCount)); sub(ARMEmitter::Size::i64Bit, ARMEmitter::XReg::x1, ARMEmitter::XReg::x1, 1); @@ -222,8 +216,7 @@ void Dispatcher::EmitDispatcher() { { Bind(&NoBlock); - if (config.StaticRegisterAllocation) - SpillStaticRegs(TMP1); + SpillStaticRegs(TMP1); ldr(ARMEmitter::XReg::x0, STATE, offsetof(FEXCore::Core::CPUState, DeferredSignalRefCount)); add(ARMEmitter::Size::i64Bit, ARMEmitter::XReg::x0, ARMEmitter::XReg::x0, 1); @@ -242,8 +235,7 @@ void Dispatcher::EmitDispatcher() { blr(ARMEmitter::Reg::r4); // { CTX, Frame, RIP, MaxInst } } - if (config.StaticRegisterAllocation) - FillStaticRegs(); + FillStaticRegs(); ldr(ARMEmitter::XReg::x0, STATE, offsetof(FEXCore::Core::CPUState, DeferredSignalRefCount)); sub(ARMEmitter::Size::i64Bit, ARMEmitter::XReg::x0, ARMEmitter::XReg::x0, 1); @@ -277,8 +269,7 @@ void Dispatcher::EmitDispatcher() { // Needs to be distinct from the SignalHandlerReturnAddress GuestSignal_SIGILL = GetCursorAddress(); - if (config.StaticRegisterAllocation) - SpillStaticRegs(TMP1); + SpillStaticRegs(TMP1); hlt(0); } @@ -288,8 +279,7 @@ void Dispatcher::EmitDispatcher() { // Needs to be distinct from the SignalHandlerReturnAddress GuestSignal_SIGTRAP = GetCursorAddress(); - if (config.StaticRegisterAllocation) - SpillStaticRegs(TMP1); + SpillStaticRegs(TMP1); brk(0); } @@ -299,8 +289,7 @@ void Dispatcher::EmitDispatcher() { // Needs to be distinct from the SignalHandlerReturnAddress GuestSignal_SIGSEGV = GetCursorAddress(); - if (config.StaticRegisterAllocation) - SpillStaticRegs(TMP1); + SpillStaticRegs(TMP1); // hlt/udf = SIGILL // brk = SIGTRAP @@ -320,8 +309,7 @@ void Dispatcher::EmitDispatcher() { { ThreadPauseHandlerAddressSpillSRA = GetCursorAddress(); - if (config.StaticRegisterAllocation) - SpillStaticRegs(TMP1); + SpillStaticRegs(TMP1); ThreadPauseHandlerAddress = GetCursorAddress(); // We are pausing, this means the frontend should be waiting for this thread to idle @@ -388,8 +376,7 @@ void Dispatcher::EmitDispatcher() { str(ARMEmitter::XReg::x1, STATE_PTR(CpuStateFrame, State.rip)); // load static regs - if (config.StaticRegisterAllocation) - FillStaticRegs(); + FillStaticRegs(); // Now go back to the regular dispatcher loop b(&LoopTop); @@ -558,8 +545,8 @@ void Dispatcher::InitThreadPointers(FEXCore::Core::InternalThreadState *Thread) } } -fextl::unique_ptr Dispatcher::Create(FEXCore::Context::ContextImpl *CTX, const DispatcherConfig &Config) { - return fextl::make_unique(CTX, Config); +fextl::unique_ptr Dispatcher::Create(FEXCore::Context::ContextImpl *CTX) { + return fextl::make_unique(CTX); } } diff --git a/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.h b/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.h index bc98c1b743..63d0f504b0 100644 --- a/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.h +++ b/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.h @@ -31,18 +31,14 @@ class ContextImpl; namespace FEXCore::CPU { -struct DispatcherConfig { - bool StaticRegisterAllocation = false; -}; - #define STATE_PTR(STATE_TYPE, FIELD) \ STATE.R(), offsetof(FEXCore::Core::STATE_TYPE, FIELD) class Dispatcher final : public Arm64Emitter { public: - static fextl::unique_ptr Create(FEXCore::Context::ContextImpl *CTX, const DispatcherConfig &Config); + static fextl::unique_ptr Create(FEXCore::Context::ContextImpl *CTX); - Dispatcher(FEXCore::Context::ContextImpl *ctx, const DispatcherConfig &Config); + Dispatcher(FEXCore::Context::ContextImpl *ctx); ~Dispatcher(); /** @@ -106,11 +102,8 @@ class Dispatcher final : public Arm64Emitter { } } - const DispatcherConfig& GetConfig() const { return config; } - protected: FEXCore::Context::ContextImpl *CTX; - DispatcherConfig config; using AsmDispatch = void(*)(FEXCore::Core::CpuStateFrame *Frame); using JITCallback = void(*)(FEXCore::Core::CpuStateFrame *Frame, uint64_t RIP); diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp index 980a931e8b..5c1aad86b4 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp @@ -594,15 +594,6 @@ Arm64JITCore::Arm64JITCore(FEXCore::Context::ContextImpl *ctx, FEXCore::Core::In ClearCache(); // Setup dynamic dispatch. - if (CTX->Dispatcher->GetConfig().StaticRegisterAllocation) { - RT_LoadRegister = &Arm64JITCore::Op_LoadRegisterSRA; - RT_StoreRegister = &Arm64JITCore::Op_StoreRegisterSRA; - } - else { - RT_LoadRegister = &Arm64JITCore::Op_LoadRegister; - RT_StoreRegister = &Arm64JITCore::Op_StoreRegister; - } - if (ParanoidTSO()) { RT_LoadMemTSO = &Arm64JITCore::Op_ParanoidLoadMemTSO; RT_StoreMemTSO = &Arm64JITCore::Op_ParanoidStoreMemTSO; @@ -920,7 +911,6 @@ fextl::unique_ptr CreateArm64JITCore(FEXCore::Context::ContextImpl * CPUBackendFeatures GetArm64JITBackendFeatures() { return CPUBackendFeatures { - .SupportsStaticRegisterAllocation = true, .SupportsFlags = true, .SupportsSaturatingRoundingShifts = true, .SupportsVTBL2 = true, diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/JITClass.h b/FEXCore/Source/Interface/Core/JIT/Arm64/JITClass.h index 63ebc6c6c2..4dd06e68a0 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/JITClass.h +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/JITClass.h @@ -226,9 +226,6 @@ class Arm64JITCore final : public CPUBackend, public Arm64Emitter { void VFScalarUnaryOperation(uint8_t OpSize, uint8_t ElementSize, bool ZeroUpperBits, ScalarUnaryOpCaller ScalarEmit, ARMEmitter::VRegister Dst, ARMEmitter::VRegister Vector1, std::variant Vector2); // Runtime selection; - // Load and store register style. - OpType RT_LoadRegister; - OpType RT_StoreRegister; // Load and store TSO memory style OpType RT_LoadMemTSO; OpType RT_StoreMemTSO; @@ -236,9 +233,6 @@ class Arm64JITCore final : public CPUBackend, public Arm64Emitter { #define DEF_OP(x) void Op_##x(IR::IROp_Header const *IROp, IR::NodeID Node) // Dynamic Dispatcher supporting operations - DEF_OP(LoadRegisterSRA); - DEF_OP(StoreRegisterSRA); - DEF_OP(ParanoidLoadMemTSO); DEF_OP(ParanoidStoreMemTSO); diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp index 81e247ab6b..b85e14c82e 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp @@ -131,170 +131,6 @@ DEF_OP(LoadRegister) { const auto Op = IROp->C(); const auto OpSize = IROp->Size; - if (Op->Class == IR::GPRClass) { - [[maybe_unused]] const auto regId = (Op->Offset / Core::CPUState::GPR_REG_SIZE) - 1; - const auto regOffs = Op->Offset & 7; - - LOGMAN_THROW_A_FMT(regId < StaticRegisters.size(), "out of range regId"); - - switch (OpSize) { - case 1: - LOGMAN_THROW_AA_FMT(regOffs == 0 || regOffs == 1, "unexpected regOffs"); - ldrb(GetReg(Node), STATE, Op->Offset); - break; - - case 2: - LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs"); - ldrh(GetReg(Node), STATE, Op->Offset); - break; - - case 4: - LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs"); - ldr(GetReg(Node).W(), STATE, Op->Offset); - break; - - case 8: - LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs"); - ldr(GetReg(Node).X(), STATE, Op->Offset); - break; - - default: - LOGMAN_MSG_A_FMT("Unhandled LoadRegister GPR size: {}", OpSize); - break; - } - } - else if (Op->Class == IR::FPRClass) { - const auto regSize = HostSupportsSVE256 ? Core::CPUState::XMM_AVX_REG_SIZE - : Core::CPUState::XMM_SSE_REG_SIZE; - [[maybe_unused]] const auto regId = (Op->Offset - offsetof(Core::CpuStateFrame, State.xmm.avx.data[0][0])) / regSize; - - LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Unsupported code path!"); - LOGMAN_THROW_A_FMT(regId < StaticFPRegisters.size(), "out of range regId"); - - const auto host = GetVReg(Node); - - const auto regOffs = Op->Offset & 15; - - switch (OpSize) { - case 1: { - LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs); - ldrb(host, STATE, Op->Offset); - break; - } - case 2: { - LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs); - ldrh(host, STATE, Op->Offset); - break; - } - case 4: { - LOGMAN_THROW_AA_FMT((regOffs & 3) == 0, "unexpected regOffs: {}", regOffs); - ldr(host.S(), STATE, Op->Offset); - break; - } - - case 8: { - LOGMAN_THROW_AA_FMT((regOffs & 7) == 0, "unexpected regOffs: {}", regOffs); - ldr(host.D(), STATE, Op->Offset); - break; - } - - case 16: { - LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs); - ldr(host.Q(), STATE, Op->Offset); - break; - } - } - } else { - LOGMAN_THROW_AA_FMT(false, "Unhandled Op->Class {}", Op->Class); - } -} - -DEF_OP(StoreRegister) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - - if (Op->Class == IR::GPRClass) { - [[maybe_unused]] const auto regId = (Op->Offset / Core::CPUState::GPR_REG_SIZE) - 1; - const auto regOffs = Op->Offset & 7; - - LOGMAN_THROW_A_FMT(regId < StaticFPRegisters.size(), "out of range regId"); - - const auto Src = GetReg(Op->Value.ID()); - - switch (OpSize) { - case 1: - LOGMAN_THROW_AA_FMT(regOffs == 0 || regOffs == 1, "unexpected regOffs"); - strb(Src, STATE, Op->Offset); - break; - - case 2: - LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs"); - strh(Src, STATE, Op->Offset); - break; - - case 4: - LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs"); - str(Src.W(), STATE, Op->Offset); - break; - case 8: - LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs"); - str(Src.X(), STATE, Op->Offset); - break; - - default: - LOGMAN_MSG_A_FMT("Unhandled StoreRegister GPR size: {}", OpSize); - break; - } - } else if (Op->Class == IR::FPRClass) { - const auto regSize = HostSupportsSVE256 ? Core::CPUState::XMM_AVX_REG_SIZE - : Core::CPUState::XMM_SSE_REG_SIZE; - [[maybe_unused]] const auto regId = (Op->Offset - offsetof(Core::CpuStateFrame, State.xmm.avx.data[0][0])) / regSize; - - LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Unsupported code path!"); - LOGMAN_THROW_A_FMT(regId < StaticFPRegisters.size(), "regId out of range"); - - const auto host = GetVReg(Op->Value.ID()); - - const auto regOffs = Op->Offset & 15; - - switch (OpSize) { - case 1: - strb(host, STATE, Op->Offset); - break; - - case 2: - LOGMAN_THROW_AA_FMT((regOffs & 1) == 0, "unexpected regOffs: {}", regOffs); - strh(host, STATE, Op->Offset); - break; - - case 4: - LOGMAN_THROW_AA_FMT((regOffs & 3) == 0, "unexpected regOffs: {}", regOffs); - str(host.S(), STATE, Op->Offset); - break; - - case 8: - LOGMAN_THROW_AA_FMT((regOffs & 7) == 0, "unexpected regOffs: {}", regOffs); - str(host.D(), STATE, Op->Offset); - break; - - case 16: - LOGMAN_THROW_AA_FMT(regOffs == 0, "unexpected regOffs: {}", regOffs); - str(host.Q(), STATE, Op->Offset); - break; - - default: - LOGMAN_MSG_A_FMT("Unhandled StoreRegister FPR size: {}", OpSize); - break; - } - } else { - LOGMAN_THROW_AA_FMT(false, "Unhandled Op->Class {}", Op->Class); - } -} - -DEF_OP(LoadRegisterSRA) { - const auto Op = IROp->C(); - const auto OpSize = IROp->Size; - if (Op->Class == IR::GPRClass) { const auto regId = Op->Offset == offsetof(Core::CpuStateFrame, State.pf_raw) ? (StaticRegisters.size() - 2) : @@ -473,7 +309,7 @@ DEF_OP(LoadRegisterSRA) { } } -DEF_OP(StoreRegisterSRA) { +DEF_OP(StoreRegister) { const auto Op = IROp->C(); const auto OpSize = IROp->Size; diff --git a/FEXCore/Source/Interface/Core/ObjectCache/CodeObjectSerializationConfig.h b/FEXCore/Source/Interface/Core/ObjectCache/CodeObjectSerializationConfig.h index 60e1f1c126..200e3f997c 100644 --- a/FEXCore/Source/Interface/Core/ObjectCache/CodeObjectSerializationConfig.h +++ b/FEXCore/Source/Interface/Core/ObjectCache/CodeObjectSerializationConfig.h @@ -32,9 +32,6 @@ namespace FEXCore::CodeSerialize { // ABI local flag unsafe optimization unsigned ABILocalFlags : 1; - // Static register allocation enabled - unsigned SRA : 1; - // Paranoid TSO mode enabled unsigned ParanoidTSO : 1; @@ -49,7 +46,7 @@ namespace FEXCore::CodeSerialize { // Padding to remove uninitialized data warning from asan // Shows remaining amount of bits available for config - unsigned _Pad : 18; + unsigned _Pad : 19; bool operator==(CodeObjectSerializationConfig const &other) const { return Cookie == other.Cookie && @@ -59,7 +56,6 @@ namespace FEXCore::CodeSerialize { HardwareTSOEnabled == other.HardwareTSOEnabled && TSOEnabled == other.TSOEnabled && ABILocalFlags == other.ABILocalFlags && - SRA == other.SRA && ParanoidTSO == other.ParanoidTSO && Is64BitMode == other.Is64BitMode && SMCChecks == other.SMCChecks && @@ -75,7 +71,6 @@ namespace FEXCore::CodeSerialize { Hash <<= 1; Hash |= other.HardwareTSOEnabled; Hash <<= 1; Hash |= other.TSOEnabled; Hash <<= 1; Hash |= other.ABILocalFlags; - Hash <<= 1; Hash |= other.SRA; Hash <<= 1; Hash |= other.ParanoidTSO; Hash <<= 1; Hash |= other.Is64BitMode; Hash <<= 2; Hash |= other.SMCChecks; diff --git a/FEXCore/Source/Interface/Core/ObjectCache/NamedRegionObjectHandler.cpp b/FEXCore/Source/Interface/Core/ObjectCache/NamedRegionObjectHandler.cpp index 4ca0db5fa4..3a9c116971 100644 --- a/FEXCore/Source/Interface/Core/ObjectCache/NamedRegionObjectHandler.cpp +++ b/FEXCore/Source/Interface/Core/ObjectCache/NamedRegionObjectHandler.cpp @@ -18,7 +18,6 @@ namespace FEXCore::CodeSerialize { DefaultSerializationConfig.MultiBlock = ctx->Config.Multiblock; DefaultSerializationConfig.TSOEnabled = ctx->Config.TSOEnabled; DefaultSerializationConfig.ABILocalFlags = ctx->Config.ABILocalFlags; - DefaultSerializationConfig.SRA = ctx->Config.StaticRegisterAllocation; DefaultSerializationConfig.ParanoidTSO = ctx->Config.ParanoidTSO; DefaultSerializationConfig.Is64BitMode = ctx->Config.Is64BitMode; DefaultSerializationConfig.SMCChecks = ctx->Config.SMCChecks; diff --git a/FEXCore/Source/Interface/IR/IR.json b/FEXCore/Source/Interface/IR/IR.json index b9ff45d619..cd0f7f5762 100644 --- a/FEXCore/Source/Interface/IR/IR.json +++ b/FEXCore/Source/Interface/IR/IR.json @@ -347,8 +347,7 @@ "Desc": ["Loads a value from the static-ra context with offset", "Dest = Ctx[Offset]" ], - "DestSize": "Size", - "DynamicDispatch": true + "DestSize": "Size" }, "StoreRegister SSA:$Value, i1:$IsPrewrite, u32:$Offset, RegisterClass:$Class, RegisterClass:$StaticClass, u8:#Size": { @@ -359,7 +358,6 @@ "Truncates if value's type is too large" ], "DestSize": "Size", - "DynamicDispatch": true, "EmitValidation": [ "WalkFindRegClass($Value) == $Class" ] diff --git a/FEXCore/Source/Interface/IR/PassManager.cpp b/FEXCore/Source/Interface/IR/PassManager.cpp index dd915a9721..1f3abbc441 100644 --- a/FEXCore/Source/Interface/IR/PassManager.cpp +++ b/FEXCore/Source/Interface/IR/PassManager.cpp @@ -66,7 +66,7 @@ void PassManager::Finalize() { } } -void PassManager::AddDefaultPasses(FEXCore::Context::ContextImpl *ctx, bool InlineConstants, bool StaticRegisterAllocation) { +void PassManager::AddDefaultPasses(FEXCore::Context::ContextImpl *ctx, bool InlineConstants) { FEX_CONFIG_OPT(DisablePasses, O0); if (!DisablePasses()) { @@ -101,8 +101,8 @@ void PassManager::AddDefaultValidationPasses() { #endif } -void PassManager::InsertRegisterAllocationPass(bool OptimizeSRA, bool SupportsAVX) { - InsertPass(IR::CreateRegisterAllocationPass(GetPass("Compaction"), OptimizeSRA, SupportsAVX), "RA"); +void PassManager::InsertRegisterAllocationPass(bool SupportsAVX) { + InsertPass(IR::CreateRegisterAllocationPass(GetPass("Compaction"), SupportsAVX), "RA"); } bool PassManager::Run(IREmitter *IREmit) { diff --git a/FEXCore/Source/Interface/IR/PassManager.h b/FEXCore/Source/Interface/IR/PassManager.h index 2b613ba8e2..08efd124d4 100644 --- a/FEXCore/Source/Interface/IR/PassManager.h +++ b/FEXCore/Source/Interface/IR/PassManager.h @@ -45,7 +45,7 @@ class Pass { class PassManager final { friend class InlineCallOptimization; public: - void AddDefaultPasses(FEXCore::Context::ContextImpl *ctx, bool InlineConstants, bool StaticRegisterAllocation); + void AddDefaultPasses(FEXCore::Context::ContextImpl *ctx, bool InlineConstants); void AddDefaultValidationPasses(); Pass* InsertPass(fextl::unique_ptr Pass, fextl::string Name = "") { auto PassPtr = InsertAt(Passes.end(), std::move(Pass))->get(); @@ -56,7 +56,7 @@ class PassManager final { return PassPtr; } - void InsertRegisterAllocationPass(bool OptimizeSRA, bool SupportsAVX); + void InsertRegisterAllocationPass(bool SupportsAVX); bool Run(IREmitter *IREmit); diff --git a/FEXCore/Source/Interface/IR/Passes.h b/FEXCore/Source/Interface/IR/Passes.h index 9eec745a70..07d8760420 100644 --- a/FEXCore/Source/Interface/IR/Passes.h +++ b/FEXCore/Source/Interface/IR/Passes.h @@ -24,7 +24,6 @@ fextl::unique_ptr CreateDeadStoreElimination(bool SupportsAVX fextl::unique_ptr CreatePassDeadCodeElimination(); fextl::unique_ptr CreateIRCompaction(FEXCore::Utils::IntrusivePooledAllocator &Allocator); fextl::unique_ptr CreateRegisterAllocationPass(FEXCore::IR::Pass* CompactionPass, - bool OptimizeSRA, bool SupportsAVX); fextl::unique_ptr CreateLongDivideEliminationPass(); diff --git a/FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.cpp b/FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.cpp index 8df0fdb460..b398eed214 100644 --- a/FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.cpp +++ b/FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.cpp @@ -223,7 +223,7 @@ namespace { class ConstrainedRAPass final : public RegisterAllocationPass { public: - ConstrainedRAPass(FEXCore::IR::Pass* _CompactionPass, bool OptimizeSRA, bool SupportsAVX); + ConstrainedRAPass(FEXCore::IR::Pass* _CompactionPass, bool SupportsAVX); ~ConstrainedRAPass(); bool Run(IREmitter *IREmit) override; @@ -248,7 +248,6 @@ namespace { RegisterGraph *Graph; FEXCore::IR::Pass* CompactionPass; - bool OptimizeSRA; bool SupportsAVX; fextl::vector LiveRanges; @@ -297,8 +296,8 @@ namespace { bool RunAllocateVirtualRegisters(IREmitter *IREmit); }; - ConstrainedRAPass::ConstrainedRAPass(FEXCore::IR::Pass* _CompactionPass, bool _OptimizeSRA, bool _SupportsAVX) - : CompactionPass {_CompactionPass}, OptimizeSRA(_OptimizeSRA), SupportsAVX{_SupportsAVX} { + ConstrainedRAPass::ConstrainedRAPass(FEXCore::IR::Pass* _CompactionPass, bool _SupportsAVX) + : CompactionPass {_CompactionPass}, SupportsAVX{_SupportsAVX} { } ConstrainedRAPass::~ConstrainedRAPass() { @@ -1402,8 +1401,7 @@ namespace { ResetRegisterGraph(Graph, SSACount); FindNodeClasses(Graph, &IR); CalculateLiveRange(&IR); - if (OptimizeSRA) - OptimizeStaticRegisters(&IR); + OptimizeStaticRegisters(&IR); // Linear forward scan based interference calculation is faster for smaller blocks // Smarter block based interference calculation is faster for larger blocks @@ -1470,7 +1468,7 @@ namespace { return Changed; } - fextl::unique_ptr CreateRegisterAllocationPass(FEXCore::IR::Pass* CompactionPass, bool OptimizeSRA, bool SupportsAVX) { - return fextl::make_unique(CompactionPass, OptimizeSRA, SupportsAVX); + fextl::unique_ptr CreateRegisterAllocationPass(FEXCore::IR::Pass* CompactionPass, bool SupportsAVX) { + return fextl::make_unique(CompactionPass, SupportsAVX); } } diff --git a/FEXCore/include/FEXCore/Core/CPUBackend.h b/FEXCore/include/FEXCore/Core/CPUBackend.h index f0c400d3ae..4f9cab7b0f 100644 --- a/FEXCore/include/FEXCore/Core/CPUBackend.h +++ b/FEXCore/include/FEXCore/Core/CPUBackend.h @@ -35,7 +35,6 @@ namespace CodeSerialize { namespace CPU { struct CPUBackendFeatures { - bool SupportsStaticRegisterAllocation = false; bool SupportsFlags = false; bool SupportsSaturatingRoundingShifts = false; bool SupportsVTBL2 = false; diff --git a/FEXCore/include/FEXCore/Core/SignalDelegator.h b/FEXCore/include/FEXCore/Core/SignalDelegator.h index 5b9fe8826c..0e319c4c34 100644 --- a/FEXCore/include/FEXCore/Core/SignalDelegator.h +++ b/FEXCore/include/FEXCore/Core/SignalDelegator.h @@ -48,7 +48,6 @@ namespace Core { virtual void UninstallTLSState(FEXCore::Core::InternalThreadState *Thread) = 0; struct SignalDelegatorConfig { - bool StaticRegisterAllocation{}; bool SupportsAVX{}; // Dispatcher information diff --git a/Source/Tools/LinuxEmulation/LinuxSyscalls/SignalDelegator.cpp b/Source/Tools/LinuxEmulation/LinuxSyscalls/SignalDelegator.cpp index 0ca6f52862..924a97f48e 100644 --- a/Source/Tools/LinuxEmulation/LinuxSyscalls/SignalDelegator.cpp +++ b/Source/Tools/LinuxEmulation/LinuxSyscalls/SignalDelegator.cpp @@ -1175,41 +1175,39 @@ namespace FEX::HLE { // Spill the SRA regardless of signal handler type // We are going to be returning to the top of the dispatcher which will fill again // Otherwise we might load garbage - if (Config.StaticRegisterAllocation) { - if (WasInJIT) { - uint32_t IgnoreMask{}; + if (WasInJIT) { + uint32_t IgnoreMask{}; #ifdef _M_ARM_64 - if (Frame->InSyscallInfo != 0) { - // We are in a syscall, this means we are in a weird register state - // We need to spill SRA but only some of it, since some values have already been spilled - // Lower 16 bits tells us which registers are already spilled to the context - // So we ignore spilling those ones - IgnoreMask = Frame->InSyscallInfo & 0xFFFF; - } - else { - // We must spill everything - IgnoreMask = 0; - } + if (Frame->InSyscallInfo != 0) { + // We are in a syscall, this means we are in a weird register state + // We need to spill SRA but only some of it, since some values have already been spilled + // Lower 16 bits tells us which registers are already spilled to the context + // So we ignore spilling those ones + IgnoreMask = Frame->InSyscallInfo & 0xFFFF; + } + else { + // We must spill everything + IgnoreMask = 0; + } #endif - // We are in jit, SRA must be spilled - SpillSRA(Thread, ucontext, IgnoreMask); + // We are in jit, SRA must be spilled + SpillSRA(Thread, ucontext, IgnoreMask); - ContextBackup->Flags |= ArchHelpers::Context::ContextFlags::CONTEXT_FLAG_INJIT; + ContextBackup->Flags |= ArchHelpers::Context::ContextFlags::CONTEXT_FLAG_INJIT; - // We are leaving the syscall information behind. Make sure to store the previous state. - ContextBackup->InSyscallInfo = Thread->CurrentFrame->InSyscallInfo; - Thread->CurrentFrame->InSyscallInfo = 0; - } else { - if (!IsAddressInDispatcher(OldPC)) { - // This is likely to cause issues but in some cases it isn't fatal - // This can also happen if we have put a signal on hold, then we just reenabled the signal - // So we are in the syscall handler - // Only throw a log message in this case - if constexpr (false) { - // XXX: Messages in the signal handler can cause us to crash - LogMan::Msg::EFmt("Signals in dispatcher have unsynchronized context"); - } + // We are leaving the syscall information behind. Make sure to store the previous state. + ContextBackup->InSyscallInfo = Thread->CurrentFrame->InSyscallInfo; + Thread->CurrentFrame->InSyscallInfo = 0; + } else { + if (!IsAddressInDispatcher(OldPC)) { + // This is likely to cause issues but in some cases it isn't fatal + // This can also happen if we have put a signal on hold, then we just reenabled the signal + // So we are in the syscall handler + // Only throw a log message in this case + if constexpr (false) { + // XXX: Messages in the signal handler can cause us to crash + LogMan::Msg::EFmt("Signals in dispatcher have unsynchronized context"); } } } @@ -1314,15 +1312,13 @@ namespace FEX::HLE { // Store our thread state so we can come back to this StoreThreadState(Thread, Signal, ucontext); - if (Config.StaticRegisterAllocation && Thread->CPUBackend->IsAddressInCodeBuffer(ArchHelpers::Context::GetPc(ucontext))) { + if (Thread->CPUBackend->IsAddressInCodeBuffer(ArchHelpers::Context::GetPc(ucontext))) { // We are in jit, SRA must be spilled ArchHelpers::Context::SetPc(ucontext, Config.ThreadPauseHandlerAddressSpillSRA); } else { - if (Config.StaticRegisterAllocation) { - // We are in non-jit, SRA is already spilled - LOGMAN_THROW_A_FMT(!IsAddressInDispatcher(ArchHelpers::Context::GetPc(ucontext)), - "Signals in dispatcher have unsynchronized context"); - } + // We are in non-jit, SRA is already spilled + LOGMAN_THROW_A_FMT(!IsAddressInDispatcher(ArchHelpers::Context::GetPc(ucontext)), + "Signals in dispatcher have unsynchronized context"); ArchHelpers::Context::SetPc(ucontext, Config.ThreadPauseHandlerAddress); } @@ -1347,15 +1343,13 @@ namespace FEX::HLE { Thread->CurrentFrame->SignalHandlerRefCounter = 0; // Set the new PC - if (Config.StaticRegisterAllocation && Thread->CPUBackend->IsAddressInCodeBuffer(ArchHelpers::Context::GetPc(ucontext))) { + if (Thread->CPUBackend->IsAddressInCodeBuffer(ArchHelpers::Context::GetPc(ucontext))) { // We are in jit, SRA must be spilled ArchHelpers::Context::SetPc(ucontext, Config.ThreadStopHandlerAddressSpillSRA); } else { - if (Config.StaticRegisterAllocation) { - // We are in non-jit, SRA is already spilled - LOGMAN_THROW_A_FMT(!IsAddressInDispatcher(ArchHelpers::Context::GetPc(ucontext)), - "Signals in dispatcher have unsynchronized context"); - } + // We are in non-jit, SRA is already spilled + LOGMAN_THROW_A_FMT(!IsAddressInDispatcher(ArchHelpers::Context::GetPc(ucontext)), + "Signals in dispatcher have unsynchronized context"); ArchHelpers::Context::SetPc(ucontext, Config.ThreadStopHandlerAddress); }