From 248dc97993bc9152d8558d07f6a341357a0e7c22 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Sat, 6 Jan 2024 19:22:43 -0800 Subject: [PATCH 1/3] FEXCore: Decompose some std::function usage to regular pointers The delinker step of the JIT was using std::function with capturing lambdas, which required memory allocation that was unnecessary. Because the compiler can't see through our std::function usage, it could never decompose these by itself. By passing the Thread's frame and record to the function as arguments, the signature can be a raw function pointer. This fixes an area of concern from: https://github.com/FEX-Emu/FEX/blob/main/docs/ProgrammingConcerns.md#stdfunction-and-lambdas --- FEXCore/Source/Interface/Context/Context.h | 14 ++++--- FEXCore/Source/Interface/Core/Core.cpp | 4 +- .../Source/Interface/Core/JIT/Arm64/JIT.cpp | 38 +++++++++++-------- FEXCore/Source/Interface/Core/LookupCache.h | 15 ++++---- 4 files changed, 39 insertions(+), 32 deletions(-) diff --git a/FEXCore/Source/Interface/Context/Context.h b/FEXCore/Source/Interface/Context/Context.h index 09a280f097..9916303a7d 100644 --- a/FEXCore/Source/Interface/Context/Context.h +++ b/FEXCore/Source/Interface/Context/Context.h @@ -68,6 +68,13 @@ namespace FEXCore::Context { MODE_SINGLESTEP = 1, }; + struct ExitFunctionLinkData { + uint64_t HostBranch; + uint64_t GuestRIP; + }; + + using BlockDelinkerFunc = void(*)(FEXCore::Core::CpuStateFrame *Frame, FEXCore::Context::ExitFunctionLinkData *Record); + class ContextImpl final : public FEXCore::Context::Context { public: // Context base class implementation. 
@@ -274,12 +281,7 @@ namespace FEXCore::Context { void SignalThread(FEXCore::Core::InternalThreadState *Thread, FEXCore::Core::SignalEvent Event); static void ThreadRemoveCodeEntry(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP); - static void ThreadAddBlockLink(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestDestination, uintptr_t HostLink, const std::function &delinker); - - struct ExitFunctionLinkData { - uint64_t HostBranch; - uint64_t GuestRIP; - }; + static void ThreadAddBlockLink(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestDestination, FEXCore::Context::ExitFunctionLinkData *HostLink, const BlockDelinkerFunc &delinker); template static uint64_t ThreadExitFunctionLink(FEXCore::Core::CpuStateFrame *Frame, ExitFunctionLinkData *Record) { diff --git a/FEXCore/Source/Interface/Core/Core.cpp b/FEXCore/Source/Interface/Core/Core.cpp index 02e40f8d7a..5ba18b8cd2 100644 --- a/FEXCore/Source/Interface/Core/Core.cpp +++ b/FEXCore/Source/Interface/Core/Core.cpp @@ -1237,7 +1237,7 @@ namespace FEXCore::Context { } } - void ContextImpl::ThreadAddBlockLink(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestDestination, uintptr_t HostLink, const std::function &delinker) { + void ContextImpl::ThreadAddBlockLink(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestDestination, FEXCore::Context::ExitFunctionLinkData *HostLink, const FEXCore::Context::BlockDelinkerFunc &delinker) { auto lk = GuardSignalDeferringSection(static_cast(Thread->CTX)->CodeInvalidationMutex, Thread); Thread->LookupCache->AddBlockLink(GuestDestination, HostLink, delinker); @@ -1249,7 +1249,7 @@ namespace FEXCore::Context { std::lock_guard lk(Thread->LookupCache->WriteLock); Thread->DebugStore.erase(GuestRIP); - Thread->LookupCache->Erase(GuestRIP); + Thread->LookupCache->Erase(Thread->CurrentFrame, GuestRIP); } CustomIRResult ContextImpl::AddCustomIREntrypoint(uintptr_t Entrypoint, CustomIREntrypointHandler Handler, void *Creator, void *Data) { diff 
--git a/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp index 390e9ab571..2fd3af18d1 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp @@ -481,7 +481,24 @@ void Arm64JITCore::Op_Unhandled(IR::IROp_Header const *IROp, IR::NodeID Node) { } -static uint64_t Arm64JITCore_ExitFunctionLink(FEXCore::Core::CpuStateFrame *Frame, FEXCore::Context::ContextImpl::ExitFunctionLinkData *Record) { +static void DirectBlockDelinker(FEXCore::Core::CpuStateFrame *Frame, FEXCore::Context::ExitFunctionLinkData *Record) { + auto LinkerAddress = Frame->Pointers.Common.ExitFunctionLinker; + uintptr_t branch = (uintptr_t)(Record) - 8; + FEXCore::ARMEmitter::Emitter emit((uint8_t*)(branch), 8); + FEXCore::ARMEmitter::ForwardLabel l_BranchHost; + emit.ldr(FEXCore::ARMEmitter::XReg::x0, &l_BranchHost); + emit.blr(FEXCore::ARMEmitter::Reg::r0); + emit.Bind(&l_BranchHost); + emit.dc64(LinkerAddress); + FEXCore::ARMEmitter::Emitter::ClearICache((void*)branch, 8); +} + +static void IndirectBlockDelinker(FEXCore::Core::CpuStateFrame *Frame, FEXCore::Context::ExitFunctionLinkData *Record) { + auto LinkerAddress = Frame->Pointers.Common.ExitFunctionLinker; + Record->HostBranch = LinkerAddress; +} + +static uint64_t Arm64JITCore_ExitFunctionLink(FEXCore::Core::CpuStateFrame *Frame, FEXCore::Context::ExitFunctionLinkData *Record) { auto Thread = Frame->Thread; auto GuestRip = Record->GuestRIP; @@ -493,34 +510,23 @@ static uint64_t Arm64JITCore_ExitFunctionLink(FEXCore::Core::CpuStateFrame *Fram } uintptr_t branch = (uintptr_t)(Record) - 8; - auto LinkerAddress = Frame->Pointers.Common.ExitFunctionLinker; auto offset = HostCode/4 - branch/4; if (vixl::IsInt26(offset)) { // optimal case - can branch directly // patch the code - FEXCore::ARMEmitter::Emitter emit((uint8_t*)(branch), 24); + FEXCore::ARMEmitter::Emitter emit((uint8_t*)(branch), 4); emit.b(offset); - 
FEXCore::ARMEmitter::Emitter::ClearICache((void*)branch, 24); + FEXCore::ARMEmitter::Emitter::ClearICache((void*)branch, 4); // Add de-linking handler - Thread->LookupCache->AddBlockLink(GuestRip, (uintptr_t)Record, [branch, LinkerAddress]{ - FEXCore::ARMEmitter::Emitter emit((uint8_t*)(branch), 24); - FEXCore::ARMEmitter::ForwardLabel l_BranchHost; - emit.ldr(FEXCore::ARMEmitter::XReg::x0, &l_BranchHost); - emit.blr(FEXCore::ARMEmitter::Reg::r0); - emit.Bind(&l_BranchHost); - emit.dc64(LinkerAddress); - FEXCore::ARMEmitter::Emitter::ClearICache((void*)branch, 24); - }); + Thread->LookupCache->AddBlockLink(GuestRip, Record, DirectBlockDelinker); } else { // fallback case - do a soft-er link by patching the pointer Record->HostBranch = HostCode; // Add de-linking handler - Thread->LookupCache->AddBlockLink(GuestRip, (uintptr_t)Record, [Record, LinkerAddress]{ - Record->HostBranch = LinkerAddress; - }); + Thread->LookupCache->AddBlockLink(GuestRip, Record, IndirectBlockDelinker); } return HostCode; diff --git a/FEXCore/Source/Interface/Core/LookupCache.h b/FEXCore/Source/Interface/Core/LookupCache.h index 0f0e51bac5..d02d02a380 100644 --- a/FEXCore/Source/Interface/Core/LookupCache.h +++ b/FEXCore/Source/Interface/Core/LookupCache.h @@ -100,15 +100,15 @@ class LookupCache { L1Entry.HostCode = (uintptr_t)HostCode; } - void Erase(uint64_t Address) { + void Erase(FEXCore::Core::CpuStateFrame *Frame, uint64_t Address) { std::lock_guard lk(WriteLock); // Sever any links to this block - auto lower = BlockLinks->lower_bound({Address, 0}); - auto upper = BlockLinks->upper_bound({Address, UINTPTR_MAX}); + auto lower = BlockLinks->lower_bound({Address, nullptr}); + auto upper = BlockLinks->upper_bound({Address, reinterpret_cast(UINTPTR_MAX)}); for (auto it = lower; it != upper; it = BlockLinks->erase(it)) { - it->second(); + it->second(Frame, it->first.HostLink); } // Remove from BlockList @@ -141,8 +141,7 @@ class LookupCache { BlockPointers[PageOffset].HostCode = 0; } - - 
void AddBlockLink(uint64_t GuestDestination, FEXCore::Context::ExitFunctionLinkData * HostLink, const FEXCore::Context::BlockDelinkerFunc &delinker) { std::lock_guard lk(WriteLock); BlockLinks->insert({{GuestDestination, HostLink}, delinker}); @@ -224,7 +223,7 @@ class LookupCache { struct BlockLinkTag { uint64_t GuestDestination; - uintptr_t HostLink; + FEXCore::Context::ExitFunctionLinkData *HostLink; bool operator <(const BlockLinkTag& other) const { if (GuestDestination < other.GuestDestination) @@ -243,7 +242,7 @@ class LookupCache { // // This makes `BlockLinks` look like a raw pointer that could memory leak, but since it is backed by the MBR, it won't. std::pmr::monotonic_buffer_resource BlockLinks_mbr; - using BlockLinksMapType = std::pmr::map>; + using BlockLinksMapType = std::pmr::map; fextl::unique_ptr> BlockLinks_pma; BlockLinksMapType *BlockLinks; From c01e6283ae03c7fc887739184562e02f2ab16fa1 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Mon, 8 Jan 2024 22:01:26 -0800 Subject: [PATCH 2/3] CodeEmitter: Support a single use forward label Currently every use of the forward label calls into jemalloc to allocate memory. This allows a forward label that doesn't require any memory allocation, which is the common case in FEX. 
--- .../Core/ArchHelpers/CodeEmitter/ALUOps.inl | 14 +- .../ArchHelpers/CodeEmitter/BranchOps.inl | 50 ++-- .../Core/ArchHelpers/CodeEmitter/Emitter.h | 267 +++++++++--------- .../ArchHelpers/CodeEmitter/LoadstoreOps.inl | 48 +++- 4 files changed, 218 insertions(+), 161 deletions(-) diff --git a/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/ALUOps.inl b/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/ALUOps.inl index 3c7ab467c7..9010b013e3 100644 --- a/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/ALUOps.inl +++ b/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/ALUOps.inl @@ -35,8 +35,10 @@ public: constexpr uint32_t Op = 0b0001'0000 << 24; DataProcessing_PCRel_Imm(Op, rd, Imm); } - void adr(FEXCore::ARMEmitter::Register rd, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::ADR }); + template + requires (std::is_same_v || std::is_same_v) + void adr(FEXCore::ARMEmitter::Register rd, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::ADR }); constexpr uint32_t Op = 0b0001'0000 << 24; DataProcessing_PCRel_Imm(Op, rd, 0); } @@ -62,8 +64,10 @@ public: constexpr uint32_t Op = 0b1001'0000 << 24; DataProcessing_PCRel_Imm(Op, rd, Imm); } - void adrp(FEXCore::ARMEmitter::Register rd, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::ADRP }); + template + requires (std::is_same_v || std::is_same_v) + void adrp(FEXCore::ARMEmitter::Register rd, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::ADRP }); constexpr uint32_t Op = 0b1001'0000 << 24; DataProcessing_PCRel_Imm(Op, rd, 0); } @@ -105,7 +109,7 @@ public: } } void 
LongAddressGen(FEXCore::ARMEmitter::Register rd, ForwardLabel* Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::LONG_ADDRESS_GEN }); + Label->Insts.emplace_back(SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::LONG_ADDRESS_GEN }); // Emit a register index and a nop. These will be backpatched. dc32(rd.Idx()); nop(); diff --git a/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/BranchOps.inl b/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/BranchOps.inl index 0b417cac2f..abe01ddfab 100644 --- a/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/BranchOps.inl +++ b/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/BranchOps.inl @@ -18,8 +18,10 @@ public: constexpr uint32_t Op = 0b0101'010 << 25; Branch_Conditional(Op, 0, 0, Cond, Imm >> 2); } - void b(FEXCore::ARMEmitter::Condition Cond, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::BC }); + template + requires (std::is_same_v || std::is_same_v) + void b(FEXCore::ARMEmitter::Condition Cond, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::BC }); constexpr uint32_t Op = 0b0101'010 << 25; Branch_Conditional(Op, 0, 0, Cond, 0); } @@ -45,8 +47,10 @@ public: Branch_Conditional(Op, 0, 1, Cond, Imm >> 2); } - void bc(FEXCore::ARMEmitter::Condition Cond, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::BC }); + template + requires (std::is_same_v || std::is_same_v) + void bc(FEXCore::ARMEmitter::Condition Cond, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::BC }); 
constexpr uint32_t Op = 0b0101'010 << 25; Branch_Conditional(Op, 0, 1, Cond, 0); } @@ -102,8 +106,10 @@ public: UnconditionalBranch(Op, Imm >> 2); } - void b(ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::B }); + template + requires (std::is_same_v || std::is_same_v) + void b(LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::B }); constexpr uint32_t Op = 0b0001'01 << 26; UnconditionalBranch(Op, 0); @@ -131,8 +137,10 @@ public: UnconditionalBranch(Op, Imm >> 2); } - void bl(ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::B }); + template + requires (std::is_same_v || std::is_same_v) + void bl(LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::B }); constexpr uint32_t Op = 0b1001'01 << 26; UnconditionalBranch(Op, 0); @@ -163,8 +171,10 @@ public: CompareAndBranch(Op, s, rt, Imm >> 2); } - void cbz(FEXCore::ARMEmitter::Size s, FEXCore::ARMEmitter::Register rt, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::BC }); + template + requires (std::is_same_v || std::is_same_v) + void cbz(FEXCore::ARMEmitter::Size s, FEXCore::ARMEmitter::Register rt, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::BC }); constexpr uint32_t Op = 0b0011'0100 << 24; @@ -195,8 +205,10 @@ public: CompareAndBranch(Op, s, rt, Imm >> 2); } - void cbnz(FEXCore::ARMEmitter::Size s, FEXCore::ARMEmitter::Register rt, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = 
GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::BC }); + template + requires (std::is_same_v || std::is_same_v) + void cbnz(FEXCore::ARMEmitter::Size s, FEXCore::ARMEmitter::Register rt, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::BC }); constexpr uint32_t Op = 0b0011'0101 << 24; @@ -226,8 +238,11 @@ public: TestAndBranch(Op, rt, Bit, Imm >> 2); } - void tbz(FEXCore::ARMEmitter::Register rt, uint32_t Bit, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::TEST_BRANCH }); + + template + requires (std::is_same_v || std::is_same_v) + void tbz(FEXCore::ARMEmitter::Register rt, uint32_t Bit, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::TEST_BRANCH }); constexpr uint32_t Op = 0b0011'0110 << 24; @@ -256,8 +271,11 @@ public: TestAndBranch(Op, rt, Bit, Imm >> 2); } - void tbnz(FEXCore::ARMEmitter::Register rt, uint32_t Bit, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::TEST_BRANCH }); + + template + requires (std::is_same_v || std::is_same_v) + void tbnz(FEXCore::ARMEmitter::Register rt, uint32_t Bit, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::TEST_BRANCH }); constexpr uint32_t Op = 0b0011'0111 << 24; TestAndBranch(Op, rt, Bit, 0); diff --git a/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/Emitter.h b/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/Emitter.h index df4ccfea47..babb1801f4 100644 --- a/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/Emitter.h +++ b/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/Emitter.h @@ 
-538,27 +538,29 @@ namespace FEXCore::ARMEmitter { uint8_t *Location{}; }; - /* This `ForwardLabel` struct used for retaining a location for PC-Relative instructions. + /* This `SingleUseForwardLabel` struct used for retaining a location for PC-Relative instructions. * This is specifically a label for a target that is logically `above` an instruction that uses it. * Which means that a branch would jump forwards. * - * This can be bound to multiple instructions, so it needs a vector for each bind instruction type. + * The `ForwardLabel` struct can be bound to multiple instructions, so it needs a vector for each bind instruction type. */ - struct ForwardLabel { - struct Instructions { - enum class InstType { - ADR, - ADRP, - B, - BC, - TEST_BRANCH, - RELATIVE_LOAD, - LONG_ADDRESS_GEN, - }; - uint8_t *Location{}; - InstType Type; + struct SingleUseForwardLabel { + enum class InstType { + UNKNOWN, + ADR, + ADRP, + B, + BC, + TEST_BRANCH, + RELATIVE_LOAD, + LONG_ADDRESS_GEN, }; - fextl::vector Insts{}; + uint8_t *Location{}; + InstType Type = InstType::UNKNOWN; + }; + + struct ForwardLabel { + fextl::vector Insts{}; }; /* This `BiDirectionalLabel` struct used for retaining a location for PC-Relative instructions. @@ -570,6 +572,15 @@ namespace FEXCore::ARMEmitter { ForwardLabel Forward; }; + static inline void AddLocationToLabel(SingleUseForwardLabel *Label, SingleUseForwardLabel&& Location) { + LOGMAN_THROW_A_FMT(Label->Type == SingleUseForwardLabel::InstType::UNKNOWN, "Trying to bind a SingleUseForwardLabel to multiple locations. Use ForwardLabel instead."); + *Label = std::move(Location); + } + + static inline void AddLocationToLabel(ForwardLabel *Label, SingleUseForwardLabel&& Location) { + Label->Insts.emplace_back(std::move(Location)); + } + // Some FCMA ASIMD instructions support a rotation argument. 
enum class Rotation : uint32_t { ROTATE_0 = 0b00, @@ -629,127 +640,131 @@ namespace FEXCore::ARMEmitter { Label->Location = GetCursorAddress(); } - // Bind a forward label to a location. - // This walks all the instructions in the label's vector. - // Then backpatching all instructions that have used the label. - template - void Bind(ForwardLabel *Label) { - if constexpr (WarnAboutEmpty) { - LOGMAN_THROW_A_FMT(Label->Insts.empty() == false, "Binding forward label that didn't have any instructions using it"); - } + void Bind(const SingleUseForwardLabel *Label) { uint8_t *CurrentAddress = GetCursorAddress(); - for (const auto &Inst : Label->Insts) { - // Patch up the instructions - switch (Inst.Type) { - case ForwardLabel::Instructions::InstType::ADR: { - uint32_t *Instruction = reinterpret_cast(Inst.Location); - int64_t Imm = reinterpret_cast(CurrentAddress) - reinterpret_cast(Instruction); - LOGMAN_THROW_A_FMT(IsADRRange(Imm), "Unscaled offset too large"); - uint32_t InstMask = 0b11 << 29 | 0b1111'1111'1111'1111'111 << 5; - uint32_t Offset = static_cast(Imm) & 0x3F'FFFF; - uint32_t Inst = *Instruction & ~InstMask; - Inst |= (Offset & 0b11) << 29; - Inst |= (Offset >> 2) << 5; - *Instruction = Inst; - break; - } - case ForwardLabel::Instructions::InstType::ADRP: { - uint32_t *Instruction = reinterpret_cast(Inst.Location); - int64_t Imm = reinterpret_cast(CurrentAddress) - reinterpret_cast(Instruction); - LOGMAN_THROW_A_FMT(IsADRPRange(Imm) && IsADRPAligned(Imm), "Unscaled offset too large"); - Imm >>= 12; - uint32_t InstMask = 0b11 << 29 | 0b1111'1111'1111'1111'111 << 5; - uint32_t Offset = static_cast(Imm) & 0x3F'FFFF; - uint32_t Inst = *Instruction & ~InstMask; - Inst |= (Offset & 0b11) << 29; - Inst |= (Offset >> 2) << 5; - *Instruction = Inst; - break; - } + // Patch up the instructions + switch (Label->Type) { + case SingleUseForwardLabel::InstType::ADR: { + uint32_t *Instruction = reinterpret_cast(Label->Location); + int64_t Imm = 
reinterpret_cast(CurrentAddress) - reinterpret_cast(Instruction); + LOGMAN_THROW_A_FMT(IsADRRange(Imm), "Unscaled offset too large"); + uint32_t InstMask = 0b11 << 29 | 0b1111'1111'1111'1111'111 << 5; + uint32_t Offset = static_cast(Imm) & 0x3F'FFFF; + uint32_t Inst = *Instruction & ~InstMask; + Inst |= (Offset & 0b11) << 29; + Inst |= (Offset >> 2) << 5; + *Instruction = Inst; + break; + } + case SingleUseForwardLabel::InstType::ADRP: { + uint32_t *Instruction = reinterpret_cast(Label->Location); + int64_t Imm = reinterpret_cast(CurrentAddress) - reinterpret_cast(Instruction); + LOGMAN_THROW_A_FMT(IsADRPRange(Imm) && IsADRPAligned(Imm), "Unscaled offset too large"); + Imm >>= 12; + uint32_t InstMask = 0b11 << 29 | 0b1111'1111'1111'1111'111 << 5; + uint32_t Offset = static_cast(Imm) & 0x3F'FFFF; + uint32_t Inst = *Instruction & ~InstMask; + Inst |= (Offset & 0b11) << 29; + Inst |= (Offset >> 2) << 5; + *Instruction = Inst; + break; + } - case ForwardLabel::Instructions::InstType::B: { - uint32_t *Instruction = reinterpret_cast(Inst.Location); - int64_t Imm = reinterpret_cast(CurrentAddress) - reinterpret_cast(Instruction); - LOGMAN_THROW_A_FMT(Imm >= -134217728 && Imm <= 134217724 && ((Imm & 0b11) == 0), "Unscaled offset too large"); - Imm >>= 2; - uint32_t InstMask = 0x3FF'FFFF; - uint32_t Offset = static_cast(Imm) & InstMask; - uint32_t Inst = *Instruction & ~InstMask; - Inst |= Offset; - *Instruction = Inst; - - break; - } + case SingleUseForwardLabel::InstType::B: { + uint32_t *Instruction = reinterpret_cast(Label->Location); + int64_t Imm = reinterpret_cast(CurrentAddress) - reinterpret_cast(Instruction); + LOGMAN_THROW_A_FMT(Imm >= -134217728 && Imm <= 134217724 && ((Imm & 0b11) == 0), "Unscaled offset too large"); + Imm >>= 2; + uint32_t InstMask = 0x3FF'FFFF; + uint32_t Offset = static_cast(Imm) & InstMask; + uint32_t Inst = *Instruction & ~InstMask; + Inst |= Offset; + *Instruction = Inst; + + break; + } - case 
ForwardLabel::Instructions::InstType::TEST_BRANCH: { - uint32_t *Instruction = reinterpret_cast(Inst.Location); - int64_t Imm = reinterpret_cast(CurrentAddress) - reinterpret_cast(Instruction); - LOGMAN_THROW_A_FMT(Imm >= -32768 && Imm <= 32764 && ((Imm & 0b11) == 0), "Unscaled offset too large"); - Imm >>= 2; - uint32_t InstMask = 0x3FFF; - uint32_t Offset = static_cast(Imm) & InstMask; - uint32_t Inst = *Instruction & ~(InstMask << 5); - Inst |= Offset << 5; - *Instruction = Inst; - - break; - } - case ForwardLabel::Instructions::InstType::BC: - case ForwardLabel::Instructions::InstType::RELATIVE_LOAD: { - uint32_t *Instruction = reinterpret_cast(Inst.Location); - int64_t Imm = reinterpret_cast(CurrentAddress) - reinterpret_cast(Instruction); - LOGMAN_THROW_A_FMT(Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0), "Unscaled offset too large"); - Imm >>= 2; - uint32_t InstMask = 0x7'FFFF; - uint32_t Offset = static_cast(Imm) & InstMask; - uint32_t Inst = *Instruction & ~(InstMask << 5); - Inst |= Offset << 5; - *Instruction = Inst; - break; + case SingleUseForwardLabel::InstType::TEST_BRANCH: { + uint32_t *Instruction = reinterpret_cast(Label->Location); + int64_t Imm = reinterpret_cast(CurrentAddress) - reinterpret_cast(Instruction); + LOGMAN_THROW_A_FMT(Imm >= -32768 && Imm <= 32764 && ((Imm & 0b11) == 0), "Unscaled offset too large"); + Imm >>= 2; + uint32_t InstMask = 0x3FFF; + uint32_t Offset = static_cast(Imm) & InstMask; + uint32_t Inst = *Instruction & ~(InstMask << 5); + Inst |= Offset << 5; + *Instruction = Inst; + + break; + } + case SingleUseForwardLabel::InstType::BC: + case SingleUseForwardLabel::InstType::RELATIVE_LOAD: { + uint32_t *Instruction = reinterpret_cast(Label->Location); + int64_t Imm = reinterpret_cast(CurrentAddress) - reinterpret_cast(Instruction); + LOGMAN_THROW_A_FMT(Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0), "Unscaled offset too large"); + Imm >>= 2; + uint32_t InstMask = 0x7'FFFF; + uint32_t Offset = 
static_cast(Imm) & InstMask; + uint32_t Inst = *Instruction & ~(InstMask << 5); + Inst |= Offset << 5; + *Instruction = Inst; + break; + } + case SingleUseForwardLabel::InstType::LONG_ADDRESS_GEN: { + uint32_t *Instructions = reinterpret_cast(Label->Location); + int64_t ImmInstOne = reinterpret_cast(CurrentAddress) - reinterpret_cast(&Instructions[0]); + int64_t ImmInstTwo = reinterpret_cast(CurrentAddress) - reinterpret_cast(&Instructions[1]); + auto OriginalOffset = GetCursorOffset(); + + auto InstOffset = GetCursorOffsetFromAddress(Instructions); + SetCursorOffset(InstOffset); + + // We encoded the destination register in to the first instruction space. + // Read it back. + ARMEmitter::Register DestReg(Instructions[0]); + + if (IsADRRange(ImmInstTwo)) { + // If within ADR range from the second instruction, then we can emit NOP+ADR + nop(); + adr(DestReg, static_cast(ImmInstTwo) & 0x7FFF); } - case ForwardLabel::Instructions::InstType::LONG_ADDRESS_GEN: { - uint32_t *Instructions = reinterpret_cast(Inst.Location); - int64_t ImmInstOne = reinterpret_cast(CurrentAddress) - reinterpret_cast(&Instructions[0]); - int64_t ImmInstTwo = reinterpret_cast(CurrentAddress) - reinterpret_cast(&Instructions[1]); - auto OriginalOffset = GetCursorOffset(); - - auto InstOffset = GetCursorOffsetFromAddress(Instructions); - SetCursorOffset(InstOffset); + else if (IsADRPRange(ImmInstOne)) { - // We encoded the destination register in to the first instruction space. - // Read it back. - ARMEmitter::Register DestReg(Instructions[0]); - - if (IsADRRange(ImmInstTwo)) { - // If within ADR range from the second instruction, then we can emit NOP+ADR + // If within ADRP range from the first instruction, then we are /definitely/ in range for the second instruction. + // First check if we are in non-offset range for second instruction. 
+ if (IsADRPAligned(reinterpret_cast(CurrentAddress))) { + // We can emit nop + adrp nop(); - adr(DestReg, static_cast(ImmInstTwo) & 0x7FFF); - } - else if (IsADRPRange(ImmInstOne)) { - - // If within ADRP range from the first instruction, then we are /definitely/ in range for the second instruction. - // First check if we are in non-offset range for second instruction. - if (IsADRPAligned(reinterpret_cast(CurrentAddress))) { - // We can emit nop + adrp - nop(); - adrp(DestReg, static_cast(ImmInstTwo >> 12) & 0x7FFF); - } - else { - // Not aligned, need adrp + add - adrp(DestReg, static_cast(ImmInstOne >> 12) & 0x7FFF); - add(ARMEmitter::Size::i64Bit, DestReg, DestReg, ImmInstOne & 0xFFF); - } + adrp(DestReg, static_cast(ImmInstTwo >> 12) & 0x7FFF); } else { - LOGMAN_MSG_A_FMT("Unscaled offset is too large"); - FEX_UNREACHABLE; + // Not aligned, need adrp + add + adrp(DestReg, static_cast(ImmInstOne >> 12) & 0x7FFF); + add(ARMEmitter::Size::i64Bit, DestReg, DestReg, ImmInstOne & 0xFFF); } - - SetCursorOffset(OriginalOffset); - break; } - default: LOGMAN_MSG_A_FMT("Unexpected inst type in label fixup"); + else { + LOGMAN_MSG_A_FMT("Unscaled offset is too large"); + FEX_UNREACHABLE; + } + + SetCursorOffset(OriginalOffset); + break; } + default: LOGMAN_MSG_A_FMT("Unexpected inst type in label fixup"); + } + } + + // Bind a forward label to a location. + // This walks all the instructions in the label's vector. + // Then backpatching all instructions that have used the label. 
+ template + void Bind(ForwardLabel *Label) { + if constexpr (WarnAboutEmpty) { + LOGMAN_THROW_A_FMT(Label->Insts.empty() == false, "Binding forward label that didn't have any instructions using it"); + } + for (auto &Inst : Label->Insts) { + Bind(&Inst); } } diff --git a/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/LoadstoreOps.inl b/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/LoadstoreOps.inl index c6e3134289..67a770e58c 100644 --- a/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/LoadstoreOps.inl +++ b/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/LoadstoreOps.inl @@ -2121,38 +2121,58 @@ public: LoadStoreLiteral(Op, prfop, static_cast(Imm >> 2) & 0x7'FFFF); } - void ldr(FEXCore::ARMEmitter::WRegister rt, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::RELATIVE_LOAD }); + template + requires (std::is_same_v || std::is_same_v) + void ldr(FEXCore::ARMEmitter::WRegister rt, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::RELATIVE_LOAD }); constexpr uint32_t Op = 0b0001'1000 << 24; LoadStoreLiteral(Op, rt, 0); } - void ldr(FEXCore::ARMEmitter::SRegister rt, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::RELATIVE_LOAD }); + + template + requires (std::is_same_v || std::is_same_v) + void ldr(FEXCore::ARMEmitter::SRegister rt, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::RELATIVE_LOAD }); constexpr uint32_t Op = 0b0001'1100 << 24; LoadStoreLiteral(Op, rt, 0); } - void ldr(FEXCore::ARMEmitter::XRegister rt, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = 
ForwardLabel::Instructions::InstType::RELATIVE_LOAD }); + + template + requires (std::is_same_v || std::is_same_v) + void ldr(FEXCore::ARMEmitter::XRegister rt, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::RELATIVE_LOAD }); constexpr uint32_t Op = 0b0101'1000 << 24; LoadStoreLiteral(Op, rt, 0); } - void ldr(FEXCore::ARMEmitter::DRegister rt, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::RELATIVE_LOAD }); + + template + requires (std::is_same_v || std::is_same_v) + void ldr(FEXCore::ARMEmitter::DRegister rt, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::RELATIVE_LOAD }); constexpr uint32_t Op = 0b0101'1100 << 24; LoadStoreLiteral(Op, rt, 0); } - void ldrsw(FEXCore::ARMEmitter::XRegister rt, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::RELATIVE_LOAD }); + + template + requires (std::is_same_v || std::is_same_v) + void ldrsw(FEXCore::ARMEmitter::XRegister rt, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::RELATIVE_LOAD }); constexpr uint32_t Op = 0b1001'1000 << 24; LoadStoreLiteral(Op, rt, 0); } - void ldr(FEXCore::ARMEmitter::QRegister rt, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::RELATIVE_LOAD }); + + template + requires (std::is_same_v || std::is_same_v) + void ldr(FEXCore::ARMEmitter::QRegister rt, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = 
SingleUseForwardLabel::InstType::RELATIVE_LOAD }); constexpr uint32_t Op = 0b1001'1100 << 24; LoadStoreLiteral(Op, rt, 0); } - void prfm(FEXCore::ARMEmitter::Prefetch prfop, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::RELATIVE_LOAD }); + + template + requires (std::is_same_v || std::is_same_v) + void prfm(FEXCore::ARMEmitter::Prefetch prfop, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::RELATIVE_LOAD }); constexpr uint32_t Op = 0b1101'1000 << 24; LoadStoreLiteral(Op, prfop, 0); } From 37102400b5f306e52f1abd24b750c18315c21046 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Mon, 8 Jan 2024 22:03:46 -0800 Subject: [PATCH 3/3] Arm64: Switches uses of forward label over to SingleUse if possible The primary goal of this is to ensure that the delinker doesn't need to allocate any memory. The delinker can end up getting hit heavily by JIT code, so we don't want it to be allocating memory. 
--- .../Interface/Core/Dispatcher/Dispatcher.cpp | 4 ++-- .../Interface/Core/JIT/Arm64/ALUOps.cpp | 24 +++++++++---------- .../Interface/Core/JIT/Arm64/AtomicOps.cpp | 8 +++---- .../Interface/Core/JIT/Arm64/BranchOps.cpp | 7 ++---- .../Source/Interface/Core/JIT/Arm64/JIT.cpp | 2 +- .../Interface/Core/JIT/Arm64/MemoryOps.cpp | 20 ++++++++-------- 6 files changed, 31 insertions(+), 34 deletions(-) diff --git a/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.cpp b/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.cpp index 0b7ed0468c..c720a3d6d3 100644 --- a/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.cpp +++ b/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.cpp @@ -73,8 +73,8 @@ void Dispatcher::EmitDispatcher() { // } ARMEmitter::ForwardLabel l_CTX; - ARMEmitter::ForwardLabel l_Sleep; - ARMEmitter::ForwardLabel l_CompileBlock; + ARMEmitter::SingleUseForwardLabel l_Sleep; + ARMEmitter::SingleUseForwardLabel l_CompileBlock; // Push all the register we need to save PushCalleeSavedRegisters(); diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp index 25cd588d0e..30dbd410da 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp @@ -731,9 +731,9 @@ DEF_OP(PDep) { 1U << MaskReg.Idx() | 1U << DestReg.Idx(); - ARMEmitter::ForwardLabel EarlyExit; + ARMEmitter::SingleUseForwardLabel EarlyExit; ARMEmitter::BackwardLabel NextBit; - ARMEmitter::ForwardLabel Done; + ARMEmitter::SingleUseForwardLabel Done; cbz(EmitSize, Mask, &EarlyExit); mov(EmitSize, IndexReg, ZeroReg); @@ -792,9 +792,9 @@ DEF_OP(PExt) { const auto BitReg = TMP2; const auto ValueReg = TMP3; - ARMEmitter::ForwardLabel EarlyExit; + ARMEmitter::SingleUseForwardLabel EarlyExit; ARMEmitter::BackwardLabel NextBit; - ARMEmitter::ForwardLabel Done; + ARMEmitter::SingleUseForwardLabel Done; cbz(EmitSize, Mask, &EarlyExit); mov(EmitSize, MaskReg, Mask); @@ -848,8 +848,8 @@ 
DEF_OP(LDiv) { break; } case 8: { - ARMEmitter::ForwardLabel Only64Bit{}; - ARMEmitter::ForwardLabel LongDIVRet{}; + ARMEmitter::SingleUseForwardLabel Only64Bit{}; + ARMEmitter::SingleUseForwardLabel LongDIVRet{}; // Check if the upper bits match the top bit of the lower 64-bits // Sign extend the top bit of lower bits @@ -920,8 +920,8 @@ DEF_OP(LUDiv) { break; } case 8: { - ARMEmitter::ForwardLabel Only64Bit{}; - ARMEmitter::ForwardLabel LongDIVRet{}; + ARMEmitter::SingleUseForwardLabel Only64Bit{}; + ARMEmitter::SingleUseForwardLabel LongDIVRet{}; // Check the upper bits for zero // If the upper bits are zero then we can do a 64-bit divide @@ -992,8 +992,8 @@ DEF_OP(LRem) { break; } case 8: { - ARMEmitter::ForwardLabel Only64Bit{}; - ARMEmitter::ForwardLabel LongDIVRet{}; + ARMEmitter::SingleUseForwardLabel Only64Bit{}; + ARMEmitter::SingleUseForwardLabel LongDIVRet{}; // Check if the upper bits match the top bit of the lower 64-bits // Sign extend the top bit of lower bits @@ -1066,8 +1066,8 @@ DEF_OP(LURem) { break; } case 8: { - ARMEmitter::ForwardLabel Only64Bit{}; - ARMEmitter::ForwardLabel LongDIVRet{}; + ARMEmitter::SingleUseForwardLabel Only64Bit{}; + ARMEmitter::SingleUseForwardLabel LongDIVRet{}; // Check the upper bits for zero // If the upper bits are zero then we can do a 64-bit divide diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/AtomicOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/AtomicOps.cpp index 0c0dc78dd4..39b67f8f56 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/AtomicOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/AtomicOps.cpp @@ -32,8 +32,8 @@ DEF_OP(CASPair) { } else { ARMEmitter::BackwardLabel LoopTop; - ARMEmitter::ForwardLabel LoopNotExpected; - ARMEmitter::ForwardLabel LoopExpected; + ARMEmitter::SingleUseForwardLabel LoopNotExpected; + ARMEmitter::SingleUseForwardLabel LoopExpected; Bind(&LoopTop); ldaxp(EmitSize, TMP2, TMP3, MemSrc); @@ -82,8 +82,8 @@ DEF_OP(CAS) { } else { ARMEmitter::BackwardLabel LoopTop; - 
ARMEmitter::ForwardLabel LoopNotExpected; - ARMEmitter::ForwardLabel LoopExpected; + ARMEmitter::SingleUseForwardLabel LoopNotExpected; + ARMEmitter::SingleUseForwardLabel LoopExpected; Bind(&LoopTop); ldaxr(SubEmitSize, TMP2, MemSrc); if (OpSize == 1) { diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/BranchOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/BranchOps.cpp index 6b8e93520c..9470d6bc7e 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/BranchOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/BranchOps.cpp @@ -53,20 +53,17 @@ DEF_OP(ExitFunction) { uint64_t NewRIP; if (IsInlineConstant(Op->NewRIP, &NewRIP) || IsInlineEntrypointOffset(Op->NewRIP, &NewRIP)) { - ARMEmitter::ForwardLabel l_BranchHost; - ARMEmitter::ForwardLabel l_BranchGuest; + ARMEmitter::SingleUseForwardLabel l_BranchHost; ldr(ARMEmitter::XReg::x0, &l_BranchHost); blr(ARMEmitter::Reg::r0); Bind(&l_BranchHost); dc64(ThreadState->CurrentFrame->Pointers.Common.ExitFunctionLinker); - Bind(&l_BranchGuest); dc64(NewRIP); - } else { - ARMEmitter::ForwardLabel FullLookup; + ARMEmitter::SingleUseForwardLabel FullLookup; auto RipReg = GetReg(Op->NewRIP.ID()); // L1 Cache diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp index 2fd3af18d1..414c5a254c 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp @@ -485,7 +485,7 @@ static void DirectBlockDelinker(FEXCore::Core::CpuStateFrame *Frame, FEXCore::Co auto LinkerAddress = Frame->Pointers.Common.ExitFunctionLinker; uintptr_t branch = (uintptr_t)(Record) - 8; FEXCore::ARMEmitter::Emitter emit((uint8_t*)(branch), 8); - FEXCore::ARMEmitter::ForwardLabel l_BranchHost; + FEXCore::ARMEmitter::SingleUseForwardLabel l_BranchHost; emit.ldr(FEXCore::ARMEmitter::XReg::x0, &l_BranchHost); emit.blr(FEXCore::ARMEmitter::Reg::r0); emit.Bind(&l_BranchHost); diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp 
b/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp index b85e14c82e..40472ac20a 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp @@ -175,7 +175,7 @@ DEF_OP(LoadRegister) { if (HostSupportsSVE256) { const auto regOffs = Op->Offset & 31; - ARMEmitter::ForwardLabel DataLocation; + ARMEmitter::SingleUseForwardLabel DataLocation; const auto LoadPredicate = [this, &DataLocation] { const auto Predicate = ARMEmitter::PReg::p0; adr(TMP1, &DataLocation); @@ -184,7 +184,7 @@ DEF_OP(LoadRegister) { }; const auto EmitData = [this, &DataLocation](uint32_t Value) { - ARMEmitter::ForwardLabel PastConstant; + ARMEmitter::SingleUseForwardLabel PastConstant; b(&PastConstant); Bind(&DataLocation); dc32(Value); @@ -364,7 +364,7 @@ DEF_OP(StoreRegister) { const auto regOffs = Op->Offset & 31; // Compartmentalized setting up of the predicate for the cases that need it. - ARMEmitter::ForwardLabel DataLocation; + ARMEmitter::SingleUseForwardLabel DataLocation; const auto LoadPredicate = [this, &DataLocation] { const auto Predicate = ARMEmitter::PReg::p0; adr(TMP1, &DataLocation); @@ -377,7 +377,7 @@ DEF_OP(StoreRegister) { // It's helpful to treat LoadPredicate and EmitData as a prologue and epilogue // respectfully. const auto EmitData = [this, &DataLocation](uint32_t Data) { - ARMEmitter::ForwardLabel PastConstant; + ARMEmitter::SingleUseForwardLabel PastConstant; b(&PastConstant); Bind(&DataLocation); dc32(Data); @@ -1715,8 +1715,8 @@ DEF_OP(MemSet) { // // Counter is decremented regardless. 
- ARMEmitter::ForwardLabel BackwardImpl{}; - ARMEmitter::ForwardLabel Done{}; + ARMEmitter::SingleUseForwardLabel BackwardImpl{}; + ARMEmitter::SingleUseForwardLabel Done{}; mov(TMP1, Length.X()); if (Op->Prefix.IsInvalid()) { @@ -1789,7 +1789,7 @@ DEF_OP(MemSet) { const int32_t SizeDirection = Size * Direction; ARMEmitter::BackwardLabel AgainInternal{}; - ARMEmitter::ForwardLabel DoneInternal{}; + ARMEmitter::SingleUseForwardLabel DoneInternal{}; // Early exit if zero count. cbz(ARMEmitter::Size::i64Bit, TMP1, &DoneInternal); @@ -1895,8 +1895,8 @@ DEF_OP(MemCpy) { // // Counter is decremented regardless. - ARMEmitter::ForwardLabel BackwardImpl{}; - ARMEmitter::ForwardLabel Done{}; + ARMEmitter::SingleUseForwardLabel BackwardImpl{}; + ARMEmitter::SingleUseForwardLabel Done{}; mov(TMP1, Length.X()); if (Op->PrefixDest.IsInvalid()) { @@ -2050,7 +2050,7 @@ DEF_OP(MemCpy) { const int32_t SizeDirection = Size * Direction; ARMEmitter::BackwardLabel AgainInternal{}; - ARMEmitter::ForwardLabel DoneInternal{}; + ARMEmitter::SingleUseForwardLabel DoneInternal{}; // Early exit if zero count. cbz(ARMEmitter::Size::i64Bit, TMP1, &DoneInternal);