diff --git a/FEXCore/Source/Interface/Context/Context.h b/FEXCore/Source/Interface/Context/Context.h index 09a280f097..9916303a7d 100644 --- a/FEXCore/Source/Interface/Context/Context.h +++ b/FEXCore/Source/Interface/Context/Context.h @@ -68,6 +68,13 @@ namespace FEXCore::Context { MODE_SINGLESTEP = 1, }; + struct ExitFunctionLinkData { + uint64_t HostBranch; + uint64_t GuestRIP; + }; + + using BlockDelinkerFunc = void(*)(FEXCore::Core::CpuStateFrame *Frame, FEXCore::Context::ExitFunctionLinkData *Record); + class ContextImpl final : public FEXCore::Context::Context { public: // Context base class implementation. @@ -274,12 +281,7 @@ namespace FEXCore::Context { void SignalThread(FEXCore::Core::InternalThreadState *Thread, FEXCore::Core::SignalEvent Event); static void ThreadRemoveCodeEntry(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestRIP); - static void ThreadAddBlockLink(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestDestination, uintptr_t HostLink, const std::function &delinker); - - struct ExitFunctionLinkData { - uint64_t HostBranch; - uint64_t GuestRIP; - }; + static void ThreadAddBlockLink(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestDestination, FEXCore::Context::ExitFunctionLinkData *HostLink, const BlockDelinkerFunc &delinker); template static uint64_t ThreadExitFunctionLink(FEXCore::Core::CpuStateFrame *Frame, ExitFunctionLinkData *Record) { diff --git a/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/ALUOps.inl b/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/ALUOps.inl index 3c7ab467c7..9010b013e3 100644 --- a/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/ALUOps.inl +++ b/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/ALUOps.inl @@ -35,8 +35,10 @@ public: constexpr uint32_t Op = 0b0001'0000 << 24; DataProcessing_PCRel_Imm(Op, rd, Imm); } - void adr(FEXCore::ARMEmitter::Register rd, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::ADR }); + template + requires (std::is_same_v || std::is_same_v) + void adr(FEXCore::ARMEmitter::Register rd, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::ADR }); constexpr uint32_t Op = 0b0001'0000 << 24; DataProcessing_PCRel_Imm(Op, rd, 0); } @@ -62,8 +64,10 @@ public: constexpr uint32_t Op = 0b1001'0000 << 24; DataProcessing_PCRel_Imm(Op, rd, Imm); } - void adrp(FEXCore::ARMEmitter::Register rd, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::ADRP }); + template + requires (std::is_same_v || std::is_same_v) + void adrp(FEXCore::ARMEmitter::Register rd, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::ADRP }); constexpr uint32_t Op = 0b1001'0000 << 24; DataProcessing_PCRel_Imm(Op, rd, 0); } @@ -105,7 +109,7 @@ public: } } void LongAddressGen(FEXCore::ARMEmitter::Register rd, ForwardLabel* Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::LONG_ADDRESS_GEN }); + Label->Insts.emplace_back(SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::LONG_ADDRESS_GEN }); // Emit a register index and a nop. These will be backpatched. dc32(rd.Idx()); nop(); diff --git a/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/BranchOps.inl b/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/BranchOps.inl index 0b417cac2f..abe01ddfab 100644 --- a/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/BranchOps.inl +++ b/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/BranchOps.inl @@ -18,8 +18,10 @@ public: constexpr uint32_t Op = 0b0101'010 << 25; Branch_Conditional(Op, 0, 0, Cond, Imm >> 2); } - void b(FEXCore::ARMEmitter::Condition Cond, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::BC }); + template + requires (std::is_same_v || std::is_same_v) + void b(FEXCore::ARMEmitter::Condition Cond, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::BC }); constexpr uint32_t Op = 0b0101'010 << 25; Branch_Conditional(Op, 0, 0, Cond, 0); } @@ -45,8 +47,10 @@ public: Branch_Conditional(Op, 0, 1, Cond, Imm >> 2); } - void bc(FEXCore::ARMEmitter::Condition Cond, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::BC }); + template + requires (std::is_same_v || std::is_same_v) + void bc(FEXCore::ARMEmitter::Condition Cond, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::BC }); constexpr uint32_t Op = 0b0101'010 << 25; Branch_Conditional(Op, 0, 1, Cond, 0); } @@ -102,8 +106,10 @@ public: UnconditionalBranch(Op, Imm >> 2); } - void b(ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::B }); + template + requires (std::is_same_v || std::is_same_v) + void b(LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::B }); constexpr uint32_t Op = 0b0001'01 << 26; UnconditionalBranch(Op, 0); @@ -131,8 +137,10 @@ public: UnconditionalBranch(Op, Imm >> 2); } - void bl(ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::B }); + template + requires (std::is_same_v || std::is_same_v) + void bl(LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::B }); constexpr uint32_t Op = 0b1001'01 << 26; UnconditionalBranch(Op, 0); @@ -163,8 +171,10 @@ public: CompareAndBranch(Op, s, rt, Imm >> 2); } - void cbz(FEXCore::ARMEmitter::Size s, FEXCore::ARMEmitter::Register rt, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::BC }); + template + requires (std::is_same_v || std::is_same_v) + void cbz(FEXCore::ARMEmitter::Size s, FEXCore::ARMEmitter::Register rt, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::BC }); constexpr uint32_t Op = 0b0011'0100 << 24; @@ -195,8 +205,10 @@ public: CompareAndBranch(Op, s, rt, Imm >> 2); } - void cbnz(FEXCore::ARMEmitter::Size s, FEXCore::ARMEmitter::Register rt, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::BC }); + template + requires (std::is_same_v || std::is_same_v) + void cbnz(FEXCore::ARMEmitter::Size s, FEXCore::ARMEmitter::Register rt, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::BC }); constexpr uint32_t Op = 0b0011'0101 << 24; @@ -226,8 +238,11 @@ public: TestAndBranch(Op, rt, Bit, Imm >> 2); } - void tbz(FEXCore::ARMEmitter::Register rt, uint32_t Bit, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::TEST_BRANCH }); + + template + requires (std::is_same_v || std::is_same_v) + void tbz(FEXCore::ARMEmitter::Register rt, uint32_t Bit, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::TEST_BRANCH }); constexpr uint32_t Op = 0b0011'0110 << 24; @@ -256,8 +271,11 @@ public: TestAndBranch(Op, rt, Bit, Imm >> 2); } - void tbnz(FEXCore::ARMEmitter::Register rt, uint32_t Bit, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::TEST_BRANCH }); + + template + requires (std::is_same_v || std::is_same_v) + void tbnz(FEXCore::ARMEmitter::Register rt, uint32_t Bit, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::TEST_BRANCH }); constexpr uint32_t Op = 0b0011'0111 << 24; TestAndBranch(Op, rt, Bit, 0); diff --git a/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/Emitter.h b/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/Emitter.h index df4ccfea47..babb1801f4 100644 --- a/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/Emitter.h +++ b/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/Emitter.h @@ -538,27 +538,29 @@ namespace FEXCore::ARMEmitter { uint8_t *Location{}; }; - /* This `ForwardLabel` struct used for retaining a location for PC-Relative instructions. + /* This `SingleUseForwardLabel` struct used for retaining a location for PC-Relative instructions. * This is specifically a label for a target that is logically `above` an instruction that uses it. * Which means that a branch would jump forwards. * - * This can be bound to multiple instructions, so it needs a vector for each bind instruction type. + * The `ForwardLabel` struct can be bound to multiple instructions, so it needs a vector for each bind instruction type. */ - struct ForwardLabel { - struct Instructions { - enum class InstType { - ADR, - ADRP, - B, - BC, - TEST_BRANCH, - RELATIVE_LOAD, - LONG_ADDRESS_GEN, - }; - uint8_t *Location{}; - InstType Type; + struct SingleUseForwardLabel { + enum class InstType { + UNKNOWN, + ADR, + ADRP, + B, + BC, + TEST_BRANCH, + RELATIVE_LOAD, + LONG_ADDRESS_GEN, }; - fextl::vector Insts{}; + uint8_t *Location{}; + InstType Type = InstType::UNKNOWN; + }; + + struct ForwardLabel { + fextl::vector Insts{}; }; /* This `BiDirectionalLabel` struct used for retaining a location for PC-Relative instructions. @@ -570,6 +572,15 @@ namespace FEXCore::ARMEmitter { ForwardLabel Forward; }; + static inline void AddLocationToLabel(SingleUseForwardLabel *Label, SingleUseForwardLabel&& Location) { + LOGMAN_THROW_A_FMT(Label->Type == SingleUseForwardLabel::InstType::UNKNOWN, "Trying to bind a SingleUseForwardLabel to multiple locations. Use ForwardLabel instead."); + *Label = std::move(Location); + } + + static inline void AddLocationToLabel(ForwardLabel *Label, SingleUseForwardLabel&& Location) { + Label->Insts.emplace_back(std::move(Location)); + } + // Some FCMA ASIMD instructions support a rotation argument. enum class Rotation : uint32_t { ROTATE_0 = 0b00, @@ -629,127 +640,131 @@ namespace FEXCore::ARMEmitter { Label->Location = GetCursorAddress(); } - // Bind a forward label to a location. - // This walks all the instructions in the label's vector. - // Then backpatching all instructions that have used the label. - template - void Bind(ForwardLabel *Label) { - if constexpr (WarnAboutEmpty) { - LOGMAN_THROW_A_FMT(Label->Insts.empty() == false, "Binding forward label that didn't have any instructions using it"); - } + void Bind(const SingleUseForwardLabel *Label) { uint8_t *CurrentAddress = GetCursorAddress(); - for (const auto &Inst : Label->Insts) { - // Patch up the instructions - switch (Inst.Type) { - case ForwardLabel::Instructions::InstType::ADR: { - uint32_t *Instruction = reinterpret_cast(Inst.Location); - int64_t Imm = reinterpret_cast(CurrentAddress) - reinterpret_cast(Instruction); - LOGMAN_THROW_A_FMT(IsADRRange(Imm), "Unscaled offset too large"); - uint32_t InstMask = 0b11 << 29 | 0b1111'1111'1111'1111'111 << 5; - uint32_t Offset = static_cast(Imm) & 0x3F'FFFF; - uint32_t Inst = *Instruction & ~InstMask; - Inst |= (Offset & 0b11) << 29; - Inst |= (Offset >> 2) << 5; - *Instruction = Inst; - break; - } - case ForwardLabel::Instructions::InstType::ADRP: { - uint32_t *Instruction = reinterpret_cast(Inst.Location); - int64_t Imm = reinterpret_cast(CurrentAddress) - reinterpret_cast(Instruction); - LOGMAN_THROW_A_FMT(IsADRPRange(Imm) && IsADRPAligned(Imm), "Unscaled offset too large"); - Imm >>= 12; - uint32_t InstMask = 0b11 << 29 | 0b1111'1111'1111'1111'111 << 5; - uint32_t Offset = static_cast(Imm) & 0x3F'FFFF; - uint32_t Inst = *Instruction & ~InstMask; - Inst |= (Offset & 0b11) << 29; - Inst |= (Offset >> 2) << 5; - *Instruction = Inst; - break; - } + // Patch up the instructions + switch (Label->Type) { + case SingleUseForwardLabel::InstType::ADR: { + uint32_t *Instruction = reinterpret_cast(Label->Location); + int64_t Imm = reinterpret_cast(CurrentAddress) - reinterpret_cast(Instruction); + LOGMAN_THROW_A_FMT(IsADRRange(Imm), "Unscaled offset too large"); + uint32_t InstMask = 0b11 << 29 | 0b1111'1111'1111'1111'111 << 5; + uint32_t Offset = static_cast(Imm) & 0x3F'FFFF; + uint32_t Inst = *Instruction & ~InstMask; + Inst |= (Offset & 0b11) << 29; + Inst |= (Offset >> 2) << 5; + *Instruction = Inst; + break; + } + case SingleUseForwardLabel::InstType::ADRP: { + uint32_t *Instruction = reinterpret_cast(Label->Location); + int64_t Imm = reinterpret_cast(CurrentAddress) - reinterpret_cast(Instruction); + LOGMAN_THROW_A_FMT(IsADRPRange(Imm) && IsADRPAligned(Imm), "Unscaled offset too large"); + Imm >>= 12; + uint32_t InstMask = 0b11 << 29 | 0b1111'1111'1111'1111'111 << 5; + uint32_t Offset = static_cast(Imm) & 0x3F'FFFF; + uint32_t Inst = *Instruction & ~InstMask; + Inst |= (Offset & 0b11) << 29; + Inst |= (Offset >> 2) << 5; + *Instruction = Inst; + break; + } - case ForwardLabel::Instructions::InstType::B: { - uint32_t *Instruction = reinterpret_cast(Inst.Location); - int64_t Imm = reinterpret_cast(CurrentAddress) - reinterpret_cast(Instruction); - LOGMAN_THROW_A_FMT(Imm >= -134217728 && Imm <= 134217724 && ((Imm & 0b11) == 0), "Unscaled offset too large"); - Imm >>= 2; - uint32_t InstMask = 0x3FF'FFFF; - uint32_t Offset = static_cast(Imm) & InstMask; - uint32_t Inst = *Instruction & ~InstMask; - Inst |= Offset; - *Instruction = Inst; - - break; - } + case SingleUseForwardLabel::InstType::B: { + uint32_t *Instruction = reinterpret_cast(Label->Location); + int64_t Imm = reinterpret_cast(CurrentAddress) - reinterpret_cast(Instruction); + LOGMAN_THROW_A_FMT(Imm >= -134217728 && Imm <= 134217724 && ((Imm & 0b11) == 0), "Unscaled offset too large"); + Imm >>= 2; + uint32_t InstMask = 0x3FF'FFFF; + uint32_t Offset = static_cast(Imm) & InstMask; + uint32_t Inst = *Instruction & ~InstMask; + Inst |= Offset; + *Instruction = Inst; + + break; + } - case ForwardLabel::Instructions::InstType::TEST_BRANCH: { - uint32_t *Instruction = reinterpret_cast(Inst.Location); - int64_t Imm = reinterpret_cast(CurrentAddress) - reinterpret_cast(Instruction); - LOGMAN_THROW_A_FMT(Imm >= -32768 && Imm <= 32764 && ((Imm & 0b11) == 0), "Unscaled offset too large"); - Imm >>= 2; - uint32_t InstMask = 0x3FFF; - uint32_t Offset = static_cast(Imm) & InstMask; - uint32_t Inst = *Instruction & ~(InstMask << 5); - Inst |= Offset << 5; - *Instruction = Inst; - - break; - } - case ForwardLabel::Instructions::InstType::BC: - case ForwardLabel::Instructions::InstType::RELATIVE_LOAD: { - uint32_t *Instruction = reinterpret_cast(Inst.Location); - int64_t Imm = reinterpret_cast(CurrentAddress) - reinterpret_cast(Instruction); - LOGMAN_THROW_A_FMT(Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0), "Unscaled offset too large"); - Imm >>= 2; - uint32_t InstMask = 0x7'FFFF; - uint32_t Offset = static_cast(Imm) & InstMask; - uint32_t Inst = *Instruction & ~(InstMask << 5); - Inst |= Offset << 5; - *Instruction = Inst; - break; + case SingleUseForwardLabel::InstType::TEST_BRANCH: { + uint32_t *Instruction = reinterpret_cast(Label->Location); + int64_t Imm = reinterpret_cast(CurrentAddress) - reinterpret_cast(Instruction); + LOGMAN_THROW_A_FMT(Imm >= -32768 && Imm <= 32764 && ((Imm & 0b11) == 0), "Unscaled offset too large"); + Imm >>= 2; + uint32_t InstMask = 0x3FFF; + uint32_t Offset = static_cast(Imm) & InstMask; + uint32_t Inst = *Instruction & ~(InstMask << 5); + Inst |= Offset << 5; + *Instruction = Inst; + + break; + } + case SingleUseForwardLabel::InstType::BC: + case SingleUseForwardLabel::InstType::RELATIVE_LOAD: { + uint32_t *Instruction = reinterpret_cast(Label->Location); + int64_t Imm = reinterpret_cast(CurrentAddress) - reinterpret_cast(Instruction); + LOGMAN_THROW_A_FMT(Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0), "Unscaled offset too large"); + Imm >>= 2; + uint32_t InstMask = 0x7'FFFF; + uint32_t Offset = static_cast(Imm) & InstMask; + uint32_t Inst = *Instruction & ~(InstMask << 5); + Inst |= Offset << 5; + *Instruction = Inst; + break; + } + case SingleUseForwardLabel::InstType::LONG_ADDRESS_GEN: { + uint32_t *Instructions = reinterpret_cast(Label->Location); + int64_t ImmInstOne = reinterpret_cast(CurrentAddress) - reinterpret_cast(&Instructions[0]); + int64_t ImmInstTwo = reinterpret_cast(CurrentAddress) - reinterpret_cast(&Instructions[1]); + auto OriginalOffset = GetCursorOffset(); + + auto InstOffset = GetCursorOffsetFromAddress(Instructions); + SetCursorOffset(InstOffset); + + // We encoded the destination register in to the first instruction space. + // Read it back. + ARMEmitter::Register DestReg(Instructions[0]); + + if (IsADRRange(ImmInstTwo)) { + // If within ADR range from the second instruction, then we can emit NOP+ADR + nop(); + adr(DestReg, static_cast(ImmInstTwo) & 0x7FFF); } - case ForwardLabel::Instructions::InstType::LONG_ADDRESS_GEN: { - uint32_t *Instructions = reinterpret_cast(Inst.Location); - int64_t ImmInstOne = reinterpret_cast(CurrentAddress) - reinterpret_cast(&Instructions[0]); - int64_t ImmInstTwo = reinterpret_cast(CurrentAddress) - reinterpret_cast(&Instructions[1]); - auto OriginalOffset = GetCursorOffset(); - - auto InstOffset = GetCursorOffsetFromAddress(Instructions); - SetCursorOffset(InstOffset); + else if (IsADRPRange(ImmInstOne)) { - // We encoded the destination register in to the first instruction space. - // Read it back. - ARMEmitter::Register DestReg(Instructions[0]); - - if (IsADRRange(ImmInstTwo)) { - // If within ADR range from the second instruction, then we can emit NOP+ADR + // If within ADRP range from the first instruction, then we are /definitely/ in range for the second instruction. + // First check if we are in non-offset range for second instruction. + if (IsADRPAligned(reinterpret_cast(CurrentAddress))) { + // We can emit nop + adrp nop(); - adr(DestReg, static_cast(ImmInstTwo) & 0x7FFF); - } - else if (IsADRPRange(ImmInstOne)) { - - // If within ADRP range from the first instruction, then we are /definitely/ in range for the second instruction. - // First check if we are in non-offset range for second instruction. - if (IsADRPAligned(reinterpret_cast(CurrentAddress))) { - // We can emit nop + adrp - nop(); - adrp(DestReg, static_cast(ImmInstTwo >> 12) & 0x7FFF); - } - else { - // Not aligned, need adrp + add - adrp(DestReg, static_cast(ImmInstOne >> 12) & 0x7FFF); - add(ARMEmitter::Size::i64Bit, DestReg, DestReg, ImmInstOne & 0xFFF); - } + adrp(DestReg, static_cast(ImmInstTwo >> 12) & 0x7FFF); } else { - LOGMAN_MSG_A_FMT("Unscaled offset is too large"); - FEX_UNREACHABLE; + // Not aligned, need adrp + add + adrp(DestReg, static_cast(ImmInstOne >> 12) & 0x7FFF); + add(ARMEmitter::Size::i64Bit, DestReg, DestReg, ImmInstOne & 0xFFF); } - - SetCursorOffset(OriginalOffset); - break; } - default: LOGMAN_MSG_A_FMT("Unexpected inst type in label fixup"); + else { + LOGMAN_MSG_A_FMT("Unscaled offset is too large"); + FEX_UNREACHABLE; + } + + SetCursorOffset(OriginalOffset); + break; } + default: LOGMAN_MSG_A_FMT("Unexpected inst type in label fixup"); + } + } + + // Bind a forward label to a location. + // This walks all the instructions in the label's vector. + // Then backpatching all instructions that have used the label. + template + void Bind(ForwardLabel *Label) { + if constexpr (WarnAboutEmpty) { + LOGMAN_THROW_A_FMT(Label->Insts.empty() == false, "Binding forward label that didn't have any instructions using it"); + } + for (auto &Inst : Label->Insts) { + Bind(&Inst); } } diff --git a/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/LoadstoreOps.inl b/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/LoadstoreOps.inl index c6e3134289..67a770e58c 100644 --- a/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/LoadstoreOps.inl +++ b/FEXCore/Source/Interface/Core/ArchHelpers/CodeEmitter/LoadstoreOps.inl @@ -2121,38 +2121,58 @@ public: LoadStoreLiteral(Op, prfop, static_cast(Imm >> 2) & 0x7'FFFF); } - void ldr(FEXCore::ARMEmitter::WRegister rt, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::RELATIVE_LOAD }); + template + requires (std::is_same_v || std::is_same_v) + void ldr(FEXCore::ARMEmitter::WRegister rt, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::RELATIVE_LOAD }); constexpr uint32_t Op = 0b0001'1000 << 24; LoadStoreLiteral(Op, rt, 0); } - void ldr(FEXCore::ARMEmitter::SRegister rt, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::RELATIVE_LOAD }); + + template + requires (std::is_same_v || std::is_same_v) + void ldr(FEXCore::ARMEmitter::SRegister rt, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::RELATIVE_LOAD }); constexpr uint32_t Op = 0b0001'1100 << 24; LoadStoreLiteral(Op, rt, 0); } - void ldr(FEXCore::ARMEmitter::XRegister rt, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::RELATIVE_LOAD }); + + template + requires (std::is_same_v || std::is_same_v) + void ldr(FEXCore::ARMEmitter::XRegister rt, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::RELATIVE_LOAD }); constexpr uint32_t Op = 0b0101'1000 << 24; LoadStoreLiteral(Op, rt, 0); } - void ldr(FEXCore::ARMEmitter::DRegister rt, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::RELATIVE_LOAD }); + + template + requires (std::is_same_v || std::is_same_v) + void ldr(FEXCore::ARMEmitter::DRegister rt, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::RELATIVE_LOAD }); constexpr uint32_t Op = 0b0101'1100 << 24; LoadStoreLiteral(Op, rt, 0); } - void ldrsw(FEXCore::ARMEmitter::XRegister rt, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::RELATIVE_LOAD }); + + template + requires (std::is_same_v || std::is_same_v) + void ldrsw(FEXCore::ARMEmitter::XRegister rt, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::RELATIVE_LOAD }); constexpr uint32_t Op = 0b1001'1000 << 24; LoadStoreLiteral(Op, rt, 0); } - void ldr(FEXCore::ARMEmitter::QRegister rt, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::RELATIVE_LOAD }); + + template + requires (std::is_same_v || std::is_same_v) + void ldr(FEXCore::ARMEmitter::QRegister rt, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::RELATIVE_LOAD }); constexpr uint32_t Op = 0b1001'1100 << 24; LoadStoreLiteral(Op, rt, 0); } - void prfm(FEXCore::ARMEmitter::Prefetch prfop, ForwardLabel *Label) { - Label->Insts.emplace_back(ForwardLabel::Instructions{ .Location = GetCursorAddress(), .Type = ForwardLabel::Instructions::InstType::RELATIVE_LOAD }); + + template + requires (std::is_same_v || std::is_same_v) + void prfm(FEXCore::ARMEmitter::Prefetch prfop, LabelType *Label) { + AddLocationToLabel(Label, SingleUseForwardLabel{ .Location = GetCursorAddress(), .Type = SingleUseForwardLabel::InstType::RELATIVE_LOAD }); constexpr uint32_t Op = 0b1101'1000 << 24; LoadStoreLiteral(Op, prfop, 0); } diff --git a/FEXCore/Source/Interface/Core/Core.cpp b/FEXCore/Source/Interface/Core/Core.cpp index 02e40f8d7a..5ba18b8cd2 100644 --- a/FEXCore/Source/Interface/Core/Core.cpp +++ b/FEXCore/Source/Interface/Core/Core.cpp @@ -1237,7 +1237,7 @@ namespace FEXCore::Context { } } - void ContextImpl::ThreadAddBlockLink(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestDestination, uintptr_t HostLink, const std::function &delinker) { + void ContextImpl::ThreadAddBlockLink(FEXCore::Core::InternalThreadState *Thread, uint64_t GuestDestination, FEXCore::Context::ExitFunctionLinkData *HostLink, const FEXCore::Context::BlockDelinkerFunc &delinker) { auto lk = GuardSignalDeferringSection(static_cast(Thread->CTX)->CodeInvalidationMutex, Thread); Thread->LookupCache->AddBlockLink(GuestDestination, HostLink, delinker); @@ -1249,7 +1249,7 @@ namespace FEXCore::Context { std::lock_guard lk(Thread->LookupCache->WriteLock); Thread->DebugStore.erase(GuestRIP); - Thread->LookupCache->Erase(GuestRIP); + Thread->LookupCache->Erase(Thread->CurrentFrame, GuestRIP); } CustomIRResult ContextImpl::AddCustomIREntrypoint(uintptr_t Entrypoint, CustomIREntrypointHandler Handler, void *Creator, void *Data) { diff --git a/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.cpp b/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.cpp index 0b7ed0468c..c720a3d6d3 100644 --- a/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.cpp +++ b/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.cpp @@ -73,8 +73,8 @@ void Dispatcher::EmitDispatcher() { // } ARMEmitter::ForwardLabel l_CTX; - ARMEmitter::ForwardLabel l_Sleep; - ARMEmitter::ForwardLabel l_CompileBlock; + ARMEmitter::SingleUseForwardLabel l_Sleep; + ARMEmitter::SingleUseForwardLabel l_CompileBlock; // Push all the register we need to save PushCalleeSavedRegisters(); diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp index 25cd588d0e..30dbd410da 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp @@ -731,9 +731,9 @@ DEF_OP(PDep) { 1U << MaskReg.Idx() | 1U << DestReg.Idx(); - ARMEmitter::ForwardLabel EarlyExit; + ARMEmitter::SingleUseForwardLabel EarlyExit; ARMEmitter::BackwardLabel NextBit; - ARMEmitter::ForwardLabel Done; + ARMEmitter::SingleUseForwardLabel Done; cbz(EmitSize, Mask, &EarlyExit); mov(EmitSize, IndexReg, ZeroReg); @@ -792,9 +792,9 @@ DEF_OP(PExt) { const auto BitReg = TMP2; const auto ValueReg = TMP3; - ARMEmitter::ForwardLabel EarlyExit; + ARMEmitter::SingleUseForwardLabel EarlyExit; ARMEmitter::BackwardLabel NextBit; - ARMEmitter::ForwardLabel Done; + ARMEmitter::SingleUseForwardLabel Done; cbz(EmitSize, Mask, &EarlyExit); mov(EmitSize, MaskReg, Mask); @@ -848,8 +848,8 @@ DEF_OP(LDiv) { break; } case 8: { - ARMEmitter::ForwardLabel Only64Bit{}; - ARMEmitter::ForwardLabel LongDIVRet{}; + ARMEmitter::SingleUseForwardLabel Only64Bit{}; + ARMEmitter::SingleUseForwardLabel LongDIVRet{}; // Check if the upper bits match the top bit of the lower 64-bits // Sign extend the top bit of lower bits @@ -920,8 +920,8 @@ DEF_OP(LUDiv) { break; } case 8: { - ARMEmitter::ForwardLabel Only64Bit{}; - ARMEmitter::ForwardLabel LongDIVRet{}; + ARMEmitter::SingleUseForwardLabel Only64Bit{}; + ARMEmitter::SingleUseForwardLabel LongDIVRet{}; // Check the upper bits for zero // If the upper bits are zero then we can do a 64-bit divide @@ -992,8 +992,8 @@ DEF_OP(LRem) { break; } case 8: { - ARMEmitter::ForwardLabel Only64Bit{}; - ARMEmitter::ForwardLabel LongDIVRet{}; + ARMEmitter::SingleUseForwardLabel Only64Bit{}; + ARMEmitter::SingleUseForwardLabel LongDIVRet{}; // Check if the upper bits match the top bit of the lower 64-bits // Sign extend the top bit of lower bits @@ -1066,8 +1066,8 @@ DEF_OP(LURem) { break; } case 8: { - ARMEmitter::ForwardLabel Only64Bit{}; - ARMEmitter::ForwardLabel LongDIVRet{}; + ARMEmitter::SingleUseForwardLabel Only64Bit{}; + ARMEmitter::SingleUseForwardLabel LongDIVRet{}; // Check the upper bits for zero // If the upper bits are zero then we can do a 64-bit divide diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/AtomicOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/AtomicOps.cpp index 0c0dc78dd4..39b67f8f56 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/AtomicOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/AtomicOps.cpp @@ -32,8 +32,8 @@ DEF_OP(CASPair) { } else { ARMEmitter::BackwardLabel LoopTop; - ARMEmitter::ForwardLabel LoopNotExpected; - ARMEmitter::ForwardLabel LoopExpected; + ARMEmitter::SingleUseForwardLabel LoopNotExpected; + ARMEmitter::SingleUseForwardLabel LoopExpected; Bind(&LoopTop); ldaxp(EmitSize, TMP2, TMP3, MemSrc); @@ -82,8 +82,8 @@ DEF_OP(CAS) { } else { ARMEmitter::BackwardLabel LoopTop; - ARMEmitter::ForwardLabel LoopNotExpected; - ARMEmitter::ForwardLabel LoopExpected; + ARMEmitter::SingleUseForwardLabel LoopNotExpected; + ARMEmitter::SingleUseForwardLabel LoopExpected; Bind(&LoopTop); ldaxr(SubEmitSize, TMP2, MemSrc); if (OpSize == 1) { diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/BranchOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/BranchOps.cpp index 6b8e93520c..9470d6bc7e 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/BranchOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/BranchOps.cpp @@ -53,20 +53,17 @@ DEF_OP(ExitFunction) { uint64_t NewRIP; if (IsInlineConstant(Op->NewRIP, &NewRIP) || IsInlineEntrypointOffset(Op->NewRIP, &NewRIP)) { - ARMEmitter::ForwardLabel l_BranchHost; - ARMEmitter::ForwardLabel l_BranchGuest; + ARMEmitter::SingleUseForwardLabel l_BranchHost; ldr(ARMEmitter::XReg::x0, &l_BranchHost); blr(ARMEmitter::Reg::r0); Bind(&l_BranchHost); dc64(ThreadState->CurrentFrame->Pointers.Common.ExitFunctionLinker); - Bind(&l_BranchGuest); dc64(NewRIP); - } else { - ARMEmitter::ForwardLabel FullLookup; + ARMEmitter::SingleUseForwardLabel FullLookup; auto RipReg = GetReg(Op->NewRIP.ID()); // L1 Cache diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp index 390e9ab571..414c5a254c 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp @@ -481,7 +481,24 @@ void Arm64JITCore::Op_Unhandled(IR::IROp_Header const *IROp, IR::NodeID Node) { } -static uint64_t Arm64JITCore_ExitFunctionLink(FEXCore::Core::CpuStateFrame *Frame, FEXCore::Context::ContextImpl::ExitFunctionLinkData *Record) { +static void DirectBlockDelinker(FEXCore::Core::CpuStateFrame *Frame, FEXCore::Context::ExitFunctionLinkData *Record) { + auto LinkerAddress = Frame->Pointers.Common.ExitFunctionLinker; + uintptr_t branch = (uintptr_t)(Record) - 8; + FEXCore::ARMEmitter::Emitter emit((uint8_t*)(branch), 8); + FEXCore::ARMEmitter::SingleUseForwardLabel l_BranchHost; + emit.ldr(FEXCore::ARMEmitter::XReg::x0, &l_BranchHost); + emit.blr(FEXCore::ARMEmitter::Reg::r0); + emit.Bind(&l_BranchHost); + emit.dc64(LinkerAddress); + FEXCore::ARMEmitter::Emitter::ClearICache((void*)branch, 8); +} + +static void IndirectBlockDelinker(FEXCore::Core::CpuStateFrame *Frame, FEXCore::Context::ExitFunctionLinkData *Record) { + auto LinkerAddress = Frame->Pointers.Common.ExitFunctionLinker; + Record->HostBranch = LinkerAddress; +} + +static uint64_t Arm64JITCore_ExitFunctionLink(FEXCore::Core::CpuStateFrame *Frame, FEXCore::Context::ExitFunctionLinkData *Record) { auto Thread = Frame->Thread; auto GuestRip = Record->GuestRIP; @@ -493,34 +510,23 @@ static uint64_t Arm64JITCore_ExitFunctionLink(FEXCore::Core::CpuStateFrame *Fram } uintptr_t branch = (uintptr_t)(Record) - 8; - auto LinkerAddress = Frame->Pointers.Common.ExitFunctionLinker; auto offset = HostCode/4 - branch/4; if (vixl::IsInt26(offset)) { // optimal case - can branch directly // patch the code - FEXCore::ARMEmitter::Emitter emit((uint8_t*)(branch), 24); + FEXCore::ARMEmitter::Emitter emit((uint8_t*)(branch), 4); emit.b(offset); - FEXCore::ARMEmitter::Emitter::ClearICache((void*)branch, 24); + FEXCore::ARMEmitter::Emitter::ClearICache((void*)branch, 4); // Add de-linking handler - Thread->LookupCache->AddBlockLink(GuestRip, (uintptr_t)Record, [branch, LinkerAddress]{ - FEXCore::ARMEmitter::Emitter emit((uint8_t*)(branch), 24); - FEXCore::ARMEmitter::ForwardLabel l_BranchHost; - emit.ldr(FEXCore::ARMEmitter::XReg::x0, &l_BranchHost); - emit.blr(FEXCore::ARMEmitter::Reg::r0); - emit.Bind(&l_BranchHost); - emit.dc64(LinkerAddress); - FEXCore::ARMEmitter::Emitter::ClearICache((void*)branch, 24); - }); + Thread->LookupCache->AddBlockLink(GuestRip, Record, DirectBlockDelinker); } else { // fallback case - do a soft-er link by patching the pointer Record->HostBranch = HostCode; // Add de-linking handler - Thread->LookupCache->AddBlockLink(GuestRip, (uintptr_t)Record, [Record, LinkerAddress]{ - Record->HostBranch = LinkerAddress; - }); + Thread->LookupCache->AddBlockLink(GuestRip, Record, IndirectBlockDelinker); } return HostCode; diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp index b85e14c82e..40472ac20a 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp @@ -175,7 +175,7 @@ DEF_OP(LoadRegister) { if (HostSupportsSVE256) { const auto regOffs = Op->Offset & 31; - ARMEmitter::ForwardLabel DataLocation; + ARMEmitter::SingleUseForwardLabel DataLocation; const auto LoadPredicate = [this, &DataLocation] { const auto Predicate = ARMEmitter::PReg::p0; adr(TMP1, &DataLocation); @@ -184,7 +184,7 @@ DEF_OP(LoadRegister) { }; const auto EmitData = [this, &DataLocation](uint32_t Value) { - ARMEmitter::ForwardLabel PastConstant; + ARMEmitter::SingleUseForwardLabel PastConstant; b(&PastConstant); Bind(&DataLocation); dc32(Value); @@ -364,7 +364,7 @@ DEF_OP(StoreRegister) { const auto regOffs = Op->Offset & 31; // Compartmentalized setting up of the predicate for the cases that need it. - ARMEmitter::ForwardLabel DataLocation; + ARMEmitter::SingleUseForwardLabel DataLocation; const auto LoadPredicate = [this, &DataLocation] { const auto Predicate = ARMEmitter::PReg::p0; adr(TMP1, &DataLocation); @@ -377,7 +377,7 @@ DEF_OP(StoreRegister) { // It's helpful to treat LoadPredicate and EmitData as a prologue and epilogue // respectfully. const auto EmitData = [this, &DataLocation](uint32_t Data) { - ARMEmitter::ForwardLabel PastConstant; + ARMEmitter::SingleUseForwardLabel PastConstant; b(&PastConstant); Bind(&DataLocation); dc32(Data); @@ -1715,8 +1715,8 @@ DEF_OP(MemSet) { // // Counter is decremented regardless. - ARMEmitter::ForwardLabel BackwardImpl{}; - ARMEmitter::ForwardLabel Done{}; + ARMEmitter::SingleUseForwardLabel BackwardImpl{}; + ARMEmitter::SingleUseForwardLabel Done{}; mov(TMP1, Length.X()); if (Op->Prefix.IsInvalid()) { @@ -1789,7 +1789,7 @@ DEF_OP(MemSet) { const int32_t SizeDirection = Size * Direction; ARMEmitter::BackwardLabel AgainInternal{}; - ARMEmitter::ForwardLabel DoneInternal{}; + ARMEmitter::SingleUseForwardLabel DoneInternal{}; // Early exit if zero count. cbz(ARMEmitter::Size::i64Bit, TMP1, &DoneInternal); @@ -1895,8 +1895,8 @@ DEF_OP(MemCpy) { // // Counter is decremented regardless. - ARMEmitter::ForwardLabel BackwardImpl{}; - ARMEmitter::ForwardLabel Done{}; + ARMEmitter::SingleUseForwardLabel BackwardImpl{}; + ARMEmitter::SingleUseForwardLabel Done{}; mov(TMP1, Length.X()); if (Op->PrefixDest.IsInvalid()) { @@ -2050,7 +2050,7 @@ DEF_OP(MemCpy) { const int32_t SizeDirection = Size * Direction; ARMEmitter::BackwardLabel AgainInternal{}; - ARMEmitter::ForwardLabel DoneInternal{}; + ARMEmitter::SingleUseForwardLabel DoneInternal{}; // Early exit if zero count. cbz(ARMEmitter::Size::i64Bit, TMP1, &DoneInternal); diff --git a/FEXCore/Source/Interface/Core/LookupCache.h b/FEXCore/Source/Interface/Core/LookupCache.h index 0f0e51bac5..d02d02a380 100644 --- a/FEXCore/Source/Interface/Core/LookupCache.h +++ b/FEXCore/Source/Interface/Core/LookupCache.h @@ -100,15 +100,15 @@ class LookupCache { L1Entry.HostCode = (uintptr_t)HostCode; } - void Erase(uint64_t Address) { + void Erase(FEXCore::Core::CpuStateFrame *Frame, uint64_t Address) { std::lock_guard lk(WriteLock); // Sever any links to this block - auto lower = BlockLinks->lower_bound({Address, 0}); - auto upper = BlockLinks->upper_bound({Address, UINTPTR_MAX}); + auto lower = BlockLinks->lower_bound({Address, nullptr}); + auto upper = BlockLinks->upper_bound({Address, reinterpret_cast(UINTPTR_MAX)}); for (auto it = lower; it != upper; it = BlockLinks->erase(it)) { - it->second(); + it->second(Frame, it->first.HostLink); } // Remove from BlockList @@ -141,8 +141,7 @@ class LookupCache { BlockPointers[PageOffset].HostCode = 0; } - - void AddBlockLink(uint64_t GuestDestination, uintptr_t HostLink, const std::function &delinker) { + void AddBlockLink(uint64_t GuestDestination, FEXCore::Context::ExitFunctionLinkData * HostLink, const FEXCore::Context::BlockDelinkerFunc &delinker) { std::lock_guard lk(WriteLock); BlockLinks->insert({{GuestDestination, HostLink}, delinker}); @@ -224,7 +223,7 @@ class LookupCache { struct BlockLinkTag { uint64_t GuestDestination; - uintptr_t HostLink; + FEXCore::Context::ExitFunctionLinkData *HostLink; bool operator <(const BlockLinkTag& other) const { if (GuestDestination < other.GuestDestination) @@ -243,7 +242,7 @@ class LookupCache { // // This makes `BlockLinks` look like a raw pointer that could memory leak, but since it is backed by the MBR, it won't. std::pmr::monotonic_buffer_resource BlockLinks_mbr; - using BlockLinksMapType = std::pmr::map>; + using BlockLinksMapType = std::pmr::map; fextl::unique_ptr> BlockLinks_pma; BlockLinksMapType *BlockLinks;