From cc17bf6df0d37e79f0affaba7ae53bbda9990d98 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Mon, 8 Jan 2024 22:03:46 -0800 Subject: [PATCH] Arm64: Switches uses of forward label over to SingleUse if possible The primary goal of this change is to ensure that the delinker doesn't need to allocate any memory. The delinker can be hit heavily by JIT code, so we don't want it allocating memory. --- .../Interface/Core/Dispatcher/Dispatcher.cpp | 4 ++-- .../Interface/Core/JIT/Arm64/ALUOps.cpp | 24 +++++++++---------- .../Interface/Core/JIT/Arm64/AtomicOps.cpp | 8 +++---- .../Interface/Core/JIT/Arm64/BranchOps.cpp | 7 ++---- .../Source/Interface/Core/JIT/Arm64/JIT.cpp | 2 +- .../Interface/Core/JIT/Arm64/MemoryOps.cpp | 20 ++++++++-------- 6 files changed, 31 insertions(+), 34 deletions(-) diff --git a/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.cpp b/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.cpp index 0b7ed0468c..c720a3d6d3 100644 --- a/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.cpp +++ b/FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.cpp @@ -73,8 +73,8 @@ void Dispatcher::EmitDispatcher() { // } ARMEmitter::ForwardLabel l_CTX; - ARMEmitter::ForwardLabel l_Sleep; - ARMEmitter::ForwardLabel l_CompileBlock; + ARMEmitter::SingleUseForwardLabel l_Sleep; + ARMEmitter::SingleUseForwardLabel l_CompileBlock; // Push all the register we need to save PushCalleeSavedRegisters(); diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp index 25cd588d0e..30dbd410da 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/ALUOps.cpp @@ -731,9 +731,9 @@ DEF_OP(PDep) { 1U << MaskReg.Idx() | 1U << DestReg.Idx(); - ARMEmitter::ForwardLabel EarlyExit; + ARMEmitter::SingleUseForwardLabel EarlyExit; ARMEmitter::BackwardLabel NextBit; - ARMEmitter::ForwardLabel Done; + ARMEmitter::SingleUseForwardLabel Done; cbz(EmitSize, Mask, &EarlyExit); 
mov(EmitSize, IndexReg, ZeroReg); @@ -792,9 +792,9 @@ DEF_OP(PExt) { const auto BitReg = TMP2; const auto ValueReg = TMP3; - ARMEmitter::ForwardLabel EarlyExit; + ARMEmitter::SingleUseForwardLabel EarlyExit; ARMEmitter::BackwardLabel NextBit; - ARMEmitter::ForwardLabel Done; + ARMEmitter::SingleUseForwardLabel Done; cbz(EmitSize, Mask, &EarlyExit); mov(EmitSize, MaskReg, Mask); @@ -848,8 +848,8 @@ DEF_OP(LDiv) { break; } case 8: { - ARMEmitter::ForwardLabel Only64Bit{}; - ARMEmitter::ForwardLabel LongDIVRet{}; + ARMEmitter::SingleUseForwardLabel Only64Bit{}; + ARMEmitter::SingleUseForwardLabel LongDIVRet{}; // Check if the upper bits match the top bit of the lower 64-bits // Sign extend the top bit of lower bits @@ -920,8 +920,8 @@ DEF_OP(LUDiv) { break; } case 8: { - ARMEmitter::ForwardLabel Only64Bit{}; - ARMEmitter::ForwardLabel LongDIVRet{}; + ARMEmitter::SingleUseForwardLabel Only64Bit{}; + ARMEmitter::SingleUseForwardLabel LongDIVRet{}; // Check the upper bits for zero // If the upper bits are zero then we can do a 64-bit divide @@ -992,8 +992,8 @@ DEF_OP(LRem) { break; } case 8: { - ARMEmitter::ForwardLabel Only64Bit{}; - ARMEmitter::ForwardLabel LongDIVRet{}; + ARMEmitter::SingleUseForwardLabel Only64Bit{}; + ARMEmitter::SingleUseForwardLabel LongDIVRet{}; // Check if the upper bits match the top bit of the lower 64-bits // Sign extend the top bit of lower bits @@ -1066,8 +1066,8 @@ DEF_OP(LURem) { break; } case 8: { - ARMEmitter::ForwardLabel Only64Bit{}; - ARMEmitter::ForwardLabel LongDIVRet{}; + ARMEmitter::SingleUseForwardLabel Only64Bit{}; + ARMEmitter::SingleUseForwardLabel LongDIVRet{}; // Check the upper bits for zero // If the upper bits are zero then we can do a 64-bit divide diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/AtomicOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/AtomicOps.cpp index 0c0dc78dd4..39b67f8f56 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/AtomicOps.cpp +++ 
b/FEXCore/Source/Interface/Core/JIT/Arm64/AtomicOps.cpp @@ -32,8 +32,8 @@ DEF_OP(CASPair) { } else { ARMEmitter::BackwardLabel LoopTop; - ARMEmitter::ForwardLabel LoopNotExpected; - ARMEmitter::ForwardLabel LoopExpected; + ARMEmitter::SingleUseForwardLabel LoopNotExpected; + ARMEmitter::SingleUseForwardLabel LoopExpected; Bind(&LoopTop); ldaxp(EmitSize, TMP2, TMP3, MemSrc); @@ -82,8 +82,8 @@ DEF_OP(CAS) { } else { ARMEmitter::BackwardLabel LoopTop; - ARMEmitter::ForwardLabel LoopNotExpected; - ARMEmitter::ForwardLabel LoopExpected; + ARMEmitter::SingleUseForwardLabel LoopNotExpected; + ARMEmitter::SingleUseForwardLabel LoopExpected; Bind(&LoopTop); ldaxr(SubEmitSize, TMP2, MemSrc); if (OpSize == 1) { diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/BranchOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/BranchOps.cpp index 6b8e93520c..9470d6bc7e 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/BranchOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/BranchOps.cpp @@ -53,20 +53,17 @@ DEF_OP(ExitFunction) { uint64_t NewRIP; if (IsInlineConstant(Op->NewRIP, &NewRIP) || IsInlineEntrypointOffset(Op->NewRIP, &NewRIP)) { - ARMEmitter::ForwardLabel l_BranchHost; - ARMEmitter::ForwardLabel l_BranchGuest; + ARMEmitter::SingleUseForwardLabel l_BranchHost; ldr(ARMEmitter::XReg::x0, &l_BranchHost); blr(ARMEmitter::Reg::r0); Bind(&l_BranchHost); dc64(ThreadState->CurrentFrame->Pointers.Common.ExitFunctionLinker); - Bind(&l_BranchGuest); dc64(NewRIP); - } else { - ARMEmitter::ForwardLabel FullLookup; + ARMEmitter::SingleUseForwardLabel FullLookup; auto RipReg = GetReg(Op->NewRIP.ID()); // L1 Cache diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp index 2fd3af18d1..414c5a254c 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/JIT.cpp @@ -485,7 +485,7 @@ static void DirectBlockDelinker(FEXCore::Core::CpuStateFrame *Frame, FEXCore::Co auto LinkerAddress = 
Frame->Pointers.Common.ExitFunctionLinker; uintptr_t branch = (uintptr_t)(Record) - 8; FEXCore::ARMEmitter::Emitter emit((uint8_t*)(branch), 8); - FEXCore::ARMEmitter::ForwardLabel l_BranchHost; + FEXCore::ARMEmitter::SingleUseForwardLabel l_BranchHost; emit.ldr(FEXCore::ARMEmitter::XReg::x0, &l_BranchHost); emit.blr(FEXCore::ARMEmitter::Reg::r0); emit.Bind(&l_BranchHost); diff --git a/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp b/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp index b85e14c82e..40472ac20a 100644 --- a/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp +++ b/FEXCore/Source/Interface/Core/JIT/Arm64/MemoryOps.cpp @@ -175,7 +175,7 @@ DEF_OP(LoadRegister) { if (HostSupportsSVE256) { const auto regOffs = Op->Offset & 31; - ARMEmitter::ForwardLabel DataLocation; + ARMEmitter::SingleUseForwardLabel DataLocation; const auto LoadPredicate = [this, &DataLocation] { const auto Predicate = ARMEmitter::PReg::p0; adr(TMP1, &DataLocation); @@ -184,7 +184,7 @@ DEF_OP(LoadRegister) { }; const auto EmitData = [this, &DataLocation](uint32_t Value) { - ARMEmitter::ForwardLabel PastConstant; + ARMEmitter::SingleUseForwardLabel PastConstant; b(&PastConstant); Bind(&DataLocation); dc32(Value); @@ -364,7 +364,7 @@ DEF_OP(StoreRegister) { const auto regOffs = Op->Offset & 31; // Compartmentalized setting up of the predicate for the cases that need it. - ARMEmitter::ForwardLabel DataLocation; + ARMEmitter::SingleUseForwardLabel DataLocation; const auto LoadPredicate = [this, &DataLocation] { const auto Predicate = ARMEmitter::PReg::p0; adr(TMP1, &DataLocation); @@ -377,7 +377,7 @@ DEF_OP(StoreRegister) { // It's helpful to treat LoadPredicate and EmitData as a prologue and epilogue // respectfully. 
const auto EmitData = [this, &DataLocation](uint32_t Data) { - ARMEmitter::ForwardLabel PastConstant; + ARMEmitter::SingleUseForwardLabel PastConstant; b(&PastConstant); Bind(&DataLocation); dc32(Data); @@ -1715,8 +1715,8 @@ DEF_OP(MemSet) { // // Counter is decremented regardless. - ARMEmitter::ForwardLabel BackwardImpl{}; - ARMEmitter::ForwardLabel Done{}; + ARMEmitter::SingleUseForwardLabel BackwardImpl{}; + ARMEmitter::SingleUseForwardLabel Done{}; mov(TMP1, Length.X()); if (Op->Prefix.IsInvalid()) { @@ -1789,7 +1789,7 @@ DEF_OP(MemSet) { const int32_t SizeDirection = Size * Direction; ARMEmitter::BackwardLabel AgainInternal{}; - ARMEmitter::ForwardLabel DoneInternal{}; + ARMEmitter::SingleUseForwardLabel DoneInternal{}; // Early exit if zero count. cbz(ARMEmitter::Size::i64Bit, TMP1, &DoneInternal); @@ -1895,8 +1895,8 @@ DEF_OP(MemCpy) { // // Counter is decremented regardless. - ARMEmitter::ForwardLabel BackwardImpl{}; - ARMEmitter::ForwardLabel Done{}; + ARMEmitter::SingleUseForwardLabel BackwardImpl{}; + ARMEmitter::SingleUseForwardLabel Done{}; mov(TMP1, Length.X()); if (Op->PrefixDest.IsInvalid()) { @@ -2050,7 +2050,7 @@ DEF_OP(MemCpy) { const int32_t SizeDirection = Size * Direction; ARMEmitter::BackwardLabel AgainInternal{}; - ARMEmitter::ForwardLabel DoneInternal{}; + ARMEmitter::SingleUseForwardLabel DoneInternal{}; // Early exit if zero count. cbz(ARMEmitter::Size::i64Bit, TMP1, &DoneInternal);