From 61758ea47dcfcb49372ccda78342d65dfd5464bf Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 27 Mar 2024 12:25:32 -0400 Subject: [PATCH 1/2] OpcodeDispatcher: eliminate branch in cmpxchg pair In the old case: * if we take the branch, 1 instruction * if we don't take the branch, 3 instructions * branch predictor fun * 3 instructions of icache pressure In the new case: * unconditionally 2 instructions * no branch predictor dependence * 2 instructions of icache pressure This should not be non-negligibly worse, and it simplifies things for RA. Signed-off-by: Alyssa Rosenzweig --- .../Interface/Core/OpcodeDispatcher.cpp | 24 +++++++------------ 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index afff1afb65..12091359d6 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -4270,23 +4270,15 @@ void OpDispatchBuilder::CMPXCHGPairOp(OpcodeArgs) { SetRFLAG(ZFResult); CalculateDeferredFlags(); - auto CondJump_ = CondJump(ZFResult); - - // Make sure to start a new block after ending this one - auto JumpTarget = CreateNewCodeBlockAfter(GetCurrentBlock()); - SetFalseJumpTarget(CondJump_, JumpTarget); - SetCurrentCodeBlock(JumpTarget); - StartNewBlock(); - - StoreGPRRegister(X86State::REG_RAX, Result_Lower); - StoreGPRRegister(X86State::REG_RDX, Result_Upper); + auto UpdateIfNotZF = [this](auto Reg, auto Value) { + // Always use 64-bit csel to preserve existing upper bits. If we have a + // 32-bit cmpxchg in a 64-bit context, Value will be zeroed in upper bits. 
+ StoreGPRRegister(Reg, _NZCVSelect(OpSize::i64Bit, CondClassType{COND_NEQ}, + Value, LoadGPRRegister(Reg))); + }; - auto Jump_ = Jump(); - auto NextJumpTarget = CreateNewCodeBlockAfter(JumpTarget); - SetJumpTarget(Jump_, NextJumpTarget); - SetTrueJumpTarget(CondJump_, NextJumpTarget); - SetCurrentCodeBlock(NextJumpTarget); - StartNewBlock(); + UpdateIfNotZF(X86State::REG_RAX, Result_Lower); + UpdateIfNotZF(X86State::REG_RDX, Result_Upper); } void OpDispatchBuilder::CreateJumpBlocks(fextl::vector const *Blocks) { From d1722ab11929022ca048cf121a16a0ce64b24fa9 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 27 Mar 2024 12:39:30 -0400 Subject: [PATCH 2/2] InstCountCI: Update Signed-off-by: Alyssa Rosenzweig --- .../InstructionCountCI/FlagM/HotBlocks_32Bit.json | 7 +++---- .../InstructionCountCI/FlagM/SecondaryGroup.json | 14 ++++++-------- unittests/InstructionCountCI/SecondaryGroup.json | 14 ++++++-------- 3 files changed, 15 insertions(+), 20 deletions(-) diff --git a/unittests/InstructionCountCI/FlagM/HotBlocks_32Bit.json b/unittests/InstructionCountCI/FlagM/HotBlocks_32Bit.json index 75cba2d486..e85848d4ef 100644 --- a/unittests/InstructionCountCI/FlagM/HotBlocks_32Bit.json +++ b/unittests/InstructionCountCI/FlagM/HotBlocks_32Bit.json @@ -80,7 +80,7 @@ ] }, "dxvk hotblock from MGRR": { - "ExpectedInstructionCount": 43, + "ExpectedInstructionCount": 42, "Comment": [ "Hottest block in Metal Gear Rising: Revengeance render thread" ], @@ -141,9 +141,8 @@ "cset x22, eq", "msr nzcv, x21", "rmif x22, #62, #nZcv", - "cbnz x22, #+0xc", - "mov w4, w20", - "mov w6, w12" + "csel x4, x20, x4, ne", + "csel x6, x12, x6, ne" ] }, "Psychonauts matrix swizzle": { diff --git a/unittests/InstructionCountCI/FlagM/SecondaryGroup.json b/unittests/InstructionCountCI/FlagM/SecondaryGroup.json index 1546fe1372..6d1e2e9639 100644 --- a/unittests/InstructionCountCI/FlagM/SecondaryGroup.json +++ b/unittests/InstructionCountCI/FlagM/SecondaryGroup.json @@ -644,7 +644,7 @@ ] }, 
"cmpxchg8b [rbp]": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 24, "Comment": "GROUP9 0x0F 0xC7 /1", "ExpectedArm64ASM": [ "add x20, x9, #0x0 (0)", @@ -669,13 +669,12 @@ "cset x22, eq", "msr nzcv, x21", "rmif x22, #62, #nZcv", - "cbnz x22, #+0xc", - "mov x4, x20", - "mov x6, x30" + "csel x4, x20, x4, ne", + "csel x6, x30, x6, ne" ] }, "cmpxchg16b [rbp]": { - "ExpectedInstructionCount": 21, + "ExpectedInstructionCount": 20, "Comment": "GROUP9 0x0F 0xC7 /1", "ExpectedArm64ASM": [ "add x20, x9, #0x0 (0)", @@ -696,9 +695,8 @@ "cset x22, eq", "msr nzcv, x21", "rmif x22, #62, #nZcv", - "cbnz x22, #+0xc", - "mov x4, x20", - "mov x6, x30" + "csel x4, x20, x4, ne", + "csel x6, x30, x6, ne" ] }, "rdrand ax": { diff --git a/unittests/InstructionCountCI/SecondaryGroup.json b/unittests/InstructionCountCI/SecondaryGroup.json index 540e4c6a67..4e5cbc5a74 100644 --- a/unittests/InstructionCountCI/SecondaryGroup.json +++ b/unittests/InstructionCountCI/SecondaryGroup.json @@ -776,7 +776,7 @@ ] }, "cmpxchg8b [rbp]": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 24, "Comment": "GROUP9 0x0F 0xC7 /1", "ExpectedArm64ASM": [ "add x20, x9, #0x0 (0)", @@ -801,13 +801,12 @@ "cset x22, eq", "bfi w21, w22, #30, #1", "msr nzcv, x21", - "cbnz x22, #+0xc", - "mov x4, x20", - "mov x6, x30" + "csel x4, x20, x4, ne", + "csel x6, x30, x6, ne" ] }, "cmpxchg16b [rbp]": { - "ExpectedInstructionCount": 21, + "ExpectedInstructionCount": 20, "Comment": "GROUP9 0x0F 0xC7 /1", "ExpectedArm64ASM": [ "add x20, x9, #0x0 (0)", @@ -828,9 +827,8 @@ "cset x22, eq", "bfi w21, w22, #30, #1", "msr nzcv, x21", - "cbnz x22, #+0xc", - "mov x4, x20", - "mov x6, x30" + "csel x4, x20, x4, ne", + "csel x6, x30, x6, ne" ] }, "rdrand ax": {