From 831346e9c0335e39cac542f2a47a68561463219d Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 4 Apr 2024 10:43:49 -0400 Subject: [PATCH 1/4] OpcodeDispatcher: calculate deferred flags before RMW on NZCV otherwise we might have the wrong input NZCV. Signed-off-by: Alyssa Rosenzweig --- FEXCore/Source/Interface/Core/OpcodeDispatcher.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index f43383dd28..4fdff3e0e0 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -1293,6 +1293,8 @@ friend class FEXCore::IR::PassManager; // Set flag tracking to prepare for a read-modify-write operation on NZCV. void HandleNZCV_RMW(uint32_t _PossiblySetNZCVBits = ~0) { + CalculateDeferredFlags(); + if (NZCVDirty && CachedNZCV) _StoreNZCV(CachedNZCV); From 31522bc30b0491e3c99d0bc4c6e7fa60a49aa67f Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 5 Apr 2024 16:16:13 -0400 Subject: [PATCH 2/4] OpcodeDispatcher: optimize shl flag This is something the new shift flag code will do. Backporting the opt since that's stalled and this reduces the diff. Signed-off-by: Alyssa Rosenzweig --- FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp index 52372af0d6..f3639858c7 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp @@ -587,7 +587,7 @@ void OpDispatchBuilder::CalculateFlags_ShiftLeft(uint8_t SrcSize, OrderedNode *R // Extract the last bit shifted in to CF auto Size = _Constant(SrcSize * 8); - auto ShiftAmt = _Sub(OpSize, Size, Src2); + auto ShiftAmt = SrcSize >= 4 ? _Neg(OpSize, Src2) : _Sub(OpSize, Size, Src2); auto LastBit = _Lshr(OpSize, Src1, ShiftAmt); SetRFLAG(LastBit, 0, true); From 56df6694184d85accad5b3bbf48ac946e509fea4 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 4 Apr 2024 10:51:02 -0400 Subject: [PATCH 3/4] unittests: add test for deferred flags + shift with cl=0 this failed on an earlier version of the series that otherwise passed ci. Signed-off-by: Alyssa Rosenzweig --- unittests/ASM/FEX_bugs/nzcv_rmw.asm | 38 +++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 unittests/ASM/FEX_bugs/nzcv_rmw.asm diff --git a/unittests/ASM/FEX_bugs/nzcv_rmw.asm b/unittests/ASM/FEX_bugs/nzcv_rmw.asm new file mode 100644 index 0000000000..a3652a9eaf --- /dev/null +++ b/unittests/ASM/FEX_bugs/nzcv_rmw.asm @@ -0,0 +1,38 @@ +%ifdef CONFIG +{ + "RegData": { + "RAX": "0xcafe" + } +} +%endif + +; FEX had a bug where an NZCV RMW would fail to calculate previously deferred +; flags, resulting in garbage flag values + +; First zero NZCV and break visibility +mov rax, 0 +add rax, 1 +jz fexi_fexi_im_so_broken + +jmp .begin +.begin: + +; NZCV is zero. Set it to something nonzero with a deferred flag operation. +mov rax, 0 +popcnt rax, rax + +; Now do a variable shift that preserves flags. This would clear ZF if not for +; the condition on the shift flags. +mov rbx, 100 +mov cl, 0 +sar rbx, cl + +; ZF should still be set. +jnz fexi_fexi_im_so_broken + +mov rax, 0xcafe +hlt + +fexi_fexi_im_so_broken: +mov rax, 0xdead +hlt From 77e0ba2460328bba27372a6e9259441851509522 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 5 Apr 2024 16:16:58 -0400 Subject: [PATCH 4/4] InstCountCI: Update Signed-off-by: Alyssa Rosenzweig --- .../InstructionCountCI/FlagM/PrimaryGroup.json | 14 ++++++-------- unittests/InstructionCountCI/FlagM/Secondary.json | 14 ++++++-------- unittests/InstructionCountCI/PrimaryGroup.json | 14 ++++++-------- unittests/InstructionCountCI/Secondary.json | 14 ++++++-------- 4 files changed, 24 insertions(+), 32 deletions(-) diff --git a/unittests/InstructionCountCI/FlagM/PrimaryGroup.json b/unittests/InstructionCountCI/FlagM/PrimaryGroup.json index 1bed89c1e2..a43622a4f9 100644 --- a/unittests/InstructionCountCI/FlagM/PrimaryGroup.json +++ b/unittests/InstructionCountCI/FlagM/PrimaryGroup.json @@ -1955,17 +1955,16 @@ ] }, "shl eax, cl": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xd3 /4", "ExpectedArm64ASM": [ "mov w20, w4", "mov w21, w5", "lsl w22, w20, w21", "mov x4, x22", - "cbz x21, #+0x24", + "cbz x21, #+0x20", "tst w22, w22", - "mov w23, #0x20", - "sub w21, w23, w21", + "neg w21, w21", "lsr w21, w20, w21", "rmif x21, #63, #nzCv", "mov x26, x22", @@ -1974,17 +1973,16 @@ ] }, "shl rax, cl": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xd3 /4", "ExpectedArm64ASM": [ "mov x20, x4", "mov x21, x5", "lsl x22, x20, x21", "mov x4, x22", - "cbz x21, #+0x24", + "cbz x21, #+0x20", "tst x22, x22", - "mov w23, #0x40", - "sub x21, x23, x21", + "neg x21, x21", "lsr x21, x20, x21", "rmif x21, #63, #nzCv", "mov x26, x22", diff --git a/unittests/InstructionCountCI/FlagM/Secondary.json b/unittests/InstructionCountCI/FlagM/Secondary.json index 616b060106..b3bef13de8 100644 --- a/unittests/InstructionCountCI/FlagM/Secondary.json +++ b/unittests/InstructionCountCI/FlagM/Secondary.json @@ -806,7 +806,7 @@ ] }, "shld eax, ebx, cl": { - "ExpectedInstructionCount": 21, + "ExpectedInstructionCount": 20, "Comment": "0x0f 0xad", "ExpectedArm64ASM": [ "mov w20, w7", @@ -821,10 +821,9 @@ "csel x20, x21, x20, eq", "mov w4, w20", "msr nzcv, x23", - "cbz x22, #+0x24", + "cbz x22, #+0x20", "tst w20, w20", - "mov w23, #0x20", - "sub w22, w23, w22", + "neg w22, w22", "lsr w22, w21, w22", "rmif x22, #63, #nzCv", "mov x26, x20", @@ -833,7 +832,7 @@ ] }, "shld rax, rbx, cl": { - "ExpectedInstructionCount": 20, + "ExpectedInstructionCount": 19, "Comment": "0x0f 0xad", "ExpectedArm64ASM": [ "mov x20, x4", @@ -847,10 +846,9 @@ "csel x22, x20, x22, eq", "mov x4, x22", "msr nzcv, x23", - "cbz x21, #+0x24", + "cbz x21, #+0x20", "tst x22, x22", - "mov w23, #0x40", - "sub x21, x23, x21", + "neg x21, x21", "lsr x21, x20, x21", "rmif x21, #63, #nzCv", "mov x26, x22", diff --git a/unittests/InstructionCountCI/PrimaryGroup.json b/unittests/InstructionCountCI/PrimaryGroup.json index e17cc53126..31b8ae7f6f 100644 --- a/unittests/InstructionCountCI/PrimaryGroup.json +++ b/unittests/InstructionCountCI/PrimaryGroup.json @@ -2401,17 +2401,16 @@ ] }, "shl eax, cl": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 16, "Comment": "GROUP2 0xd3 /4", "ExpectedArm64ASM": [ "mov w20, w4", "mov w21, w5", "lsl w22, w20, w21", "mov x4, x22", - "cbz x21, #+0x34", + "cbz x21, #+0x30", "tst w22, w22", - "mov w23, #0x20", - "sub w21, w23, w21", + "neg w21, w21", "lsr w21, w20, w21", "ubfx x21, x21, #0, #1", "mrs x23, nzcv", @@ -2424,17 +2423,16 @@ ] }, "shl rax, cl": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 16, "Comment": "GROUP2 0xd3 /4", "ExpectedArm64ASM": [ "mov x20, x4", "mov x21, x5", "lsl x22, x20, x21", "mov x4, x22", - "cbz x21, #+0x34", + "cbz x21, #+0x30", "tst x22, x22", - "mov w23, #0x40", - "sub x21, x23, x21", + "neg x21, x21", "lsr x21, x20, x21", "ubfx x21, x21, #0, #1", "mrs x23, nzcv", diff --git a/unittests/InstructionCountCI/Secondary.json b/unittests/InstructionCountCI/Secondary.json index 1b224644ad..04878369de 100644 --- a/unittests/InstructionCountCI/Secondary.json +++ b/unittests/InstructionCountCI/Secondary.json @@ -1620,7 +1620,7 @@ ] }, "shld eax, ebx, cl": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 24, "Comment": "0x0f 0xad", "ExpectedArm64ASM": [ "mov w20, w7", @@ -1635,10 +1635,9 @@ "csel x20, x21, x20, eq", "mov w4, w20", "msr nzcv, x23", - "cbz x22, #+0x34", + "cbz x22, #+0x30", "tst w20, w20", - "mov w23, #0x20", - "sub w22, w23, w22", + "neg w22, w22", "lsr w22, w21, w22", "ubfx x22, x22, #0, #1", "mrs x23, nzcv", @@ -1651,7 +1650,7 @@ ] }, "shld rax, rbx, cl": { - "ExpectedInstructionCount": 24, + "ExpectedInstructionCount": 23, "Comment": "0x0f 0xad", "ExpectedArm64ASM": [ "mov x20, x4", @@ -1665,10 +1664,9 @@ "csel x22, x20, x22, eq", "mov x4, x22", "msr nzcv, x23", - "cbz x21, #+0x34", + "cbz x21, #+0x30", "tst x22, x22", - "mov w23, #0x40", - "sub x21, x23, x21", + "neg x21, x21", "lsr x21, x20, x21", "ubfx x21, x21, #0, #1", "mrs x23, nzcv",