From 824f12268056b05d24a1ae7fc1add759b56d5280 Mon Sep 17 00:00:00 2001
From: Ryan Houdek <Sonicadvance1@gmail.com>
Date: Thu, 21 Mar 2024 20:13:15 -0700
Subject: [PATCH 1/2] OpcodeDispatcher: Fixes 32-bit mode LOOP RCX register
 usage

In 64-bit mode, the LOOP instruction's RCX register usage is 64-bit or
32-bit.
In 32-bit mode, the LOOP instruction's RCX register usage is 32-bit or
16-bit.

FEX wasn't handling the 16-bit case at all which was causing the LOOP
instruction to effectively always operate at 32-bit size. Now this is
correctly supported, and it also stops treating the operation as 64-bit.
---
 FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
index 0a7cca0d28..1f9bd1f4cf 100644
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
@@ -1058,13 +1058,19 @@ void OpDispatchBuilder::LoopOp(OpcodeArgs) {
 
   BlockSetRIP = true;
   uint32_t SrcSize = (Op->Flags & X86Tables::DecodeFlags::FLAG_ADDRESS_SIZE) ? 4 : 8;
+  auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit;
+
+  if (!CTX->Config.Is64BitMode) {
+    // RCX size is 32-bit or 16-bit when executing in 32-bit mode.
+    SrcSize >>= 1;
+    OpSize = OpSize::i32Bit;
+  }
 
   LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here");
 
   uint64_t Target = Op->PC + Op->InstSize + Op->Src[1].Data.Literal.Value;
 
-  auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit;
-  OrderedNode *CondReg = LoadSource(GPRClass, Op, Op->Src[0], Op->Flags);
+  OrderedNode *CondReg = LoadSource_WithOpSize(GPRClass, Op, Op->Src[0], SrcSize, Op->Flags);
   CondReg = _Sub(OpSize, CondReg, _Constant(SrcSize * 8, 1));
   StoreResult(GPRClass, Op, Op->Src[0], CondReg, -1);
 

From 9ab930cb260c404d26c212e7bde3657e20a5a351 Mon Sep 17 00:00:00 2001
From: Ryan Houdek <Sonicadvance1@gmail.com>
Date: Thu, 21 Mar 2024 20:18:43 -0700
Subject: [PATCH 2/2] unittests/ASM: Adds tests for loop instruction address
 size overrides

32-bit test would fail if the 16-bit address size override wasn't
respected.
---
 .../FEX_bugs/LoopAddressSizeCheck.asm         | 27 +++++++++++++++++++
 .../ASM/FEX_bugs/LoopAddressSizeCheck.asm     | 25 +++++++++++++++++
 2 files changed, 52 insertions(+)
 create mode 100644 unittests/32Bit_ASM/FEX_bugs/LoopAddressSizeCheck.asm
 create mode 100644 unittests/ASM/FEX_bugs/LoopAddressSizeCheck.asm

diff --git a/unittests/32Bit_ASM/FEX_bugs/LoopAddressSizeCheck.asm b/unittests/32Bit_ASM/FEX_bugs/LoopAddressSizeCheck.asm
new file mode 100644
index 0000000000..2c1937049e
--- /dev/null
+++ b/unittests/32Bit_ASM/FEX_bugs/LoopAddressSizeCheck.asm
@@ -0,0 +1,27 @@
+%ifdef CONFIG
+{
+  "RegData": {
+    "RAX": "0x0000000000000001",
+    "RBX": "0x0000000000010001"
+  },
+  "Mode": "32BIT"
+}
+%endif
+
+; FEX-Emu had a bug where a16 loop instructions weren't treating the input RCX register as 16-bit.
+; Effectively always treating it as 32-bit.
+; Little test that operates at 16-bit and 32-bit sizes to ensure it is correctly handled.
+mov eax, 0
+mov ebx, 0
+mov ecx, 0x0001_0001
+
+.test:
+inc eax
+a16 loop .test
+
+mov ecx, 0x0001_0001
+.test2:
+inc ebx
+a32 loop .test2
+
+hlt
diff --git a/unittests/ASM/FEX_bugs/LoopAddressSizeCheck.asm b/unittests/ASM/FEX_bugs/LoopAddressSizeCheck.asm
new file mode 100644
index 0000000000..92983c1ec7
--- /dev/null
+++ b/unittests/ASM/FEX_bugs/LoopAddressSizeCheck.asm
@@ -0,0 +1,25 @@
+%ifdef CONFIG
+{
+  "RegData": {
+    "RAX": "0x0000000000010001",
+    "RBX": "0x0000000000000001"
+  }
+}
+%endif
+
+; FEX-Emu had a bug in the 32-bit implementation of LOOP where it didn't handle 16-bit RCX correctly.
+; For test coverage on the 64-bit side, ensure that both 64-bit and 32-bit operation works correctly.
+mov rax, 0
+mov rbx, 0
+mov rcx, 0x0001_0001
+
+.test:
+inc rax
+a64 loop .test
+
+mov rcx, 0x1_0000_0001
+.test2:
+inc rbx
+a32 loop .test2
+
+hlt