From aa1076d12b38d3fee83c14d1a99db83898453b4d Mon Sep 17 00:00:00 2001
From: Ryan Houdek <Sonicadvance1@gmail.com>
Date: Fri, 20 Dec 2024 12:12:24 -0800
Subject: [PATCH] InstCountCI: Add more LRCPC2 tests that were missing

We weren't testing 64-bit variants, and we also weren't testing 8-bit
and 16-bit loads and stores. Add more tests to ensure these are covered.
---
 .../FEXOpt/MultiInst_TSO.json                 | 91 +++++++++++++++++++
 .../FEXOpt/MultiInst_TSO_32bit.json           | 60 ++++++++++++
 2 files changed, 151 insertions(+)
 create mode 100644 unittests/InstructionCountCI/FEXOpt/MultiInst_TSO.json

diff --git a/unittests/InstructionCountCI/FEXOpt/MultiInst_TSO.json b/unittests/InstructionCountCI/FEXOpt/MultiInst_TSO.json
new file mode 100644
index 0000000000..cc729583b1
--- /dev/null
+++ b/unittests/InstructionCountCI/FEXOpt/MultiInst_TSO.json
@@ -0,0 +1,91 @@
+{
+  "Features": {
+    "Bitness": 64,
+    "EnabledHostFeatures": [
+      "TSO",
+      "LRCPC",
+      "LRCPC2",
+      "FLAGM",
+      "FLAGM2"
+    ],
+    "DisabledHostFeatures": [
+      "SVE128",
+      "SVE256",
+      "RPRES",
+      "AFP"
+    ]
+  },
+  "Comment": [
+    "These are instruction combinations that could be more optimal if FEX optimized for them"
+  ],
+  "Instructions": {
+    "Load variables from memory": {
+      "x86InstructionCount": 6,
+      "ExpectedInstructionCount": 18,
+      "Comment": [
+        "Just to ensure small atomic offset loads are using LRCPC2"
+      ],
+      "x86Insts": [
+        "mov edi, [rcx]",
+        "mov edx, [rcx + 4]",
+        "mov rbx, [rcx + 8]",
+        "mov rsi, [rcx + 16]",
+        "mov ax, [rcx + 24]",
+        "mov bl, [rcx + 26]"
+      ],
+      "ExpectedArm64ASM": [
+        "ldapur w11, [x7]",
+        "nop",
+        "add x20, x7, #0x4 (4)",
+        "ldapur w5, [x20]",
+        "nop",
+        "add x20, x7, #0x8 (8)",
+        "ldapur x6, [x20]",
+        "nop",
+        "add x20, x7, #0x10 (16)",
+        "ldapur x10, [x20]",
+        "nop",
+        "add x20, x7, #0x18 (24)",
+        "ldapurh w20, [x20]",
+        "nop",
+        "bfxil x4, x20, #0, #16",
+        "add x20, x7, #0x1a (26)",
+        "ldapurb w20, [x20]",
+        "bfxil x6, x20, #0, #8"
+      ]
+    },
+    "Store variables to memory": {
+      "x86InstructionCount": 6,
+      "ExpectedInstructionCount": 16,
+      "Comment": [
+        "Just to ensure small atomic offset stores are using LRCPC2"
+      ],
+      "x86Insts": [
+        "mov [rcx], edi",
+        "mov [rcx + 4], edx",
+        "mov [rcx + 8], rbx",
+        "mov [rcx + 16], rsi",
+        "mov [rcx + 24], ax",
+        "mov [rcx + 26], bl"
+      ],
+      "ExpectedArm64ASM": [
+        "nop",
+        "stlur w11, [x7]",
+        "add x20, x7, #0x4 (4)",
+        "nop",
+        "stlur w5, [x20]",
+        "add x20, x7, #0x8 (8)",
+        "nop",
+        "stlur x6, [x20]",
+        "add x20, x7, #0x10 (16)",
+        "nop",
+        "stlur x10, [x20]",
+        "add x20, x7, #0x18 (24)",
+        "nop",
+        "stlurh w4, [x20]",
+        "add x20, x7, #0x1a (26)",
+        "stlurb w6, [x20]"
+      ]
+    }
+  }
+}
diff --git a/unittests/InstructionCountCI/FEXOpt/MultiInst_TSO_32bit.json b/unittests/InstructionCountCI/FEXOpt/MultiInst_TSO_32bit.json
index b614599fb6..aeabad4300 100644
--- a/unittests/InstructionCountCI/FEXOpt/MultiInst_TSO_32bit.json
+++ b/unittests/InstructionCountCI/FEXOpt/MultiInst_TSO_32bit.json
@@ -65,6 +65,66 @@
         "subs w26, w4, w20",
         "mov x4, x26"
       ]
+    },
+    "Load variables from memory": {
+      "x86InstructionCount": 4,
+      "ExpectedInstructionCount": 16,
+      "Comment": [
+        "Just to ensure small atomic offset loads are using LRCPC2"
+      ],
+      "x86Insts": [
+        "mov edi, [ecx]",
+        "mov edx, [ecx + 4]",
+        "mov ax, [ecx + 24]",
+        "mov bl, [ecx + 26]"
+      ],
+      "ExpectedArm64ASM": [
+        "mov w20, w7",
+        "ldapur w11, [x20]",
+        "nop",
+        "add x20, x7, #0x4 (4)",
+        "mov w20, w20",
+        "ldapur w5, [x20]",
+        "nop",
+        "add x20, x7, #0x18 (24)",
+        "mov w20, w20",
+        "ldapurh w20, [x20]",
+        "nop",
+        "bfxil x4, x20, #0, #16",
+        "add x20, x7, #0x1a (26)",
+        "mov w20, w20",
+        "ldapurb w20, [x20]",
+        "bfxil x6, x20, #0, #8"
+      ]
+    },
+    "Store variables to memory": {
+      "x86InstructionCount": 4,
+      "ExpectedInstructionCount": 14,
+      "Comment": [
+        "Just to ensure small atomic offset stores are using LRCPC2"
+      ],
+      "x86Insts": [
+        "mov [ecx], edi",
+        "mov [ecx + 4], edx",
+        "mov [ecx + 24], ax",
+        "mov [ecx + 26], bl"
+      ],
+      "ExpectedArm64ASM": [
+        "mov w20, w7",
+        "nop",
+        "stlur w11, [x20]",
+        "add x20, x7, #0x4 (4)",
+        "mov w20, w20",
+        "nop",
+        "stlur w5, [x20]",
+        "add x20, x7, #0x18 (24)",
+        "mov w20, w20",
+        "nop",
+        "stlurh w4, [x20]",
+        "add x20, x7, #0x1a (26)",
+        "mov w20, w20",
+        "stlurb w6, [x20]"
+      ]
     }
   }
 }