diff --git a/unittests/InstructionCountCI/AVX128/VEX_map1.json b/unittests/InstructionCountCI/AVX128/VEX_map1.json index 7f2416cf4a..9dc1091bde 100644 --- a/unittests/InstructionCountCI/AVX128/VEX_map1.json +++ b/unittests/InstructionCountCI/AVX128/VEX_map1.json @@ -505,7 +505,7 @@ ], "ExpectedArm64ASM": [ "ushr v2.4s, v16.4s, #31", - "ldr q3, [x28, #2448]", + "ldr q3, [x28, #2512]", "ushl v2.4s, v2.4s, v3.4s", "addv s2, v2.4s", "mov w4, v2.s[0]" @@ -519,7 +519,7 @@ "ExpectedArm64ASM": [ "ldr q2, [x28, #16]", "ushr v3.4s, v16.4s, #31", - "ldr q4, [x28, #2448]", + "ldr q4, [x28, #2512]", "ushl v3.4s, v3.4s, v4.4s", "addv s3, v3.4s", "mov w20, v3.s[0]", @@ -1168,7 +1168,7 @@ ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "ldr x0, [x28, #1992]", + "ldr x0, [x28, #2056]", "ldr q3, [x0, #16]", "tbl v16.16b, {v17.16b}, v3.16b", "str q2, [x28, #16]" @@ -1181,7 +1181,7 @@ ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "ldr x0, [x28, #1992]", + "ldr x0, [x28, #2056]", "ldr q3, [x0, #32]", "tbl v16.16b, {v17.16b}, v3.16b", "str q2, [x28, #16]" @@ -1194,7 +1194,7 @@ ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "ldr x0, [x28, #1992]", + "ldr x0, [x28, #2056]", "ldr q3, [x0, #48]", "tbl v16.16b, {v17.16b}, v3.16b", "str q2, [x28, #16]" @@ -1219,7 +1219,7 @@ ], "ExpectedArm64ASM": [ "ldr q2, [x28, #32]", - "ldr x0, [x28, #1992]", + "ldr x0, [x28, #2056]", "ldr q3, [x0, #16]", "tbl v16.16b, {v17.16b}, v3.16b", "tbl v2.16b, {v2.16b}, v3.16b", @@ -1233,7 +1233,7 @@ ], "ExpectedArm64ASM": [ "ldr q2, [x28, #32]", - "ldr x0, [x28, #1992]", + "ldr x0, [x28, #2056]", "ldr q3, [x0, #32]", "tbl v16.16b, {v17.16b}, v3.16b", "tbl v2.16b, {v2.16b}, v3.16b", @@ -1247,7 +1247,7 @@ ], "ExpectedArm64ASM": [ "ldr q2, [x28, #32]", - "ldr x0, [x28, #1992]", + "ldr x0, [x28, #2056]", "ldr q3, [x0, #48]", "tbl v16.16b, {v17.16b}, v3.16b", "tbl v2.16b, {v2.16b}, v3.16b", @@ -1272,7 +1272,7 @@ "Map 1 0b10 0x70 128-bit" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #1984]", + "ldr x0, [x28, #2048]", "ldr q2, [x0, #16]", "tbl v16.16b, {v17.16b}, v2.16b", "movi v2.2d, #0x0", @@ -1285,7 +1285,7 @@ "Map 1 0b10 0x70 128-bit" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #1984]", + "ldr x0, [x28, #2048]", "ldr q2, [x0, #32]", "tbl v16.16b, {v17.16b}, v2.16b", "movi v2.2d, #0x0", @@ -1298,7 +1298,7 @@ "Map 1 0b10 0x70 128-bit" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #1984]", + "ldr x0, [x28, #2048]", "ldr q2, [x0, #48]", "tbl v16.16b, {v17.16b}, v2.16b", "movi v2.2d, #0x0", @@ -1326,7 +1326,7 @@ ], "ExpectedArm64ASM": [ "ldr q2, [x28, #32]", - "ldr x0, [x28, #1984]", + "ldr x0, [x28, #2048]", "ldr q3, [x0, #16]", "tbl v16.16b, {v17.16b}, v3.16b", "tbl v2.16b, {v2.16b}, v3.16b", @@ -1340,7 +1340,7 @@ ], "ExpectedArm64ASM": [ "ldr q2, [x28, #32]", - "ldr x0, [x28, #1984]", + "ldr x0, [x28, #2048]", "ldr q3, [x0, #32]", "tbl v16.16b, {v17.16b}, v3.16b", "tbl v2.16b, {v2.16b}, v3.16b", @@ -1354,7 +1354,7 @@ ], "ExpectedArm64ASM": [ "ldr q2, [x28, #32]", - "ldr x0, [x28, #1984]", + "ldr x0, [x28, #2048]", "ldr q3, [x0, #48]", "tbl v16.16b, {v17.16b}, v3.16b", "tbl v2.16b, {v2.16b}, v3.16b", @@ -1379,7 +1379,7 @@ "Map 1 0b11 0x70 128-bit" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #1976]", + "ldr x0, [x28, #2040]", "ldr q2, [x0, #16]", "tbl v16.16b, {v17.16b}, v2.16b", "movi v2.2d, #0x0", @@ -1392,7 +1392,7 @@ "Map 1 0b11 0x70 128-bit" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #1976]", + "ldr x0, [x28, #2040]", "ldr q2, [x0, #32]", "tbl v16.16b, {v17.16b}, v2.16b", "movi v2.2d, #0x0", @@ -1405,7 +1405,7 @@ "Map 1 0b11 0x70 128-bit" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #1976]", + "ldr x0, [x28, #2040]", "ldr q2, [x0, #48]", "tbl v16.16b, {v17.16b}, v2.16b", "movi v2.2d, #0x0", @@ -1433,7 +1433,7 @@ ], "ExpectedArm64ASM": [ "ldr q2, [x28, #32]", - "ldr x0, [x28, #1976]", + "ldr x0, [x28, #2040]", "ldr q3, [x0, #16]", "tbl v16.16b, {v17.16b}, v3.16b", "tbl v2.16b, {v2.16b}, v3.16b", @@ -1447,7 +1447,7 @@ ], "ExpectedArm64ASM": [ "ldr q2, [x28, #32]", - "ldr x0, [x28, #1976]", + "ldr x0, [x28, #2040]", "ldr q3, [x0, #32]", "tbl v16.16b, {v17.16b}, v3.16b", "tbl v2.16b, {v2.16b}, v3.16b", @@ -1461,7 +1461,7 @@ ], "ExpectedArm64ASM": [ "ldr q2, [x28, #32]", - "ldr x0, [x28, #1976]", + "ldr x0, [x28, #2040]", "ldr q3, [x0, #48]", "tbl v16.16b, {v17.16b}, v3.16b", "tbl v2.16b, {v2.16b}, v3.16b", @@ -2384,7 +2384,7 @@ "Map 1 0b00 0xC6 128-bit" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #2000]", + "ldr x0, [x28, #2064]", "ldr q2, [x0, #16]", "tbl v16.16b, {v17.16b, v18.16b}, v2.16b", "movi v2.2d, #0x0", @@ -2399,7 +2399,7 @@ "ExpectedArm64ASM": [ "ldr q2, [x28, #32]", "ldr q3, [x28, #48]", - "ldr x0, [x28, #2000]", + "ldr x0, [x28, #2064]", "ldr q4, [x0, #16]", "tbl v16.16b, {v17.16b, v18.16b}, v4.16b", "tbl v2.16b, {v2.16b, v3.16b}, v4.16b", @@ -2412,7 +2412,7 @@ "Map 1 0b00 0xC6 128-bit" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #2000]", + "ldr x0, [x28, #2064]", "ldr q2, [x0, #32]", "tbl v16.16b, {v17.16b, v18.16b}, v2.16b", "movi v2.2d, #0x0", @@ -2427,7 +2427,7 @@ "ExpectedArm64ASM": [ "ldr q2, [x28, #32]", "ldr q3, [x28, #48]", - "ldr x0, [x28, #2000]", + "ldr x0, [x28, #2064]", "ldr q4, [x0, #32]", "tbl v16.16b, {v17.16b, v18.16b}, v4.16b", "tbl v2.16b, {v2.16b, v3.16b}, v4.16b", @@ -2440,7 +2440,7 @@ "Map 1 0b00 0xC6 128-bit" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #2000]", + "ldr x0, [x28, #2064]", "ldr q2, [x0, #48]", "tbl v16.16b, {v17.16b, v18.16b}, v2.16b", "movi v2.2d, #0x0", @@ -2455,7 +2455,7 @@ "ExpectedArm64ASM": [ "ldr q2, [x28, #32]", "ldr q3, [x28, #48]", - "ldr x0, [x28, #2000]", + "ldr x0, [x28, #2064]", "ldr q4, [x0, #48]", "tbl v16.16b, {v17.16b, v18.16b}, v4.16b", "tbl v2.16b, {v2.16b, v3.16b}, v4.16b", @@ -2725,79 +2725,127 @@ ] }, "vcvttss2si eax, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b10 0x2c 128-bit" ], "ExpectedArm64ASM": [ - "fcvtzs w4, s16" + "fcvtzs w20, s16", + "mov w21, #0x80000000", + "ldr s2, [x28, #2768]", + "mrs x22, nzcv", + "fcmp s2, s16", + "csel w4, w20, w21, gt", + "msr nzcv, x22" ] }, "vcvttss2si rax, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b10 0x2c 128-bit" ], "ExpectedArm64ASM": [ - "fcvtzs x4, s16" + "fcvtzs x20, s16", + "mov x21, #0x8000000000000000", + "ldr s2, [x28, #2800]", + "mrs x22, nzcv", + "fcmp s2, s16", + "csel x4, x20, x21, gt", + "msr nzcv, x22" ] }, "vcvttsd2si eax, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b11 0x2c 128-bit" ], "ExpectedArm64ASM": [ - "fcvtzs w4, d16" + "fcvtzs w20, d16", + "mov w21, #0x80000000", + "ldr d2, [x28, #2816]", + "mrs x22, nzcv", + "fcmp d2, d16", + "csel w4, w20, w21, gt", + "msr nzcv, x22" ] }, "vcvttsd2si rax, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b11 0x2c 128-bit" ], "ExpectedArm64ASM": [ - "fcvtzs x4, d16" + "fcvtzs x20, d16", + "mov x21, #0x8000000000000000", + "ldr d2, [x28, #2848]", + "mrs x22, nzcv", + "fcmp d2, d16", + "csel x4, x20, x21, gt", + "msr nzcv, x22" ] }, "vcvtss2si eax, xmm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b10 0x2d 128-bit" ], "ExpectedArm64ASM": [ - "frinti s0, s16", - "fcvtzs w4, s0" + "frinti s2, s16", + "fcvtzs w20, s2", + "mov w21, #0x80000000", + "ldr s3, [x28, #2768]", + "mrs x22, nzcv", + "fcmp s3, s2", + "csel w4, w20, w21, gt", + "msr nzcv, x22" ] }, "vcvtss2si rax, xmm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b10 0x2d 128-bit" ], "ExpectedArm64ASM": [ - "frinti s0, s16", - "fcvtzs x4, s0" + "frinti s2, s16", + "fcvtzs x20, s2", + "mov x21, #0x8000000000000000", + "ldr s3, [x28, #2800]", + "mrs x22, nzcv", + "fcmp s3, s2", + "csel x4, x20, x21, gt", + "msr nzcv, x22" ] }, "vcvtsd2si eax, xmm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b11 0x2d 128-bit" ], "ExpectedArm64ASM": [ - "frinti d0, d16", - "fcvtzs x4, d0" + "frinti d2, d16", + "fcvtzs x20, d2", + "mov x21, #0x8000000000000000", + "ldr d3, [x28, #2848]", + "mrs x22, nzcv", + "fcmp d3, d2", + "csel x4, x20, x21, gt", + "msr nzcv, x22" ] }, "vcvtsd2si rax, xmm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b11 0x2d 128-bit" ], "ExpectedArm64ASM": [ - "frinti d0, d16", - "fcvtzs x4, d0" + "frinti d2, d16", + "fcvtzs x20, d2", + "mov x21, #0x8000000000000000", + "ldr d3, [x28, #2848]", + "mrs x22, nzcv", + "fcmp d3, d2", + "csel x4, x20, x21, gt", + "msr nzcv, x22" ] }, "vucomiss xmm0, xmm1": { @@ -3099,51 +3147,75 @@ ] }, "vcvtps2dq xmm0, xmm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b01 0x5b 128-bit" ], "ExpectedArm64ASM": [ - "frinti v16.4s, v17.4s", - "fcvtzs v16.4s, v16.4s", + "frinti v2.4s, v17.4s", + "ldr q3, [x28, #2864]", + "ldr q4, [x28, #2768]", + "fcvtzs v5.4s, v2.4s", + "fcmgt v2.4s, v4.4s, v2.4s", + "mov v16.16b, v2.16b", + "bsl v16.16b, v5.16b, v3.16b", "movi v2.2d, #0x0", "str q2, [x28, #16]" ] }, "vcvtps2dq ymm0, ymm1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 13, "Comment": [ "Map 1 0b01 0x5b 256-bit" ], "ExpectedArm64ASM": [ "ldr q2, [x28, #32]", - "frinti v16.4s, v17.4s", - "fcvtzs v16.4s, v16.4s", + "frinti v3.4s, v17.4s", + "ldr q4, [x28, #2864]", + "ldr q5, [x28, #2768]", + "fcvtzs v6.4s, v3.4s", + "fcmgt v3.4s, v5.4s, v3.4s", + "mov v16.16b, v3.16b", + "bsl v16.16b, v6.16b, v4.16b", "frinti v2.4s, v2.4s", - "fcvtzs v2.4s, v2.4s", + "fcvtzs v3.4s, v2.4s", + "fcmgt v2.4s, v5.4s, v2.4s", + "bsl v2.16b, v3.16b, v4.16b", "str q2, [x28, #16]" ] }, "vcvttps2dq xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b10 0x5b 128-bit" ], "ExpectedArm64ASM": [ - "fcvtzs v16.4s, v17.4s", + "ldr q2, [x28, #2864]", + "ldr q3, [x28, #2768]", + "fcvtzs v4.4s, v17.4s", + "fcmgt v3.4s, v3.4s, v17.4s", + "mov v16.16b, v3.16b", + "bsl v16.16b, v4.16b, v2.16b", "movi v2.2d, #0x0", "str q2, [x28, #16]" ] }, "vcvttps2dq ymm0, ymm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 11, "Comment": [ "Map 1 0b10 0x5b 256-bit" ], "ExpectedArm64ASM": [ "ldr q2, [x28, #32]", - "fcvtzs v16.4s, v17.4s", - "fcvtzs v2.4s, v2.4s", + "ldr q3, [x28, #2864]", + "ldr q4, [x28, #2768]", + "fcvtzs v5.4s, v17.4s", + "fcmgt v6.4s, v4.4s, v17.4s", + "mov v16.16b, v6.16b", + "bsl v16.16b, v5.16b, v3.16b", + "fcvtzs v5.4s, v2.4s", + "fcmgt v2.4s, v4.4s, v2.4s", + "bsl v2.16b, v5.16b, v3.16b", "str q2, [x28, #16]" ] }, @@ -3912,7 +3984,7 @@ "Map 1 0b01 0xd0 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2352]", + "ldr q2, [x28, #2416]", "eor v2.16b, v18.16b, v2.16b", "fadd v16.2d, v17.2d, v2.2d", "movi v2.2d, #0x0", @@ -3927,7 +3999,7 @@ "ExpectedArm64ASM": [ "ldr q2, [x28, #32]", "ldr q3, [x28, #48]", - "ldr q4, [x28, #2352]", + "ldr q4, [x28, #2416]", "eor v5.16b, v18.16b, v4.16b", "fadd v16.2d, v17.2d, v5.2d", "eor v3.16b, v3.16b, v4.16b", @@ -3941,7 +4013,7 @@ "Map 1 0b11 0xd0 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2320]", + "ldr q2, [x28, #2384]", "eor v2.16b, v18.16b, v2.16b", "fadd v16.4s, v17.4s, v2.4s", "movi v2.2d, #0x0", @@ -3956,7 +4028,7 @@ "ExpectedArm64ASM": [ "ldr q2, [x28, #32]", "ldr q3, [x28, #48]", - "ldr q4, [x28, #2320]", + "ldr q4, [x28, #2384]", "eor v5.16b, v18.16b, v4.16b", "fadd v16.4s, v17.4s, v5.4s", "eor v3.16b, v3.16b, v4.16b", @@ -4132,7 +4204,7 @@ "Map 1 0b01 0xd7 256-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2576]", + "ldr q2, [x28, #2640]", "cmlt v3.16b, v16.16b, #0", "and v2.16b, v3.16b, v2.16b", "addp v2.16b, v2.16b, v2.16b", @@ -4148,7 +4220,7 @@ ], "ExpectedArm64ASM": [ "ldr q2, [x28, #16]", - "ldr q3, [x28, #2576]", + "ldr q3, [x28, #2640]", "cmlt v4.16b, v16.16b, #0", "and v4.16b, v4.16b, v3.16b", "addp v4.16b, v4.16b, v4.16b", @@ -4649,32 +4721,46 @@ ] }, "vcvttpd2dq xmm0, xmm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 11, "Comment": [ "Map 1 0b01 0xe6 128-bit" ], "ExpectedArm64ASM": [ - "frintz v16.2d, v17.2d", - "fcvtn v16.2s, v16.2d", - "fcvtzs v16.2s, v16.2s", + "ldr d2, [x28, #2864]", + "ldr q3, [x28, #2816]", + "frintz v4.2d, v17.2d", + "fcvtn v4.2s, v4.2d", + "fcvtzs v4.2s, v4.2s", + "fcmgt v3.2d, v3.2d, v17.2d", + "shrn v3.2s, v3.2d, #32", + "mov v16.16b, v3.16b", + "bsl v16.16b, v4.16b, v2.16b", "movi v2.2d, #0x0", "str q2, [x28, #16]" ] }, "vcvttpd2dq xmm0, ymm1": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 18, "Comment": [ "Map 1 0b01 0xe6 256-bit" ], "ExpectedArm64ASM": [ "ldr q2, [x28, #32]", - "frintz v3.2d, v17.2d", - "fcvtn v3.2s, v3.2d", - "fcvtzs v3.2s, v3.2s", - "frintz v2.2d, v2.2d", - "fcvtn v2.2s, v2.2d", - "fcvtzs v2.2s, v2.2s", - "zip1 v16.2d, v3.2d, v2.2d", + "ldr q3, [x28, #2864]", + "ldr q4, [x28, #2816]", + "frintz v5.2d, v17.2d", + "fcvtn v5.2s, v5.2d", + "fcvtzs v5.2s, v5.2s", + "fcmgt v6.2d, v4.2d, v17.2d", + "shrn v6.2s, v6.2d, #32", + "bif v5.16b, v3.16b, v6.16b", + "frintz v6.2d, v2.2d", + "fcvtn v6.2s, v6.2d", + "fcvtzs v6.2s, v6.2s", + "fcmgt v2.2d, v4.2d, v2.2d", + "shrn v2.2s, v2.2d, #32", + "bsl v2.16b, v6.16b, v3.16b", + "zip1 v16.2d, v5.2d, v2.2d", "movi v2.2d, #0x0", "str q2, [x28, #16]" ] @@ -4705,31 +4791,48 @@ ] }, "vcvtpd2dq xmm0, xmm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 12, "Comment": [ "Map 1 0b11 0xe6 128-bit" ], "ExpectedArm64ASM": [ - "frinti v16.2d, v17.2d", - "fcvtn v16.2s, v16.2d", - "fcvtzs v16.2s, v16.2s", + "frinti v2.2d, v17.2d", + "ldr d3, [x28, #2864]", + "ldr q4, [x28, #2816]", + "frintz v5.2d, v2.2d", + "fcvtn v5.2s, v5.2d", + "fcvtzs v5.2s, v5.2s", + "fcmgt v2.2d, v4.2d, v2.2d", + "shrn v2.2s, v2.2d, #32", + "mov v16.16b, v2.16b", + "bsl v16.16b, v5.16b, v3.16b", "movi v2.2d, #0x0", "str q2, [x28, #16]" ] }, "vcvtpd2dq xmm0, ymm1": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 20, "Comment": [ "Map 1 0b11 0xe6 256-bit" ], "ExpectedArm64ASM": [ "ldr q2, [x28, #32]", "frinti v3.2d, v17.2d", - "fcvtn v3.2s, v3.2d", - "fcvtzs v3.2s, v3.2s", + "ldr q4, [x28, #2864]", + "ldr q5, [x28, #2816]", + "frintz v6.2d, v3.2d", + "fcvtn v6.2s, v6.2d", + "fcvtzs v6.2s, v6.2s", + "fcmgt v3.2d, v5.2d, v3.2d", + "shrn v3.2s, v3.2d, #32", + "bsl v3.16b, v6.16b, v4.16b", "frinti v2.2d, v2.2d", - "fcvtn v2.2s, v2.2d", - "fcvtzs v2.2s, v2.2s", + "frintz v6.2d, v2.2d", + "fcvtn v6.2s, v6.2d", + "fcvtzs v6.2s, v6.2s", + "fcmgt v2.2d, v5.2d, v2.2d", + "shrn v2.2s, v2.2d, #32", + "bsl v2.16b, v6.16b, v4.16b", "zip1 v16.2d, v3.2d, v2.2d", "movi v2.2d, #0x0", "str q2, [x28, #16]" diff --git a/unittests/InstructionCountCI/AVX128/VEX_map2.json b/unittests/InstructionCountCI/AVX128/VEX_map2.json index b5e2ebb39e..cbf4d0eed3 100644 --- a/unittests/InstructionCountCI/AVX128/VEX_map2.json +++ b/unittests/InstructionCountCI/AVX128/VEX_map2.json @@ -1964,7 +1964,7 @@ "Map 2 0b01 0x41 256-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2288]", + "ldr q2, [x28, #2352]", "zip1 v3.8h, v2.8h, v17.8h", "zip2 v2.8h, v2.8h, v17.8h", "umin v2.4s, v3.4s, v2.4s", @@ -4548,7 +4548,7 @@ "Map 2 0b01 0x96 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2320]", + "ldr q2, [x28, #2384]", "eor v2.16b, v17.16b, v2.16b", "mov v0.16b, v2.16b", "fmla v0.4s, v16.4s, v18.4s", @@ -4566,7 +4566,7 @@ "ldr q2, [x28, #16]", "ldr q3, [x28, #32]", "ldr q4, [x28, #48]", - "ldr q5, [x28, #2320]", + "ldr q5, [x28, #2384]", "eor v6.16b, v17.16b, v5.16b", "mov v0.16b, v6.16b", "fmla v0.4s, v16.4s, v18.4s", @@ -4582,7 +4582,7 @@ "Map 2 0b01 0x96 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2352]", + "ldr q2, [x28, #2416]", "eor v2.16b, v17.16b, v2.16b", "mov v0.16b, v2.16b", "fmla v0.2d, v16.2d, v18.2d", @@ -4600,7 +4600,7 @@ "ldr q2, [x28, #16]", "ldr q3, [x28, #32]", "ldr q4, [x28, #48]", - "ldr q5, [x28, #2352]", + "ldr q5, [x28, #2416]", "eor v6.16b, v17.16b, v5.16b", "mov v0.16b, v6.16b", "fmla v0.2d, v16.2d, v18.2d", @@ -4616,7 +4616,7 @@ "Map 2 0b01 0x97 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2384]", + "ldr q2, [x28, #2448]", "eor v2.16b, v17.16b, v2.16b", "mov v0.16b, v2.16b", "fmla v0.4s, v16.4s, v18.4s", @@ -4634,7 +4634,7 @@ "ldr q2, [x28, #16]", "ldr q3, [x28, #32]", "ldr q4, [x28, #48]", - "ldr q5, [x28, #2384]", + "ldr q5, [x28, #2448]", "eor v6.16b, v17.16b, v5.16b", "mov v0.16b, v6.16b", "fmla v0.4s, v16.4s, v18.4s", @@ -4650,7 +4650,7 @@ "Map 2 0b01 0x97 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2416]", + "ldr q2, [x28, #2480]", "eor v2.16b, v17.16b, v2.16b", "mov v0.16b, v2.16b", "fmla v0.2d, v16.2d, v18.2d", @@ -4668,7 +4668,7 @@ "ldr q2, [x28, #16]", "ldr q3, [x28, #32]", "ldr q4, [x28, #48]", - "ldr q5, [x28, #2416]", + "ldr q5, [x28, #2480]", "eor v6.16b, v17.16b, v5.16b", "mov v0.16b, v6.16b", "fmla v0.2d, v16.2d, v18.2d", @@ -5656,7 +5656,7 @@ "Map 2 0b01 0xa6 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2320]", + "ldr q2, [x28, #2384]", "eor v2.16b, v18.16b, v2.16b", "mov v0.16b, v2.16b", "fmla v0.4s, v17.4s, v16.4s", @@ -5674,7 +5674,7 @@ "ldr q2, [x28, #16]", "ldr q3, [x28, #32]", "ldr q4, [x28, #48]", - "ldr q5, [x28, #2320]", + "ldr q5, [x28, #2384]", "eor v6.16b, v18.16b, v5.16b", "mov v0.16b, v6.16b", "fmla v0.4s, v17.4s, v16.4s", @@ -5690,7 +5690,7 @@ "Map 2 0b01 0xa6 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2352]", + "ldr q2, [x28, #2416]", "eor v2.16b, v18.16b, v2.16b", "mov v0.16b, v2.16b", "fmla v0.2d, v17.2d, v16.2d", @@ -5708,7 +5708,7 @@ "ldr q2, [x28, #16]", "ldr q3, [x28, #32]", "ldr q4, [x28, #48]", - "ldr q5, [x28, #2352]", + "ldr q5, [x28, #2416]", "eor v6.16b, v18.16b, v5.16b", "mov v0.16b, v6.16b", "fmla v0.2d, v17.2d, v16.2d", @@ -5724,7 +5724,7 @@ "Map 2 0b01 0xa7 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2384]", + "ldr q2, [x28, #2448]", "eor v2.16b, v18.16b, v2.16b", "mov v0.16b, v2.16b", "fmla v0.4s, v17.4s, v16.4s", @@ -5742,7 +5742,7 @@ "ldr q2, [x28, #16]", "ldr q3, [x28, #32]", "ldr q4, [x28, #48]", - "ldr q5, [x28, #2384]", + "ldr q5, [x28, #2448]", "eor v6.16b, v18.16b, v5.16b", "mov v0.16b, v6.16b", "fmla v0.4s, v17.4s, v16.4s", @@ -5758,7 +5758,7 @@ "Map 2 0b01 0xa7 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2416]", + "ldr q2, [x28, #2480]", "eor v2.16b, v18.16b, v2.16b", "mov v0.16b, v2.16b", "fmla v0.2d, v17.2d, v16.2d", @@ -5776,7 +5776,7 @@ "ldr q2, [x28, #16]", "ldr q3, [x28, #32]", "ldr q4, [x28, #48]", - "ldr q5, [x28, #2416]", + "ldr q5, [x28, #2480]", "eor v6.16b, v18.16b, v5.16b", "mov v0.16b, v6.16b", "fmla v0.2d, v17.2d, v16.2d", @@ -5792,7 +5792,7 @@ "Map 2 0b01 0xb6 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2320]", + "ldr q2, [x28, #2384]", "eor v2.16b, v16.16b, v2.16b", "mov v16.16b, v2.16b", "fmla v16.4s, v17.4s, v18.4s", @@ -5809,7 +5809,7 @@ "ldr q2, [x28, #16]", "ldr q3, [x28, #32]", "ldr q4, [x28, #48]", - "ldr q5, [x28, #2320]", + "ldr q5, [x28, #2384]", "eor v6.16b, v16.16b, v5.16b", "mov v16.16b, v6.16b", "fmla v16.4s, v17.4s, v18.4s", @@ -5824,7 +5824,7 @@ "Map 2 0b01 0xb6 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2352]", + "ldr q2, [x28, #2416]", "eor v2.16b, v16.16b, v2.16b", "mov v16.16b, v2.16b", "fmla v16.2d, v17.2d, v18.2d", @@ -5841,7 +5841,7 @@ "ldr q2, [x28, #16]", "ldr q3, [x28, #32]", "ldr q4, [x28, #48]", - "ldr q5, [x28, #2352]", + "ldr q5, [x28, #2416]", "eor v6.16b, v16.16b, v5.16b", "mov v16.16b, v6.16b", "fmla v16.2d, v17.2d, v18.2d", @@ -5856,7 +5856,7 @@ "Map 2 0b01 0xb7 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2384]", + "ldr q2, [x28, #2448]", "eor v2.16b, v16.16b, v2.16b", "mov v16.16b, v2.16b", "fmla v16.4s, v17.4s, v18.4s", @@ -5873,7 +5873,7 @@ "ldr q2, [x28, #16]", "ldr q3, [x28, #32]", "ldr q4, [x28, #48]", - "ldr q5, [x28, #2384]", + "ldr q5, [x28, #2448]", "eor v6.16b, v16.16b, v5.16b", "mov v16.16b, v6.16b", "fmla v16.4s, v17.4s, v18.4s", @@ -5888,7 +5888,7 @@ "Map 2 0b01 0xb7 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2416]", + "ldr q2, [x28, #2480]", "eor v2.16b, v16.16b, v2.16b", "mov v16.16b, v2.16b", "fmla v16.2d, v17.2d, v18.2d", @@ -5905,7 +5905,7 @@ "ldr q2, [x28, #16]", "ldr q3, [x28, #32]", "ldr q4, [x28, #48]", - "ldr q5, [x28, #2416]", + "ldr q5, [x28, #2480]", "eor v6.16b, v16.16b, v5.16b", "mov v16.16b, v6.16b", "fmla v16.2d, v17.2d, v18.2d", diff --git a/unittests/InstructionCountCI/AVX128/VEX_map2_SVE128.json b/unittests/InstructionCountCI/AVX128/VEX_map2_SVE128.json index efdf3e2a4e..8cbc9d928c 100644 --- a/unittests/InstructionCountCI/AVX128/VEX_map2_SVE128.json +++ b/unittests/InstructionCountCI/AVX128/VEX_map2_SVE128.json @@ -2850,7 +2850,7 @@ "Map 2 0b01 0x96 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2320]", + "ldr q2, [x28, #2384]", "eor v2.16b, v17.16b, v2.16b", "mov v0.16b, v2.16b", "fmla v0.4s, v16.4s, v18.4s", @@ -2868,7 +2868,7 @@ "ldr q2, [x28, #16]", "ldr q3, [x28, #32]", "ldr q4, [x28, #48]", - "ldr q5, [x28, #2320]", + "ldr q5, [x28, #2384]", "eor v6.16b, v17.16b, v5.16b", "mov v0.16b, v6.16b", "fmla v0.4s, v16.4s, v18.4s", @@ -2884,7 +2884,7 @@ "Map 2 0b01 0x96 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2352]", + "ldr q2, [x28, #2416]", "eor v2.16b, v17.16b, v2.16b", "mov v0.16b, v2.16b", "fmla v0.2d, v16.2d, v18.2d", @@ -2902,7 +2902,7 @@ "ldr q2, [x28, #16]", "ldr q3, [x28, #32]", "ldr q4, [x28, #48]", - "ldr q5, [x28, #2352]", + "ldr q5, [x28, #2416]", "eor v6.16b, v17.16b, v5.16b", "mov v0.16b, v6.16b", "fmla v0.2d, v16.2d, v18.2d", @@ -2918,7 +2918,7 @@ "Map 2 0b01 0x97 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2384]", + "ldr q2, [x28, #2448]", "eor v2.16b, v17.16b, v2.16b", "mov v0.16b, v2.16b", "fmla v0.4s, v16.4s, v18.4s", @@ -2936,7 +2936,7 @@ "ldr q2, [x28, #16]", "ldr q3, [x28, #32]", "ldr q4, [x28, #48]", - "ldr q5, [x28, #2384]", + "ldr q5, [x28, #2448]", "eor v6.16b, v17.16b, v5.16b", "mov v0.16b, v6.16b", "fmla v0.4s, v16.4s, v18.4s", @@ -2952,7 +2952,7 @@ "Map 2 0b01 0x97 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2416]", + "ldr q2, [x28, #2480]", "eor v2.16b, v17.16b, v2.16b", "mov v0.16b, v2.16b", "fmla v0.2d, v16.2d, v18.2d", @@ -2970,7 +2970,7 @@ "ldr q2, [x28, #16]", "ldr q3, [x28, #32]", "ldr q4, [x28, #48]", - "ldr q5, [x28, #2416]", + "ldr q5, [x28, #2480]", "eor v6.16b, v17.16b, v5.16b", "mov v0.16b, v6.16b", "fmla v0.2d, v16.2d, v18.2d", @@ -3938,7 +3938,7 @@ "Map 2 0b01 0xa6 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2320]", + "ldr q2, [x28, #2384]", "eor v2.16b, v18.16b, v2.16b", "mov v0.16b, v2.16b", "fmla v0.4s, v17.4s, v16.4s", @@ -3956,7 +3956,7 @@ "ldr q2, [x28, #16]", "ldr q3, [x28, #32]", "ldr q4, [x28, #48]", - "ldr q5, [x28, #2320]", + "ldr q5, [x28, #2384]", "eor v6.16b, v18.16b, v5.16b", "mov v0.16b, v6.16b", "fmla v0.4s, v17.4s, v16.4s", @@ -3972,7 +3972,7 @@ "Map 2 0b01 0xa6 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2352]", + "ldr q2, [x28, #2416]", "eor v2.16b, v18.16b, v2.16b", "mov v0.16b, v2.16b", "fmla v0.2d, v17.2d, v16.2d", @@ -3990,7 +3990,7 @@ "ldr q2, [x28, #16]", "ldr q3, [x28, #32]", "ldr q4, [x28, #48]", - "ldr q5, [x28, #2352]", + "ldr q5, [x28, #2416]", "eor v6.16b, v18.16b, v5.16b", "mov v0.16b, v6.16b", "fmla v0.2d, v17.2d, v16.2d", @@ -4006,7 +4006,7 @@ "Map 2 0b01 0xa7 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2384]", + "ldr q2, [x28, #2448]", "eor v2.16b, v18.16b, v2.16b", "mov v0.16b, v2.16b", "fmla v0.4s, v17.4s, v16.4s", @@ -4024,7 +4024,7 @@ "ldr q2, [x28, #16]", "ldr q3, [x28, #32]", "ldr q4, [x28, #48]", - "ldr q5, [x28, #2384]", + "ldr q5, [x28, #2448]", "eor v6.16b, v18.16b, v5.16b", "mov v0.16b, v6.16b", "fmla v0.4s, v17.4s, v16.4s", @@ -4040,7 +4040,7 @@ "Map 2 0b01 0xa7 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2416]", + "ldr q2, [x28, #2480]", "eor v2.16b, v18.16b, v2.16b", "mov v0.16b, v2.16b", "fmla v0.2d, v17.2d, v16.2d", @@ -4058,7 +4058,7 @@ "ldr q2, [x28, #16]", "ldr q3, [x28, #32]", "ldr q4, [x28, #48]", - "ldr q5, [x28, #2416]", + "ldr q5, [x28, #2480]", "eor v6.16b, v18.16b, v5.16b", "mov v0.16b, v6.16b", "fmla v0.2d, v17.2d, v16.2d", @@ -4074,7 +4074,7 @@ "Map 2 0b01 0xb6 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2320]", + "ldr q2, [x28, #2384]", "eor v2.16b, v16.16b, v2.16b", "mov v16.16b, v2.16b", "fmla v16.4s, v17.4s, v18.4s", @@ -4091,7 +4091,7 @@ "ldr q2, [x28, #16]", "ldr q3, [x28, #32]", "ldr q4, [x28, #48]", - "ldr q5, [x28, #2320]", + "ldr q5, [x28, #2384]", "eor v6.16b, v16.16b, v5.16b", "mov v16.16b, v6.16b", "fmla v16.4s, v17.4s, v18.4s", @@ -4106,7 +4106,7 @@ "Map 2 0b01 0xb6 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2352]", + "ldr q2, [x28, #2416]", "eor v2.16b, v16.16b, v2.16b", "mov v16.16b, v2.16b", "fmla v16.2d, v17.2d, v18.2d", @@ -4123,7 +4123,7 @@ "ldr q2, [x28, #16]", "ldr q3, [x28, #32]", "ldr q4, [x28, #48]", - "ldr q5, [x28, #2352]", + "ldr q5, [x28, #2416]", "eor v6.16b, v16.16b, v5.16b", "mov v16.16b, v6.16b", "fmla v16.2d, v17.2d, v18.2d", @@ -4138,7 +4138,7 @@ "Map 2 0b01 0xb7 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2384]", + "ldr q2, [x28, #2448]", "eor v2.16b, v16.16b, v2.16b", "mov v16.16b, v2.16b", "fmla v16.4s, v17.4s, v18.4s", @@ -4155,7 +4155,7 @@ "ldr q2, [x28, #16]", "ldr q3, [x28, #32]", "ldr q4, [x28, #48]", - "ldr q5, [x28, #2384]", + "ldr q5, [x28, #2448]", "eor v6.16b, v16.16b, v5.16b", "mov v16.16b, v6.16b", "fmla v16.4s, v17.4s, v18.4s", @@ -4170,7 +4170,7 @@ "Map 2 0b01 0xb7 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2416]", + "ldr q2, [x28, #2480]", "eor v2.16b, v16.16b, v2.16b", "mov v16.16b, v2.16b", "fmla v16.2d, v17.2d, v18.2d", @@ -4187,7 +4187,7 @@ "ldr q2, [x28, #16]", "ldr q3, [x28, #32]", "ldr q4, [x28, #48]", - "ldr q5, [x28, #2416]", + "ldr q5, [x28, #2480]", "eor v6.16b, v16.16b, v5.16b", "mov v16.16b, v6.16b", "fmla v16.2d, v17.2d, v18.2d", diff --git a/unittests/InstructionCountCI/AVX128/VEX_map3.json b/unittests/InstructionCountCI/AVX128/VEX_map3.json index 90b355055d..266b50dcd8 100644 --- a/unittests/InstructionCountCI/AVX128/VEX_map3.json +++ b/unittests/InstructionCountCI/AVX128/VEX_map3.json @@ -343,7 +343,7 @@ "Map 3 0b01 0x02 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2480]", + "ldr q2, [x28, #2544]", "tbx v16.16b, {v17.16b}, v2.16b", "movi v2.2d, #0x0", "str q2, [x28, #16]" @@ -355,7 +355,7 @@ "Map 3 0b01 0x02 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2496]", + "ldr q2, [x28, #2560]", "tbx v16.16b, {v17.16b}, v2.16b", "movi v2.2d, #0x0", "str q2, [x28, #16]" @@ -378,7 +378,7 @@ "Map 3 0b01 0x02 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2512]", + "ldr q2, [x28, #2576]", "tbx v16.16b, {v17.16b}, v2.16b", "movi v2.2d, #0x0", "str q2, [x28, #16]" @@ -402,7 +402,7 @@ "Map 3 0b01 0x02 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2528]", + "ldr q2, [x28, #2592]", "tbx v16.16b, {v17.16b}, v2.16b", "movi v2.2d, #0x0", "str q2, [x28, #16]" @@ -425,7 +425,7 @@ "Map 3 0b01 0x02 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2544]", + "ldr q2, [x28, #2608]", "tbx v16.16b, {v17.16b}, v2.16b", "movi v2.2d, #0x0", "str q2, [x28, #16]" @@ -437,7 +437,7 @@ "Map 3 0b01 0x02 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2560]", + "ldr q2, [x28, #2624]", "tbx v16.16b, {v17.16b}, v2.16b", "movi v2.2d, #0x0", "str q2, [x28, #16]" @@ -3576,7 +3576,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3591,7 +3591,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3606,7 +3606,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3621,7 +3621,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3636,7 +3636,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3651,7 +3651,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3666,7 +3666,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3681,7 +3681,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3696,7 +3696,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3711,7 +3711,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3726,7 +3726,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3741,7 +3741,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3756,7 +3756,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3771,7 +3771,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3786,7 +3786,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3801,7 +3801,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3816,7 +3816,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3831,7 +3831,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3846,7 +3846,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3861,7 +3861,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3876,7 +3876,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3891,7 +3891,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3906,7 +3906,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3921,7 +3921,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3936,7 +3936,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3951,7 +3951,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3966,7 +3966,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3981,7 +3981,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -3996,7 +3996,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -4011,7 +4011,7 @@ "str x20, [x28, #272]", "mov w1, #0x401", "str x1, [x28, #1328]", - "ldr x0, [x28, #2232]", + "ldr x0, [x28, #2296]", "br x0" ] }, @@ -4022,7 +4022,7 @@ ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "ldr q3, [x28, #2464]", + "ldr q3, [x28, #2528]", "mov v16.16b, v17.16b", "unimplemented (Unimplemented)", "tbl v16.16b, {v16.16b}, v3.16b", @@ -4036,7 +4036,7 @@ ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "ldr q3, [x28, #2464]", + "ldr q3, [x28, #2528]", "mov v16.16b, v17.16b", "unimplemented (Unimplemented)", "tbl v16.16b, {v16.16b}, v3.16b", diff --git a/unittests/InstructionCountCI/Crypto/H0F3A.json b/unittests/InstructionCountCI/Crypto/H0F3A.json index ff742c7dc2..a7c88fdfc5 100644 --- a/unittests/InstructionCountCI/Crypto/H0F3A.json +++ b/unittests/InstructionCountCI/Crypto/H0F3A.json @@ -55,7 +55,7 @@ "0x66 0x0f 0x3a 0xdf" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2464]", + "ldr q2, [x28, #2528]", "movi v3.2d, #0x0", "mov v16.16b, v17.16b", "unimplemented (Unimplemented)", @@ -68,7 +68,7 @@ "0x66 0x0f 0x3a 0xdf" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2464]", + "ldr q2, [x28, #2528]", "movi v3.2d, #0x0", "mov v16.16b, v17.16b", "unimplemented (Unimplemented)", diff --git a/unittests/InstructionCountCI/DDD.json b/unittests/InstructionCountCI/DDD.json index 254aad64bc..350c00c9cc 100644 --- a/unittests/InstructionCountCI/DDD.json +++ b/unittests/InstructionCountCI/DDD.json @@ -59,13 +59,17 @@ ] }, "pf2id mm0, mm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "0x0f 0x0f 0x1d" ], "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", - "fcvtzs v2.2s, v2.2s", + "ldr d3, [x28, #2864]", + "ldr d4, [x28, #2768]", + "fcvtzs v5.2s, v2.2s", + "fcmgt v2.4s, v4.4s, v2.4s", + "bsl v2.8b, v5.8b, v3.8b", "str d2, [x28, #1040]", "mov w20, #0xffff", "strh w20, [x28, #1048]" diff --git a/unittests/InstructionCountCI/FEXOpt/MultiInst_TSO_32bit.json b/unittests/InstructionCountCI/FEXOpt/MultiInst_TSO_32bit.json index aeabad4300..b614599fb6 100644 --- a/unittests/InstructionCountCI/FEXOpt/MultiInst_TSO_32bit.json +++ b/unittests/InstructionCountCI/FEXOpt/MultiInst_TSO_32bit.json @@ -65,66 +65,6 @@ "subs w26, w4, w20", "mov x4, x26" ] - }, - "Load variables from memory": { - "x86InstructionCount": 4, - "ExpectedInstructionCount": 16, - "Comment": [ - "Just to ensure small atomic offset loads are using LRCPC2" - ], - "x86Insts": [ - "mov edi, [ecx]", - "mov edx, [ecx + 4]", - "mov ax, [ecx + 24]", - "mov bl, [ecx + 26]" - ], - "ExpectedArm64ASM": [ - "mov w20, w7", - "ldapur w11, [x20]", - "nop", - "add x20, x7, #0x4 (4)", - "mov w20, w20", - "ldapur w5, [x20]", - "nop", - "add x20, x7, #0x18 (24)", - "mov w20, w20", - "ldapurh w20, [x20]", - "nop", - "bfxil x4, x20, #0, #16", - "add x20, x7, #0x1a (26)", - "mov w20, w20", - "ldapurb w20, [x20]", - "bfxil x6, x20, #0, #8" - ] - }, - "Store variables to memory": { - "x86InstructionCount": 4, - "ExpectedInstructionCount": 14, - "Comment": [ - "Just to ensure small atomic offset stores are using LRCPC2" - ], - "x86Insts": [ - "mov [ecx], edi", - "mov [ecx + 4], edx", - "mov [ecx + 24], ax", - "mov [ecx + 26], bl" - ], - "ExpectedArm64ASM": [ - "mov w20, w7", - "nop", - "stlur w11, [x20]", - "add x20, x7, #0x4 (4)", - "mov w20, w20", - "nop", - "stlur w5, [x20]", - "add x20, x7, #0x18 (24)", - "mov w20, w20", - "nop", - "stlurh w4, [x20]", - "add x20, x7, #0x1a (26)", - "mov w20, w20", - "stlurb w6, [x20]" - ] } } } diff --git a/unittests/InstructionCountCI/FEXOpt/libnss.json b/unittests/InstructionCountCI/FEXOpt/libnss.json index aa7f38fb1a..1782ef2419 100644 --- a/unittests/InstructionCountCI/FEXOpt/libnss.json +++ b/unittests/InstructionCountCI/FEXOpt/libnss.json @@ -198,10 +198,10 @@ "ldr q19, [x11, #272]", "ldr q24, [x11]", "ldr q23, [x11, #16]", - "ldr x0, [x28, #1992]", + "ldr x0, [x28, #2056]", "ldr q2, [x0, #2832]", "tbl v16.16b, {v18.16b}, v2.16b", - "ldr x0, [x28, #1992]", + "ldr x0, [x28, #2056]", "ldr q3, [x0, #432]", "tbl v18.16b, {v19.16b}, v3.16b", "ldr q22, [x11, #32]", @@ -285,7 +285,7 @@ "mov v4.s[1], w21", "mov v20.16b, v4.16b", "mov v20.s[0], w23", - "ldr x0, [x28, #1992]", + "ldr x0, [x28, #2056]", "ldr q4, [x0, #224]", "tbl v16.16b, {v16.16b}, v4.16b", "mov w21, v20.s[1]", diff --git a/unittests/InstructionCountCI/FlagM/HotBlocks.json b/unittests/InstructionCountCI/FlagM/HotBlocks.json index 08b4be34d5..5be17293cc 100644 --- a/unittests/InstructionCountCI/FlagM/HotBlocks.json +++ b/unittests/InstructionCountCI/FlagM/HotBlocks.json @@ -693,71 +693,6 @@ "mov x26, x5" ] }, - "Factorio drawSprite+0x890": { - "x86InstructionCount": 3, - "ExpectedInstructionCount": 5, - "Comment": "first load should be rip relative", - "x86Insts": [ - "movss xmm9,dword [rbp]", - "and r9d,0x800000", - "movss dword [rbp-0x58],xmm9" - ], - "ExpectedArm64ASM": [ - "ldr s25, [x9]", - "ands w26, w13, #0x800000", - "mov x13, x26", - "stur s25, [x9, #-88]", - "cfinv" - ] - }, - "Factorio drawSprite+0xf2": { - "x86InstructionCount": 9, - "ExpectedInstructionCount": 11, - "x86Insts": [ - "movss xmm8,dword [rbp-0x58]", - "mov byte [rbp-0x49],r13b", - "mov byte [rbp-0x4a],r14b", - "mov rdx,qword [rdi+0x8]", - "mov qword [rbp-0x38],rbx", - "mov byte [rbp-0x4b],al", - "mov qword [rbp-0x40],r11", - "movss dword [rbp-0x48],xmm8", - "cmp rdx,qword [rdi]" - ], - "ExpectedArm64ASM": [ - "ldur s24, [x9, #-88]", - "sturb w17, [x9, #-73]", - "sturb w19, [x9, #-74]", - "ldr x5, [x11, #8]", - "stur x6, [x9, #-56]", - "sturb w4, [x9, #-75]", - "stur x15, [x9, #-64]", - "stur s24, [x9, #-72]", - "ldr x20, [x11]", - "eor x27, x5, x20", - "subs x26, x5, x20" - ] - }, - "Factorio drawSprite+0x520": { - "x86InstructionCount": 4, - "ExpectedInstructionCount": 8, - "x86Insts": [ - "sub ecx,0x9", - "xor r11d,r11d", - "cmp cl,0x1", - "cmovbe r11,rsi" - ], - "ExpectedArm64ASM": [ - "sub w7, w7, #0x9 (9)", - "mov w15, #0x0", - "mov w20, #0x1", - "lsl w0, w7, #24", - "cmp w0, w20, lsl #24", - "sub w26, w7, #0x1 (1)", - "mov x27, x7", - "csel x15, x10, x15, ls" - ] - }, "pcmpistri xmm0, xmm1, 0_0_00_11_01b": { "ExpectedInstructionCount": 38, "Comment": [ diff --git a/unittests/InstructionCountCI/FlagM/Secondary.json b/unittests/InstructionCountCI/FlagM/Secondary.json index 6010289c5d..cb84d86fa6 100644 --- a/unittests/InstructionCountCI/FlagM/Secondary.json +++ b/unittests/InstructionCountCI/FlagM/Secondary.json @@ -1608,7 +1608,7 @@ "Comment": "0x0f 0xd7", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", - "ldr d3, [x28, #2576]", + "ldr d3, [x28, #2640]", "cmlt v2.16b, v2.16b, #0", "and v2.16b, v2.16b, v3.16b", "addp v2.16b, v2.16b, v2.16b", diff --git a/unittests/InstructionCountCI/FlagM/Secondary_OpSize.json b/unittests/InstructionCountCI/FlagM/Secondary_OpSize.json index 723429cbe0..93f1c2949b 100644 --- a/unittests/InstructionCountCI/FlagM/Secondary_OpSize.json +++ b/unittests/InstructionCountCI/FlagM/Secondary_OpSize.json @@ -37,7 +37,7 @@ "ExpectedInstructionCount": 7, "Comment": "0x66 0x0f 0xd7", "ExpectedArm64ASM": [ - "ldr q2, [x28, #2576]", + "ldr q2, [x28, #2640]", "cmlt v3.16b, v16.16b, #0", "and v2.16b, v3.16b, v2.16b", "addp v2.16b, v2.16b, v2.16b", diff --git a/unittests/InstructionCountCI/FlagM/VEX_map1.json b/unittests/InstructionCountCI/FlagM/VEX_map1.json index a8c31ebc8f..8c197a9093 100644 --- a/unittests/InstructionCountCI/FlagM/VEX_map1.json +++ b/unittests/InstructionCountCI/FlagM/VEX_map1.json @@ -68,7 +68,7 @@ "Map 1 0b01 0xd7 256-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2576]", + "ldr q2, [x28, #2640]", "cmlt v3.16b, v16.16b, #0", "and v2.16b, v3.16b, v2.16b", "addp v2.16b, v2.16b, v2.16b", diff --git a/unittests/InstructionCountCI/FlagM/x87-HalfLife.json b/unittests/InstructionCountCI/FlagM/x87-HalfLife.json index 1bc61434cb..035b4e24c3 100644 --- a/unittests/InstructionCountCI/FlagM/x87-HalfLife.json +++ b/unittests/InstructionCountCI/FlagM/x87-HalfLife.json @@ -3164,7 +3164,7 @@ "lsl w20, w23, w20", "bic w20, w22, w20", "strb w20, [x28, #1298]", - "ldr x0, [x28, #2272]", + "ldr x0, [x28, #2336]", "ubfiz x3, x21, #4, #20", "add x0, x0, x3", "ldp x1, x0, [x0]" @@ -4191,7 +4191,7 @@ "lsl w20, w21, w20", "bic w20, w23, w20", "strb w20, [x28, #1298]", - "ldr x0, [x28, #2272]", + "ldr x0, [x28, #2336]", "ubfiz x3, x22, #4, #20", "add x0, x0, x3", "ldp x1, x0, [x0]" @@ -6334,7 +6334,7 @@ "movk w21, #0x6, lsl #16", "add w21, w20, w21", "str w20, [x8, #-4]!", - "ldr x0, [x28, #2272]", + "ldr x0, [x28, #2336]", "ubfiz x3, x21, #4, #20", "add x0, x0, x3", "ldp x1, x0, [x0]" @@ -6621,7 +6621,7 @@ "movk w21, #0x6, lsl #16", "add w21, w20, w21", "str w20, [x8, #-4]!", - "ldr x0, [x28, #2272]", + "ldr x0, [x28, #2336]", "ubfiz x3, x21, #4, #20", "add x0, x0, x3", "ldp x1, x0, [x0]" @@ -6780,7 +6780,7 @@ "movk w21, #0x1, lsl #16", "add w21, w20, w21", "str w20, [x8, #-4]!", - "ldr x0, [x28, #2272]", + "ldr x0, [x28, #2336]", "ubfiz x3, x21, #4, #20", "add x0, x0, x3", "ldp x1, x0, [x0]" diff --git a/unittests/InstructionCountCI/FlagM/x87-Oblivion.json b/unittests/InstructionCountCI/FlagM/x87-Oblivion.json index 6552e9a8f1..ce057ca9b1 100644 --- a/unittests/InstructionCountCI/FlagM/x87-Oblivion.json +++ b/unittests/InstructionCountCI/FlagM/x87-Oblivion.json @@ -56173,7 +56173,7 @@ "lsl w20, w22, w20", "bic w20, w23, w20", "strb w20, [x28, #1298]", - "ldr x0, [x28, #2272]", + "ldr x0, [x28, #2336]", "ubfiz x3, x21, #4, #20", "add x0, x0, x3", "ldp x1, x0, [x0]" @@ -58294,7 +58294,7 @@ "lsl w20, w23, w20", "bic w20, w22, w20", "strb w20, [x28, #1298]", - "ldr x0, [x28, #2272]", + "ldr x0, [x28, #2336]", "ubfiz x3, x21, #4, #20", "add x0, x0, x3", "ldp x1, x0, [x0]" @@ -58683,7 +58683,7 @@ "add w20, w8, #0x10 (16)", "str s2, [x20]", "uxtb w7, w4", - "ldr q2, [x28, #2608]", + "ldr q2, [x28, #2672]", "str w7, [x8, #56]", "add w20, w8, #0xc (12)", "mrs x0, nzcv", @@ -58953,7 +58953,7 @@ "lsl w20, w23, w20", "bic w20, w22, w20", "strb w20, [x28, #1298]", - "ldr x0, [x28, #2272]", + "ldr x0, [x28, #2336]", "ubfiz x3, x21, #4, #20", "add x0, x0, x3", "ldp x1, x0, [x0]" @@ -59334,7 +59334,7 @@ "lsl w20, w23, w20", "bic w20, w22, w20", "strb w20, [x28, #1298]", - "ldr x0, [x28, #2272]", + "ldr x0, [x28, #2336]", "ubfiz x3, x21, #4, #20", "add x0, x0, x3", "ldp x1, x0, [x0]" @@ -74271,7 +74271,7 @@ "lsl w20, w22, w20", "orr w20, w23, w20", "strb w20, [x28, #1298]", - "ldr x0, [x28, #2272]", + "ldr x0, [x28, #2336]", "ubfiz x3, x21, #4, #20", "add x0, x0, x3", "ldp x1, x0, [x0]" @@ -74935,7 +74935,7 @@ "movk w21, #0x79, lsl #16", "add w21, w20, w21", "str w20, [x8, #-4]!", - "ldr x0, [x28, #2272]", + "ldr x0, [x28, #2336]", "ubfiz x3, x21, #4, #20", "add x0, x0, x3", "ldp x1, x0, [x0]" diff --git a/unittests/InstructionCountCI/FlagM/x87-Psychonauts.json b/unittests/InstructionCountCI/FlagM/x87-Psychonauts.json index ac0769f959..eb8137771a 100644 --- a/unittests/InstructionCountCI/FlagM/x87-Psychonauts.json +++ b/unittests/InstructionCountCI/FlagM/x87-Psychonauts.json @@ -32174,7 +32174,7 @@ "add w21, w20, w21", "str w20, [x8, #-4]!", "cfinv", - "ldr x0, [x28, #2272]", + "ldr x0, [x28, #2336]", "ubfiz x3, x21, #4, #20", "add x0, x0, x3", "ldp x1, x0, [x0]" @@ -65424,7 +65424,7 @@ "add w21, w20, w21", "str w20, [x8, #-4]!", "strb wzr, [x28, #1298]", - "ldr x0, [x28, #2272]", + "ldr x0, [x28, #2336]", "ubfiz x3, x21, #4, #20", "add x0, x0, x3", "ldp x1, x0, [x0]" @@ -73422,7 +73422,7 @@ "add w21, w20, w21", "str w20, [x8, #-4]!", "strb wzr, [x28, #1298]", - "ldr x0, [x28, #2272]", + "ldr x0, [x28, #2336]", "ubfiz x3, x21, #4, #20", "add x0, x0, x3", "ldp x1, x0, [x0]" @@ -97581,7 +97581,7 @@ "lsl w20, w23, w20", "bic w20, w22, w20", "strb w20, [x28, #1298]", - "ldr x0, [x28, #2272]", + "ldr x0, [x28, #2336]", "ubfiz x3, x21, #4, #20", "add x0, x0, x3", "ldp x1, x0, [x0]" diff --git a/unittests/InstructionCountCI/FlagM/x87.json b/unittests/InstructionCountCI/FlagM/x87.json index c1aef6b98b..e670270dd9 100644 --- a/unittests/InstructionCountCI/FlagM/x87.json +++ b/unittests/InstructionCountCI/FlagM/x87.json @@ -4468,7 +4468,7 @@ "0xd9 11b 0xe8 /5" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2608]", + "ldr q2, [x28, #2672]", "ldrb w20, [x28, #1019]", "mov w21, #0x1", "sub w20, w20, #0x1 (1)", @@ -4488,7 +4488,7 @@ "0xd9 11b 0xe9 /5" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2624]", + "ldr q2, [x28, #2688]", "ldrb w20, [x28, #1019]", "mov w21, #0x1", "sub w20, w20, #0x1 (1)", @@ -4508,7 +4508,7 @@ "0xd9 11b 0xea /5" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2640]", + "ldr q2, [x28, #2704]", "ldrb w20, [x28, #1019]", "mov w21, #0x1", "sub w20, w20, #0x1 (1)", @@ -4528,7 +4528,7 @@ "0xd9 11b 0xeb /5" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2656]", + "ldr q2, [x28, #2720]", "ldrb w20, [x28, #1019]", "mov w21, #0x1", "sub w20, w20, #0x1 (1)", @@ -4548,7 +4548,7 @@ "0xd9 11b 0xec /5" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2672]", + "ldr q2, [x28, #2736]", "ldrb w20, [x28, #1019]", "mov w21, #0x1", "sub w20, w20, #0x1 (1)", @@ -4568,7 +4568,7 @@ "0xd9 11b 0xed /5" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2688]", + "ldr q2, [x28, #2752]", "ldrb w20, [x28, #1019]", "mov w21, #0x1", "sub w20, w20, #0x1 (1)", @@ -4732,7 +4732,7 @@ "mov v2.h[4], w1", "add x0, x28, x20, lsl #4", "str q2, [x0, #1040]", - "ldr q2, [x28, #2608]", + "ldr q2, [x28, #2672]", "mov w21, #0x1", "sub w20, w20, #0x1 (1)", "and w20, w20, #0x7", @@ -5015,7 +5015,7 @@ "0xd9 11b 0xf9 /7" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2608]", + "ldr q2, [x28, #2672]", "ldrb w20, [x28, #1019]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #1040]", diff --git a/unittests/InstructionCountCI/H0F38.json b/unittests/InstructionCountCI/H0F38.json index d6bdcb649b..8751dc946f 100644 --- a/unittests/InstructionCountCI/H0F38.json +++ b/unittests/InstructionCountCI/H0F38.json @@ -655,7 +655,7 @@ "0x66 0x0f 0x38 0x41" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2288]", + "ldr q2, [x28, #2352]", "zip1 v3.8h, v2.8h, v17.8h", "zip2 v2.8h, v2.8h, v17.8h", "umin v2.4s, v3.4s, v2.4s", diff --git a/unittests/InstructionCountCI/H0F3A.json b/unittests/InstructionCountCI/H0F3A.json index f34c72d31e..dd66c7bb3e 100644 --- a/unittests/InstructionCountCI/H0F3A.json +++ b/unittests/InstructionCountCI/H0F3A.json @@ -321,7 +321,7 @@ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2480]", + "ldr q2, [x28, #2544]", "tbx v16.16b, {v17.16b}, v2.16b" ] }, @@ -331,7 +331,7 @@ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2496]", + "ldr q2, [x28, #2560]", "tbx v16.16b, {v17.16b}, v2.16b" ] }, @@ -350,7 +350,7 @@ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2512]", + "ldr q2, [x28, #2576]", "tbx v16.16b, {v17.16b}, v2.16b" ] }, @@ -370,7 +370,7 @@ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2528]", + "ldr q2, [x28, #2592]", "tbx v16.16b, {v17.16b}, v2.16b" ] }, @@ -389,7 +389,7 @@ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2544]", + "ldr q2, [x28, #2608]", "tbx v16.16b, {v17.16b}, v2.16b" ] }, @@ -399,7 +399,7 @@ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2560]", + "ldr q2, [x28, #2624]", "tbx v16.16b, {v17.16b}, v2.16b" ] }, @@ -468,7 +468,7 @@ "0x66 0x0f 0x3a 0x0e" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #2024]", + "ldr x0, [x28, #2088]", "ldr q2, [x0, #3440]", "tbx v16.16b, {v17.16b}, v2.16b" ] diff --git a/unittests/InstructionCountCI/PrimaryGroup.json b/unittests/InstructionCountCI/PrimaryGroup.json index d2d75a8cd1..15582e309e 100644 --- a/unittests/InstructionCountCI/PrimaryGroup.json +++ b/unittests/InstructionCountCI/PrimaryGroup.json @@ -2829,7 +2829,7 @@ "mov x0, x5", "mov x1, x4", "mov x2, x6", - "ldr x3, [x28, #2736]", + "ldr x3, [x28, #2928]", "str x30, [sp, #-16]!", "blr x3", "ldr x30, [sp], #16", @@ -2840,7 +2840,7 @@ "mov x0, x5", "mov x1, x4", "mov x2, x6", - "ldr x3, [x28, #2752]", + "ldr x3, [x28, #2944]", "str x30, [sp, #-16]!", "blr x3", "ldr x30, [sp], #16", @@ -2901,7 +2901,7 @@ "mov x0, x5", "mov x1, x4", "mov x2, x6", - "ldr x3, [x28, #2744]", + "ldr x3, [x28, #2936]", "str x30, [sp, #-16]!", "blr x3", "ldr x30, [sp], #16", @@ -2914,7 +2914,7 @@ "mov x0, x5", "mov x1, x4", "mov x2, x6", - "ldr x3, [x28, #2760]", + "ldr x3, [x28, #2952]", "str x30, [sp, #-16]!", "blr x3", "ldr x30, [sp], #16", diff --git a/unittests/InstructionCountCI/Secondary.json b/unittests/InstructionCountCI/Secondary.json index 8013f37de9..e2e87690ac 100644 --- a/unittests/InstructionCountCI/Secondary.json +++ b/unittests/InstructionCountCI/Secondary.json @@ -168,46 +168,62 @@ ] }, "cvttps2pi mm0, [rax]": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "0x0f 0x2c", "ExpectedArm64ASM": [ "strb wzr, [x28, #1019]", "mov w20, #0xffff", "ldr d2, [x4]", - "fcvtzs v2.2s, v2.2s", + "ldr d3, [x28, #2864]", + "ldr d4, [x28, #2768]", + "fcvtzs v5.2s, v2.2s", + "fcmgt v2.4s, v4.4s, v2.4s", + "bsl v2.8b, v5.8b, v3.8b", "strb w20, [x28, #1298]", "str d2, [x28, #1040]", "strh w20, [x28, #1048]" ] }, "cvttps2pi mm0, xmm0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": "0x0f 0x2c", "ExpectedArm64ASM": [ - "fcvtzs v2.2s, v16.2s", + "ldr d2, [x28, #2864]", + "ldr d3, [x28, #2768]", + "fcvtzs v4.2s, v16.2s", + "fcmgt v3.4s, v3.4s, v16.4s", + "bit v2.8b, v4.8b, v3.8b", "str d2, [x28, #1040]", "mov w20, #0xffff", "strh w20, [x28, #1048]" ] }, "cvtps2pi mm0, [rax]": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0x2d", "ExpectedArm64ASM": [ "ldr d2, [x4]", - "frinti v2.2s, v2.2s", - "fcvtzs v2.2s, v2.2s", + "frinti v2.4s, v2.4s", + "ldr d3, [x28, #2864]", + "ldr d4, [x28, #2768]", + "fcvtzs v5.2s, v2.2s", + "fcmgt v2.4s, v4.4s, v2.4s", + "bsl v2.8b, v5.8b, v3.8b", "str d2, [x28, #1040]", "mov w20, #0xffff", "strh w20, [x28, #1048]" ] }, "cvtps2pi mm0, xmm0": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0x2d", "ExpectedArm64ASM": [ - "frinti v2.2s, v16.2s", - "fcvtzs v2.2s, v2.2s", + "frinti v2.4s, v16.4s", + "ldr d3, [x28, #2864]", + "ldr d4, [x28, #2768]", + "fcvtzs v5.2s, v2.2s", + "fcmgt v2.4s, v4.4s, v2.4s", + "bsl v2.8b, v5.8b, v3.8b", "str d2, [x28, #1040]", "mov w20, #0xffff", "strh w20, [x28, #1048]" @@ -638,7 +654,7 @@ "Comment": "0x0f 0x50", "ExpectedArm64ASM": [ "ushr v2.4s, v16.4s, #31", - "ldr q3, [x28, #2448]", + "ldr q3, [x28, #2512]", "ushl v2.4s, v2.4s, v3.4s", "addv s2, v2.4s", "mov w4, v2.s[0]" @@ -649,7 +665,7 @@ "Comment": "0x0f 0x50", "ExpectedArm64ASM": [ "ushr v2.4s, v16.4s, #31", - "ldr q3, [x28, #2448]", + "ldr q3, [x28, #2512]", "ushl v2.4s, v2.4s, v3.4s", "addv s2, v2.4s", "mov w4, v2.s[0]" @@ -1092,7 +1108,7 @@ "Comment": "0x0f 0x70", "ExpectedArm64ASM": [ "ldr d2, [x28, #1056]", - "ldr x0, [x28, #1976]", + "ldr x0, [x28, #2040]", "ldr d3, [x0, #16]", "tbl v2.8b, {v2.16b}, v3.8b", "str d2, [x28, #1040]", @@ -1105,7 +1121,7 @@ "Comment": "0x0f 0x70", "ExpectedArm64ASM": [ "ldr d2, [x4]", - "ldr x0, [x28, #1976]", + "ldr x0, [x28, #2040]", "ldr d3, [x0, #16]", "tbl v2.8b, {v2.16b}, v3.8b", "str d2, [x28, #1040]", @@ -3384,7 +3400,7 @@ "ExpectedInstructionCount": 3, "Comment": "0x0f 0xc6", "ExpectedArm64ASM": [ - "ldr x0, [x28, #2000]", + "ldr x0, [x28, #2064]", "ldr q2, [x0, #16]", "tbl v16.16b, {v16.16b, v17.16b}, v2.16b" ] @@ -3393,7 +3409,7 @@ "ExpectedInstructionCount": 5, "Comment": "0x0f 0xc6", "ExpectedArm64ASM": [ - "ldr x0, [x28, #2000]", + "ldr x0, [x28, #2064]", "ldr q2, [x0, #16]", "mov v0.16b, v17.16b", "mov v1.16b, v16.16b", @@ -3405,7 +3421,7 @@ "Comment": "0x0f 0xc6", "ExpectedArm64ASM": [ "ldr q2, [x4]", - "ldr x0, [x28, #2000]", + "ldr x0, [x28, #2064]", "ldr q3, [x0, #16]", "mov v0.16b, v16.16b", "mov v1.16b, v2.16b", @@ -3513,7 +3529,7 @@ "Comment": "0x0f 0xd7", "ExpectedArm64ASM": [ "ldr d2, [x28, #1040]", - "ldr d3, [x28, #2576]", + "ldr d3, [x28, #2640]", "cmlt v2.16b, v2.16b, #0", "and v2.16b, v2.16b, v3.16b", "addp v2.16b, v2.16b, v2.16b", diff --git a/unittests/InstructionCountCI/Secondary_OpSize.json b/unittests/InstructionCountCI/Secondary_OpSize.json index 9f44d8acf9..3be79e6e97 100644 --- a/unittests/InstructionCountCI/Secondary_OpSize.json +++ b/unittests/InstructionCountCI/Secondary_OpSize.json @@ -123,23 +123,35 @@ ] }, "cvttpd2pi mm0, xmm0": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 11, "Comment": "0x66 0x0f 0x2c", "ExpectedArm64ASM": [ - "fcvtn v2.2s, v16.2d", - "fcvtzs v2.2s, v2.2s", + "ldr d2, [x28, #2864]", + "ldr q3, [x28, #2816]", + "frintz v4.2d, v16.2d", + "fcvtn v4.2s, v4.2d", + "fcvtzs v4.2s, v4.2s", + "fcmgt v3.2d, v3.2d, v16.2d", + "shrn v3.2s, v3.2d, #32", + "bit v2.8b, v4.8b, v3.8b", "str d2, [x28, #1040]", "mov w20, #0xffff", "strh w20, [x28, #1048]" ] }, "cvtpd2pi mm0, xmm0": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 12, "Comment": "0x66 0x0f 0x2d", "ExpectedArm64ASM": [ - "fcvtn v2.2s, v16.2d", - "frinti v2.2s, v2.2s", - "fcvtzs v2.2s, v2.2s", + "frinti v2.2d, v16.2d", + "ldr d3, [x28, #2864]", + "ldr q4, [x28, #2816]", + "frintz v5.2d, v2.2d", + "fcvtn v5.2s, v5.2d", + "fcvtzs v5.2s, v5.2s", + "fcmgt v2.2d, v4.2d, v2.2d", + "shrn v2.2s, v2.2d, #32", + "bsl v2.8b, v5.8b, v3.8b", "str d2, [x28, #1040]", "mov w20, #0xffff", "strh w20, [x28, #1048]" @@ -214,20 +226,30 @@ ] }, "cvtps2dq xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 7, "Comment": "0x66 0x0f 0x5b", "ExpectedArm64ASM": [ - "frinti v16.4s, v17.4s", - "fcvtzs v16.4s, v16.4s" + "frinti v2.4s, v17.4s", + "ldr q3, [x28, #2864]", + "ldr q4, [x28, #2768]", + "fcvtzs v5.4s, v2.4s", + "fcmgt v2.4s, v4.4s, v2.4s", + "mov v16.16b, v2.16b", + "bsl v16.16b, v5.16b, v3.16b" ] }, "cvtps2dq xmm0, [rax]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": "0xf2 0x0f 0x5b", "ExpectedArm64ASM": [ "ldr q2, [x4]", - "frinti v16.4s, v2.4s", - "fcvtzs v16.4s, v16.4s" + "frinti v2.4s, v2.4s", + "ldr q3, [x28, #2864]", + "ldr q4, [x28, #2768]", + "fcvtzs v5.4s, v2.4s", + "fcmgt v2.4s, v4.4s, v2.4s", + "mov v16.16b, v2.16b", + "bsl v16.16b, v5.16b, v3.16b" ] }, "subpd xmm0, xmm1": { @@ -515,7 +537,7 @@ "0x66 0x0f 0x70" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #1992]", + "ldr x0, [x28, #2056]", "ldr q2, [x0, #16]", "tbl v16.16b, {v17.16b}, v2.16b" ] @@ -529,7 +551,7 @@ ], "ExpectedArm64ASM": [ "ldr q2, [x4]", - "ldr x0, [x28, #1992]", + "ldr x0, [x28, #2056]", "ldr q3, [x0, #16]", "tbl v16.16b, {v2.16b}, v3.16b" ] @@ -643,7 +665,7 @@ ], "ExpectedArm64ASM": [ "ldr q2, [x4]", - "ldr x0, [x28, #1992]", + "ldr x0, [x28, #2056]", "ldr q3, [x0, #2480]", "tbl v16.16b, {v2.16b}, v3.16b" ] @@ -1184,7 +1206,7 @@ "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0xd0", "ExpectedArm64ASM": [ - "ldr q2, [x28, #2352]", + "ldr q2, [x28, #2416]", "eor v2.16b, v17.16b, v2.16b", "fadd v16.2d, v16.2d, v2.2d" ] @@ -1240,7 +1262,7 @@ "ExpectedInstructionCount": 7, "Comment": "0x66 0x0f 0xd7", "ExpectedArm64ASM": [ - "ldr q2, [x28, #2576]", + "ldr q2, [x28, #2640]", "cmlt v3.16b, v16.16b, #0", "and v2.16b, v3.16b, v2.16b", "addp v2.16b, v2.16b, v2.16b", @@ -1360,12 +1382,18 @@ ] }, "cvttpd2dq xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 9, "Comment": "0x66 0x0f 0xe6", "ExpectedArm64ASM": [ - "frintz v16.2d, v17.2d", - "fcvtn v16.2s, v16.2d", - "fcvtzs v16.2s, v16.2s" + "ldr d2, [x28, #2864]", + "ldr q3, [x28, #2816]", + "frintz v4.2d, v17.2d", + "fcvtn v4.2s, v4.2d", + "fcvtzs v4.2s, v4.2s", + "fcmgt v3.2d, v3.2d, v17.2d", + "shrn v3.2s, v3.2d, #32", + "mov v16.16b, v3.16b", + "bsl v16.16b, v4.16b, v2.16b" ] }, "movntdq [rax], xmm0": { diff --git a/unittests/InstructionCountCI/Secondary_REP.json b/unittests/InstructionCountCI/Secondary_REP.json index 540e2f7f9a..f98e9b397c 100644 --- a/unittests/InstructionCountCI/Secondary_REP.json +++ b/unittests/InstructionCountCI/Secondary_REP.json @@ -111,67 +111,115 @@ ] }, "cvttss2si eax, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 7, "Comment": "0xf3 0x0f 0x2c", "ExpectedArm64ASM": [ - "fcvtzs w4, s16" + "fcvtzs w20, s16", + "mov w21, #0x80000000", + "ldr s2, [x28, #2768]", + "mrs x22, nzcv", + "fcmp s2, s16", + "csel w4, w20, w21, gt", + "msr nzcv, x22" ] }, "cvttss2si eax, dword [rbx]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 8, "Comment": "0xf3 0x0f 0x2c", "ExpectedArm64ASM": [ "ldr s2, [x6]", - "fcvtzs w4, s2" + "fcvtzs w20, s2", + "mov w21, #0x80000000", + "ldr s3, [x28, #2768]", + "mrs x22, nzcv", + "fcmp s3, s2", + "csel w4, w20, w21, gt", + "msr nzcv, x22" ] }, "cvttss2si rax, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 7, "Comment": "0xf3 0x0f 0x2c", "ExpectedArm64ASM": [ - "fcvtzs x4, s16" + "fcvtzs x20, s16", + "mov x21, #0x8000000000000000", + "ldr s2, [x28, #2800]", + "mrs x22, nzcv", + "fcmp s2, s16", + "csel x4, x20, x21, gt", + "msr nzcv, x22" ] }, "cvttss2si rax, dword [rbx]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 8, "Comment": "0xf3 0x0f 0x2c", "ExpectedArm64ASM": [ "ldr d2, [x6]", - "fcvtzs x4, s2" + "fcvtzs x20, s2", + "mov x21, #0x8000000000000000", + "ldr s3, [x28, #2800]", + "mrs x22, nzcv", + "fcmp s3, s2", + "csel x4, x20, x21, gt", + "msr nzcv, x22" ] }, "cvtss2si eax, xmm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 8, "Comment": "0xf3 0x0f 0x2d", "ExpectedArm64ASM": [ - "frinti s0, s16", - "fcvtzs w4, s0" + "frinti s2, s16", + "fcvtzs w20, s2", + "mov w21, #0x80000000", + "ldr s3, [x28, #2768]", + "mrs x22, nzcv", + "fcmp s3, s2", + "csel w4, w20, w21, gt", + "msr nzcv, x22" ] }, "cvtss2si eax, dword [rbx]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 9, "Comment": "0xf3 0x0f 0x2d", "ExpectedArm64ASM": [ "ldr s2, [x6]", - "frinti s0, s2", - "fcvtzs w4, s0" + "frinti s2, s2", + "fcvtzs w20, s2", + "mov w21, #0x80000000", + "ldr s3, [x28, #2768]", + "mrs x22, nzcv", + "fcmp s3, s2", + "csel w4, w20, w21, gt", + "msr nzcv, x22" ] }, "cvtss2si rax, xmm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 8, "Comment": "0xf3 0x0f 0x2d", "ExpectedArm64ASM": [ - "frinti s0, s16", - "fcvtzs x4, s0" + "frinti s2, s16", + "fcvtzs x20, s2", + "mov x21, #0x8000000000000000", + "ldr s3, [x28, #2800]", + "mrs x22, nzcv", + "fcmp s3, s2", + "csel x4, x20, x21, gt", + "msr nzcv, x22" ] }, "cvtss2si rax, dword [rbx]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 9, "Comment": "0xf3 0x0f 0x2d", "ExpectedArm64ASM": [ "ldr d2, [x6]", - "frinti s0, s2", - "fcvtzs x4, s0" + "frinti s2, s2", + "fcvtzs x20, s2", + "mov x21, #0x8000000000000000", + "ldr s3, [x28, #2800]", + "mrs x22, nzcv", + "fcmp s3, s2", + "csel x4, x20, x21, gt", + "msr nzcv, x22" ] }, "sqrtss xmm0, xmm1": { @@ -243,10 +291,15 @@ ] }, "cvttps2dq xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 6, "Comment": "0xf3 0x0f 0x5b", "ExpectedArm64ASM": [ - "fcvtzs v16.4s, v17.4s" + "ldr q2, [x28, #2864]", + "ldr q3, [x28, #2768]", + "fcvtzs v4.4s, v17.4s", + "fcmgt v3.4s, v3.4s, v17.4s", + "mov v16.16b, v3.16b", + "bsl v16.16b, v4.16b, v2.16b" ] }, "subss xmm0, xmm1": { @@ -342,7 +395,7 @@ "0xf3 0x0f 0x70" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #1984]", + "ldr x0, [x28, #2048]", "ldr q2, [x0, #1280]", "tbl v16.16b, {v17.16b}, v2.16b" ] @@ -355,7 +408,7 @@ "0xf3 0x0f 0x70" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #1984]", + "ldr x0, [x28, #2048]", "ldr q2, [x0, #16]", "tbl v16.16b, {v17.16b}, v2.16b" ] diff --git a/unittests/InstructionCountCI/Secondary_REPNE.json b/unittests/InstructionCountCI/Secondary_REPNE.json index 5a3bcbdc28..ab48ac3007 100644 --- a/unittests/InstructionCountCI/Secondary_REPNE.json +++ b/unittests/InstructionCountCI/Secondary_REPNE.json @@ -96,67 +96,115 @@ ] }, "cvttsd2si eax, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 7, "Comment": "0xf2 0x0f 0x2c", "ExpectedArm64ASM": [ - "fcvtzs w4, d16" + "fcvtzs w20, d16", + "mov w21, #0x80000000", + "ldr d2, [x28, #2816]", + "mrs x22, nzcv", + "fcmp d2, d16", + "csel w4, w20, w21, gt", + "msr nzcv, x22" ] }, "cvttsd2si eax, qword [rbx]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 8, "Comment": "0xf2 0x0f 0x2c", "ExpectedArm64ASM": [ "ldr d2, [x6]", - "fcvtzs w4, d2" + "fcvtzs w20, d2", + "mov w21, #0x80000000", + "ldr d3, [x28, #2816]", + "mrs x22, nzcv", + "fcmp d3, d2", + "csel w4, w20, w21, gt", + "msr nzcv, x22" ] }, "cvttsd2si rax, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 7, "Comment": "0xf2 0x0f 0x2c", "ExpectedArm64ASM": [ - "fcvtzs x4, d16" + "fcvtzs x20, d16", + "mov x21, #0x8000000000000000", + "ldr d2, [x28, #2848]", + "mrs x22, nzcv", + "fcmp d2, d16", + "csel x4, x20, x21, gt", + "msr nzcv, x22" ] }, "cvttsd2si rax, qword [rbx]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 8, "Comment": "0xf2 0x0f 0x2c", "ExpectedArm64ASM": [ "ldr d2, [x6]", - "fcvtzs x4, d2" + "fcvtzs x20, d2", + "mov x21, #0x8000000000000000", + "ldr d3, [x28, #2848]", + "mrs x22, nzcv", + "fcmp d3, d2", + "csel x4, x20, x21, gt", + "msr nzcv, x22" ] }, "cvtsd2si eax, xmm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 8, "Comment": "0xf2 0x0f 0x2d", "ExpectedArm64ASM": [ - "frinti d0, d16", - "fcvtzs x4, d0" + "frinti d2, d16", + "fcvtzs x20, d2", + "mov x21, #0x8000000000000000", + "ldr d3, [x28, #2848]", + "mrs x22, nzcv", + "fcmp d3, d2", + "csel x4, x20, x21, gt", + "msr nzcv, x22" ] }, "cvtsd2si eax, qword [rbx]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 9, "Comment": "0xf2 0x0f 0x2d", "ExpectedArm64ASM": [ "ldr d2, [x6]", - "frinti d0, d2", - "fcvtzs x4, d0" + "frinti d2, d2", + "fcvtzs x20, d2", + "mov x21, #0x8000000000000000", + "ldr d3, [x28, #2848]", + "mrs x22, nzcv", + "fcmp d3, d2", + "csel x4, x20, x21, gt", + "msr nzcv, x22" ] }, "cvtsd2si rax, xmm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 8, "Comment": "0xf2 0x0f 0x2d", "ExpectedArm64ASM": [ - "frinti d0, d16", - "fcvtzs x4, d0" + "frinti d2, d16", + "fcvtzs x20, d2", + "mov x21, #0x8000000000000000", + "ldr d3, [x28, #2848]", + "mrs x22, nzcv", + "fcmp d3, d2", + "csel x4, x20, x21, gt", + "msr nzcv, x22" ] }, "cvtsd2si rax, qword [rbx]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 9, "Comment": "0xf2 0x0f 0x2d", "ExpectedArm64ASM": [ "ldr d2, [x6]", - "frinti d0, d2", - "fcvtzs x4, d0" + "frinti d2, d2", + "fcvtzs x20, d2", + "mov x21, #0x8000000000000000", + "ldr d3, [x28, #2848]", + "mrs x22, nzcv", + "fcmp d3, d2", + "csel x4, x20, x21, gt", + "msr nzcv, x22" ] }, "sqrtsd xmm0, xmm1": { @@ -284,7 +332,7 @@ "0xf2 0x0f 0x70" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #1976]", + "ldr x0, [x28, #2040]", "ldr q2, [x0, #1280]", "tbl v16.16b, {v17.16b}, v2.16b" ] @@ -297,7 +345,7 @@ "0xf2 0x0f 0x70" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #1976]", + "ldr x0, [x28, #2040]", "ldr q2, [x0, #16]", "tbl v16.16b, {v17.16b}, v2.16b" ] @@ -453,7 +501,7 @@ "ExpectedInstructionCount": 3, "Comment": "0xf2 0x0f 0xd0", "ExpectedArm64ASM": [ - "ldr q2, [x28, #2320]", + "ldr q2, [x28, #2384]", "eor v2.16b, v17.16b, v2.16b", "fadd v16.4s, v16.4s, v2.4s" ] @@ -468,12 +516,19 @@ ] }, "cvtpd2dq xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 10, "Comment": "0xf2 0x0f 0xe6", "ExpectedArm64ASM": [ - "frinti v16.2d, v17.2d", - "fcvtn v16.2s, v16.2d", - "fcvtzs v16.2s, v16.2s" + "frinti v2.2d, v17.2d", + "ldr d3, [x28, #2864]", + "ldr q4, [x28, #2816]", + "frintz v5.2d, v2.2d", + "fcvtn v5.2s, v5.2d", + "fcvtzs v5.2s, v5.2s", + "fcmgt v2.2d, v4.2d, v2.2d", + "shrn v2.2s, v2.2d, #32", + "mov v16.16b, v2.16b", + "bsl v16.16b, v5.16b, v3.16b" ] }, "lddqu xmm0, [rax]": { diff --git a/unittests/InstructionCountCI/Secondary_REPNE_SVE128.json b/unittests/InstructionCountCI/Secondary_REPNE_SVE128.json index 7f61fda1ae..485bf0505e 100644 --- a/unittests/InstructionCountCI/Secondary_REPNE_SVE128.json +++ b/unittests/InstructionCountCI/Secondary_REPNE_SVE128.json @@ -12,13 +12,19 @@ }, "Instructions": { "cvtpd2dq xmm0, xmm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 10, "Comment": "0xf2 0x0f 0xe6", "ExpectedArm64ASM": [ - "frinti z16.d, p6/m, z17.d", - "fcvtzs z16.s, p6/m, z16.d", - "uzp1 z16.s, z16.s, z16.s", - "mov v16.8b, v16.8b" + "frinti v2.2d, v17.2d", + "ldr d3, [x28, #2864]", + "ldr q4, [x28, #2816]", + "fcvtzs z5.s, p6/m, z2.d", + "uzp1 z5.s, z5.s, z5.s", + "mov v5.8b, v5.8b", + "fcmgt v2.2d, v4.2d, v2.2d", + "shrn v2.2s, v2.2d, #32", + "movprfx z16, z5", + "bsl z16.d, z16.d, z3.d, z2.d" ] } } diff --git a/unittests/InstructionCountCI/VEX_map1.json b/unittests/InstructionCountCI/VEX_map1.json index 04159d40fe..1eeafb2189 100644 --- a/unittests/InstructionCountCI/VEX_map1.json +++ b/unittests/InstructionCountCI/VEX_map1.json @@ -2801,7 +2801,7 @@ "Map 1 0b00 0xC6 128-bit" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #2000]", + "ldr x0, [x28, #2064]", "ldr q2, [x0, #16]", "tbl v16.16b, {v17.16b, v18.16b}, v2.16b" ] @@ -2870,7 +2870,7 @@ "Map 1 0b00 0xC6 128-bit" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #2000]", + "ldr x0, [x28, #2064]", "ldr q2, [x0, #32]", "tbl v16.16b, {v17.16b, v18.16b}, v2.16b" ] @@ -2939,7 +2939,7 @@ "Map 1 0b00 0xC6 128-bit" ], "ExpectedArm64ASM": [ - "ldr x0, [x28, #2000]", + "ldr x0, [x28, #2064]", "ldr q2, [x0, #48]", "tbl v16.16b, {v17.16b, v18.16b}, v2.16b" ] @@ -3277,79 +3277,127 @@ ] }, "vcvttss2si eax, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b10 0x2c 128-bit" ], "ExpectedArm64ASM": [ - "fcvtzs w4, s16" + "fcvtzs w20, s16", + "mov w21, #0x80000000", + "ldr s2, [x28, #2768]", + "mrs x22, nzcv", + "fcmp s2, s16", + "csel w4, w20, w21, gt", + "msr nzcv, x22" ] }, "vcvttss2si rax, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b10 0x2c 128-bit" ], "ExpectedArm64ASM": [ - "fcvtzs x4, s16" + "fcvtzs x20, s16", + "mov x21, #0x8000000000000000", + "ldr s2, [x28, #2800]", + "mrs x22, nzcv", + "fcmp s2, s16", + "csel x4, x20, x21, gt", + "msr nzcv, x22" ] }, "vcvttsd2si eax, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b11 0x2c 128-bit" ], "ExpectedArm64ASM": [ - "fcvtzs w4, d16" + "fcvtzs w20, d16", + "mov w21, #0x80000000", + "ldr d2, [x28, #2816]", + "mrs x22, nzcv", + "fcmp d2, d16", + "csel w4, w20, w21, gt", + "msr nzcv, x22" ] }, "vcvttsd2si rax, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b11 0x2c 128-bit" ], "ExpectedArm64ASM": [ - "fcvtzs x4, d16" + "fcvtzs x20, d16", + "mov x21, #0x8000000000000000", + "ldr d2, [x28, #2848]", + "mrs x22, nzcv", + "fcmp d2, d16", + "csel x4, x20, x21, gt", + "msr nzcv, x22" ] }, "vcvtss2si eax, xmm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b10 0x2d 128-bit" ], "ExpectedArm64ASM": [ - "frinti s0, s16", - "fcvtzs w4, s0" + "frinti s2, s16", + "fcvtzs w20, s2", + "mov w21, #0x80000000", + "ldr s3, [x28, #2768]", + "mrs x22, nzcv", + "fcmp s3, s2", + "csel w4, w20, w21, gt", + "msr nzcv, x22" ] }, "vcvtss2si rax, xmm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b10 0x2d 128-bit" ], "ExpectedArm64ASM": [ - "frinti s0, s16", - "fcvtzs x4, s0" + "frinti s2, s16", + "fcvtzs x20, s2", + "mov x21, #0x8000000000000000", + "ldr s3, [x28, #2800]", + "mrs x22, nzcv", + "fcmp s3, s2", + "csel x4, x20, x21, gt", + "msr nzcv, x22" ] }, "vcvtsd2si eax, xmm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b11 0x2d 128-bit" ], "ExpectedArm64ASM": [ - "frinti d0, d16", - "fcvtzs x4, d0" + "frinti d2, d16", + "fcvtzs x20, d2", + "mov x21, #0x8000000000000000", + "ldr d3, [x28, #2848]", + "mrs x22, nzcv", + "fcmp d3, d2", + "csel x4, x20, x21, gt", + "msr nzcv, x22" ] }, "vcvtsd2si rax, xmm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b11 0x2d 128-bit" ], "ExpectedArm64ASM": [ - "frinti d0, d16", - "fcvtzs x4, d0" + "frinti d2, d16", + "fcvtzs x20, d2", + "mov x21, #0x8000000000000000", + "ldr d3, [x28, #2848]", + "mrs x22, nzcv", + "fcmp d3, d2", + "csel x4, x20, x21, gt", + "msr nzcv, x22" ] }, "vucomiss xmm0, xmm1": { @@ -3602,41 +3650,73 @@ ] }, "vcvtps2dq xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b01 0x5b 128-bit" ], "ExpectedArm64ASM": [ - "frinti v16.4s, v17.4s", - "fcvtzs v16.4s, v16.4s" + "frinti v2.4s, v17.4s", + "ldr q3, [x28, #2864]", + "ldr q4, [x28, #2768]", + "fcvtzs v5.4s, v2.4s", + "fcmgt v2.4s, v4.4s, v2.4s", + "mov v16.16b, v2.16b", + "bsl v16.16b, v5.16b, v3.16b" ] }, "vcvtps2dq ymm0, ymm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 13, "Comment": [ "Map 1 0b01 0x5b 256-bit" ], "ExpectedArm64ASM": [ - "frinti z16.s, p7/m, z17.s", - "fcvtzs z16.s, p7/m, z16.s" + "frinti z2.s, p7/m, z17.s", + "ldr x0, [x28, #2024]", + "ld1b {z3.b}, p7/z, [x0]", + "ldr x0, [x28, #1976]", + "ld1b {z4.b}, p7/z, [x0]", + "fcvtzs z5.s, p7/m, z2.s", + "fcmgt p0.s, p7/z, z4.s, z2.s", + "not z0.s, p0/m, z4.s", + "movprfx z2.s, p0/z, z4.s", + "orr z2.s, p0/m, z2.s, z0.s", + "movprfx z0, z5", + "bsl z0.d, z0.d, z3.d, z2.d", + "mov z16.d, z0.d" ] }, "vcvttps2dq xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b10 0x5b 128-bit" ], "ExpectedArm64ASM": [ - "fcvtzs v16.4s, v17.4s" + "ldr q2, [x28, #2864]", + "ldr q3, [x28, #2768]", + "fcvtzs v4.4s, v17.4s", + "fcmgt v3.4s, v3.4s, v17.4s", + "mov v16.16b, v3.16b", + "bsl v16.16b, v4.16b, v2.16b" ] }, "vcvttps2dq ymm0, ymm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 12, "Comment": [ "Map 1 0b10 0x5b 256-bit" ], "ExpectedArm64ASM": [ - "fcvtzs z16.s, p7/m, z17.s" + "ldr x0, [x28, #2024]", + "ld1b {z2.b}, p7/z, [x0]", + "ldr x0, [x28, #1976]", + "ld1b {z3.b}, p7/z, [x0]", + "fcvtzs z4.s, p7/m, z17.s", + "fcmgt p0.s, p7/z, z3.s, z17.s", + "not z0.s, p0/m, z3.s", + "movprfx z3.s, p0/z, z3.s", + "orr z3.s, p0/m, z3.s, z0.s", + "movprfx z0, z4", + "bsl z0.d, z0.d, z2.d, z3.d", + "mov z16.d, z0.d" ] }, "vsubps xmm0, xmm1, xmm2": { @@ -4360,7 +4440,7 @@ "Map 1 0b01 0xd0 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2352]", + "ldr q2, [x28, #2416]", "eor v2.16b, v18.16b, v2.16b", "fadd v16.2d, v17.2d, v2.2d" ] @@ -4383,7 +4463,7 @@ "Map 1 0b11 0xd0 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2320]", + "ldr q2, [x28, #2384]", "eor v2.16b, v18.16b, v2.16b", "fadd v16.4s, v17.4s, v2.4s" ] @@ -4520,7 +4600,7 @@ "Map 1 0b01 0xd7 256-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2576]", + "ldr q2, [x28, #2640]", "cmlt v3.16b, v16.16b, #0", "and v2.16b, v3.16b, v2.16b", "addp v2.16b, v2.16b, v2.16b", @@ -4907,25 +4987,43 @@ ] }, "vcvttpd2dq xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b01 0xe6 128-bit" ], "ExpectedArm64ASM": [ - "fcvtzs z16.s, p6/m, z17.d", - "uzp1 z16.s, z16.s, z16.s", - "mov v16.8b, v16.8b" + "ldr d2, [x28, #2864]", + "ldr q3, [x28, #2816]", + "fcvtzs z4.s, p6/m, z17.d", + "uzp1 z4.s, z4.s, z4.s", + "mov v4.8b, v4.8b", + "fcmgt v3.2d, v3.2d, v17.2d", + "shrn v3.2s, v3.2d, #32", + "mov v16.16b, v3.16b", + "bsl v16.16b, v4.16b, v2.16b" ] }, "vcvttpd2dq xmm0, ymm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 15, "Comment": [ "Map 1 0b01 0xe6 256-bit" ], "ExpectedArm64ASM": [ - "fcvtzs z16.s, p7/m, z17.d", - "uzp1 z16.s, z16.s, z16.s", - "mov v16.16b, v16.16b" + "ldr q2, [x28, #2864]", + "ldr x0, [x28, #2000]", + "ld1b {z3.b}, p7/z, [x0]", + "fcvtzs z4.s, p7/m, z17.d", + "uzp1 z4.s, z4.s, z4.s", + "mov v4.16b, v4.16b", + "fcmgt p0.d, p7/z, z3.d, z17.d", + "not z0.d, p0/m, z3.d", + "movprfx z3.d, p0/z, z3.d", + "orr z3.d, p0/m, z3.d, z0.d", + "shrnb z3.s, z3.d, #32", + "uzp1 z3.s, z3.s, z3.s", + "movprfx z0, z4", + "bsl z0.d, z0.d, z2.d, z3.d", + "mov z16.d, z0.d" ] }, "vcvtdq2pd xmm0, xmm1": { @@ -4949,27 +5047,45 @@ ] }, "vcvtpd2dq xmm0, xmm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 10, "Comment": [ "Map 1 0b11 0xe6 128-bit" ], "ExpectedArm64ASM": [ - "frinti z16.d, p6/m, z17.d", - "fcvtzs z16.s, p6/m, z16.d", - "uzp1 z16.s, z16.s, z16.s", - "mov v16.8b, v16.8b" + "frinti v2.2d, v17.2d", + "ldr d3, [x28, #2864]", + "ldr q4, [x28, #2816]", + "fcvtzs z5.s, p6/m, z2.d", + "uzp1 z5.s, z5.s, z5.s", + "mov v5.8b, v5.8b", + "fcmgt v2.2d, v4.2d, v2.2d", + "shrn v2.2s, v2.2d, #32", + "mov v16.16b, v2.16b", + "bsl v16.16b, v5.16b, v3.16b" ] }, "vcvtpd2dq xmm0, ymm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 16, "Comment": [ "Map 1 0b11 0xe6 256-bit" ], "ExpectedArm64ASM": [ - "frinti z16.d, p7/m, z17.d", - "fcvtzs z16.s, p7/m, z16.d", - "uzp1 z16.s, z16.s, z16.s", - "mov v16.16b, v16.16b" + "frinti z2.d, p7/m, z17.d", + "ldr q3, [x28, #2864]", + "ldr x0, [x28, #2000]", + "ld1b {z4.b}, p7/z, [x0]", + "fcvtzs z5.s, p7/m, z2.d", + "uzp1 z5.s, z5.s, z5.s", + "mov v5.16b, v5.16b", + "fcmgt p0.d, p7/z, z4.d, z2.d", + "not z0.d, p0/m, z4.d", + "movprfx z2.d, p0/z, z4.d", + "orr z2.d, p0/m, z2.d, z0.d", + "shrnb z2.s, z2.d, #32", + "uzp1 z2.s, z2.s, z2.s", + "movprfx z0, z5", + "bsl z0.d, z0.d, z3.d, z2.d", + "mov z16.d, z0.d" ] }, "vmovntdq [rax], xmm0": { diff --git a/unittests/InstructionCountCI/VEX_map2.json b/unittests/InstructionCountCI/VEX_map2.json index 05484a68ce..9535508461 100644 --- a/unittests/InstructionCountCI/VEX_map2.json +++ b/unittests/InstructionCountCI/VEX_map2.json @@ -1603,7 +1603,7 @@ "Map 2 0b01 0x41 256-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2288]", + "ldr q2, [x28, #2352]", "zip1 v3.8h, v2.8h, v17.8h", "zip2 v2.8h, v2.8h, v17.8h", "umin v2.4s, v3.4s, v2.4s", @@ -3657,7 +3657,7 @@ "Map 2 0b01 0x96 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2320]", + "ldr q2, [x28, #2384]", "eor v2.16b, v17.16b, v2.16b", "fmla v2.4s, v16.4s, v18.4s", "mov v16.16b, v2.16b" @@ -3683,7 +3683,7 @@ "Map 2 0b01 0x96 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2352]", + "ldr q2, [x28, #2416]", "eor v2.16b, v17.16b, v2.16b", "fmla v2.2d, v16.2d, v18.2d", "mov v16.16b, v2.16b" @@ -3709,7 +3709,7 @@ "Map 2 0b01 0x97 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2384]", + "ldr q2, [x28, #2448]", "eor v2.16b, v17.16b, v2.16b", "fmla v2.4s, v16.4s, v18.4s", "mov v16.16b, v2.16b" @@ -3735,7 +3735,7 @@ "Map 2 0b01 0x97 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2416]", + "ldr q2, [x28, #2480]", "eor v2.16b, v17.16b, v2.16b", "fmla v2.2d, v16.2d, v18.2d", "mov v16.16b, v2.16b" @@ -4609,7 +4609,7 @@ "Map 2 0b01 0xa6 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2320]", + "ldr q2, [x28, #2384]", "eor v2.16b, v18.16b, v2.16b", "fmla v2.4s, v17.4s, v16.4s", "mov v16.16b, v2.16b" @@ -4635,7 +4635,7 @@ "Map 2 0b01 0xa6 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2352]", + "ldr q2, [x28, #2416]", "eor v2.16b, v18.16b, v2.16b", "fmla v2.2d, v17.2d, v16.2d", "mov v16.16b, v2.16b" @@ -4661,7 +4661,7 @@ "Map 2 0b01 0xa7 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2384]", + "ldr q2, [x28, #2448]", "eor v2.16b, v18.16b, v2.16b", "fmla v2.4s, v17.4s, v16.4s", "mov v16.16b, v2.16b" @@ -4687,7 +4687,7 @@ "Map 2 0b01 0xa7 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2416]", + "ldr q2, [x28, #2480]", "eor v2.16b, v18.16b, v2.16b", "fmla v2.2d, v17.2d, v16.2d", "mov v16.16b, v2.16b" @@ -4713,7 +4713,7 @@ "Map 2 0b01 0xb6 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2320]", + "ldr q2, [x28, #2384]", "eor v2.16b, v16.16b, v2.16b", "fmla v2.4s, v17.4s, v18.4s", "mov v16.16b, v2.16b" @@ -4738,7 +4738,7 @@ "Map 2 0b01 0xb6 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2352]", + "ldr q2, [x28, #2416]", "eor v2.16b, v16.16b, v2.16b", "fmla v2.2d, v17.2d, v18.2d", "mov v16.16b, v2.16b" @@ -4763,7 +4763,7 @@ "Map 2 0b01 0xb7 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2384]", + "ldr q2, [x28, #2448]", "eor v2.16b, v16.16b, v2.16b", "fmla v2.4s, v17.4s, v18.4s", "mov v16.16b, v2.16b" @@ -4788,7 +4788,7 @@ "Map 2 0b01 0xb7 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2416]", + "ldr q2, [x28, #2480]", "eor v2.16b, v16.16b, v2.16b", "fmla v2.2d, v17.2d, v18.2d", "mov v16.16b, v2.16b" diff --git a/unittests/InstructionCountCI/VEX_map3.json b/unittests/InstructionCountCI/VEX_map3.json index 15691167ca..50326dd5f9 100644 --- a/unittests/InstructionCountCI/VEX_map3.json +++ b/unittests/InstructionCountCI/VEX_map3.json @@ -5372,7 +5372,7 @@ "Map 3 0b01 0xdf 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2464]", + "ldr q2, [x28, #2528]", "movi v3.2d, #0x0", "mov v16.16b, v17.16b", "unimplemented (Unimplemented)", @@ -5385,7 +5385,7 @@ "Map 3 0b01 0xdf 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2464]", + "ldr q2, [x28, #2528]", "movi v3.2d, #0x0", "mov v16.16b, v17.16b", "unimplemented (Unimplemented)", diff --git a/unittests/InstructionCountCI/x87.json b/unittests/InstructionCountCI/x87.json index 18227db459..e4ee32aa61 100644 --- a/unittests/InstructionCountCI/x87.json +++ b/unittests/InstructionCountCI/x87.json @@ -4467,7 +4467,7 @@ "0xd9 11b 0xe8 /5" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2608]", + "ldr q2, [x28, #2672]", "ldrb w20, [x28, #1019]", "mov w21, #0x1", "sub w20, w20, #0x1 (1)", @@ -4487,7 +4487,7 @@ "0xd9 11b 0xe9 /5" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2624]", + "ldr q2, [x28, #2688]", "ldrb w20, [x28, #1019]", "mov w21, #0x1", "sub w20, w20, #0x1 (1)", @@ -4507,7 +4507,7 @@ "0xd9 11b 0xea /5" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2640]", + "ldr q2, [x28, #2704]", "ldrb w20, [x28, #1019]", "mov w21, #0x1", "sub w20, w20, #0x1 (1)", @@ -4527,7 +4527,7 @@ "0xd9 11b 0xeb /5" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2656]", + "ldr q2, [x28, #2720]", "ldrb w20, [x28, #1019]", "mov w21, #0x1", "sub w20, w20, #0x1 (1)", @@ -4547,7 +4547,7 @@ "0xd9 11b 0xec /5" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2672]", + "ldr q2, [x28, #2736]", "ldrb w20, [x28, #1019]", "mov w21, #0x1", "sub w20, w20, #0x1 (1)", @@ -4567,7 +4567,7 @@ "0xd9 11b 0xed /5" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2688]", + "ldr q2, [x28, #2752]", "ldrb w20, [x28, #1019]", "mov w21, #0x1", "sub w20, w20, #0x1 (1)", @@ -4731,7 +4731,7 @@ "mov v2.h[4], w1", "add x0, x28, x20, lsl #4", "str q2, [x0, #1040]", - "ldr q2, [x28, #2608]", + "ldr q2, [x28, #2672]", "mov w21, #0x1", "sub w20, w20, #0x1 (1)", "and w20, w20, #0x7", @@ -5014,7 +5014,7 @@ "0xd9 11b 0xf9 /7" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2608]", + "ldr q2, [x28, #2672]", "ldrb w20, [x28, #1019]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #1040]",