From 6045a0d3167a082668efe0e76b0063968f8d6e75 Mon Sep 17 00:00:00 2001 From: Paulo Matos Date: Mon, 13 Jan 2025 15:31:02 +0100 Subject: [PATCH] instcountci: Ensure predicate cache is reset when control flow leaves block --- unittests/InstructionCountCI/X87ldst-SVE.json | 30 +++++-------------- 1 file changed, 7 insertions(+), 23 deletions(-) diff --git a/unittests/InstructionCountCI/X87ldst-SVE.json b/unittests/InstructionCountCI/X87ldst-SVE.json index d82b68d9b1..2050491143 100644 --- a/unittests/InstructionCountCI/X87ldst-SVE.json +++ b/unittests/InstructionCountCI/X87ldst-SVE.json @@ -17,10 +17,10 @@ "ExpectedInstructionCount": 13, "Comment": "Single 80-bit store.", "ExpectedArm64ASM": [ + "ptrue p2.h, vl5", "ldrb w20, [x28, #1019]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "ptrue p2.h, vl5", "st1h {z2.h}, p2, [x4]", "ldrb w21, [x28, #1298]", "mov w22, #0x1", @@ -34,16 +34,16 @@ }, "2-store 80bit": { "x86InstructionCount": 2, - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 24, "x86Insts": [ "fstp tword [rax]", "fstp tword [rax+10]" ], "ExpectedArm64ASM": [ + "ptrue p2.h, vl5", "ldrb w20, [x28, #1019]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "ptrue p2.h, vl5", "st1h {z2.h}, p2, [x4]", "ldrb w21, [x28, #1298]", "mov w22, #0x1", @@ -56,7 +56,6 @@ "add x21, x4, #0xa (10)", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "ptrue p2.h, vl5", "st1h {z2.h}, p2, [x21]", "ldrb w21, [x28, #1298]", "lsl w22, w22, w20", @@ -69,7 +68,7 @@ }, "8-store 80bit": { "x86InstructionCount": 8, - "ExpectedInstructionCount": 97, + "ExpectedInstructionCount": 90, "x86Insts": [ "fstp tword [rax]", "fstp tword [rax+10]", @@ -81,10 +80,10 @@ "fstp tword [rax+70]" ], "ExpectedArm64ASM": [ + "ptrue p2.h, vl5", "ldrb w20, [x28, #1019]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "ptrue p2.h, vl5", "st1h {z2.h}, p2, [x4]", "ldrb w21, [x28, #1298]", "mov w22, #0x1", @@ -97,7 +96,6 @@ "add x21, x4, #0xa (10)", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "ptrue p2.h, vl5", "st1h {z2.h}, p2, [x21]", "ldrb w21, [x28, #1298]", "lsl w23, w22, w20", @@ -109,7 +107,6 @@ "add x21, x4, #0x14 (20)", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "ptrue p2.h, vl5", "st1h {z2.h}, p2, [x21]", "ldrb w21, [x28, #1298]", "lsl w23, w22, w20", @@ -121,7 +118,6 @@ "add x21, x4, #0x1e (30)", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "ptrue p2.h, vl5", "st1h {z2.h}, p2, [x21]", "ldrb w21, [x28, #1298]", "lsl w23, w22, w20", @@ -133,7 +129,6 @@ "add x21, x4, #0x28 (40)", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "ptrue p2.h, vl5", "st1h {z2.h}, p2, [x21]", "ldrb w21, [x28, #1298]", "lsl w23, w22, w20", @@ -145,7 +140,6 @@ "add x21, x4, #0x32 (50)", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "ptrue p2.h, vl5", "st1h {z2.h}, p2, [x21]", "ldrb w21, [x28, #1298]", "lsl w23, w22, w20", @@ -157,7 +151,6 @@ "add x21, x4, #0x3c (60)", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "ptrue p2.h, vl5", "st1h {z2.h}, p2, [x21]", "ldrb w21, [x28, #1298]", "lsl w23, w22, w20", @@ -169,7 +162,6 @@ "add x21, x4, #0x46 (70)", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #1040]", - "ptrue p2.h, vl5", "st1h {z2.h}, p2, [x21]", "ldrb w21, [x28, #1298]", "lsl w22, w22, w20", @@ -201,7 +193,7 @@ }, "2-load 80bit": { "x86InstructionCount": 2, - "ExpectedInstructionCount": 22, + "ExpectedInstructionCount": 21, "x86Insts": [ "fld tword [rax]", "fld tword [rax+10]" @@ -210,7 +202,6 @@ "ptrue p2.h, vl5", "ld1h {z2.h}, p2/z, [x4]", "add x20, x4, #0xa (10)", - "ptrue p2.h, vl5", "ld1h {z3.h}, p2/z, [x20]", "ldrb w20, [x28, #1019]", "sub w20, w20, #0x2 (2)", @@ -233,7 +224,7 @@ }, "8-load 80bit": { "x86InstructionCount": 8, - "ExpectedInstructionCount": 59, + "ExpectedInstructionCount": 52, "x86Insts": [ "fld tword [rax]", "fld tword [rax+10]", @@ -248,25 +239,18 @@ "ptrue p2.h, vl5", "ld1h {z2.h}, p2/z, [x4]", "add x20, x4, #0xa (10)", - "ptrue p2.h, vl5", "ld1h {z3.h}, p2/z, [x20]", "add x20, x4, #0x14 (20)", - "ptrue p2.h, vl5", "ld1h {z4.h}, p2/z, [x20]", "add x20, x4, #0x1e (30)", - "ptrue p2.h, vl5", "ld1h {z5.h}, p2/z, [x20]", "add x20, x4, #0x28 (40)", - "ptrue p2.h, vl5", "ld1h {z6.h}, p2/z, [x20]", "add x20, x4, #0x32 (50)", - "ptrue p2.h, vl5", "ld1h {z7.h}, p2/z, [x20]", "add x20, x4, #0x3c (60)", - "ptrue p2.h, vl5", "ld1h {z8.h}, p2/z, [x20]", "add x20, x4, #0x46 (70)", - "ptrue p2.h, vl5", "ld1h {z9.h}, p2/z, [x20]", "ldrb w20, [x28, #1019]", "sub w20, w20, #0x8 (8)",