From eb83c9e7f225aa52fbe3f080eba544fa5d27ec8a Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 2 Apr 2024 13:26:57 -0400 Subject: [PATCH 1/7] Core: use safe CondJump for self-modifying code this ensures we put the StoreNZCV in the right block, which will fix validation fails later in the series. Signed-off-by: Alyssa Rosenzweig --- FEXCore/Source/Interface/Core/Core.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FEXCore/Source/Interface/Core/Core.cpp b/FEXCore/Source/Interface/Core/Core.cpp index 47f9fd661d..d0ba5a42ed 100644 --- a/FEXCore/Source/Interface/Core/Core.cpp +++ b/FEXCore/Source/Interface/Core/Core.cpp @@ -579,7 +579,7 @@ namespace FEXCore::Context { auto CodeChanged = Thread->OpDispatcher->_ValidateCode(ExistingCodePtr[0], ExistingCodePtr[1], (uintptr_t)ExistingCodePtr - GuestRIP, DecodedInfo->InstSize); - auto InvalidateCodeCond = Thread->OpDispatcher->_CondJump(CodeChanged); + auto InvalidateCodeCond = Thread->OpDispatcher->CondJump(CodeChanged); auto CurrentBlock = Thread->OpDispatcher->GetCurrentBlock(); auto CodeWasChangedBlock = Thread->OpDispatcher->CreateNewCodeBlockAtEnd(); From d9493e5d9b421fb4a18206d2be39529a55a5f3df Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 2 Apr 2024 13:17:32 -0400 Subject: [PATCH 2/7] OpcodeDispatcher: fix xblock liveness in xsave/xrstr didn't fix this hard enough before. caught by validation. Signed-off-by: Alyssa Rosenzweig --- .../Core/OpcodeDispatcher/Vector.cpp | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp index 58e28ee121..4bff1fb53b 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp @@ -3008,10 +3008,10 @@ OrderedNode *OpDispatchBuilder::XSaveBase(X86Tables::DecodedOp Op) { void OpDispatchBuilder::XSaveOpImpl(OpcodeArgs) { // NOTE: Mask should be EAX and EDX concatenated, but we only need to test // for features that are in the lower 32 bits, so EAX only is sufficient. - OrderedNode *Mask = LoadGPRRegister(X86State::REG_RAX); const auto OpSize = IR::SizeToOpSize(CTX->GetGPRSize()); - const auto StoreIfFlagSet = [&](uint32_t BitIndex, auto fn, uint32_t FieldSize = 1){ + const auto StoreIfFlagSet = [this, OpSize](uint32_t BitIndex, auto fn, uint32_t FieldSize = 1){ + OrderedNode *Mask = LoadGPRRegister(X86State::REG_RAX); OrderedNode *BitFlag = _Bfe(OpSize, FieldSize, BitIndex, Mask); auto CondJump_ = CondJump(BitFlag, {COND_NEQ}); @@ -3055,6 +3055,7 @@ void OpDispatchBuilder::XSaveOpImpl(OpcodeArgs) { OrderedNode *HeaderOffset = _Add(OpSize, Base, _Constant(512)); // NOTE: We currently only support the first 3 bits (x87, SSE, and AVX) + OrderedNode *Mask = LoadGPRRegister(X86State::REG_RAX); OrderedNode *RequestedFeatures = _Bfe(OpSize, 3, 0, Mask); // XSTATE_BV section of the header is 8 bytes in size, but we only really @@ -3209,17 +3210,18 @@ void OpDispatchBuilder::FXRStoreOp(OpcodeArgs) { void OpDispatchBuilder::XRstorOpImpl(OpcodeArgs) { const auto OpSize = IR::SizeToOpSize(CTX->GetGPRSize()); - // Set up base address for the XSAVE region to restore from, and also read the - // XSTATE_BV bit flags out of the XSTATE header. - // - // Note: we rematerialize Base in each block to avoid crossblock liveness. - OrderedNode *Base = XSaveBase(Op); - OrderedNode *Mask = _LoadMem(GPRClass, 8, _Add(OpSize, Base, _Constant(512)), 8); - // If a bit in our XSTATE_BV is set, then we restore from that region of the XSAVE area, // otherwise, if not set, then we need to set the relevant data the bit corresponds to // to it's defined initial configuration. - const auto RestoreIfFlagSetOrDefault = [&](uint32_t BitIndex, auto restore_fn, auto default_fn, uint32_t FieldSize = 1){ + const auto RestoreIfFlagSetOrDefault = [this, Op, OpSize](uint32_t BitIndex, auto restore_fn, auto default_fn, uint32_t FieldSize = 1){ + // Set up base address for the XSAVE region to restore from, and also read + // the XSTATE_BV bit flags out of the XSTATE header. + // + // Note: we rematerialize Base/Mask in each block to avoid crossblock + // liveness. + OrderedNode *Base = XSaveBase(Op); + OrderedNode *Mask = _LoadMem(GPRClass, 8, _Add(OpSize, Base, _Constant(512)), 8); + OrderedNode *BitFlag = _Bfe(OpSize, FieldSize, BitIndex, Mask); auto CondJump_ = CondJump(BitFlag, {COND_NEQ}); From d0ff3e64d98d7a9b92fbb55fabccc0534c132e71 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 8 Apr 2024 13:49:31 -0400 Subject: [PATCH 3/7] InstCountCI: Update Signed-off-by: Alyssa Rosenzweig --- .../FlagM/SecondaryGroup.json | 129 +++++++++--------- .../InstructionCountCI/SecondaryGroup.json | 129 +++++++++--------- 2 files changed, 130 insertions(+), 128 deletions(-) diff --git a/unittests/InstructionCountCI/FlagM/SecondaryGroup.json b/unittests/InstructionCountCI/FlagM/SecondaryGroup.json index 0eaf853d60..1a465a88f6 100644 --- a/unittests/InstructionCountCI/FlagM/SecondaryGroup.json +++ b/unittests/InstructionCountCI/FlagM/SecondaryGroup.json @@ -1403,29 +1403,28 @@ ] }, "xsave [rax]": { - "ExpectedInstructionCount": 70, + "ExpectedInstructionCount": 69, "Comment": "GROUP15 0x0F 0xAE /4", "ExpectedArm64ASM": [ - "mov x20, x4", - "ubfx x21, x20, #0, #1", - "cbnz x21, #+0x8", + "ubfx x20, x4, #0, #1", + "cbnz x20, #+0x8", "b #+0x84", - "ldrh w21, [x28, #1024]", - "strh w21, [x4]", - "mov w21, #0x0", - "ldrb w22, [x28, #747]", - "bfi x21, x22, #11, #3", - "ldrb w22, [x28, #744]", - "ldrb w23, [x28, #745]", - "ldrb w24, [x28, #746]", - "ldrb w25, [x28, #750]", - "orr x21, x21, x22, lsl #8", - "orr x21, x21, x23, lsl #9", - "orr x21, x21, x24, lsl #10", - "orr x21, x21, x25, lsl #14", - "strh w21, [x4, #2]", - "ldrb w21, [x28, #1026]", - "strb w21, [x4, #4]", + "ldrh w20, [x28, #1024]", + "strh w20, [x4]", + "mov w20, #0x0", + "ldrb w21, [x28, #747]", + "bfi x20, x21, #11, #3", + "ldrb w21, [x28, #744]", + "ldrb w22, [x28, #745]", + "ldrb w23, [x28, #746]", + "ldrb w24, [x28, #750]", + "orr x20, x20, x21, lsl #8", + "orr x20, x20, x22, lsl #9", + "orr x20, x20, x23, lsl #10", + "orr x20, x20, x24, lsl #14", + "strh w20, [x4, #2]", + "ldrb w20, [x28, #1026]", + "strb w20, [x4, #4]", "ldr q2, [x28, #768]", "str q2, [x4, #32]", "ldr q2, [x28, #784]", @@ -1442,8 +1441,8 @@ "str q2, [x4, #128]", "ldr q2, [x28, #880]", "str q2, [x4, #144]", - "ubfx x21, x20, #1, #1", - "cbnz x21, #+0x8", + "ubfx x20, x4, #1, #1", + "cbnz x20, #+0x8", "b #+0x44", "str q16, [x4, #160]", "str q17, [x4, #176]", @@ -1461,20 +1460,20 @@ "str q29, [x4, #368]", "str q30, [x4, #384]", "str q31, [x4, #400]", - "ubfx x21, x20, #1, #2", - "cbnz x21, #+0x8", + "ubfx x20, x4, #1, #2", + "cbnz x20, #+0x8", "b #+0x2c", - "mov w21, #0x1f80", - "mrs x22, fpcr", - "ubfx x22, x22, #22, #3", - "rbit w0, w22", - "bfi x22, x0, #30, #2", - "bfi w21, w22, #13, #3", - "add x22, x4, #0x18 (24)", - "str w21, [x4, #24]", - "mov w21, #0xffff", - "str w21, [x22, #4]", - "ubfx x20, x20, #0, #3", + "mov w20, #0x1f80", + "mrs x21, fpcr", + "ubfx x21, x21, #22, #3", + "rbit w0, w21", + "bfi x21, x0, #30, #2", + "bfi w20, w21, #13, #3", + "add x21, x4, #0x18 (24)", + "str w20, [x4, #24]", + "mov w20, #0xffff", + "str w20, [x21, #4]", + "ubfx x20, x4, #0, #3", "str x20, [x4, #512]" ] }, @@ -1486,28 +1485,28 @@ ] }, "xrstor [rax]": { - "ExpectedInstructionCount": 103, + "ExpectedInstructionCount": 105, "Comment": "GROUP15 0x0F 0xAE /5", "ExpectedArm64ASM": [ "ldr x20, [x4, #512]", - "ubfx x21, x20, #0, #1", - "cbnz x21, #+0x8", + "ubfx x20, x20, #0, #1", + "cbnz x20, #+0x8", "b #+0x84", - "ldrh w21, [x4]", - "strh w21, [x28, #1024]", - "ldrh w21, [x4, #2]", - "ubfx w22, w21, #11, #3", - "strb w22, [x28, #747]", - "ubfx w22, w21, #8, #1", - "ubfx w23, w21, #9, #1", - "ubfx w24, w21, #10, #1", - "ubfx w21, w21, #14, #1", - "strb w22, [x28, #744]", - "strb w23, [x28, #745]", - "strb w24, [x28, #746]", - "strb w21, [x28, #750]", - "ldrb w21, [x4, #4]", - "strb w21, [x28, #1026]", + "ldrh w20, [x4]", + "strh w20, [x28, #1024]", + "ldrh w20, [x4, #2]", + "ubfx w21, w20, #11, #3", + "strb w21, [x28, #747]", + "ubfx w21, w20, #8, #1", + "ubfx w22, w20, #9, #1", + "ubfx w23, w20, #10, #1", + "ubfx w20, w20, #14, #1", + "strb w21, [x28, #744]", + "strb w22, [x28, #745]", + "strb w23, [x28, #746]", + "strb w20, [x28, #750]", + "ldrb w20, [x4, #4]", + "strb w20, [x28, #1026]", "ldr q2, [x4, #32]", "str q2, [x28, #768]", "ldr q2, [x4, #48]", @@ -1525,15 +1524,15 @@ "ldr q2, [x4, #144]", "str q2, [x28, #880]", "b #+0x4c", - "mov w21, #0x0", - "mov w22, #0x37f", - "strh w22, [x28, #1024]", - "strb w21, [x28, #747]", - "strb w21, [x28, #744]", - "strb w21, [x28, #745]", - "strb w21, [x28, #746]", - "strb w21, [x28, #750]", - "strb w21, [x28, #1026]", + "mov w20, #0x0", + "mov w21, #0x37f", + "strh w21, [x28, #1024]", + "strb w20, [x28, #747]", + "strb w20, [x28, #744]", + "strb w20, [x28, #745]", + "strb w20, [x28, #746]", + "strb w20, [x28, #750]", + "strb w20, [x28, #1026]", "movi v2.2d, #0x0", "str q2, [x28, #768]", "str q2, [x28, #784]", @@ -1543,8 +1542,9 @@ "str q2, [x28, #848]", "str q2, [x28, #864]", "str q2, [x28, #880]", - "ubfx x21, x20, #1, #1", - "cbnz x21, #+0x8", + "ldr x20, [x4, #512]", + "ubfx x20, x20, #1, #1", + "cbnz x20, #+0x8", "b #+0x48", "ldr q16, [x4, #160]", "ldr q17, [x4, #176]", @@ -1579,6 +1579,7 @@ "mov v29.16b, v16.16b", "mov v30.16b, v16.16b", "mov v31.16b, v16.16b", + "ldr x20, [x4, #512]", "ubfx x20, x20, #1, #2", "cbnz x20, #+0x8", "b #+0x2c", diff --git a/unittests/InstructionCountCI/SecondaryGroup.json b/unittests/InstructionCountCI/SecondaryGroup.json index 4ad5d8012d..c0a7059233 100644 --- a/unittests/InstructionCountCI/SecondaryGroup.json +++ b/unittests/InstructionCountCI/SecondaryGroup.json @@ -1587,29 +1587,28 @@ ] }, "xsave [rax]": { - "ExpectedInstructionCount": 70, + "ExpectedInstructionCount": 69, "Comment": "GROUP15 0x0F 0xAE /4", "ExpectedArm64ASM": [ - "mov x20, x4", - "ubfx x21, x20, #0, #1", - "cbnz x21, #+0x8", + "ubfx x20, x4, #0, #1", + "cbnz x20, #+0x8", "b #+0x84", - "ldrh w21, [x28, #1024]", - "strh w21, [x4]", - "mov w21, #0x0", - "ldrb w22, [x28, #747]", - "bfi x21, x22, #11, #3", - "ldrb w22, [x28, #744]", - "ldrb w23, [x28, #745]", - "ldrb w24, [x28, #746]", - "ldrb w25, [x28, #750]", - "orr x21, x21, x22, lsl #8", - "orr x21, x21, x23, lsl #9", - "orr x21, x21, x24, lsl #10", - "orr x21, x21, x25, lsl #14", - "strh w21, [x4, #2]", - "ldrb w21, [x28, #1026]", - "strb w21, [x4, #4]", + "ldrh w20, [x28, #1024]", + "strh w20, [x4]", + "mov w20, #0x0", + "ldrb w21, [x28, #747]", + "bfi x20, x21, #11, #3", + "ldrb w21, [x28, #744]", + "ldrb w22, [x28, #745]", + "ldrb w23, [x28, #746]", + "ldrb w24, [x28, #750]", + "orr x20, x20, x21, lsl #8", + "orr x20, x20, x22, lsl #9", + "orr x20, x20, x23, lsl #10", + "orr x20, x20, x24, lsl #14", + "strh w20, [x4, #2]", + "ldrb w20, [x28, #1026]", + "strb w20, [x4, #4]", "ldr q2, [x28, #768]", "str q2, [x4, #32]", "ldr q2, [x28, #784]", @@ -1626,8 +1625,8 @@ "str q2, [x4, #128]", "ldr q2, [x28, #880]", "str q2, [x4, #144]", - "ubfx x21, x20, #1, #1", - "cbnz x21, #+0x8", + "ubfx x20, x4, #1, #1", + "cbnz x20, #+0x8", "b #+0x44", "str q16, [x4, #160]", "str q17, [x4, #176]", @@ -1645,20 +1644,20 @@ "str q29, [x4, #368]", "str q30, [x4, #384]", "str q31, [x4, #400]", - "ubfx x21, x20, #1, #2", - "cbnz x21, #+0x8", + "ubfx x20, x4, #1, #2", + "cbnz x20, #+0x8", "b #+0x2c", - "mov w21, #0x1f80", - "mrs x22, fpcr", - "ubfx x22, x22, #22, #3", - "rbit w0, w22", - "bfi x22, x0, #30, #2", - "bfi w21, w22, #13, #3", - "add x22, x4, #0x18 (24)", - "str w21, [x4, #24]", - "mov w21, #0xffff", - "str w21, [x22, #4]", - "ubfx x20, x20, #0, #3", + "mov w20, #0x1f80", + "mrs x21, fpcr", + "ubfx x21, x21, #22, #3", + "rbit w0, w21", + "bfi x21, x0, #30, #2", + "bfi w20, w21, #13, #3", + "add x21, x4, #0x18 (24)", + "str w20, [x4, #24]", + "mov w20, #0xffff", + "str w20, [x21, #4]", + "ubfx x20, x4, #0, #3", "str x20, [x4, #512]" ] }, @@ -1670,28 +1669,28 @@ ] }, "xrstor [rax]": { - "ExpectedInstructionCount": 103, + "ExpectedInstructionCount": 105, "Comment": "GROUP15 0x0F 0xAE /5", "ExpectedArm64ASM": [ "ldr x20, [x4, #512]", - "ubfx x21, x20, #0, #1", - "cbnz x21, #+0x8", + "ubfx x20, x20, #0, #1", + "cbnz x20, #+0x8", "b #+0x84", - "ldrh w21, [x4]", - "strh w21, [x28, #1024]", - "ldrh w21, [x4, #2]", - "ubfx w22, w21, #11, #3", - "strb w22, [x28, #747]", - "ubfx w22, w21, #8, #1", - "ubfx w23, w21, #9, #1", - "ubfx w24, w21, #10, #1", - "ubfx w21, w21, #14, #1", - "strb w22, [x28, #744]", - "strb w23, [x28, #745]", - "strb w24, [x28, #746]", - "strb w21, [x28, #750]", - "ldrb w21, [x4, #4]", - "strb w21, [x28, #1026]", + "ldrh w20, [x4]", + "strh w20, [x28, #1024]", + "ldrh w20, [x4, #2]", + "ubfx w21, w20, #11, #3", + "strb w21, [x28, #747]", + "ubfx w21, w20, #8, #1", + "ubfx w22, w20, #9, #1", + "ubfx w23, w20, #10, #1", + "ubfx w20, w20, #14, #1", + "strb w21, [x28, #744]", + "strb w22, [x28, #745]", + "strb w23, [x28, #746]", + "strb w20, [x28, #750]", + "ldrb w20, [x4, #4]", + "strb w20, [x28, #1026]", "ldr q2, [x4, #32]", "str q2, [x28, #768]", "ldr q2, [x4, #48]", @@ -1709,15 +1708,15 @@ "ldr q2, [x4, #144]", "str q2, [x28, #880]", "b #+0x4c", - "mov w21, #0x0", - "mov w22, #0x37f", - "strh w22, [x28, #1024]", - "strb w21, [x28, #747]", - "strb w21, [x28, #744]", - "strb w21, [x28, #745]", - "strb w21, [x28, #746]", - "strb w21, [x28, #750]", - "strb w21, [x28, #1026]", + "mov w20, #0x0", + "mov w21, #0x37f", + "strh w21, [x28, #1024]", + "strb w20, [x28, #747]", + "strb w20, [x28, #744]", + "strb w20, [x28, #745]", + "strb w20, [x28, #746]", + "strb w20, [x28, #750]", + "strb w20, [x28, #1026]", "movi v2.2d, #0x0", "str q2, [x28, #768]", "str q2, [x28, #784]", @@ -1727,8 +1726,9 @@ "str q2, [x28, #848]", "str q2, [x28, #864]", "str q2, [x28, #880]", - "ubfx x21, x20, #1, #1", - "cbnz x21, #+0x8", + "ldr x20, [x4, #512]", + "ubfx x20, x20, #1, #1", + "cbnz x20, #+0x8", "b #+0x48", "ldr q16, [x4, #160]", "ldr q17, [x4, #176]", @@ -1763,6 +1763,7 @@ "mov v29.16b, v16.16b", "mov v30.16b, v16.16b", "mov v31.16b, v16.16b", + "ldr x20, [x4, #512]", "ubfx x20, x20, #1, #2", "cbnz x20, #+0x8", "b #+0x2c", From 0e990195866e9142bdce5466978da8a4987009ed Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 2 Apr 2024 12:50:55 -0400 Subject: [PATCH 4/7] ValueDominanceValidation: actually validate Signed-off-by: Alyssa Rosenzweig --- FEXCore/Source/Interface/IR/Passes/ValueDominanceValidation.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/FEXCore/Source/Interface/IR/Passes/ValueDominanceValidation.cpp b/FEXCore/Source/Interface/IR/Passes/ValueDominanceValidation.cpp index adf731a885..530e6aacaa 100644 --- a/FEXCore/Source/Interface/IR/Passes/ValueDominanceValidation.cpp +++ b/FEXCore/Source/Interface/IR/Passes/ValueDominanceValidation.cpp @@ -202,6 +202,7 @@ bool ValueDominanceValidation::Run(IREmitter *IREmit) { FEXCore::IR::Dump(&Out, &CurrentIR, nullptr); Out << "Errors:" << std::endl << Errors.str() << std::endl; LogMan::Msg::EFmt("{}", Out.str()); + LOGMAN_MSG_A_FMT("Encountered IR validation Error"); } return false; From a775e474d59b677650ede77f3fd916b99d74f2d1 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 2 Apr 2024 13:11:44 -0400 Subject: [PATCH 5/7] ValueDominanceValidation: do not validate inline constants Signed-off-by: Alyssa Rosenzweig --- .../Interface/IR/Passes/ValueDominanceValidation.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/FEXCore/Source/Interface/IR/Passes/ValueDominanceValidation.cpp b/FEXCore/Source/Interface/IR/Passes/ValueDominanceValidation.cpp index 530e6aacaa..cd51e824d8 100644 --- a/FEXCore/Source/Interface/IR/Passes/ValueDominanceValidation.cpp +++ b/FEXCore/Source/Interface/IR/Passes/ValueDominanceValidation.cpp @@ -97,7 +97,12 @@ bool ValueDominanceValidation::Run(IREmitter *IREmit) { const uint8_t NumArgs = IR::GetRAArgs(IROp->Op); for (uint32_t i = 0; i < NumArgs; ++i) { if (IROp->Args[i].IsInvalid()) continue; - if (CurrentIR.GetOp(IROp->Args[i])->Op == OP_IRHEADER) continue; + + // We do not validate the location of inline constants because it's + // irrelevant, they're ignored by RA and always inlined to where they + // need to be. This lets us pool inline constants globally. + IROps Op = CurrentIR.GetOp(IROp->Args[i])->Op; + if (Op == OP_IRHEADER || Op == OP_INLINECONSTANT) continue; OrderedNodeWrapper Arg = IROp->Args[i]; From 7b3e0316782044d7a46aff19897044ced585f1af Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 2 Apr 2024 12:45:08 -0400 Subject: [PATCH 6/7] ValueDominanceValidation: forbid crossblock liveness Now that we have successfully eliminated crossblock liveness from the IR we generate, validate as much to ensure it doesn't come back. We will take advantage of this new invariant in RA in the future. Signed-off-by: Alyssa Rosenzweig --- .../IR/Passes/ValueDominanceValidation.cpp | 108 ++++-------------- 1 file changed, 21 insertions(+), 87 deletions(-) diff --git a/FEXCore/Source/Interface/IR/Passes/ValueDominanceValidation.cpp b/FEXCore/Source/Interface/IR/Passes/ValueDominanceValidation.cpp index cd51e824d8..f40f878b24 100644 --- a/FEXCore/Source/Interface/IR/Passes/ValueDominanceValidation.cpp +++ b/FEXCore/Source/Interface/IR/Passes/ValueDominanceValidation.cpp @@ -106,95 +106,29 @@ bool ValueDominanceValidation::Run(IREmitter *IREmit) { OrderedNodeWrapper Arg = IROp->Args[i]; - // We must ensure domininance of all SSA arguments - if (Arg.ID() >= BlockIROp->Begin.ID() && - Arg.ID() < BlockIROp->Last.ID()) { - // If the SSA argument is defined INSIDE this block - // then it must only be declared prior to this instruction - // Eg: Valid - // CodeBlock_1: - // %_1 = Load - // %_2 = Load - // %_3 = %_1, %_2 - // - // Eg: Invalid - // CodeBlock_1: - // %_1 = Load - // %_2 = %_1, %_3 - // %_3 = Load - if (Arg.ID() > CodeID) { - HadError |= true; - Errors << "Inst %" << CodeID << ": Arg[" << i << "] %" << Arg.ID() << " definition does not dominate this use!" << std::endl; - } + // If the SSA argument is not defined INSIDE the block, we have + // cross-block liveness, which we forbid in the IR to simplify RA. + if (!(Arg.ID() >= BlockIROp->Begin.ID() && + Arg.ID() < BlockIROp->Last.ID())) { + HadError |= true; + Errors << "Inst %" << CodeID << ": Arg[" << i << "] %" << Arg.ID() << " definition not local!" << std::endl; + continue; } - else if (Arg.ID() < BlockIROp->Begin.ID()) { - // If the SSA argument is defined BEFORE this block - // then THIS block needs to be dominated by the flow of blocks up until this point - - // Eg: Valid - // CodeBlock_1: - // %_1 = Load - // %_2 = Load - // Jump %CodeBlock_2 - // - // CodeBlock_2: - // %_3 = %_1, %_2 - // - // Eg: Invalid - // CodeBlock_1: - // %_1 = Load - // %_2 = Load - // Jump %CodeBlock_3 - // - // CodeBlock_2: - // %_3 = %_1, %_2 - // - // CodeBlock_3: - // ... - - // We need to walk the predecessors to see if the value comes from there - fextl::set Predecessors { BlockNode }; - // Recursively gather all predecessors of BlockNode - for (auto NodeIt = Predecessors.begin(); NodeIt != Predecessors.end();) { - auto PredBlock = &OffsetToBlockMap.try_emplace(CurrentIR.GetID(*NodeIt)).first->second; - ++NodeIt; - - for (auto *Pred : PredBlock->Predecessors) { - if (Predecessors.insert(Pred).second) { - // New blocks added, so repeat from the beginning to pull in their predecessors - NodeIt = Predecessors.begin(); - } - } - } - - bool FoundPredDefine = false; - - for (auto* Pred : Predecessors) { - auto PredIROp = CurrentIR.GetOp(Pred); - if (Arg.ID() >= PredIROp->Begin.ID() && - Arg.ID() < PredIROp->Last.ID()) { - FoundPredDefine = true; - break; - } - Errors << "\tChecking Pred %" << CurrentIR.GetID(Pred) << std::endl; - } - - if (!FoundPredDefine) { - HadError |= true; - Errors << "Inst %" << CodeID << ": Arg[" << i << "] %" << Arg.ID() << " definition does not dominate this use! But was defined before this block!" << std::endl; - } - } - else if (Arg.ID() > BlockIROp->Last.ID()) { - // If this SSA argument is defined AFTER this block then it is just completely broken - // Eg: Invalid - // CodeBlock_1: - // %_1 = Load - // %_2 = %_1, %_3 - // Jump %CodeBlock_2 - // - // CodeBlock_2: - // %_3 = Load + // The SSA argument is defined INSIDE this block. + // It must only be declared prior to this instruction + // Eg: Valid + // CodeBlock_1: + // %_1 = Load + // %_2 = Load + // %_3 = %_1, %_2 + // + // Eg: Invalid + // CodeBlock_1: + // %_1 = Load + // %_2 = %_1, %_3 + // %_3 = Load + if (Arg.ID() > CodeID) { HadError |= true; Errors << "Inst %" << CodeID << ": Arg[" << i << "] %" << Arg.ID() << " definition does not dominate this use!" << std::endl; } From 063954c9b381f6b8f8bbfd7f9603c18cf70833ae Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Tue, 2 Apr 2024 12:47:01 -0400 Subject: [PATCH 7/7] ValueDominanceValidation: rm deadcode Signed-off-by: Alyssa Rosenzweig --- .../IR/Passes/ValueDominanceValidation.cpp | 51 ------------------- 1 file changed, 51 deletions(-) diff --git a/FEXCore/Source/Interface/IR/Passes/ValueDominanceValidation.cpp b/FEXCore/Source/Interface/IR/Passes/ValueDominanceValidation.cpp index f40f878b24..fcc261609c 100644 --- a/FEXCore/Source/Interface/IR/Passes/ValueDominanceValidation.cpp +++ b/FEXCore/Source/Interface/IR/Passes/ValueDominanceValidation.cpp @@ -23,13 +23,6 @@ desc: Sanity Checking #include #include -namespace { - struct BlockInfo { - fextl::vector Predecessors; - fextl::vector Successors; - }; -} - namespace FEXCore::IR::Validation { class ValueDominanceValidation final : public FEXCore::IR::Pass { public: @@ -43,50 +36,6 @@ bool ValueDominanceValidation::Run(IREmitter *IREmit) { auto CurrentIR = IREmit->ViewIR(); fextl::ostringstream Errors; - fextl::unordered_map OffsetToBlockMap; - - for (auto [BlockNode, BlockHeader] : CurrentIR.GetBlocks()) { - - BlockInfo *CurrentBlock = &OffsetToBlockMap.try_emplace(CurrentIR.GetID(BlockNode)).first->second; - - for (auto [CodeNode, IROp] : CurrentIR.GetCode(BlockNode)) { - switch (IROp->Op) { - case IR::OP_CONDJUMP: { - auto Op = IROp->CW(); - - OrderedNode *TrueTargetNode = CurrentIR.GetNode(Op->TrueBlock); - OrderedNode *FalseTargetNode = CurrentIR.GetNode(Op->FalseBlock); - - CurrentBlock->Successors.emplace_back(TrueTargetNode); - CurrentBlock->Successors.emplace_back(FalseTargetNode); - - { - auto Block = &OffsetToBlockMap.try_emplace(Op->TrueBlock.ID()).first->second; - Block->Predecessors.emplace_back(BlockNode); - } - - { - auto Block = &OffsetToBlockMap.try_emplace(Op->FalseBlock.ID()).first->second; - Block->Predecessors.emplace_back(BlockNode); - } - - break; - } - case IR::OP_JUMP: { - auto Op = IROp->CW(); - OrderedNode *TargetNode = CurrentIR.GetNode(Op->Header.Args[0]); - CurrentBlock->Successors.emplace_back(TargetNode); - - { - auto Block = OffsetToBlockMap.try_emplace(Op->Header.Args[0].ID()).first; - Block->second.Predecessors.emplace_back(BlockNode); - } - break; - } - default: break; - } - } - } for (auto [BlockNode, BlockHeader] : CurrentIR.GetBlocks()) { auto BlockIROp = BlockHeader->CW();