diff --git a/FEXCore/Source/Interface/Core/Core.cpp b/FEXCore/Source/Interface/Core/Core.cpp
index 47f9fd661d..d0ba5a42ed 100644
--- a/FEXCore/Source/Interface/Core/Core.cpp
+++ b/FEXCore/Source/Interface/Core/Core.cpp
@@ -579,7 +579,7 @@ namespace FEXCore::Context {
       auto CodeChanged = Thread->OpDispatcher->_ValidateCode(ExistingCodePtr[0], ExistingCodePtr[1], (uintptr_t)ExistingCodePtr - GuestRIP, DecodedInfo->InstSize);
 
-      auto InvalidateCodeCond = Thread->OpDispatcher->_CondJump(CodeChanged);
+      auto InvalidateCodeCond = Thread->OpDispatcher->CondJump(CodeChanged);
 
       auto CurrentBlock = Thread->OpDispatcher->GetCurrentBlock();
       auto CodeWasChangedBlock = Thread->OpDispatcher->CreateNewCodeBlockAtEnd();
diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
index 58e28ee121..4bff1fb53b 100644
--- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
+++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
@@ -3008,10 +3008,10 @@ OrderedNode *OpDispatchBuilder::XSaveBase(X86Tables::DecodedOp Op) {
 void OpDispatchBuilder::XSaveOpImpl(OpcodeArgs) {
   // NOTE: Mask should be EAX and EDX concatenated, but we only need to test
   // for features that are in the lower 32 bits, so EAX only is sufficient.
-  OrderedNode *Mask = LoadGPRRegister(X86State::REG_RAX);
   const auto OpSize = IR::SizeToOpSize(CTX->GetGPRSize());
 
-  const auto StoreIfFlagSet = [&](uint32_t BitIndex, auto fn, uint32_t FieldSize = 1){
+  const auto StoreIfFlagSet = [this, OpSize](uint32_t BitIndex, auto fn, uint32_t FieldSize = 1){
+    OrderedNode *Mask = LoadGPRRegister(X86State::REG_RAX);
     OrderedNode *BitFlag = _Bfe(OpSize, FieldSize, BitIndex, Mask);
     auto CondJump_ = CondJump(BitFlag, {COND_NEQ});
@@ -3055,6 +3055,7 @@ void OpDispatchBuilder::XSaveOpImpl(OpcodeArgs) {
   OrderedNode *HeaderOffset = _Add(OpSize, Base, _Constant(512));
 
   // NOTE: We currently only support the first 3 bits (x87, SSE, and AVX)
+  OrderedNode *Mask = LoadGPRRegister(X86State::REG_RAX);
   OrderedNode *RequestedFeatures = _Bfe(OpSize, 3, 0, Mask);
 
   // XSTATE_BV section of the header is 8 bytes in size, but we only really
@@ -3209,17 +3210,18 @@ void OpDispatchBuilder::FXRStoreOp(OpcodeArgs) {
 void OpDispatchBuilder::XRstorOpImpl(OpcodeArgs) {
   const auto OpSize = IR::SizeToOpSize(CTX->GetGPRSize());
 
-  // Set up base address for the XSAVE region to restore from, and also read the
-  // XSTATE_BV bit flags out of the XSTATE header.
-  //
-  // Note: we rematerialize Base in each block to avoid crossblock liveness.
-  OrderedNode *Base = XSaveBase(Op);
-  OrderedNode *Mask = _LoadMem(GPRClass, 8, _Add(OpSize, Base, _Constant(512)), 8);
-
   // If a bit in our XSTATE_BV is set, then we restore from that region of the XSAVE area,
   // otherwise, if not set, then we need to set the relevant data the bit corresponds to
   // to it's defined initial configuration.
-  const auto RestoreIfFlagSetOrDefault = [&](uint32_t BitIndex, auto restore_fn, auto default_fn, uint32_t FieldSize = 1){
+  const auto RestoreIfFlagSetOrDefault = [this, Op, OpSize](uint32_t BitIndex, auto restore_fn, auto default_fn, uint32_t FieldSize = 1){
+    // Set up base address for the XSAVE region to restore from, and also read
+    // the XSTATE_BV bit flags out of the XSTATE header.
+    //
+    // Note: we rematerialize Base/Mask in each block to avoid crossblock
+    // liveness.
+    OrderedNode *Base = XSaveBase(Op);
+    OrderedNode *Mask = _LoadMem(GPRClass, 8, _Add(OpSize, Base, _Constant(512)), 8);
+
     OrderedNode *BitFlag = _Bfe(OpSize, FieldSize, BitIndex, Mask);
     auto CondJump_ = CondJump(BitFlag, {COND_NEQ});
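Both XSaveOpImpl and XRstorOpImpl make the same change: instead of computing Mask (and Base) once in the entry block and holding the values live across the CondJump emitted per feature bit, each helper reloads its own copy inside the block that uses it. Below is a minimal standalone sketch of that invariant; MiniEmitter, Inst, Emit, and StartBlock are illustrative stand-ins, not the FEXCore API:

    #include <cassert>
    #include <utility>
    #include <vector>

    // Illustrative stand-ins, not the FEXCore API.
    struct Inst {
      int Id;                 // SSA id, also the index into Code
      int Block;              // block the instruction was emitted into
      std::vector<int> Args;  // SSA ids of the arguments
    };

    struct MiniEmitter {
      int NextId = 0;
      int CurBlock = 0;
      std::vector<Inst> Code;

      int Emit(std::vector<int> Args = {}) {
        Code.push_back({NextId, CurBlock, std::move(Args)});
        return NextId++;
      }
      void StartBlock() { ++CurBlock; }  // models the block a CondJump opens

      // The invariant the validation pass enforces after this change:
      // every argument (inline constants aside) must be defined in the same
      // block that uses it.
      bool AllUsesBlockLocal() const {
        for (const Inst& I : Code) {
          for (int Arg : I.Args) {
            if (Code[Arg].Block != I.Block) {
              return false;
            }
          }
        }
        return true;
      }
    };

    int main() {
      {
        // Old shape: Mask defined once in the entry block, used after the branch.
        MiniEmitter E;
        int Mask = E.Emit();  // stands in for LoadGPRRegister(REG_RAX)
        E.StartBlock();
        E.Emit({Mask});       // stands in for _Bfe(..., Mask): cross-block use
        assert(!E.AllUsesBlockLocal());
      }
      {
        // New shape: the block rematerializes Mask before using it.
        MiniEmitter E;
        E.Emit();             // entry-block load feeding the branch itself
        E.StartBlock();
        int Mask = E.Emit();  // reloaded inside the block
        E.Emit({Mask});       // use is now block-local
        assert(E.AllUsesBlockLocal());
      }
      return 0;
    }

The trade-off shows up in the instruction-count tests further down: xrstor re-reads XSTATE_BV in each guarded block (103 to 105 instructions), while xsave sheds the "mov x20, x4" copy it previously kept alive (70 to 69).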
diff --git a/FEXCore/Source/Interface/IR/Passes/ValueDominanceValidation.cpp b/FEXCore/Source/Interface/IR/Passes/ValueDominanceValidation.cpp
index adf731a885..fcc261609c 100644
--- a/FEXCore/Source/Interface/IR/Passes/ValueDominanceValidation.cpp
+++ b/FEXCore/Source/Interface/IR/Passes/ValueDominanceValidation.cpp
@@ -23,13 +23,6 @@ desc: Sanity Checking
 #include
 #include
 
-namespace {
-  struct BlockInfo {
-    fextl::vector<FEXCore::IR::OrderedNode *> Predecessors;
-    fextl::vector<FEXCore::IR::OrderedNode *> Successors;
-  };
-}
-
 namespace FEXCore::IR::Validation {
 class ValueDominanceValidation final : public FEXCore::IR::Pass {
 public:
@@ -43,50 +36,6 @@ bool ValueDominanceValidation::Run(IREmitter *IREmit) {
   auto CurrentIR = IREmit->ViewIR();
   fextl::ostringstream Errors;
 
-  fextl::unordered_map<IR::NodeID, BlockInfo> OffsetToBlockMap;
-
-  for (auto [BlockNode, BlockHeader] : CurrentIR.GetBlocks()) {
-
-    BlockInfo *CurrentBlock = &OffsetToBlockMap.try_emplace(CurrentIR.GetID(BlockNode)).first->second;
-
-    for (auto [CodeNode, IROp] : CurrentIR.GetCode(BlockNode)) {
-      switch (IROp->Op) {
-        case IR::OP_CONDJUMP: {
-          auto Op = IROp->CW<IR::IROp_CondJump>();
-
-          OrderedNode *TrueTargetNode = CurrentIR.GetNode(Op->TrueBlock);
-          OrderedNode *FalseTargetNode = CurrentIR.GetNode(Op->FalseBlock);
-
-          CurrentBlock->Successors.emplace_back(TrueTargetNode);
-          CurrentBlock->Successors.emplace_back(FalseTargetNode);
-
-          {
-            auto Block = &OffsetToBlockMap.try_emplace(Op->TrueBlock.ID()).first->second;
-            Block->Predecessors.emplace_back(BlockNode);
-          }
-
-          {
-            auto Block = &OffsetToBlockMap.try_emplace(Op->FalseBlock.ID()).first->second;
-            Block->Predecessors.emplace_back(BlockNode);
-          }
-
-          break;
-        }
-        case IR::OP_JUMP: {
-          auto Op = IROp->CW<IR::IROp_Jump>();
-          OrderedNode *TargetNode = CurrentIR.GetNode(Op->Header.Args[0]);
-          CurrentBlock->Successors.emplace_back(TargetNode);
-
-          {
-            auto Block = OffsetToBlockMap.try_emplace(Op->Header.Args[0].ID()).first;
-            Block->second.Predecessors.emplace_back(BlockNode);
-          }
-          break;
-        }
-        default: break;
-      }
-    }
-  }
 
   for (auto [BlockNode, BlockHeader] : CurrentIR.GetBlocks()) {
     auto BlockIROp = BlockHeader->CW<FEXCore::IR::IROp_CodeBlock>();
@@ -97,99 +46,38 @@ bool ValueDominanceValidation::Run(IREmitter *IREmit) {
       const uint8_t NumArgs = IR::GetRAArgs(IROp->Op);
       for (uint32_t i = 0; i < NumArgs; ++i) {
         if (IROp->Args[i].IsInvalid()) continue;
-        if (CurrentIR.GetOp<IROp_Header>(IROp->Args[i])->Op == OP_IRHEADER) continue;
+
+        // We do not validate the location of inline constants because it's
+        // irrelevant, they're ignored by RA and always inlined to where they
+        // need to be. This lets us pool inline constants globally.
+        IROps Op = CurrentIR.GetOp<IROp_Header>(IROp->Args[i])->Op;
+        if (Op == OP_IRHEADER || Op == OP_INLINECONSTANT) continue;
 
         OrderedNodeWrapper Arg = IROp->Args[i];
 
-        // We must ensure domininance of all SSA arguments
-        if (Arg.ID() >= BlockIROp->Begin.ID() &&
-            Arg.ID() < BlockIROp->Last.ID()) {
-          // If the SSA argument is defined INSIDE this block
-          // then it must only be declared prior to this instruction
-          // Eg: Valid
-          // CodeBlock_1:
-          // %_1 = Load
-          // %_2 = Load
-          // %_3 = %_1, %_2
-          //
-          // Eg: Invalid
-          // CodeBlock_1:
-          // %_1 = Load
-          // %_2 = %_1, %_3
-          // %_3 = Load
-          if (Arg.ID() > CodeID) {
-            HadError |= true;
-            Errors << "Inst %" << CodeID << ": Arg[" << i << "] %" << Arg.ID() << " definition does not dominate this use!"
-                   << std::endl;
-          }
-        }
-        else if (Arg.ID() < BlockIROp->Begin.ID()) {
-          // If the SSA argument is defined BEFORE this block
-          // then THIS block needs to be dominated by the flow of blocks up until this point
-
-          // Eg: Valid
-          // CodeBlock_1:
-          // %_1 = Load
-          // %_2 = Load
-          // Jump %CodeBlock_2
-          //
-          // CodeBlock_2:
-          // %_3 = %_1, %_2
-          //
-          // Eg: Invalid
-          // CodeBlock_1:
-          // %_1 = Load
-          // %_2 = Load
-          // Jump %CodeBlock_3
-          //
-          // CodeBlock_2:
-          // %_3 = %_1, %_2
-          //
-          // CodeBlock_3:
-          // ...
-
-          // We need to walk the predecessors to see if the value comes from there
-          fextl::set<OrderedNode *> Predecessors { BlockNode };
-          // Recursively gather all predecessors of BlockNode
-          for (auto NodeIt = Predecessors.begin(); NodeIt != Predecessors.end();) {
-            auto PredBlock = &OffsetToBlockMap.try_emplace(CurrentIR.GetID(*NodeIt)).first->second;
-            ++NodeIt;
-
-            for (auto *Pred : PredBlock->Predecessors) {
-              if (Predecessors.insert(Pred).second) {
-                // New blocks added, so repeat from the beginning to pull in their predecessors
-                NodeIt = Predecessors.begin();
-              }
-            }
-          }
-
-          bool FoundPredDefine = false;
-
-          for (auto* Pred : Predecessors) {
-            auto PredIROp = CurrentIR.GetOp<FEXCore::IR::IROp_CodeBlock>(Pred);
-
-            if (Arg.ID() >= PredIROp->Begin.ID() &&
-                Arg.ID() < PredIROp->Last.ID()) {
-              FoundPredDefine = true;
-              break;
-            }
-            Errors << "\tChecking Pred %" << CurrentIR.GetID(Pred) << std::endl;
-          }
-
-          if (!FoundPredDefine) {
-            HadError |= true;
-            Errors << "Inst %" << CodeID << ": Arg[" << i << "] %" << Arg.ID() << " definition does not dominate this use! But was defined before this block!" << std::endl;
-          }
+        // If the SSA argument is not defined INSIDE the block, we have
+        // cross-block liveness, which we forbid in the IR to simplify RA.
+        if (!(Arg.ID() >= BlockIROp->Begin.ID() &&
+              Arg.ID() < BlockIROp->Last.ID())) {
+          HadError |= true;
+          Errors << "Inst %" << CodeID << ": Arg[" << i << "] %" << Arg.ID() << " definition not local!" << std::endl;
+          continue;
         }
-        else if (Arg.ID() > BlockIROp->Last.ID()) {
-          // If this SSA argument is defined AFTER this block then it is just completely broken
-          // Eg: Invalid
-          // CodeBlock_1:
-          // %_1 = Load
-          // %_2 = %_1, %_3
-          // Jump %CodeBlock_2
-          //
-          // CodeBlock_2:
-          // %_3 = Load
+
+        // The SSA argument is defined INSIDE this block.
+        // It must only be declared prior to this instruction
+        // Eg: Valid
+        // CodeBlock_1:
+        // %_1 = Load
+        // %_2 = Load
+        // %_3 = %_1, %_2
+        //
+        // Eg: Invalid
+        // CodeBlock_1:
+        // %_1 = Load
+        // %_2 = %_1, %_3
+        // %_3 = Load
+        if (Arg.ID() > CodeID) {
           HadError |= true;
           Errors << "Inst %" << CodeID << ": Arg[" << i << "] %" << Arg.ID() << " definition does not dominate this use!"
                  << std::endl;
         }
@@ -202,6 +90,7 @@ bool ValueDominanceValidation::Run(IREmitter *IREmit) {
     FEXCore::IR::Dump(&Out, &CurrentIR, nullptr);
     Out << "Errors:" << std::endl << Errors.str() << std::endl;
     LogMan::Msg::EFmt("{}", Out.str());
+    LOGMAN_MSG_A_FMT("Encountered IR validation Error");
   }
 
   return false;
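With cross-block liveness forbidden, the pass no longer needs the predecessor map or recursive walk it just deleted: a use is valid exactly when its definition falls inside the enclosing block's [Begin, Last) ID range and does not come after the using instruction, with inline constants exempted. A compilable sketch of that reduced rule follows; BlockRange, ArgKind, and ValidUse are simplified stand-ins, not the pass's real types:

    #include <cstdint>
    #include <cstdio>

    // Simplified stand-ins for the IR types; illustrative only.
    struct BlockRange {
      uint32_t Begin;  // first SSA id belonging to the block
      uint32_t Last;   // ids below Last belong to the block
    };

    enum class ArgKind { Normal, InlineConstant };

    // Mirrors the simplified pass: a use of ArgID by instruction CodeID inside
    // Block is valid iff the definition is block-local and precedes the use.
    bool ValidUse(uint32_t CodeID, uint32_t ArgID, BlockRange Block, ArgKind Kind) {
      // Inline constants are ignored by RA and inlined at each use site, so
      // their defining location is irrelevant (they may be pooled globally).
      if (Kind == ArgKind::InlineConstant) {
        return true;
      }
      // Cross-block liveness is forbidden outright.
      if (ArgID < Block.Begin || ArgID >= Block.Last) {
        return false;
      }
      // Within the block, the definition must not come after the use.
      return ArgID <= CodeID;
    }

    int main() {
      const BlockRange Block{10, 20};
      std::printf("%d\n", ValidUse(15, 12, Block, ArgKind::Normal));         // 1: ok
      std::printf("%d\n", ValidUse(15, 17, Block, ArgKind::Normal));         // 0: use before def
      std::printf("%d\n", ValidUse(15, 5, Block, ArgKind::Normal));          // 0: not block-local
      std::printf("%d\n", ValidUse(15, 5, Block, ArgKind::InlineConstant));  // 1: exempt
      return 0;
    }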
diff --git a/unittests/InstructionCountCI/FlagM/SecondaryGroup.json b/unittests/InstructionCountCI/FlagM/SecondaryGroup.json
index 0eaf853d60..1a465a88f6 100644
--- a/unittests/InstructionCountCI/FlagM/SecondaryGroup.json
+++ b/unittests/InstructionCountCI/FlagM/SecondaryGroup.json
@@ -1403,29 +1403,28 @@
     ]
   },
   "xsave [rax]": {
-    "ExpectedInstructionCount": 70,
+    "ExpectedInstructionCount": 69,
     "Comment": "GROUP15 0x0F 0xAE /4",
     "ExpectedArm64ASM": [
-      "mov x20, x4",
-      "ubfx x21, x20, #0, #1",
-      "cbnz x21, #+0x8",
+      "ubfx x20, x4, #0, #1",
+      "cbnz x20, #+0x8",
       "b #+0x84",
-      "ldrh w21, [x28, #1024]",
-      "strh w21, [x4]",
-      "mov w21, #0x0",
-      "ldrb w22, [x28, #747]",
-      "bfi x21, x22, #11, #3",
-      "ldrb w22, [x28, #744]",
-      "ldrb w23, [x28, #745]",
-      "ldrb w24, [x28, #746]",
-      "ldrb w25, [x28, #750]",
-      "orr x21, x21, x22, lsl #8",
-      "orr x21, x21, x23, lsl #9",
-      "orr x21, x21, x24, lsl #10",
-      "orr x21, x21, x25, lsl #14",
-      "strh w21, [x4, #2]",
-      "ldrb w21, [x28, #1026]",
-      "strb w21, [x4, #4]",
+      "ldrh w20, [x28, #1024]",
+      "strh w20, [x4]",
+      "mov w20, #0x0",
+      "ldrb w21, [x28, #747]",
+      "bfi x20, x21, #11, #3",
+      "ldrb w21, [x28, #744]",
+      "ldrb w22, [x28, #745]",
+      "ldrb w23, [x28, #746]",
+      "ldrb w24, [x28, #750]",
+      "orr x20, x20, x21, lsl #8",
+      "orr x20, x20, x22, lsl #9",
+      "orr x20, x20, x23, lsl #10",
+      "orr x20, x20, x24, lsl #14",
+      "strh w20, [x4, #2]",
+      "ldrb w20, [x28, #1026]",
+      "strb w20, [x4, #4]",
       "ldr q2, [x28, #768]",
       "str q2, [x4, #32]",
       "ldr q2, [x28, #784]",
@@ -1442,8 +1441,8 @@
       "str q2, [x4, #128]",
       "ldr q2, [x28, #880]",
       "str q2, [x4, #144]",
-      "ubfx x21, x20, #1, #1",
-      "cbnz x21, #+0x8",
+      "ubfx x20, x4, #1, #1",
+      "cbnz x20, #+0x8",
       "b #+0x44",
       "str q16, [x4, #160]",
       "str q17, [x4, #176]",
@@ -1461,20 +1460,20 @@
       "str q29, [x4, #368]",
       "str q30, [x4, #384]",
       "str q31, [x4, #400]",
-      "ubfx x21, x20, #1, #2",
-      "cbnz x21, #+0x8",
+      "ubfx x20, x4, #1, #2",
+      "cbnz x20, #+0x8",
       "b #+0x2c",
-      "mov w21, #0x1f80",
-      "mrs x22, fpcr",
-      "ubfx x22, x22, #22, #3",
-      "rbit w0, w22",
-      "bfi x22, x0, #30, #2",
-      "bfi w21, w22, #13, #3",
-      "add x22, x4, #0x18 (24)",
-      "str w21, [x4, #24]",
-      "mov w21, #0xffff",
-      "str w21, [x22, #4]",
-      "ubfx x20, x20, #0, #3",
+      "mov w20, #0x1f80",
+      "mrs x21, fpcr",
+      "ubfx x21, x21, #22, #3",
+      "rbit w0, w21",
+      "bfi x21, x0, #30, #2",
+      "bfi w20, w21, #13, #3",
+      "add x21, x4, #0x18 (24)",
+      "str w20, [x4, #24]",
+      "mov w20, #0xffff",
+      "str w20, [x21, #4]",
+      "ubfx x20, x4, #0, #3",
       "str x20, [x4, #512]"
     ]
   },
@@ -1486,28 +1485,28 @@
     ]
   },
   "xrstor [rax]": {
-    "ExpectedInstructionCount": 103,
+    "ExpectedInstructionCount": 105,
     "Comment": "GROUP15 0x0F 0xAE /5",
     "ExpectedArm64ASM": [
       "ldr x20, [x4, #512]",
-      "ubfx x21, x20, #0, #1",
-      "cbnz x21, #+0x8",
+      "ubfx x20, x20, #0, #1",
+      "cbnz x20, #+0x8",
       "b #+0x84",
-      "ldrh w21, [x4]",
-      "strh w21, [x28, #1024]",
-      "ldrh w21, [x4, #2]",
-      "ubfx w22, w21, #11, #3",
-      "strb w22, [x28, #747]",
-      "ubfx w22, w21, #8, #1",
-      "ubfx w23, w21, #9, #1",
-      "ubfx w24, w21, #10, #1",
-      "ubfx w21, w21, #14, #1",
-      "strb w22, [x28, #744]",
-      "strb w23, [x28, #745]",
-      "strb w24, [x28, #746]",
-      "strb w21, [x28, #750]",
-      "ldrb w21, [x4, #4]",
-      "strb w21, [x28, #1026]",
+      "ldrh w20, [x4]",
+      "strh w20, [x28, #1024]",
#1024]", + "ldrh w20, [x4, #2]", + "ubfx w21, w20, #11, #3", + "strb w21, [x28, #747]", + "ubfx w21, w20, #8, #1", + "ubfx w22, w20, #9, #1", + "ubfx w23, w20, #10, #1", + "ubfx w20, w20, #14, #1", + "strb w21, [x28, #744]", + "strb w22, [x28, #745]", + "strb w23, [x28, #746]", + "strb w20, [x28, #750]", + "ldrb w20, [x4, #4]", + "strb w20, [x28, #1026]", "ldr q2, [x4, #32]", "str q2, [x28, #768]", "ldr q2, [x4, #48]", @@ -1525,15 +1524,15 @@ "ldr q2, [x4, #144]", "str q2, [x28, #880]", "b #+0x4c", - "mov w21, #0x0", - "mov w22, #0x37f", - "strh w22, [x28, #1024]", - "strb w21, [x28, #747]", - "strb w21, [x28, #744]", - "strb w21, [x28, #745]", - "strb w21, [x28, #746]", - "strb w21, [x28, #750]", - "strb w21, [x28, #1026]", + "mov w20, #0x0", + "mov w21, #0x37f", + "strh w21, [x28, #1024]", + "strb w20, [x28, #747]", + "strb w20, [x28, #744]", + "strb w20, [x28, #745]", + "strb w20, [x28, #746]", + "strb w20, [x28, #750]", + "strb w20, [x28, #1026]", "movi v2.2d, #0x0", "str q2, [x28, #768]", "str q2, [x28, #784]", @@ -1543,8 +1542,9 @@ "str q2, [x28, #848]", "str q2, [x28, #864]", "str q2, [x28, #880]", - "ubfx x21, x20, #1, #1", - "cbnz x21, #+0x8", + "ldr x20, [x4, #512]", + "ubfx x20, x20, #1, #1", + "cbnz x20, #+0x8", "b #+0x48", "ldr q16, [x4, #160]", "ldr q17, [x4, #176]", @@ -1579,6 +1579,7 @@ "mov v29.16b, v16.16b", "mov v30.16b, v16.16b", "mov v31.16b, v16.16b", + "ldr x20, [x4, #512]", "ubfx x20, x20, #1, #2", "cbnz x20, #+0x8", "b #+0x2c", diff --git a/unittests/InstructionCountCI/SecondaryGroup.json b/unittests/InstructionCountCI/SecondaryGroup.json index 4ad5d8012d..c0a7059233 100644 --- a/unittests/InstructionCountCI/SecondaryGroup.json +++ b/unittests/InstructionCountCI/SecondaryGroup.json @@ -1587,29 +1587,28 @@ ] }, "xsave [rax]": { - "ExpectedInstructionCount": 70, + "ExpectedInstructionCount": 69, "Comment": "GROUP15 0x0F 0xAE /4", "ExpectedArm64ASM": [ - "mov x20, x4", - "ubfx x21, x20, #0, #1", - "cbnz x21, #+0x8", + "ubfx x20, x4, #0, #1", + "cbnz x20, #+0x8", "b #+0x84", - "ldrh w21, [x28, #1024]", - "strh w21, [x4]", - "mov w21, #0x0", - "ldrb w22, [x28, #747]", - "bfi x21, x22, #11, #3", - "ldrb w22, [x28, #744]", - "ldrb w23, [x28, #745]", - "ldrb w24, [x28, #746]", - "ldrb w25, [x28, #750]", - "orr x21, x21, x22, lsl #8", - "orr x21, x21, x23, lsl #9", - "orr x21, x21, x24, lsl #10", - "orr x21, x21, x25, lsl #14", - "strh w21, [x4, #2]", - "ldrb w21, [x28, #1026]", - "strb w21, [x4, #4]", + "ldrh w20, [x28, #1024]", + "strh w20, [x4]", + "mov w20, #0x0", + "ldrb w21, [x28, #747]", + "bfi x20, x21, #11, #3", + "ldrb w21, [x28, #744]", + "ldrb w22, [x28, #745]", + "ldrb w23, [x28, #746]", + "ldrb w24, [x28, #750]", + "orr x20, x20, x21, lsl #8", + "orr x20, x20, x22, lsl #9", + "orr x20, x20, x23, lsl #10", + "orr x20, x20, x24, lsl #14", + "strh w20, [x4, #2]", + "ldrb w20, [x28, #1026]", + "strb w20, [x4, #4]", "ldr q2, [x28, #768]", "str q2, [x4, #32]", "ldr q2, [x28, #784]", @@ -1626,8 +1625,8 @@ "str q2, [x4, #128]", "ldr q2, [x28, #880]", "str q2, [x4, #144]", - "ubfx x21, x20, #1, #1", - "cbnz x21, #+0x8", + "ubfx x20, x4, #1, #1", + "cbnz x20, #+0x8", "b #+0x44", "str q16, [x4, #160]", "str q17, [x4, #176]", @@ -1645,20 +1644,20 @@ "str q29, [x4, #368]", "str q30, [x4, #384]", "str q31, [x4, #400]", - "ubfx x21, x20, #1, #2", - "cbnz x21, #+0x8", + "ubfx x20, x4, #1, #2", + "cbnz x20, #+0x8", "b #+0x2c", - "mov w21, #0x1f80", - "mrs x22, fpcr", - "ubfx x22, x22, #22, #3", - "rbit w0, w22", - "bfi x22, x0, #30, #2", - "bfi w21, w22, #13, #3", - "add x22, 
x4, #0x18 (24)", - "str w21, [x4, #24]", - "mov w21, #0xffff", - "str w21, [x22, #4]", - "ubfx x20, x20, #0, #3", + "mov w20, #0x1f80", + "mrs x21, fpcr", + "ubfx x21, x21, #22, #3", + "rbit w0, w21", + "bfi x21, x0, #30, #2", + "bfi w20, w21, #13, #3", + "add x21, x4, #0x18 (24)", + "str w20, [x4, #24]", + "mov w20, #0xffff", + "str w20, [x21, #4]", + "ubfx x20, x4, #0, #3", "str x20, [x4, #512]" ] }, @@ -1670,28 +1669,28 @@ ] }, "xrstor [rax]": { - "ExpectedInstructionCount": 103, + "ExpectedInstructionCount": 105, "Comment": "GROUP15 0x0F 0xAE /5", "ExpectedArm64ASM": [ "ldr x20, [x4, #512]", - "ubfx x21, x20, #0, #1", - "cbnz x21, #+0x8", + "ubfx x20, x20, #0, #1", + "cbnz x20, #+0x8", "b #+0x84", - "ldrh w21, [x4]", - "strh w21, [x28, #1024]", - "ldrh w21, [x4, #2]", - "ubfx w22, w21, #11, #3", - "strb w22, [x28, #747]", - "ubfx w22, w21, #8, #1", - "ubfx w23, w21, #9, #1", - "ubfx w24, w21, #10, #1", - "ubfx w21, w21, #14, #1", - "strb w22, [x28, #744]", - "strb w23, [x28, #745]", - "strb w24, [x28, #746]", - "strb w21, [x28, #750]", - "ldrb w21, [x4, #4]", - "strb w21, [x28, #1026]", + "ldrh w20, [x4]", + "strh w20, [x28, #1024]", + "ldrh w20, [x4, #2]", + "ubfx w21, w20, #11, #3", + "strb w21, [x28, #747]", + "ubfx w21, w20, #8, #1", + "ubfx w22, w20, #9, #1", + "ubfx w23, w20, #10, #1", + "ubfx w20, w20, #14, #1", + "strb w21, [x28, #744]", + "strb w22, [x28, #745]", + "strb w23, [x28, #746]", + "strb w20, [x28, #750]", + "ldrb w20, [x4, #4]", + "strb w20, [x28, #1026]", "ldr q2, [x4, #32]", "str q2, [x28, #768]", "ldr q2, [x4, #48]", @@ -1709,15 +1708,15 @@ "ldr q2, [x4, #144]", "str q2, [x28, #880]", "b #+0x4c", - "mov w21, #0x0", - "mov w22, #0x37f", - "strh w22, [x28, #1024]", - "strb w21, [x28, #747]", - "strb w21, [x28, #744]", - "strb w21, [x28, #745]", - "strb w21, [x28, #746]", - "strb w21, [x28, #750]", - "strb w21, [x28, #1026]", + "mov w20, #0x0", + "mov w21, #0x37f", + "strh w21, [x28, #1024]", + "strb w20, [x28, #747]", + "strb w20, [x28, #744]", + "strb w20, [x28, #745]", + "strb w20, [x28, #746]", + "strb w20, [x28, #750]", + "strb w20, [x28, #1026]", "movi v2.2d, #0x0", "str q2, [x28, #768]", "str q2, [x28, #784]", @@ -1727,8 +1726,9 @@ "str q2, [x28, #848]", "str q2, [x28, #864]", "str q2, [x28, #880]", - "ubfx x21, x20, #1, #1", - "cbnz x21, #+0x8", + "ldr x20, [x4, #512]", + "ubfx x20, x20, #1, #1", + "cbnz x20, #+0x8", "b #+0x48", "ldr q16, [x4, #160]", "ldr q17, [x4, #176]", @@ -1763,6 +1763,7 @@ "mov v29.16b, v16.16b", "mov v30.16b, v16.16b", "mov v31.16b, v16.16b", + "ldr x20, [x4, #512]", "ubfx x20, x20, #1, #2", "cbnz x20, #+0x8", "b #+0x2c",