diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 12091359d6..96304fd279 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -1579,10 +1579,6 @@ void OpDispatchBuilder::SHLOp(OpcodeArgs) { OrderedNode *Result = _Lshl(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Dest, Src); StoreResult(GPRClass, Op, Result, -1); - if (Size < 32) { - Result = _Bfe(OpSize::i32Bit, Size, 0, Result); - } - if constexpr (SHL1Bit) { GenerateFlags_ShiftLeftImmediate(Op, Result, Dest, 1); } @@ -1789,9 +1785,6 @@ void OpDispatchBuilder::SHRDOp(OpcodeArgs) { StoreResult(GPRClass, Op, Res, -1); - if (Size != 64) { - Res = _Bfe(OpSize::i64Bit, Size, 0, Res); - } GenerateFlags_ShiftRight(Op, Res, Dest, Shift); } @@ -1928,7 +1921,7 @@ void OpDispatchBuilder::ROROp(OpcodeArgs) { if constexpr (Is1Bit) { GenerateFlags_RotateRightImmediate(Op, ALUOp, Dest, 1); } else { - GenerateFlags_RotateRight(Op, ALUOp, Dest, Src); + GenerateFlags_RotateRight(Op, Src); } } @@ -1974,6 +1967,7 @@ void OpDispatchBuilder::ROLOp(OpcodeArgs) { OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); const uint32_t Size = GetSrcBitSize(Op); + auto OpSize = Size == 64 ? OpSize::i64Bit : OpSize::i32Bit; // Need to negate the shift so we can use ROR instead if constexpr (Is1Bit) { @@ -1983,33 +1977,28 @@ void OpDispatchBuilder::ROLOp(OpcodeArgs) { } // x86 masks the shift by 0x3F or 0x1F depending on size of op - if (Size == 64) { - Src = _And(OpSize::i64Bit, Src, _Constant(Size, 0x3F)); - } else { - Src = _And(OpSize::i32Bit, Src, _Constant(Size, 0x1F)); - } + Src = _And(OpSize, Src, _Constant(Size, Size == 64 ? 0x3F : 0x1F)); if (Size < 32) { // ARM doesn't support 8/16bit rotates. Emulate with an insert // StoreResult truncates back to a 8/16 bit value - Dest = _Bfi(OpSize::i32Bit, Size, Size, Dest, Dest); + Dest = _Bfi(OpSize, Size, Size, Dest, Dest); if (Size == 8) { // And because the shift size isn't masked to 8 bits, we need to fill the // the full 32bits to get the correct result. - Dest = _Bfi(OpSize::i32Bit, 16, 16, Dest, Dest); + Dest = _Bfi(OpSize, 16, 16, Dest, Dest); } } - auto ALUOp = _Ror(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, - Dest, - _Sub(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, _Constant(Size, std::max(32U, Size)), Src)); + // (32 - Size) % 32 = (-Size) % 32. Using Neg over Sub saves a constant. + auto ALUOp = _Ror(OpSize, Dest, _Neg(OpSize, Src)); StoreResult(GPRClass, Op, ALUOp, -1); if constexpr (Is1Bit) { GenerateFlags_RotateLeftImmediate(Op, ALUOp, Dest, 1); } else { - GenerateFlags_RotateLeft(Op, ALUOp, Dest, Src); + GenerateFlags_RotateLeft(Op, Src); } } diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index d9f8fb08b4..6e8215e127 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -1669,7 +1669,7 @@ friend class FEXCore::IR::PassManager; OrderedNode *Src1; } OneSource; - // Logical, LSHL, LSHR, ASHR, ROR, ROL + // Logical, LSHL, LSHR, ASHR struct { OrderedNode *Src1; OrderedNode *Src2; @@ -1688,6 +1688,12 @@ friend class FEXCore::IR::PassManager; bool UpdateCF; } TwoSrcImmediate; + + // ROL, ROR + struct { + X86Tables::DecodedOp Op; + OrderedNode *Src2; + } Decoded; } Sources{}; }; @@ -1785,8 +1791,8 @@ friend class FEXCore::IR::PassManager; void CalculateFlags_ShiftRightImmediateCommon(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift); void CalculateFlags_SignShiftRight(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2); void CalculateFlags_SignShiftRightImmediate(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift); - void CalculateFlags_RotateRight(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2); - void CalculateFlags_RotateLeft(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2); + void CalculateFlags_RotateRight(uint8_t SrcSize, X86Tables::DecodedOp Op, OrderedNode *Src2); + void CalculateFlags_RotateLeft(uint8_t SrcSize, X86Tables::DecodedOp Op, OrderedNode *Src2); void CalculateFlags_RotateRightImmediate(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift); void CalculateFlags_RotateLeftImmediate(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift); void CalculateFlags_BEXTR(OrderedNode *Src); @@ -1976,34 +1982,32 @@ friend class FEXCore::IR::PassManager; }; } - void GenerateFlags_RotateRight(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) { + void GenerateFlags_RotateRight(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Src2) { // Doesn't set all the flags, needs to calculate. CalculateDeferredFlags(); CurrentDeferredFlags = DeferredFlagData { .Type = FlagsGenerationType::TYPE_ROR, .SrcSize = GetSrcSize(Op), - .Res = Res, .Sources = { - .TwoSource = { - .Src1 = Src1, + .Decoded = { + .Op = Op, .Src2 = Src2, }, }, }; } - void GenerateFlags_RotateLeft(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) { + void GenerateFlags_RotateLeft(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Src2) { // Doesn't set all the flags, needs to calculate. CalculateDeferredFlags(); CurrentDeferredFlags = DeferredFlagData { .Type = FlagsGenerationType::TYPE_ROL, .SrcSize = GetSrcSize(Op), - .Res = Res, .Sources = { - .TwoSource = { - .Src1 = Src1, + .Decoded = { + .Op = Op, .Src2 = Src2, }, }, diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp index b8aa8371fa..069450df51 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp @@ -404,9 +404,8 @@ void OpDispatchBuilder::CalculateDeferredFlags(uint32_t FlagsToCalculateMask) { case FlagsGenerationType::TYPE_ROR: CalculateFlags_RotateRight( CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.TwoSource.Src1, - CurrentDeferredFlags.Sources.TwoSource.Src2); + CurrentDeferredFlags.Sources.Decoded.Op, + CurrentDeferredFlags.Sources.Decoded.Src2); break; case FlagsGenerationType::TYPE_RORI: CalculateFlags_RotateRightImmediate( @@ -418,9 +417,8 @@ void OpDispatchBuilder::CalculateDeferredFlags(uint32_t FlagsToCalculateMask) { case FlagsGenerationType::TYPE_ROL: CalculateFlags_RotateLeft( CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.TwoSource.Src1, - CurrentDeferredFlags.Sources.TwoSource.Src2); + CurrentDeferredFlags.Sources.Decoded.Op, + CurrentDeferredFlags.Sources.Decoded.Src2); break; case FlagsGenerationType::TYPE_ROLI: CalculateFlags_RotateLeftImmediate( @@ -835,11 +833,14 @@ void OpDispatchBuilder::CalculateFlags_ShiftRightDoubleImmediate(uint8_t SrcSize } } -void OpDispatchBuilder::CalculateFlags_RotateRight(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) { - CalculateFlags_ShiftVariable(Src2, [this, SrcSize, Res](){ +void OpDispatchBuilder::CalculateFlags_RotateRight(uint8_t SrcSize, X86Tables::DecodedOp Op, OrderedNode *Src2) { + CalculateFlags_ShiftVariable(Src2, [this, SrcSize, Op](){ auto SizeBits = SrcSize * 8; const auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit; + // Rematerialize inside block. Only lower SrcSize bits are used. + OrderedNode *Res = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); + // Ends up faster overall if we don't have FlagM, slower if we do... // If Shift != 1, OF is undefined so we choose to zero here. if (!CTX->HostFeatures.SupportsFlagM) @@ -855,19 +856,20 @@ void OpDispatchBuilder::CalculateFlags_RotateRight(uint8_t SrcSize, OrderedNode }); } -void OpDispatchBuilder::CalculateFlags_RotateLeft(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) { - CalculateFlags_ShiftVariable(Src2, [this, SrcSize, Res](){ +void OpDispatchBuilder::CalculateFlags_RotateLeft(uint8_t SrcSize, X86Tables::DecodedOp Op, OrderedNode *Src2) { + CalculateFlags_ShiftVariable(Src2, [this, SrcSize, Op](){ const auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit; auto SizeBits = SrcSize * 8; + // Rematerialize inside block. Only lower SrcSize bits are used. + OrderedNode *Res = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); + // Ends up faster overall if we don't have FlagM, slower if we do... // If Shift != 1, OF is undefined so we choose to zero here. if (!CTX->HostFeatures.SupportsFlagM) ZeroCV(); // Extract the last bit shifted in to CF - //auto Size = _Constant(GetSrcSize(Res) * 8); - //auto ShiftAmt = _Sub(OpSize::i64Bit, Size, Src2); SetRFLAG(Res, 0, true); // OF is the LSB and MSB XOR'd together. diff --git a/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp b/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp index 192ae6520c..00875371e5 100644 --- a/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp +++ b/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp @@ -865,6 +865,17 @@ bool ConstProp::ConstantPropagation(IREmitter *IREmit, const IRListView& Current } break; } + case OP_NEG: { + auto Op = IROp->CW(); + uint64_t Constant{}; + + if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant)) { + uint64_t NewConstant = -Constant; + IREmit->ReplaceWithConstant(CodeNode, NewConstant); + Changed = true; + } + break; + } case OP_LSHL: { auto Op = IROp->CW(); uint64_t Constant1{}; diff --git a/unittests/InstructionCountCI/FlagM/PrimaryGroup.json b/unittests/InstructionCountCI/FlagM/PrimaryGroup.json index dd7ce4bd1e..8d533a9566 100644 --- a/unittests/InstructionCountCI/FlagM/PrimaryGroup.json +++ b/unittests/InstructionCountCI/FlagM/PrimaryGroup.json @@ -1297,13 +1297,12 @@ ] }, "shl al, 1": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 7, "Comment": "GROUP2 0xd0 /4", "ExpectedArm64ASM": [ "uxtb w20, w4", - "lsl w21, w20, #1", - "bfxil x4, x21, #0, #8", - "uxtb w26, w21", + "lsl w26, w20, #1", + "bfxil x4, x26, #0, #8", "cmn wzr, w26, lsl #24", "rmif x20, #6, #nzCv", "eor w20, w26, w20", @@ -1475,13 +1474,12 @@ ] }, "shl ax, 1": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 7, "Comment": "GROUP2 0xd1 /4", "ExpectedArm64ASM": [ "uxth w20, w4", - "lsl w21, w20, #1", - "bfxil x4, x21, #0, #16", - "uxth w26, w21", + "lsl w26, w20, #1", + "bfxil x4, x26, #0, #16", "cmn wzr, w26, lsl #16", "rmif x20, #14, #nzCv", "eor w20, w26, w20", @@ -1585,7 +1583,7 @@ ] }, "rol al, cl": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xd2 /0", "ExpectedArm64ASM": [ "uxtb w20, w4", @@ -1593,13 +1591,12 @@ "and w21, w21, #0x1f", "bfi w20, w20, #8, #8", "bfi w20, w20, #16, #16", - "mov w22, #0x20", - "sub w22, w22, w21", + "neg w22, w21", "ror w20, w20, w22", "bfxil x4, x20, #0, #8", "cbz x21, #+0x10", - "rmif x20, #63, #nzCv", - "eor w20, w20, w20, lsr #7", + "rmif x4, #63, #nzCv", + "eor w20, w4, w4, lsr #7", "rmif x20, #0, #nzcV" ] }, @@ -1615,8 +1612,8 @@ "ror w20, w20, w21", "bfxil x4, x20, #0, #8", "cbz x21, #+0x10", - "rmif x20, #6, #nzCv", - "eor w20, w20, w20, lsr #1", + "rmif x4, #6, #nzCv", + "eor w20, w4, w4, lsr #1", "rmif x20, #6, #nzcV" ] }, @@ -1678,14 +1675,13 @@ ] }, "shl al, cl": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 13, "Comment": "GROUP2 0xd2 /4", "ExpectedArm64ASM": [ "uxtb w20, w4", "uxtb w21, w5", "lsl w22, w20, w21", "bfxil x4, x22, #0, #8", - "uxtb w22, w22", "cbz x21, #+0x24", "cmn wzr, w22, lsl #24", "mov w23, #0x8", @@ -1733,52 +1729,47 @@ ] }, "rol ax, cl": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xd3 /0", "ExpectedArm64ASM": [ "uxth w20, w4", "uxth w21, w5", "and w21, w21, #0x1f", "bfi w20, w20, #16, #16", - "mov w22, #0x20", - "sub w22, w22, w21", + "neg w22, w21", "ror w20, w20, w22", "bfxil x4, x20, #0, #16", "cbz x21, #+0x10", - "rmif x20, #63, #nzCv", - "eor w20, w20, w20, lsr #15", + "rmif x4, #63, #nzCv", + "eor w20, w4, w4, lsr #15", "rmif x20, #0, #nzcV" ] }, "rol eax, cl": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 9, "Comment": "GROUP2 0xd3 /0", "ExpectedArm64ASM": [ "mov w20, w4", "mov w21, w5", "and w21, w21, #0x1f", - "mov w22, #0x20", - "sub w22, w22, w21", - "ror w20, w20, w22", - "mov x4, x20", + "neg w22, w21", + "ror w4, w20, w22", "cbz x21, #+0x10", - "rmif x20, #63, #nzCv", - "eor w20, w20, w20, lsr #31", + "rmif x4, #63, #nzCv", + "eor w20, w4, w4, lsr #31", "rmif x20, #0, #nzcV" ] }, "rol rax, cl": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 7, "Comment": "GROUP2 0xd3 /0", "ExpectedArm64ASM": [ "and x20, x5, #0x3f", - "mov w21, #0x40", - "sub x21, x21, x20", - "ror x21, x4, x21", - "mov x4, x21", + "neg x21, x20", + "ror x4, x4, x21", "cbz x20, #+0x10", - "rmif x21, #63, #nzCv", - "eor x20, x21, x21, lsr #63", + "rmif x4, #63, #nzCv", + "eor x20, x4, x4, lsr #63", "rmif x20, #0, #nzcV" ] }, @@ -1793,36 +1784,34 @@ "ror w20, w20, w21", "bfxil x4, x20, #0, #16", "cbz x21, #+0x10", - "rmif x20, #14, #nzCv", - "eor w20, w20, w20, lsr #1", + "rmif x4, #14, #nzCv", + "eor w20, w4, w4, lsr #1", "rmif x20, #14, #nzcV" ] }, "ror eax, cl": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 8, "Comment": "GROUP2 0xd3 /1", "ExpectedArm64ASM": [ "mov w20, w4", "mov w21, w5", "and w21, w21, #0x1f", - "ror w20, w20, w21", - "mov x4, x20", + "ror w4, w20, w21", "cbz x21, #+0x10", - "rmif x20, #30, #nzCv", - "eor w20, w20, w20, lsr #1", + "rmif x4, #30, #nzCv", + "eor w20, w4, w4, lsr #1", "rmif x20, #30, #nzcV" ] }, "ror rax, cl": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 6, "Comment": "GROUP2 0xd3 /1", "ExpectedArm64ASM": [ "and x20, x5, #0x3f", - "ror x21, x4, x20", - "mov x4, x21", + "ror x4, x4, x20", "cbz x20, #+0x10", - "rmif x21, #62, #nzCv", - "eor x20, x21, x21, lsr #1", + "rmif x4, #62, #nzCv", + "eor x20, x4, x4, lsr #1", "rmif x20, #62, #nzcV" ] }, @@ -1963,14 +1952,13 @@ ] }, "shl ax, cl": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 13, "Comment": "GROUP2 0xd3 /4", "ExpectedArm64ASM": [ "uxth w20, w4", "uxth w21, w5", "lsl w22, w20, w21", "bfxil x4, x22, #0, #16", - "uxth w22, w22", "cbz x21, #+0x24", "cmn wzr, w22, lsl #16", "mov w23, #0x10", diff --git a/unittests/InstructionCountCI/PrimaryGroup.json b/unittests/InstructionCountCI/PrimaryGroup.json index 66426ee8c4..d1a754ec90 100644 --- a/unittests/InstructionCountCI/PrimaryGroup.json +++ b/unittests/InstructionCountCI/PrimaryGroup.json @@ -1494,13 +1494,12 @@ ] }, "shl al, 1": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xd0 /4", "ExpectedArm64ASM": [ "uxtb w20, w4", - "lsl w21, w20, #1", - "bfxil x4, x21, #0, #8", - "uxtb w26, w21", + "lsl w26, w20, #1", + "bfxil x4, x26, #0, #8", "cmn wzr, w26, lsl #24", "mrs x21, nzcv", "ubfx x22, x20, #7, #1", @@ -1761,13 +1760,12 @@ ] }, "shl ax, 1": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xd1 /4", "ExpectedArm64ASM": [ "uxth w20, w4", - "lsl w21, w20, #1", - "bfxil x4, x21, #0, #16", - "uxth w26, w21", + "lsl w26, w20, #1", + "bfxil x4, x26, #0, #16", "cmn wzr, w26, lsl #16", "mrs x21, nzcv", "ubfx x22, x20, #15, #1", @@ -1904,7 +1902,7 @@ ] }, "rol al, cl": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 17, "Comment": "GROUP2 0xd2 /0", "ExpectedArm64ASM": [ "uxtb w20, w4", @@ -1912,18 +1910,17 @@ "and w21, w21, #0x1f", "bfi w20, w20, #8, #8", "bfi w20, w20, #16, #16", - "mov w22, #0x20", - "sub w22, w22, w21", + "neg w22, w21", "ror w20, w20, w22", "bfxil x4, x20, #0, #8", "cbz x21, #+0x24", - "mrs x21, nzcv", - "and w21, w21, #0xc0000000", - "ubfx x22, x20, #0, #1", - "orr w21, w21, w22, lsl #29", - "eor w20, w20, w20, lsr #7", - "ubfx x20, x20, #0, #1", - "orr w20, w21, w20, lsl #28", + "mrs x20, nzcv", + "and w20, w20, #0xc0000000", + "ubfx x21, x4, #0, #1", + "orr w20, w20, w21, lsl #29", + "eor w21, w4, w4, lsr #7", + "ubfx x21, x21, #0, #1", + "orr w20, w20, w21, lsl #28", "msr nzcv, x20" ] }, @@ -1939,13 +1936,13 @@ "ror w20, w20, w21", "bfxil x4, x20, #0, #8", "cbz x21, #+0x24", - "mrs x21, nzcv", - "and w21, w21, #0xc0000000", - "ubfx x22, x20, #7, #1", - "orr w21, w21, w22, lsl #29", - "eor w20, w20, w20, lsr #1", - "ubfx x20, x20, #6, #1", - "orr w20, w21, w20, lsl #28", + "mrs x20, nzcv", + "and w20, w20, #0xc0000000", + "ubfx x21, x4, #7, #1", + "orr w20, w20, w21, lsl #29", + "eor w21, w4, w4, lsr #1", + "ubfx x21, x21, #6, #1", + "orr w20, w20, w21, lsl #28", "msr nzcv, x20" ] }, @@ -2021,14 +2018,13 @@ ] }, "shl al, cl": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 17, "Comment": "GROUP2 0xd2 /4", "ExpectedArm64ASM": [ "uxtb w20, w4", "uxtb w21, w5", "lsl w22, w20, w21", "bfxil x4, x22, #0, #8", - "uxtb w22, w22", "cbz x21, #+0x34", "cmn wzr, w22, lsl #24", "mrs x23, nzcv", @@ -2087,65 +2083,60 @@ ] }, "rol ax, cl": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 16, "Comment": "GROUP2 0xd3 /0", "ExpectedArm64ASM": [ "uxth w20, w4", "uxth w21, w5", "and w21, w21, #0x1f", "bfi w20, w20, #16, #16", - "mov w22, #0x20", - "sub w22, w22, w21", + "neg w22, w21", "ror w20, w20, w22", "bfxil x4, x20, #0, #16", "cbz x21, #+0x24", - "mrs x21, nzcv", - "and w21, w21, #0xc0000000", - "ubfx x22, x20, #0, #1", - "orr w21, w21, w22, lsl #29", - "eor w20, w20, w20, lsr #15", - "ubfx x20, x20, #0, #1", - "orr w20, w21, w20, lsl #28", + "mrs x20, nzcv", + "and w20, w20, #0xc0000000", + "ubfx x21, x4, #0, #1", + "orr w20, w20, w21, lsl #29", + "eor w21, w4, w4, lsr #15", + "ubfx x21, x21, #0, #1", + "orr w20, w20, w21, lsl #28", "msr nzcv, x20" ] }, "rol eax, cl": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 14, "Comment": "GROUP2 0xd3 /0", "ExpectedArm64ASM": [ "mov w20, w4", "mov w21, w5", "and w21, w21, #0x1f", - "mov w22, #0x20", - "sub w22, w22, w21", - "ror w20, w20, w22", - "mov x4, x20", + "neg w22, w21", + "ror w4, w20, w22", "cbz x21, #+0x24", - "mrs x21, nzcv", - "and w21, w21, #0xc0000000", - "ubfx x22, x20, #0, #1", - "orr w21, w21, w22, lsl #29", - "eor w20, w20, w20, lsr #31", - "ubfx x20, x20, #0, #1", - "orr w20, w21, w20, lsl #28", + "mrs x20, nzcv", + "and w20, w20, #0xc0000000", + "ubfx x21, x4, #0, #1", + "orr w20, w20, w21, lsl #29", + "eor w21, w4, w4, lsr #31", + "ubfx x21, x21, #0, #1", + "orr w20, w20, w21, lsl #28", "msr nzcv, x20" ] }, "rol rax, cl": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xd3 /0", "ExpectedArm64ASM": [ "and x20, x5, #0x3f", - "mov w21, #0x40", - "sub x21, x21, x20", - "ror x21, x4, x21", - "mov x4, x21", + "neg x21, x20", + "ror x4, x4, x21", "cbz x20, #+0x24", "mrs x20, nzcv", "and w20, w20, #0xc0000000", - "ubfx x22, x21, #0, #1", - "orr w20, w20, w22, lsl #29", - "eor x21, x21, x21, lsr #63", + "ubfx x21, x4, #0, #1", + "orr w20, w20, w21, lsl #29", + "eor x21, x4, x4, lsr #63", "ubfx x21, x21, #0, #1", "orr w20, w20, w21, lsl #28", "msr nzcv, x20" @@ -2162,49 +2153,47 @@ "ror w20, w20, w21", "bfxil x4, x20, #0, #16", "cbz x21, #+0x24", - "mrs x21, nzcv", - "and w21, w21, #0xc0000000", - "ubfx x22, x20, #15, #1", - "orr w21, w21, w22, lsl #29", - "eor w20, w20, w20, lsr #1", - "ubfx x20, x20, #14, #1", - "orr w20, w21, w20, lsl #28", + "mrs x20, nzcv", + "and w20, w20, #0xc0000000", + "ubfx x21, x4, #15, #1", + "orr w20, w20, w21, lsl #29", + "eor w21, w4, w4, lsr #1", + "ubfx x21, x21, #14, #1", + "orr w20, w20, w21, lsl #28", "msr nzcv, x20" ] }, "ror eax, cl": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 13, "Comment": "GROUP2 0xd3 /1", "ExpectedArm64ASM": [ "mov w20, w4", "mov w21, w5", "and w21, w21, #0x1f", - "ror w20, w20, w21", - "mov x4, x20", + "ror w4, w20, w21", "cbz x21, #+0x24", - "mrs x21, nzcv", - "and w21, w21, #0xc0000000", - "ubfx x22, x20, #31, #1", - "orr w21, w21, w22, lsl #29", - "eor w20, w20, w20, lsr #1", - "ubfx x20, x20, #30, #1", - "orr w20, w21, w20, lsl #28", + "mrs x20, nzcv", + "and w20, w20, #0xc0000000", + "ubfx x21, x4, #31, #1", + "orr w20, w20, w21, lsl #29", + "eor w21, w4, w4, lsr #1", + "ubfx x21, x21, #30, #1", + "orr w20, w20, w21, lsl #28", "msr nzcv, x20" ] }, "ror rax, cl": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xd3 /1", "ExpectedArm64ASM": [ "and x20, x5, #0x3f", - "ror x21, x4, x20", - "mov x4, x21", + "ror x4, x4, x20", "cbz x20, #+0x24", "mrs x20, nzcv", "and w20, w20, #0xc0000000", - "lsr x22, x21, #63", - "orr w20, w20, w22, lsl #29", - "eor x21, x21, x21, lsr #1", + "lsr x21, x4, #63", + "orr w20, w20, w21, lsl #29", + "eor x21, x4, x4, lsr #1", "ubfx x21, x21, #62, #1", "orr w20, w20, w21, lsl #28", "msr nzcv, x20" @@ -2389,14 +2378,13 @@ ] }, "shl ax, cl": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 17, "Comment": "GROUP2 0xd3 /4", "ExpectedArm64ASM": [ "uxth w20, w4", "uxth w21, w5", "lsl w22, w20, w21", "bfxil x4, x22, #0, #16", - "uxth w22, w22", "cbz x21, #+0x34", "cmn wzr, w22, lsl #16", "mrs x23, nzcv",