diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 12091359d6..b570abd7ad 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -1891,165 +1891,71 @@ void OpDispatchBuilder::ASHRImmediateOp(OpcodeArgs) { GenerateFlags_SignShiftRightImmediate(Op, Result, Dest, Shift); } -template -void OpDispatchBuilder::ROROp(OpcodeArgs) { - OrderedNode *Src; - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); - - const uint32_t Size = GetSrcBitSize(Op); - if constexpr (Is1Bit) { - Src = _Constant(std::max(32U, Size), 1); - } else { - Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags); - } - - // x86 masks the shift by 0x3F or 0x1F depending on size of op - if (Size == 64) { - Src = _And(OpSize::i64Bit, Src, _Constant(Size, 0x3F)); - } else { - Src = _And(OpSize::i32Bit, Src, _Constant(Size, 0x1F)); - } - - if (Size < 32) { - // ARM doesn't support 8/16bit rotates. Emulate with an insert - // StoreResult truncates back to a 8/16 bit value - Dest = _Bfi(OpSize::i32Bit, Size, Size, Dest, Dest); - if (Size == 8 && !Is1Bit) { - // And because the shift size isn't masked to 8 bits, we need to fill the - // the full 32bits to get the correct result. - Dest = _Bfi(OpSize::i32Bit, 16, 16, Dest, Dest); - } - } - - auto ALUOp = _Ror(Size == 64 ? 
OpSize::i64Bit : OpSize::i32Bit, Dest, Src); - - StoreResult(GPRClass, Op, ALUOp, -1); - - if constexpr (Is1Bit) { - GenerateFlags_RotateRightImmediate(Op, ALUOp, Dest, 1); - } else { - GenerateFlags_RotateRight(Op, ALUOp, Dest, Src); - } -} - -void OpDispatchBuilder::RORImmediateOp(OpcodeArgs) { - // See ROLImmediateOp for masking explanation - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); - - LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here"); - - uint64_t Shift = Op->Src[1].Data.Literal.Value; - const uint32_t Size = GetSrcBitSize(Op); - - // x86 masks the shift by 0x3F or 0x1F depending on size of op - if (Size == 64) { - Shift &= 0x3F; - } else { - Shift &= 0x1F; - } +template +void OpDispatchBuilder::RotateOp(OpcodeArgs) { + CalculateDeferredFlags(); - OrderedNode *Src = _Constant(std::max(32U, Size), Shift); + auto LoadShift = [this, Op](bool MustMask) -> OrderedNode * { + // x86 masks the shift by 0x3F or 0x1F depending on size of op + const uint32_t Size = GetSrcBitSize(Op); + uint64_t Mask = Size == 64 ? 0x3F : 0x1F; - if (Size < 32) { - // ARM doesn't support 8/16bit rotates. Emulate with an insert - // StoreResult truncates back to a 8/16 bit value - Dest = _Bfi(OpSize::i32Bit, Size, Size, Dest, Dest); - if (Size == 8 && Shift > 8) { - // And because the shift size isn't masked to 8 bits, we need to fill the - // the full 32bits to get the correct result. - Dest = _Bfi(OpSize::i32Bit, 16, 16, Dest, Dest); + if (Is1Bit) { + return _Constant(1); + } else if (IsImmediate) { + LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here"); + return _Constant(Op->Src[1].Data.Literal.Value & Mask); + } else { + auto Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true}); + return MustMask ? _And(OpSize::i64Bit, Src, _Constant(Mask)) : Src; } - } - - auto ALUOp = _Ror(Size == 64 ? 
OpSize::i64Bit : OpSize::i32Bit, Dest, Src); - - StoreResult(GPRClass, Op, ALUOp, -1); - - GenerateFlags_RotateRightImmediate(Op, ALUOp, Dest, Shift); -} + }; -template -void OpDispatchBuilder::ROLOp(OpcodeArgs) { - OrderedNode *Src; - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); + Calculate_ShiftVariable(LoadShift(true), [this, LoadShift, Op](){ + const uint32_t Size = GetSrcBitSize(Op); + const auto OpSize = Size == 64 ? OpSize::i64Bit : OpSize::i32Bit; - const uint32_t Size = GetSrcBitSize(Op); + // We don't need to mask when we rematerialize since the Ror absorbs. + auto Src = LoadShift(false); - // Need to negate the shift so we can use ROR instead - if constexpr (Is1Bit) { - Src = _Constant(Size, 1); - } else { - Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags); - } + uint64_t Const; + bool IsConst = IsValueConstant(WrapNode(Src), &Const); - // x86 masks the shift by 0x3F or 0x1F depending on size of op - if (Size == 64) { - Src = _And(OpSize::i64Bit, Src, _Constant(Size, 0x3F)); - } else { - Src = _And(OpSize::i32Bit, Src, _Constant(Size, 0x1F)); - } + // We fill the upper bits so we allow garbage on load. + auto Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); - if (Size < 32) { - // ARM doesn't support 8/16bit rotates. Emulate with an insert - // StoreResult truncates back to a 8/16 bit value - Dest = _Bfi(OpSize::i32Bit, Size, Size, Dest, Dest); - if (Size == 8) { - // And because the shift size isn't masked to 8 bits, we need to fill the - // the full 32bits to get the correct result. - Dest = _Bfi(OpSize::i32Bit, 16, 16, Dest, Dest); + if (Size < 32) { + // ARM doesn't support 8/16bit rotates. Emulate with an insert + // StoreResult truncates back to a 8/16 bit value + Dest = _Bfi(OpSize, Size, Size, Dest, Dest); + + if (Size == 8 && !(IsConst && Const < 8 && !Left)) { + // And because the shift size isn't masked to 8 bits, we need to fill the + // full 32bits to get the correct result. 
+ Dest = _Bfi(OpSize, 16, 16, Dest, Dest); + } } - } - - auto ALUOp = _Ror(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, - Dest, - _Sub(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, _Constant(Size, std::max(32U, Size)), Src)); - StoreResult(GPRClass, Op, ALUOp, -1); - - if constexpr (Is1Bit) { - GenerateFlags_RotateLeftImmediate(Op, ALUOp, Dest, 1); - } else { - GenerateFlags_RotateLeft(Op, ALUOp, Dest, Src); - } -} - -void OpDispatchBuilder::ROLImmediateOp(OpcodeArgs) { - // For 32-bit, garbage is ignored in hardware. For < 32, see Bfi comment. - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); + // To rotate 64-bits left, right-rotate by (64 - Shift) = -Shift mod 64. + auto Res = _Ror(OpSize, Dest, Left ? _Neg(OpSize, Src) : Src); + StoreResult(GPRClass, Op, Res, -1); - LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here"); + // Ends up faster overall if we don't have FlagM, slower if we do... + // If Shift != 1, OF is undefined so we choose to zero here. + if (!CTX->HostFeatures.SupportsFlagM) + ZeroCV(); - uint64_t Shift = Op->Src[1].Data.Literal.Value; - const uint32_t Size = GetSrcBitSize(Op); + // Extract the last bit shifted in to CF + SetRFLAG(Res, Left ? 0 : Size - 1, true); - // x86 masks the shift by 0x3F or 0x1F depending on size of op - if (Size == 64) { - Shift &= 0x3F; - } else { - Shift &= 0x1F; - } - - // We also negate the shift so we can emulate Rol with Ror. - const auto NegatedShift = std::max(32U, Size) - Shift; - OrderedNode *Src = _Constant(Size, NegatedShift); - - if (Size < 32) { - // ARM doesn't support 8/16bit rotates. Emulate with an insert - // StoreResult truncates back to a 8/16 bit value. The inserts have the side - // effect of stomping over any garbage we had in the upper bits. 
- Dest = _Bfi(OpSize::i32Bit, Size, Size, Dest, Dest); - if (Size == 8) { - // And because the shift size isn't masked to 8 bits, we need to fill the - // the full 32bits to get the correct result. - Dest = _Bfi(OpSize::i32Bit, 16, 16, Dest, Dest); + // For ROR, OF is the XOR of the new CF bit and the most significant bit of the result. + // For ROL, OF is the LSB and MSB XOR'd together. + // OF is architecturally only defined for 1-bit rotate. + if (!IsConst || Const == 1) { + auto NewOF = _XorShift(OpSize, Res, Res, ShiftType::LSR, Left ? Size - 1 : 1); + SetRFLAG(NewOF, Left ? 0 : Size - 2, true); } - } - - auto ALUOp = _Ror(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Dest, Src); - - StoreResult(GPRClass, Op, ALUOp, -1); - - GenerateFlags_RotateLeftImmediate(Op, ALUOp, Dest, Shift); + }); } void OpDispatchBuilder::ANDNBMIOp(OpcodeArgs) { @@ -6144,8 +6050,8 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 7), 1, &OpDispatchBuilder::CMPOp<1>}, // GROUP 2 - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 0), 1, &OpDispatchBuilder::ROLImmediateOp}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 1), 1, &OpDispatchBuilder::RORImmediateOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 0), 1, &OpDispatchBuilder::RotateOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 1), 1, &OpDispatchBuilder::RotateOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 2), 1, &OpDispatchBuilder::RCLOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 3), 1, &OpDispatchBuilder::RCROp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 4), 1, &OpDispatchBuilder::SHLImmediateOp}, @@ -6153,8 +6059,8 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 6), 1, &OpDispatchBuilder::SHLImmediateOp}, // SAL {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 7), 1, 
&OpDispatchBuilder::ASHRImmediateOp}, // SAR - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 0), 1, &OpDispatchBuilder::ROLImmediateOp}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 1), 1, &OpDispatchBuilder::RORImmediateOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 0), 1, &OpDispatchBuilder::RotateOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 1), 1, &OpDispatchBuilder::RotateOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 2), 1, &OpDispatchBuilder::RCLOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 3), 1, &OpDispatchBuilder::RCROp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 4), 1, &OpDispatchBuilder::SHLImmediateOp}, @@ -6162,8 +6068,8 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 6), 1, &OpDispatchBuilder::SHLImmediateOp}, // SAL {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 7), 1, &OpDispatchBuilder::ASHRImmediateOp}, // SAR - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 0), 1, &OpDispatchBuilder::ROLOp}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 1), 1, &OpDispatchBuilder::ROROp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 0), 1, &OpDispatchBuilder::RotateOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 1), 1, &OpDispatchBuilder::RotateOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 2), 1, &OpDispatchBuilder::RCLOp1Bit}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 3), 1, &OpDispatchBuilder::RCROp8x1Bit}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 4), 1, &OpDispatchBuilder::SHLOp}, @@ -6171,8 +6077,8 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 6), 1, &OpDispatchBuilder::SHLOp}, // SAL {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 7), 1, &OpDispatchBuilder::ASHROp}, // SAR - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, 
OpToIndex(0xD1), 0), 1, &OpDispatchBuilder::ROLOp}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 1), 1, &OpDispatchBuilder::ROROp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 0), 1, &OpDispatchBuilder::RotateOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 1), 1, &OpDispatchBuilder::RotateOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 2), 1, &OpDispatchBuilder::RCLOp1Bit}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 3), 1, &OpDispatchBuilder::RCROp1Bit}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 4), 1, &OpDispatchBuilder::SHLOp}, @@ -6180,8 +6086,8 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 6), 1, &OpDispatchBuilder::SHLOp}, // SAL {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 7), 1, &OpDispatchBuilder::ASHROp}, // SAR - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 0), 1, &OpDispatchBuilder::ROLOp}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 1), 1, &OpDispatchBuilder::ROROp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 0), 1, &OpDispatchBuilder::RotateOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 1), 1, &OpDispatchBuilder::RotateOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 2), 1, &OpDispatchBuilder::RCLSmallerOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 3), 1, &OpDispatchBuilder::RCRSmallerOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 4), 1, &OpDispatchBuilder::SHLOp}, @@ -6189,8 +6095,8 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 6), 1, &OpDispatchBuilder::SHLOp}, // SAL {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 7), 1, &OpDispatchBuilder::ASHROp}, // SAR - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 0), 1, &OpDispatchBuilder::ROLOp}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 1), 1, 
&OpDispatchBuilder::ROROp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 0), 1, &OpDispatchBuilder::RotateOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 1), 1, &OpDispatchBuilder::RotateOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 2), 1, &OpDispatchBuilder::RCLOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 3), 1, &OpDispatchBuilder::RCROp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 4), 1, &OpDispatchBuilder::SHLOp}, diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index 9c58e50725..e65dd6374a 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -89,10 +89,6 @@ friend class FEXCore::IR::PassManager; TYPE_LSHRDI, TYPE_ASHR, TYPE_ASHRI, - TYPE_ROR, - TYPE_RORI, - TYPE_ROL, - TYPE_ROLI, TYPE_BEXTR, TYPE_BLSI, TYPE_BLSMSK, @@ -321,12 +317,8 @@ friend class FEXCore::IR::PassManager; template void ASHROp(OpcodeArgs); void ASHRImmediateOp(OpcodeArgs); - template - void ROROp(OpcodeArgs); - void RORImmediateOp(OpcodeArgs); - template - void ROLOp(OpcodeArgs); - void ROLImmediateOp(OpcodeArgs); + template + void RotateOp(OpcodeArgs); void RCROp1Bit(OpcodeArgs); void RCROp8x1Bit(OpcodeArgs); void RCROp(OpcodeArgs); @@ -1670,13 +1662,13 @@ friend class FEXCore::IR::PassManager; OrderedNode *Src1; } OneSource; - // Logical, LSHL, LSHR, ASHR, ROR, ROL + // Logical, LSHL, LSHR, ASHR struct { OrderedNode *Src1; OrderedNode *Src2; } TwoSource; - // LSHLI, LSHRI, ASHRI, RORI, ROLI + // LSHLI, LSHRI, ASHRI struct { OrderedNode *Src1; uint64_t Imm; @@ -1725,15 +1717,12 @@ friend class FEXCore::IR::PassManager; } template - void CalculateFlags_ShiftVariable(OrderedNode *Shift, F&& CalculateFlags) { - // We are the ones calculating the deferred flags. Don't recurse! 
- InvalidateDeferredFlags(); - + void Calculate_ShiftVariable(OrderedNode *Shift, F&& Calculate) { // RCR can call this with constants, so handle that without branching. uint64_t Const; if (IsValueConstant(WrapNode(Shift), &Const)) { if (Const) - CalculateFlags(); + Calculate(); return; } @@ -1750,7 +1739,7 @@ friend class FEXCore::IR::PassManager; SetCurrentCodeBlock(SetBlock); StartNewBlock(); { - CalculateFlags(); + Calculate(); Jump(EndBlock); } @@ -1759,6 +1748,13 @@ friend class FEXCore::IR::PassManager; PossiblySetNZCVBits |= OldSetNZCVBits; } + template + void CalculateFlags_ShiftVariable(OrderedNode *Shift, F&& CalculateFlags) { + // We are the ones calculating the deferred flags. Don't recurse! + InvalidateDeferredFlags(); + Calculate_ShiftVariable(Shift, CalculateFlags); + } + /** * @name These functions are used by the deferred flag handling while it is calculating and storing flags in to RFLAGs. * @{ */ @@ -1786,10 +1782,6 @@ friend class FEXCore::IR::PassManager; void CalculateFlags_ShiftRightImmediateCommon(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift); void CalculateFlags_SignShiftRight(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2); void CalculateFlags_SignShiftRightImmediate(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift); - void CalculateFlags_RotateRight(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2); - void CalculateFlags_RotateLeft(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2); - void CalculateFlags_RotateRightImmediate(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift); - void CalculateFlags_RotateLeftImmediate(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift); void CalculateFlags_BEXTR(OrderedNode *Src); void CalculateFlags_BLSI(uint8_t SrcSize, OrderedNode *Src); void CalculateFlags_BLSMSK(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src); @@ -1977,78 +1969,6 @@ friend class 
FEXCore::IR::PassManager; }; } - void GenerateFlags_RotateRight(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) { - // Doesn't set all the flags, needs to calculate. - CalculateDeferredFlags(); - - CurrentDeferredFlags = DeferredFlagData { - .Type = FlagsGenerationType::TYPE_ROR, - .SrcSize = GetSrcSize(Op), - .Res = Res, - .Sources = { - .TwoSource = { - .Src1 = Src1, - .Src2 = Src2, - }, - }, - }; - } - - void GenerateFlags_RotateLeft(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) { - // Doesn't set all the flags, needs to calculate. - CalculateDeferredFlags(); - - CurrentDeferredFlags = DeferredFlagData { - .Type = FlagsGenerationType::TYPE_ROL, - .SrcSize = GetSrcSize(Op), - .Res = Res, - .Sources = { - .TwoSource = { - .Src1 = Src1, - .Src2 = Src2, - }, - }, - }; - } - - void GenerateFlags_RotateRightImmediate(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift) { - if (Shift == 0) return; - - // Doesn't set all the flags, needs to calculate. - CalculateDeferredFlags(); - - CurrentDeferredFlags = DeferredFlagData { - .Type = FlagsGenerationType::TYPE_RORI, - .SrcSize = GetSrcSize(Op), - .Res = Res, - .Sources = { - .OneSrcImmediate = { - .Src1 = Src1, - .Imm = Shift, - }, - }, - }; - } - - void GenerateFlags_RotateLeftImmediate(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift) { - if (Shift == 0) return; - - // Doesn't set all the flags, needs to calculate. 
- CalculateDeferredFlags(); - - CurrentDeferredFlags = DeferredFlagData { - .Type = FlagsGenerationType::TYPE_ROLI, - .SrcSize = GetSrcSize(Op), - .Res = Res, - .Sources = { - .OneSrcImmediate = { - .Src1 = Src1, - .Imm = Shift, - }, - } - }; - } - void GenerateFlags_BEXTR(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Src) { CurrentDeferredFlags = DeferredFlagData { .Type = FlagsGenerationType::TYPE_BEXTR, diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp index b8aa8371fa..2be21cf7bc 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp @@ -401,34 +401,6 @@ void OpDispatchBuilder::CalculateDeferredFlags(uint32_t FlagsToCalculateMask) { CurrentDeferredFlags.Sources.OneSrcImmediate.Src1, CurrentDeferredFlags.Sources.OneSrcImmediate.Imm); break; - case FlagsGenerationType::TYPE_ROR: - CalculateFlags_RotateRight( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.TwoSource.Src1, - CurrentDeferredFlags.Sources.TwoSource.Src2); - break; - case FlagsGenerationType::TYPE_RORI: - CalculateFlags_RotateRightImmediate( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.OneSrcImmediate.Src1, - CurrentDeferredFlags.Sources.OneSrcImmediate.Imm); - break; - case FlagsGenerationType::TYPE_ROL: - CalculateFlags_RotateLeft( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.TwoSource.Src1, - CurrentDeferredFlags.Sources.TwoSource.Src2); - break; - case FlagsGenerationType::TYPE_ROLI: - CalculateFlags_RotateLeftImmediate( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.OneSrcImmediate.Src1, - CurrentDeferredFlags.Sources.OneSrcImmediate.Imm); - break; case FlagsGenerationType::TYPE_BEXTR: CalculateFlags_BEXTR(CurrentDeferredFlags.Res); break; @@ -835,107 +807,6 @@ void 
OpDispatchBuilder::CalculateFlags_ShiftRightDoubleImmediate(uint8_t SrcSize } } -void OpDispatchBuilder::CalculateFlags_RotateRight(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) { - CalculateFlags_ShiftVariable(Src2, [this, SrcSize, Res](){ - auto SizeBits = SrcSize * 8; - const auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit; - - // Ends up faster overall if we don't have FlagM, slower if we do... - // If Shift != 1, OF is undefined so we choose to zero here. - if (!CTX->HostFeatures.SupportsFlagM) - ZeroCV(); - - // Extract the last bit shifted in to CF - SetRFLAG(Res, SizeBits - 1, true); - - // OF is set to the XOR of the new CF bit and the most significant bit of the result - // OF is architecturally only defined for 1-bit rotate, which is why this only happens when the shift is one. - auto NewOF = _XorShift(OpSize, Res, Res, ShiftType::LSR, 1); - SetRFLAG(NewOF, SizeBits - 2, true); - }); -} - -void OpDispatchBuilder::CalculateFlags_RotateLeft(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) { - CalculateFlags_ShiftVariable(Src2, [this, SrcSize, Res](){ - const auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit; - auto SizeBits = SrcSize * 8; - - // Ends up faster overall if we don't have FlagM, slower if we do... - // If Shift != 1, OF is undefined so we choose to zero here. - if (!CTX->HostFeatures.SupportsFlagM) - ZeroCV(); - - // Extract the last bit shifted in to CF - //auto Size = _Constant(GetSrcSize(Res) * 8); - //auto ShiftAmt = _Sub(OpSize::i64Bit, Size, Src2); - SetRFLAG(Res, 0, true); - - // OF is the LSB and MSB XOR'd together. - // OF is set to the XOR of the new CF bit and the most significant bit of the result. - // OF is architecturally only defined for 1-bit rotate, which is why this only happens when the shift is one. 
- auto NewOF = _XorShift(OpSize, Res, Res, ShiftType::LSR, SizeBits - 1); - SetRFLAG(NewOF, 0, true); - }); -} - -void OpDispatchBuilder::CalculateFlags_RotateRightImmediate(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift) { - if (Shift == 0) return; - - const auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit; - auto SizeBits = SrcSize * 8; - - // Ends up faster overall if we don't have FlagM, slower if we do... - // If Shift != 1, OF is undefined so we choose to zero here. - if (!CTX->HostFeatures.SupportsFlagM) - ZeroCV(); - - // CF - { - // Extract the last bit shifted in to CF - SetRFLAG(Res, SizeBits - 1, true); - } - - // OF - { - if (Shift == 1) { - // OF is the top two MSBs XOR'd together - // OF is architecturally only defined for 1-bit rotate, which is why this only happens when the shift is one. - auto NewOF = _XorShift(OpSize, Res, Res, ShiftType::LSR, 1); - SetRFLAG(NewOF, SizeBits - 2, 1); - } - } -} - -void OpDispatchBuilder::CalculateFlags_RotateLeftImmediate(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift) { - if (Shift == 0) return; - - const auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit; - auto SizeBits = SrcSize * 8; - - // Ends up faster overall if we don't have FlagM, slower if we do... - // If Shift != 1, OF is undefined so we choose to zero here. - if (!CTX->HostFeatures.SupportsFlagM) - ZeroCV(); - - // CF - { - // Extract the last bit shifted in to CF - SetRFLAG(Res, 0, true); - } - - // OF - { - if (Shift == 1) { - // OF is the LSB and MSB XOR'd together. - // OF is set to the XOR of the new CF bit and the most significant bit of the result. - // OF is architecturally only defined for 1-bit rotate, which is why this only happens when the shift is one. - auto NewOF = _XorShift(OpSize, Res, Res, ShiftType::LSR, SizeBits - 1); - - SetRFLAG(NewOF, 0, true); - } - } -} - void OpDispatchBuilder::CalculateFlags_BEXTR(OrderedNode *Src) { // ZF is set properly. 
CF and OF are defined as being set to zero. SF, PF, and // AF are undefined. diff --git a/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp b/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp index 192ae6520c..00875371e5 100644 --- a/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp +++ b/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp @@ -865,6 +865,17 @@ bool ConstProp::ConstantPropagation(IREmitter *IREmit, const IRListView& Current } break; } + case OP_NEG: { + auto Op = IROp->CW(); + uint64_t Constant{}; + + if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant)) { + uint64_t NewConstant = -Constant; + IREmit->ReplaceWithConstant(CodeNode, NewConstant); + Changed = true; + } + break; + } case OP_LSHL: { auto Op = IROp->CW(); uint64_t Constant1{}; diff --git a/unittests/InstructionCountCI/FlagM/PrimaryGroup.json b/unittests/InstructionCountCI/FlagM/PrimaryGroup.json index dd7ce4bd1e..aea5406844 100644 --- a/unittests/InstructionCountCI/FlagM/PrimaryGroup.json +++ b/unittests/InstructionCountCI/FlagM/PrimaryGroup.json @@ -1246,8 +1246,8 @@ "ExpectedInstructionCount": 8, "Comment": "GROUP2 0xd0 /0", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "bfi w20, w20, #8, #8", + "mov w20, w4", + "bfi w20, w4, #8, #8", "bfi w20, w20, #16, #16", "ror w20, w20, #31", "bfxil x4, x20, #0, #8", @@ -1260,8 +1260,8 @@ "ExpectedInstructionCount": 7, "Comment": "GROUP2 0xd0 /1", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "bfi w20, w20, #8, #8", + "mov w20, w4", + "bfi w20, w4, #8, #8", "ror w20, w20, #1", "bfxil x4, x20, #0, #8", "rmif x20, #6, #nzCv", @@ -1338,8 +1338,8 @@ "ExpectedInstructionCount": 7, "Comment": "GROUP2 0xd1 /0", "ExpectedArm64ASM": [ - "uxth w20, w4", - "bfi w20, w20, #16, #16", + "mov w20, w4", + "bfi w20, w4, #16, #16", "ror w20, w20, #31", "bfxil x4, x20, #0, #16", "rmif x20, #63, #nzCv", @@ -1348,11 +1348,10 @@ ] }, "rol eax, 1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 4, "Comment": "GROUP2 0xd1 /0", "ExpectedArm64ASM": [ - "mov w20, 
w4", - "ror w4, w20, #31", + "ror w4, w4, #31", "rmif x4, #63, #nzCv", "eor w20, w4, w4, lsr #31", "rmif x20, #0, #nzcV" @@ -1372,8 +1371,8 @@ "ExpectedInstructionCount": 7, "Comment": "GROUP2 0xd1 /1", "ExpectedArm64ASM": [ - "uxth w20, w4", - "bfi w20, w20, #16, #16", + "mov w20, w4", + "bfi w20, w4, #16, #16", "ror w20, w20, #1", "bfxil x4, x20, #0, #16", "rmif x20, #14, #nzCv", @@ -1382,11 +1381,10 @@ ] }, "ror eax, 1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 4, "Comment": "GROUP2 0xd1 /1", "ExpectedArm64ASM": [ - "mov w20, w4", - "ror w4, w20, #1", + "ror w4, w4, #1", "rmif x4, #30, #nzCv", "eor w20, w4, w4, lsr #1", "rmif x20, #30, #nzcV" @@ -1585,36 +1583,33 @@ ] }, "rol al, cl": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xd2 /0", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "uxtb w21, w5", - "and w21, w21, #0x1f", - "bfi w20, w20, #8, #8", + "and x20, x5, #0x1f", + "cbz x20, #+0x28", + "mov w20, w4", + "bfi w20, w4, #8, #8", "bfi w20, w20, #16, #16", - "mov w22, #0x20", - "sub w22, w22, w21", - "ror w20, w20, w22", + "neg w21, w5", + "ror w20, w20, w21", "bfxil x4, x20, #0, #8", - "cbz x21, #+0x10", "rmif x20, #63, #nzCv", "eor w20, w20, w20, lsr #7", "rmif x20, #0, #nzcV" ] }, "ror al, cl": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 10, "Comment": "GROUP2 0xd2 /1", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "uxtb w21, w5", - "and w21, w21, #0x1f", - "bfi w20, w20, #8, #8", + "and x20, x5, #0x1f", + "cbz x20, #+0x24", + "mov w20, w4", + "bfi w20, w4, #8, #8", "bfi w20, w20, #16, #16", - "ror w20, w20, w21", + "ror w20, w20, w5", "bfxil x4, x20, #0, #8", - "cbz x21, #+0x10", "rmif x20, #6, #nzCv", "eor w20, w20, w20, lsr #1", "rmif x20, #6, #nzcV" @@ -1733,96 +1728,83 @@ ] }, "rol ax, cl": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 10, "Comment": "GROUP2 0xd3 /0", "ExpectedArm64ASM": [ - "uxth w20, w4", - "uxth w21, w5", - "and w21, w21, #0x1f", 
- "bfi w20, w20, #16, #16", - "mov w22, #0x20", - "sub w22, w22, w21", - "ror w20, w20, w22", + "and x20, x5, #0x1f", + "cbz x20, #+0x24", + "mov w20, w4", + "bfi w20, w4, #16, #16", + "neg w21, w5", + "ror w20, w20, w21", "bfxil x4, x20, #0, #16", - "cbz x21, #+0x10", "rmif x20, #63, #nzCv", "eor w20, w20, w20, lsr #15", "rmif x20, #0, #nzcV" ] }, "rol eax, cl": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 7, "Comment": "GROUP2 0xd3 /0", "ExpectedArm64ASM": [ - "mov w20, w4", - "mov w21, w5", - "and w21, w21, #0x1f", - "mov w22, #0x20", - "sub w22, w22, w21", - "ror w20, w20, w22", - "mov x4, x20", - "cbz x21, #+0x10", - "rmif x20, #63, #nzCv", - "eor w20, w20, w20, lsr #31", + "and x20, x5, #0x1f", + "cbz x20, #+0x18", + "neg w20, w5", + "ror w4, w4, w20", + "rmif x4, #63, #nzCv", + "eor w20, w4, w4, lsr #31", "rmif x20, #0, #nzcV" ] }, "rol rax, cl": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 7, "Comment": "GROUP2 0xd3 /0", "ExpectedArm64ASM": [ "and x20, x5, #0x3f", - "mov w21, #0x40", - "sub x21, x21, x20", - "ror x21, x4, x21", - "mov x4, x21", - "cbz x20, #+0x10", - "rmif x21, #63, #nzCv", - "eor x20, x21, x21, lsr #63", + "cbz x20, #+0x18", + "neg x20, x5", + "ror x4, x4, x20", + "rmif x4, #63, #nzCv", + "eor x20, x4, x4, lsr #63", "rmif x20, #0, #nzcV" ] }, "ror ax, cl": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 9, "Comment": "GROUP2 0xd3 /1", "ExpectedArm64ASM": [ - "uxth w20, w4", - "uxth w21, w5", - "and w21, w21, #0x1f", - "bfi w20, w20, #16, #16", - "ror w20, w20, w21", + "and x20, x5, #0x1f", + "cbz x20, #+0x20", + "mov w20, w4", + "bfi w20, w4, #16, #16", + "ror w20, w20, w5", "bfxil x4, x20, #0, #16", - "cbz x21, #+0x10", "rmif x20, #14, #nzCv", "eor w20, w20, w20, lsr #1", "rmif x20, #14, #nzcV" ] }, "ror eax, cl": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 6, "Comment": "GROUP2 0xd3 /1", "ExpectedArm64ASM": [ - "mov w20, w4", - "mov w21, w5", - "and w21, w21, 
#0x1f", - "ror w20, w20, w21", - "mov x4, x20", - "cbz x21, #+0x10", - "rmif x20, #30, #nzCv", - "eor w20, w20, w20, lsr #1", + "and x20, x5, #0x1f", + "cbz x20, #+0x14", + "ror w4, w4, w5", + "rmif x4, #30, #nzCv", + "eor w20, w4, w4, lsr #1", "rmif x20, #30, #nzcV" ] }, "ror rax, cl": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 6, "Comment": "GROUP2 0xd3 /1", "ExpectedArm64ASM": [ "and x20, x5, #0x3f", - "ror x21, x4, x20", - "mov x4, x21", - "cbz x20, #+0x10", - "rmif x21, #62, #nzCv", - "eor x20, x21, x21, lsr #1", + "cbz x20, #+0x14", + "ror x4, x4, x5", + "rmif x4, #62, #nzCv", + "eor x20, x4, x4, lsr #1", "rmif x20, #62, #nzcV" ] }, diff --git a/unittests/InstructionCountCI/PrimaryGroup.json b/unittests/InstructionCountCI/PrimaryGroup.json index 66426ee8c4..054b14d3ea 100644 --- a/unittests/InstructionCountCI/PrimaryGroup.json +++ b/unittests/InstructionCountCI/PrimaryGroup.json @@ -1417,8 +1417,8 @@ "ExpectedInstructionCount": 13, "Comment": "GROUP2 0xd0 /0", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "bfi w20, w20, #8, #8", + "mov w20, w4", + "bfi w20, w4, #8, #8", "bfi w20, w20, #16, #16", "ror w20, w20, #31", "bfxil x4, x20, #0, #8", @@ -1436,8 +1436,8 @@ "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xd0 /1", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "bfi w20, w20, #8, #8", + "mov w20, w4", + "bfi w20, w4, #8, #8", "ror w20, w20, #1", "bfxil x4, x20, #0, #8", "mrs x21, nzcv", @@ -1546,8 +1546,8 @@ "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xd1 /0", "ExpectedArm64ASM": [ - "uxth w20, w4", - "bfi w20, w20, #16, #16", + "mov w20, w4", + "bfi w20, w4, #16, #16", "ror w20, w20, #31", "bfxil x4, x20, #0, #16", "mrs x21, nzcv", @@ -1561,11 +1561,10 @@ ] }, "rol eax, 1": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 9, "Comment": "GROUP2 0xd1 /0", "ExpectedArm64ASM": [ - "mov w20, w4", - "ror w4, w20, #31", + "ror w4, w4, #31", "mrs x20, nzcv", "and w20, w20, #0xc0000000", "ubfx x21, x4, #0, #1", @@ -1595,8 
+1594,8 @@ "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xd1 /1", "ExpectedArm64ASM": [ - "uxth w20, w4", - "bfi w20, w20, #16, #16", + "mov w20, w4", + "bfi w20, w4, #16, #16", "ror w20, w20, #1", "bfxil x4, x20, #0, #16", "mrs x21, nzcv", @@ -1610,11 +1609,10 @@ ] }, "ror eax, 1": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 9, "Comment": "GROUP2 0xd1 /1", "ExpectedArm64ASM": [ - "mov w20, w4", - "ror w4, w20, #1", + "ror w4, w4, #1", "mrs x20, nzcv", "and w20, w20, #0xc0000000", "ubfx x21, x4, #31, #1", @@ -1904,19 +1902,17 @@ ] }, "rol al, cl": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 16, "Comment": "GROUP2 0xd2 /0", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "uxtb w21, w5", - "and w21, w21, #0x1f", - "bfi w20, w20, #8, #8", + "and x20, x5, #0x1f", + "cbz x20, #+0x3c", + "mov w20, w4", + "bfi w20, w4, #8, #8", "bfi w20, w20, #16, #16", - "mov w22, #0x20", - "sub w22, w22, w21", - "ror w20, w20, w22", + "neg w21, w5", + "ror w20, w20, w21", "bfxil x4, x20, #0, #8", - "cbz x21, #+0x24", "mrs x21, nzcv", "and w21, w21, #0xc0000000", "ubfx x22, x20, #0, #1", @@ -1928,17 +1924,16 @@ ] }, "ror al, cl": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 15, "Comment": "GROUP2 0xd2 /1", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "uxtb w21, w5", - "and w21, w21, #0x1f", - "bfi w20, w20, #8, #8", + "and x20, x5, #0x1f", + "cbz x20, #+0x38", + "mov w20, w4", + "bfi w20, w4, #8, #8", "bfi w20, w20, #16, #16", - "ror w20, w20, w21", + "ror w20, w20, w5", "bfxil x4, x20, #0, #8", - "cbz x21, #+0x24", "mrs x21, nzcv", "and w21, w21, #0xc0000000", "ubfx x22, x20, #7, #1", @@ -2087,18 +2082,16 @@ ] }, "rol ax, cl": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 15, "Comment": "GROUP2 0xd3 /0", "ExpectedArm64ASM": [ - "uxth w20, w4", - "uxth w21, w5", - "and w21, w21, #0x1f", - "bfi w20, w20, #16, #16", - "mov w22, #0x20", - "sub w22, w22, w21", - "ror w20, w20, w22", + "and x20, x5, #0x1f", + 
"cbz x20, #+0x38", + "mov w20, w4", + "bfi w20, w4, #16, #16", + "neg w21, w5", + "ror w20, w20, w21", "bfxil x4, x20, #0, #16", - "cbz x21, #+0x24", "mrs x21, nzcv", "and w21, w21, #0xc0000000", "ubfx x22, x20, #0, #1", @@ -2110,58 +2103,51 @@ ] }, "rol eax, cl": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xd3 /0", "ExpectedArm64ASM": [ - "mov w20, w4", - "mov w21, w5", - "and w21, w21, #0x1f", - "mov w22, #0x20", - "sub w22, w22, w21", - "ror w20, w20, w22", - "mov x4, x20", - "cbz x21, #+0x24", - "mrs x21, nzcv", - "and w21, w21, #0xc0000000", - "ubfx x22, x20, #0, #1", - "orr w21, w21, w22, lsl #29", - "eor w20, w20, w20, lsr #31", - "ubfx x20, x20, #0, #1", - "orr w20, w21, w20, lsl #28", + "and x20, x5, #0x1f", + "cbz x20, #+0x2c", + "neg w20, w5", + "ror w4, w4, w20", + "mrs x20, nzcv", + "and w20, w20, #0xc0000000", + "ubfx x21, x4, #0, #1", + "orr w20, w20, w21, lsl #29", + "eor w21, w4, w4, lsr #31", + "ubfx x21, x21, #0, #1", + "orr w20, w20, w21, lsl #28", "msr nzcv, x20" ] }, "rol rax, cl": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xd3 /0", "ExpectedArm64ASM": [ "and x20, x5, #0x3f", - "mov w21, #0x40", - "sub x21, x21, x20", - "ror x21, x4, x21", - "mov x4, x21", - "cbz x20, #+0x24", + "cbz x20, #+0x2c", + "neg x20, x5", + "ror x4, x4, x20", "mrs x20, nzcv", "and w20, w20, #0xc0000000", - "ubfx x22, x21, #0, #1", - "orr w20, w20, w22, lsl #29", - "eor x21, x21, x21, lsr #63", + "ubfx x21, x4, #0, #1", + "orr w20, w20, w21, lsl #29", + "eor x21, x4, x4, lsr #63", "ubfx x21, x21, #0, #1", "orr w20, w20, w21, lsl #28", "msr nzcv, x20" ] }, "ror ax, cl": { - "ExpectedInstructionCount": 15, + "ExpectedInstructionCount": 14, "Comment": "GROUP2 0xd3 /1", "ExpectedArm64ASM": [ - "uxth w20, w4", - "uxth w21, w5", - "and w21, w21, #0x1f", - "bfi w20, w20, #16, #16", - "ror w20, w20, w21", + "and x20, x5, #0x1f", + "cbz x20, #+0x34", + "mov w20, w4", + "bfi w20, w4, 
#16, #16", + "ror w20, w20, w5", "bfxil x4, x20, #0, #16", - "cbz x21, #+0x24", "mrs x21, nzcv", "and w21, w21, #0xc0000000", "ubfx x22, x20, #15, #1", @@ -2173,38 +2159,34 @@ ] }, "ror eax, cl": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xd3 /1", "ExpectedArm64ASM": [ - "mov w20, w4", - "mov w21, w5", - "and w21, w21, #0x1f", - "ror w20, w20, w21", - "mov x4, x20", - "cbz x21, #+0x24", - "mrs x21, nzcv", - "and w21, w21, #0xc0000000", - "ubfx x22, x20, #31, #1", - "orr w21, w21, w22, lsl #29", - "eor w20, w20, w20, lsr #1", - "ubfx x20, x20, #30, #1", - "orr w20, w21, w20, lsl #28", + "and x20, x5, #0x1f", + "cbz x20, #+0x28", + "ror w4, w4, w5", + "mrs x20, nzcv", + "and w20, w20, #0xc0000000", + "ubfx x21, x4, #31, #1", + "orr w20, w20, w21, lsl #29", + "eor w21, w4, w4, lsr #1", + "ubfx x21, x21, #30, #1", + "orr w20, w20, w21, lsl #28", "msr nzcv, x20" ] }, "ror rax, cl": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xd3 /1", "ExpectedArm64ASM": [ "and x20, x5, #0x3f", - "ror x21, x4, x20", - "mov x4, x21", - "cbz x20, #+0x24", + "cbz x20, #+0x28", + "ror x4, x4, x5", "mrs x20, nzcv", "and w20, w20, #0xc0000000", - "lsr x22, x21, #63", - "orr w20, w20, w22, lsl #29", - "eor x21, x21, x21, lsr #1", + "lsr x21, x4, #63", + "orr w20, w20, w21, lsl #29", + "eor x21, x4, x4, lsr #1", "ubfx x21, x21, #62, #1", "orr w20, w20, w21, lsl #28", "msr nzcv, x20"