From bdda99e44fa291f0d2066a724d08466a652b5299 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 29 Mar 2024 17:08:59 -0400 Subject: [PATCH 1/4] ConstProp: constant fold Neg will come up with rotate in the next patch Signed-off-by: Alyssa Rosenzweig --- FEXCore/Source/Interface/IR/Passes/ConstProp.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp b/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp index 192ae6520c..00875371e5 100644 --- a/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp +++ b/FEXCore/Source/Interface/IR/Passes/ConstProp.cpp @@ -865,6 +865,17 @@ bool ConstProp::ConstantPropagation(IREmitter *IREmit, const IRListView& Current } break; } + case OP_NEG: { + auto Op = IROp->CW(); + uint64_t Constant{}; + + if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant)) { + uint64_t NewConstant = -Constant; + IREmit->ReplaceWithConstant(CodeNode, NewConstant); + Changed = true; + } + break; + } case OP_LSHL: { auto Op = IROp->CW(); uint64_t Constant1{}; From 6f5e4fd34b2149cd313cc5fc834a58f749455faf Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Sat, 30 Mar 2024 13:25:53 -0400 Subject: [PATCH 2/4] OpcodeDispatcher: add non-flag calc version of ShiftVariable more correct for rcl, etc Signed-off-by: Alyssa Rosenzweig --- FEXCore/Source/Interface/Core/OpcodeDispatcher.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index 9c58e50725..cf0607118b 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -1725,15 +1725,12 @@ friend class FEXCore::IR::PassManager; } template - void CalculateFlags_ShiftVariable(OrderedNode *Shift, F&& CalculateFlags) { - // We are the ones calculating the deferred flags. Don't recurse! 
- InvalidateDeferredFlags(); - + void Calculate_ShiftVariable(OrderedNode *Shift, F&& Calculate) { // RCR can call this with constants, so handle that without branching. uint64_t Const; if (IsValueConstant(WrapNode(Shift), &Const)) { if (Const) - CalculateFlags(); + Calculate(); return; } @@ -1750,7 +1747,7 @@ friend class FEXCore::IR::PassManager; SetCurrentCodeBlock(SetBlock); StartNewBlock(); { - CalculateFlags(); + Calculate(); Jump(EndBlock); } @@ -1759,6 +1756,13 @@ friend class FEXCore::IR::PassManager; PossiblySetNZCVBits |= OldSetNZCVBits; } + template + void CalculateFlags_ShiftVariable(OrderedNode *Shift, F&& CalculateFlags) { + // We are the ones calculating the deferred flags. Don't recurse! + InvalidateDeferredFlags(); + Calculate_ShiftVariable(Shift, CalculateFlags); + } + /** * @name These functions are used by the deferred flag handling while it is calculating and storing flags in to RFLAGs. * @{ */ From 15b86e4c5a2651cc98821a9b2e78ff1c0f5d4adb Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Fri, 29 Mar 2024 17:04:35 -0400 Subject: [PATCH 3/4] OpcodeDispatcher: rewrite ROL/ROR single unified implementation for ROL & ROR (instead of 4 cases). no more deferred flags because it's easy to shoot ourselves in the foot with deferred flags w.r.t the new RA design, and rotates are rare enough with very efficient flag calculations such that the extra JIT overhead should be minimal to DCE the resulting calculations later. 
Signed-off-by: Alyssa Rosenzweig --- .../Interface/Core/OpcodeDispatcher.cpp | 220 +++++------------- .../Source/Interface/Core/OpcodeDispatcher.h | 92 +------- .../Interface/Core/OpcodeDispatcher/Flags.cpp | 129 ---------- 3 files changed, 67 insertions(+), 374 deletions(-) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 12091359d6..b570abd7ad 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -1891,165 +1891,71 @@ void OpDispatchBuilder::ASHRImmediateOp(OpcodeArgs) { GenerateFlags_SignShiftRightImmediate(Op, Result, Dest, Shift); } -template -void OpDispatchBuilder::ROROp(OpcodeArgs) { - OrderedNode *Src; - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); - - const uint32_t Size = GetSrcBitSize(Op); - if constexpr (Is1Bit) { - Src = _Constant(std::max(32U, Size), 1); - } else { - Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags); - } - - // x86 masks the shift by 0x3F or 0x1F depending on size of op - if (Size == 64) { - Src = _And(OpSize::i64Bit, Src, _Constant(Size, 0x3F)); - } else { - Src = _And(OpSize::i32Bit, Src, _Constant(Size, 0x1F)); - } - - if (Size < 32) { - // ARM doesn't support 8/16bit rotates. Emulate with an insert - // StoreResult truncates back to a 8/16 bit value - Dest = _Bfi(OpSize::i32Bit, Size, Size, Dest, Dest); - if (Size == 8 && !Is1Bit) { - // And because the shift size isn't masked to 8 bits, we need to fill the - // the full 32bits to get the correct result. - Dest = _Bfi(OpSize::i32Bit, 16, 16, Dest, Dest); - } - } - - auto ALUOp = _Ror(Size == 64 ? 
OpSize::i64Bit : OpSize::i32Bit, Dest, Src); - - StoreResult(GPRClass, Op, ALUOp, -1); - - if constexpr (Is1Bit) { - GenerateFlags_RotateRightImmediate(Op, ALUOp, Dest, 1); - } else { - GenerateFlags_RotateRight(Op, ALUOp, Dest, Src); - } -} - -void OpDispatchBuilder::RORImmediateOp(OpcodeArgs) { - // See ROLImmediateOp for masking explanation - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); - - LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here"); - - uint64_t Shift = Op->Src[1].Data.Literal.Value; - const uint32_t Size = GetSrcBitSize(Op); - - // x86 masks the shift by 0x3F or 0x1F depending on size of op - if (Size == 64) { - Shift &= 0x3F; - } else { - Shift &= 0x1F; - } +template +void OpDispatchBuilder::RotateOp(OpcodeArgs) { + CalculateDeferredFlags(); - OrderedNode *Src = _Constant(std::max(32U, Size), Shift); + auto LoadShift = [this, Op](bool MustMask) -> OrderedNode * { + // x86 masks the shift by 0x3F or 0x1F depending on size of op + const uint32_t Size = GetSrcBitSize(Op); + uint64_t Mask = Size == 64 ? 0x3F : 0x1F; - if (Size < 32) { - // ARM doesn't support 8/16bit rotates. Emulate with an insert - // StoreResult truncates back to a 8/16 bit value - Dest = _Bfi(OpSize::i32Bit, Size, Size, Dest, Dest); - if (Size == 8 && Shift > 8) { - // And because the shift size isn't masked to 8 bits, we need to fill the - // the full 32bits to get the correct result. - Dest = _Bfi(OpSize::i32Bit, 16, 16, Dest, Dest); + if (Is1Bit) { + return _Constant(1); + } else if (IsImmediate) { + LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here"); + return _Constant(Op->Src[1].Data.Literal.Value & Mask); + } else { + auto Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true}); + return MustMask ? _And(OpSize::i64Bit, Src, _Constant(Mask)) : Src; } - } - - auto ALUOp = _Ror(Size == 64 ? 
OpSize::i64Bit : OpSize::i32Bit, Dest, Src); - - StoreResult(GPRClass, Op, ALUOp, -1); - - GenerateFlags_RotateRightImmediate(Op, ALUOp, Dest, Shift); -} + }; -template -void OpDispatchBuilder::ROLOp(OpcodeArgs) { - OrderedNode *Src; - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags); + Calculate_ShiftVariable(LoadShift(true), [this, LoadShift, Op](){ + const uint32_t Size = GetSrcBitSize(Op); + const auto OpSize = Size == 64 ? OpSize::i64Bit : OpSize::i32Bit; - const uint32_t Size = GetSrcBitSize(Op); + // We don't need to mask when we rematerialize since the Ror absorbs. + auto Src = LoadShift(false); - // Need to negate the shift so we can use ROR instead - if constexpr (Is1Bit) { - Src = _Constant(Size, 1); - } else { - Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags); - } + uint64_t Const; + bool IsConst = IsValueConstant(WrapNode(Src), &Const); - // x86 masks the shift by 0x3F or 0x1F depending on size of op - if (Size == 64) { - Src = _And(OpSize::i64Bit, Src, _Constant(Size, 0x3F)); - } else { - Src = _And(OpSize::i32Bit, Src, _Constant(Size, 0x1F)); - } + // We fill the upper bits so we allow garbage on load. + auto Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); - if (Size < 32) { - // ARM doesn't support 8/16bit rotates. Emulate with an insert - // StoreResult truncates back to a 8/16 bit value - Dest = _Bfi(OpSize::i32Bit, Size, Size, Dest, Dest); - if (Size == 8) { - // And because the shift size isn't masked to 8 bits, we need to fill the - // the full 32bits to get the correct result. - Dest = _Bfi(OpSize::i32Bit, 16, 16, Dest, Dest); + if (Size < 32) { + // ARM doesn't support 8/16bit rotates. Emulate with an insert + // StoreResult truncates back to a 8/16 bit value + Dest = _Bfi(OpSize, Size, Size, Dest, Dest); + + if (Size == 8 && !(IsConst && Const < 8 && !Left)) { + // And because the shift size isn't masked to 8 bits, we need to fill the + // full 32bits to get the correct result.
+ Dest = _Bfi(OpSize, 16, 16, Dest, Dest); + } } - } - - auto ALUOp = _Ror(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, - Dest, - _Sub(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, _Constant(Size, std::max(32U, Size)), Src)); - StoreResult(GPRClass, Op, ALUOp, -1); - - if constexpr (Is1Bit) { - GenerateFlags_RotateLeftImmediate(Op, ALUOp, Dest, 1); - } else { - GenerateFlags_RotateLeft(Op, ALUOp, Dest, Src); - } -} - -void OpDispatchBuilder::ROLImmediateOp(OpcodeArgs) { - // For 32-bit, garbage is ignored in hardware. For < 32, see Bfi comment. - OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true}); + // To rotate 64-bits left, right-rotate by (64 - Shift) = -Shift mod 64. + auto Res = _Ror(OpSize, Dest, Left ? _Neg(OpSize, Src) : Src); + StoreResult(GPRClass, Op, Res, -1); - LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here"); + // Ends up faster overall if we don't have FlagM, slower if we do... + // If Shift != 1, OF is undefined so we choose to zero here. + if (!CTX->HostFeatures.SupportsFlagM) + ZeroCV(); - uint64_t Shift = Op->Src[1].Data.Literal.Value; - const uint32_t Size = GetSrcBitSize(Op); + // Extract the last bit shifted in to CF + SetRFLAG(Res, Left ? 0 : Size - 1, true); - // x86 masks the shift by 0x3F or 0x1F depending on size of op - if (Size == 64) { - Shift &= 0x3F; - } else { - Shift &= 0x1F; - } - - // We also negate the shift so we can emulate Rol with Ror. - const auto NegatedShift = std::max(32U, Size) - Shift; - OrderedNode *Src = _Constant(Size, NegatedShift); - - if (Size < 32) { - // ARM doesn't support 8/16bit rotates. Emulate with an insert - // StoreResult truncates back to a 8/16 bit value. The inserts have the side - // effect of stomping over any garbage we had in the upper bits. 
- Dest = _Bfi(OpSize::i32Bit, Size, Size, Dest, Dest); - if (Size == 8) { - // And because the shift size isn't masked to 8 bits, we need to fill the - // the full 32bits to get the correct result. - Dest = _Bfi(OpSize::i32Bit, 16, 16, Dest, Dest); + // For ROR, OF is the XOR of the new CF bit and the most significant bit of the result. + // For ROL, OF is the LSB and MSB XOR'd together. + // OF is architecturally only defined for 1-bit rotate. + if (!IsConst || Const == 1) { + auto NewOF = _XorShift(OpSize, Res, Res, ShiftType::LSR, Left ? Size - 1 : 1); + SetRFLAG(NewOF, Left ? 0 : Size - 2, true); } - } - - auto ALUOp = _Ror(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Dest, Src); - - StoreResult(GPRClass, Op, ALUOp, -1); - - GenerateFlags_RotateLeftImmediate(Op, ALUOp, Dest, Shift); + }); } void OpDispatchBuilder::ANDNBMIOp(OpcodeArgs) { @@ -6144,8 +6050,8 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 7), 1, &OpDispatchBuilder::CMPOp<1>}, // GROUP 2 - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 0), 1, &OpDispatchBuilder::ROLImmediateOp}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 1), 1, &OpDispatchBuilder::RORImmediateOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 0), 1, &OpDispatchBuilder::RotateOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 1), 1, &OpDispatchBuilder::RotateOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 2), 1, &OpDispatchBuilder::RCLOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 3), 1, &OpDispatchBuilder::RCROp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 4), 1, &OpDispatchBuilder::SHLImmediateOp}, @@ -6153,8 +6059,8 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 6), 1, &OpDispatchBuilder::SHLImmediateOp}, // SAL {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 7), 1, 
&OpDispatchBuilder::ASHRImmediateOp}, // SAR - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 0), 1, &OpDispatchBuilder::ROLImmediateOp}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 1), 1, &OpDispatchBuilder::RORImmediateOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 0), 1, &OpDispatchBuilder::RotateOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 1), 1, &OpDispatchBuilder::RotateOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 2), 1, &OpDispatchBuilder::RCLOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 3), 1, &OpDispatchBuilder::RCROp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 4), 1, &OpDispatchBuilder::SHLImmediateOp}, @@ -6162,8 +6068,8 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 6), 1, &OpDispatchBuilder::SHLImmediateOp}, // SAL {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 7), 1, &OpDispatchBuilder::ASHRImmediateOp}, // SAR - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 0), 1, &OpDispatchBuilder::ROLOp}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 1), 1, &OpDispatchBuilder::ROROp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 0), 1, &OpDispatchBuilder::RotateOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 1), 1, &OpDispatchBuilder::RotateOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 2), 1, &OpDispatchBuilder::RCLOp1Bit}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 3), 1, &OpDispatchBuilder::RCROp8x1Bit}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 4), 1, &OpDispatchBuilder::SHLOp}, @@ -6171,8 +6077,8 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 6), 1, &OpDispatchBuilder::SHLOp}, // SAL {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 7), 1, &OpDispatchBuilder::ASHROp}, // SAR - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, 
OpToIndex(0xD1), 0), 1, &OpDispatchBuilder::ROLOp}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 1), 1, &OpDispatchBuilder::ROROp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 0), 1, &OpDispatchBuilder::RotateOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 1), 1, &OpDispatchBuilder::RotateOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 2), 1, &OpDispatchBuilder::RCLOp1Bit}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 3), 1, &OpDispatchBuilder::RCROp1Bit}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 4), 1, &OpDispatchBuilder::SHLOp}, @@ -6180,8 +6086,8 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 6), 1, &OpDispatchBuilder::SHLOp}, // SAL {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 7), 1, &OpDispatchBuilder::ASHROp}, // SAR - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 0), 1, &OpDispatchBuilder::ROLOp}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 1), 1, &OpDispatchBuilder::ROROp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 0), 1, &OpDispatchBuilder::RotateOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 1), 1, &OpDispatchBuilder::RotateOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 2), 1, &OpDispatchBuilder::RCLSmallerOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 3), 1, &OpDispatchBuilder::RCRSmallerOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 4), 1, &OpDispatchBuilder::SHLOp}, @@ -6189,8 +6095,8 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 6), 1, &OpDispatchBuilder::SHLOp}, // SAL {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 7), 1, &OpDispatchBuilder::ASHROp}, // SAR - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 0), 1, &OpDispatchBuilder::ROLOp}, - {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 1), 1, 
&OpDispatchBuilder::ROROp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 0), 1, &OpDispatchBuilder::RotateOp}, + {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 1), 1, &OpDispatchBuilder::RotateOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 2), 1, &OpDispatchBuilder::RCLOp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 3), 1, &OpDispatchBuilder::RCROp}, {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 4), 1, &OpDispatchBuilder::SHLOp}, diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index cf0607118b..e65dd6374a 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -89,10 +89,6 @@ friend class FEXCore::IR::PassManager; TYPE_LSHRDI, TYPE_ASHR, TYPE_ASHRI, - TYPE_ROR, - TYPE_RORI, - TYPE_ROL, - TYPE_ROLI, TYPE_BEXTR, TYPE_BLSI, TYPE_BLSMSK, @@ -321,12 +317,8 @@ friend class FEXCore::IR::PassManager; template void ASHROp(OpcodeArgs); void ASHRImmediateOp(OpcodeArgs); - template - void ROROp(OpcodeArgs); - void RORImmediateOp(OpcodeArgs); - template - void ROLOp(OpcodeArgs); - void ROLImmediateOp(OpcodeArgs); + template + void RotateOp(OpcodeArgs); void RCROp1Bit(OpcodeArgs); void RCROp8x1Bit(OpcodeArgs); void RCROp(OpcodeArgs); @@ -1670,13 +1662,13 @@ friend class FEXCore::IR::PassManager; OrderedNode *Src1; } OneSource; - // Logical, LSHL, LSHR, ASHR, ROR, ROL + // Logical, LSHL, LSHR, ASHR struct { OrderedNode *Src1; OrderedNode *Src2; } TwoSource; - // LSHLI, LSHRI, ASHRI, RORI, ROLI + // LSHLI, LSHRI, ASHRI struct { OrderedNode *Src1; uint64_t Imm; @@ -1790,10 +1782,6 @@ friend class FEXCore::IR::PassManager; void CalculateFlags_ShiftRightImmediateCommon(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift); void CalculateFlags_SignShiftRight(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2); void CalculateFlags_SignShiftRightImmediate(uint8_t 
SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift); - void CalculateFlags_RotateRight(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2); - void CalculateFlags_RotateLeft(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2); - void CalculateFlags_RotateRightImmediate(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift); - void CalculateFlags_RotateLeftImmediate(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift); void CalculateFlags_BEXTR(OrderedNode *Src); void CalculateFlags_BLSI(uint8_t SrcSize, OrderedNode *Src); void CalculateFlags_BLSMSK(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src); @@ -1981,78 +1969,6 @@ friend class FEXCore::IR::PassManager; }; } - void GenerateFlags_RotateRight(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) { - // Doesn't set all the flags, needs to calculate. - CalculateDeferredFlags(); - - CurrentDeferredFlags = DeferredFlagData { - .Type = FlagsGenerationType::TYPE_ROR, - .SrcSize = GetSrcSize(Op), - .Res = Res, - .Sources = { - .TwoSource = { - .Src1 = Src1, - .Src2 = Src2, - }, - }, - }; - } - - void GenerateFlags_RotateLeft(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) { - // Doesn't set all the flags, needs to calculate. - CalculateDeferredFlags(); - - CurrentDeferredFlags = DeferredFlagData { - .Type = FlagsGenerationType::TYPE_ROL, - .SrcSize = GetSrcSize(Op), - .Res = Res, - .Sources = { - .TwoSource = { - .Src1 = Src1, - .Src2 = Src2, - }, - }, - }; - } - - void GenerateFlags_RotateRightImmediate(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift) { - if (Shift == 0) return; - - // Doesn't set all the flags, needs to calculate. 
- CalculateDeferredFlags(); - - CurrentDeferredFlags = DeferredFlagData { - .Type = FlagsGenerationType::TYPE_RORI, - .SrcSize = GetSrcSize(Op), - .Res = Res, - .Sources = { - .OneSrcImmediate = { - .Src1 = Src1, - .Imm = Shift, - }, - }, - }; - } - - void GenerateFlags_RotateLeftImmediate(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift) { - if (Shift == 0) return; - - // Doesn't set all the flags, needs to calculate. - CalculateDeferredFlags(); - - CurrentDeferredFlags = DeferredFlagData { - .Type = FlagsGenerationType::TYPE_ROLI, - .SrcSize = GetSrcSize(Op), - .Res = Res, - .Sources = { - .OneSrcImmediate = { - .Src1 = Src1, - .Imm = Shift, - }, - } - }; - } - void GenerateFlags_BEXTR(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Src) { CurrentDeferredFlags = DeferredFlagData { .Type = FlagsGenerationType::TYPE_BEXTR, diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp index b8aa8371fa..2be21cf7bc 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp @@ -401,34 +401,6 @@ void OpDispatchBuilder::CalculateDeferredFlags(uint32_t FlagsToCalculateMask) { CurrentDeferredFlags.Sources.OneSrcImmediate.Src1, CurrentDeferredFlags.Sources.OneSrcImmediate.Imm); break; - case FlagsGenerationType::TYPE_ROR: - CalculateFlags_RotateRight( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.TwoSource.Src1, - CurrentDeferredFlags.Sources.TwoSource.Src2); - break; - case FlagsGenerationType::TYPE_RORI: - CalculateFlags_RotateRightImmediate( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.OneSrcImmediate.Src1, - CurrentDeferredFlags.Sources.OneSrcImmediate.Imm); - break; - case FlagsGenerationType::TYPE_ROL: - CalculateFlags_RotateLeft( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - 
CurrentDeferredFlags.Sources.TwoSource.Src1, - CurrentDeferredFlags.Sources.TwoSource.Src2); - break; - case FlagsGenerationType::TYPE_ROLI: - CalculateFlags_RotateLeftImmediate( - CurrentDeferredFlags.SrcSize, - CurrentDeferredFlags.Res, - CurrentDeferredFlags.Sources.OneSrcImmediate.Src1, - CurrentDeferredFlags.Sources.OneSrcImmediate.Imm); - break; case FlagsGenerationType::TYPE_BEXTR: CalculateFlags_BEXTR(CurrentDeferredFlags.Res); break; @@ -835,107 +807,6 @@ void OpDispatchBuilder::CalculateFlags_ShiftRightDoubleImmediate(uint8_t SrcSize } } -void OpDispatchBuilder::CalculateFlags_RotateRight(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) { - CalculateFlags_ShiftVariable(Src2, [this, SrcSize, Res](){ - auto SizeBits = SrcSize * 8; - const auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit; - - // Ends up faster overall if we don't have FlagM, slower if we do... - // If Shift != 1, OF is undefined so we choose to zero here. - if (!CTX->HostFeatures.SupportsFlagM) - ZeroCV(); - - // Extract the last bit shifted in to CF - SetRFLAG(Res, SizeBits - 1, true); - - // OF is set to the XOR of the new CF bit and the most significant bit of the result - // OF is architecturally only defined for 1-bit rotate, which is why this only happens when the shift is one. - auto NewOF = _XorShift(OpSize, Res, Res, ShiftType::LSR, 1); - SetRFLAG(NewOF, SizeBits - 2, true); - }); -} - -void OpDispatchBuilder::CalculateFlags_RotateLeft(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) { - CalculateFlags_ShiftVariable(Src2, [this, SrcSize, Res](){ - const auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit; - auto SizeBits = SrcSize * 8; - - // Ends up faster overall if we don't have FlagM, slower if we do... - // If Shift != 1, OF is undefined so we choose to zero here. 
- if (!CTX->HostFeatures.SupportsFlagM) - ZeroCV(); - - // Extract the last bit shifted in to CF - //auto Size = _Constant(GetSrcSize(Res) * 8); - //auto ShiftAmt = _Sub(OpSize::i64Bit, Size, Src2); - SetRFLAG(Res, 0, true); - - // OF is the LSB and MSB XOR'd together. - // OF is set to the XOR of the new CF bit and the most significant bit of the result. - // OF is architecturally only defined for 1-bit rotate, which is why this only happens when the shift is one. - auto NewOF = _XorShift(OpSize, Res, Res, ShiftType::LSR, SizeBits - 1); - SetRFLAG(NewOF, 0, true); - }); -} - -void OpDispatchBuilder::CalculateFlags_RotateRightImmediate(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift) { - if (Shift == 0) return; - - const auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit; - auto SizeBits = SrcSize * 8; - - // Ends up faster overall if we don't have FlagM, slower if we do... - // If Shift != 1, OF is undefined so we choose to zero here. - if (!CTX->HostFeatures.SupportsFlagM) - ZeroCV(); - - // CF - { - // Extract the last bit shifted in to CF - SetRFLAG(Res, SizeBits - 1, true); - } - - // OF - { - if (Shift == 1) { - // OF is the top two MSBs XOR'd together - // OF is architecturally only defined for 1-bit rotate, which is why this only happens when the shift is one. - auto NewOF = _XorShift(OpSize, Res, Res, ShiftType::LSR, 1); - SetRFLAG(NewOF, SizeBits - 2, 1); - } - } -} - -void OpDispatchBuilder::CalculateFlags_RotateLeftImmediate(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift) { - if (Shift == 0) return; - - const auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit; - auto SizeBits = SrcSize * 8; - - // Ends up faster overall if we don't have FlagM, slower if we do... - // If Shift != 1, OF is undefined so we choose to zero here. 
- if (!CTX->HostFeatures.SupportsFlagM) - ZeroCV(); - - // CF - { - // Extract the last bit shifted in to CF - SetRFLAG(Res, 0, true); - } - - // OF - { - if (Shift == 1) { - // OF is the LSB and MSB XOR'd together. - // OF is set to the XOR of the new CF bit and the most significant bit of the result. - // OF is architecturally only defined for 1-bit rotate, which is why this only happens when the shift is one. - auto NewOF = _XorShift(OpSize, Res, Res, ShiftType::LSR, SizeBits - 1); - - SetRFLAG(NewOF, 0, true); - } - } -} - void OpDispatchBuilder::CalculateFlags_BEXTR(OrderedNode *Src) { // ZF is set properly. CF and OF are defined as being set to zero. SF, PF, and // AF are undefined. From 2aa1fd7fa337005b85d5db25ab9952e59cb5d1fc Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Sun, 31 Mar 2024 14:41:57 -0400 Subject: [PATCH 4/4] InstCountCI: Update Signed-off-by: Alyssa Rosenzweig --- .../FlagM/PrimaryGroup.json | 142 +++++++-------- .../InstructionCountCI/PrimaryGroup.json | 166 ++++++++---------- 2 files changed, 136 insertions(+), 172 deletions(-) diff --git a/unittests/InstructionCountCI/FlagM/PrimaryGroup.json b/unittests/InstructionCountCI/FlagM/PrimaryGroup.json index dd7ce4bd1e..aea5406844 100644 --- a/unittests/InstructionCountCI/FlagM/PrimaryGroup.json +++ b/unittests/InstructionCountCI/FlagM/PrimaryGroup.json @@ -1246,8 +1246,8 @@ "ExpectedInstructionCount": 8, "Comment": "GROUP2 0xd0 /0", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "bfi w20, w20, #8, #8", + "mov w20, w4", + "bfi w20, w4, #8, #8", "bfi w20, w20, #16, #16", "ror w20, w20, #31", "bfxil x4, x20, #0, #8", @@ -1260,8 +1260,8 @@ "ExpectedInstructionCount": 7, "Comment": "GROUP2 0xd0 /1", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "bfi w20, w20, #8, #8", + "mov w20, w4", + "bfi w20, w4, #8, #8", "ror w20, w20, #1", "bfxil x4, x20, #0, #8", "rmif x20, #6, #nzCv", @@ -1338,8 +1338,8 @@ "ExpectedInstructionCount": 7, "Comment": "GROUP2 0xd1 /0", "ExpectedArm64ASM": [ - "uxth w20, w4", - 
"bfi w20, w20, #16, #16", + "mov w20, w4", + "bfi w20, w4, #16, #16", "ror w20, w20, #31", "bfxil x4, x20, #0, #16", "rmif x20, #63, #nzCv", @@ -1348,11 +1348,10 @@ ] }, "rol eax, 1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 4, "Comment": "GROUP2 0xd1 /0", "ExpectedArm64ASM": [ - "mov w20, w4", - "ror w4, w20, #31", + "ror w4, w4, #31", "rmif x4, #63, #nzCv", "eor w20, w4, w4, lsr #31", "rmif x20, #0, #nzcV" @@ -1372,8 +1371,8 @@ "ExpectedInstructionCount": 7, "Comment": "GROUP2 0xd1 /1", "ExpectedArm64ASM": [ - "uxth w20, w4", - "bfi w20, w20, #16, #16", + "mov w20, w4", + "bfi w20, w4, #16, #16", "ror w20, w20, #1", "bfxil x4, x20, #0, #16", "rmif x20, #14, #nzCv", @@ -1382,11 +1381,10 @@ ] }, "ror eax, 1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 4, "Comment": "GROUP2 0xd1 /1", "ExpectedArm64ASM": [ - "mov w20, w4", - "ror w4, w20, #1", + "ror w4, w4, #1", "rmif x4, #30, #nzCv", "eor w20, w4, w4, lsr #1", "rmif x20, #30, #nzcV" @@ -1585,36 +1583,33 @@ ] }, "rol al, cl": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xd2 /0", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "uxtb w21, w5", - "and w21, w21, #0x1f", - "bfi w20, w20, #8, #8", + "and x20, x5, #0x1f", + "cbz x20, #+0x28", + "mov w20, w4", + "bfi w20, w4, #8, #8", "bfi w20, w20, #16, #16", - "mov w22, #0x20", - "sub w22, w22, w21", - "ror w20, w20, w22", + "neg w21, w5", + "ror w20, w20, w21", "bfxil x4, x20, #0, #8", - "cbz x21, #+0x10", "rmif x20, #63, #nzCv", "eor w20, w20, w20, lsr #7", "rmif x20, #0, #nzcV" ] }, "ror al, cl": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 10, "Comment": "GROUP2 0xd2 /1", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "uxtb w21, w5", - "and w21, w21, #0x1f", - "bfi w20, w20, #8, #8", + "and x20, x5, #0x1f", + "cbz x20, #+0x24", + "mov w20, w4", + "bfi w20, w4, #8, #8", "bfi w20, w20, #16, #16", - "ror w20, w20, w21", + "ror w20, w20, w5", "bfxil x4, x20, #0, #8", - 
"cbz x21, #+0x10", "rmif x20, #6, #nzCv", "eor w20, w20, w20, lsr #1", "rmif x20, #6, #nzcV" @@ -1733,96 +1728,83 @@ ] }, "rol ax, cl": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 10, "Comment": "GROUP2 0xd3 /0", "ExpectedArm64ASM": [ - "uxth w20, w4", - "uxth w21, w5", - "and w21, w21, #0x1f", - "bfi w20, w20, #16, #16", - "mov w22, #0x20", - "sub w22, w22, w21", - "ror w20, w20, w22", + "and x20, x5, #0x1f", + "cbz x20, #+0x24", + "mov w20, w4", + "bfi w20, w4, #16, #16", + "neg w21, w5", + "ror w20, w20, w21", "bfxil x4, x20, #0, #16", - "cbz x21, #+0x10", "rmif x20, #63, #nzCv", "eor w20, w20, w20, lsr #15", "rmif x20, #0, #nzcV" ] }, "rol eax, cl": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 7, "Comment": "GROUP2 0xd3 /0", "ExpectedArm64ASM": [ - "mov w20, w4", - "mov w21, w5", - "and w21, w21, #0x1f", - "mov w22, #0x20", - "sub w22, w22, w21", - "ror w20, w20, w22", - "mov x4, x20", - "cbz x21, #+0x10", - "rmif x20, #63, #nzCv", - "eor w20, w20, w20, lsr #31", + "and x20, x5, #0x1f", + "cbz x20, #+0x18", + "neg w20, w5", + "ror w4, w4, w20", + "rmif x4, #63, #nzCv", + "eor w20, w4, w4, lsr #31", "rmif x20, #0, #nzcV" ] }, "rol rax, cl": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 7, "Comment": "GROUP2 0xd3 /0", "ExpectedArm64ASM": [ "and x20, x5, #0x3f", - "mov w21, #0x40", - "sub x21, x21, x20", - "ror x21, x4, x21", - "mov x4, x21", - "cbz x20, #+0x10", - "rmif x21, #63, #nzCv", - "eor x20, x21, x21, lsr #63", + "cbz x20, #+0x18", + "neg x20, x5", + "ror x4, x4, x20", + "rmif x4, #63, #nzCv", + "eor x20, x4, x4, lsr #63", "rmif x20, #0, #nzcV" ] }, "ror ax, cl": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 9, "Comment": "GROUP2 0xd3 /1", "ExpectedArm64ASM": [ - "uxth w20, w4", - "uxth w21, w5", - "and w21, w21, #0x1f", - "bfi w20, w20, #16, #16", - "ror w20, w20, w21", + "and x20, x5, #0x1f", + "cbz x20, #+0x20", + "mov w20, w4", + "bfi w20, w4, #16, #16", + "ror w20, w20, 
w5", "bfxil x4, x20, #0, #16", - "cbz x21, #+0x10", "rmif x20, #14, #nzCv", "eor w20, w20, w20, lsr #1", "rmif x20, #14, #nzcV" ] }, "ror eax, cl": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 6, "Comment": "GROUP2 0xd3 /1", "ExpectedArm64ASM": [ - "mov w20, w4", - "mov w21, w5", - "and w21, w21, #0x1f", - "ror w20, w20, w21", - "mov x4, x20", - "cbz x21, #+0x10", - "rmif x20, #30, #nzCv", - "eor w20, w20, w20, lsr #1", + "and x20, x5, #0x1f", + "cbz x20, #+0x14", + "ror w4, w4, w5", + "rmif x4, #30, #nzCv", + "eor w20, w4, w4, lsr #1", "rmif x20, #30, #nzcV" ] }, "ror rax, cl": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 6, "Comment": "GROUP2 0xd3 /1", "ExpectedArm64ASM": [ "and x20, x5, #0x3f", - "ror x21, x4, x20", - "mov x4, x21", - "cbz x20, #+0x10", - "rmif x21, #62, #nzCv", - "eor x20, x21, x21, lsr #1", + "cbz x20, #+0x14", + "ror x4, x4, x5", + "rmif x4, #62, #nzCv", + "eor x20, x4, x4, lsr #1", "rmif x20, #62, #nzcV" ] }, diff --git a/unittests/InstructionCountCI/PrimaryGroup.json b/unittests/InstructionCountCI/PrimaryGroup.json index 66426ee8c4..054b14d3ea 100644 --- a/unittests/InstructionCountCI/PrimaryGroup.json +++ b/unittests/InstructionCountCI/PrimaryGroup.json @@ -1417,8 +1417,8 @@ "ExpectedInstructionCount": 13, "Comment": "GROUP2 0xd0 /0", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "bfi w20, w20, #8, #8", + "mov w20, w4", + "bfi w20, w4, #8, #8", "bfi w20, w20, #16, #16", "ror w20, w20, #31", "bfxil x4, x20, #0, #8", @@ -1436,8 +1436,8 @@ "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xd0 /1", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "bfi w20, w20, #8, #8", + "mov w20, w4", + "bfi w20, w4, #8, #8", "ror w20, w20, #1", "bfxil x4, x20, #0, #8", "mrs x21, nzcv", @@ -1546,8 +1546,8 @@ "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xd1 /0", "ExpectedArm64ASM": [ - "uxth w20, w4", - "bfi w20, w20, #16, #16", + "mov w20, w4", + "bfi w20, w4, #16, #16", "ror w20, w20, #31", "bfxil x4, x20, #0, #16", 
"mrs x21, nzcv", @@ -1561,11 +1561,10 @@ ] }, "rol eax, 1": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 9, "Comment": "GROUP2 0xd1 /0", "ExpectedArm64ASM": [ - "mov w20, w4", - "ror w4, w20, #31", + "ror w4, w4, #31", "mrs x20, nzcv", "and w20, w20, #0xc0000000", "ubfx x21, x4, #0, #1", @@ -1595,8 +1594,8 @@ "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xd1 /1", "ExpectedArm64ASM": [ - "uxth w20, w4", - "bfi w20, w20, #16, #16", + "mov w20, w4", + "bfi w20, w4, #16, #16", "ror w20, w20, #1", "bfxil x4, x20, #0, #16", "mrs x21, nzcv", @@ -1610,11 +1609,10 @@ ] }, "ror eax, 1": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 9, "Comment": "GROUP2 0xd1 /1", "ExpectedArm64ASM": [ - "mov w20, w4", - "ror w4, w20, #1", + "ror w4, w4, #1", "mrs x20, nzcv", "and w20, w20, #0xc0000000", "ubfx x21, x4, #31, #1", @@ -1904,19 +1902,17 @@ ] }, "rol al, cl": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 16, "Comment": "GROUP2 0xd2 /0", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "uxtb w21, w5", - "and w21, w21, #0x1f", - "bfi w20, w20, #8, #8", + "and x20, x5, #0x1f", + "cbz x20, #+0x3c", + "mov w20, w4", + "bfi w20, w4, #8, #8", "bfi w20, w20, #16, #16", - "mov w22, #0x20", - "sub w22, w22, w21", - "ror w20, w20, w22", + "neg w21, w5", + "ror w20, w20, w21", "bfxil x4, x20, #0, #8", - "cbz x21, #+0x24", "mrs x21, nzcv", "and w21, w21, #0xc0000000", "ubfx x22, x20, #0, #1", @@ -1928,17 +1924,16 @@ ] }, "ror al, cl": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 15, "Comment": "GROUP2 0xd2 /1", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "uxtb w21, w5", - "and w21, w21, #0x1f", - "bfi w20, w20, #8, #8", + "and x20, x5, #0x1f", + "cbz x20, #+0x38", + "mov w20, w4", + "bfi w20, w4, #8, #8", "bfi w20, w20, #16, #16", - "ror w20, w20, w21", + "ror w20, w20, w5", "bfxil x4, x20, #0, #8", - "cbz x21, #+0x24", "mrs x21, nzcv", "and w21, w21, #0xc0000000", "ubfx x22, x20, #7, #1", @@ -2087,18 +2082,16 @@ ] 
}, "rol ax, cl": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 15, "Comment": "GROUP2 0xd3 /0", "ExpectedArm64ASM": [ - "uxth w20, w4", - "uxth w21, w5", - "and w21, w21, #0x1f", - "bfi w20, w20, #16, #16", - "mov w22, #0x20", - "sub w22, w22, w21", - "ror w20, w20, w22", + "and x20, x5, #0x1f", + "cbz x20, #+0x38", + "mov w20, w4", + "bfi w20, w4, #16, #16", + "neg w21, w5", + "ror w20, w20, w21", "bfxil x4, x20, #0, #16", - "cbz x21, #+0x24", "mrs x21, nzcv", "and w21, w21, #0xc0000000", "ubfx x22, x20, #0, #1", @@ -2110,58 +2103,51 @@ ] }, "rol eax, cl": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xd3 /0", "ExpectedArm64ASM": [ - "mov w20, w4", - "mov w21, w5", - "and w21, w21, #0x1f", - "mov w22, #0x20", - "sub w22, w22, w21", - "ror w20, w20, w22", - "mov x4, x20", - "cbz x21, #+0x24", - "mrs x21, nzcv", - "and w21, w21, #0xc0000000", - "ubfx x22, x20, #0, #1", - "orr w21, w21, w22, lsl #29", - "eor w20, w20, w20, lsr #31", - "ubfx x20, x20, #0, #1", - "orr w20, w21, w20, lsl #28", + "and x20, x5, #0x1f", + "cbz x20, #+0x2c", + "neg w20, w5", + "ror w4, w4, w20", + "mrs x20, nzcv", + "and w20, w20, #0xc0000000", + "ubfx x21, x4, #0, #1", + "orr w20, w20, w21, lsl #29", + "eor w21, w4, w4, lsr #31", + "ubfx x21, x21, #0, #1", + "orr w20, w20, w21, lsl #28", "msr nzcv, x20" ] }, "rol rax, cl": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xd3 /0", "ExpectedArm64ASM": [ "and x20, x5, #0x3f", - "mov w21, #0x40", - "sub x21, x21, x20", - "ror x21, x4, x21", - "mov x4, x21", - "cbz x20, #+0x24", + "cbz x20, #+0x2c", + "neg x20, x5", + "ror x4, x4, x20", "mrs x20, nzcv", "and w20, w20, #0xc0000000", - "ubfx x22, x21, #0, #1", - "orr w20, w20, w22, lsl #29", - "eor x21, x21, x21, lsr #63", + "ubfx x21, x4, #0, #1", + "orr w20, w20, w21, lsl #29", + "eor x21, x4, x4, lsr #63", "ubfx x21, x21, #0, #1", "orr w20, w20, w21, lsl #28", "msr nzcv, x20" ] }, "ror ax, 
cl": { - "ExpectedInstructionCount": 15, + "ExpectedInstructionCount": 14, "Comment": "GROUP2 0xd3 /1", "ExpectedArm64ASM": [ - "uxth w20, w4", - "uxth w21, w5", - "and w21, w21, #0x1f", - "bfi w20, w20, #16, #16", - "ror w20, w20, w21", + "and x20, x5, #0x1f", + "cbz x20, #+0x34", + "mov w20, w4", + "bfi w20, w4, #16, #16", + "ror w20, w20, w5", "bfxil x4, x20, #0, #16", - "cbz x21, #+0x24", "mrs x21, nzcv", "and w21, w21, #0xc0000000", "ubfx x22, x20, #15, #1", @@ -2173,38 +2159,34 @@ ] }, "ror eax, cl": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xd3 /1", "ExpectedArm64ASM": [ - "mov w20, w4", - "mov w21, w5", - "and w21, w21, #0x1f", - "ror w20, w20, w21", - "mov x4, x20", - "cbz x21, #+0x24", - "mrs x21, nzcv", - "and w21, w21, #0xc0000000", - "ubfx x22, x20, #31, #1", - "orr w21, w21, w22, lsl #29", - "eor w20, w20, w20, lsr #1", - "ubfx x20, x20, #30, #1", - "orr w20, w21, w20, lsl #28", + "and x20, x5, #0x1f", + "cbz x20, #+0x28", + "ror w4, w4, w5", + "mrs x20, nzcv", + "and w20, w20, #0xc0000000", + "ubfx x21, x4, #31, #1", + "orr w20, w20, w21, lsl #29", + "eor w21, w4, w4, lsr #1", + "ubfx x21, x21, #30, #1", + "orr w20, w20, w21, lsl #28", "msr nzcv, x20" ] }, "ror rax, cl": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xd3 /1", "ExpectedArm64ASM": [ "and x20, x5, #0x3f", - "ror x21, x4, x20", - "mov x4, x21", - "cbz x20, #+0x24", + "cbz x20, #+0x28", + "ror x4, x4, x5", "mrs x20, nzcv", "and w20, w20, #0xc0000000", - "lsr x22, x21, #63", - "orr w20, w20, w22, lsl #29", - "eor x21, x21, x21, lsr #1", + "lsr x21, x4, #63", + "orr w20, w20, w21, lsl #29", + "eor x21, x4, x4, lsr #1", "ubfx x21, x21, #62, #1", "orr w20, w20, w21, lsl #28", "msr nzcv, x20"