rewrite ROL/ROR #3539

Merged 4 commits on Apr 2, 2024

Changes from all commits
220 changes: 63 additions & 157 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
@@ -1891,165 +1891,71 @@ void OpDispatchBuilder::ASHRImmediateOp(OpcodeArgs) {
GenerateFlags_SignShiftRightImmediate(Op, Result, Dest, Shift);
}

template<bool Is1Bit>
void OpDispatchBuilder::ROROp(OpcodeArgs) {
OrderedNode *Src;
OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags);

const uint32_t Size = GetSrcBitSize(Op);
if constexpr (Is1Bit) {
Src = _Constant(std::max(32U, Size), 1);
} else {
Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags);
}

// x86 masks the shift by 0x3F or 0x1F depending on size of op
if (Size == 64) {
Src = _And(OpSize::i64Bit, Src, _Constant(Size, 0x3F));
} else {
Src = _And(OpSize::i32Bit, Src, _Constant(Size, 0x1F));
}

if (Size < 32) {
// ARM doesn't support 8/16bit rotates. Emulate with an insert
// StoreResult truncates back to a 8/16 bit value
Dest = _Bfi(OpSize::i32Bit, Size, Size, Dest, Dest);
if (Size == 8 && !Is1Bit) {
// And because the shift size isn't masked to 8 bits, we need to fill
// the full 32 bits to get the correct result.
Dest = _Bfi(OpSize::i32Bit, 16, 16, Dest, Dest);
}
}

auto ALUOp = _Ror(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Dest, Src);

StoreResult(GPRClass, Op, ALUOp, -1);

if constexpr (Is1Bit) {
GenerateFlags_RotateRightImmediate(Op, ALUOp, Dest, 1);
} else {
GenerateFlags_RotateRight(Op, ALUOp, Dest, Src);
}
}

void OpDispatchBuilder::RORImmediateOp(OpcodeArgs) {
// See ROLImmediateOp for masking explanation
OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});

LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here");

uint64_t Shift = Op->Src[1].Data.Literal.Value;
const uint32_t Size = GetSrcBitSize(Op);

// x86 masks the shift by 0x3F or 0x1F depending on size of op
if (Size == 64) {
Shift &= 0x3F;
} else {
Shift &= 0x1F;
}
template<bool Left, bool IsImmediate, bool Is1Bit>
void OpDispatchBuilder::RotateOp(OpcodeArgs) {
CalculateDeferredFlags();

OrderedNode *Src = _Constant(std::max(32U, Size), Shift);
auto LoadShift = [this, Op](bool MustMask) -> OrderedNode * {
// x86 masks the shift by 0x3F or 0x1F depending on size of op
const uint32_t Size = GetSrcBitSize(Op);
uint64_t Mask = Size == 64 ? 0x3F : 0x1F;

if (Size < 32) {
// ARM doesn't support 8/16bit rotates. Emulate with an insert
// StoreResult truncates back to a 8/16 bit value
Dest = _Bfi(OpSize::i32Bit, Size, Size, Dest, Dest);
if (Size == 8 && Shift > 8) {
// And because the shift size isn't masked to 8 bits, we need to fill
// the full 32 bits to get the correct result.
Dest = _Bfi(OpSize::i32Bit, 16, 16, Dest, Dest);
if (Is1Bit) {
return _Constant(1);
} else if (IsImmediate) {
LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here");
return _Constant(Op->Src[1].Data.Literal.Value & Mask);
} else {
auto Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});
return MustMask ? _And(OpSize::i64Bit, Src, _Constant(Mask)) : Src;
}
}

auto ALUOp = _Ror(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Dest, Src);

StoreResult(GPRClass, Op, ALUOp, -1);

GenerateFlags_RotateRightImmediate(Op, ALUOp, Dest, Shift);
}
};
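// Note: x86 masks the rotate count to 5 bits even for 8/16-bit operands, so
// e.g. `rol al, cl` with CL=12 really uses a count of 12. Widening the value
// to a full 32 bits below keeps a single 32-bit Ror correct for any count in
// [0, 31].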

template<bool Is1Bit>
void OpDispatchBuilder::ROLOp(OpcodeArgs) {
OrderedNode *Src;
OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags);
Calculate_ShiftVariable(LoadShift(true), [this, LoadShift, Op](){
const uint32_t Size = GetSrcBitSize(Op);
const auto OpSize = Size == 64 ? OpSize::i64Bit : OpSize::i32Bit;

const uint32_t Size = GetSrcBitSize(Op);
// We don't need to mask when we rematerialize, since the Ror absorbs the masking.
auto Src = LoadShift(false);

// Need to negate the shift so we can use ROR instead
if constexpr (Is1Bit) {
Src = _Constant(Size, 1);
} else {
Src = LoadSource(GPRClass, Op, Op->Src[1], Op->Flags);
}
uint64_t Const;
bool IsConst = IsValueConstant(WrapNode(Src), &Const);

// x86 masks the shift by 0x3F or 0x1F depending on size of op
if (Size == 64) {
Src = _And(OpSize::i64Bit, Src, _Constant(Size, 0x3F));
} else {
Src = _And(OpSize::i32Bit, Src, _Constant(Size, 0x1F));
}
// We fill the upper bits so we allow garbage on load.
auto Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});

if (Size < 32) {
// ARM doesn't support 8/16bit rotates. Emulate with an insert
// StoreResult truncates back to a 8/16 bit value
Dest = _Bfi(OpSize::i32Bit, Size, Size, Dest, Dest);
if (Size == 8) {
// And because the shift size isn't masked to 8 bits, we need to fill
// the full 32 bits to get the correct result.
Dest = _Bfi(OpSize::i32Bit, 16, 16, Dest, Dest);
if (Size < 32) {
// ARM doesn't support 8/16bit rotates. Emulate with an insert
// StoreResult truncates back to a 8/16 bit value
Dest = _Bfi(OpSize, Size, Size, Dest, Dest);

if (Size == 8 && !(IsConst && Const < 8 && !Left)) {
// And because the shift size isn't masked to 8 bits, we need to fill
// the full 32 bits to get the correct result.
Dest = _Bfi(OpSize, 16, 16, Dest, Dest);
}
}
}

auto ALUOp = _Ror(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit,
Dest,
_Sub(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, _Constant(Size, std::max(32U, Size)), Src));

StoreResult(GPRClass, Op, ALUOp, -1);

if constexpr (Is1Bit) {
GenerateFlags_RotateLeftImmediate(Op, ALUOp, Dest, 1);
} else {
GenerateFlags_RotateLeft(Op, ALUOp, Dest, Src);
}
}

void OpDispatchBuilder::ROLImmediateOp(OpcodeArgs) {
// For 32-bit, garbage is ignored in hardware. For < 32, see Bfi comment.
OrderedNode *Dest = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});
// To rotate 64-bits left, right-rotate by (64 - Shift) = -Shift mod 64.
auto Res = _Ror(OpSize, Dest, Left ? _Neg(OpSize, Src) : Src);
StoreResult(GPRClass, Op, Res, -1);

LOGMAN_THROW_A_FMT(Op->Src[1].IsLiteral(), "Src1 needs to be literal here");
// Ends up faster overall if we don't have FlagM, slower if we do...
// If Shift != 1, OF is undefined so we choose to zero here.
if (!CTX->HostFeatures.SupportsFlagM)
ZeroCV();

uint64_t Shift = Op->Src[1].Data.Literal.Value;
const uint32_t Size = GetSrcBitSize(Op);
// Extract the last bit shifted in to CF
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(Res, Left ? 0 : Size - 1, true);

// x86 masks the shift by 0x3F or 0x1F depending on size of op
if (Size == 64) {
Shift &= 0x3F;
} else {
Shift &= 0x1F;
}

// We also negate the shift so we can emulate Rol with Ror.
const auto NegatedShift = std::max(32U, Size) - Shift;
OrderedNode *Src = _Constant(Size, NegatedShift);

if (Size < 32) {
// ARM doesn't support 8/16bit rotates. Emulate with an insert
// StoreResult truncates back to a 8/16 bit value. The inserts have the side
// effect of stomping over any garbage we had in the upper bits.
Dest = _Bfi(OpSize::i32Bit, Size, Size, Dest, Dest);
if (Size == 8) {
// And because the shift size isn't masked to 8 bits, we need to fill
// the full 32 bits to get the correct result.
Dest = _Bfi(OpSize::i32Bit, 16, 16, Dest, Dest);
// For ROR, OF is the XOR of the two most significant bits of the result (the new CF bit and the bit just below it).
// For ROL, OF is the LSB and MSB XOR'd together.
// OF is architecturally only defined for 1-bit rotate.
if (!IsConst || Const == 1) {
auto NewOF = _XorShift(OpSize, Res, Res, ShiftType::LSR, Left ? Size - 1 : 1);
SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(NewOF, Left ? 0 : Size - 2, true);
}
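// Worked example (illustrative): `ror al, 1` with AL=0x81 yields 0xC0. CF is
// the wrapped-around bit, i.e. bit 7 of the result (= 1), and OF is bit 7 ^
// bit 6 of the result (= 0), which is exactly what the extracts above read.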
}

auto ALUOp = _Ror(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Dest, Src);

StoreResult(GPRClass, Op, ALUOp, -1);

GenerateFlags_RotateLeftImmediate(Op, ALUOp, Dest, Shift);
});
}

void OpDispatchBuilder::ANDNBMIOp(OpcodeArgs) {
@@ -6144,53 +6050,53 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) {
{OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 7), 1, &OpDispatchBuilder::CMPOp<1>},

// GROUP 2
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 0), 1, &OpDispatchBuilder::ROLImmediateOp},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 1), 1, &OpDispatchBuilder::RORImmediateOp},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 0), 1, &OpDispatchBuilder::RotateOp<true, true, false>},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 1), 1, &OpDispatchBuilder::RotateOp<false, true, false>},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 2), 1, &OpDispatchBuilder::RCLOp},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 3), 1, &OpDispatchBuilder::RCROp},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 4), 1, &OpDispatchBuilder::SHLImmediateOp},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 5), 1, &OpDispatchBuilder::SHRImmediateOp},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 6), 1, &OpDispatchBuilder::SHLImmediateOp}, // SAL
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 7), 1, &OpDispatchBuilder::ASHRImmediateOp}, // SAR

{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 0), 1, &OpDispatchBuilder::ROLImmediateOp},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 1), 1, &OpDispatchBuilder::RORImmediateOp},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 0), 1, &OpDispatchBuilder::RotateOp<true, true, false>},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 1), 1, &OpDispatchBuilder::RotateOp<false, true, false>},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 2), 1, &OpDispatchBuilder::RCLOp},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 3), 1, &OpDispatchBuilder::RCROp},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 4), 1, &OpDispatchBuilder::SHLImmediateOp},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 5), 1, &OpDispatchBuilder::SHRImmediateOp},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 6), 1, &OpDispatchBuilder::SHLImmediateOp}, // SAL
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 7), 1, &OpDispatchBuilder::ASHRImmediateOp}, // SAR

{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 0), 1, &OpDispatchBuilder::ROLOp<true>},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 1), 1, &OpDispatchBuilder::ROROp<true>},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 0), 1, &OpDispatchBuilder::RotateOp<true, true, true>},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 1), 1, &OpDispatchBuilder::RotateOp<false, true, true>},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 2), 1, &OpDispatchBuilder::RCLOp1Bit},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 3), 1, &OpDispatchBuilder::RCROp8x1Bit},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 4), 1, &OpDispatchBuilder::SHLOp<true>},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 5), 1, &OpDispatchBuilder::SHROp<true>}, // 1Bit SHR
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 6), 1, &OpDispatchBuilder::SHLOp<true>}, // SAL
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 7), 1, &OpDispatchBuilder::ASHROp<true>}, // SAR

{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 0), 1, &OpDispatchBuilder::ROLOp<true>},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 1), 1, &OpDispatchBuilder::ROROp<true>},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 0), 1, &OpDispatchBuilder::RotateOp<true, true, true>},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 1), 1, &OpDispatchBuilder::RotateOp<false, true, true>},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 2), 1, &OpDispatchBuilder::RCLOp1Bit},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 3), 1, &OpDispatchBuilder::RCROp1Bit},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 4), 1, &OpDispatchBuilder::SHLOp<true>},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 5), 1, &OpDispatchBuilder::SHROp<true>}, // 1Bit SHR
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 6), 1, &OpDispatchBuilder::SHLOp<true>}, // SAL
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 7), 1, &OpDispatchBuilder::ASHROp<true>}, // SAR

{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 0), 1, &OpDispatchBuilder::ROLOp<false>},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 1), 1, &OpDispatchBuilder::ROROp<false>},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 0), 1, &OpDispatchBuilder::RotateOp<true, false, false>},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 1), 1, &OpDispatchBuilder::RotateOp<false, false, false>},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 2), 1, &OpDispatchBuilder::RCLSmallerOp},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 3), 1, &OpDispatchBuilder::RCRSmallerOp},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 4), 1, &OpDispatchBuilder::SHLOp<false>},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 5), 1, &OpDispatchBuilder::SHROp<false>}, // SHR by CL
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 6), 1, &OpDispatchBuilder::SHLOp<false>}, // SAL
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 7), 1, &OpDispatchBuilder::ASHROp<false>}, // SAR

{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 0), 1, &OpDispatchBuilder::ROLOp<false>},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 1), 1, &OpDispatchBuilder::ROROp<false>},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 0), 1, &OpDispatchBuilder::RotateOp<true, false, false>},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 1), 1, &OpDispatchBuilder::RotateOp<false, false, false>},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 2), 1, &OpDispatchBuilder::RCLOp},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 3), 1, &OpDispatchBuilder::RCROp},
{OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 4), 1, &OpDispatchBuilder::SHLOp<false>},
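
For reference, here is a minimal standalone sketch, in plain C++ rather than FEX IR, of the two identities the rewritten RotateOp leans on: widening an 8-bit operand by replicating it across 32 bits so a 32-bit rotate leaves the correct low byte, and expressing a rotate left as a rotate right by the negated count. Names and constants here are illustrative only.

#include <cassert>
#include <cstdint>

// 32-bit rotate right; the count is taken mod 32, mirroring how AArch64's
// RORV only consumes the low bits of the shift register.
static uint32_t ror32(uint32_t value, uint32_t count) {
  count &= 31;
  return count == 0 ? value : (value >> count) | (value << (32 - count));
}

// 8-bit ROR on top of the 32-bit rotator: replicate the byte across all 32
// bits first (the two Bfi inserts in the dispatcher), so any count up to 31
// still leaves the correct byte in the low 8 bits.
static uint8_t ror8(uint8_t value, uint32_t count) {
  uint32_t wide = value;
  wide |= wide << 8;
  wide |= wide << 16;
  return static_cast<uint8_t>(ror32(wide, count));
}

// Rotate left expressed through rotate right: rotating left by n is the same
// as rotating right by -n once the count is masked.
static uint32_t rol32(uint32_t value, uint32_t count) {
  return ror32(value, 0u - count);
}

int main() {
  assert(ror8(0x81, 1) == 0xC0);        // matches x86 `ror al, 1` with AL=0x81
  assert(rol32(0x80000001u, 1) == 3u);  // matches x86 `rol eax, 1`
  assert(rol32(0xDEADBEEFu, 0) == 0xDEADBEEFu);
  return 0;
}

The asserts mirror the ror al, 1 and rol eax, 1 behaviour exercised by the instruction-count tests further down.
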
108 changes: 14 additions & 94 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.h
@@ -89,10 +89,6 @@ friend class FEXCore::IR::PassManager;
TYPE_LSHRDI,
TYPE_ASHR,
TYPE_ASHRI,
TYPE_ROR,
TYPE_RORI,
TYPE_ROL,
TYPE_ROLI,
TYPE_BEXTR,
TYPE_BLSI,
TYPE_BLSMSK,
@@ -321,12 +317,8 @@ friend class FEXCore::IR::PassManager;
template<bool SHR1Bit>
void ASHROp(OpcodeArgs);
void ASHRImmediateOp(OpcodeArgs);
template<bool Is1Bit>
void ROROp(OpcodeArgs);
void RORImmediateOp(OpcodeArgs);
template<bool Is1Bit>
void ROLOp(OpcodeArgs);
void ROLImmediateOp(OpcodeArgs);
template<bool Left, bool IsImmediate, bool Is1Bit>
void RotateOp(OpcodeArgs);
void RCROp1Bit(OpcodeArgs);
void RCROp8x1Bit(OpcodeArgs);
void RCROp(OpcodeArgs);
@@ -1670,13 +1662,13 @@ friend class FEXCore::IR::PassManager;
OrderedNode *Src1;
} OneSource;

// Logical, LSHL, LSHR, ASHR, ROR, ROL
// Logical, LSHL, LSHR, ASHR
struct {
OrderedNode *Src1;
OrderedNode *Src2;
} TwoSource;

// LSHLI, LSHRI, ASHRI, RORI, ROLI
// LSHLI, LSHRI, ASHRI
struct {
OrderedNode *Src1;
uint64_t Imm;
@@ -1725,15 +1717,12 @@ friend class FEXCore::IR::PassManager;
}

template <typename F>
void CalculateFlags_ShiftVariable(OrderedNode *Shift, F&& CalculateFlags) {
// We are the ones calculating the deferred flags. Don't recurse!
InvalidateDeferredFlags();

void Calculate_ShiftVariable(OrderedNode *Shift, F&& Calculate) {
// RCR can call this with constants, so handle that without branching.
uint64_t Const;
if (IsValueConstant(WrapNode(Shift), &Const)) {
if (Const)
CalculateFlags();
Calculate();

return;
}
@@ -1750,7 +1739,7 @@ friend class FEXCore::IR::PassManager;
SetCurrentCodeBlock(SetBlock);
StartNewBlock();
{
CalculateFlags();
Calculate();
Jump(EndBlock);
}

@@ -1759,6 +1748,13 @@ friend class FEXCore::IR::PassManager;
PossiblySetNZCVBits |= OldSetNZCVBits;
}

template <typename F>
void CalculateFlags_ShiftVariable(OrderedNode *Shift, F&& CalculateFlags) {
// We are the ones calculating the deferred flags. Don't recurse!
InvalidateDeferredFlags();
Calculate_ShiftVariable(Shift, CalculateFlags);
}
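// Note: the rewritten RotateOp computes its flags inline, so it flushes any
// pending deferred flags with CalculateDeferredFlags() and then calls
// Calculate_ShiftVariable directly; the deferred-flag shift helpers keep
// going through this wrapper, which invalidates the deferred state instead.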

/**
* @name These functions are used by the deferred flag handling while it is calculating and storing flags in to RFLAGs.
* @{ */
@@ -1786,10 +1782,6 @@ friend class FEXCore::IR::PassManager;
void CalculateFlags_ShiftRightImmediateCommon(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift);
void CalculateFlags_SignShiftRight(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2);
void CalculateFlags_SignShiftRightImmediate(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift);
void CalculateFlags_RotateRight(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2);
void CalculateFlags_RotateLeft(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2);
void CalculateFlags_RotateRightImmediate(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift);
void CalculateFlags_RotateLeftImmediate(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift);
void CalculateFlags_BEXTR(OrderedNode *Src);
void CalculateFlags_BLSI(uint8_t SrcSize, OrderedNode *Src);
void CalculateFlags_BLSMSK(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src);
@@ -1977,78 +1969,6 @@ friend class FEXCore::IR::PassManager;
};
}

void GenerateFlags_RotateRight(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) {
// Doesn't set all the flags, needs to calculate.
CalculateDeferredFlags();

CurrentDeferredFlags = DeferredFlagData {
.Type = FlagsGenerationType::TYPE_ROR,
.SrcSize = GetSrcSize(Op),
.Res = Res,
.Sources = {
.TwoSource = {
.Src1 = Src1,
.Src2 = Src2,
},
},
};
}

void GenerateFlags_RotateLeft(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) {
// Doesn't set all the flags, needs to calculate.
CalculateDeferredFlags();

CurrentDeferredFlags = DeferredFlagData {
.Type = FlagsGenerationType::TYPE_ROL,
.SrcSize = GetSrcSize(Op),
.Res = Res,
.Sources = {
.TwoSource = {
.Src1 = Src1,
.Src2 = Src2,
},
},
};
}

void GenerateFlags_RotateRightImmediate(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift) {
if (Shift == 0) return;

// Doesn't set all the flags, needs to calculate.
CalculateDeferredFlags();

CurrentDeferredFlags = DeferredFlagData {
.Type = FlagsGenerationType::TYPE_RORI,
.SrcSize = GetSrcSize(Op),
.Res = Res,
.Sources = {
.OneSrcImmediate = {
.Src1 = Src1,
.Imm = Shift,
},
},
};
}

void GenerateFlags_RotateLeftImmediate(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift) {
if (Shift == 0) return;

// Doesn't set all the flags, needs to calculate.
CalculateDeferredFlags();

CurrentDeferredFlags = DeferredFlagData {
.Type = FlagsGenerationType::TYPE_ROLI,
.SrcSize = GetSrcSize(Op),
.Res = Res,
.Sources = {
.OneSrcImmediate = {
.Src1 = Src1,
.Imm = Shift,
},
}
};
}

void GenerateFlags_BEXTR(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Src) {
CurrentDeferredFlags = DeferredFlagData {
.Type = FlagsGenerationType::TYPE_BEXTR,
129 changes: 0 additions & 129 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp
@@ -401,34 +401,6 @@ void OpDispatchBuilder::CalculateDeferredFlags(uint32_t FlagsToCalculateMask) {
CurrentDeferredFlags.Sources.OneSrcImmediate.Src1,
CurrentDeferredFlags.Sources.OneSrcImmediate.Imm);
break;
case FlagsGenerationType::TYPE_ROR:
CalculateFlags_RotateRight(
CurrentDeferredFlags.SrcSize,
CurrentDeferredFlags.Res,
CurrentDeferredFlags.Sources.TwoSource.Src1,
CurrentDeferredFlags.Sources.TwoSource.Src2);
break;
case FlagsGenerationType::TYPE_RORI:
CalculateFlags_RotateRightImmediate(
CurrentDeferredFlags.SrcSize,
CurrentDeferredFlags.Res,
CurrentDeferredFlags.Sources.OneSrcImmediate.Src1,
CurrentDeferredFlags.Sources.OneSrcImmediate.Imm);
break;
case FlagsGenerationType::TYPE_ROL:
CalculateFlags_RotateLeft(
CurrentDeferredFlags.SrcSize,
CurrentDeferredFlags.Res,
CurrentDeferredFlags.Sources.TwoSource.Src1,
CurrentDeferredFlags.Sources.TwoSource.Src2);
break;
case FlagsGenerationType::TYPE_ROLI:
CalculateFlags_RotateLeftImmediate(
CurrentDeferredFlags.SrcSize,
CurrentDeferredFlags.Res,
CurrentDeferredFlags.Sources.OneSrcImmediate.Src1,
CurrentDeferredFlags.Sources.OneSrcImmediate.Imm);
break;
case FlagsGenerationType::TYPE_BEXTR:
CalculateFlags_BEXTR(CurrentDeferredFlags.Res);
break;
@@ -835,107 +807,6 @@ void OpDispatchBuilder::CalculateFlags_ShiftRightDoubleImmediate(uint8_t SrcSize
}
}

void OpDispatchBuilder::CalculateFlags_RotateRight(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) {
CalculateFlags_ShiftVariable(Src2, [this, SrcSize, Res](){
auto SizeBits = SrcSize * 8;
const auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit;

// Ends up faster overall if we don't have FlagM, slower if we do...
// If Shift != 1, OF is undefined so we choose to zero here.
if (!CTX->HostFeatures.SupportsFlagM)
ZeroCV();

// Extract the last bit shifted in to CF
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(Res, SizeBits - 1, true);

// OF is set to the XOR of the new CF bit and the most significant bit of the result
// OF is architecturally only defined for 1-bit rotate, which is why this only happens when the shift is one.
auto NewOF = _XorShift(OpSize, Res, Res, ShiftType::LSR, 1);
SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(NewOF, SizeBits - 2, true);
});
}

void OpDispatchBuilder::CalculateFlags_RotateLeft(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) {
CalculateFlags_ShiftVariable(Src2, [this, SrcSize, Res](){
const auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit;
auto SizeBits = SrcSize * 8;

// Ends up faster overall if we don't have FlagM, slower if we do...
// If Shift != 1, OF is undefined so we choose to zero here.
if (!CTX->HostFeatures.SupportsFlagM)
ZeroCV();

// Extract the last bit shifted in to CF
//auto Size = _Constant(GetSrcSize(Res) * 8);
//auto ShiftAmt = _Sub(OpSize::i64Bit, Size, Src2);
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(Res, 0, true);

// OF is the LSB and MSB XOR'd together.
// OF is set to the XOR of the new CF bit and the most significant bit of the result.
// OF is architecturally only defined for 1-bit rotate, which is why this only happens when the shift is one.
auto NewOF = _XorShift(OpSize, Res, Res, ShiftType::LSR, SizeBits - 1);
SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(NewOF, 0, true);
});
}

void OpDispatchBuilder::CalculateFlags_RotateRightImmediate(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift) {
if (Shift == 0) return;

const auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit;
auto SizeBits = SrcSize * 8;

// Ends up faster overall if we don't have FlagM, slower if we do...
// If Shift != 1, OF is undefined so we choose to zero here.
if (!CTX->HostFeatures.SupportsFlagM)
ZeroCV();

// CF
{
// Extract the last bit shifted in to CF
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(Res, SizeBits - 1, true);
}

// OF
{
if (Shift == 1) {
// OF is the top two MSBs XOR'd together
// OF is architecturally only defined for 1-bit rotate, which is why this only happens when the shift is one.
auto NewOF = _XorShift(OpSize, Res, Res, ShiftType::LSR, 1);
SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(NewOF, SizeBits - 2, 1);
}
}
}

void OpDispatchBuilder::CalculateFlags_RotateLeftImmediate(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift) {
if (Shift == 0) return;

const auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit;
auto SizeBits = SrcSize * 8;

// Ends up faster overall if we don't have FlagM, slower if we do...
// If Shift != 1, OF is undefined so we choose to zero here.
if (!CTX->HostFeatures.SupportsFlagM)
ZeroCV();

// CF
{
// Extract the last bit shifted in to CF
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(Res, 0, true);
}

// OF
{
if (Shift == 1) {
// OF is the LSB and MSB XOR'd together.
// OF is set to the XOR of the new CF bit and the most significant bit of the result.
// OF is architecturally only defined for 1-bit rotate, which is why this only happens when the shift is one.
auto NewOF = _XorShift(OpSize, Res, Res, ShiftType::LSR, SizeBits - 1);

SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(NewOF, 0, true);
}
}
}

void OpDispatchBuilder::CalculateFlags_BEXTR(OrderedNode *Src) {
// ZF is set properly. CF and OF are defined as being set to zero. SF, PF, and
// AF are undefined.
11 changes: 11 additions & 0 deletions FEXCore/Source/Interface/IR/Passes/ConstProp.cpp
@@ -865,6 +865,17 @@ bool ConstProp::ConstantPropagation(IREmitter *IREmit, const IRListView& Current
}
break;
}
case OP_NEG: {
auto Op = IROp->CW<IR::IROp_Neg>();
uint64_t Constant{};

if (IREmit->IsValueConstant(Op->Header.Args[0], &Constant)) {
uint64_t NewConstant = -Constant;
IREmit->ReplaceWithConstant(CodeNode, NewConstant);
Changed = true;
}
break;
}
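// Illustration: with the rewritten RotateOp, an immediate rotate-left reaches
// the IR as Ror(dest, Neg(#shift)). Folding Neg of a constant here turns that
// into an immediate rotate, and since only the low 5 or 6 bits of the count
// matter, e.g. Neg(#1) becomes a rotate right by 31 (or 63 for 64-bit ops).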
case OP_LSHL: {
auto Op = IROp->CW<IR::IROp_Lshl>();
uint64_t Constant1{};
142 changes: 62 additions & 80 deletions unittests/InstructionCountCI/FlagM/PrimaryGroup.json
@@ -1246,8 +1246,8 @@
"ExpectedInstructionCount": 8,
"Comment": "GROUP2 0xd0 /0",
"ExpectedArm64ASM": [
"uxtb w20, w4",
"bfi w20, w20, #8, #8",
"mov w20, w4",
"bfi w20, w4, #8, #8",
"bfi w20, w20, #16, #16",
"ror w20, w20, #31",
"bfxil x4, x20, #0, #8",
@@ -1260,8 +1260,8 @@
"ExpectedInstructionCount": 7,
"Comment": "GROUP2 0xd0 /1",
"ExpectedArm64ASM": [
"uxtb w20, w4",
"bfi w20, w20, #8, #8",
"mov w20, w4",
"bfi w20, w4, #8, #8",
"ror w20, w20, #1",
"bfxil x4, x20, #0, #8",
"rmif x20, #6, #nzCv",
@@ -1338,8 +1338,8 @@
"ExpectedInstructionCount": 7,
"Comment": "GROUP2 0xd1 /0",
"ExpectedArm64ASM": [
"uxth w20, w4",
"bfi w20, w20, #16, #16",
"mov w20, w4",
"bfi w20, w4, #16, #16",
"ror w20, w20, #31",
"bfxil x4, x20, #0, #16",
"rmif x20, #63, #nzCv",
@@ -1348,11 +1348,10 @@
]
},
"rol eax, 1": {
"ExpectedInstructionCount": 5,
"ExpectedInstructionCount": 4,
"Comment": "GROUP2 0xd1 /0",
"ExpectedArm64ASM": [
"mov w20, w4",
"ror w4, w20, #31",
"ror w4, w4, #31",
"rmif x4, #63, #nzCv",
"eor w20, w4, w4, lsr #31",
"rmif x20, #0, #nzcV"
@@ -1372,8 +1371,8 @@
"ExpectedInstructionCount": 7,
"Comment": "GROUP2 0xd1 /1",
"ExpectedArm64ASM": [
"uxth w20, w4",
"bfi w20, w20, #16, #16",
"mov w20, w4",
"bfi w20, w4, #16, #16",
"ror w20, w20, #1",
"bfxil x4, x20, #0, #16",
"rmif x20, #14, #nzCv",
@@ -1382,11 +1381,10 @@
]
},
"ror eax, 1": {
"ExpectedInstructionCount": 5,
"ExpectedInstructionCount": 4,
"Comment": "GROUP2 0xd1 /1",
"ExpectedArm64ASM": [
"mov w20, w4",
"ror w4, w20, #1",
"ror w4, w4, #1",
"rmif x4, #30, #nzCv",
"eor w20, w4, w4, lsr #1",
"rmif x20, #30, #nzcV"
@@ -1585,36 +1583,33 @@
]
},
"rol al, cl": {
"ExpectedInstructionCount": 13,
"ExpectedInstructionCount": 11,
"Comment": "GROUP2 0xd2 /0",
"ExpectedArm64ASM": [
"uxtb w20, w4",
"uxtb w21, w5",
"and w21, w21, #0x1f",
"bfi w20, w20, #8, #8",
"and x20, x5, #0x1f",
"cbz x20, #+0x28",
"mov w20, w4",
"bfi w20, w4, #8, #8",
"bfi w20, w20, #16, #16",
"mov w22, #0x20",
"sub w22, w22, w21",
"ror w20, w20, w22",
"neg w21, w5",
"ror w20, w20, w21",
"bfxil x4, x20, #0, #8",
"cbz x21, #+0x10",
"rmif x20, #63, #nzCv",
"eor w20, w20, w20, lsr #7",
"rmif x20, #0, #nzcV"
]
},
"ror al, cl": {
"ExpectedInstructionCount": 11,
"ExpectedInstructionCount": 10,
"Comment": "GROUP2 0xd2 /1",
"ExpectedArm64ASM": [
"uxtb w20, w4",
"uxtb w21, w5",
"and w21, w21, #0x1f",
"bfi w20, w20, #8, #8",
"and x20, x5, #0x1f",
"cbz x20, #+0x24",
"mov w20, w4",
"bfi w20, w4, #8, #8",
"bfi w20, w20, #16, #16",
"ror w20, w20, w21",
"ror w20, w20, w5",
"bfxil x4, x20, #0, #8",
"cbz x21, #+0x10",
"rmif x20, #6, #nzCv",
"eor w20, w20, w20, lsr #1",
"rmif x20, #6, #nzcV"
@@ -1733,96 +1728,83 @@
]
},
"rol ax, cl": {
"ExpectedInstructionCount": 12,
"ExpectedInstructionCount": 10,
"Comment": "GROUP2 0xd3 /0",
"ExpectedArm64ASM": [
"uxth w20, w4",
"uxth w21, w5",
"and w21, w21, #0x1f",
"bfi w20, w20, #16, #16",
"mov w22, #0x20",
"sub w22, w22, w21",
"ror w20, w20, w22",
"and x20, x5, #0x1f",
"cbz x20, #+0x24",
"mov w20, w4",
"bfi w20, w4, #16, #16",
"neg w21, w5",
"ror w20, w20, w21",
"bfxil x4, x20, #0, #16",
"cbz x21, #+0x10",
"rmif x20, #63, #nzCv",
"eor w20, w20, w20, lsr #15",
"rmif x20, #0, #nzcV"
]
},
"rol eax, cl": {
"ExpectedInstructionCount": 11,
"ExpectedInstructionCount": 7,
"Comment": "GROUP2 0xd3 /0",
"ExpectedArm64ASM": [
"mov w20, w4",
"mov w21, w5",
"and w21, w21, #0x1f",
"mov w22, #0x20",
"sub w22, w22, w21",
"ror w20, w20, w22",
"mov x4, x20",
"cbz x21, #+0x10",
"rmif x20, #63, #nzCv",
"eor w20, w20, w20, lsr #31",
"and x20, x5, #0x1f",
"cbz x20, #+0x18",
"neg w20, w5",
"ror w4, w4, w20",
"rmif x4, #63, #nzCv",
"eor w20, w4, w4, lsr #31",
"rmif x20, #0, #nzcV"
]
},
"rol rax, cl": {
"ExpectedInstructionCount": 9,
"ExpectedInstructionCount": 7,
"Comment": "GROUP2 0xd3 /0",
"ExpectedArm64ASM": [
"and x20, x5, #0x3f",
"mov w21, #0x40",
"sub x21, x21, x20",
"ror x21, x4, x21",
"mov x4, x21",
"cbz x20, #+0x10",
"rmif x21, #63, #nzCv",
"eor x20, x21, x21, lsr #63",
"cbz x20, #+0x18",
"neg x20, x5",
"ror x4, x4, x20",
"rmif x4, #63, #nzCv",
"eor x20, x4, x4, lsr #63",
"rmif x20, #0, #nzcV"
]
},
"ror ax, cl": {
"ExpectedInstructionCount": 10,
"ExpectedInstructionCount": 9,
"Comment": "GROUP2 0xd3 /1",
"ExpectedArm64ASM": [
"uxth w20, w4",
"uxth w21, w5",
"and w21, w21, #0x1f",
"bfi w20, w20, #16, #16",
"ror w20, w20, w21",
"and x20, x5, #0x1f",
"cbz x20, #+0x20",
"mov w20, w4",
"bfi w20, w4, #16, #16",
"ror w20, w20, w5",
"bfxil x4, x20, #0, #16",
"cbz x21, #+0x10",
"rmif x20, #14, #nzCv",
"eor w20, w20, w20, lsr #1",
"rmif x20, #14, #nzcV"
]
},
"ror eax, cl": {
"ExpectedInstructionCount": 9,
"ExpectedInstructionCount": 6,
"Comment": "GROUP2 0xd3 /1",
"ExpectedArm64ASM": [
"mov w20, w4",
"mov w21, w5",
"and w21, w21, #0x1f",
"ror w20, w20, w21",
"mov x4, x20",
"cbz x21, #+0x10",
"rmif x20, #30, #nzCv",
"eor w20, w20, w20, lsr #1",
"and x20, x5, #0x1f",
"cbz x20, #+0x14",
"ror w4, w4, w5",
"rmif x4, #30, #nzCv",
"eor w20, w4, w4, lsr #1",
"rmif x20, #30, #nzcV"
]
},
"ror rax, cl": {
"ExpectedInstructionCount": 7,
"ExpectedInstructionCount": 6,
"Comment": "GROUP2 0xd3 /1",
"ExpectedArm64ASM": [
"and x20, x5, #0x3f",
"ror x21, x4, x20",
"mov x4, x21",
"cbz x20, #+0x10",
"rmif x21, #62, #nzCv",
"eor x20, x21, x21, lsr #1",
"cbz x20, #+0x14",
"ror x4, x4, x5",
"rmif x4, #62, #nzCv",
"eor x20, x4, x4, lsr #1",
"rmif x20, #62, #nzcV"
]
},
166 changes: 74 additions & 92 deletions unittests/InstructionCountCI/PrimaryGroup.json
@@ -1417,8 +1417,8 @@
"ExpectedInstructionCount": 13,
"Comment": "GROUP2 0xd0 /0",
"ExpectedArm64ASM": [
"uxtb w20, w4",
"bfi w20, w20, #8, #8",
"mov w20, w4",
"bfi w20, w4, #8, #8",
"bfi w20, w20, #16, #16",
"ror w20, w20, #31",
"bfxil x4, x20, #0, #8",
@@ -1436,8 +1436,8 @@
"ExpectedInstructionCount": 12,
"Comment": "GROUP2 0xd0 /1",
"ExpectedArm64ASM": [
"uxtb w20, w4",
"bfi w20, w20, #8, #8",
"mov w20, w4",
"bfi w20, w4, #8, #8",
"ror w20, w20, #1",
"bfxil x4, x20, #0, #8",
"mrs x21, nzcv",
@@ -1546,8 +1546,8 @@
"ExpectedInstructionCount": 12,
"Comment": "GROUP2 0xd1 /0",
"ExpectedArm64ASM": [
"uxth w20, w4",
"bfi w20, w20, #16, #16",
"mov w20, w4",
"bfi w20, w4, #16, #16",
"ror w20, w20, #31",
"bfxil x4, x20, #0, #16",
"mrs x21, nzcv",
@@ -1561,11 +1561,10 @@
]
},
"rol eax, 1": {
"ExpectedInstructionCount": 10,
"ExpectedInstructionCount": 9,
"Comment": "GROUP2 0xd1 /0",
"ExpectedArm64ASM": [
"mov w20, w4",
"ror w4, w20, #31",
"ror w4, w4, #31",
"mrs x20, nzcv",
"and w20, w20, #0xc0000000",
"ubfx x21, x4, #0, #1",
@@ -1595,8 +1594,8 @@
"ExpectedInstructionCount": 12,
"Comment": "GROUP2 0xd1 /1",
"ExpectedArm64ASM": [
"uxth w20, w4",
"bfi w20, w20, #16, #16",
"mov w20, w4",
"bfi w20, w4, #16, #16",
"ror w20, w20, #1",
"bfxil x4, x20, #0, #16",
"mrs x21, nzcv",
@@ -1610,11 +1609,10 @@
]
},
"ror eax, 1": {
"ExpectedInstructionCount": 10,
"ExpectedInstructionCount": 9,
"Comment": "GROUP2 0xd1 /1",
"ExpectedArm64ASM": [
"mov w20, w4",
"ror w4, w20, #1",
"ror w4, w4, #1",
"mrs x20, nzcv",
"and w20, w20, #0xc0000000",
"ubfx x21, x4, #31, #1",
@@ -1904,19 +1902,17 @@
]
},
"rol al, cl": {
"ExpectedInstructionCount": 18,
"ExpectedInstructionCount": 16,
"Comment": "GROUP2 0xd2 /0",
"ExpectedArm64ASM": [
"uxtb w20, w4",
"uxtb w21, w5",
"and w21, w21, #0x1f",
"bfi w20, w20, #8, #8",
"and x20, x5, #0x1f",
"cbz x20, #+0x3c",
"mov w20, w4",
"bfi w20, w4, #8, #8",
"bfi w20, w20, #16, #16",
"mov w22, #0x20",
"sub w22, w22, w21",
"ror w20, w20, w22",
"neg w21, w5",
"ror w20, w20, w21",
"bfxil x4, x20, #0, #8",
"cbz x21, #+0x24",
"mrs x21, nzcv",
"and w21, w21, #0xc0000000",
"ubfx x22, x20, #0, #1",
@@ -1928,17 +1924,16 @@
]
},
"ror al, cl": {
"ExpectedInstructionCount": 16,
"ExpectedInstructionCount": 15,
"Comment": "GROUP2 0xd2 /1",
"ExpectedArm64ASM": [
"uxtb w20, w4",
"uxtb w21, w5",
"and w21, w21, #0x1f",
"bfi w20, w20, #8, #8",
"and x20, x5, #0x1f",
"cbz x20, #+0x38",
"mov w20, w4",
"bfi w20, w4, #8, #8",
"bfi w20, w20, #16, #16",
"ror w20, w20, w21",
"ror w20, w20, w5",
"bfxil x4, x20, #0, #8",
"cbz x21, #+0x24",
"mrs x21, nzcv",
"and w21, w21, #0xc0000000",
"ubfx x22, x20, #7, #1",
@@ -2087,18 +2082,16 @@
]
},
"rol ax, cl": {
"ExpectedInstructionCount": 17,
"ExpectedInstructionCount": 15,
"Comment": "GROUP2 0xd3 /0",
"ExpectedArm64ASM": [
"uxth w20, w4",
"uxth w21, w5",
"and w21, w21, #0x1f",
"bfi w20, w20, #16, #16",
"mov w22, #0x20",
"sub w22, w22, w21",
"ror w20, w20, w22",
"and x20, x5, #0x1f",
"cbz x20, #+0x38",
"mov w20, w4",
"bfi w20, w4, #16, #16",
"neg w21, w5",
"ror w20, w20, w21",
"bfxil x4, x20, #0, #16",
"cbz x21, #+0x24",
"mrs x21, nzcv",
"and w21, w21, #0xc0000000",
"ubfx x22, x20, #0, #1",
@@ -2110,58 +2103,51 @@
]
},
"rol eax, cl": {
"ExpectedInstructionCount": 16,
"ExpectedInstructionCount": 12,
"Comment": "GROUP2 0xd3 /0",
"ExpectedArm64ASM": [
"mov w20, w4",
"mov w21, w5",
"and w21, w21, #0x1f",
"mov w22, #0x20",
"sub w22, w22, w21",
"ror w20, w20, w22",
"mov x4, x20",
"cbz x21, #+0x24",
"mrs x21, nzcv",
"and w21, w21, #0xc0000000",
"ubfx x22, x20, #0, #1",
"orr w21, w21, w22, lsl #29",
"eor w20, w20, w20, lsr #31",
"ubfx x20, x20, #0, #1",
"orr w20, w21, w20, lsl #28",
"and x20, x5, #0x1f",
"cbz x20, #+0x2c",
"neg w20, w5",
"ror w4, w4, w20",
"mrs x20, nzcv",
"and w20, w20, #0xc0000000",
"ubfx x21, x4, #0, #1",
"orr w20, w20, w21, lsl #29",
"eor w21, w4, w4, lsr #31",
"ubfx x21, x21, #0, #1",
"orr w20, w20, w21, lsl #28",
"msr nzcv, x20"
]
},
"rol rax, cl": {
"ExpectedInstructionCount": 14,
"ExpectedInstructionCount": 12,
"Comment": "GROUP2 0xd3 /0",
"ExpectedArm64ASM": [
"and x20, x5, #0x3f",
"mov w21, #0x40",
"sub x21, x21, x20",
"ror x21, x4, x21",
"mov x4, x21",
"cbz x20, #+0x24",
"cbz x20, #+0x2c",
"neg x20, x5",
"ror x4, x4, x20",
"mrs x20, nzcv",
"and w20, w20, #0xc0000000",
"ubfx x22, x21, #0, #1",
"orr w20, w20, w22, lsl #29",
"eor x21, x21, x21, lsr #63",
"ubfx x21, x4, #0, #1",
"orr w20, w20, w21, lsl #29",
"eor x21, x4, x4, lsr #63",
"ubfx x21, x21, #0, #1",
"orr w20, w20, w21, lsl #28",
"msr nzcv, x20"
]
},
"ror ax, cl": {
"ExpectedInstructionCount": 15,
"ExpectedInstructionCount": 14,
"Comment": "GROUP2 0xd3 /1",
"ExpectedArm64ASM": [
"uxth w20, w4",
"uxth w21, w5",
"and w21, w21, #0x1f",
"bfi w20, w20, #16, #16",
"ror w20, w20, w21",
"and x20, x5, #0x1f",
"cbz x20, #+0x34",
"mov w20, w4",
"bfi w20, w4, #16, #16",
"ror w20, w20, w5",
"bfxil x4, x20, #0, #16",
"cbz x21, #+0x24",
"mrs x21, nzcv",
"and w21, w21, #0xc0000000",
"ubfx x22, x20, #15, #1",
@@ -2173,38 +2159,34 @@
]
},
"ror eax, cl": {
"ExpectedInstructionCount": 14,
"ExpectedInstructionCount": 11,
"Comment": "GROUP2 0xd3 /1",
"ExpectedArm64ASM": [
"mov w20, w4",
"mov w21, w5",
"and w21, w21, #0x1f",
"ror w20, w20, w21",
"mov x4, x20",
"cbz x21, #+0x24",
"mrs x21, nzcv",
"and w21, w21, #0xc0000000",
"ubfx x22, x20, #31, #1",
"orr w21, w21, w22, lsl #29",
"eor w20, w20, w20, lsr #1",
"ubfx x20, x20, #30, #1",
"orr w20, w21, w20, lsl #28",
"and x20, x5, #0x1f",
"cbz x20, #+0x28",
"ror w4, w4, w5",
"mrs x20, nzcv",
"and w20, w20, #0xc0000000",
"ubfx x21, x4, #31, #1",
"orr w20, w20, w21, lsl #29",
"eor w21, w4, w4, lsr #1",
"ubfx x21, x21, #30, #1",
"orr w20, w20, w21, lsl #28",
"msr nzcv, x20"
]
},
"ror rax, cl": {
"ExpectedInstructionCount": 12,
"ExpectedInstructionCount": 11,
"Comment": "GROUP2 0xd3 /1",
"ExpectedArm64ASM": [
"and x20, x5, #0x3f",
"ror x21, x4, x20",
"mov x4, x21",
"cbz x20, #+0x24",
"cbz x20, #+0x28",
"ror x4, x4, x5",
"mrs x20, nzcv",
"and w20, w20, #0xc0000000",
"lsr x22, x21, #63",
"orr w20, w20, w22, lsl #29",
"eor x21, x21, x21, lsr #1",
"lsr x21, x4, #63",
"orr w20, w20, w21, lsl #29",
"eor x21, x4, x4, lsr #1",
"ubfx x21, x21, #62, #1",
"orr w20, w20, w21, lsl #28",
"msr nzcv, x20"