Skip to content

Commit

Permalink
OpcodeDispatcher: avoid xblock liveness with ROR/ROL
Browse files Browse the repository at this point in the history
Signed-off-by: Alyssa Rosenzweig <[email protected]>
  • Loading branch information
alyssarosenzweig committed Mar 29, 2024
1 parent 5b94d81 commit 7da1066
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 25 deletions.
4 changes: 2 additions & 2 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1928,7 +1928,7 @@ void OpDispatchBuilder::ROROp(OpcodeArgs) {
if constexpr (Is1Bit) {
GenerateFlags_RotateRightImmediate(Op, ALUOp, Dest, 1);
} else {
GenerateFlags_RotateRight(Op, ALUOp, Dest, Src);
GenerateFlags_RotateRight(Op, Src);
}
}

Expand Down Expand Up @@ -2005,7 +2005,7 @@ void OpDispatchBuilder::ROLOp(OpcodeArgs) {
if constexpr (Is1Bit) {
GenerateFlags_RotateLeftImmediate(Op, ALUOp, Dest, 1);
} else {
GenerateFlags_RotateLeft(Op, ALUOp, Dest, Src);
GenerateFlags_RotateLeft(Op, Src);
}
}

Expand Down
26 changes: 15 additions & 11 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -1669,7 +1669,7 @@ friend class FEXCore::IR::PassManager;
OrderedNode *Src1;
} OneSource;

// Logical, LSHL, LSHR, ASHR, ROR, ROL
// Logical, LSHL, LSHR, ASHR
struct {
OrderedNode *Src1;
OrderedNode *Src2;
Expand All @@ -1688,6 +1688,12 @@ friend class FEXCore::IR::PassManager;

bool UpdateCF;
} TwoSrcImmediate;

// ROL, ROR
struct {
X86Tables::DecodedOp Op;
OrderedNode *Src2;
} Decoded;
} Sources{};
};

Expand Down Expand Up @@ -1785,8 +1791,8 @@ friend class FEXCore::IR::PassManager;
void CalculateFlags_ShiftRightImmediateCommon(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift);
void CalculateFlags_SignShiftRight(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2);
void CalculateFlags_SignShiftRightImmediate(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift);
void CalculateFlags_RotateRight(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2);
void CalculateFlags_RotateLeft(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2);
void CalculateFlags_RotateRight(uint8_t SrcSize, X86Tables::DecodedOp Op, OrderedNode *Src2);
void CalculateFlags_RotateLeft(uint8_t SrcSize, X86Tables::DecodedOp Op, OrderedNode *Src2);
void CalculateFlags_RotateRightImmediate(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift);
void CalculateFlags_RotateLeftImmediate(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, uint64_t Shift);
void CalculateFlags_BEXTR(OrderedNode *Src);
Expand Down Expand Up @@ -1976,34 +1982,32 @@ friend class FEXCore::IR::PassManager;
};
}

void GenerateFlags_RotateRight(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) {
void GenerateFlags_RotateRight(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Src2) {
// Doesn't set all the flags, needs to calculate.
CalculateDeferredFlags();

CurrentDeferredFlags = DeferredFlagData {
.Type = FlagsGenerationType::TYPE_ROR,
.SrcSize = GetSrcSize(Op),
.Res = Res,
.Sources = {
.TwoSource = {
.Src1 = Src1,
.Decoded = {
.Op = Op,
.Src2 = Src2,
},
},
};
}

void GenerateFlags_RotateLeft(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) {
void GenerateFlags_RotateLeft(FEXCore::X86Tables::DecodedOp Op, OrderedNode *Src2) {
// Doesn't set all the flags, needs to calculate.
CalculateDeferredFlags();

CurrentDeferredFlags = DeferredFlagData {
.Type = FlagsGenerationType::TYPE_ROL,
.SrcSize = GetSrcSize(Op),
.Res = Res,
.Sources = {
.TwoSource = {
.Src1 = Src1,
.Decoded = {
.Op = Op,
.Src2 = Src2,
},
},
Expand Down
26 changes: 14 additions & 12 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -404,9 +404,8 @@ void OpDispatchBuilder::CalculateDeferredFlags(uint32_t FlagsToCalculateMask) {
case FlagsGenerationType::TYPE_ROR:
CalculateFlags_RotateRight(
CurrentDeferredFlags.SrcSize,
CurrentDeferredFlags.Res,
CurrentDeferredFlags.Sources.TwoSource.Src1,
CurrentDeferredFlags.Sources.TwoSource.Src2);
CurrentDeferredFlags.Sources.Decoded.Op,
CurrentDeferredFlags.Sources.Decoded.Src2);
break;
case FlagsGenerationType::TYPE_RORI:
CalculateFlags_RotateRightImmediate(
Expand All @@ -418,9 +417,8 @@ void OpDispatchBuilder::CalculateDeferredFlags(uint32_t FlagsToCalculateMask) {
case FlagsGenerationType::TYPE_ROL:
CalculateFlags_RotateLeft(
CurrentDeferredFlags.SrcSize,
CurrentDeferredFlags.Res,
CurrentDeferredFlags.Sources.TwoSource.Src1,
CurrentDeferredFlags.Sources.TwoSource.Src2);
CurrentDeferredFlags.Sources.Decoded.Op,
CurrentDeferredFlags.Sources.Decoded.Src2);
break;
case FlagsGenerationType::TYPE_ROLI:
CalculateFlags_RotateLeftImmediate(
Expand Down Expand Up @@ -835,11 +833,14 @@ void OpDispatchBuilder::CalculateFlags_ShiftRightDoubleImmediate(uint8_t SrcSize
}
}

void OpDispatchBuilder::CalculateFlags_RotateRight(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) {
CalculateFlags_ShiftVariable(Src2, [this, SrcSize, Res](){
void OpDispatchBuilder::CalculateFlags_RotateRight(uint8_t SrcSize, X86Tables::DecodedOp Op, OrderedNode *Src2) {
CalculateFlags_ShiftVariable(Src2, [this, SrcSize, Op](){
auto SizeBits = SrcSize * 8;
const auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit;

// Rematerialize inside block. Only the lower SrcSize bytes (SizeBits bits) are used.
OrderedNode *Res = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});

// Ends up faster overall if we don't have FlagM, slower if we do...
// If Shift != 1, OF is undefined so we choose to zero here.
if (!CTX->HostFeatures.SupportsFlagM)
Expand All @@ -855,19 +856,20 @@ void OpDispatchBuilder::CalculateFlags_RotateRight(uint8_t SrcSize, OrderedNode
});
}

void OpDispatchBuilder::CalculateFlags_RotateLeft(uint8_t SrcSize, OrderedNode *Res, OrderedNode *Src1, OrderedNode *Src2) {
CalculateFlags_ShiftVariable(Src2, [this, SrcSize, Res](){
void OpDispatchBuilder::CalculateFlags_RotateLeft(uint8_t SrcSize, X86Tables::DecodedOp Op, OrderedNode *Src2) {
CalculateFlags_ShiftVariable(Src2, [this, SrcSize, Op](){
const auto OpSize = SrcSize == 8 ? OpSize::i64Bit : OpSize::i32Bit;
auto SizeBits = SrcSize * 8;

// Rematerialize inside block. Only the lower SrcSize bytes (SizeBits bits) are used.
OrderedNode *Res = LoadSource(GPRClass, Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});

// Ends up faster overall if we don't have FlagM, slower if we do...
// If Shift != 1, OF is undefined so we choose to zero here.
if (!CTX->HostFeatures.SupportsFlagM)
ZeroCV();

// Extract the last bit shifted in to CF
//auto Size = _Constant(GetSrcSize(Res) * 8);
//auto ShiftAmt = _Sub(OpSize::i64Bit, Size, Src2);
SetRFLAG<FEXCore::X86State::RFLAG_CF_RAW_LOC>(Res, 0, true);

// OF is the LSB and MSB XOR'd together.
Expand Down

0 comments on commit 7da1066

Please sign in to comment.