Skip to content

Commit

Permalink
x87StackOptimizationPass: Minor opt to f80 fchs and fabs
Browse files Browse the repository at this point in the history
It's faster to load the f80 sign mask from our named vector constants
than to synthesize the value. This replaces a 4-instruction synthesis
sequence with a single load.
  • Loading branch information
Sonicadvance1 committed Jan 3, 2025
1 parent 6bc7a83 commit a47ed10
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 9 deletions.
1 change: 1 addition & 0 deletions FEXCore/Source/Interface/Core/CPUBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ namespace CPU {
{0x43E0'0000'0000'0000ULL, 0x43E0'0000'0000'0000ULL}, // NAMED_VECTOR_CVTMAX_F64_I64
{0x8000'0000'8000'0000ULL, 0x8000'0000'8000'0000ULL}, // NAMED_VECTOR_CVTMAX_I32
{0x8000'0000'0000'0000ULL, 0x8000'0000'0000'0000ULL}, // NAMED_VECTOR_CVTMAX_I64
{0x0000'0000'0000'0000ULL, 0x0000'0000'0000'8000ULL}, // NAMED_VECTOR_F80_SIGN_MASK
};

constexpr static auto PSHUFLW_LUT {[]() consteval {
Expand Down
12 changes: 3 additions & 9 deletions FEXCore/Source/Interface/IR/Passes/x87StackOptimizationPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -885,10 +885,7 @@ void X87StackOptimization::Run(IREmitter* Emit) {
if (ReducedPrecisionMode) {
ResultNode = IREmit->_VFNeg(OpSize::i64Bit, OpSize::i64Bit, Value);
} else {
Ref Low = GetConstant(0);
Ref High = GetConstant(0b1'000'0000'0000'0000ULL);
Ref HelperNode = IREmit->_VCastFromGPR(OpSize::i128Bit, OpSize::i64Bit, Low);
HelperNode = IREmit->_VInsGPR(OpSize::i128Bit, OpSize::i64Bit, 1, HelperNode, High);
Ref HelperNode = IREmit->_LoadNamedVectorConstant(OpSize::i128Bit, IR::NamedVectorConstant::NAMED_VECTOR_F80_SIGN_MASK);
ResultNode = IREmit->_VXor(OpSize::i128Bit, OpSize::i8Bit, Value, HelperNode);
}
StoreStackValue(ResultNode);
Expand All @@ -903,11 +900,8 @@ void X87StackOptimization::Run(IREmitter* Emit) {
ResultNode = IREmit->_VFAbs(OpSize::i64Bit, OpSize::i64Bit, Value);
} else {
// Intermediate insts
Ref Low = GetConstant(~0ULL);
Ref High = GetConstant(0b0'111'1111'1111'1111ULL);
Ref HelperNode = IREmit->_VCastFromGPR(OpSize::i128Bit, OpSize::i64Bit, Low);
HelperNode = IREmit->_VInsGPR(OpSize::i128Bit, OpSize::i64Bit, 1, HelperNode, High);
ResultNode = IREmit->_VAnd(OpSize::i128Bit, OpSize::i8Bit, Value, HelperNode);
Ref HelperNode = IREmit->_LoadNamedVectorConstant(OpSize::i128Bit, IR::NamedVectorConstant::NAMED_VECTOR_F80_SIGN_MASK);
ResultNode = IREmit->_VAndn(OpSize::i128Bit, OpSize::i8Bit, Value, HelperNode);
}
StoreStackValue(ResultNode);
break;
Expand Down
1 change: 1 addition & 0 deletions FEXCore/include/FEXCore/IR/IR.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ enum NamedVectorConstant : uint8_t {
NAMED_VECTOR_CVTMAX_F64_I64,
NAMED_VECTOR_CVTMAX_I32,
NAMED_VECTOR_CVTMAX_I64,
NAMED_VECTOR_F80_SIGN_MASK,

NAMED_VECTOR_CONST_POOL_MAX,
// Beginning of named constants that don't have a constant pool backing.
Expand Down

0 comments on commit a47ed10

Please sign in to comment.