Skip to content

Commit

Permalink
Merge pull request FEX-Emu#2053 from lioncash/vfcmpord
Browse files Browse the repository at this point in the history
IR: Handle 256-bit VFCMPORD/VFCMPUNO
  • Loading branch information
Sonicadvance1 authored Oct 10, 2022
2 parents 25a8a00 + c7ad066 commit 7b4b9a8
Show file tree
Hide file tree
Showing 3 changed files with 273 additions and 121 deletions.
46 changes: 30 additions & 16 deletions External/FEXCore/Source/Interface/Core/Interpreter/VectorOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1274,59 +1274,73 @@ DEF_OP(VFCMPLE) {
}

// Interpreter handler for the VFCMPORD IR op ("ordered" floating-point compare).
// The predicate Func yields an all-ones value when neither operand is NaN and
// zero otherwise; the result is staged in Tmp and OpSize bytes of it are copied
// to GDP at the end.
//
// NOTE(review): this span appears to interleave the pre-change and post-change
// lines of a diff hunk (Op, Tmp and Elements are each declared twice, and every
// if/switch header and default label appears in two spellings). Confirm which
// lines are live before changing any code here.
DEF_OP(VFCMPORD) {
auto Op = IROp->C<IR::IROp_VFCMPORD>();
const auto Op = IROp->C<IR::IROp_VFCMPORD>();
const uint8_t OpSize = IROp->Size;

// Untyped pointers to the two source operands of the op.
void *Src1 = GetSrc<void*>(Data->SSAData, Op->Vector1);
void *Src2 = GetSrc<void*>(Data->SSAData, Op->Vector2);

// Per-element predicate: all bits set if both inputs are non-NaN, else 0.
const auto Func = [](auto a, auto b) { return (!std::isnan(a) && !std::isnan(b)) ? ~0ULL : 0; };

// Staging buffer for the result. The two declarations differ only in size:
// a literal 16 versus Core::CPUState::XMM_AVX_REG_SIZE.
uint8_t Tmp[16];
const uint8_t Elements = OpSize / Op->Header.ElementSize;
uint8_t Tmp[Core::CPUState::XMM_AVX_REG_SIZE];

const uint8_t ElementSize = Op->Header.ElementSize;
const uint8_t Elements = OpSize / ElementSize;
// The op is treated as scalar when a single element spans the whole operation.
const auto IsScalar = ElementSize == OpSize;

// Dispatch on element size (4 -> float/uint32_t, 8 -> double/uint64_t).
// DO_SCALAR_COMPARE_OP / DO_VECTOR_COMPARE_OP are macros defined outside this
// span; presumably they read Src1/Src2, apply Func and write Tmp — verify.
if (Op->Header.ElementSize == OpSize) {
switch (Op->Header.ElementSize) {
if (IsScalar) {
switch (ElementSize) {
DO_SCALAR_COMPARE_OP(4, float, uint32_t, Func);
DO_SCALAR_COMPARE_OP(8, double, uint64_t, Func);
default: LOGMAN_MSG_A_FMT("Unsupported elementSize: {}", Op->Header.ElementSize);
default:
LOGMAN_MSG_A_FMT("Unsupported element size: {}", ElementSize);
break;
}
}
else {
switch (Op->Header.ElementSize) {
switch (ElementSize) {
DO_VECTOR_COMPARE_OP(4, float, uint32_t, Func);
DO_VECTOR_COMPARE_OP(8, double, uint64_t, Func);
default: LOGMAN_MSG_A_FMT("Unsupported elementSize: {}", Op->Header.ElementSize);
default:
LOGMAN_MSG_A_FMT("Unsupported element size: {}", ElementSize);
break;
}
}

// Publish the staged result: copy OpSize bytes from Tmp to GDP.
memcpy(GDP, Tmp, OpSize);
}

DEF_OP(VFCMPUNO) {
auto Op = IROp->C<IR::IROp_VFCMPUNO>();
const auto Op = IROp->C<IR::IROp_VFCMPUNO>();
const uint8_t OpSize = IROp->Size;

void *Src1 = GetSrc<void*>(Data->SSAData, Op->Vector1);
void *Src2 = GetSrc<void*>(Data->SSAData, Op->Vector2);

const auto Func = [](auto a, auto b) { return (std::isnan(a) || std::isnan(b)) ? ~0ULL : 0; };

uint8_t Tmp[16];
const uint8_t Elements = OpSize / Op->Header.ElementSize;
uint8_t Tmp[Core::CPUState::XMM_AVX_REG_SIZE];

const uint8_t ElementSize = Op->Header.ElementSize;
const uint8_t Elements = OpSize / ElementSize;
const auto IsScalar = ElementSize == OpSize;

if (Op->Header.ElementSize == OpSize) {
switch (Op->Header.ElementSize) {
if (IsScalar) {
switch (ElementSize) {
DO_SCALAR_COMPARE_OP(4, float, uint32_t, Func);
DO_SCALAR_COMPARE_OP(8, double, uint64_t, Func);
default: LOGMAN_MSG_A_FMT("Unsupported elementSize: {}", Op->Header.ElementSize);
default:
LOGMAN_MSG_A_FMT("Unsupported element size: {}", ElementSize);
break;
}
}
else {
switch (Op->Header.ElementSize) {
switch (ElementSize) {
DO_VECTOR_COMPARE_OP(4, float, uint32_t, Func);
DO_VECTOR_COMPARE_OP(8, double, uint64_t, Func);
default: LOGMAN_MSG_A_FMT("Unsupported elementSize: {}", Op->Header.ElementSize);
default:
LOGMAN_MSG_A_FMT("Unsupported element size: {}", ElementSize);
break;
}
}

Expand Down
258 changes: 183 additions & 75 deletions External/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3183,100 +3183,208 @@ DEF_OP(VFCMPLE) {
}

// Arm64 JIT handler for the VFCMPORD IR op ("ordered" floating-point compare).
// Two code paths are emitted depending on the operation:
//  * HostSupportsSVE && Is256Bit && !IsScalar: an SVE sequence that builds the
//    result in VTMP1 and then moves it into Dst.
//  * otherwise: an fcmge/fcmgt/orr sequence, in scalar (H/S/D) or vector
//    (V8H/V4S/V2D) form, selected by IsScalar and ElementSize.
//
// NOTE(review): this span appears to interleave the pre-change and post-change
// lines of a diff hunk (Op and OpSize are declared twice, several case bodies
// contain both an old fcmge/fcmgt sequence and a new one, and braces do not
// balance as shown). Confirm which lines are live before changing code here.
DEF_OP(VFCMPORD) {
auto Op = IROp->C<IR::IROp_VFCMPORD>();
const uint8_t OpSize = IROp->Size;
if (Op->Header.ElementSize == OpSize) {
// Scalar
switch (Op->Header.ElementSize) {
const auto Op = IROp->C<IR::IROp_VFCMPORD>();
const auto OpSize = IROp->Size;

const auto ElementSize = Op->Header.ElementSize;
// Scalar when one element spans the whole operation.
const auto IsScalar = ElementSize == OpSize;
const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;

const auto Dst = GetDst(Node);
const auto Vector1 = GetSrc(Op->Vector1.ID());
const auto Vector2 = GetSrc(Op->Vector2.ID());

if (HostSupportsSVE && Is256Bit && !IsScalar) {
const auto Mask = PRED_TMP_32B.Zeroing();
const auto ComparePred = p0;

// Ensure there's no junk in the temporary.
eor(VTMP1.Z().VnD(), VTMP1.Z().VnD(), VTMP1.Z().VnD());

// The idea is like comparing for unordered, but we just
// invert the predicate from the comparison to instead
// select all ordered elements in the vector.

// Each case below: fcmuo writes the comparison result into ComparePred, the
// predicate not_ inverts it under Mask, and the vector not_ + orr pair on the
// same source computes (~x | x), i.e. all bits set, for the lanes governed by
// ComparePred. Lanes not selected presumably keep the zero written by the eor
// above (merging predication) — verify.
switch (ElementSize) {
case 2: {
fcmuo(ComparePred.VnH(), Mask, Vector1.Z().VnH(), Vector2.Z().VnH());
not_(ComparePred.VnB(), Mask, ComparePred.VnB());
not_(VTMP1.Z().VnH(), ComparePred.Merging(), Vector1.Z().VnH());
orr(VTMP1.Z().VnH(), ComparePred.Merging(), VTMP1.Z().VnH(), Vector1.Z().VnH());
break;
}
case 4: {
fcmge(VTMP1.S(), GetSrc(Op->Vector1.ID()).S(), GetSrc(Op->Vector2.ID()).S());
fcmgt(VTMP2.S(), GetSrc(Op->Vector2.ID()).S(), GetSrc(Op->Vector1.ID()).S());
orr(GetDst(Node).V8B(), VTMP1.V8B(), VTMP2.V8B());
break;
fcmuo(ComparePred.VnS(), Mask, Vector1.Z().VnS(), Vector2.Z().VnS());
not_(ComparePred.VnB(), Mask, ComparePred.VnB());
not_(VTMP1.Z().VnS(), ComparePred.Merging(), Vector1.Z().VnS());
orr(VTMP1.Z().VnS(), ComparePred.Merging(), VTMP1.Z().VnS(), Vector1.Z().VnS());
break;
}
case 8: {
fcmge(VTMP1.D(), GetSrc(Op->Vector1.ID()).D(), GetSrc(Op->Vector2.ID()).D());
fcmgt(VTMP2.D(), GetSrc(Op->Vector2.ID()).D(), GetSrc(Op->Vector1.ID()).D());
orr(GetDst(Node).V8B(), VTMP1.V8B(), VTMP2.V8B());
break;
fcmuo(ComparePred.VnD(), Mask, Vector1.Z().VnD(), Vector2.Z().VnD());
not_(ComparePred.VnB(), Mask, ComparePred.VnB());
not_(VTMP1.Z().VnD(), ComparePred.Merging(), Vector1.Z().VnD());
orr(VTMP1.Z().VnD(), ComparePred.Merging(), VTMP1.Z().VnD(), Vector1.Z().VnD());
break;
}
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
return;
}
}
else {
// Vector
switch (Op->Header.ElementSize) {
case 2: {
fcmge(VTMP1.V8H(), GetSrc(Op->Vector1.ID()).V8H(), GetSrc(Op->Vector2.ID()).V8H());
fcmgt(VTMP2.V8H(), GetSrc(Op->Vector2.ID()).V8H(), GetSrc(Op->Vector1.ID()).V8H());
orr(GetDst(Node).V16B(), VTMP1.V16B(), VTMP2.V16B());
break;
}
case 4: {
fcmge(VTMP1.V4S(), GetSrc(Op->Vector1.ID()).V4S(), GetSrc(Op->Vector2.ID()).V4S());
fcmgt(VTMP2.V4S(), GetSrc(Op->Vector2.ID()).V4S(), GetSrc(Op->Vector1.ID()).V4S());
orr(GetDst(Node).V16B(), VTMP1.V16B(), VTMP2.V16B());
break;

// The SVE result was assembled in VTMP1; move it into the destination.
mov(Dst.Z().VnD(), VTMP1.Z().VnD());
} else {
// Non-SVE path: Dst = (Vector1 >= Vector2) | (Vector2 > Vector1). One of the
// two holds for any pair of comparable values; presumably both compares are
// false when either input is NaN, which is what makes the OR an "ordered"
// mask — confirm against the Arm FCMGE/FCMGT descriptions.
if (IsScalar) {
switch (ElementSize) {
case 2: {
fcmge(VTMP1.H(), Vector1.H(), Vector2.H());
fcmgt(VTMP2.H(), Vector2.H(), Vector1.H());
orr(Dst.V8B(), VTMP1.V8B(), VTMP2.V8B());
break;
}
case 4: {
fcmge(VTMP1.S(), Vector1.S(), Vector2.S());
fcmgt(VTMP2.S(), Vector2.S(), Vector1.S());
orr(Dst.V8B(), VTMP1.V8B(), VTMP2.V8B());
break;
}
case 8: {
fcmge(VTMP1.D(), Vector1.D(), Vector2.D());
fcmgt(VTMP2.D(), Vector2.D(), Vector1.D());
orr(Dst.V8B(), VTMP1.V8B(), VTMP2.V8B());
break;
}
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
case 8: {
fcmge(VTMP1.V2D(), GetSrc(Op->Vector1.ID()).V2D(), GetSrc(Op->Vector2.ID()).V2D());
fcmgt(VTMP2.V2D(), GetSrc(Op->Vector2.ID()).V2D(), GetSrc(Op->Vector1.ID()).V2D());
orr(GetDst(Node).V16B(), VTMP1.V16B(), VTMP2.V16B());
break;
} else {
switch (ElementSize) {
case 2: {
fcmge(VTMP1.V8H(), Vector1.V8H(), Vector2.V8H());
fcmgt(VTMP2.V8H(), Vector2.V8H(), Vector1.V8H());
orr(Dst.V16B(), VTMP1.V16B(), VTMP2.V16B());
break;
}
case 4: {
fcmge(VTMP1.V4S(), Vector1.V4S(), Vector2.V4S());
fcmgt(VTMP2.V4S(), Vector2.V4S(), Vector1.V4S());
orr(Dst.V16B(), VTMP1.V16B(), VTMP2.V16B());
break;
}
case 8: {
fcmge(VTMP1.V2D(), Vector1.V2D(), Vector2.V2D());
fcmgt(VTMP2.V2D(), Vector2.V2D(), Vector1.V2D());
orr(Dst.V16B(), VTMP1.V16B(), VTMP2.V16B());
break;
}
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
}
}
}

// Arm64 JIT handler for the VFCMPUNO IR op ("unordered" floating-point compare).
// Two code paths are emitted depending on the operation:
//  * HostSupportsSVE && Is256Bit && !IsScalar: an SVE sequence that builds the
//    result in VTMP1 and then moves it into Dst.
//  * otherwise: an fcmge/fcmgt/orr sequence followed by mvn (bitwise invert),
//    in scalar (H/S/D) or vector (V8H/V4S/V2D) form.
//
// NOTE(review): this span appears to interleave the pre-change and post-change
// lines of a diff hunk (Op and OpSize are declared twice, several case bodies
// contain both an old fcmge/fcmgt sequence and a new one, and braces do not
// balance as shown). Confirm which lines are live before changing code here.
DEF_OP(VFCMPUNO) {
auto Op = IROp->C<IR::IROp_VFCMPUNO>();
const uint8_t OpSize = IROp->Size;
if (Op->Header.ElementSize == OpSize) {
// Scalar
switch (Op->Header.ElementSize) {
const auto Op = IROp->C<IR::IROp_VFCMPUNO>();
const auto OpSize = IROp->Size;

const auto ElementSize = Op->Header.ElementSize;
// Scalar when one element spans the whole operation.
const auto IsScalar = ElementSize == OpSize;
const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE;

const auto Dst = GetDst(Node);
const auto Vector1 = GetSrc(Op->Vector1.ID());
const auto Vector2 = GetSrc(Op->Vector2.ID());

if (HostSupportsSVE && Is256Bit && !IsScalar) {
const auto Mask = PRED_TMP_32B.Zeroing();
const auto ComparePred = p0;

// Ensure there's no junk in the temporary.
eor(VTMP1.Z().VnD(), VTMP1.Z().VnD(), VTMP1.Z().VnD());

// Each case below: fcmuo writes the comparison result into ComparePred, and
// the vector not_ + orr pair on the same source computes (~x | x), i.e. all
// bits set, for the lanes governed by ComparePred. Unlike the VFCMPORD
// sequence, the predicate is used as produced (no inversion). Lanes not
// selected presumably keep the zero written by the eor above — verify.
switch (ElementSize) {
case 2: {
fcmuo(ComparePred.VnH(), Mask, Vector1.Z().VnH(), Vector2.Z().VnH());
not_(VTMP1.Z().VnH(), ComparePred.Merging(), Vector1.Z().VnH());
orr(VTMP1.Z().VnH(), ComparePred.Merging(), VTMP1.Z().VnH(), Vector1.Z().VnH());
break;
}
case 4: {
fcmge(VTMP1.S(), GetSrc(Op->Vector1.ID()).S(), GetSrc(Op->Vector2.ID()).S());
fcmgt(VTMP2.S(), GetSrc(Op->Vector2.ID()).S(), GetSrc(Op->Vector1.ID()).S());
orr(GetDst(Node).V8B(), VTMP1.V8B(), VTMP2.V8B());
mvn(GetDst(Node).V8B(), GetDst(Node).V8B());
break;
fcmuo(ComparePred.VnS(), Mask, Vector1.Z().VnS(), Vector2.Z().VnS());
not_(VTMP1.Z().VnS(), ComparePred.Merging(), Vector1.Z().VnS());
orr(VTMP1.Z().VnS(), ComparePred.Merging(), VTMP1.Z().VnS(), Vector1.Z().VnS());
break;
}
case 8: {
fcmge(VTMP1.D(), GetSrc(Op->Vector1.ID()).D(), GetSrc(Op->Vector2.ID()).D());
fcmgt(VTMP2.D(), GetSrc(Op->Vector2.ID()).D(), GetSrc(Op->Vector1.ID()).D());
orr(GetDst(Node).V8B(), VTMP1.V8B(), VTMP2.V8B());
mvn(GetDst(Node).V8B(), GetDst(Node).V8B());
break;
fcmuo(ComparePred.VnD(), Mask, Vector1.Z().VnD(), Vector2.Z().VnD());
not_(VTMP1.Z().VnD(), ComparePred.Merging(), Vector1.Z().VnD());
orr(VTMP1.Z().VnD(), ComparePred.Merging(), VTMP1.Z().VnD(), Vector1.Z().VnD());
break;
}
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
return;
}
}
else {
// Vector
switch (Op->Header.ElementSize) {
case 2: {
fcmge(VTMP1.V8H(), GetSrc(Op->Vector1.ID()).V8H(), GetSrc(Op->Vector2.ID()).V8H());
fcmgt(VTMP2.V8H(), GetSrc(Op->Vector2.ID()).V8H(), GetSrc(Op->Vector1.ID()).V8H());
orr(GetDst(Node).V16B(), VTMP1.V16B(), VTMP2.V16B());
mvn(GetDst(Node).V16B(), GetDst(Node).V16B());
break;
}
case 4: {
fcmge(VTMP1.V4S(), GetSrc(Op->Vector1.ID()).V4S(), GetSrc(Op->Vector2.ID()).V4S());
fcmgt(VTMP2.V4S(), GetSrc(Op->Vector2.ID()).V4S(), GetSrc(Op->Vector1.ID()).V4S());
orr(GetDst(Node).V16B(), VTMP1.V16B(), VTMP2.V16B());
mvn(GetDst(Node).V16B(), GetDst(Node).V16B());
break;

// The SVE result was assembled in VTMP1; move it into the destination.
mov(Dst.Z().VnD(), VTMP1.Z().VnD());
} else {
// Non-SVE path: Dst = ~((Vector1 >= Vector2) | (Vector2 > Vector1)). The OR
// holds for any pair of comparable values; presumably both compares are false
// when either input is NaN, so the inverted result marks unordered elements —
// confirm against the Arm FCMGE/FCMGT descriptions.
if (IsScalar) {
switch (ElementSize) {
case 2: {
fcmge(VTMP1.H(), Vector1.H(), Vector2.H());
fcmgt(VTMP2.H(), Vector2.H(), Vector1.H());
orr(Dst.V8B(), VTMP1.V8B(), VTMP2.V8B());
mvn(Dst.V8B(), Dst.V8B());
break;
}
case 4: {
fcmge(VTMP1.S(), Vector1.S(), Vector2.S());
fcmgt(VTMP2.S(), Vector2.S(), Vector1.S());
orr(Dst.V8B(), VTMP1.V8B(), VTMP2.V8B());
mvn(Dst.V8B(), Dst.V8B());
break;
}
case 8: {
fcmge(VTMP1.D(), Vector1.D(), Vector2.D());
fcmgt(VTMP2.D(), Vector2.D(), Vector1.D());
orr(Dst.V8B(), VTMP1.V8B(), VTMP2.V8B());
mvn(Dst.V8B(), Dst.V8B());
break;
}
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
case 8: {
fcmge(VTMP1.V2D(), GetSrc(Op->Vector1.ID()).V2D(), GetSrc(Op->Vector2.ID()).V2D());
fcmgt(VTMP2.V2D(), GetSrc(Op->Vector2.ID()).V2D(), GetSrc(Op->Vector1.ID()).V2D());
orr(GetDst(Node).V16B(), VTMP1.V16B(), VTMP2.V16B());
mvn(GetDst(Node).V16B(), GetDst(Node).V16B());
break;
} else {
switch (ElementSize) {
case 2: {
fcmge(VTMP1.V8H(), Vector1.V8H(), Vector2.V8H());
fcmgt(VTMP2.V8H(), Vector2.V8H(), Vector1.V8H());
orr(Dst.V16B(), VTMP1.V16B(), VTMP2.V16B());
mvn(Dst.V16B(), Dst.V16B());
break;
}
case 4: {
fcmge(VTMP1.V4S(), Vector1.V4S(), Vector2.V4S());
fcmgt(VTMP2.V4S(), Vector2.V4S(), Vector1.V4S());
orr(Dst.V16B(), VTMP1.V16B(), VTMP2.V16B());
mvn(Dst.V16B(), Dst.V16B());
break;
}
case 8: {
fcmge(VTMP1.V2D(), Vector1.V2D(), Vector2.V2D());
fcmgt(VTMP2.V2D(), Vector2.V2D(), Vector1.V2D());
orr(Dst.V16B(), VTMP1.V16B(), VTMP2.V16B());
mvn(Dst.V16B(), Dst.V16B());
break;
}
default:
LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize);
break;
}
default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break;
}
}
}
Expand Down
Loading

0 comments on commit 7b4b9a8

Please sign in to comment.