From f4d229f1ba1a0a99ee1c92c1f7b47b1a015e15b1 Mon Sep 17 00:00:00 2001 From: lioncash Date: Thu, 6 Oct 2022 15:16:02 +0000 Subject: [PATCH 1/2] IR: Handle 256-bit VFCMPORD Extends VFCMPORD to handle 256-bit vectors. --- .../Interface/Core/Interpreter/VectorOps.cpp | 23 ++-- .../Interface/Core/JIT/Arm64/VectorOps.cpp | 127 +++++++++++++----- .../Interface/Core/JIT/x86_64/VectorOps.cpp | 45 ++++--- 3 files changed, 137 insertions(+), 58 deletions(-) diff --git a/External/FEXCore/Source/Interface/Core/Interpreter/VectorOps.cpp b/External/FEXCore/Source/Interface/Core/Interpreter/VectorOps.cpp index aff64810fd..abe54fb033 100644 --- a/External/FEXCore/Source/Interface/Core/Interpreter/VectorOps.cpp +++ b/External/FEXCore/Source/Interface/Core/Interpreter/VectorOps.cpp @@ -1274,7 +1274,7 @@ DEF_OP(VFCMPLE) { } DEF_OP(VFCMPORD) { - auto Op = IROp->C(); + const auto Op = IROp->C(); const uint8_t OpSize = IROp->Size; void *Src1 = GetSrc(Data->SSAData, Op->Vector1); @@ -1282,21 +1282,28 @@ DEF_OP(VFCMPORD) { const auto Func = [](auto a, auto b) { return (!std::isnan(a) && !std::isnan(b)) ? ~0ULL : 0; }; - uint8_t Tmp[16]; - const uint8_t Elements = OpSize / Op->Header.ElementSize; + uint8_t Tmp[Core::CPUState::XMM_AVX_REG_SIZE]; - if (Op->Header.ElementSize == OpSize) { - switch (Op->Header.ElementSize) { + const uint8_t ElementSize = Op->Header.ElementSize; + const uint8_t Elements = OpSize / ElementSize; + const auto IsScalar = ElementSize == OpSize; + + if (IsScalar) { + switch (ElementSize) { DO_SCALAR_COMPARE_OP(4, float, uint32_t, Func); DO_SCALAR_COMPARE_OP(8, double, uint64_t, Func); - default: LOGMAN_MSG_A_FMT("Unsupported elementSize: {}", Op->Header.ElementSize); + default: + LOGMAN_MSG_A_FMT("Unsupported element size: {}", ElementSize); + break; } } else { - switch (Op->Header.ElementSize) { + switch (ElementSize) { DO_VECTOR_COMPARE_OP(4, float, uint32_t, Func); DO_VECTOR_COMPARE_OP(8, double, uint64_t, Func); - default: LOGMAN_MSG_A_FMT("Unsupported elementSize: {}", Op->Header.ElementSize); + default: + LOGMAN_MSG_A_FMT("Unsupported element size: {}", ElementSize); + break; } } diff --git a/External/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp b/External/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp index 71f9649088..3632e6c233 100644 --- a/External/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp +++ b/External/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp @@ -3183,48 +3183,105 @@ DEF_OP(VFCMPLE) { } DEF_OP(VFCMPORD) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - if (Op->Header.ElementSize == OpSize) { - // Scalar - switch (Op->Header.ElementSize) { + const auto Op = IROp->C(); + const auto OpSize = IROp->Size; + + const auto ElementSize = Op->Header.ElementSize; + const auto IsScalar = ElementSize == OpSize; + const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + + const auto Dst = GetDst(Node); + const auto Vector1 = GetSrc(Op->Vector1.ID()); + const auto Vector2 = GetSrc(Op->Vector2.ID()); + + if (HostSupportsSVE && Is256Bit && !IsScalar) { + const auto Mask = PRED_TMP_32B.Zeroing(); + const auto ComparePred = p0; + + // Ensure there's no junk in the temporary. + eor(VTMP1.Z().VnD(), VTMP1.Z().VnD(), VTMP1.Z().VnD()); + + // The idea is like comparing for unordered, but we just + // invert the predicate from the comparison to instead + // select all ordered elements in the vector. + + switch (ElementSize) { + case 2: { + fcmuo(ComparePred.VnH(), Mask, Vector1.Z().VnH(), Vector2.Z().VnH()); + not_(ComparePred.VnB(), Mask, ComparePred.VnB()); + not_(VTMP1.Z().VnH(), ComparePred.Merging(), Vector1.Z().VnH()); + orr(VTMP1.Z().VnH(), ComparePred.Merging(), VTMP1.Z().VnH(), Vector1.Z().VnH()); + break; + } case 4: { - fcmge(VTMP1.S(), GetSrc(Op->Vector1.ID()).S(), GetSrc(Op->Vector2.ID()).S()); - fcmgt(VTMP2.S(), GetSrc(Op->Vector2.ID()).S(), GetSrc(Op->Vector1.ID()).S()); - orr(GetDst(Node).V8B(), VTMP1.V8B(), VTMP2.V8B()); - break; + fcmuo(ComparePred.VnS(), Mask, Vector1.Z().VnS(), Vector2.Z().VnS()); + not_(ComparePred.VnB(), Mask, ComparePred.VnB()); + not_(VTMP1.Z().VnS(), ComparePred.Merging(), Vector1.Z().VnS()); + orr(VTMP1.Z().VnS(), ComparePred.Merging(), VTMP1.Z().VnS(), Vector1.Z().VnS()); + break; } case 8: { - fcmge(VTMP1.D(), GetSrc(Op->Vector1.ID()).D(), GetSrc(Op->Vector2.ID()).D()); - fcmgt(VTMP2.D(), GetSrc(Op->Vector2.ID()).D(), GetSrc(Op->Vector1.ID()).D()); - orr(GetDst(Node).V8B(), VTMP1.V8B(), VTMP2.V8B()); - break; + fcmuo(ComparePred.VnD(), Mask, Vector1.Z().VnD(), Vector2.Z().VnD()); + not_(ComparePred.VnB(), Mask, ComparePred.VnB()); + not_(VTMP1.Z().VnD(), ComparePred.Merging(), Vector1.Z().VnD()); + orr(VTMP1.Z().VnD(), ComparePred.Merging(), VTMP1.Z().VnD(), Vector1.Z().VnD()); + break; } - default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break; + default: + LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); + return; } - } - else { - // Vector - switch (Op->Header.ElementSize) { - case 2: { - fcmge(VTMP1.V8H(), GetSrc(Op->Vector1.ID()).V8H(), GetSrc(Op->Vector2.ID()).V8H()); - fcmgt(VTMP2.V8H(), GetSrc(Op->Vector2.ID()).V8H(), GetSrc(Op->Vector1.ID()).V8H()); - orr(GetDst(Node).V16B(), VTMP1.V16B(), VTMP2.V16B()); - break; - } - case 4: { - fcmge(VTMP1.V4S(), GetSrc(Op->Vector1.ID()).V4S(), GetSrc(Op->Vector2.ID()).V4S()); - fcmgt(VTMP2.V4S(), GetSrc(Op->Vector2.ID()).V4S(), GetSrc(Op->Vector1.ID()).V4S()); - orr(GetDst(Node).V16B(), VTMP1.V16B(), VTMP2.V16B()); - break; + + mov(Dst.Z().VnD(), VTMP1.Z().VnD()); + } else { + if (IsScalar) { + switch (ElementSize) { + case 2: { + fcmge(VTMP1.H(), Vector1.H(), Vector2.H()); + fcmgt(VTMP2.H(), Vector2.H(), Vector1.H()); + orr(Dst.V8B(), VTMP1.V8B(), VTMP2.V8B()); + break; + } + case 4: { + fcmge(VTMP1.S(), Vector1.S(), Vector2.S()); + fcmgt(VTMP2.S(), Vector2.S(), Vector1.S()); + orr(Dst.V8B(), VTMP1.V8B(), VTMP2.V8B()); + break; + } + case 8: { + fcmge(VTMP1.D(), Vector1.D(), Vector2.D()); + fcmgt(VTMP2.D(), Vector2.D(), Vector1.D()); + orr(Dst.V8B(), VTMP1.V8B(), VTMP2.V8B()); + break; + } + default: + LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); + break; } - case 8: { - fcmge(VTMP1.V2D(), GetSrc(Op->Vector1.ID()).V2D(), GetSrc(Op->Vector2.ID()).V2D()); - fcmgt(VTMP2.V2D(), GetSrc(Op->Vector2.ID()).V2D(), GetSrc(Op->Vector1.ID()).V2D()); - orr(GetDst(Node).V16B(), VTMP1.V16B(), VTMP2.V16B()); - break; + } else { + switch (ElementSize) { + case 2: { + fcmge(VTMP1.V8H(), Vector1.V8H(), Vector2.V8H()); + fcmgt(VTMP2.V8H(), Vector2.V8H(), Vector1.V8H()); + orr(Dst.V16B(), VTMP1.V16B(), VTMP2.V16B()); + break; + } + case 4: { + fcmge(VTMP1.V4S(), Vector1.V4S(), Vector2.V4S()); + fcmgt(VTMP2.V4S(), Vector2.V4S(), Vector1.V4S()); + orr(Dst.V16B(), VTMP1.V16B(), VTMP2.V16B()); + break; + } + case 8: { + fcmge(VTMP1.V2D(), Vector1.V2D(), Vector2.V2D()); + fcmgt(VTMP2.V2D(), Vector2.V2D(), Vector1.V2D()); + orr(Dst.V16B(), VTMP1.V16B(), VTMP2.V16B()); + break; + } + default: + LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); + break; } - default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break; } } } diff --git a/External/FEXCore/Source/Interface/Core/JIT/x86_64/VectorOps.cpp b/External/FEXCore/Source/Interface/Core/JIT/x86_64/VectorOps.cpp index ba65bca4c7..89407a6b53 100644 --- a/External/FEXCore/Source/Interface/Core/JIT/x86_64/VectorOps.cpp +++ b/External/FEXCore/Source/Interface/Core/JIT/x86_64/VectorOps.cpp @@ -1781,29 +1781,44 @@ DEF_OP(VFCMPLE) { } DEF_OP(VFCMPORD) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; + const auto Op = IROp->C(); + const auto OpSize = IROp->Size; - if (Op->Header.ElementSize == OpSize) { - switch (Op->Header.ElementSize) { + const auto ElementSize = Op->Header.ElementSize; + const auto IsScalar = ElementSize == OpSize; + + const auto Dst = GetDst(Node); + const auto Vector1 = GetSrc(Op->Vector1.ID()); + const auto Vector2 = GetSrc(Op->Vector2.ID()); + + if (IsScalar) { + switch (ElementSize) { case 4: - vcmpss(GetDst(Node), GetSrc(Op->Vector1.ID()), GetSrc(Op->Vector2.ID()), 7); - break; + vcmpss(Dst, Vector1, Vector2, 7); + break; case 8: - vcmpsd(GetDst(Node), GetSrc(Op->Vector1.ID()), GetSrc(Op->Vector2.ID()), 7); - break; - default: LOGMAN_MSG_A_FMT("Unsupported element size: {}", Op->Header.ElementSize); + vcmpsd(Dst, Vector1, Vector2, 7); + break; + default: + LOGMAN_MSG_A_FMT("Unsupported element size: {}", ElementSize); + break; } } else { - switch (Op->Header.ElementSize) { + const auto DstYMM = ToYMM(Dst); + const auto Vector1YMM = ToYMM(Vector1); + const auto Vector2YMM = ToYMM(Vector2); + + switch (ElementSize) { case 4: - vcmpps(GetDst(Node), GetSrc(Op->Vector1.ID()), GetSrc(Op->Vector2.ID()), 7); - break; + vcmpps(DstYMM, Vector1YMM, Vector2YMM, 7); + break; case 8: - vcmppd(GetDst(Node), GetSrc(Op->Vector1.ID()), GetSrc(Op->Vector2.ID()), 7); - break; - default: LOGMAN_MSG_A_FMT("Unsupported element size: {}", Op->Header.ElementSize); + vcmppd(DstYMM, Vector1YMM, Vector2YMM, 7); + break; + default: + LOGMAN_MSG_A_FMT("Unsupported element size: {}", ElementSize); + break; } } } From c7ad06698766172b3a5811fa92a60acf387815e9 Mon Sep 17 00:00:00 2001 From: lioncash Date: Thu, 6 Oct 2022 15:36:48 +0000 Subject: [PATCH 2/2] IR: Handle 256-bit VFCMPUNO Extends VFCMPUNO to handle 256-bit vectors. --- .../Interface/Core/Interpreter/VectorOps.cpp | 23 +-- .../Interface/Core/JIT/Arm64/VectorOps.cpp | 131 ++++++++++++------ .../Interface/Core/JIT/x86_64/VectorOps.cpp | 45 ++++-- 3 files changed, 136 insertions(+), 63 deletions(-) diff --git a/External/FEXCore/Source/Interface/Core/Interpreter/VectorOps.cpp b/External/FEXCore/Source/Interface/Core/Interpreter/VectorOps.cpp index abe54fb033..e0b16cb5a2 100644 --- a/External/FEXCore/Source/Interface/Core/Interpreter/VectorOps.cpp +++ b/External/FEXCore/Source/Interface/Core/Interpreter/VectorOps.cpp @@ -1311,7 +1311,7 @@ DEF_OP(VFCMPORD) { } DEF_OP(VFCMPUNO) { - auto Op = IROp->C(); + const auto Op = IROp->C(); const uint8_t OpSize = IROp->Size; void *Src1 = GetSrc(Data->SSAData, Op->Vector1); @@ -1319,21 +1319,28 @@ DEF_OP(VFCMPUNO) { const auto Func = [](auto a, auto b) { return (std::isnan(a) || std::isnan(b)) ? ~0ULL : 0; }; - uint8_t Tmp[16]; - const uint8_t Elements = OpSize / Op->Header.ElementSize; + uint8_t Tmp[Core::CPUState::XMM_AVX_REG_SIZE]; + + const uint8_t ElementSize = Op->Header.ElementSize; + const uint8_t Elements = OpSize / ElementSize; + const auto IsScalar = ElementSize == OpSize; - if (Op->Header.ElementSize == OpSize) { - switch (Op->Header.ElementSize) { + if (IsScalar) { + switch (ElementSize) { DO_SCALAR_COMPARE_OP(4, float, uint32_t, Func); DO_SCALAR_COMPARE_OP(8, double, uint64_t, Func); - default: LOGMAN_MSG_A_FMT("Unsupported elementSize: {}", Op->Header.ElementSize); + default: + LOGMAN_MSG_A_FMT("Unsupported element size: {}", ElementSize); + break; } } else { - switch (Op->Header.ElementSize) { + switch (ElementSize) { DO_VECTOR_COMPARE_OP(4, float, uint32_t, Func); DO_VECTOR_COMPARE_OP(8, double, uint64_t, Func); - default: LOGMAN_MSG_A_FMT("Unsupported elementSize: {}", Op->Header.ElementSize); + default: + LOGMAN_MSG_A_FMT("Unsupported element size: {}", ElementSize); + break; } } diff --git a/External/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp b/External/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp index 3632e6c233..a0f02e8893 100644 --- a/External/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp +++ b/External/FEXCore/Source/Interface/Core/JIT/Arm64/VectorOps.cpp @@ -3287,53 +3287,104 @@ DEF_OP(VFCMPORD) { } DEF_OP(VFCMPUNO) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; - if (Op->Header.ElementSize == OpSize) { - // Scalar - switch (Op->Header.ElementSize) { + const auto Op = IROp->C(); + const auto OpSize = IROp->Size; + + const auto ElementSize = Op->Header.ElementSize; + const auto IsScalar = ElementSize == OpSize; + const auto Is256Bit = OpSize == Core::CPUState::XMM_AVX_REG_SIZE; + + const auto Dst = GetDst(Node); + const auto Vector1 = GetSrc(Op->Vector1.ID()); + const auto Vector2 = GetSrc(Op->Vector2.ID()); + + if (HostSupportsSVE && Is256Bit && !IsScalar) { + const auto Mask = PRED_TMP_32B.Zeroing(); + const auto ComparePred = p0; + + // Ensure there's no junk in the temporary. + eor(VTMP1.Z().VnD(), VTMP1.Z().VnD(), VTMP1.Z().VnD()); + + switch (ElementSize) { + case 2: { + fcmuo(ComparePred.VnH(), Mask, Vector1.Z().VnH(), Vector2.Z().VnH()); + not_(VTMP1.Z().VnH(), ComparePred.Merging(), Vector1.Z().VnH()); + orr(VTMP1.Z().VnH(), ComparePred.Merging(), VTMP1.Z().VnH(), Vector1.Z().VnH()); + break; + } case 4: { - fcmge(VTMP1.S(), GetSrc(Op->Vector1.ID()).S(), GetSrc(Op->Vector2.ID()).S()); - fcmgt(VTMP2.S(), GetSrc(Op->Vector2.ID()).S(), GetSrc(Op->Vector1.ID()).S()); - orr(GetDst(Node).V8B(), VTMP1.V8B(), VTMP2.V8B()); - mvn(GetDst(Node).V8B(), GetDst(Node).V8B()); - break; + fcmuo(ComparePred.VnS(), Mask, Vector1.Z().VnS(), Vector2.Z().VnS()); + not_(VTMP1.Z().VnS(), ComparePred.Merging(), Vector1.Z().VnS()); + orr(VTMP1.Z().VnS(), ComparePred.Merging(), VTMP1.Z().VnS(), Vector1.Z().VnS()); + break; } case 8: { - fcmge(VTMP1.D(), GetSrc(Op->Vector1.ID()).D(), GetSrc(Op->Vector2.ID()).D()); - fcmgt(VTMP2.D(), GetSrc(Op->Vector2.ID()).D(), GetSrc(Op->Vector1.ID()).D()); - orr(GetDst(Node).V8B(), VTMP1.V8B(), VTMP2.V8B()); - mvn(GetDst(Node).V8B(), GetDst(Node).V8B()); - break; + fcmuo(ComparePred.VnD(), Mask, Vector1.Z().VnD(), Vector2.Z().VnD()); + not_(VTMP1.Z().VnD(), ComparePred.Merging(), Vector1.Z().VnD()); + orr(VTMP1.Z().VnD(), ComparePred.Merging(), VTMP1.Z().VnD(), Vector1.Z().VnD()); + break; } - default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break; + default: + LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); + return; } - } - else { - // Vector - switch (Op->Header.ElementSize) { - case 2: { - fcmge(VTMP1.V8H(), GetSrc(Op->Vector1.ID()).V8H(), GetSrc(Op->Vector2.ID()).V8H()); - fcmgt(VTMP2.V8H(), GetSrc(Op->Vector2.ID()).V8H(), GetSrc(Op->Vector1.ID()).V8H()); - orr(GetDst(Node).V16B(), VTMP1.V16B(), VTMP2.V16B()); - mvn(GetDst(Node).V16B(), GetDst(Node).V16B()); - break; - } - case 4: { - fcmge(VTMP1.V4S(), GetSrc(Op->Vector1.ID()).V4S(), GetSrc(Op->Vector2.ID()).V4S()); - fcmgt(VTMP2.V4S(), GetSrc(Op->Vector2.ID()).V4S(), GetSrc(Op->Vector1.ID()).V4S()); - orr(GetDst(Node).V16B(), VTMP1.V16B(), VTMP2.V16B()); - mvn(GetDst(Node).V16B(), GetDst(Node).V16B()); - break; + + mov(Dst.Z().VnD(), VTMP1.Z().VnD()); + } else { + if (IsScalar) { + switch (ElementSize) { + case 2: { + fcmge(VTMP1.H(), Vector1.H(), Vector2.H()); + fcmgt(VTMP2.H(), Vector2.H(), Vector1.H()); + orr(Dst.V8B(), VTMP1.V8B(), VTMP2.V8B()); + mvn(Dst.V8B(), Dst.V8B()); + break; + } + case 4: { + fcmge(VTMP1.S(), Vector1.S(), Vector2.S()); + fcmgt(VTMP2.S(), Vector2.S(), Vector1.S()); + orr(Dst.V8B(), VTMP1.V8B(), VTMP2.V8B()); + mvn(Dst.V8B(), Dst.V8B()); + break; + } + case 8: { + fcmge(VTMP1.D(), Vector1.D(), Vector2.D()); + fcmgt(VTMP2.D(), Vector2.D(), Vector1.D()); + orr(Dst.V8B(), VTMP1.V8B(), VTMP2.V8B()); + mvn(Dst.V8B(), Dst.V8B()); + break; + } + default: + LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); + break; } - case 8: { - fcmge(VTMP1.V2D(), GetSrc(Op->Vector1.ID()).V2D(), GetSrc(Op->Vector2.ID()).V2D()); - fcmgt(VTMP2.V2D(), GetSrc(Op->Vector2.ID()).V2D(), GetSrc(Op->Vector1.ID()).V2D()); - orr(GetDst(Node).V16B(), VTMP1.V16B(), VTMP2.V16B()); - mvn(GetDst(Node).V16B(), GetDst(Node).V16B()); - break; + } else { + switch (ElementSize) { + case 2: { + fcmge(VTMP1.V8H(), Vector1.V8H(), Vector2.V8H()); + fcmgt(VTMP2.V8H(), Vector2.V8H(), Vector1.V8H()); + orr(Dst.V16B(), VTMP1.V16B(), VTMP2.V16B()); + mvn(Dst.V16B(), Dst.V16B()); + break; + } + case 4: { + fcmge(VTMP1.V4S(), Vector1.V4S(), Vector2.V4S()); + fcmgt(VTMP2.V4S(), Vector2.V4S(), Vector1.V4S()); + orr(Dst.V16B(), VTMP1.V16B(), VTMP2.V16B()); + mvn(Dst.V16B(), Dst.V16B()); + break; + } + case 8: { + fcmge(VTMP1.V2D(), Vector1.V2D(), Vector2.V2D()); + fcmgt(VTMP2.V2D(), Vector2.V2D(), Vector1.V2D()); + orr(Dst.V16B(), VTMP1.V16B(), VTMP2.V16B()); + mvn(Dst.V16B(), Dst.V16B()); + break; + } + default: + LOGMAN_MSG_A_FMT("Unknown Element Size: {}", ElementSize); + break; } - default: LOGMAN_MSG_A_FMT("Unknown Element Size: {}", Op->Header.ElementSize); break; } } } diff --git a/External/FEXCore/Source/Interface/Core/JIT/x86_64/VectorOps.cpp b/External/FEXCore/Source/Interface/Core/JIT/x86_64/VectorOps.cpp index 89407a6b53..4d2094d30f 100644 --- a/External/FEXCore/Source/Interface/Core/JIT/x86_64/VectorOps.cpp +++ b/External/FEXCore/Source/Interface/Core/JIT/x86_64/VectorOps.cpp @@ -1824,29 +1824,44 @@ DEF_OP(VFCMPORD) { } DEF_OP(VFCMPUNO) { - auto Op = IROp->C(); - const uint8_t OpSize = IROp->Size; + const auto Op = IROp->C(); + const auto OpSize = IROp->Size; - if (Op->Header.ElementSize == OpSize) { - switch (Op->Header.ElementSize) { + const auto ElementSize = Op->Header.ElementSize; + const auto IsScalar = ElementSize == OpSize; + + const auto Dst = GetDst(Node); + const auto Vector1 = GetSrc(Op->Vector1.ID()); + const auto Vector2 = GetSrc(Op->Vector2.ID()); + + if (IsScalar) { + switch (ElementSize) { case 4: - vcmpss(GetDst(Node), GetSrc(Op->Vector1.ID()), GetSrc(Op->Vector2.ID()), 3); - break; + vcmpss(Dst, Vector1, Vector2, 3); + break; case 8: - vcmpsd(GetDst(Node), GetSrc(Op->Vector1.ID()), GetSrc(Op->Vector2.ID()), 3); - break; - default: LOGMAN_MSG_A_FMT("Unsupported element size: {}", Op->Header.ElementSize); + vcmpsd(Dst, Vector1, Vector2, 3); + break; + default: + LOGMAN_MSG_A_FMT("Unsupported element size: {}", ElementSize); + break; } } else { - switch (Op->Header.ElementSize) { + const auto DstYMM = ToYMM(Dst); + const auto Vector1YMM = ToYMM(Vector1); + const auto Vector2YMM = ToYMM(Vector2); + + switch (ElementSize) { case 4: - vcmpps(GetDst(Node), GetSrc(Op->Vector1.ID()), GetSrc(Op->Vector2.ID()), 3); - break; + vcmpps(DstYMM, Vector1YMM, Vector2YMM, 3); + break; case 8: - vcmppd(GetDst(Node), GetSrc(Op->Vector1.ID()), GetSrc(Op->Vector2.ID()), 3); - break; - default: LOGMAN_MSG_A_FMT("Unsupported element size: {}", Op->Header.ElementSize); + vcmppd(DstYMM, Vector1YMM, Vector2YMM, 3); + break; + default: + LOGMAN_MSG_A_FMT("Unsupported element size: {}", ElementSize); + break; } } }