Skip to content

Commit

Permalink
Merge pull request FEX-Emu#4241 from Sonicadvance1/fix_h0f3a_rex_decode
Browse files Browse the repository at this point in the history
OpcodeDispatcher: Fixes FEX's H0F3A table handling of REX.W
  • Loading branch information
lioncash authored Jan 1, 2025
2 parents 3abe6c1 + a8272b7 commit 90b1ac4
Show file tree
Hide file tree
Showing 4 changed files with 132 additions and 59 deletions.
2 changes: 2 additions & 0 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4963,9 +4963,11 @@ void OpDispatchBuilder::InstallHostSpecificOpcodeHandlers() {
#define PF_3A_66 1
// Dispatch entries for opcode 0xDF under the 66 prefix in the 0F 3A map (AESKEYGENASSIST).
// The same handler is listed for both values of OPD's first argument so the
// instruction dispatches whether or not that bit is set.
// NOTE(review): the first OPD argument is presumably the REX.W selector, as in
// H0F3ATables.h — confirm against the OPD definition local to this function.
constexpr static std::tuple<uint16_t, uint8_t, FEXCore::X86Tables::OpDispatchPtr> H0F3A_AES[] = {
{OPD(0, PF_3A_66, 0xDF), 1, &OpDispatchBuilder::AESKeyGenAssist},
{OPD(1, PF_3A_66, 0xDF), 1, &OpDispatchBuilder::AESKeyGenAssist},
};
// Dispatch entries for opcode 0x44 under the 66 prefix in the 0F 3A map (PCLMULQDQ).
// The same handler is listed for both values of OPD's first argument so the
// instruction dispatches whether or not that bit is set.
// NOTE(review): the first OPD argument is presumably the REX.W selector, as in
// H0F3ATables.h — confirm against the OPD definition local to this function.
constexpr static std::tuple<uint16_t, uint8_t, FEXCore::X86Tables::OpDispatchPtr> H0F3A_PCLMUL[] = {
{OPD(0, PF_3A_66, 0x44), 1, &OpDispatchBuilder::PCLMULQDQOp},
{OPD(1, PF_3A_66, 0x44), 1, &OpDispatchBuilder::PCLMULQDQOp},
};

#undef PF_3A_NONE
Expand Down
82 changes: 54 additions & 28 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher/H0F3ATables.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,40 +6,66 @@ namespace FEXCore::IR {
// Packs an H0F3A table key: REX selector in bit 9, prefix selector in bit 8,
// and the opcode OR'd into the low bits.
// NOTE(review): assumes opcode fits in 8 bits so it cannot collide with the
// prefix/REX bits — every visible use passes a single-byte opcode.
#define OPD(REX, prefix, opcode) ((REX << 9) | (prefix << 8) | opcode)
// Prefix selector values for the `prefix` argument of OPD.
#define PF_3A_NONE 0
#define PF_3A_66 1
constexpr std::tuple<uint16_t, uint8_t, FEXCore::X86Tables::OpDispatchPtr> OpDispatch_H0F3ATable[] = {
{OPD(0, PF_3A_66, 0x08), 1, &OpDispatchBuilder::VectorRound<OpSize::i32Bit>},
{OPD(0, PF_3A_66, 0x09), 1, &OpDispatchBuilder::VectorRound<OpSize::i64Bit>},
{OPD(0, PF_3A_66, 0x0A), 1, &OpDispatchBuilder::InsertScalarRound<OpSize::i32Bit>},
{OPD(0, PF_3A_66, 0x0B), 1, &OpDispatchBuilder::InsertScalarRound<OpSize::i64Bit>},
{OPD(0, PF_3A_66, 0x0C), 1, &OpDispatchBuilder::VectorBlend<OpSize::i32Bit>},
{OPD(0, PF_3A_66, 0x0D), 1, &OpDispatchBuilder::VectorBlend<OpSize::i64Bit>},
{OPD(0, PF_3A_66, 0x0E), 1, &OpDispatchBuilder::VectorBlend<OpSize::i16Bit>},

{OPD(0, PF_3A_NONE, 0x0F), 1, &OpDispatchBuilder::PAlignrOp},
{OPD(0, PF_3A_66, 0x0F), 1, &OpDispatchBuilder::PAlignrOp},

{OPD(0, PF_3A_66, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i8Bit>},
{OPD(0, PF_3A_66, 0x15), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i16Bit>},
{OPD(0, PF_3A_66, 0x16), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i32Bit>},
{OPD(0, PF_3A_66, 0x17), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i32Bit>},
constexpr auto OpDispatchTableGenH0F3A = []() consteval {
constexpr auto OpDispatchTableGenH0F3AREX = []<uint16_t REX>() consteval {
constexpr std::tuple<uint16_t, uint8_t, FEXCore::X86Tables::OpDispatchPtr> Table[] = {
{OPD(REX, PF_3A_66, 0x08), 1, &OpDispatchBuilder::VectorRound<OpSize::i32Bit>},
{OPD(REX, PF_3A_66, 0x09), 1, &OpDispatchBuilder::VectorRound<OpSize::i64Bit>},
{OPD(REX, PF_3A_66, 0x0A), 1, &OpDispatchBuilder::InsertScalarRound<OpSize::i32Bit>},
{OPD(REX, PF_3A_66, 0x0B), 1, &OpDispatchBuilder::InsertScalarRound<OpSize::i64Bit>},
{OPD(REX, PF_3A_66, 0x0C), 1, &OpDispatchBuilder::VectorBlend<OpSize::i32Bit>},
{OPD(REX, PF_3A_66, 0x0D), 1, &OpDispatchBuilder::VectorBlend<OpSize::i64Bit>},
{OPD(REX, PF_3A_66, 0x0E), 1, &OpDispatchBuilder::VectorBlend<OpSize::i16Bit>},

{OPD(0, PF_3A_66, 0x20), 1, &OpDispatchBuilder::PINSROp<OpSize::i8Bit>},
{OPD(0, PF_3A_66, 0x21), 1, &OpDispatchBuilder::InsertPSOp},
{OPD(0, PF_3A_66, 0x22), 1, &OpDispatchBuilder::PINSROp<OpSize::i32Bit>},
{OPD(0, PF_3A_66, 0x40), 1, &OpDispatchBuilder::DPPOp<OpSize::i32Bit>},
{OPD(0, PF_3A_66, 0x41), 1, &OpDispatchBuilder::DPPOp<OpSize::i64Bit>},
{OPD(0, PF_3A_66, 0x42), 1, &OpDispatchBuilder::MPSADBWOp},
{OPD(REX, PF_3A_NONE, 0x0F), 1, &OpDispatchBuilder::PAlignrOp},
{OPD(REX, PF_3A_66, 0x0F), 1, &OpDispatchBuilder::PAlignrOp},

{OPD(REX, PF_3A_66, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i8Bit>},
{OPD(REX, PF_3A_66, 0x15), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i16Bit>},
{OPD(REX, PF_3A_66, 0x17), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i32Bit>},

{OPD(REX, PF_3A_66, 0x20), 1, &OpDispatchBuilder::PINSROp<OpSize::i8Bit>},
{OPD(REX, PF_3A_66, 0x21), 1, &OpDispatchBuilder::InsertPSOp},
{OPD(REX, PF_3A_66, 0x40), 1, &OpDispatchBuilder::DPPOp<OpSize::i32Bit>},
{OPD(REX, PF_3A_66, 0x41), 1, &OpDispatchBuilder::DPPOp<OpSize::i64Bit>},
{OPD(REX, PF_3A_66, 0x42), 1, &OpDispatchBuilder::MPSADBWOp},

{OPD(REX, PF_3A_66, 0x60), 1, &OpDispatchBuilder::VPCMPESTRMOp},
{OPD(REX, PF_3A_66, 0x61), 1, &OpDispatchBuilder::VPCMPESTRIOp},
{OPD(REX, PF_3A_66, 0x62), 1, &OpDispatchBuilder::VPCMPISTRMOp},
{OPD(REX, PF_3A_66, 0x63), 1, &OpDispatchBuilder::VPCMPISTRIOp},

{OPD(REX, PF_3A_NONE, 0xCC), 1, &OpDispatchBuilder::SHA1RNDS4Op},
};
return std::to_array(Table);
};

{OPD(0, PF_3A_66, 0x60), 1, &OpDispatchBuilder::VPCMPESTRMOp},
{OPD(0, PF_3A_66, 0x61), 1, &OpDispatchBuilder::VPCMPESTRIOp},
{OPD(0, PF_3A_66, 0x62), 1, &OpDispatchBuilder::VPCMPISTRMOp},
{OPD(0, PF_3A_66, 0x63), 1, &OpDispatchBuilder::VPCMPISTRIOp},
auto REX0 = OpDispatchTableGenH0F3AREX.template operator()<0>();
auto REX1 = OpDispatchTableGenH0F3AREX.template operator()<1>();
auto concat = []<typename T, size_t N1, size_t N2>(std::array<T, N1> const& lhs,
std::array<T, N2> const& rhs) consteval -> std::array<T, N1 + N2> {
std::array<T, N1 + N2> Table {};
for (size_t i = 0; i < N1; ++i) {
Table[i] = lhs[i];
}

{OPD(0, PF_3A_NONE, 0xCC), 1, &OpDispatchBuilder::SHA1RNDS4Op},
for (size_t i = 0; i < N2; ++i) {
Table[N1 + i] = rhs[i];
}

return Table;
};
return concat(REX0, REX1);
};

// Compile-time dispatch table produced by OpDispatchTableGenH0F3A: the same
// opcode list instantiated once with REX=0 and once with REX=1, concatenated.
constexpr auto OpDispatch_H0F3ATableIgnoreREX = OpDispatchTableGenH0F3A();

// Dispatch entries that are only valid with the REX selector clear: opcodes
// 0x16 (PExtrOp) and 0x22 (PINSROp) bound to the 32-bit operand size.
// The REX=1 keys for these two opcodes are listed in OpDispatch_H0F3ATable_64
// with the 64-bit operand size instead.
constexpr std::tuple<uint16_t, uint8_t, FEXCore::X86Tables::OpDispatchPtr> OpDispatch_H0F3ATableNeedsREX0[] = {
{OPD(0, PF_3A_66, 0x16), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i32Bit>},
{OPD(0, PF_3A_66, 0x22), 1, &OpDispatchBuilder::PINSROp<OpSize::i32Bit>},
};

// Dispatch entries keyed with the REX selector set. Opcodes 0x16 and 0x22 use
// the 64-bit operand-size variants of PExtrOp / PINSROp here.
// NOTE(review): presumably installed only for 64-bit guests — confirm at the
// install site, which is not visible here.
// NOTE(review): OPD(1, PF_3A_66, 0x0F) is also produced by the REX=1 half of
// OpDispatch_H0F3ATableIgnoreREX; confirm whether this PAlignrOp row is still
// required or is now redundant.
constexpr std::tuple<uint16_t, uint8_t, FEXCore::X86Tables::OpDispatchPtr> OpDispatch_H0F3ATable_64[] = {
{OPD(1, PF_3A_66, 0x0F), 1, &OpDispatchBuilder::PAlignrOp},
{OPD(1, PF_3A_66, 0x16), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i64Bit>},
{OPD(1, PF_3A_66, 0x22), 1, &OpDispatchBuilder::PINSROp<OpSize::i64Bit>},
};
Expand Down
73 changes: 42 additions & 31 deletions FEXCore/Source/Interface/Core/X86Tables/H0F3ATables.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,49 +21,60 @@ constexpr uint16_t PF_3A_66 = 1;

std::array<X86InstInfo, MAX_0F_3A_TABLE_SIZE> H0F3ATableOps = []() consteval {
std::array<X86InstInfo, MAX_0F_3A_TABLE_SIZE> Table{};
constexpr U16U8InfoStruct H0F3ATable[] = {
{OPD(0, PF_3A_NONE, 0x0F), 1, X86InstInfo{"PALIGNR", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX, 1, nullptr}},
{OPD(0, PF_3A_66, 0x08), 1, X86InstInfo{"ROUNDPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(0, PF_3A_66, 0x09), 1, X86InstInfo{"ROUNDPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(0, PF_3A_66, 0x0A), 1, X86InstInfo{"ROUNDSS", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(0, PF_3A_66, 0x0B), 1, X86InstInfo{"ROUNDSD", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(0, PF_3A_66, 0x0C), 1, X86InstInfo{"BLENDPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(0, PF_3A_66, 0x0D), 1, X86InstInfo{"BLENDPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(0, PF_3A_66, 0x0E), 1, X86InstInfo{"PBLENDW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(0, PF_3A_66, 0x0F), 1, X86InstInfo{"PALIGNR", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},

{OPD(0, PF_3A_66, 0x14), 1, X86InstInfo{"PEXTRB", TYPE_INST, GenFlagsSizes(SIZE_32BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(0, PF_3A_66, 0x15), 1, X86InstInfo{"PEXTRW", TYPE_INST, GenFlagsSizes(SIZE_16BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(0, PF_3A_66, 0x16), 1, X86InstInfo{"PEXTRD", TYPE_INST, GenFlagsSizes(SIZE_32BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(0, PF_3A_66, 0x17), 1, X86InstInfo{"EXTRACTPS", TYPE_INST, GenFlagsSizes(SIZE_32BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS, 1, nullptr}},
auto TableGen = []<uint16_t REX>() consteval {
constexpr U16U8InfoStruct Table[] = {
{OPD(REX, PF_3A_NONE, 0x0F), 1, X86InstInfo{"PALIGNR", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX, 1, nullptr}},
{OPD(REX, PF_3A_66, 0x08), 1, X86InstInfo{"ROUNDPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(REX, PF_3A_66, 0x09), 1, X86InstInfo{"ROUNDPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(REX, PF_3A_66, 0x0A), 1, X86InstInfo{"ROUNDSS", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(REX, PF_3A_66, 0x0B), 1, X86InstInfo{"ROUNDSD", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(REX, PF_3A_66, 0x0C), 1, X86InstInfo{"BLENDPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(REX, PF_3A_66, 0x0D), 1, X86InstInfo{"BLENDPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(REX, PF_3A_66, 0x0E), 1, X86InstInfo{"PBLENDW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(REX, PF_3A_66, 0x0F), 1, X86InstInfo{"PALIGNR", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},

{OPD(0, PF_3A_66, 0x20), 1, X86InstInfo{"PINSRB", TYPE_INST, GenFlagsDstSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_SRC_GPR, 1, nullptr}},
{OPD(0, PF_3A_66, 0x21), 1, X86InstInfo{"INSERTPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(0, PF_3A_66, 0x22), 1, X86InstInfo{"PINSRD", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_SRC_GPR, 1, nullptr}},
{OPD(0, PF_3A_66, 0x40), 1, X86InstInfo{"DPPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(0, PF_3A_66, 0x41), 1, X86InstInfo{"DPPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(0, PF_3A_66, 0x42), 1, X86InstInfo{"MPSADBW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(0, PF_3A_66, 0x44), 1, X86InstInfo{"PCLMULQDQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(REX, PF_3A_66, 0x14), 1, X86InstInfo{"PEXTRB", TYPE_INST, GenFlagsSizes(SIZE_32BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(REX, PF_3A_66, 0x15), 1, X86InstInfo{"PEXTRW", TYPE_INST, GenFlagsSizes(SIZE_16BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(REX, PF_3A_66, 0x17), 1, X86InstInfo{"EXTRACTPS", TYPE_INST, GenFlagsSizes(SIZE_32BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS, 1, nullptr}},

{OPD(REX, PF_3A_66, 0x20), 1, X86InstInfo{"PINSRB", TYPE_INST, GenFlagsDstSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_SRC_GPR, 1, nullptr}},
{OPD(REX, PF_3A_66, 0x21), 1, X86InstInfo{"INSERTPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(REX, PF_3A_66, 0x40), 1, X86InstInfo{"DPPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(REX, PF_3A_66, 0x41), 1, X86InstInfo{"DPPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(REX, PF_3A_66, 0x42), 1, X86InstInfo{"MPSADBW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(REX, PF_3A_66, 0x44), 1, X86InstInfo{"PCLMULQDQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},

{OPD(REX, PF_3A_66, 0x60), 1, X86InstInfo{"PCMPESTRM", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(REX, PF_3A_66, 0x61), 1, X86InstInfo{"PCMPESTRI", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(REX, PF_3A_66, 0x62), 1, X86InstInfo{"PCMPISTRM", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(REX, PF_3A_66, 0x63), 1, X86InstInfo{"PCMPISTRI", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},

{OPD(0, PF_3A_66, 0x60), 1, X86InstInfo{"PCMPESTRM", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(0, PF_3A_66, 0x61), 1, X86InstInfo{"PCMPESTRI", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(0, PF_3A_66, 0x62), 1, X86InstInfo{"PCMPISTRM", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(0, PF_3A_66, 0x63), 1, X86InstInfo{"PCMPISTRI", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(REX, PF_3A_NONE, 0xCC), 1, X86InstInfo{"SHA1RNDS4", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},

{OPD(0, PF_3A_NONE, 0xCC), 1, X86InstInfo{"SHA1RNDS4", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(REX, PF_3A_66, 0xDF), 1, X86InstInfo{"AESKEYGENASSIST", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
};
return std::to_array(Table);
};
constexpr auto H0F3ATable_IgnoresREX0 = TableGen.template operator()<0>();
constexpr auto H0F3ATable_IgnoresREX1 = TableGen.template operator()<1>();

GenerateTable(&Table.at(0), &H0F3ATable_IgnoresREX0.at(0), H0F3ATable_IgnoresREX0.size());
GenerateTable(&Table.at(0), &H0F3ATable_IgnoresREX1.at(0), H0F3ATable_IgnoresREX1.size());

{OPD(0, PF_3A_66, 0xDF), 1, X86InstInfo{"AESKEYGENASSIST", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
constexpr U16U8InfoStruct TableNeedsREX[] = {
{OPD(0, PF_3A_66, 0x16), 1, X86InstInfo{"PEXTRD", TYPE_INST, GenFlagsSizes(SIZE_32BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(0, PF_3A_66, 0x22), 1, X86InstInfo{"PINSRD", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_SRC_GPR, 1, nullptr}},
};
GenerateTable(&Table.at(0), TableNeedsREX, std::size(TableNeedsREX));

GenerateTable(&Table.at(0), H0F3ATable, std::size(H0F3ATable));
IR::InstallToTable(Table, IR::OpDispatch_H0F3ATableIgnoreREX);
IR::InstallToTable(Table, IR::OpDispatch_H0F3ATableNeedsREX0);

IR::InstallToTable(Table, IR::OpDispatch_H0F3ATable);
return Table;
}();

void InitializeH0F3ATables(Context::OperatingMode Mode) {
static constexpr U16U8InfoStruct H0F3ATable_64[] = {
{OPD(1, PF_3A_66, 0x0F), 1, X86InstInfo{"PALIGNR", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(1, PF_3A_66, 0x16), 1, X86InstInfo{"PEXTRQ", TYPE_INST, GenFlagsSizes(SIZE_64BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS, 1, nullptr}},
{OPD(1, PF_3A_66, 0x22), 1, X86InstInfo{"PINSRQ", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_SRC_GPR, 1, nullptr}},
};
Expand Down
34 changes: 34 additions & 0 deletions unittests/ASM/FEX_bugs/H0F3AREXBug.asm
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
%ifdef CONFIG
{}
%endif

; Regression test: FEX-Emu had a bug in decoding the H0F3A instruction table.
; It would accidentally require REX.W to be clear on the suite of instructions that ignore the flag.
; This executes every H0F3A instruction that ignores REX.W with the o64 prefix applied,
; to ensure each one decodes. No register state is checked; reaching hlt is the pass condition.
; pextrd/pextrq and pinsrd/pinsrq are intentionally absent: REX.W changes their meaning.

o64 palignr mm0, mm1, 0
o64 roundps xmm0, xmm1, 0
o64 roundpd xmm0, xmm1, 0
o64 roundss xmm0, xmm1, 0
o64 roundsd xmm0, xmm1, 0
o64 blendps xmm0, xmm1, 0
o64 blendpd xmm0, xmm1, 0
o64 pblendw xmm0, xmm1, 0
o64 palignr xmm0, xmm1, 0
o64 pextrb eax, xmm0, 0
o64 pextrw eax, xmm0, 0
o64 extractps eax, xmm0, 0
o64 pinsrb xmm0, eax, 0
o64 insertps xmm0, xmm1, 0
o64 dpps xmm0, xmm1, 0
o64 dppd xmm0, xmm1, 0
o64 mpsadbw xmm0, xmm1, 0
o64 pclmulqdq xmm0, xmm1, 0
o64 pcmpestrm xmm0, xmm1, 0
o64 pcmpestri xmm0, xmm1, 0
o64 pcmpistrm xmm0, xmm1, 0
o64 pcmpistri xmm0, xmm1, 0
o64 sha1rnds4 xmm0, xmm1, 0
o64 aeskeygenassist xmm0, xmm1, 0

hlt

0 comments on commit 90b1ac4

Please sign in to comment.