Skip to content

Commit 3410c76

Browse files
authored
[JIT] Enable EGPRs in JIT by adding REX2 encoding to the backend. (#106557)
* Ruihan: POC with REX2 Update comments. Merge the REX2 changes into the original legacy emit path bug fix: Set REX2.W with correct mask code. register encoding and prefix emitting logics. Add REX2 prefix emit logic bug fixes Add Stress mode for REX2 encoding and some bug fixes resolve comments: 1. add assertion check for UD opcodes. 2. add checks for EGPRs. Add REX2 to emitOutputAM, and let LEA to be REX2 compatible. Add REX2.X encoding for SIB byte But fixes: add REX2 prefix on the path in RI where MOV is specially handled. Enable REX2 encoding for `movups` fixed bugs in REX2 prefix emitting logic when working with map 1 instructions, and enabled REX2 for POPCNT legacy map index-er bug fixes some clean-up Adding initial APX unit testing path. Adding a coredistools dll that has LLVM APX disasm capability. It must be coppied into a CORE_ROOT manually. clean up work for REX2 narrow the REX2 scope to `sub` only some clean up based on the comments. bug fix resolve comment * resolve comments * refactor register encoding for REX2 * merge REX2 path to legacy path * Enable REX2 in more instructions. * Avoid repeatedly estimate the size of REX2 prefix * Enable REX2 encoding on RI and SV path - SV path is mostly for debugging purposes Added encoding unit tests for instructions with immediates * Add rex2 support to rotate and shift. * CR session. * Testing infra updates: assert REX2 is enabled. Code refactoring: AddX86PrefixIfNeeded. * revert rcl_N and rcr_N, tp and latency data for these instructions is missing in JIT, may indicate these instructions are not being used in JIT, drop them for now. * partially enable REX2 on emitOutputAM, case covered: R_AR and AR_R. * Adding unit tests. * push, pop, inc, dec, neg, not, xadd, shld, shrd, cmpxchg, setcc, bswap. * bug fix for bswap * bt * xchg, idiv * Make sure add REX2 prefix if register encoding for EGPRs are being called before adding any prefix. * Ensure code size is correctly computed in R_R_I path. 
* clean up * Change all AddSimdPrefix to AddX86Prefix Refactor REX2 encoding stress logics. * div, mulEAX * filter out test from REX2 encoding when using ACC form. (this will have side effect that the estimated code will go up and mismatch with actual code size.) * Make sure REX prefix will not be added when emitting with REX2. * resolve comments. * make sure the APX debug knob is only available under debug build. * clean up some out-dated code. * enable movsxd * Enable "Call" * Enable "JMP" * resolve merge errors * formatting * remote coredistools.dll for internal tests only * bug fix * resolve comments * add more emitter tests. * resolve comments. * clean up some comments and tweak the REX2 stress logic * clean up * formatting. * resolve comments.
1 parent d108be1 commit 3410c76

11 files changed

+974
-161
lines changed

src/coreclr/jit/codegen.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,7 @@ class CodeGen final : public CodeGenInterface
648648

649649
#if defined(TARGET_AMD64)
650650
void genAmd64EmitterUnitTestsSse2();
651+
void genAmd64EmitterUnitTestsApx();
651652
#endif
652653

653654
#endif // defined(DEBUG)

src/coreclr/jit/codegenlinear.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2698,6 +2698,10 @@ void CodeGen::genEmitterUnitTests()
26982698
{
26992699
genAmd64EmitterUnitTestsSse2();
27002700
}
2701+
if (unitTestSectionAll || (strstr(unitTestSection, "apx") != nullptr))
2702+
{
2703+
genAmd64EmitterUnitTestsApx();
2704+
}
27012705

27022706
#elif defined(TARGET_ARM64)
27032707
if (unitTestSectionAll || (strstr(unitTestSection, "general") != nullptr))

src/coreclr/jit/codegenxarch.cpp

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9053,6 +9053,225 @@ void CodeGen::genAmd64EmitterUnitTestsSse2()
90539053
GetEmitter()->emitIns_R_R_R(INS_cvtsd2ss, EA_8BYTE, REG_XMM0, REG_XMM1, REG_XMM2);
90549054
}
90559055

9056+
/*****************************************************************************
9057+
* Unit tests for the APX instructions.
9058+
*/
9059+
9060+
void CodeGen::genAmd64EmitterUnitTestsApx()
9061+
{
9062+
emitter* theEmitter = GetEmitter();
9063+
9064+
genDefineTempLabel(genCreateTempLabel());
9065+
9066+
// This test suite needs REX2 enabled.
9067+
if (!theEmitter->UseRex2Encoding() && !theEmitter->emitComp->DoJitStressRex2Encoding())
9068+
{
9069+
return;
9070+
}
9071+
9072+
theEmitter->emitIns_R_R(INS_add, EA_1BYTE, REG_EAX, REG_ECX);
9073+
theEmitter->emitIns_R_R(INS_add, EA_2BYTE, REG_EAX, REG_ECX);
9074+
theEmitter->emitIns_R_R(INS_add, EA_4BYTE, REG_EAX, REG_ECX);
9075+
theEmitter->emitIns_R_R(INS_add, EA_8BYTE, REG_EAX, REG_ECX);
9076+
theEmitter->emitIns_R_R(INS_or, EA_4BYTE, REG_EAX, REG_ECX);
9077+
theEmitter->emitIns_R_R(INS_adc, EA_4BYTE, REG_EAX, REG_ECX);
9078+
theEmitter->emitIns_R_R(INS_sbb, EA_4BYTE, REG_EAX, REG_ECX);
9079+
theEmitter->emitIns_R_R(INS_and, EA_4BYTE, REG_EAX, REG_ECX);
9080+
theEmitter->emitIns_R_R(INS_sub, EA_4BYTE, REG_EAX, REG_ECX);
9081+
theEmitter->emitIns_R_R(INS_xor, EA_4BYTE, REG_EAX, REG_ECX);
9082+
theEmitter->emitIns_R_R(INS_cmp, EA_4BYTE, REG_EAX, REG_ECX);
9083+
theEmitter->emitIns_R_R(INS_test, EA_4BYTE, REG_EAX, REG_ECX);
9084+
theEmitter->emitIns_R_R(INS_bsf, EA_4BYTE, REG_EAX, REG_ECX);
9085+
theEmitter->emitIns_R_R(INS_bsr, EA_4BYTE, REG_EAX, REG_ECX);
9086+
9087+
theEmitter->emitIns_R_R(INS_cmovo, EA_4BYTE, REG_EAX, REG_ECX);
9088+
9089+
theEmitter->emitIns_Mov(INS_mov, EA_4BYTE, REG_EAX, REG_ECX, false);
9090+
theEmitter->emitIns_Mov(INS_movsx, EA_2BYTE, REG_EAX, REG_ECX, false);
9091+
theEmitter->emitIns_Mov(INS_movzx, EA_2BYTE, REG_EAX, REG_ECX, false);
9092+
9093+
theEmitter->emitIns_R_R(INS_popcnt, EA_4BYTE, REG_EAX, REG_ECX);
9094+
theEmitter->emitIns_R_R(INS_lzcnt, EA_4BYTE, REG_EAX, REG_ECX);
9095+
theEmitter->emitIns_R_R(INS_tzcnt, EA_4BYTE, REG_EAX, REG_ECX);
9096+
9097+
theEmitter->emitIns_R_I(INS_add, EA_4BYTE, REG_ECX, 0x05);
9098+
theEmitter->emitIns_R_I(INS_add, EA_2BYTE, REG_ECX, 0x05);
9099+
theEmitter->emitIns_R_I(INS_or, EA_4BYTE, REG_EAX, 0x05);
9100+
theEmitter->emitIns_R_I(INS_adc, EA_4BYTE, REG_EAX, 0x05);
9101+
theEmitter->emitIns_R_I(INS_sbb, EA_4BYTE, REG_EAX, 0x05);
9102+
theEmitter->emitIns_R_I(INS_and, EA_4BYTE, REG_EAX, 0x05);
9103+
theEmitter->emitIns_R_I(INS_sub, EA_4BYTE, REG_EAX, 0x05);
9104+
theEmitter->emitIns_R_I(INS_xor, EA_4BYTE, REG_EAX, 0x05);
9105+
theEmitter->emitIns_R_I(INS_cmp, EA_4BYTE, REG_EAX, 0x05);
9106+
theEmitter->emitIns_R_I(INS_test, EA_4BYTE, REG_EAX, 0x05);
9107+
9108+
theEmitter->emitIns_R_I(INS_mov, EA_4BYTE, REG_EAX, 0xE0);
9109+
9110+
// The JIT tends to compress an imm64 to an imm32 if the upper half is all zero; use an immediate with high bits set so that this test exercises the imm64 path.
9111+
theEmitter->emitIns_R_I(INS_mov, EA_8BYTE, REG_RAX, 0xFFFF000000000000);
9112+
9113+
// shf reg, cl
9114+
theEmitter->emitIns_R(INS_rol, EA_4BYTE, REG_EAX);
9115+
theEmitter->emitIns_R(INS_ror, EA_4BYTE, REG_EAX);
9116+
theEmitter->emitIns_R(INS_rcl, EA_4BYTE, REG_EAX);
9117+
theEmitter->emitIns_R(INS_rcr, EA_4BYTE, REG_EAX);
9118+
theEmitter->emitIns_R(INS_shl, EA_4BYTE, REG_EAX);
9119+
theEmitter->emitIns_R(INS_shr, EA_4BYTE, REG_EAX);
9120+
theEmitter->emitIns_R(INS_sar, EA_4BYTE, REG_EAX);
9121+
9122+
// shf reg, 1
9123+
theEmitter->emitIns_R(INS_rol_1, EA_4BYTE, REG_EAX);
9124+
theEmitter->emitIns_R(INS_ror_1, EA_4BYTE, REG_EAX);
9125+
theEmitter->emitIns_R(INS_rcl_1, EA_4BYTE, REG_EAX);
9126+
theEmitter->emitIns_R(INS_rcr_1, EA_4BYTE, REG_EAX);
9127+
theEmitter->emitIns_R(INS_shl_1, EA_4BYTE, REG_EAX);
9128+
theEmitter->emitIns_R(INS_shr_1, EA_4BYTE, REG_EAX);
9129+
theEmitter->emitIns_R(INS_sar_1, EA_4BYTE, REG_EAX);
9130+
9131+
// shf reg, imm8
9132+
theEmitter->emitIns_R_I(INS_shl_N, EA_4BYTE, REG_ECX, 0x05);
9133+
theEmitter->emitIns_R_I(INS_shr_N, EA_4BYTE, REG_ECX, 0x05);
9134+
theEmitter->emitIns_R_I(INS_sar_N, EA_4BYTE, REG_ECX, 0x05);
9135+
theEmitter->emitIns_R_I(INS_rol_N, EA_4BYTE, REG_ECX, 0x05);
9136+
theEmitter->emitIns_R_I(INS_ror_N, EA_4BYTE, REG_ECX, 0x05);
9137+
// TODO-xarch-apx: these 2 are not enabled for now.
9138+
// theEmitter->emitIns_R_I(INS_rcl_N, EA_4BYTE, REG_ECX, 0x05);
9139+
// theEmitter->emitIns_R_I(INS_rcr_N, EA_4BYTE, REG_ECX, 0x05);
9140+
9141+
theEmitter->emitIns_R(INS_neg, EA_2BYTE, REG_EAX);
9142+
theEmitter->emitIns_R(INS_not, EA_2BYTE, REG_EAX);
9143+
9144+
theEmitter->emitIns_R_AR(INS_lea, EA_4BYTE, REG_ECX, REG_EAX, 4);
9145+
9146+
theEmitter->emitIns_R_AR(INS_mov, EA_1BYTE, REG_ECX, REG_EAX, 4);
9147+
theEmitter->emitIns_R_AR(INS_mov, EA_2BYTE, REG_ECX, REG_EAX, 4);
9148+
theEmitter->emitIns_R_AR(INS_mov, EA_4BYTE, REG_ECX, REG_EAX, 4);
9149+
theEmitter->emitIns_R_AR(INS_mov, EA_8BYTE, REG_ECX, REG_EAX, 4);
9150+
9151+
theEmitter->emitIns_R_AR(INS_add, EA_1BYTE, REG_EAX, REG_ECX, 4);
9152+
theEmitter->emitIns_R_AR(INS_add, EA_2BYTE, REG_EAX, REG_ECX, 4);
9153+
theEmitter->emitIns_R_AR(INS_add, EA_4BYTE, REG_EAX, REG_ECX, 4);
9154+
theEmitter->emitIns_R_AR(INS_add, EA_8BYTE, REG_EAX, REG_ECX, 4);
9155+
theEmitter->emitIns_R_AR(INS_or, EA_4BYTE, REG_EAX, REG_ECX, 4);
9156+
theEmitter->emitIns_R_AR(INS_adc, EA_4BYTE, REG_EAX, REG_ECX, 4);
9157+
theEmitter->emitIns_R_AR(INS_sbb, EA_4BYTE, REG_EAX, REG_ECX, 4);
9158+
theEmitter->emitIns_R_AR(INS_and, EA_4BYTE, REG_EAX, REG_ECX, 4);
9159+
theEmitter->emitIns_R_AR(INS_sub, EA_4BYTE, REG_EAX, REG_ECX, 4);
9160+
theEmitter->emitIns_R_AR(INS_xor, EA_4BYTE, REG_EAX, REG_ECX, 4);
9161+
theEmitter->emitIns_R_AR(INS_cmp, EA_4BYTE, REG_EAX, REG_ECX, 4);
9162+
theEmitter->emitIns_R_AR(INS_test, EA_4BYTE, REG_EAX, REG_ECX, 4);
9163+
theEmitter->emitIns_R_AR(INS_bsf, EA_4BYTE, REG_EAX, REG_ECX, 4);
9164+
theEmitter->emitIns_R_AR(INS_bsr, EA_4BYTE, REG_EAX, REG_ECX, 4);
9165+
theEmitter->emitIns_R_AR(INS_popcnt, EA_4BYTE, REG_EAX, REG_ECX, 4);
9166+
theEmitter->emitIns_R_AR(INS_lzcnt, EA_4BYTE, REG_EAX, REG_ECX, 4);
9167+
theEmitter->emitIns_R_AR(INS_tzcnt, EA_4BYTE, REG_EAX, REG_ECX, 4);
9168+
9169+
theEmitter->emitIns_AR_R(INS_add, EA_1BYTE, REG_EAX, REG_ECX, 4);
9170+
theEmitter->emitIns_AR_R(INS_add, EA_2BYTE, REG_EAX, REG_ECX, 4);
9171+
theEmitter->emitIns_AR_R(INS_add, EA_4BYTE, REG_EAX, REG_ECX, 4);
9172+
theEmitter->emitIns_AR_R(INS_add, EA_8BYTE, REG_EAX, REG_ECX, 4);
9173+
theEmitter->emitIns_AR_R(INS_or, EA_4BYTE, REG_EAX, REG_ECX, 4);
9174+
theEmitter->emitIns_AR_R(INS_adc, EA_4BYTE, REG_EAX, REG_ECX, 4);
9175+
theEmitter->emitIns_AR_R(INS_sbb, EA_4BYTE, REG_EAX, REG_ECX, 4);
9176+
theEmitter->emitIns_AR_R(INS_and, EA_4BYTE, REG_EAX, REG_ECX, 4);
9177+
theEmitter->emitIns_AR_R(INS_sub, EA_4BYTE, REG_EAX, REG_ECX, 4);
9178+
theEmitter->emitIns_AR_R(INS_xor, EA_4BYTE, REG_EAX, REG_ECX, 4);
9179+
theEmitter->emitIns_AR_R(INS_cmp, EA_4BYTE, REG_EAX, REG_ECX, 4);
9180+
theEmitter->emitIns_AR_R(INS_test, EA_4BYTE, REG_EAX, REG_ECX, 4);
9181+
9182+
theEmitter->emitIns_R_AR(INS_movsx, EA_2BYTE, REG_ECX, REG_EAX, 4);
9183+
theEmitter->emitIns_R_AR(INS_movzx, EA_2BYTE, REG_EAX, REG_ECX, 4);
9184+
theEmitter->emitIns_R_AR(INS_cmovo, EA_4BYTE, REG_EAX, REG_ECX, 4);
9185+
9186+
theEmitter->emitIns_AR_R(INS_xadd, EA_4BYTE, REG_EAX, REG_EDX, 2);
9187+
9188+
theEmitter->emitIns_R_R_I(INS_shld, EA_4BYTE, REG_EAX, REG_ECX, 5);
9189+
theEmitter->emitIns_R_R_I(INS_shrd, EA_2BYTE, REG_EAX, REG_ECX, 5);
9190+
// TODO-XArch-apx: S_R_I path only accepts SSE or VEX instructions,
9191+
// so I assume shld/shrd will not be taking the first argument from stack.
9192+
// theEmitter->emitIns_S_R_I(INS_shld, EA_2BYTE, 1, 2, REG_EAX, 5);
9193+
// theEmitter->emitIns_S_R_I(INS_shrd, EA_2BYTE, 1, 2, REG_EAX, 5);
9194+
9195+
theEmitter->emitIns_AR_R(INS_cmpxchg, EA_2BYTE, REG_EAX, REG_EDX, 2);
9196+
9197+
theEmitter->emitIns_R(INS_seto, EA_1BYTE, REG_EDX);
9198+
9199+
theEmitter->emitIns_R(INS_bswap, EA_8BYTE, REG_EDX);
9200+
9201+
// INS_bt only has reg-to-reg form.
9202+
theEmitter->emitIns_R_R(INS_bt, EA_2BYTE, REG_EAX, REG_EDX);
9203+
9204+
theEmitter->emitIns_R(INS_idiv, EA_8BYTE, REG_EDX);
9205+
9206+
theEmitter->emitIns_R_R(INS_xchg, EA_8BYTE, REG_EAX, REG_EDX);
9207+
9208+
theEmitter->emitIns_R(INS_div, EA_8BYTE, REG_EDX);
9209+
theEmitter->emitIns_R(INS_mulEAX, EA_8BYTE, REG_EDX);
9210+
9211+
GenTreePhysReg physReg(REG_EDX);
9212+
physReg.SetRegNum(REG_EDX);
9213+
GenTreeIndir load = indirForm(TYP_INT, &physReg);
9214+
9215+
theEmitter->emitIns_R_A(INS_add, EA_1BYTE, REG_EAX, &load);
9216+
theEmitter->emitIns_R_A(INS_add, EA_2BYTE, REG_EAX, &load);
9217+
theEmitter->emitIns_R_A(INS_add, EA_4BYTE, REG_EAX, &load);
9218+
theEmitter->emitIns_R_A(INS_add, EA_8BYTE, REG_EAX, &load);
9219+
theEmitter->emitIns_R_A(INS_or, EA_4BYTE, REG_EAX, &load);
9220+
theEmitter->emitIns_R_A(INS_adc, EA_4BYTE, REG_EAX, &load);
9221+
theEmitter->emitIns_R_A(INS_sbb, EA_4BYTE, REG_EAX, &load);
9222+
theEmitter->emitIns_R_A(INS_and, EA_4BYTE, REG_EAX, &load);
9223+
theEmitter->emitIns_R_A(INS_sub, EA_4BYTE, REG_EAX, &load);
9224+
theEmitter->emitIns_R_A(INS_xor, EA_4BYTE, REG_EAX, &load);
9225+
theEmitter->emitIns_R_A(INS_cmp, EA_4BYTE, REG_EAX, &load);
9226+
theEmitter->emitIns_R_A(INS_test, EA_4BYTE, REG_EAX, &load);
9227+
theEmitter->emitIns_R_A(INS_bsf, EA_4BYTE, REG_EAX, &load);
9228+
theEmitter->emitIns_R_A(INS_bsr, EA_4BYTE, REG_EAX, &load);
9229+
9230+
// Note:
9231+
// All the tests below rely on the runtime state of the stack of the method these unit tests are attached to.
9232+
// They might fail because a stack value is unavailable or mismatched; since these tests are mainly for
9233+
// checking encoding correctness, such failures may be considered harmless.
9234+
9235+
theEmitter->emitIns_R_S(INS_add, EA_1BYTE, REG_EAX, 0, 0);
9236+
theEmitter->emitIns_R_S(INS_add, EA_2BYTE, REG_EAX, 0, 0);
9237+
theEmitter->emitIns_R_S(INS_add, EA_4BYTE, REG_EAX, 0, 0);
9238+
theEmitter->emitIns_R_S(INS_add, EA_8BYTE, REG_EAX, 0, 0);
9239+
theEmitter->emitIns_R_S(INS_or, EA_4BYTE, REG_EAX, 0, 0);
9240+
theEmitter->emitIns_R_S(INS_adc, EA_4BYTE, REG_EAX, 0, 0);
9241+
theEmitter->emitIns_R_S(INS_sbb, EA_4BYTE, REG_EAX, 0, 0);
9242+
theEmitter->emitIns_R_S(INS_and, EA_4BYTE, REG_EAX, 0, 0);
9243+
theEmitter->emitIns_R_S(INS_sub, EA_4BYTE, REG_EAX, 0, 0);
9244+
theEmitter->emitIns_R_S(INS_xor, EA_4BYTE, REG_EAX, 0, 0);
9245+
theEmitter->emitIns_R_S(INS_cmp, EA_4BYTE, REG_EAX, 0, 0);
9246+
theEmitter->emitIns_R_S(INS_test, EA_4BYTE, REG_EAX, 0, 0);
9247+
theEmitter->emitIns_S_R(INS_xadd, EA_2BYTE, REG_EAX, 0, 0);
9248+
9249+
theEmitter->emitIns_S_I(INS_shl_N, EA_4BYTE, 0, 0, 4);
9250+
theEmitter->emitIns_S(INS_shl_1, EA_4BYTE, 0, 4);
9251+
9252+
theEmitter->emitIns_R_S(INS_movsx, EA_2BYTE, REG_ECX, 0, 0);
9253+
theEmitter->emitIns_R_S(INS_movzx, EA_2BYTE, REG_EAX, 0, 0);
9254+
theEmitter->emitIns_R_S(INS_cmovo, EA_4BYTE, REG_EAX, 0, 0);
9255+
9256+
theEmitter->emitIns_R(INS_pop, EA_PTRSIZE, REG_EAX);
9257+
theEmitter->emitIns_R(INS_push, EA_PTRSIZE, REG_EAX);
9258+
theEmitter->emitIns_R(INS_pop_hide, EA_PTRSIZE, REG_EAX);
9259+
theEmitter->emitIns_R(INS_push_hide, EA_PTRSIZE, REG_EAX);
9260+
9261+
theEmitter->emitIns_S(INS_pop, EA_PTRSIZE, 0, 0);
9262+
theEmitter->emitIns_I(INS_push, EA_PTRSIZE, 50);
9263+
9264+
theEmitter->emitIns_R(INS_inc, EA_4BYTE, REG_EAX);
9265+
theEmitter->emitIns_AR(INS_inc, EA_2BYTE, REG_EAX, 2);
9266+
theEmitter->emitIns_S(INS_inc, EA_2BYTE, 0, 0);
9267+
theEmitter->emitIns_R(INS_dec, EA_4BYTE, REG_EAX);
9268+
theEmitter->emitIns_AR(INS_dec, EA_2BYTE, REG_EAX, 2);
9269+
theEmitter->emitIns_S(INS_dec, EA_2BYTE, 0, 0);
9270+
9271+
theEmitter->emitIns_S(INS_neg, EA_2BYTE, 0, 0);
9272+
theEmitter->emitIns_S(INS_not, EA_2BYTE, 0, 0);
9273+
}
9274+
90569275
#endif // defined(DEBUG) && defined(TARGET_AMD64)
90579276

90589277
#ifdef PROFILING_SUPPORTED

src/coreclr/jit/compiler.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2295,7 +2295,10 @@ void Compiler::compSetProcessor()
22952295
if (canUseEvexEncoding())
22962296
{
22972297
codeGen->GetEmitter()->SetUseEvexEncoding(true);
2298-
// TODO-XArch-AVX512 : Revisit other flags to be set once avx512 instructions are added.
2298+
}
2299+
if (canUseApxEncoding())
2300+
{
2301+
codeGen->GetEmitter()->SetUseRex2Encoding(true);
22992302
}
23002303
}
23012304
#endif // TARGET_XARCH

src/coreclr/jit/compiler.h

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9921,6 +9921,17 @@ class Compiler
99219921
return (compOpportunisticallyDependsOn(InstructionSet_EVEX));
99229922
}
99239923

9924+
//------------------------------------------------------------------------
9925+
// canUseApxEncoding - Answer the question: Is APX encoding supported on this target.
9926+
//
9927+
// Returns:
9928+
// `true` if APX encoding is supported, `false` if not.
9929+
//
9930+
bool canUseApxEncoding() const
9931+
{
9932+
return compOpportunisticallyDependsOn(InstructionSet_APX);
9933+
}
9934+
99249935
private:
99259936
//------------------------------------------------------------------------
99269937
// DoJitStressEvexEncoding- Answer the question: Do we force EVEX encoding.
@@ -9935,7 +9946,7 @@ class Compiler
99359946
// otherwise use VEX encoding but can be EVEX encoded to use EVEX encoding
99369947
// This requires AVX512F, AVX512BW, AVX512CD, AVX512DQ, and AVX512VL support
99379948

9938-
if (JitConfig.JitStressEvexEncoding() && IsBaselineVector512IsaSupportedOpportunistically())
9949+
if (JitStressEvexEncoding() && IsBaselineVector512IsaSupportedOpportunistically())
99399950
{
99409951
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
99419952
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F_VL));
@@ -9948,14 +9959,49 @@ class Compiler
99489959

99499960
return true;
99509961
}
9951-
else if (JitConfig.JitStressEvexEncoding() && compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
9962+
else if (JitStressEvexEncoding() && compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
9963+
{
9964+
return true;
9965+
}
9966+
#endif // DEBUG
9967+
9968+
return false;
9969+
}
9970+
9971+
//------------------------------------------------------------------------
9972+
// DoJitStressRex2Encoding- Answer the question: Do we force REX2 encoding.
9973+
//
9974+
// Returns:
9975+
// `true` if user requests REX2 encoding.
9976+
//
9977+
bool DoJitStressRex2Encoding() const
9978+
{
9979+
#ifdef DEBUG
9980+
if (JitConfig.JitStressRex2Encoding() && compOpportunisticallyDependsOn(InstructionSet_APX))
99529981
{
9982+
// We should make sure EVEX is also stressed when REX2 is stressed, as we need to guarantee that EGPR
9983+
// functionality is properly turned on for every instruction when REX2 is stressed.
99539984
return true;
99549985
}
99559986
#endif // DEBUG
99569987

99579988
return false;
99589989
}
9990+
9991+
//------------------------------------------------------------------------
9992+
// JitStressEvexEncoding- Answer the question: Is Evex stress knob set
9993+
//
9994+
// Returns:
9995+
// `true` if the user set the EVEX stress knob or the REX2 stress knob (DEBUG builds only), `false` otherwise.
9996+
//
9997+
bool JitStressEvexEncoding() const
9998+
{
9999+
#ifdef DEBUG
10000+
return JitConfig.JitStressEvexEncoding() || JitConfig.JitStressRex2Encoding();
10001+
#endif // DEBUG
10002+
10003+
return false;
10004+
}
995910005
#endif // TARGET_XARCH
996010006

996110007
/*

src/coreclr/jit/emit.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,7 @@ class emitter
470470
#ifdef TARGET_XARCH
471471
SetUseVEXEncoding(false);
472472
SetUseEvexEncoding(false);
473+
SetUseRex2Encoding(false);
473474
#endif // TARGET_XARCH
474475

475476
emitDataSecCur = nullptr;

0 commit comments

Comments
 (0)