diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 1485af58c0a5e2..8c964c58e61db8 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -6478,6 +6478,384 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_OPTS_SCALABLE_D); // LDFF1SH {.D }, /Z, [, .D] theEmitter->emitIns_R_R_R_R(INS_sve_ldff1w, EA_SCALABLE, REG_V4, REG_P3, REG_R2, REG_V1, INS_OPTS_SCALABLE_D); // LDFF1W {.D }, /Z, [, .D] + + // IF_SVE_IF_4A + theEmitter->emitIns_R_R_R_R(INS_sve_ldnt1b, EA_SCALABLE, REG_V3, REG_P2, REG_V1, REG_R0, + INS_OPTS_SCALABLE_S); // LDNT1B {.S }, /Z, [.S{, }] + theEmitter->emitIns_R_R_R_R(INS_sve_ldnt1h, EA_SCALABLE, REG_V0, REG_P1, REG_V2, REG_R3, + INS_OPTS_SCALABLE_S); // LDNT1H {.S }, /Z, [.S{, }] + theEmitter->emitIns_R_R_R_R(INS_sve_ldnt1sb, EA_SCALABLE, REG_V2, REG_P3, REG_V5, REG_R4, + INS_OPTS_SCALABLE_S); // LDNT1SB {.S }, /Z, [.S{, }] + theEmitter->emitIns_R_R_R_R(INS_sve_ldnt1sh, EA_SCALABLE, REG_V3, REG_P4, REG_V1, REG_R2, + INS_OPTS_SCALABLE_S); // LDNT1SH {.S }, /Z, [.S{, }] + // REG_ZR can be used due to the optional {, } of the format. + theEmitter->emitIns_R_R_R_R(INS_sve_ldnt1w, EA_SCALABLE, REG_V1, REG_P3, REG_V4, REG_ZR, + INS_OPTS_SCALABLE_S); // LDNT1W {.S }, /Z, [.S{, }] + + // IF_SVE_IF_4A_A + theEmitter->emitIns_R_R_R_R(INS_sve_ldnt1b, EA_SCALABLE, REG_V0, REG_P2, REG_V4, REG_R3, + INS_OPTS_SCALABLE_D); // LDNT1B {.D }, /Z, [.D{, }] + theEmitter->emitIns_R_R_R_R(INS_sve_ldnt1h, EA_SCALABLE, REG_V1, REG_P4, REG_V3, REG_R2, + INS_OPTS_SCALABLE_D); // LDNT1H {.D }, /Z, [.D{, }] + theEmitter->emitIns_R_R_R_R(INS_sve_ldnt1sb, EA_SCALABLE, REG_V2, REG_P3, REG_V4, REG_R5, + INS_OPTS_SCALABLE_D); // LDNT1SB {.D }, /Z, [.D{, }] + theEmitter->emitIns_R_R_R_R(INS_sve_ldnt1sh, EA_SCALABLE, REG_V3, REG_P2, REG_V1, REG_R0, + INS_OPTS_SCALABLE_D); // LDNT1SH {.D }, /Z, [.D{, }] + // REG_ZR can be used due to the optional {, } of the format. 
+ theEmitter->emitIns_R_R_R_R(INS_sve_ldnt1w, EA_SCALABLE, REG_V4, REG_P1, REG_V2, REG_ZR, + INS_OPTS_SCALABLE_D); // LDNT1W {.D }, /Z, [.D{, }] + + // IF_SVE_IG_4A + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1d, EA_SCALABLE, REG_V1, REG_P2, REG_R3, REG_R4, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LDFF1D {.D }, /Z, [{, , LSL #3}] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sw, EA_SCALABLE, REG_V2, REG_P3, REG_R4, REG_R5, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LDFF1SW {.D }, /Z, [{, , LSL #2}] + // REG_ZR can be used due to the optional {, , LSL #2}} of the format, though it still requires passing + // INS_SCALABLE_OPTS_LSL_N with it. + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sw, EA_SCALABLE, REG_V2, REG_P3, REG_R4, REG_ZR, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LDFF1SW {.D }, /Z, [{, , LSL #2}] + + // IF_SVE_IG_4A_D + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sb, EA_SCALABLE, REG_V1, REG_P0, REG_R2, REG_R4, + INS_OPTS_SCALABLE_H); // LDFF1SB {.H }, /Z, [{, }] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sb, EA_SCALABLE, REG_V1, REG_P0, REG_R2, REG_R4, + INS_OPTS_SCALABLE_S); // LDFF1SB {.S }, /Z, [{, }] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sb, EA_SCALABLE, REG_V1, REG_P0, REG_R2, REG_R4, + INS_OPTS_SCALABLE_D); // LDFF1SB {.D }, /Z, [{, }] + + // IF_SVE_IG_4A_E + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1b, EA_SCALABLE, REG_V3, REG_P2, REG_R0, REG_R1, + INS_OPTS_SCALABLE_B); // LDFF1B {.B }, /Z, [{, }] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1b, EA_SCALABLE, REG_V3, REG_P2, REG_R0, REG_R1, + INS_OPTS_SCALABLE_H); // LDFF1B {.H }, /Z, [{, }] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1b, EA_SCALABLE, REG_V3, REG_P2, REG_R0, REG_R1, + INS_OPTS_SCALABLE_S); // LDFF1B {.S }, /Z, [{, }] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1b, EA_SCALABLE, REG_V3, REG_P2, REG_R0, REG_R1, + INS_OPTS_SCALABLE_D); // LDFF1B {.D }, /Z, [{, }] + + // IF_SVE_IG_4A_F + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sh, EA_SCALABLE, REG_V4, REG_P3, REG_R1, 
REG_R2, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_LSL_N); // LDFF1SH {.S }, /Z, [{, , LSL #1}] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sh, EA_SCALABLE, REG_V4, REG_P3, REG_R1, REG_R2, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LDFF1SH {.D }, /Z, [{, , LSL #1}] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1w, EA_SCALABLE, REG_V1, REG_P0, REG_R2, REG_R3, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_LSL_N); // LDFF1W {.S }, /Z, [{, , LSL #2}] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1w, EA_SCALABLE, REG_V1, REG_P0, REG_R2, REG_R3, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LDFF1W {.D }, /Z, [{, , LSL #2}] + // REG_ZR can be used due to the optional {, , LSL #2}} of the format, though it still requires passing + // INS_SCALABLE_OPTS_LSL_N with it. + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1w, EA_SCALABLE, REG_V1, REG_P0, REG_R2, REG_ZR, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LDFF1W {.D }, /Z, [{, , LSL #2}] + + // IF_SVE_IG_4A_G + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1h, EA_SCALABLE, REG_V3, REG_P1, REG_R4, REG_R0, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_LSL_N); // LDFF1H {.H }, /Z, [{, , LSL #1}] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1h, EA_SCALABLE, REG_V3, REG_P1, REG_R4, REG_R0, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_LSL_N); // LDFF1H {.S }, /Z, [{, , LSL #1}] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1h, EA_SCALABLE, REG_V3, REG_P1, REG_R4, REG_R0, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LDFF1H {.D }, /Z, [{, , LSL #1}] + // REG_ZR can be used due to the optional {, , LSL #1}} of the format, though it still requires passing + // INS_SCALABLE_OPTS_LSL_N with it. 
+ theEmitter->emitIns_R_R_R_R(INS_sve_ldff1h, EA_SCALABLE, REG_V3, REG_P1, REG_R4, REG_ZR, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LDFF1H {.D }, /Z, [{, , LSL #1}] + + // IF_SVE_II_4A + theEmitter->emitIns_R_R_R_R(INS_sve_ld1d, EA_SCALABLE, REG_V0, REG_P2, REG_R1, REG_R3, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LD1D {.D }, /Z, [, , LSL #3] + + // IF_SVE_II_4A_B + theEmitter->emitIns_R_R_R_R(INS_sve_ld1d, EA_SCALABLE, REG_V1, REG_P0, REG_R3, REG_R4, INS_OPTS_SCALABLE_Q, + INS_SCALABLE_OPTS_LSL_N); // LD1D {.Q }, /Z, [, , LSL #3] + + // IF_SVE_II_4A_H + theEmitter->emitIns_R_R_R_R(INS_sve_ld1w, EA_SCALABLE, REG_V5, REG_P3, REG_R4, REG_R1, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_LSL_N); // LD1W {.S }, /Z, [, , LSL #2] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1w, EA_SCALABLE, REG_V5, REG_P3, REG_R4, REG_R1, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LD1W {.D }, /Z, [, , LSL #2] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1w, EA_SCALABLE, REG_V5, REG_P3, REG_R4, REG_R1, INS_OPTS_SCALABLE_Q, + INS_SCALABLE_OPTS_LSL_N); // LD1W {.Q }, /Z, [, , LSL #2] + + // IF_SVE_IK_4A + theEmitter->emitIns_R_R_R_R(INS_sve_ld1sw, EA_SCALABLE, REG_V0, REG_P1, REG_R2, REG_R3, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LD1SW {.D }, /Z, [, , LSL #2] + + // IF_SVE_IK_4A_F + theEmitter->emitIns_R_R_R_R(INS_sve_ld1sb, EA_SCALABLE, REG_V1, REG_P2, REG_R3, REG_R4, + INS_OPTS_SCALABLE_H); // LD1SB {.H }, /Z, [, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1sb, EA_SCALABLE, REG_V1, REG_P2, REG_R3, REG_R4, + INS_OPTS_SCALABLE_S); // LD1SB {.S }, /Z, [, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1sb, EA_SCALABLE, REG_V1, REG_P2, REG_R3, REG_R4, + INS_OPTS_SCALABLE_D); // LD1SB {.D }, /Z, [, ] + + // IF_SVE_IK_4A_G + theEmitter->emitIns_R_R_R_R(INS_sve_ld1sh, EA_SCALABLE, REG_V2, REG_P3, REG_R4, REG_R5, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_LSL_N); // LD1SH {.S }, /Z, [, , LSL #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1sh, EA_SCALABLE, REG_V2, REG_P3, REG_R4, 
REG_R5, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LD1SH {.D }, /Z, [, , LSL #1] + + // IF_SVE_IK_4A_H + theEmitter->emitIns_R_R_R_R(INS_sve_ld1b, EA_SCALABLE, REG_V3, REG_P4, REG_R5, REG_R6, + INS_OPTS_SCALABLE_B); // LD1B {.B }, /Z, [, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1b, EA_SCALABLE, REG_V3, REG_P4, REG_R5, REG_R6, + INS_OPTS_SCALABLE_H); // LD1B {.H }, /Z, [, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1b, EA_SCALABLE, REG_V3, REG_P4, REG_R5, REG_R6, + INS_OPTS_SCALABLE_S); // LD1B {.S }, /Z, [, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1b, EA_SCALABLE, REG_V3, REG_P4, REG_R5, REG_R6, + INS_OPTS_SCALABLE_D); // LD1B {.D }, /Z, [, ] + + // IF_SVE_IK_4A_I + theEmitter->emitIns_R_R_R_R(INS_sve_ld1h, EA_SCALABLE, REG_V4, REG_P2, REG_R3, REG_R1, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_LSL_N); // LD1H {.H }, /Z, [, , LSL #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1h, EA_SCALABLE, REG_V4, REG_P2, REG_R3, REG_R1, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_LSL_N); // LD1H {.S }, /Z, [, , LSL #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1h, EA_SCALABLE, REG_V4, REG_P2, REG_R3, REG_R1, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LD1H {.D }, /Z, [, , LSL #1] + + // IF_SVE_IN_4A + theEmitter->emitIns_R_R_R_R(INS_sve_ldnt1b, EA_SCALABLE, REG_V4, REG_P2, REG_R1, REG_R3, + INS_OPTS_SCALABLE_B); // LDNT1B {.B }, /Z, [, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ldnt1d, EA_SCALABLE, REG_V0, REG_P1, REG_R2, REG_R4, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LDNT1D {.D }, /Z, [, , LSL #3] + theEmitter->emitIns_R_R_R_R(INS_sve_ldnt1h, EA_SCALABLE, REG_V0, REG_P3, REG_R4, REG_R5, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_LSL_N); // LDNT1H {.H }, /Z, [, , LSL #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ldnt1w, EA_SCALABLE, REG_V2, REG_P0, REG_R3, REG_R1, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_LSL_N); // LDNT1W {.S }, /Z, [, , LSL #2] + + // IF_SVE_IP_4A + theEmitter->emitIns_R_R_R_R(INS_sve_ld1rob, EA_SCALABLE, REG_V0, REG_P1, REG_R3, REG_R2, + 
INS_OPTS_SCALABLE_B); // LD1ROB {.B }, /Z, [, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1rod, EA_SCALABLE, REG_V0, REG_P2, REG_R1, REG_R3, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LD1ROD {.D }, /Z, [, , LSL #3] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1roh, EA_SCALABLE, REG_V4, REG_P3, REG_R2, REG_R1, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_LSL_N); // LD1ROH {.H }, /Z, [, , LSL #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1row, EA_SCALABLE, REG_V1, REG_P3, REG_R2, REG_R4, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_LSL_N); // LD1ROW {.S }, /Z, [, , LSL #2] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1rqb, EA_SCALABLE, REG_V3, REG_P1, REG_R4, REG_R2, + INS_OPTS_SCALABLE_B); // LD1RQB {.B }, /Z, [, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1rqd, EA_SCALABLE, REG_V2, REG_P3, REG_R1, REG_R4, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LD1RQD {.D }, /Z, [, , LSL #3] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1rqh, EA_SCALABLE, REG_V1, REG_P2, REG_R3, REG_R4, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_LSL_N); // LD1RQH {.H }, /Z, [, , LSL #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1rqw, EA_SCALABLE, REG_V0, REG_P1, REG_R2, REG_R3, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_LSL_N); // LD1RQW {.S }, /Z, [, , LSL #2] + + // IF_SVE_IR_4A + theEmitter->emitIns_R_R_R_R(INS_sve_ld2q, EA_SCALABLE, REG_V0, REG_P3, REG_R2, REG_R1, INS_OPTS_SCALABLE_Q, + INS_SCALABLE_OPTS_LSL_N); // LD2Q {.Q, .Q }, /Z, [, , LSL + // #4] + theEmitter->emitIns_R_R_R_R(INS_sve_ld3q, EA_SCALABLE, REG_V3, REG_P4, REG_R1, REG_R2, INS_OPTS_SCALABLE_Q, + INS_SCALABLE_OPTS_LSL_N); // LD3Q {.Q, .Q, .Q }, /Z, [, + // , + // LSL #4] + theEmitter->emitIns_R_R_R_R(INS_sve_ld4q, EA_SCALABLE, REG_V5, REG_P1, REG_R4, REG_R3, INS_OPTS_SCALABLE_Q, + INS_SCALABLE_OPTS_LSL_N); // LD4Q {.Q, .Q, .Q, .Q }, /Z, + // [, , LSL #4] + + // IF_SVE_IT_4A + theEmitter->emitIns_R_R_R_R(INS_sve_ld2b, EA_SCALABLE, REG_V0, REG_P1, REG_R2, REG_R3, + INS_OPTS_SCALABLE_B); // LD2B {.B, .B }, /Z, [, ] + 
theEmitter->emitIns_R_R_R_R(INS_sve_ld2d, EA_SCALABLE, REG_V7, REG_P6, REG_R5, REG_R4, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LD2D {.D, .D }, /Z, [, , LSL + // #3] + theEmitter->emitIns_R_R_R_R(INS_sve_ld2h, EA_SCALABLE, REG_V8, REG_P5, REG_R9, REG_R10, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_LSL_N); // LD2H {.H, .H }, /Z, [, , LSL + // #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ld2w, EA_SCALABLE, REG_V6, REG_P5, REG_R4, REG_R7, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_LSL_N); // LD2W {.S, .S }, /Z, [, , LSL + // #2] + theEmitter->emitIns_R_R_R_R(INS_sve_ld3b, EA_SCALABLE, REG_V1, REG_P0, REG_R3, REG_R2, + INS_OPTS_SCALABLE_B); // LD3B {.B, .B, .B }, /Z, [, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld3d, EA_SCALABLE, REG_V4, REG_P3, REG_R8, REG_R1, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LD3D {.D, .D, .D }, /Z, [, + // , + // LSL #3] + theEmitter->emitIns_R_R_R_R(INS_sve_ld3h, EA_SCALABLE, REG_V30, REG_P2, REG_R9, REG_R4, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_LSL_N); // LD3H {.H, .H, .H }, /Z, [, + // , + // LSL #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ld3w, EA_SCALABLE, REG_V1, REG_P3, REG_R2, REG_R4, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_LSL_N); // LD3W {.S, .S, .S }, /Z, [, + // , + // LSL #2] + theEmitter->emitIns_R_R_R_R(INS_sve_ld4b, EA_SCALABLE, REG_V0, REG_P1, REG_R2, REG_R3, + INS_OPTS_SCALABLE_B); // LD4B {.B, .B, .B, .B }, /Z, + // [, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld4d, EA_SCALABLE, REG_V0, REG_P3, REG_R2, REG_R1, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LD4D {.D, .D, .D, .D }, /Z, + // [, , LSL #3] + theEmitter->emitIns_R_R_R_R(INS_sve_ld4h, EA_SCALABLE, REG_V13, REG_P6, REG_R5, REG_R4, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_LSL_N); // LD4H {.H, .H, .H, .H }, /Z, + // [, , LSL #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ld4w, EA_SCALABLE, REG_V10, REG_P3, REG_R2, REG_R5, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_LSL_N); // LD4W {.S, .S, .S, .S }, /Z, + // [, , LSL #2] + + // IF_SVE_IU_4B + 
theEmitter->emitIns_R_R_R_R(INS_sve_ld1d, EA_SCALABLE, REG_V0, REG_P4, REG_R3, REG_V2, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LD1D {.D }, /Z, [, .D, LSL #3] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1sw, EA_SCALABLE, REG_V4, REG_P3, REG_R2, REG_V1, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LD1SW {.D }, /Z, [, .D, LSL #2] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1d, EA_SCALABLE, REG_V5, REG_P6, REG_R7, REG_V8, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LDFF1D {.D }, /Z, [, .D, LSL #3] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sw, EA_SCALABLE, REG_V3, REG_P0, REG_R10, REG_V9, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LDFF1SW {.D }, /Z, [, .D, LSL #2] + + // IF_SVE_IU_4B_B + theEmitter->emitIns_R_R_R_R(INS_sve_ld1sw, EA_SCALABLE, REG_V1, REG_P2, REG_R3, REG_V0, + INS_OPTS_SCALABLE_D); // LD1SW {.D }, /Z, [, .D] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1d, EA_SCALABLE, REG_V2, REG_P6, REG_R5, REG_V4, + INS_OPTS_SCALABLE_D); // LDFF1D {.D }, /Z, [, .D] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sw, EA_SCALABLE, REG_V3, REG_P4, REG_R6, REG_V5, + INS_OPTS_SCALABLE_D); // LDFF1SW {.D }, /Z, [, .D] + + // IF_SVE_IU_4B_D + theEmitter->emitIns_R_R_R_R(INS_sve_ld1d, EA_SCALABLE, REG_V0, REG_P1, REG_R3, REG_V4, + INS_OPTS_SCALABLE_D); // LD1D {.D }, /Z, [, .D] + + // IF_SVE_IW_4A + theEmitter->emitIns_R_R_R_R(INS_sve_ld1q, EA_SCALABLE, REG_V0, REG_P1, REG_V2, REG_R3, + INS_OPTS_SCALABLE_Q); // LD1Q {.Q }, /Z, [.D{, }] + // REG_ZR can be used due to the optional {, } of the format. 
+ theEmitter->emitIns_R_R_R_R(INS_sve_ld1q, EA_SCALABLE, REG_V0, REG_P1, REG_V2, REG_ZR, + INS_OPTS_SCALABLE_Q); // LD1Q {.Q }, /Z, [.D{, }] + + // IF_SVE_IX_4A + theEmitter->emitIns_R_R_R_R(INS_sve_ldnt1d, EA_SCALABLE, REG_V4, REG_P2, REG_V1, REG_R3, + INS_OPTS_SCALABLE_D); // LDNT1D {.D }, /Z, [.D{, }] + theEmitter->emitIns_R_R_R_R(INS_sve_ldnt1sw, EA_SCALABLE, REG_V7, REG_P1, REG_V0, REG_R1, + INS_OPTS_SCALABLE_D); // LDNT1SW {.D }, /Z, [.D{, }] + // REG_ZR can be used due to the optional {, } of the format. + theEmitter->emitIns_R_R_R_R(INS_sve_ldnt1sw, EA_SCALABLE, REG_V7, REG_P1, REG_V0, REG_ZR, + INS_OPTS_SCALABLE_D); // LDNT1SW {.D }, /Z, [.D{, }] + + // IF_SVE_IY_4A + theEmitter->emitIns_R_R_R_R(INS_sve_st1q, EA_SCALABLE, REG_V1, REG_P2, REG_V3, REG_R4, + INS_OPTS_SCALABLE_Q); // ST1Q {.Q }, , [.D{, }] + // REG_ZR can be used due to the optional {, } of the format. + theEmitter->emitIns_R_R_R_R(INS_sve_st1q, EA_SCALABLE, REG_V1, REG_P2, REG_V3, REG_ZR, + INS_OPTS_SCALABLE_Q); // ST1Q {.Q }, , [.D{, }] + + // IF_SVE_IZ_4A + theEmitter->emitIns_R_R_R_R(INS_sve_stnt1b, EA_SCALABLE, REG_V0, REG_P2, REG_V3, REG_R4, + INS_OPTS_SCALABLE_S); // STNT1B {.S }, , [.S{, }] + theEmitter->emitIns_R_R_R_R(INS_sve_stnt1h, EA_SCALABLE, REG_V2, REG_P7, REG_V6, REG_R5, + INS_OPTS_SCALABLE_S); // STNT1H {.S }, , [.S{, }] + theEmitter->emitIns_R_R_R_R(INS_sve_stnt1w, EA_SCALABLE, REG_V1, REG_P3, REG_V2, REG_R0, + INS_OPTS_SCALABLE_S); // STNT1W {.S }, , [.S{, }] + // REG_ZR can be used due to the optional {, } of the format. 
+ theEmitter->emitIns_R_R_R_R(INS_sve_stnt1w, EA_SCALABLE, REG_V1, REG_P3, REG_V2, REG_ZR, + INS_OPTS_SCALABLE_S); // STNT1W {.S }, , [.S{, }] + + // IF_SVE_IZ_4A_A + theEmitter->emitIns_R_R_R_R(INS_sve_stnt1b, EA_SCALABLE, REG_V0, REG_P4, REG_V6, REG_R8, + INS_OPTS_SCALABLE_D); // STNT1B {.D }, , [.D{, }] + theEmitter->emitIns_R_R_R_R(INS_sve_stnt1h, EA_SCALABLE, REG_V5, REG_P3, REG_V1, REG_R2, + INS_OPTS_SCALABLE_D); // STNT1H {.D }, , [.D{, }] + theEmitter->emitIns_R_R_R_R(INS_sve_stnt1w, EA_SCALABLE, REG_V3, REG_P1, REG_V2, REG_R0, + INS_OPTS_SCALABLE_D); // STNT1W {.D }, , [.D{, }] + // REG_ZR can be used due to the optional {, } of the format. + theEmitter->emitIns_R_R_R_R(INS_sve_stnt1b, EA_SCALABLE, REG_V0, REG_P4, REG_V6, REG_ZR, + INS_OPTS_SCALABLE_D); // STNT1B {.D }, , [.D{, }] + + // IF_SVE_JA_4A + theEmitter->emitIns_R_R_R_R(INS_sve_stnt1d, EA_SCALABLE, REG_V1, REG_P3, REG_V4, REG_R5, + INS_OPTS_SCALABLE_D); // STNT1D {.D }, , [.D{, }] + // REG_ZR can be used due to the optional {, } of the format. 
+ theEmitter->emitIns_R_R_R_R(INS_sve_stnt1d, EA_SCALABLE, REG_V0, REG_P4, REG_V5, REG_ZR, + INS_OPTS_SCALABLE_D); // STNT1D {.D }, , [.D{, }] + + // IF_SVE_JB_4A + theEmitter->emitIns_R_R_R_R(INS_sve_stnt1b, EA_SCALABLE, REG_V6, REG_P5, REG_R4, REG_R3, + INS_OPTS_SCALABLE_B); // STNT1B {.B }, , [, ] + theEmitter->emitIns_R_R_R_R(INS_sve_stnt1d, EA_SCALABLE, REG_V7, REG_P6, REG_R5, REG_R4, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // STNT1D {.D }, , [, , LSL #3] + theEmitter->emitIns_R_R_R_R(INS_sve_stnt1h, EA_SCALABLE, REG_V0, REG_P1, REG_R2, REG_R3, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_LSL_N); // STNT1H {.H }, , [, , LSL #1] + theEmitter->emitIns_R_R_R_R(INS_sve_stnt1w, EA_SCALABLE, REG_V0, REG_P5, REG_R6, REG_R7, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_LSL_N); // STNT1W {.S }, , [, , LSL #2] + + // IF_SVE_JC_4A + theEmitter->emitIns_R_R_R_R(INS_sve_st2b, EA_SCALABLE, REG_V0, REG_P1, REG_R2, REG_R4, + INS_OPTS_SCALABLE_B); // ST2B {.B, .B }, , [, ] + theEmitter->emitIns_R_R_R_R(INS_sve_st2d, EA_SCALABLE, REG_V1, REG_P7, REG_R6, REG_R5, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // ST2D {.D, .D }, , [, , LSL #3] + theEmitter->emitIns_R_R_R_R(INS_sve_st2h, EA_SCALABLE, REG_V2, REG_P3, REG_R5, REG_R6, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_LSL_N); // ST2H {.H, .H }, , [, , LSL #1] + theEmitter->emitIns_R_R_R_R(INS_sve_st2w, EA_SCALABLE, REG_V0, REG_P2, REG_R8, REG_R7, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_LSL_N); // ST2W {.S, .S }, , [, , LSL #2] + theEmitter->emitIns_R_R_R_R(INS_sve_st3b, EA_SCALABLE, REG_V0, REG_P1, REG_R3, REG_R4, + INS_OPTS_SCALABLE_B); // ST3B {.B, .B, .B }, , [, ] + theEmitter->emitIns_R_R_R_R(INS_sve_st3d, EA_SCALABLE, REG_V2, REG_P3, REG_R4, REG_R6, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // ST3D {.D, .D, .D }, , [, , + // LSL + // #3] + theEmitter->emitIns_R_R_R_R(INS_sve_st3h, EA_SCALABLE, REG_V1, REG_P0, REG_R3, REG_R8, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_LSL_N); // ST3H {.H, .H, .H }, , [, , + // LSL 
+ // #1] + theEmitter->emitIns_R_R_R_R(INS_sve_st3w, EA_SCALABLE, REG_V0, REG_P1, REG_R2, REG_R3, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_LSL_N); // ST3W {.S, .S, .S }, , [, , + // LSL + // #2] + theEmitter->emitIns_R_R_R_R(INS_sve_st4b, EA_SCALABLE, REG_V0, REG_P6, REG_R5, REG_R4, + INS_OPTS_SCALABLE_B); // ST4B {.B, .B, .B, .B }, , [, + // ] + theEmitter->emitIns_R_R_R_R(INS_sve_st4d, EA_SCALABLE, REG_V5, REG_P2, REG_R1, REG_R0, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // ST4D {.D, .D, .D, .D }, , + // [, + // , LSL #3] + theEmitter->emitIns_R_R_R_R(INS_sve_st4h, EA_SCALABLE, REG_V1, REG_P0, REG_R9, REG_R8, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_LSL_N); // ST4H {.H, .H, .H, .H }, , + // [, + // , LSL #1] + theEmitter->emitIns_R_R_R_R(INS_sve_st4w, EA_SCALABLE, REG_V0, REG_P1, REG_R4, REG_R5, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_LSL_N); // ST4W {.S, .S, .S, .S }, , + // [, + // , LSL #2] + + // IF_SVE_JD_4C + theEmitter->emitIns_R_R_R_R(INS_sve_st1d, EA_SCALABLE, REG_V1, REG_P4, REG_R5, REG_R6, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // ST1D {.D }, , [, , LSL #3] + theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V2, REG_P1, REG_R8, REG_R7, INS_OPTS_SCALABLE_Q, + INS_SCALABLE_OPTS_LSL_N); // ST1W {.Q }, , [, , LSL #2] + + // IF_SVE_JD_4C_A + theEmitter->emitIns_R_R_R_R(INS_sve_st1d, EA_SCALABLE, REG_V3, REG_P5, REG_R6, REG_R1, INS_OPTS_SCALABLE_Q, + INS_SCALABLE_OPTS_LSL_N); // ST1D {.Q }, , [, , LSL #3] + + // IF_SVE_JF_4A + theEmitter->emitIns_R_R_R_R(INS_sve_st2q, EA_SCALABLE, REG_V0, REG_P2, REG_R3, REG_R5, INS_OPTS_SCALABLE_Q, + INS_SCALABLE_OPTS_LSL_N); // ST2Q {.Q, .Q }, , [, , LSL #4] + theEmitter->emitIns_R_R_R_R(INS_sve_st3q, EA_SCALABLE, REG_V1, REG_P4, REG_R2, REG_R8, INS_OPTS_SCALABLE_Q, + INS_SCALABLE_OPTS_LSL_N); // ST3Q {.Q, .Q, .Q }, , [, , + // LSL + // #4] + theEmitter->emitIns_R_R_R_R(INS_sve_st4q, EA_SCALABLE, REG_V4, REG_P1, REG_R8, REG_R2, INS_OPTS_SCALABLE_Q, + INS_SCALABLE_OPTS_LSL_N); // ST4Q {.Q, .Q, .Q, 
.Q }, , + // [, + // , LSL #4] + + // IF_SVE_JJ_4B + theEmitter->emitIns_R_R_R_R(INS_sve_st1d, EA_SCALABLE, REG_V0, REG_P3, REG_R2, REG_V1, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // ST1D {.D }, , [, .D, LSL #3] + theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V1, REG_P2, REG_R3, REG_V4, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // ST1H {.D }, , [, .D, LSL #1] + theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V2, REG_P3, REG_R4, REG_V5, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // ST1W {.D }, , [, .D, LSL #2] + + // IF_SVE_JJ_4B_C + theEmitter->emitIns_R_R_R_R(INS_sve_st1d, EA_SCALABLE, REG_V3, REG_P4, REG_R5, REG_V6, + INS_OPTS_SCALABLE_D); // ST1D {.D }, , [, .D] + + // IF_SVE_JJ_4B_E + theEmitter->emitIns_R_R_R_R(INS_sve_st1h, EA_SCALABLE, REG_V1, REG_P4, REG_R3, REG_V2, + INS_OPTS_SCALABLE_D); // ST1H {.D }, , [, .D] + theEmitter->emitIns_R_R_R_R(INS_sve_st1w, EA_SCALABLE, REG_V3, REG_P5, REG_R1, REG_V0, + INS_OPTS_SCALABLE_D); // ST1W {.D }, , [, .D] + + // IF_SVE_JK_4B + theEmitter->emitIns_R_R_R_R(INS_sve_st1b, EA_SCALABLE, REG_V6, REG_P3, REG_R0, REG_V4, + INS_OPTS_SCALABLE_D); // ST1B {.D }, , [, .D] } #endif // defined(TARGET_ARM64) && defined(DEBUG) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 87a540564245ec..45be55272f661f 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -1837,6 +1837,7 @@ class emitter #define PERFSCORE_THROUGHPUT_ZERO 0.0f // Only used for pseudo-instructions that don't generate code +#define PERFSCORE_THROUGHPUT_9X (1.0f / 9.0f) #define PERFSCORE_THROUGHPUT_6X (1.0f / 6.0f) // Hextuple issue #define PERFSCORE_THROUGHPUT_5X 0.20f // Pentuple issue #define PERFSCORE_THROUGHPUT_4X 0.25f // Quad issue diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index ae220e483174af..49ff10255b5ba1 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1799,6 +1799,198 @@ void 
emitter::emitInsSanityCheck(instrDesc* id) assert(isScalableVectorSize(elemsize)); break; + case IF_SVE_IF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus + // scalar) + case IF_SVE_IF_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus + // scalar) + elemsize = id->idOpSize(); + assert(insOptsScalableWords(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + assert(isGeneralRegisterOrZR(id->idReg4())); // mmmmm + assert(isScalableVectorSize(elemsize)); + break; + + case IF_SVE_IG_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus scalar) + case IF_SVE_IG_4A_D: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus + // scalar) + case IF_SVE_IG_4A_E: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus + // scalar) + case IF_SVE_IG_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus + // scalar) + case IF_SVE_IG_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus + // scalar) + elemsize = id->idOpSize(); + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isGeneralRegisterOrZR(id->idReg4())); // mmmmm + assert(isScalableVectorSize(elemsize)); + break; + + case IF_SVE_II_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) + elemsize = id->idOpSize(); + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + 
assert(isGeneralRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(elemsize)); + break; + + case IF_SVE_II_4A_B: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) + elemsize = id->idOpSize(); + assert(id->idInsOpt() == INS_OPTS_SCALABLE_Q); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isGeneralRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(elemsize)); + break; + + case IF_SVE_II_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) + elemsize = id->idOpSize(); + assert(insOptsScalableWordsOrQuadwords(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isGeneralRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(elemsize)); + break; + + case IF_SVE_IK_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + case IF_SVE_IK_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + case IF_SVE_IK_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + case IF_SVE_IK_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + case IF_SVE_IK_4A_I: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + elemsize = id->idOpSize(); + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isGeneralRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(elemsize)); + break; + + case IF_SVE_IR_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus + // 
scalar) + elemsize = id->idOpSize(); + assert(id->idInsOpt() == INS_OPTS_SCALABLE_Q); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isGeneralRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(elemsize)); + break; + + case IF_SVE_IU_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + case IF_SVE_IU_4B_B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + case IF_SVE_IU_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + elemsize = id->idOpSize(); + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isVectorRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(elemsize)); + break; + + case IF_SVE_IW_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit gather load (vector plus scalar) + case IF_SVE_IY_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit scatter store (vector plus scalar) + elemsize = id->idOpSize(); + assert(id->idInsOpt() == INS_OPTS_SCALABLE_Q); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + assert(isGeneralRegisterOrZR(id->idReg4())); // mmmmm + assert(isScalableVectorSize(elemsize)); + break; + + case IF_SVE_IX_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit gather non-temporal load (vector plus + // scalar) + elemsize = id->idOpSize(); + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // 
nnnnn + assert(isGeneralRegisterOrZR(id->idReg4())); // mmmmm + assert(isScalableVectorSize(elemsize)); + break; + + case IF_SVE_IZ_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus + // scalar) + case IF_SVE_IZ_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus + // scalar) + case IF_SVE_JA_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit scatter non-temporal store (vector plus + // scalar) + elemsize = id->idOpSize(); + assert(insOptsScalableWords(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + assert(isGeneralRegisterOrZR(id->idReg4())); // mmmmm + assert(isScalableVectorSize(elemsize)); + break; + + case IF_SVE_JD_4C: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + case IF_SVE_JD_4C_A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + elemsize = id->idOpSize(); + assert(insOptsScalableDoubleWordsOrQuadword(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isGeneralRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(elemsize)); + break; + + case IF_SVE_JF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus + // scalar) + elemsize = id->idOpSize(); + assert(id->idInsOpt() == INS_OPTS_SCALABLE_Q); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isGeneralRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(elemsize)); + break; + + case IF_SVE_IN_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus scalar) + case IF_SVE_IP_4A: 
// ...........mmmmm ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus scalar) + case IF_SVE_IT_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (scalar plus scalar) + case IF_SVE_JB_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus + // scalar) + case IF_SVE_JC_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (scalar plus scalar) + elemsize = id->idOpSize(); + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isGeneralRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(elemsize)); + break; + + case IF_SVE_JJ_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JJ_4B_C: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JJ_4B_E: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JK_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled + // offsets) + elemsize = id->idOpSize(); + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isVectorRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(elemsize)); + break; + default: printf("unexpected format %s\n", emitIfName(id->idInsFmt())); assert(!"Unexpected format"); @@ -11613,6 +11805,7 @@ void emitter::emitIns_R_R_R_R(instruction ins, assert(isGeneralRegister(reg2)); assert(isGeneralRegister(reg3)); assert(isGeneralRegister(reg4)); + assert(insScalableOptsNone(sopt)); fmt = IF_DR_4A; break; @@ -11626,6 +11819,7 @@ void 
emitter::emitIns_R_R_R_R(instruction ins, assert(isVectorRegister(reg2)); assert(isVectorRegister(reg3)); assert(isVectorRegister(reg4)); + assert(insScalableOptsNone(sopt)); fmt = IF_DV_4A; break; @@ -11633,6 +11827,48 @@ void emitter::emitIns_R_R_R_R(instruction ins, fmt = IF_NONE; break; + // Fallback handles emitting the SVE instructions. + default: + return emitInsSve_R_R_R_R(ins, attr, reg1, reg2, reg3, reg4, opt, sopt); + } + assert(fmt != IF_NONE); + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(opt); + + id->idReg1(reg1); + id->idReg2(reg2); + id->idReg3(reg3); + id->idReg4(reg4); + + dispIns(id); + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add a SVE instruction referencing four registers. + * Do not call this directly. Use 'emitIns_R_R_R_R' instead. + */ + +void emitter::emitInsSve_R_R_R_R(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + regNumber reg4, + insOpts opt /* = INS_OPT_NONE*/, + insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */) +{ + emitAttr size = EA_SIZE(attr); + insFormat fmt = IF_NONE; + + /* Figure out the encoding format of the instruction */ + switch (ins) + { case INS_sve_cmpeq: case INS_sve_cmpgt: case INS_sve_cmpge: @@ -11755,10 +11991,18 @@ void emitter::emitIns_R_R_R_R(instruction ins, assert(isGeneralRegister(reg3)); assert(isScalableVectorSize(size)); assert(insScalableOptsNone(sopt)); + if (insOptsScalableStandard(opt)) { - assert(isGeneralRegister(reg4)); - fmt = IF_SVE_JD_4A; + if (isGeneralRegister(reg4)) + { + fmt = IF_SVE_JD_4A; + } + else + { + assert(isVectorRegister(reg4)); + fmt = IF_SVE_JK_4B; + } } else { @@ -11787,13 +12031,30 @@ void emitter::emitIns_R_R_R_R(instruction ins, assert(isPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); assert(isScalableVectorSize(size)); + if (insOptsScalableStandard(opt)) { - // st1h is reserved for scalable B - 
assert((ins == INS_sve_st1h) ? insOptsScalableAtLeastHalf(opt) : true); - assert(isGeneralRegister(reg4)); - assert(sopt == INS_SCALABLE_OPTS_LSL_N); - fmt = IF_SVE_JD_4A; + if (sopt == INS_SCALABLE_OPTS_LSL_N) + { + if (isGeneralRegister(reg4)) + { + // st1h is reserved for scalable B + assert((ins == INS_sve_st1h) ? insOptsScalableAtLeastHalf(opt) : true); + assert(sopt == INS_SCALABLE_OPTS_LSL_N); + fmt = IF_SVE_JD_4A; + } + else + { + assert(isVectorRegister(reg4)); + fmt = IF_SVE_JJ_4B; + } + } + else + { + assert(isVectorRegister(reg4)); + assert(insScalableOptsNone(sopt)); + fmt = IF_SVE_JJ_4B_E; + } } else { @@ -11838,15 +12099,38 @@ void emitter::emitIns_R_R_R_R(instruction ins, assert(isPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); assert(isScalableVectorSize(size)); + if (insOptsScalableStandard(opt)) + { + if (sopt == INS_SCALABLE_OPTS_LSL_N) + { + if (isGeneralRegister(reg4)) + { + fmt = IF_SVE_JD_4B; + } + else + { + assert(isVectorRegister(reg4)); + fmt = IF_SVE_JJ_4B; + } + } + else + { + assert(isVectorRegister(reg4)); + assert(insScalableOptsNone(sopt)); + fmt = IF_SVE_JJ_4B_E; + } + } + else if (opt == INS_OPTS_SCALABLE_Q) { assert(isGeneralRegister(reg4)); assert(sopt == INS_SCALABLE_OPTS_LSL_N); - fmt = IF_SVE_JD_4B; + fmt = IF_SVE_JD_4C; } else { assert(insOptsScalable32bitExtends(opt)); + assert(isVectorRegister(reg4)); switch (opt) { case INS_OPTS_SCALABLE_S_UXTW: @@ -11883,29 +12167,63 @@ void emitter::emitIns_R_R_R_R(instruction ins, break; case INS_sve_st1d: - assert(insOptsScalable32bitExtends(opt)); assert(isVectorRegister(reg1)); assert(isPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); assert(isScalableVectorSize(size)); - switch (opt) + + if (isGeneralRegister(reg4)) { - case INS_OPTS_SCALABLE_D_UXTW: - case INS_OPTS_SCALABLE_D_SXTW: - if (sopt == INS_SCALABLE_OPTS_MOD_N) + assert(sopt == INS_SCALABLE_OPTS_LSL_N); + if (opt == INS_OPTS_SCALABLE_Q) + { + fmt = IF_SVE_JD_4C_A; + } + else + { + assert(opt == 
INS_OPTS_SCALABLE_D); + fmt = IF_SVE_JD_4C; + } + } + else + { + assert(isVectorRegister(reg4)); + + if (opt == INS_OPTS_SCALABLE_D) + { + if (sopt == INS_SCALABLE_OPTS_LSL_N) { - fmt = IF_SVE_JJ_4A; + fmt = IF_SVE_JJ_4B; } else { assert(insScalableOptsNone(sopt)); - fmt = IF_SVE_JJ_4A_B; + fmt = IF_SVE_JJ_4B_C; } - break; + } + else + { + assert(insOptsScalable32bitExtends(opt)); + switch (opt) + { + case INS_OPTS_SCALABLE_D_UXTW: + case INS_OPTS_SCALABLE_D_SXTW: + if (sopt == INS_SCALABLE_OPTS_MOD_N) + { + fmt = IF_SVE_JJ_4A; + } + else + { + assert(insScalableOptsNone(sopt)); + fmt = IF_SVE_JJ_4A_B; + } + break; - default: - assert(!"Invalid options for scalable"); - break; + default: + assert(!"Invalid options for scalable"); + break; + } + } } break; @@ -11916,22 +12234,55 @@ void emitter::emitIns_R_R_R_R(instruction ins, assert(isVectorRegister(reg1)); assert(isPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); - assert(isVectorRegister(reg4)); assert(isScalableVectorSize(size)); assert(insScalableOptsNone(sopt)); - if (insOptsScalableDoubleWord32bitExtends(opt)) - { - fmt = IF_SVE_HW_4A; - } - else if (insOptsScalableSingleWord32bitExtends(opt)) + if (isGeneralRegisterOrZR(reg4)) { - fmt = IF_SVE_HW_4A_A; + switch (ins) + { + case INS_sve_ldff1b: + assert(insOptsScalableStandard(opt)); + fmt = IF_SVE_IG_4A_E; + break; + + case INS_sve_ldff1sb: + assert(insOptsScalableAtLeastHalf(opt)); + fmt = IF_SVE_IG_4A_D; + break; + + case INS_sve_ld1sb: + assert(insOptsScalableAtLeastHalf(opt)); + fmt = IF_SVE_IK_4A_F; + break; + + case INS_sve_ld1b: + assert(insOptsScalableStandard(opt)); + fmt = IF_SVE_IK_4A_H; + break; + + default: + assert(!"Invalid instruction"); + break; + } } else { - assert(opt == INS_OPTS_SCALABLE_D); - fmt = IF_SVE_HW_4B; + assert(isVectorRegister(reg4)); + + if (insOptsScalableDoubleWord32bitExtends(opt)) + { + fmt = IF_SVE_HW_4A; + } + else if (insOptsScalableSingleWord32bitExtends(opt)) + { + fmt = IF_SVE_HW_4A_A; + } + else + { + 
assert(opt == INS_OPTS_SCALABLE_D); + fmt = IF_SVE_HW_4B; + } } break; @@ -11944,324 +12295,599 @@ void emitter::emitIns_R_R_R_R(instruction ins, assert(isVectorRegister(reg1)); assert(isPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); - assert(isVectorRegister(reg4)); assert(isScalableVectorSize(size)); - if (insOptsScalableDoubleWord32bitExtends(opt)) - { - if (sopt == INS_SCALABLE_OPTS_MOD_N) - { - fmt = IF_SVE_HW_4A_A; - } - else - { - assert(insScalableOptsNone(sopt)); - fmt = IF_SVE_HW_4A_B; - } - } - else if (insOptsScalableSingleWord32bitExtends(opt)) + if (isGeneralRegisterOrZR(reg4)) { - if (sopt == INS_SCALABLE_OPTS_MOD_N) + assert(sopt == INS_SCALABLE_OPTS_LSL_N); + + switch (ins) { - fmt = IF_SVE_HW_4A; - } - else - { - assert(insScalableOptsNone(sopt)); - fmt = IF_SVE_HW_4A_C; + case INS_sve_ldff1h: + assert(insOptsScalableStandard(opt)); + fmt = IF_SVE_IG_4A_G; + break; + + case INS_sve_ldff1sh: + case INS_sve_ldff1w: + assert(insOptsScalableWords(opt)); + fmt = IF_SVE_IG_4A_F; + break; + + case INS_sve_ld1w: + assert(insOptsScalableWordsOrQuadwords(opt)); + fmt = IF_SVE_II_4A_H; + break; + + case INS_sve_ld1sh: + assert(insOptsScalableWords(opt)); + fmt = IF_SVE_IK_4A_G; + break; + + case INS_sve_ld1h: + assert(insOptsScalableAtLeastHalf(opt)); + fmt = IF_SVE_IK_4A_I; + break; + + default: + assert(!"Invalid instruction"); + break; } } else { - assert(opt == INS_OPTS_SCALABLE_D); - if (sopt == INS_SCALABLE_OPTS_LSL_N) + assert(isVectorRegister(reg4)); + + if (insOptsScalableDoubleWord32bitExtends(opt)) { - fmt = IF_SVE_HW_4B; + if (sopt == INS_SCALABLE_OPTS_MOD_N) + { + fmt = IF_SVE_HW_4A_A; + } + else + { + assert(insScalableOptsNone(sopt)); + fmt = IF_SVE_HW_4A_B; + } + } + else if (insOptsScalableSingleWord32bitExtends(opt)) + { + if (sopt == INS_SCALABLE_OPTS_MOD_N) + { + fmt = IF_SVE_HW_4A; + } + else + { + assert(insScalableOptsNone(sopt)); + fmt = IF_SVE_HW_4A_C; + } } else { - assert(insScalableOptsNone(sopt)); - fmt = 
IF_SVE_HW_4B_D; + assert(opt == INS_OPTS_SCALABLE_D); + if (sopt == INS_SCALABLE_OPTS_LSL_N) + { + fmt = IF_SVE_HW_4B; + } + else + { + assert(insScalableOptsNone(sopt)); + fmt = IF_SVE_HW_4B_D; + } } } break; - case INS_sve_ldff1sw: - case INS_sve_ldff1d: case INS_sve_ld1d: case INS_sve_ld1sw: - assert(insOptsScalableDoubleWord32bitExtends(opt)); + case INS_sve_ldff1d: + case INS_sve_ldff1sw: assert(isVectorRegister(reg1)); assert(isPredicateRegister(reg2)); assert(isGeneralRegister(reg3)); - assert(isVectorRegister(reg4)); assert(isScalableVectorSize(size)); - if (sopt == INS_SCALABLE_OPTS_MOD_N) + if (isGeneralRegisterOrZR(reg4)) + { + assert(sopt == INS_SCALABLE_OPTS_LSL_N); + + if (opt == INS_OPTS_SCALABLE_Q) + { + assert(reg4 != REG_ZR); + assert(ins == INS_sve_ld1d); + fmt = IF_SVE_II_4A_B; + } + else + { + assert(opt == INS_OPTS_SCALABLE_D); + + switch (ins) + { + case INS_sve_ldff1d: + case INS_sve_ldff1sw: + fmt = IF_SVE_IG_4A; + break; + + case INS_sve_ld1d: + assert(reg4 != REG_ZR); + fmt = IF_SVE_II_4A; + break; + + case INS_sve_ld1sw: + assert(reg4 != REG_ZR); + fmt = IF_SVE_IK_4A; + break; + + default: + assert(!"Invalid instruction"); + break; + } + } + } + else if (insOptsScalableDoubleWord32bitExtends(opt)) + { + assert(isVectorRegister(reg4)); + + if (sopt == INS_SCALABLE_OPTS_MOD_N) + { + fmt = IF_SVE_IU_4A; + } + else + { + assert(insScalableOptsNone(sopt)); + + if (ins == INS_sve_ld1d) + { + fmt = IF_SVE_IU_4A_C; + } + else + { + fmt = IF_SVE_IU_4A_A; + } + } + } + else if (sopt == INS_SCALABLE_OPTS_LSL_N) { - fmt = IF_SVE_IU_4A; + assert(isVectorRegister(reg4)); + assert(opt == INS_OPTS_SCALABLE_D); + fmt = IF_SVE_IU_4B; } else { + assert(isVectorRegister(reg4)); + assert(opt == INS_OPTS_SCALABLE_D); assert(insScalableOptsNone(sopt)); + if (ins == INS_sve_ld1d) { - fmt = IF_SVE_IU_4A_C; + fmt = IF_SVE_IU_4B_D; } else { - fmt = IF_SVE_IU_4A_A; + fmt = IF_SVE_IU_4B_B; } } break; - default: - unreached(); - break; - } - assert(fmt != IF_NONE); + 
case INS_sve_ldnt1b: + case INS_sve_ldnt1h: + case INS_sve_ldnt1w: + case INS_sve_ldnt1d: + case INS_sve_ldnt1sb: + case INS_sve_ldnt1sh: + case INS_sve_ldnt1sw: + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isScalableVectorSize(size)); - // Use aliases. - switch (ins) - { - case INS_sve_cmple: - std::swap(reg3, reg4); - ins = INS_sve_cmpge; - break; - case INS_sve_cmplo: - std::swap(reg3, reg4); - ins = INS_sve_cmphi; - break; - case INS_sve_cmpls: - std::swap(reg3, reg4); - ins = INS_sve_cmphs; - break; - case INS_sve_cmplt: - std::swap(reg3, reg4); - ins = INS_sve_cmpgt; - break; - case INS_sve_facle: - std::swap(reg3, reg4); - ins = INS_sve_facge; - break; - case INS_sve_faclt: - std::swap(reg3, reg4); - ins = INS_sve_facgt; - break; - case INS_sve_fcmle: - std::swap(reg3, reg4); - ins = INS_sve_fcmge; - break; - case INS_sve_fcmlt: - std::swap(reg3, reg4); - ins = INS_sve_fcmgt; - break; - default: - break; - } + if (isGeneralRegister(reg3)) + { + assert(isGeneralRegister(reg4)); - instrDesc* id = emitNewInstr(attr); +#ifdef DEBUG + switch (ins) + { + case INS_sve_ldnt1b: + assert(opt == INS_OPTS_SCALABLE_B); + assert(insScalableOptsNone(sopt)); + break; - id->idIns(ins); - id->idInsFmt(fmt); - id->idInsOpt(opt); + case INS_sve_ldnt1h: + assert(opt == INS_OPTS_SCALABLE_H); + assert(sopt == INS_SCALABLE_OPTS_LSL_N); + break; - id->idReg1(reg1); - id->idReg2(reg2); - id->idReg3(reg3); - id->idReg4(reg4); + case INS_sve_ldnt1w: + assert(opt == INS_OPTS_SCALABLE_S); + assert(sopt == INS_SCALABLE_OPTS_LSL_N); + break; - dispIns(id); - appendToCurIG(id); -} + case INS_sve_ldnt1d: + assert(opt == INS_OPTS_SCALABLE_D); + assert(sopt == INS_SCALABLE_OPTS_LSL_N); + break; -/***************************************************************************** - * - * Add an instruction referencing a register and a condition code - */ + default: + assert(!"Invalid instruction"); + break; + } +#endif // DEBUG -void 
emitter::emitIns_R_COND(instruction ins, emitAttr attr, regNumber reg, insCond cond) -{ - insFormat fmt = IF_NONE; - condFlagsImm cfi; - cfi.immCFVal = 0; + fmt = IF_SVE_IN_4A; + } + else if ((ins == INS_sve_ldnt1d) || (ins == INS_sve_ldnt1sw)) + { + assert(insOptsScalableWords(opt)); + assert(isVectorRegister(reg3)); + assert(isGeneralRegisterOrZR(reg4)); + assert(insScalableOptsNone(sopt)); + assert(opt == INS_OPTS_SCALABLE_D); + fmt = IF_SVE_IX_4A; + } + else + { + assert(insOptsScalableWords(opt)); + assert(isVectorRegister(reg3)); + assert(isGeneralRegisterOrZR(reg4)); + assert(insScalableOptsNone(sopt)); - /* Figure out the encoding format of the instruction */ - switch (ins) - { - case INS_cset: - case INS_csetm: - assert(isGeneralRegister(reg)); - cfi.cond = cond; - fmt = IF_DR_1D; + if (opt == INS_OPTS_SCALABLE_S) + { + fmt = IF_SVE_IF_4A; + } + else + { + assert(opt == INS_OPTS_SCALABLE_D); + fmt = IF_SVE_IF_4A_A; + } + } break; - default: - unreached(); - break; + case INS_sve_ld1rob: + case INS_sve_ld1roh: + case INS_sve_ld1row: + case INS_sve_ld1rod: + case INS_sve_ld1rqb: + case INS_sve_ld1rqh: + case INS_sve_ld1rqw: + case INS_sve_ld1rqd: + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isGeneralRegister(reg4)); + assert(isScalableVectorSize(size)); - } // end switch (ins) +#ifdef DEBUG + switch (ins) + { + case INS_sve_ld1rob: + case INS_sve_ld1rqb: + assert(opt == INS_OPTS_SCALABLE_B); + assert(insScalableOptsNone(sopt)); + break; - assert(fmt != IF_NONE); - assert(isValidImmCond(cfi.immCFVal)); + case INS_sve_ld1roh: + case INS_sve_ld1rqh: + assert(opt == INS_OPTS_SCALABLE_H); + assert(sopt == INS_SCALABLE_OPTS_LSL_N); + break; - instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal); + case INS_sve_ld1row: + case INS_sve_ld1rqw: + assert(opt == INS_OPTS_SCALABLE_S); + assert(sopt == INS_SCALABLE_OPTS_LSL_N); + break; - id->idIns(ins); - id->idInsFmt(fmt); - id->idInsOpt(INS_OPTS_NONE); 
+ case INS_sve_ld1rod: + case INS_sve_ld1rqd: + assert(opt == INS_OPTS_SCALABLE_D); + assert(sopt == INS_SCALABLE_OPTS_LSL_N); + break; - id->idReg1(reg); + default: + assert(!"Invalid instruction"); + break; + } +#endif // DEBUG - dispIns(id); - appendToCurIG(id); -} + fmt = IF_SVE_IP_4A; + break; -/***************************************************************************** - * - * Add an instruction referencing two registers and a condition code - */ - -void emitter::emitIns_R_R_COND(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insCond cond) -{ - insFormat fmt = IF_NONE; - condFlagsImm cfi; - cfi.immCFVal = 0; - - /* Figure out the encoding format of the instruction */ - switch (ins) - { - case INS_cinc: - case INS_cinv: - case INS_cneg: - assert(isGeneralRegister(reg1)); - assert(isGeneralRegisterOrZR(reg2)); - cfi.cond = cond; - fmt = IF_DR_2D; - break; - default: - unreached(); + case INS_sve_ld1q: + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isGeneralRegisterOrZR(reg4)); + assert(isScalableVectorSize(size)); + assert(opt == INS_OPTS_SCALABLE_Q); + assert(insScalableOptsNone(sopt)); + fmt = IF_SVE_IW_4A; break; - } // end switch (ins) - - assert(fmt != IF_NONE); - assert(isValidImmCond(cfi.immCFVal)); + case INS_sve_ld2q: + case INS_sve_ld3q: + case INS_sve_ld4q: + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isGeneralRegister(reg4)); + assert(isScalableVectorSize(size)); + assert(opt == INS_OPTS_SCALABLE_Q); + assert(sopt == INS_SCALABLE_OPTS_LSL_N); + fmt = IF_SVE_IR_4A; + break; - instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal); + case INS_sve_ld2b: + case INS_sve_ld3b: + case INS_sve_ld4b: + case INS_sve_ld2h: + case INS_sve_ld3h: + case INS_sve_ld4h: + case INS_sve_ld2w: + case INS_sve_ld3w: + case INS_sve_ld4w: + case INS_sve_ld2d: + case INS_sve_ld3d: + case INS_sve_ld4d: + 
assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isGeneralRegister(reg4)); + assert(isScalableVectorSize(size)); - id->idIns(ins); - id->idInsFmt(fmt); - id->idInsOpt(INS_OPTS_NONE); +#ifdef DEBUG + switch (ins) + { + case INS_sve_ld2b: + case INS_sve_ld3b: + case INS_sve_ld4b: + assert(opt == INS_OPTS_SCALABLE_B); + assert(insScalableOptsNone(sopt)); + break; - id->idReg1(reg1); - id->idReg2(reg2); + case INS_sve_ld2h: + case INS_sve_ld3h: + case INS_sve_ld4h: + assert(opt == INS_OPTS_SCALABLE_H); + assert(sopt == INS_SCALABLE_OPTS_LSL_N); + break; - dispIns(id); - appendToCurIG(id); -} + case INS_sve_ld2w: + case INS_sve_ld3w: + case INS_sve_ld4w: + assert(opt == INS_OPTS_SCALABLE_S); + assert(sopt == INS_SCALABLE_OPTS_LSL_N); + break; -/***************************************************************************** - * - * Add an instruction referencing two registers and a condition code - */ + case INS_sve_ld2d: + case INS_sve_ld3d: + case INS_sve_ld4d: + assert(opt == INS_OPTS_SCALABLE_D); + assert(sopt == INS_SCALABLE_OPTS_LSL_N); + break; -void emitter::emitIns_R_R_R_COND( - instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, insCond cond) -{ - insFormat fmt = IF_NONE; - condFlagsImm cfi; - cfi.immCFVal = 0; + default: + assert(!"Invalid instruction"); + break; + } +#endif // DEBUG - /* Figure out the encoding format of the instruction */ - switch (ins) - { - case INS_csel: - case INS_csinc: - case INS_csinv: - case INS_csneg: - assert(isGeneralRegister(reg1)); - assert(isGeneralRegisterOrZR(reg2)); - assert(isGeneralRegisterOrZR(reg3)); - cfi.cond = cond; - fmt = IF_DR_3D; + fmt = IF_SVE_IT_4A; break; - default: - unreached(); + case INS_sve_st1q: + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isGeneralRegisterOrZR(reg4)); + assert(isScalableVectorSize(size)); + assert(opt == INS_OPTS_SCALABLE_Q); + 
assert(insScalableOptsNone(sopt)); + fmt = IF_SVE_IY_4A; break; - } // end switch (ins) - - assert(fmt != IF_NONE); - assert(isValidImmCond(cfi.immCFVal)); - - instrDesc* id = emitNewInstr(attr); + case INS_sve_stnt1b: + case INS_sve_stnt1h: + case INS_sve_stnt1w: + case INS_sve_stnt1d: + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isScalableVectorSize(size)); - id->idIns(ins); - id->idInsFmt(fmt); - id->idInsOpt(INS_OPTS_NONE); + if (isGeneralRegister(reg3)) + { + assert(isGeneralRegister(reg4)); +#ifdef DEBUG + switch (ins) + { + case INS_sve_stnt1b: + assert(opt == INS_OPTS_SCALABLE_B); + assert(insScalableOptsNone(sopt)); + break; - id->idReg1(reg1); - id->idReg2(reg2); - id->idReg3(reg3); - id->idSmallCns(cfi.immCFVal); + case INS_sve_stnt1h: + assert(opt == INS_OPTS_SCALABLE_H); + assert(sopt == INS_SCALABLE_OPTS_LSL_N); + break; - dispIns(id); - appendToCurIG(id); -} + case INS_sve_stnt1w: + assert(opt == INS_OPTS_SCALABLE_S); + assert(sopt == INS_SCALABLE_OPTS_LSL_N); + break; -/***************************************************************************** - * - * Add an instruction referencing two registers the flags and a condition code - */ + case INS_sve_stnt1d: + assert(opt == INS_OPTS_SCALABLE_D); + assert(sopt == INS_SCALABLE_OPTS_LSL_N); + break; -void emitter::emitIns_R_R_FLAGS_COND( - instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insCflags flags, insCond cond) -{ - insFormat fmt = IF_NONE; - condFlagsImm cfi; - cfi.immCFVal = 0; + default: + assert(!"Invalid instruction"); + break; + } +#endif // DEBUG + fmt = IF_SVE_JB_4A; + } + else + { + assert(isVectorRegister(reg3)); + assert(isGeneralRegisterOrZR(reg4)); + assert(isScalableVectorSize(size)); + assert(insScalableOptsNone(sopt)); - /* Figure out the encoding format of the instruction */ - switch (ins) - { - case INS_ccmp: - case INS_ccmn: - assert(isGeneralRegister(reg1)); - assert(isGeneralRegister(reg2)); - cfi.flags = flags; - cfi.cond = 
cond; - fmt = IF_DR_2I; - break; - default: - unreached(); + if (opt == INS_OPTS_SCALABLE_S) + { + fmt = IF_SVE_IZ_4A; + } + else + { + assert(opt == INS_OPTS_SCALABLE_D); + if (ins == INS_sve_stnt1d) + { + fmt = IF_SVE_JA_4A; + } + else + { + fmt = IF_SVE_IZ_4A_A; + } + } + } break; - } // end switch (ins) - assert(fmt != IF_NONE); - assert(isValidImmCondFlags(cfi.immCFVal)); + case INS_sve_st2b: + case INS_sve_st3b: + case INS_sve_st4b: + case INS_sve_st2h: + case INS_sve_st3h: + case INS_sve_st4h: + case INS_sve_st2w: + case INS_sve_st3w: + case INS_sve_st4w: + case INS_sve_st2d: + case INS_sve_st3d: + case INS_sve_st4d: + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isGeneralRegister(reg4)); + assert(isScalableVectorSize(size)); - instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal); +#ifdef DEBUG + switch (ins) + { + case INS_sve_st2b: + case INS_sve_st3b: + case INS_sve_st4b: + assert(opt == INS_OPTS_SCALABLE_B); + assert(insScalableOptsNone(sopt)); + break; - id->idIns(ins); - id->idInsFmt(fmt); - id->idInsOpt(INS_OPTS_NONE); + case INS_sve_st2h: + case INS_sve_st3h: + case INS_sve_st4h: + assert(opt == INS_OPTS_SCALABLE_H); + assert(sopt == INS_SCALABLE_OPTS_LSL_N); + break; - id->idReg1(reg1); - id->idReg2(reg2); + case INS_sve_st2w: + case INS_sve_st3w: + case INS_sve_st4w: + assert(opt == INS_OPTS_SCALABLE_S); + assert(sopt == INS_SCALABLE_OPTS_LSL_N); + break; - dispIns(id); + case INS_sve_st2d: + case INS_sve_st3d: + case INS_sve_st4d: + assert(opt == INS_OPTS_SCALABLE_D); + assert(sopt == INS_SCALABLE_OPTS_LSL_N); + break; + + default: + assert(!"Invalid instruction"); + break; + } +#endif // DEBUG + fmt = IF_SVE_JC_4A; + break; + + case INS_sve_st2q: + case INS_sve_st3q: + case INS_sve_st4q: + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isGeneralRegister(reg4)); + assert(isScalableVectorSize(size)); + assert(opt == 
INS_OPTS_SCALABLE_Q); + fmt = IF_SVE_JF_4A; + break; + + default: + unreached(); + break; + } + assert(fmt != IF_NONE); + + // Use aliases. + switch (ins) + { + case INS_sve_cmple: + std::swap(reg3, reg4); + ins = INS_sve_cmpge; + break; + case INS_sve_cmplo: + std::swap(reg3, reg4); + ins = INS_sve_cmphi; + break; + case INS_sve_cmpls: + std::swap(reg3, reg4); + ins = INS_sve_cmphs; + break; + case INS_sve_cmplt: + std::swap(reg3, reg4); + ins = INS_sve_cmpgt; + break; + case INS_sve_facle: + std::swap(reg3, reg4); + ins = INS_sve_facge; + break; + case INS_sve_faclt: + std::swap(reg3, reg4); + ins = INS_sve_facgt; + break; + case INS_sve_fcmle: + std::swap(reg3, reg4); + ins = INS_sve_fcmge; + break; + case INS_sve_fcmlt: + std::swap(reg3, reg4); + ins = INS_sve_fcmgt; + break; + default: + break; + } + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(opt); + + id->idReg1(reg1); + id->idReg2(reg2); + id->idReg3(reg3); + id->idReg4(reg4); + + dispIns(id); appendToCurIG(id); } /***************************************************************************** * - * Add an instruction referencing a register, an immediate, the flags and a condition code + * Add an instruction referencing a register and a condition code */ -void emitter::emitIns_R_I_FLAGS_COND( - instruction ins, emitAttr attr, regNumber reg, int imm, insCflags flags, insCond cond) +void emitter::emitIns_R_COND(instruction ins, emitAttr attr, regNumber reg, insCond cond) { insFormat fmt = IF_NONE; condFlagsImm cfi; @@ -12270,33 +12896,21 @@ void emitter::emitIns_R_I_FLAGS_COND( /* Figure out the encoding format of the instruction */ switch (ins) { - case INS_ccmp: - case INS_ccmn: + case INS_cset: + case INS_csetm: assert(isGeneralRegister(reg)); - if (imm < 0) - { - ins = insReverse(ins); - imm = -imm; - } - if (isValidUimm5(imm)) - { - cfi.imm5 = imm; - cfi.flags = flags; - cfi.cond = cond; - fmt = IF_DI_1F; - } - else - { - assert(!"Instruction cannot be 
encoded: ccmp/ccmn imm5"); - } + cfi.cond = cond; + fmt = IF_DR_1D; break; + default: unreached(); break; + } // end switch (ins) assert(fmt != IF_NONE); - assert(isValidImmCondFlagsImm5(cfi.immCFVal)); + assert(isValidImmCond(cfi.immCFVal)); instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal); @@ -12312,41 +12926,43 @@ void emitter::emitIns_R_I_FLAGS_COND( /***************************************************************************** * - * Add an instruction referencing a register, a SVE Pattern. + * Add an instruction referencing two registers and a condition code */ -void emitter::emitIns_R_PATTERN( - instruction ins, emitAttr attr, regNumber reg1, insOpts opt, insSvePattern pattern /* = SVE_PATTERN_ALL*/) +void emitter::emitIns_R_R_COND(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insCond cond) { - emitAttr elemsize = EA_UNKNOWN; - insFormat fmt = IF_NONE; + insFormat fmt = IF_NONE; + condFlagsImm cfi; + cfi.immCFVal = 0; /* Figure out the encoding format of the instruction */ switch (ins) { - case INS_sve_ptrue: - case INS_sve_ptrues: - assert(isPredicateRegister(reg1)); - assert(isScalableVectorSize(attr)); - assert(insOptsScalableStandard(opt)); - fmt = IF_SVE_DE_1A; + case INS_cinc: + case INS_cinv: + case INS_cneg: + assert(isGeneralRegister(reg1)); + assert(isGeneralRegisterOrZR(reg2)); + cfi.cond = cond; + fmt = IF_DR_2D; break; - default: unreached(); break; } // end switch (ins) + assert(fmt != IF_NONE); + assert(isValidImmCond(cfi.immCFVal)); - instrDesc* id = emitNewInstr(attr); + instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal); id->idIns(ins); id->idInsFmt(fmt); + id->idInsOpt(INS_OPTS_NONE); id->idReg1(reg1); - id->idInsOpt(opt); - id->idSvePattern(pattern); + id->idReg2(reg2); dispIns(id); appendToCurIG(id); @@ -12354,26 +12970,28 @@ void emitter::emitIns_R_PATTERN( /***************************************************************************** * - * Add an instruction referencing a register, a SVE Pattern and an immediate. 
+ * Add an instruction referencing two registers and a condition code */ -void emitter::emitIns_R_PATTERN_I(instruction ins, emitAttr attr, regNumber reg1, insSvePattern pattern, int imm) +void emitter::emitIns_R_R_R_COND( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, insCond cond) { - emitAttr size = EA_SIZE(attr); - emitAttr elemsize = EA_UNKNOWN; - insFormat fmt = IF_NONE; + insFormat fmt = IF_NONE; + condFlagsImm cfi; + cfi.immCFVal = 0; /* Figure out the encoding format of the instruction */ switch (ins) { - case INS_sve_cntb: - case INS_sve_cntd: - case INS_sve_cnth: - case INS_sve_cntw: + case INS_csel: + case INS_csinc: + case INS_csinv: + case INS_csneg: assert(isGeneralRegister(reg1)); - assert(size == EA_8BYTE); - assert(isValidUimm4From1(imm)); - fmt = IF_SVE_BL_1A; + assert(isGeneralRegisterOrZR(reg2)); + assert(isGeneralRegisterOrZR(reg3)); + cfi.cond = cond; + fmt = IF_DR_3D; break; default: @@ -12381,15 +12999,20 @@ void emitter::emitIns_R_PATTERN_I(instruction ins, emitAttr attr, regNumber reg1 break; } // end switch (ins) + assert(fmt != IF_NONE); + assert(isValidImmCond(cfi.immCFVal)); - instrDesc* id = emitNewInstrCns(attr, imm); + instrDesc* id = emitNewInstr(attr); id->idIns(ins); id->idInsFmt(fmt); + id->idInsOpt(INS_OPTS_NONE); id->idReg1(reg1); - id->idSvePattern(pattern); + id->idReg2(reg2); + id->idReg3(reg3); + id->idSmallCns(cfi.immCFVal); dispIns(id); appendToCurIG(id); @@ -12397,23 +13020,26 @@ void emitter::emitIns_R_PATTERN_I(instruction ins, emitAttr attr, regNumber reg1 /***************************************************************************** * - * Add a memory barrier instruction with a 'barrier' immediate + * Add an instruction referencing two registers the flags and a condition code */ -void emitter::emitIns_BARR(instruction ins, insBarrier barrier) +void emitter::emitIns_R_R_FLAGS_COND( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insCflags flags, insCond cond) { - 
insFormat fmt = IF_NONE; - ssize_t imm = 0; + insFormat fmt = IF_NONE; + condFlagsImm cfi; + cfi.immCFVal = 0; /* Figure out the encoding format of the instruction */ switch (ins) { - case INS_dsb: - case INS_dmb: - case INS_isb: - - fmt = IF_SI_0B; - imm = (ssize_t)barrier; + case INS_ccmp: + case INS_ccmn: + assert(isGeneralRegister(reg1)); + assert(isGeneralRegister(reg2)); + cfi.flags = flags; + cfi.cond = cond; + fmt = IF_DR_2I; break; default: unreached(); @@ -12421,68 +13047,249 @@ void emitter::emitIns_BARR(instruction ins, insBarrier barrier) } // end switch (ins) assert(fmt != IF_NONE); + assert(isValidImmCondFlags(cfi.immCFVal)); - instrDesc* id = emitNewInstrSC(EA_8BYTE, imm); + instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal); id->idIns(ins); id->idInsFmt(fmt); id->idInsOpt(INS_OPTS_NONE); + id->idReg1(reg1); + id->idReg2(reg2); + dispIns(id); appendToCurIG(id); } /***************************************************************************** * - * Add an instruction with a static data member operand. If 'size' is 0, the - * instruction operates on the address of the static member instead of its - * value (e.g. "push offset clsvar", rather than "push dword ptr [clsvar]"). - */ - -void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs) -{ - NYI("emitIns_C"); -} - -/***************************************************************************** - * - * Add an instruction referencing stack-based local variable. + * Add an instruction referencing a register, an immediate, the flags and a condition code */ -void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs) -{ - NYI("emitIns_S"); -} - -/***************************************************************************** - * - * Add an instruction referencing a register and a stack-based local variable. 
- */ -void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) +void emitter::emitIns_R_I_FLAGS_COND( + instruction ins, emitAttr attr, regNumber reg, int imm, insCflags flags, insCond cond) { - emitAttr size = EA_SIZE(attr); - insFormat fmt = IF_NONE; - int disp = 0; - unsigned scale = 0; - bool isLdrStr = false; - - assert(offs >= 0); + insFormat fmt = IF_NONE; + condFlagsImm cfi; + cfi.immCFVal = 0; - // TODO-ARM64-CQ: use unscaled loads? /* Figure out the encoding format of the instruction */ switch (ins) { - case INS_strb: - case INS_ldrb: - case INS_ldrsb: - scale = 0; - break; - - case INS_strh: - case INS_ldrh: - case INS_ldrsh: - scale = 1; - break; + case INS_ccmp: + case INS_ccmn: + assert(isGeneralRegister(reg)); + if (imm < 0) + { + ins = insReverse(ins); + imm = -imm; + } + if (isValidUimm5(imm)) + { + cfi.imm5 = imm; + cfi.flags = flags; + cfi.cond = cond; + fmt = IF_DI_1F; + } + else + { + assert(!"Instruction cannot be encoded: ccmp/ccmn imm5"); + } + break; + default: + unreached(); + break; + } // end switch (ins) + + assert(fmt != IF_NONE); + assert(isValidImmCondFlagsImm5(cfi.immCFVal)); + + instrDesc* id = emitNewInstrSC(attr, cfi.immCFVal); + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(INS_OPTS_NONE); + + id->idReg1(reg); + + dispIns(id); + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction referencing a register, a SVE Pattern. 
+ */ + +void emitter::emitIns_R_PATTERN( + instruction ins, emitAttr attr, regNumber reg1, insOpts opt, insSvePattern pattern /* = SVE_PATTERN_ALL*/) +{ + emitAttr elemsize = EA_UNKNOWN; + insFormat fmt = IF_NONE; + + /* Figure out the encoding format of the instruction */ + switch (ins) + { + case INS_sve_ptrue: + case INS_sve_ptrues: + assert(isPredicateRegister(reg1)); + assert(isScalableVectorSize(attr)); + assert(insOptsScalableStandard(opt)); + fmt = IF_SVE_DE_1A; + break; + + default: + unreached(); + break; + + } // end switch (ins) + assert(fmt != IF_NONE); + + instrDesc* id = emitNewInstr(attr); + + id->idIns(ins); + id->idInsFmt(fmt); + + id->idReg1(reg1); + id->idInsOpt(opt); + id->idSvePattern(pattern); + + dispIns(id); + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction referencing a register, a SVE Pattern and an immediate. + */ + +void emitter::emitIns_R_PATTERN_I(instruction ins, emitAttr attr, regNumber reg1, insSvePattern pattern, int imm) +{ + emitAttr size = EA_SIZE(attr); + emitAttr elemsize = EA_UNKNOWN; + insFormat fmt = IF_NONE; + + /* Figure out the encoding format of the instruction */ + switch (ins) + { + case INS_sve_cntb: + case INS_sve_cntd: + case INS_sve_cnth: + case INS_sve_cntw: + assert(isGeneralRegister(reg1)); + assert(size == EA_8BYTE); + assert(isValidUimm4From1(imm)); + fmt = IF_SVE_BL_1A; + break; + + default: + unreached(); + break; + + } // end switch (ins) + assert(fmt != IF_NONE); + + instrDesc* id = emitNewInstrCns(attr, imm); + + id->idIns(ins); + id->idInsFmt(fmt); + + id->idReg1(reg1); + id->idSvePattern(pattern); + + dispIns(id); + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add a memory barrier instruction with a 'barrier' immediate + */ + +void emitter::emitIns_BARR(instruction ins, insBarrier barrier) +{ + insFormat fmt = IF_NONE; + ssize_t imm = 0; + + /* Figure 
out the encoding format of the instruction */ + switch (ins) + { + case INS_dsb: + case INS_dmb: + case INS_isb: + + fmt = IF_SI_0B; + imm = (ssize_t)barrier; + break; + default: + unreached(); + break; + } // end switch (ins) + + assert(fmt != IF_NONE); + + instrDesc* id = emitNewInstrSC(EA_8BYTE, imm); + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(INS_OPTS_NONE); + + dispIns(id); + appendToCurIG(id); +} + +/***************************************************************************** + * + * Add an instruction with a static data member operand. If 'size' is 0, the + * instruction operates on the address of the static member instead of its + * value (e.g. "push offset clsvar", rather than "push dword ptr [clsvar]"). + */ + +void emitter::emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs) +{ + NYI("emitIns_C"); +} + +/***************************************************************************** + * + * Add an instruction referencing stack-based local variable. + */ + +void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs) +{ + NYI("emitIns_S"); +} + +/***************************************************************************** + * + * Add an instruction referencing a register and a stack-based local variable. + */ +void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs) +{ + emitAttr size = EA_SIZE(attr); + insFormat fmt = IF_NONE; + int disp = 0; + unsigned scale = 0; + bool isLdrStr = false; + + assert(offs >= 0); + + // TODO-ARM64-CQ: use unscaled loads? 
+ /* Figure out the encoding format of the instruction */ + switch (ins) + { + case INS_strb: + case INS_ldrb: + case INS_ldrsb: + scale = 0; + break; + + case INS_strh: + case INS_ldrh: + case INS_ldrsh: + scale = 1; + break; case INS_ldrsw: scale = 2; @@ -15203,9 +16010,12 @@ void emitter::emitIns_Call(EmitCallType callType, case IF_SVE_IG_4A_G: case IF_SVE_IJ_3A: case IF_SVE_IK_4A: + case IF_SVE_IK_4A_F: + case IF_SVE_IK_4A_H: case IF_SVE_IU_4A_A: case IF_SVE_IU_4B_B: case IF_SVE_HX_3A_B: + case IF_SVE_IG_4A: case IF_SVE_IG_4A_D: case IF_SVE_IG_4A_E: case IF_SVE_IF_4A: @@ -15410,32 +16220,468 @@ void emitter::emitIns_Call(EmitCallType callType, case IF_SVE_DI_2A: return (regpos == 1 ? PREDICATE_NONE : PREDICATE_SIZED); - case IF_SVE_DK_3A: - assert((regpos == 2) || (regpos == 3)); - return ((regpos == 2) ? PREDICATE_NONE : PREDICATE_SIZED); + case IF_SVE_DK_3A: + assert((regpos == 2) || (regpos == 3)); + return ((regpos == 2) ? PREDICATE_NONE : PREDICATE_SIZED); + + default: + break; + } + + assert(!"Unexpected instruction format"); + return PREDICATE_NONE; +} + +/***************************************************************************** + * + * Returns true if the SVE instruction has a LSL addr. 
+ * This is for formats that have [, , LSL #N], [{, , LSL #N}] + */ +/*static*/ bool emitter::insSveIsLslN(instruction ins, insFormat fmt) +{ + switch (fmt) + { + case IF_SVE_JD_4A: + switch (ins) + { + case INS_sve_st1h: + return true; + + default: + break; + } + break; + + case IF_SVE_JD_4B: + switch (ins) + { + case INS_sve_st1w: + return true; + + default: + break; + } + break; + + case IF_SVE_HW_4B: + switch (ins) + { + case INS_sve_ld1h: + case INS_sve_ld1sh: + case INS_sve_ldff1h: + case INS_sve_ldff1sh: + case INS_sve_ld1w: + case INS_sve_ldff1w: + return true; + + default: + break; + } + break; + + case IF_SVE_IG_4A: + switch (ins) + { + case INS_sve_ldff1d: + case INS_sve_ldff1sw: + return true; + + default: + break; + } + break; + + case IF_SVE_IG_4A_F: + switch (ins) + { + case INS_sve_ldff1sh: + case INS_sve_ldff1w: + return true; + + default: + break; + } + break; + + case IF_SVE_IG_4A_G: + switch (ins) + { + case INS_sve_ldff1h: + return true; + + default: + break; + } + break; + + case IF_SVE_II_4A: + case IF_SVE_II_4A_B: + switch (ins) + { + case INS_sve_ld1d: + return true; + + default: + break; + } + break; + + case IF_SVE_II_4A_H: + switch (ins) + { + case INS_sve_ld1w: + return true; + + default: + break; + } + break; + + case IF_SVE_IK_4A: + switch (ins) + { + case INS_sve_ld1sw: + return true; + + default: + break; + } + break; + + case IF_SVE_IK_4A_G: + switch (ins) + { + case INS_sve_ld1sh: + return true; + + default: + break; + } + break; + + case IF_SVE_IK_4A_I: + switch (ins) + { + case INS_sve_ld1h: + return true; + + default: + break; + } + break; + + case IF_SVE_IN_4A: + switch (ins) + { + case INS_sve_ldnt1d: + case INS_sve_ldnt1h: + case INS_sve_ldnt1w: + return true; + + default: + break; + } + break; + + case IF_SVE_IP_4A: + switch (ins) + { + case INS_sve_ld1roh: + case INS_sve_ld1row: + case INS_sve_ld1rod: + case INS_sve_ld1rqh: + case INS_sve_ld1rqw: + case INS_sve_ld1rqd: + return true; + + default: + break; + } + break; + + 
case IF_SVE_IR_4A: + switch (ins) + { + case INS_sve_ld2q: + case INS_sve_ld3q: + case INS_sve_ld4q: + return true; + + default: + break; + } + break; + + case IF_SVE_IT_4A: + switch (ins) + { + case INS_sve_ld2h: + case INS_sve_ld2w: + case INS_sve_ld2d: + case INS_sve_ld3h: + case INS_sve_ld3w: + case INS_sve_ld3d: + case INS_sve_ld4h: + case INS_sve_ld4w: + case INS_sve_ld4d: + return true; + + default: + break; + } + break; + + case IF_SVE_IU_4B: + switch (ins) + { + case INS_sve_ld1sw: + case INS_sve_ldff1sw: + case INS_sve_ld1d: + case INS_sve_ldff1d: + return true; + + default: + break; + } + break; + + case IF_SVE_JB_4A: + switch (ins) + { + case INS_sve_stnt1h: + case INS_sve_stnt1w: + case INS_sve_stnt1d: + return true; + + default: + break; + } + break; + + case IF_SVE_JC_4A: + switch (ins) + { + case INS_sve_st2h: + case INS_sve_st2w: + case INS_sve_st2d: + case INS_sve_st3h: + case INS_sve_st3w: + case INS_sve_st3d: + case INS_sve_st4h: + case INS_sve_st4w: + case INS_sve_st4d: + return true; + + default: + break; + } + break; + + case IF_SVE_JD_4C: + switch (ins) + { + case INS_sve_st1w: + case INS_sve_st1d: + return true; + + default: + break; + } + break; + + case IF_SVE_JD_4C_A: + switch (ins) + { + case INS_sve_st1d: + return true; + + default: + break; + } + break; + + case IF_SVE_JF_4A: + switch (ins) + { + case INS_sve_st2q: + case INS_sve_st3q: + case INS_sve_st4q: + return true; + + default: + break; + } + break; + + case IF_SVE_JJ_4B: + switch (ins) + { + case INS_sve_st1h: + case INS_sve_st1w: + case INS_sve_st1d: + return true; + + default: + break; + } + break; + + default: + break; + } + + return false; +} + +/***************************************************************************** + * + * Returns true if the SVE instruction has a addr. 
+ * This is for formats that have [, .T, ], [, .T, #N] + */ +/*static*/ bool emitter::insSveIsModN(instruction ins, insFormat fmt) +{ + switch (fmt) + { + case IF_SVE_JJ_4A: + case IF_SVE_JJ_4A_B: + switch (ins) + { + case INS_sve_st1d: + case INS_sve_st1h: + case INS_sve_st1w: + return true; + + default: + break; + } + break; + + case IF_SVE_JJ_4A_C: + case IF_SVE_JJ_4A_D: + switch (ins) + { + case INS_sve_st1h: + case INS_sve_st1w: + return true; + + default: + break; + } + break; + + case IF_SVE_JK_4A: + case IF_SVE_JK_4A_B: + switch (ins) + { + case INS_sve_st1b: + return true; + + default: + break; + } + break; + + case IF_SVE_HW_4A: + case IF_SVE_HW_4A_A: + switch (ins) + { + case INS_sve_ld1b: + case INS_sve_ld1h: + case INS_sve_ld1sb: + case INS_sve_ld1sh: + case INS_sve_ld1w: + case INS_sve_ldff1b: + case INS_sve_ldff1h: + case INS_sve_ldff1sb: + case INS_sve_ldff1sh: + case INS_sve_ldff1w: + return true; + + default: + break; + } + break; + + case IF_SVE_HW_4A_B: + case IF_SVE_HW_4A_C: + switch (ins) + { + case INS_sve_ld1h: + case INS_sve_ld1sh: + case INS_sve_ld1w: + case INS_sve_ldff1h: + case INS_sve_ldff1sh: + case INS_sve_ldff1w: + return true; + + default: + break; + } + break; + + case IF_SVE_IU_4A: + switch (ins) + { + case INS_sve_ld1d: + case INS_sve_ld1sw: + case INS_sve_ldff1d: + case INS_sve_ldff1sw: + return true; + + default: + break; + } + break; + + case IF_SVE_IU_4A_A: + switch (ins) + { + case INS_sve_ld1sw: + case INS_sve_ldff1d: + case INS_sve_ldff1sw: + return true; + + default: + break; + } + break; + + case IF_SVE_IU_4A_C: + switch (ins) + { + case INS_sve_ld1d: + return true; + + default: + break; + } + break; default: break; } - assert(!"Unexpected instruction format"); - return PREDICATE_NONE; + return false; } /***************************************************************************** * - * Returns true if the SVE instruction has a LSL addr. 
- * This is for formats that have [, , LSL #N] + * Returns 0, 1, 2, 3 or 4 depending on the instruction and format. + * This is for formats that have [, .T, ], [, .T, #N], [, , LSL #N], + * [{, , LSL #N}] */ -/*static*/ bool emitter::insSveIsLslN(instruction ins, insFormat fmt) + +/*static*/ int emitter::insSveGetLslOrModN(instruction ins, insFormat fmt) { switch (fmt) { case IF_SVE_JD_4A: + assert(insSveIsLslN(ins, fmt)); + assert(!insSveIsModN(ins, fmt)); switch (ins) { case INS_sve_st1h: - return true; + return 1; default: break; @@ -15443,10 +16689,12 @@ void emitter::emitIns_Call(EmitCallType callType, break; case IF_SVE_JD_4B: + assert(insSveIsLslN(ins, fmt)); + assert(!insSveIsModN(ins, fmt)); switch (ins) { case INS_sve_st1w: - return true; + return 2; default: break; @@ -15454,310 +16702,434 @@ void emitter::emitIns_Call(EmitCallType callType, break; case IF_SVE_HW_4B: + assert(insSveIsLslN(ins, fmt)); + assert(!insSveIsModN(ins, fmt)); switch (ins) { case INS_sve_ld1h: case INS_sve_ld1sh: case INS_sve_ldff1h: case INS_sve_ldff1sh: + return 1; + case INS_sve_ld1w: case INS_sve_ldff1w: - return true; + return 2; default: break; } break; - default: - break; - } - - return false; -} - -/***************************************************************************** - * - * Returns true if the SVE instruction has a addr. 
- * This is for formats that have [, .T, ], [, .T, #N] - */ -/*static*/ bool emitter::insSveIsModN(instruction ins, insFormat fmt) -{ - switch (fmt) - { case IF_SVE_JJ_4A: case IF_SVE_JJ_4A_B: + case IF_SVE_JJ_4A_C: + case IF_SVE_JJ_4A_D: + case IF_SVE_JK_4A: + case IF_SVE_JK_4A_B: + case IF_SVE_HW_4A: + case IF_SVE_HW_4A_A: + case IF_SVE_HW_4A_B: + case IF_SVE_HW_4A_C: + case IF_SVE_IU_4A: + case IF_SVE_IU_4A_A: + case IF_SVE_IU_4A_C: + assert(!insSveIsLslN(ins, fmt)); + assert(insSveIsModN(ins, fmt)); switch (ins) { - case INS_sve_st1d: + case INS_sve_ld1h: + case INS_sve_ld1sh: + case INS_sve_ldff1h: + case INS_sve_ldff1sh: + switch (fmt) + { + case IF_SVE_HW_4A: + case IF_SVE_HW_4A_A: + return 1; + + default: + break; + } + return 0; + + case INS_sve_ld1w: + case INS_sve_ldff1w: + case INS_sve_ld1sw: + case INS_sve_ldff1sw: + switch (fmt) + { + case IF_SVE_HW_4A: + case IF_SVE_HW_4A_A: + case IF_SVE_IU_4A: + return 2; + + default: + break; + } + return 0; + + case INS_sve_ld1d: + case INS_sve_ldff1d: + switch (fmt) + { + case IF_SVE_IU_4A: + return 3; + + default: + break; + } + return 0; + case INS_sve_st1h: + switch (fmt) + { + case IF_SVE_JJ_4A_C: + case IF_SVE_JJ_4A_D: + return 0; + + default: + break; + } + return 1; + case INS_sve_st1w: - return true; + switch (fmt) + { + case IF_SVE_JJ_4A_C: + case IF_SVE_JJ_4A_D: + return 0; + + default: + break; + } + return 2; + + case INS_sve_st1d: + if (fmt == IF_SVE_JJ_4A_B) + { + return 0; + } + return 3; + + default: + break; + } + return 0; + + case IF_SVE_IG_4A: + assert(insSveIsLslN(ins, fmt)); + assert(!insSveIsModN(ins, fmt)); + switch (ins) + { + case INS_sve_ldff1sw: + return 2; + + case INS_sve_ldff1d: + return 3; default: break; } break; - case IF_SVE_JJ_4A_C: - case IF_SVE_JJ_4A_D: + case IF_SVE_IG_4A_F: + assert(insSveIsLslN(ins, fmt)); + assert(!insSveIsModN(ins, fmt)); switch (ins) { - case INS_sve_st1h: - case INS_sve_st1w: - return true; + case INS_sve_ldff1sh: + return 1; + + case INS_sve_ldff1w: 
+ return 2; + + default: + break; + } + break; + + case IF_SVE_IG_4A_G: + assert(insSveIsLslN(ins, fmt)); + assert(!insSveIsModN(ins, fmt)); + switch (ins) + { + case INS_sve_ldff1h: + return 1; + + default: + break; + } + break; + + case IF_SVE_II_4A: + case IF_SVE_II_4A_B: + assert(insSveIsLslN(ins, fmt)); + assert(!insSveIsModN(ins, fmt)); + switch (ins) + { + case INS_sve_ld1d: + return 3; + + default: + break; + } + break; + + case IF_SVE_II_4A_H: + assert(insSveIsLslN(ins, fmt)); + assert(!insSveIsModN(ins, fmt)); + switch (ins) + { + case INS_sve_ld1w: + return 2; + + default: + break; + } + break; + + case IF_SVE_IK_4A: + assert(insSveIsLslN(ins, fmt)); + assert(!insSveIsModN(ins, fmt)); + switch (ins) + { + case INS_sve_ld1sw: + return 2; + + default: + break; + } + break; + + case IF_SVE_IK_4A_G: + assert(insSveIsLslN(ins, fmt)); + assert(!insSveIsModN(ins, fmt)); + switch (ins) + { + case INS_sve_ld1sh: + return 1; + + default: + break; + } + break; + + case IF_SVE_IK_4A_I: + assert(insSveIsLslN(ins, fmt)); + assert(!insSveIsModN(ins, fmt)); + switch (ins) + { + case INS_sve_ld1h: + return 1; + + default: + break; + } + break; + + case IF_SVE_IN_4A: + assert(insSveIsLslN(ins, fmt)); + assert(!insSveIsModN(ins, fmt)); + switch (ins) + { + case INS_sve_ldnt1h: + return 1; + case INS_sve_ldnt1w: + return 2; + case INS_sve_ldnt1d: + return 3; default: break; } break; - case IF_SVE_JK_4A: - case IF_SVE_JK_4A_B: + case IF_SVE_IP_4A: + assert(insSveIsLslN(ins, fmt)); + assert(!insSveIsModN(ins, fmt)); switch (ins) { - case INS_sve_st1b: - return true; + case INS_sve_ld1roh: + case INS_sve_ld1rqh: + return 1; + + case INS_sve_ld1row: + case INS_sve_ld1rqw: + return 2; + case INS_sve_ld1rod: + case INS_sve_ld1rqd: + return 3; default: break; } break; - case IF_SVE_HW_4A: - case IF_SVE_HW_4A_A: + case IF_SVE_IR_4A: + assert(insSveIsLslN(ins, fmt)); + assert(!insSveIsModN(ins, fmt)); switch (ins) { - case INS_sve_ld1b: - case INS_sve_ld1h: - case INS_sve_ld1sb: - 
case INS_sve_ld1sh: - case INS_sve_ld1w: - case INS_sve_ldff1b: - case INS_sve_ldff1h: - case INS_sve_ldff1sb: - case INS_sve_ldff1sh: - case INS_sve_ldff1w: - return true; + case INS_sve_ld2q: + case INS_sve_ld3q: + case INS_sve_ld4q: + return 4; default: break; } break; - case IF_SVE_HW_4A_B: - case IF_SVE_HW_4A_C: + case IF_SVE_IT_4A: + assert(insSveIsLslN(ins, fmt)); + assert(!insSveIsModN(ins, fmt)); switch (ins) { - case INS_sve_ld1h: - case INS_sve_ld1sh: - case INS_sve_ld1w: - case INS_sve_ldff1h: - case INS_sve_ldff1sh: - case INS_sve_ldff1w: - return true; + case INS_sve_ld2h: + case INS_sve_ld3h: + case INS_sve_ld4h: + return 1; + + case INS_sve_ld2w: + case INS_sve_ld3w: + case INS_sve_ld4w: + return 2; + + case INS_sve_ld2d: + case INS_sve_ld3d: + case INS_sve_ld4d: + return 3; default: break; } break; - case IF_SVE_IU_4A: + case IF_SVE_IU_4B: + assert(insSveIsLslN(ins, fmt)); + assert(!insSveIsModN(ins, fmt)); switch (ins) { - case INS_sve_ld1d: case INS_sve_ld1sw: - case INS_sve_ldff1d: case INS_sve_ldff1sw: - return true; + return 2; + + case INS_sve_ld1d: + case INS_sve_ldff1d: + return 3; default: break; } break; - case IF_SVE_IU_4A_A: + case IF_SVE_JB_4A: + assert(insSveIsLslN(ins, fmt)); + assert(!insSveIsModN(ins, fmt)); switch (ins) { - case INS_sve_ld1sw: - case INS_sve_ldff1d: - case INS_sve_ldff1sw: - return true; + case INS_sve_stnt1h: + return 1; + + case INS_sve_stnt1w: + return 2; + + case INS_sve_stnt1d: + return 3; default: break; } break; - case IF_SVE_IU_4A_C: + case IF_SVE_JC_4A: + assert(insSveIsLslN(ins, fmt)); + assert(!insSveIsModN(ins, fmt)); switch (ins) { - case INS_sve_ld1d: - return true; + case INS_sve_st2h: + case INS_sve_st3h: + case INS_sve_st4h: + return 1; + + case INS_sve_st2w: + case INS_sve_st3w: + case INS_sve_st4w: + return 2; + + case INS_sve_st2d: + case INS_sve_st3d: + case INS_sve_st4d: + return 3; default: break; } break; - default: - break; - } - - return false; -} - 
-/***************************************************************************** - * - * Returns 0, 1, 2 or 3 depending on the instruction and format. - * This is for formats that have [, .T, ], [, .T, #N], [, , LSL #N] - */ - -/*static*/ int emitter::insSveGetLslOrModN(instruction ins, insFormat fmt) -{ - switch (fmt) - { - case IF_SVE_JD_4A: + case IF_SVE_JD_4C: assert(insSveIsLslN(ins, fmt)); assert(!insSveIsModN(ins, fmt)); switch (ins) { - case INS_sve_st1h: - return 1; + case INS_sve_st1w: + return 2; + + case INS_sve_st1d: + return 3; default: break; } break; - case IF_SVE_JD_4B: + case IF_SVE_JD_4C_A: assert(insSveIsLslN(ins, fmt)); assert(!insSveIsModN(ins, fmt)); switch (ins) { - case INS_sve_st1w: - return 2; + case INS_sve_st1d: + return 3; default: break; } break; - case IF_SVE_HW_4B: + case IF_SVE_JF_4A: assert(insSveIsLslN(ins, fmt)); assert(!insSveIsModN(ins, fmt)); switch (ins) { - case INS_sve_ld1h: - case INS_sve_ld1sh: - case INS_sve_ldff1h: - case INS_sve_ldff1sh: - return 1; - - case INS_sve_ld1w: - case INS_sve_ldff1w: - return 2; + case INS_sve_st2q: + case INS_sve_st3q: + case INS_sve_st4q: + return 4; default: break; } break; - case IF_SVE_JJ_4A: - case IF_SVE_JJ_4A_B: - case IF_SVE_JJ_4A_C: - case IF_SVE_JJ_4A_D: - case IF_SVE_JK_4A: - case IF_SVE_JK_4A_B: - case IF_SVE_HW_4A: - case IF_SVE_HW_4A_A: - case IF_SVE_HW_4A_B: - case IF_SVE_HW_4A_C: - case IF_SVE_IU_4A: - case IF_SVE_IU_4A_A: - case IF_SVE_IU_4A_C: - assert(!insSveIsLslN(ins, fmt)); - assert(insSveIsModN(ins, fmt)); + case IF_SVE_JJ_4B: + assert(insSveIsLslN(ins, fmt)); + assert(!insSveIsModN(ins, fmt)); switch (ins) { - case INS_sve_ld1h: - case INS_sve_ld1sh: - case INS_sve_ldff1h: - case INS_sve_ldff1sh: - switch (fmt) - { - case IF_SVE_HW_4A: - case IF_SVE_HW_4A_A: - return 1; - - default: - break; - } - return 0; - - case INS_sve_ld1w: - case INS_sve_ldff1w: - case INS_sve_ld1sw: - case INS_sve_ldff1sw: - switch (fmt) - { - case IF_SVE_HW_4A: - case IF_SVE_HW_4A_A: - case 
IF_SVE_IU_4A: - return 2; - - default: - break; - } - return 0; - - case INS_sve_ld1d: - case INS_sve_ldff1d: - switch (fmt) - { - case IF_SVE_IU_4A: - return 3; - - default: - break; - } - return 0; - case INS_sve_st1h: - switch (fmt) - { - case IF_SVE_JJ_4A_C: - case IF_SVE_JJ_4A_D: - return 0; - - default: - break; - } return 1; case INS_sve_st1w: - switch (fmt) - { - case IF_SVE_JJ_4A_C: - case IF_SVE_JJ_4A_D: - return 0; - - default: - break; - } return 2; case INS_sve_st1d: - if (fmt == IF_SVE_JJ_4A_B) - { - return 0; - } return 3; default: break; } - return 0; + break; default: break; @@ -15786,6 +17158,11 @@ void emitter::emitIns_Call(EmitCallType callType, case INS_sve_ldnf1h: case INS_sve_ldnf1sb: case INS_sve_ldnf1b: + case INS_sve_ldff1b: + case INS_sve_ldff1sb: + case INS_sve_ldff1h: + case INS_sve_ldff1sh: + case INS_sve_ldff1w: return true; default: @@ -15795,13 +17172,14 @@ void emitter::emitIns_Call(EmitCallType callType, /***************************************************************************** * - * Returns the encoding to select the 1/2/4/8/16 byte elemsize for an Arm64 Sve vector instruction + * Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction * for the 'dtype' field. */ /*static*/ emitter::code_t emitter::insEncodeSveElemsize_dtype(instruction ins, emitAttr size, code_t code) { assert(canEncodeSveElemsize_dtype(ins)); + assert(ins != INS_sve_ld1w); switch (size) { case EA_1BYTE: @@ -15809,6 +17187,7 @@ void emitter::emitIns_Call(EmitCallType callType, { case INS_sve_ld1b: case INS_sve_ldnf1b: + case INS_sve_ldff1b: return code; // By default, the instruction already encodes 8-bit. default: @@ -15823,10 +17202,13 @@ void emitter::emitIns_Call(EmitCallType callType, case INS_sve_ld1h: case INS_sve_ldnf1b: case INS_sve_ldnf1h: + case INS_sve_ldff1b: + case INS_sve_ldff1h: return code | (1 << 21); // Set bit '21' to 1. 
 case INS_sve_ld1sb: case INS_sve_ldnf1sb: + case INS_sve_ldff1sb: return code | (1 << 22); // Set bit '22' to 1. default: @@ -15837,24 +17219,24 @@ void emitter::emitIns_Call(EmitCallType callType, case EA_4BYTE: switch (ins) { - case INS_sve_ld1w: - // Note: Bit '15' is not actually part of 'dtype', but it is necessary to set to '1' to get the - // proper encoding for S. - return (code | (1 << 15)) | (1 << 22); // Set bit '22' and '15' to 1. - case INS_sve_ldnf1w: + case INS_sve_ldff1w: return code; // By default, the instruction already encodes 32-bit. case INS_sve_ld1b: case INS_sve_ld1h: case INS_sve_ldnf1b: case INS_sve_ldnf1h: + case INS_sve_ldff1b: + case INS_sve_ldff1h: return code | (1 << 22); // Set bit '22' to 1. case INS_sve_ld1sb: case INS_sve_ld1sh: case INS_sve_ldnf1sb: case INS_sve_ldnf1sh: + case INS_sve_ldff1sb: + case INS_sve_ldff1sh: return code | (1 << 21); // Set bit '21' to 1. default: @@ -15865,24 +17247,24 @@ void emitter::emitIns_Call(EmitCallType callType, case EA_8BYTE: switch (ins) { - case INS_sve_ld1w: - // Note: Bit '15' is not actually part of 'dtype', but it is necessary to set to '1' to get the - // proper encoding for D. - return ((code | (1 << 15)) | (1 << 22)) | (1 << 21); // Set bit '22', '21' and '15' to 1. - case INS_sve_ldnf1w: + case INS_sve_ldff1w: return code | (1 << 21); // Set bit '21' to 1. case INS_sve_ld1b: case INS_sve_ld1h: case INS_sve_ldnf1b: case INS_sve_ldnf1h: + case INS_sve_ldff1b: + case INS_sve_ldff1h: return (code | (1 << 22)) | (1 << 21); // Set bit '22' and '21' to 1. case INS_sve_ld1sb: case INS_sve_ld1sh: case INS_sve_ldnf1sb: case INS_sve_ldnf1sh: + case INS_sve_ldff1sb: + case INS_sve_ldff1sh: return code; // By default, the instruction already encodes 64-bit. 
default: @@ -15890,21 +17272,86 @@ void emitter::emitIns_Call(EmitCallType callType, } return code; + default: + assert(!"Invalid size for encoding dtype."); + } + + return code; +} + +/***************************************************************************** + * + * Returns the encoding to select the 4/8/16 byte elemsize for the Arm64 Sve vector instruction 'ld1w' + * for the 'dtype' field. + */ + +/*static*/ emitter::code_t emitter::insEncodeSveElemsize_dtype_ld1w(instruction ins, + insFormat fmt, + emitAttr size, + code_t code) +{ + assert(canEncodeSveElemsize_dtype(ins)); + assert(ins == INS_sve_ld1w); + switch (size) + { + case EA_4BYTE: + switch (fmt) + { + case IF_SVE_IH_3A_F: + // Note: Bit '15' is not actually part of 'dtype', but it is necessary to set to '1' to get the + // proper encoding for S. + return (code | (1 << 15)) | (1 << 22); // Set bit '22' and '15' to 1. + + case IF_SVE_II_4A_H: + // Note: Bit '14' is not actually part of 'dtype', but it is necessary to set to '1' to get the + // proper encoding for S. + return (code | (1 << 14)) | (1 << 22); // Set bit '22' and '14' to 1. + + default: + break; + } + break; + + case EA_8BYTE: + switch (fmt) + { + case IF_SVE_IH_3A_F: + // Note: Bit '15' is not actually part of 'dtype', but it is necessary to set to '1' to get the + // proper encoding for D. + return ((code | (1 << 15)) | (1 << 22)) | (1 << 21); // Set bit '22', '21' and '15' to 1. + + case IF_SVE_II_4A_H: + // Note: Bit '14' is not actually part of 'dtype', but it is necessary to set to '1' to get the + // proper encoding for D. + return ((code | (1 << 14)) | (1 << 22)) | (1 << 21); // Set bit '22', '21' and '14' to 1. + + default: + break; + } + break; + case EA_16BYTE: - switch (ins) + switch (fmt) { - case INS_sve_ld1w: + case IF_SVE_IH_3A_F: return code | (1 << 20); // Set bit '20' to 1. + case IF_SVE_II_4A_H: + // Note: Bit '15' is not actually part of 'dtype', but it is necessary to set to '1' to get the + // proper encoding for Q. 
+ return code | (1 << 15); // Set bit '15' to 1. + default: - assert(!"Invalid instruction for encoding dtype."); + break; } - return code; + break; default: assert(!"Invalid size for encoding dtype."); + break; } + assert(!"Invalid instruction format"); return code; } @@ -18013,13 +19460,216 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; - case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva, mrs) - assert(insOptsNone(id->idInsOpt())); - code = emitInsCode(ins, fmt); - code |= insEncodeReg_Rt(id->idReg1()); // ttttt - dst += emitOutput_Instr(dst, code); - break; + case IF_SR_1A: // SR_1A ................ ...........ttttt Rt (dc zva, mrs) + assert(insOptsNone(id->idInsOpt())); + code = emitInsCode(ins, fmt); + code |= insEncodeReg_Rt(id->idReg1()); // ttttt + dst += emitOutput_Instr(dst, code); + break; + + default: + dst = emitOutput_InstrSve(dst, id); + break; + } + + // Determine if any registers now hold GC refs, or whether a register that was overwritten held a GC ref. + // We assume here that "id->idGCref()" is not GC_NONE only if the instruction described by "id" writes a + // GC ref to register "id->idReg1()". (It may, apparently, also not be GC_NONE in other cases, such as + // for stores, but we ignore those cases here.) + if (emitInsMayWriteToGCReg(id)) // True if "id->idIns()" writes to a register than can hold GC ref. + { + // We assume that "idReg1" is the primary destination register for all instructions + assert(!emitInsDestIsOp2(ins)); + if (id->idGCref() != GCT_NONE) + { + emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst); + } + else + { + emitGCregDeadUpd(id->idReg1(), dst); + } + + if (emitInsMayWriteMultipleRegs(id)) + { + // INS_ldp etc... 
+ // "idReg2" is the secondary destination register + if (id->idGCrefReg2() != GCT_NONE) + { + emitGCregLiveUpd(id->idGCrefReg2(), id->idReg2(), dst); + } + else + { + emitGCregDeadUpd(id->idReg2(), dst); + } + } + } + +SKIP_GC_UPDATE: + // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC + // ref or overwritten one. + if (emitInsWritesToLclVarStackLoc(id) || emitInsWritesToLclVarStackLocPair(id)) + { + int varNum = id->idAddr()->iiaLclVar.lvaVarNum(); + unsigned ofs = AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), TARGET_POINTER_SIZE); + bool FPbased; + int adr = emitComp->lvaFrameAddress(varNum, &FPbased); + if (id->idGCref() != GCT_NONE) + { + emitGCvarLiveUpd(adr + ofs, varNum, id->idGCref(), dst DEBUG_ARG(varNum)); + } + else + { + // If the type of the local is a gc ref type, update the liveness. + var_types vt; + if (varNum >= 0) + { + // "Regular" (non-spill-temp) local. + vt = var_types(emitComp->lvaTable[varNum].lvType); + } + else + { + TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum); + vt = tmpDsc->tdTempType(); + } + if (vt == TYP_REF || vt == TYP_BYREF) + { + emitGCvarDeadUpd(adr + ofs, dst DEBUG_ARG(varNum)); + } + } + if (emitInsWritesToLclVarStackLocPair(id)) + { + int varNum2 = varNum; + int adr2 = adr; + unsigned ofs2 = ofs; + unsigned ofs2Dist; + + if (id->idIsLclVarPair()) + { + bool FPbased2; + + emitLclVarAddr* lclVarAddr2 = emitGetLclVarPairLclVar2(id); + varNum2 = lclVarAddr2->lvaVarNum(); + ofs2 = lclVarAddr2->lvaOffset(); + + // If there are 2 GC vars in this instrDesc, get the 2nd variable + // that should be tracked. 
+ adr2 = emitComp->lvaFrameAddress(varNum2, &FPbased2); + ofs2Dist = EA_SIZE_IN_BYTES(size); +#ifdef DEBUG + assert(FPbased == FPbased2); + if (FPbased) + { + assert(id->idReg3() == REG_FP); + } + else + { + assert(id->idReg3() == REG_SP); + } + assert(varNum2 != -1); +#endif // DEBUG + } + else + { + ofs2Dist = TARGET_POINTER_SIZE; + ofs2 += ofs2Dist; + } + + ofs2 = AlignDown(ofs2, ofs2Dist); + + if (id->idGCrefReg2() != GCT_NONE) + { +#ifdef DEBUG + if (id->idGCref() != GCT_NONE) + { + // If 1st register was a gc-var, then make sure the offset + // are correctly set for the 2nd register that is holding + // another gc-var. + assert((adr + ofs + ofs2Dist) == (adr2 + ofs2)); + } +#endif + emitGCvarLiveUpd(adr2 + ofs2, varNum2, id->idGCrefReg2(), dst DEBUG_ARG(varNum2)); + } + else + { + // If the type of the local is a gc ref type, update the liveness. + var_types vt; + if (varNum2 >= 0) + { + // "Regular" (non-spill-temp) local. + vt = var_types(emitComp->lvaTable[varNum2].lvType); + } + else + { + TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum2); + vt = tmpDsc->tdTempType(); + } + if (vt == TYP_REF || vt == TYP_BYREF) + { + emitGCvarDeadUpd(adr2 + ofs2, dst DEBUG_ARG(varNum2)); + } + } + } + } + +#ifdef DEBUG + /* Make sure we set the instruction descriptor size correctly */ + + size_t expected = emitSizeOfInsDsc(id); + assert(sz == expected); + + if (emitComp->opts.disAsm || emitComp->verbose) + { + emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(odst), *dp, (dst - *dp), ig); + } + + if (emitComp->compDebugBreak) + { + // For example, set JitBreakEmitOutputInstr=a6 will break when this method is called for + // emitting instruction a6, (i.e. IN00a6 in jitdump). + if ((unsigned)JitConfig.JitBreakEmitOutputInstr() == id->idDebugOnlyInfo()->idNum) + { + assert(!"JitBreakEmitOutputInstr reached"); + } + } + + // Output any delta in GC info. 
+ if (EMIT_GC_VERBOSE || emitComp->opts.disasmWithGC) + { + emitDispGCInfoDelta(); + } +#else + if (emitComp->opts.disAsm) + { + size_t expected = emitSizeOfInsDsc(id); + assert(sz == expected); + emitDispIns(id, false, 0, true, emitCurCodeOffs(odst), *dp, (dst - *dp), ig); + } +#endif + + /* All instructions are expected to generate code */ + + assert(*dp != dst || id->idIsEmptyAlign()); + + *dp = dst; + + return sz; +} + +/***************************************************************************** + * + * Append the machine code corresponding to the given SVE instruction descriptor. + */ +BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) +{ + code_t code = 0; + instruction ins = id->idIns(); + insFormat fmt = id->idInsFmt(); + emitAttr size = id->idOpSize(); + ssize_t imm; + + switch (fmt) + { case IF_SVE_CK_2A: // ................ .......NNNN.DDDD -- SVE unpack predicate elements code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_P_3_to_0(id->idReg1()); // DDDD @@ -18636,7 +20286,14 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) if (canEncodeSveElemsize_dtype(ins)) { - code = insEncodeSveElemsize_dtype(ins, optGetSveElemsize(id->idInsOpt()), code); + if (ins == INS_sve_ld1w) + { + code = insEncodeSveElemsize_dtype_ld1w(ins, fmt, optGetSveElemsize(id->idInsOpt()), code); + } + else + { + code = insEncodeSveElemsize_dtype(ins, optGetSveElemsize(id->idInsOpt()), code); + } } dst += emitOutput_Instr(dst, code); @@ -18762,192 +20419,114 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; - default: - assert(!"Unexpected format"); + case IF_SVE_IF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus + // scalar) + case IF_SVE_IF_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus + // scalar) + case IF_SVE_IW_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit gather load 
(vector plus scalar) + case IF_SVE_IX_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit gather non-temporal load (vector plus + // scalar) + case IF_SVE_IY_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit scatter store (vector plus scalar) + case IF_SVE_IZ_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus + // scalar) + case IF_SVE_IZ_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus + // scalar) + case IF_SVE_JA_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit scatter non-temporal store (vector plus + // scalar) + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_V_9_to_5(id->idReg3()); // nnnnn + code |= insEncodeReg_R_20_to_16(id->idReg4()); // mmmmm + dst += emitOutput_Instr(dst, code); break; - } - - // Determine if any registers now hold GC refs, or whether a register that was overwritten held a GC ref. - // We assume here that "id->idGCref()" is not GC_NONE only if the instruction described by "id" writes a - // GC ref to register "id->idReg1()". (It may, apparently, also not be GC_NONE in other cases, such as - // for stores, but we ignore those cases here.) - if (emitInsMayWriteToGCReg(id)) // True if "id->idIns()" writes to a register than can hold GC ref. - { - // We assume that "idReg1" is the primary destination register for all instructions - assert(!emitInsDestIsOp2(ins)); - if (id->idGCref() != GCT_NONE) - { - emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst); - } - else - { - emitGCregDeadUpd(id->idReg1(), dst); - } - - if (emitInsMayWriteMultipleRegs(id)) - { - // INS_ldp etc... 
- // "idReg2" is the secondary destination register - if (id->idGCrefReg2() != GCT_NONE) - { - emitGCregLiveUpd(id->idGCrefReg2(), id->idReg2(), dst); - } - else - { - emitGCregDeadUpd(id->idReg2(), dst); - } - } - } - -SKIP_GC_UPDATE: - // Now we determine if the instruction has written to a (local variable) stack location, and either written a GC - // ref or overwritten one. - if (emitInsWritesToLclVarStackLoc(id) || emitInsWritesToLclVarStackLocPair(id)) - { - int varNum = id->idAddr()->iiaLclVar.lvaVarNum(); - unsigned ofs = AlignDown(id->idAddr()->iiaLclVar.lvaOffset(), TARGET_POINTER_SIZE); - bool FPbased; - int adr = emitComp->lvaFrameAddress(varNum, &FPbased); - if (id->idGCref() != GCT_NONE) - { - emitGCvarLiveUpd(adr + ofs, varNum, id->idGCref(), dst DEBUG_ARG(varNum)); - } - else - { - // If the type of the local is a gc ref type, update the liveness. - var_types vt; - if (varNum >= 0) - { - // "Regular" (non-spill-temp) local. - vt = var_types(emitComp->lvaTable[varNum].lvType); - } - else - { - TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum); - vt = tmpDsc->tdTempType(); - } - if (vt == TYP_REF || vt == TYP_BYREF) - { - emitGCvarDeadUpd(adr + ofs, dst DEBUG_ARG(varNum)); - } - } - if (emitInsWritesToLclVarStackLocPair(id)) - { - int varNum2 = varNum; - int adr2 = adr; - unsigned ofs2 = ofs; - unsigned ofs2Dist; - - if (id->idIsLclVarPair()) - { - bool FPbased2; - - emitLclVarAddr* lclVarAddr2 = emitGetLclVarPairLclVar2(id); - varNum2 = lclVarAddr2->lvaVarNum(); - ofs2 = lclVarAddr2->lvaOffset(); - - // If there are 2 GC vars in this instrDesc, get the 2nd variable - // that should be tracked. 
- adr2 = emitComp->lvaFrameAddress(varNum2, &FPbased2); - ofs2Dist = EA_SIZE_IN_BYTES(size); -#ifdef DEBUG - assert(FPbased == FPbased2); - if (FPbased) - { - assert(id->idReg3() == REG_FP); - } - else - { - assert(id->idReg3() == REG_SP); - } - assert(varNum2 != -1); -#endif // DEBUG - } - else - { - ofs2Dist = TARGET_POINTER_SIZE; - ofs2 += ofs2Dist; - } - ofs2 = AlignDown(ofs2, ofs2Dist); + case IF_SVE_IG_4A_D: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus + // scalar) + case IF_SVE_IG_4A_E: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus + // scalar) + case IF_SVE_IG_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus + // scalar) + case IF_SVE_IG_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus + // scalar) + case IF_SVE_II_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) + case IF_SVE_IK_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + case IF_SVE_IK_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + case IF_SVE_IK_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + case IF_SVE_IK_4A_I: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_R_9_to_5(id->idReg3()); // nnnnn + code |= insEncodeReg_R_20_to_16(id->idReg4()); // mmmmm - if (id->idGCrefReg2() != GCT_NONE) - { -#ifdef DEBUG - if (id->idGCref() != GCT_NONE) - { - // If 1st register was a gc-var, then make sure the offset - // are correctly set for the 2nd register that is holding - // another gc-var. 
- assert((adr + ofs + ofs2Dist) == (adr2 + ofs2)); - } -#endif - emitGCvarLiveUpd(adr2 + ofs2, varNum2, id->idGCrefReg2(), dst DEBUG_ARG(varNum2)); - } - else + if (canEncodeSveElemsize_dtype(ins)) { - // If the type of the local is a gc ref type, update the liveness. - var_types vt; - if (varNum2 >= 0) + if (ins == INS_sve_ld1w) { - // "Regular" (non-spill-temp) local. - vt = var_types(emitComp->lvaTable[varNum2].lvType); + code = insEncodeSveElemsize_dtype_ld1w(ins, fmt, optGetSveElemsize(id->idInsOpt()), code); } else { - TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum2); - vt = tmpDsc->tdTempType(); - } - if (vt == TYP_REF || vt == TYP_BYREF) - { - emitGCvarDeadUpd(adr2 + ofs2, dst DEBUG_ARG(varNum2)); + code = insEncodeSveElemsize_dtype(ins, optGetSveElemsize(id->idInsOpt()), code); } } - } - } - -#ifdef DEBUG - /* Make sure we set the instruction descriptor size correctly */ - size_t expected = emitSizeOfInsDsc(id); - assert(sz == expected); + dst += emitOutput_Instr(dst, code); + break; - if (emitComp->opts.disAsm || emitComp->verbose) - { - emitDispIns(id, false, dspOffs, true, emitCurCodeOffs(odst), *dp, (dst - *dp), ig); - } + case IF_SVE_IG_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus scalar) + case IF_SVE_II_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) + case IF_SVE_II_4A_B: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) + case IF_SVE_IK_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + case IF_SVE_IN_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus scalar) + case IF_SVE_IP_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus scalar) + case IF_SVE_IR_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus + // scalar) + case IF_SVE_IT_4A: // ...........mmmmm 
...gggnnnnnttttt -- SVE load multiple structures (scalar plus scalar) + case IF_SVE_JB_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus + // scalar) + case IF_SVE_JC_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (scalar plus scalar) + case IF_SVE_JD_4C: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + case IF_SVE_JD_4C_A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + case IF_SVE_JF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus + // scalar) + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_R_9_to_5(id->idReg3()); // nnnnn + code |= insEncodeReg_R_20_to_16(id->idReg4()); // mmmmm + dst += emitOutput_Instr(dst, code); + break; - if (emitComp->compDebugBreak) - { - // For example, set JitBreakEmitOutputInstr=a6 will break when this method is called for - // emitting instruction a6, (i.e. IN00a6 in jitdump). 
- if ((unsigned)JitConfig.JitBreakEmitOutputInstr() == id->idDebugOnlyInfo()->idNum) - { - assert(!"JitBreakEmitOutputInstr reached"); - } - } + case IF_SVE_IU_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + case IF_SVE_IU_4B_B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + case IF_SVE_IU_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + case IF_SVE_JJ_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JJ_4B_C: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JJ_4B_E: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JK_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled + // offsets) + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_R_9_to_5(id->idReg3()); // nnnnn + code |= insEncodeReg_V_20_to_16(id->idReg4()); // mmmmm + dst += emitOutput_Instr(dst, code); + break; - // Output any delta in GC info. 
- if (EMIT_GC_VERBOSE || emitComp->opts.disasmWithGC) - { - emitDispGCInfoDelta(); - } -#else - if (emitComp->opts.disAsm) - { - size_t expected = emitSizeOfInsDsc(id); - assert(sz == expected); - emitDispIns(id, false, 0, true, emitCurCodeOffs(odst), *dp, (dst - *dp), ig); + default: + assert(!"Unexpected format"); + break; } -#endif - - /* All instructions are expected to generate code */ - assert(*dp != dst || id->idIsEmptyAlign()); - - *dp = dst; - - return sz; + return dst; } /*****************************************************************************/ @@ -19251,17 +20830,38 @@ void emitter::emitDispSveExtendOptsModN(insOpts opt, int n) /***************************************************************************** * * Prints the encoding for the or LSL encoding along with the N value - * This is for formats that have [, .T, ], [, .T, #N], [, , LSL #N] + * This is for formats that have [, .T, ], [, .T, #N], [, , LSL #N], + * [{, , LSL #N}] */ void emitter::emitDispSveModAddr(instruction ins, regNumber reg1, regNumber reg2, insOpts opt, insFormat fmt) { printf("["); - emitDispReg(reg1, EA_8BYTE, true); + + if (isVectorRegister(reg1)) + { + // If the overall instruction is working on 128-bit + // registers, the size of this register for + // the mod addr is always 64-bit. + // Example: LD1Q {.Q }, /Z, [.D{, }] + if (opt == INS_OPTS_SCALABLE_Q) + { + emitDispSveReg(reg1, INS_OPTS_SCALABLE_D, reg2 != REG_ZR); + } + else + { + emitDispSveReg(reg1, opt, reg2 != REG_ZR); + } + } + else + { + emitDispReg(reg1, EA_8BYTE, reg2 != REG_ZR); + } + if (isVectorRegister(reg2)) { emitDispSveReg(reg2, opt, false); } - else + else if (reg2 != REG_ZR) { emitDispReg(reg2, EA_8BYTE, false); } @@ -19271,11 +20871,16 @@ void emitter::emitDispSveModAddr(instruction ins, regNumber reg1, regNumber reg2 emitDispComma(); emitDispSveExtendOptsModN(opt, insSveGetLslOrModN(ins, fmt)); } - else if (insSveIsLslN(ins, fmt)) + // Omit 'lsl #N' only if the second register is ZR. 
+ else if ((reg2 != REG_ZR) && insSveIsLslN(ins, fmt)) { emitDispComma(); switch (insSveGetLslOrModN(ins, fmt)) { + case 4: + printf("lsl #4"); + break; + case 3: printf("lsl #3"); break; @@ -21869,9 +23474,161 @@ void emitter::emitDispInsHelp( // {.D }, /Z, [, .D, LSL #2] case IF_SVE_HW_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled // offsets) - // {.D }, /Z, [, .D] - case IF_SVE_HW_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // {.D }, /Z, [, .D] + case IF_SVE_HW_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + // {.S }, /Z, [.S{, }] + case IF_SVE_IF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus + // scalar) + // {.D }, /Z, [.D{, }] + case IF_SVE_IF_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus + // scalar) + // {.D }, /Z, [{, , LSL #3}] + // {.D }, /Z, [{, , LSL #2}] + case IF_SVE_IG_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus scalar) + // {.H }, /Z, [{, }] + // {.S }, /Z, [{, }] + // {.D }, /Z, [{, }] + case IF_SVE_IG_4A_D: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus + // scalar) + // {.B }, /Z, [{, }] + // {.H }, /Z, [{, }] + // {.S }, /Z, [{, }] + // {.D }, /Z, [{, }] + case IF_SVE_IG_4A_E: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus + // scalar) + // {.S }, /Z, [{, , LSL #1}] + // {.D }, /Z, [{, , LSL #1}] + // {.S }, /Z, [{, , LSL #2}] + // {.D }, /Z, [{, , LSL #2}] + case IF_SVE_IG_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus + // scalar) + // {.H }, /Z, [{, , LSL #1}] + // {.S }, /Z, [{, , LSL #1}] + // {.D }, /Z, [{, , LSL #1}] + case IF_SVE_IG_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load 
(scalar plus + // scalar) + // {.D }, /Z, [, , LSL #3] + case IF_SVE_II_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) + // {.Q }, /Z, [, , LSL #3] + case IF_SVE_II_4A_B: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) + // {.D }, /Z, [, , LSL #2] + case IF_SVE_II_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) + // {.D }, /Z, [, , LSL #2 + case IF_SVE_IK_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + // {.H }, /Z, [, ] + // {.S }, /Z, [, ] + // {.D }, /Z, [, ] + case IF_SVE_IK_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + // {.S }, /Z, [, , LSL #1] + // {.D }, /Z, [, , LSL #1] + case IF_SVE_IK_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + // {.B }, /Z, [, ] + // {.H }, /Z, [, ] + // {.S }, /Z, [, ] + // {.D }, /Z, [, ] + case IF_SVE_IK_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + // {.H }, /Z, [, , LSL #1] + // {.S }, /Z, [, , LSL #1] + // {.D }, /Z, [, , LSL #1] + case IF_SVE_IK_4A_I: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + // {.B }, /Z, [, ] + // {.H }, /Z, [, ] + // {.S }, /Z, [, ] + // {.D }, /Z, [, ] + case IF_SVE_IN_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus scalar) + // {.B }, /Z, [, ] + // {.H }, /Z, [, ] + // {.S }, /Z, [, ] + // {.D }, /Z, [, ] + case IF_SVE_IP_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus scalar) + // {.Q, .Q }, /Z, [, , LSL #4] + // {.Q, .Q, .Q }, /Z, [, , LSL #4] + // {.Q, .Q, .Q, .Q }, /Z, [, , LSL #4] + case IF_SVE_IR_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus + // scalar) + // {.B, .B }, /Z, [, ] + // {.H, .H }, /Z, [, , LSL #1] + // {.S, .S 
}, /Z, [, , LSL #2] + // {.D, .D }, /Z, [, , LSL #3] + // {.B, .B, .B }, /Z, [, ] + // {.H, .H, .H }, /Z, [, , LSL #1] + // {.S, .S, .S }, /Z, [, , LSL #2] + // {.D, .D, .D }, /Z, [, , LSL #3] + // {.B, .B, .B, .B }, /Z, [, ] + // {.H, .H, .H, .H }, /Z, [, , LSL #1] + // {.S, .S, .S, .S }, /Z, [, , LSL #2] + // {.D, .D, .D, .D }, /Z, [, , LSL #3] + case IF_SVE_IT_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (scalar plus scalar) + // {.D }, /Z, [, .D, LSL #2] + // {.D }, /Z, [, .D, LSL #3] + case IF_SVE_IU_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + // {.D }, /Z, [, .D] + case IF_SVE_IU_4B_B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + // {.D }, /Z, [, .D] + case IF_SVE_IU_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + // {.Q }, /Z, [.D{, }] + case IF_SVE_IW_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit gather load (vector plus scalar) + // {.D }, /Z, [.D{, }] + case IF_SVE_IX_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit gather non-temporal load (vector plus + // scalar) + // {.Q }, , [.D{, }] + case IF_SVE_IY_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit scatter store (vector plus scalar) + // {.S }, , [.S{, }] + case IF_SVE_IZ_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus + // scalar) + // {.D }, , [.D{, }] + case IF_SVE_IZ_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus + // scalar) + // {.D }, , [.D{, }] + case IF_SVE_JA_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit scatter non-temporal store (vector plus + // scalar) + // {.B }, , [, ] + // {.H }, , [, , LSL #1] + // {.S }, , [, , LSL #2] + // {.D }, , [, , LSL #3] + case IF_SVE_JB_4A: // ...........mmmmm ...gggnnnnnttttt -- 
SVE contiguous non-temporal store (scalar plus + // scalar) + // {.B, .B }, , [, ] + // {.H, .H }, , [, , LSL #1] + // {.S, .S }, , [, , LSL #2] + // {.D, .D }, , [, , LSL #3] + // {.B, .B, .B }, , [, ] + // {.H, .H, .H }, , [, , LSL #1] + // {.S, .S, .S }, , [, , LSL #2] + // {.D, .D, .D }, , [, , LSL #3] + // {.B, .B, .B, .B }, , [, ] + // {.H, .H, .H, .H }, , [, , LSL #1] + // {.S, .S, .S, .S }, , [, , LSL #2] + // {.D, .D, .D, .D }, , [, , LSL #3] + case IF_SVE_JC_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (scalar plus scalar) + // {.Q }, , [, , LSL #2] + // {.D }, , [, , LSL #3] + case IF_SVE_JD_4C: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + // {.Q }, , [, , LSL #3] + case IF_SVE_JD_4C_A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + // {.Q, .Q }, , [, , LSL #4] + // {.Q, .Q, .Q }, , [, , LSL #4] + // {.Q, .Q, .Q, .Q }, , [, , LSL #4] + case IF_SVE_JF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus + // scalar) + // {.D }, , [, .D, LSL #1] + // {.D }, , [, .D, LSL #2] + // {.D }, , [, .D, LSL #3] + case IF_SVE_JJ_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + // {.D }, , [, .D] + case IF_SVE_JJ_4B_C: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled // offsets) + // {.D }, , [, .D] + case IF_SVE_JJ_4B_E: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + // {.D }, , [, .D] + case IF_SVE_JK_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled + // offsets) emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg emitDispSveModAddr(ins, id->idReg3(), id->idReg4(), 
id->idInsOpt(), fmt); // nnnnn @@ -25011,6 +26768,314 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_9C; break; + case IF_SVE_IF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus + // scalar) + case IF_SVE_IF_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit gather non-temporal load (vector plus + // scalar) + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + + case IF_SVE_IG_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus scalar) + case IF_SVE_IG_4A_D: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus + // scalar) + case IF_SVE_IG_4A_E: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus + // scalar) + case IF_SVE_IG_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus + // scalar) + case IF_SVE_IG_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous first-fault load (scalar plus + // scalar) + case IF_SVE_II_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) + case IF_SVE_II_4A_B: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) + case IF_SVE_II_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (quadwords, scalar plus scalar) + case IF_SVE_IK_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + case IF_SVE_IK_4A_F: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + case IF_SVE_IK_4A_G: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + case IF_SVE_IK_4A_H: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus scalar) + case IF_SVE_IK_4A_I: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous load (scalar plus 
scalar) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + + case IF_SVE_IN_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal load (scalar plus scalar) + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + + case IF_SVE_IP_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load and broadcast quadword (scalar plus scalar) + switch (ins) + { + case INS_sve_ld1rqb: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + case INS_sve_ld1rob: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_ld1rqh: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + case INS_sve_ld1roh: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_ld1rqw: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + case INS_sve_ld1row: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_ld1rqd: + result.insThroughput = PERFSCORE_THROUGHPUT_3C; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + case INS_sve_ld1rod: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_IR_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (quadwords, scalar plus + // scalar) + switch (ins) + { + case INS_sve_ld2q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + 
break; + case INS_sve_ld3q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_ld4q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_IT_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE load multiple structures (scalar plus scalar) + switch (ins) + { + case INS_sve_ld2b: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + case INS_sve_ld3b: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld4b: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld2h: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + case INS_sve_ld3h: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld4h: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld2w: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + case INS_sve_ld3w: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld4w: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld2d: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + case INS_sve_ld3d: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + case INS_sve_ld4d: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + 
result.insLatency = PERFSCORE_LATENCY_10C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_IU_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + case IF_SVE_IU_4B_B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + case IF_SVE_IU_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; + break; + + case IF_SVE_IW_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit gather load (vector plus scalar) + switch (ins) + { + case INS_sve_ld1q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_IX_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 64-bit gather non-temporal load (vector plus + // scalar) + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_10C; + break; + + case IF_SVE_IY_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 128-bit scatter store (vector plus scalar) + switch (ins) + { + case INS_sve_st1q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_IZ_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus + // scalar) + case IF_SVE_IZ_4A_A: // ...........mmmmm ...gggnnnnnttttt -- SVE2 32-bit scatter non-temporal store (vector plus + // scalar) + case IF_SVE_JA_4A: // ...........mmmmm ...gggnnnnnttttt -- 
SVE2 64-bit scatter non-temporal store (vector plus + // scalar) + case IF_SVE_JB_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous non-temporal store (scalar plus + // scalar) + case IF_SVE_JD_4C: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + case IF_SVE_JD_4C_A: // ...........mmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_JC_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (scalar plus scalar) + switch (ins) + { + case INS_sve_st2b: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_st3b: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_7C; + break; + case INS_sve_st4b: + result.insThroughput = PERFSCORE_THROUGHPUT_9X; + result.insLatency = PERFSCORE_LATENCY_11C; + break; + case INS_sve_st2h: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_st3h: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_7C; + break; + case INS_sve_st4h: + result.insThroughput = PERFSCORE_THROUGHPUT_9X; + result.insLatency = PERFSCORE_LATENCY_11C; + break; + case INS_sve_st2w: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_st3w: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_7C; + break; + case INS_sve_st4w: + result.insThroughput = PERFSCORE_THROUGHPUT_9X; + result.insLatency = PERFSCORE_LATENCY_11C; + break; + case INS_sve_st2d: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_st3d: + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_7C; + break; + 
case INS_sve_st4d: + result.insThroughput = PERFSCORE_THROUGHPUT_9X; + result.insLatency = PERFSCORE_LATENCY_11C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_JF_4A: // ...........mmmmm ...gggnnnnnttttt -- SVE store multiple structures (quadwords, scalar plus + // scalar) + switch (ins) + { + case INS_sve_st2q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_st3q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + case INS_sve_st4q: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_JJ_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JJ_4B_C: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JJ_4B_E: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit scaled + // offsets) + case IF_SVE_JK_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit unscaled + // offsets) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + default: // all other instructions perfScoreUnhandledInstruction(id, &result); diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 82d1d1dd02c200..6491fc3a416186 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -132,6 +132,9 @@ void emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataReg, GenTr // Emit the 32-bit Arm64 instruction 'code' into the 'dst' buffer 
unsigned emitOutput_Instr(BYTE* dst, code_t code); +// Append the machine code corresponding to the given SVE instruction descriptor. +BYTE* emitOutput_InstrSve(BYTE* dst, instrDesc* id); + // A helper method to return the natural scale for an EA 'size' static unsigned NaturalScale_helper(emitAttr size); @@ -514,24 +517,29 @@ static int insGetSveReg1ListSize(instruction ins); static PredicateType insGetPredicateType(insFormat fmt, int regpos = 0); // Returns true if the SVE instruction has a LSL addr. -// This is for formats that have [, , LSL #N] +// This is for formats that have [, , LSL #N], [{, , LSL #N}] static bool insSveIsLslN(instruction ins, insFormat fmt); // Returns true if the SVE instruction has a addr. // This is for formats that have [, .T, ], [, .T, #N] static bool insSveIsModN(instruction ins, insFormat fmt); -// Returns 0, 1, 2 or 3 depending on the instruction and format. -// This is for formats that have [, .T, ], [, .T, #N], [, , LSL #N] +// Returns 0, 1, 2, 3 or 4 depending on the instruction and format. +// This is for formats that have [, .T, ], [, .T, #N], [, , LSL #N], +// [{, , LSL #N}] static int insSveGetLslOrModN(instruction ins, insFormat fmt); // Returns true if the specified instruction can encode the 'dtype' field. static bool canEncodeSveElemsize_dtype(instruction ins); -// Returns the encoding to select the 1/2/4/8/16 byte elemsize for an Arm64 Sve vector instruction +// Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction // for the 'dtype' field. static code_t insEncodeSveElemsize_dtype(instruction ins, emitAttr size, code_t code); +// Returns the encoding to select the 4/8/16 byte elemsize for the Arm64 Sve vector instruction 'ld1w' +// for the 'dtype' field. +static code_t insEncodeSveElemsize_dtype_ld1w(instruction ins, insFormat fmt, emitAttr size, code_t code); + // Returns the encoding for the immediate value as 4-bits at bit locations '19-16'. 
static code_t insEncodeSimm4_19_to_16(ssize_t imm); @@ -1088,6 +1096,12 @@ inline static bool insOptsScalableWordsOrQuadwords(insOpts opt) return (insOptsScalableWords(opt) || (opt == INS_OPTS_SCALABLE_Q)); } +inline static bool insOptsScalableDoubleWordsOrQuadword(insOpts opt) +{ + // `opt` is a double-word or quad-word. + return ((opt == INS_OPTS_SCALABLE_D) || (opt == INS_OPTS_SCALABLE_Q)); +} + inline static bool insOptsScalableAtLeastHalf(insOpts opt) { // `opt` is any of the half and above scalable types. @@ -1253,6 +1267,15 @@ void emitIns_R_R_R_R(instruction ins, insOpts opt = INS_OPTS_NONE, insScalableOpts sopt = INS_SCALABLE_OPTS_NONE); +void emitInsSve_R_R_R_R(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + regNumber reg4, + insOpts opt = INS_OPTS_NONE, + insScalableOpts sopt = INS_SCALABLE_OPTS_NONE); + void emitIns_R_COND(instruction ins, emitAttr attr, regNumber reg, insCond cond); void emitIns_R_R_COND(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insCond cond); diff --git a/src/coreclr/jit/instrsarm64sve.h b/src/coreclr/jit/instrsarm64sve.h index ad5094bd141e36..594709ff9063a2 100644 --- a/src/coreclr/jit/instrsarm64sve.h +++ b/src/coreclr/jit/instrsarm64sve.h @@ -128,7 +128,7 @@ INST9(ld1h, "ld1h", 0, IF_SV // LD1H {.D }, /Z, [, .D] SVE_HW_4B_D 11000100110mmmmm 110gggnnnnnttttt C4C0 C000 // LD1H {.D }, /Z, [.D{, #}] SVE_HX_3A_E 10000100101iiiii 110gggnnnnnttttt 84A0 C000 // LD1H {.X }, /Z, [{, #, MUL VL}] SVE_IJ_3A_G 101001001000iiii 101gggnnnnnttttt A480 A000 - // LD1H {.D }, /Z, [, , LSL #1] SVE_IK_4A_I 10100100100mmmmm 010gggnnnnnttttt A480 4000 + // LD1H {.X }, /Z, [, , LSL #1] SVE_IK_4A_I 10100100100mmmmm 010gggnnnnnttttt A480 4000 // enum name info SVE_HW_4A SVE_HW_4A_A SVE_HW_4A_B SVE_HW_4A_C SVE_HW_4B SVE_HW_4B_D SVE_HX_3A_E SVE_IH_3A_F SVE_II_4A_H @@ -141,7 +141,7 @@ INST9(ld1w, "ld1w", 0, IF_SV // LD1W {.D }, /Z, [, .D] SVE_HW_4B_D 11000101010mmmmm 110gggnnnnnttttt C540 C000 // 
LD1W {.D }, /Z, [.D{, #}] SVE_HX_3A_E 10000101001iiiii 110gggnnnnnttttt 8520 C000 // LD1W {.X }, /Z, [{, #, MUL VL}] SVE_IH_3A_F 101001010000iiii 001gggnnnnnttttt A500 2000 - // LD1W {.D }, /Z, [, , LSL #2] SVE_II_4A_H 10100101000mmmmm 000gggnnnnnttttt A500 0000 + // LD1W {.X }, /Z, [, , LSL #2] SVE_II_4A_H 10100101000mmmmm 000gggnnnnnttttt A500 0000 // enum name info SVE_IH_3A SVE_IH_3A_A SVE_II_4A SVE_II_4A_B SVE_IU_4A SVE_IU_4A_C SVE_IU_4B SVE_IU_4B_D SVE_IV_3A @@ -214,7 +214,7 @@ INST8(ldff1w, "ldff1w", 0, IF_SV // LDFF1W {.D }, /Z, [, .D, LSL #2] SVE_HW_4B 11000101011mmmmm 111gggnnnnnttttt C560 E000 // LDFF1W {.D }, /Z, [, .D] SVE_HW_4B_D 11000101010mmmmm 111gggnnnnnttttt C540 E000 // LDFF1W {.D }, /Z, [.D{, #}] SVE_HX_3A_E 10000101001iiiii 111gggnnnnnttttt 8520 E000 - // LDFF1W {.D }, /Z, [{, , LSL #2}] SVE_IG_4A_F 10100101010mmmmm 011gggnnnnnttttt A540 6000 + // LDFF1W {.S }, /Z, [{, , LSL #2}] SVE_IG_4A_F 10100101010mmmmm 011gggnnnnnttttt A540 6000 // enum name info SVE_HW_4A SVE_HW_4A_A SVE_HW_4A_B SVE_HW_4A_C SVE_HW_4B SVE_HW_4B_D SVE_HX_3A_E SVE_IG_4A_G @@ -226,7 +226,7 @@ INST8(ldff1h, "ldff1h", 0, IF_SV // LDFF1H {.D }, /Z, [, .D, LSL #1] SVE_HW_4B 11000100111mmmmm 111gggnnnnnttttt C4E0 E000 // LDFF1H {.D }, /Z, [, .D] SVE_HW_4B_D 11000100110mmmmm 111gggnnnnnttttt C4C0 E000 // LDFF1H {.D }, /Z, [.D{, #}] SVE_HX_3A_E 10000100101iiiii 111gggnnnnnttttt 84A0 E000 - // LDFF1H {.D }, /Z, [{, , LSL #1}] SVE_IG_4A_G 10100100100mmmmm 011gggnnnnnttttt A480 6000 + // LDFF1H {.X }, /Z, [{, , LSL #1}] SVE_IG_4A_G 10100100100mmmmm 011gggnnnnnttttt A480 6000 // enum name info SVE_IJ_3A SVE_IK_4A SVE_IU_4A SVE_IU_4A_A SVE_IU_4B SVE_IU_4B_B SVE_IV_3A @@ -277,7 +277,7 @@ INST6(ld1b, "ld1b", 0, IF_SV // LD1B {.D }, /Z, [, .D] SVE_HW_4B 11000100010mmmmm 110gggnnnnnttttt C440 C000 // LD1B {.D }, /Z, [.D{, #}] SVE_HX_3A_B 10000100001iiiii 110gggnnnnnttttt 8420 C000 // LD1B {.B }, /Z, [{, #, MUL VL}] SVE_IJ_3A_E 101001000000iiii 101gggnnnnnttttt A400 A000 - // LD1B {.D }, 
/Z, [, ] SVE_IK_4A_H 10100100000mmmmm 010gggnnnnnttttt A400 4000 + // LD1B {.B }, /Z, [, ] SVE_IK_4A_H 10100100000mmmmm 010gggnnnnnttttt A400 4000 // enum name info SVE_HY_3A SVE_HY_3A_A SVE_HY_3B SVE_HZ_2A_B SVE_IA_2A SVE_IB_3A @@ -405,7 +405,7 @@ INST5(ldff1b, "ldff1b", 0, IF_SV // LDFF1B {.S }, /Z, [, .S, ] SVE_HW_4A_A 100001000h0mmmmm 011gggnnnnnttttt 8400 6000 // LDFF1B {.D }, /Z, [, .D] SVE_HW_4B 11000100010mmmmm 111gggnnnnnttttt C440 E000 // LDFF1B {.D }, /Z, [.D{, #}] SVE_HX_3A_B 10000100001iiiii 111gggnnnnnttttt 8420 E000 - // LDFF1B {.D }, /Z, [{, }] SVE_IG_4A_E 10100100000mmmmm 011gggnnnnnttttt A400 6000 + // LDFF1B {.B }, /Z, [{, }] SVE_IG_4A_E 10100100000mmmmm 011gggnnnnnttttt A400 6000 // enum name info SVE_AA_3A SVE_AU_3A SVE_BS_1A SVE_CZ_4A