From eaf375f2b117ad16c5e8612b35021f07daabd312 Mon Sep 17 00:00:00 2001 From: Krishnam Tibrewala Date: Wed, 18 Sep 2024 16:15:50 -0700 Subject: [PATCH] [AIE2] Enhance VMOV_mv_w instr itinerary --- llvm/lib/Target/AIE/AIE2GenFixupInstrInfo.td | 14 +- llvm/lib/Target/AIE/AIE2InstrInfo.cpp | 8 +- llvm/lib/Target/AIE/AIE2Schedule.td | 33 ++++- .../AIE/aie2/schedule/resource/cm_rm.mir | 68 +++++++++ .../AIE/aie2/schedule/resource/cm_wm.mir | 98 +++++++++++++ .../AIE/aie2/schedule/resource/w_wm.mir | 133 ++++++++++++++++++ 6 files changed, 346 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AIE/AIE2GenFixupInstrInfo.td b/llvm/lib/Target/AIE/AIE2GenFixupInstrInfo.td index db1535f2d8c7..c6b6b1b26a49 100644 --- a/llvm/lib/Target/AIE/AIE2GenFixupInstrInfo.td +++ b/llvm/lib/Target/AIE/AIE2GenFixupInstrInfo.td @@ -752,8 +752,18 @@ let Itinerary = II_MOV_CNTR in } // 5.6 VMOV - Move vector/accumulator register let Itinerary = II_VMOV_W in { - def VMOV_mv_w : AIE2_mv_w_inst_mv< (outs OP_mMvAMWQDst:$dst), (ins OP_mMvAMWQSrc:$src), - "vmov", "$dst, $src">; + let ItineraryRegPairs = [ItinRegClassPair, OperandRegClass<1, mAMm>]>, + ItinRegClassPair, OperandRegClass<1, eWH>]>, + ItinRegClassPair, OperandRegClass<1, mAMm>]>, + ItinRegClassPair, OperandRegClass<1, eWL>]>, + ItinRegClassPair, OperandRegClass<1, mAMm>]>, + ItinRegClassPair, OperandRegClass<1, eWH>]>, + ItinRegClassPair, OperandRegClass<1, eWL>]>, + ItinRegClassPair, OperandRegClass<1, eWL>]>, + ItinRegClassPair, OperandRegClass<1, eWH>]>] in { + def VMOV_mv_w : AIE2_mv_w_inst_mv< (outs OP_mMvAMWQDst:$dst), (ins OP_mMvAMWQSrc:$src), + "vmov", "$dst, $src">; + } } let Itinerary = II_VMOV_X in { def VMOV_mv_x : AIE2_mv_x_inst_mv< (outs OP_mMvBMXDst:$dst), (ins OP_mMvBMXSrc:$src), diff --git a/llvm/lib/Target/AIE/AIE2InstrInfo.cpp b/llvm/lib/Target/AIE/AIE2InstrInfo.cpp index 3467716411f8..663997c3ab99 100644 --- a/llvm/lib/Target/AIE/AIE2InstrInfo.cpp +++ b/llvm/lib/Target/AIE/AIE2InstrInfo.cpp @@ -1276,7 +1276,6 
@@ unsigned AIE2InstrInfo::getNumBypassedCycles(const InstrItineraryData *ItinData, auto GetForwardingClass = [&](const MachineInstr &MI, unsigned OpIdx) { Register Reg = MI.getOperand(OpIdx).getReg(); switch (MI.getOpcode()) { - case AIE2::VMOV_mv_w: case AIE2::VCONV_FP32_BF16: assert(OpIdx < 2); return Reg.isPhysical() && AIE2::eWLRegClass.contains(Reg) @@ -1286,8 +1285,11 @@ unsigned AIE2InstrInfo::getNumBypassedCycles(const InstrItineraryData *ItinData, return Reg.isPhysical() && AIE2::mXmRegClass.contains(Reg) ? MovSlotBypassClass : 0U; - default: - return ItinData->getForwardingClass(MI.getDesc().getSchedClass(), OpIdx); + default: { + const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); + return ItinData->getForwardingClass( + getSchedClass(MI.getDesc(), MI.operands(), MRI), OpIdx); + } } }; diff --git a/llvm/lib/Target/AIE/AIE2Schedule.td b/llvm/lib/Target/AIE/AIE2Schedule.td index 8fc169f4ff1d..f4007fc03197 100644 --- a/llvm/lib/Target/AIE/AIE2Schedule.td +++ b/llvm/lib/Target/AIE/AIE2Schedule.td @@ -309,6 +309,15 @@ def II_VMACf : InstrItinClass; def II_VMAX_LT : InstrItinClass; def II_VMIN_GE : InstrItinClass; def II_VMOV_W : InstrItinClass; +def II_VMOV_W_CM_CM : InstrItinClass; +def II_VMOV_W_CM_WMH : InstrItinClass; +def II_VMOV_W_WMH_CM : InstrItinClass; +def II_VMOV_W_CM_WML : InstrItinClass; +def II_VMOV_W_WML_CM : InstrItinClass; +def II_VMOV_W_WMH_WMH : InstrItinClass; +def II_VMOV_W_WML_WMH : InstrItinClass; +def II_VMOV_W_WMH_WML : InstrItinClass; +def II_VMOV_W_WML_WML : InstrItinClass; def II_VMOV_X : InstrItinClass; def II_VMOV_CM : InstrItinClass; def II_VMOV_CASCADE_READ : InstrItinClass; @@ -821,12 +830,30 @@ InstrItinData, InstrStage<1, [CM_WA_PORT]>], // However, Post-RA scheduling needs an exact itinerary, otherwise we might give // an optimistic latency to anti-dependencies. // See schedule/negative_latencies/bypass.mir for details. 
-// FIXME: II_VMOV_W and II_VMOV_X always book both the vector and accumulator read and write ports. +InstrItinData, PrefixCycle, SimpleCycle], + [2,1], [NoBypass, NoBypass]>, +InstrItinData, SimpleCycle], + [2,1], [NoBypass, NoBypass]>, +InstrItinData, SimpleCycle], + [2,1], [NoBypass, NoBypass]>, +InstrItinData, SimpleCycle], + [2,1], [NoBypass, NoBypass]>, +InstrItinData, SimpleCycle], + [2,1], [MOV_Bypass, NoBypass]>, +InstrItinData, SimpleCycle], + [2,1], [NoBypass, MOV_Bypass]>, +InstrItinData, SimpleCycle], + [2,1], [NoBypass, NoBypass]>, +InstrItinData, SimpleCycle], + [2,1], [MOV_Bypass, NoBypass]>, +InstrItinData, SimpleCycle], + [2,1], [NoBypass, MOV_Bypass]>, +InstrItinData, SimpleCycle], + [2,1], [MOV_Bypass, MOV_Bypass]>, +// FIXME: II_VMOV_X always books both the vector and accumulator read and write ports. // That is overly pessimistic as the instructions actually only use the port according to the register // they are reading/writing. We need to find a way to dynamically select itineraries for an instruction // based on its operands. -InstrItinData, PrefixCycle, SimpleCycle], - [2,1], [NoBypass, NoBypass]>, InstrItinData, PrefixCycle, SimpleCycle], [2,1], [MOV_Bypass, MOV_Bypass]>, InstrItinData, InstrStage<1, [CM_WM_PORT]>], [2,1]>, diff --git a/llvm/test/CodeGen/AIE/aie2/schedule/resource/cm_rm.mir b/llvm/test/CodeGen/AIE/aie2/schedule/resource/cm_rm.mir index 12c95c84dc47..ffa0ea3c2259 100644 --- a/llvm/test/CodeGen/AIE/aie2/schedule/resource/cm_rm.mir +++ b/llvm/test/CodeGen/AIE/aie2/schedule/resource/cm_rm.mir @@ -485,3 +485,71 @@ body: | $x0 = VFLOOR_S32_BF16_mFl2FxSrc_AM $amll3, $s0, implicit-def $srf2iflags, implicit $crf2imask $x0 = VFLOOR_S32_BF16_mFl2FxSrc_AM $amll3, $s0, implicit-def $srf2iflags, implicit $crf2imask ... 
+ +# VADDMAC accesses CM_RM read port in cycle 3, VMOV_mv_w in cycle 1 only when reading from the eAM reg class +# In the following test we expect NOP to be added after VADDMAC +--- +name: E3_VADDMAC_E2_VMOV_w_am +alignment: 16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: E3_VADDMAC_E2_VMOV_w_am + ; CHECK: $cm0 = VADDMAC_vmac_bm_core_dense killed $cm0, killed $cm2, killed $x3, killed $x4, killed $r5 + ; CHECK-NEXT: $wl0 = VMOV_mv_w killed $amhh4 + ; CHECK-NEXT: NOP + ; CHECK-NEXT: $wh0 = VMOV_mv_w killed $amhh5 + ; CHECK-NEXT: NOP + $cm0 = VADDMAC_vmac_bm_core_dense $cm0, $cm2, $x3, $x4, $r5 + $wl0 = VMOV_mv_w $amhh4 + $wh0 = VMOV_mv_w $amhh5 +... + +--- +name: E3_VADDMAC_E2_VMOV_am_am +alignment: 16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: E3_VADDMAC_E2_VMOV_am_am + ; CHECK: $cm0 = VADDMAC_vmac_bm_core_dense killed $cm0, killed $cm2, killed $x3, killed $x4, killed $r5 + ; CHECK-NEXT: $amlh4 = VMOV_mv_w killed $amhh4 + ; CHECK-NEXT: NOP + ; CHECK-NEXT: $amlh5 = VMOV_mv_w killed $amhh5 + ; CHECK-NEXT: NOP + $cm0 = VADDMAC_vmac_bm_core_dense $cm0, $cm2, $x3, $x4, $r5 + $amlh4 = VMOV_mv_w $amhh4 + $amlh5 = VMOV_mv_w $amhh5 +... + +# VADDMAC accesses CM_RM read port in cycle 3, VMOV_mv_w in cycle 1 only when reading from the eAM reg class +# In the following test since we are reading from eWL/eWH we do NOT expect NOP to be added after VADDMAC +--- +name: E3_VADDMAC_E2_VMOV_w_w +alignment: 16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: E3_VADDMAC_E2_VMOV_w_w + ; CHECK: $cm0 = VADDMAC_vmac_bm_core_dense killed $cm0, killed $cm2, killed $x3, killed $x4, killed $r5 + ; CHECK-NEXT: $wl0 = VMOV_mv_w killed $wh1 + ; CHECK-NEXT: $wh0 = VMOV_mv_w killed $wl1 + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + $cm0 = VADDMAC_vmac_bm_core_dense $cm0, $cm2, $x3, $x4, $r5 + $wl0 = VMOV_mv_w $wh1 + $wh0 = VMOV_mv_w $wl1 +... 
+ +--- +name: E3_VADDMAC_E2_VMOV_am_w +alignment: 16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: E3_VADDMAC_E2_VMOV_am_w + ; CHECK: $cm0 = VADDMAC_vmac_bm_core_dense killed $cm0, killed $cm2, killed $x3, killed $x4, killed $r5 + ; CHECK-NEXT: $amlh4 = VMOV_mv_w $wh1 + ; CHECK-NEXT: $amlh5 = VMOV_mv_w killed $wh1 + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + $cm0 = VADDMAC_vmac_bm_core_dense $cm0, $cm2, $x3, $x4, $r5 + $amlh4 = VMOV_mv_w $wh1 + $amlh5 = VMOV_mv_w $wh1 +... diff --git a/llvm/test/CodeGen/AIE/aie2/schedule/resource/cm_wm.mir b/llvm/test/CodeGen/AIE/aie2/schedule/resource/cm_wm.mir index 513462dce34a..9c9ed7aaa0fb 100644 --- a/llvm/test/CodeGen/AIE/aie2/schedule/resource/cm_wm.mir +++ b/llvm/test/CodeGen/AIE/aie2/schedule/resource/cm_wm.mir @@ -423,3 +423,101 @@ body: | $amhh7 = VMOV_mv_w $amll6 $amhh7 = VMOV_mv_w $amll6 ... + +# Test to make sure VMOV_mv_w reserves the CM_WM port when writing to eAM reg class +# VUPS accesses CM_WM write port in cycle 3, VMOV_mv_w in cycle 2 when writing to eAM reg class +# We expect a NOP to be inserted to prevent resource conflict on CM_WM port +--- +name: E3_VUPS_E2_VMOV_am_wl +alignment: 16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: E3_VUPS_E2_VMOV_am_wl + ; CHECK: $cm8 = VUPS_S64_D32_mv_ups_x2c killed $x2, killed $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign + ; CHECK-NEXT: NOP + ; CHECK-NEXT: $amhh0 = VMOV_mv_w killed $wl0 + ; CHECK-NEXT: NOP + $cm8 = VUPS_S64_D32_mv_ups_x2c $x2, $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign + $amhh0 = VMOV_mv_w $wl0 +... 
+ +--- +name: E3_VUPS_E2_VMOV_am_amhl +alignment: 16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: E3_VUPS_E2_VMOV_am_amhl + ; CHECK: $cm8 = VUPS_S64_D32_mv_ups_x2c killed $x2, killed $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign + ; CHECK-NEXT: NOP + ; CHECK-NEXT: $amhh0 = VMOV_mv_w killed $amlh1 + ; CHECK-NEXT: NOP + $cm8 = VUPS_S64_D32_mv_ups_x2c $x2, $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign + $amhh0 = VMOV_mv_w $amlh1 +... + +# Test to make sure VMOV_mv_w does not reserve CM_WM port when writing to eWH & eWL reg class +# VUPS accesses CM_WM write port in cycle 3, VMOV_mv_w in cycle 2 if writing to eAM reg class +--- +name: E3_VUPS_E2_VMOV_wl_wl +alignment: 16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: E3_VUPS_E2_VMOV_wl_wl + ; CHECK: $cm8 = VUPS_S64_D32_mv_ups_x2c killed $x2, killed $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign + ; CHECK-NEXT: $wl0 = VMOV_mv_w killed $wl1 + ; CHECK-NEXT: NOP + $cm8 = VUPS_S64_D32_mv_ups_x2c $x2, $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign + $wl0 = VMOV_mv_w $wl1 +... + +--- +name: E3_VUPS_E2_VMOV_wh_wh +alignment: 16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: E3_VUPS_E2_VMOV_wh_wh + ; CHECK: $cm8 = VUPS_S64_D32_mv_ups_x2c killed $x2, killed $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign + ; CHECK-NEXT: $wh0 = VMOV_mv_w killed $wh1 + ; CHECK-NEXT: NOP + $cm8 = VUPS_S64_D32_mv_ups_x2c $x2, $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign + $wh0 = VMOV_mv_w $wh1 +... + +--- +name: E3_VUPS_E2_VMOV_wh_wl +alignment: 16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: E3_VUPS_E2_VMOV_wh_wl + ; CHECK: $cm8 = VUPS_S64_D32_mv_ups_x2c killed $x2, killed $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign + ; CHECK-NEXT: $wh0 = VMOV_mv_w killed $wl1 + ; CHECK-NEXT: NOP + $cm8 = VUPS_S64_D32_mv_ups_x2c $x2, $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign + $wh0 = VMOV_mv_w $wl1 +... 
+ +--- +name: E3_VUPS_E2_VMOV_wl_wh +alignment: 16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: E3_VUPS_E2_VMOV_wl_wh + ; CHECK: $cm8 = VUPS_S64_D32_mv_ups_x2c killed $x2, killed $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign + ; CHECK-NEXT: $wl0 = VMOV_mv_w killed $wh1 + ; CHECK-NEXT: NOP + $cm8 = VUPS_S64_D32_mv_ups_x2c $x2, $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign + $wl0 = VMOV_mv_w $wh1 +... + +--- +name: E3_VUPS_E2_VMOV_wl_am +alignment: 16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: E3_VUPS_E2_VMOV_wl_am + ; CHECK: $cm8 = VUPS_S64_D32_mv_ups_x2c killed $x2, killed $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign + ; CHECK-NEXT: $wl0 = VMOV_mv_w killed $amhh0 + ; CHECK-NEXT: NOP + $cm8 = VUPS_S64_D32_mv_ups_x2c $x2, $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign + $wl0 = VMOV_mv_w $amhh0 +... diff --git a/llvm/test/CodeGen/AIE/aie2/schedule/resource/w_wm.mir b/llvm/test/CodeGen/AIE/aie2/schedule/resource/w_wm.mir index 83b62edaefa6..9f4a38a2a47f 100644 --- a/llvm/test/CodeGen/AIE/aie2/schedule/resource/w_wm.mir +++ b/llvm/test/CodeGen/AIE/aie2/schedule/resource/w_wm.mir @@ -67,3 +67,136 @@ body: | $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign ... 
+ +# VSRSM accesses vector WM write port in cycle 4, VMOV in cycle 2 when writing to eWL/eWH register class +--- +name: E4_VSRSM_E2_VMOV_WL_WH +alignment: 16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: E4_VSRSM_E2_VMOV_WL_WH + ; CHECK: $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + ; CHECK-NEXT: $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + ; CHECK-NEXT: $wl3 = VSRSM_D16_S32 killed $bmh0, killed $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + ; CHECK-NEXT: $wl0 = VMOV_mv_w killed $wh0 + ; CHECK-NEXT: NOP + $wl0 = VMOV_mv_w $wh0 + $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign +... 
+ +--- +name: E4_VSRSM_E2_VMOV_WH_WL +alignment: 16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: E4_VSRSM_E2_VMOV_WH_WL + ; CHECK: $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + ; CHECK-NEXT: $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + ; CHECK-NEXT: $wl3 = VSRSM_D16_S32 killed $bmh0, killed $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + ; CHECK-NEXT: $wh0 = VMOV_mv_w killed $wl0 + ; CHECK-NEXT: NOP + $wh0 = VMOV_mv_w $wl0 + $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign +... + +--- +name: E4_VSRSM_E2_VMOV_WH_WH +alignment: 16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: E4_VSRSM_E2_VMOV_WH_WH + ; CHECK: $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + ; CHECK-NEXT: $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + ; CHECK-NEXT: $wl3 = VSRSM_D16_S32 killed $bmh0, killed $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + ; CHECK-NEXT: $wh0 = VMOV_mv_w killed $wh1 + ; CHECK-NEXT: NOP + $wh0 = VMOV_mv_w $wh1 + $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign +... 
+ +--- +name: E4_VSRSM_E2_VMOV_WL_WL +alignment: 16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: E4_VSRSM_E2_VMOV_WL_WL + ; CHECK: $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + ; CHECK-NEXT: $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + ; CHECK-NEXT: $wl3 = VSRSM_D16_S32 killed $bmh0, killed $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + ; CHECK-NEXT: $wl0 = VMOV_mv_w killed $wl1 + ; CHECK-NEXT: NOP + $wl0 = VMOV_mv_w $wl1 + $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign +... + +--- +name: E4_VSRSM_E2_VMOV_WL_AM +alignment: 16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: E4_VSRSM_E2_VMOV_WL_AM + ; CHECK: $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + ; CHECK-NEXT: $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + ; CHECK-NEXT: $wl3 = VSRSM_D16_S32 $bmh0, killed $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + ; CHECK-NEXT: $wl0 = VMOV_mv_w killed $amhh0 + ; CHECK-NEXT: NOP + $wl0 = VMOV_mv_w $amhh0 + $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign +... 
+ +# VSRSM accesses vector WM write port in cycle 4, VMOV in cycle 2 is NOT expected to reserve WM port when writing to AM register class +--- +name: E4_VSRSM_E2_VMOV_AM_AM +alignment: 16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: E4_VSRSM_E2_VMOV_AM_AM + ; CHECK: $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + ; CHECK-NEXT: $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + ; CHECK-NEXT: $wl3 = VSRSM_D16_S32 $bmh0, killed $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + ; CHECK-NEXT: $amhh3 = VMOV_mv_w killed $amhh0 + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + $amhh3 = VMOV_mv_w $amhh0 + $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign +... 
+ +--- +name: E4_VSRSM_E2_VMOV_AM_WL +alignment: 16 +body: | + bb.0.entry: + ; CHECK-LABEL: name: E4_VSRSM_E2_VMOV_AM_WL + ; CHECK: $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + ; CHECK-NEXT: $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + ; CHECK-NEXT: $wl3 = VSRSM_D16_S32 killed $bmh0, killed $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + ; CHECK-NEXT: $amhh3 = VMOV_mv_w killed $wl0 + ; CHECK-NEXT: NOP + ; CHECK-NEXT: NOP + $amhh3 = VMOV_mv_w $wl0 + $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign + $wl3 = VSRSM_D16_S32 $bmh0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign +...