Skip to content

Commit

Permalink
[AIE2] Enhance VMOV_mv_w instr itinerary
Browse files Browse the repository at this point in the history
  • Loading branch information
krishnamtibrewala committed Sep 20, 2024
1 parent 562ccea commit eaf375f
Show file tree
Hide file tree
Showing 6 changed files with 346 additions and 8 deletions.
14 changes: 12 additions & 2 deletions llvm/lib/Target/AIE/AIE2GenFixupInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -752,8 +752,18 @@ let Itinerary = II_MOV_CNTR in
}
// 5.6 VMOV - Move vector/accumulator register
let Itinerary = II_VMOV_W in {
def VMOV_mv_w : AIE2_mv_w_inst_mv< (outs OP_mMvAMWQDst:$dst), (ins OP_mMvAMWQSrc:$src),
"vmov", "$dst, $src">;
// Operand-dependent itineraries for VMOV_mv_w. Each ItinRegClassPair names an
// itinerary class together with the register classes expected for operand 0
// and operand 1 (in the def below, $dst is the (outs) operand and $src the
// (ins) operand). The itinerary names follow II_VMOV_W_<op0>_<op1>, where
// CM pairs with mAMm, WMH with eWH and WML with eWL.
// NOTE(review): presumably II_VMOV_W from the enclosing `let` remains the
// fallback when no pair matches -- confirm against the ItineraryRegPairs consumer.
let ItineraryRegPairs = [ItinRegClassPair<II_VMOV_W_CM_CM,[OperandRegClass<0, mAMm>, OperandRegClass<1, mAMm>]>,
ItinRegClassPair<II_VMOV_W_CM_WMH,[OperandRegClass<0, mAMm>, OperandRegClass<1, eWH>]>,
ItinRegClassPair<II_VMOV_W_WMH_CM,[OperandRegClass<0, eWH>, OperandRegClass<1, mAMm>]>,
ItinRegClassPair<II_VMOV_W_CM_WML,[OperandRegClass<0, mAMm>, OperandRegClass<1, eWL>]>,
ItinRegClassPair<II_VMOV_W_WML_CM,[OperandRegClass<0, eWL>, OperandRegClass<1, mAMm>]>,
ItinRegClassPair<II_VMOV_W_WMH_WMH,[OperandRegClass<0, eWH>, OperandRegClass<1, eWH>]>,
ItinRegClassPair<II_VMOV_W_WML_WML,[OperandRegClass<0, eWL>, OperandRegClass<1, eWL>]>,
ItinRegClassPair<II_VMOV_W_WMH_WML,[OperandRegClass<0, eWH>, OperandRegClass<1, eWL>]>,
ItinRegClassPair<II_VMOV_W_WML_WMH,[OperandRegClass<0, eWL>, OperandRegClass<1, eWH>]>] in {
def VMOV_mv_w : AIE2_mv_w_inst_mv< (outs OP_mMvAMWQDst:$dst), (ins OP_mMvAMWQSrc:$src),
"vmov", "$dst, $src">;
}
}
let Itinerary = II_VMOV_X in {
def VMOV_mv_x : AIE2_mv_x_inst_mv< (outs OP_mMvBMXDst:$dst), (ins OP_mMvBMXSrc:$src),
Expand Down
8 changes: 5 additions & 3 deletions llvm/lib/Target/AIE/AIE2InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1276,7 +1276,6 @@ unsigned AIE2InstrInfo::getNumBypassedCycles(const InstrItineraryData *ItinData,
auto GetForwardingClass = [&](const MachineInstr &MI, unsigned OpIdx) {
Register Reg = MI.getOperand(OpIdx).getReg();
switch (MI.getOpcode()) {
case AIE2::VMOV_mv_w:
case AIE2::VCONV_FP32_BF16:
assert(OpIdx < 2);
return Reg.isPhysical() && AIE2::eWLRegClass.contains(Reg)
Expand All @@ -1286,8 +1285,11 @@ unsigned AIE2InstrInfo::getNumBypassedCycles(const InstrItineraryData *ItinData,
return Reg.isPhysical() && AIE2::mXmRegClass.contains(Reg)
? MovSlotBypassClass
: 0U;
default:
return ItinData->getForwardingClass(MI.getDesc().getSchedClass(), OpIdx);
default: {
const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
return ItinData->getForwardingClass(
getSchedClass(MI.getDesc(), MI.operands(), MRI), OpIdx);
}
}
};

Expand Down
33 changes: 30 additions & 3 deletions llvm/lib/Target/AIE/AIE2Schedule.td
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,15 @@ def II_VMACf : InstrItinClass;
def II_VMAX_LT : InstrItinClass;
def II_VMIN_GE : InstrItinClass;
def II_VMOV_W : InstrItinClass;
// Operand-specific variants of II_VMOV_W.
// NOTE(review): the suffix appears to encode <operand 0>_<operand 1> using the
// tags CM, WMH and WML -- confirm against the ItineraryRegPairs list attached
// to VMOV_mv_w.
def II_VMOV_W_CM_CM : InstrItinClass;
def II_VMOV_W_CM_WMH : InstrItinClass;
def II_VMOV_W_WMH_CM : InstrItinClass;
def II_VMOV_W_CM_WML : InstrItinClass;
def II_VMOV_W_WML_CM : InstrItinClass;
def II_VMOV_W_WMH_WMH : InstrItinClass;
def II_VMOV_W_WML_WMH : InstrItinClass;
def II_VMOV_W_WMH_WML : InstrItinClass;
def II_VMOV_W_WML_WML : InstrItinClass;
def II_VMOV_X : InstrItinClass;
def II_VMOV_CM : InstrItinClass;
def II_VMOV_CASCADE_READ : InstrItinClass;
Expand Down Expand Up @@ -821,12 +830,30 @@ InstrItinData<II_VMOV_D, [EmptyCycles<4>, InstrStage<1, [CM_WA_PORT]>],
// However, Post-RA scheduling needs an exact itinerary, otherwise we might give
// an optimistic latency to anti-dependencies.
// See schedule/negative_latencies/bypass.mir for details.
// FIXME: II_VMOV_W and II_VMOV_X always book both the vector and accumulator read and write ports.
InstrItinData<II_VMOV_W, [SimpleCycle<CM_RM_PORT>, PrefixCycle<W_WM_PORT>, SimpleCycle<CM_WM_PORT>],
[2,1], [NoBypass, NoBypass]>,
InstrItinData<II_VMOV_W_CM_CM, [SimpleCycle<CM_RM_PORT>, SimpleCycle<CM_WM_PORT>],
[2,1], [NoBypass, NoBypass]>,
InstrItinData<II_VMOV_W_WMH_CM, [SimpleCycle<CM_RM_PORT>, SimpleCycle<W_WM_PORT>],
[2,1], [NoBypass, NoBypass]>,
InstrItinData<II_VMOV_W_CM_WMH, [EmptyCycles<1>, SimpleCycle<CM_WM_PORT>],
[2,1], [NoBypass, NoBypass]>,
InstrItinData<II_VMOV_W_WML_CM, [SimpleCycle<CM_RM_PORT>, SimpleCycle<W_WM_PORT>],
[2,1], [MOV_Bypass, NoBypass]>,
InstrItinData<II_VMOV_W_CM_WML, [EmptyCycles<1>, SimpleCycle<CM_WM_PORT>],
[2,1], [NoBypass, MOV_Bypass]>,
InstrItinData<II_VMOV_W_WMH_WMH, [EmptyCycles<1>, SimpleCycle<W_WM_PORT>],
[2,1], [NoBypass, NoBypass]>,
InstrItinData<II_VMOV_W_WML_WMH, [EmptyCycles<1>, SimpleCycle<W_WM_PORT>],
[2,1], [MOV_Bypass, NoBypass]>,
InstrItinData<II_VMOV_W_WMH_WML, [EmptyCycles<1>, SimpleCycle<W_WM_PORT>],
[2,1], [NoBypass, MOV_Bypass]>,
InstrItinData<II_VMOV_W_WML_WML, [EmptyCycles<1>, SimpleCycle<W_WM_PORT>],
[2,1], [MOV_Bypass, MOV_Bypass]>,
// FIXME: II_VMOV_X always books both the vector and accumulator read and write ports.
// That is overly pessimistic, as the instruction actually only uses the ports that correspond to the
// registers it is reading/writing. We need to find a way to dynamically select itineraries for an
// instruction based on its operands.
InstrItinData<II_VMOV_W, [InstrStage<1, [CM_RM_PORT]>, PrefixCycle<W_WM_PORT>, SimpleCycle<CM_WM_PORT>],
[2,1], [NoBypass, NoBypass]>,
InstrItinData<II_VMOV_X, [SimpleCycle<CM_RM_PORT>, PrefixCycle<W_WM_PORT>, SimpleCycle<CM_WM_PORT>],
[2,1], [MOV_Bypass, MOV_Bypass]>,
InstrItinData<II_VMOV_CM, [InstrStage<1, [CM_RM_PORT]>, InstrStage<1, [CM_WM_PORT]>], [2,1]>,
Expand Down
68 changes: 68 additions & 0 deletions llvm/test/CodeGen/AIE/aie2/schedule/resource/cm_rm.mir
Original file line number Diff line number Diff line change
Expand Up @@ -485,3 +485,71 @@ body: |
$x0 = VFLOOR_S32_BF16_mFl2FxSrc_AM $amll3, $s0, implicit-def $srf2iflags, implicit $crf2imask
$x0 = VFLOOR_S32_BF16_mFl2FxSrc_AM $amll3, $s0, implicit-def $srf2iflags, implicit $crf2imask
...

# VADDMAC accesses the CM_RM read port in cycle 3, VMOV_mv_w in cycle 1 only when reading from the eAM reg class.
# In the following tests we expect a NOP to be inserted between the two VMOVs that follow VADDMAC.
---
name: E3_VADDMAC_E2_VMOV_w_am
alignment: 16
body: |
bb.0.entry:
; CHECK-LABEL: name: E3_VADDMAC_E2_VMOV_w_am
; CHECK: $cm0 = VADDMAC_vmac_bm_core_dense killed $cm0, killed $cm2, killed $x3, killed $x4, killed $r5
; CHECK-NEXT: $wl0 = VMOV_mv_w killed $amhh4
; CHECK-NEXT: NOP
; CHECK-NEXT: $wh0 = VMOV_mv_w killed $amhh5
; CHECK-NEXT: NOP
$cm0 = VADDMAC_vmac_bm_core_dense $cm0, $cm2, $x3, $x4, $r5
$wl0 = VMOV_mv_w $amhh4
$wh0 = VMOV_mv_w $amhh5
...

---
name: E3_VADDMAC_E2_VMOV_am_am
alignment: 16
body: |
bb.0.entry:
; CHECK-LABEL: name: E3_VADDMAC_E2_VMOV_am_am
; CHECK: $cm0 = VADDMAC_vmac_bm_core_dense killed $cm0, killed $cm2, killed $x3, killed $x4, killed $r5
; CHECK-NEXT: $amlh4 = VMOV_mv_w killed $amhh4
; CHECK-NEXT: NOP
; CHECK-NEXT: $amlh5 = VMOV_mv_w killed $amhh5
; CHECK-NEXT: NOP
$cm0 = VADDMAC_vmac_bm_core_dense $cm0, $cm2, $x3, $x4, $r5
$amlh4 = VMOV_mv_w $amhh4
$amlh5 = VMOV_mv_w $amhh5
...

# VADDMAC accesses the CM_RM read port in cycle 3, VMOV_mv_w in cycle 1 only when reading from the eAM reg class.
# In the following tests, since we are reading from eWL/eWH, we do NOT expect a NOP to be inserted between VADDMAC and the VMOVs.
---
name: E3_VADDMAC_E2_VMOV_w_w
alignment: 16
body: |
bb.0.entry:
; CHECK-LABEL: name: E3_VADDMAC_E2_VMOV_w_w
; CHECK: $cm0 = VADDMAC_vmac_bm_core_dense killed $cm0, killed $cm2, killed $x3, killed $x4, killed $r5
; CHECK-NEXT: $wl0 = VMOV_mv_w killed $wh1
; CHECK-NEXT: $wh0 = VMOV_mv_w killed $wl1
; CHECK-NEXT: NOP
; CHECK-NEXT: NOP
$cm0 = VADDMAC_vmac_bm_core_dense $cm0, $cm2, $x3, $x4, $r5
$wl0 = VMOV_mv_w $wh1
$wh0 = VMOV_mv_w $wl1
...

---
name: E3_VADDMAC_E2_VMOV_am_w
alignment: 16
body: |
bb.0.entry:
; CHECK-LABEL: name: E3_VADDMAC_E2_VMOV_am_w
; CHECK: $cm0 = VADDMAC_vmac_bm_core_dense killed $cm0, killed $cm2, killed $x3, killed $x4, killed $r5
; CHECK-NEXT: $amlh4 = VMOV_mv_w $wh1
; CHECK-NEXT: $amlh5 = VMOV_mv_w killed $wh1
; CHECK-NEXT: NOP
; CHECK-NEXT: NOP
$cm0 = VADDMAC_vmac_bm_core_dense $cm0, $cm2, $x3, $x4, $r5
$amlh4 = VMOV_mv_w $wh1
$amlh5 = VMOV_mv_w $wh1
...
98 changes: 98 additions & 0 deletions llvm/test/CodeGen/AIE/aie2/schedule/resource/cm_wm.mir
Original file line number Diff line number Diff line change
Expand Up @@ -423,3 +423,101 @@ body: |
$amhh7 = VMOV_mv_w $amll6
$amhh7 = VMOV_mv_w $amll6
...

# Test to make sure VMOV_mv_w reserves the CM_WM port when writing to the eAM reg class.
# VUPS accesses the CM_WM write port in cycle 3, VMOV_mv_w in cycle 2 when writing to the eAM reg class.
# We expect a NOP to be inserted to prevent a resource conflict on the CM_WM port.
---
name: E3_VUPS_E2_VMOV_am_wl
alignment: 16
body: |
bb.0.entry:
; CHECK-LABEL: name: E3_VUPS_E2_VMOV_am_wl
; CHECK: $cm8 = VUPS_S64_D32_mv_ups_x2c killed $x2, killed $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: NOP
; CHECK-NEXT: $amhh0 = VMOV_mv_w killed $wl0
; CHECK-NEXT: NOP
$cm8 = VUPS_S64_D32_mv_ups_x2c $x2, $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
$amhh0 = VMOV_mv_w $wl0
...

---
name: E3_VUPS_E2_VMOV_am_amhl
alignment: 16
body: |
bb.0.entry:
; CHECK-LABEL: name: E3_VUPS_E2_VMOV_am_amhl
; CHECK: $cm8 = VUPS_S64_D32_mv_ups_x2c killed $x2, killed $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: NOP
; CHECK-NEXT: $amhh0 = VMOV_mv_w killed $amlh1
; CHECK-NEXT: NOP
$cm8 = VUPS_S64_D32_mv_ups_x2c $x2, $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
$amhh0 = VMOV_mv_w $amlh1
...

# Test to make sure VMOV_mv_w does not reserve the CM_WM port when writing to the eWH or eWL reg classes.
# VUPS accesses the CM_WM write port in cycle 3, VMOV_mv_w in cycle 2 only if writing to the eAM reg class.
---
name: E3_VUPS_E2_VMOV_wl_wl
alignment: 16
body: |
bb.0.entry:
; CHECK-LABEL: name: E3_VUPS_E2_VMOV_wl_wl
; CHECK: $cm8 = VUPS_S64_D32_mv_ups_x2c killed $x2, killed $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: $wl0 = VMOV_mv_w killed $wl1
; CHECK-NEXT: NOP
$cm8 = VUPS_S64_D32_mv_ups_x2c $x2, $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
$wl0 = VMOV_mv_w $wl1
...

---
name: E3_VUPS_E2_VMOV_wh_wh
alignment: 16
body: |
bb.0.entry:
; CHECK-LABEL: name: E3_VUPS_E2_VMOV_wh_wh
; CHECK: $cm8 = VUPS_S64_D32_mv_ups_x2c killed $x2, killed $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: $wh0 = VMOV_mv_w killed $wh1
; CHECK-NEXT: NOP
$cm8 = VUPS_S64_D32_mv_ups_x2c $x2, $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
$wh0 = VMOV_mv_w $wh1
...

---
name: E3_VUPS_E2_VMOV_wh_wl
alignment: 16
body: |
bb.0.entry:
; CHECK-LABEL: name: E3_VUPS_E2_VMOV_wh_wl
; CHECK: $cm8 = VUPS_S64_D32_mv_ups_x2c killed $x2, killed $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: $wh0 = VMOV_mv_w killed $wl1
; CHECK-NEXT: NOP
$cm8 = VUPS_S64_D32_mv_ups_x2c $x2, $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
$wh0 = VMOV_mv_w $wl1
...

---
name: E3_VUPS_E2_VMOV_wl_wh
alignment: 16
body: |
bb.0.entry:
; CHECK-LABEL: name: E3_VUPS_E2_VMOV_wl_wh
; CHECK: $cm8 = VUPS_S64_D32_mv_ups_x2c killed $x2, killed $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: $wl0 = VMOV_mv_w killed $wh1
; CHECK-NEXT: NOP
$cm8 = VUPS_S64_D32_mv_ups_x2c $x2, $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
$wl0 = VMOV_mv_w $wh1
...

---
name: E3_VUPS_E2_VMOV_wl_am
alignment: 16
body: |
bb.0.entry:
; CHECK-LABEL: name: E3_VUPS_E2_VMOV_wl_am
; CHECK: $cm8 = VUPS_S64_D32_mv_ups_x2c killed $x2, killed $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: $wl0 = VMOV_mv_w killed $amhh0
; CHECK-NEXT: NOP
$cm8 = VUPS_S64_D32_mv_ups_x2c $x2, $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
$wl0 = VMOV_mv_w $amhh0
...
Loading

0 comments on commit eaf375f

Please sign in to comment.