Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AIE2] Enhance VMOV_mv_w instr itinerary #190

Open
wants to merge 1 commit into
base: aie-public
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions llvm/lib/Target/AIE/AIE2GenFixupInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -752,8 +752,18 @@ let Itinerary = II_MOV_CNTR in
}
// 5.6 VMOV - Move vector/accumulator register
let Itinerary = II_VMOV_W in {
def VMOV_mv_w : AIE2_mv_w_inst_mv< (outs OP_mMvAMWQDst:$dst), (ins OP_mMvAMWQSrc:$src),
"vmov", "$dst, $src">;
let ItineraryRegPairs = [ItinRegClassPair<II_VMOV_W_CM_CM,[OperandRegClass<0, mAMm>, OperandRegClass<1, mAMm>]>,
ItinRegClassPair<II_VMOV_W_CM_WMH,[OperandRegClass<0, mAMm>, OperandRegClass<1, eWH>]>,
ItinRegClassPair<II_VMOV_W_WMH_CM,[OperandRegClass<0, eWH>, OperandRegClass<1, mAMm>]>,
ItinRegClassPair<II_VMOV_W_CM_WML,[OperandRegClass<0, mAMm>, OperandRegClass<1, eWL>]>,
ItinRegClassPair<II_VMOV_W_WML_CM,[OperandRegClass<0, eWL>, OperandRegClass<1, mAMm>]>,
ItinRegClassPair<II_VMOV_W_WMH_WMH,[OperandRegClass<0, eWH>, OperandRegClass<1, eWH>]>,
ItinRegClassPair<II_VMOV_W_WML_WML,[OperandRegClass<0, eWL>, OperandRegClass<1, eWL>]>,
ItinRegClassPair<II_VMOV_W_WMH_WML,[OperandRegClass<0, eWH>, OperandRegClass<1, eWL>]>,
ItinRegClassPair<II_VMOV_W_WML_WMH,[OperandRegClass<0, eWL>, OperandRegClass<1, eWH>]>] in {
def VMOV_mv_w : AIE2_mv_w_inst_mv< (outs OP_mMvAMWQDst:$dst), (ins OP_mMvAMWQSrc:$src),
"vmov", "$dst, $src">;
}
}
let Itinerary = II_VMOV_X in {
def VMOV_mv_x : AIE2_mv_x_inst_mv< (outs OP_mMvBMXDst:$dst), (ins OP_mMvBMXSrc:$src),
Expand Down
8 changes: 5 additions & 3 deletions llvm/lib/Target/AIE/AIE2InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1276,7 +1276,6 @@ unsigned AIE2InstrInfo::getNumBypassedCycles(const InstrItineraryData *ItinData,
auto GetForwardingClass = [&](const MachineInstr &MI, unsigned OpIdx) {
Register Reg = MI.getOperand(OpIdx).getReg();
switch (MI.getOpcode()) {
case AIE2::VMOV_mv_w:
case AIE2::VCONV_FP32_BF16:
assert(OpIdx < 2);
return Reg.isPhysical() && AIE2::eWLRegClass.contains(Reg)
Expand All @@ -1286,8 +1285,11 @@ unsigned AIE2InstrInfo::getNumBypassedCycles(const InstrItineraryData *ItinData,
return Reg.isPhysical() && AIE2::mXmRegClass.contains(Reg)
? MovSlotBypassClass
: 0U;
default:
return ItinData->getForwardingClass(MI.getDesc().getSchedClass(), OpIdx);
default: {
const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
return ItinData->getForwardingClass(
getSchedClass(MI.getDesc(), MI.operands(), MRI), OpIdx);
}
}
};

Expand Down
33 changes: 30 additions & 3 deletions llvm/lib/Target/AIE/AIE2Schedule.td
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,15 @@ def II_VMACf : InstrItinClass;
def II_VMAX_LT : InstrItinClass;
def II_VMIN_GE : InstrItinClass;
def II_VMOV_W : InstrItinClass;
def II_VMOV_W_CM_CM : InstrItinClass;
def II_VMOV_W_CM_WMH : InstrItinClass;
def II_VMOV_W_WMH_CM : InstrItinClass;
def II_VMOV_W_CM_WML : InstrItinClass;
def II_VMOV_W_WML_CM : InstrItinClass;
def II_VMOV_W_WMH_WMH : InstrItinClass;
def II_VMOV_W_WML_WMH : InstrItinClass;
def II_VMOV_W_WMH_WML : InstrItinClass;
def II_VMOV_W_WML_WML : InstrItinClass;
def II_VMOV_X : InstrItinClass;
def II_VMOV_CM : InstrItinClass;
def II_VMOV_CASCADE_READ : InstrItinClass;
Expand Down Expand Up @@ -821,12 +830,30 @@ InstrItinData<II_VMOV_D, [EmptyCycles<4>, InstrStage<1, [CM_WA_PORT]>],
// However, Post-RA scheduling needs an exact itinerary, otherwise we might give
// an optimistic latency to anti-dependencies.
// See schedule/negative_latencies/bypass.mir for details.
// FIXME: II_VMOV_W and II_VMOV_X always book both the vector and accumulator read and write ports.
InstrItinData<II_VMOV_W, [SimpleCycle<CM_RM_PORT>, PrefixCycle<W_WM_PORT>, SimpleCycle<CM_WM_PORT>],
[2,1], [NoBypass, NoBypass]>,
InstrItinData<II_VMOV_W_CM_CM, [SimpleCycle<CM_RM_PORT>, SimpleCycle<CM_WM_PORT>],
[2,1], [NoBypass, NoBypass]>,
InstrItinData<II_VMOV_W_WMH_CM, [SimpleCycle<CM_RM_PORT>, SimpleCycle<W_WM_PORT>],
[2,1], [NoBypass, NoBypass]>,
InstrItinData<II_VMOV_W_CM_WMH, [EmptyCycles<1>, SimpleCycle<CM_WM_PORT>],
[2,1], [NoBypass, NoBypass]>,
InstrItinData<II_VMOV_W_WML_CM, [SimpleCycle<CM_RM_PORT>, SimpleCycle<W_WM_PORT>],
[2,1], [MOV_Bypass, NoBypass]>,
InstrItinData<II_VMOV_W_CM_WML, [EmptyCycles<1>, SimpleCycle<CM_WM_PORT>],
[2,1], [NoBypass, MOV_Bypass]>,
InstrItinData<II_VMOV_W_WMH_WMH, [EmptyCycles<1>, SimpleCycle<W_WM_PORT>],
[2,1], [NoBypass, NoBypass]>,
InstrItinData<II_VMOV_W_WML_WMH, [EmptyCycles<1>, SimpleCycle<W_WM_PORT>],
[2,1], [MOV_Bypass, NoBypass]>,
InstrItinData<II_VMOV_W_WMH_WML, [EmptyCycles<1>, SimpleCycle<W_WM_PORT>],
[2,1], [NoBypass, MOV_Bypass]>,
InstrItinData<II_VMOV_W_WML_WML, [EmptyCycles<1>, SimpleCycle<W_WM_PORT>],
[2,1], [MOV_Bypass, MOV_Bypass]>,
// FIXME: II_VMOV_X always books both the vector and accumulator read and write ports.
// That is overly pessimistic, as the instruction actually only uses the ports that correspond to the
// registers it is reading/writing. We need to find a way to dynamically select itineraries for an
// instruction based on its operands.
InstrItinData<II_VMOV_W, [InstrStage<1, [CM_RM_PORT]>, PrefixCycle<W_WM_PORT>, SimpleCycle<CM_WM_PORT>],
[2,1], [NoBypass, NoBypass]>,
InstrItinData<II_VMOV_X, [SimpleCycle<CM_RM_PORT>, PrefixCycle<W_WM_PORT>, SimpleCycle<CM_WM_PORT>],
[2,1], [MOV_Bypass, MOV_Bypass]>,
InstrItinData<II_VMOV_CM, [InstrStage<1, [CM_RM_PORT]>, InstrStage<1, [CM_WM_PORT]>], [2,1]>,
Expand Down
68 changes: 68 additions & 0 deletions llvm/test/CodeGen/AIE/aie2/schedule/resource/cm_rm.mir
Original file line number Diff line number Diff line change
Expand Up @@ -485,3 +485,71 @@ body: |
$x0 = VFLOOR_S32_BF16_mFl2FxSrc_AM $amll3, $s0, implicit-def $srf2iflags, implicit $crf2imask
$x0 = VFLOOR_S32_BF16_mFl2FxSrc_AM $amll3, $s0, implicit-def $srf2iflags, implicit $crf2imask
...

# VADDMAC accesses the CM_RM read port in cycle 3; VMOV_mv_w accesses it in cycle 1, but only when reading from the eAM reg class.
# In the following tests we expect a NOP to be added after VADDMAC.
---
name: E3_VADDMAC_E2_VMOV_w_am
alignment: 16
body: |
bb.0.entry:
; CHECK-LABEL: name: E3_VADDMAC_E2_VMOV_w_am
; CHECK: $cm0 = VADDMAC_vmac_bm_core_dense killed $cm0, killed $cm2, killed $x3, killed $x4, killed $r5
; CHECK-NEXT: $wl0 = VMOV_mv_w killed $amhh4
; CHECK-NEXT: NOP
; CHECK-NEXT: $wh0 = VMOV_mv_w killed $amhh5
; CHECK-NEXT: NOP
$cm0 = VADDMAC_vmac_bm_core_dense $cm0, $cm2, $x3, $x4, $r5
$wl0 = VMOV_mv_w $amhh4
$wh0 = VMOV_mv_w $amhh5
...

---
name: E3_VADDMAC_E2_VMOV_am_am
alignment: 16
body: |
bb.0.entry:
; CHECK-LABEL: name: E3_VADDMAC_E2_VMOV_am_am
; CHECK: $cm0 = VADDMAC_vmac_bm_core_dense killed $cm0, killed $cm2, killed $x3, killed $x4, killed $r5
; CHECK-NEXT: $amlh4 = VMOV_mv_w killed $amhh4
; CHECK-NEXT: NOP
; CHECK-NEXT: $amlh5 = VMOV_mv_w killed $amhh5
; CHECK-NEXT: NOP
$cm0 = VADDMAC_vmac_bm_core_dense $cm0, $cm2, $x3, $x4, $r5
$amlh4 = VMOV_mv_w $amhh4
$amlh5 = VMOV_mv_w $amhh5
...

# VADDMAC accesses the CM_RM read port in cycle 3; VMOV_mv_w accesses it in cycle 1, but only when reading from the eAM reg class.
# In the following tests, since we are reading from eWL/eWH, we do NOT expect a NOP to be added after VADDMAC.
---
name: E3_VADDMAC_E2_VMOV_w_w
alignment: 16
body: |
bb.0.entry:
; CHECK-LABEL: name: E3_VADDMAC_E2_VMOV_w_w
; CHECK: $cm0 = VADDMAC_vmac_bm_core_dense killed $cm0, killed $cm2, killed $x3, killed $x4, killed $r5
; CHECK-NEXT: $wl0 = VMOV_mv_w killed $wh1
; CHECK-NEXT: $wh0 = VMOV_mv_w killed $wl1
; CHECK-NEXT: NOP
; CHECK-NEXT: NOP
$cm0 = VADDMAC_vmac_bm_core_dense $cm0, $cm2, $x3, $x4, $r5
$wl0 = VMOV_mv_w $wh1
$wh0 = VMOV_mv_w $wl1
...

---
name: E3_VADDMAC_E2_VMOV_am_w
alignment: 16
body: |
bb.0.entry:
; CHECK-LABEL: name: E3_VADDMAC_E2_VMOV_am_w
; CHECK: $cm0 = VADDMAC_vmac_bm_core_dense killed $cm0, killed $cm2, killed $x3, killed $x4, killed $r5
; CHECK-NEXT: $amlh4 = VMOV_mv_w $wh1
; CHECK-NEXT: $amlh5 = VMOV_mv_w killed $wh1
; CHECK-NEXT: NOP
; CHECK-NEXT: NOP
$cm0 = VADDMAC_vmac_bm_core_dense $cm0, $cm2, $x3, $x4, $r5
$amlh4 = VMOV_mv_w $wh1
$amlh5 = VMOV_mv_w $wh1
...
98 changes: 98 additions & 0 deletions llvm/test/CodeGen/AIE/aie2/schedule/resource/cm_wm.mir
Original file line number Diff line number Diff line change
Expand Up @@ -423,3 +423,101 @@ body: |
$amhh7 = VMOV_mv_w $amll6
$amhh7 = VMOV_mv_w $amll6
...

# Test to make sure VMOV_mv_w reserves the CM_WM port when writing to the eAM reg class.
# VUPS accesses the CM_WM write port in cycle 3; VMOV_mv_w accesses it in cycle 2 when writing to the eAM reg class.
# We expect a NOP to be inserted to prevent a resource conflict on the CM_WM port.
---
name: E3_VUPS_E2_VMOV_am_wl
alignment: 16
body: |
bb.0.entry:
; CHECK-LABEL: name: E3_VUPS_E2_VMOV_am_wl
; CHECK: $cm8 = VUPS_S64_D32_mv_ups_x2c killed $x2, killed $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: NOP
; CHECK-NEXT: $amhh0 = VMOV_mv_w killed $wl0
; CHECK-NEXT: NOP
$cm8 = VUPS_S64_D32_mv_ups_x2c $x2, $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
$amhh0 = VMOV_mv_w $wl0
...

---
name: E3_VUPS_E2_VMOV_am_amhl
alignment: 16
body: |
bb.0.entry:
; CHECK-LABEL: name: E3_VUPS_E2_VMOV_am_amhl
; CHECK: $cm8 = VUPS_S64_D32_mv_ups_x2c killed $x2, killed $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: NOP
; CHECK-NEXT: $amhh0 = VMOV_mv_w killed $amlh1
; CHECK-NEXT: NOP
$cm8 = VUPS_S64_D32_mv_ups_x2c $x2, $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
$amhh0 = VMOV_mv_w $amlh1
...

# Test to make sure VMOV_mv_w does not reserve the CM_WM port when writing to the eWH and eWL reg classes.
# VUPS accesses the CM_WM write port in cycle 3; VMOV_mv_w accesses it in cycle 2 only if writing to the eAM reg class.
---
name: E3_VUPS_E2_VMOV_wl_wl
alignment: 16
body: |
bb.0.entry:
; CHECK-LABEL: name: E3_VUPS_E2_VMOV_wl_wl
; CHECK: $cm8 = VUPS_S64_D32_mv_ups_x2c killed $x2, killed $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: $wl0 = VMOV_mv_w killed $wl1
; CHECK-NEXT: NOP
$cm8 = VUPS_S64_D32_mv_ups_x2c $x2, $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
$wl0 = VMOV_mv_w $wl1
...

---
name: E3_VUPS_E2_VMOV_wh_wh
alignment: 16
body: |
bb.0.entry:
; CHECK-LABEL: name: E3_VUPS_E2_VMOV_wh_wh
; CHECK: $cm8 = VUPS_S64_D32_mv_ups_x2c killed $x2, killed $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: $wh0 = VMOV_mv_w killed $wh1
; CHECK-NEXT: NOP
$cm8 = VUPS_S64_D32_mv_ups_x2c $x2, $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
$wh0 = VMOV_mv_w $wh1
...

---
name: E3_VUPS_E2_VMOV_wh_wl
alignment: 16
body: |
bb.0.entry:
; CHECK-LABEL: name: E3_VUPS_E2_VMOV_wh_wl
; CHECK: $cm8 = VUPS_S64_D32_mv_ups_x2c killed $x2, killed $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: $wh0 = VMOV_mv_w killed $wl1
; CHECK-NEXT: NOP
$cm8 = VUPS_S64_D32_mv_ups_x2c $x2, $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
$wh0 = VMOV_mv_w $wl1
...

---
name: E3_VUPS_E2_VMOV_wl_wh
alignment: 16
body: |
bb.0.entry:
; CHECK-LABEL: name: E3_VUPS_E2_VMOV_wl_wh
; CHECK: $cm8 = VUPS_S64_D32_mv_ups_x2c killed $x2, killed $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: $wl0 = VMOV_mv_w killed $wh1
; CHECK-NEXT: NOP
$cm8 = VUPS_S64_D32_mv_ups_x2c $x2, $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
$wl0 = VMOV_mv_w $wh1
...

---
name: E3_VUPS_E2_VMOV_wl_am
alignment: 16
body: |
bb.0.entry:
; CHECK-LABEL: name: E3_VUPS_E2_VMOV_wl_am
; CHECK: $cm8 = VUPS_S64_D32_mv_ups_x2c killed $x2, killed $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: $wl0 = VMOV_mv_w killed $amhh0
; CHECK-NEXT: NOP
$cm8 = VUPS_S64_D32_mv_ups_x2c $x2, $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
$wl0 = VMOV_mv_w $amhh0
...
Loading
Loading