Skip to content

Commit

Permalink
[AIE2] Skip copies and bitcasts when combining stores
Browse files Browse the repository at this point in the history
  • Loading branch information
andcarminati committed Sep 20, 2024
1 parent 562ccea commit e963ac6
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 3 deletions.
6 changes: 3 additions & 3 deletions llvm/lib/Target/AIE/AIE2InstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3992,7 +3992,7 @@ bool AIE2InstructionSelector::selectG_AIE_STORE_PACK(MachineInstr &StoreI,
MachineRegisterInfo &MRI) {

Register PackResult = (StoreI.uses().begin())->getReg();
MachineInstr *PackOp = MRI.getUniqueVRegDef(PackResult);
MachineInstr *PackOp = getDefIgnoringCopiesAndBitcasts(PackResult, MRI);

assert(PackOp && "Expected SSA.");

Expand Down Expand Up @@ -4144,7 +4144,7 @@ bool AIE2InstructionSelector::selectG_AIE_STORE_SRS(MachineInstr &StoreI,
MachineRegisterInfo &MRI) {

Register SrsResult = (StoreI.uses().begin())->getReg();
MachineInstr *SrsOp = MRI.getUniqueVRegDef(SrsResult);
MachineInstr *SrsOp = getDefIgnoringCopiesAndBitcasts(SrsResult, MRI);

assert(SrsOp && "Expected SSA.");

Expand Down Expand Up @@ -4251,7 +4251,7 @@ bool AIE2InstructionSelector::selectG_AIE_STORE_CONV(MachineInstr &StoreI,
MachineRegisterInfo &MRI) {

Register ConvResult = (StoreI.uses().begin())->getReg();
MachineInstr *ConvOp = MRI.getUniqueVRegDef(ConvResult);
MachineInstr *ConvOp = getDefIgnoringCopiesAndBitcasts(ConvResult, MRI);

assert(ConvOp && "Expected SSA.");

Expand Down
15 changes: 15 additions & 0 deletions llvm/lib/Target/AIE/AIECombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,21 @@ bool llvm::canDelayMemOp(MachineInstr &MemI, MachineInstr &Dest,
return none_of(InstrRange, UnsafeToMovePast);
}

/// Find the def instruction for \p Reg, looking through any chain of trivial
/// copies and G_BITCASTs.
///
/// \param Reg register whose defining instruction is requested.
/// \param MRI register info handed to getDefIgnoringCopies() for each lookup.
/// \returns the defining instruction reached once no further G_BITCAST is
/// found, or nullptr if getDefIgnoringCopies() returns nullptr at any step
/// (e.g. \p Reg is not a generic virtual register).
MachineInstr *
llvm::getDefIgnoringCopiesAndBitcasts(Register Reg,
                                      const MachineRegisterInfo &MRI) {
  MachineInstr *DefInstr = getDefIgnoringCopies(Reg, MRI);

  // getDefIgnoringCopies() may hand back nullptr, both for the initial lookup
  // and for any bitcast source, so guard every dereference and let the
  // nullptr propagate to the caller as documented.
  while (DefInstr && DefInstr->getOpcode() == TargetOpcode::G_BITCAST) {
    // Operand 1 of G_BITCAST is the value being cast; continue from its def.
    DefInstr = getDefIgnoringCopies(DefInstr->getOperand(1).getReg(), MRI);
  }

  return DefInstr;
}

MachineInstr *findLastRegUseInBB(Register Reg, MachineInstr &IgnoreUser,
MachineRegisterInfo &MRI,
CombinerHelper &Helper,
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AIE/AIECombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ bool matchGlobalValOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
/// post-increment combining
bool canDelayMemOp(MachineInstr &MemI, MachineInstr &Dest,
MachineRegisterInfo &MRI);
/// Find the def instruction for \p Reg, folding away any trivial copies and
/// bitcasts. May return nullptr if \p Reg is not a generic virtual register.
///
/// \param Reg register whose defining instruction is requested.
/// \param MRI register info used to look up definitions.
/// \returns the defining instruction found after skipping copies and
/// G_BITCASTs; callers must handle a nullptr result.
MachineInstr *getDefIgnoringCopiesAndBitcasts(Register Reg,
const MachineRegisterInfo &MRI);

class InstrNode {
MachineInstr *BaseNode;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,3 +215,27 @@ body: |
%11:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2.pack.I4.I8), %1:vregbank(<32 x s16>), %6:gprregbank(s32)
G_AIE_OFFSET_STORE %11:vregbank(<32 x s8>), %2:ptrregbank(p0), %10:modregbank(s20) :: (store (<32 x s8>))
...

# The stored value (%110) is a COPY of a G_BITCAST of the pack intrinsic
# result (%5). The selector is expected to look through both and still fold
# the pack into a single VST_PACK_D8_D16_ag_idx_imm store at offset 96.
---
name: VST_PACK_D8_D16_COPY_BITCAST
alignment: 16
legalized: true
regBankSelected: true
body: |
bb.1.entry:
liveins: $p0, $r0, $x0
; CHECK-LABEL: name: VST_PACK_D8_D16_COPY_BITCAST
; CHECK: liveins: $p0, $r0, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0
; CHECK-NEXT: VST_PACK_D8_D16_ag_idx_imm [[COPY1]], 96, [[COPY]], implicit $crsat, implicit $crpacksign :: (store (<8 x s32>))
%0:vregbank(<32 x s16>) = COPY $x0
%2:ptrregbank(p0) = COPY $p0
%6:gprregbank(s32) = G_CONSTANT i32 0
%5:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2.pack.I8.I16), %0:vregbank(<32 x s16>), %6:gprregbank(s32)
%3:modregbank(s20) = G_CONSTANT i20 96
%100:vregbank(<8 x s32>) = G_BITCAST %5(<32 x s8>)
%110:vregbank(<8 x s32>) = COPY %100(<8 x s32>)
G_AIE_OFFSET_STORE %110:vregbank(<8 x s32>), %2:ptrregbank(p0), %3:modregbank(s20) :: (store (<8 x s32>))
...
Original file line number Diff line number Diff line change
Expand Up @@ -1188,3 +1188,29 @@ body: |
G_AIE_OFFSET_STORE %107, %0, %11 :: (store (<16 x s32>) into stack - 64)
G_AIE_OFFSET_STORE %108, %0, %12 :: (store (<16 x s32>) into stack - 64)
...

# The stored value (%201) is a COPY of a G_BITCAST of the srs intrinsic
# result (%103). The selector is expected to look through both and still
# fold the srs into a single VST_SRS_D8_S32_ag_idx store.
---
name: VST_SRS_D8_S32_COPY_BITCAST
alignment: 16
legalized: true
regBankSelected: true
body: |
bb.1.entry:
liveins: $cm0, $p0, $r1
; CHECK-LABEL: name: VST_SRS_D8_S32_COPY_BITCAST
; CHECK: liveins: $cm0, $p0, $r1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:edj = COPY $m0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r1
; CHECK-NEXT: [[COPY3:%[0-9]+]]:mss = COPY [[COPY2]]
; CHECK-NEXT: VST_SRS_D8_S32_ag_idx [[COPY]], [[COPY1]], %5:acc1024, [[COPY3]], implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>))
%0:ptrregbank(p0) = COPY $p0
%7:modregbank(s20) = COPY $m0
%101:gprregbank(s32) = COPY $r1
%102:gprregbank(s32) = G_CONSTANT i32 0
%103:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2.I256.v32.acc32.srs), %100:accregbank(<16 x s64>), %101:gprregbank(s32), %102:gprregbank(s32)
%144:vregbank(<8 x s32>) = G_BITCAST %103(<32 x s8>)
%201:vregbank(<8 x s32>) = COPY %144(<8 x s32>)
G_AIE_OFFSET_STORE %201(<8 x s32>), %0, %7 :: (store (<8 x s32>))
...
24 changes: 24 additions & 0 deletions llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-vst_conv.mir
Original file line number Diff line number Diff line change
Expand Up @@ -257,3 +257,27 @@ body: |
%21:ptrregbank(p0) = G_AIE_POSTINC_STORE %104, %0, %8 :: (store (<16 x s16>))
PseudoRET implicit $lr
...

# The stored value (%200) is a COPY of a G_BITCAST of the accfloat-to-bf16
# conversion intrinsic result (%104). The selector is expected to look
# through both and still fold the conversion into a single
# VST_CONV_BF16_FP32_ag_idx_imm store at offset 0.
---
name: VST_CONV_COPY_BITCAST
legalized: true
regBankSelected: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $p0, $bml0
; CHECK-LABEL: name: VST_CONV_COPY_BITCAST
; CHECK: liveins: $p0, $bml0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:acc512 = COPY $bml0
; CHECK-NEXT: VST_CONV_BF16_FP32_ag_idx_imm [[COPY]], 0, [[COPY1]], implicit-def $srf2fflags, implicit $crrnd, implicit $crf2fmask :: (store (<8 x s32>))
; CHECK-NEXT: PseudoRET implicit $lr
%0:ptrregbank(p0) = COPY $p0
%100:accregbank(<8 x s64>) = COPY $bml0
%104:vregbank(<16 x s16>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2.v16accfloat.to.v16bf16), %100:accregbank(<8 x s64>)
%150:vregbank(<8 x s32>) = G_BITCAST %104(<16 x s16>)
%200:vregbank(<8 x s32>) = COPY %150(<8 x s32>)
G_STORE %200, %0 :: (store (<8 x s32>))
PseudoRET implicit $lr
...

0 comments on commit e963ac6

Please sign in to comment.