From 2cbc27124c993624d15989da40b09e52c8f0fa9e Mon Sep 17 00:00:00 2001 From: Andreu Carminati Date: Fri, 20 Sep 2024 10:40:23 +0100 Subject: [PATCH] [AIE2] Skip copies and bitcasts when combining stores --- .../Target/AIE/AIE2InstructionSelector.cpp | 6 ++--- llvm/lib/Target/AIE/AIECombinerHelper.cpp | 15 +++++++++++ llvm/lib/Target/AIE/AIECombinerHelper.h | 4 +++ .../inst-select-indexed-vst_pack.mir | 24 +++++++++++++++++ .../inst-select-indexed-vst_srs.mir | 26 +++++++++++++++++++ .../aie2/GlobalISel/inst-select-vst_conv.mir | 24 +++++++++++++++++ 6 files changed, 96 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AIE/AIE2InstructionSelector.cpp b/llvm/lib/Target/AIE/AIE2InstructionSelector.cpp index 715014139589..e940bb45eec8 100644 --- a/llvm/lib/Target/AIE/AIE2InstructionSelector.cpp +++ b/llvm/lib/Target/AIE/AIE2InstructionSelector.cpp @@ -3992,7 +3992,7 @@ bool AIE2InstructionSelector::selectG_AIE_STORE_PACK(MachineInstr &StoreI, MachineRegisterInfo &MRI) { Register PackResult = (StoreI.uses().begin())->getReg(); - MachineInstr *PackOp = MRI.getUniqueVRegDef(PackResult); + MachineInstr *PackOp = getDefIgnoringCopiesAndBitcasts(PackResult, MRI); assert(PackOp && "Expected SSA."); @@ -4144,7 +4144,7 @@ bool AIE2InstructionSelector::selectG_AIE_STORE_SRS(MachineInstr &StoreI, MachineRegisterInfo &MRI) { Register SrsResult = (StoreI.uses().begin())->getReg(); - MachineInstr *SrsOp = MRI.getUniqueVRegDef(SrsResult); + MachineInstr *SrsOp = getDefIgnoringCopiesAndBitcasts(SrsResult, MRI); assert(SrsOp && "Expected SSA."); @@ -4251,7 +4251,7 @@ bool AIE2InstructionSelector::selectG_AIE_STORE_CONV(MachineInstr &StoreI, MachineRegisterInfo &MRI) { Register ConvResult = (StoreI.uses().begin())->getReg(); - MachineInstr *ConvOp = MRI.getUniqueVRegDef(ConvResult); + MachineInstr *ConvOp = getDefIgnoringCopiesAndBitcasts(ConvResult, MRI); assert(ConvOp && "Expected SSA."); diff --git a/llvm/lib/Target/AIE/AIECombinerHelper.cpp 
b/llvm/lib/Target/AIE/AIECombinerHelper.cpp index 1df4cc3f46a5..42e916841791 100644 --- a/llvm/lib/Target/AIE/AIECombinerHelper.cpp +++ b/llvm/lib/Target/AIE/AIECombinerHelper.cpp @@ -91,6 +91,21 @@ bool llvm::canDelayMemOp(MachineInstr &MemI, MachineInstr &Dest, return none_of(InstrRange, UnsafeToMovePast); } +/// Find the def of \p Reg, looking through any chain of trivial copies and +/// G_BITCASTs. May return nullptr if \p Reg is not a generic virtual register. +MachineInstr * +llvm::getDefIgnoringCopiesAndBitcasts(Register Reg, + const MachineRegisterInfo &MRI) { + + MachineInstr *DefInstr = getDefIgnoringCopies(Reg, MRI); + + while (DefInstr && DefInstr->getOpcode() == TargetOpcode::G_BITCAST) { + DefInstr = getDefIgnoringCopies(DefInstr->getOperand(1).getReg(), MRI); + } + + return DefInstr; +} + MachineInstr *findLastRegUseInBB(Register Reg, MachineInstr &IgnoreUser, MachineRegisterInfo &MRI, CombinerHelper &Helper, diff --git a/llvm/lib/Target/AIE/AIECombinerHelper.h b/llvm/lib/Target/AIE/AIECombinerHelper.h index fb907ff2e2cb..bfcd02a519eb 100644 --- a/llvm/lib/Target/AIE/AIECombinerHelper.h +++ b/llvm/lib/Target/AIE/AIECombinerHelper.h @@ -61,6 +61,10 @@ bool matchGlobalValOffset(MachineInstr &MI, MachineRegisterInfo &MRI, /// post-increment combining bool canDelayMemOp(MachineInstr &MemI, MachineInstr &Dest, MachineRegisterInfo &MRI); +/// Find the def of \p Reg, looking through any chain of trivial copies and +/// G_BITCASTs. May return nullptr if \p Reg is not a generic virtual register. 
+MachineInstr *getDefIgnoringCopiesAndBitcasts(Register Reg, + const MachineRegisterInfo &MRI); class InstrNode { MachineInstr *BaseNode; diff --git a/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-indexed-vst_pack.mir b/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-indexed-vst_pack.mir index c3b1b465f8df..21c79b4ad902 100644 --- a/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-indexed-vst_pack.mir +++ b/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-indexed-vst_pack.mir @@ -215,3 +215,27 @@ body: | %11:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2.pack.I4.I8), %1:vregbank(<32 x s16>), %6:gprregbank(s32) G_AIE_OFFSET_STORE %11:vregbank(<32 x s8>), %2:ptrregbank(p0), %10:modregbank(s20) :: (store (<32 x s8>)) ... + +--- +name: VST_PACK_D8_D16_COPY_BITCAST +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $r0, $x0 + ; CHECK-LABEL: name: VST_PACK_D8_D16_COPY_BITCAST + ; CHECK: liveins: $p0, $r0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: VST_PACK_D8_D16_ag_idx_imm [[COPY1]], 96, [[COPY]], implicit $crsat, implicit $crpacksign :: (store (<8 x s32>)) + %0:vregbank(<32 x s16>) = COPY $x0 + %2:ptrregbank(p0) = COPY $p0 + %6:gprregbank(s32) = G_CONSTANT i32 0 + %5:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2.pack.I8.I16), %0:vregbank(<32 x s16>), %6:gprregbank(s32) + %3:modregbank(s20) = G_CONSTANT i20 96 + %100:vregbank(<8 x s32>) = G_BITCAST %5(<32 x s8>) + %110:vregbank(<8 x s32>) = COPY %100(<8 x s32>) + G_AIE_OFFSET_STORE %110:vregbank(<8 x s32>), %2:ptrregbank(p0), %3:modregbank(s20) :: (store (<8 x s32>)) +... 
diff --git a/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-indexed-vst_srs.mir b/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-indexed-vst_srs.mir index 4cd377b78d9b..a5afb10a47ca 100644 --- a/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-indexed-vst_srs.mir +++ b/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-indexed-vst_srs.mir @@ -1188,3 +1188,29 @@ body: | G_AIE_OFFSET_STORE %107, %0, %11 :: (store (<16 x s32>) into stack - 64) G_AIE_OFFSET_STORE %108, %0, %12 :: (store (<16 x s32>) into stack - 64) ... + +--- +name: VST_SRS_D8_S32_COPY_BITCAST +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $cm0, $p0, $r1 + ; CHECK-LABEL: name: VST_SRS_D8_S32_COPY_BITCAST + ; CHECK: liveins: $cm0, $p0, $r1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:edj = COPY $m0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:mss = COPY [[COPY2]] + ; CHECK-NEXT: VST_SRS_D8_S32_ag_idx [[COPY]], [[COPY1]], %5:acc1024, [[COPY3]], implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) + %0:ptrregbank(p0) = COPY $p0 + %7:modregbank(s20) = COPY $m0 + %101:gprregbank(s32) = COPY $r1 + %102:gprregbank(s32) = G_CONSTANT i32 0 + %103:vregbank(<32 x s8>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2.I256.v32.acc32.srs), %100:accregbank(<16 x s64>), %101:gprregbank(s32), %102:gprregbank(s32) + %144:vregbank(<8 x s32>) = G_BITCAST %103(<32 x s8>) + %201:vregbank(<8 x s32>) = COPY %144(<8 x s32>) + G_AIE_OFFSET_STORE %201(<8 x s32>), %0, %7 :: (store (<8 x s32>)) +... 
diff --git a/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-vst_conv.mir b/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-vst_conv.mir index 3e8279e79e65..236d131eb0a7 100644 --- a/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-vst_conv.mir +++ b/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-vst_conv.mir @@ -257,3 +257,27 @@ body: | %21:ptrregbank(p0) = G_AIE_POSTINC_STORE %104, %0, %8 :: (store (<16 x s16>)) PseudoRET implicit $lr ... + +--- +name: VST_CONV_COPY_BITCAST +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0, $bml0 + ; CHECK-LABEL: name: VST_CONV_COPY_BITCAST + ; CHECK: liveins: $p0, $bml0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:acc512 = COPY $bml0 + ; CHECK-NEXT: VST_CONV_BF16_FP32_ag_idx_imm [[COPY]], 0, [[COPY1]], implicit-def $srf2fflags, implicit $crrnd, implicit $crf2fmask :: (store (<8 x s32>)) + ; CHECK-NEXT: PseudoRET implicit $lr + %0:ptrregbank(p0) = COPY $p0 + %100:accregbank(<8 x s64>) = COPY $bml0 + %104:vregbank(<16 x s16>) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aie2.v16accfloat.to.v16bf16), %100:accregbank(<8 x s64>) + %150:vregbank(<8 x s32>) = G_BITCAST %104(<16 x s16>) + %200:vregbank(<8 x s32>) = COPY %150(<8 x s32>) + G_STORE %200, %0 :: (store (<8 x s32>)) + PseudoRET implicit $lr +...