Skip to content

Commit

Permalink
[GlobalISel] Match G_SHUFFLE_VECTORs representing sub-vector extracts
Browse files Browse the repository at this point in the history
  • Loading branch information
konstantinschwarz committed Oct 25, 2024
1 parent bb7ca7b commit 18efe7c
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 69 deletions.
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -856,6 +856,10 @@ class CombinerHelper {
bool matchExtractVectorElementWithDifferentIndices(const MachineOperand &MO,
BuildFnTy &MatchInfo);

/// Match a G_SHUFFLE_VECTOR whose result has half as many elements as its
/// first source and whose mask selects the low or the high half of that
/// source in order (undefined mask lanes match anything). On success,
/// \p MatchInfo is set to a builder callback that emits the replacement.
/// Transform:
/// res = G_SHUFFLE_VECTOR a, b, <0, 1> -> res, undef = G_UNMERGE_VALUES a
/// (here "undef" is the unmerge result for the half that is not extracted).
bool matchShuffleToExtractSubvector(MachineInstr &MI, BuildFnTy &MatchInfo);

/// Transform:
/// G_INTTOPTR (int G_CONSTANT x) -> (pointer G_CONSTANT x)
bool matchIntToPtrContant(MachineInstr &MI, MachineRegisterInfo &MRI,
Expand Down
12 changes: 11 additions & 1 deletion llvm/include/llvm/Target/GlobalISel/Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -1527,6 +1527,16 @@ def combine_shuffle_concat : GICombineRule<
[{ return Helper.matchCombineShuffleConcat(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyCombineShuffleConcat(*${root}, ${matchinfo}); }])>;

// Combines shuffles that extract one half of their first source vector into
// unmerges of that source:
// res = G_SHUFFLE_VECTOR a, b, <0, 1>
// ===>
// res, undef = G_UNMERGE_VALUES a
// The mask check lives in CombinerHelper::matchShuffleToExtractSubvector; the
// rewrite is carried in the build_fn match info and emitted by applyBuildFn.
def combine_shuffle_to_extract_vector : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
[{ return Helper.matchShuffleToExtractSubvector(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;

def combine_inttoptr_constant : GICombineRule<
(defs root:$root, build_fn_matchinfo:$info),
(match (wip_match_opcode G_INTTOPTR):$root,
Expand Down Expand Up @@ -1642,7 +1652,7 @@ def all_combines : GICombineGroup<[trivial_combines, vector_ops_combines,
sub_add_reg, select_to_minmax, redundant_binop_in_equality,
fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
combine_concat_vector, double_icmp_zero_and_or_combine, match_addos,
combine_shuffle_concat]>;
combine_shuffle_concat, combine_shuffle_to_extract_vector]>;

// A combine group used for prelegalizer combiners at -O0. The combines in
// this group have been selected based on experiments to balance code size and
Expand Down
55 changes: 55 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
Expand Down Expand Up @@ -7254,6 +7255,60 @@ bool CombinerHelper::matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) {
return false;
}

/// Match a G_SHUFFLE_VECTOR that extracts the low or the high half of its
/// first source vector, and describe its replacement by a G_UNMERGE_VALUES.
///
///   res = G_SHUFFLE_VECTOR a, b, <0, 1>  ==>  res, unused = G_UNMERGE_VALUES a
///   res = G_SHUFFLE_VECTOR a, b, <2, 3>  ==>  unused, res = G_UNMERGE_VALUES a
///
/// The match requires the destination to have exactly half as many elements as
/// the first source. Undefined mask lanes (-1) are accepted at any position.
///
/// \param MI        the G_SHUFFLE_VECTOR instruction to inspect.
/// \param MatchInfo set, on success only, to a callback that builds the
///                  G_UNMERGE_VALUES defining MI's result register.
/// \returns true if the shuffle is a half-vector extract of the first source.
bool CombinerHelper::matchShuffleToExtractSubvector(MachineInstr &MI,
                                                    BuildFnTy &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  const Register DstReg = MI.getOperand(0).getReg();
  const Register Src1Reg = MI.getOperand(1).getReg();
  const ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT Src1Ty = MRI.getType(Src1Reg);

  if (!DstTy.isVector() || !Src1Ty.isVector())
    return false;

  // NOTE(review): getNumElements() is queried without a scalable-vector guard;
  // presumably G_SHUFFLE_VECTOR only carries fixed-width types here - confirm.
  const unsigned NumDstElems = DstTy.getNumElements();
  const unsigned NumSrc1Elems = Src1Ty.getNumElements();

  // Only an exact half can be expressed as one result of a two-way unmerge.
  if (NumDstElems * 2 != NumSrc1Elems)
    return false;

  // True if every defined mask lane I selects source element Start + I, i.e.
  // the mask is the sequence Start, Start + 1, ... with undef lanes as
  // wildcards. Compared arithmetically so no temporary mask is allocated.
  auto ExtractsRunStartingAt = [Mask, NumDstElems](unsigned Start) -> bool {
    for (unsigned I = 0; I < NumDstElems; ++I) {
      if (Mask[I] != -1 && Mask[I] != static_cast<int>(Start + I))
        return false;
    }
    return true;
  };

  bool ExtractsLowHalf;
  if (ExtractsRunStartingAt(0))
    ExtractsLowHalf = true;
  else if (ExtractsRunStartingAt(NumDstElems))
    ExtractsLowHalf = false;
  else
    return false;

  MatchInfo = [DstReg, Src1Reg, DstTy, ExtractsLowHalf](MachineIRBuilder &B) {
    // The register for the half that is not extracted is created here, in the
    // apply phase, so that a failed match never leaves an orphan virtual
    // register behind.
    const Register UnusedHalf =
        B.getMRI()->createGenericVirtualRegister(DstTy);
    if (ExtractsLowHalf)
      B.buildUnmerge({DstReg, UnusedHalf}, Src1Reg);
    else
      B.buildUnmerge({UnusedHalf, DstReg}, Src1Reg);
  };

  return true;
}

bool CombinerHelper::matchIntToPtrContant(MachineInstr &MI,
MachineRegisterInfo &MRI,
BuildFnTy &MatchInfo) {
Expand Down
7 changes: 2 additions & 5 deletions llvm/test/CodeGen/AArch64/ext-narrow-index.ll
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ define <8 x i8> @i8_off8(<16 x i8> %arg1, <16 x i8> %arg2) {
;
; CHECK-GISEL-LABEL: i8_off8:
; CHECK-GISEL: // %bb.0: // %entry
; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #8
; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GISEL-NEXT: mov d0, v0.d[1]
; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
Expand Down Expand Up @@ -254,9 +253,7 @@ define <8 x i8> @i8_zero_off8(<16 x i8> %arg1) {
;
; CHECK-GISEL-LABEL: i8_zero_off8:
; CHECK-GISEL: // %bb.0: // %entry
; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000
; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #8
; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GISEL-NEXT: mov d0, v0.d[1]
; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
Expand Down
12 changes: 4 additions & 8 deletions llvm/test/CodeGen/AArch64/vecreduce-add.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3744,17 +3744,13 @@ define i32 @add_pair_v8i16_v4i32_double_sext_zext_shuffle(<8 x i16> %ax, <8 x i1
; CHECK-GI-LABEL: add_pair_v8i16_v4i32_double_sext_zext_shuffle:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: ushll v4.4s, v0.4h, #0
; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0
; CHECK-GI-NEXT: ushll v5.4s, v1.4h, #0
; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0
; CHECK-GI-NEXT: ushll v6.4s, v2.4h, #0
; CHECK-GI-NEXT: ushll2 v2.4s, v2.8h, #0
; CHECK-GI-NEXT: ushll v7.4s, v3.4h, #0
; CHECK-GI-NEXT: ushll2 v3.4s, v3.8h, #0
; CHECK-GI-NEXT: add v0.4s, v4.4s, v0.4s
; CHECK-GI-NEXT: add v1.4s, v5.4s, v1.4s
; CHECK-GI-NEXT: add v2.4s, v6.4s, v2.4s
; CHECK-GI-NEXT: add v3.4s, v7.4s, v3.4s
; CHECK-GI-NEXT: uaddw2 v0.4s, v4.4s, v0.8h
; CHECK-GI-NEXT: uaddw2 v1.4s, v5.4s, v1.8h
; CHECK-GI-NEXT: uaddw2 v2.4s, v6.4s, v2.8h
; CHECK-GI-NEXT: uaddw2 v3.4s, v7.4s, v3.8h
; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-GI-NEXT: add v1.4s, v2.4s, v3.4s
; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
Expand Down
65 changes: 10 additions & 55 deletions llvm/test/CodeGen/AIE/aie2/intrinsics-shufflevec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,63 +15,18 @@ define <8 x i32> @test_extract_vector(<16 x i32> noundef %a, i32 noundef %idx) {
; CHECK-NEXT: nopa ; nopx // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: mov r8, r16 // Delay Slot 1
; CHECK-NEXT: vmov x0, x2 // Delay Slot 2
; CHECK-NEXT: nop // Delay Slot 1
; CHECK-NEXT: // %bb.1: // %if.end
; CHECK-NEXT: mova r16, #8; nopb ; nopxm
; CHECK-NEXT: vextract.s32 r0, x2, r16
; CHECK-NEXT: mova r16, #9
; CHECK-NEXT: vextract.s32 r1, x2, r16
; CHECK-NEXT: mova r16, #10
; CHECK-NEXT: vextract.s32 r2, x2, r16
; CHECK-NEXT: mova r16, #11
; CHECK-NEXT: vextract.s32 r3, x2, r16
; CHECK-NEXT: mova r16, #12
; CHECK-NEXT: vextract.s32 r4, x2, r16
; CHECK-NEXT: mova r16, #13
; CHECK-NEXT: vextract.s32 r5, x2, r16
; CHECK-NEXT: mova r16, #15
; CHECK-NEXT: vextract.s32 r6, x2, r16
; CHECK-NEXT: mova r16, #14
; CHECK-NEXT: vextract.s32 r7, x2, r16
; CHECK-NEXT: vpush.lo.32 x0, r6, x0
; CHECK-NEXT: vpush.lo.32 x0, r7, x0
; CHECK-NEXT: vpush.lo.32 x0, r5, x0
; CHECK-NEXT: vpush.lo.32 x0, r4, x0
; CHECK-NEXT: ret lr
; CHECK-NEXT: vpush.lo.32 x0, r3, x0 // Delay Slot 5
; CHECK-NEXT: vpush.lo.32 x0, r2, x0 // Delay Slot 4
; CHECK-NEXT: vpush.lo.32 x0, r1, x0 // Delay Slot 3
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 // Delay Slot 2
; CHECK-NEXT: mov r16, r8 // Delay Slot 1
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vmov wl0, wh0; nopv
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_2: // %if.then
; CHECK-NEXT: mova r16, #0; nopb ; nopxm
; CHECK-NEXT: vextract.s32 r0, x2, r16
; CHECK-NEXT: mova r16, #1
; CHECK-NEXT: vextract.s32 r1, x2, r16
; CHECK-NEXT: mova r16, #2
; CHECK-NEXT: vextract.s32 r2, x2, r16
; CHECK-NEXT: mova r16, #3
; CHECK-NEXT: vextract.s32 r3, x2, r16
; CHECK-NEXT: mova r16, #4
; CHECK-NEXT: vextract.s32 r4, x2, r16
; CHECK-NEXT: mova r16, #5
; CHECK-NEXT: vextract.s32 r5, x2, r16
; CHECK-NEXT: mova r16, #7
; CHECK-NEXT: vextract.s32 r6, x2, r16
; CHECK-NEXT: mova r16, #6
; CHECK-NEXT: vextract.s32 r7, x2, r16
; CHECK-NEXT: vpush.lo.32 x0, r6, x0
; CHECK-NEXT: vpush.lo.32 x0, r7, x0
; CHECK-NEXT: vpush.lo.32 x0, r5, x0
; CHECK-NEXT: vpush.lo.32 x0, r4, x0
; CHECK-NEXT: ret lr
; CHECK-NEXT: vpush.lo.32 x0, r3, x0 // Delay Slot 5
; CHECK-NEXT: vpush.lo.32 x0, r2, x0 // Delay Slot 4
; CHECK-NEXT: vpush.lo.32 x0, r1, x0 // Delay Slot 3
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 // Delay Slot 2
; CHECK-NEXT: mov r16, r8 // Delay Slot 1
; CHECK-NEXT: .LBB0_2: // %return
; CHECK-NEXT: nopa ; ret lr
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: nop // Delay Slot 1
entry:
%cmp = icmp eq i32 %idx, 0
br i1 %cmp, label %if.then, label %if.end
Expand Down

0 comments on commit 18efe7c

Please sign in to comment.