diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index b2132562ac3f..5049ac071940 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -27,6 +27,7 @@
 #include "llvm/CodeGenTypes/LowLevelType.h"
 #include "llvm/IR/InstrTypes.h"
 #include <functional>
+#include <optional>
 
 namespace llvm {
 
@@ -256,8 +257,21 @@ class CombinerHelper {
   /// concat_vectors.
   ///
   /// \pre MI.getOpcode() == G_SHUFFLE_VECTOR.
-  bool matchCombineShuffleVector(MachineInstr &MI,
-                                 SmallVectorImpl<Register> &Ops);
+  using GeneratorType = std::function<std::optional<int32_t>()>;
+
+  bool matchCombineShuffleVector(MachineInstr &MI, GeneratorType Generator,
+                                 const size_t TargetDstSize);
+
+  /// Create G_UNMERGE_VALUES instructions until the source has reached a
+  /// target vector size.
+  ///
+  /// Requires that the destination type divides the source register evenly.
+  /// \p DestinationIndex selects which of the destination-sized slices of
+  /// the source is produced.
+  Register createUnmergeValue(MachineInstr &MI, const Register SrcReg,
+                              const Register DstReg, uint8_t DestinationIndex,
+                              const uint32_t Start, const uint32_t End);
+
   /// Replace \p MI with a concat_vectors with \p Ops.
   void applyCombineShuffleVector(MachineInstr &MI,
                                  const ArrayRef<Register> Ops);
@@ -341,6 +355,15 @@ class CombinerHelper {
   applyCombineUnmergeMergeToPlainValues(MachineInstr &MI,
                                         SmallVectorImpl<Register> &Operands);
 
+  /// Transform G_SHUFFLE_VECTOR(G_MERGE ty X Y Z) -> G_MERGE ty X, Y, Z.
+  bool
+  matchCombineShuffleVectorBuildVector(MachineInstr &MI,
+                                       SmallVectorImpl<Register> &Operands);
+
+  void
+  applyCombineShuffleVectorBuildVector(MachineInstr &MI,
+                                       SmallVectorImpl<Register> &Operands);
+
   /// Transform G_UNMERGE Constant -> Constant1, Constant2, ...
   bool matchCombineUnmergeConstant(MachineInstr &MI,
                                    SmallVectorImpl<APInt> &Csts);
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 25e47114e4a3..7f945720829d 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -4,6 +4,9 @@
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
+// Modifications (c) Copyright 2024 Advanced Micro Devices, Inc. or its
+// affiliates
+//
 //===----------------------------------------------------------------------===//
 /// \file
 /// Declares convenience wrapper classes for interpreting MachineInstr instances
@@ -240,6 +243,22 @@ class GUnmerge : public GenericMachineInstr {
   }
 };
 
+/// Represents a G_SHUFFLE_VECTOR.
+class GShuffleVector : public GenericMachineInstr {
+public:
+  /// Returns the number of source registers.
+  unsigned getNumSources() const { return getNumOperands() - 2; }
+  /// Returns the I'th source register.
+  Register getSourceReg(unsigned I) const {
+    assert(I + 1 <= getNumSources());
+    return getReg(I + 1);
+  }
+
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR;
+  }
+};
+
 /// Represents G_BUILD_VECTOR, G_CONCAT_VECTORS or G_MERGE_VALUES.
 /// All these have the common property of generating a single value from
 /// multiple sources.
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 3c8d968c2764..3cb9d6529c28 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -756,7 +756,7 @@ def fneg_fneg_fold: GICombineRule <
   (apply (GIReplaceReg $dst, $src))
 >;
 
-// Fold (unmerge(merge x, y, z)) -> z, y, z.
+// Fold (unmerge(merge x, y, z)) -> x, y, z.
 def unmerge_merge_matchinfo : GIDefMatchData<"SmallVector<Register>">;
 def unmerge_merge : GICombineRule<
   (defs root:$d, unmerge_merge_matchinfo:$info),
@@ -765,6 +765,16 @@ def unmerge_merge : GICombineRule<
   (apply [{ Helper.applyCombineUnmergeMergeToPlainValues(*${d}, ${info}); }])
 >;
 
+// Fold (shufflevector (build_vector x, y, z), ...) -> build_vector.
+def shufflevector_merge_matchinfo : GIDefMatchData<"SmallVector<Register>">;
+def shufflevector_merge : GICombineRule<
+  (defs root:$d, shufflevector_merge_matchinfo:$info),
+  (match (wip_match_opcode G_SHUFFLE_VECTOR): $d,
+    [{ return Helper.matchCombineShuffleVectorBuildVector(*${d}, ${info}); }]),
+  (apply [{ Helper.applyCombineShuffleVectorBuildVector(*${d}, ${info}); }])
+>;
+
+
 // Fold merge(unmerge).
 def merge_unmerge : GICombineRule<
   (defs root:$d, register_matchinfo:$matchinfo),
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index ec7ca5dc8e2b..6d38ed947e28 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -13,6 +13,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/CmpInstAnalysis.h"
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
@@ -27,6 +28,7 @@
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterBankInfo.h"
@@ -42,6 +44,8 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetMachine.h"
 #include <cmath>
+#include <cstdint>
+#include <functional>
 #include <optional>
 #include <tuple>
 
@@ -371,7 +375,6 @@ void CombinerHelper::applyCombineShuffleConcat(MachineInstr &MI,
                                                SmallVector<Register> &Ops) {
   LLT SrcTy = MRI.getType(Ops[0]);
   Register UndefReg = 0;
-
   for (unsigned i = 0; i < Ops.size(); i++) {
     if (Ops[i] == 0) {
       if (UndefReg == 0)
@@ -384,17 +387,265 @@ void CombinerHelper::applyCombineShuffleConcat(MachineInstr &MI,
   MI.eraseFromParent();
 }
 
+// Create a stream of indices from From to To (inclusive), advancing by
+// StepSize on each call.
+CombinerHelper::GeneratorType
+adderGenerator(const int32_t From, const int32_t To, const int32_t StepSize) {
+  int32_t Counter = From;
+  return [Counter, To, StepSize]() mutable {
+    std::optional<int32_t> OldCount = std::optional(Counter);
+    Counter += StepSize;
+    if (OldCount == (To + StepSize))
+      OldCount = {};
+    return OldCount;
+  };
+}
+
+// Move to the next generator once the current one is exhausted, allowing
+// generators to be chained.
+std::function<std::optional<int32_t>()> concatGenerators(
+    SmallVector<std::function<std::optional<int32_t>()>> Generators) {
+  auto *GeneratorIterator = Generators.begin();
+
+  return [GeneratorIterator, Generators]() mutable {
+    std::optional<int32_t> GenValue = (*GeneratorIterator)();
+    if (!GenValue.has_value() && GeneratorIterator != Generators.end()) {
+      GeneratorIterator++;
+      GenValue = (*GeneratorIterator)();
+    }
+    return GenValue;
+  };
+}
+
+Register CombinerHelper::createUnmergeValue(
+    MachineInstr &MI, const Register SrcReg, const Register DstReg,
+    const uint8_t DestinationIndex, const uint32_t Start, const uint32_t End) {
+  Builder.setInsertPt(*MI.getParent(), MI);
+  const LLT DstTy = MRI.getType(DstReg);
+  const LLT SrcTy = MRI.getType(SrcReg);
+  assert((DstTy.isScalar() ||
+          (SrcTy.getNumElements() % DstTy.getNumElements()) == 0) &&
+         "destination vector must divide source cleanly");
+
+  const unsigned HalfElements = SrcTy.getNumElements() / 2;
+  const LLT ScalarTy = SrcTy.getScalarType();
+  const LLT HalfSizeTy = (HalfElements == 1)
+                             ? ScalarTy
+                             : LLT::fixed_vector(HalfElements, ScalarTy);
+  const Register TmpReg = MRI.createGenericVirtualRegister(HalfSizeTy);
+  Register TargetReg = DstReg;
+  if (DstTy != HalfSizeTy) {
+    TargetReg = MRI.createGenericVirtualRegister(HalfSizeTy);
+  }
+
+  // Each destination fits n times into the source, and each iteration halves
+  // the source exactly. Therefore we need to pick on which side of the split
+  // to keep iterating.
+  const uint32_t DstNumElements =
+      DstTy.isVector() ? DstTy.getNumElements() : 1;
+  const uint32_t HalfWay = Start + ((End - Start) / 2);
+  const uint32_t Position = DestinationIndex * DstNumElements;
+
+  uint32_t NextStart, NextEnd;
+  if (Position < HalfWay) {
+    Builder.buildInstr(TargetOpcode::G_UNMERGE_VALUES, {TargetReg, TmpReg},
+                       {SrcReg});
+    NextStart = Start;
+    NextEnd = HalfWay;
+  } else {
+    Builder.buildInstr(TargetOpcode::G_UNMERGE_VALUES, {TmpReg, TargetReg},
+                       {SrcReg});
+    NextStart = HalfWay;
+    NextEnd = End;
+  }
+
+  if (HalfSizeTy.isVector() && DstTy != HalfSizeTy)
+    return createUnmergeValue(MI, TargetReg, DstReg, DestinationIndex,
+                              NextStart, NextEnd);
+
+  return DstReg;
+}
+
 bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
+  const Register DstReg = MI.getOperand(0).getReg();
+  const Register SrcReg1 = MI.getOperand(1).getReg();
+  const Register SrcReg2 = MI.getOperand(2).getReg();
+
+  const LLT DstTy = MRI.getType(DstReg);
+  const LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+
+  const unsigned DstNumElts = DstTy.isVector() ? DstTy.getNumElements() : 1;
+  const unsigned SrcNumElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
+
+  // This test is a bit silly, but it is required because some tests rely on
+  // the legalizer changing the type of the shufflevector.
+  if (DstTy.getScalarSizeInBits() == 1)
+    return false;
+
+  // {1, 2, ..., n} -> G_CONCAT_VECTORS
+  // Turns a shuffle vector that only increments into a concat_vectors
+  // instruction.
+  GeneratorType CountUp = adderGenerator(0, DstNumElts - 1, 1);
   SmallVector<Register, 4> Ops;
-  if (matchCombineShuffleVector(MI, Ops)) {
+
+  if (matchCombineShuffleVector(MI, CountUp, 2 * SrcNumElts)) {
+    // The shuffle is concatenating multiple vectors together.
+    // Collect the different operands for that.
+    Register UndefReg;
+    const Register Src1 = MI.getOperand(1).getReg();
+    const Register Src2 = MI.getOperand(2).getReg();
+    const ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+
+    // The destination can be longer than the source, so we separate them into
+    // equal blocks and check them separately to see if one of the blocks can
+    // be copied whole.
+    unsigned NumConcat = DstNumElts / SrcNumElts;
+    unsigned Index = 0;
+    for (unsigned Concat = 0; Concat < NumConcat; Concat++) {
+      unsigned Target = (Concat + 1) * SrcNumElts;
+      while (Index < Target) {
+        int MaskElt = Mask[Index];
+        if (MaskElt >= 0) {
+          Ops.push_back((MaskElt < (int)SrcNumElts) ? Src1 : Src2);
+          break;
+        }
+        Index++;
+      }
+
+      if (Index == Target) {
+        if (!UndefReg) {
+          Builder.setInsertPt(*MI.getParent(), MI);
+          UndefReg = Builder.buildUndef(SrcTy).getReg(0);
+        }
+        Ops.push_back(UndefReg);
+      }
+
+      Index = Target;
+    }
+
     applyCombineShuffleVector(MI, Ops);
     return true;
   }
+
+  // {1, 2, ..., |DstVector|} -> G_UNMERGE_VALUES
+  // Extracts the first chunk of the same size as the destination vector from
+  // the source.
+  GeneratorType FirstQuarter = adderGenerator(0, DstNumElts - 1, 1);
+  if (matchCombineShuffleVector(MI, FirstQuarter, DstNumElts - 1)) {
+    // This optimization does not work if the target type is not a power of
+    // two; this can happen in some backends that support uneven vector types.
+    // We also need to make sure that the vector can be split into two.
+    if (SrcTy == DstTy || ((SrcNumElts / 2) % 2) != 0 ||
+        SrcNumElts % DstNumElts != 0)
+      return false;
+    ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+    const Register TargetReg = Mask[0] < (int)SrcNumElts ? SrcReg1 : SrcReg2;
+    createUnmergeValue(MI, TargetReg, DstReg, 0, 0, SrcNumElts);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  // {|DstVector|, |DstVector|+1, ..., 2 * |DstVector|} -> G_UNMERGE_VALUES
+  // Extracts the second chunk of the same size as the destination vector from
+  // the source.
+  GeneratorType SecondQuarter =
+      adderGenerator(DstNumElts, (DstNumElts * 2) - 1, 1);
+  if (matchCombineShuffleVector(MI, SecondQuarter, DstNumElts - 1)) {
+    if (((SrcNumElts / 2) % 2) != 0 || SrcNumElts % DstNumElts != 0)
+      return false;
+    ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+    const Register TargetReg = Mask[0] < (int)SrcNumElts ? SrcReg1 : SrcReg2;
+    createUnmergeValue(MI, TargetReg, DstReg, 1, 0, SrcNumElts);
+    MI.eraseFromParent();
+    return true;
+  }
+
+  // After this point, it is assumed our shufflevectors work on vectors that
+  // can be split into two.
+  if ((DstNumElts % 2) != 0)
+    return false;
+
+  // {1, 2, ..., n/4, n/2, n/2+1, ..., 3n/4} -> G_UNMERGE_VALUES
+  // Take the first halves of the two vectors and concatenate them into one
+  // vector.
+  GeneratorType FirstEightA = adderGenerator(0, (DstNumElts / 2) - 1, 1);
+  GeneratorType FirstEightB =
+      adderGenerator(DstNumElts, DstNumElts + (DstNumElts / 2) - 1, 1);
+
+  GeneratorType FirstAndThird =
+      concatGenerators(SmallVector<GeneratorType>{FirstEightA, FirstEightB});
+  if (matchCombineShuffleVector(MI, FirstAndThird, (DstNumElts / 2) - 1)) {
+    if (DstNumElts <= 2)
+      return false;
+    const Register DstReg = MI.getOperand(0).getReg();
+    const LLT HalfSrcTy =
+        LLT::fixed_vector(SrcNumElts / 2, SrcTy.getScalarType());
+    const Register HalfOfA = createUnmergeValue(
+        MI, MI.getOperand(1).getReg(),
+        MRI.createGenericVirtualRegister(HalfSrcTy), 0, 0, SrcNumElts);
+    const Register HalfOfB = createUnmergeValue(
+        MI, MI.getOperand(2).getReg(),
+        MRI.createGenericVirtualRegister(HalfSrcTy), 0, 0, SrcNumElts);
+
+    const ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+    if (Mask[0] <= 0) {
+      Builder.buildMergeLikeInstr(DstReg, {HalfOfA, HalfOfB});
+    } else {
+      Builder.buildMergeLikeInstr(DstReg, {HalfOfB, HalfOfA});
+    }
+
+    MI.eraseFromParent();
+    return true;
+  }
+
+  // {n/2, n/2+1, ..., n, 0, 1, ..., n/2-1}
+  GeneratorType FirstHalf = adderGenerator(0, SrcNumElts / 2, 1);
+  GeneratorType SecondHalf = adderGenerator(SrcNumElts / 2, SrcNumElts, 1);
+  GeneratorType Reverse =
+      concatGenerators(SmallVector<GeneratorType>{FirstHalf, SecondHalf});
+
+  if (matchCombineShuffleVector(MI, Reverse, SrcNumElts)) {
+    // The shuffle is concatenating multiple vectors together.
+    // Collect the different operands for that.
+    Register UndefReg;
+    const Register Src1 = MI.getOperand(1).getReg();
+    const Register Src2 = MI.getOperand(2).getReg();
+    const ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
+
+    // The destination can be longer than the source, so we separate them into
+    // equal blocks and check them separately to see if one of the blocks can
+    // be copied whole.
+    unsigned NumConcat = DstNumElts / SrcNumElts;
+    unsigned Index = 0;
+    for (unsigned Concat = 0; Concat < NumConcat; Concat++) {
+      unsigned Target = (Concat + 1) * SrcNumElts;
+      while (Index < Target) {
+        int MaskElt = Mask[Index];
+        if (MaskElt >= 0) {
+          Ops.push_back((MaskElt < (int)SrcNumElts) ? Src1 : Src2);
+          break;
+        }
+        Index++;
+      }
+
+      if (Index == Target) {
+        if (!UndefReg) {
+          Builder.setInsertPt(*MI.getParent(), MI);
+          UndefReg = Builder.buildUndef(SrcTy).getReg(0);
+        }
+        Ops.push_back(UndefReg);
+      }
+
+      Index = Target;
+    }
+    applyCombineShuffleVector(MI, {Ops[1], Ops[0]});
+    return true;
+  }
+
   return false;
 }
 
 bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
-                                               SmallVectorImpl<Register> &Ops) {
+                                               GeneratorType Generator,
+                                               const size_t TargetDstSize) {
   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
          "Invalid instruction kind");
   LLT DstType = MRI.getType(MI.getOperand(0).getReg());
@@ -421,51 +672,24 @@ bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
   //
   // TODO: If the size between the source and destination don't match
   // we could still emit an extract vector element in that case.
-  if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
-    return false;
-
-  // Check that the shuffle mask can be broken evenly between the
-  // different sources.
-  if (DstNumElts % SrcNumElts != 0)
+  if ((DstNumElts < TargetDstSize) && DstNumElts != 1)
     return false;
 
-  // Mask length is a multiple of the source vector length.
-  // Check if the shuffle is some kind of concatenation of the input
-  // vectors.
-  unsigned NumConcat = DstNumElts / SrcNumElts;
-  SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
   for (unsigned i = 0; i != DstNumElts; ++i) {
     int Idx = Mask[i];
+    const int32_t ShiftIndex = Generator().value_or(-1);
+
     // Undef value.
-    if (Idx < 0)
+    if (Idx < 0 || ShiftIndex < 0)
       continue;
-    // Ensure the indices in each SrcType sized piece are sequential and that
+
+    // Ensure the indices in each SrcType sized piece are sequential and that
     // the same source is used for the whole piece.
-    if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
-        (ConcatSrcs[i / SrcNumElts] >= 0 &&
-         ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
+    if ((Idx % SrcNumElts != (ShiftIndex % SrcNumElts)))
       return false;
-    // Remember which source this index came from.
-    ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
   }
 
-  // The shuffle is concatenating multiple vectors together.
-  // Collect the different operands for that.
-  Register UndefReg;
-  Register Src2 = MI.getOperand(2).getReg();
-  for (auto Src : ConcatSrcs) {
-    if (Src < 0) {
-      if (!UndefReg) {
-        Builder.setInsertPt(*MI.getParent(), MI);
-        UndefReg = Builder.buildUndef(SrcType).getReg(0);
-      }
-      Ops.push_back(UndefReg);
-    } else if (Src == 0)
-      Ops.push_back(Src1);
-    else
-      Ops.push_back(Src2);
-  }
   return true;
 }
 
@@ -2047,6 +2271,92 @@ static Register peekThroughBitcast(Register Reg,
   return Reg;
 }
 
+bool CombinerHelper::matchCombineShuffleVectorBuildVector(
+    MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
+  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
+         "Expected a shuffle vector");
+  auto &ShuffleVector = cast<GShuffleVector>(MI);
+  Register SrcReg1 = peekThroughBitcast(ShuffleVector.getSourceReg(0), MRI);
+  Register SrcReg2 = peekThroughBitcast(ShuffleVector.getSourceReg(1), MRI);
+
+  // Check if the source registers are either merges or implicit definitions.
+  auto *SrcInstr1 = getOpcodeDef<GBuildVector>(SrcReg1, MRI);
+  auto *SrcInstr2 = getOpcodeDef<GBuildVector>(SrcReg2, MRI);
+  auto *IsUndef1 = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, SrcReg1, MRI);
+  auto *IsUndef2 = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, SrcReg2, MRI);
+
+  // Our inputs need to be either build vectors or undefined; register inputs
+  // break this optimization. You could maybe do something clever where you
+  // concatenate vectors to save half a build vector.
+  if ((!SrcInstr1 && !IsUndef1) || (!SrcInstr2 && !IsUndef2))
+    return false;
+
+  if (IsUndef1 && IsUndef2)
+    return true;
+
+  Register UndefReg;
+  if (SrcInstr1 || SrcInstr2)
+    UndefReg = MRI.createGenericVirtualRegister(MRI.getType(SrcReg1));
+
+  // Since our inputs to shufflevector must be of the same size, we can reuse
+  // the size of the defined register.
+  const unsigned NumElements = (SrcInstr1 != 0) ? SrcInstr1->getNumSources()
+                                                : SrcInstr2->getNumSources();
+  for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
+    const Register Elt =
+        (SrcInstr1 != 0) ? SrcInstr1->getSourceReg(Idx) : UndefReg;
+    Operands.push_back(Elt);
+  }
+
+  for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
+    const Register Elt =
+        (SrcInstr2 != 0) ? SrcInstr2->getSourceReg(Idx) : UndefReg;
+    Operands.push_back(Elt);
+  }
+
+  return true;
+}
+
+void CombinerHelper::applyCombineShuffleVectorBuildVector(
+    MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
+  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
+         "Expected a shuffle vector");
+  auto &ShuffleVector = cast<GShuffleVector>(MI);
+  const Register SrcReg1 =
+      peekThroughBitcast(ShuffleVector.getSourceReg(0), MRI);
+  const Register SrcReg2 =
+      peekThroughBitcast(ShuffleVector.getSourceReg(1), MRI);
+
+  // Check if the source registers are implicit definitions.
+  const MachineInstr *IsUndef1 =
+      getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, SrcReg1, MRI);
+  const MachineInstr *IsUndef2 =
+      getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, SrcReg2, MRI);
+
+  // If they're both undefined, we will just return an undefined as well.
+  if (IsUndef1 && IsUndef2) {
+    Builder.buildUndef(ShuffleVector.getReg(0));
+    MI.eraseFromParent();
+    return;
+  }
+
+  const LLT SrcReg1Ty = MRI.getType(SrcReg1);
+  const ArrayRef<int> ShiftMask = MI.getOperand(3).getShuffleMask();
+  Register UndefReg;
+  SmallVector<Register> Arguments;
+  for (int Index : ShiftMask) {
+    if (!UndefReg) {
+      UndefReg = Builder.buildUndef(SrcReg1Ty.getScalarType()).getReg(0);
+    }
+
+    const Register Argument = Index >= 0 ?
Operands[Index] : UndefReg; + Arguments.push_back(Argument); + } + + Builder.buildBuildVector(ShuffleVector.getOperand(0), Arguments); + MI.eraseFromParent(); +} + bool CombinerHelper::matchCombineUnmergeMergeToPlainValues( MachineInstr &MI, SmallVectorImpl &Operands) { assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td index 10cad6d19244..b1c8c8f84c8b 100644 --- a/llvm/lib/Target/AArch64/AArch64Combine.td +++ b/llvm/lib/Target/AArch64/AArch64Combine.td @@ -295,5 +295,5 @@ def AArch64PostLegalizerCombiner ptr_add_immed_chain, overlapping_and, split_store_zero_128, undef_combines, select_to_minmax, or_to_bsp, combine_concat_vector, - commute_constant_to_rhs]> { + commute_constant_to_rhs, shufflevector_merge]> { } diff --git a/llvm/lib/Target/AIE/AIE2InstrPatterns.td b/llvm/lib/Target/AIE/AIE2InstrPatterns.td index d363f40bab53..835bc7a86962 100644 --- a/llvm/lib/Target/AIE/AIE2InstrPatterns.td +++ b/llvm/lib/Target/AIE/AIE2InstrPatterns.td @@ -597,6 +597,18 @@ def : Pat<(int_aie2_vshuffle VEC512:$s1, VEC512:$s2, eR:$mod), def : Pat<(int_aie2_vshuffle_bf16 VEC512:$s1, VEC512:$s2, eR:$mod), (VSHUFFLE VEC512:$s1, VEC512:$s2, eR:$mod)>; +// VSHUFFLE generic opcodes translation +def vshuffle_node : SDNode<"AIE2::G_AIE_VSHUFFLE", + SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>]>>; +def : GINodeEquiv; + +def : Pat<(v16i32 (vshuffle_node (v16i32 VEC512:$v0), (v16i32 VEC512:$v1), (i32 eR:$mode))), + (VSHUFFLE VEC512:$v0, VEC512:$v1, i32:$mode)>; +def : Pat<(v32i16 (vshuffle_node (v32i16 VEC512:$v0), (v32i16 VEC512:$v1), (i32 eR:$mode))), + (VSHUFFLE VEC512:$v0, VEC512:$v1, i32:$mode)>; +def : Pat<(v64i8 (vshuffle_node (v64i8 VEC512:$v0), (v64i8 VEC512:$v1), (i32 eR:$mode))), + (VSHUFFLE VEC512:$v0, VEC512:$v1, i32:$mode)>; + // VSHIFT Intrinsic (shift/shiftx/shift_bytes) def : Pat<(int_aie2_vshift_I512_I512 VEC512:$s1, VEC512:$s2, 0x0, eR:$shift), (VSHIFT VEC512:$s1, VEC512:$s2, eR:$shift)>; diff --git a/llvm/lib/Target/AIE/AIE2PostLegalizerCustomCombiner.cpp b/llvm/lib/Target/AIE/AIE2PostLegalizerCustomCombiner.cpp index 19dc56ee3ac6..f809dfcff5fe 100644 --- a/llvm/lib/Target/AIE/AIE2PostLegalizerCustomCombiner.cpp +++ b/llvm/lib/Target/AIE/AIE2PostLegalizerCustomCombiner.cpp @@ -21,6 +21,8 @@ #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/InitializePasses.h" diff --git a/llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp b/llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp index 37865902ad13..b195a51e9299 100644 --- a/llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp @@ -15,6 +15,7 @@ #include "AIE2TargetMachine.h" #include "AIECombinerHelper.h" +#include "MCTargetDesc/AIE2MCTargetDesc.h" #include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/GlobalISel/Combiner.h" #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" @@ -22,6 +23,7 @@ #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/IntrinsicsAIE2.h" #include "llvm/InitializePasses.h" @@ -62,10 +64,9 @@ class 
AIE2PreLegalizerCombinerImpl : public Combiner { const LegalizerInfo *LI); static const char *getName() { return "AIE2PreLegalizerCombiner"; } - bool tryCombineAll(MachineInstr &I) const override; - bool tryCombineAllImpl(MachineInstr &I) const; + bool tryCombineShuffleVector(MachineInstr &MI) const; bool tryToCombineVectorShiftsByZero(MachineInstr &MI) const; @@ -149,6 +150,91 @@ bool AIE2PreLegalizerCombinerImpl::tryToCombineIntrinsic( return false; } +bool createVShuffle(MachineInstr &MI, const LLT TargetTy, const uint8_t Mode) { + MachineIRBuilder MIB(MI); + MachineRegisterInfo &MRI = *MIB.getMRI(); + const Register DstReg = MI.getOperand(0).getReg(); + const LLT DstTy = MRI.getType(DstReg); + + if (DstTy != TargetTy) + return false; + + const Register Src1 = MI.getOperand(1).getReg(); + const Register Src2 = MI.getOperand(2).getReg(); + const Register ShuffleModeReg = + MRI.createGenericVirtualRegister(LLT::scalar(32)); + + // This combiner only cares about the lower bits, so we can pad the + // vector to cover the case where two separate vectors are shuffled. + // together + MIB.buildConstant(ShuffleModeReg, Mode); + if (MRI.getType(Src1) == TargetTy) { + MIB.buildInstr(AIE2::G_AIE_VSHUFFLE, {DstReg}, + {Src1, Src2, ShuffleModeReg}); + } else { + // We reuse the same register since we ignore the high part of the vector + const Register TmpRegister = MRI.createGenericVirtualRegister(TargetTy); + MIB.buildConcatVectors(TmpRegister, {Src1, Src2}); + MIB.buildInstr(AIE2::G_AIE_VSHUFFLE, {DstReg}, + {TmpRegister, TmpRegister, ShuffleModeReg}); + } + + MI.eraseFromParent(); + return true; +} + +CombinerHelper::GeneratorType sectionGenerator(const int32_t From, + const int32_t To, + const int32_t Partitions, + const int32_t Increment) { + int32_t RoundSize = To / Partitions; + int32_t Index = 0; + int32_t Round = 0; + + return [=]() mutable { + int32_t CurrentGroup = (Index / Increment) % Partitions; + int32_t GroupFirstElement = CurrentGroup * RoundSize; + int32_t IndexInGroup = Index % Increment; + int32_t OffsetGroup = Round * Increment; + int32_t Next = GroupFirstElement + IndexInGroup + OffsetGroup; + if (++Index % (Partitions * Increment) == 0) + Round++; + + std::optional Return = std::optional(Next); + if (Index == To + 1) + Return = {}; + return Return; + }; +} + +bool AIE2PreLegalizerCombinerImpl::tryCombineShuffleVector( + MachineInstr &MI) const { + const Register DstReg = MI.getOperand(0).getReg(); + const LLT DstTy = MRI.getType(DstReg); + const LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); + const unsigned DstNumElts = DstTy.isVector() ? DstTy.getNumElements() : 1; + const unsigned SrcNumElts = SrcTy.isVector() ? 
SrcTy.getNumElements() : 1; + MachineIRBuilder MIB(MI); + MachineRegisterInfo &MRI = *MIB.getMRI(); + + if (Helper.tryCombineShuffleVector(MI)) + return true; + + const LLT V64S8 = LLT::fixed_vector(64, 8); + CombinerHelper::GeneratorType FourPartitions = + sectionGenerator(0, DstNumElts, 4, 1); + if (Helper.matchCombineShuffleVector(MI, FourPartitions, DstNumElts)) + return createVShuffle(MI, V64S8, 35); + + const LLT V32S16 = LLT::fixed_vector(32, 16); + CombinerHelper::GeneratorType FourPartitionByTwo = + sectionGenerator(0, DstNumElts, 4, 2); + if (Helper.matchCombineShuffleVector(MI, FourPartitionByTwo, DstNumElts)) + return createVShuffle(MI, V32S16, 29); + + return false; +} + bool AIE2PreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const { if (tryCombineAllImpl(MI)) return true; @@ -167,6 +253,9 @@ bool AIE2PreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const { case TargetOpcode::G_INTRINSIC: { return tryToCombineIntrinsic(MI); } + case TargetOpcode::G_SHUFFLE_VECTOR: { + return tryCombineShuffleVector(MI); + } default: break; } diff --git a/llvm/lib/Target/AIE/AIECombine.td b/llvm/lib/Target/AIE/AIECombine.td index 5b747b0e07fa..4f28cffbe2b9 100644 --- a/llvm/lib/Target/AIE/AIECombine.td +++ b/llvm/lib/Target/AIE/AIECombine.td @@ -62,8 +62,9 @@ def AIE2PreLegalizerCombiner all_combines, combine_S20NarrowingOpt, combine_globalval_offset, combine_extract_vector_elt_and_zsa_ext, - combine_splat_vector ]> { - let CombineAllMethodName = "tryCombineAllImpl"; + combine_splat_vector, + shufflevector_merge ]> { + let CombineAllMethodName = "tryCombineAllImpl"; } def AIE2PostLegalizerGenericCombiner diff --git a/llvm/lib/Target/AIE/AIEInstrGISel.td b/llvm/lib/Target/AIE/AIEInstrGISel.td index 69154fa83819..6eea5bf96782 100644 --- a/llvm/lib/Target/AIE/AIEInstrGISel.td +++ b/llvm/lib/Target/AIE/AIEInstrGISel.td @@ -96,6 +96,12 @@ def G_AIE_BROADCAST_VECTOR : AIEGenericInstruction { let hasSideEffects = false; } +def G_AIE_VSHUFFLE : AIEGenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, type0:$src2, type1:$mode); + let hasSideEffects = false; +} + // Create a larger vector by padding undefined values in the high bits def G_AIE_PAD_VECTOR_UNDEF : AIEGenericInstruction { let OutOperandList = (outs type0:$dst); diff --git a/llvm/lib/Target/AIE/AIELegalizerInfo.cpp b/llvm/lib/Target/AIE/AIELegalizerInfo.cpp index 9f00e1a33a60..8c456c3e2f08 100644 --- a/llvm/lib/Target/AIE/AIELegalizerInfo.cpp +++ b/llvm/lib/Target/AIE/AIELegalizerInfo.cpp @@ -505,6 +505,15 @@ AIELegalizerInfo::AIELegalizerInfo(const AIEBaseSubtarget &ST) { .clampMaxNumElements(0, S16, 32) .clampMaxNumElements(0, S32, 16) .custom(); + + getActionDefinitionsBuilder(G_SHUFFLE_VECTOR) + .unsupportedIf(IsNotValidDestinationVector) + // Checks if the shuffle is "canonical", this enables additional actions + // in the LLVM combiner and can change shuffle vectors legalization + .lowerIf([=](const LegalityQuery &Query) { + return Query.Types[0] == Query.Types[1]; + }) + .lower(); } getActionDefinitionsBuilder(G_JUMP_TABLE).custom(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td index 9218760538dc..c8ad547dbd3d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td @@ -154,7 +154,7 @@ def gfx8_combines : GICombineGroup<[expand_promoted_fmed3]>; def AMDGPUPreLegalizerCombiner: GICombiner< "AMDGPUPreLegalizerCombinerImpl", - [all_combines, clamp_i64_to_i16, foldable_fneg]> { + 
[all_combines, clamp_i64_to_i16, foldable_fneg, shufflevector_merge]> { let CombineAllMethodName = "tryCombineAllImpl"; } diff --git a/llvm/lib/Target/RISCV/RISCVCombine.td b/llvm/lib/Target/RISCV/RISCVCombine.td index 3a5afb1b075c..7a14822f6bf1 100644 --- a/llvm/lib/Target/RISCV/RISCVCombine.td +++ b/llvm/lib/Target/RISCV/RISCVCombine.td @@ -12,7 +12,7 @@ include "llvm/Target/GlobalISel/Combine.td" def RISCVPreLegalizerCombiner: GICombiner< - "RISCVPreLegalizerCombinerImpl", [all_combines]> { + "RISCVPreLegalizerCombinerImpl", [all_combines, shufflevector_merge]> { } def RISCVO0PreLegalizerCombiner: GICombiner< diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-shufflevector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shufflevector.mir index 0de989f8be75..b515593e5c4a 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-shufflevector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shufflevector.mir @@ -101,7 +101,9 @@ body: | ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS %a(<4 x s8>), %b(<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>) ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS %c(<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>) - ; CHECK-NEXT: %z:_(<16 x s8>) = G_SHUFFLE_VECTOR [[CONCAT_VECTORS]](<16 x s8>), [[CONCAT_VECTORS1]], shufflemask(0, undef, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, undef, undef, undef, undef) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s8>), [[UV1:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s8>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s8>), [[UV3:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<16 x s8>) + ; CHECK-NEXT: %z:_(<16 x s8>) = G_CONCAT_VECTORS [[UV]](<8 x s8>), [[UV2]](<8 x s8>) ; CHECK-NEXT: $q0 = COPY %z(<16 x s8>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %p1:_(p0) = COPY $x0 @@ -179,7 +181,9 @@ body: | ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS %a(<4 x s8>), %b(<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>) ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS %c(<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>) - ; CHECK-NEXT: %z:_(<16 x s8>) = G_SHUFFLE_VECTOR [[CONCAT_VECTORS]](<16 x s8>), [[CONCAT_VECTORS1]], shufflemask(undef, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, undef, undef, undef, undef) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s8>), [[UV1:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s8>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s8>), [[UV3:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<16 x s8>) + ; CHECK-NEXT: %z:_(<16 x s8>) = G_CONCAT_VECTORS [[UV]](<8 x s8>), [[UV2]](<8 x s8>) ; CHECK-NEXT: $q0 = COPY %z(<16 x s8>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %p1:_(p0) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll index f7efaeaa5070..df52db40b142 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll @@ -8,13 +8,18 @@ declare i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32>) #0 define i32 @bar() { ; CHECK-LABEL: bar: ; CHECK: ; %bb.0: ; %bb -; CHECK-NEXT: movi.2d v0, #0000000000000000 -; CHECK-NEXT: mov b1, v0[1] -; CHECK-NEXT: mov b2, v0[2] -; CHECK-NEXT: mov b3, 
v0[3] -; CHECK-NEXT: mov.h v0[1], v1[0] +; CHECK-NEXT: mov w8, #0 ; =0x0 +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: mov.16b v1, v0 +; CHECK-NEXT: mov.b v1[1], v0[0] +; CHECK-NEXT: mov.b v1[2], v0[0] +; CHECK-NEXT: mov.b v1[3], v0[0] +; CHECK-NEXT: mov b0, v1[1] +; CHECK-NEXT: mov b2, v1[2] +; CHECK-NEXT: mov b3, v1[3] +; CHECK-NEXT: mov.h v1[1], v0[0] ; CHECK-NEXT: mov.h v2[1], v3[0] -; CHECK-NEXT: ushll.4s v0, v0, #0 +; CHECK-NEXT: ushll.4s v0, v1, #0 ; CHECK-NEXT: ushll.4s v1, v2, #0 ; CHECK-NEXT: mov.d v0[1], v1[0] ; CHECK-NEXT: movi.4s v1, #1 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir index 2c9ae5b06b62..58b1a5ec7602 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir @@ -270,8 +270,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 - ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(4, 5, 0, 1) - ; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV2]](<2 x s32>), [[UV]](<2 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit [[CONCAT_VECTORS]](<4 x s32>) %0:_(<4 x s32>) = COPY $q0 %1:_(<4 x s32>) = COPY $q1 %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1(<4 x s32>), shufflemask(4,5,0,1) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-undef.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-undef.mir index 7db4526ea070..5bf0f6c6186b 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-undef.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-undef.mir @@ -226,7 +226,6 @@ body: | %0:_(<2 x s32>) = G_SHUFFLE_VECTOR %1(<2 x s32>), %2(<2 x s32>), shufflemask(0, 1) $d0 = COPY %0(<2 x s32>) RET_ReallyLR implicit $d0 - ... --- name: shl_undef_rhs @@ -305,7 +304,6 @@ alignment: 4 tracksRegLiveness: true body: | bb.0: - ; Optimize these to zero? 
; CHECK-LABEL: name: ashr_undef_lhs ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 10 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll index 749d6071c98d..cab45c64398a 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -1776,19 +1776,10 @@ entry: } define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 { -; CHECK-SD-LABEL: test_concat_v16i8_v16i8_v16i8: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_concat_v16i8_v16i8_v16i8: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: adrp x8, .LCPI126_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI126_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ret entry: %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> ret <16 x i8> %vecinit30 @@ -1803,9 +1794,7 @@ define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 { ; ; CHECK-GI-LABEL: test_concat_v16i8_v8i8_v16i8: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: adrp x8, .LCPI127_0 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: mov b2, v0.b[1] ; CHECK-GI-NEXT: mov b3, v0.b[2] ; CHECK-GI-NEXT: mov b4, v0.b[3] @@ -1814,14 +1803,13 @@ define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 { ; CHECK-GI-NEXT: mov b7, v0.b[6] ; CHECK-GI-NEXT: mov b16, v0.b[7] ; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI127_0] ; CHECK-GI-NEXT: mov v0.b[2], v3.b[0] ; CHECK-GI-NEXT: mov v0.b[3], v4.b[0] ; CHECK-GI-NEXT: mov v0.b[4], v5.b[0] ; CHECK-GI-NEXT: mov v0.b[5], v6.b[0] ; CHECK-GI-NEXT: mov v0.b[6], v7.b[0] ; CHECK-GI-NEXT: mov v0.b[7], v16.b[0] -; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] ; CHECK-GI-NEXT: ret entry: %vecext = extractelement <8 x i8> %x, i32 0 @@ -1999,19 +1987,10 @@ entry: } define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 { -; CHECK-SD-LABEL: test_concat_v8i16_v8i16_v8i16: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_concat_v8i16_v8i16_v8i16: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: adrp x8, .LCPI130_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI130_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ret entry: %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> ret <8 x i16> %vecinit14 @@ -2026,17 +2005,14 @@ define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 { ; ; CHECK-GI-LABEL: test_concat_v8i16_v4i16_v8i16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 
killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: adrp x8, .LCPI131_0 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: mov h2, v0.h[1] ; CHECK-GI-NEXT: mov h3, v0.h[2] ; CHECK-GI-NEXT: mov h4, v0.h[3] ; CHECK-GI-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI131_0] ; CHECK-GI-NEXT: mov v0.h[2], v3.h[0] ; CHECK-GI-NEXT: mov v0.h[3], v4.h[0] -; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] ; CHECK-GI-NEXT: ret entry: %vecext = extractelement <4 x i16> %x, i32 0 @@ -2142,19 +2118,10 @@ entry: } define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 { -; CHECK-SD-LABEL: test_concat_v4i32_v4i32_v4i32: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: test_concat_v4i32_v4i32_v4i32: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: adrp x8, .LCPI134_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI134_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ret entry: %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> ret <4 x i32> %vecinit6 @@ -2169,13 +2136,10 @@ define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 { ; ; CHECK-GI-LABEL: test_concat_v4i32_v2i32_v4i32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: adrp x8, .LCPI135_0 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: mov s2, v0.s[1] ; CHECK-GI-NEXT: mov v0.s[1], v2.s[0] -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI135_0] -; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] ; CHECK-GI-NEXT: ret entry: %vecext = extractelement <2 x i32> %x, i32 0 diff --git a/llvm/test/CodeGen/AArch64/ext-narrow-index.ll b/llvm/test/CodeGen/AArch64/ext-narrow-index.ll index 2c5d33da93c8..db8250db4320 100644 --- a/llvm/test/CodeGen/AArch64/ext-narrow-index.ll +++ b/llvm/test/CodeGen/AArch64/ext-narrow-index.ll @@ -42,8 +42,7 @@ define <8 x i8> @i8_off8(<16 x i8> %arg1, <16 x i8> %arg2) { ; ; CHECK-GISEL-LABEL: i8_off8: ; CHECK-GISEL: // %bb.0: // %entry -; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #8 -; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GISEL-NEXT: mov d0, v0.d[1] ; CHECK-GISEL-NEXT: ret entry: %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> @@ -254,9 +253,7 @@ define <8 x i8> @i8_zero_off8(<16 x i8> %arg1) { ; ; CHECK-GISEL-LABEL: i8_zero_off8: ; CHECK-GISEL: // %bb.0: // %entry -; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000 -; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #8 -; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GISEL-NEXT: mov d0, v0.d[1] ; CHECK-GISEL-NEXT: ret entry: %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> diff --git a/llvm/test/CodeGen/AArch64/shufflevector.ll b/llvm/test/CodeGen/AArch64/shufflevector.ll index b1131f287fe9..cfe5ec039ab9 100644 --- a/llvm/test/CodeGen/AArch64/shufflevector.ll +++ b/llvm/test/CodeGen/AArch64/shufflevector.ll @@ 
-210,24 +210,14 @@ define i32 @shufflevector_v4i8(<4 x i8> %a, <4 x i8> %b){ ; CHECK-GI-LABEL: shufflevector_v4i8: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-GI-NEXT: mov h2, v0.h[1] -; CHECK-GI-NEXT: mov h3, v1.h[1] -; CHECK-GI-NEXT: adrp x8, .LCPI15_0 -; CHECK-GI-NEXT: mov h4, v0.h[2] -; CHECK-GI-NEXT: mov h5, v0.h[3] -; CHECK-GI-NEXT: mov h6, v1.h[3] -; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] -; CHECK-GI-NEXT: mov h2, v1.h[2] -; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] -; CHECK-GI-NEXT: mov v0.b[2], v4.b[0] -; CHECK-GI-NEXT: mov v1.b[2], v2.b[0] -; CHECK-GI-NEXT: mov v0.b[3], v5.b[0] -; CHECK-GI-NEXT: mov v1.b[3], v6.b[0] -; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI15_0] -; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b -; CHECK-GI-NEXT: fmov w0, s0 +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: mov h0, v0.h[2] +; CHECK-GI-NEXT: mov v2.b[1], v0.b[0] +; CHECK-GI-NEXT: mov h0, v1.h[3] +; CHECK-GI-NEXT: mov v2.b[2], v1.b[0] +; CHECK-GI-NEXT: mov v2.b[3], v0.b[0] +; CHECK-GI-NEXT: fmov w0, s2 ; CHECK-GI-NEXT: ret %c = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> %d = bitcast <4 x i8> %c to i32 @@ -280,14 +270,8 @@ define i32 @shufflevector_v2i16(<2 x i16> %a, <2 x i16> %b){ ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-GI-NEXT: mov s2, v0.s[1] -; CHECK-GI-NEXT: mov s3, v1.s[1] -; CHECK-GI-NEXT: adrp x8, .LCPI17_0 -; CHECK-GI-NEXT: mov v0.h[1], v2.h[0] -; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] -; CHECK-GI-NEXT: mov v0.d[1], v1.d[0] -; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI17_0] -; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b +; CHECK-GI-NEXT: mov s0, v0.s[1] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret %c = shufflevector <2 x i16> %a, <2 x i16> %b, <2 x i32> @@ -397,9 +381,12 @@ define i32 @shufflevector_v4i8_zeroes(<4 x i8> %a, <4 x i8> %b){ ; ; CHECK-GI-LABEL: shufflevector_v4i8_zeroes: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fmov w8, s0 -; CHECK-GI-NEXT: dup v0.8b, w8 -; CHECK-GI-NEXT: fmov w0, s0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov v1.16b, v0.16b +; CHECK-GI-NEXT: mov v1.b[1], v0.b[0] +; CHECK-GI-NEXT: mov v1.b[2], v0.b[0] +; CHECK-GI-NEXT: mov v1.b[3], v0.b[0] +; CHECK-GI-NEXT: fmov w0, s1 ; CHECK-GI-NEXT: ret %c = shufflevector <4 x i8> %a, <4 x i8> %b, <4 x i32> %d = bitcast <4 x i8> %c to i32 @@ -433,8 +420,8 @@ define i32 @shufflevector_v2i16_zeroes(<2 x i16> %a, <2 x i16> %b){ ; ; CHECK-GI-LABEL: shufflevector_v2i16_zeroes: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fmov w8, s0 -; CHECK-GI-NEXT: dup v0.4h, w8 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov v0.h[1], v0.h[0] ; CHECK-GI-NEXT: fmov w0, s0 ; CHECK-GI-NEXT: ret %c = shufflevector <2 x i16> %a, <2 x i16> %b, <2 x i32> @@ -492,20 +479,11 @@ define <3 x i8> @shufflevector_v3i8(<3 x i8> %a, <3 x i8> %b) { ; ; CHECK-GI-LABEL: shufflevector_v3i8: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fmov s0, w0 -; CHECK-GI-NEXT: fmov s1, w1 -; CHECK-GI-NEXT: adrp x8, .LCPI30_0 -; CHECK-GI-NEXT: fmov s2, w3 -; CHECK-GI-NEXT: fmov s3, w4 -; CHECK-GI-NEXT: mov v0.b[1], v1.b[0] +; CHECK-GI-NEXT: fmov s0, w1 ; CHECK-GI-NEXT: fmov s1, w2 -; CHECK-GI-NEXT: mov v2.b[1], v3.b[0] -; CHECK-GI-NEXT: fmov s3, w5 +; CHECK-GI-NEXT: mov v0.b[1], v1.b[0] +; CHECK-GI-NEXT: fmov s1, w4 ; CHECK-GI-NEXT: mov v0.b[2], 
v1.b[0] -; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI30_0] -; CHECK-GI-NEXT: mov v2.b[2], v3.b[0] -; CHECK-GI-NEXT: mov v0.d[1], v2.d[0] -; CHECK-GI-NEXT: tbl v0.16b, { v0.16b }, v1.16b ; CHECK-GI-NEXT: mov b1, v0.b[1] ; CHECK-GI-NEXT: mov b2, v0.b[2] ; CHECK-GI-NEXT: fmov w0, s0 @@ -614,11 +592,14 @@ define <3 x i8> @shufflevector_v3i8_zeroes(<3 x i8> %a, <3 x i8> %b) { ; ; CHECK-GI-LABEL: shufflevector_v3i8_zeroes: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: dup v0.8b, w0 -; CHECK-GI-NEXT: mov b1, v0.b[1] -; CHECK-GI-NEXT: mov b2, v0.b[2] -; CHECK-GI-NEXT: fmov w0, s0 -; CHECK-GI-NEXT: fmov w1, s1 +; CHECK-GI-NEXT: fmov s0, w0 +; CHECK-GI-NEXT: mov v1.16b, v0.16b +; CHECK-GI-NEXT: mov v1.b[1], v0.b[0] +; CHECK-GI-NEXT: mov v1.b[2], v0.b[0] +; CHECK-GI-NEXT: mov b0, v1.b[1] +; CHECK-GI-NEXT: mov b2, v1.b[2] +; CHECK-GI-NEXT: fmov w0, s1 +; CHECK-GI-NEXT: fmov w1, s0 ; CHECK-GI-NEXT: fmov w2, s2 ; CHECK-GI-NEXT: ret %c = shufflevector <3 x i8> %a, <3 x i8> %b, <3 x i32> diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll index 3254c5ebe9c6..42c68883351f 100644 --- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll @@ -3744,17 +3744,13 @@ define i32 @add_pair_v8i16_v4i32_double_sext_zext_shuffle(<8 x i16> %ax, <8 x i1 ; CHECK-GI-LABEL: add_pair_v8i16_v4i32_double_sext_zext_shuffle: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: ushll v4.4s, v0.4h, #0 -; CHECK-GI-NEXT: ushll2 v0.4s, v0.8h, #0 ; CHECK-GI-NEXT: ushll v5.4s, v1.4h, #0 -; CHECK-GI-NEXT: ushll2 v1.4s, v1.8h, #0 ; CHECK-GI-NEXT: ushll v6.4s, v2.4h, #0 -; CHECK-GI-NEXT: ushll2 v2.4s, v2.8h, #0 ; CHECK-GI-NEXT: ushll v7.4s, v3.4h, #0 -; CHECK-GI-NEXT: ushll2 v3.4s, v3.8h, #0 -; CHECK-GI-NEXT: add v0.4s, v4.4s, v0.4s -; CHECK-GI-NEXT: add v1.4s, v5.4s, v1.4s -; CHECK-GI-NEXT: add v2.4s, v6.4s, v2.4s -; CHECK-GI-NEXT: add v3.4s, v7.4s, v3.4s +; CHECK-GI-NEXT: uaddw2 v0.4s, v4.4s, v0.8h +; CHECK-GI-NEXT: uaddw2 v1.4s, v5.4s, v1.8h +; CHECK-GI-NEXT: uaddw2 v2.4s, v6.4s, v2.8h +; CHECK-GI-NEXT: uaddw2 v3.4s, v7.4s, v3.8h ; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: add v1.4s, v2.4s, v3.4s ; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s diff --git a/llvm/test/CodeGen/AIE/GlobalISel/xfail-legalize-shufflevector.mir b/llvm/test/CodeGen/AIE/GlobalISel/xfail-legalize-shufflevector.mir index 71cd84871cfc..9a03c03f8fa6 100644 --- a/llvm/test/CodeGen/AIE/GlobalISel/xfail-legalize-shufflevector.mir +++ b/llvm/test/CodeGen/AIE/GlobalISel/xfail-legalize-shufflevector.mir @@ -30,39 +30,6 @@ body: | PseudoRET implicit $lr ... ---- -name: f_32x8 -body: | - bb.0: - %1:_(<32 x s8>) = G_IMPLICIT_DEF - %2:_(p0) = G_IMPLICIT_DEF - %0:_(<32 x s8>) = G_SHUFFLE_VECTOR %1(<32 x s8>), %1, shufflemask(undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef) - G_STORE %0(<32 x s8>), %2(p0) :: (store (<32 x s8>) into `<32 x i8>* undef`, align 2) - PseudoRET implicit $lr -... - ---- -name: f_16x16 -body: | - bb.0: - %1:_(<16 x s16>) = G_IMPLICIT_DEF - %2:_(p0) = G_IMPLICIT_DEF - %0:_(<16 x s16>) = G_SHUFFLE_VECTOR %1(<16 x s16>), %1, shufflemask(undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef) - G_STORE %0(<16 x s16>), %2(p0) :: (store (<16 x s16>) into `<16 x i16>* undef`, align 2) - PseudoRET implicit $lr -... 
- ---- -name: f_32x16 -body: | - bb.0: - %1:_(<32 x s16>) = G_IMPLICIT_DEF - %2:_(p0) = G_IMPLICIT_DEF - %0:_(<32 x s16>) = G_SHUFFLE_VECTOR %1(<32 x s16>), %1, shufflemask(undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef, undef) - G_STORE %0(<32 x s16>), %2(p0) :: (store (<32 x s16>) into `<32 x i16>* undef`, align 2) - PseudoRET implicit $lr -... - --- name: f_2x64 body: | @@ -72,4 +39,3 @@ body: | %0:_(<2 x s64>) = G_SHUFFLE_VECTOR %1(<2 x s64>), %1, shufflemask(undef, undef) G_STORE %0(<2 x s64>), %2(p0) :: (store (<2 x s64>) into `<2 x i64>* undef`, align 2) PseudoRET implicit $lr -... diff --git a/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-aie-vshuffle.mir b/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-aie-vshuffle.mir new file mode 100644 index 000000000000..489db1e15e7c --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-aie-vshuffle.mir @@ -0,0 +1,83 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates +# +# RUN: llc -mtriple aie2 -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck %s + +--- +name: vshuffle_32_m35 +legalized: true +regBankSelected: true +tracksRegLiveness: true +stack: + - { id: 0, name: "", size: 128, alignment: 32 } +body: | + bb.0.entry: + liveins: $x2 + ; CHECK-LABEL: name: vshuffle_32_m35 + ; CHECK: liveins: $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x2 + ; CHECK-NEXT: [[MOV_RLC_imm10_pseudo:%[0-9]+]]:er = MOV_RLC_imm10_pseudo 29 + ; CHECK-NEXT: [[VSHUFFLE:%[0-9]+]]:vec512 = VSHUFFLE [[COPY]], [[COPY]], [[MOV_RLC_imm10_pseudo]] + ; CHECK-NEXT: $x0 = COPY [[VSHUFFLE]] + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:vregbank(<16 x s32>) = COPY $x2 + %2:gprregbank(s32) = G_CONSTANT i32 29 + %0:vregbank(<16 x s32>) = G_AIE_VSHUFFLE %1:vregbank, %1:vregbank, %2:gprregbank(s32) + $x0 = COPY %0:vregbank(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... + +--- +name: vshuffle_16_m35 +legalized: true +regBankSelected: true +tracksRegLiveness: true +stack: + - { id: 0, name: "", size: 128, alignment: 32 } +body: | + bb.0.entry: + liveins: $x2 + ; CHECK-LABEL: name: vshuffle_16_m35 + ; CHECK: liveins: $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x2 + ; CHECK-NEXT: [[MOV_RLC_imm10_pseudo:%[0-9]+]]:er = MOV_RLC_imm10_pseudo 29 + ; CHECK-NEXT: [[VSHUFFLE:%[0-9]+]]:vec512 = VSHUFFLE [[COPY]], [[COPY]], [[MOV_RLC_imm10_pseudo]] + ; CHECK-NEXT: $x0 = COPY [[VSHUFFLE]] + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:vregbank(<32 x s16>) = COPY $x2 + %2:gprregbank(s32) = G_CONSTANT i32 29 + %0:vregbank(<32 x s16>) = G_AIE_VSHUFFLE %1:vregbank, %1:vregbank, %2:gprregbank(s32) + $x0 = COPY %0:vregbank(<32 x s16>) + PseudoRET implicit $lr, implicit $x0 +... 
+ +--- +name: vshuffle_8_m35 +legalized: true +regBankSelected: true +tracksRegLiveness: true +stack: + - { id: 0, name: "", size: 128, alignment: 32 } +body: | + bb.0.entry: + liveins: $x2 + ; CHECK-LABEL: name: vshuffle_8_m35 + ; CHECK: liveins: $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x2 + ; CHECK-NEXT: [[MOV_RLC_imm10_pseudo:%[0-9]+]]:er = MOV_RLC_imm10_pseudo 29 + ; CHECK-NEXT: [[VSHUFFLE:%[0-9]+]]:vec512 = VSHUFFLE [[COPY]], [[COPY]], [[MOV_RLC_imm10_pseudo]] + ; CHECK-NEXT: $x0 = COPY [[VSHUFFLE]] + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:vregbank(<64 x s8>) = COPY $x2 + %2:gprregbank(s32) = G_CONSTANT i32 29 + %0:vregbank(<64 x s8>) = G_AIE_VSHUFFLE %1:vregbank, %1:vregbank, %2:gprregbank(s32) + $x0 = COPY %0:vregbank(<64 x s8>) + PseudoRET implicit $lr, implicit $x0 diff --git a/llvm/test/CodeGen/AIE/aie2/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AIE/aie2/GlobalISel/legalize-shuffle-vector.mir new file mode 100644 index 000000000000..b8a4a91b3c97 --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2/GlobalISel/legalize-shuffle-vector.mir @@ -0,0 +1,640 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates +# RUN: llc -mtriple aie2 -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck %s + +--- +name: test_shuffle_vec_256_32bit +body: | + bb.0: + liveins: $r6 + ; CHECK-LABEL: name: test_shuffle_vec_256_32bit + ; CHECK: liveins: $r6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<8 x s32>), [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT1:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<8 x s32>), [[C1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT2:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<8 x s32>), [[C2]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT3:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<8 x s32>), [[C3]](s32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT4:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<8 x s32>), [[C4]](s32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT5:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<8 x s32>), [[C5]](s32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT6:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<8 x s32>), [[C6]](s32) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT7:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<8 x s32>), [[C7]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT8:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<8 x s32>), [[C]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT9:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<8 x s32>), [[C1]](s32) + 
; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT10:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<8 x s32>), [[C2]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT11:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<8 x s32>), [[C3]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT12:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<8 x s32>), [[C4]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT13:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<8 x s32>), [[C5]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT14:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<8 x s32>), [[C6]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT15:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<8 x s32>), [[C7]](s32) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[DEF1]], [[AIE_SEXT_EXTRACT_VECTOR_ELT15]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT1:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT]], [[AIE_SEXT_EXTRACT_VECTOR_ELT14]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT2:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT1]], [[AIE_SEXT_EXTRACT_VECTOR_ELT13]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT3:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT2]], [[AIE_SEXT_EXTRACT_VECTOR_ELT12]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT4:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT3]], [[AIE_SEXT_EXTRACT_VECTOR_ELT11]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT5:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT4]], [[AIE_SEXT_EXTRACT_VECTOR_ELT10]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT6:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT5]], [[AIE_SEXT_EXTRACT_VECTOR_ELT9]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT7:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT6]], [[AIE_SEXT_EXTRACT_VECTOR_ELT8]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT8:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT7]], [[AIE_SEXT_EXTRACT_VECTOR_ELT7]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT9:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT8]], [[AIE_SEXT_EXTRACT_VECTOR_ELT6]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT10:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT9]], [[AIE_SEXT_EXTRACT_VECTOR_ELT5]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT11:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT10]], [[AIE_SEXT_EXTRACT_VECTOR_ELT4]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT12:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT11]], [[AIE_SEXT_EXTRACT_VECTOR_ELT3]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT13:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT12]], [[AIE_SEXT_EXTRACT_VECTOR_ELT2]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT14:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT13]], [[AIE_SEXT_EXTRACT_VECTOR_ELT1]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT15:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT14]], [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_ADD_VECTOR_ELT_LEFT15]](<16 x s32>) + %0:_(<8 x s32>) = 
G_IMPLICIT_DEF + %1:_(<16 x s32>) = G_SHUFFLE_VECTOR %0(<8 x s32>), %0(<8 x s32>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) + PseudoRET implicit $lr, implicit %1 +... + +--- +name: test_shuffle_vec_256_to_512_16bit +body: | + bb.0: + liveins: $r6 + ; CHECK-LABEL: name: test_shuffle_vec_256_to_512_16bit + ; CHECK: liveins: $r6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<16 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s16>), [[C]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT]], 16 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT1:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s16>), [[C1]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT1:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT1]], 16 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT2:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s16>), [[C2]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT2:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT2]], 16 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT3:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s16>), [[C3]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT3:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT3]], 16 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT4:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s16>), [[C4]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT4:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT4]], 16 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT5:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s16>), [[C5]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT5:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT5]], 16 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT6:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s16>), [[C6]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT6:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT6]], 16 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT7:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s16>), [[C7]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT7:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT7]], 16 + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT8:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s16>), [[C8]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT8:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT8]], 16 + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT9:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s16>), [[C9]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT9:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT9]], 16 + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT10:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s16>), 
[[C10]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT10:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT10]], 16 + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT11:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s16>), [[C11]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT11:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT11]], 16 + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT12:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s16>), [[C12]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT12:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT12]], 16 + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT13:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s16>), [[C13]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT13:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT13]], 16 + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT14:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s16>), [[C14]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT14:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT14]], 16 + ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT15:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s16>), [[C15]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT15:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT15]], 16 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT16:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<16 x s16>), [[C]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT16:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT16]], 16 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT17:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<16 x s16>), [[C1]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT17:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT17]], 16 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT18:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<16 x s16>), [[C2]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT18:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT18]], 16 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT19:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<16 x s16>), [[C3]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT19:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT19]], 16 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT20:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<16 x s16>), [[C4]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT20:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT20]], 16 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT21:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<16 x s16>), [[C5]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT21:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT21]], 16 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT22:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<16 x s16>), [[C6]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT22:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT22]], 16 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT23:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<16 x s16>), [[C7]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT23:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT23]], 16 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT24:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT 
[[DEF1]](<16 x s16>), [[C8]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT24:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT24]], 16 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT25:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<16 x s16>), [[C9]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT25:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT25]], 16 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT26:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<16 x s16>), [[C10]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT26:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT26]], 16 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT27:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<16 x s16>), [[C11]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT27:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT27]], 16 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT28:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<16 x s16>), [[C12]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT28:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT28]], 16 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT29:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<16 x s16>), [[C13]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT29:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT29]], 16 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT30:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<16 x s16>), [[C14]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT30:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT30]], 16 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT31:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<16 x s16>), [[C15]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT31:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT31]], 16 + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<32 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[DEF2]], [[ASSERT_SEXT31]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT1:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT]], [[ASSERT_SEXT30]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT2:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT1]], [[ASSERT_SEXT29]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT3:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT2]], [[ASSERT_SEXT28]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT4:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT3]], [[ASSERT_SEXT27]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT5:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT4]], [[ASSERT_SEXT26]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT6:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT5]], [[ASSERT_SEXT25]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT7:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT6]], [[ASSERT_SEXT24]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT8:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT7]], [[ASSERT_SEXT23]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT9:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT8]], [[ASSERT_SEXT22]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT10:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT9]], [[ASSERT_SEXT21]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT11:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT 
[[AIE_ADD_VECTOR_ELT_LEFT10]], [[ASSERT_SEXT20]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT12:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT11]], [[ASSERT_SEXT19]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT13:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT12]], [[ASSERT_SEXT18]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT14:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT13]], [[ASSERT_SEXT17]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT15:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT14]], [[ASSERT_SEXT16]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT16:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT15]], [[ASSERT_SEXT15]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT17:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT16]], [[ASSERT_SEXT14]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT18:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT17]], [[ASSERT_SEXT13]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT19:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT18]], [[ASSERT_SEXT12]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT20:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT19]], [[ASSERT_SEXT11]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT21:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT20]], [[ASSERT_SEXT10]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT22:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT21]], [[ASSERT_SEXT9]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT23:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT22]], [[ASSERT_SEXT8]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT24:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT23]], [[ASSERT_SEXT7]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT25:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT24]], [[ASSERT_SEXT6]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT26:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT25]], [[ASSERT_SEXT5]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT27:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT26]], [[ASSERT_SEXT4]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT28:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT27]], [[ASSERT_SEXT3]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT29:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT28]], [[ASSERT_SEXT2]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT30:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT29]], [[ASSERT_SEXT1]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT31:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT30]], [[ASSERT_SEXT]](s32) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_ADD_VECTOR_ELT_LEFT31]](<32 x s16>) + %0:_(<16 x s16>) = G_IMPLICIT_DEF + %1:_(<16 x s16>) = G_IMPLICIT_DEF + %2:_(<32 x s16>) = G_SHUFFLE_VECTOR %0(<16 x s16>), %1(<16 x s16>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31) + PseudoRET implicit $lr, implicit %2 +... 
+ +--- +name: test_shuffle_vec_256_to_512_8bit +body: | + bb.0: + liveins: $r6 + ; CHECK-LABEL: name: test_shuffle_vec_256_to_512_8bit + ; CHECK: liveins: $r6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<32 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT]], 8 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT1:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C1]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT1:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT1]], 8 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT2:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C2]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT2:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT2]], 8 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT3:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C3]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT3:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT3]], 8 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT4:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C4]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT4:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT4]], 8 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT5:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C5]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT5:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT5]], 8 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT6:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C6]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT6:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT6]], 8 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT7:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C7]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT7:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT7]], 8 + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT8:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C8]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT8:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT8]], 8 + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT9:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C9]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT9:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT9]], 8 + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT10:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C10]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT10:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT10]], 8 + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; CHECK-NEXT: 
[[AIE_SEXT_EXTRACT_VECTOR_ELT11:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C11]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT11:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT11]], 8 + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT12:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C12]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT12:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT12]], 8 + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT13:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C13]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT13:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT13]], 8 + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT14:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C14]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT14:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT14]], 8 + ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT15:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C15]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT15:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT15]], 8 + ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT16:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C16]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT16:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT16]], 8 + ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT17:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C17]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT17:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT17]], 8 + ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT18:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C18]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT18:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT18]], 8 + ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 19 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT19:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C19]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT19:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT19]], 8 + ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT20:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C20]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT20:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT20]], 8 + ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 21 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT21:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C21]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT21:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT21]], 8 + ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 22 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT22:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C22]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT22:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT22]], 8 + ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT23:%[0-9]+]]:_(s32) = 
G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C23]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT23:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT23]], 8 + ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT24:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C24]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT24:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT24]], 8 + ; CHECK-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT25:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C25]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT25:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT25]], 8 + ; CHECK-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 26 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT26:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C26]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT26:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT26]], 8 + ; CHECK-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 27 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT27:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C27]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT27:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT27]], 8 + ; CHECK-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT28:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C28]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT28:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT28]], 8 + ; CHECK-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT29:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C29]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT29:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT29]], 8 + ; CHECK-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT30:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C30]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT30:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT30]], 8 + ; CHECK-NEXT: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT31:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<32 x s8>), [[C31]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT31:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT31]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT32:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT32:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT32]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT33:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C1]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT33:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT33]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT34:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C2]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT34:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT34]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT35:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C3]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT35:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT35]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT36:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C4]](s32) + ; 
CHECK-NEXT: [[ASSERT_SEXT36:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT36]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT37:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C5]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT37:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT37]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT38:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C6]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT38:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT38]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT39:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C7]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT39:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT39]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT40:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C8]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT40:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT40]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT41:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C9]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT41:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT41]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT42:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C10]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT42:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT42]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT43:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C11]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT43:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT43]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT44:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C12]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT44:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT44]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT45:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C13]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT45:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT45]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT46:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C14]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT46:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT46]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT47:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C15]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT47:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT47]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT48:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C16]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT48:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT48]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT49:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C17]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT49:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT49]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT50:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C18]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT50:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT50]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT51:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C19]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT51:%[0-9]+]]:_(s32) = G_ASSERT_SEXT 
[[AIE_SEXT_EXTRACT_VECTOR_ELT51]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT52:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C20]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT52:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT52]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT53:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C21]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT53:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT53]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT54:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C22]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT54:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT54]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT55:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C23]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT55:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT55]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT56:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C24]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT56:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT56]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT57:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C25]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT57:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT57]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT58:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C26]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT58:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT58]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT59:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C27]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT59:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT59]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT60:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C28]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT60:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT60]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT61:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C29]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT61:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT61]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT62:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C30]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT62:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT62]], 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT63:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF1]](<32 x s8>), [[C31]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT63:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT63]], 8 + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<64 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[DEF2]], [[ASSERT_SEXT63]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT1:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT]], [[ASSERT_SEXT62]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT2:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT1]], [[ASSERT_SEXT61]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT3:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT2]], [[ASSERT_SEXT60]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT4:%[0-9]+]]:_(<64 x s8>) = 
G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT3]], [[ASSERT_SEXT59]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT5:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT4]], [[ASSERT_SEXT58]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT6:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT5]], [[ASSERT_SEXT57]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT7:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT6]], [[ASSERT_SEXT56]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT8:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT7]], [[ASSERT_SEXT55]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT9:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT8]], [[ASSERT_SEXT54]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT10:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT9]], [[ASSERT_SEXT53]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT11:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT10]], [[ASSERT_SEXT52]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT12:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT11]], [[ASSERT_SEXT51]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT13:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT12]], [[ASSERT_SEXT50]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT14:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT13]], [[ASSERT_SEXT49]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT15:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT14]], [[ASSERT_SEXT48]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT16:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT15]], [[ASSERT_SEXT47]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT17:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT16]], [[ASSERT_SEXT46]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT18:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT17]], [[ASSERT_SEXT45]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT19:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT18]], [[ASSERT_SEXT44]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT20:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT19]], [[ASSERT_SEXT43]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT21:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT20]], [[ASSERT_SEXT42]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT22:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT21]], [[ASSERT_SEXT41]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT23:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT22]], [[ASSERT_SEXT40]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT24:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT23]], [[ASSERT_SEXT39]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT25:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT24]], [[ASSERT_SEXT38]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT26:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT25]], [[ASSERT_SEXT37]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT27:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT26]], [[ASSERT_SEXT36]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT28:%[0-9]+]]:_(<64 
x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT27]], [[ASSERT_SEXT35]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT29:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT28]], [[ASSERT_SEXT34]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT30:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT29]], [[ASSERT_SEXT33]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT31:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT30]], [[ASSERT_SEXT32]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT32:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT31]], [[ASSERT_SEXT31]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT33:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT32]], [[ASSERT_SEXT30]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT34:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT33]], [[ASSERT_SEXT29]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT35:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT34]], [[ASSERT_SEXT28]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT36:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT35]], [[ASSERT_SEXT27]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT37:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT36]], [[ASSERT_SEXT26]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT38:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT37]], [[ASSERT_SEXT25]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT39:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT38]], [[ASSERT_SEXT24]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT40:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT39]], [[ASSERT_SEXT23]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT41:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT40]], [[ASSERT_SEXT22]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT42:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT41]], [[ASSERT_SEXT21]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT43:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT42]], [[ASSERT_SEXT20]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT44:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT43]], [[ASSERT_SEXT19]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT45:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT44]], [[ASSERT_SEXT18]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT46:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT45]], [[ASSERT_SEXT17]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT47:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT46]], [[ASSERT_SEXT16]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT48:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT47]], [[ASSERT_SEXT15]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT49:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT48]], [[ASSERT_SEXT14]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT50:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT49]], [[ASSERT_SEXT13]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT51:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT50]], [[ASSERT_SEXT12]](s32) + ; CHECK-NEXT: 
[[AIE_ADD_VECTOR_ELT_LEFT52:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT51]], [[ASSERT_SEXT11]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT53:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT52]], [[ASSERT_SEXT10]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT54:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT53]], [[ASSERT_SEXT9]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT55:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT54]], [[ASSERT_SEXT8]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT56:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT55]], [[ASSERT_SEXT7]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT57:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT56]], [[ASSERT_SEXT6]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT58:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT57]], [[ASSERT_SEXT5]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT59:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT58]], [[ASSERT_SEXT4]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT60:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT59]], [[ASSERT_SEXT3]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT61:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT60]], [[ASSERT_SEXT2]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT62:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT61]], [[ASSERT_SEXT1]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT63:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT62]], [[ASSERT_SEXT]](s32) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_ADD_VECTOR_ELT_LEFT63]](<64 x s8>) + %0:_(<32 x s8>) = G_IMPLICIT_DEF + %1:_(<32 x s8>) = G_IMPLICIT_DEF + %2:_(<64 x s8>) = G_SHUFFLE_VECTOR %0(<32 x s8>), %1(<32 x s8>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63) + PseudoRET implicit $lr, implicit %2 +... 
+ +--- +name: test_shuffle_vec_128_to_256_32bit +body: | + bb.0: + ; CHECK-LABEL: name: test_shuffle_vec_128_to_256_32bit + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF1]](<16 x s32>) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[COPY]], [[DEF]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT1:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT]], [[DEF]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT2:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT1]], [[DEF]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT3:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT2]], [[DEF]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT4:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT3]], [[C]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT5:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT4]], [[C]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT6:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT5]], [[C]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT7:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT6]], [[C]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s32>), [[UV1:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[AIE_ADD_VECTOR_ELT_LEFT7]](<16 x s32>) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[UV]](<8 x s32>), [[C1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT1:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[UV]](<8 x s32>), [[C2]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT2:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[UV]](<8 x s32>), [[C3]](s32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT3:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[UV]](<8 x s32>), [[C4]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT4:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[UV]](<8 x s32>), [[C1]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT5:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[UV]](<8 x s32>), [[C2]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT6:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[UV]](<8 x s32>), [[C3]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT7:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[UV]](<8 x s32>), [[C4]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT8:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[DEF1]], [[AIE_SEXT_EXTRACT_VECTOR_ELT7]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT9:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT8]], [[AIE_SEXT_EXTRACT_VECTOR_ELT6]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT10:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT9]], [[AIE_SEXT_EXTRACT_VECTOR_ELT5]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT11:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT10]], [[AIE_SEXT_EXTRACT_VECTOR_ELT4]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT12:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT 
[[AIE_ADD_VECTOR_ELT_LEFT11]], [[AIE_SEXT_EXTRACT_VECTOR_ELT3]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT13:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT12]], [[AIE_SEXT_EXTRACT_VECTOR_ELT2]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT14:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT13]], [[AIE_SEXT_EXTRACT_VECTOR_ELT1]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT15:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT14]], [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[AIE_ADD_VECTOR_ELT_LEFT15]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV2]](<8 x s32>) + %0:_(s32) = G_CONSTANT i32 42 + %1:_(<4 x s32>) = G_BUILD_VECTOR %0(s32), %0(s32), %0(s32), %0(s32) + %2:_(<8 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %1(<4 x s32>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7) + PseudoRET implicit $lr, implicit %2 +... + +--- +name: test_shuffle_vec_512_to_1024_32bit +body: | + bb.0: + ; CHECK-LABEL: name: test_shuffle_vec_512_to_1024_32bit + ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT1:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT2:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C2]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT3:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C3]](s32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT4:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C4]](s32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT5:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C5]](s32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT6:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C6]](s32) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT7:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C7]](s32) + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT8:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C8]](s32) + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT9:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C9]](s32) + ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT10:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C10]](s32) + ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT11:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C11]](s32) + ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: 
[[AIE_SEXT_EXTRACT_VECTOR_ELT12:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C12]](s32) + ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT13:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C13]](s32) + ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT14:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C14]](s32) + ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT15:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C15]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT16:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT17:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C1]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT18:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C2]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT19:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C3]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT20:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C4]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT21:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C5]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT22:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C6]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT23:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C7]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT24:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C8]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT25:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C9]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT26:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C10]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT27:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C11]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT28:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C12]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT29:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C13]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT30:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C14]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT31:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<16 x s32>), [[C15]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[COPY]], [[AIE_SEXT_EXTRACT_VECTOR_ELT15]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT1:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT]], [[AIE_SEXT_EXTRACT_VECTOR_ELT14]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT2:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT1]], [[AIE_SEXT_EXTRACT_VECTOR_ELT13]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT3:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT2]], [[AIE_SEXT_EXTRACT_VECTOR_ELT12]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT4:%[0-9]+]]:_(<16 x s32>) = 
G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT3]], [[AIE_SEXT_EXTRACT_VECTOR_ELT11]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT5:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT4]], [[AIE_SEXT_EXTRACT_VECTOR_ELT10]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT6:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT5]], [[AIE_SEXT_EXTRACT_VECTOR_ELT9]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT7:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT6]], [[AIE_SEXT_EXTRACT_VECTOR_ELT8]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT8:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT7]], [[AIE_SEXT_EXTRACT_VECTOR_ELT7]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT9:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT8]], [[AIE_SEXT_EXTRACT_VECTOR_ELT6]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT10:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT9]], [[AIE_SEXT_EXTRACT_VECTOR_ELT5]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT11:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT10]], [[AIE_SEXT_EXTRACT_VECTOR_ELT4]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT12:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT11]], [[AIE_SEXT_EXTRACT_VECTOR_ELT3]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT13:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT12]], [[AIE_SEXT_EXTRACT_VECTOR_ELT2]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT14:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT13]], [[AIE_SEXT_EXTRACT_VECTOR_ELT1]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT15:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT14]], [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT16:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[COPY1]], [[AIE_SEXT_EXTRACT_VECTOR_ELT31]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT17:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT16]], [[AIE_SEXT_EXTRACT_VECTOR_ELT30]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT18:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT17]], [[AIE_SEXT_EXTRACT_VECTOR_ELT29]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT19:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT18]], [[AIE_SEXT_EXTRACT_VECTOR_ELT28]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT20:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT19]], [[AIE_SEXT_EXTRACT_VECTOR_ELT27]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT21:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT20]], [[AIE_SEXT_EXTRACT_VECTOR_ELT26]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT22:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT21]], [[AIE_SEXT_EXTRACT_VECTOR_ELT25]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT23:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT22]], [[AIE_SEXT_EXTRACT_VECTOR_ELT24]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT24:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT23]], [[AIE_SEXT_EXTRACT_VECTOR_ELT23]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT25:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT 
[[AIE_ADD_VECTOR_ELT_LEFT24]], [[AIE_SEXT_EXTRACT_VECTOR_ELT22]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT26:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT25]], [[AIE_SEXT_EXTRACT_VECTOR_ELT21]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT27:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT26]], [[AIE_SEXT_EXTRACT_VECTOR_ELT20]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT28:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT27]], [[AIE_SEXT_EXTRACT_VECTOR_ELT19]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT29:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT28]], [[AIE_SEXT_EXTRACT_VECTOR_ELT18]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT30:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT29]], [[AIE_SEXT_EXTRACT_VECTOR_ELT17]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT31:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT30]], [[AIE_SEXT_EXTRACT_VECTOR_ELT16]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[AIE_ADD_VECTOR_ELT_LEFT15]](<16 x s32>), [[AIE_ADD_VECTOR_ELT_LEFT31]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[CONCAT_VECTORS]](<32 x s32>) + %0:_(<16 x s32>) = G_IMPLICIT_DEF + %1:_(<32 x s32>) = G_SHUFFLE_VECTOR %0(<16 x s32>), %0(<16 x s32>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31) + PseudoRET implicit $lr, implicit %1 +... + +# ShuffleVec also concatenates constants into vectors +--- +name: test_shuffle_vec_16_to_32 +body: | + bb.0: + ; CHECK-LABEL: name: test_shuffle_vec_16_to_32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[BITCAST]](<2 x s16>) + %0:_(s16) = G_CONSTANT i16 8 + %1:_(s16) = G_CONSTANT i16 16 + %2:_(<2 x s16>) = G_SHUFFLE_VECTOR %0(s16), %1(s16), shufflemask(1, 0) + PseudoRET implicit $lr, implicit %2 +--- + +... +# AArch64 supports transforming shuffles with the same dst and src type, so we do the same here. It has a different +# behaviour in that it enlarges the size of the destination vector, shuffles the bigger vector and +# throws out the higher-order elements. This is similar to AIE's ShuffleVec instruction.
+ +--- +name: test_canonised_256_to_512_32bit +body: | + bb.0: + ; CHECK-LABEL: name: test_canonised_256_to_512_32bit + ; CHECK: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<8 x s32>), [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT1:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<8 x s32>), [[C1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT2:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<8 x s32>), [[C2]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT3:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<8 x s32>), [[C3]](s32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT4:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<8 x s32>), [[C4]](s32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT5:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<8 x s32>), [[C5]](s32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT6:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<8 x s32>), [[C6]](s32) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT7:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[DEF]](<8 x s32>), [[C7]](s32) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[DEF1]], [[AIE_SEXT_EXTRACT_VECTOR_ELT7]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT1:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT]], [[AIE_SEXT_EXTRACT_VECTOR_ELT6]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT2:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT1]], [[AIE_SEXT_EXTRACT_VECTOR_ELT5]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT3:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT2]], [[AIE_SEXT_EXTRACT_VECTOR_ELT4]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT4:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT3]], [[AIE_SEXT_EXTRACT_VECTOR_ELT3]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT5:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT4]], [[AIE_SEXT_EXTRACT_VECTOR_ELT2]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT6:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT5]], [[AIE_SEXT_EXTRACT_VECTOR_ELT1]](s32) + ; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_LEFT7:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_LEFT [[AIE_ADD_VECTOR_ELT_LEFT6]], [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s32>), [[UV1:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[AIE_ADD_VECTOR_ELT_LEFT7]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV]](<8 x s32>) + %0:_(<8 x s32>) = G_IMPLICIT_DEF + %1:_(<8 x s32>) = G_IMPLICIT_DEF + %2:_(<8 x s32>) = G_SHUFFLE_VECTOR %0(<8 x s32>), %0(<8 x s32>), shufflemask(0, 1, 2, 3, 12, 13, 14, 15) + PseudoRET implicit $lr, implicit %2 diff --git a/llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-shufflevector-buildvector.mir 
b/llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-shufflevector-buildvector.mir new file mode 100644 index 000000000000..9b0bf413a636 --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-shufflevector-buildvector.mir @@ -0,0 +1,420 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates +# RUN: llc -mtriple aie2 -run-pass=aie2-prelegalizer-combiner %s -verify-machineinstrs -o - | FileCheck %s +--- +name: shufflevector_both_registers +legalized: false +body: | + bb.1.entry: + liveins: $wl2, $wl4 + ; CHECK-LABEL: name: shufflevector_both_registers + ; CHECK: liveins: $wl2, $wl4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl4 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<8 x s32>), [[COPY1]], shufflemask(4, 5, 3, 7, 2, 7, 1, 4, 1, 1, 5, 3, 3, 2, 0, 0) + ; CHECK-NEXT: $x0 = COPY [[SHUF]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:_(<8 x s32>) = COPY $wl2 + %2:_(<8 x s32>) = COPY $wl4 + %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_, shufflemask(4, 5, 3, 7, 2, 7, 1, 4, 1, 1, 5, 3, 3, 2, 0, 0) + $x0 = COPY %0:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... +--- +name: shufflevector_both_registers_undef +legalized: false +body: | + bb.1.entry: + liveins: $wl2, $wl4 + ; CHECK-LABEL: name: shufflevector_both_registers_undef + ; CHECK: liveins: $wl2, $wl4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl4 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<8 x s32>), [[COPY1]], shufflemask(4, 5, undef, 7, undef, 7, undef, 4, 1, 1, 5, undef, 3, undef, 0, 0) + ; CHECK-NEXT: $x0 = COPY [[SHUF]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:_(<8 x s32>) = COPY $wl2 + %2:_(<8 x s32>) = COPY $wl4 + %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_, shufflemask(4, 5, -1, 7, -1, 7, -1, 4, 1, 1, 5, -1, 3, -1, 0, 0) + $x0 = COPY %0:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... 
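+
+# The tests below check that a shuffle whose sources are both G_BUILD_VECTORs is folded
+# into a single G_BUILD_VECTOR picking the scalar operands selected by the mask;
+# undef mask entries become G_IMPLICIT_DEF operands.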
+--- +name: shufflevector_order +legalized: false +body: | + bb.1.entry: + liveins: $wl4 + ; CHECK-LABEL: name: shufflevector_order + ; CHECK: liveins: $wl4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32), [[C5]](s32), [[C6]](s32), [[C7]](s32), [[C7]](s32), [[C6]](s32), [[C5]](s32), [[C4]](s32), [[C3]](s32), [[C2]](s32), [[C1]](s32), [[C]](s32) + ; CHECK-NEXT: $x0 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %0:_(s32) = G_CONSTANT i32 0 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(s32) = G_CONSTANT i32 2 + %3:_(s32) = G_CONSTANT i32 3 + %4:_(s32) = G_CONSTANT i32 4 + %5:_(s32) = G_CONSTANT i32 5 + %6:_(s32) = G_CONSTANT i32 6 + %7:_(s32) = G_CONSTANT i32 7 + %8:_(<8 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32), %4(s32), %5(s32), %6(s32), %7(s32) + %9:_(<8 x s32>) = G_BUILD_VECTOR %7(s32), %6(s32), %5(s32), %4(s32), %3(s32), %2(s32), %1(s32), %0(s32) + %10:_(<16 x s32>) = G_SHUFFLE_VECTOR %8:_(<8 x s32>), %9:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) + $x0 = COPY %10:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... +--- +name: shufflevector_order_undef +legalized: false +body: | + bb.1.entry: + liveins: $wl4 + ; CHECK-LABEL: name: shufflevector_order_undef + ; CHECK: liveins: $wl4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[DEF]](s32), [[C3]](s32), [[DEF]](s32), [[C4]](s32), [[C5]](s32), [[C6]](s32), [[C6]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[C2]](s32), [[C1]](s32), [[C]](s32) + ; CHECK-NEXT: $x0 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %0:_(s32) = G_CONSTANT i32 0 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(s32) = G_CONSTANT i32 2 + %3:_(s32) = G_CONSTANT i32 3 + %4:_(s32) = G_CONSTANT i32 4 + %5:_(s32) = G_CONSTANT i32 5 + %6:_(s32) = G_CONSTANT i32 6 + %7:_(s32) = G_CONSTANT i32 7 + %8:_(<8 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32), %4(s32), %5(s32), %6(s32), %7(s32) + %9:_(<8 x s32>) = G_BUILD_VECTOR %7(s32), %6(s32), %5(s32), %4(s32), %3(s32), %2(s32), %1(s32), %0(s32) + %10:_(<16 x s32>) = G_SHUFFLE_VECTOR %8:_(<8 x s32>), %9:_, shufflemask(0, 1, -1, 3, -1, 5, 6, 7, 8, -1, -1, -1, -1, 13, 14, 15) + $x0 = COPY %10:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... 
+--- +name: shufflevector_order_reverse +legalized: false +body: | + bb.1.entry: + liveins: $wl4 + ; CHECK-LABEL: name: shufflevector_order_reverse + ; CHECK: liveins: $wl4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[C7]](s32), [[C6]](s32), [[C5]](s32), [[C4]](s32), [[C3]](s32), [[C2]](s32), [[C1]](s32), [[C]](s32), [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32), [[C5]](s32), [[C6]](s32), [[C7]](s32) + ; CHECK-NEXT: $x0 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %0:_(s32) = G_CONSTANT i32 0 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(s32) = G_CONSTANT i32 2 + %3:_(s32) = G_CONSTANT i32 3 + %4:_(s32) = G_CONSTANT i32 4 + %5:_(s32) = G_CONSTANT i32 5 + %6:_(s32) = G_CONSTANT i32 6 + %7:_(s32) = G_CONSTANT i32 7 + %8:_(<8 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32), %4(s32), %5(s32), %6(s32), %7(s32) + %9:_(<8 x s32>) = G_BUILD_VECTOR %7(s32), %6(s32), %5(s32), %4(s32), %3(s32), %2(s32), %1(s32), %0(s32) + %10:_(<16 x s32>) = G_SHUFFLE_VECTOR %8:_(<8 x s32>), %9:_, shufflemask(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7) + $x0 = COPY %10:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... +--- +name: shufflevector_bv_reg +legalized: false +body: | + bb.1.entry: + liveins: $wl4 + ; CHECK-LABEL: name: shufflevector_bv_reg + ; CHECK: liveins: $wl4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32), [[C5]](s32), [[C6]](s32), [[C7]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl4 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s32>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR]](<8 x s32>), [[COPY]], shufflemask(4, 5, 3, 7, 2, 7, 1, 4, 1, 1, 5, 3, 3, 2, 0, 0) + ; CHECK-NEXT: $x0 = COPY [[SHUF]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %0:_(s32) = G_CONSTANT i32 28 + %1:_(s32) = G_CONSTANT i32 24 + %2:_(s32) = G_CONSTANT i32 18 + %3:_(s32) = G_CONSTANT i32 8 + %4:_(s32) = G_CONSTANT i32 7 + %5:_(s32) = G_CONSTANT i32 3 + %6:_(s32) = G_CONSTANT i32 2 + %7:_(s32) = G_CONSTANT i32 1 + %8:_(<8 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32), %4(s32), %5(s32), %6(s32), %7(s32) + %9:_(<8 x s32>) = COPY $wl4 + %10:_(<16 x s32>) = G_SHUFFLE_VECTOR %8:_(<8 x s32>), %9:_, shufflemask(4, 5, 3, 7, 2, 7, 1, 4, 1, 1, 5, 3, 3, 2, 0, 0) + $x0 = COPY %10:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... 
+--- +name: shufflevector_bv_reg_undef +legalized: false +body: | + bb.1.entry: + liveins: $wl4 + ; CHECK-LABEL: name: shufflevector_bv_reg_undef + ; CHECK: liveins: $wl4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32), [[C5]](s32), [[C6]](s32), [[C7]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl4 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s32>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR]](<8 x s32>), [[COPY]], shufflemask(4, 5, 3, 7, undef, 7, 1, undef, 1, undef, 5, undef, 3, undef, 0, 0) + ; CHECK-NEXT: $x0 = COPY [[SHUF]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %0:_(s32) = G_CONSTANT i32 28 + %1:_(s32) = G_CONSTANT i32 24 + %2:_(s32) = G_CONSTANT i32 18 + %3:_(s32) = G_CONSTANT i32 8 + %4:_(s32) = G_CONSTANT i32 7 + %5:_(s32) = G_CONSTANT i32 3 + %6:_(s32) = G_CONSTANT i32 2 + %7:_(s32) = G_CONSTANT i32 1 + %8:_(<8 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32), %4(s32), %5(s32), %6(s32), %7(s32) + %9:_(<8 x s32>) = COPY $wl4 + %10:_(<16 x s32>) = G_SHUFFLE_VECTOR %8:_(<8 x s32>), %9:_, shufflemask(4, 5, 3, 7, -1, 7, 1, -1, 1, -1, 5, -1, 3, -1, 0, 0) + $x0 = COPY %10:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... +--- +name: shufflevector_bv_undef +legalized: false +body: | + bb.1.entry: + ; CHECK-LABEL: name: shufflevector_bv_undef + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C5]](s32), [[C3]](s32), [[C6]](s32), [[C2]](s32), [[C6]](s32), [[C1]](s32), [[C4]](s32), [[C1]](s32), [[C1]](s32), [[C5]](s32), [[C3]](s32), [[C3]](s32), [[C2]](s32), [[C]](s32), [[C]](s32) + ; CHECK-NEXT: $x0 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %0:_(s32) = G_CONSTANT i32 28 + %1:_(s32) = G_CONSTANT i32 24 + %2:_(s32) = G_CONSTANT i32 18 + %3:_(s32) = G_CONSTANT i32 8 + %4:_(s32) = G_CONSTANT i32 7 + %5:_(s32) = G_CONSTANT i32 3 + %6:_(s32) = G_CONSTANT i32 2 + %7:_(s32) = G_CONSTANT i32 1 + %8:_(<8 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32), %4(s32), %5(s32), %6(s32), %7(s32) + %9:_(<8 x s32>) = G_IMPLICIT_DEF + %10:_(<16 x s32>) = G_SHUFFLE_VECTOR %8:_(<8 x s32>), %9:_, shufflemask(4, 5, 3, 7, 2, 7, 1, 4, 1, 1, 5, 3, 3, 2, 0, 0) + $x0 = COPY %10:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... 
+--- +name: shufflevector_bv_undef_undef +legalized: false +body: | + bb.1.entry: + ; CHECK-LABEL: name: shufflevector_bv_undef_undef + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C4]](s32), [[C2]](s32), [[C5]](s32), [[DEF]](s32), [[C5]](s32), [[C]](s32), [[DEF]](s32), [[DEF]](s32), [[C]](s32), [[DEF]](s32), [[C2]](s32), [[DEF]](s32), [[C1]](s32), [[DEF]](s32), [[DEF]](s32) + ; CHECK-NEXT: $x0 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %0:_(s32) = G_CONSTANT i32 28 + %1:_(s32) = G_CONSTANT i32 24 + %2:_(s32) = G_CONSTANT i32 18 + %3:_(s32) = G_CONSTANT i32 8 + %4:_(s32) = G_CONSTANT i32 7 + %5:_(s32) = G_CONSTANT i32 3 + %6:_(s32) = G_CONSTANT i32 2 + %7:_(s32) = G_CONSTANT i32 1 + %8:_(<8 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32), %4(s32), %5(s32), %6(s32), %7(s32) + %9:_(<8 x s32>) = G_IMPLICIT_DEF + %10:_(<16 x s32>) = G_SHUFFLE_VECTOR %8:_(<8 x s32>), %9:_, shufflemask(4, 5, 3, 7, -1, 7, 1, -1, -1, 1, -1, 3, -1, 2, -1, -1) + $x0 = COPY %10:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... +--- +name: shufflevector_reg_bv +legalized: false +body: | + bb.1.entry: + ; CHECK-LABEL: name: shufflevector_reg_bv + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C1]](s32), [[C2]](s32), [[C3]](s32), [[C4]](s32), [[C5]](s32), [[C6]](s32), [[C7]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl4 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<8 x s32>), [[BUILD_VECTOR]], shufflemask(12, 13, 11, 15, 9, 9, 8, 12, 8, 8, 13, 12, 12, 10, 8, 9) + ; CHECK-NEXT: $x0 = COPY [[SHUF]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %0:_(s32) = G_CONSTANT i32 28 + %1:_(s32) = G_CONSTANT i32 24 + %2:_(s32) = G_CONSTANT i32 18 + %3:_(s32) = G_CONSTANT i32 8 + %4:_(s32) = G_CONSTANT i32 7 + %5:_(s32) = G_CONSTANT i32 3 + %6:_(s32) = G_CONSTANT i32 2 + %7:_(s32) = G_CONSTANT i32 1 + %8:_(<8 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32), %4(s32), %5(s32), %6(s32), %7(s32) + %9:_(<8 x s32>) = COPY $wl4 + %10:_(<16 x s32>) = G_SHUFFLE_VECTOR %9:_(<8 x s32>), %8:_, shufflemask(12, 13, 11, 15, 9, 9, 8, 12, 8, 8, 13, 12, 12, 10, 8, 9) + $x0 = COPY %10:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... 
+--- +name: shufflevector_undef_bv +legalized: false +body: | + bb.1.entry: + ; CHECK-LABEL: name: shufflevector_undef_bv + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C4]](s32), [[C2]](s32), [[C6]](s32), [[C]](s32), [[C]](s32), [[C1]](s32), [[C3]](s32), [[C3]](s32), [[C5]](s32), [[C4]](s32), [[C3]](s32), [[C3]](s32), [[C1]](s32), [[C5]](s32), [[C]](s32) + ; CHECK-NEXT: $x0 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %0:_(s32) = G_CONSTANT i32 28 + %1:_(s32) = G_CONSTANT i32 24 + %2:_(s32) = G_CONSTANT i32 18 + %3:_(s32) = G_CONSTANT i32 8 + %4:_(s32) = G_CONSTANT i32 7 + %5:_(s32) = G_CONSTANT i32 3 + %6:_(s32) = G_CONSTANT i32 2 + %7:_(s32) = G_CONSTANT i32 1 + %8:_(<8 x s32>) = G_BUILD_VECTOR %0(s32), %1(s32), %2(s32), %3(s32), %4(s32), %5(s32), %6(s32), %7(s32) + %9:_(<8 x s32>) = G_IMPLICIT_DEF + %10:_(<16 x s32>) = G_SHUFFLE_VECTOR %9:_(<8 x s32>), %8:_, shufflemask(12, 13, 11, 15, 9, 9, 10, 12, 12, 14, 13, 12, 12, 10, 14, 9) + $x0 = COPY %10:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... +--- +name: shufflevector_undef_undef +legalized: false +body: | + bb.1.entry: + ; CHECK-LABEL: name: shufflevector_undef_undef + ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: $x0 = COPY [[DEF]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %0:_(<8 x s32>) = G_IMPLICIT_DEF + %1:_(<8 x s32>) = G_IMPLICIT_DEF + %2:_(<16 x s32>) = G_SHUFFLE_VECTOR %0:_(<8 x s32>), %1:_, shufflemask(12, 13, 11, 15, 9, 9, 8, 12, 8, 8, 13, 12, 12, 10, 8, 9) + $x0 = COPY %2:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... +--- +name: shufflevector_reg_undef +legalized: false +body: | + bb.1.entry: + liveins: $wl2 + ; CHECK-LABEL: name: shufflevector_reg_undef + ; CHECK: liveins: $wl2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<8 x s32>), [[DEF]], shufflemask(12, 13, 11, 15, 9, 9, 8, 12, 8, 8, 13, 12, 12, 10, 8, 9) + ; CHECK-NEXT: $x0 = COPY [[SHUF]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %0:_(<8 x s32>) = COPY $wl2 + %1:_(<8 x s32>) = G_IMPLICIT_DEF + %2:_(<16 x s32>) = G_SHUFFLE_VECTOR %0:_(<8 x s32>), %1:_, shufflemask(12, 13, 11, 15, 9, 9, 8, 12, 8, 8, 13, 12, 12, 10, 8, 9) + $x0 = COPY %2:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... 
+--- +name: shufflevector_undef_reg +legalized: false +body: | + bb.1.entry: + liveins: $wl2, $wl4 + ; CHECK-LABEL: name: shufflevector_undef_reg + ; CHECK: liveins: $wl2, $wl4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl2 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s32>) = G_SHUFFLE_VECTOR [[DEF]](<8 x s32>), [[COPY]], shufflemask(12, 13, 11, 15, 9, 9, 8, 12, 8, 8, 13, 12, 12, 10, 8, 9) + ; CHECK-NEXT: $x0 = COPY [[SHUF]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %0:_(<8 x s32>) = G_IMPLICIT_DEF + %1:_(<8 x s32>) = COPY $wl2 + %2:_(<16 x s32>) = G_SHUFFLE_VECTOR %0:_(<8 x s32>), %1:_, shufflemask(12, 13, 11, 15, 9, 9, 8, 12, 8, 8, 13, 12, 12, 10, 8, 9) + $x0 = COPY %2:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... +--- +name: shufflevector_broadcast +legalized: false +body: | + bb.1.entry: + liveins: $r0 + ; CHECK-LABEL: name: shufflevector_broadcast + ; CHECK: liveins: $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[AIE_BROADCAST_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_AIE_BROADCAST_VECTOR [[COPY]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s32>), [[UV1:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[AIE_BROADCAST_VECTOR]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV]](<8 x s32>) + %1:_(s32) = COPY $r0 + %3:_(<8 x s32>) = G_IMPLICIT_DEF + %5:_(s32) = G_IMPLICIT_DEF + %2:_(<8 x s32>) = G_BUILD_VECTOR %1(s32), %5(s32), %5(s32), %5(s32), %5(s32), %5(s32), %5(s32), %5(s32) + %0:_(<8 x s32>) = G_SHUFFLE_VECTOR %2(<8 x s32>), %3, shufflemask(0, 0, 0, 0, 0, 0, 0, 0) + PseudoRET implicit $lr, implicit %0 +... diff --git a/llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-shufflevector.mir b/llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-shufflevector.mir new file mode 100644 index 000000000000..6b8785691eb0 --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-shufflevector.mir @@ -0,0 +1,986 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates +# RUN: llc -mtriple aie2 -run-pass=aie2-prelegalizer-combiner %s -verify-machineinstrs -o - | FileCheck %s + +--- +name: concat_vector_32_512 +legalized: false +body: | + bb.1.entry: + liveins: $wl2, $wl4 + ; CHECK-LABEL: name: concat_vector_32_512 + ; CHECK: liveins: $wl2, $wl4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl4 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[COPY]](<8 x s32>), [[COPY1]](<8 x s32>) + ; CHECK-NEXT: $x0 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:_(<8 x s32>) = COPY $wl2 + %2:_(<8 x s32>) = COPY $wl4 + %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) + $x0 = COPY %0:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... 
+ +--- +name: concat_vector_32_1024 +legalized: false +body: | + bb.1.entry: + liveins: $x0, $x1 + ; CHECK-LABEL: name: concat_vector_32_1024 + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY $x1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[COPY]](<16 x s32>), [[COPY1]](<16 x s32>) + ; CHECK-NEXT: $y2 = COPY [[CONCAT_VECTORS]](<32 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $y2 + %1:_(<16 x s32>) = COPY $x0 + %2:_(<16 x s32>) = COPY $x1 + %0:_(<32 x s32>) = G_SHUFFLE_VECTOR %1:_(<16 x s32>), %2:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31) + $y2 = COPY %0:_(<32 x s32>) + PseudoRET implicit $lr, implicit $y2 +... + +--- +name: concat_vector_32_256 +legalized: false +body: | + bb.1.entry: + liveins: $wl0 + ; CHECK-LABEL: name: concat_vector_32_256 + ; CHECK: liveins: $wl0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[UV1]](<4 x s32>), [[UV]](<4 x s32>) + ; CHECK-NEXT: $wl0 = COPY [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $wl0 + %1:_(<8 x s32>) = COPY $wl0 + %2:_(<4 x s32>), %3:_(<4 x s32>) = G_UNMERGE_VALUES %1:_(<8 x s32>) + %0:_(<8 x s32>) = G_SHUFFLE_VECTOR %3:_(<4 x s32>), %2:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7) + $wl0 = COPY %0:_(<8 x s32>) + PseudoRET implicit $lr, implicit $wl0 +... + +--- +name: concat_vector_16_512 +legalized: false +body: | + bb.1.entry: + liveins: $wl2, $wl4 + ; CHECK-LABEL: name: concat_vector_16_512 + ; CHECK: liveins: $wl2, $wl4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s16>) = COPY $wl2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s16>) = COPY $wl4 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s16>) = G_CONCAT_VECTORS [[COPY]](<16 x s16>), [[COPY1]](<16 x s16>) + ; CHECK-NEXT: $x0 = COPY [[CONCAT_VECTORS]](<32 x s16>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:_(<16 x s16>) = COPY $wl2 + %2:_(<16 x s16>) = COPY $wl4 + %0:_(<32 x s16>) = G_SHUFFLE_VECTOR %1:_(<16 x s16>), %2:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31) + $x0 = COPY %0:_(<32 x s16>) + PseudoRET implicit $lr, implicit $x0 +... 
+ +--- +name: concat_vector_8_512 +legalized: false +body: | + bb.1.entry: + liveins: $wl2, $wl4 + ; CHECK-LABEL: name: concat_vector_8_512 + ; CHECK: liveins: $wl2, $wl4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s8>) = COPY $wl2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<32 x s8>) = COPY $wl4 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s8>) = G_CONCAT_VECTORS [[COPY]](<32 x s8>), [[COPY1]](<32 x s8>) + ; CHECK-NEXT: $x0 = COPY [[CONCAT_VECTORS]](<64 x s8>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:_(<32 x s8>) = COPY $wl2 + %2:_(<32 x s8>) = COPY $wl4 + %0:_(<64 x s8>) = G_SHUFFLE_VECTOR %1:_(<32 x s8>), %2:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63) + $x0 = COPY %0:_(<64 x s8>) + PseudoRET implicit $lr, implicit $x0 +... + +--- +name: concat_vector_32_512_second_end +legalized: false +body: | + bb.1.entry: + liveins: $wl2, $wl4 + ; CHECK-LABEL: name: concat_vector_32_512_second_end + ; CHECK: liveins: $wl2, $wl4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl4 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[COPY]](<8 x s32>), [[COPY1]](<8 x s32>) + ; CHECK-NEXT: $x0 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:_(<8 x s32>) = COPY $wl2 + %2:_(<8 x s32>) = COPY $wl4 + %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, -1, -1, -1, -1) + $x0 = COPY %0:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... + +--- +name: extract_vector_1024_to_512 +legalized: false +body: | + bb.1.entry: + liveins: $y2 + ; CHECK-LABEL: name: extract_vector_1024_to_512 + ; CHECK: liveins: $y2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>) + ; CHECK-NEXT: $x0 = COPY [[UV]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:_(<32 x s32>) = COPY $y2 + %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) + $x0 = COPY %0:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... + +--- +name: concat_vector_32_512_first_start +legalized: false +body: | + bb.1.entry: + liveins: $wl2, $wl4 + ; CHECK-LABEL: name: concat_vector_32_512_first_start + ; CHECK: liveins: $wl2, $wl4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl4 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[COPY]](<8 x s32>), [[COPY1]](<8 x s32>) + ; CHECK-NEXT: $x0 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:_(<8 x s32>) = COPY $wl2 + %2:_(<8 x s32>) = COPY $wl4 + %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_, shufflemask(-1, -1, -1, -1, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) + $x0 = COPY %0:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... 
+ +--- +name: extract_vector_1024_to_256 +legalized: false +body: | + bb.1.entry: + liveins: $y2 + ; CHECK-LABEL: name: extract_vector_1024_to_256 + ; CHECK: liveins: $y2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[UV]](<16 x s32>) + ; CHECK-NEXT: $wl0 = COPY [[UV2]](<8 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:_(<32 x s32>) = COPY $y2 + %0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7) + $wl0 = COPY %0:_(<8 x s32>) + PseudoRET implicit $lr, implicit $x0 +... + +--- +name: concat_vector_32_512_first_end +legalized: false +body: | + bb.1.entry: + liveins: $wl2, $wl4 + ; CHECK-LABEL: name: concat_vector_32_512_first_end + ; CHECK: liveins: $wl2, $wl4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl4 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[COPY]](<8 x s32>), [[COPY1]](<8 x s32>) + ; CHECK-NEXT: $x0 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:_(<8 x s32>) = COPY $wl2 + %2:_(<8 x s32>) = COPY $wl4 + %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_, shufflemask(0, 1, 2, 3, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15) + $x0 = COPY %0:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... + +--- +name: concat_vector_32_512_second_start +legalized: false +body: | + bb.1.entry: + liveins: $wl2, $wl4 + ; CHECK-LABEL: name: concat_vector_32_512_second_start + ; CHECK: liveins: $wl2, $wl4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl4 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[COPY]](<8 x s32>), [[COPY1]](<8 x s32>) + ; CHECK-NEXT: $x0 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:_(<8 x s32>) = COPY $wl2 + %2:_(<8 x s32>) = COPY $wl4 + %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1, -1, 12, 13, 14, 15) + $x0 = COPY %0:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... + +--- +name: concat_vector_32_512_first_block +legalized: false +body: | + bb.1.entry: + liveins: $wl2, $wl4 + ; CHECK-LABEL: name: concat_vector_32_512_first_block + ; CHECK: liveins: $wl2, $wl4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl4 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[DEF]](<8 x s32>), [[COPY]](<8 x s32>) + ; CHECK-NEXT: $x0 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:_(<8 x s32>) = COPY $wl2 + %2:_(<8 x s32>) = COPY $wl4 + %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_, shufflemask(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15) + $x0 = COPY %0:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... 
+ +--- +name: concat_vector_32_512_second_block +legalized: false +body: | + bb.1.entry: + liveins: $wl2, $wl4 + ; CHECK-LABEL: name: concat_vector_32_512_second_block + ; CHECK: liveins: $wl2, $wl4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[COPY]](<8 x s32>), [[DEF]](<8 x s32>) + ; CHECK-NEXT: $x0 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:_(<8 x s32>) = COPY $wl2 + %2:_(<8 x s32>) = COPY $wl4 + %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1) + $x0 = COPY %0:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... + +--- +name: concat_vector_32_512_random +legalized: false +body: | + bb.1.entry: + liveins: $wl2, $wl4 + ; CHECK-LABEL: name: concat_vector_32_512_random + ; CHECK: liveins: $wl2, $wl4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl4 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[COPY]](<8 x s32>), [[COPY1]](<8 x s32>) + ; CHECK-NEXT: $x0 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:_(<8 x s32>) = COPY $wl2 + %2:_(<8 x s32>) = COPY $wl4 + %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_, shufflemask(0, -1, 2, -1, 4, -1, -1, 7, 8, 9, -1, 11, 12, -1, 14, -1) + $x0 = COPY %0:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... + +--- +name: extract_vector_1024_to_128 +legalized: false +body: | + bb.1.entry: + liveins: $y2 + ; CHECK-LABEL: name: extract_vector_1024_to_128 + ; CHECK: liveins: $y2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[UV]](<16 x s32>) + ; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[UV2]](<8 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_UNPAD_VECTOR]](<4 x s32>) + %1:_(<32 x s32>) = COPY $y2 + %0:_(<4 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(0, 1, 2, 3) + PseudoRET implicit $lr, implicit %0 +... 
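+
+# The next test checks the recursive splitting of a very small extract: the <2 x s8> result
+# is reached by repeatedly halving the source with G_UNMERGE_VALUES (plus one
+# G_AIE_UNPAD_VECTOR step), always keeping the first half of each split.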
+ +--- +name: extract_vector_1024_to_32 +legalized: false +body: | + bb.1.entry: + liveins: $y2 + ; CHECK-LABEL: name: extract_vector_1024_to_32 + ; CHECK: liveins: $y2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<128 x s8>) = COPY $y2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<64 x s8>), [[UV1:%[0-9]+]]:_(<64 x s8>) = G_UNMERGE_VALUES [[COPY]](<128 x s8>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<32 x s8>), [[UV3:%[0-9]+]]:_(<32 x s8>) = G_UNMERGE_VALUES [[UV]](<64 x s8>) + ; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_AIE_UNPAD_VECTOR [[UV2]](<32 x s8>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<8 x s8>), [[UV5:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[AIE_UNPAD_VECTOR]](<16 x s8>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<4 x s8>), [[UV7:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[UV4]](<8 x s8>) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s8>), [[UV9:%[0-9]+]]:_(<2 x s8>) = G_UNMERGE_VALUES [[UV6]](<4 x s8>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV8]](<2 x s8>) + %1:_(<128 x s8>) = COPY $y2 + %0:_(<2 x s8>) = G_SHUFFLE_VECTOR %1:_(<128 x s8>), %1:_, shufflemask(0, 1) + PseudoRET implicit $lr, implicit %0 +... + +--- +name: extract_vector_second_half_512_to_256 +legalized: false +body: | + bb.1.entry: + liveins: $x0, $x1 + ; CHECK-LABEL: name: extract_vector_second_half_512_to_256 + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s32>), [[UV1:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV1]](<8 x s32>) + %1:_(<16 x s32>) = COPY $x0 + %2:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<16 x s32>), %1:_(<16 x s32>), shufflemask(8, 9, 10, 11, 12, 13, 14, 15) + PseudoRET implicit $lr, implicit %2 +... + +--- +name: extract_vector_second_half_512_to_128 +legalized: false +body: | + bb.1.entry: + liveins: $x0, $x1 + ; CHECK-LABEL: name: extract_vector_second_half_512_to_128 + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s32>), [[UV1:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[UV]](<8 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV3]](<4 x s32>) + %1:_(<16 x s32>) = COPY $x0 + %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %1:_(<16 x s32>), %1:_(<16 x s32>), shufflemask(4, 5, 6, 7) + PseudoRET implicit $lr, implicit %2 +... + +--- +name: extract_vector_second_half_1024_to_512 +legalized: false +body: | + bb.1.entry: + liveins: $y2, $y3 + ; CHECK-LABEL: name: extract_vector_second_half_1024_to_512 + ; CHECK: liveins: $y2, $y3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<128 x s8>) = COPY $y2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<64 x s8>), [[UV1:%[0-9]+]]:_(<64 x s8>) = G_UNMERGE_VALUES [[COPY]](<128 x s8>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV1]](<64 x s8>) + %1:_(<128 x s8>) = COPY $y2 + %2:_(<64 x s8>) = G_SHUFFLE_VECTOR %1:_(<128 x s8>), %1:_(<128 x s8>), shufflemask(64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127) + PseudoRET implicit $lr, implicit %2 +... 
+ +--- +name: extract_vector_second_half_1024_to_32 +legalized: false +body: | + bb.1.entry: + liveins: $y2, $y3 + ; CHECK-LABEL: name: extract_vector_second_half_1024_to_32 + ; CHECK: liveins: $y2, $y3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<128 x s8>) = COPY $y2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<64 x s8>), [[UV1:%[0-9]+]]:_(<64 x s8>) = G_UNMERGE_VALUES [[COPY]](<128 x s8>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<32 x s8>), [[UV3:%[0-9]+]]:_(<32 x s8>) = G_UNMERGE_VALUES [[UV]](<64 x s8>) + ; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_AIE_UNPAD_VECTOR [[UV2]](<32 x s8>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<8 x s8>), [[UV5:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[AIE_UNPAD_VECTOR]](<16 x s8>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<4 x s8>), [[UV7:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[UV4]](<8 x s8>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV7]](<4 x s8>) + %1:_(<128 x s8>) = COPY $y2 + %2:_(<4 x s8>) = G_SHUFFLE_VECTOR %1:_(<128 x s8>), %1:_(<128 x s8>), shufflemask(4, 5, 6, 7) + PseudoRET implicit $lr, implicit %2 +... + +--- +name: extract_vector_third_half_1024 +legalized: false +body: | + bb.1.entry: + liveins: $y2, $y3 + ; CHECK-LABEL: name: extract_vector_third_half_1024 + ; CHECK: liveins: $y2, $y3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV]](<16 x s32>) + %1:_(<32 x s32>) = COPY $y2 + %2:_(<32 x s32>) = COPY $y3 + %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %2:_, shufflemask(32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47) + PseudoRET implicit $lr, implicit %0 +... + +--- +name: extract_vector_third_half_512 +legalized: false +body: | + bb.1.entry: + liveins: $x0, $x1 + ; CHECK-LABEL: name: extract_vector_third_half_512 + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s32>), [[UV1:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV]](<8 x s32>) + %1:_(<16 x s32>) = COPY $x0 + %2:_(<16 x s32>) = COPY $x1 + %0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<16 x s32>), %2:_, shufflemask(16, 17, 18, 19, 20, 21, 22, 23) + PseudoRET implicit $lr, implicit %0 +... + +--- +name: extract_vector_third_half_256 +legalized: false +body: | + bb.1.entry: + liveins: $wl0, $wl1 + ; CHECK-LABEL: name: extract_vector_third_half_256 + ; CHECK: liveins: $wl0, $wl1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl1 + ; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[COPY]](<8 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_UNPAD_VECTOR]](<4 x s32>) + %1:_(<8 x s32>) = COPY $wl0 + %2:_(<8 x s32>) = COPY $wl1 + %0:_(<4 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_, shufflemask(8, 9, 10, 11) + PseudoRET implicit $lr, implicit %0 +... 
+ +--- +name: extract_vector_third_half_128 +legalized: false +body: | + bb.1.entry: + liveins: $q0, $q1 + ; CHECK-LABEL: name: extract_vector_third_half_128 + ; CHECK: liveins: $q0, $q1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV]](<2 x s32>) + %1:_(<4 x s32>) = COPY $q0 + %2:_(<4 x s32>) = COPY $q1 + %0:_(<2 x s32>) = G_SHUFFLE_VECTOR %1:_(<4 x s32>), %2:_, shufflemask(4, 5) + PseudoRET implicit $lr, implicit %0 +... + +--- +name: extract_vector_fourth_half_1024 +legalized: false +body: | + bb.1.entry: + liveins: $y2, $y3 + ; CHECK-LABEL: name: extract_vector_fourth_half_1024 + ; CHECK: liveins: $y2, $y3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV1]](<16 x s32>) + %1:_(<32 x s32>) = COPY $y2 + %2:_(<32 x s32>) = COPY $y3 + %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %2:_, shufflemask(48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63) + PseudoRET implicit $lr, implicit %0 +... + +--- +name: extract_vector_fourth_half_512 +legalized: false +body: | + bb.1.entry: + liveins: $x0, $x1 + ; CHECK-LABEL: name: extract_vector_fourth_half_512 + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s32>), [[UV1:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV1]](<8 x s32>) + %1:_(<16 x s32>) = COPY $x0 + %2:_(<16 x s32>) = COPY $x1 + %0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<16 x s32>), %2:_, shufflemask(24,25,26,27,28,29,30,31) + PseudoRET implicit $lr, implicit %0 +... + +--- +name: extract_vector_fourth_half_256 +legalized: false +body: | + bb.1.entry: + liveins: $wl0, $wl1 + ; CHECK-LABEL: name: extract_vector_fourth_half_256 + ; CHECK: liveins: $wl0, $wl1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV1]](<4 x s32>) + %1:_(<8 x s32>) = COPY $wl0 + %2:_(<8 x s32>) = COPY $wl1 + %0:_(<4 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_, shufflemask(12,13,14,15) + PseudoRET implicit $lr, implicit %0 +... + +--- +name: extract_vector_fourth_half_128 +legalized: false +body: | + bb.1.entry: + liveins: $q0, $q1 + ; CHECK-LABEL: name: extract_vector_fourth_half_128 + ; CHECK: liveins: $q0, $q1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV1]](<2 x s32>) + %1:_(<4 x s32>) = COPY $q0 + %2:_(<4 x s32>) = COPY $q1 + %0:_(<2 x s32>) = G_SHUFFLE_VECTOR %1:_(<4 x s32>), %2:_, shufflemask(6,7) + PseudoRET implicit $lr, implicit %0 +... 
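+
+# The insert_vector tests below check that a shuffle taking the low half of each source is
+# combined into extracts of both low halves (G_UNMERGE_VALUES, or G_AIE_UNPAD_VECTOR for
+# 256-bit sources) followed by a G_CONCAT_VECTORS.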
+ +--- +name: insert_vector_16_elements +legalized: false +body: | + bb.1.entry: + liveins: $x0, $x1 + ; CHECK-LABEL: name: insert_vector_16_elements + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY $x1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s32>), [[UV1:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[COPY1]](<16 x s32>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[UV]](<8 x s32>), [[UV2]](<8 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[CONCAT_VECTORS]](<16 x s32>) + %1:_(<16 x s32>) = COPY $x0 + %2:_(<16 x s32>) = COPY $x1 + %3:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<16 x s32>), %2:_(<16 x s32>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23) + PseudoRET implicit $lr, implicit %3 +... + +--- +name: insert_vector_8_elements +legalized: false +body: | + bb.1.entry: + liveins: $wl0, $wl1 + ; CHECK-LABEL: name: insert_vector_8_elements + ; CHECK: liveins: $wl0, $wl1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl1 + ; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[COPY]](<8 x s32>) + ; CHECK-NEXT: [[AIE_UNPAD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[COPY1]](<8 x s32>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[AIE_UNPAD_VECTOR]](<4 x s32>), [[AIE_UNPAD_VECTOR1]](<4 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[CONCAT_VECTORS]](<8 x s32>) + %1:_(<8 x s32>) = COPY $wl0 + %2:_(<8 x s32>) = COPY $wl1 + %3:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_(<8 x s32>), shufflemask(0, 1, 2, 3, 8, 9, 10, 11) + PseudoRET implicit $lr, implicit %3 +... + +--- +name: insert_vector_128_elements +legalized: false +body: | + bb.1.entry: + liveins: $y2, $y3 + ; CHECK-LABEL: name: insert_vector_128_elements + ; CHECK: liveins: $y2, $y3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<128 x s8>) = COPY $y2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<128 x s8>) = COPY $y3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<64 x s8>), [[UV1:%[0-9]+]]:_(<64 x s8>) = G_UNMERGE_VALUES [[COPY]](<128 x s8>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<64 x s8>), [[UV3:%[0-9]+]]:_(<64 x s8>) = G_UNMERGE_VALUES [[COPY1]](<128 x s8>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<128 x s8>) = G_CONCAT_VECTORS [[UV]](<64 x s8>), [[UV2]](<64 x s8>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[CONCAT_VECTORS]](<128 x s8>) + %1:_(<128 x s8>) = COPY $y2 + %2:_(<128 x s8>) = COPY $y3 + %3:_(<128 x s8>) = G_SHUFFLE_VECTOR %1:_(<128 x s8>), %2:_(<128 x s8>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191) + PseudoRET implicit $lr, implicit %3 +... 
+ +--- +name: insert_vector_16_elements_reverse +legalized: false +body: | + bb.1.entry: + liveins: $x0, $x1 + ; CHECK-LABEL: name: insert_vector_16_elements_reverse + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY $x1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s32>), [[UV1:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[COPY1]](<16 x s32>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[UV2]](<8 x s32>), [[UV]](<8 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[CONCAT_VECTORS]](<16 x s32>) + %1:_(<16 x s32>) = COPY $x0 + %2:_(<16 x s32>) = COPY $x1 + %3:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<16 x s32>), %2:_(<16 x s32>), shufflemask(16, 17, 18, 19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5, 6, 7) + PseudoRET implicit $lr, implicit %3 +... + +--- +name: insert_vector_8_elements_reverse +legalized: false +body: | + bb.1.entry: + liveins: $wl0, $wl1 + ; CHECK-LABEL: name: insert_vector_8_elements_reverse + ; CHECK: liveins: $wl0, $wl1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl1 + ; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[COPY]](<8 x s32>) + ; CHECK-NEXT: [[AIE_UNPAD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[COPY1]](<8 x s32>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[AIE_UNPAD_VECTOR1]](<4 x s32>), [[AIE_UNPAD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[CONCAT_VECTORS]](<8 x s32>) + %1:_(<8 x s32>) = COPY $wl0 + %2:_(<8 x s32>) = COPY $wl1 + %3:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_(<8 x s32>), shufflemask(8, 9, 10, 11, 0, 1, 2, 3) + PseudoRET implicit $lr, implicit %3 +... + +--- +name: insert_vector_128_elements_reverse +legalized: false +body: | + bb.1.entry: + liveins: $y2, $y3 + ; CHECK-LABEL: name: insert_vector_128_elements_reverse + ; CHECK: liveins: $y2, $y3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<128 x s8>) = COPY $y2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<128 x s8>) = COPY $y3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<64 x s8>), [[UV1:%[0-9]+]]:_(<64 x s8>) = G_UNMERGE_VALUES [[COPY]](<128 x s8>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<64 x s8>), [[UV3:%[0-9]+]]:_(<64 x s8>) = G_UNMERGE_VALUES [[COPY1]](<128 x s8>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<128 x s8>) = G_CONCAT_VECTORS [[UV2]](<64 x s8>), [[UV]](<64 x s8>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[CONCAT_VECTORS]](<128 x s8>) + %1:_(<128 x s8>) = COPY $y2 + %2:_(<128 x s8>) = COPY $y3 + %3:_(<128 x s8>) = G_SHUFFLE_VECTOR %1:_(<128 x s8>), %2:_(<128 x s8>), shufflemask(128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63) + PseudoRET implicit $lr, implicit %3 +... 
+ +--- +name: concat_vector_reverse_32_512 +legalized: false +body: | + bb.1.entry: + liveins: $wl2, $wl4 + ; CHECK-LABEL: name: concat_vector_reverse_32_512 + ; CHECK: liveins: $wl2, $wl4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl4 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[COPY1]](<8 x s32>), [[COPY]](<8 x s32>) + ; CHECK-NEXT: $x0 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:_(<8 x s32>) = COPY $wl2 + %2:_(<8 x s32>) = COPY $wl4 + %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_, shufflemask(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7) + $x0 = COPY %0:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... + +--- +name: concat_vector_reverse_32_512_undef_start_first +legalized: false +body: | + bb.1.entry: + liveins: $wl2, $wl4 + ; CHECK-LABEL: name: concat_vector_reverse_32_512_undef_start_first + ; CHECK: liveins: $wl2, $wl4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl4 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[COPY1]](<8 x s32>), [[COPY]](<8 x s32>) + ; CHECK-NEXT: $x0 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:_(<8 x s32>) = COPY $wl2 + %2:_(<8 x s32>) = COPY $wl4 + %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_, shufflemask(-1, -1, -1, -1, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7) + $x0 = COPY %0:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... + +--- +name: concat_vector_reverse_32_512_start_end +legalized: false +body: | + bb.1.entry: + liveins: $wl2, $wl4 + ; CHECK-LABEL: name: concat_vector_reverse_32_512_start_end + ; CHECK: liveins: $wl2, $wl4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl4 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[COPY1]](<8 x s32>), [[COPY]](<8 x s32>) + ; CHECK-NEXT: $x0 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:_(<8 x s32>) = COPY $wl2 + %2:_(<8 x s32>) = COPY $wl4 + %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_, shufflemask(8, 9, 10, 11, 12, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7) + $x0 = COPY %0:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... + +--- +name: concat_vector_reverse_32_512_end_start +legalized: false +body: | + bb.1.entry: + liveins: $wl2, $wl4 + ; CHECK-LABEL: name: concat_vector_reverse_32_512_end_start + ; CHECK: liveins: $wl2, $wl4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl4 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[COPY1]](<8 x s32>), [[COPY]](<8 x s32>) + ; CHECK-NEXT: $x0 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:_(<8 x s32>) = COPY $wl2 + %2:_(<8 x s32>) = COPY $wl4 + %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_, shufflemask(8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, 4, 5, 6, 7) + $x0 = COPY %0:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... 
+ +--- +name: concat_vector_reverse_32_512_end_end +legalized: false +body: | + bb.1.entry: + liveins: $wl2, $wl4 + ; CHECK-LABEL: name: concat_vector_reverse_32_512_end_end + ; CHECK: liveins: $wl2, $wl4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl4 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[COPY1]](<8 x s32>), [[COPY]](<8 x s32>) + ; CHECK-NEXT: $x0 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:_(<8 x s32>) = COPY $wl2 + %2:_(<8 x s32>) = COPY $wl4 + %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_, shufflemask(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, -1, -1, -1, -1) + $x0 = COPY %0:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... + +--- +name: concat_vector_reverse_32_512_first_block +legalized: false +body: | + bb.1.entry: + liveins: $wl2, $wl4 + ; CHECK-LABEL: name: concat_vector_reverse_32_512_first_block + ; CHECK: liveins: $wl2, $wl4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[DEF]](<8 x s32>), [[COPY]](<8 x s32>) + ; CHECK-NEXT: $x0 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:_(<8 x s32>) = COPY $wl2 + %2:_(<8 x s32>) = COPY $wl4 + %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_, shufflemask(-1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7) + $x0 = COPY %0:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... + +--- +name: concat_vector_reverse_32_512_second_block +legalized: false +body: | + bb.1.entry: + liveins: $wl2, $wl4 + ; CHECK-LABEL: name: concat_vector_reverse_32_512_second_block + ; CHECK: liveins: $wl2, $wl4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl4 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[COPY]](<8 x s32>), [[DEF]](<8 x s32>) + ; CHECK-NEXT: $x0 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:_(<8 x s32>) = COPY $wl2 + %2:_(<8 x s32>) = COPY $wl4 + %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_, shufflemask(8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1) + $x0 = COPY %0:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... + +--- +name: concat_vector_reverse_32_512_random +legalized: false +body: | + bb.1.entry: + liveins: $wl2, $wl4 + ; CHECK-LABEL: name: concat_vector_reverse_32_512_random + ; CHECK: liveins: $wl2, $wl4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl4 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[COPY1]](<8 x s32>), [[COPY]](<8 x s32>) + ; CHECK-NEXT: $x0 = COPY [[CONCAT_VECTORS]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:_(<8 x s32>) = COPY $wl2 + %2:_(<8 x s32>) = COPY $wl4 + %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_, shufflemask(8, 9, -1, 11, 12, 13, -1, 15, 0, 1, -1, 3, 4, 5, -1, 7) + $x0 = COPY %0:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... 
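+
+# The shuffle_vector_* cases below use interleaving (transpose-like) masks that
+# cannot be expressed as a plain concatenation of the inputs. Depending on the
+# element type and width they either stay as G_SHUFFLE_VECTOR or, as in
+# shuffle_vector_8_512, are matched to G_AIE_VSHUFFLE with a constant mode.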
+ +--- +name: shuffle_vector_32_4x4 +legalized: false +body: | + bb.1.entry: + liveins: $x0, $x1 + ; CHECK-LABEL: name: shuffle_vector_32_4x4 + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY $x1 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<16 x s32>), [[COPY1]], shufflemask(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15) + ; CHECK-NEXT: $x2 = COPY [[SHUF]](<16 x s32>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x2 + %1:_(<16 x s32>) = COPY $x0 + %2:_(<16 x s32>) = COPY $x1 + %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<16 x s32>), %2:_, shufflemask(0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15) + $x2 = COPY %0:_(<16 x s32>) + PseudoRET implicit $lr, implicit $x2 +... + +--- +name: shuffle_vector_16_4x4 +legalized: false +body: | + bb.1.entry: + liveins: $x0, $x1 + ; CHECK-LABEL: name: shuffle_vector_16_4x4 + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<32 x s16>) = COPY $x1 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<32 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<32 x s16>), [[COPY1]], shufflemask(0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27, 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31) + ; CHECK-NEXT: $x2 = COPY [[SHUF]](<32 x s16>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x2 + %1:_(<32 x s16>) = COPY $x0 + %2:_(<32 x s16>) = COPY $x1 + %0:_(<32 x s16>) = G_SHUFFLE_VECTOR %1:_(<32 x s16>), %2:_, shufflemask(0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27, 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31) + $x2 = COPY %0:_(<32 x s16>) + PseudoRET implicit $lr, implicit $x2 +... + +--- +name: shuffle_vector_8_512 +legalized: false +body: | + bb.1.entry: + liveins: $wl0, $wl1 + ; CHECK-LABEL: name: shuffle_vector_8_512 + ; CHECK: liveins: $wl0, $wl1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s8>) = COPY $wl0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 35 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s8>) = G_CONCAT_VECTORS [[COPY]](<32 x s8>), [[DEF]](<32 x s8>) + ; CHECK-NEXT: [[AIE_VSHUFFLE:%[0-9]+]]:_(<64 x s8>) = G_AIE_VSHUFFLE [[CONCAT_VECTORS]], [[CONCAT_VECTORS]], [[C]](s32) + ; CHECK-NEXT: $x2 = COPY [[AIE_VSHUFFLE]](<64 x s8>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x2 + %1:_(<32 x s8>) = COPY $wl0 + %2:_(<32 x s8>) = G_IMPLICIT_DEF + %0:_(<64 x s8>) = G_SHUFFLE_VECTOR %1:_(<32 x s8>), %2:_, shufflemask(0, 16, 32, 48, 1, 17, 33, 49, 2, 18, 34, 50, 3, 19, 35, 51, 4, 20, 36, 52, 5, 21, 37, 53, 6, 22, 38, 54, 7, 23, 39, 55, 8, 24, 40, 56, 9, 25, 41, 57, 10, 26, 42, 58, 11, 27, 43, 59, 12, 28, 44, 60, 13, 29, 45, 61, 14, 30, 46, 62, 15, 31, 47, 63) + $x2 = COPY %0:_(<64 x s8>) + PseudoRET implicit $lr, implicit $x2 +... 
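+
+# The remaining cases map byte and halfword interleave patterns onto
+# G_AIE_VSHUFFLE. Sources that already fill a 512-bit register feed the
+# shuffle directly; 256-bit sources are first combined with G_CONCAT_VECTORS.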
+ + +--- +name: shuffle_vector_8_1024 +legalized: false +body: | + bb.1.entry: + liveins: $x0, $x1 + ; CHECK-LABEL: name: shuffle_vector_8_1024 + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s8>) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<64 x s8>) = COPY $x1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 35 + ; CHECK-NEXT: [[AIE_VSHUFFLE:%[0-9]+]]:_(<64 x s8>) = G_AIE_VSHUFFLE [[COPY]], [[COPY1]], [[C]](s32) + ; CHECK-NEXT: $x2 = COPY [[AIE_VSHUFFLE]](<64 x s8>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x2 + %1:_(<64 x s8>) = COPY $x0 + %2:_(<64 x s8>) = COPY $x1 + %0:_(<64 x s8>) = G_SHUFFLE_VECTOR %1:_(<64 x s8>), %2:_, shufflemask(0, 16, 32, 48, 1, 17, 33, 49, 2, 18, 34, 50, 3, 19, 35, 51, 4, 20, 36, 52, 5, 21, 37, 53, 6, 22, 38, 54, 7, 23, 39, 55, 8, 24, 40, 56, 9, 25, 41, 57, 10, 26, 42, 58, 11, 27, 43, 59, 12, 28, 44, 60, 13, 29, 45, 61, 14, 30, 46, 62, 15, 31, 47, 63) + $x2 = COPY %0:_(<64 x s8>) + PseudoRET implicit $lr, implicit $x2 +... + +--- +name: shuffle_vector_1024_4x8 +legalized: false +body: | + bb.1.entry: + liveins: $x0, $x1 + ; CHECK-LABEL: name: shuffle_vector_1024_4x8 + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<32 x s16>) = COPY $x1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 + ; CHECK-NEXT: [[AIE_VSHUFFLE:%[0-9]+]]:_(<32 x s16>) = G_AIE_VSHUFFLE [[COPY]], [[COPY1]], [[C]](s32) + ; CHECK-NEXT: $x2 = COPY [[AIE_VSHUFFLE]](<32 x s16>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x2 + %1:_(<32 x s16>) = COPY $x0 + %2:_(<32 x s16>) = COPY $x1 + %0:_(<32 x s16>) = G_SHUFFLE_VECTOR %1:_(<32 x s16>), %2:_, shufflemask(0, 1, 8, 9, 16, 17, 24, 25, 2, 3, 10, 11, 18, 19, 26, 27, 4, 5, 12, 13, 20, 21, 28, 29, 6, 7, 14, 15, 22, 23, 30, 31) + $x2 = COPY %0:_(<32 x s16>) + PseudoRET implicit $lr, implicit $x2 +... + +--- +name: shuffle_vector_512_4x8 +legalized: false +body: | + bb.1.entry: + liveins: $wl0, $wl1 + ; CHECK-LABEL: name: shuffle_vector_512_4x8 + ; CHECK: liveins: $wl0, $wl1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s16>) = COPY $wl0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s16>) = COPY $wl1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 29 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s16>) = G_CONCAT_VECTORS [[COPY]](<16 x s16>), [[COPY1]](<16 x s16>) + ; CHECK-NEXT: [[AIE_VSHUFFLE:%[0-9]+]]:_(<32 x s16>) = G_AIE_VSHUFFLE [[CONCAT_VECTORS]], [[CONCAT_VECTORS]], [[C]](s32) + ; CHECK-NEXT: $x2 = COPY [[AIE_VSHUFFLE]](<32 x s16>) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x2 + %1:_(<16 x s16>) = COPY $wl0 + %2:_(<16 x s16>) = COPY $wl1 + %0:_(<32 x s16>) = G_SHUFFLE_VECTOR %1:_(<16 x s16>), %2:_, shufflemask(0, 1, 8, 9, 16, 17, 24, 25, 2, 3, 10, 11, 18, 19, 26, 27, 4, 5, 12, 13, 20, 21, 28, 29, 6, 7, 14, 15, 22, 23, 30, 31) + $x2 = COPY %0:_(<32 x s16>) + PseudoRET implicit $lr, implicit $x2 +... diff --git a/llvm/test/CodeGen/AIE/aie2/intrinsics-shufflevec.ll b/llvm/test/CodeGen/AIE/aie2/intrinsics-shufflevec.ll new file mode 100644 index 000000000000..0284bbbe9d7f --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2/intrinsics-shufflevec.ll @@ -0,0 +1,138 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; +; This file is licensed under the Apache License v2.0 with LLVM Exceptions. +; See https://llvm.org/LICENSE.txt for license information. 
+; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+;
+; (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
+; RUN: llc -O2 -mtriple=aie2 -verify-machineinstrs --issue-limit=1 %s -o - | FileCheck %s
+
+define <8 x i32> @test_extract_vector(<16 x i32> noundef %a, i32 noundef %idx) {
+; CHECK-LABEL: test_extract_vector:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: nopb ; nopa ; nops ; jz r0, #.LBB0_2; nopv
+; CHECK-NEXT: nopa ; nopx // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: nop // Delay Slot 3
+; CHECK-NEXT: vmov x0, x2 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+; CHECK-NEXT: // %bb.1: // %if.end
+; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; vmov wl0, wh0; nopv
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_2: // %return
+; CHECK-NEXT: nopa ; ret lr
+; CHECK-NEXT: nop // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: nop // Delay Slot 3
+; CHECK-NEXT: nop // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %cmp = icmp eq i32 %idx, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  %shuffle = shufflevector <16 x i32> %a, <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  br label %return
+
+if.end:
+  %shuffle1 = shufflevector <16 x i32> %a, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  br label %return
+
+return:
+  %retval.0 = phi <8 x i32> [ %shuffle, %if.then ], [ %shuffle1, %if.end ]
+  ret <8 x i32> %retval.0
+}
+
+define <16 x i32> @test_insert_vector(<16 x i32> noundef %a, i32 noundef %idx, <8 x i32> noundef %b) {
+; CHECK-LABEL: test_insert_vector:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: nopb ; nopa ; nops ; jz r0, #.LBB1_2; nopv
+; CHECK-NEXT: nopa ; nopx // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: nop // Delay Slot 3
+; CHECK-NEXT: nop // Delay Slot 2
+; CHECK-NEXT: vmov wl0, wl4 // Delay Slot 1
+; CHECK-NEXT: // %bb.1: // %if.end
+; CHECK-NEXT: nopb ; nopa ; nops ; ret lr ; nopm ; nopv
+; CHECK-NEXT: nopx // Delay Slot 5
+; CHECK-NEXT: vmov wh2, wl0 // Delay Slot 4
+; CHECK-NEXT: nop // Delay Slot 3
+; CHECK-NEXT: vmov x0, x2 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB1_2: // %if.then
+; CHECK-NEXT: ret lr
+; CHECK-NEXT: nop // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: nop // Delay Slot 3
+; CHECK-NEXT: vmov wh0, wl2 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <8 x i32> %b, <8 x i32> undef, <16 x i32>
+  %cmp = icmp eq i32 %idx, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  %shuffle1 = shufflevector <16 x i32> %shuffle, <16 x i32> %a, <16 x i32>
+  br label %cleanup
+
+if.end:
+  %shuffle2 = shufflevector <16 x i32> %a, <16 x i32> %shuffle, <16 x i32>
+  br label %cleanup
+
+cleanup:
+  %retval.0 = phi <16 x i32> [ %shuffle1, %if.then ], [ %shuffle2, %if.end ]
+  ret <16 x i32> %retval.0
+}
+
+define <16 x i32> @test_concat_vector(<8 x i32> noundef %a, <8 x i32> noundef %b) {
+; CHECK-LABEL: test_concat_vector:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: nopb ; nopa ; nops ; ret lr ; nopm ; nopv
+; CHECK-NEXT: nopx // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: vmov wl0, wl2 // Delay Slot 3
+; CHECK-NEXT: vmov wh0, wl4 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i32> %shuffle
+}
+
+define <16 x i32> @test_set_vector(i32 noundef %idx, <8 x i32> noundef %a) {
+; CHECK-LABEL: test_set_vector:
+; CHECK: .p2align 4 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: mov r1, r16 +; CHECK-NEXT: eqz r0, r0 +; CHECK-NEXT: ret lr +; CHECK-NEXT: vmov wh0, wl2 // Delay Slot 5 +; CHECK-NEXT: vmov wl0, wl2 // Delay Slot 4 +; CHECK-NEXT: add r16, r0, #-1 // Delay Slot 3 +; CHECK-NEXT: vsel.32 x0, x0, x0, r16 // Delay Slot 2 +; CHECK-NEXT: mov r16, r1 // Delay Slot 1 +entry: + %cmp = icmp eq i32 %idx, 0 + %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <16 x i32> + %shuffle1 = shufflevector <8 x i32> %a, <8 x i32> undef, <16 x i32> + %retval.0 = select i1 %cmp, <16 x i32> %shuffle, <16 x i32> %shuffle1 + ret <16 x i32> %retval.0 +} + +define i32 @test_extract_elem(<8 x i32> noundef %a, i32 noundef %idx) { +; CHECK-LABEL: test_extract_elem: +; CHECK: .p2align 4 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: nopa ; nopb ; ret lr ; nopm ; nops +; CHECK-NEXT: mov r2, r16 // Delay Slot 5 +; CHECK-NEXT: mov r16, r1 // Delay Slot 4 +; CHECK-NEXT: vextract.s32 r0, x0, r16 // Delay Slot 3 +; CHECK-NEXT: nop // Delay Slot 2 +; CHECK-NEXT: mov r16, r2 // Delay Slot 1 +entry: + %vecext = extractelement <8 x i32> %a, i32 %idx + ret i32 %vecext +}