From 650d8a92e80c707c4db907d7bb21e556d5c34696 Mon Sep 17 00:00:00 2001
From: Sai Abhinay Anubola
Date: Fri, 8 Nov 2024 15:01:46 +0530
Subject: [PATCH] Support for allowing direct VEXTRACT to 20-bit registers

Let the S20 narrowing combine start from the
aie2.vextract.elem{8,16,32}.I512 intrinsics when their sign operand
(operand 4) is a constant. Such an intrinsic is erased and rebuilt as
G_AIE_SEXT_EXTRACT_VECTOR_ELT (non-zero sign) or
G_AIE_ZEXT_EXTRACT_VECTOR_ELT (zero sign) defining an s20 value, so the
intermediate G_TRUNC to s20 is no longer needed. Intrinsics with a
non-constant sign operand are left untouched.

The generic-instruction verifier now accepts an s20 destination in
addition to s32 for both extract opcodes.
---
 llvm/lib/Target/AIE/AIE2InstrInfo.cpp         |   5 +-
 llvm/lib/Target/AIE/AIECombinerHelper.cpp     |  44 ++-
 .../prelegalizercombiner-s20-narrowing.mir    | 291 ++++++++++++++++++
 .../verifier/verify-szext-extract-vec-elt.mir |   4 +-
 4 files changed, 340 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AIE/AIE2InstrInfo.cpp b/llvm/lib/Target/AIE/AIE2InstrInfo.cpp
index b1014a8d94a6..14b2e1355604 100644
--- a/llvm/lib/Target/AIE/AIE2InstrInfo.cpp
+++ b/llvm/lib/Target/AIE/AIE2InstrInfo.cpp
@@ -152,8 +152,9 @@ bool AIE2InstrInfo::verifyGenericInstruction(const MachineInstr &MI,
   switch (MI.getOpcode()) {
   case AIE2::G_AIE_ZEXT_EXTRACT_VECTOR_ELT:
   case AIE2::G_AIE_SEXT_EXTRACT_VECTOR_ELT:
-    ErrInfo = "Expected 32bit scalar destination";
-    return MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32);
+    ErrInfo = "Expected 32bit or 20bit scalar destination";
+    return (MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32) ||
+            MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(20));
   case AIE2::G_AIE_PAD_VECTOR_UNDEF:
     return verifySameLaneTypes(MI, ErrInfo) &&
            isLegalTypeToUnpad(MRI.getType(MI.getOperand(0).getReg()),
diff --git a/llvm/lib/Target/AIE/AIECombinerHelper.cpp b/llvm/lib/Target/AIE/AIECombinerHelper.cpp
index 1590f9e045a0..7403334c3967 100644
--- a/llvm/lib/Target/AIE/AIECombinerHelper.cpp
+++ b/llvm/lib/Target/AIE/AIECombinerHelper.cpp
@@ -567,6 +567,19 @@ void llvm::applyGlobalValOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
       B.buildConstant(LLT::scalar(20), -static_cast<int64_t>(Offset)));
 }
 
+/// Check if the Intrinsic can produce S20
+static bool isS20Intrinsic(const MachineInstr &MI,
+                           const MachineRegisterInfo &MRI) {
+  const unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
+  if (IntrinsicID == Intrinsic::aie2_vextract_elem8_I512 ||
+      IntrinsicID == Intrinsic::aie2_vextract_elem16_I512 ||
+      IntrinsicID == Intrinsic::aie2_vextract_elem32_I512) {
+    // Check if the sign value is constant
+    return getIConstantVRegSExtVal(MI.getOperand(4).getReg(), MRI).has_value();
+  }
+  return false;
+}
+
 /// Checks whether the instruction produces or can be adapted to produce
 /// a single S20 output.
 static bool canProduceS20(const MachineRegisterInfo &MRI,
@@ -581,6 +594,8 @@ static bool canProduceS20(const MachineRegisterInfo &MRI,
   case TargetOpcode::G_CONSTANT:
   case TargetOpcode::G_IMPLICIT_DEF:
     return true;
+  case TargetOpcode::G_INTRINSIC:
+    return isS20Intrinsic(MI, MRI);
   default:
     return false;
   }
@@ -901,6 +916,31 @@ bool modifyToS20(InstrNode Start, MachineRegisterInfo &MRI, MachineIRBuilder &B,
     Helper.tryCombineCopy(*StartNodeMI);
     return true;
   }
+  case TargetOpcode::G_INTRINSIC: {
+    if (isS20Intrinsic(*StartNodeMI, MRI)) {
+      Register ExtractDstReg = StartNodeMI->getOperand(0).getReg();
+      // Note: Operand 1 is the ID of the intrinsic
+      const Register SrcReg0 = StartNodeMI->getOperand(2).getReg();
+      const Register SrcReg1 = StartNodeMI->getOperand(3).getReg();
+      const Register SignReg = StartNodeMI->getOperand(4).getReg();
+
+      auto SignVal = getIConstantVRegSExtVal(SignReg, MRI);
+      assert(SignVal.has_value() && "Expected SignVal to be constant");
+      // Erase the original instruction and set the type for the destination
+      // register
+      Helper.eraseInst(*StartNodeMI);
+      MRI.setType(ExtractDstReg, S20);
+      const unsigned Opcode = SignVal.value()
+                                  ? AIE2::G_AIE_SEXT_EXTRACT_VECTOR_ELT
+                                  : AIE2::G_AIE_ZEXT_EXTRACT_VECTOR_ELT;
+      StartNodeMI =
+          B.buildInstr(Opcode, {ExtractDstReg}, {SrcReg0, SrcReg1}).getInstr();
+    } else {
+      LLVM_DEBUG(dbgs() << "Node :" << *StartNodeMI);
+      llvm_unreachable("Unexpected OpCode, while modifying IR");
+    }
+    break;
+  }
   default: {
     LLVM_DEBUG(dbgs() << "Node :" << *StartNodeMI);
     llvm_unreachable("Unexpected OpCode, while modifying IR");
@@ -910,7 +950,9 @@ bool modifyToS20(InstrNode Start, MachineRegisterInfo &MRI, MachineIRBuilder &B,
   switch (StartNodeMI->getOpcode()) {
   case TargetOpcode::COPY:
   case TargetOpcode::G_LOAD:
-  case TargetOpcode::G_PHI: {
+  case TargetOpcode::G_PHI:
+  case AIE2::G_AIE_ZEXT_EXTRACT_VECTOR_ELT:
+  case AIE2::G_AIE_SEXT_EXTRACT_VECTOR_ELT: {
     const auto UseInstIter =
         MRI.use_nodbg_instructions(StartNodeMI->getOperand(0).getReg());
     std::vector<MachineInstr *> UseInstr;
diff --git a/llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-s20-narrowing.mir b/llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-s20-narrowing.mir
index 3bfdf5b1a88b..4791eba5b762 100644
--- a/llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-s20-narrowing.mir
+++ b/llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-s20-narrowing.mir
@@ -781,3 +781,294 @@ body: |
     $r0 = COPY %11
     G_BR %bb.2
 ...
+# vextract8 with a constant zero sign operand feeds aie2.add.2d through a G_TRUNC: it is narrowed to S20 as G_AIE_ZEXT_EXTRACT_VECTOR_ELT and the G_TRUNC is removed
+---
+name: valid_vextract8_add2d
+legalized: false
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: valid_vextract8_add2d
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s8>) = COPY $x0
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0
+    ; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<64 x s8>), [[C]](s32)
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p0), [[INT1:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20)
+    ; CHECK-NEXT: $p0 = COPY [[INT]](p0)
+    %0:_(s32) = G_CONSTANT i32 7
+    %1:_(s32) = G_CONSTANT i32 0
+    %2:_(<64 x s8>) = COPY $x0
+    %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32)
+    %4:_(s20) = G_TRUNC %3(s32)
+    %5:_(p0) = G_CONSTANT i20 0
+    %6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20)
+    $p0 = COPY %6
+...
+# vextract16 with a constant zero sign operand feeds aie2.add.2d through a G_TRUNC: it is narrowed to S20 as G_AIE_ZEXT_EXTRACT_VECTOR_ELT and the G_TRUNC is removed
+---
+name: valid_vextract16_add2d
+legalized: false
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: valid_vextract16_add2d
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0
+    ; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<32 x s16>), [[C]](s32)
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p0), [[INT1:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20)
+    ; CHECK-NEXT: $p0 = COPY [[INT]](p0)
+    %0:_(s32) = G_CONSTANT i32 7
+    %1:_(s32) = G_CONSTANT i32 0
+    %2:_(<32 x s16>) = COPY $x0
+    %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32)
+    %4:_(s20) = G_TRUNC %3(s32)
+    %5:_(p0) = G_CONSTANT i20 0
+    %6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20)
+    $p0 = COPY %6
+...
+# vextract32 with a constant zero sign operand feeds aie2.add.2d through a G_TRUNC: it is narrowed to S20 as G_AIE_ZEXT_EXTRACT_VECTOR_ELT and the G_TRUNC is removed
+---
+name: valid_vextract32_add2d
+legalized: false
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: valid_vextract32_add2d
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0
+    ; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[C]](s32)
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p0), [[INT1:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20)
+    ; CHECK-NEXT: $p0 = COPY [[INT]](p0)
+    %0:_(s32) = G_CONSTANT i32 7
+    %1:_(s32) = G_CONSTANT i32 0
+    %2:_(<16 x s32>) = COPY $x0
+    %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), %2(<16 x s32>), %0(s32), %1(s32)
+    %4:_(s20) = G_TRUNC %3(s32)
+    %5:_(p0) = G_CONSTANT i20 0
+    %6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20)
+    $p0 = COPY %6
+...
+
+# The G_PTR_ADD offset has a single source node, a vextract8 with a constant non-zero sign operand: it is narrowed to S20 as G_AIE_SEXT_EXTRACT_VECTOR_ELT (emitted in the block of its user) and the intermediate G_TRUNC is removed
+---
+name: valid_vextract8_PTR_ADD
+legalized: false
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: valid_vextract8_PTR_ADD
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $p0, $x0
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $p0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<64 x s8>) = COPY $x0
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT:   [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY1]](<64 x s8>), [[C]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s20)
+  ; CHECK-NEXT:   G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32))
+  ; CHECK-NEXT:   G_BR %bb.1
+  bb.1:
+    successors: %bb.2(0x80000000); %bb.2(100.00%)
+    liveins: $p0, $x0
+    %0:_(p0) = COPY $p0
+    %1:_(s32) = G_CONSTANT i32 0
+    %2:_(s32) = G_CONSTANT i32 1
+    %3:_(<64 x s8>) = COPY $x0
+    %4:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %3(<64 x s8>), %1(s32), %2(s32)
+
+  bb.2:
+    successors: %bb.2(0x80000000); %bb.2(100.00%)
+
+    %5:_(s20) = G_TRUNC %4:_(s32)
+    %6:_(p0) = G_PTR_ADD %0:_, %5:_(s20)
+    G_STORE %1:_(s32), %6:_(p0) :: (store (s32))
+    G_BR %bb.2
+...
+
+# The G_PTR_ADD offset has a single source node, a vextract16 with a constant non-zero sign operand: it is narrowed to S20 as G_AIE_SEXT_EXTRACT_VECTOR_ELT (emitted in the block of its user) and the intermediate G_TRUNC is removed
+---
+name: valid_vextract16_PTR_ADD
+legalized: false
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: valid_vextract16_PTR_ADD
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $p0, $x0
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $p0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<32 x s16>) = COPY $x0
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT:   [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY1]](<32 x s16>), [[C]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s20)
+  ; CHECK-NEXT:   G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32))
+  ; CHECK-NEXT:   G_BR %bb.1
+  bb.1:
+    successors: %bb.2(0x80000000); %bb.2(100.00%)
+    liveins: $p0, $x0
+    %0:_(p0) = COPY $p0
+    %1:_(s32) = G_CONSTANT i32 0
+    %2:_(s32) = G_CONSTANT i32 1
+    %3:_(<32 x s16>) = COPY $x0
+    %4:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %3(<32 x s16>), %1(s32), %2(s32)
+
+  bb.2:
+    successors: %bb.2(0x80000000); %bb.2(100.00%)
+
+    %5:_(s20) = G_TRUNC %4:_(s32)
+    %6:_(p0) = G_PTR_ADD %0:_, %5:_(s20)
+    G_STORE %1:_(s32), %6:_(p0) :: (store (s32))
+    G_BR %bb.2
+...
+
+# The G_PTR_ADD offset has a single source node, a vextract32 with a constant non-zero sign operand: it is narrowed to S20 as G_AIE_SEXT_EXTRACT_VECTOR_ELT (emitted in the block of its user) and the intermediate G_TRUNC is removed
+---
+name: valid_vextract32_PTR_ADD
+legalized: false
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: valid_vextract32_PTR_ADD
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $p0, $x0
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $p0
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY $x0
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT:   [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY1]](<16 x s32>), [[C]](s32)
+  ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s20)
+  ; CHECK-NEXT:   G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32))
+  ; CHECK-NEXT:   G_BR %bb.1
+  bb.1:
+    successors: %bb.2(0x80000000); %bb.2(100.00%)
+    liveins: $p0, $x0
+    %0:_(p0) = COPY $p0
+    %1:_(s32) = G_CONSTANT i32 0
+    %2:_(s32) = G_CONSTANT i32 1
+    %3:_(<16 x s32>) = COPY $x0
+    %4:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), %3(<16 x s32>), %1(s32), %2(s32)
+
+  bb.2:
+    successors: %bb.2(0x80000000); %bb.2(100.00%)
+
+    %5:_(s20) = G_TRUNC %4:_(s32)
+    %6:_(p0) = G_PTR_ADD %0:_, %5:_(s20)
+    G_STORE %1:_(s32), %6:_(p0) :: (store (s32))
+    G_BR %bb.2
+...
+
+# Negative test: the sign operand of vextract8 is a COPY of $r0 rather than a constant, so no narrowing to s20 happens and both the intrinsic and its G_TRUNC are kept
+---
+name: valid_vextract8_add2d_neg
+legalized: false
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x0, $r0
+
+    ; CHECK-LABEL: name: valid_vextract8_add2d_neg
+    ; CHECK: liveins: $x0, $r0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<64 x s8>) = COPY $x0
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), [[COPY1]](<64 x s8>), [[C]](s32), [[COPY]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[INT]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0
+    ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(p0), [[INT2:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20)
+    ; CHECK-NEXT: $p0 = COPY [[INT1]](p0)
+    %0:_(s32) = G_CONSTANT i32 7
+    %1:_(s32) = COPY $r0
+    %2:_(<64 x s8>) = COPY $x0
+    %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32)
+    %4:_(s20) = G_TRUNC %3(s32)
+    %5:_(p0) = G_CONSTANT i20 0
+    %6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20)
+    $p0 = COPY %6
+...
+
+# Negative test: the sign operand of vextract16 is a COPY of $r0 rather than a constant, so no narrowing to s20 happens and both the intrinsic and its G_TRUNC are kept
+---
+name: valid_vextract16_add2d_neg
+legalized: false
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x0, $r0
+
+    ; CHECK-LABEL: name: valid_vextract16_add2d_neg
+    ; CHECK: liveins: $x0, $r0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<32 x s16>) = COPY $x0
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), [[COPY1]](<32 x s16>), [[C]](s32), [[COPY]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[INT]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0
+    ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(p0), [[INT2:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20)
+    ; CHECK-NEXT: $p0 = COPY [[INT1]](p0)
+    %0:_(s32) = G_CONSTANT i32 7
+    %1:_(s32) = COPY $r0
+    %2:_(<32 x s16>) = COPY $x0
+    %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32)
+    %4:_(s20) = G_TRUNC %3(s32)
+    %5:_(p0) = G_CONSTANT i20 0
+    %6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20)
+    $p0 = COPY %6
+...
+
+# Negative test: the sign operand of vextract32 is a COPY of $r0 rather than a constant, so no narrowing to s20 happens and both the intrinsic and its G_TRUNC are kept
+---
+name: valid_vextract32_add2d_neg
+legalized: false
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $x0, $r0
+
+    ; CHECK-LABEL: name: valid_vextract32_add2d_neg
+    ; CHECK: liveins: $x0, $r0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY $x0
+    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), [[COPY1]](<16 x s32>), [[C]](s32), [[COPY]](s32)
+    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[INT]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0
+    ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(p0), [[INT2:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20)
+    ; CHECK-NEXT: $p0 = COPY [[INT1]](p0)
+    %0:_(s32) = G_CONSTANT i32 7
+    %1:_(s32) = COPY $r0
+    %2:_(<16 x s32>) = COPY $x0
+    %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), %2(<16 x s32>), %0(s32), %1(s32)
+    %4:_(s20) = G_TRUNC %3(s32)
+    %5:_(p0) = G_CONSTANT i20 0
+    %6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20)
+    $p0 = COPY %6
+...
diff --git a/llvm/test/CodeGen/AIE/aie2/verifier/verify-szext-extract-vec-elt.mir b/llvm/test/CodeGen/AIE/aie2/verifier/verify-szext-extract-vec-elt.mir index b9f489914d56..b56fa14667f8 100644 --- a/llvm/test/CodeGen/AIE/aie2/verifier/verify-szext-extract-vec-elt.mir +++ b/llvm/test/CodeGen/AIE/aie2/verifier/verify-szext-extract-vec-elt.mir @@ -18,6 +18,8 @@ body: | %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32) %3:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32) + %4:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32) + %5:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32) ... --- @@ -25,7 +27,7 @@ name: nok alignment: 16 body: | bb.0 (align 16): - ; CHECK-COUNT-4: Bad machine code: Expected 32bit scalar destination + ; CHECK-COUNT-4: Bad machine code: Expected 32bit or 20bit scalar destination ; CHECK-NOT: Bad machine code %0:_(<16 x s16>) = COPY $wl0 %1:_(s32) = G_CONSTANT i32 1