Skip to content

Commit

Permalink
Support for allowing direct VEXTRACT to 20-bit registers
Browse files Browse the repository at this point in the history
  • Loading branch information
abhinay-anubola committed Nov 8, 2024
1 parent 1b74ec6 commit 650d8a9
Show file tree
Hide file tree
Showing 4 changed files with 340 additions and 4 deletions.
5 changes: 3 additions & 2 deletions llvm/lib/Target/AIE/AIE2InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,9 @@ bool AIE2InstrInfo::verifyGenericInstruction(const MachineInstr &MI,
switch (MI.getOpcode()) {
case AIE2::G_AIE_ZEXT_EXTRACT_VECTOR_ELT:
case AIE2::G_AIE_SEXT_EXTRACT_VECTOR_ELT:
ErrInfo = "Expected 32bit scalar destination";
return MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32);
ErrInfo = "Expected 32bit or 20bit scalar destination";
return (MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32) ||
MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(20));
case AIE2::G_AIE_PAD_VECTOR_UNDEF:
return verifySameLaneTypes(MI, ErrInfo) &&
isLegalTypeToUnpad(MRI.getType(MI.getOperand(0).getReg()),
Expand Down
44 changes: 43 additions & 1 deletion llvm/lib/Target/AIE/AIECombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,19 @@ void llvm::applyGlobalValOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
B.buildConstant(LLT::scalar(20), -static_cast<int64_t>(Offset)));
}

/// Check if the Intrinsic can produce S20
static bool isS20Intrinsic(const MachineInstr &MI,
const MachineRegisterInfo &MRI) {
const unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
if (IntrinsicID == Intrinsic::aie2_vextract_elem8_I512 ||
IntrinsicID == Intrinsic::aie2_vextract_elem16_I512 ||
IntrinsicID == Intrinsic::aie2_vextract_elem32_I512) {
// Check if the sign value is constant
return getIConstantVRegSExtVal(MI.getOperand(4).getReg(), MRI).has_value();
}
return false;
}

/// Checks whether the instruction produces or can be adapted to produce
/// a single S20 output.
static bool canProduceS20(const MachineRegisterInfo &MRI,
Expand All @@ -581,6 +594,8 @@ static bool canProduceS20(const MachineRegisterInfo &MRI,
case TargetOpcode::G_CONSTANT:
case TargetOpcode::G_IMPLICIT_DEF:
return true;
case TargetOpcode::G_INTRINSIC:
return isS20Intrinsic(MI, MRI);
default:
return false;
}
Expand Down Expand Up @@ -901,6 +916,31 @@ bool modifyToS20(InstrNode Start, MachineRegisterInfo &MRI, MachineIRBuilder &B,
Helper.tryCombineCopy(*StartNodeMI);
return true;
}
case TargetOpcode::G_INTRINSIC: {
if (isS20Intrinsic(*StartNodeMI, MRI)) {
Register ExtractDstReg = StartNodeMI->getOperand(0).getReg();
// Note: Operand 1 is the ID of the intrinsic
const Register SrcReg0 = StartNodeMI->getOperand(2).getReg();
const Register SrcReg1 = StartNodeMI->getOperand(3).getReg();
const Register SignReg = StartNodeMI->getOperand(4).getReg();

auto SignVal = getIConstantVRegSExtVal(SignReg, MRI);
assert(SignVal.has_value() && "Expected SignVal to be constant");
// Erase the original instruction and set the type for the destination
// register
Helper.eraseInst(*StartNodeMI);
MRI.setType(ExtractDstReg, S20);
const unsigned Opcode = SignVal.value()
? AIE2::G_AIE_SEXT_EXTRACT_VECTOR_ELT
: AIE2::G_AIE_ZEXT_EXTRACT_VECTOR_ELT;
StartNodeMI =
B.buildInstr(Opcode, {ExtractDstReg}, {SrcReg0, SrcReg1}).getInstr();
} else {
LLVM_DEBUG(dbgs() << "Node :" << *StartNodeMI);
llvm_unreachable("Unexpected OpCode, while modifying IR");
}
break;
}
default: {
LLVM_DEBUG(dbgs() << "Node :" << *StartNodeMI);
llvm_unreachable("Unexpected OpCode, while modifying IR");
Expand All @@ -910,7 +950,9 @@ bool modifyToS20(InstrNode Start, MachineRegisterInfo &MRI, MachineIRBuilder &B,
switch (StartNodeMI->getOpcode()) {
case TargetOpcode::COPY:
case TargetOpcode::G_LOAD:
case TargetOpcode::G_PHI: {
case TargetOpcode::G_PHI:
case AIE2::G_AIE_ZEXT_EXTRACT_VECTOR_ELT:
case AIE2::G_AIE_SEXT_EXTRACT_VECTOR_ELT: {
const auto UseInstIter =
MRI.use_nodbg_instructions(StartNodeMI->getOperand(0).getReg());
std::vector<MachineInstr *> UseInstr;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -781,3 +781,294 @@ body: |
$r0 = COPY %11
G_BR %bb.2
...

---
name: valid_vextract8_add2d
legalized: false
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $x0
; CHECK-LABEL: name: valid_vextract8_add2d
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s8>) = COPY $x0
; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0
; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<64 x s8>), [[C]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p0), [[INT1:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20)
; CHECK-NEXT: $p0 = COPY [[INT]](p0)
%0:_(s32) = G_CONSTANT i32 7
%1:_(s32) = G_CONSTANT i32 0
%2:_(<64 x s8>) = COPY $x0
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32)
%4:_(s20) = G_TRUNC %3(s32)
%5:_(p0) = G_CONSTANT i20 0
%6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20)
$p0 = COPY %6
...

---
name: valid_vextract16_add2d
legalized: false
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $x0
; CHECK-LABEL: name: valid_vextract16_add2d
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0
; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0
; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<32 x s16>), [[C]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p0), [[INT1:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20)
; CHECK-NEXT: $p0 = COPY [[INT]](p0)
%0:_(s32) = G_CONSTANT i32 7
%1:_(s32) = G_CONSTANT i32 0
%2:_(<32 x s16>) = COPY $x0
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32)
%4:_(s20) = G_TRUNC %3(s32)
%5:_(p0) = G_CONSTANT i20 0
%6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20)
$p0 = COPY %6
...

---
name: valid_vextract32_add2d
legalized: false
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $x0
; CHECK-LABEL: name: valid_vextract32_add2d
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0
; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0
; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[C]](s32)
; CHECK-NEXT: [[INT:%[0-9]+]]:_(p0), [[INT1:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20)
; CHECK-NEXT: $p0 = COPY [[INT]](p0)
%0:_(s32) = G_CONSTANT i32 7
%1:_(s32) = G_CONSTANT i32 0
%2:_(<16 x s32>) = COPY $x0
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), %2(<16 x s32>), %0(s32), %1(s32)
%4:_(s20) = G_TRUNC %3(s32)
%5:_(p0) = G_CONSTANT i20 0
%6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20)
$p0 = COPY %6
...

# Only one Src Node (vextract8) for G_PTR_ADD that is narrowed to S20 type, intermediate G_TRUNC is removed
---
name: valid_vextract8_PTR_ADD
legalized: false
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: valid_vextract8_PTR_ADD
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $p0, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<64 x s8>) = COPY $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY1]](<64 x s8>), [[C]](s32)
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s20)
; CHECK-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32))
; CHECK-NEXT: G_BR %bb.1
bb.1:
successors: %bb.2(0x80000000); %bb.2(100.00%)
liveins: $p0, $x0
%0:_(p0) = COPY $p0
%1:_(s32) = G_CONSTANT i32 0
%2:_(s32) = G_CONSTANT i32 1
%3:_(<64 x s8>) = COPY $x0
%4:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %3(<64 x s8>), %1(s32), %2(s32)
bb.2:
successors: %bb.2(0x80000000); %bb.2(100.00%)
%5:_(s20) = G_TRUNC %4:_(s32)
%6:_(p0) = G_PTR_ADD %0:_, %5:_(s20)
G_STORE %1:_(s32), %6:_(p0) :: (store (s32))
G_BR %bb.2
...

# Only one Src Node (vextract16) for G_PTR_ADD that is narrowed to S20 type, intermediate G_TRUNC is removed
---
name: valid_vextract16_PTR_ADD
legalized: false
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: valid_vextract16_PTR_ADD
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $p0, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<32 x s16>) = COPY $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY1]](<32 x s16>), [[C]](s32)
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s20)
; CHECK-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32))
; CHECK-NEXT: G_BR %bb.1
bb.1:
successors: %bb.2(0x80000000); %bb.2(100.00%)
liveins: $p0, $x0
%0:_(p0) = COPY $p0
%1:_(s32) = G_CONSTANT i32 0
%2:_(s32) = G_CONSTANT i32 1
%3:_(<32 x s16>) = COPY $x0
%4:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %3(<32 x s16>), %1(s32), %2(s32)
bb.2:
successors: %bb.2(0x80000000); %bb.2(100.00%)
%5:_(s20) = G_TRUNC %4:_(s32)
%6:_(p0) = G_PTR_ADD %0:_, %5:_(s20)
G_STORE %1:_(s32), %6:_(p0) :: (store (s32))
G_BR %bb.2
...

# Only one Src Node (vextract32) for G_PTR_ADD that is narrowed to S20 type, intermediate G_TRUNC is removed
---
name: valid_vextract32_PTR_ADD
legalized: false
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: valid_vextract32_PTR_ADD
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $p0, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY1]](<16 x s32>), [[C]](s32)
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s20)
; CHECK-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32))
; CHECK-NEXT: G_BR %bb.1
bb.1:
successors: %bb.2(0x80000000); %bb.2(100.00%)
liveins: $p0, $x0
%0:_(p0) = COPY $p0
%1:_(s32) = G_CONSTANT i32 0
%2:_(s32) = G_CONSTANT i32 1
%3:_(<16 x s32>) = COPY $x0
%4:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), %3(<16 x s32>), %1(s32), %2(s32)
bb.2:
successors: %bb.2(0x80000000); %bb.2(100.00%)
%5:_(s20) = G_TRUNC %4:_(s32)
%6:_(p0) = G_PTR_ADD %0:_, %5:_(s20)
G_STORE %1:_(s32), %6:_(p0) :: (store (s32))
G_BR %bb.2
...

# Negative Test Case: Narrowing to s20 is not possible because the vextract8 source node has a non-constant sign register
---
name: valid_vextract8_add2d_neg
legalized: false
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $x0, $r0
; CHECK-LABEL: name: valid_vextract8_add2d_neg
; CHECK: liveins: $x0, $r0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<64 x s8>) = COPY $x0
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), [[COPY1]](<64 x s8>), [[C]](s32), [[COPY]](s32)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[INT]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(p0), [[INT2:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20)
; CHECK-NEXT: $p0 = COPY [[INT1]](p0)
%0:_(s32) = G_CONSTANT i32 7
%1:_(s32) = COPY $r0
%2:_(<64 x s8>) = COPY $x0
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32)
%4:_(s20) = G_TRUNC %3(s32)
%5:_(p0) = G_CONSTANT i20 0
%6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20)
$p0 = COPY %6
...

# Negative Test Case: Narrowing to s20 is not possible because the vextract16 source node has a non-constant sign register
---
name: valid_vextract16_add2d_neg
legalized: false
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $x0, $r0
; CHECK-LABEL: name: valid_vextract16_add2d_neg
; CHECK: liveins: $x0, $r0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<32 x s16>) = COPY $x0
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), [[COPY1]](<32 x s16>), [[C]](s32), [[COPY]](s32)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[INT]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(p0), [[INT2:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20)
; CHECK-NEXT: $p0 = COPY [[INT1]](p0)
%0:_(s32) = G_CONSTANT i32 7
%1:_(s32) = COPY $r0
%2:_(<32 x s16>) = COPY $x0
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32)
%4:_(s20) = G_TRUNC %3(s32)
%5:_(p0) = G_CONSTANT i20 0
%6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20)
$p0 = COPY %6
...

# Negative Test Case: Narrowing to s20 is not possible because the vextract32 source node has a non-constant sign register
---
name: valid_vextract32_add2d_neg
legalized: false
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $x0, $r0
; CHECK-LABEL: name: valid_vextract32_add2d_neg
; CHECK: liveins: $x0, $r0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY $x0
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), [[COPY1]](<16 x s32>), [[C]](s32), [[COPY]](s32)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[INT]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0
; CHECK-NEXT: [[INT1:%[0-9]+]]:_(p0), [[INT2:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20)
; CHECK-NEXT: $p0 = COPY [[INT1]](p0)
%0:_(s32) = G_CONSTANT i32 7
%1:_(s32) = COPY $r0
%2:_(<16 x s32>) = COPY $x0
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), %2(<16 x s32>), %0(s32), %1(s32)
%4:_(s20) = G_TRUNC %3(s32)
%5:_(p0) = G_CONSTANT i20 0
%6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20)
$p0 = COPY %6
...
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,16 @@ body: |
%1:_(s32) = G_CONSTANT i32 1
%2:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32)
%3:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32)
%4:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32)
%5:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32)
...

---
name: nok
alignment: 16
body: |
bb.0 (align 16):
; CHECK-COUNT-4: Bad machine code: Expected 32bit scalar destination
; CHECK-COUNT-4: Bad machine code: Expected 32bit or 20bit scalar destination
; CHECK-NOT: Bad machine code
%0:_(<16 x s16>) = COPY $wl0
%1:_(s32) = G_CONSTANT i32 1
Expand Down

0 comments on commit 650d8a9

Please sign in to comment.