Skip to content

Commit

Permalink
[GlobalISel][AArch64] Legalize G_ADD, G_SUB, G_AND, G_OR, and G_XOR for SVE (llvm#110561)
Browse files Browse the repository at this point in the history

Credits: llvm#72976

LLVM ERROR: cannot select: %3:zpr(<vscale x 2 x s64>) = G_MUL %0:fpr,
%1:fpr (in function: xmulnxv2i64)

;; mul
define void @xmulnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b,
ptr %p) {
entry:
  %c = mul <vscale x 2 x i64> %a, %b
  store <vscale x 2 x i64> %c, ptr %p, align 16
  ret void
}

define void @mulnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b,
ptr %p) {
entry:
  %c = mul <vscale x 4 x i32> %a, %b
  store <vscale x 4 x i32> %c, ptr %p, align 16
  ret void
}

define void @mulnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b,
ptr %p) {
entry:
  %c = mul <vscale x 8 x i16> %a, %b
  store <vscale x 8 x i16> %c, ptr %p, align 16
  ret void
}

define void @mulnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b,
ptr %p) {
entry:
  %c = mul <vscale x 16 x i8> %a, %b
  store <vscale x 16 x i8> %c, ptr %p, align 16
  ret void
}
  • Loading branch information
tschuett authored Oct 27, 2024
1 parent 5621929 commit 7b3da7b
Show file tree
Hide file tree
Showing 7 changed files with 257 additions and 10 deletions.
12 changes: 7 additions & 5 deletions llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -998,8 +998,7 @@ class LegalizeRuleSet {
LegalizeAction::WidenScalar,
[=](const LegalityQuery &Query) {
const LLT VecTy = Query.Types[TypeIdx];
return VecTy.isVector() && !VecTy.isScalable() &&
VecTy.getSizeInBits() < VectorSize;
return VecTy.isFixedVector() && VecTy.getSizeInBits() < VectorSize;
},
[=](const LegalityQuery &Query) {
const LLT VecTy = Query.Types[TypeIdx];
Expand Down Expand Up @@ -1172,7 +1171,7 @@ class LegalizeRuleSet {
LegalizeAction::MoreElements,
[=](const LegalityQuery &Query) {
LLT VecTy = Query.Types[TypeIdx];
return VecTy.isVector() && VecTy.getElementType() == EltTy &&
return VecTy.isFixedVector() && VecTy.getElementType() == EltTy &&
VecTy.getNumElements() < MinElements;
},
[=](const LegalityQuery &Query) {
Expand All @@ -1190,7 +1189,7 @@ class LegalizeRuleSet {
LegalizeAction::MoreElements,
[=](const LegalityQuery &Query) {
LLT VecTy = Query.Types[TypeIdx];
return VecTy.isVector() && VecTy.getElementType() == EltTy &&
return VecTy.isFixedVector() && VecTy.getElementType() == EltTy &&
(VecTy.getNumElements() % NumElts != 0);
},
[=](const LegalityQuery &Query) {
Expand All @@ -1210,7 +1209,7 @@ class LegalizeRuleSet {
LegalizeAction::FewerElements,
[=](const LegalityQuery &Query) {
LLT VecTy = Query.Types[TypeIdx];
return VecTy.isVector() && VecTy.getElementType() == EltTy &&
return VecTy.isFixedVector() && VecTy.getElementType() == EltTy &&
VecTy.getNumElements() > MaxElements;
},
[=](const LegalityQuery &Query) {
Expand All @@ -1231,6 +1230,9 @@ class LegalizeRuleSet {
assert(MinTy.getElementType() == MaxTy.getElementType() &&
"Expected element types to agree");

assert((!MinTy.isScalableVector() && !MaxTy.isScalableVector()) &&
"Unexpected scalable vectors");

const LLT EltTy = MinTy.getElementType();
return clampMinNumElements(TypeIdx, EltTy, MinTy.getNumElements())
.clampMaxNumElements(TypeIdx, EltTy, MaxTy.getNumElements());
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,8 @@ unsigned AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(unsigned RBIdx,
const unsigned MinSize = Size.getKnownMinValue();
assert((!Size.isScalable() || MinSize >= 128) &&
"Scalable vector types should have size of at least 128 bits");
if (Size.isScalable())
return 3;
if (MinSize <= 16)
return 0;
if (MinSize <= 32)
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -393,8 +393,8 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
// i1 is a special case because SDAG i1 true is naturally zero extended
// when widened using ANYEXT. We need to do it explicitly here.
auto &Flags = CurArgInfo.Flags[0];
if (MRI.getType(CurVReg).getSizeInBits() == 1 && !Flags.isSExt() &&
!Flags.isZExt()) {
if (MRI.getType(CurVReg).getSizeInBits() == TypeSize::getFixed(1) &&
!Flags.isSExt() && !Flags.isZExt()) {
CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0);
} else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) ==
1) {
Expand Down
10 changes: 9 additions & 1 deletion llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,7 @@ getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
unsigned RegBankID = RB.getID();

if (RegBankID == AArch64::GPRRegBankID) {
assert(!SizeInBits.isScalable() && "Unexpected scalable register size");
if (SizeInBits <= 32)
return GetAllRegSet ? &AArch64::GPR32allRegClass
: &AArch64::GPR32RegClass;
Expand All @@ -626,6 +627,12 @@ getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
}

if (RegBankID == AArch64::FPRRegBankID) {
if (SizeInBits.isScalable()) {
assert(SizeInBits == TypeSize::getScalable(128) &&
"Unexpected scalable register size");
return &AArch64::ZPRRegClass;
}

switch (SizeInBits) {
default:
return nullptr;
Expand Down Expand Up @@ -964,7 +971,8 @@ getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
// then we can pull it into the helpers that get the appropriate class for a
// register bank. Or make a new helper that carries along some constraint
// information.
if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
if (SrcRegBank != DstRegBank &&
(DstSize == TypeSize::getFixed(1) && SrcSize == TypeSize::getFixed(1)))
SrcSize = DstSize = TypeSize::getFixed(32);

return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
Expand Down
30 changes: 29 additions & 1 deletion llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)

const bool HasCSSC = ST.hasCSSC();
const bool HasRCPC3 = ST.hasRCPC3();
const bool HasSVE = ST.hasSVE();

getActionDefinitionsBuilder(
{G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
Expand Down Expand Up @@ -127,7 +128,34 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampNumElements(0, v2s64, v2s64)
.moreElementsToNextPow2(0);

getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
.legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
.legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64})
.widenScalarToNextPow2(0)
.clampScalar(0, s32, s64)
.clampMaxNumElements(0, s8, 16)
.clampMaxNumElements(0, s16, 8)
.clampNumElements(0, v2s32, v4s32)
.clampNumElements(0, v2s64, v2s64)
.minScalarOrEltIf(
[=](const LegalityQuery &Query) {
return Query.Types[0].getNumElements() <= 2;
},
0, s32)
.minScalarOrEltIf(
[=](const LegalityQuery &Query) {
return Query.Types[0].getNumElements() <= 4;
},
0, s16)
.minScalarOrEltIf(
[=](const LegalityQuery &Query) {
return Query.Types[0].getNumElements() <= 16;
},
0, s8)
.scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
.moreElementsToNextPow2(0);

getActionDefinitionsBuilder(G_MUL)
.legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})
.widenScalarToNextPow2(0)
.clampScalar(0, s32, s64)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
#
# DEBUG-NEXT: G_MUL (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. the first uncovered type index: 1, OK
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
#
Expand Down
208 changes: 208 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/sve-integer.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc < %s -mtriple aarch64 -mattr=+sve -aarch64-enable-gisel-sve=1 | FileCheck %s
; RUN: llc < %s -mtriple aarch64 -mattr=+sve -global-isel -aarch64-enable-gisel-sve=1 | FileCheck %s

;; add
; Scalable i64 add: expects a single unpredicated SVE add on .d elements via GlobalISel.
define <vscale x 2 x i64> @addnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: addnxv2i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: add z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = add <vscale x 2 x i64> %a, %b
ret <vscale x 2 x i64> %c
}

; Scalable i32 add: expects a single unpredicated SVE add on .s elements.
define <vscale x 4 x i32> @addnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: addnxv4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: add z0.s, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = add <vscale x 4 x i32> %a, %b
ret <vscale x 4 x i32> %c
}

; Scalable i16 add: expects a single unpredicated SVE add on .h elements.
; NOTE(review): ptr %p is unused — presumably leftover from a store-based variant; confirm before removing.
define <vscale x 8 x i16> @addnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
; CHECK-LABEL: addnxv8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: add z0.h, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = add <vscale x 8 x i16> %a, %b
ret <vscale x 8 x i16> %c
}

; Scalable i8 add: expects a single unpredicated SVE add on .b elements.
define <vscale x 16 x i8> @addnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: addnxv16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: add z0.b, z0.b, z1.b
; CHECK-NEXT: ret
entry:
%c = add <vscale x 16 x i8> %a, %b
ret <vscale x 16 x i8> %c
}

;; sub
; Scalable i64 sub: expects a single unpredicated SVE sub on .d elements.
define <vscale x 2 x i64> @subnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: subnxv2i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = sub <vscale x 2 x i64> %a, %b
ret <vscale x 2 x i64> %c
}

; Scalable i32 sub: expects a single unpredicated SVE sub on .s elements.
define <vscale x 4 x i32> @subnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: subnxv4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub z0.s, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = sub <vscale x 4 x i32> %a, %b
ret <vscale x 4 x i32> %c
}

; Scalable i16 sub: expects a single unpredicated SVE sub on .h elements.
; NOTE(review): ptr %p is unused — presumably leftover from a store-based variant; confirm before removing.
define <vscale x 8 x i16> @subnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
; CHECK-LABEL: subnxv8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub z0.h, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = sub <vscale x 8 x i16> %a, %b
ret <vscale x 8 x i16> %c
}

; Scalable i8 sub: expects a single unpredicated SVE sub on .b elements.
define <vscale x 16 x i8> @subnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: subnxv16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub z0.b, z0.b, z1.b
; CHECK-NEXT: ret
entry:
%c = sub <vscale x 16 x i8> %a, %b
ret <vscale x 16 x i8> %c
}

;; and
; Scalable bitwise and: the expected output uses the .d form (bitwise ops are
; element-size agnostic, so all widths in this file select the .d encoding).
define <vscale x 2 x i64> @andnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: andnxv2i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = and <vscale x 2 x i64> %a, %b
ret <vscale x 2 x i64> %c
}

; Scalable i32 and: still expects the .d-form bitwise and (element-size agnostic).
define <vscale x 4 x i32> @andnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: andnxv4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = and <vscale x 4 x i32> %a, %b
ret <vscale x 4 x i32> %c
}

; Scalable i16 and: still expects the .d-form bitwise and (element-size agnostic).
; NOTE(review): ptr %p is unused — presumably leftover from a store-based variant; confirm before removing.
define <vscale x 8 x i16> @andnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
; CHECK-LABEL: andnxv8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = and <vscale x 8 x i16> %a, %b
ret <vscale x 8 x i16> %c
}

; Scalable i8 and: still expects the .d-form bitwise and (element-size agnostic).
define <vscale x 16 x i8> @andnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: andnxv16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = and <vscale x 16 x i8> %a, %b
ret <vscale x 16 x i8> %c
}

;; or
; Scalable i64 or: expects the .d-form SVE orr (bitwise, element-size agnostic).
define <vscale x 2 x i64> @ornxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: ornxv2i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = or <vscale x 2 x i64> %a, %b
ret <vscale x 2 x i64> %c
}

; Scalable i32 or: expects the .d-form SVE orr (bitwise, element-size agnostic).
define <vscale x 4 x i32> @ornxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: ornxv4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = or <vscale x 4 x i32> %a, %b
ret <vscale x 4 x i32> %c
}

; Scalable i16 or: expects the .d-form SVE orr (bitwise, element-size agnostic).
; NOTE(review): ptr %p is unused — presumably leftover from a store-based variant; confirm before removing.
define <vscale x 8 x i16> @ornxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
; CHECK-LABEL: ornxv8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = or <vscale x 8 x i16> %a, %b
ret <vscale x 8 x i16> %c
}

; Scalable i8 or: expects the .d-form SVE orr (bitwise, element-size agnostic).
define <vscale x 16 x i8> @ornxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: ornxv16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = or <vscale x 16 x i8> %a, %b
ret <vscale x 16 x i8> %c
}

;; xor
; Scalable i64 xor: expects the .d-form SVE eor (bitwise, element-size agnostic).
define <vscale x 2 x i64> @xornxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: xornxv2i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = xor <vscale x 2 x i64> %a, %b
ret <vscale x 2 x i64> %c
}

; Scalable i32 xor: expects the .d-form SVE eor (bitwise, element-size agnostic).
define <vscale x 4 x i32> @xornxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: xornxv4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = xor <vscale x 4 x i32> %a, %b
ret <vscale x 4 x i32> %c
}

; Scalable i16 xor: expects the .d-form SVE eor (bitwise, element-size agnostic).
; NOTE(review): ptr %p is unused — presumably leftover from a store-based variant; confirm before removing.
define <vscale x 8 x i16> @xornxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
; CHECK-LABEL: xornxv8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = xor <vscale x 8 x i16> %a, %b
ret <vscale x 8 x i16> %c
}

; Scalable i8 xor: expects the .d-form SVE eor (bitwise, element-size agnostic).
define <vscale x 16 x i8> @xornxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: xornxv16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = xor <vscale x 16 x i8> %a, %b
ret <vscale x 16 x i8> %c
}

0 comments on commit 7b3da7b

Please sign in to comment.