From 7b3da7b3b2b0e2f322dddf1f343571cc7fd09b09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Sun, 27 Oct 2024 23:14:07 +0100 Subject: [PATCH] [GlobalISel][AArch64] Legalize G_ADD, G_SUB, G_AND, G_OR, and G_XOR for SVE (#110561) Credits: https://github.com/llvm/llvm-project/pull/72976 LLVM ERROR: cannot select: %3:zpr(<vscale x 2 x s64>) = G_MUL %0:fpr, %1:fpr (in function: xmulnxv2i64) ;; mul define void @xmulnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, ptr %p) { entry: %c = mul <vscale x 2 x i64> %a, %b store <vscale x 2 x i64> %c, ptr %p, align 16 ret void } define void @mulnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, ptr %p) { entry: %c = mul <vscale x 4 x i32> %a, %b store <vscale x 4 x i32> %c, ptr %p, align 16 ret void } define void @mulnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) { entry: %c = mul <vscale x 8 x i16> %a, %b store <vscale x 8 x i16> %c, ptr %p, align 16 ret void } define void @mulnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, ptr %p) { entry: %c = mul <vscale x 16 x i8> %a, %b store <vscale x 16 x i8> %c, ptr %p, align 16 ret void } --- .../llvm/CodeGen/GlobalISel/LegalizerInfo.h | 12 +- .../AArch64/AArch64GenRegisterBankInfo.def | 2 + .../AArch64/GISel/AArch64CallLowering.cpp | 4 +- .../GISel/AArch64InstructionSelector.cpp | 10 +- .../AArch64/GISel/AArch64LegalizerInfo.cpp | 30 ++- .../GlobalISel/legalizer-info-validation.mir | 1 - .../CodeGen/AArch64/GlobalISel/sve-integer.ll | 208 ++++++++++++++++++ 7 files changed, 257 insertions(+), 10 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/sve-integer.ll diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index bcd44abb2088a0..6d71c150c8da6b 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -998,8 +998,7 @@ class LegalizeRuleSet { LegalizeAction::WidenScalar, [=](const LegalityQuery &Query) { const LLT VecTy = Query.Types[TypeIdx]; - return VecTy.isVector() && !VecTy.isScalable() && - VecTy.getSizeInBits() < VectorSize; + return VecTy.isFixedVector() && VecTy.getSizeInBits() < VectorSize; }, [=](const LegalityQuery &Query) { const LLT VecTy = Query.Types[TypeIdx]; @@ -1172,7 
+1171,7 @@ class LegalizeRuleSet { LegalizeAction::MoreElements, [=](const LegalityQuery &Query) { LLT VecTy = Query.Types[TypeIdx]; - return VecTy.isVector() && VecTy.getElementType() == EltTy && + return VecTy.isFixedVector() && VecTy.getElementType() == EltTy && VecTy.getNumElements() < MinElements; }, [=](const LegalityQuery &Query) { @@ -1190,7 +1189,7 @@ class LegalizeRuleSet { LegalizeAction::MoreElements, [=](const LegalityQuery &Query) { LLT VecTy = Query.Types[TypeIdx]; - return VecTy.isVector() && VecTy.getElementType() == EltTy && + return VecTy.isFixedVector() && VecTy.getElementType() == EltTy && (VecTy.getNumElements() % NumElts != 0); }, [=](const LegalityQuery &Query) { @@ -1210,7 +1209,7 @@ class LegalizeRuleSet { LegalizeAction::FewerElements, [=](const LegalityQuery &Query) { LLT VecTy = Query.Types[TypeIdx]; - return VecTy.isVector() && VecTy.getElementType() == EltTy && + return VecTy.isFixedVector() && VecTy.getElementType() == EltTy && VecTy.getNumElements() > MaxElements; }, [=](const LegalityQuery &Query) { @@ -1231,6 +1230,9 @@ class LegalizeRuleSet { assert(MinTy.getElementType() == MaxTy.getElementType() && "Expected element types to agree"); + assert((!MinTy.isScalableVector() && !MaxTy.isScalableVector()) && + "Unexpected scalable vectors"); + const LLT EltTy = MinTy.getElementType(); return clampMinNumElements(TypeIdx, EltTy, MinTy.getNumElements()) .clampMaxNumElements(TypeIdx, EltTy, MaxTy.getNumElements()); diff --git a/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def b/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def index 82066b48c84b40..8ff59f60968beb 100644 --- a/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def +++ b/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def @@ -183,6 +183,8 @@ unsigned AArch64GenRegisterBankInfo::getRegBankBaseIdxOffset(unsigned RBIdx, const unsigned MinSize = Size.getKnownMinValue(); assert((!Size.isScalable() || MinSize >= 128) && "Scalable vector types should have size of at 
least 128 bits"); + if (Size.isScalable()) + return 3; if (MinSize <= 16) return 0; if (MinSize <= 32) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp index 5aee7804de3e3f..6cbfb018b3183a 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -393,8 +393,8 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, // i1 is a special case because SDAG i1 true is naturally zero extended // when widened using ANYEXT. We need to do it explicitly here. auto &Flags = CurArgInfo.Flags[0]; - if (MRI.getType(CurVReg).getSizeInBits() == 1 && !Flags.isSExt() && - !Flags.isZExt()) { + if (MRI.getType(CurVReg).getSizeInBits() == TypeSize::getFixed(1) && + !Flags.isSExt() && !Flags.isZExt()) { CurVReg = MIRBuilder.buildZExt(LLT::scalar(8), CurVReg).getReg(0); } else if (TLI.getNumRegistersForCallingConv(Ctx, CC, SplitEVTs[i]) == 1) { diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index df0c09d32c074a..afea08ab092501 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -615,6 +615,7 @@ getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits, unsigned RegBankID = RB.getID(); if (RegBankID == AArch64::GPRRegBankID) { + assert(!SizeInBits.isScalable() && "Unexpected scalable register size"); if (SizeInBits <= 32) return GetAllRegSet ? 
&AArch64::GPR32allRegClass : &AArch64::GPR32RegClass; @@ -626,6 +627,12 @@ getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits, } if (RegBankID == AArch64::FPRRegBankID) { + if (SizeInBits.isScalable()) { + assert(SizeInBits == TypeSize::getScalable(128) && + "Unexpected scalable register size"); + return &AArch64::ZPRRegClass; + } + switch (SizeInBits) { default: return nullptr; @@ -964,7 +971,8 @@ getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, // then we can pull it into the helpers that get the appropriate class for a // register bank. Or make a new helper that carries along some constraint // information. - if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1)) + if (SrcRegBank != DstRegBank && + (DstSize == TypeSize::getFixed(1) && SrcSize == TypeSize::getFixed(1))) SrcSize = DstSize = TypeSize::getFixed(32); return {getMinClassForRegBank(SrcRegBank, SrcSize, true), diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 5cd1fea75025cd..dd65dbe594a634 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -91,6 +91,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) const bool HasCSSC = ST.hasCSSC(); const bool HasRCPC3 = ST.hasRCPC3(); + const bool HasSVE = ST.hasSVE(); getActionDefinitionsBuilder( {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER}) @@ -127,7 +128,34 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .clampNumElements(0, v2s64, v2s64) .moreElementsToNextPow2(0); - getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) + getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR}) + .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8}) + .legalFor(HasSVE, {nxv16s8, nxv8s16, nxv4s32, nxv2s64}) + .widenScalarToNextPow2(0) + .clampScalar(0, s32, s64) + 
.clampMaxNumElements(0, s8, 16) + .clampMaxNumElements(0, s16, 8) + .clampNumElements(0, v2s32, v4s32) + .clampNumElements(0, v2s64, v2s64) + .minScalarOrEltIf( + [=](const LegalityQuery &Query) { + return Query.Types[0].getNumElements() <= 2; + }, + 0, s32) + .minScalarOrEltIf( + [=](const LegalityQuery &Query) { + return Query.Types[0].getNumElements() <= 4; + }, + 0, s16) + .minScalarOrEltIf( + [=](const LegalityQuery &Query) { + return Query.Types[0].getNumElements() <= 16; + }, + 0, s8) + .scalarizeIf(scalarOrEltWiderThan(0, 64), 0) + .moreElementsToNextPow2(0); + + getActionDefinitionsBuilder(G_MUL) .legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8}) .widenScalarToNextPow2(0) .clampScalar(0, s32, s64) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index 80b6e4f6d528a2..0af60a503c5f1c 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -24,7 +24,6 @@ # DEBUG-NEXT: .. the first uncovered imm index: 0, OK # # DEBUG-NEXT: G_MUL (opcode {{[0-9]+}}): 1 type index, 0 imm indices -# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. the first uncovered type index: 1, OK # DEBUG-NEXT: .. 
the first uncovered imm index: 0, OK # diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/sve-integer.ll b/llvm/test/CodeGen/AArch64/GlobalISel/sve-integer.ll new file mode 100644 index 00000000000000..bc51cf7bac23c9 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/sve-integer.ll @@ -0,0 +1,208 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc < %s -mtriple aarch64 -mattr=+sve -aarch64-enable-gisel-sve=1 | FileCheck %s +; RUN: llc < %s -mtriple aarch64 -mattr=+sve -global-isel -aarch64-enable-gisel-sve=1 | FileCheck %s + +;; add +define <vscale x 2 x i64> @addnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: addnxv2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: add z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = add <vscale x 2 x i64> %a, %b + ret <vscale x 2 x i64> %c +} + +define <vscale x 4 x i32> @addnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: addnxv4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: add z0.s, z0.s, z1.s +; CHECK-NEXT: ret +entry: + %c = add <vscale x 4 x i32> %a, %b + ret <vscale x 4 x i32> %c +} + +define <vscale x 8 x i16> @addnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) { +; CHECK-LABEL: addnxv8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: add z0.h, z0.h, z1.h +; CHECK-NEXT: ret +entry: + %c = add <vscale x 8 x i16> %a, %b + ret <vscale x 8 x i16> %c +} + +define <vscale x 16 x i8> @addnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: addnxv16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: add z0.b, z0.b, z1.b +; CHECK-NEXT: ret +entry: + %c = add <vscale x 16 x i8> %a, %b + ret <vscale x 16 x i8> %c +} + +;; sub +define <vscale x 2 x i64> @subnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: subnxv2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = sub <vscale x 2 x i64> %a, %b + ret <vscale x 2 x i64> %c +} + +define <vscale x 4 x i32> @subnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: subnxv4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub z0.s, z0.s, z1.s +; CHECK-NEXT: ret +entry: + %c = sub <vscale x 4 x i32> %a, %b + ret <vscale x 4 x i32> %c +} + +define <vscale x 8 x i16> @subnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) { +; CHECK-LABEL: subnxv8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub z0.h, z0.h, z1.h +; CHECK-NEXT: ret +entry: + %c = sub <vscale x 8 x i16> %a, %b + ret <vscale x 8 x i16> %c +} + +define <vscale x 16 x i8> @subnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: subnxv16i8: +; CHECK: // %bb.0: 
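// %entry +; CHECK-NEXT: sub z0.b, z0.b, z1.b +; CHECK-NEXT: ret +entry: + %c = sub <vscale x 16 x i8> %a, %b + ret <vscale x 16 x i8> %c +} + +;; and +define <vscale x 2 x i64> @andnxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: andnxv2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = and <vscale x 2 x i64> %a, %b + ret <vscale x 2 x i64> %c +} + +define <vscale x 4 x i32> @andnxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: andnxv4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = and <vscale x 4 x i32> %a, %b + ret <vscale x 4 x i32> %c +} + +define <vscale x 8 x i16> @andnxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) { +; CHECK-LABEL: andnxv8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = and <vscale x 8 x i16> %a, %b + ret <vscale x 8 x i16> %c +} + +define <vscale x 16 x i8> @andnxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: andnxv16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = and <vscale x 16 x i8> %a, %b + ret <vscale x 16 x i8> %c +} + +;; or +define <vscale x 2 x i64> @ornxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: ornxv2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = or <vscale x 2 x i64> %a, %b + ret <vscale x 2 x i64> %c +} + +define <vscale x 4 x i32> @ornxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: ornxv4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = or <vscale x 4 x i32> %a, %b + ret <vscale x 4 x i32> %c +} + +define <vscale x 8 x i16> @ornxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) { +; CHECK-LABEL: ornxv8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = or <vscale x 8 x i16> %a, %b + ret <vscale x 8 x i16> %c +} + +define <vscale x 16 x i8> @ornxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: ornxv16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = or <vscale x 16 x i8> %a, %b + ret <vscale x 16 x i8> %c +} + +;; xor +define <vscale x 2 x i64> @xornxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: xornxv2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: eor z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = xor <vscale x 2 x i64> %a, %b + ret <vscale x 2 x i64> %c +} + +define <vscale x 4 x i32> @xornxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: xornxv4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: eor z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = xor <vscale x 4 x i32> %a, %b + ret <vscale x 4 x i32> %c +} + +define <vscale x 8 x i16> @xornxv8i16(<vscale x 8 x i16> %a, 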
<vscale x 8 x i16> %b, ptr %p) { +; CHECK-LABEL: xornxv8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: eor z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = xor <vscale x 8 x i16> %a, %b + ret <vscale x 8 x i16> %c +} + +define <vscale x 16 x i8> @xornxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: xornxv16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: eor z0.d, z0.d, z1.d +; CHECK-NEXT: ret +entry: + %c = xor <vscale x 16 x i8> %a, %b + ret <vscale x 16 x i8> %c +}