Skip to content

Commit db5e401

Browse files
authored
[CostModel] Add type-based cost model for get.active.lane.mask intrinsic (#130132)
Previously, requesting the type-based cost for the get.active.lane.mask intrinsic returned an invalid cost. This patch fixes that by moving the existing cost calculation from the non-type-based model into the type-based model; the non-type-based path now simply forwards to the type-based one.
1 parent dd9a2f0 commit db5e401

File tree

2 files changed

+34
-33
lines changed

2 files changed

+34
-33
lines changed

llvm/include/llvm/CodeGen/BasicTTIImpl.h

+22-21
Original file line numberDiff line numberDiff line change
@@ -1986,27 +1986,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
19861986
}
19871987
return Cost;
19881988
}
1989-
case Intrinsic::get_active_lane_mask: {
1990-
EVT ResVT = getTLI()->getValueType(DL, RetTy, true);
1991-
EVT ArgType = getTLI()->getValueType(DL, ICA.getArgTypes()[0], true);
1992-
1993-
// If we're not expanding the intrinsic then we assume this is cheap
1994-
// to implement.
1995-
if (!getTLI()->shouldExpandGetActiveLaneMask(ResVT, ArgType)) {
1996-
return getTypeLegalizationCost(RetTy).first;
1997-
}
1998-
1999-
// Create the expanded types that will be used to calculate the uadd_sat
2000-
// operation.
2001-
Type *ExpRetTy = VectorType::get(
2002-
ICA.getArgTypes()[0], cast<VectorType>(RetTy)->getElementCount());
2003-
IntrinsicCostAttributes Attrs(Intrinsic::uadd_sat, ExpRetTy, {}, FMF);
2004-
InstructionCost Cost =
2005-
thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
2006-
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, ExpRetTy, RetTy,
2007-
CmpInst::ICMP_ULT, CostKind);
2008-
return Cost;
2009-
}
20101989
case Intrinsic::experimental_cttz_elts: {
20111990
EVT ArgType = getTLI()->getValueType(DL, ICA.getArgTypes()[0], true);
20121991

@@ -2054,6 +2033,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
20542033

20552034
return Cost;
20562035
}
2036+
case Intrinsic::get_active_lane_mask:
20572037
case Intrinsic::experimental_vector_match:
20582038
return thisT()->getTypeBasedIntrinsicInstrCost(ICA, CostKind);
20592039
case Intrinsic::modf:
@@ -2394,6 +2374,27 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
23942374
thisT()->getArithmeticInstrCost(BinaryOperator::And, RetTy, CostKind);
23952375
return Cost;
23962376
}
2377+
case Intrinsic::get_active_lane_mask: {
2378+
Type *ArgTy = ICA.getArgTypes()[0];
2379+
EVT ResVT = getTLI()->getValueType(DL, RetTy, true);
2380+
EVT ArgVT = getTLI()->getValueType(DL, ArgTy, true);
2381+
2382+
// If we're not expanding the intrinsic then we assume this is cheap
2383+
// to implement.
2384+
if (!getTLI()->shouldExpandGetActiveLaneMask(ResVT, ArgVT))
2385+
return getTypeLegalizationCost(RetTy).first;
2386+
2387+
// Create the expanded types that will be used to calculate the uadd_sat
2388+
// operation.
2389+
Type *ExpRetTy =
2390+
VectorType::get(ArgTy, cast<VectorType>(RetTy)->getElementCount());
2391+
IntrinsicCostAttributes Attrs(Intrinsic::uadd_sat, ExpRetTy, {}, FMF);
2392+
InstructionCost Cost =
2393+
thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
2394+
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, ExpRetTy, RetTy,
2395+
CmpInst::ICMP_ULT, CostKind);
2396+
return Cost;
2397+
}
23972398
case Intrinsic::abs:
23982399
ISD = ISD::ABS;
23992400
break;

llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll

+12-12
Original file line numberDiff line numberDiff line change
@@ -958,16 +958,16 @@ define void @get_lane_mask() #0 {
958958
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
959959
;
960960
; TYPE_BASED_ONLY-LABEL: 'get_lane_mask'
961-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef)
962-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 undef, i64 undef)
963-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 undef, i64 undef)
964-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 undef, i64 undef)
965-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 undef, i32 undef)
966-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 undef, i32 undef)
967-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 undef, i32 undef)
968-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 undef, i32 undef)
969-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef)
970-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef)
961+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef)
962+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 undef, i64 undef)
963+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 undef, i64 undef)
964+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 undef, i64 undef)
965+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 undef, i32 undef)
966+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 undef, i32 undef)
967+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 undef, i32 undef)
968+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 undef, i32 undef)
969+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef)
970+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef)
971971
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef)
972972
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef)
973973
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef)
@@ -976,8 +976,8 @@ define void @get_lane_mask() #0 {
976976
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef)
977977
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef)
978978
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef)
979-
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef)
980-
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
979+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef)
980+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
981981
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
982982
;
983983
%mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef)

0 commit comments

Comments
 (0)