Skip to content

Commit e0aed47

Browse files
committed
[LoopUtils] Cache VFs in addDiffRuntimeChecks (NFC)
Caching the runtime VF, which is actually a vscale expression, is safe. We previously thought that it was unsafe, partly due to a bad FIXME in one of the tests. Strip the FIXME, and demonstrate that GeneratedRTChecks::create does the right thing by moving the logic for caching the runtime VF to LoopUtils. As a result, we improve the code in GeneratedRTChecks::create, avoiding a non-intuitive footgun.
1 parent 80bdfcd commit e0aed47

File tree

3 files changed

+19
-18
lines changed

3 files changed

+19
-18
lines changed

llvm/lib/Transforms/Utils/LoopUtils.cpp

+8-2
Original file line numberDiff line numberDiff line change
@@ -2048,12 +2048,18 @@ Value *llvm::addDiffRuntimeChecks(
20482048
// Map to keep track of created compares. The key is the pair of operands for
20492049
// the compare, to allow detecting and re-using redundant compares.
20502050
DenseMap<std::pair<Value *, Value *>, Value *> SeenCompares;
2051+
// Map to detect redundant values returned by GetVF.
2052+
DenseMap<Type *, Value *> SeenVFs;
20512053
for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze] : Checks) {
20522054
Type *Ty = SinkStart->getType();
2055+
Value *VF = SeenVFs.lookup(Ty);
2056+
if (!VF) {
2057+
VF = GetVF(ChkBuilder, Ty->getScalarSizeInBits());
2058+
SeenVFs.insert({Ty, VF});
2059+
}
20532060
// Compute VF * IC * AccessSize.
20542061
auto *VFTimesUFTimesSize =
2055-
ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
2056-
ConstantInt::get(Ty, IC * AccessSize));
2062+
ChkBuilder.CreateMul(VF, ConstantInt::get(Ty, IC * AccessSize));
20572063
Value *Diff =
20582064
Expander.expandCodeFor(SE.getMinusSCEV(SinkStart, SrcStart), Ty, Loc);
20592065

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+11-15
Original file line numberDiff line numberDiff line change
@@ -1924,21 +1924,17 @@ class GeneratedRTChecks {
19241924
"vector.memcheck");
19251925

19261926
auto DiffChecks = RtPtrChecking.getDiffChecks();
1927-
if (DiffChecks) {
1928-
Value *RuntimeVF = nullptr;
1929-
MemRuntimeCheckCond = addDiffRuntimeChecks(
1930-
MemCheckBlock->getTerminator(), *DiffChecks, MemCheckExp,
1931-
[VF, &RuntimeVF](IRBuilderBase &B, unsigned Bits) {
1932-
if (!RuntimeVF)
1933-
RuntimeVF = getRuntimeVF(B, B.getIntNTy(Bits), VF);
1934-
return RuntimeVF;
1935-
},
1936-
IC);
1937-
} else {
1938-
MemRuntimeCheckCond = addRuntimeChecks(
1939-
MemCheckBlock->getTerminator(), L, RtPtrChecking.getChecks(),
1940-
MemCheckExp, VectorizerParams::HoistRuntimeChecks);
1941-
}
1927+
MemRuntimeCheckCond =
1928+
DiffChecks
1929+
? addDiffRuntimeChecks(
1930+
MemCheckBlock->getTerminator(), *DiffChecks, MemCheckExp,
1931+
[VF](IRBuilderBase &B, unsigned Bits) {
1932+
return getRuntimeVF(B, B.getIntNTy(Bits), VF);
1933+
},
1934+
IC)
1935+
: addRuntimeChecks(MemCheckBlock->getTerminator(), L,
1936+
RtPtrChecking.getChecks(), MemCheckExp,
1937+
VectorizerParams::HoistRuntimeChecks);
19421938
assert(MemRuntimeCheckCond &&
19431939
"no RT checks generated although RtPtrChecking "
19441940
"claimed checks are required");

llvm/test/Transforms/LoopVectorize/AArch64/sve-runtime-check-size-based-threshold.ll

-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ target triple = "aarch64-unknown-linux-gnu"
55

66
; Test case where the minimum profitable trip count due to runtime checks
77
; exceeds VF.getKnownMinValue() * UF.
8-
; FIXME: The code currently incorrectly is missing a umax(VF * UF, 28).
98
define void @min_trip_count_due_to_runtime_checks_1(ptr %dst.1, ptr %dst.2, ptr %src.1, ptr %src.2, i64 %n) {
109
; CHECK-LABEL: @min_trip_count_due_to_runtime_checks_1(
1110
; CHECK-NEXT: entry:

0 commit comments

Comments
 (0)