Skip to content

Commit 703240c

Browse files
committed
[SROA] Maintain shadow/backing alloca when some slices are non-capturing read-only calls to allow alloca partitioning/promotion
This is inspired by the original variant of D109749 by Graham Hunter, but is a more general version. Roughly, instead of promoting the alloca, we call it a shadow/backing alloca, go through all of its slices, clone(!) instructions that operated on it, but make them operate on the cloned alloca, and promote the cloned alloca instead. This keeps the shadow/backing alloca, and all the original instructions around, which results in said shadow/backing alloca being a perfect mirror/representation of the promoted alloca's content, so calls that take the alloca as arguments (non-capturingly!) can be supported. For now, we require that the calls also don't modify the alloca's content, but that is only to simplify the initial implementation, and that will be supported in a follow-up. Overall, this leads to *smaller* codesize: https://llvm-compile-time-tracker.com/compare.php?from=a8b4f5bbab62091835205f3d648902432a4a5b58&to=aeae054055b125b011c1122f82c86457e159436f&stat=size-total and is roughly neutral compile-time wise: https://llvm-compile-time-tracker.com/compare.php?from=a8b4f5bbab62091835205f3d648902432a4a5b58&to=aeae054055b125b011c1122f82c86457e159436f&stat=instructions Reviewed By: djtodoro Differential Revision: https://reviews.llvm.org/D113520
1 parent f0891cd commit 703240c

File tree

2 files changed

+264
-183
lines changed

2 files changed

+264
-183
lines changed

llvm/lib/Transforms/Scalar/SROA.cpp

+148-3
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ class Slice {
167167
/// Mark this slice as unsplittable by clearing the integer (flag) half of
/// UseAndIsSplittable.
void makeUnsplittable() { UseAndIsSplittable.setInt(false); }
168168

169169
/// Return the use this slice refers to, i.e. the pointer half of
/// UseAndIsSplittable. The result is null for a dead slice.
Use *getUse() const { return UseAndIsSplittable.getPointer(); }
170+
/// Re-point this slice at the use \p U. Only the pointer half of
/// UseAndIsSplittable is written.
void setUse(Use *U) { UseAndIsSplittable.setPointer(U); }
170171

171172
/// A slice is dead when it no longer refers to any use (its use is null).
bool isDead() const { return getUse() == nullptr; }
172173
/// Mark this slice dead by resetting its use pointer to null.
void kill() { UseAndIsSplittable.setPointer(nullptr); }
@@ -218,7 +219,7 @@ class Slice {
218219
class llvm::sroa::AllocaSlices {
219220
public:
220221
/// Construct the slices of a particular alloca.
221-
AllocaSlices(const DataLayout &DL, AllocaInst &AI);
222+
AllocaSlices(const DataLayout &DL, AllocaInst &AI, bool &Changed);
222223

223224
/// Test whether a pointer to the allocation escapes our analysis.
224225
///
@@ -270,6 +271,12 @@ class llvm::sroa::AllocaSlices {
270271
return DeadUseIfPromotable;
271272
}
272273

274+
/// Discard all "dead" bookkeeping gathered so far: the dead users, the uses
/// that are dead only if the alloca is promotable, and the dead operands.
///
/// Only the tracking lists are emptied; no instruction is modified or erased.
void forgetTheDead() {
  DeadUsers.clear();
  DeadUseIfPromotable.clear();
  DeadOperands.clear();
}
279+
273280
/// Access the dead operands referring to this alloca.
274281
///
275282
/// These are operands which cannot actually be used to refer to the
@@ -295,11 +302,18 @@ class llvm::sroa::AllocaSlices {
295302

296303
friend class AllocaSlices::SliceBuilder;
297304

305+
void formBackingAlloca(AllocaInst *AI, bool &Changed);
306+
298307
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
299308
/// Handle to alloca instruction to simplify method interfaces.
300309
AllocaInst &AI;
301310
#endif
302311

312+
/// Certain escaping uses of an alloca (non-capturing-ones)
313+
/// do not prevent promotion, but force retention of the alloca.
314+
/// This records if there are any such uses.
315+
bool NeedsBackingAlloca = false;
316+
303317
/// The instruction responsible for this alloca not having a known set
304318
/// of slices.
305319
///
@@ -1062,11 +1076,22 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
10621076

10631077
/// Selects are handled by the visitor shared with PHI nodes.
void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
10641078

1079+
/// Handle a call that takes (a pointer into) the alloca as an operand.
///
/// Such a call is tolerated only when the pointer is passed at a known
/// offset as a data operand, is not captured by the callee, and is at most
/// read through. In that case the alloca is flagged as needing a backing
/// alloca; otherwise slice building is aborted at this call.
void visitCallBase(CallBase &CB) {
  // The per-operand attribute queries below are indexed by data operand, so
  // bail out if the pointer is used in any other position (e.g. as the
  // callee) rather than querying with an out-of-range operand number.
  if (!IsOffsetKnown || !CB.isDataOperand(U) ||
      !CB.doesNotCapture(U->getOperandNo()))
    return PI.setAborted(&CB);
  // If we know that the callee does not retain the pointer,
  // then it does not prevent SROA, although we have to work around this.
  // However, for now, only allow uses that, at most, read from said memory.
  if (!CB.onlyReadsMemory() && !CB.onlyReadsMemory(U->getOperandNo()))
    return PI.setAborted(&CB);
  AS.NeedsBackingAlloca = true;
}
1089+
10651090
/// Disable SROA entirely if there are unhandled users of the alloca.
10661091
void visitInstruction(Instruction &I) { PI.setAborted(&I); }
10671092
};
10681093

1069-
AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
1094+
AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI, bool &Changed)
10701095
:
10711096
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
10721097
AI(AI),
@@ -1083,6 +1108,10 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
10831108
return;
10841109
}
10851110

1111+
// We may have found that the pointer to the AI escapes, but isn't captured.
1112+
if (NeedsBackingAlloca)
1113+
formBackingAlloca(&AI, Changed);
1114+
10861115
llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
10871116

10881117
// Sort the uses. This arranges for the offsets to be in ascending order,
@@ -3587,6 +3616,122 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
35873616

35883617
} // end anonymous namespace
35893618

3619+
/// Apparently, we can promote the alloca, but some uses of the alloca
3620+
/// are calls (that don't capture it's address), which require for the
3621+
/// trace alloca to remain. To do so, we must form a new "backing" alloca,
3622+
/// which will be kept as an up-to-date backup of the to-be-promoted-alloca's
3623+
/// content, and used in it's place in these non-capturing calls.
3624+
/// FIXME: support non-readonly non-capturing calls.
3625+
void AllocaSlices::formBackingAlloca(AllocaInst *AllocaToPromote,
3626+
bool &Changed) {
3627+
assert(NeedsBackingAlloca &&
3628+
"Should not be called if there is no need to rewrite.");
3629+
3630+
// We are going to preserve all of the original instructions that were
3631+
// operating on the original alloca, so we must forget any instructions
3632+
// that were deemed as dead-to-be-deleted during normal promotion.
3633+
forgetTheDead();
3634+
3635+
Changed = true;
3636+
3637+
// Now, we want to retain all of the instructions operating on the original
3638+
// alloca, so to avoid much hassle, create a new alloca, and swap (RAUW) them.
3639+
AllocaInst *ShadowAlloca = cast<AllocaInst>(AllocaToPromote->clone());
3640+
ShadowAlloca->takeName(AllocaToPromote);
3641+
AllocaToPromote->setName(ShadowAlloca->getName() + ".prom");
3642+
ShadowAlloca->insertBefore(AllocaToPromote);
3643+
AllocaToPromote->replaceAllUsesWith(ShadowAlloca);
3644+
3645+
// Avoid recomputing the same pointer over and over again, cache it.
3646+
SmallDenseMap<std::pair<uint64_t, Type *>, Value *> RebasedPtrsCSE;
3647+
3648+
// Don't do anything fancy, just put new insts "right after" the alloca.
3649+
IRBuilderTy Builder(AllocaToPromote->getContext());
3650+
BasicBlock *AllocaToPromoteBB = AllocaToPromote->getParent();
3651+
Builder.SetInsertPoint(AllocaToPromoteBB,
3652+
AllocaToPromoteBB->getFirstInsertionPt());
3653+
3654+
// Give a pointer `Offset` bytes into the `AllocaToPromote` with `PtrTy` type.
3655+
auto getRebasedPtr = [&RebasedPtrsCSE, &Builder, AllocaToPromote,
3656+
DL = AllocaToPromote->getModule()->getDataLayout()](
3657+
PointerType *PtrTy, const uint64_t Offset) {
3658+
// Look it up in a cache first.
3659+
auto It = RebasedPtrsCSE.find({Offset, PtrTy});
3660+
if (It != RebasedPtrsCSE.end())
3661+
return It->second;
3662+
3663+
// Otherwise, create a new pointer, and cache it for the future.
3664+
Value *NewPtr = getAdjustedPtr(
3665+
Builder, DL, AllocaToPromote,
3666+
APInt(DL.getIndexSizeInBits(PtrTy->getAddressSpace()), Offset), PtrTy,
3667+
"");
3668+
RebasedPtrsCSE[{Offset, PtrTy}] = NewPtr;
3669+
3670+
return NewPtr;
3671+
};
3672+
3673+
// Some instructions may have several uses of an alloca, and there's
3674+
// a separate slice for each use, so we must cache each instruction
3675+
// we clone, so that we only clone it once,
3676+
// not for each slice that references it.
3677+
SmallDenseMap<Instruction *, Instruction *> InstrCloneMap;
3678+
3679+
// Now, let's just deal with each slice. Roughly, we need to clone each
3680+
// instruction that is referenced by a slice (once per instruction!),
3681+
// and change the appropriate pointer from pointing at the shadow alloca
3682+
// into pointing into the alloca we are going to promote.
3683+
//
3684+
// NOTE: the original instruction is generally preserved,
3685+
// because we need to maintain the content parity between the two allocas!
3686+
for (Slice &S : Slices) {
3687+
// Just completely ignore dead slices.
3688+
if (S.isDead())
3689+
continue;
3690+
3691+
// Which instruction does this slice represent?
3692+
Use *OrigUse = S.getUse();
3693+
auto *OrigInstr = cast<Instruction>(OrigUse->getUser());
3694+
3695+
// Now, we need to make a clone of this instruction, but operating on
3696+
// the alloca-to-be-promoted instead.
3697+
Instruction *ClonedInstr;
3698+
// Only clone instruction once! See if we already did that for this instr.
3699+
auto It = InstrCloneMap.find(OrigInstr);
3700+
if (It != InstrCloneMap.end())
3701+
ClonedInstr = It->second;
3702+
else {
3703+
// This is the first time this instruction is seen.
3704+
// Clone it next to the original instruction, and cache it.
3705+
ClonedInstr = OrigInstr->clone();
3706+
ClonedInstr->insertBefore(OrigInstr);
3707+
InstrCloneMap.insert({OrigInstr, ClonedInstr});
3708+
3709+
// Also, if the instruction was returning anything, we do that instead.
3710+
if (!ClonedInstr->getType()->isVoidTy()) {
3711+
assert(isa<LoadInst>(OrigInstr) &&
3712+
"Not expecting to encounter here anything other than a `load`.");
3713+
ClonedInstr->setName(OrigInstr->getName() + ".prom");
3714+
OrigInstr->replaceAllUsesWith(ClonedInstr);
3715+
}
3716+
3717+
if (isa<LoadInst>(OrigInstr))
3718+
// We know that all the offending (non-capturing) calls do not modify
3719+
// the content of the shadow alloca, so we do not need to propagate
3720+
// the content of the shadow alloca to the alloca-to-be-promoted.
3721+
DeadUsers.push_back(OrigInstr);
3722+
}
3723+
3724+
// Final touch: the slice should refer to the
3725+
// use of the alloca-to-be-promoted, while it currently refers to
3726+
// use of the shadow alloca, so rectify that.
3727+
Value *NewPtr = getRebasedPtr(cast<PointerType>(OrigUse->get()->getType()),
3728+
S.beginOffset());
3729+
Use &ClonedUse = ClonedInstr->getOperandUse(OrigUse->getOperandNo());
3730+
ClonedUse.set(NewPtr);
3731+
S.setUse(&ClonedUse);
3732+
}
3733+
}
3734+
35903735
/// Strip aggregate type wrapping.
35913736
///
35923737
/// This removes no-op aggregate types wrapping an underlying type. It will
@@ -4612,7 +4757,7 @@ bool SROAPass::runOnAlloca(AllocaInst &AI) {
46124757
Changed |= AggRewriter.rewrite(AI);
46134758

46144759
// Build the slices using a recursive instruction-visiting builder.
4615-
AllocaSlices AS(DL, AI);
4760+
AllocaSlices AS(DL, AI, Changed);
46164761
LLVM_DEBUG(AS.print(dbgs()));
46174762
if (AS.isEscaped())
46184763
return Changed;

0 commit comments

Comments
 (0)