@@ -167,6 +167,7 @@ class Slice {
167
167
/// Mark this slice as unsplittable by storing `false` in the integer half of
/// UseAndIsSplittable.
void makeUnsplittable() {
  UseAndIsSplittable.setInt(false);
}
168
168
169
169
/// Return the use pointer stored in UseAndIsSplittable.
Use *getUse() const {
  Use *const StoredUse = UseAndIsSplittable.getPointer();
  return StoredUse;
}
170
+ void setUse (Use *U) { UseAndIsSplittable.setPointer (U); }
170
171
171
172
/// A slice is dead when it no longer refers to any use.
bool isDead() const {
  return !getUse();
}
172
173
/// Drop the stored use pointer, after which getUse() returns nullptr.
void kill() {
  UseAndIsSplittable.setPointer(nullptr);
}
@@ -218,7 +219,7 @@ class Slice {
218
219
class llvm ::sroa::AllocaSlices {
219
220
public:
220
221
// / Construct the slices of a particular alloca.
221
- AllocaSlices (const DataLayout &DL, AllocaInst &AI);
222
+ AllocaSlices (const DataLayout &DL, AllocaInst &AI, bool &Changed );
222
223
223
224
// / Test whether a pointer to the allocation escapes our analysis.
224
225
// /
@@ -270,6 +271,12 @@ class llvm::sroa::AllocaSlices {
270
271
return DeadUseIfPromotable;
271
272
}
272
273
274
+ void forgetTheDead () {
275
+ DeadUsers.clear ();
276
+ DeadUseIfPromotable.clear ();
277
+ DeadOperands.clear ();
278
+ };
279
+
273
280
// / Access the dead operands referring to this alloca.
274
281
// /
275
282
// / These are operands which cannot actually be used to refer to the
@@ -295,11 +302,18 @@ class llvm::sroa::AllocaSlices {
295
302
296
303
friend class AllocaSlices ::SliceBuilder;
297
304
305
+ void formBackingAlloca (AllocaInst *AI, bool &Changed);
306
+
298
307
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
299
308
// / Handle to alloca instruction to simplify method interfaces.
300
309
AllocaInst &AI;
301
310
#endif
302
311
312
+ // / Certain escaping uses of an alloca (non-capturing-ones)
313
+ // / do not prevent promotion, but force retention of the alloca.
314
+ // / This records if there are any such uses.
315
+ bool NeedsBackingAlloca = false ;
316
+
303
317
// / The instruction responsible for this alloca not having a known set
304
318
// / of slices.
305
319
// /
@@ -1062,11 +1076,22 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
1062
1076
1063
1077
/// Selects are forwarded to the handler shared with PHI nodes.
void visitSelectInst(SelectInst &SI) {
  visitPHINodeOrSelectInst(SI);
}
1064
1078
1079
+ void visitCallBase (CallBase &CB) {
1080
+ if (!IsOffsetKnown || !CB.doesNotCapture (U->getOperandNo ()))
1081
+ return PI.setAborted (&CB);
1082
+ // If we know that the callee does not retain the pointer,
1083
+ // then it does not prevent SROA, although we have to workaround this.
1084
+ // However, for now, only allow uses, that, at most, read from said memory.
1085
+ if (!CB.onlyReadsMemory () && !CB.onlyReadsMemory (U->getOperandNo ()))
1086
+ return PI.setAborted (&CB);
1087
+ AS.NeedsBackingAlloca = true ;
1088
+ }
1089
+
1065
1090
// / Disable SROA entirely if there are unhandled users of the alloca.
1066
1091
void visitInstruction(Instruction &I) {
  // Any instruction without a dedicated visitor aborts the pointer walk.
  PI.setAborted(&I);
}
1067
1092
};
1068
1093
1069
- AllocaSlices::AllocaSlices (const DataLayout &DL, AllocaInst &AI)
1094
+ AllocaSlices::AllocaSlices (const DataLayout &DL, AllocaInst &AI, bool &Changed )
1070
1095
:
1071
1096
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1072
1097
AI (AI),
@@ -1083,6 +1108,10 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
1083
1108
return ;
1084
1109
}
1085
1110
1111
+ // We may have found that the pointer to the AI escapes, but isn't captured.
1112
+ if (NeedsBackingAlloca)
1113
+ formBackingAlloca (&AI, Changed);
1114
+
1086
1115
llvm::erase_if (Slices, [](const Slice &S) { return S.isDead (); });
1087
1116
1088
1117
// Sort the uses. This arranges for the offsets to be in ascending order,
@@ -3587,6 +3616,122 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
3587
3616
3588
3617
} // end anonymous namespace
3589
3618
3619
+ // / Apparently, we can promote the alloca, but some uses of the alloca
3620
+ // / are calls (that don't capture it's address), which require for the
3621
+ // / trace alloca to remain. To do so, we must form a new "backing" alloca,
3622
+ // / which will be kept as an up-to-date backup of the to-be-promoted-alloca's
3623
+ // / content, and used in it's place in these non-capturing calls.
3624
+ // / FIXME: support non-readonly non-capturing calls.
3625
+ void AllocaSlices::formBackingAlloca (AllocaInst *AllocaToPromote,
3626
+ bool &Changed) {
3627
+ assert (NeedsBackingAlloca &&
3628
+ " Should not be called if there is no need to rewrite." );
3629
+
3630
+ // We are going to preserve all of the original instructions that were
3631
+ // operating on the original alloca, so we must forget any instructions
3632
+ // that were deemed as dead-to-be-deleted during normal promotion.
3633
+ forgetTheDead ();
3634
+
3635
+ Changed = true ;
3636
+
3637
+ // Now, we want to retain all of the instructions operating on the original
3638
+ // alloca, so to avoid much hassle, create a new alloca, and swap (RAUW) them.
3639
+ AllocaInst *ShadowAlloca = cast<AllocaInst>(AllocaToPromote->clone ());
3640
+ ShadowAlloca->takeName (AllocaToPromote);
3641
+ AllocaToPromote->setName (ShadowAlloca->getName () + " .prom" );
3642
+ ShadowAlloca->insertBefore (AllocaToPromote);
3643
+ AllocaToPromote->replaceAllUsesWith (ShadowAlloca);
3644
+
3645
+ // Avoid recomputing the same pointer over and over again, cache it.
3646
+ SmallDenseMap<std::pair<uint64_t , Type *>, Value *> RebasedPtrsCSE;
3647
+
3648
+ // Don't do anything fancy, just put new insts "right after" the alloca.
3649
+ IRBuilderTy Builder (AllocaToPromote->getContext ());
3650
+ BasicBlock *AllocaToPromoteBB = AllocaToPromote->getParent ();
3651
+ Builder.SetInsertPoint (AllocaToPromoteBB,
3652
+ AllocaToPromoteBB->getFirstInsertionPt ());
3653
+
3654
+ // Give a pointer `Offset` bytes into the `AllocaToPromote` with `PtrTy` type.
3655
+ auto getRebasedPtr = [&RebasedPtrsCSE, &Builder, AllocaToPromote,
3656
+ DL = AllocaToPromote->getModule ()->getDataLayout ()](
3657
+ PointerType *PtrTy, const uint64_t Offset) {
3658
+ // Look it up in a cache first.
3659
+ auto It = RebasedPtrsCSE.find ({Offset, PtrTy});
3660
+ if (It != RebasedPtrsCSE.end ())
3661
+ return It->second ;
3662
+
3663
+ // Otherwise, create a new pointer, and cache it for the future.
3664
+ Value *NewPtr = getAdjustedPtr (
3665
+ Builder, DL, AllocaToPromote,
3666
+ APInt (DL.getIndexSizeInBits (PtrTy->getAddressSpace ()), Offset), PtrTy,
3667
+ " " );
3668
+ RebasedPtrsCSE[{Offset, PtrTy}] = NewPtr ;
3669
+
3670
+ return NewPtr ;
3671
+ };
3672
+
3673
+ // Some instructions may have several uses of an alloca, and there's
3674
+ // a separate slice for each use, so we must cache each instruction
3675
+ // we clone, so that we only clone it once,
3676
+ // not for each slice that references it.
3677
+ SmallDenseMap<Instruction *, Instruction *> InstrCloneMap;
3678
+
3679
+ // Now, let's just deal with each slice. Roughly, we need to clone each
3680
+ // instruction that is referenced by a slice (once per instruction!),
3681
+ // and change the appropriate pointer from pointing at the shadow alloca
3682
+ // into pointing into the alloca we are going to promote.
3683
+ //
3684
+ // NOTE: the original instruction is generally preserved,
3685
+ // because we need to maintain the content parity between the two allocas!
3686
+ for (Slice &S : Slices) {
3687
+ // Just completely ignore dead slices.
3688
+ if (S.isDead ())
3689
+ continue ;
3690
+
3691
+ // Which instruction does this slice represent?
3692
+ Use *OrigUse = S.getUse ();
3693
+ auto *OrigInstr = cast<Instruction>(OrigUse->getUser ());
3694
+
3695
+ // Now, we need to make a clone of this instruction, but operating on
3696
+ // the alloca-to-be-promoted instead.
3697
+ Instruction *ClonedInstr;
3698
+ // Only clone instruction once! See if we already did that for this instr.
3699
+ auto It = InstrCloneMap.find (OrigInstr);
3700
+ if (It != InstrCloneMap.end ())
3701
+ ClonedInstr = It->second ;
3702
+ else {
3703
+ // This is the first time this instruction is seen.
3704
+ // Clone it next to the original instruction, and cache it.
3705
+ ClonedInstr = OrigInstr->clone ();
3706
+ ClonedInstr->insertBefore (OrigInstr);
3707
+ InstrCloneMap.insert ({OrigInstr, ClonedInstr});
3708
+
3709
+ // Also, if the instruction was returning anything, we do that instead.
3710
+ if (!ClonedInstr->getType ()->isVoidTy ()) {
3711
+ assert (isa<LoadInst>(OrigInstr) &&
3712
+ " Not expecting to encounter here anything other than a `load`." );
3713
+ ClonedInstr->setName (OrigInstr->getName () + " .prom" );
3714
+ OrigInstr->replaceAllUsesWith (ClonedInstr);
3715
+ }
3716
+
3717
+ if (isa<LoadInst>(OrigInstr))
3718
+ // We know that all the offending (non-capturing) calls do not modify
3719
+ // the content of the shadow alloca, so we do not need to propagate
3720
+ // the content of the shadow alloca to the alloca-to-be-promoted.
3721
+ DeadUsers.push_back (OrigInstr);
3722
+ }
3723
+
3724
+ // Final touch: the slice should refer to the
3725
+ // use of the alloca-to-be-promoted, while it currently refers to
3726
+ // use of the shadow alloca, so rectify that.
3727
+ Value *NewPtr = getRebasedPtr (cast<PointerType>(OrigUse->get ()->getType ()),
3728
+ S.beginOffset ());
3729
+ Use &ClonedUse = ClonedInstr->getOperandUse (OrigUse->getOperandNo ());
3730
+ ClonedUse.set (NewPtr );
3731
+ S.setUse (&ClonedUse);
3732
+ }
3733
+ }
3734
+
3590
3735
// / Strip aggregate type wrapping.
3591
3736
// /
3592
3737
// / This removes no-op aggregate types wrapping an underlying type. It will
@@ -4612,7 +4757,7 @@ bool SROAPass::runOnAlloca(AllocaInst &AI) {
4612
4757
Changed |= AggRewriter.rewrite (AI);
4613
4758
4614
4759
// Build the slices using a recursive instruction-visiting builder.
4615
- AllocaSlices AS (DL, AI);
4760
+ AllocaSlices AS (DL, AI, Changed );
4616
4761
LLVM_DEBUG (AS.print (dbgs ()));
4617
4762
if (AS.isEscaped ())
4618
4763
return Changed;
0 commit comments