Skip to content

Commit

Permalink
[SimplifyCFG] Always allow hoisting if all instructions match. (llvm#…
Browse files Browse the repository at this point in the history
…97158)

Generalize hoistCommonCodeFromSuccessors's `EqTermsOnly` to
`AllInstsEqOnly` and always allow hoisting if all instructions match.

In that case, all instructions can be hoisted and the
original branch will be replaced and selects for PHIs are added. This
allows preserving metadata in more cases, using the existing hoisting
logic, whereas previously FoldTwoEntryPHINode would drop the metadata.


https://llvm-compile-time-tracker.com/compare.php?from=716360367fbdabac2c374c19b8746f4de49a5599&to=986b2c47df516b31d998c055400e4f62aa76edc6&stat=instructions:u

PR: llvm#97158
  • Loading branch information
fhahn authored Dec 13, 2024
1 parent 5f72f2c commit c4a78b6
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 116 deletions.
196 changes: 105 additions & 91 deletions llvm/lib/Transforms/Utils/SimplifyCFG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ class SimplifyCFGOpt {
bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
IRBuilder<> &Builder);

bool hoistCommonCodeFromSuccessors(Instruction *TI, bool EqTermsOnly);
bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
bool hoistSuccIdenticalTerminatorToSwitchOrIf(
Instruction *TI, Instruction *I1,
SmallVectorImpl<Instruction *> &OtherSuccTIs);
Expand Down Expand Up @@ -1772,13 +1772,84 @@ static bool isSafeCheapLoadStore(const Instruction *I,
getLoadStoreAlignment(I) < Value::MaximumAlignment;
}

namespace {

// LockstepReverseIterator - Iterates through instructions
// in a set of blocks in reverse order from the first non-terminator.
// For example (assume all blocks have size n):
// LockstepReverseIterator I([B1, B2, B3]);
// *I-- = [B1[n], B2[n], B3[n]];
// *I-- = [B1[n-1], B2[n-1], B3[n-1]];
// *I-- = [B1[n-2], B2[n-2], B3[n-2]];
// ...
class LockstepReverseIterator {
ArrayRef<BasicBlock *> Blocks;
SmallVector<Instruction *, 4> Insts;
bool Fail;

public:
LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
reset();
}

void reset() {
Fail = false;
Insts.clear();
for (auto *BB : Blocks) {
Instruction *Inst = BB->getTerminator();
for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
Inst = Inst->getPrevNode();
if (!Inst) {
// Block wasn't big enough.
Fail = true;
return;
}
Insts.push_back(Inst);
}
}

bool isValid() const { return !Fail; }

void operator--() {
if (Fail)
return;
for (auto *&Inst : Insts) {
for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
Inst = Inst->getPrevNode();
// Already at beginning of block.
if (!Inst) {
Fail = true;
return;
}
}
}

void operator++() {
if (Fail)
return;
for (auto *&Inst : Insts) {
for (Inst = Inst->getNextNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
Inst = Inst->getNextNode();
// Already at end of block.
if (!Inst) {
Fail = true;
return;
}
}
}

ArrayRef<Instruction *> operator*() const { return Insts; }
};

} // end anonymous namespace

/// Hoist any common code in the successor blocks up into the block. This
/// function guarantees that BB dominates all successors. If EqTermsOnly is
/// given, only perform hoisting in case both blocks only contain a terminator.
/// In that case, only the original BI will be replaced and selects for PHIs are
/// added.
/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
/// given, only perform hoisting in case all successors blocks contain matching
/// instructions only. In that case, all instructions can be hoisted and the
/// original branch will be replaced and selects for PHIs are added.
bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
bool EqTermsOnly) {
bool AllInstsEqOnly) {
// This does very trivial matching, with limited scanning, to find identical
// instructions in the two blocks. In particular, we don't want to get into
// O(N1*N2*...) situations here where Ni are the sizes of these successors. As
Expand Down Expand Up @@ -1807,17 +1878,35 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
}

// Check if only hoisting terminators is allowed. This does not add new
// instructions to the hoist location.
if (EqTermsOnly) {
// Skip any debug intrinsics, as they are free to hoist.
for (auto &SuccIter : make_first_range(SuccIterPairs)) {
auto *INonDbg = &*skipDebugIntrinsics(SuccIter);
if (!INonDbg->isTerminator())
return false;
if (AllInstsEqOnly) {
// Check if all instructions in the successor blocks match. This allows
// hoisting all instructions and removing the blocks we are hoisting from,
// so does not add any new instructions.
SmallVector<BasicBlock *> Succs = to_vector(successors(BB));
// Check if sizes and terminators of all successors match.
bool AllSame = none_of(Succs, [&Succs](BasicBlock *Succ) {
Instruction *Term0 = Succs[0]->getTerminator();
Instruction *Term = Succ->getTerminator();
return !Term->isSameOperationAs(Term0) ||
!equal(Term->operands(), Term0->operands()) ||
Succs[0]->size() != Succ->size();
});
if (!AllSame)
return false;
if (AllSame) {
LockstepReverseIterator LRI(Succs);
while (LRI.isValid()) {
Instruction *I0 = (*LRI)[0];
if (any_of(*LRI, [I0](Instruction *I) {
return !areIdenticalUpToCommutativity(I0, I);
})) {
return false;
}
--LRI;
}
}
// Now we know that we only need to hoist debug intrinsics and the
// terminator. Let the loop below handle those 2 cases.
// Now we know that all instructions in all successors can be hoisted. Let
// the loop below handle the hoisting.
}

// Count how many instructions were not hoisted so far. There's a limit on how
Expand Down Expand Up @@ -2350,81 +2439,6 @@ static void sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
}
}

namespace {

// LockstepReverseIterator - Iterates through instructions
// in a set of blocks in reverse order from the first non-terminator.
// For example (assume all blocks have size n):
// LockstepReverseIterator I([B1, B2, B3]);
// *I-- = [B1[n], B2[n], B3[n]];
// *I-- = [B1[n-1], B2[n-1], B3[n-1]];
// *I-- = [B1[n-2], B2[n-2], B3[n-2]];
// ...
class LockstepReverseIterator {
ArrayRef<BasicBlock*> Blocks;
SmallVector<Instruction*,4> Insts;
bool Fail;

public:
LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : Blocks(Blocks) {
reset();
}

void reset() {
Fail = false;
Insts.clear();
for (auto *BB : Blocks) {
Instruction *Inst = BB->getTerminator();
for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
Inst = Inst->getPrevNode();
if (!Inst) {
// Block wasn't big enough.
Fail = true;
return;
}
Insts.push_back(Inst);
}
}

bool isValid() const {
return !Fail;
}

void operator--() {
if (Fail)
return;
for (auto *&Inst : Insts) {
for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
Inst = Inst->getPrevNode();
// Already at beginning of block.
if (!Inst) {
Fail = true;
return;
}
}
}

void operator++() {
if (Fail)
return;
for (auto *&Inst : Insts) {
for (Inst = Inst->getNextNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
Inst = Inst->getNextNode();
// Already at end of block.
if (!Inst) {
Fail = true;
return;
}
}
}

ArrayRef<Instruction*> operator * () const {
return Insts;
}
};

} // end anonymous namespace

/// Check whether BB's predecessors end with unconditional branches. If it is
/// true, sink any common code from the predecessors to BB.
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB,
Expand Down
21 changes: 6 additions & 15 deletions llvm/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,10 @@ define i32 @foo(i32 %i) nounwind ssp !dbg !0 {
; CHECK-NEXT: entry:
; CHECK-NEXT: #dbg_value(i32 [[I:%.*]], [[META7:![0-9]+]], !DIExpression(), [[META8:![0-9]+]])
; CHECK-NEXT: #dbg_value(i32 0, [[META9:![0-9]+]], !DIExpression(), [[META11:![0-9]+]])
; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I]], 0, !dbg [[DBG12:![0-9]+]]
; CHECK-NEXT: br i1 [[COND]], label [[THEN:%.*]], label [[ELSE:%.*]], !dbg [[DBG12]]
; CHECK: then:
; CHECK-NEXT: [[CALL_1:%.*]] = call i32 (...) @bar(), !dbg [[DBG13:![0-9]+]]
; CHECK-NEXT: #dbg_value(i32 [[CALL_1]], [[META9]], !DIExpression(), [[DBG13]])
; CHECK-NEXT: br label [[EXIT:%.*]], !dbg [[DBG15:![0-9]+]]
; CHECK: else:
; CHECK-NEXT: [[CALL_2:%.*]] = call i32 (...) @bar(), !dbg [[DBG16:![0-9]+]]
; CHECK-NEXT: #dbg_value(i32 [[CALL_2]], [[META9]], !DIExpression(), [[DBG16]])
; CHECK-NEXT: br label [[EXIT]], !dbg [[DBG18:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: [[K_0:%.*]] = phi i32 [ [[CALL_1]], [[THEN]] ], [ [[CALL_2]], [[ELSE]] ]
; CHECK-NEXT: ret i32 [[K_0]], !dbg [[DBG19:![0-9]+]]
; CHECK-NEXT: [[CALL_1:%.*]] = call i32 (...) @bar(), !dbg [[DBG12:![0-9]+]]
; CHECK-NEXT: #dbg_value(i32 [[CALL_1]], [[META9]], !DIExpression(), [[META13:![0-9]+]])
; CHECK-NEXT: #dbg_value(i32 [[CALL_1]], [[META9]], !DIExpression(), [[META15:![0-9]+]])
; CHECK-NEXT: ret i32 [[CALL_1]], !dbg [[DBG17:![0-9]+]]
;
entry:
call void @llvm.dbg.value(metadata i32 %i, metadata !6, metadata !DIExpression()), !dbg !7
Expand All @@ -46,8 +37,8 @@ define i1 @hoist_with_debug2(i32 %x) !dbg !22 {
; CHECK-LABEL: @hoist_with_debug2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp ugt i32 [[X:%.*]], 2
; CHECK-NEXT: #dbg_value(i32 [[X]], [[META21:![0-9]+]], !DIExpression(), [[META23:![0-9]+]])
; CHECK-NEXT: #dbg_value(i32 [[X]], [[META21]], !DIExpression(), [[META23]])
; CHECK-NEXT: #dbg_value(i32 [[X]], [[META19:![0-9]+]], !DIExpression(), [[META21:![0-9]+]])
; CHECK-NEXT: #dbg_value(i32 [[X]], [[META19]], !DIExpression(), [[META21]])
; CHECK-NEXT: [[DOT:%.*]] = select i1 [[TOBOOL_NOT]], i1 false, i1 true
; CHECK-NEXT: ret i1 [[DOT]]
;
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/Transforms/SimplifyCFG/hoisting-metadata.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,7 @@ define i64 @hoist_load_with_matching_pointers_and_tbaa(i1 %c) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP:%.*]] = alloca i64, align 8
; CHECK-NEXT: call void @init(ptr [[TMP]])
; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[TMP]], align 8
; CHECK-NOT: !tbaa
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[TMP]], align 8
; CHECK-NOT: !tbaa
; CHECK-NEXT: [[P:%.*]] = select i1 [[C]], i64 [[TMP0]], i64 [[TMP1]]
; CHECK-NEXT: [[P:%.*]] = load i64, ptr [[TMP]], align 8, !tbaa [[TBAA0:![0-9]+]]
; CHECK-NEXT: ret i64 [[P]]
;
entry:
Expand Down Expand Up @@ -74,11 +70,7 @@ define i64 @hoist_load_with_different_tbaa(i1 %c) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP:%.*]] = alloca i64, align 8
; CHECK-NEXT: call void @init(ptr [[TMP]])
; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[TMP]], align 8
; CHECK-NOT: !tbaa
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[TMP]], align 8
; CHECK-NOT: !tbaa
; CHECK-NEXT: [[P:%.*]] = select i1 [[C]], i64 [[TMP0]], i64 [[TMP1]]
; CHECK-NEXT: [[P:%.*]] = load i64, ptr [[TMP]], align 8, !tbaa [[TBAA5:![0-9]+]]
; CHECK-NEXT: ret i64 [[P]]
;
entry:
Expand Down Expand Up @@ -135,3 +127,11 @@ exit:
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C++ TBAA"}
!5 = !{!3, !3, i64 0}
;.
; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; CHECK: [[META1]] = !{!"p2 long long", [[META2:![0-9]+]], i64 0}
; CHECK: [[META2]] = !{!"any pointer", [[META3:![0-9]+]], i64 0}
; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0}
; CHECK: [[META4]] = !{!"Simple C++ TBAA"}
; CHECK: [[TBAA5]] = !{[[META3]], [[META3]], i64 0}
;.

0 comments on commit c4a78b6

Please sign in to comment.