diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 18a1cb6d44b4e1..17f4b396f753b4 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -285,7 +285,7 @@ class SimplifyCFGOpt { bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, IRBuilder<> &Builder); - bool hoistCommonCodeFromSuccessors(Instruction *TI, bool EqTermsOnly); + bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly); bool hoistSuccIdenticalTerminatorToSwitchOrIf( Instruction *TI, Instruction *I1, SmallVectorImpl &OtherSuccTIs); @@ -1772,13 +1772,84 @@ static bool isSafeCheapLoadStore(const Instruction *I, getLoadStoreAlignment(I) < Value::MaximumAlignment; } +namespace { + +// LockstepReverseIterator - Iterates through instructions +// in a set of blocks in reverse order from the first non-terminator. +// For example (assume all blocks have size n): +// LockstepReverseIterator I([B1, B2, B3]); +// *I-- = [B1[n], B2[n], B3[n]]; +// *I-- = [B1[n-1], B2[n-1], B3[n-1]]; +// *I-- = [B1[n-2], B2[n-2], B3[n-2]]; +// ... +class LockstepReverseIterator { + ArrayRef Blocks; + SmallVector Insts; + bool Fail; + +public: + LockstepReverseIterator(ArrayRef Blocks) : Blocks(Blocks) { + reset(); + } + + void reset() { + Fail = false; + Insts.clear(); + for (auto *BB : Blocks) { + Instruction *Inst = BB->getTerminator(); + for (Inst = Inst->getPrevNode(); Inst && isa(Inst);) + Inst = Inst->getPrevNode(); + if (!Inst) { + // Block wasn't big enough. + Fail = true; + return; + } + Insts.push_back(Inst); + } + } + + bool isValid() const { return !Fail; } + + void operator--() { + if (Fail) + return; + for (auto *&Inst : Insts) { + for (Inst = Inst->getPrevNode(); Inst && isa(Inst);) + Inst = Inst->getPrevNode(); + // Already at beginning of block. + if (!Inst) { + Fail = true; + return; + } + } + } + + void operator++() { + if (Fail) + return; + for (auto *&Inst : Insts) { + for (Inst = Inst->getNextNode(); Inst && isa(Inst);) + Inst = Inst->getNextNode(); + // Already at end of block. + if (!Inst) { + Fail = true; + return; + } + } + } + + ArrayRef operator*() const { return Insts; } +}; + +} // end anonymous namespace + /// Hoist any common code in the successor blocks up into the block. This -/// function guarantees that BB dominates all successors. If EqTermsOnly is -/// given, only perform hoisting in case both blocks only contain a terminator. -/// In that case, only the original BI will be replaced and selects for PHIs are -/// added. +/// function guarantees that BB dominates all successors. If AllInstsEqOnly is +/// given, only perform hoisting in case all successors blocks contain matching +/// instructions only. In that case, all instructions can be hoisted and the +/// original branch will be replaced and selects for PHIs are added. bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI, - bool EqTermsOnly) { + bool AllInstsEqOnly) { // This does very trivial matching, with limited scanning, to find identical // instructions in the two blocks. In particular, we don't want to get into // O(N1*N2*...) situations here where Ni are the sizes of these successors. As @@ -1807,17 +1878,35 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI, SuccIterPairs.push_back(SuccIterPair(SuccItr, 0)); } - // Check if only hoisting terminators is allowed. This does not add new - // instructions to the hoist location. - if (EqTermsOnly) { - // Skip any debug intrinsics, as they are free to hoist. - for (auto &SuccIter : make_first_range(SuccIterPairs)) { - auto *INonDbg = &*skipDebugIntrinsics(SuccIter); - if (!INonDbg->isTerminator()) - return false; + if (AllInstsEqOnly) { + // Check if all instructions in the successor blocks match. This allows + // hoisting all instructions and removing the blocks we are hoisting from, + // so does not add any new instructions. + SmallVector Succs = to_vector(successors(BB)); + // Check if sizes and terminators of all successors match. + bool AllSame = none_of(Succs, [&Succs](BasicBlock *Succ) { + Instruction *Term0 = Succs[0]->getTerminator(); + Instruction *Term = Succ->getTerminator(); + return !Term->isSameOperationAs(Term0) || + !equal(Term->operands(), Term0->operands()) || + Succs[0]->size() != Succ->size(); + }); + if (!AllSame) + return false; + if (AllSame) { + LockstepReverseIterator LRI(Succs); + while (LRI.isValid()) { + Instruction *I0 = (*LRI)[0]; + if (any_of(*LRI, [I0](Instruction *I) { + return !areIdenticalUpToCommutativity(I0, I); + })) { + return false; + } + --LRI; + } } - // Now we know that we only need to hoist debug intrinsics and the - // terminator. Let the loop below handle those 2 cases. + // Now we know that all instructions in all successors can be hoisted. Let + // the loop below handle the hoisting. } // Count how many instructions were not hoisted so far. There's a limit on how @@ -2350,81 +2439,6 @@ static void sinkLastInstruction(ArrayRef Blocks) { } } -namespace { - - // LockstepReverseIterator - Iterates through instructions - // in a set of blocks in reverse order from the first non-terminator. - // For example (assume all blocks have size n): - // LockstepReverseIterator I([B1, B2, B3]); - // *I-- = [B1[n], B2[n], B3[n]]; - // *I-- = [B1[n-1], B2[n-1], B3[n-1]]; - // *I-- = [B1[n-2], B2[n-2], B3[n-2]]; - // ... - class LockstepReverseIterator { - ArrayRef Blocks; - SmallVector Insts; - bool Fail; - - public: - LockstepReverseIterator(ArrayRef Blocks) : Blocks(Blocks) { - reset(); - } - - void reset() { - Fail = false; - Insts.clear(); - for (auto *BB : Blocks) { - Instruction *Inst = BB->getTerminator(); - for (Inst = Inst->getPrevNode(); Inst && isa(Inst);) - Inst = Inst->getPrevNode(); - if (!Inst) { - // Block wasn't big enough. - Fail = true; - return; - } - Insts.push_back(Inst); - } - } - - bool isValid() const { - return !Fail; - } - - void operator--() { - if (Fail) - return; - for (auto *&Inst : Insts) { - for (Inst = Inst->getPrevNode(); Inst && isa(Inst);) - Inst = Inst->getPrevNode(); - // Already at beginning of block. - if (!Inst) { - Fail = true; - return; - } - } - } - - void operator++() { - if (Fail) - return; - for (auto *&Inst : Insts) { - for (Inst = Inst->getNextNode(); Inst && isa(Inst);) - Inst = Inst->getNextNode(); - // Already at end of block. - if (!Inst) { - Fail = true; - return; - } - } - } - - ArrayRef operator * () const { - return Insts; - } - }; - -} // end anonymous namespace - /// Check whether BB's predecessors end with unconditional branches. If it is /// true, sink any common code from the predecessors to BB. static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll b/llvm/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll index 1b115d64a048bf..c81f3280d81a32 100644 --- a/llvm/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll +++ b/llvm/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll @@ -7,19 +7,10 @@ define i32 @foo(i32 %i) nounwind ssp !dbg !0 { ; CHECK-NEXT: entry: ; CHECK-NEXT: #dbg_value(i32 [[I:%.*]], [[META7:![0-9]+]], !DIExpression(), [[META8:![0-9]+]]) ; CHECK-NEXT: #dbg_value(i32 0, [[META9:![0-9]+]], !DIExpression(), [[META11:![0-9]+]]) -; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I]], 0, !dbg [[DBG12:![0-9]+]] -; CHECK-NEXT: br i1 [[COND]], label [[THEN:%.*]], label [[ELSE:%.*]], !dbg [[DBG12]] -; CHECK: then: -; CHECK-NEXT: [[CALL_1:%.*]] = call i32 (...) @bar(), !dbg [[DBG13:![0-9]+]] -; CHECK-NEXT: #dbg_value(i32 [[CALL_1]], [[META9]], !DIExpression(), [[DBG13]]) -; CHECK-NEXT: br label [[EXIT:%.*]], !dbg [[DBG15:![0-9]+]] -; CHECK: else: -; CHECK-NEXT: [[CALL_2:%.*]] = call i32 (...) @bar(), !dbg [[DBG16:![0-9]+]] -; CHECK-NEXT: #dbg_value(i32 [[CALL_2]], [[META9]], !DIExpression(), [[DBG16]]) -; CHECK-NEXT: br label [[EXIT]], !dbg [[DBG18:![0-9]+]] -; CHECK: exit: -; CHECK-NEXT: [[K_0:%.*]] = phi i32 [ [[CALL_1]], [[THEN]] ], [ [[CALL_2]], [[ELSE]] ] -; CHECK-NEXT: ret i32 [[K_0]], !dbg [[DBG19:![0-9]+]] +; CHECK-NEXT: [[CALL_1:%.*]] = call i32 (...) @bar(), !dbg [[DBG12:![0-9]+]] +; CHECK-NEXT: #dbg_value(i32 [[CALL_1]], [[META9]], !DIExpression(), [[META13:![0-9]+]]) +; CHECK-NEXT: #dbg_value(i32 [[CALL_1]], [[META9]], !DIExpression(), [[META15:![0-9]+]]) +; CHECK-NEXT: ret i32 [[CALL_1]], !dbg [[DBG17:![0-9]+]] ; entry: call void @llvm.dbg.value(metadata i32 %i, metadata !6, metadata !DIExpression()), !dbg !7 @@ -46,8 +37,8 @@ define i1 @hoist_with_debug2(i32 %x) !dbg !22 { ; CHECK-LABEL: @hoist_with_debug2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp ugt i32 [[X:%.*]], 2 -; CHECK-NEXT: #dbg_value(i32 [[X]], [[META21:![0-9]+]], !DIExpression(), [[META23:![0-9]+]]) -; CHECK-NEXT: #dbg_value(i32 [[X]], [[META21]], !DIExpression(), [[META23]]) +; CHECK-NEXT: #dbg_value(i32 [[X]], [[META19:![0-9]+]], !DIExpression(), [[META21:![0-9]+]]) +; CHECK-NEXT: #dbg_value(i32 [[X]], [[META19]], !DIExpression(), [[META21]]) ; CHECK-NEXT: [[DOT:%.*]] = select i1 [[TOBOOL_NOT]], i1 false, i1 true ; CHECK-NEXT: ret i1 [[DOT]] ; diff --git a/llvm/test/Transforms/SimplifyCFG/hoisting-metadata.ll b/llvm/test/Transforms/SimplifyCFG/hoisting-metadata.ll index 026002a4942af6..65ba6f6d3bcb1f 100644 --- a/llvm/test/Transforms/SimplifyCFG/hoisting-metadata.ll +++ b/llvm/test/Transforms/SimplifyCFG/hoisting-metadata.ll @@ -8,11 +8,7 @@ define i64 @hoist_load_with_matching_pointers_and_tbaa(i1 %c) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP:%.*]] = alloca i64, align 8 ; CHECK-NEXT: call void @init(ptr [[TMP]]) -; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[TMP]], align 8 -; CHECK-NOT: !tbaa -; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[TMP]], align 8 -; CHECK-NOT: !tbaa -; CHECK-NEXT: [[P:%.*]] = select i1 [[C]], i64 [[TMP0]], i64 [[TMP1]] +; CHECK-NEXT: [[P:%.*]] = load i64, ptr [[TMP]], align 8, !tbaa [[TBAA0:![0-9]+]] ; CHECK-NEXT: ret i64 [[P]] ; entry: @@ -74,11 +70,7 @@ define i64 @hoist_load_with_different_tbaa(i1 %c) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP:%.*]] = alloca i64, align 8 ; CHECK-NEXT: call void @init(ptr [[TMP]]) -; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[TMP]], align 8 -; CHECK-NOT: !tbaa -; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[TMP]], align 8 -; CHECK-NOT: !tbaa -; CHECK-NEXT: [[P:%.*]] = select i1 [[C]], i64 [[TMP0]], i64 [[TMP1]] +; CHECK-NEXT: [[P:%.*]] = load i64, ptr [[TMP]], align 8, !tbaa [[TBAA5:![0-9]+]] ; CHECK-NEXT: ret i64 [[P]] ; entry: @@ -135,3 +127,11 @@ exit: !3 = !{!"omnipotent char", !4, i64 0} !4 = !{!"Simple C++ TBAA"} !5 = !{!3, !3, i64 0} +;. +; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"p2 long long", [[META2:![0-9]+]], i64 0} +; CHECK: [[META2]] = !{!"any pointer", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +; CHECK: [[META4]] = !{!"Simple C++ TBAA"} +; CHECK: [[TBAA5]] = !{[[META3]], [[META3]], i64 0} +;.