diff --git a/include/llvm/IR/Instruction.h b/include/llvm/IR/Instruction.h
index df4f8df78b12..53e88de89492 100644
--- a/include/llvm/IR/Instruction.h
+++ b/include/llvm/IR/Instruction.h
@@ -197,6 +197,16 @@ class Instruction : public User,
   void setMetadata(unsigned KindID, MDNode *Node);
   void setMetadata(StringRef Kind, MDNode *Node);
 
+  /// Copy metadata from \p SrcInst to this instruction. \p WL, if not empty,
+  /// specifies the list of metadata kinds that should be copied. If \p WL is
+  /// empty, all metadata is copied.
+  void copyMetadata(const Instruction &SrcInst, ArrayRef<unsigned> WL = {});
+
+  /// If the instruction has "branch_weights" MD_prof metadata and the MDNode
+  /// has three operands (including the name string), swap the order of the
+  /// two weight operands.
+  void swapProfMetadata();
+
   /// Drop all unknown metadata except for debug locations.
   /// @{
   /// Passes are required to drop metadata they don't understand. This is a
diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp
index ed08f85c60b6..d65eb40ed1a5 100644
--- a/lib/IR/Instruction.cpp
+++ b/lib/IR/Instruction.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constants.h"
@@ -627,6 +628,46 @@ Instruction *Instruction::cloneImpl() const {
   llvm_unreachable("Subclass of Instruction failed to implement cloneImpl");
 }
 
+void Instruction::swapProfMetadata() {
+  MDNode *ProfileData = getMetadata(LLVMContext::MD_prof);
+  if (!ProfileData || ProfileData->getNumOperands() != 3 ||
+      !isa<MDString>(ProfileData->getOperand(0)))
+    return;
+
+  MDString *MDName = cast<MDString>(ProfileData->getOperand(0));
+  if (MDName->getString() != "branch_weights")
+    return;
+
+  // The first operand is the name; rebuild the node with the weights swapped.
+  Metadata *Ops[] = {ProfileData->getOperand(0), ProfileData->getOperand(2),
+                     ProfileData->getOperand(1)};
+  setMetadata(LLVMContext::MD_prof,
+              MDNode::get(ProfileData->getContext(), Ops));
+}
+
+/// Copy metadata from \p SrcInst to this instruction. If \p WL is empty, all
+/// metadata is copied; otherwise only the kinds listed in \p WL are copied.
+void Instruction::copyMetadata(const Instruction &SrcInst,
+                               ArrayRef<unsigned> WL) {
+  if (!SrcInst.hasMetadata())
+    return;
+
+  DenseSet<unsigned> WLS;
+  for (unsigned M : WL)
+    WLS.insert(M);
+
+  // Enumerate and copy over metadata from the old instruction to the new
+  // one, restricted to the whitelist if one was given.
+  SmallVector<std::pair<unsigned, MDNode *>, 4> TheMDs;
+  SrcInst.getAllMetadataOtherThanDebugLoc(TheMDs);
+  for (const auto &MD : TheMDs) {
+    if (WL.empty() || WLS.count(MD.first))
+      setMetadata(MD.first, MD.second);
+  }
+  if (WL.empty() || WLS.count(LLVMContext::MD_dbg))
+    setDebugLoc(SrcInst.getDebugLoc());
+}
+
 Instruction *Instruction::clone() const {
   Instruction *New = nullptr;
   switch (getOpcode()) {
@@ -641,16 +682,6 @@ Instruction *Instruction::clone() const {
   }
 
   New->SubclassOptionalData = SubclassOptionalData;
-  if (!hasMetadata())
-    return New;
-
-  // Otherwise, enumerate and copy over metadata from the old instruction to the
-  // new one.
-  SmallVector<std::pair<unsigned, MDNode *>, 4> TheMDs;
-  getAllMetadataOtherThanDebugLoc(TheMDs);
-  for (const auto &MD : TheMDs)
-    New->setMetadata(MD.first, MD.second);
-
-  New->setDebugLoc(getDebugLoc());
+  New->copyMetadata(*this);
  return New;
 }
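
The new copyMetadata/swapProfMetadata pair gives passes a single place to clone
metadata, with an optional whitelist of kinds. A minimal sketch of intended
usage (the helper name and the chosen kinds below are illustrative assumptions,
not code from this patch):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"

    using namespace llvm;

    // Hypothetical helper: re-emit a load at the builder's insert point,
    // keeping only the metadata kinds still known to hold at the new location.
    static LoadInst *reemitLoad(LoadInst &Old, IRBuilder<> &B) {
      LoadInst *New = B.CreateLoad(Old.getPointerOperand(), Old.getName());
      New->copyMetadata(Old, {LLVMContext::MD_dbg, LLVMContext::MD_nonnull});
      return New;
    }

An empty whitelist (the default) copies every kind, including the debug
location, which is what clone() below now relies on.
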
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index b9c693ff19ad..b3f3d2bbb538 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -1209,15 +1209,7 @@ void BranchInst::swapSuccessors() {
 
   // Update profile metadata if present and it matches our structural
   // expectations.
-  MDNode *ProfileData = getMetadata(LLVMContext::MD_prof);
-  if (!ProfileData || ProfileData->getNumOperands() != 3)
-    return;
-
-  // The first operand is the name. Fetch them backwards and build a new one.
-  Metadata *Ops[] = {ProfileData->getOperand(0), ProfileData->getOperand(2),
-                     ProfileData->getOperand(1)};
-  setMetadata(LLVMContext::MD_prof,
-              MDNode::get(ProfileData->getContext(), Ops));
+  swapProfMetadata();
 }
 
 BasicBlock *BranchInst::getSuccessorV(unsigned idx) const {
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 71980e85e8ca..c954341438b0 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -742,42 +742,6 @@ static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
   return &New;
 }
 
-static void copyMetadata(Instruction *DstInst, const Instruction *SrcInst,
-                         bool Swapped) {
-  if (!SrcInst || !SrcInst->hasMetadata())
-    return;
-
-  SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
-  SrcInst->getAllMetadata(MDs);
-  for (auto &MD : MDs) {
-    switch (MD.first) {
-    default:
-      break;
-    case LLVMContext::MD_prof:
-      if (Swapped && MD.second->getNumOperands() == 3 &&
-          isa<MDString>(MD.second->getOperand(0))) {
-        MDString *MDName = cast<MDString>(MD.second->getOperand(0));
-        if (MDName->getString() == "branch_weights") {
-          auto *ValT = cast_or_null<ConstantAsMetadata>(
-                           MD.second->getOperand(1))->getValue();
-          auto *ValF = cast_or_null<ConstantAsMetadata>(
-                           MD.second->getOperand(2))->getValue();
-          assert(ValT && ValF && "Invalid Operands of branch_weights");
-          auto NewMD =
-              MDBuilder(DstInst->getParent()->getContext())
-                  .createBranchWeights(cast<ConstantInt>(ValF)->getZExtValue(),
-                                       cast<ConstantInt>(ValT)->getZExtValue());
-          MD.second = NewMD;
-        }
-      }
-      // fallthrough.
-    case LLVMContext::MD_make_implicit:
-    case LLVMContext::MD_dbg:
-      DstInst->setMetadata(MD.first, MD.second);
-    }
-  }
-}
-
 /// Emit a conditional branch on two values if LIC == Val, branch to TrueDst,
 /// otherwise branch to FalseDest. Insert the code immediately before InsertPt.
 void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
@@ -800,7 +764,14 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
 
   // Insert the new branch.
   BranchInst *BI = BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt);
-  copyMetadata(BI, TI, Swapped);
+  if (TI) {
+    // FIXME: Check why this whitelist is needed here.
+    ArrayRef<unsigned> WL = {LLVMContext::MD_dbg, LLVMContext::MD_prof,
+                             LLVMContext::MD_make_implicit};
+    BI->copyMetadata(*TI, WL);
+    if (Swapped)
+      BI->swapProfMetadata();
+  }
 
   // If either edge is critical, split it. This helps preserve LoopSimplify
   // form for enclosing loops.
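
In the LoopUnswitch change above, the new preheader branch may be created with
its successors swapped relative to the terminator the metadata came from, so
the copied "branch_weights" must be swapped as well to keep each weight on its
original edge. The same pattern in isolation (a sketch with assumed names, not
code from this patch):

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"

    using namespace llvm;

    // Hypothetical helper: clone a conditional branch with the successors
    // reversed. After copyMetadata the weights still reflect the old successor
    // order, so swapProfMetadata puts them back on the right edges.
    static BranchInst *cloneReversed(BranchInst &BI) {
      BranchInst *New = BranchInst::Create(BI.getSuccessor(1),
                                           BI.getSuccessor(0),
                                           BI.getCondition());
      New->copyMetadata(BI, {LLVMContext::MD_dbg, LLVMContext::MD_prof});
      New->swapProfMetadata();
      return New;
    }
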
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 7d33259c030b..6566cd0538ff 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -2390,6 +2390,10 @@ class llvm::sroa::AllocaSliceRewriter
                                LI.isVolatile(), LI.getName());
       if (LI.isVolatile())
         NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
+
+      // Try to preserve nonnull metadata.
+      if (TargetTy->isPointerTy())
+        NewLI->copyMetadata(LI, LLVMContext::MD_nonnull);
       V = NewLI;
 
       // If this is an integer load past the end of the slice (which means the
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index cbf385d56339..186ab34bca7a 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -15,7 +15,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
@@ -23,6 +22,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/IteratedDominanceFrontier.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -38,6 +38,7 @@
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include <algorithm>
 using namespace llvm;
 
@@ -301,6 +302,18 @@ struct PromoteMem2Reg {
 
 } // end of anonymous namespace
 
+/// Given a load instruction \p LI, insert an assume(LI != null) right after it.
+static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) {
+  Function *AssumeIntrinsic =
+      Intrinsic::getDeclaration(LI->getModule(), Intrinsic::assume);
+  ICmpInst *LoadNotNull = new ICmpInst(ICmpInst::ICMP_NE, LI,
+                                       Constant::getNullValue(LI->getType()));
+  LoadNotNull->insertAfter(LI);
+  CallInst *CI = CallInst::Create(AssumeIntrinsic, {LoadNotNull});
+  CI->insertAfter(LoadNotNull);
+  AC->registerAssumption(CI);
+}
+
 static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
   // Knowing that this alloca is promotable, we know that it's safe to kill all
   // instructions except for load and store.
@@ -334,9 +347,9 @@ static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
 /// and thus must be phi-ed with undef. We fall back to the standard alloca
 /// promotion algorithm in that case.
 static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
-                                     LargeBlockInfo &LBI,
-                                     DominatorTree &DT,
-                                     AliasSetTracker *AST) {
+                                     LargeBlockInfo &LBI, DominatorTree &DT,
+                                     AliasSetTracker *AST,
+                                     AssumptionCache *AC) {
   StoreInst *OnlyStore = Info.OnlyStore;
   bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
   BasicBlock *StoreBB = OnlyStore->getParent();
@@ -387,6 +400,14 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
     // code.
     if (ReplVal == LI)
       ReplVal = UndefValue::get(LI->getType());
+
+    // If the load was marked as nonnull we don't want to lose that
+    // information when we erase this load, so we preserve it with
+    // an assume.
+    if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+        !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
+      addAssumeNonNull(AC, LI);
+
     LI->replaceAllUsesWith(ReplVal);
     if (AST && LI->getType()->isPointerTy())
       AST->deleteValue(LI);
@@ -435,7 +456,9 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
 /// }
 static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
                                      LargeBlockInfo &LBI,
-                                     AliasSetTracker *AST) {
+                                     AliasSetTracker *AST,
+                                     DominatorTree &DT,
+                                     AssumptionCache *AC) {
   // The trickiest case to handle is when we have large blocks. Because of this,
   // this code is optimized assuming that large blocks happen. This does not
   // significantly pessimize the small block case. This uses LargeBlockInfo to
@@ -476,10 +499,17 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
         // There is no store before this load, bail out (load may be affected
         // by the following stores - see main comment).
         return false;
-    }
-    else
+    } else {
       // Otherwise, there was a store before this load, the load takes its value.
-      LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0));
+      // Note, if the load was marked as nonnull we don't want to lose that
+      // information when we erase it, so we preserve it with an assume.
+      Value *ReplVal = std::prev(I)->second->getOperand(0);
+      if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+          !llvm::isKnownNonNullAt(ReplVal, LI, &DT))
+        addAssumeNonNull(AC, LI);
+
+      LI->replaceAllUsesWith(ReplVal);
+    }
 
     if (AST && LI->getType()->isPointerTy())
       AST->deleteValue(LI);
@@ -553,7 +583,7 @@ void PromoteMem2Reg::run() {
     // If there is only a single store to this value, replace any loads of
     // it that are directly dominated by the definition with the value stored.
     if (Info.DefiningBlocks.size() == 1) {
-      if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST)) {
+      if (rewriteSingleStoreAlloca(AI, Info, LBI, DT, AST, AC)) {
         // The alloca has been processed, move on.
         RemoveFromAllocasList(AllocaNum);
         ++NumSingleStore;
@@ -564,7 +594,7 @@ void PromoteMem2Reg::run() {
     // If the alloca is only read and written in one basic block, just perform a
     // linear sweep over the block to eliminate it.
     if (Info.OnlyUsedInOneBlock &&
-        promoteSingleBlockAlloca(AI, Info, LBI, AST)) {
+        promoteSingleBlockAlloca(AI, Info, LBI, AST, DT, AC)) {
       // The alloca has been processed, move on.
       RemoveFromAllocasList(AllocaNum);
       continue;
@@ -938,6 +968,13 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
 
       Value *V = IncomingVals[AI->second];
 
+      // If the load was marked as nonnull we don't want to lose that
+      // information when we erase this load, so we preserve it with
+      // an assume.
+      if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
+          !llvm::isKnownNonNullAt(V, LI, &DT))
+        addAssumeNonNull(AC, LI);
+
       // Anything using the load now uses the current value.
       LI->replaceAllUsesWith(V);
       if (AST && LI->getType()->isPointerTy())
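
For the inserted assume to be useful, later passes have to find it, which is
why addAssumeNonNull registers each new call with the AssumptionCache. A
sketch of the caller side, assuming the public PromoteMemToReg entry point
threads an AssumptionCache through to these helpers (consistent with the
parameters added above, but the exact signature is an assumption here):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Analysis/AssumptionCache.h"
    #include "llvm/IR/Dominators.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/PromoteMemToReg.h"

    using namespace llvm;

    // Hypothetical call site: with a non-null AC, mem2reg preserves !nonnull
    // loads via llvm.assume; with a null AC that metadata is simply dropped.
    static void promotePreservingNonNull(ArrayRef<AllocaInst *> Allocas,
                                         DominatorTree &DT,
                                         AssumptionCache &AC) {
      PromoteMemToReg(Allocas, DT, /*AST=*/nullptr, &AC);
    }
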
diff --git a/test/Transforms/Mem2Reg/preserve-nonnull-load-metadata.ll b/test/Transforms/Mem2Reg/preserve-nonnull-load-metadata.ll
new file mode 100644
index 000000000000..33a5b124c555
--- /dev/null
+++ b/test/Transforms/Mem2Reg/preserve-nonnull-load-metadata.ll
@@ -0,0 +1,89 @@
+; RUN: opt < %s -mem2reg -S | FileCheck %s
+
+; This tests that mem2reg preserves the !nonnull metadata on loads
+; from allocas that get optimized out.
+
+; Check the case where the alloca in question has a single store.
+define float* @single_store(float** %arg) {
+; CHECK-LABEL: define float* @single_store
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* %arg.load
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float** %buf, !nonnull !0
+  ret float* %buf.load
+}
+
+; Check the case where the alloca in question has more than one
+; store, but still within one basic block.
+define float* @single_block(float** %arg) {
+; CHECK-LABEL: define float* @single_block
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* %arg.load
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  store float* null, float** %buf, align 8
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float** %buf, !nonnull !0
+  ret float* %buf.load
+}
+
+; Check the case where the alloca in question has more than one
+; store and also reads and writes in multiple blocks.
+define float* @multi_block(float** %arg) {
+; CHECK-LABEL: define float* @multi_block
+; CHECK-LABEL: entry:
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: br label %next
+; CHECK-LABEL: next:
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* %arg.load, null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* %arg.load
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  store float* null, float** %buf, align 8
+  br label %next
+next:
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float** %buf, !nonnull !0
+  ret float* %buf.load
+}
+
+; Check that we don't add an assume if it's not
+; necessary, i.e. the value is already implied to be nonnull.
+define float* @no_assume(float** %arg) {
+; CHECK-LABEL: define float* @no_assume
+; CHECK-LABEL: entry:
+; CHECK: %arg.load = load float*, float** %arg, align 8
+; CHECK: %cn = icmp ne float* %arg.load, null
+; CHECK: br i1 %cn, label %next, label %fin
+; CHECK-LABEL: next:
+; CHECK-NOT: call void @llvm.assume
+; CHECK: ret float* %arg.load
+; CHECK-LABEL: fin:
+; CHECK: ret float* null
+entry:
+  %buf = alloca float*
+  %arg.load = load float*, float** %arg, align 8
+  %cn = icmp ne float* %arg.load, null
+  br i1 %cn, label %next, label %fin
+next:
+; At this point the above nonnull check ensures that
+; the value %arg.load is nonnull in this block and thus
+; we need not add the assume.
+  store float* %arg.load, float** %buf, align 8
+  %buf.load = load float*, float** %buf, !nonnull !0
+  ret float* %buf.load
+fin:
+  ret float* null
+}
+
+!0 = !{}
diff --git a/test/Transforms/SROA/preserve-nonnull.ll b/test/Transforms/SROA/preserve-nonnull.ll
new file mode 100644
index 000000000000..fc5ce6a445fa
--- /dev/null
+++ b/test/Transforms/SROA/preserve-nonnull.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+;
+; Make sure that SROA doesn't lose nonnull metadata
+; on loads from allocas that get optimized out.
+
+; CHECK-LABEL: define float* @yummy_nonnull
+; CHECK: [[RETURN:%(.*)]] = load float*, float** %arg, align 8
+; CHECK: [[ASSUME:%(.*)]] = icmp ne float* {{.*}}[[RETURN]], null
+; CHECK: call void @llvm.assume(i1 {{.*}}[[ASSUME]])
+; CHECK: ret float* {{.*}}[[RETURN]]
+
+define float* @yummy_nonnull(float** %arg) {
+entry-block:
+  %buf = alloca float*
+
+  %_arg_i8 = bitcast float** %arg to i8*
+  %_buf_i8 = bitcast float** %buf to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %_buf_i8, i8* %_arg_i8, i64 8, i32 8, i1 false)
+
+  %ret = load float*, float** %buf, align 8, !nonnull !0
+  ret float* %ret
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
+
+!0 = !{}
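
One detail worth noting in the SROA change: copyMetadata is called with a
single kind rather than a list, which works because ArrayRef<unsigned>
converts implicitly from a single element. Both forms side by side (a sketch;
the helper names are made up):

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"

    using namespace llvm;

    static void copyNonNullOnly(LoadInst &To, const LoadInst &From) {
      To.copyMetadata(From, LLVMContext::MD_nonnull); // whitelist of one kind
    }

    static void copyAllMetadata(LoadInst &To, const LoadInst &From) {
      To.copyMetadata(From); // empty whitelist: copy everything, incl. !dbg
    }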