diff --git a/clang/lib/Driver/ToolChains/AIE.cpp b/clang/lib/Driver/ToolChains/AIE.cpp
index da081d7eb63d..bde8d897c335 100644
--- a/clang/lib/Driver/ToolChains/AIE.cpp
+++ b/clang/lib/Driver/ToolChains/AIE.cpp
@@ -184,6 +184,9 @@ void AIEToolChain::addClangTargetOptions(
   // Extend the max limit of the search depth in BasicAA
   CC1Args.append({"-mllvm", "-basic-aa-max-lookup-search-depth=10"});
+
+  // Enable Loop Iteration Count Assumptions
+  CC1Args.append({"-mllvm", "-enable-loop-iter-count-assumptions=true"});
 }
 
 // Avoid using newer dwarf versions, as the simulator doesn't understand newer
diff --git a/clang/test/CodeGen/aie/peel-itercount.c b/clang/test/CodeGen/aie/peel-itercount.c
index ed2ceecc6456..4239c1f3ac36 100644
--- a/clang/test/CodeGen/aie/peel-itercount.c
+++ b/clang/test/CodeGen/aie/peel-itercount.c
@@ -14,8 +14,9 @@
 // and that the itercounts have been updated appropriately
 
 // CHECK-LABEL: loop28_37
-// CHECK: for.body.preheader:
-// CHECK: for.body.peel.next:
+// CHECK: for.body.peel.next6:
+// CHECK: for.cond.cleanup:
+// CHECK: for.body:
 // CHECK: !llvm.loop !6
 // CHECK: !6 = distinct !{!6, !7, !8, !9}
 // CHECK: !7 = !{!"llvm.loop.peeled.count", i32 2}
diff --git a/llvm/include/llvm/Transforms/Utils/LoopIterCountAssumptions.h b/llvm/include/llvm/Transforms/Utils/LoopIterCountAssumptions.h
new file mode 100644
index 000000000000..fd70abd3d2f6
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Utils/LoopIterCountAssumptions.h
@@ -0,0 +1,35 @@
+//===-- LoopIterCountAssumptions.h - Add loop assumptions -------*- C++ -*-===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass converts Loop Iteration Count Metadata to Assumptions which can be
+// picked up by Loop Rotate to remove Loop Guards.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_LOOPITERCOUNTASSUMPTIONS_H
+#define LLVM_TRANSFORMS_UTILS_LOOPITERCOUNTASSUMPTIONS_H
+#include "llvm/IR/PassManager.h"
+#include "llvm/Passes/PassBuilder.h"
+
+namespace llvm {
+
+class Loop;
+/// Converts Loop Iteration Count Metadata to Assumptions.
+class LoopIterCountAssumptions
+    : public PassInfoMixin<LoopIterCountAssumptions> {
+
+public:
+  PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+                        LoopStandardAnalysisResults &AR, LPMUpdater &U);
+};
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_LOOPITERCOUNTASSUMPTIONS_H
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 8d408ca2363a..f51dbbcc59fd 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -4,6 +4,9 @@
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
+// Modifications (c) Copyright 2024 Advanced Micro Devices, Inc. or its
+// affiliates
+//
 //===----------------------------------------------------------------------===//
 /// \file
 ///
@@ -274,6 +277,7 @@
 #include "llvm/Transforms/Utils/InstructionNamer.h"
 #include "llvm/Transforms/Utils/LCSSA.h"
 #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
+#include "llvm/Transforms/Utils/LoopIterCountAssumptions.h"
 #include "llvm/Transforms/Utils/LoopSimplify.h"
 #include "llvm/Transforms/Utils/LoopVersioning.h"
 #include "llvm/Transforms/Utils/LowerGlobalDtors.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 744e25808aa0..a357efc040b9 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -132,6 +132,7 @@
 #include "llvm/Transforms/Utils/CountVisits.h"
 #include "llvm/Transforms/Utils/InjectTLIMappings.h"
 #include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
+#include "llvm/Transforms/Utils/LoopIterCountAssumptions.h"
 #include "llvm/Transforms/Utils/Mem2Reg.h"
 #include "llvm/Transforms/Utils/MoveAutoInit.h"
 #include "llvm/Transforms/Utils/NameAnonGlobals.h"
@@ -306,6 +307,11 @@ static cl::opt<bool> UseLoopVersioningLICM(
     "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
     cl::desc("Enable the experimental Loop Versioning LICM pass"));
 
+static cl::opt<bool> EnableLoopIterCountToAssumptions(
+    "enable-loop-iter-count-assumptions", cl::Hidden, cl::init(false),
+    cl::desc(
+        "Enable Conversion of Loop Iteration Count Metadata to Assumptions."));
+
 namespace llvm {
 extern cl::opt<bool> EnableMemProfContextDisambiguation;
 
@@ -463,6 +469,9 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
   LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
                         /*AllowSpeculation=*/false));
 
+  if (EnableLoopIterCountToAssumptions)
+    LPM1.addPass(LoopIterCountAssumptions());
+
   LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
                               isLTOPreLink(Phase)));
   // TODO: Investigate promotion cap for O1.
@@ -644,6 +653,9 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
   LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
                         /*AllowSpeculation=*/false));
 
+  if (EnableLoopIterCountToAssumptions)
+    LPM1.addPass(LoopIterCountAssumptions());
+
   // Disable header duplication in loop rotation at -Oz.
   LPM1.addPass(LoopRotatePass(EnableLoopHeaderDuplication ||
                                   Level != OptimizationLevel::Oz,
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 2fbc7f7d88ba..9d534fdb2297 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -4,6 +4,9 @@
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
+// Modifications (c) Copyright 2024 Advanced Micro Devices, Inc. 
or its +// affiliates +// //===----------------------------------------------------------------------===// // // This file is used as the registry of passes that are part of the core LLVM @@ -604,6 +607,7 @@ LOOP_PASS("loop-bound-split", LoopBoundSplitPass()) LOOP_PASS("loop-deletion", LoopDeletionPass()) LOOP_PASS("loop-idiom", LoopIdiomRecognizePass()) LOOP_PASS("loop-instsimplify", LoopInstSimplifyPass()) +LOOP_PASS("loop-iter-count-assumptions", LoopIterCountAssumptions()) LOOP_PASS("loop-predication", LoopPredicationPass()) LOOP_PASS("loop-reduce", LoopStrengthReducePass()) LOOP_PASS("loop-simplifycfg", LoopSimplifyCFGPass()) diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt index 51e8821773c3..b089868e8298 100644 --- a/llvm/lib/Transforms/Utils/CMakeLists.txt +++ b/llvm/lib/Transforms/Utils/CMakeLists.txt @@ -1,3 +1,11 @@ +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# Modifications (c) Copyright 2024 Advanced Micro Devices, Inc. or its +# affiliates +# add_llvm_component_library(LLVMTransformUtils AddDiscriminators.cpp AMDGPUEmitPrintf.cpp @@ -39,6 +47,7 @@ add_llvm_component_library(LLVMTransformUtils LibCallsShrinkWrap.cpp Local.cpp LoopConstrainer.cpp + LoopIterCountAssumptions.cpp LoopPeel.cpp LoopRotationUtils.cpp LoopSimplify.cpp diff --git a/llvm/lib/Transforms/Utils/LoopIterCountAssumptions.cpp b/llvm/lib/Transforms/Utils/LoopIterCountAssumptions.cpp new file mode 100644 index 000000000000..9118793df91a --- /dev/null +++ b/llvm/lib/Transforms/Utils/LoopIterCountAssumptions.cpp @@ -0,0 +1,251 @@ +//===-- LoopIterCountAssumptions.cpp - add Loop assumptions -----*- C++ -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates +// +//===----------------------------------------------------------------------===// +// +// This pass converts Loop Iteration Count Metadata to Assumptions which can be +// picked up by Loop Rotate to remove Loop Guards. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LoopIterCountAssumptions.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Value.h" +#include "llvm/Passes/PassBuilder.h" +#include "llvm/Support/Casting.h" +#include "llvm/Transforms/Scalar/LICM.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" + +#define DEBUG_TYPE "loop-iter-count-assumptions" + +using namespace llvm; + +namespace { + +std::string getFunctionAndBlockNames(const BasicBlock &BB) { + return BB.getParent()->getName().str() + " " + BB.getName().str(); +} + +/// Return the Branch Compare Instruction of CurrentLoop if the Loop is well +/// formed and this pass can process the Predicate +ICmpInst *getLoopCmpInst(const Loop &CurrentLoop) { + + if (CurrentLoop.isRotatedForm()) { + LLVM_DEBUG(dbgs() << "Loop already in rotated form. 
Will not add Loop "
+                         "Iteration Count assumptions.\n");
+    return nullptr;
+  }
+
+  /// Check that the loop has a single Exiting Block. If the CurrentLoop
+  /// has multiple Exiting Blocks, ExitBB will be a nullptr
+  auto *ExitBB = CurrentLoop.getExitingBlock();
+  if (!ExitBB)
+    return nullptr;
+
+  BranchInst *BI = dyn_cast<BranchInst>(ExitBB->getTerminator());
+  if (!BI)
+    return nullptr;
+
+  ICmpInst *LoopCmpInstr = dyn_cast<ICmpInst>(BI->getCondition());
+  if (!LoopCmpInstr)
+    return nullptr;
+
+  LLVM_DEBUG(dbgs() << "Condition Found: " << *LoopCmpInstr << "\n");
+  return LoopCmpInstr;
+}
+
+bool hasVariableStepSize(Value &Op, ScalarEvolution &SE) {
+  const SCEVAddRecExpr *AddRec =
+      dyn_cast_or_null<SCEVAddRecExpr>(SE.getSCEV(&Op));
+  if (!AddRec)
+    return false;
+
+  const SCEV *StepSize = AddRec->getStepRecurrence(SE);
+  return !isa<SCEVConstant>(StepSize);
+}
+
+/// Return the AddRecExpr evaluated at Iteration \p IterCount if an
+/// AddRecExpr can be extracted, otherwise return the loop-invariant Value of
+/// \p Op
+Value *expandValueAtIteration(Value *Op, ScalarEvolution &SE,
+                              SCEVExpander &Expander,
+                              Instruction *InsertionPoint, Loop *CurrentLoop,
+                              int64_t IterCount) {
+  const SCEVAddRecExpr *AddRec =
+      dyn_cast_or_null<SCEVAddRecExpr>(SE.getSCEV(Op));
+  if (AddRec) {
+    const SCEV *IterSCEV =
+        AddRec->evaluateAtIteration(SE.getConstant(APInt(32, IterCount)), SE);
+
+    // Copy Overflow Flags to the SCEV
+    SCEV::NoWrapFlags NWF = AddRec->getNoWrapFlags(
+        SCEV::NoWrapFlags(/*Mask=*/SCEV::FlagNUW | SCEV::FlagNSW));
+
+    // IterSCEV can either be an AddExpr or simplify to a MulExpr (in the case
+    // of a zero offset and a variable step size); therefore, assign the
+    // Overflow Flags to every CommutativeExpr that will be generated from the
+    // AddRecExpr evaluation. If IterSCEV evaluates to a constant, there is no
+    // need to add a Flag.
+    auto *CE = dyn_cast<SCEVCommutativeExpr>(const_cast<SCEV *>(IterSCEV));
+    if (CE && NWF) {
+      CE->setNoWrapFlags(NWF);
+      IterSCEV = dyn_cast<SCEV>(CE);
+    }
+
+    if (!Expander.isSafeToExpand(IterSCEV)) {
+      LLVM_DEBUG(dbgs() << "LoopIterCountAssumptions-Warning: Cannot Expand "
+                           "Iteration Scalar Evolution "
+                        << *IterSCEV << "\n");
+      return nullptr;
+    }
+    return Expander.expandCodeFor(IterSCEV, Op->getType(), InsertionPoint);
+  }
+
+  LLVM_DEBUG(dbgs() << "Could not extract AddRecExpr, will try to get loop "
+                       "invariant Value of "
+                    << *Op << "\n");
+
+  if (CurrentLoop->isLoopInvariant(Op))
+    return Op;
+
+  LLVM_DEBUG(dbgs() << "Operand is loop variant " << *Op << "\n");
+  return nullptr;
+}
+
+/// Try to insert an assumption into the Loop Preheader that the condition is
+/// true at iteration \p IterCount
+void tryInsertIterationAssumption(ICmpInst &LoopCmpInstr, Loop &CurrentLoop,
+                                  int64_t IterCount, ScalarEvolution &SE,
+                                  AssumptionCache &AC) {
+
+  if (!CurrentLoop.getLoopPreheader()) {
+    LLVM_DEBUG(dbgs() << "LoopIterCountAssumptions-Warning: Loop has no "
+                         "preheader, will not insert Assumption!\n");
+    return;
+  }
+
+  Instruction *InsertionPoint = CurrentLoop.getLoopPreheader()->getTerminator();
+  LLVM_DEBUG(dbgs() << "Inserting Assumption with IterCount " << IterCount
+                    << " before: " << *InsertionPoint << "\n");
+
+  // LoopRotate uses SimplifyQuery to determine whether a Branch is
+  // conditional or not. SimplifyQuery can only take an Assumption into
+  // account if it is before the to-be-evaluated Compare Instruction. Here,
+  // the Assumptions are inserted into the Preheader, so that the assumption
+  // is only valid once and not on every entry of the Loop Header.
+  IRBuilder<> Builder(dyn_cast<Instruction>(InsertionPoint));
+
+  SCEVExpander Expander(
+      SE, CurrentLoop.getLoopPreheader()->getModule()->getDataLayout(),
+      "expanded");
+
+  Value *LHS = expandValueAtIteration(LoopCmpInstr.getOperand(0), SE, Expander,
+                                      InsertionPoint, &CurrentLoop, IterCount);
+  if (!LHS)
+    return;
+  LLVM_DEBUG(dbgs() << "LHS = " << *LHS << "\n");
+
+  Value *RHS = expandValueAtIteration(LoopCmpInstr.getOperand(1), SE, Expander,
+                                      InsertionPoint, &CurrentLoop, IterCount);
+
+  if (!RHS)
+    return;
+  LLVM_DEBUG(dbgs() << "RHS = " << *RHS << "\n");
+
+  // If the false branch targets the Loop Body, invert the predicate, since
+  // the Loop Condition is inverted to remain in the Loop
+  CmpInst::Predicate Pred = LoopCmpInstr.getPredicate();
+  if (!CurrentLoop.contains(
+          dyn_cast<BranchInst>(CurrentLoop.getExitingBlock()->getTerminator())
+              ->getSuccessor(0)))
+    Pred = LoopCmpInstr.getInversePredicate();
+
+  Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
+
+  // Insert Assumption
+  CallInst *Assumption = Builder.CreateAssumption(Cmp);
+  AC.registerAssumption(dyn_cast<AssumeInst>(Assumption));
+  LLVM_DEBUG(dbgs() << "With Comparator :" << *Cmp << "\n"
+                    << "Assume :" << *Assumption << "\n");
+}
+
+/// Determine whether \p CurrentLoop carries Loop Iteration Count Metadata
+/// with a count greater than 0.
+/// \return The Minimum Iteration Count of the Loop
+std::optional<int64_t> getValidMinIterCount(Loop &CurrentLoop) {
+  BasicBlock *LoopHeader = CurrentLoop.getHeader();
+
+  // Dump loop summary
+  LLVM_DEBUG(if (CurrentLoop.getLoopPreheader()) {
+    dbgs() << "Preheader:" << CurrentLoop.getLoopPreheader()->getName() << "\n";
+  } dbgs() << "LoopIterCountAssumption-Info: Function = "
+           << getFunctionAndBlockNames(*LoopHeader) << "\n");
+
+  std::optional<int64_t> RawMinIterationCount = getMinTripCount(&CurrentLoop);
+  if (!RawMinIterationCount) {
+    LLVM_DEBUG(dbgs() << "LoopIterCountAssumptions: Loop Iteration "
+                         "Count not provided for "
+                      << getFunctionAndBlockNames(*LoopHeader) << "\n");
+    return std::nullopt;
+  }
+
+  const int64_t MinIterCount = *RawMinIterationCount;
+  if (MinIterCount <= 0) {
+    LLVM_DEBUG(dbgs() << "LoopIterCountAssumptions-Warning: Loop Iteration "
+                         "Count is less than or equal to zero for "
+                      << getFunctionAndBlockNames(*LoopHeader) << "\n");
+    return std::nullopt;
+  }
+
+  LLVM_DEBUG(dbgs() << "Processing Loop Iteration Count Metadata: "
+                    << getFunctionAndBlockNames(*LoopHeader) << " ("
+                    << MinIterCount << ")\n");
+  return MinIterCount;
+}
+
+void tryInsertIterationAssumptions(ICmpInst &LoopCmpInstr, Loop &CurrentLoop,
+                                   int64_t IterCount,
+                                   LoopStandardAnalysisResults &AR) {
+  const bool ContainsEqualPredicate =
+      LoopCmpInstr.getPredicate() == CmpInst::ICMP_EQ ||
+      LoopCmpInstr.getPredicate() == CmpInst::ICMP_NE;
+
+  // Guarantee that the Loop will execute at least once, to handle variable
+  // Step Sizes and EQ/NE predicates
+  if (hasVariableStepSize(*LoopCmpInstr.getOperand(0), AR.SE) ||
+      hasVariableStepSize(*LoopCmpInstr.getOperand(1), AR.SE) ||
+      ContainsEqualPredicate)
+    tryInsertIterationAssumption(LoopCmpInstr, CurrentLoop, 0, AR.SE, AR.AC);
+
+  // Insert an Assumption evaluated at IterCount - 1 to prohibit Loop
+  // Unrolling from inserting a Loop Guard
+  tryInsertIterationAssumption(LoopCmpInstr, CurrentLoop, IterCount - 1, AR.SE,
+                               AR.AC);
+}
+
+} // namespace
+
+PreservedAnalyses LoopIterCountAssumptions::run(Loop &CurrentLoop,
+                                                LoopAnalysisManager &AM,
+                                                LoopStandardAnalysisResults &AR,
+                                                LPMUpdater &U) {
+  std::optional<int64_t> MinIterCount = getValidMinIterCount(CurrentLoop);
+  if (!MinIterCount)
+    return PreservedAnalyses::all();
+
+  ICmpInst *LoopCmpInstr = getLoopCmpInst(CurrentLoop);
+  if (!LoopCmpInstr)
+    return PreservedAnalyses::all();
+
+  tryInsertIterationAssumptions(*LoopCmpInstr, CurrentLoop, *MinIterCount, AR);
+  return PreservedAnalyses::all();
+}
diff --git a/llvm/test/Transforms/Util/loop-iter-count-assumptions.ll b/llvm/test/Transforms/Util/loop-iter-count-assumptions.ll
new file mode 100644
index 000000000000..adec592beaf7
--- /dev/null
+++ b/llvm/test/Transforms/Util/loop-iter-count-assumptions.ll
@@ -0,0 +1,1306 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+;
+; This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+; See https://llvm.org/LICENSE.txt for license information.
+; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+;
+; (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
+; unit test for the loop iteration metadata conversion to assumptions
+;
+; NOTE: Example file for converting loop iter count to assumptions in the Loop
+;
+; RUN: opt -S -passes='loop-mssa(licm,loop-iter-count-assumptions)' -verify-each < %s | FileCheck %s
+
+
+; for loop where loop counter is incremented by +1 and starts at m
+; for(int i=m; i < n; i++)
+
+; guarded loop
+; if (n > 0) {
+;   for (i = 0; i < n; i++)
+; }
+; Function Attrs: mustprogress nounwind
+define dso_local void @guardedLoop(ptr %ptr, i32 noundef %n) #0 {
+; CHECK-LABEL: @guardedLoop(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt i32 3, [[N]]
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP0]])
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[I_0]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[I_0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 8
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP0]]
+; CHECK:       if.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %cmp = icmp sgt i32 %n, 0
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:                                          ; preds = %entry
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %if.then
+  %i.0 = phi i32 [ 0, %if.then ], [ %inc, %for.body ]
+  %cmp1 = icmp slt i32 %i.0, %n
+  br i1 %cmp1, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  br label %if.end
+
+for.body:                                         ; preds = %for.cond
+  %inc = add nsw i32 %i.0, 1
+  %arrayidx = getelementptr inbounds i32, ptr %ptr, i32 %i.0
+  %1 = load i32, ptr %arrayidx, align 4
+  %add = add nsw i32 %1, 8
+  store i32 %add, ptr %arrayidx, align 4
+  br label %for.cond, !llvm.loop !6
+
+if.end:                                           ; preds = %for.cond.cleanup, %entry
+  ret void
+}
+
+; multiple exiting blocks
+; for (i = start_val; i < upper_b; ++i) {
+;   if (i > bound) break;
+; }
+; Function Attrs: mustprogress noinline nounwind optnone uwtable
+define dso_local void @multipleExits(ptr noundef %ptr, i32 noundef %n, i32 noundef %Bound) #0 {
+; CHECK-LABEL: @multipleExits(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR_ADDR:%.*]] = 
alloca ptr, align 8 +; CHECK-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[BOUND_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store ptr [[PTR:%.*]], ptr [[PTR_ADDR]], align 8 +; CHECK-NEXT: store i32 [[N:%.*]], ptr [[N_ADDR]], align 4 +; CHECK-NEXT: store i32 [[BOUND:%.*]], ptr [[BOUND_ADDR]], align 4 +; CHECK-NEXT: store i32 0, ptr [[I]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[BOUND_ADDR]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8 +; CHECK-NEXT: [[I_PROMOTED:%.*]] = load i32, ptr [[I]], align 4 +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[INC1:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[I_PROMOTED]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC1]], [[TMP0]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END_LOOPEXIT:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[INC1]], [[TMP1]] +; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[INC1_LCSSA2:%.*]] = phi i32 [ [[INC1]], [[FOR_BODY]] ] +; CHECK-NEXT: store i32 [[INC1_LCSSA2]], ptr [[I]], align 4 +; CHECK-NEXT: br label [[FOR_END:%.*]] +; CHECK: if.end: +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[INC1]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 [[IDXPROM]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 8 +; CHECK-NEXT: [[IDXPROM2:%.*]] = sext i32 [[INC1]] to i64 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM2]] +; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX3]], align 4 +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[INC]] = add nsw i32 [[INC1]], 1 +; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP0]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: [[INC1_LCSSA:%.*]] = phi i32 [ [[INC1]], [[FOR_COND]] ] +; CHECK-NEXT: store i32 [[INC1_LCSSA]], ptr [[I]], align 4 +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + %ptr.addr = alloca ptr, align 8 + %n.addr = alloca i32, align 4 + %Bound.addr = alloca i32, align 4 + %i = alloca i32, align 4 + store ptr %ptr, ptr %ptr.addr, align 8 + store i32 %n, ptr %n.addr, align 4 + store i32 %Bound, ptr %Bound.addr, align 4 + store i32 0, ptr %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, ptr %i, align 4 + %1 = load i32, ptr %n.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %2 = load i32, ptr %i, align 4 + %3 = load i32, ptr %Bound.addr, align 4 + %cmp1 = icmp sgt i32 %2, %3 + br i1 %cmp1, label %if.then, label %if.end + +if.then: ; preds = %for.body + br label %for.end + +if.end: ; preds = %for.body + %4 = load ptr, ptr %ptr.addr, align 8 + %5 = load i32, ptr %i, align 4 + %idxprom = sext i32 %5 to i64 + %arrayidx = getelementptr inbounds i32, ptr %4, i64 %idxprom + %6 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %6, 8 + %7 = load ptr, ptr %ptr.addr, align 8 + %8 = load i32, ptr %i, align 4 + %idxprom2 = sext i32 %8 to i64 + %arrayidx3 = getelementptr inbounds i32, ptr %7, i64 %idxprom2 + store i32 %add, ptr %arrayidx3, align 4 + br label %for.inc + +for.inc: 
; preds = %if.end + %9 = load i32, ptr %i, align 4 + %inc = add nsw i32 %9, 1 + store i32 %inc, ptr %i, align 4 + br label %for.cond, !llvm.loop !6 + +for.end: ; preds = %if.then, %for.cond + ret void +} + +; multiple latches +; Function Attrs: mustprogress nofree norecurse nosync nounwind memory(argmem: readwrite, inaccessiblemem: write) +define dso_local void @multipleLatches(ptr nocapture %ptr, i32 noundef %n, i32 noundef %abort) local_unnamed_addr { +; CHECK-LABEL: @multipleLatches( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 3, [[N:%.*]] +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP0]]) +; CHECK-NEXT: br label [[FOR_COND_OUTER:%.*]] +; CHECK: for.cond.outer: +; CHECK-NEXT: [[I_08_PH:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[I_08_PH]], [[N]] +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[I_08_PH]] to i20 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i20 [[TMP1]] +; CHECK-NEXT: [[CONDIF:%.*]] = icmp ult i32 [[I_08_PH]], 3 +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 8 +; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: br i1 [[CONDIF]], label [[FOR_INC]], label [[FOR_COND]], !llvm.loop [[LOOP0]] +; CHECK: for.inc: +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08_PH]], 1 +; CHECK-NEXT: br label [[FOR_COND_OUTER]], !llvm.loop [[LOOP0]] +; +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.inc ], [ %i.08, %for.body] + %cmp1 = icmp slt i32 %i.08, %n + br i1 %cmp1, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.inc + ret void + +for.body: ; preds = %for.body + %1 = trunc i32 %i.08 to i20 + %arrayidx = getelementptr inbounds i32, ptr %ptr, i20 %1 + %2 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %2, 8 + store i32 %add, ptr %arrayidx, align 4 + %condif = icmp ult i32 %i.08, 3 + br i1 %condif, label %for.inc, label %for.cond, !llvm.loop !6 + +for.inc: ; preds = %if.end, %for.body + %inc = add nuw nsw i32 %i.08, 1 + br label %for.cond, !llvm.loop !6 +} + +; add continue condition into the loop check +; for (int i=0; i < n; i++) { +; if (i % abort ==0 ) continue;} +; Function Attrs: mustprogress nofree norecurse nosync nounwind memory(argmem: readwrite, inaccessiblemem: write) +define dso_local void @ContinueCondition(ptr nocapture %ptr, i32 noundef %n, i32 noundef %abort) local_unnamed_addr #0 { +; CHECK-LABEL: @ContinueCondition( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[REM:%.*]] = srem i32 [[I_08]], [[ABORT:%.*]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[REM]], 0 +; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_INC]], label [[IF_END:%.*]] +; CHECK: if.end: +; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[I_08]] to i20 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i20 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 8 +; CHECK-NEXT: store i32 [[ADD]], ptr 
[[ARRAYIDX]], align 4
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_08]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0]]
+;
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.inc
+  ret void
+
+for.body:                                         ; preds = %entry, %for.inc
+  %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %rem = srem i32 %i.08, %abort
+  %cmp1 = icmp eq i32 %rem, 0
+  br i1 %cmp1, label %for.inc, label %if.end
+
+if.end:                                           ; preds = %for.body
+  %1 = trunc i32 %i.08 to i20
+  %arrayidx = getelementptr inbounds i32, ptr %ptr, i20 %1
+  %2 = load i32, ptr %arrayidx, align 4
+  %add = add nsw i32 %2, 8
+  store i32 %add, ptr %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.end
+  %inc = add nuw nsw i32 %i.08, 1
+  %exitcond.not = icmp eq i32 %inc, %n
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !6
+}
+
+; for (i = 1, n = 20; i < n; i = 2*i, ++n)
+; Function Attrs: mustprogress noinline nounwind optnone uwtable
+define dso_local void @complexSCEV(ptr noundef %ptr) #0 {
+; CHECK-LABEL: @complexSCEV(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR_ADDR:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[N:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store ptr [[PTR:%.*]], ptr [[PTR_ADDR]], align 8
+; CHECK-NEXT:    store i32 1, ptr [[I]], align 4
+; CHECK-NEXT:    store i32 20, ptr [[N]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+; CHECK-NEXT:    [[I_PROMOTED:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[N_PROMOTED:%.*]] = load i32, ptr [[N]], align 4
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[INC2:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[N_PROMOTED]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[MUL1:%.*]] = phi i32 [ [[MUL:%.*]], [[FOR_INC]] ], [ [[I_PROMOTED]], [[ENTRY]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[MUL1]], [[INC2]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[MUL1]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], 8
+; CHECK-NEXT:    [[IDXPROM1:%.*]] = sext i32 [[MUL1]] to i64
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDXPROM1]]
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[MUL]] = mul nsw i32 2, [[MUL1]]
+; CHECK-NEXT:    [[INC]] = add nsw i32 [[INC2]], 1
+; CHECK-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP0]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[INC2_LCSSA:%.*]] = phi i32 [ [[INC2]], [[FOR_COND]] ]
+; CHECK-NEXT:    [[MUL1_LCSSA:%.*]] = phi i32 [ [[MUL1]], [[FOR_COND]] ]
+; CHECK-NEXT:    store i32 [[MUL1_LCSSA]], ptr [[I]], align 4
+; CHECK-NEXT:    store i32 [[INC2_LCSSA]], ptr [[N]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %ptr.addr = alloca ptr, align 8
+  %i = alloca i32, align 4
+  %n = alloca i32, align 4
+  store ptr %ptr, ptr %ptr.addr, align 8
+  store i32 1, ptr %i, align 4
+  store i32 20, ptr %n, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32, ptr %i, align 4
+  %1 = 
load i32, ptr %n, align 4
+  %cmp = icmp slt i32 %0, %1
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %2 = load ptr, ptr %ptr.addr, align 8
+  %3 = load i32, ptr %i, align 4
+  %idxprom = sext i32 %3 to i64
+  %arrayidx = getelementptr inbounds i32, ptr %2, i64 %idxprom
+  %4 = load i32, ptr %arrayidx, align 4
+  %add = add nsw i32 %4, 8
+  %5 = load ptr, ptr %ptr.addr, align 8
+  %6 = load i32, ptr %i, align 4
+  %idxprom1 = sext i32 %6 to i64
+  %arrayidx2 = getelementptr inbounds i32, ptr %5, i64 %idxprom1
+  store i32 %add, ptr %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %7 = load i32, ptr %i, align 4
+  %mul = mul nsw i32 2, %7
+  store i32 %mul, ptr %i, align 4
+  %8 = load i32, ptr %n, align 4
+  %inc = add nsw i32 %8, 1
+  store i32 %inc, ptr %n, align 4
+  br label %for.cond, !llvm.loop !6
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; loop variant upper bound
+; hint: SCEV of n evaluates to scUnknown, detect that n is loop variant
+; for (int i = 0; i < *n; i += m) {
+;   ptr[i] = ptr[i] + 8;
+;   *n += 1;
+; }
+define dso_local void @LoopVariantUpperBound(ptr noalias %ptr, ptr %n, i32 noundef %m) #0 {
+; CHECK-LABEL: @LoopVariantUpperBound(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[N_PROMOTED:%.*]] = load i32, ptr [[N:%.*]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt i32 0, [[N_PROMOTED]]
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP0]])
+; CHECK-NEXT:    [[TMP1:%.*]] = mul nsw i32 [[M:%.*]], 3
+; CHECK-NEXT:    [[TMP2:%.*]] = add nsw i32 [[N_PROMOTED]], 3
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    call void @llvm.assume(i1 [[TMP3]])
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[ADD21:%.*]] = phi i32 [ [[N_PROMOTED]], [[ENTRY:%.*]] ], [ [[ADD2:%.*]], [[FOR_BODY:%.*]] ]
+; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD3:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], [[ADD21]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+; CHECK:       for.body:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[I_0]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 8
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD2]] = add nsw i32 [[ADD21]], 1
+; CHECK-NEXT:    store i32 [[ADD2]], ptr [[N]], align 4
+; CHECK-NEXT:    [[ADD3]] = add nsw i32 [[I_0]], [[M]]
+; CHECK-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP0]]
+;
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %add3, %for.body ]
+  %0 = load i32, ptr %n, align 4
+  %cmp = icmp slt i32 %i.0, %0
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  ret void
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32, ptr %ptr, i32 %i.0
+  %1 = load i32, ptr %arrayidx, align 4
+  %add = add nsw i32 %1, 8
+  store i32 %add, ptr %arrayidx, align 4
+  %2 = load i32, ptr %n, align 4
+  %add2 = add nsw i32 %2, 1
+  store i32 %add2, ptr %n, align 4
+  %add3 = add nsw i32 %i.0, %m
+  br label %for.cond, !llvm.loop !6
+}
+
+; loop invariant upper bound
+; hint: SCEV of n evaluates to scUnknown, detect that n is loop invariant
+; void LoopInvariantUpperBound(int *restrict ptr, int *restrict n, int m){
+;   for (int i = 0; i < *n; i += m) {
+;     ptr[i] = ptr[i] + 8;
+;   }
+; }
+define dso_local void 
@LoopInvariantUpperBound(ptr noalias %ptr, ptr noalias %n, i32 noundef %m) #0 { +; CHECK-LABEL: @LoopInvariantUpperBound( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[N:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 0, [[TMP0]] +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = mul nsw i32 [[M:%.*]], 9 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], [[TMP0]] +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP3]]) +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD2:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], [[TMP0]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[I_0]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 8 +; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD2]] = add nsw i32 [[I_0]], [[M]] +; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +; +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %add2, %for.body ] + %0 = load i32, ptr %n, align 4 + %cmp = icmp slt i32 %i.0, %0 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + ret void + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32, ptr %ptr, i32 %i.0 + %1 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %1, 8 + store i32 %add, ptr %arrayidx, align 4 + %add2 = add nsw i32 %i.0, %m + br label %for.cond, !llvm.loop !3 +} + + +; incrementing with a reference upper bound, this pass must detect that it is loop invariant +; void ReferenceBound(int *ptr, int &n){ +; for (int i=4; i < n; i++) { +; ptr[i] = ptr[i]+8; +; } +;} +define dso_local void @ReferenceBound(ptr noalias %ptr, ptr nonnull align 4 dereferenceable(4) %n) #0 { +; CHECK-LABEL: @ReferenceBound( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[N:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 7, [[TMP0]] +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]]) +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 4, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], [[TMP0]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[I_0]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 8 +; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 +; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP0]] +; +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 4, %entry ], [ %inc, %for.body ] + %0 = load i32, ptr %n, align 4 + %cmp = icmp slt i32 %i.0, %0 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + ret void + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32, ptr %ptr, i32 %i.0 + %1 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %1, 8 + store i32 
%add, ptr %arrayidx, align 4 + %inc = add nsw i32 %i.0, 1 + br label %for.cond, !llvm.loop !6 +} + + +; int &n +; for (int i =0 ; i < n + 1; i++) +define dso_local void @incrementOffsetOneSigned(ptr noalias %ptr, ptr nonnull align 32 dereferenceable(288) %n) #0 { +; CHECK-LABEL: @incrementOffsetOneSigned( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BOUND:%.*]] = getelementptr inbounds i8, ptr [[N:%.*]], i20 84 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[BOUND]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 3, [[ADD]] +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]]) +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], [[ADD]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[I_0]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP2]], 8 +; CHECK-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 +; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP0]] +; +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %bound = getelementptr inbounds i8, ptr %n, i20 84 + %0 = load i32, ptr %bound, align 4 + %add = add nsw i32 %0, 1 + %cmp = icmp slt i32 %i.0, %add + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + ret void + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32, ptr %ptr, i32 %i.0 + %1 = load i32, ptr %arrayidx, align 4 + %add1 = add nsw i32 %1, 8 + store i32 %add1, ptr %arrayidx, align 4 + %inc = add nsw i32 %i.0, 1 + br label %for.cond, !llvm.loop !6 +} + +; unsigned &n +; for (unsigned i =0 ; i < n + 1; i++) +define dso_local void @incrementOffsetOneUnsigned(ptr noalias %ptr, ptr nonnull align 32 dereferenceable(288) %n) #0 { +; CHECK-LABEL: @incrementOffsetOneUnsigned( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BOUND:%.*]] = getelementptr inbounds i8, ptr [[N:%.*]], i20 84 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[BOUND]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[TMP0]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 3, [[ADD]] +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]]) +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], [[ADD]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[I_0]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP2]], 8 +; CHECK-NEXT: store i32 [[ADD1]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INC]] = add nuw i32 [[I_0]], 1 +; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP0]] +; +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %bound = getelementptr inbounds i8, ptr %n, i20 84 + %0 = load i32, ptr %bound, align 4 + %add = add nuw i32 
%0, 1 + %cmp = icmp ult i32 %i.0, %add + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + ret void + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32, ptr %ptr, i32 %i.0 + %1 = load i32, ptr %arrayidx, align 4 + %add1 = add nsw i32 %1, 8 + store i32 %add1, ptr %arrayidx, align 4 + %inc = add nuw i32 %i.0, 1 + br label %for.cond, !llvm.loop !6 +} + +; int &n +; for (int i =0 ; i < n - 1; i++) +define dso_local void @incrementOffsetNegOneSigned(ptr noalias %ptr, ptr nonnull align 32 dereferenceable(288) %n) #0 { +; CHECK-LABEL: @incrementOffsetNegOneSigned( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BOUND:%.*]] = getelementptr inbounds i8, ptr [[N:%.*]], i20 84 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[BOUND]], align 4 +; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[TMP0]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 3, [[SUB]] +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]]) +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], [[SUB]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[I_0]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 8 +; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_0]], 1 +; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP0]] +; +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %bound = getelementptr inbounds i8, ptr %n, i20 84 + %0 = load i32, ptr %bound, align 4 + %sub = add nsw i32 %0, -1 + %cmp = icmp slt i32 %i.0, %sub + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + ret void + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32, ptr %ptr, i32 %i.0 + %1 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %1, 8 + store i32 %add, ptr %arrayidx, align 4 + %inc = add nsw i32 %i.0, 1 + br label %for.cond, !llvm.loop !6 +} + +; unsigned &n +; for (unsigned i =0 ; i < n - 1; i++) +define dso_local void @incrementOffsetNegOneUnsigned(ptr noalias %ptr, ptr nonnull align 32 dereferenceable(288) %n) #0 { +; CHECK-LABEL: @incrementOffsetNegOneUnsigned( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BOUND:%.*]] = getelementptr inbounds i8, ptr [[N:%.*]], i20 84 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[BOUND]], align 4 +; CHECK-NEXT: [[SUB:%.*]] = add i32 [[TMP0]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 3, [[SUB]] +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]]) +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_0]], [[SUB]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[I_0]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 8 +; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: 
[[INC]] = add nuw i32 [[I_0]], 1 +; CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP0]] +; +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %bound = getelementptr inbounds i8, ptr %n, i20 84 + %0 = load i32, ptr %bound, align 4 + %sub = add i32 %0, -1 + %cmp = icmp ult i32 %i.0, %sub + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + ret void + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32, ptr %ptr, i32 %i.0 + %1 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %1, 8 + store i32 %add, ptr %arrayidx, align 4 + %inc = add nuw i32 %i.0, 1 + br label %for.cond, !llvm.loop !6 +} + +; do not insert Assumption if a call instruction occurs, since the +; result could be non-deterministic +; Function Attrs: mustprogress nofree norecurse nosync nounwind memory(argmem: readwrite, inaccessiblemem: write) +define dso_local void @incompleteInvarianceCheck(ptr nocapture %ptr, i32 noundef %n, i32 noundef %abort) local_unnamed_addr { +; CHECK-LABEL: @incompleteInvarianceCheck( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_COND_OUTER:%.*]] +; CHECK: for.cond.outer: +; CHECK-NEXT: [[I_08_PH:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[INVARIANT_OP:%.*]] = sub nsw i32 [[I_08_PH]], 3 +; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[I_08_PH]] to i20 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i20 [[TMP0]] +; CHECK-NEXT: [[CONDIF:%.*]] = icmp ult i32 [[I_08_PH]], 3 +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[N_0:%.*]] = call i32 @calcN(i32 [[N:%.*]]) +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N_0]], [[INVARIANT_OP]] +; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 8 +; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: br i1 [[CONDIF]], label [[FOR_INC]], label [[FOR_COND]] +; CHECK: for.inc: +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08_PH]], 1 +; CHECK-NEXT: br label [[FOR_COND_OUTER]], !llvm.loop [[LOOP0]] +; +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.inc ], [ %i.08, %for.body] + %n.0 = call i32 @calcN(i32 %n) + %n.1 = add nsw nuw i32 %n.0, 3 + %cmp1 = icmp slt i32 %i.08, %n.1 + br i1 %cmp1, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.inc + ret void + +for.body: ; preds = %for.body + %1 = trunc i32 %i.08 to i20 + %arrayidx = getelementptr inbounds i32, ptr %ptr, i20 %1 + %2 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %2, 8 + store i32 %add, ptr %arrayidx, align 4 + %condif = icmp ult i32 %i.08, 3 + br i1 %condif, label %for.inc, label %for.cond + +for.inc: ; preds = %if.end, %for.body + %inc = add nuw nsw i32 %i.08, 1 + br label %for.cond, !llvm.loop !6 +} + +; check assumption insertion if an operand is a PHI node (%init) and outside of the Loop +; Function Attrs: mustprogress nofree norecurse nosync nounwind memory(argmem: readwrite, inaccessiblemem: write) +define dso_local void @doublePHINodes(ptr nocapture %ptr, i32 noundef %n, i32 noundef %abort) local_unnamed_addr { +; CHECK-LABEL: @doublePHINodes( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP_PREHEADER:%.*]] = icmp slt i32 
[[ABORT:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP_PREHEADER]], label [[FOR_PREHEADER:%.*]], label [[FOR_PREHEADER_END:%.*]] +; CHECK: for.preheader: +; CHECK-NEXT: [[INIT_0:%.*]] = add i32 0, 4 +; CHECK-NEXT: br label [[FOR_PREHEADER_END]] +; CHECK: for.preheader.end: +; CHECK-NEXT: [[INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INIT_0]], [[FOR_PREHEADER]] ] +; CHECK-NEXT: [[N_1:%.*]] = add nuw nsw i32 [[N:%.*]], 3 +; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[INIT]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], [[N_1]] +; CHECK-NEXT: call void @llvm.assume(i1 [[TMP1]]) +; CHECK-NEXT: br label [[FOR_COND_OUTER:%.*]] +; CHECK: for.cond.outer: +; CHECK-NEXT: [[I_08_PH:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[INIT]], [[FOR_PREHEADER_END]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[I_08_PH]], [[N_1]] +; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[I_08_PH]] to i20 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i20 [[TMP2]] +; CHECK-NEXT: [[CONDIF:%.*]] = icmp ult i32 [[I_08_PH]], 3 +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 8 +; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: br i1 [[CONDIF]], label [[FOR_INC]], label [[FOR_COND]], !llvm.loop [[LOOP0]] +; CHECK: for.inc: +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08_PH]], 1 +; CHECK-NEXT: br label [[FOR_COND_OUTER]], !llvm.loop [[LOOP0]] +; +entry: + %cmp.preheader = icmp slt i32 %abort, 0 + br i1 %cmp.preheader, label %for.preheader, label %for.preheader.end + +for.preheader: + %init.0 = add i32 0, 4 + br label %for.preheader.end + +for.preheader.end: + %init = phi i32 [ 0, %entry], [%init.0, %for.preheader] + br label %for.cond + + +for.cond: ; preds = %for.inc, %entry + %i.08 = phi i32 [ %init, %for.preheader.end ], [ %inc, %for.inc ], [ %i.08, %for.body] + %n.1 = add nsw nuw i32 %n, 3 + %cmp1 = icmp slt i32 %i.08, %n.1 + br i1 %cmp1, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.inc + ret void + +for.body: ; preds = %for.body + %1 = trunc i32 %i.08 to i20 + %arrayidx = getelementptr inbounds i32, ptr %ptr, i20 %1 + %2 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %2, 8 + store i32 %add, ptr %arrayidx, align 4 + %condif = icmp ult i32 %i.08, 3 + br i1 %condif, label %for.inc, label %for.cond, !llvm.loop !6 + +for.inc: ; preds = %if.end, %for.body + %inc = add nuw nsw i32 %i.08, 1 + br label %for.cond, !llvm.loop !6 +} + +declare dso_local i32 @calcN(i32 noundef %n) + +!3 = distinct !{!3, !11, !7, !9} +!6 = distinct !{!6, !7, !8, !9} +!7 = !{!"llvm.loop.mustprogress"} +!8 = !{!"llvm.loop.itercount.range", i64 4} +!9 = !{!"llvm.loop.unroll.disable"} +!11 = !{!"llvm.loop.itercount.range", i64 10, i64 65}
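
Note: the following is a minimal end-to-end sketch of the transformation, distilled from the tests above; the function and value names are illustrative and not taken from the patch. Given `!llvm.loop.itercount.range` metadata promising at least 4 iterations, running `opt -passes='loop-mssa(licm,loop-iter-count-assumptions)'` (the RUN line used by the new test) over the unrotated loop below is expected to materialize the exit compare at iteration MinIterCount - 1 = 3 in the preheader and feed it to `llvm.assume`, which is what later lets LoopRotate drop the loop guard:

```llvm
; A counted loop, not yet rotated, annotated with a minimum iteration
; count of 4 (single-operand form of llvm.loop.itercount.range).
define void @sketch(ptr %p, i32 %n) {
entry:
  ; The pass should insert the equivalent of the following here:
  ;   %guard = icmp slt i32 3, %n        ; exit compare "i < n" at i = 3
  ;   call void @llvm.assume(i1 %guard)
  br label %for.cond

for.cond:
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %for.body, label %exit

for.body:
  %addr = getelementptr inbounds i32, ptr %p, i32 %i
  %v = load i32, ptr %addr, align 4
  %v8 = add nsw i32 %v, 8
  store i32 %v8, ptr %addr, align 4
  %inc = add nsw i32 %i, 1
  br label %for.cond, !llvm.loop !0

exit:
  ret void
}

!0 = distinct !{!0, !1, !2}
!1 = !{!"llvm.loop.itercount.range", i64 4}
!2 = !{!"llvm.loop.unroll.disable"}
```

On AIE the conversion is enabled by default through the driver change above; for other targets it stays behind `-mllvm -enable-loop-iter-count-assumptions=true`.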