85
85
#include " llvm/Support/FileSystem.h"
86
86
#include " llvm/Support/raw_ostream.h"
87
87
#include " llvm/Transforms/IPO.h"
88
+ #include " llvm/Transforms/InstCombine/InstCombine.h"
88
89
#include " llvm/Transforms/Scalar.h"
89
90
#include " llvm/Transforms/Scalar/DCE.h"
90
91
#include " llvm/Transforms/Scalar/DeadStoreElimination.h"
@@ -171,21 +172,19 @@ static ICmpInst *getLastICmpInstWithPredicate(BasicBlock *BB,
171
172
return lastICmp;
172
173
}
173
174
174
- // Helper function to get the first ICmp instruction in a basic block
175
- static ICmpInst *getFirstICmpInst (BasicBlock *BB) {
175
+ template <typename T> static T *getFirstInst (BasicBlock *BB) {
176
176
for (Instruction &I : *BB) {
177
- if (auto *CI = dyn_cast<ICmpInst >(&I)) {
178
- return CI ;
177
+ if (T *Inst = dyn_cast<T >(&I)) {
178
+ return Inst ;
179
179
}
180
180
}
181
181
return nullptr ;
182
182
}
183
183
184
- // Helper function to get the last ICmp instruction in a basic block
185
- static ICmpInst *getLastICmpInst (BasicBlock *BB) {
186
- for (auto it = BB->rbegin (); it != BB->rend (); ++it) {
187
- if (auto *icmp = dyn_cast<ICmpInst>(&*it)) {
188
- return icmp;
184
+ template <typename T> static T *getLastInst (BasicBlock *BB) {
185
+ for (Instruction &I : reverse (*BB)) {
186
+ if (T *Inst = dyn_cast<T>(&I)) {
187
+ return Inst;
189
188
}
190
189
}
191
190
return nullptr ;
@@ -239,16 +238,6 @@ static PHINode *getLastI32Phi(BasicBlock *BB) {
239
238
return nullptr ;
240
239
}
241
240
242
- // Helper function to get the last PHI node in a basic block
243
- static PHINode *getLastPhi (BasicBlock *BB) {
244
- for (auto it = BB->rbegin (); it != BB->rend (); ++it) {
245
- if (auto *Phi = dyn_cast<PHINode>(&*it)) {
246
- return Phi;
247
- }
248
- }
249
- return nullptr ;
250
- }
251
-
252
241
// Helper function to get the first CallInst with a specific name in a basic
253
242
// block
254
243
static CallInst *getFirstCallInstWithName (BasicBlock *BB, StringRef Name) {
@@ -406,6 +395,38 @@ static void movePHINodesToTop(BasicBlock &BB,
406
395
}
407
396
}
408
397
398
+ static void modifyFirdAddToOr (BasicBlock *ClonedForBody) {
399
+ SmallVector<BinaryOperator *> addInsts;
400
+
401
+ // Collect all add instructions that meet the criteria
402
+ for (auto &I : *ClonedForBody) {
403
+ if (auto *binOp = dyn_cast<BinaryOperator>(&I)) {
404
+ if (binOp->getOpcode () == Instruction::Add && binOp->hasNoSignedWrap () &&
405
+ binOp->hasNoUnsignedWrap ()) {
406
+ addInsts.push_back (binOp);
407
+ }
408
+ }
409
+ }
410
+ if (addInsts.empty ()) {
411
+ return ;
412
+ }
413
+ // Replace each add instruction with an or disjoint instruction
414
+ for (auto it = addInsts.begin (); it != std::prev (addInsts.end ()); ++it) {
415
+ auto *addInst = *it;
416
+ // Create a new or disjoint instruction
417
+ Instruction *orInst =
418
+ BinaryOperator::CreateDisjoint (Instruction::Or, addInst->getOperand (0 ),
419
+ addInst->getOperand (1 ), " add" , addInst);
420
+
421
+ // Replace all uses of the add instruction
422
+ addInst->replaceAllUsesWith (orInst);
423
+
424
+ // Delete the original add instruction
425
+ addInst->eraseFromParent ();
426
+ orInst->setName (" add" );
427
+ }
428
+ }
429
+
409
430
// Helper function to update predecessors to point to a new preheader
410
431
static void updatePredecessorsToPreheader (BasicBlock *ForBody,
411
432
BasicBlock *ForBodyPreheader) {
@@ -1151,7 +1172,7 @@ static Value *expandForCondPreheader(
1151
1172
}
1152
1173
1153
1174
// Get the icmp instruction in ForCondPreheader
1154
- ICmpInst *icmpInst = getFirstICmpInst (ForCondPreheader);
1175
+ ICmpInst *icmpInst = getFirstInst<ICmpInst> (ForCondPreheader);
1155
1176
1156
1177
// Ensure we found the icmp instruction
1157
1178
assert (icmpInst && " Failed to find icmp instruction in ForCondPreheader" );
@@ -1278,7 +1299,7 @@ static void insertUnusedInstructionsBeforeIcmp(PHINode *phiI32InClonedForBody,
1278
1299
1279
1300
static void modifyClonedForBody (BasicBlock *ClonedForBody) {
1280
1301
1281
- ICmpInst *lastIcmpEq = getLastICmpInst (ClonedForBody);
1302
+ ICmpInst *lastIcmpEq = getLastInst<ICmpInst> (ClonedForBody);
1282
1303
assert (lastIcmpEq &&
1283
1304
" Failed to find last icmp eq instruction in ClonedForBody" );
1284
1305
@@ -1472,7 +1493,7 @@ static void modifyForCondPreheader2(BasicBlock *ClonedForBody,
1472
1493
}
1473
1494
1474
1495
// Find operand 1 of the icmp instruction from ClonedForBody
1475
- ICmpInst *firstIcmp = getFirstICmpInst (ClonedForBody);
1496
+ ICmpInst *firstIcmp = getFirstInst<ICmpInst> (ClonedForBody);
1476
1497
assert (firstIcmp && " Unable to find icmp instruction in ClonedForBody" );
1477
1498
Value *IcmpOperand1 = firstIcmp->getOperand (1 );
1478
1499
@@ -1549,7 +1570,7 @@ static void modifyForCondPreheader2(BasicBlock *ClonedForBody,
1549
1570
1550
1571
static Value *modifyClonedForBodyPreheader (BasicBlock *ClonedForBodyPreheader,
1551
1572
BasicBlock *ForBody) {
1552
- ICmpInst *firstIcmp = getFirstICmpInst (ForBody);
1573
+ ICmpInst *firstIcmp = getFirstInst<ICmpInst> (ForBody);
1553
1574
assert (firstIcmp && " Unable to find icmp instruction in ForBody" );
1554
1575
1555
1576
Value *IcmpOperand1 = firstIcmp->getOperand (1 );
@@ -2011,35 +2032,27 @@ static Instruction *modifyAddToOrInClonedForBody(BasicBlock *ClonedForBody) {
2011
2032
return orInst;
2012
2033
}
2013
2034
2014
- static void modifyAddToOr (BasicBlock *ClonedForBody) {
2015
- SmallVector<BinaryOperator *> addInsts;
2035
+ static void runInstCombinePass (Function &F) {
2036
+ // Create necessary analysis managers
2037
+ LoopAnalysisManager LAM;
2038
+ FunctionAnalysisManager FAM;
2039
+ CGSCCAnalysisManager CGAM;
2040
+ ModuleAnalysisManager MAM;
2016
2041
2017
- // Collect all add instructions that meet the criteria
2018
- for (auto &I : *ClonedForBody) {
2019
- if (auto *binOp = dyn_cast<BinaryOperator>(&I)) {
2020
- if (binOp->getOpcode () == Instruction::Add) {
2021
- addInsts.push_back (binOp);
2022
- }
2023
- }
2024
- }
2025
- if (addInsts.empty ()) {
2026
- return ;
2027
- }
2028
- // Replace each add instruction with an or disjoint instruction
2029
- for (auto it = addInsts.begin (); it != std::prev (addInsts.end ()); ++it) {
2030
- auto *addInst = *it;
2031
- // Create a new or disjoint instruction
2032
- Instruction *orInst =
2033
- BinaryOperator::CreateDisjoint (Instruction::Or, addInst->getOperand (0 ),
2034
- addInst->getOperand (1 ), " add" , addInst);
2042
+ // Create pass builder
2043
+ PassBuilder PB;
2035
2044
2036
- // Replace all uses of the add instruction
2037
- addInst->replaceAllUsesWith (orInst);
2045
+ // Register analyses
2046
+ PB.registerModuleAnalyses (MAM);
2047
+ PB.registerCGSCCAnalyses (CGAM);
2048
+ PB.registerFunctionAnalyses (FAM);
2049
+ PB.registerLoopAnalyses (LAM);
2050
+ PB.crossRegisterProxies (LAM, FAM, CGAM, MAM);
2038
2051
2039
- // Delete the original add instruction
2040
- addInst-> eraseFromParent () ;
2041
- orInst-> setName ( " add " );
2042
- }
2052
+ // Create function-level optimization pipeline
2053
+ FunctionPassManager FPM ;
2054
+ FPM. addPass ( InstCombinePass () );
2055
+ FPM. run (F, FAM);
2043
2056
}
2044
2057
2045
2058
static Value *unrolladdcClonedForBody (BasicBlock *ClonedForBody,
@@ -2058,7 +2071,7 @@ static Value *unrolladdcClonedForBody(BasicBlock *ClonedForBody,
2058
2071
assert (firstNonPHI && orInst && " Start or end instruction not found" );
2059
2072
2060
2073
// Find the icmp instruction
2061
- Instruction *icmpInst = getFirstICmpInst (ClonedForBody);
2074
+ Instruction *icmpInst = getFirstInst<ICmpInst> (ClonedForBody);
2062
2075
2063
2076
// Ensure that the icmp instruction is found
2064
2077
assert (icmpInst && " icmp instruction not found" );
@@ -2298,7 +2311,7 @@ static void unrollAddc(Function &F, ScalarEvolution &SE, Loop *L,
2298
2311
assert (ForCondPreheader && " Expected to find for.cond.preheader!" );
2299
2312
expandForCondPreheaderaddc (F, ForCondPreheader, ClonedForBody, ForBody, sub,
2300
2313
unroll_factor);
2301
- modifyAddToOr (ClonedForBody );
2314
+ runInstCombinePass (F );
2302
2315
groupAndReorderInstructions (ClonedForBody);
2303
2316
2304
2317
// Verify the function
@@ -2816,11 +2829,11 @@ static void postUnrollLoopWithCount(Function &F, Loop *L, int unroll_count) {
2816
2829
insertPhiNodesForFMulAdd (LoopHeader, LoopPreheader, FMulAddCalls);
2817
2830
2818
2831
movePHINodesToTop (*LoopHeader);
2819
- modifyAddToOr (LoopHeader );
2832
+ runInstCombinePass (F );
2820
2833
groupAndReorderInstructions (LoopHeader);
2821
2834
2822
2835
// Create for.end basic block after LoopHeader
2823
- ICmpInst *LastICmp = getLastICmpInst (LoopHeader);
2836
+ ICmpInst *LastICmp = getLastInst<ICmpInst> (LoopHeader);
2824
2837
LastICmp->setPredicate (ICmpInst::ICMP_ULT);
2825
2838
// Get the first operand of LastICmp
2826
2839
Value *Operand1 = LastICmp->getOperand (1 );
@@ -3023,7 +3036,7 @@ static bool shouldUnrollDotprodType(Function &F, LoopInfo *LI) {
3023
3036
}
3024
3037
3025
3038
static std::pair<Value *, Value *> modifyEntryBB (BasicBlock &entryBB) {
3026
- ICmpInst *icmp = getLastICmpInst (&entryBB);
3039
+ ICmpInst *icmp = getLastInst<ICmpInst> (&entryBB);
3027
3040
assert (icmp && " icmp not found" );
3028
3041
Value *start_index = icmp->getOperand (0 );
3029
3042
Value *end_index = icmp->getOperand (1 );
@@ -3115,7 +3128,7 @@ static void postUnrollLoopWithVariable(Function &F, Loop *L, int unroll_count) {
3115
3128
temp->insertBefore (LoopPreheader->getTerminator ());
3116
3129
}
3117
3130
3118
- ICmpInst *lastICmp = getLastICmpInst (ForBody7);
3131
+ ICmpInst *lastICmp = getLastInst<ICmpInst> (ForBody7);
3119
3132
assert (lastICmp && " icmp not found" );
3120
3133
lastICmp->setOperand (1 , Sub);
3121
3134
lastICmp->setPredicate (ICmpInst::ICMP_SLT);
@@ -3552,7 +3565,7 @@ static std::tuple<Value *, GetElementPtrInst *, Value *>
3552
3565
modifyOuterLoop4 (Loop *L, BasicBlock *ForBodyMerged,
3553
3566
BasicBlock *CloneForBodyPreheader) {
3554
3567
BasicBlock *BB = L->getHeader ();
3555
- PHINode *phi = getLastPhi (BB);
3568
+ PHINode *phi = getLastInst<PHINode> (BB);
3556
3569
// Add new instructions
3557
3570
IRBuilder<> Builder (BB);
3558
3571
Builder.SetInsertPoint (phi->getNextNode ());
@@ -3596,7 +3609,7 @@ static void modifyInnerLoop4(Loop *L, BasicBlock *ForBodyMerged, Value *Sub,
3596
3609
movePHINodesToTop (*ForBodyMerged);
3597
3610
3598
3611
groupAndReorderInstructions (ForBodyMerged);
3599
- ICmpInst *LastICmp = getLastICmpInst (ForBodyMerged);
3612
+ ICmpInst *LastICmp = getLastInst<ICmpInst> (ForBodyMerged);
3600
3613
LastICmp->setPredicate (ICmpInst::ICMP_ULT);
3601
3614
LastICmp->setOperand (1 , Sub);
3602
3615
swapTerminatorSuccessors (ForBodyMerged);
@@ -3653,7 +3666,8 @@ static void modifyInnerLoop4(Loop *L, BasicBlock *ForBodyMerged, Value *Sub,
3653
3666
AddPHI->addIncoming (Add2, NewForEnd);
3654
3667
Value *phifloatincomingvalue0 =
3655
3668
getFirstCallInstWithName (CloneForBody, " llvm.fmuladd.f32" );
3656
- Value *phii32incomingvalue0 = getLastICmpInst (CloneForBody)->getOperand (0 );
3669
+ Value *phii32incomingvalue0 =
3670
+ getLastInst<ICmpInst>(CloneForBody)->getOperand (0 );
3657
3671
for (PHINode &Phi : CloneForBody->phis ()) {
3658
3672
if (Phi.getType ()->isIntegerTy (32 )) {
3659
3673
Phi.setIncomingValue (0 , AddPHI);
@@ -3676,7 +3690,7 @@ static void modifyInnerLoop4(Loop *L, BasicBlock *ForBodyMerged, Value *Sub,
3676
3690
static std::tuple<Value *, Value *, GetElementPtrInst *>
3677
3691
modifyOuterLoop8 (Loop *L) {
3678
3692
BasicBlock *BB = L->getHeader ();
3679
- ICmpInst *LastICmp = getLastICmpInst (BB);
3693
+ ICmpInst *LastICmp = getLastInst<ICmpInst> (BB);
3680
3694
LastICmp->setPredicate (ICmpInst::ICMP_ULT);
3681
3695
swapTerminatorSuccessors (BB);
3682
3696
@@ -3714,7 +3728,7 @@ static std::tuple<Value *, Value *, GetElementPtrInst *>
3714
3728
modifyOuterLoop16 (Loop *L) {
3715
3729
BasicBlock *BB = L->getHeader ();
3716
3730
BasicBlock *BBLoopPreHeader = L->getLoopPreheader ();
3717
- ICmpInst *LastICmp = getLastICmpInst (BB);
3731
+ ICmpInst *LastICmp = getLastInst<ICmpInst> (BB);
3718
3732
LastICmp->setPredicate (ICmpInst::ICMP_ULT);
3719
3733
swapTerminatorSuccessors (BB);
3720
3734
@@ -3763,7 +3777,7 @@ static void modifyInnerLoop(Loop *L, BasicBlock *ForBodyMerged, Value *Add60,
3763
3777
movePHINodesToTop (*ForBodyMerged);
3764
3778
3765
3779
groupAndReorderInstructions (ForBodyMerged);
3766
- ICmpInst *LastICmp = getLastICmpInst (ForBodyMerged);
3780
+ ICmpInst *LastICmp = getLastInst<ICmpInst> (ForBodyMerged);
3767
3781
LastICmp->setPredicate (ICmpInst::ICMP_ULT);
3768
3782
LastICmp->setOperand (1 , Add60);
3769
3783
swapTerminatorSuccessors (ForBodyMerged);
@@ -3873,7 +3887,7 @@ static void modifyInnerLoop(Loop *L, BasicBlock *ForBodyMerged, Value *Add60,
3873
3887
3874
3888
Value *operand1 = unroll_count == 16
3875
3889
? getFirstI32Phi (OuterBB)
3876
- : getLastICmpInst (CloneForBody)->getOperand (1 );
3890
+ : getLastInst<ICmpInst> (CloneForBody)->getOperand (1 );
3877
3891
// Create a new comparison instruction
3878
3892
ICmpInst *NewCmp =
3879
3893
new ICmpInst (ICmpInst::ICMP_UGT, PhiSum, operand1, " cmp182.not587" );
@@ -3890,7 +3904,8 @@ static void modifyInnerLoop(Loop *L, BasicBlock *ForBodyMerged, Value *Add60,
3890
3904
getFirstCallInstWithName (CloneForBody, " llvm.fmuladd.f32" );
3891
3905
for (PHINode &Phi : CloneForBody->phis ()) {
3892
3906
if (Phi.getType ()->isIntegerTy (32 )) {
3893
- Phi.setIncomingValue (0 , getLastICmpInst (CloneForBody)->getOperand (0 ));
3907
+ Phi.setIncomingValue (0 ,
3908
+ getLastInst<ICmpInst>(CloneForBody)->getOperand (0 ));
3894
3909
Phi.setIncomingBlock (0 , CloneForBody);
3895
3910
Phi.setIncomingValue (1 , PhiSum);
3896
3911
Phi.setIncomingBlock (1 , ForEnd164);
@@ -3981,7 +3996,7 @@ static void modifyFirstCloneForBody(BasicBlock *CloneForBody,
3981
3996
lastAddInst = &I;
3982
3997
}
3983
3998
}
3984
- ICmpInst *LastCmpInst = getLastICmpInst (CloneForBody);
3999
+ ICmpInst *LastCmpInst = getLastInst<ICmpInst> (CloneForBody);
3985
4000
LastCmpInst->setOperand (0 , lastAddInst);
3986
4001
LastCmpInst->setOperand (1 , Operand1);
3987
4002
FirstI32Phi->setIncomingValue (1 , lastAddInst);
@@ -4045,7 +4060,7 @@ static void modifyFirdFirstLoop(Function &F, Loop *L, BasicBlock *ForBodyMerged,
4045
4060
getFirstI32Phi (ForCond23Preheader)->getIncomingBlock (0 );
4046
4061
Instruction *FirstI32Phi = getFirstI32Phi (ForCondCleanup3);
4047
4062
4048
- ICmpInst *LastICmp = getLastICmpInst (ForCondCleanup3);
4063
+ ICmpInst *LastICmp = getLastInst<ICmpInst> (ForCondCleanup3);
4049
4064
// Create new add instruction
4050
4065
IRBuilder<> Builder (LastICmp);
4051
4066
Value *Add269 = Builder.CreateNSWAdd (
@@ -4067,7 +4082,7 @@ static void modifyFirdFirstLoop(Function &F, Loop *L, BasicBlock *ForBodyMerged,
4067
4082
4068
4083
N_069->setIncomingValue (1 , Add281);
4069
4084
4070
- ICmpInst *LastICmpInPreheader = getLastICmpInst (ForCond23Preheader);
4085
+ ICmpInst *LastICmpInPreheader = getLastInst<ICmpInst> (ForCond23Preheader);
4071
4086
// Create new phi node
4072
4087
PHINode *N_0_lcssa = PHINode::Create (Type::getInt32Ty (F.getContext ()), 2 ,
4073
4088
" n.0.lcssa" , LastICmpInPreheader);
@@ -4093,7 +4108,7 @@ static void modifyFirdFirstLoop(Function &F, Loop *L, BasicBlock *ForBodyMerged,
4093
4108
Value *Add11 = Builder.CreateAdd (Operand1, CoeffPosLcssa);
4094
4109
4095
4110
ForBody27LrPh->getTerminator ()->setSuccessor (0 , CloneForBody);
4096
- ICmpInst *LastICmpInForBodyMerged = getLastICmpInst (ForBodyMerged);
4111
+ ICmpInst *LastICmpInForBodyMerged = getLastInst<ICmpInst> (ForBodyMerged);
4097
4112
LastICmpInForBodyMerged->setOperand (1 , Operand1);
4098
4113
LastICmpInForBodyMerged->setOperand (0 , Inc20_7);
4099
4114
@@ -4159,9 +4174,8 @@ static void modifyFirdFirstLoop(Function &F, Loop *L, BasicBlock *ForBodyMerged,
4159
4174
CI->setOperand (2 , PHI);
4160
4175
}
4161
4176
movePHINodesToTop (*ForBodyMerged);
4162
- modifyAddToOr (ForBodyMerged);
4163
-
4164
- ICmpInst *LastICmpForBodyMerged = getLastICmpInst (ForBodyMerged);
4177
+ modifyFirdAddToOr (ForBodyMerged);
4178
+ ICmpInst *LastICmpForBodyMerged = getLastInst<ICmpInst>(ForBodyMerged);
4165
4179
LastICmpForBodyMerged->setPredicate (ICmpInst::ICMP_SGT);
4166
4180
cast<Instruction>(LastICmpForBodyMerged->getOperand (0 ))
4167
4181
->setOperand (0 , getFirstI32Phi (ForBodyMerged));
@@ -4256,7 +4270,7 @@ static void modifyFirdFirstLoop(Function &F, Loop *L, BasicBlock *ForBodyMerged,
4256
4270
CoeffPosLcssaPhi->addIncoming (SubResult, ForCondCleanup26LoopExit);
4257
4271
// eraseAllStoreInstInBB(ForCondCleanup26);
4258
4272
4259
- ICmpInst *LastICmpForCondCleanup26 = getLastICmpInst (ForCondCleanup26);
4273
+ ICmpInst *LastICmpForCondCleanup26 = getLastInst<ICmpInst> (ForCondCleanup26);
4260
4274
4261
4275
LastICmpForCondCleanup26->setPredicate (ICmpInst::ICMP_SLT);
4262
4276
PHINode *FirstI32ForCondCleanup3 = getFirstI32Phi (ForCondCleanup3);
@@ -4314,7 +4328,7 @@ static void modifyFirdFirstLoop(Function &F, Loop *L, BasicBlock *ForBodyMerged,
4314
4328
0 , ConstantInt::get (getLastI32Phi (ForCond130Preheader)->getType (), 0 ));
4315
4329
LastI32Phi130->setIncomingValue (1 , AndResult);
4316
4330
4317
- ICmpInst *LastICmp130 = getLastICmpInst (ForCond130Preheader);
4331
+ ICmpInst *LastICmp130 = getLastInst<ICmpInst> (ForCond130Preheader);
4318
4332
LastICmp130->setOperand (1 , FirstI32ForCondCleanup3);
4319
4333
4320
4334
PHINode *LastI32PhiClone = getLastFloatPhi (CloneForBody);
@@ -4434,9 +4448,8 @@ static void modifyFirdSecondLoop(Function &F, Loop *L,
4434
4448
Add76310->addIncoming (Add76, ForBodyMerged);
4435
4449
4436
4450
movePHINodesToTop (*ForBodyMerged);
4437
- modifyAddToOr (ForBodyMerged);
4438
-
4439
- ICmpInst *LastICmp = getLastICmpInst (ForBodyMerged);
4451
+ modifyFirdAddToOr (ForBodyMerged);
4452
+ ICmpInst *LastICmp = getLastInst<ICmpInst>(ForBodyMerged);
4440
4453
LastICmp->setPredicate (ICmpInst::ICMP_SGT);
4441
4454
cast<Instruction>(Add76)->moveBefore (LastICmp);
4442
4455
LastICmp->setOperand (0 , Add76);
@@ -5043,6 +5056,7 @@ RISCVLoopUnrollAndRemainderPass::run(Function &F, FunctionAnalysisManager &AM) {
5043
5056
if (currentUnrollType == UnrollType::FIRD) {
5044
5057
addLegacyCommonOptimizationPasses (F);
5045
5058
}
5059
+
5046
5060
// Verify function
5047
5061
if (verifyFunction (F, &errs ())) {
5048
5062
LLVM_DEBUG (errs () << " Function verification failed\n " );
0 commit comments