@@ -81,6 +81,10 @@ static cl::opt<bool> DisableAdvancedPeeling(
8181 cl::desc(
8282 " Disable advance peeling. Issues for convergent targets (D134803)." ));
8383
84+ static cl::opt<bool > EnablePeelingForIV (
85+ " enable-peeling-for-iv" , cl::init(false ), cl::Hidden,
86+ cl::desc(" Enable peeling to convert Phi nodes into IVs" ));
87+
8488static const char *PeeledCountMetaData = " llvm.loop.peeled.count" ;
8589
8690// Check whether we are capable of peeling this loop.
@@ -155,45 +159,170 @@ namespace {
155159// corresponding calls to g are determined and the code for computing
156160// x, y, and a can be removed.
157161//
162+ // Similarly, there are cases where peeling makes Phi nodes loop-inductions
163+ // (i.e., the value is increased or decreased by a fixed amount on every
164+ // iteration). For example, consider the following function.
165+ //
166+ // #define N 100
167+ // void f(int a[], int b[]) {
168+ // int im = N - 1;
169+ // for (int i = 0; i < N; i++) {
170+ // a[i] = b[i] + b[im];
171+ // im = i;
172+ // }
173+ // }
174+ //
175+ // The IR of the loop will look something like the following.
176+ //
177+ // %i = phi i32 [ 0, %entry ], [ %i.next, %for.body ]
178+ // %im = phi i32 [ 99, %entry ], [ %i, %for.body ]
179+ // ...
180+ // %i.next = add nuw nsw i32 %i, 1
181+ // ...
182+ //
183+ // In this case, %im becomes a loop-induction variable after peeling 1
184+ // iteration, because %i is itself a loop-induction variable. The peeling count
185+ // can be determined by the same algorithm as in the loop-invariant case. Such
186+ // peeling is profitable for loop-vectorization.
187+ //
158188// The PhiAnalyzer class calculates how many times a loop should be
159189// peeled based on the above analysis of the phi nodes in the loop while
160190// respecting the maximum specified.
161191class PhiAnalyzer {
162192public:
163- PhiAnalyzer (const Loop &L, unsigned MaxIterations);
193+ PhiAnalyzer (const Loop &L, unsigned MaxIterations, bool PeelForIV );
164194
165195 // Calculate the sufficient minimum number of iterations of the loop to peel
166196 // such that phi instructions become determined (subject to allowable
166196 // limits). Returns std::nullopt when no header phi benefits from peeling.
167197 std::optional<unsigned > calculateIterationsToPeel ();
168198
169199protected:
170- using PeelCounter = std::optional<unsigned >;
200+ enum class PeelCounterType {
201+ Invariant, // The value becomes loop-invariant after the counted iterations.
202+ Induction, // The value becomes an induction after the counted iterations.
203+ };
204+
205+ using PeelCounterValue = std::pair<unsigned , PeelCounterType>; // {iterations, kind}
206+ using PeelCounter = std::optional<PeelCounterValue>; // nullopt means Unknown
171207 const PeelCounter Unknown = std::nullopt ;
172208
173209 // Add 1 to the iteration count while preserving the counter type. Unknown
173209 // stays Unknown, and a result over MaxIterations also becomes Unknown.
174210 PeelCounter addOne (PeelCounter PC) const {
175211 if (PC == Unknown)
176212 return Unknown;
177- return (*PC + 1 <= MaxIterations) ? PeelCounter{*PC + 1 } : Unknown;
213+ auto [Val, Ty] = *PC;
214+ return (Val + 1 <= MaxIterations) ? PeelCounter ({Val + 1 , Ty}) : Unknown;
178215 }
179216
180- // Calculate the number of iterations after which the given value
181- // becomes an invariant.
217+ // Return a counter of the given type with an iteration count of zero.
218+ PeelCounter makeZero (PeelCounterType Ty) const {
219+ return PeelCounter ({0 , Ty});
220+ }
221+
222+ // Calculate the number of iterations after which the given value becomes an
223+ // invariant or an induction. Results are cached per Value in
223+ // IterationsToInvarianceOrInduction.
182224 PeelCounter calculate (const Value &);
183225
226+ // Auxiliary function that combines the counters of the two operands of a
227+ // comparison instruction or a binary operator into the counter of its result.
228+ PeelCounter mergeTwoCounter (const Instruction &CmpOrBinaryOp,
229+ const PeelCounterValue &LHS,
230+ const PeelCounterValue &RHS) const ;
231+
232+ // Returns true if \p Phi is recognized as an induction in the target loop.
233+ // This is a lightweight check, so it detects an IV only in some cases.
234+ bool isInductionPHI (const PHINode *Phi) const ;
235+
184236 const Loop &L;
185237 const unsigned MaxIterations; // Upper bound for any known counter value.
238+ const bool PeelForIV; // When set, calculate() treats induction phis as starting points.
186239
187- // Map of Values to number of iterations to invariance
188- SmallDenseMap<const Value *, PeelCounter> IterationsToInvariance ;
240+ // Map of Values to number of iterations to invariance or induction
241+ SmallDenseMap<const Value *, PeelCounter> IterationsToInvarianceOrInduction ;
189242};
190243
191- PhiAnalyzer::PhiAnalyzer (const Loop &L, unsigned MaxIterations)
192- : L(L), MaxIterations(MaxIterations) {
244+ PhiAnalyzer::PhiAnalyzer (const Loop &L, unsigned MaxIterations, bool PeelForIV )
245+ : L(L), MaxIterations(MaxIterations), PeelForIV(PeelForIV) {
193246 assert (canPeel (&L) && " loop is not suitable for peeling" );
194247 assert (MaxIterations > 0 && " no peeling is allowed?" );
195248}
196249
250+ // / Test whether \p Phi is an induction variable. Although this can be
251+ // / determined using SCEV analysis, it is expensive to compute here. Instead,
252+ // / we perform cheaper checks that may not detect complex cases but are
253+ // / sufficient for some situations.
254+ bool PhiAnalyzer::isInductionPHI (const PHINode *Phi) const {
255+ // Currently we only support a loop that has single latch.
256+ BasicBlock *Latch = L.getLoopLatch ();
257+ if (Latch == nullptr )
258+ return false ;
259+
260+ Value *Cur = Phi->getIncomingValueForBlock (Latch);
261+ SmallPtrSet<Value *, 4 > Visited;
262+ bool VisitBinOp = false ;
263+
264+ // Starting from the incoming value of the Phi, we follow the use-def chain.
265+ // We consider Phi to be an IV if we can reach it again by traversing only
266+ // add, sub, or cast instructions.
267+ while (true ) {
268+ if (Cur == Phi)
269+ break ;
270+
271+ // Avoid infinite loop.
272+ if (Visited.contains (Cur))
273+ return false ;
274+
275+ auto *I = dyn_cast<Instruction>(Cur);
276+ if (!I || !L.contains (I))
277+ return false ;
278+
279+ Visited.insert (Cur);
280+
281+ if (auto *Cast = dyn_cast<CastInst>(I)) {
282+ Cur = Cast->getOperand (0 );
283+ } else if (auto *BinOp = dyn_cast<BinaryOperator>(I)) {
284+ if (BinOp->getOpcode () != Instruction::Add &&
285+ BinOp->getOpcode () != Instruction::Sub)
286+ return false ;
287+ if (!isa<ConstantInt>(BinOp->getOperand (1 )))
288+ return false ;
289+
290+ VisitBinOp = true ;
291+ Cur = BinOp->getOperand (0 );
292+ } else {
293+ return false ;
294+ }
295+ }
296+
297+ // Ignore cases where no binary operations are visited.
298+ return VisitBinOp;
299+ }
300+
301+ // / When either \p LHS or \p RHS is an IV, the result of \p CmpOrBinaryOp is
302+ // / considered an IV only if it is an addition or a subtraction. Otherwise the
303+ // / result can be a value that is neither an loop-invariant nor an IV.
304+ // /
305+ // / If both \p LHS and \p RHS are loop-invariants, then the result of
306+ // / \CmpOrBinaryOp is also a loop-invariant.
307+ PhiAnalyzer::PeelCounter
308+ PhiAnalyzer::mergeTwoCounter (const Instruction &CmpOrBinaryOp,
309+ const PeelCounterValue &LHS,
310+ const PeelCounterValue &RHS) const {
311+ auto &[LVal, LTy] = LHS;
312+ auto &[RVal, RTy] = RHS;
313+ unsigned NewVal = std::max (LVal, RVal);
314+
315+ if (LTy == PeelCounterType::Induction || RTy == PeelCounterType::Induction) {
316+ if (const auto *BinOp = dyn_cast<BinaryOperator>(&CmpOrBinaryOp)) {
317+ if (BinOp->getOpcode () == Instruction::Add ||
318+ BinOp->getOpcode () == Instruction::Sub)
319+ return PeelCounter ({NewVal, PeelCounterType::Induction});
320+ }
321+ return Unknown;
322+ }
323+ return PeelCounter ({NewVal, PeelCounterType::Invariant});
324+ }
325+
197326// This function calculates the number of iterations after which the value
198327// becomes an invariant or an induction. The pre-calculated values are
199328// memoized in a map. N.B. This number will be Unknown or <= MaxIterations.
@@ -212,25 +341,34 @@ PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
212341 // If we already know the answer, take it from the map.
213342 // Otherwise, place Unknown to map to avoid infinite recursion. Such
214343 // cycles can never stop on an invariant.
215- auto [I, Inserted] = IterationsToInvariance.try_emplace (&V, Unknown);
344+ auto [I, Inserted] =
345+ IterationsToInvarianceOrInduction.try_emplace (&V, Unknown);
216346 if (!Inserted)
217347 return I->second ;
218348
219349 if (L.isLoopInvariant (&V))
220350 // Loop invariant so known at start.
221- return (IterationsToInvariance[&V] = 0 );
351+ return (IterationsToInvarianceOrInduction[&V] =
352+ makeZero (PeelCounterType::Invariant));
222353 if (const PHINode *Phi = dyn_cast<PHINode>(&V)) {
223354 if (Phi->getParent () != L.getHeader ()) {
224355 // Phi is not in header block so Unknown.
225- assert (IterationsToInvariance[&V] == Unknown && " unexpected value saved" );
356+ assert (IterationsToInvarianceOrInduction[&V] == Unknown &&
357+ " unexpected value saved" );
226358 return Unknown;
227359 }
360+
361+ // If Phi is an induction, register it as a starting point.
362+ if (PeelForIV && isInductionPHI (Phi))
363+ return (IterationsToInvarianceOrInduction[&V] =
364+ makeZero (PeelCounterType::Induction));
365+
228366 // We need to analyze the input from the back edge and add 1.
229367 Value *Input = Phi->getIncomingValueForBlock (L.getLoopLatch ());
230368 PeelCounter Iterations = calculate (*Input);
231- assert (IterationsToInvariance [Input] == Iterations &&
369+ assert (IterationsToInvarianceOrInduction [Input] == Iterations &&
232370 " unexpected value saved" );
233- return (IterationsToInvariance [Phi] = addOne (Iterations));
371+ return (IterationsToInvarianceOrInduction [Phi] = addOne (Iterations));
234372 }
235373 if (const Instruction *I = dyn_cast<Instruction>(&V)) {
236374 if (isa<CmpInst>(I) || I->isBinaryOp ()) {
@@ -241,26 +379,30 @@ PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
241379 PeelCounter RHS = calculate (*I->getOperand (1 ));
242380 if (RHS == Unknown)
243381 return Unknown;
244- return (IterationsToInvariance[I] = {std::max (*LHS, *RHS)});
382+ return (IterationsToInvarianceOrInduction[I] =
383+ mergeTwoCounter (*I, *LHS, *RHS));
245384 }
246385 if (I->isCast ())
247386 // Cast instructions get the value of the operand.
248- return (IterationsToInvariance[I] = calculate (*I->getOperand (0 )));
387+ return (IterationsToInvarianceOrInduction[I] =
388+ calculate (*I->getOperand (0 )));
249389 }
250390 // TODO: handle more expressions
251391
252392 // Everything else is Unknown.
253- assert (IterationsToInvariance[&V] == Unknown && " unexpected value saved" );
393+ assert (IterationsToInvarianceOrInduction[&V] == Unknown &&
394+ " unexpected value saved" );
254395 return Unknown;
255396}
256397
257398std::optional<unsigned > PhiAnalyzer::calculateIterationsToPeel () {
258399 unsigned Iterations = 0 ;
259400 for (auto &PHI : L.getHeader ()->phis ()) {
260- PeelCounter ToInvariance = calculate (PHI);
261- if (ToInvariance != Unknown) {
262- assert (*ToInvariance <= MaxIterations && " bad result in phi analysis" );
263- Iterations = std::max (Iterations, *ToInvariance);
401+ PeelCounter ToInvarianceOrInduction = calculate (PHI);
402+ if (ToInvarianceOrInduction != Unknown) {
403+ unsigned Val = ToInvarianceOrInduction->first ;
404+ assert (Val <= MaxIterations && " bad result in phi analysis" );
405+ Iterations = std::max (Iterations, Val);
264406 if (Iterations == MaxIterations)
265407 break ;
266408 }
@@ -654,14 +796,15 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
654796 // in TTI.getPeelingPreferences or by the flag -unroll-peel-count.
655797 unsigned DesiredPeelCount = TargetPeelCount;
656798
657- // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
658- // iterations of the loop. For this we compute the number for iterations after
659- // which every Phi is guaranteed to become an invariant, and try to peel the
660- // maximum number of iterations among these values, thus turning all those
661- // Phis into invariants.
799+ // Here we try to get rid of Phis which become invariants or inductions after
800+ // 1, 2, ..., N iterations of the loop. For this we compute the number of
801+ // iterations after which every Phi is guaranteed to become an invariant or an
802+ // induction, and try to peel the maximum number of iterations among these
803+ // values, thus turning all those Phis into invariants or inductions.
662804 if (MaxPeelCount > DesiredPeelCount) {
663805 // Check how many iterations are useful for resolving Phis
664- auto NumPeels = PhiAnalyzer (*L, MaxPeelCount).calculateIterationsToPeel ();
806+ auto NumPeels = PhiAnalyzer (*L, MaxPeelCount, EnablePeelingForIV)
807+ .calculateIterationsToPeel ();
665808 if (NumPeels)
666809 DesiredPeelCount = std::max (DesiredPeelCount, *NumPeels);
667810 }
@@ -680,7 +823,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
680823 if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
681824 LLVM_DEBUG (dbgs () << " Peel " << DesiredPeelCount
682825 << " iteration(s) to turn"
683- << " some Phis into invariants.\n " );
826+ << " some Phis into invariants or inductions .\n " );
684827 PP.PeelCount = DesiredPeelCount;
685828 PP.PeelProfiledIterations = false ;
686829 PP.PeelLast = false ;
0 commit comments