diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 27e70c5ddc0fc..ba0ac01cadd8e 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -81,6 +81,10 @@ static cl::opt<bool> DisableAdvancedPeeling(
     cl::desc(
         "Disable advance peeling. Issues for convergent targets (D134803)."));
 
+static cl::opt<bool> EnablePeelingForIV(
+    "enable-peeling-for-iv", cl::init(false), cl::Hidden,
+    cl::desc("Enable peeling to convert Phi nodes into IVs"));
+
 static const char *PeeledCountMetaData = "llvm.loop.peeled.count";
 
 // Check whether we are capable of peeling this loop.
@@ -155,45 +159,170 @@ namespace {
 // corresponding calls to g are determined and the code for computing
 // x, y, and a can be removed.
 //
+// Similarly, there are cases where peeling makes Phi nodes loop-inductions
+// (i.e., the value is increased or decreased by a fixed amount on every
+// iteration). For example, consider the following function.
+//
+//   #define N 100
+//   void f(int a[], int b[]) {
+//     int im = N - 1;
+//     for (int i = 0; i < N; i++) {
+//       a[i] = b[i] + b[im];
+//       im = i;
+//     }
+//   }
+//
+// The IR of the loop will look something like the following.
+//
+//   %i = phi i32 [ 0, %entry ], [ %i.next, %for.body ]
+//   %im = phi i32 [ 99, %entry ], [ %i, %for.body ]
+//   ...
+//   %i.next = add nuw nsw i32 %i, 1
+//   ...
+//
+// In this case, %im becomes a loop-induction variable by peeling 1 iteration,
+// because %i is a loop-induction variable. The peeling count can be determined
+// by the same algorithm as in the loop-invariant case. Such peeling is
+// profitable for loop-vectorization.
+//
 // The PhiAnalyzer class calculates how many times a loop should be
 // peeled based on the above analysis of the phi nodes in the loop while
 // respecting the maximum specified.
 class PhiAnalyzer {
 public:
-  PhiAnalyzer(const Loop &L, unsigned MaxIterations);
+  PhiAnalyzer(const Loop &L, unsigned MaxIterations, bool PeelForIV);
 
   // Calculate the sufficient minimum number of iterations of the loop to peel
   // such that phi instructions become determined (subject to allowable limits)
   std::optional<unsigned> calculateIterationsToPeel();
 
 protected:
-  using PeelCounter = std::optional<unsigned>;
+  enum class PeelCounterType {
+    Invariant,
+    Induction,
+  };
+
+  using PeelCounterValue = std::pair<unsigned, PeelCounterType>;
+  using PeelCounter = std::optional<PeelCounterValue>;
   const PeelCounter Unknown = std::nullopt;
 
   // Add 1 respecting Unknown and return Unknown if result over MaxIterations
   PeelCounter addOne(PeelCounter PC) const {
     if (PC == Unknown)
       return Unknown;
-    return (*PC + 1 <= MaxIterations) ? PeelCounter{*PC + 1} : Unknown;
+    auto [Val, Ty] = *PC;
+    return (Val + 1 <= MaxIterations) ? PeelCounter({Val + 1, Ty}) : Unknown;
   }
 
-  // Calculate the number of iterations after which the given value
-  // becomes an invariant.
+  // Return a value representing zero for the given counter type.
+  PeelCounter makeZero(PeelCounterType Ty) const {
+    return PeelCounter({0, Ty});
+  }
+
+  // Calculate the number of iterations after which the given value becomes an
+  // invariant or an induction.
   PeelCounter calculate(const Value &);
 
+  // Auxiliary function to calculate the number of iterations for a comparison
+  // instruction or a binary operator.
+  PeelCounter mergeTwoCounter(const Instruction &CmpOrBinaryOp,
+                              const PeelCounterValue &LHS,
+                              const PeelCounterValue &RHS) const;
+
+  // Returns true if \p Phi is an induction in the target loop. This is a
+  // lightweight check that can detect an IV only in some (simple) cases.
+  bool isInductionPHI(const PHINode *Phi) const;
+
   const Loop &L;
   const unsigned MaxIterations;
+  const bool PeelForIV;
 
-  // Map of Values to number of iterations to invariance
-  SmallDenseMap<const Value *, PeelCounter> IterationsToInvariance;
+  // Map of Values to number of iterations to invariance or induction
+  SmallDenseMap<const Value *, PeelCounter> IterationsToInvarianceOrInduction;
 };
 
-PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations)
-    : L(L), MaxIterations(MaxIterations) {
+PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations, bool PeelForIV)
+    : L(L), MaxIterations(MaxIterations), PeelForIV(PeelForIV) {
   assert(canPeel(&L) && "loop is not suitable for peeling");
   assert(MaxIterations > 0 && "no peeling is allowed?");
 }
 
+/// Test whether \p Phi is an induction variable. Although this can be
+/// determined using SCEV analysis, it is expensive to compute here. Instead,
+/// we perform cheaper checks that may not detect complex cases but are
+/// sufficient for some situations.
+bool PhiAnalyzer::isInductionPHI(const PHINode *Phi) const {
+  // Currently we only support a loop that has single latch.
+  BasicBlock *Latch = L.getLoopLatch();
+  if (Latch == nullptr)
+    return false;
+
+  Value *Cur = Phi->getIncomingValueForBlock(Latch);
+  SmallPtrSet<Value *, 4> Visited;
+  bool VisitBinOp = false;
+
+  // Starting from the incoming value of the Phi, we follow the use-def chain.
+  // We consider Phi to be an IV if we can reach it again by traversing only
+  // add, sub, or cast instructions.
+  while (true) {
+    if (Cur == Phi)
+      break;
+
+    // Avoid infinite loop.
+    if (Visited.contains(Cur))
+      return false;
+
+    auto *I = dyn_cast<Instruction>(Cur);
+    if (!I || !L.contains(I))
+      return false;
+
+    Visited.insert(Cur);
+
+    if (auto *Cast = dyn_cast<CastInst>(I)) {
+      Cur = Cast->getOperand(0);
+    } else if (auto *BinOp = dyn_cast<BinaryOperator>(I)) {
+      if (BinOp->getOpcode() != Instruction::Add &&
+          BinOp->getOpcode() != Instruction::Sub)
+        return false;
+      if (!isa<ConstantInt>(BinOp->getOperand(1)))
+        return false;
+
+      VisitBinOp = true;
+      Cur = BinOp->getOperand(0);
+    } else {
+      return false;
+    }
+  }
+
+  // Ignore cases where no binary operations are visited.
+  return VisitBinOp;
+}
+
+/// When either \p LHS or \p RHS is an IV, the result of \p CmpOrBinaryOp is
+/// considered an IV only if it is an addition or a subtraction. Otherwise the
+/// result can be a value that is neither a loop-invariant nor an IV.
+///
+/// If both \p LHS and \p RHS are loop-invariants, then the result of
+/// \p CmpOrBinaryOp is also a loop-invariant.
+PhiAnalyzer::PeelCounter
+PhiAnalyzer::mergeTwoCounter(const Instruction &CmpOrBinaryOp,
+                             const PeelCounterValue &LHS,
+                             const PeelCounterValue &RHS) const {
+  auto &[LVal, LTy] = LHS;
+  auto &[RVal, RTy] = RHS;
+  unsigned NewVal = std::max(LVal, RVal);
+
+  if (LTy == PeelCounterType::Induction || RTy == PeelCounterType::Induction) {
+    if (const auto *BinOp = dyn_cast<BinaryOperator>(&CmpOrBinaryOp)) {
+      if (BinOp->getOpcode() == Instruction::Add ||
+          BinOp->getOpcode() == Instruction::Sub)
+        return PeelCounter({NewVal, PeelCounterType::Induction});
+    }
+    return Unknown;
+  }
+  return PeelCounter({NewVal, PeelCounterType::Invariant});
+}
+
 // This function calculates the number of iterations after which the value
 // becomes an invariant. The pre-calculated values are memorized in a map.
 // N.B. This number will be Unknown or <= MaxIterations.
@@ -212,25 +341,34 @@ PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
   // If we already know the answer, take it from the map.
   // Otherwise, place Unknown to map to avoid infinite recursion. Such
   // cycles can never stop on an invariant.
-  auto [I, Inserted] = IterationsToInvariance.try_emplace(&V, Unknown);
+  auto [I, Inserted] =
+      IterationsToInvarianceOrInduction.try_emplace(&V, Unknown);
   if (!Inserted)
     return I->second;
 
   if (L.isLoopInvariant(&V))
     // Loop invariant so known at start.
-    return (IterationsToInvariance[&V] = 0);
+    return (IterationsToInvarianceOrInduction[&V] =
+                makeZero(PeelCounterType::Invariant));
   if (const PHINode *Phi = dyn_cast<PHINode>(&V)) {
     if (Phi->getParent() != L.getHeader()) {
       // Phi is not in header block so Unknown.
-      assert(IterationsToInvariance[&V] == Unknown && "unexpected value saved");
+      assert(IterationsToInvarianceOrInduction[&V] == Unknown &&
+             "unexpected value saved");
       return Unknown;
     }
+
+    // If Phi is an induction, register it as a starting point.
+    if (PeelForIV && isInductionPHI(Phi))
+      return (IterationsToInvarianceOrInduction[&V] =
+                  makeZero(PeelCounterType::Induction));
+
     // We need to analyze the input from the back edge and add 1.
     Value *Input = Phi->getIncomingValueForBlock(L.getLoopLatch());
     PeelCounter Iterations = calculate(*Input);
-    assert(IterationsToInvariance[Input] == Iterations &&
+    assert(IterationsToInvarianceOrInduction[Input] == Iterations &&
            "unexpected value saved");
-    return (IterationsToInvariance[Phi] = addOne(Iterations));
+    return (IterationsToInvarianceOrInduction[Phi] = addOne(Iterations));
   }
   if (const Instruction *I = dyn_cast<Instruction>(&V)) {
     if (isa<CmpInst>(I) || I->isBinaryOp()) {
@@ -241,26 +379,30 @@ PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) {
       PeelCounter RHS = calculate(*I->getOperand(1));
       if (RHS == Unknown)
         return Unknown;
-      return (IterationsToInvariance[I] = {std::max(*LHS, *RHS)});
+      return (IterationsToInvarianceOrInduction[I] =
+                  mergeTwoCounter(*I, *LHS, *RHS));
     }
     if (I->isCast())
       // Cast instructions get the value of the operand.
-      return (IterationsToInvariance[I] = calculate(*I->getOperand(0)));
+      return (IterationsToInvarianceOrInduction[I] =
+                  calculate(*I->getOperand(0)));
   }
   // TODO: handle more expressions
 
   // Everything else is Unknown.
-  assert(IterationsToInvariance[&V] == Unknown && "unexpected value saved");
+  assert(IterationsToInvarianceOrInduction[&V] == Unknown &&
+         "unexpected value saved");
   return Unknown;
 }
 
 std::optional<unsigned> PhiAnalyzer::calculateIterationsToPeel() {
   unsigned Iterations = 0;
   for (auto &PHI : L.getHeader()->phis()) {
-    PeelCounter ToInvariance = calculate(PHI);
-    if (ToInvariance != Unknown) {
-      assert(*ToInvariance <= MaxIterations && "bad result in phi analysis");
-      Iterations = std::max(Iterations, *ToInvariance);
+    PeelCounter ToInvarianceOrInduction = calculate(PHI);
+    if (ToInvarianceOrInduction != Unknown) {
+      unsigned Val = ToInvarianceOrInduction->first;
+      assert(Val <= MaxIterations && "bad result in phi analysis");
+      Iterations = std::max(Iterations, Val);
       if (Iterations == MaxIterations)
         break;
     }
@@ -654,14 +796,15 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
   // in TTI.getPeelingPreferences or by the flag -unroll-peel-count.
   unsigned DesiredPeelCount = TargetPeelCount;
 
-  // Here we try to get rid of Phis which become invariants after 1, 2, ..., N
-  // iterations of the loop. For this we compute the number for iterations after
-  // which every Phi is guaranteed to become an invariant, and try to peel the
-  // maximum number of iterations among these values, thus turning all those
-  // Phis into invariants.
+  // Here we try to get rid of Phis which become invariants or inductions after
+  // 1, 2, ..., N iterations of the loop. For this we compute the number for
+  // iterations after which every Phi is guaranteed to become an invariant or an
+  // induction, and try to peel the maximum number of iterations among these
+  // values, thus turning all those Phis into invariants or inductions.
   if (MaxPeelCount > DesiredPeelCount) {
     // Check how many iterations are useful for resolving Phis
-    auto NumPeels = PhiAnalyzer(*L, MaxPeelCount).calculateIterationsToPeel();
+    auto NumPeels = PhiAnalyzer(*L, MaxPeelCount, EnablePeelingForIV)
+                        .calculateIterationsToPeel();
     if (NumPeels)
       DesiredPeelCount = std::max(DesiredPeelCount, *NumPeels);
   }
@@ -680,7 +823,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
     if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) {
       LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount
                         << " iteration(s) to turn"
-                        << " some Phis into invariants.\n");
+                        << " some Phis into invariants or inductions.\n");
       PP.PeelCount = DesiredPeelCount;
       PP.PeelProfiledIterations = false;
       PP.PeelLast = false;
diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis-iv-not-triggered.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis-iv-not-triggered.ll
new file mode 100644
index 0000000000000..747175a089a07
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis-iv-not-triggered.ll
@@ -0,0 +1,150 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -S -passes=loop-unroll -enable-peeling-for-iv | FileCheck %s
+; RUN: opt < %s -S -passes=loop-unroll-full -enable-peeling-for-iv | FileCheck %s
+
+; Check that unnecessary peeling doesn't occur for a comparison instruction
+; between two inductions. The original code is as below. Both i and j are
+; inductions, but the comparison i < j is not an induction.
+;
+;   val = 42;
+;   for (i=0,j=100; i<10000; i+=2,j+=1) {
+;     a[i] = val;
+;     val = i < j;
+;   }
+;
+define void @dont_peel_cmp_ind_ind(ptr %a) {
+; CHECK-LABEL: define void @dont_peel_cmp_ind_ind(
+; CHECK-SAME: ptr [[A:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 100, %[[ENTRY]] ], [ [[J_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[VAL:%.*]] = phi i32 [ 42, %[[ENTRY]] ], [ [[VAL_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i32 [[I]]
+; CHECK-NEXT:    store i32 10, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[VAL_NEXT_CMP:%.*]] = icmp slt i32 [[I]], [[J]]
+; CHECK-NEXT:    [[VAL_NEXT]] = zext i1 [[VAL_NEXT_CMP]] to i32
+; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 2
+; CHECK-NEXT:    [[J_NEXT]] = add i32 [[J]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[I_NEXT]], 10000
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp slt i32 [[I_NEXT]], 10000
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[EXIT:.*]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %for.body ]
+  %j = phi i32 [ 100, %entry ] , [ %j.next, %for.body ]
+  %val = phi i32 [ 42, %entry ], [ %val.next, %for.body ]
+  %arrayidx = getelementptr inbounds nuw i32, ptr %a, i32 %i
+  store i32 10, ptr %arrayidx, align 4
+  %val.next.cmp = icmp slt i32 %i, %j
+  %val.next = zext i1 %val.next.cmp to i32
+  %i.next = add i32 %i, 2
+  %j.next = add i32 %j, 1
+  %cmp = icmp ne i32 %i.next, 10000
+  %exitcond = icmp slt i32 %i.next, 10000
+  br i1 %exitcond, label %for.body, label %exit
+
+exit:
+  ret void
+}
+
+
+; Check that unnecessary peeling doesn't occur for bitwise instructions
+; between IVs. The original code is as below. The variable i is an induction,
+; but vals (val0 through val4) are not.
+;
+;   val0 = 42;
+;   val1 = 42;
+;   val2 = 42;
+;   val3 = 42;
+;   val4 = 42;
+;   for (i=0,j=100; i<10000; i+=2,j+=1) {
+;     a[i] = val0;
+;     b[i] = val1;
+;     c[i] = val2;
+;     d[i] = val3;
+;     e[i] = val4;
+;     val0 = i & j;
+;     val1 = i | j;
+;     val2 = i ^ j;
+;     val3 = i << j;
+;     val4 = i >> j;
+;   }
+;
+define void @dont_peel_bitwise_op_iv_iv(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) {
+; CHECK-LABEL: define void @dont_peel_bitwise_op_iv_iv(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], ptr [[D:%.*]], ptr [[E:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 100, %[[ENTRY]] ], [ [[J_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[VAL0:%.*]] = phi i32 [ 42, %[[ENTRY]] ], [ [[VAL0_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[VAL1:%.*]] = phi i32 [ 42, %[[ENTRY]] ], [ [[VAL1_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[VAL2:%.*]] = phi i32 [ 42, %[[ENTRY]] ], [ [[VAL2_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[VAL3:%.*]] = phi i32 [ 42, %[[ENTRY]] ], [ [[VAL3_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[VAL4:%.*]] = phi i32 [ 42, %[[ENTRY]] ], [ [[VAL4_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[IDX_0:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i32 [[I]]
+; CHECK-NEXT:    [[IDX_1:%.*]] = getelementptr inbounds nuw i32, ptr [[B]], i32 [[I]]
+; CHECK-NEXT:    [[IDX_2:%.*]] = getelementptr inbounds nuw i32, ptr [[C]], i32 [[I]]
+; CHECK-NEXT:    [[IDX_3:%.*]] = getelementptr inbounds nuw i32, ptr [[D]], i32 [[I]]
+; CHECK-NEXT:    [[IDX_4:%.*]] = getelementptr inbounds nuw i32, ptr [[E]], i32 [[I]]
+; CHECK-NEXT:    store i32 [[VAL0]], ptr [[IDX_0]], align 4
+; CHECK-NEXT:    store i32 [[VAL1]], ptr [[IDX_1]], align 4
+; CHECK-NEXT:    store i32 [[VAL2]], ptr [[IDX_2]], align 4
+; CHECK-NEXT:    store i32 [[VAL3]], ptr [[IDX_3]], align 4
+; CHECK-NEXT:    store i32 [[VAL4]], ptr [[IDX_4]], align 4
+; CHECK-NEXT:    [[VAL0_NEXT]] = and i32 [[I]], [[J]]
+; CHECK-NEXT:    [[VAL1_NEXT]] = or i32 [[I]], [[J]]
+; CHECK-NEXT:    [[VAL2_NEXT]] = xor i32 [[I]], [[J]]
+; CHECK-NEXT:    [[VAL3_NEXT]] = shl i32 [[I]], [[J]]
+; CHECK-NEXT:    [[VAL4_NEXT]] = lshr i32 [[I]], [[J]]
+; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 2
+; CHECK-NEXT:    [[J_NEXT]] = add i32 [[J]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[I_NEXT]], 10000
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp slt i32 [[I_NEXT]], 10000
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[EXIT:.*]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %for.body ]
+  %j = phi i32 [ 100, %entry ] , [ %j.next, %for.body ]
+  %val0 = phi i32 [ 42, %entry ], [ %val0.next, %for.body ]
+  %val1 = phi i32 [ 42, %entry ], [ %val1.next, %for.body ]
+  %val2 = phi i32 [ 42, %entry ], [ %val2.next, %for.body ]
+  %val3 = phi i32 [ 42, %entry ], [ %val3.next, %for.body ]
+  %val4 = phi i32 [ 42, %entry ], [ %val4.next, %for.body ]
+  %idx.0 = getelementptr inbounds nuw i32, ptr %a, i32 %i
+  %idx.1 = getelementptr inbounds nuw i32, ptr %b, i32 %i
+  %idx.2 = getelementptr inbounds nuw i32, ptr %c, i32 %i
+  %idx.3 = getelementptr inbounds nuw i32, ptr %d, i32 %i
+  %idx.4 = getelementptr inbounds nuw i32, ptr %e, i32 %i
+  store i32 %val0, ptr %idx.0, align 4
+  store i32 %val1, ptr %idx.1, align 4
+  store i32 %val2, ptr %idx.2, align 4
+  store i32 %val3, ptr %idx.3, align 4
+  store i32 %val4, ptr %idx.4, align 4
+  %val0.next = and i32 %i, %j
+  %val1.next = or i32 %i, %j
+  %val2.next = xor i32 %i, %j
+  %val3.next = shl i32 %i, %j
+  %val4.next = lshr i32 %i, %j
+  %i.next = add i32 %i, 2
+  %j.next = add i32 %j, 1
+  %cmp = icmp ne i32 %i.next, 10000
+  %exitcond = icmp slt i32 %i.next, 10000
+  br i1 %exitcond, label %for.body, label %exit
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis-iv.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis-iv.ll
new file mode 100644
index 0000000000000..4e16bafc65f03
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis-iv.ll
@@ -0,0 +1,247 @@
+; RUN: opt -passes=loop-unroll -enable-peeling-for-iv -disable-output \
+; RUN:     -pass-remarks-output=- %s | FileCheck %s
+; RUN: opt -passes=loop-unroll-full -enable-peeling-for-iv -disable-output \
+; RUN:     -pass-remarks-output=- %s | FileCheck %s
+
+; void g(int);
+declare void @g(i32)
+
+; Check that phi analysis can handle a binary operator with an addition or a
+; subtraction on loop-invariants or IVs. In the following case, the phis for x,
+; y, a, and b become IVs by peeling.
+;
+;
+; void g(int);
+; void binary() {
+;   int x = 0;
+;   int y = 0;
+;   int a = 42;
+;   int b = 314;
+;   for(int i = 0; i <100000; ++i) {
+;     g(x);
+;     g(b);
+;     x = y;
+;     y = a + 1;
+;     a = i - 2;
+;     b = i + a;
+;   }
+; }
+;
+;
+; Consider the calls to g:
+;
+;                | i   | g(x)    | g(b)     | x   | y   | a   | b     |
+; ---------------|-----|---------|----------|-----|-----|-----|-------|
+; 1st iteration  | 0   | g(0)    | g(314)   | 0   | 43  | -2  | -2    |
+; 2nd iteration  | 1   | g(0)    | g(-2)    | 43  | -1  | -1  | 0     |
+; 3rd iteration  | 2   | g(43)   | g(0)     | -1  | 0   | 0   | 2     |
+; 4th iteration  | 3   | g(-1)   | g(2)     | 0   | 1   | 1   | 4     |
+; 5th iteration  | 4   | g(0)    | g(4)     | 1   | 2   | 2   | 6     |
+; i-th iteration | i-1 | g(i-5)  | g(2*i-6) | i-4 | i-3 | i-3 | 2*i-4 |
+;
+; After the 4th iteration, the arguments to g become IVs, so peeling 3 times
+; converts all PHIs into IVs.
+;
+
+; CHECK: --- !Passed
+; CHECK-NEXT: Pass: loop-unroll
+; CHECK-NEXT: Name: Peeled
+; CHECK-NEXT: Function: binary_induction
+; CHECK-NEXT: Args:
+; CHECK-NEXT: - String: ' peeled loop by '
+; CHECK-NEXT: - PeelCount: '3'
+; CHECK-NEXT: - String: ' iterations'
+; CHECK-NEXT: ...
+define void @binary_induction() {
+entry:
+  br label %for.body
+
+exit:
+  ret void
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %for.body ]
+  %x = phi i32 [ 0, %entry ], [ %y, %for.body ]
+  %y = phi i32 [ 0, %entry ], [ %y.next, %for.body ]
+  %a = phi i32 [ 42, %entry ], [ %a.next, %for.body ]
+  %b = phi i32 [ 314, %entry ], [ %b.next, %for.body ]
+  tail call void @g(i32 %x)
+  tail call void @g(i32 %b)
+  %i.next = add i32 %i, 1
+  %y.next = add i32 %a, 1
+  %a.next = sub i32 %i, 2
+  %b.next = add i32 %i, %a
+  %cmp = icmp ne i32 %i.next, 100000
+  br i1 %cmp, label %for.body, label %exit
+}
+
+; Check that peeling works fine in the following case. This is based on TSVC
+; s291, where peeling 1 time makes the variable im an IV so we can vectorize
+; the loop.
+;
+; #define N 100
+; void f(float * restrict a, float * restrict b) {
+;   int im = N - 1;
+;   for (int i = 0; i < N; i++) {
+;     a[i] = b[i] + b[im];
+;     im = i;
+;   }
+; }
+;
+
+; CHECK: --- !Passed
+; CHECK-NEXT: Pass: loop-unroll
+; CHECK-NEXT: Name: Peeled
+; CHECK-NEXT: Function: tsvc_s291
+; CHECK-NEXT: Args:
+; CHECK-NEXT: - String: ' peeled loop by '
+; CHECK-NEXT: - PeelCount: '1'
+; CHECK-NEXT: - String: ' iterations'
+; CHECK-NEXT: ...
+define void @tsvc_s291(ptr noalias %a, ptr noalias %b) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [0, %entry], [ %i.next, %for.body ]
+  %im = phi i32 [ 99, %entry ], [ %i, %for.body ]
+  %a.idx = getelementptr inbounds float, ptr %a, i32 %i
+  %b.idx.0 = getelementptr inbounds float, ptr %b, i32 %i
+  %b.idx.1 = getelementptr inbounds float, ptr %b, i32 %im
+  %lhs = load float, ptr %b.idx.0, align 4
+  %rhs = load float, ptr %b.idx.1, align 4
+  %sum = fadd float %lhs, %rhs
+  store float %sum, ptr %a.idx, align 4
+  %i.next = add i32 %i, 1
+  %cmp = icmp ne i32 %i.next, 100
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret void
+}
+
+; Check that the unnecessary peeling occurs in the following case. The cause is
+; that the analyzer determines a casted IV as a non-IV.
+;
+;   for (unsigned int i=0; i<10000; i++)
+;     a[(unsigned long)i] = 10;
+;
+
+; CHECK: --- !Passed
+; CHECK-NEXT: Pass: loop-unroll
+; CHECK-NEXT: Name: Peeled
+; CHECK-NEXT: Function: induction_undesirable_peel1
+; CHECK-NEXT: Args:
+; CHECK-NEXT: - String: ' peeled loop by '
+; CHECK-NEXT: - PeelCount: '1'
+; CHECK-NEXT: - String: ' iterations'
+; CHECK-NEXT: ...
+define void @induction_undesirable_peel1(ptr %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %conv = phi i64 [ 0, %entry ], [ %conv.next, %for.body ]
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds nuw i32, ptr %a, i64 %conv
+  store i32 10, ptr %arrayidx, align 4
+  %iv.next = add nsw nuw i32 %iv, 1
+  %conv.next = zext i32 %iv.next to i64
+  %cmp = icmp ugt i64 10000, %conv.next
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret void
+}
+
+; Check that the unnecessary peeling occurs in the following case. The analyzer
+; cannot detect that the difference between the initial value of %i and %j is
+; equal to the step value of the %i.
+;
+;   int j = 0;
+;   for (int i=1; i