@@ -83,6 +83,10 @@ static cl::opt<bool> EnableHistogramVectorization(
83
83
" enable-histogram-loop-vectorization" , cl::init(false ), cl::Hidden,
84
84
cl::desc(" Enables autovectorization of some loops containing histograms" ));
85
85
86
/// Hidden command-line switch (initial value: false) that controls CSA loop
/// vectorization. The PHI classification code in canVectorizeInstrs() only
/// tries CSADescriptor::isCSAPhi when this flag is true, or when the flag was
/// not given on the command line (getNumOccurrences() == 0) and
/// TTI->enableCSAVectorization() returns true. Passing an explicit false
/// therefore skips the CSA check regardless of the target hook.
+ static cl::opt<bool >
87
+ EnableCSA (" enable-csa-vectorization" , cl::init(false ), cl::Hidden,
88
+ cl::desc(" Control whether CSA loop vectorization is enabled" ));
89
+
86
90
// / Maximum vectorization interleave count.
87
91
static const unsigned MaxInterleaveFactor = 16 ;
88
92
@@ -750,6 +754,15 @@ bool LoopVectorizationLegality::setupOuterLoopInductions() {
750
754
return llvm::all_of (Header->phis (), IsSupportedPhi);
751
755
}
752
756
757
/// Records \p Phi as a CSA PHI of the loop being analysed.
///
/// \param Phi         the PHI node to record.
/// \param CSADesc     descriptor for \p Phi; it must report isValid(), which
///                    is checked with an assert.
/// \param AllowedExit set owned by the caller; \p Phi is inserted into it.
///
/// The descriptor is stored in the CSAs member keyed by \p Phi.
/// NOTE(review): "CSA" presumably stands for conditional scalar assignment;
/// confirm against the CSADescriptor declaration.
+ void LoopVectorizationLegality::addCSAPhi (
758
+ PHINode *Phi, const CSADescriptor &CSADesc,
759
+ SmallPtrSetImpl<Value *> &AllowedExit) {
760
// Callers are expected to pass only descriptors that isCSAPhi accepted.
+ assert (CSADesc.isValid () && " Expected Valid CSADescriptor" );
761
+ LLVM_DEBUG (dbgs () << " LV: found legal CSA opportunity" << *Phi << " \n " );
762
// Add the PHI itself to the caller-provided allowed-exit set.
+ AllowedExit.insert (Phi);
763
// Keep the descriptor for later queries; presumably this is the container
// that getCSAs() exposes (see its use in canFoldTailByMasking) - confirm.
+ CSAs.insert ({Phi, CSADesc});
764
+ }
765
+
753
766
// / Checks if a function is scalarizable according to the TLI, in
754
767
// / the sense that it should be vectorized and then expanded in
755
768
// / multiple scalar calls. This is represented in the
@@ -867,14 +880,23 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
867
880
continue ;
868
881
}
869
882
870
- // As a last resort, coerce the PHI to a AddRec expression
871
- // and re-try classifying it a an induction PHI.
883
+ // Try to coerce the PHI to an AddRec expression and re-try classifying
884
+ // it as an induction PHI.
872
885
if (InductionDescriptor::isInductionPHI (Phi, TheLoop, PSE, ID, true ) &&
873
886
!IsDisallowedStridedPointerInduction (ID)) {
874
887
addInductionPhi (Phi, ID, AllowedExit);
875
888
continue ;
876
889
}
877
890
891
+ // Check if the PHI can be classified as a CSA PHI.
892
+ if (EnableCSA || (TTI->enableCSAVectorization () &&
893
+ EnableCSA.getNumOccurrences () == 0 )) {
894
+ if (auto CSADesc = CSADescriptor::isCSAPhi (Phi, TheLoop)) {
895
+ addCSAPhi (Phi, CSADesc, AllowedExit);
896
+ continue ;
897
+ }
898
+ }
899
+
878
900
reportVectorizationFailure (" Found an unidentified PHI" ,
879
901
" value that could not be identified as "
880
902
" reduction is used outside the loop" ,
@@ -1858,11 +1880,15 @@ bool LoopVectorizationLegality::canFoldTailByMasking() const {
1858
1880
for (const auto &Reduction : getReductionVars ())
1859
1881
ReductionLiveOuts.insert (Reduction.second .getLoopExitInstr ());
1860
1882
1883
+ SmallPtrSet<const Value *, 8 > CSALiveOuts;
1884
+ for (const auto &CSA : getCSAs ())
1885
+ CSALiveOuts.insert (CSA.second .getAssignment ());
1886
+
1861
1887
// TODO: handle non-reduction outside users when tail is folded by masking.
1862
1888
for (auto *AE : AllowedExit) {
1863
1889
// Check that all users of allowed exit values are inside the loop or
1864
- // are the live-out of a reduction.
1865
- if (ReductionLiveOuts.count (AE))
1890
+ // are the live-out of a reduction or a CSA.
1891
+ if (ReductionLiveOuts.count (AE) || CSALiveOuts. count (AE) )
1866
1892
continue ;
1867
1893
for (User *U : AE->users ()) {
1868
1894
Instruction *UI = cast<Instruction>(U);
0 commit comments