Skip to content

Commit e33012b

Browse files
fixup! do not use CSAState
1 parent 1628bb0 commit e33012b

File tree

8 files changed

+220
-301
lines changed

8 files changed

+220
-301
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ class VPBuilder {
237237
}
238238

239239
VPInstruction *createAnyActive(VPValue *Cond, DebugLoc DL,
240-
const Twine &Name) {
240+
const Twine &Name) {
241241
return createInstruction(VPInstruction::AnyActive, {Cond}, DL, Name);
242242
}
243243

@@ -249,7 +249,7 @@ class VPBuilder {
249249
}
250250

251251
VPInstruction *createAnyActiveEVL(VPValue *Cond, VPValue *EVL, DebugLoc DL,
252-
const Twine &Name) {
252+
const Twine &Name) {
253253
return createInstruction(VPInstruction::AnyActiveEVL, {Cond, EVL}, DL,
254254
Name);
255255
}

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+49-87
Original file line numberDiff line numberDiff line change
@@ -2973,7 +2973,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
29732973
for (const auto &Entry : Legal->getInductionVars())
29742974
fixupIVUsers(Entry.first, Entry.second,
29752975
getOrCreateVectorTripCount(nullptr), LoopMiddleBlock, State);
2976-
fixCSALiveOuts(State, Plan);
29772976
}
29782977

29792978
for (Instruction *PI : PredicatedInstructions)
@@ -8731,13 +8730,18 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
87318730
// directly, enabling more efficient codegen.
87328731
PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
87338732
} else if (Legal->isCSAPhi(Phi)) {
8734-
VPCSAState *State = Plan.getCSAStates().find(Phi)->second;
8735-
VPValue *InitData = State->getVPInitData();
8733+
VPValue *InitScalar = Plan.getOrAddLiveIn(
8734+
Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
8735+
8736+
// Don't build full CSA for VF=ElementCount::getFixed(1)
8737+
bool IsScalarVF = LoopVectorizationPlanner::getDecisionAndClampRange(
8738+
[&](ElementCount VF) { return VF.isScalar(); }, Range);
8739+
87368740
// When VF is scalar (ElementCount::getFixed(1)), InitData is just InitScalar.
8737-
if (!InitData)
8738-
InitData = State->getVPInitScalar();
8741+
VPValue *InitData =
8742+
IsScalarVF ? InitScalar
8743+
: getVPValueOrAddLiveIn(PoisonValue::get(Phi->getType()));
87398744
PhiRecipe = new VPCSAHeaderPHIRecipe(Phi, InitData);
8740-
State->setPhiRecipe(cast<VPCSAHeaderPHIRecipe>(PhiRecipe));
87418745
} else {
87428746
llvm_unreachable(
87438747
"can only widen reductions, fixed-order recurrences, and CSAs here");
@@ -8778,13 +8782,17 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
87788782
return CSADescriptor::isCSASelect(CSA.second, SI);
87798783
});
87808784
if (CSADescIt != Legal->getCSAs().end()) {
8781-
PHINode *CSAPhi = CSADescIt->first;
8782-
VPCSAState *State = Plan.getCSAStates().find(CSAPhi)->second;
8783-
VPValue *VPDataPhi = State->getPhiRecipe();
8784-
auto *R = new VPCSADataUpdateRecipe(
8785-
SI, {VPDataPhi, Operands[0], Operands[1], Operands[2]});
8786-
State->setDataUpdate(R);
8787-
return R;
8785+
for (VPRecipeBase &R :
8786+
Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
8787+
if (auto PhiR = dyn_cast<VPCSAHeaderPHIRecipe>(&R)) {
8788+
if (PhiR->getUnderlyingInstr() == CSADescIt->first) {
8789+
auto *R = new VPCSADataUpdateRecipe(
8790+
SI, {PhiR, Operands[0], Operands[1], Operands[2]});
8791+
PhiR->setDataUpdate(R);
8792+
return R;
8793+
}
8794+
}
8795+
}
87888796
}
87898797

87908798
return new VPWidenSelectRecipe(
@@ -8799,44 +8807,6 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
87998807
return tryToWiden(Instr, Operands, VPBB);
88008808
}
88018809

8802-
/// Add CSA Recipes that can occur before each instruction in the input IR
8803-
/// is processed and introduced into VPlan.
8804-
static void
8805-
addCSAPreprocessRecipes(const LoopVectorizationLegality::CSAList &CSAs,
8806-
Loop *OrigLoop, VPBasicBlock *PreheaderVPBB,
8807-
VPBasicBlock *HeaderVPBB, DebugLoc DL, VFRange &Range,
8808-
VPlan &Plan, VPRecipeBuilder &Builder) {
8809-
8810-
// Don't build full CSA for VF=ElementCount::getFixed(1)
8811-
bool IsScalarVF = LoopVectorizationPlanner::getDecisionAndClampRange(
8812-
[&](ElementCount VF) { return VF.isScalar(); }, Range);
8813-
8814-
for (const auto &CSA : CSAs) {
8815-
VPValue *VPInitScalar = Plan.getOrAddLiveIn(
8816-
CSA.first->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
8817-
8818-
// Scalar VF builds the scalar version of the loop. In that case,
8819-
// no maintenance of the mask or extraction in the middle block is needed.
8820-
if (IsScalarVF) {
8821-
VPCSAState *S = new VPCSAState(VPInitScalar);
8822-
Plan.addCSAState(CSA.first, S);
8823-
continue;
8824-
}
8825-
8826-
VPBuilder PHB(PreheaderVPBB);
8827-
auto *VPInitMask = Builder.getVPValueOrAddLiveIn(
8828-
ConstantInt::getFalse(Type::getInt1Ty(CSA.first->getContext())));
8829-
auto *VPInitData =
8830-
Builder.getVPValueOrAddLiveIn(PoisonValue::get(CSA.first->getType()));
8831-
8832-
VPBuilder HB(HeaderVPBB);
8833-
auto *VPMaskPhi = HB.createCSAMaskPhi(VPInitMask, DL, "csa.mask.phi");
8834-
8835-
auto *S = new VPCSAState(VPInitScalar, VPInitData, VPMaskPhi);
8836-
Plan.addCSAState(CSA.first, S);
8837-
}
8838-
}
8839-
88408810
/// Add CSA Recipes that must occur after each instruction in the input IR
88418811
/// is processed and introduced into VPlan.
88428812
static void
@@ -8849,60 +8819,57 @@ addCSAPostprocessRecipes(VPRecipeBuilder &RecipeBuilder,
88498819
[&](ElementCount VF) { return VF.isScalar(); }, Range))
88508820
return;
88518821

8822+
VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
88528823
for (const auto &CSA : CSAs) {
8853-
VPCSAState *CSAState = Plan.getCSAStates().find(CSA.first)->second;
8854-
VPCSADataUpdateRecipe *VPDataUpdate = CSAState->getDataUpdate();
8824+
// Build the MaskPhi recipe.
8825+
auto *VPInitMask = RecipeBuilder.getVPValueOrAddLiveIn(
8826+
ConstantInt::getFalse(Type::getInt1Ty(CSA.first->getContext())));
8827+
VPBuilder B;
8828+
B.setInsertPoint(Header, Header->getFirstNonPhi());
8829+
auto *VPMaskPhi = B.createCSAMaskPhi(VPInitMask, DL, "csa.mask.phi");
8830+
B.clearInsertionPoint();
88558831

8856-
assert(VPDataUpdate &&
8857-
"VPDataUpdate must have been introduced prior to postprocess");
8858-
assert(CSA.second.getCond() &&
8859-
"CSADescriptor must know how to describe the condition");
88608832
auto GetVPValue = [&](Value *I) {
88618833
return RecipeBuilder.getRecipe(cast<Instruction>(I))->getVPSingleValue();
88628834
};
8863-
VPValue *WidenedCond = GetVPValue(CSA.second.getCond());
8864-
VPValue *VPInitScalar = CSAState->getVPInitScalar();
8835+
VPCSADataUpdateRecipe *VPDataUpdate = cast<VPCSADataUpdateRecipe>(
8836+
cast<VPCSAHeaderPHIRecipe>(GetVPValue(CSA.first))->getVPNewData());
88658837

88668838
// The CSA optimization wants to use a condition such that when it is
88678839
// true, a new value is assigned. However, it is possible that a true lane
88688840
// in WidenedCond corresponds to selection of the initial value instead.
88698841
// In that case, we must use the negation of WidenedCond.
88708842
// i.e. select cond new_val old_val versus select cond.not old_val new_val
8843+
assert(CSA.second.getCond() &&
8844+
"CSADescriptor must know how to describe the condition");
8845+
VPValue *WidenedCond = GetVPValue(CSA.second.getCond());
88718846
VPValue *CondToUse = WidenedCond;
8872-
VPBuilder B;
88738847
if (cast<SelectInst>(CSA.second.getAssignment())->getTrueValue() ==
88748848
CSA.first) {
88758849
auto *VPNotCond = B.createNot(WidenedCond, DL);
8876-
VPNotCond->insertBefore(
8877-
GetVPValue(CSA.second.getAssignment())->getDefiningRecipe());
8850+
VPNotCond->insertBefore(VPDataUpdate);
88788851
CondToUse = VPNotCond;
88798852
}
88808853

8881-
auto *VPAnyActive =
8882-
B.createAnyActive(CondToUse, DL, "csa.cond.anyactive");
8883-
VPAnyActive->insertBefore(
8884-
GetVPValue(CSA.second.getAssignment())->getDefiningRecipe());
8854+
auto *VPAnyActive = B.createAnyActive(CondToUse, DL, "csa.cond.anyactive");
8855+
VPAnyActive->insertBefore(VPDataUpdate);
88858856

8886-
auto *VPMaskSel = B.createCSAMaskSel(CondToUse, CSAState->getVPMaskPhi(),
8887-
VPAnyActive, DL, "csa.mask.sel");
8857+
auto *VPMaskSel = B.createCSAMaskSel(CondToUse, VPMaskPhi, VPAnyActive, DL,
8858+
"csa.mask.sel");
88888859
VPMaskSel->insertAfter(VPAnyActive);
8860+
88898861
VPDataUpdate->setVPNewMaskAndVPAnyActive(VPMaskSel, VPAnyActive);
8862+
VPValue *VPInitScalar = Plan.getOrAddLiveIn(
8863+
CSA.first->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
8864+
SmallVector<PHINode *> PhiToFix;
8865+
for (User *U : VPDataUpdate->getUnderlyingValue()->users())
8866+
if (auto *Phi = dyn_cast<PHINode>(U);
8867+
Phi && Phi->getParent() == OrigLoop->getUniqueExitBlock())
8868+
PhiToFix.emplace_back(Phi);
88908869
VPCSAExtractScalarRecipe *ExtractScalarRecipe =
8891-
new VPCSAExtractScalarRecipe({VPInitScalar, VPMaskSel, VPDataUpdate});
8892-
8870+
new VPCSAExtractScalarRecipe({VPInitScalar, VPMaskSel, VPDataUpdate},
8871+
PhiToFix);
88938872
MiddleVPBB->insert(ExtractScalarRecipe, MiddleVPBB->getFirstNonPhi());
8894-
8895-
// Update CSAState with new recipes
8896-
CSAState->setExtractScalarRecipe(ExtractScalarRecipe);
8897-
CSAState->setVPAnyActive(VPAnyActive);
8898-
8899-
// Add live out for the CSA. We should be in LCSSA, so we are looking for
8900-
// Phi users in the unique exit block of the original updated value.
8901-
BasicBlock *OrigExit = OrigLoop->getUniqueExitBlock();
8902-
assert(OrigExit && "Expected a single exit block");
8903-
for (User *U :VPDataUpdate->getUnderlyingValue()->users())
8904-
if (auto *Phi = dyn_cast<PHINode>(U); Phi && Phi->getParent() == OrigExit)
8905-
Plan.addLiveOut(Phi, ExtractScalarRecipe);
89068873
}
89078874
}
89088875

@@ -9224,11 +9191,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
92249191

92259192
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, Legal, CM, PSE, Builder);
92269193

9227-
addCSAPreprocessRecipes(Legal->getCSAs(), OrigLoop, Plan->getPreheader(),
9228-
Plan->getVectorLoopRegion()->getEntryBasicBlock(), DL,
9229-
Range, *Plan, RecipeBuilder);
9230-
9231-
92329194
// ---------------------------------------------------------------------------
92339195
// Pre-construction: record ingredients whose recipes we'll need to further
92349196
// process after constructing the initial VPlan.

llvm/lib/Transforms/Vectorize/VPlan.cpp

-3
Original file line numberDiff line numberDiff line change
@@ -838,9 +838,6 @@ VPlan::~VPlan() {
838838
delete VPV;
839839
if (BackedgeTakenCount)
840840
delete BackedgeTakenCount;
841-
842-
for (std::pair<PHINode *, VPCSAState *> &S : CSAStates)
843-
delete S.second;
844841
}
845842

846843
VPIRBasicBlock *VPIRBasicBlock::fromBasicBlock(BasicBlock *IRBB) {

llvm/lib/Transforms/Vectorize/VPlan.h

+13-62
Original file line numberDiff line numberDiff line change
@@ -231,53 +231,6 @@ class VPLane {
231231
}
232232
};
233233

234-
class VPInstruction;
235-
class VPCSAHeaderPHIRecipe;
236-
class VPCSADataUpdateRecipe;
237-
class VPCSAExtractScalarRecipe;
238-
239-
/// VPCSAState holds information required to vectorize a conditional scalar
240-
/// assignment.
241-
class VPCSAState {
242-
VPValue *VPInitScalar = nullptr;
243-
VPValue *VPInitData = nullptr;
244-
VPInstruction *VPMaskPhi = nullptr;
245-
VPInstruction *VPAnyActive = nullptr;
246-
VPCSAHeaderPHIRecipe *VPPhiRecipe = nullptr;
247-
VPCSADataUpdateRecipe *VPDataUpdate = nullptr;
248-
VPCSAExtractScalarRecipe *VPExtractScalar = nullptr;
249-
250-
public:
251-
VPCSAState(VPValue *VPInitScalar, VPValue *InitData,
252-
VPInstruction *MaskPhi)
253-
: VPInitScalar(VPInitScalar), VPInitData(InitData), VPMaskPhi(MaskPhi) {}
254-
255-
VPCSAState(VPValue *VPInitScalar) : VPInitScalar(VPInitScalar) {}
256-
257-
VPValue *getVPInitScalar() const { return VPInitScalar; }
258-
259-
VPValue *getVPInitData() const { return VPInitData; }
260-
261-
VPInstruction *getVPMaskPhi() const { return VPMaskPhi; }
262-
263-
void setVPAnyActive(VPInstruction *AnyActive) { VPAnyActive = AnyActive; }
264-
VPInstruction *getVPAnyActive() { return VPAnyActive; }
265-
266-
VPCSAHeaderPHIRecipe *getPhiRecipe() const { return VPPhiRecipe; }
267-
268-
void setPhiRecipe(VPCSAHeaderPHIRecipe *R) { VPPhiRecipe = R; }
269-
270-
VPCSADataUpdateRecipe *getDataUpdate() const { return VPDataUpdate; }
271-
void setDataUpdate(VPCSADataUpdateRecipe *R) { VPDataUpdate = R; }
272-
273-
void setExtractScalarRecipe(VPCSAExtractScalarRecipe *R) {
274-
VPExtractScalar = R;
275-
}
276-
VPCSAExtractScalarRecipe *getExtractScalarRecipe() const {
277-
return VPExtractScalar;
278-
}
279-
};
280-
281234
/// VPTransformState holds information passed down when "executing" a VPlan,
282235
/// needed for generating the output IR.
283236
struct VPTransformState {
@@ -2940,7 +2893,10 @@ class VPCSAHeaderPHIRecipe final : public VPHeaderPHIRecipe {
29402893
}
29412894

29422895
VPValue *getVPInitData() { return getOperand(0); }
2943-
VPValue *getVPNewData() { return getOperand(1); }
2896+
2897+
VPValue *NewData = nullptr;
2898+
void setDataUpdate(VPValue *V) { NewData = V; }
2899+
VPValue *getVPNewData() { return NewData; }
29442900
};
29452901

29462902
class VPCSADataUpdateRecipe final : public VPSingleDefRecipe {
@@ -2994,15 +2950,19 @@ class VPCSADataUpdateRecipe final : public VPSingleDefRecipe {
29942950
};
29952951

29962952
class VPCSAExtractScalarRecipe final : public VPSingleDefRecipe {
2953+
SmallVector<PHINode *> PhisToFix;
2954+
29972955
public:
2998-
VPCSAExtractScalarRecipe(ArrayRef<VPValue *> Operands)
2999-
: VPSingleDefRecipe(VPDef::VPCSAExtractScalarSC, Operands) {}
2956+
VPCSAExtractScalarRecipe(ArrayRef<VPValue *> Operands,
2957+
SmallVector<PHINode *> PhisToFix)
2958+
: VPSingleDefRecipe(VPDef::VPCSAExtractScalarSC, Operands),
2959+
PhisToFix(PhisToFix) {}
30002960

30012961
~VPCSAExtractScalarRecipe() override = default;
30022962

30032963
VPCSAExtractScalarRecipe *clone() override {
30042964
SmallVector<VPValue *> Ops(operands());
3005-
return new VPCSAExtractScalarRecipe(Ops);
2965+
return new VPCSAExtractScalarRecipe(Ops, PhisToFix);
30062966
}
30072967

30082968
void execute(VPTransformState &State) override;
@@ -3016,6 +2976,8 @@ class VPCSAExtractScalarRecipe final : public VPSingleDefRecipe {
30162976
VPSlotTracker &SlotTracker) const override;
30172977
#endif
30182978

2979+
VP_CLASSOF_IMPL(VPDef::VPCSAExtractScalarSC)
2980+
30192981
VPValue *getVPInitScalar() const { return getOperand(0); }
30202982
VPValue *getVPMaskSel() const { return getOperand(1); }
30212983
VPValue *getVPDataSel() const { return getOperand(2); }
@@ -4002,11 +3964,6 @@ class VPlan {
40023964
/// definitions are VPValues that hold a pointer to their underlying IR.
40033965
SmallVector<VPValue *, 16> VPLiveInsToFree;
40043966

4005-
/// Values used outside the plan. It contains live-outs that need fixing. Any
4006-
/// live-out that is fixed outside VPlan needs to be removed. The remaining
4007-
/// live-outs are fixed via VPLiveOut::fixPhi.
4008-
MapVector<PHINode *, VPLiveOut *> LiveOuts;
4009-
40103967
/// Mapping from SCEVs to the VPValues representing their expansions.
40113968
/// NOTE: This mapping is temporary and will be removed once all users have
40123969
/// been modeled in VPlan directly.
@@ -4057,12 +4014,6 @@ class VPlan {
40574014
bool RequiresScalarEpilogueCheck,
40584015
bool TailFolded, Loop *TheLoop);
40594016

4060-
void addCSAState(PHINode *Phi, VPCSAState *S) { CSAStates.insert({Phi, S}); }
4061-
4062-
MapVector<PHINode *, VPCSAState *> const &getCSAStates() const {
4063-
return CSAStates;
4064-
}
4065-
40664017
/// Prepare the plan for execution, setting up the required live-in values.
40674018
void prepareToExecute(Value *TripCount, Value *VectorTripCount,
40684019
Value *CanonicalIVStartValue, VPTransformState &State);

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

+6-2
Original file line numberDiff line numberDiff line change
@@ -672,8 +672,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
672672
ConstantInt::get(WidenedCond->getType()->getScalarType(), 0);
673673
Value *AnyActive = State.Builder.CreateIntrinsic(
674674
WidenedCond->getType()->getScalarType(), Intrinsic::vp_reduce_or,
675-
{StartValue, WidenedCond, AllOnesMask, EVL}, nullptr,
676-
"any.active");
675+
{StartValue, WidenedCond, AllOnesMask, EVL}, nullptr, "any.active");
677676
return AnyActive;
678677
}
679678
case VPInstruction::CSAVLPhi: {
@@ -2509,6 +2508,7 @@ void VPCSADataUpdateRecipe::execute(VPTransformState &State) {
25092508
"csa.data.sel");
25102509

25112510
DataPhi->addIncoming(DataSel, State.CFG.PrevBB);
2511+
25122512
State.set(this, DataSel);
25132513
}
25142514

@@ -2599,6 +2599,10 @@ void VPCSAExtractScalarRecipe::execute(VPTransformState &State) {
25992599
Value *LastIdxGEZero = State.Builder.CreateICmpSGE(LastIdx, Zero);
26002600
Value *ChooseFromVecOrInit =
26012601
State.Builder.CreateSelect(LastIdxGEZero, ExtractFromVec, InitScalar);
2602+
2603+
for (PHINode *Phi : PhisToFix)
2604+
Phi->addIncoming(ChooseFromVecOrInit, State.CFG.ExitBB);
2605+
26022606
State.set(this, ChooseFromVecOrInit, /*IsScalar=*/true);
26032607
}
26042608

0 commit comments

Comments
 (0)