Skip to content

Commit e55606a

Browse files
fixup! do not use CSAState
1 parent 9cccaa3 commit e55606a

File tree

8 files changed

+220
-301
lines changed

8 files changed

+220
-301
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ class VPBuilder {
237237
}
238238

239239
VPInstruction *createAnyActive(VPValue *Cond, DebugLoc DL,
240-
const Twine &Name) {
240+
const Twine &Name) {
241241
return createInstruction(VPInstruction::AnyActive, {Cond}, DL, Name);
242242
}
243243

@@ -249,7 +249,7 @@ class VPBuilder {
249249
}
250250

251251
VPInstruction *createAnyActiveEVL(VPValue *Cond, VPValue *EVL, DebugLoc DL,
252-
const Twine &Name) {
252+
const Twine &Name) {
253253
return createInstruction(VPInstruction::AnyActiveEVL, {Cond, EVL}, DL,
254254
Name);
255255
}

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+49-87
Original file line numberDiff line numberDiff line change
@@ -2971,7 +2971,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
29712971
fixupIVUsers(Entry.first, Entry.second,
29722972
getOrCreateVectorTripCount(nullptr),
29732973
IVEndValues[Entry.first], LoopMiddleBlock, State);
2974-
IVEndValues[Entry.first], LoopMiddleBlock, Plan, State);
29752974
}
29762975

29772976
for (Instruction *PI : PredicatedInstructions)
@@ -8705,13 +8704,18 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
87058704
// directly, enabling more efficient codegen.
87068705
PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
87078706
} else if (Legal->isCSAPhi(Phi)) {
8708-
VPCSAState *State = Plan.getCSAStates().find(Phi)->second;
8709-
VPValue *InitData = State->getVPInitData();
8707+
VPValue *InitScalar = Plan.getOrAddLiveIn(
8708+
Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
8709+
8710+
// Don't build full CSA for VF=ElementCount::getFixed(1)
8711+
bool IsScalarVF = LoopVectorizationPlanner::getDecisionAndClampRange(
8712+
[&](ElementCount VF) { return VF.isScalar(); }, Range);
8713+
87108714
// When the VF=getFixed(1), InitData is just InitScalar.
8711-
if (!InitData)
8712-
InitData = State->getVPInitScalar();
8715+
VPValue *InitData =
8716+
IsScalarVF ? InitScalar
8717+
: getVPValueOrAddLiveIn(PoisonValue::get(Phi->getType()));
87138718
PhiRecipe = new VPCSAHeaderPHIRecipe(Phi, InitData);
8714-
State->setPhiRecipe(cast<VPCSAHeaderPHIRecipe>(PhiRecipe));
87158719
} else {
87168720
llvm_unreachable(
87178721
"can only widen reductions, fixed-order recurrences, and CSAs here");
@@ -8752,13 +8756,17 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
87528756
return CSADescriptor::isCSASelect(CSA.second, SI);
87538757
});
87548758
if (CSADescIt != Legal->getCSAs().end()) {
8755-
PHINode *CSAPhi = CSADescIt->first;
8756-
VPCSAState *State = Plan.getCSAStates().find(CSAPhi)->second;
8757-
VPValue *VPDataPhi = State->getPhiRecipe();
8758-
auto *R = new VPCSADataUpdateRecipe(
8759-
SI, {VPDataPhi, Operands[0], Operands[1], Operands[2]});
8760-
State->setDataUpdate(R);
8761-
return R;
8759+
for (VPRecipeBase &R :
8760+
Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
8761+
if (auto PhiR = dyn_cast<VPCSAHeaderPHIRecipe>(&R)) {
8762+
if (PhiR->getUnderlyingInstr() == CSADescIt->first) {
8763+
auto *R = new VPCSADataUpdateRecipe(
8764+
SI, {PhiR, Operands[0], Operands[1], Operands[2]});
8765+
PhiR->setDataUpdate(R);
8766+
return R;
8767+
}
8768+
}
8769+
}
87628770
}
87638771

87648772
return new VPWidenSelectRecipe(
@@ -8773,44 +8781,6 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
87738781
return tryToWiden(Instr, Operands, VPBB);
87748782
}
87758783

8776-
/// Add CSA Recipes that can occur before each instruction in the input IR
8777-
/// is processed and introduced into VPlan.
8778-
static void
8779-
addCSAPreprocessRecipes(const LoopVectorizationLegality::CSAList &CSAs,
8780-
Loop *OrigLoop, VPBasicBlock *PreheaderVPBB,
8781-
VPBasicBlock *HeaderVPBB, DebugLoc DL, VFRange &Range,
8782-
VPlan &Plan, VPRecipeBuilder &Builder) {
8783-
8784-
// Don't build full CSA for VF=ElementCount::getFixed(1)
8785-
bool IsScalarVF = LoopVectorizationPlanner::getDecisionAndClampRange(
8786-
[&](ElementCount VF) { return VF.isScalar(); }, Range);
8787-
8788-
for (const auto &CSA : CSAs) {
8789-
VPValue *VPInitScalar = Plan.getOrAddLiveIn(
8790-
CSA.first->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
8791-
8792-
// Scalar VF builds the scalar version of the loop. In that case,
8793-
// no maintenance of mask nor extraction in middle block is needed.
8794-
if (IsScalarVF) {
8795-
VPCSAState *S = new VPCSAState(VPInitScalar);
8796-
Plan.addCSAState(CSA.first, S);
8797-
continue;
8798-
}
8799-
8800-
VPBuilder PHB(PreheaderVPBB);
8801-
auto *VPInitMask = Builder.getVPValueOrAddLiveIn(
8802-
ConstantInt::getFalse(Type::getInt1Ty(CSA.first->getContext())));
8803-
auto *VPInitData =
8804-
Builder.getVPValueOrAddLiveIn(PoisonValue::get(CSA.first->getType()));
8805-
8806-
VPBuilder HB(HeaderVPBB);
8807-
auto *VPMaskPhi = HB.createCSAMaskPhi(VPInitMask, DL, "csa.mask.phi");
8808-
8809-
auto *S = new VPCSAState(VPInitScalar, VPInitData, VPMaskPhi);
8810-
Plan.addCSAState(CSA.first, S);
8811-
}
8812-
}
8813-
88148784
/// Add CSA Recipes that must occur after each instruction in the input IR
88158785
/// is processed and introduced into VPlan.
88168786
static void
@@ -8823,60 +8793,57 @@ addCSAPostprocessRecipes(VPRecipeBuilder &RecipeBuilder,
88238793
[&](ElementCount VF) { return VF.isScalar(); }, Range))
88248794
return;
88258795

8796+
VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
88268797
for (const auto &CSA : CSAs) {
8827-
VPCSAState *CSAState = Plan.getCSAStates().find(CSA.first)->second;
8828-
VPCSADataUpdateRecipe *VPDataUpdate = CSAState->getDataUpdate();
8798+
// Build the MaskPhi recipe.
8799+
auto *VPInitMask = RecipeBuilder.getVPValueOrAddLiveIn(
8800+
ConstantInt::getFalse(Type::getInt1Ty(CSA.first->getContext())));
8801+
VPBuilder B;
8802+
B.setInsertPoint(Header, Header->getFirstNonPhi());
8803+
auto *VPMaskPhi = B.createCSAMaskPhi(VPInitMask, DL, "csa.mask.phi");
8804+
B.clearInsertionPoint();
88298805

8830-
assert(VPDataUpdate &&
8831-
"VPDataUpdate must have been introduced prior to postprocess");
8832-
assert(CSA.second.getCond() &&
8833-
"CSADescriptor must know how to describe the condition");
88348806
auto GetVPValue = [&](Value *I) {
88358807
return RecipeBuilder.getRecipe(cast<Instruction>(I))->getVPSingleValue();
88368808
};
8837-
VPValue *WidenedCond = GetVPValue(CSA.second.getCond());
8838-
VPValue *VPInitScalar = CSAState->getVPInitScalar();
8809+
VPCSADataUpdateRecipe *VPDataUpdate = cast<VPCSADataUpdateRecipe>(
8810+
cast<VPCSAHeaderPHIRecipe>(GetVPValue(CSA.first))->getVPNewData());
88398811

88408812
// The CSA optimization wants to use a condition such that when it is
88418813
// true, a new value is assigned. However, it is possible that a true lane
88428814
// in WidenedCond corresponds to selection of the initial value instead.
88438815
// In that case, we must use the negation of WidenedCond.
88448816
// i.e. select cond new_val old_val versus select cond.not old_val new_val
8817+
assert(CSA.second.getCond() &&
8818+
"CSADescriptor must know how to describe the condition");
8819+
VPValue *WidenedCond = GetVPValue(CSA.second.getCond());
88458820
VPValue *CondToUse = WidenedCond;
8846-
VPBuilder B;
88478821
if (cast<SelectInst>(CSA.second.getAssignment())->getTrueValue() ==
88488822
CSA.first) {
88498823
auto *VPNotCond = B.createNot(WidenedCond, DL);
8850-
VPNotCond->insertBefore(
8851-
GetVPValue(CSA.second.getAssignment())->getDefiningRecipe());
8824+
VPNotCond->insertBefore(VPDataUpdate);
88528825
CondToUse = VPNotCond;
88538826
}
88548827

8855-
auto *VPAnyActive =
8856-
B.createAnyActive(CondToUse, DL, "csa.cond.anyactive");
8857-
VPAnyActive->insertBefore(
8858-
GetVPValue(CSA.second.getAssignment())->getDefiningRecipe());
8828+
auto *VPAnyActive = B.createAnyActive(CondToUse, DL, "csa.cond.anyactive");
8829+
VPAnyActive->insertBefore(VPDataUpdate);
88598830

8860-
auto *VPMaskSel = B.createCSAMaskSel(CondToUse, CSAState->getVPMaskPhi(),
8861-
VPAnyActive, DL, "csa.mask.sel");
8831+
auto *VPMaskSel = B.createCSAMaskSel(CondToUse, VPMaskPhi, VPAnyActive, DL,
8832+
"csa.mask.sel");
88628833
VPMaskSel->insertAfter(VPAnyActive);
8834+
88638835
VPDataUpdate->setVPNewMaskAndVPAnyActive(VPMaskSel, VPAnyActive);
8836+
VPValue *VPInitScalar = Plan.getOrAddLiveIn(
8837+
CSA.first->getIncomingValueForBlock(OrigLoop->getLoopPreheader()));
8838+
SmallVector<PHINode *> PhiToFix;
8839+
for (User *U : VPDataUpdate->getUnderlyingValue()->users())
8840+
if (auto *Phi = dyn_cast<PHINode>(U);
8841+
Phi && Phi->getParent() == OrigLoop->getUniqueExitBlock())
8842+
PhiToFix.emplace_back(Phi);
88648843
VPCSAExtractScalarRecipe *ExtractScalarRecipe =
8865-
new VPCSAExtractScalarRecipe({VPInitScalar, VPMaskSel, VPDataUpdate});
8866-
8844+
new VPCSAExtractScalarRecipe({VPInitScalar, VPMaskSel, VPDataUpdate},
8845+
PhiToFix);
88678846
MiddleVPBB->insert(ExtractScalarRecipe, MiddleVPBB->getFirstNonPhi());
8868-
8869-
// Update CSAState with new recipes
8870-
CSAState->setExtractScalarRecipe(ExtractScalarRecipe);
8871-
CSAState->setVPAnyActive(VPAnyActive);
8872-
8873-
// Add live out for the CSA. We should be in LCSSA, so we are looking for
8874-
// Phi users in the unique exit block of the original updated value.
8875-
BasicBlock *OrigExit = OrigLoop->getUniqueExitBlock();
8876-
assert(OrigExit && "Expected a single exit block");
8877-
for (User *U :VPDataUpdate->getUnderlyingValue()->users())
8878-
if (auto *Phi = dyn_cast<PHINode>(U); Phi && Phi->getParent() == OrigExit)
8879-
Plan.addLiveOut(Phi, ExtractScalarRecipe);
88808847
}
88818848
}
88828849

@@ -9194,11 +9161,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
91949161

91959162
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, Legal, CM, PSE, Builder);
91969163

9197-
addCSAPreprocessRecipes(Legal->getCSAs(), OrigLoop, Plan->getPreheader(),
9198-
Plan->getVectorLoopRegion()->getEntryBasicBlock(), DL,
9199-
Range, *Plan, RecipeBuilder);
9200-
9201-
92029164
// ---------------------------------------------------------------------------
92039165
// Pre-construction: record ingredients whose recipes we'll need to further
92049166
// process after constructing the initial VPlan.

llvm/lib/Transforms/Vectorize/VPlan.cpp

-3
Original file line numberDiff line numberDiff line change
@@ -856,9 +856,6 @@ VPlan::~VPlan() {
856856
delete VPV;
857857
if (BackedgeTakenCount)
858858
delete BackedgeTakenCount;
859-
860-
for (std::pair<PHINode *, VPCSAState *> &S : CSAStates)
861-
delete S.second;
862859
}
863860

864861
VPIRBasicBlock *VPIRBasicBlock::fromBasicBlock(BasicBlock *IRBB) {

llvm/lib/Transforms/Vectorize/VPlan.h

+13-62
Original file line numberDiff line numberDiff line change
@@ -231,53 +231,6 @@ class VPLane {
231231
}
232232
};
233233

234-
class VPInstruction;
235-
class VPCSAHeaderPHIRecipe;
236-
class VPCSADataUpdateRecipe;
237-
class VPCSAExtractScalarRecipe;
238-
239-
/// VPCSAState holds information required to vectorize a conditional scalar
240-
/// assignment.
241-
class VPCSAState {
242-
VPValue *VPInitScalar = nullptr;
243-
VPValue *VPInitData = nullptr;
244-
VPInstruction *VPMaskPhi = nullptr;
245-
VPInstruction *VPAnyActive = nullptr;
246-
VPCSAHeaderPHIRecipe *VPPhiRecipe = nullptr;
247-
VPCSADataUpdateRecipe *VPDataUpdate = nullptr;
248-
VPCSAExtractScalarRecipe *VPExtractScalar = nullptr;
249-
250-
public:
251-
VPCSAState(VPValue *VPInitScalar, VPValue *InitData,
252-
VPInstruction *MaskPhi)
253-
: VPInitScalar(VPInitScalar), VPInitData(InitData), VPMaskPhi(MaskPhi) {}
254-
255-
VPCSAState(VPValue *VPInitScalar) : VPInitScalar(VPInitScalar) {}
256-
257-
VPValue *getVPInitScalar() const { return VPInitScalar; }
258-
259-
VPValue *getVPInitData() const { return VPInitData; }
260-
261-
VPInstruction *getVPMaskPhi() const { return VPMaskPhi; }
262-
263-
void setVPAnyActive(VPInstruction *AnyActive) { VPAnyActive = AnyActive; }
264-
VPInstruction *getVPAnyActive() { return VPAnyActive; }
265-
266-
VPCSAHeaderPHIRecipe *getPhiRecipe() const { return VPPhiRecipe; }
267-
268-
void setPhiRecipe(VPCSAHeaderPHIRecipe *R) { VPPhiRecipe = R; }
269-
270-
VPCSADataUpdateRecipe *getDataUpdate() const { return VPDataUpdate; }
271-
void setDataUpdate(VPCSADataUpdateRecipe *R) { VPDataUpdate = R; }
272-
273-
void setExtractScalarRecipe(VPCSAExtractScalarRecipe *R) {
274-
VPExtractScalar = R;
275-
}
276-
VPCSAExtractScalarRecipe *getExtractScalarRecipe() const {
277-
return VPExtractScalar;
278-
}
279-
};
280-
281234
/// VPTransformState holds information passed down when "executing" a VPlan,
282235
/// needed for generating the output IR.
283236
struct VPTransformState {
@@ -2893,7 +2846,10 @@ class VPCSAHeaderPHIRecipe final : public VPHeaderPHIRecipe {
28932846
}
28942847

28952848
VPValue *getVPInitData() { return getOperand(0); }
2896-
VPValue *getVPNewData() { return getOperand(1); }
2849+
2850+
VPValue *NewData = nullptr;
2851+
void setDataUpdate(VPValue *V) { NewData = V; }
2852+
VPValue *getVPNewData() { return NewData; }
28972853
};
28982854

28992855
class VPCSADataUpdateRecipe final : public VPSingleDefRecipe {
@@ -2947,15 +2903,19 @@ class VPCSADataUpdateRecipe final : public VPSingleDefRecipe {
29472903
};
29482904

29492905
class VPCSAExtractScalarRecipe final : public VPSingleDefRecipe {
2906+
SmallVector<PHINode *> PhisToFix;
2907+
29502908
public:
2951-
VPCSAExtractScalarRecipe(ArrayRef<VPValue *> Operands)
2952-
: VPSingleDefRecipe(VPDef::VPCSAExtractScalarSC, Operands) {}
2909+
VPCSAExtractScalarRecipe(ArrayRef<VPValue *> Operands,
2910+
SmallVector<PHINode *> PhisToFix)
2911+
: VPSingleDefRecipe(VPDef::VPCSAExtractScalarSC, Operands),
2912+
PhisToFix(PhisToFix) {}
29532913

29542914
~VPCSAExtractScalarRecipe() override = default;
29552915

29562916
VPCSAExtractScalarRecipe *clone() override {
29572917
SmallVector<VPValue *> Ops(operands());
2958-
return new VPCSAExtractScalarRecipe(Ops);
2918+
return new VPCSAExtractScalarRecipe(Ops, PhisToFix);
29592919
}
29602920

29612921
void execute(VPTransformState &State) override;
@@ -2969,6 +2929,8 @@ class VPCSAExtractScalarRecipe final : public VPSingleDefRecipe {
29692929
VPSlotTracker &SlotTracker) const override;
29702930
#endif
29712931

2932+
VP_CLASSOF_IMPL(VPDef::VPCSAExtractScalarSC)
2933+
29722934
VPValue *getVPInitScalar() const { return getOperand(0); }
29732935
VPValue *getVPMaskSel() const { return getOperand(1); }
29742936
VPValue *getVPDataSel() const { return getOperand(2); }
@@ -3948,11 +3910,6 @@ class VPlan {
39483910
/// definitions are VPValues that hold a pointer to their underlying IR.
39493911
SmallVector<VPValue *, 16> VPLiveInsToFree;
39503912

3951-
/// Values used outside the plan. It contains live-outs that need fixing. Any
3952-
/// live-out that is fixed outside VPlan needs to be removed. The remaining
3953-
/// live-outs are fixed via VPLiveOut::fixPhi.
3954-
MapVector<PHINode *, VPLiveOut *> LiveOuts;
3955-
39563913
/// Mapping from SCEVs to the VPValues representing their expansions.
39573914
/// NOTE: This mapping is temporary and will be removed once all users have
39583915
/// been modeled in VPlan directly.
@@ -4003,12 +3960,6 @@ class VPlan {
40033960
bool RequiresScalarEpilogueCheck,
40043961
bool TailFolded, Loop *TheLoop);
40053962

4006-
void addCSAState(PHINode *Phi, VPCSAState *S) { CSAStates.insert({Phi, S}); }
4007-
4008-
MapVector<PHINode *, VPCSAState *> const &getCSAStates() const {
4009-
return CSAStates;
4010-
}
4011-
40123963
/// Prepare the plan for execution, setting up the required live-in values.
40133964
void prepareToExecute(Value *TripCount, Value *VectorTripCount,
40143965
Value *CanonicalIVStartValue, VPTransformState &State);

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

+6-2
Original file line numberDiff line numberDiff line change
@@ -672,8 +672,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
672672
ConstantInt::get(WidenedCond->getType()->getScalarType(), 0);
673673
Value *AnyActive = State.Builder.CreateIntrinsic(
674674
WidenedCond->getType()->getScalarType(), Intrinsic::vp_reduce_or,
675-
{StartValue, WidenedCond, AllOnesMask, EVL}, nullptr,
676-
"any.active");
675+
{StartValue, WidenedCond, AllOnesMask, EVL}, nullptr, "any.active");
677676
return AnyActive;
678677
}
679678
case VPInstruction::CSAVLPhi: {
@@ -2502,6 +2501,7 @@ void VPCSADataUpdateRecipe::execute(VPTransformState &State) {
25022501
"csa.data.sel");
25032502

25042503
DataPhi->addIncoming(DataSel, State.CFG.PrevBB);
2504+
25052505
State.set(this, DataSel);
25062506
}
25072507

@@ -2592,6 +2592,10 @@ void VPCSAExtractScalarRecipe::execute(VPTransformState &State) {
25922592
Value *LastIdxGEZero = State.Builder.CreateICmpSGE(LastIdx, Zero);
25932593
Value *ChooseFromVecOrInit =
25942594
State.Builder.CreateSelect(LastIdxGEZero, ExtractFromVec, InitScalar);
2595+
2596+
for (PHINode *Phi : PhisToFix)
2597+
Phi->addIncoming(ChooseFromVecOrInit, State.CFG.ExitBB);
2598+
25952599
State.set(this, ChooseFromVecOrInit, /*IsScalar=*/true);
25962600
}
25972601

0 commit comments

Comments
 (0)