Skip to content

Commit fc2debe

Browse files
committed
[SelectionDAG][X86] Remove unused elements from atomic vector.
After splitting, all elements are created. The two components must be found by looking at the upper and lower halves of EXTRACT_ELEMENT. This change extends EltsFromConsecutiveLoads to understand AtomicSDNode so that unused elements can be removed. commit-id:b83937a8
1 parent db5b862 commit fc2debe

File tree

6 files changed

+83
-187
lines changed

6 files changed

+83
-187
lines changed

llvm/include/llvm/CodeGen/SelectionDAG.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1843,7 +1843,7 @@ class SelectionDAG {
18431843
/// chain to the token factor. This ensures that the new memory node will have
18441844
/// the same relative memory dependency position as the old load. Returns the
18451845
/// new merged load chain.
1846-
SDValue makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp);
1846+
SDValue makeEquivalentMemoryOrdering(MemSDNode *OldLoad, SDValue NewMemOp);
18471847

18481848
/// Topological-sort the AllNodes list and
18491849
/// assign a unique node id for each node in the DAG based on their
@@ -2281,7 +2281,7 @@ class SelectionDAG {
22812281
/// merged. Check that both are nonvolatile and if LD is loading
22822282
/// 'Bytes' bytes from a location that is 'Dist' units away from the
22832283
/// location that the 'Base' load is loading from.
2284-
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base,
2284+
bool areNonVolatileConsecutiveLoads(MemSDNode *LD, MemSDNode *Base,
22852285
unsigned Bytes, int Dist) const;
22862286

22872287
/// Infer alignment of a load / store address. Return std::nullopt if it

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 12 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -12218,7 +12218,7 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(SDValue OldChain,
1221812218
return TokenFactor;
1221912219
}
1222012220

12221-
SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
12221+
SDValue SelectionDAG::makeEquivalentMemoryOrdering(MemSDNode *OldLoad,
1222212222
SDValue NewMemOp) {
1222312223
assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node");
1222412224
SDValue OldChain = SDValue(OldLoad, 1);
@@ -12911,17 +12911,21 @@ std::pair<SDValue, SDValue> SelectionDAG::UnrollVectorOverflowOp(
1291112911
getBuildVector(NewOvVT, dl, OvScalars));
1291212912
}
1291312913

12914-
bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
12915-
LoadSDNode *Base,
12914+
bool SelectionDAG::areNonVolatileConsecutiveLoads(MemSDNode *LD,
12915+
MemSDNode *Base,
1291612916
unsigned Bytes,
1291712917
int Dist) const {
1291812918
if (LD->isVolatile() || Base->isVolatile())
1291912919
return false;
12920-
// TODO: probably too restrictive for atomics, revisit
12921-
if (!LD->isSimple())
12922-
return false;
12923-
if (LD->isIndexed() || Base->isIndexed())
12924-
return false;
12920+
if (auto Ld = dyn_cast<LoadSDNode>(LD)) {
12921+
if (!Ld->isSimple())
12922+
return false;
12923+
if (Ld->isIndexed())
12924+
return false;
12925+
}
12926+
if (auto Ld = dyn_cast<LoadSDNode>(Base))
12927+
if (Ld->isIndexed())
12928+
return false;
1292512929
if (LD->getChain() != Base->getChain())
1292612930
return false;
1292712931
EVT VT = LD->getMemoryVT();

llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp

Lines changed: 17 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -195,8 +195,8 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, int64_t BitSize,
195195
}
196196

197197
/// Parses tree in Ptr for base, index, offset addresses.
198-
static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
199-
const SelectionDAG &DAG) {
198+
template <typename T>
199+
static BaseIndexOffset matchSDNode(const T *N, const SelectionDAG &DAG) {
200200
SDValue Ptr = N->getBasePtr();
201201

202202
// (((B + I*M) + c)) + c ...
@@ -206,16 +206,18 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
206206
bool IsIndexSignExt = false;
207207

208208
// pre-inc/pre-dec ops are components of EA.
209-
if (N->getAddressingMode() == ISD::PRE_INC) {
210-
if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
211-
Offset += C->getSExtValue();
212-
else // If unknown, give up now.
213-
return BaseIndexOffset(SDValue(), SDValue(), 0, false);
214-
} else if (N->getAddressingMode() == ISD::PRE_DEC) {
215-
if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
216-
Offset -= C->getSExtValue();
217-
else // If unknown, give up now.
218-
return BaseIndexOffset(SDValue(), SDValue(), 0, false);
209+
if constexpr (std::is_same_v<T, LSBaseSDNode>) {
210+
if (N->getAddressingMode() == ISD::PRE_INC) {
211+
if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
212+
Offset += C->getSExtValue();
213+
else // If unknown, give up now.
214+
return BaseIndexOffset(SDValue(), SDValue(), 0, false);
215+
} else if (N->getAddressingMode() == ISD::PRE_DEC) {
216+
if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset()))
217+
Offset -= C->getSExtValue();
218+
else // If unknown, give up now.
219+
return BaseIndexOffset(SDValue(), SDValue(), 0, false);
220+
}
219221
}
220222

221223
// Consume constant adds & ors with appropriate masking.
@@ -300,8 +302,10 @@ static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
300302

301303
BaseIndexOffset BaseIndexOffset::match(const SDNode *N,
302304
const SelectionDAG &DAG) {
305+
if (const auto *AN = dyn_cast<AtomicSDNode>(N))
306+
return matchSDNode(AN, DAG);
303307
if (const auto *LS0 = dyn_cast<LSBaseSDNode>(N))
304-
return matchLSNode(LS0, DAG);
308+
return matchSDNode(LS0, DAG);
305309
if (const auto *LN = dyn_cast<LifetimeSDNode>(N)) {
306310
if (LN->hasOffset())
307311
return BaseIndexOffset(LN->getOperand(1), SDValue(), LN->getOffset(),

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5172,7 +5172,11 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
51725172
L = DAG.getPtrExtOrTrunc(L, dl, VT);
51735173

51745174
setValue(&I, L);
5175-
DAG.setRoot(OutChain);
5175+
5176+
if (VT.isVector())
5177+
DAG.setRoot(InChain);
5178+
else
5179+
DAG.setRoot(OutChain);
51765180
}
51775181

51785182
void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 31 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -7191,15 +7191,19 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl,
71917191
}
71927192

71937193
// Recurse to find a LoadSDNode source and the accumulated ByteOffset.
7194-
static bool findEltLoadSrc(SDValue Elt, LoadSDNode *&Ld, int64_t &ByteOffset) {
7195-
if (ISD::isNON_EXTLoad(Elt.getNode())) {
7196-
auto *BaseLd = cast<LoadSDNode>(Elt);
7197-
if (!BaseLd->isSimple())
7198-
return false;
7194+
static bool findEltLoadSrc(SDValue Elt, MemSDNode *&Ld, int64_t &ByteOffset) {
7195+
if (auto *BaseLd = dyn_cast<AtomicSDNode>(Elt)) {
71997196
Ld = BaseLd;
72007197
ByteOffset = 0;
72017198
return true;
7202-
}
7199+
} else if (auto *BaseLd = dyn_cast<LoadSDNode>(Elt))
7200+
if (ISD::isNON_EXTLoad(Elt.getNode())) {
7201+
if (!BaseLd->isSimple())
7202+
return false;
7203+
Ld = BaseLd;
7204+
ByteOffset = 0;
7205+
return true;
7206+
}
72037207

72047208
switch (Elt.getOpcode()) {
72057209
case ISD::BITCAST:
@@ -7228,6 +7232,20 @@ static bool findEltLoadSrc(SDValue Elt, LoadSDNode *&Ld, int64_t &ByteOffset) {
72287232
}
72297233
}
72307234
break;
7235+
case ISD::EXTRACT_ELEMENT:
7236+
if (auto *IdxC = dyn_cast<ConstantSDNode>(Elt.getOperand(1))) {
7237+
SDValue Src = Elt.getOperand(0);
7238+
unsigned SrcSizeInBits = Src.getScalarValueSizeInBits();
7239+
unsigned DstSizeInBits = Elt.getScalarValueSizeInBits();
7240+
if (2 * DstSizeInBits == SrcSizeInBits && (SrcSizeInBits % 8) == 0 &&
7241+
findEltLoadSrc(Src, Ld, ByteOffset)) {
7242+
uint64_t Idx = IdxC->getZExtValue();
7243+
if (Idx == 1) // Get the upper half.
7244+
ByteOffset += SrcSizeInBits / (1 << 2 * 2);
7245+
return true;
7246+
}
7247+
}
7248+
break;
72317249
}
72327250

72337251
return false;
@@ -7252,7 +7270,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
72527270
APInt ZeroMask = APInt::getZero(NumElems);
72537271
APInt UndefMask = APInt::getZero(NumElems);
72547272

7255-
SmallVector<LoadSDNode*, 8> Loads(NumElems, nullptr);
7273+
SmallVector<MemSDNode *, 8> Loads(NumElems, nullptr);
72567274
SmallVector<int64_t, 8> ByteOffsets(NumElems, 0);
72577275

72587276
// For each element in the initializer, see if we've found a load, zero or an
@@ -7302,7 +7320,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
73027320
EVT EltBaseVT = EltBase.getValueType();
73037321
assert(EltBaseVT.getSizeInBits() == EltBaseVT.getStoreSizeInBits() &&
73047322
"Register/Memory size mismatch");
7305-
LoadSDNode *LDBase = Loads[FirstLoadedElt];
7323+
MemSDNode *LDBase = Loads[FirstLoadedElt];
73067324
assert(LDBase && "Did not find base load for merging consecutive loads");
73077325
unsigned BaseSizeInBits = EltBaseVT.getStoreSizeInBits();
73087326
unsigned BaseSizeInBytes = BaseSizeInBits / 8;
@@ -7316,8 +7334,8 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
73167334

73177335
// Check to see if the element's load is consecutive to the base load
73187336
// or offset from a previous (already checked) load.
7319-
auto CheckConsecutiveLoad = [&](LoadSDNode *Base, int EltIdx) {
7320-
LoadSDNode *Ld = Loads[EltIdx];
7337+
auto CheckConsecutiveLoad = [&](MemSDNode *Base, int EltIdx) {
7338+
MemSDNode *Ld = Loads[EltIdx];
73217339
int64_t ByteOffset = ByteOffsets[EltIdx];
73227340
if (ByteOffset && (ByteOffset % BaseSizeInBytes) == 0) {
73237341
int64_t BaseIdx = EltIdx - (ByteOffset / BaseSizeInBytes);
@@ -7345,7 +7363,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
73457363
}
73467364
}
73477365

7348-
auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, LoadSDNode *LDBase) {
7366+
auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, MemSDNode *LDBase) {
73497367
auto MMOFlags = LDBase->getMemOperand()->getFlags();
73507368
assert(LDBase->isSimple() &&
73517369
"Cannot merge volatile or atomic loads.");
@@ -9402,8 +9420,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
94029420
{
94039421
SmallVector<SDValue, 64> Ops(Op->ops().take_front(NumElems));
94049422
if (SDValue LD =
9405-
EltsFromConsecutiveLoads(VT, Ops, dl, DAG, Subtarget, false))
9423+
EltsFromConsecutiveLoads(VT, Ops, dl, DAG, Subtarget, false)) {
94069424
return LD;
9425+
}
94079426
}
94089427

94099428
// If this is a splat of pairs of 32-bit elements, we can use a narrower

0 commit comments

Comments
 (0)