-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SelectionDAG][X86] Split via Concat <n x T> vector types for atomic load #120640
base: users/jofrn/spr/main/2894ccd1
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-x86 Author: None (jofrn) Changes
Stack:
Full diff: https://github.com/llvm/llvm-project/pull/120640.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 3b3dddc44e3682..e0cd7319ac034b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -946,6 +946,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 7c4caa96244b8b..44adc3fdb4a5a7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1146,6 +1146,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SplitVecRes_STEP_VECTOR(N, Lo, Hi);
break;
case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+ case ISD::ATOMIC_LOAD:
+ SplitVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N), Lo, Hi);
+ break;
case ISD::LOAD:
SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
break;
@@ -2079,6 +2082,38 @@ void DAGTypeLegalizer::SplitVecRes_VP_SPLAT(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, N->getOperand(0), MaskHi, EVLHi);
}
+void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo,
+ SDValue &Hi) {
+ EVT LoVT, HiVT;
+ SDLoc dl(LD);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0));
+
+ SDValue Ch = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ EVT MemoryVT = LD->getMemoryVT();
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ Lo = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, LoMemVT, LoMemVT, Ch, Ptr,
+ LD->getMemOperand());
+
+ MachinePointerInfo MPI;
+ IncrementPointer(LD, LoMemVT, MPI, Ptr);
+
+ Hi = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, HiMemVT, HiMemVT, Ch, Ptr,
+ LD->getMemOperand());
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(LD, 1), Ch);
+}
+
void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
SDValue &Hi) {
assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index ba1bc4d98537d1..302a94aa9c1f60 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -176,6 +176,62 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) nounwind {
ret <2 x float> %ret
}
+define <2 x half> @atomic_vec2_half(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec2_half:
+; CHECK3: ## %bb.0:
+; CHECK3-NEXT: movzwl (%rdi), %eax
+; CHECK3-NEXT: movzwl 2(%rdi), %ecx
+; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK3-NEXT: pinsrw $0, %ecx, %xmm1
+; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK3-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec2_half:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: movw (%rdi), %dx
+; CHECK0-NEXT: movw 2(%rdi), %cx
+; CHECK0-NEXT: ## implicit-def: $eax
+; CHECK0-NEXT: movw %dx, %ax
+; CHECK0-NEXT: ## implicit-def: $xmm0
+; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK0-NEXT: ## implicit-def: $eax
+; CHECK0-NEXT: movw %cx, %ax
+; CHECK0-NEXT: ## implicit-def: $xmm1
+; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
+; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK0-NEXT: retq
+ %ret = load atomic <2 x half>, ptr %x acquire, align 4
+ ret <2 x half> %ret
+}
+
+define <2 x bfloat> @atomic_vec2_bfloat(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec2_bfloat:
+; CHECK3: ## %bb.0:
+; CHECK3-NEXT: movzwl (%rdi), %eax
+; CHECK3-NEXT: movzwl 2(%rdi), %ecx
+; CHECK3-NEXT: pinsrw $0, %ecx, %xmm1
+; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK3-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec2_bfloat:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: movw (%rdi), %cx
+; CHECK0-NEXT: movw 2(%rdi), %dx
+; CHECK0-NEXT: ## implicit-def: $eax
+; CHECK0-NEXT: movw %dx, %ax
+; CHECK0-NEXT: ## implicit-def: $xmm1
+; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
+; CHECK0-NEXT: ## implicit-def: $eax
+; CHECK0-NEXT: movw %cx, %ax
+; CHECK0-NEXT: ## implicit-def: $xmm0
+; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK0-NEXT: retq
+ %ret = load atomic <2 x bfloat>, ptr %x acquire, align 4
+ ret <2 x bfloat> %ret
+}
+
define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
; CHECK3-LABEL: atomic_vec1_ptr:
; CHECK3: ## %bb.0:
|
8ed9199
to
c9bdb95
Compare
b2f0b33
to
6737dda
Compare
c9bdb95
to
94a71a3
Compare
6737dda
to
2949391
Compare
94a71a3
to
34df4f7
Compare
34df4f7
to
761d4d9
Compare
2949391
to
78adf01
Compare
6506acb
to
db674f8
Compare
@@ -1146,6 +1146,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { | |||
SplitVecRes_STEP_VECTOR(N, Lo, Hi); | |||
break; | |||
case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; | |||
case ISD::ATOMIC_LOAD: | |||
SplitVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N), Lo, Hi); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is not a valid legalization. This should be coerced to an equivalent width integer as a default action
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After trying to coerce them via addRegisterClass
and setOperationPromotedToType
, it changes the what the DAG can match on, for example a non-atomic load with type <2 x half>
is affected. To avoid having to rewrite a bunch of patterns, it now builds a vector out of the elements that are split.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't understand the comment. This is unrelated to the set of legal types or legalization actions. There is no need to touch any patterns
db674f8
to
13ea377
Compare
23c9ff2
to
2c51f72
Compare
13ea377
to
e11194d
Compare
2c51f72
to
3a82883
Compare
3a82883
to
36161df
Compare
e11194d
to
3be4fa0
Compare
You can test this locally with the following command:git-clang-format --diff 04f0fd9229a40ce8b3ee556bcce0486fa6bc9fa5 bec6d57db1c13a0840d2a60f5d4ea860a7710853 --extensions cpp,h -- llvm/include/llvm/CodeGen/SelectionDAG.h llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp llvm/lib/Target/X86/X86ISelLowering.cpp View the diff from clang-format here.diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 0fa318fcdc..bbde4e0437 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1405,13 +1405,13 @@ void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
unsigned NumElts = MemoryVT.getVectorMinNumElements();
EVT IntMemoryVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts);
- EVT ElemVT = EVT::getVectorVT(*DAG.getContext(),
- MemoryVT.getVectorElementType(), 1);
+ EVT ElemVT =
+ EVT::getVectorVT(*DAG.getContext(), MemoryVT.getVectorElementType(), 1);
// Create a single atomic to load all the elements at once.
- SDValue Atomic = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, IntMemoryVT, IntMemoryVT,
- LD->getChain(), LD->getBasePtr(),
- LD->getMemOperand());
+ SDValue Atomic =
+ DAG.getAtomic(ISD::ATOMIC_LOAD, dl, IntMemoryVT, IntMemoryVT,
+ LD->getChain(), LD->getBasePtr(), LD->getMemOperand());
// Instead of splitting, put all the elements back into a vector.
SmallVector<SDValue, 4> Ops;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index a19af64a79..d09bc81d50 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -195,8 +195,7 @@ bool BaseIndexOffset::contains(const SelectionDAG &DAG, int64_t BitSize,
}
template <typename T>
-static BaseIndexOffset matchSDNode(const T *N,
- const SelectionDAG &DAG) {
+static BaseIndexOffset matchSDNode(const T *N, const SelectionDAG &DAG) {
SDValue Ptr = N->getBasePtr();
// (((B + I*M) + c)) + c ...
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2e4f3178fa..33c0e979c0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7132,7 +7132,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
APInt ZeroMask = APInt::getZero(NumElems);
APInt UndefMask = APInt::getZero(NumElems);
- SmallVector<T*, 8> Loads(NumElems, nullptr);
+ SmallVector<T *, 8> Loads(NumElems, nullptr);
SmallVector<int64_t, 8> ByteOffsets(NumElems, 0);
// For each element in the initializer, see if we've found a load, zero or an
@@ -7296,7 +7296,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), HalfNumElems);
SDValue HalfLD =
EltsFromConsecutiveLoads<T>(HalfVT, Elts.drop_back(HalfNumElems), DL,
- DAG, Subtarget, IsAfterLegalize);
+ DAG, Subtarget, IsAfterLegalize);
if (HalfLD)
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
HalfLD, DAG.getIntPtrConstant(0, DL));
@@ -7414,7 +7414,7 @@ static SDValue combineToConsecutiveLoads(EVT VT, SDValue Op, const SDLoc &DL,
}
assert(Elts.size() == VT.getVectorNumElements());
return EltsFromConsecutiveLoads<LoadSDNode>(VT, Elts, DL, DAG, Subtarget,
- IsAfterLegalize);
+ IsAfterLegalize);
}
static Constant *getConstantVector(MVT VT, ArrayRef<APInt> Bits,
@@ -9267,11 +9267,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// See if we can use a vector load to get all of the elements.
{
SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElems);
- if (SDValue LD =
- EltsFromConsecutiveLoads<LoadSDNode>(VT, Ops, dl, DAG, Subtarget, false)) {
+ if (SDValue LD = EltsFromConsecutiveLoads<LoadSDNode>(VT, Ops, dl, DAG,
+ Subtarget, false)) {
return LD;
- } else if (SDValue LD =
- EltsFromConsecutiveLoads<AtomicSDNode>(VT, Ops, dl, DAG, Subtarget, false)) {
+ } else if (SDValue LD = EltsFromConsecutiveLoads<AtomicSDNode>(
+ VT, Ops, dl, DAG, Subtarget, false)) {
return LD;
}
}
@@ -57992,8 +57992,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
if (TLI->allowsMemoryAccess(Ctx, DAG.getDataLayout(), VT,
*FirstLd->getMemOperand(), &Fast) &&
Fast) {
- if (SDValue Ld =
- EltsFromConsecutiveLoads<LoadSDNode>(VT, Ops, DL, DAG, Subtarget, false))
+ if (SDValue Ld = EltsFromConsecutiveLoads<LoadSDNode>(VT, Ops, DL, DAG,
+ Subtarget, false))
return Ld;
}
}
|
3be4fa0
to
0cb57c2
Compare
f0d0f17
to
a96fdf1
Compare
0cb57c2
to
ba2a301
Compare
@@ -1146,6 +1146,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { | |||
SplitVecRes_STEP_VECTOR(N, Lo, Hi); | |||
break; | |||
case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; | |||
case ISD::ATOMIC_LOAD: | |||
SplitVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N), Lo, Hi); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't understand the comment. This is unrelated to the set of legal types or legalization actions. There is no need to touch any patterns
EVT LoMemVT, HiMemVT; | ||
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); | ||
|
||
Lo = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, LoMemVT, LoMemVT, Ch, Ptr, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should create one ATOMIC_LOAD with the bitcast integer type. You then unpack that result into the expected Lo/Hi, not the direct atomic results
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Title should also not say split, this is forcing the type to a legal integer
ba2a301
to
f129a3c
Compare
a96fdf1
to
e07e225
Compare
…load Vector types that aren't widened are 'split' via CONCAT_VECTORS so that a single ATOMIC_LOAD is issued for the entire vector at once. This change utilizes the load vectorization infrastructure in SelectionDAG in order to group the vectors. This enables SelectionDAG to translate vectors with type bfloat,half. commit-id:3a045357
f129a3c
to
bec6d57
Compare
Vector types that aren't widened are 'split' via CONCAT_VECTORS
so that a single ATOMIC_LOAD is issued for the entire vector at once.
This change utilizes the load vectorization infrastructure in
SelectionDAG in order to group the vectors. This enables SelectionDAG
to translate vectors with type bfloat,half.
Stack: