-
Notifications
You must be signed in to change notification settings - Fork 13.2k
[SelectionDAG][X86] Widen <2 x T> vector types for atomic load #120598
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/jofrn/spr/main/a06a5cc6
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-llvm-selectiondag Author: None (jofrn) Changes
Stack:
Full diff: https://github.com/llvm/llvm-project/pull/120598.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index b81c9f87cb27d7..22b7c15f8768ae 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -1046,6 +1046,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+ SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode* N);
SDValue WidenVecRes_LOAD(SDNode* N);
SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
@@ -1129,8 +1130,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
/// resulting wider type. It takes:
/// LdChain: list of chains for the load to be generated.
/// Ld: load to widen
+ template <typename T>
SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
- LoadSDNode *LD);
+ T *LD, bool IsAtomic = false);
/// Helper function to generate a set of extension loads to load a vector with
/// a resulting wider type. It takes:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index c85e4ba2cfa5a7..4dfdd22ba27869 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4515,6 +4515,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
break;
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::ATOMIC_LOAD:
+ Res = WidenVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
+ break;
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
case ISD::STEP_VECTOR:
case ISD::SPLAT_VECTOR:
@@ -5901,6 +5904,30 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
N->getOperand(1), N->getOperand(2));
}
+SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
+ SmallVector<SDValue, 16> LdChain; // Chain for the series of load
+ SDValue Result = GenWidenVectorLoads(LdChain, N, true /*IsAtomic*/);
+
+ if (Result) {
+ // If we generate a single load, we can use that for the chain. Otherwise,
+ // build a factor node to remember the multiple loads are independent and
+ // chain to that.
+ SDValue NewChain;
+ if (LdChain.size() == 1)
+ NewChain = LdChain[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, LdChain);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return Result;
+ }
+
+ report_fatal_error("Unable to widen atomic vector load");
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
ISD::LoadExtType ExtType = LD->getExtensionType();
@@ -7699,8 +7726,9 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
}
+template <typename T>
SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
- LoadSDNode *LD) {
+ T *LD, bool IsAtomic) {
// The strategy assumes that we can efficiently load power-of-two widths.
// The routine chops the vector into the largest vector loads with the same
// element type or scalar loads and then recombines it to the widen vector
@@ -7757,8 +7785,13 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
} while (TypeSize::isKnownGT(RemainingWidth, NewVTWidth));
}
- SDValue LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),
- LD->getOriginalAlign(), MMOFlags, AAInfo);
+ SDValue LdOp;
+ if (IsAtomic)
+ LdOp = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, *FirstVT, *FirstVT, Chain,
+ BasePtr, LD->getMemOperand());
+ else
+ LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),
+ LD->getOriginalAlign(), MMOFlags, AAInfo);
LdChain.push_back(LdOp.getValue(1));
// Check if we can load the element with one instruction.
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index a57ae767859b10..f3cc39dba95253 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -146,6 +146,16 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
ret <1 x i64> %ret
}
+define <2 x i32> @atomic_vec2_i32_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec2_i32_align:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movq (%rdi), %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: retq
+ %ret = load atomic <2 x i32>, ptr %x acquire, align 8
+ ret <2 x i32> %ret
+}
+
define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
; CHECK3-LABEL: atomic_vec1_ptr:
; CHECK3: ## %bb.0:
|
You can test this locally with the following command:git-clang-format --diff 03400f8c3f2d0eca529d4459e7dec946ea1818ea bc6c3559b910632c59f0c0c07de0031469465e35 --extensions cpp,h -- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp View the diff from clang-format here.diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 7eda83fe2b..e38b41e090 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -6013,7 +6013,7 @@ static std::optional<EVT> findMemType(SelectionDAG &DAG,
SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
EVT WidenVT =
- TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+ TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
EVT LdVT = LD->getMemoryVT();
SDLoc dl(LD);
assert(LdVT.isVector() && WidenVT.isVector());
@@ -6049,8 +6049,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
BasePtr, LD->getMemOperand());
// Load the element with one instruction.
- SDValue Result = loadElement(LdOp, *FirstVT, WidenVT, LdWidth, FirstVTWidth,
- dl);
+ SDValue Result =
+ loadElement(LdOp, *FirstVT, WidenVT, LdWidth, FirstVTWidth, dl);
if (Result) {
// Modified the chain - switch anything that used the old chain to use
|
37e162b
to
f7691a8
Compare
cbaaeeb
to
d558091
Compare
34492d9
to
6952507
Compare
d558091
to
3f37dfc
Compare
6952507
to
3cf21cf
Compare
3f37dfc
to
89503f2
Compare
3cf21cf
to
a769a32
Compare
89503f2
to
b2f0b33
Compare
b2f0b33
to
6737dda
Compare
464bb05
to
ebba505
Compare
6737dda
to
2949391
Compare
ebba505
to
5bbc421
Compare
2949391
to
78adf01
Compare
5bbc421
to
746725f
Compare
e07e225
to
04f0fd9
Compare
@@ -5907,6 +5910,30 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { | |||
N->getOperand(1), N->getOperand(2)); | |||
} | |||
|
|||
SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N) { | |||
SmallVector<SDValue, 16> LdChain; // Chain for the series of load | |||
SDValue Result = GenWidenVectorLoads(LdChain, N, /*IsAtomic=*/true); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I do not think you should call GenWidenVectorLoads. That has paths where the memory operation will be decomposed. I think you should try just replacing the load with an equivalent width integer, and extracting the parts to conform to the expectations of the type legalization
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Within GenWidenVectorLoads, MemVTs should be empty and so we will generate a single atomic anyway. I've added a check IsAtomic
to ensure we go down this path.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd rather not call it at all. I don't see what it's buying you. None of the complexity in GenWidenVectorLoads applies. You issue the load with a new type and then have type matching code, or otherwise it needs to fail
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Alright. It was just buying a function call instead of duplicating code, but it also makes sense to separate it out.
04f0fd9
to
5254725
Compare
647a59b
to
b0364ee
Compare
5254725
to
a7fb95e
Compare
b0364ee
to
6667568
Compare
5124038
to
a847ecf
Compare
6667568
to
2af4d7c
Compare
a847ecf
to
a5d0fdb
Compare
2af4d7c
to
7562f64
Compare
a5d0fdb
to
ecb5203
Compare
bc9ddb8
to
657d3dc
Compare
ecb5203
to
4d505de
Compare
657d3dc
to
86d8d1c
Compare
4d505de
to
a5afaec
Compare
SDLoc dl(LD); | ||
assert(LdVT.isVector() && WidenVT.isVector()); | ||
assert(LdVT.isScalableVector() == WidenVT.isScalableVector()); | ||
assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(style) missing assert messages
86d8d1c
to
3c692ed
Compare
a5afaec
to
9186493
Compare
// Load the element with one instruction. | ||
SDValue Result; | ||
assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth)); | ||
if (!FirstVT->isVector()) { | ||
unsigned NumElts = | ||
WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); | ||
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts); | ||
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); | ||
Result = DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); | ||
} | ||
else if (FirstVT == WidenVT) | ||
Result = LdOp; | ||
else { | ||
// TODO: We don't currently have any tests that exercise this code path. | ||
assert(WidenWidth.getFixedValue() % FirstVTWidth.getFixedValue() == 0); | ||
unsigned NumConcat = | ||
WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); | ||
SmallVector<SDValue, 16> ConcatOps(NumConcat); | ||
SDValue UndefVal = DAG.getUNDEF(*FirstVT); | ||
ConcatOps[0] = LdOp; | ||
for (unsigned i = 1; i != NumConcat; ++i) | ||
ConcatOps[i] = UndefVal; | ||
Result = DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The type coercion code is the sharable part that could be extracted into a helper function
9186493
to
4df72a2
Compare
Vector types of 2 elements must be widened. This change does this for vector types of atomic load in SelectionDAG so that it can translate aligned vectors of >1 size. Also, it also adds Pats to remove an extra MOV. commit-id:2894ccd1
4df72a2
to
bc6c355
Compare
05e7afd
to
03400f8
Compare
Vector types of 2 elements must be widened. This change does this
for vector types of atomic load in SelectionDAG
so that it can translate aligned vectors of >1 size. Also,
it also adds Pats to remove an extra MOV.
Stack: