-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SelectionDAG][X86] Widen <2 x T> vector types for atomic load #120598
base: users/jofrn/spr/main/a06a5cc6
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-llvm-selectiondag Author: None (jofrn) Changes
Stack:
Full diff: https://github.com/llvm/llvm-project/pull/120598.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index b81c9f87cb27d7..22b7c15f8768ae 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -1046,6 +1046,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+ SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode* N);
SDValue WidenVecRes_LOAD(SDNode* N);
SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
@@ -1129,8 +1130,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
/// resulting wider type. It takes:
/// LdChain: list of chains for the load to be generated.
/// Ld: load to widen
+ template <typename T>
SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
- LoadSDNode *LD);
+ T *LD, bool IsAtomic = false);
/// Helper function to generate a set of extension loads to load a vector with
/// a resulting wider type. It takes:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index c85e4ba2cfa5a7..4dfdd22ba27869 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4515,6 +4515,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
break;
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::ATOMIC_LOAD:
+ Res = WidenVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
+ break;
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
case ISD::STEP_VECTOR:
case ISD::SPLAT_VECTOR:
@@ -5901,6 +5904,30 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
N->getOperand(1), N->getOperand(2));
}
+SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
+ SmallVector<SDValue, 16> LdChain; // Output chains of the generated load(s).
+ SDValue Result = GenWidenVectorLoads(LdChain, N, true /*IsAtomic*/);
+
+ if (Result) {
+ // If a single load was generated, use its chain directly. Otherwise,
+ // build a TokenFactor node to record that the multiple loads are
+ // independent of each other, and chain to that.
+ SDValue NewChain;
+ if (LdChain.size() == 1)
+ NewChain = LdChain[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, LdChain);
+
+ // The chain was modified - switch anything that used the old chain
+ // (result 1 of N) to use the new one.
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return Result;
+ }
+
+ report_fatal_error("Unable to widen atomic vector load");
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
ISD::LoadExtType ExtType = LD->getExtensionType();
@@ -7699,8 +7726,9 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
}
+template <typename T>
SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
- LoadSDNode *LD) {
+ T *LD, bool IsAtomic) {
// The strategy assumes that we can efficiently load power-of-two widths.
// The routine chops the vector into the largest vector loads with the same
// element type or scalar loads and then recombines it to the widen vector
@@ -7757,8 +7785,13 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
} while (TypeSize::isKnownGT(RemainingWidth, NewVTWidth));
}
- SDValue LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),
- LD->getOriginalAlign(), MMOFlags, AAInfo);
+ SDValue LdOp;
+ if (IsAtomic)
+ LdOp = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, *FirstVT, *FirstVT, Chain,
+ BasePtr, LD->getMemOperand());
+ else
+ LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),
+ LD->getOriginalAlign(), MMOFlags, AAInfo);
LdChain.push_back(LdOp.getValue(1));
// Check if we can load the element with one instruction.
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index a57ae767859b10..f3cc39dba95253 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -146,6 +146,16 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
ret <1 x i64> %ret
}
+define <2 x i32> @atomic_vec2_i32_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec2_i32_align:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movq (%rdi), %rax
+; CHECK-NEXT: movq %rax, %xmm0
+; CHECK-NEXT: retq
+ %ret = load atomic <2 x i32>, ptr %x acquire, align 8
+ ret <2 x i32> %ret
+}
+
define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
; CHECK3-LABEL: atomic_vec1_ptr:
; CHECK3: ## %bb.0:
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
37e162b
to
f7691a8
Compare
cbaaeeb
to
d558091
Compare
34492d9
to
6952507
Compare
d558091
to
3f37dfc
Compare
6952507
to
3cf21cf
Compare
3f37dfc
to
89503f2
Compare
3cf21cf
to
a769a32
Compare
89503f2
to
b2f0b33
Compare
b2f0b33
to
6737dda
Compare
464bb05
to
ebba505
Compare
6737dda
to
2949391
Compare
ebba505
to
5bbc421
Compare
2949391
to
78adf01
Compare
5bbc421
to
746725f
Compare
a8cc74f
to
23c9ff2
Compare
746725f
to
592fa22
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Description says it's invalid, but it is now valid
; CHECK-NEXT: movl (%rdi), %eax | ||
; CHECK-NEXT: movd %eax, %xmm0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is broken, this should emit one atomic load
23c9ff2
to
2c51f72
Compare
64e813b
to
e1008c7
Compare
2c51f72
to
3a82883
Compare
if (LdChain.size() == 1) | ||
NewChain = LdChain[0]; | ||
else | ||
NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, LdChain); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think getNode will handle the 1 entry case for you
@@ -52242,6 +52243,81 @@ static SDValue combineConstantPoolLoads(SDNode *N, const SDLoc &dl, | |||
return SDValue(); | |||
} | |||
|
|||
static MVT getScalarTypeFromVectorType(MVT VT) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is just getIntegerVT(DAG.getContext(), VT.getSizeInBits())
llvm_unreachable("Invalid VT for scalar type translation"); | ||
} | ||
|
||
static SDValue combineAtomicLoad(SDNode *N, SelectionDAG &DAG, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think any of this custom handling in the target should be necessary
; CHECK-NEXT: retq | ||
%ret = load atomic <2 x i16>, ptr %x acquire, align 4 | ||
ret <2 x i16> %ret | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Test 2 x half, 2 x bfloat, 4 x 16-bit elts
3a82883
to
36161df
Compare
e1008c7
to
51cb67f
Compare
36161df
to
f0d0f17
Compare
41ef993
to
d528b89
Compare
f0d0f17
to
a96fdf1
Compare
d528b89
to
84a1672
Compare
a96fdf1
to
e07e225
Compare
Vector types of 2 elements must be widened. This change does so for atomic loads of vector type in SelectionDAG, so that aligned vectors of more than one element can be translated. It also adds Pats to remove an extra MOV. commit-id:2894ccd1
e07e225
to
04f0fd9
Compare
Vector types of 2 elements must be widened. This change does so
for atomic loads of vector type in SelectionDAG,
so that aligned vectors of more than one element can be translated.
It also adds Pats to remove an extra MOV.
Stack: