[SelectionDAG] Split vector types for atomic load#120640
Closed
jofrn wants to merge 1 commit intousers/jofrn/spr/main/45989503from
Closed
[SelectionDAG] Split vector types for atomic load#120640jofrn wants to merge 1 commit intousers/jofrn/spr/main/45989503from
jofrn wants to merge 1 commit intousers/jofrn/spr/main/45989503from
Conversation
This was referenced Dec 19, 2024
Member
|
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-x86 Author: None (jofrn) Changes
Stack:
Full diff: https://github.com/llvm/llvm-project/pull/120640.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 3b3dddc44e3682..e0cd7319ac034b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -946,6 +946,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 7c4caa96244b8b..44adc3fdb4a5a7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1146,6 +1146,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SplitVecRes_STEP_VECTOR(N, Lo, Hi);
break;
case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+ case ISD::ATOMIC_LOAD:
+ SplitVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N), Lo, Hi);
+ break;
case ISD::LOAD:
SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
break;
@@ -2079,6 +2082,38 @@ void DAGTypeLegalizer::SplitVecRes_VP_SPLAT(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, N->getOperand(0), MaskHi, EVLHi);
}
+void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo,
+ SDValue &Hi) {
+ EVT LoVT, HiVT;
+ SDLoc dl(LD);
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0));
+
+ SDValue Ch = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ EVT MemoryVT = LD->getMemoryVT();
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ Lo = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, LoMemVT, LoMemVT, Ch, Ptr,
+ LD->getMemOperand());
+
+ MachinePointerInfo MPI;
+ IncrementPointer(LD, LoMemVT, MPI, Ptr);
+
+ Hi = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, HiMemVT, HiMemVT, Ch, Ptr,
+ LD->getMemOperand());
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(LD, 1), Ch);
+}
+
void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
SDValue &Hi) {
assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index ba1bc4d98537d1..302a94aa9c1f60 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -176,6 +176,62 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) nounwind {
ret <2 x float> %ret
}
+define <2 x half> @atomic_vec2_half(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec2_half:
+; CHECK3: ## %bb.0:
+; CHECK3-NEXT: movzwl (%rdi), %eax
+; CHECK3-NEXT: movzwl 2(%rdi), %ecx
+; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK3-NEXT: pinsrw $0, %ecx, %xmm1
+; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK3-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec2_half:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: movw (%rdi), %dx
+; CHECK0-NEXT: movw 2(%rdi), %cx
+; CHECK0-NEXT: ## implicit-def: $eax
+; CHECK0-NEXT: movw %dx, %ax
+; CHECK0-NEXT: ## implicit-def: $xmm0
+; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK0-NEXT: ## implicit-def: $eax
+; CHECK0-NEXT: movw %cx, %ax
+; CHECK0-NEXT: ## implicit-def: $xmm1
+; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
+; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK0-NEXT: retq
+ %ret = load atomic <2 x half>, ptr %x acquire, align 4
+ ret <2 x half> %ret
+}
+
+define <2 x bfloat> @atomic_vec2_bfloat(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec2_bfloat:
+; CHECK3: ## %bb.0:
+; CHECK3-NEXT: movzwl (%rdi), %eax
+; CHECK3-NEXT: movzwl 2(%rdi), %ecx
+; CHECK3-NEXT: pinsrw $0, %ecx, %xmm1
+; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK3-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec2_bfloat:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: movw (%rdi), %cx
+; CHECK0-NEXT: movw 2(%rdi), %dx
+; CHECK0-NEXT: ## implicit-def: $eax
+; CHECK0-NEXT: movw %dx, %ax
+; CHECK0-NEXT: ## implicit-def: $xmm1
+; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
+; CHECK0-NEXT: ## implicit-def: $eax
+; CHECK0-NEXT: movw %cx, %ax
+; CHECK0-NEXT: ## implicit-def: $xmm0
+; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; CHECK0-NEXT: retq
+ %ret = load atomic <2 x bfloat>, ptr %x acquire, align 4
+ ret <2 x bfloat> %ret
+}
+
define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
; CHECK3-LABEL: atomic_vec1_ptr:
; CHECK3: ## %bb.0:
|
8ed9199 to
c9bdb95
Compare
b2f0b33 to
6737dda
Compare
c9bdb95 to
94a71a3
Compare
6737dda to
2949391
Compare
94a71a3 to
34df4f7
Compare
34df4f7 to
761d4d9
Compare
2949391 to
78adf01
Compare
6506acb to
db674f8
Compare
arsenm
reviewed
Dec 20, 2024
db674f8 to
13ea377
Compare
23c9ff2 to
2c51f72
Compare
13ea377 to
e11194d
Compare
2c51f72 to
3a82883
Compare
3a82883 to
36161df
Compare
e11194d to
3be4fa0
Compare
28f6bf3 to
309d817
Compare
arsenm
reviewed
May 6, 2025
| EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); | ||
| EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); | ||
| SDValue ExtractLo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, LoIntVT, ALD, | ||
| DAG.getIntPtrConstant(0, dl)); |
Contributor
There was a problem hiding this comment.
This probably should be a target constant but this appears to be what existing uses do
309d817 to
cd4402a
Compare
4383732 to
0fcd430
Compare
arsenm
reviewed
May 6, 2025
Comment on lines
1443
to
1444
| EVT MemIntVT = | ||
| EVT::getIntegerVT(*DAG.getContext(), 2 * LoMemVT.getSizeInBits()); |
Contributor
There was a problem hiding this comment.
MemVT should not be a derived quantity, directly bitcast the original memory type
0fcd430 to
5b5d948
Compare
cd4402a to
0e4399d
Compare
5b5d948 to
939a68f
Compare
4783f04 to
13a5e87
Compare
939a68f to
b6c4b48
Compare
13a5e87 to
3e8de67
Compare
6a946a3 to
7e560d9
Compare
b51658d to
507069a
Compare
7e560d9 to
e8dc4c2
Compare
507069a to
40b0a4e
Compare
e8dc4c2 to
539584c
Compare
40b0a4e to
eda6b72
Compare
e5413e4 to
6312f8c
Compare
eda6b72 to
4fccbd6
Compare
Vector types that aren't widened are split so that a single ATOMIC_LOAD is issued for the entire vector at once. This change utilizes the load vectorization infrastructure in SelectionDAG in order to group the vectors. This enables SelectionDAG to translate vectors with type bfloat,half. commit-id:3a045357
4fccbd6 to
f916347
Compare
6312f8c to
109bc60
Compare
Contributor
Author
|
Closing pull request: commit has gone away |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.This suggestion is invalid because no changes were made to the code.Suggestions cannot be applied while the pull request is closed.Suggestions cannot be applied while viewing a subset of changes.Only one suggestion per line can be applied in a batch.Add this suggestion to a batch that can be applied as a single commit.Applying suggestions on deleted lines is not supported.You must change the existing code in this line in order to create a valid suggestion.Outdated suggestions cannot be applied.This suggestion has been applied or marked resolved.Suggestions cannot be applied from pending reviews.Suggestions cannot be applied on multi-line comments.Suggestions cannot be applied while the pull request is queued to merge.Suggestion cannot be applied right now. Please check back later.
Vector types that aren't widened are split
so that a single ATOMIC_LOAD is issued for the entire vector at once.
This change utilizes the load vectorization infrastructure in
SelectionDAG in order to group the vectors. This enables SelectionDAG
to translate vectors with type bfloat,half.
Stack: