Skip to content

[SelectionDAG][X86] Widen <2 x T> vector types for atomic load #120598

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: users/jofrn/spr/main/a06a5cc6
Choose a base branch
from

Conversation

jofrn
Copy link
Contributor

@jofrn jofrn commented Dec 19, 2024

@llvmbot
Copy link
Member

llvmbot commented Dec 19, 2024

@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-llvm-selectiondag

Author: None (jofrn)

Changes

load atomic &lt;n x T&gt; is not valid. This change widens
vector types of atomic load in SelectionDAG
so that it can translate aligned vectors of >1 size.


Stack:

  • #120598 ⬅
  • #120387
  • #120386
  • #120385
  • #120384

⚠️ Part of a stack created by spr. Do not merge manually using the UI - doing so may have unexpected results.


Full diff: https://github.com/llvm/llvm-project/pull/120598.diff

3 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h (+3-1)
  • (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (+36-3)
  • (modified) llvm/test/CodeGen/X86/atomic-load-store.ll (+10)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index b81c9f87cb27d7..22b7c15f8768ae 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -1046,6 +1046,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
   SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
   SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+  SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode* N);
   SDValue WidenVecRes_LOAD(SDNode* N);
   SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
   SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
@@ -1129,8 +1130,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   /// resulting wider type. It takes:
   ///   LdChain: list of chains for the load to be generated.
   ///   Ld:      load to widen
+  template <typename T>
   SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
-                              LoadSDNode *LD);
+                              T *LD, bool IsAtomic = false);
 
   /// Helper function to generate a set of extension loads to load a vector with
   /// a resulting wider type. It takes:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index c85e4ba2cfa5a7..4dfdd22ba27869 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4515,6 +4515,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
     break;
   case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
   case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+  case ISD::ATOMIC_LOAD:
+    Res = WidenVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
+    break;
   case ISD::LOAD:              Res = WidenVecRes_LOAD(N); break;
   case ISD::STEP_VECTOR:
   case ISD::SPLAT_VECTOR:
@@ -5901,6 +5904,30 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
                      N->getOperand(1), N->getOperand(2));
 }
 
+SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
+  SmallVector<SDValue, 16> LdChain; // Chain for the series of load
+  SDValue Result = GenWidenVectorLoads(LdChain, N, true /*IsAtomic*/);
+
+  if (Result) {
+    // If we generate a single load, we can use that for the chain.  Otherwise,
+    // build a factor node to remember the multiple loads are independent and
+    // chain to that.
+    SDValue NewChain;
+    if (LdChain.size() == 1)
+      NewChain = LdChain[0];
+    else
+      NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, LdChain);
+
+    // Modified the chain - switch anything that used the old chain to use
+    // the new one.
+    ReplaceValueWith(SDValue(N, 1), NewChain);
+
+    return Result;
+  }
+
+  report_fatal_error("Unable to widen atomic vector load");
+}
+
 SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
   LoadSDNode *LD = cast<LoadSDNode>(N);
   ISD::LoadExtType ExtType = LD->getExtensionType();
@@ -7699,8 +7726,9 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
   return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
 }
 
+template <typename T>
 SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
-                                              LoadSDNode *LD) {
+                                              T *LD, bool IsAtomic) {
   // The strategy assumes that we can efficiently load power-of-two widths.
   // The routine chops the vector into the largest vector loads with the same
   // element type or scalar loads and then recombines it to the widen vector
@@ -7757,8 +7785,13 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
     } while (TypeSize::isKnownGT(RemainingWidth, NewVTWidth));
   }
 
-  SDValue LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),
-                             LD->getOriginalAlign(), MMOFlags, AAInfo);
+  SDValue LdOp;
+  if (IsAtomic)
+    LdOp = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, *FirstVT, *FirstVT, Chain,
+        BasePtr, LD->getMemOperand());
+  else
+    LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),
+        LD->getOriginalAlign(), MMOFlags, AAInfo);
   LdChain.push_back(LdOp.getValue(1));
 
   // Check if we can load the element with one instruction.
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index a57ae767859b10..f3cc39dba95253 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -146,6 +146,16 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
   ret <1 x i64> %ret
 }
 
+define <2 x i32> @atomic_vec2_i32_align(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec2_i32_align:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    movq (%rdi), %rax
+; CHECK-NEXT:    movq %rax, %xmm0
+; CHECK-NEXT:    retq
+  %ret = load atomic <2 x i32>, ptr %x acquire, align 8
+  ret <2 x i32> %ret
+}
+
 define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
 ; CHECK3-LABEL: atomic_vec1_ptr:
 ; CHECK3:       ## %bb.0:

Copy link

github-actions bot commented Dec 19, 2024

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff 03400f8c3f2d0eca529d4459e7dec946ea1818ea bc6c3559b910632c59f0c0c07de0031469465e35 --extensions cpp,h -- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
View the diff from clang-format here.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 7eda83fe2b..e38b41e090 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -6013,7 +6013,7 @@ static std::optional<EVT> findMemType(SelectionDAG &DAG,
 
 SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
   EVT WidenVT =
-      TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+      TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
   EVT LdVT = LD->getMemoryVT();
   SDLoc dl(LD);
   assert(LdVT.isVector() && WidenVT.isVector());
@@ -6049,8 +6049,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
                                BasePtr, LD->getMemOperand());
 
   // Load the element with one instruction.
-  SDValue Result = loadElement(LdOp, *FirstVT, WidenVT, LdWidth, FirstVTWidth,
-                               dl);
+  SDValue Result =
+      loadElement(LdOp, *FirstVT, WidenVT, LdWidth, FirstVTWidth, dl);
 
   if (Result) {
     // Modified the chain - switch anything that used the old chain to use

@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 37e162b to f7691a8 Compare December 19, 2024 16:42
@jofrn jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch 2 times, most recently from cbaaeeb to d558091 Compare December 19, 2024 19:15
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch 2 times, most recently from 34492d9 to 6952507 Compare December 19, 2024 19:36
@jofrn jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from d558091 to 3f37dfc Compare December 19, 2024 19:36
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 6952507 to 3cf21cf Compare December 19, 2024 19:43
@jofrn jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from 3f37dfc to 89503f2 Compare December 19, 2024 19:43
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 3cf21cf to a769a32 Compare December 19, 2024 21:28
@jofrn jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from 89503f2 to b2f0b33 Compare December 19, 2024 21:28
@jofrn jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from b2f0b33 to 6737dda Compare December 19, 2024 21:33
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch 2 times, most recently from 464bb05 to ebba505 Compare December 19, 2024 21:59
@jofrn jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from 6737dda to 2949391 Compare December 19, 2024 21:59
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from ebba505 to 5bbc421 Compare December 20, 2024 11:25
@jofrn jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from 2949391 to 78adf01 Compare December 20, 2024 11:25
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 5bbc421 to 746725f Compare December 20, 2024 11:39
@jofrn jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch 2 times, most recently from e07e225 to 04f0fd9 Compare January 15, 2025 13:26
@@ -5907,6 +5910,30 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
N->getOperand(1), N->getOperand(2));
}

SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
SmallVector<SDValue, 16> LdChain; // Chain for the series of load
SDValue Result = GenWidenVectorLoads(LdChain, N, /*IsAtomic=*/true);
Copy link
Contributor

@arsenm arsenm Jan 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not think you should call GenWidenVectorLoads. That has paths where the memory operation will be decomposed. I think you should try just replacing the load with an equivalent width integer, and extracting the parts to conform to the expectations of the type legalization

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Within GenWidenVectorLoads, MemVTs should be empty and so we will generate a single atomic anyway. I've added a check IsAtomic to ensure we go down this path.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd rather not call it at all. I don't see what it's buying you. None of the complexity in GenWidenVectorLoads applies. You issue the load with a new type and then have type matching code, or otherwise it needs to fail

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alright. It was just buying a function call instead of duplicating code, but it also makes sense to separate it out.

@jofrn jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from 04f0fd9 to 5254725 Compare January 21, 2025 16:58
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch 2 times, most recently from 647a59b to b0364ee Compare January 21, 2025 17:13
@jofrn jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from 5254725 to a7fb95e Compare January 21, 2025 17:13
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from b0364ee to 6667568 Compare January 21, 2025 17:44
@jofrn jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch 2 times, most recently from 5124038 to a847ecf Compare January 21, 2025 17:50
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 6667568 to 2af4d7c Compare January 21, 2025 17:50
@jofrn jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from a847ecf to a5d0fdb Compare January 22, 2025 11:05
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 2af4d7c to 7562f64 Compare January 22, 2025 11:05
@jofrn jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from a5d0fdb to ecb5203 Compare January 22, 2025 11:43
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch 2 times, most recently from bc9ddb8 to 657d3dc Compare January 22, 2025 17:38
@jofrn jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from ecb5203 to 4d505de Compare January 22, 2025 17:38
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 657d3dc to 86d8d1c Compare January 22, 2025 17:47
@jofrn jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from 4d505de to a5afaec Compare January 22, 2025 17:47
SDLoc dl(LD);
assert(LdVT.isVector() && WidenVT.isVector());
assert(LdVT.isScalableVector() == WidenVT.isScalableVector());
assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(style) missing assert messages

@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 86d8d1c to 3c692ed Compare January 22, 2025 18:19
@jofrn jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from a5afaec to 9186493 Compare January 22, 2025 18:19
Comment on lines 5954 to 6054
// Load the element with one instruction.
SDValue Result;
assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
if (!FirstVT->isVector()) {
unsigned NumElts =
WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
Result = DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
}
else if (FirstVT == WidenVT)
Result = LdOp;
else {
// TODO: We don't currently have any tests that exercise this code path.
assert(WidenWidth.getFixedValue() % FirstVTWidth.getFixedValue() == 0);
unsigned NumConcat =
WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
SmallVector<SDValue, 16> ConcatOps(NumConcat);
SDValue UndefVal = DAG.getUNDEF(*FirstVT);
ConcatOps[0] = LdOp;
for (unsigned i = 1; i != NumConcat; ++i)
ConcatOps[i] = UndefVal;
Result = DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps);
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The type coercion code is the sharable part that could be extracted into a helper function

@jofrn jofrn changed the base branch from users/jofrn/spr/main/a06a5cc6 to main February 2, 2025 20:25
@jofrn jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from 9186493 to 4df72a2 Compare February 2, 2025 20:25
@jofrn jofrn changed the base branch from main to users/jofrn/spr/main/a06a5cc6 February 2, 2025 20:25
Vector types of 2 elements must be widened. This change does this
for vector types of atomic load in SelectionDAG
so that it can translate aligned vectors of >1 size. Also,
it also adds Pats to remove an extra MOV.

commit-id:2894ccd1
@jofrn jofrn force-pushed the users/jofrn/spr/main/2894ccd1 branch from 4df72a2 to bc6c355 Compare March 3, 2025 23:26
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 05e7afd to 03400f8 Compare March 3, 2025 23:27
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:X86 llvm:SelectionDAG SelectionDAGISel as well
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants