[X86] Manage atomic load of fp -> int promotion in DAG #118793

Open · wants to merge 10 commits into base: main

8 changes: 4 additions & 4 deletions llvm/docs/LangRef.rst
@@ -10956,8 +10956,8 @@ If the ``load`` is marked as ``atomic``, it takes an extra :ref:`ordering
<ordering>` and optional ``syncscope("<target-scope>")`` argument. The
``release`` and ``acq_rel`` orderings are not valid on ``load`` instructions.
Atomic loads produce :ref:`defined <memmodel>` results when they may see
multiple atomic stores. The type of the pointee must be an integer, pointer, or
floating-point type whose bit width is a power of two greater than or equal to
multiple atomic stores. The type of the pointee must be an integer, pointer,
floating-point, or vector type whose bit width is a power of two greater than or equal to
eight and less than or equal to a target-specific size limit. ``align`` must be
explicitly specified on atomic loads. Note: if the alignment is not greater or
equal to the size of the `<value>` type, the atomic operation is likely to
@@ -11097,8 +11097,8 @@ If the ``store`` is marked as ``atomic``, it takes an extra :ref:`ordering
<ordering>` and optional ``syncscope("<target-scope>")`` argument. The
``acquire`` and ``acq_rel`` orderings aren't valid on ``store`` instructions.
Atomic loads produce :ref:`defined <memmodel>` results when they may see
multiple atomic stores. The type of the pointee must be an integer, pointer, or
floating-point type whose bit width is a power of two greater than or equal to
multiple atomic stores. The type of the pointee must be an integer, pointer,
floating-point, or vector type whose bit width is a power of two greater than or equal to
eight and less than or equal to a target-specific size limit. ``align`` must be
explicitly specified on atomic stores. Note: if the alignment is not greater or
equal to the size of the `<value>` type, the atomic operation is likely to
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -860,6 +860,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue ScalarizeVecRes_ExpOp(SDNode *N);
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N);
SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_VSELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT(SDNode *N);
16 changes: 16 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -60,6 +60,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
case ISD::FPOWI: R = ScalarizeVecRes_ExpOp(N); break;
case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::ATOMIC_LOAD:
R = ScalarizeVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
break;
case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
@@ -451,6 +454,19 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
return Op;
}

SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) {

SDValue Result = DAG.getAtomic(
ISD::ATOMIC_LOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(),
N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(),
N->getMemOperand());

// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
return Result;
}

SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
assert(N->isUnindexed() && "Indexed vector load?");

14 changes: 8 additions & 6 deletions llvm/lib/IR/Verifier.cpp
@@ -4255,9 +4255,10 @@ void Verifier::visitLoadInst(LoadInst &LI) {
Check(LI.getOrdering() != AtomicOrdering::Release &&
LI.getOrdering() != AtomicOrdering::AcquireRelease,
"Load cannot have Release ordering", &LI);
Check(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(),
"atomic load operand must have integer, pointer, or floating point "
"type!",
Check(ElTy->getScalarType()->isIntOrPtrTy() ||
ElTy->getScalarType()->isFloatingPointTy(),
"atomic load operand must have integer, pointer, floating point, "
"or vector type!",
ElTy, &LI);
checkAtomicMemAccessSize(ElTy, &LI);
} else {
@@ -4281,9 +4282,10 @@ void Verifier::visitStoreInst(StoreInst &SI) {
Check(SI.getOrdering() != AtomicOrdering::Acquire &&
SI.getOrdering() != AtomicOrdering::AcquireRelease,
"Store cannot have Acquire ordering", &SI);
Check(ElTy->isIntOrPtrTy() || ElTy->isFloatingPointTy(),
"atomic store operand must have integer, pointer, or floating point "
"type!",
Check(ElTy->getScalarType()->isIntOrPtrTy() ||
ElTy->getScalarType()->isFloatingPointTy(),
"atomic store operand must have integer, pointer, floating point, "
"or vector type!",
ElTy, &SI);
checkAtomicMemAccessSize(ElTy, &SI);
} else {
4 changes: 4 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2589,6 +2589,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(Op, MVT::f32, Promote);
}

setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16);
setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32);
setOperationPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64);
Comment on lines +2592 to +2594 (Contributor):
This is unreachable unless you touch shouldCastAtomicLoadInIR since the default will coerce this in IR

@jofrn (Contributor, Author), Dec 5, 2024:
It is reachable during DAG to DAG translation. After scalarization, we promote:

  1. v1f32,ch = AtomicLoad<(load acquire (s32) from %ir.x)> t0, t2
  2. f32,ch = AtomicLoad<(load acquire (s32) from %ir.x)> t0, t2 // scalarize
  3. i32,ch = AtomicLoad<(load acquire (s32) from %ir.x)> t0, t2 // cast

Contributor:
Oh, so this patch really just fixes the 1 x FP vector case... that's a weird edge case. So the description is now differently inaccurate. I guess it's fine to split them, but follow up should make shouldCastAtomicLoadInIR return none
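
For context on that suggested follow-up: the generic TargetLowering::shouldCastAtomicLoadInIR hook defaults to CastToInteger for floating-point loads, which is why the scalar f16/f32/f64 promotions added above are currently only reached through the v1 vector path. A minimal sketch of what such an X86 override might look like, assuming the existing hook name and AtomicExpansionKind values; the predicate and the fallback are illustrative and not part of this patch:

  // Sketch only (follow-up idea, not in this patch): let FP atomic loads
  // reach SelectionDAG instead of being coerced to integer by AtomicExpand,
  // so the setOperationPromotedToType entries above perform the cast during
  // DAG legalization.
  TargetLowering::AtomicExpansionKind
  X86TargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
    if (LI->getType()->isFloatingPointTy())
      return AtomicExpansionKind::None;
    // Keep the generic behavior for every other type.
    return TargetLowering::shouldCastAtomicLoadInIR(LI);
  }

A matching override declaration would also be needed in X86ISelLowering.h, and whether half/bfloat should be exempted as well depends on how the promoted f16 pattern holds up.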


// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine({ISD::VECTOR_SHUFFLE,
ISD::SCALAR_TO_VECTOR,
9 changes: 9 additions & 0 deletions llvm/test/Assembler/atomic.ll
@@ -52,6 +52,15 @@ define void @f(ptr %x) {
; CHECK: atomicrmw volatile usub_sat ptr %x, i32 10 syncscope("agent") monotonic
atomicrmw volatile usub_sat ptr %x, i32 10 syncscope("agent") monotonic

; CHECK: load atomic <1 x i32>, ptr %x unordered, align 4
load atomic <1 x i32>, ptr %x unordered, align 4
; CHECK: store atomic <1 x i32> splat (i32 3), ptr %x release, align 4
store atomic <1 x i32> <i32 3>, ptr %x release, align 4
; CHECK: load atomic <2 x i32>, ptr %x unordered, align 4
load atomic <2 x i32>, ptr %x unordered, align 4
; CHECK: store atomic <2 x i32> <i32 3, i32 4>, ptr %x release, align 4
store atomic <2 x i32> <i32 3, i32 4>, ptr %x release, align 4

; CHECK: fence syncscope("singlethread") release
fence syncscope("singlethread") release
; CHECK: fence seq_cst
84 changes: 83 additions & 1 deletion llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -1,12 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s --check-prefix=CHECK0
Collaborator:
Reduce check duplication:

; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK3
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=CHECK,CHECK0


define void @test1(ptr %ptr, i32 %val1) {
; CHECK-LABEL: test1:
; CHECK: ## %bb.0:
; CHECK-NEXT: xchgl %esi, (%rdi)
; CHECK-NEXT: retq
;
; CHECK0-LABEL: test1:
; CHECK0: ## %bb.0:
; CHECK0-NEXT: xchgl %esi, (%rdi)
; CHECK0-NEXT: retq
store atomic i32 %val1, ptr %ptr seq_cst, align 4
ret void
}
@@ -16,6 +21,11 @@ define void @test2(ptr %ptr, i32 %val1) {
; CHECK: ## %bb.0:
; CHECK-NEXT: movl %esi, (%rdi)
; CHECK-NEXT: retq
;
; CHECK0-LABEL: test2:
; CHECK0: ## %bb.0:
; CHECK0-NEXT: movl %esi, (%rdi)
; CHECK0-NEXT: retq
store atomic i32 %val1, ptr %ptr release, align 4
ret void
}
@@ -25,6 +35,78 @@ define i32 @test3(ptr %ptr) {
; CHECK: ## %bb.0:
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: retq
;
; CHECK0-LABEL: test3:
; CHECK0: ## %bb.0:
; CHECK0-NEXT: movl (%rdi), %eax
; CHECK0-NEXT: retq
%val = load atomic i32, ptr %ptr seq_cst, align 4
ret i32 %val
}

define <1 x i32> @atomic_vec1_i32(ptr %x) {
; CHECK-LABEL: atomic_vec1_i32:
; CHECK: ## %bb.0:
; CHECK-NEXT: movl (%rdi), %eax
; CHECK-NEXT: retq
;
; CHECK0-LABEL: atomic_vec1_i32:
; CHECK0: ## %bb.0:
; CHECK0-NEXT: movl (%rdi), %eax
; CHECK0-NEXT: retq
%ret = load atomic <1 x i32>, ptr %x acquire, align 4
ret <1 x i32> %ret
}

define <1 x half> @atomic_vec1_half(ptr %x) {
; CHECK-LABEL: atomic_vec1_half:
; CHECK: ## %bb.0:
; CHECK-NEXT: movzwl (%rdi), %eax
; CHECK-NEXT: pinsrw $0, %eax, %xmm0
; CHECK-NEXT: retq
;
; CHECK0-LABEL: atomic_vec1_half:
; CHECK0: ## %bb.0:
; CHECK0-NEXT: movw (%rdi), %cx
; CHECK0-NEXT: ## implicit-def: $eax
; CHECK0-NEXT: movw %cx, %ax
; CHECK0-NEXT: ## implicit-def: $xmm0
; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
; CHECK0-NEXT: retq
%ret = load atomic <1 x half>, ptr %x acquire, align 4
ret <1 x half> %ret
}

define <1 x float> @atomic_vec1_float(ptr %x) {
; CHECK-LABEL: atomic_vec1_float:
; CHECK: ## %bb.0:
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: retq
;
; CHECK0-LABEL: atomic_vec1_float:
; CHECK0: ## %bb.0:
; CHECK0-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK0-NEXT: retq
%ret = load atomic <1 x float>, ptr %x acquire, align 4
ret <1 x float> %ret
}

define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
; CHECK-LABEL: atomic_vec1_bfloat:
; CHECK: ## %bb.0:
; CHECK-NEXT: movzwl (%rdi), %eax
; CHECK-NEXT: pinsrw $0, %eax, %xmm0
; CHECK-NEXT: retq
;
; CHECK0-LABEL: atomic_vec1_bfloat:
; CHECK0: ## %bb.0:
; CHECK0-NEXT: movw (%rdi), %cx
; CHECK0-NEXT: ## implicit-def: $eax
; CHECK0-NEXT: movw %cx, %ax
; CHECK0-NEXT: ## implicit-def: $xmm0
; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
; CHECK0-NEXT: retq
%ret = load atomic <1 x bfloat>, ptr %x acquire, align 4
ret <1 x bfloat> %ret
}

15 changes: 8 additions & 7 deletions llvm/test/Verifier/atomics.ll
@@ -1,14 +1,15 @@
; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s
; CHECK: atomic store operand must have integer, pointer, floating point, or vector type!
; CHECK: atomic load operand must have integer, pointer, floating point, or vector type!

; CHECK: atomic store operand must have integer, pointer, or floating point type!
; CHECK: atomic load operand must have integer, pointer, or floating point type!
%ty = type { i32 };

define void @foo(ptr %P, <1 x i64> %v) {
store atomic <1 x i64> %v, ptr %P unordered, align 8
define void @foo(ptr %P, %ty %v) {
store atomic %ty %v, ptr %P unordered, align 8
ret void
}

define <1 x i64> @bar(ptr %P) {
%v = load atomic <1 x i64>, ptr %P unordered, align 8
ret <1 x i64> %v
define %ty @bar(ptr %P) {
%v = load atomic %ty, ptr %P unordered, align 8
ret %ty %v
}