-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[X86] Cast atomic vectors in IR to support floats #142320
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/jofrn/spr/main/45989503
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-x86 Author: None (jofrn) ChangesThis commit casts floats to ints in an atomic load during AtomicExpand to support Stack:
Full diff: https://github.com/llvm/llvm-project/pull/142320.diff 3 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 776d3c0a42e2f..3debf30da0a29 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32070,6 +32070,13 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
}
}
+TargetLowering::AtomicExpansionKind
+X86TargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
+ if (LI->getType()->getScalarType()->isFloatingPointTy())
+ return AtomicExpansionKind::CastToInteger;
+ return AtomicExpansionKind::None;
+}
+
LoadInst *
X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 5cb6b3e493a32..43cddb2b53bd6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1839,6 +1839,8 @@ namespace llvm {
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
+ TargetLoweringBase::AtomicExpansionKind
+ shouldCastAtomicLoadInIR(LoadInst *LI) const override;
void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 4b818b6cfa57e..039edcbf83544 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -207,19 +207,19 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
; CHECK-O3-LABEL: atomic_vec1_bfloat:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movzwl (%rdi), %eax
-; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-O3-NEXT: movd %eax, %xmm0
; CHECK-O3-NEXT: retq
;
; CHECK-SSE-O3-LABEL: atomic_vec1_bfloat:
; CHECK-SSE-O3: # %bb.0:
; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
-; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-SSE-O3-NEXT: movd %eax, %xmm0
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX-O3-LABEL: atomic_vec1_bfloat:
; CHECK-AVX-O3: # %bb.0:
; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
-; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0
; CHECK-AVX-O3-NEXT: retq
;
; CHECK-O0-LABEL: atomic_vec1_bfloat:
@@ -227,8 +227,7 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
; CHECK-O0-NEXT: movw (%rdi), %cx
; CHECK-O0-NEXT: # implicit-def: $eax
; CHECK-O0-NEXT: movw %cx, %ax
-; CHECK-O0-NEXT: # implicit-def: $xmm0
-; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-O0-NEXT: movd %eax, %xmm0
; CHECK-O0-NEXT: retq
;
; CHECK-SSE-O0-LABEL: atomic_vec1_bfloat:
@@ -236,8 +235,7 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
; CHECK-SSE-O0-NEXT: movw (%rdi), %cx
; CHECK-SSE-O0-NEXT: # implicit-def: $eax
; CHECK-SSE-O0-NEXT: movw %cx, %ax
-; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0
-; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-SSE-O0-NEXT: movd %eax, %xmm0
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX-O0-LABEL: atomic_vec1_bfloat:
@@ -245,8 +243,7 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
; CHECK-AVX-O0-NEXT: movw (%rdi), %cx
; CHECK-AVX-O0-NEXT: # implicit-def: $eax
; CHECK-AVX-O0-NEXT: movw %cx, %ax
-; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0
-; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0
; CHECK-AVX-O0-NEXT: retq
%ret = load atomic <1 x bfloat>, ptr %x acquire, align 2
ret <1 x bfloat> %ret
@@ -377,6 +374,74 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
ret <2 x float> %ret
}
+define <2 x half> @atomic_vec2_half(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec2_half:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec2_half:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec2_half:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec2_half:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec2_half:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec2_half:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <2 x half>, ptr %x acquire, align 4
+ ret <2 x half> %ret
+}
+
+define <2 x bfloat> @atomic_vec2_bfloat(ptr %x) {
+; CHECK-O3-LABEL: atomic_vec2_bfloat:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec2_bfloat:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec2_bfloat:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec2_bfloat:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec2_bfloat:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec2_bfloat:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <2 x bfloat>, ptr %x acquire, align 4
+ ret <2 x bfloat> %ret
+}
+
define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
; CHECK-O3-LABEL: atomic_vec1_ptr:
; CHECK-O3: # %bb.0:
@@ -457,19 +522,19 @@ define <1 x half> @atomic_vec1_half(ptr %x) {
; CHECK-O3-LABEL: atomic_vec1_half:
; CHECK-O3: # %bb.0:
; CHECK-O3-NEXT: movzwl (%rdi), %eax
-; CHECK-O3-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-O3-NEXT: movd %eax, %xmm0
; CHECK-O3-NEXT: retq
;
; CHECK-SSE-O3-LABEL: atomic_vec1_half:
; CHECK-SSE-O3: # %bb.0:
; CHECK-SSE-O3-NEXT: movzwl (%rdi), %eax
-; CHECK-SSE-O3-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-SSE-O3-NEXT: movd %eax, %xmm0
; CHECK-SSE-O3-NEXT: retq
;
; CHECK-AVX-O3-LABEL: atomic_vec1_half:
; CHECK-AVX-O3: # %bb.0:
; CHECK-AVX-O3-NEXT: movzwl (%rdi), %eax
-; CHECK-AVX-O3-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX-O3-NEXT: vmovd %eax, %xmm0
; CHECK-AVX-O3-NEXT: retq
;
; CHECK-O0-LABEL: atomic_vec1_half:
@@ -477,8 +542,7 @@ define <1 x half> @atomic_vec1_half(ptr %x) {
; CHECK-O0-NEXT: movw (%rdi), %cx
; CHECK-O0-NEXT: # implicit-def: $eax
; CHECK-O0-NEXT: movw %cx, %ax
-; CHECK-O0-NEXT: # implicit-def: $xmm0
-; CHECK-O0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-O0-NEXT: movd %eax, %xmm0
; CHECK-O0-NEXT: retq
;
; CHECK-SSE-O0-LABEL: atomic_vec1_half:
@@ -486,8 +550,7 @@ define <1 x half> @atomic_vec1_half(ptr %x) {
; CHECK-SSE-O0-NEXT: movw (%rdi), %cx
; CHECK-SSE-O0-NEXT: # implicit-def: $eax
; CHECK-SSE-O0-NEXT: movw %cx, %ax
-; CHECK-SSE-O0-NEXT: # implicit-def: $xmm0
-; CHECK-SSE-O0-NEXT: pinsrw $0, %eax, %xmm0
+; CHECK-SSE-O0-NEXT: movd %eax, %xmm0
; CHECK-SSE-O0-NEXT: retq
;
; CHECK-AVX-O0-LABEL: atomic_vec1_half:
@@ -495,8 +558,7 @@ define <1 x half> @atomic_vec1_half(ptr %x) {
; CHECK-AVX-O0-NEXT: movw (%rdi), %cx
; CHECK-AVX-O0-NEXT: # implicit-def: $eax
; CHECK-AVX-O0-NEXT: movw %cx, %ax
-; CHECK-AVX-O0-NEXT: # implicit-def: $xmm0
-; CHECK-AVX-O0-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
+; CHECK-AVX-O0-NEXT: vmovd %eax, %xmm0
; CHECK-AVX-O0-NEXT: retq
%ret = load atomic <1 x half>, ptr %x acquire, align 2
ret <1 x half> %ret
@@ -841,6 +903,89 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
ret <4 x i16> %ret
}
+define <4 x half> @atomic_vec4_half(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec4_half:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq (%rdi), %xmm0
+; CHECK-NEXT: retq
+ %ret = load atomic <4 x half>, ptr %x acquire, align 8
+ ret <4 x half> %ret
+}
+
+define <4 x bfloat> @atomic_vec4_bfloat(ptr %x) nounwind {
+; CHECK-LABEL: atomic_vec4_bfloat:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq (%rdi), %xmm0
+; CHECK-NEXT: retq
+ %ret = load atomic <4 x bfloat>, ptr %x acquire, align 8
+ ret <4 x bfloat> %ret
+}
+
+define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
+; CHECK-O3-LABEL: atomic_vec4_float_align:
+; CHECK-O3: # %bb.0:
+; CHECK-O3-NEXT: pushq %rax
+; CHECK-O3-NEXT: movl $2, %esi
+; CHECK-O3-NEXT: callq __atomic_load_16@PLT
+; CHECK-O3-NEXT: movq %rdx, %xmm1
+; CHECK-O3-NEXT: movq %rax, %xmm0
+; CHECK-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-O3-NEXT: popq %rax
+; CHECK-O3-NEXT: retq
+;
+; CHECK-SSE-O3-LABEL: atomic_vec4_float_align:
+; CHECK-SSE-O3: # %bb.0:
+; CHECK-SSE-O3-NEXT: pushq %rbx
+; CHECK-SSE-O3-NEXT: xorl %eax, %eax
+; CHECK-SSE-O3-NEXT: xorl %edx, %edx
+; CHECK-SSE-O3-NEXT: xorl %ecx, %ecx
+; CHECK-SSE-O3-NEXT: xorl %ebx, %ebx
+; CHECK-SSE-O3-NEXT: lock cmpxchg16b (%rdi)
+; CHECK-SSE-O3-NEXT: movq %rdx, %xmm1
+; CHECK-SSE-O3-NEXT: movq %rax, %xmm0
+; CHECK-SSE-O3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-SSE-O3-NEXT: popq %rbx
+; CHECK-SSE-O3-NEXT: retq
+;
+; CHECK-AVX-O3-LABEL: atomic_vec4_float_align:
+; CHECK-AVX-O3: # %bb.0:
+; CHECK-AVX-O3-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-AVX-O3-NEXT: retq
+;
+; CHECK-O0-LABEL: atomic_vec4_float_align:
+; CHECK-O0: # %bb.0:
+; CHECK-O0-NEXT: pushq %rax
+; CHECK-O0-NEXT: movl $2, %esi
+; CHECK-O0-NEXT: callq __atomic_load_16@PLT
+; CHECK-O0-NEXT: movq %rdx, %xmm1
+; CHECK-O0-NEXT: movq %rax, %xmm0
+; CHECK-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-O0-NEXT: popq %rax
+; CHECK-O0-NEXT: retq
+;
+; CHECK-SSE-O0-LABEL: atomic_vec4_float_align:
+; CHECK-SSE-O0: # %bb.0:
+; CHECK-SSE-O0-NEXT: pushq %rbx
+; CHECK-SSE-O0-NEXT: xorl %eax, %eax
+; CHECK-SSE-O0-NEXT: movl %eax, %ebx
+; CHECK-SSE-O0-NEXT: movq %rbx, %rax
+; CHECK-SSE-O0-NEXT: movq %rbx, %rdx
+; CHECK-SSE-O0-NEXT: movq %rbx, %rcx
+; CHECK-SSE-O0-NEXT: lock cmpxchg16b (%rdi)
+; CHECK-SSE-O0-NEXT: movq %rdx, %xmm1
+; CHECK-SSE-O0-NEXT: movq %rax, %xmm0
+; CHECK-SSE-O0-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-SSE-O0-NEXT: popq %rbx
+; CHECK-SSE-O0-NEXT: retq
+;
+; CHECK-AVX-O0-LABEL: atomic_vec4_float_align:
+; CHECK-AVX-O0: # %bb.0:
+; CHECK-AVX-O0-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-AVX-O0-NEXT: retq
+ %ret = load atomic <4 x float>, ptr %x acquire, align 16
+ ret <4 x float> %ret
+}
+
define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
; CHECK-O3-LABEL: atomic_vec4_float:
; CHECK-O3: # %bb.0:
|
This commit casts floats to ints in an atomic load during AtomicExpand to support floating point types. It also is required to support 128 bit vectors in SSE/AVX. commit-id:80b9b6a7
d074cd3
to
6eb10b5
Compare
21475ae
to
b2b23bf
Compare
TargetLowering::AtomicExpansionKind | ||
X86TargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const { | ||
if (LI->getType()->getScalarType()->isFloatingPointTy()) | ||
return AtomicExpansionKind::CastToInteger; | ||
return AtomicExpansionKind::None; | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We want to get away from using the IR expansion
This commit casts floats to ints in an atomic load during AtomicExpand to support
floating point types. It also is required to support 128 bit vectors in SSE/AVX.
Stack: