-
Notifications
You must be signed in to change notification settings - Fork 13.2k
[X86] Add atomic vector tests for unaligned >1 sizes. #120387
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/jofrn/spr/main/f9d761c5
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-x86 Author: None (jofrn)
Changes: Unaligned atomic vectors with size >1 are lowered to calls. Adding their tests separately here.
Stack:
Full diff: https://github.com/llvm/llvm-project/pull/120387.diff 1 Files Affected:
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 2bde0d2ffd06ad..435e58bef6642d 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -110,3 +110,226 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
ret <1 x bfloat> %ret
}
+define <1 x i64> @atomic_vec1_i64(ptr %x) {
+; CHECK-LABEL: atomic_vec1_i64:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movq %rdi, %rsi
+; CHECK-NEXT: movq %rsp, %rdx
+; CHECK-NEXT: movl $8, %edi
+; CHECK-NEXT: movl $2, %ecx
+; CHECK-NEXT: callq ___atomic_load
+; CHECK-NEXT: movq (%rsp), %rax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec1_i64:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: pushq %rax
+; CHECK0-NEXT: .cfi_def_cfa_offset 16
+; CHECK0-NEXT: movq %rdi, %rsi
+; CHECK0-NEXT: movl $8, %edi
+; CHECK0-NEXT: movq %rsp, %rdx
+; CHECK0-NEXT: movl $2, %ecx
+; CHECK0-NEXT: callq ___atomic_load
+; CHECK0-NEXT: movq (%rsp), %rax
+; CHECK0-NEXT: popq %rcx
+; CHECK0-NEXT: retq
+ %ret = load atomic <1 x i64>, ptr %x acquire, align 4
+ ret <1 x i64> %ret
+}
+
+define <1 x double> @atomic_vec1_double(ptr %x) {
+; CHECK-LABEL: atomic_vec1_double:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movq %rdi, %rsi
+; CHECK-NEXT: movq %rsp, %rdx
+; CHECK-NEXT: movl $8, %edi
+; CHECK-NEXT: movl $2, %ecx
+; CHECK-NEXT: callq ___atomic_load
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec1_double:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: pushq %rax
+; CHECK0-NEXT: .cfi_def_cfa_offset 16
+; CHECK0-NEXT: movq %rdi, %rsi
+; CHECK0-NEXT: movl $8, %edi
+; CHECK0-NEXT: movq %rsp, %rdx
+; CHECK0-NEXT: movl $2, %ecx
+; CHECK0-NEXT: callq ___atomic_load
+; CHECK0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK0-NEXT: popq %rax
+; CHECK0-NEXT: retq
+ %ret = load atomic <1 x double>, ptr %x acquire, align 4
+ ret <1 x double> %ret
+}
+
+define <2 x i32> @atomic_vec2_i32(ptr %x) {
+; CHECK-LABEL: atomic_vec2_i32:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movq %rdi, %rsi
+; CHECK-NEXT: movq %rsp, %rdx
+; CHECK-NEXT: movl $8, %edi
+; CHECK-NEXT: movl $2, %ecx
+; CHECK-NEXT: callq ___atomic_load
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec2_i32:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: pushq %rax
+; CHECK0-NEXT: .cfi_def_cfa_offset 16
+; CHECK0-NEXT: movq %rdi, %rsi
+; CHECK0-NEXT: movl $8, %edi
+; CHECK0-NEXT: movq %rsp, %rdx
+; CHECK0-NEXT: movl $2, %ecx
+; CHECK0-NEXT: callq ___atomic_load
+; CHECK0-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; CHECK0-NEXT: popq %rax
+; CHECK0-NEXT: retq
+ %ret = load atomic <2 x i32>, ptr %x acquire, align 4
+ ret <2 x i32> %ret
+}
+
+define <4 x float> @atomic_vec4_float(ptr %x) {
+; CHECK-LABEL: atomic_vec4_float:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: subq $24, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: movq %rdi, %rsi
+; CHECK-NEXT: movq %rsp, %rdx
+; CHECK-NEXT: movl $16, %edi
+; CHECK-NEXT: movl $2, %ecx
+; CHECK-NEXT: callq ___atomic_load
+; CHECK-NEXT: movaps (%rsp), %xmm0
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec4_float:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: subq $24, %rsp
+; CHECK0-NEXT: .cfi_def_cfa_offset 32
+; CHECK0-NEXT: movq %rdi, %rsi
+; CHECK0-NEXT: movl $16, %edi
+; CHECK0-NEXT: movq %rsp, %rdx
+; CHECK0-NEXT: movl $2, %ecx
+; CHECK0-NEXT: callq ___atomic_load
+; CHECK0-NEXT: movaps (%rsp), %xmm0
+; CHECK0-NEXT: addq $24, %rsp
+; CHECK0-NEXT: retq
+ %ret = load atomic <4 x float>, ptr %x acquire, align 4
+ ret <4 x float> %ret
+}
+
+define <8 x double> @atomic_vec8_double(ptr %x) {
+; CHECK-LABEL: atomic_vec8_double:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: subq $72, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: movq %rdi, %rsi
+; CHECK-NEXT: movq %rsp, %rdx
+; CHECK-NEXT: movl $64, %edi
+; CHECK-NEXT: movl $2, %ecx
+; CHECK-NEXT: callq ___atomic_load
+; CHECK-NEXT: movaps (%rsp), %xmm0
+; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
+; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
+; CHECK-NEXT: addq $72, %rsp
+; CHECK-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec8_double:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: subq $72, %rsp
+; CHECK0-NEXT: .cfi_def_cfa_offset 80
+; CHECK0-NEXT: movq %rdi, %rsi
+; CHECK0-NEXT: movl $64, %edi
+; CHECK0-NEXT: movq %rsp, %rdx
+; CHECK0-NEXT: movl $2, %ecx
+; CHECK0-NEXT: callq ___atomic_load
+; CHECK0-NEXT: movapd (%rsp), %xmm0
+; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm1
+; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm2
+; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm3
+; CHECK0-NEXT: addq $72, %rsp
+; CHECK0-NEXT: retq
+ %ret = load atomic <8 x double>, ptr %x acquire, align 4
+ ret <8 x double> %ret
+}
+
+define <16 x bfloat> @atomic_vec16_bfloat(ptr %x) {
+; CHECK-LABEL: atomic_vec16_bfloat:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: subq $40, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: movq %rdi, %rsi
+; CHECK-NEXT: movq %rsp, %rdx
+; CHECK-NEXT: movl $32, %edi
+; CHECK-NEXT: movl $2, %ecx
+; CHECK-NEXT: callq ___atomic_load
+; CHECK-NEXT: movaps (%rsp), %xmm0
+; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-NEXT: addq $40, %rsp
+; CHECK-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec16_bfloat:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: subq $40, %rsp
+; CHECK0-NEXT: .cfi_def_cfa_offset 48
+; CHECK0-NEXT: movq %rdi, %rsi
+; CHECK0-NEXT: movl $32, %edi
+; CHECK0-NEXT: movq %rsp, %rdx
+; CHECK0-NEXT: movl $2, %ecx
+; CHECK0-NEXT: callq ___atomic_load
+; CHECK0-NEXT: movaps (%rsp), %xmm0
+; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK0-NEXT: addq $40, %rsp
+; CHECK0-NEXT: retq
+ %ret = load atomic <16 x bfloat>, ptr %x acquire, align 4
+ ret <16 x bfloat> %ret
+}
+
+define <32 x half> @atomic_vec32_half(ptr %x) {
+; CHECK-LABEL: atomic_vec32_half:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: subq $72, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: movq %rdi, %rsi
+; CHECK-NEXT: movq %rsp, %rdx
+; CHECK-NEXT: movl $64, %edi
+; CHECK-NEXT: movl $2, %ecx
+; CHECK-NEXT: callq ___atomic_load
+; CHECK-NEXT: movaps (%rsp), %xmm0
+; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
+; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
+; CHECK-NEXT: addq $72, %rsp
+; CHECK-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec32_half:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: subq $72, %rsp
+; CHECK0-NEXT: .cfi_def_cfa_offset 80
+; CHECK0-NEXT: movq %rdi, %rsi
+; CHECK0-NEXT: movl $64, %edi
+; CHECK0-NEXT: movq %rsp, %rdx
+; CHECK0-NEXT: movl $2, %ecx
+; CHECK0-NEXT: callq ___atomic_load
+; CHECK0-NEXT: movaps (%rsp), %xmm0
+; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
+; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
+; CHECK0-NEXT: addq $72, %rsp
+; CHECK0-NEXT: retq
+ %ret = load atomic <32 x half>, ptr %x acquire, align 4
+ ret <32 x half> %ret
+}
|
441e75a
to
3773e66
Compare
@@ -110,3 +110,226 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
  ret <1 x bfloat> %ret
}

define <1 x i64> @atomic_vec1_i64(ptr %x) {
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
add nounwind to get rid of cfi noise
3773e66
to
813fffe
Compare
141279f
to
70bb5b9
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
70bb5b9
to
dac7f1e
Compare
05a76cf
to
bb71e93
Compare
dac7f1e
to
df5e28c
Compare
That's not true; they're only lowered to calls when the alignment is not known to be sufficient (e.g. an under-aligned `align 4` load of a wider vector type).
; CHECK0-NEXT: movq (%rsp), %rax
; CHECK0-NEXT: popq %rcx
; CHECK0-NEXT: retq
  %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should test the naturally aligned case, this under-aligned one is a separate test
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added that one here: https://github.com/llvm/llvm-project/pull/120385/files.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably should just add these kinds of cases with that PR. The set of strange under-aligned cases happen to work now, but I'm not sure that's enough reason to separately push them
bb71e93
to
5e8da05
Compare
255a011
to
e3dd939
Compare
5e8da05
to
e71ac05
Compare
; CHECK0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK0-NEXT: popq %rax
; CHECK0-NEXT: retq
  %ret = load atomic <1 x double>, ptr %x acquire, align 4
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
these are all under aligned
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added this one here: https://github.com/llvm/llvm-project/pull/120386/files.
e3dd939
to
b336c25
Compare
0564ecb
to
454b8e6
Compare
b336c25
to
7ef2576
Compare
19fb0ba
to
d2b0634
Compare
d2b0634
to
9a7ee21
Compare
84a1672
to
6d84ebe
Compare
b2541fd
to
846ab2e
Compare
6d84ebe
to
647a59b
Compare
846ab2e
to
4698589
Compare
647a59b
to
b0364ee
Compare
4698589
to
e261f40
Compare
b0364ee
to
6667568
Compare
e261f40
to
6f50ac7
Compare
6667568
to
2af4d7c
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Tests are fine but I'd just merge these in with the support commits
2af4d7c
to
7562f64
Compare
8137157
to
a495eaa
Compare
7562f64
to
bc9ddb8
Compare
a495eaa
to
ab42ec1
Compare
657d3dc
to
86d8d1c
Compare
ab42ec1
to
6ac3c17
Compare
86d8d1c
to
3c692ed
Compare
6ac3c17
to
03a726d
Compare
3c692ed
to
05e7afd
Compare
Unaligned atomic vectors with size >1 are lowered to calls. Adding their tests separately here. commit-id:a06a5cc6
05e7afd
to
03400f8
Compare
f096c88
to
edd2af8
Compare
Unaligned atomic vectors with size >1 are lowered to calls.
Adding their tests separately here.
Stack: