Skip to content

[X86] Add atomic vector tests for unaligned >1 sizes. #120387

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: users/jofrn/spr/main/f9d761c5
Choose a base branch
from

Conversation

@llvmbot
Copy link
Member

llvmbot commented Dec 18, 2024

@llvm/pr-subscribers-backend-x86

Author: None (jofrn)

Changes

Unaligned atomic vectors with size >1 are lowered to calls. Adding
their tests separately here.


Stack:

  • #120387 ⬅
  • #120386
  • #120385
  • #120384

⚠️ Part of a stack created by spr. Do not merge manually using the UI - doing so may have unexpected results.


Full diff: https://github.com/llvm/llvm-project/pull/120387.diff

1 File Affected:

  • (modified) llvm/test/CodeGen/X86/atomic-load-store.ll (+223)
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 2bde0d2ffd06ad..435e58bef6642d 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -110,3 +110,226 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
   ret <1 x bfloat> %ret
 }
 
+define <1 x i64> @atomic_vec1_i64(ptr %x) {
+; CHECK-LABEL: atomic_vec1_i64:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    movq %rdi, %rsi
+; CHECK-NEXT:    movq %rsp, %rdx
+; CHECK-NEXT:    movl $8, %edi
+; CHECK-NEXT:    movl $2, %ecx
+; CHECK-NEXT:    callq ___atomic_load
+; CHECK-NEXT:    movq (%rsp), %rax
+; CHECK-NEXT:    popq %rcx
+; CHECK-NEXT:    retq
+;
+; CHECK0-LABEL: atomic_vec1_i64:
+; CHECK0:       ## %bb.0:
+; CHECK0-NEXT:    pushq %rax
+; CHECK0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK0-NEXT:    movq %rdi, %rsi
+; CHECK0-NEXT:    movl $8, %edi
+; CHECK0-NEXT:    movq %rsp, %rdx
+; CHECK0-NEXT:    movl $2, %ecx
+; CHECK0-NEXT:    callq ___atomic_load
+; CHECK0-NEXT:    movq (%rsp), %rax
+; CHECK0-NEXT:    popq %rcx
+; CHECK0-NEXT:    retq
+  %ret = load atomic <1 x i64>, ptr %x acquire, align 4
+  ret <1 x i64> %ret
+}
+
+define <1 x double> @atomic_vec1_double(ptr %x) {
+; CHECK-LABEL: atomic_vec1_double:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    movq %rdi, %rsi
+; CHECK-NEXT:    movq %rsp, %rdx
+; CHECK-NEXT:    movl $8, %edi
+; CHECK-NEXT:    movl $2, %ecx
+; CHECK-NEXT:    callq ___atomic_load
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    retq
+;
+; CHECK0-LABEL: atomic_vec1_double:
+; CHECK0:       ## %bb.0:
+; CHECK0-NEXT:    pushq %rax
+; CHECK0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK0-NEXT:    movq %rdi, %rsi
+; CHECK0-NEXT:    movl $8, %edi
+; CHECK0-NEXT:    movq %rsp, %rdx
+; CHECK0-NEXT:    movl $2, %ecx
+; CHECK0-NEXT:    callq ___atomic_load
+; CHECK0-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK0-NEXT:    popq %rax
+; CHECK0-NEXT:    retq
+  %ret = load atomic <1 x double>, ptr %x acquire, align 4
+  ret <1 x double> %ret
+}
+
+define <2 x i32> @atomic_vec2_i32(ptr %x) {
+; CHECK-LABEL: atomic_vec2_i32:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    movq %rdi, %rsi
+; CHECK-NEXT:    movq %rsp, %rdx
+; CHECK-NEXT:    movl $8, %edi
+; CHECK-NEXT:    movl $2, %ecx
+; CHECK-NEXT:    callq ___atomic_load
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    retq
+;
+; CHECK0-LABEL: atomic_vec2_i32:
+; CHECK0:       ## %bb.0:
+; CHECK0-NEXT:    pushq %rax
+; CHECK0-NEXT:    .cfi_def_cfa_offset 16
+; CHECK0-NEXT:    movq %rdi, %rsi
+; CHECK0-NEXT:    movl $8, %edi
+; CHECK0-NEXT:    movq %rsp, %rdx
+; CHECK0-NEXT:    movl $2, %ecx
+; CHECK0-NEXT:    callq ___atomic_load
+; CHECK0-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; CHECK0-NEXT:    popq %rax
+; CHECK0-NEXT:    retq
+  %ret = load atomic <2 x i32>, ptr %x acquire, align 4
+  ret <2 x i32> %ret
+}
+
+define <4 x float> @atomic_vec4_float(ptr %x) {
+; CHECK-LABEL: atomic_vec4_float:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    movq %rdi, %rsi
+; CHECK-NEXT:    movq %rsp, %rdx
+; CHECK-NEXT:    movl $16, %edi
+; CHECK-NEXT:    movl $2, %ecx
+; CHECK-NEXT:    callq ___atomic_load
+; CHECK-NEXT:    movaps (%rsp), %xmm0
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    retq
+;
+; CHECK0-LABEL: atomic_vec4_float:
+; CHECK0:       ## %bb.0:
+; CHECK0-NEXT:    subq $24, %rsp
+; CHECK0-NEXT:    .cfi_def_cfa_offset 32
+; CHECK0-NEXT:    movq %rdi, %rsi
+; CHECK0-NEXT:    movl $16, %edi
+; CHECK0-NEXT:    movq %rsp, %rdx
+; CHECK0-NEXT:    movl $2, %ecx
+; CHECK0-NEXT:    callq ___atomic_load
+; CHECK0-NEXT:    movaps (%rsp), %xmm0
+; CHECK0-NEXT:    addq $24, %rsp
+; CHECK0-NEXT:    retq
+  %ret = load atomic <4 x float>, ptr %x acquire, align 4
+  ret <4 x float> %ret
+}
+
+define <8 x double> @atomic_vec8_double(ptr %x) {
+; CHECK-LABEL: atomic_vec8_double:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    subq $72, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    movq %rdi, %rsi
+; CHECK-NEXT:    movq %rsp, %rdx
+; CHECK-NEXT:    movl $64, %edi
+; CHECK-NEXT:    movl $2, %ecx
+; CHECK-NEXT:    callq ___atomic_load
+; CHECK-NEXT:    movaps (%rsp), %xmm0
+; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2
+; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm3
+; CHECK-NEXT:    addq $72, %rsp
+; CHECK-NEXT:    retq
+;
+; CHECK0-LABEL: atomic_vec8_double:
+; CHECK0:       ## %bb.0:
+; CHECK0-NEXT:    subq $72, %rsp
+; CHECK0-NEXT:    .cfi_def_cfa_offset 80
+; CHECK0-NEXT:    movq %rdi, %rsi
+; CHECK0-NEXT:    movl $64, %edi
+; CHECK0-NEXT:    movq %rsp, %rdx
+; CHECK0-NEXT:    movl $2, %ecx
+; CHECK0-NEXT:    callq ___atomic_load
+; CHECK0-NEXT:    movapd (%rsp), %xmm0
+; CHECK0-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm1
+; CHECK0-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm2
+; CHECK0-NEXT:    movapd {{[0-9]+}}(%rsp), %xmm3
+; CHECK0-NEXT:    addq $72, %rsp
+; CHECK0-NEXT:    retq
+  %ret = load atomic <8 x double>, ptr %x acquire, align 4
+  ret <8 x double> %ret
+}
+
+define <16 x bfloat> @atomic_vec16_bfloat(ptr %x) {
+; CHECK-LABEL: atomic_vec16_bfloat:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    movq %rdi, %rsi
+; CHECK-NEXT:    movq %rsp, %rdx
+; CHECK-NEXT:    movl $32, %edi
+; CHECK-NEXT:    movl $2, %ecx
+; CHECK-NEXT:    callq ___atomic_load
+; CHECK-NEXT:    movaps (%rsp), %xmm0
+; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    retq
+;
+; CHECK0-LABEL: atomic_vec16_bfloat:
+; CHECK0:       ## %bb.0:
+; CHECK0-NEXT:    subq $40, %rsp
+; CHECK0-NEXT:    .cfi_def_cfa_offset 48
+; CHECK0-NEXT:    movq %rdi, %rsi
+; CHECK0-NEXT:    movl $32, %edi
+; CHECK0-NEXT:    movq %rsp, %rdx
+; CHECK0-NEXT:    movl $2, %ecx
+; CHECK0-NEXT:    callq ___atomic_load
+; CHECK0-NEXT:    movaps (%rsp), %xmm0
+; CHECK0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK0-NEXT:    addq $40, %rsp
+; CHECK0-NEXT:    retq
+  %ret = load atomic <16 x bfloat>, ptr %x acquire, align 4
+  ret <16 x bfloat> %ret
+}
+
+define <32 x half> @atomic_vec32_half(ptr %x) {
+; CHECK-LABEL: atomic_vec32_half:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    subq $72, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    movq %rdi, %rsi
+; CHECK-NEXT:    movq %rsp, %rdx
+; CHECK-NEXT:    movl $64, %edi
+; CHECK-NEXT:    movl $2, %ecx
+; CHECK-NEXT:    callq ___atomic_load
+; CHECK-NEXT:    movaps (%rsp), %xmm0
+; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2
+; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm3
+; CHECK-NEXT:    addq $72, %rsp
+; CHECK-NEXT:    retq
+;
+; CHECK0-LABEL: atomic_vec32_half:
+; CHECK0:       ## %bb.0:
+; CHECK0-NEXT:    subq $72, %rsp
+; CHECK0-NEXT:    .cfi_def_cfa_offset 80
+; CHECK0-NEXT:    movq %rdi, %rsi
+; CHECK0-NEXT:    movl $64, %edi
+; CHECK0-NEXT:    movq %rsp, %rdx
+; CHECK0-NEXT:    movl $2, %ecx
+; CHECK0-NEXT:    callq ___atomic_load
+; CHECK0-NEXT:    movaps (%rsp), %xmm0
+; CHECK0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
+; CHECK0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm2
+; CHECK0-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm3
+; CHECK0-NEXT:    addq $72, %rsp
+; CHECK0-NEXT:    retq
+  %ret = load atomic <32 x half>, ptr %x acquire, align 4
+  ret <32 x half> %ret
+}

@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 441e75a to 3773e66 Compare December 18, 2024 08:54
@@ -110,3 +110,226 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
ret <1 x bfloat> %ret
}

define <1 x i64> @atomic_vec1_i64(ptr %x) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add nounwind to get rid of cfi noise

@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 3773e66 to 813fffe Compare December 18, 2024 11:45
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from 141279f to 70bb5b9 Compare December 18, 2024 11:45
Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM - cheers

@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from 70bb5b9 to dac7f1e Compare December 18, 2024 19:11
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch 2 times, most recently from 05a76cf to bb71e93 Compare December 18, 2024 20:47
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from dac7f1e to df5e28c Compare December 18, 2024 20:47
@jyknight
Copy link
Member

Atomic vectors with size >1 are lowered to calls.

That's not true; they're only lowered to calls when the alignment is not known to be sufficient (e.g. <2 x i32> must have align 8, not align 4).

; CHECK0-NEXT: movq (%rsp), %rax
; CHECK0-NEXT: popq %rcx
; CHECK0-NEXT: retq
%ret = load atomic <1 x ptr>, ptr %x acquire, align 4
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should test the naturally aligned case, this under-aligned one is a separate test

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably should just add these kinds of cases with that PR. The set of strange under-aligned cases happen to work now, but I'm not sure that's enough reason to separately push them

@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from bb71e93 to 5e8da05 Compare December 19, 2024 02:29
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch 2 times, most recently from 255a011 to e3dd939 Compare December 19, 2024 02:31
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 5e8da05 to e71ac05 Compare December 19, 2024 02:31
; CHECK0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK0-NEXT: popq %rax
; CHECK0-NEXT: retq
%ret = load atomic <1 x double>, ptr %x acquire, align 4
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these are all under aligned

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jofrn jofrn changed the title [X86] Add atomic vector tests for >1 sizes. [X86] Add atomic vector tests for unaligned >1 sizes. Dec 19, 2024
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from e3dd939 to b336c25 Compare December 19, 2024 13:16
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch 2 times, most recently from 0564ecb to 454b8e6 Compare December 19, 2024 16:01
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from b336c25 to 7ef2576 Compare December 19, 2024 16:01
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from 19fb0ba to d2b0634 Compare January 7, 2025 15:31
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from d2b0634 to 9a7ee21 Compare January 15, 2025 11:52
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch 2 times, most recently from 84a1672 to 6d84ebe Compare January 15, 2025 13:26
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from b2541fd to 846ab2e Compare January 21, 2025 16:58
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 6d84ebe to 647a59b Compare January 21, 2025 16:58
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from 846ab2e to 4698589 Compare January 21, 2025 17:13
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 647a59b to b0364ee Compare January 21, 2025 17:13
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from 4698589 to e261f40 Compare January 21, 2025 17:44
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from b0364ee to 6667568 Compare January 21, 2025 17:44
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from e261f40 to 6f50ac7 Compare January 21, 2025 17:50
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 6667568 to 2af4d7c Compare January 21, 2025 17:50
Copy link
Contributor

@arsenm arsenm left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tests are fine but I'd just merge these in with the support commits

@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 2af4d7c to 7562f64 Compare January 22, 2025 11:05
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch 2 times, most recently from 8137157 to a495eaa Compare January 22, 2025 11:43
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 7562f64 to bc9ddb8 Compare January 22, 2025 11:43
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from a495eaa to ab42ec1 Compare January 22, 2025 17:38
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch 2 times, most recently from 657d3dc to 86d8d1c Compare January 22, 2025 17:47
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from ab42ec1 to 6ac3c17 Compare January 22, 2025 17:47
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 86d8d1c to 3c692ed Compare January 22, 2025 18:19
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from 6ac3c17 to 03a726d Compare January 22, 2025 18:19
@jofrn jofrn changed the base branch from users/jofrn/spr/main/f9d761c5 to main February 2, 2025 20:25
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 3c692ed to 05e7afd Compare February 2, 2025 20:25
@jofrn jofrn changed the base branch from main to users/jofrn/spr/main/f9d761c5 February 2, 2025 20:25
Unaligned atomic vectors with size >1 are lowered to calls.
Adding their tests separately here.

commit-id:a06a5cc6
@jofrn jofrn force-pushed the users/jofrn/spr/main/a06a5cc6 branch from 05e7afd to 03400f8 Compare March 3, 2025 23:26
@jofrn jofrn force-pushed the users/jofrn/spr/main/f9d761c5 branch from f096c88 to edd2af8 Compare March 3, 2025 23:27
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

5 participants