Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AtomicExpand] Add bitcasts when expanding load atomic vector #120716

Open
wants to merge 1 commit into
base: users/jofrn/spr/main/b83937a8
Choose a base branch
from

Conversation

jofrn
Copy link
Contributor

@jofrn jofrn commented Dec 20, 2024

@llvmbot
Copy link
Member

llvmbot commented Dec 20, 2024

@llvm/pr-subscribers-llvm-transforms
@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-backend-arm

Author: None (jofrn)

Changes

AtomicExpand fails for aligned load atomic <n x T> because it
does not find a compatible library call. This change marks load atomics
to not use sized calls and instead resort to using ___atomic_load.


Stack:

  • #120716 ⬅
  • #120640
  • #120598
  • #120387
  • #120386
  • #120385
  • #120384

⚠️ Part of a stack created by spr. Do not merge manually using the UI - doing so may have unexpected results.


Full diff: https://github.com/llvm/llvm-project/pull/120716.diff

3 Files Affected:

  • (modified) llvm/lib/CodeGen/AtomicExpandPass.cpp (+3-1)
  • (modified) llvm/test/CodeGen/ARM/atomic-load-store.ll (+57)
  • (modified) llvm/test/CodeGen/X86/atomic-load-store.ll (+51-10)
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index a75fa688d87a8d..d860bbf2685ed2 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1884,7 +1884,9 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
   IRBuilder<> Builder(I);
   IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
 
-  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
+  const bool IsAtomic = isa<LoadInst>(I) ? cast<LoadInst>(I)->isAtomic() : false;
+  const bool UseSizedLibcall = !(I->getType()->isVectorTy() && IsAtomic) &&
+      canUseSizedAtomicCall(Size, Alignment, DL);
   Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
 
   const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll
index 560dfde356c29d..7f0f3008d2d5c2 100644
--- a/llvm/test/CodeGen/ARM/atomic-load-store.ll
+++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll
@@ -983,3 +983,60 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
   store atomic double %val1, ptr %ptr seq_cst, align 8
   ret void
 }
+
+define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
+; ARM-LABEL: atomic_vec1_ptr:
+; ARM:       @ %bb.0:
+; ARM-NEXT:    ldr r0, [r0]
+; ARM-NEXT:    dmb ish
+; ARM-NEXT:    bx lr
+;
+; ARMOPTNONE-LABEL: atomic_vec1_ptr:
+; ARMOPTNONE:       @ %bb.0:
+; ARMOPTNONE-NEXT:    ldr r0, [r0]
+; ARMOPTNONE-NEXT:    dmb ish
+; ARMOPTNONE-NEXT:    bx lr
+;
+; THUMBTWO-LABEL: atomic_vec1_ptr:
+; THUMBTWO:       @ %bb.0:
+; THUMBTWO-NEXT:    ldr r0, [r0]
+; THUMBTWO-NEXT:    dmb ish
+; THUMBTWO-NEXT:    bx lr
+;
+; THUMBONE-LABEL: atomic_vec1_ptr:
+; THUMBONE:       @ %bb.0:
+; THUMBONE-NEXT:    push {r7, lr}
+; THUMBONE-NEXT:    movs r1, #0
+; THUMBONE-NEXT:    mov r2, r1
+; THUMBONE-NEXT:    bl __sync_val_compare_and_swap_4
+; THUMBONE-NEXT:    pop {r7, pc}
+;
+; ARMV4-LABEL: atomic_vec1_ptr:
+; ARMV4:       @ %bb.0:
+; ARMV4-NEXT:    push {r11, lr}
+; ARMV4-NEXT:    sub sp, sp, #8
+; ARMV4-NEXT:    add r2, sp, #4
+; ARMV4-NEXT:    mov r1, r0
+; ARMV4-NEXT:    mov r0, #4
+; ARMV4-NEXT:    mov r3, #2
+; ARMV4-NEXT:    bl __atomic_load
+; ARMV4-NEXT:    ldr r0, [sp, #4]
+; ARMV4-NEXT:    add sp, sp, #8
+; ARMV4-NEXT:    pop {r11, lr}
+; ARMV4-NEXT:    mov pc, lr
+;
+; ARMV6-LABEL: atomic_vec1_ptr:
+; ARMV6:       @ %bb.0:
+; ARMV6-NEXT:    ldr r0, [r0]
+; ARMV6-NEXT:    mov r1, #0
+; ARMV6-NEXT:    mcr p15, #0, r1, c7, c10, #5
+; ARMV6-NEXT:    bx lr
+;
+; THUMBM-LABEL: atomic_vec1_ptr:
+; THUMBM:       @ %bb.0:
+; THUMBM-NEXT:    ldr r0, [r0]
+; THUMBM-NEXT:    dmb sy
+; THUMBM-NEXT:    bx lr
+  %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
+  ret <1 x ptr> %ret
+}
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 3157e25a3eee20..e92d9ab4add1bc 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -399,17 +399,58 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
   ret <2 x i32> %ret
 }
 
+define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec2_ptr_align:
+; CHECK3:       ## %bb.0:
+; CHECK3-NEXT:    subq $24, %rsp
+; CHECK3-NEXT:    movq %rdi, %rsi
+; CHECK3-NEXT:    movq %rsp, %rdx
+; CHECK3-NEXT:    movl $16, %edi
+; CHECK3-NEXT:    movl $2, %ecx
+; CHECK3-NEXT:    callq ___atomic_load
+; CHECK3-NEXT:    movaps (%rsp), %xmm0
+; CHECK3-NEXT:    addq $24, %rsp
+; CHECK3-NEXT:    retq
+;
+; CHECK0-LABEL: atomic_vec2_ptr_align:
+; CHECK0:       ## %bb.0:
+; CHECK0-NEXT:    subq $24, %rsp
+; CHECK0-NEXT:    movq %rdi, %rsi
+; CHECK0-NEXT:    movl $16, %edi
+; CHECK0-NEXT:    movq %rsp, %rdx
+; CHECK0-NEXT:    movl $2, %ecx
+; CHECK0-NEXT:    callq ___atomic_load
+; CHECK0-NEXT:    movdqa (%rsp), %xmm0
+; CHECK0-NEXT:    addq $24, %rsp
+; CHECK0-NEXT:    retq
+  %ret = load atomic <2 x ptr>, ptr %x acquire, align 16
+  ret <2 x ptr> %ret
+}
+
 define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
-; CHECK-LABEL: atomic_vec4_float_align:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    pushq %rax
-; CHECK-NEXT:    movl $2, %esi
-; CHECK-NEXT:    callq ___atomic_load_16
-; CHECK-NEXT:    movq %rdx, %xmm1
-; CHECK-NEXT:    movq %rax, %xmm0
-; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; CHECK-NEXT:    popq %rax
-; CHECK-NEXT:    retq
+; CHECK3-LABEL: atomic_vec4_float_align:
+; CHECK3:       ## %bb.0:
+; CHECK3-NEXT:    subq $24, %rsp
+; CHECK3-NEXT:    movq %rdi, %rsi
+; CHECK3-NEXT:    movq %rsp, %rdx
+; CHECK3-NEXT:    movl $16, %edi
+; CHECK3-NEXT:    movl $2, %ecx
+; CHECK3-NEXT:    callq ___atomic_load
+; CHECK3-NEXT:    movaps (%rsp), %xmm0
+; CHECK3-NEXT:    addq $24, %rsp
+; CHECK3-NEXT:    retq
+;
+; CHECK0-LABEL: atomic_vec4_float_align:
+; CHECK0:       ## %bb.0:
+; CHECK0-NEXT:    subq $24, %rsp
+; CHECK0-NEXT:    movq %rdi, %rsi
+; CHECK0-NEXT:    movl $16, %edi
+; CHECK0-NEXT:    movq %rsp, %rdx
+; CHECK0-NEXT:    movl $2, %ecx
+; CHECK0-NEXT:    callq ___atomic_load
+; CHECK0-NEXT:    movaps (%rsp), %xmm0
+; CHECK0-NEXT:    addq $24, %rsp
+; CHECK0-NEXT:    retq
   %ret = load atomic <4 x float>, ptr %x acquire, align 16
   ret <4 x float> %ret
 }

Copy link

github-actions bot commented Dec 20, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.

@jofrn jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from 761d4d9 to 6506acb Compare December 20, 2024 11:39
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch 2 times, most recently from 241b7c7 to 1a9eae9 Compare December 20, 2024 11:52
@jofrn jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from 6506acb to db674f8 Compare December 20, 2024 11:52
bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
const bool IsAtomic =
isa<LoadInst>(I) ? cast<LoadInst>(I)->isAtomic() : false;
const bool UseSizedLibcall = !(I->getType()->isVectorTy() && IsAtomic) &&
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This shouldn't need to consider whether it's atomic or the type. The sized call should work fine. Some casts may be necessary

@jofrn jofrn changed the title [AtomicExpand] Avoid sized call when expanding load atomic vector [AtomicExpand] Add bitcasts when expanding load atomic vector Jan 2, 2025
@jofrn jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from db674f8 to 13ea377 Compare January 2, 2025 19:21
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch 2 times, most recently from 61ec1ef to dc2032f Compare January 2, 2025 20:03
@jofrn jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from 13ea377 to e11194d Compare January 2, 2025 20:45
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch 2 times, most recently from 45e7035 to dd2c034 Compare January 6, 2025 19:25
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch 2 times, most recently from 751b5eb to 20bbd6e Compare January 7, 2025 15:31
@jofrn jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from 0cb57c2 to ba2a301 Compare January 7, 2025 15:31
;
%ret = load atomic <2 x ptr>, ptr %x acquire, align 16
ret <2 x ptr> %ret
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Check addrspace(270) vector. Also FP and int cases?

@jofrn jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from ba2a301 to f129a3c Compare January 15, 2025 11:52
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch 3 times, most recently from 3696420 to 9fde3b5 Compare January 21, 2025 16:58
@jofrn jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from bec6d57 to b36e103 Compare January 21, 2025 16:58
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from 9fde3b5 to 4205f3f Compare January 21, 2025 17:13
@jofrn jofrn force-pushed the users/jofrn/spr/main/3a045357 branch 2 times, most recently from 1ac2df7 to 064b5a9 Compare January 21, 2025 17:44
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch 2 times, most recently from 30bd2fc to f4d372c Compare January 21, 2025 17:50
@jofrn jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from 064b5a9 to a14a025 Compare January 21, 2025 17:50
I->getType()->isVectorTy() && !Result->getType()->isVectorTy()) {
TypeSize Size = Result->getType()->getPrimitiveSizeInBits();
assert((unsigned)Size % 2 == 0);
unsigned HalfSize = (unsigned)Size / 2;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bitcast i128 to <2 x ptr> and then inttoptr <2 x i64> to <2 x ptr>?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, yes. Getting around to doing so. Thank you.

@jofrn jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from a14a025 to 65ac272 Compare January 22, 2025 11:05
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from f4d372c to 48963d8 Compare January 22, 2025 11:05
@jofrn jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from 65ac272 to db0706b Compare January 22, 2025 11:43
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from 48963d8 to 62a7386 Compare January 22, 2025 11:43
@jofrn jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from db0706b to d70bc21 Compare January 22, 2025 17:38
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch 2 times, most recently from 45ccff2 to b46ebe3 Compare January 22, 2025 17:47
@jofrn jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from d70bc21 to 7cb68e7 Compare January 22, 2025 17:47
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from b46ebe3 to f208701 Compare January 22, 2025 18:19
@jofrn jofrn force-pushed the users/jofrn/spr/main/3a045357 branch from 7cb68e7 to dca36ef Compare January 22, 2025 18:19
@jofrn jofrn changed the base branch from users/jofrn/spr/main/3a045357 to main February 2, 2025 20:25
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from f208701 to ceafc2d Compare February 2, 2025 20:25
@jofrn jofrn changed the base branch from main to users/jofrn/spr/main/b83937a8 February 2, 2025 20:25
@jofrn jofrn force-pushed the users/jofrn/spr/main/b83937a8 branch from fbe99a2 to 1c10a4c Compare February 2, 2025 20:43
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from ceafc2d to d1a877b Compare February 2, 2025 20:43
AtomicExpand fails for aligned `load atomic <n x T>` because it
does not find a compatible library call. This change adds appropriate
bitcasts so that the call can be lowered.

commit-id:f430c1af
@jofrn jofrn force-pushed the users/jofrn/spr/main/f430c1af branch from d1a877b to 8dc96d0 Compare March 3, 2025 23:26
@jofrn jofrn force-pushed the users/jofrn/spr/main/b83937a8 branch from 1c10a4c to d7d7c3b Compare March 3, 2025 23:26
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants