-
Notifications
You must be signed in to change notification settings - Fork 13.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AtomicExpand] Add bitcasts when expanding load atomic vector #120716
base: users/jofrn/spr/main/b83937a8
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-arm Author: None (jofrn) ChangesAtomicExpand fails for aligned Stack:
Full diff: https://github.com/llvm/llvm-project/pull/120716.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index a75fa688d87a8d..d860bbf2685ed2 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1884,7 +1884,9 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
IRBuilder<> Builder(I);
IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
- bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
+ const bool IsAtomic = isa<LoadInst>(I) ? cast<LoadInst>(I)->isAtomic() : false;
+ const bool UseSizedLibcall = !(I->getType()->isVectorTy() && IsAtomic) &&
+ canUseSizedAtomicCall(Size, Alignment, DL);
Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll
index 560dfde356c29d..7f0f3008d2d5c2 100644
--- a/llvm/test/CodeGen/ARM/atomic-load-store.ll
+++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll
@@ -983,3 +983,60 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
store atomic double %val1, ptr %ptr seq_cst, align 8
ret void
}
+
+define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
+; ARM-LABEL: atomic_vec1_ptr:
+; ARM: @ %bb.0:
+; ARM-NEXT: ldr r0, [r0]
+; ARM-NEXT: dmb ish
+; ARM-NEXT: bx lr
+;
+; ARMOPTNONE-LABEL: atomic_vec1_ptr:
+; ARMOPTNONE: @ %bb.0:
+; ARMOPTNONE-NEXT: ldr r0, [r0]
+; ARMOPTNONE-NEXT: dmb ish
+; ARMOPTNONE-NEXT: bx lr
+;
+; THUMBTWO-LABEL: atomic_vec1_ptr:
+; THUMBTWO: @ %bb.0:
+; THUMBTWO-NEXT: ldr r0, [r0]
+; THUMBTWO-NEXT: dmb ish
+; THUMBTWO-NEXT: bx lr
+;
+; THUMBONE-LABEL: atomic_vec1_ptr:
+; THUMBONE: @ %bb.0:
+; THUMBONE-NEXT: push {r7, lr}
+; THUMBONE-NEXT: movs r1, #0
+; THUMBONE-NEXT: mov r2, r1
+; THUMBONE-NEXT: bl __sync_val_compare_and_swap_4
+; THUMBONE-NEXT: pop {r7, pc}
+;
+; ARMV4-LABEL: atomic_vec1_ptr:
+; ARMV4: @ %bb.0:
+; ARMV4-NEXT: push {r11, lr}
+; ARMV4-NEXT: sub sp, sp, #8
+; ARMV4-NEXT: add r2, sp, #4
+; ARMV4-NEXT: mov r1, r0
+; ARMV4-NEXT: mov r0, #4
+; ARMV4-NEXT: mov r3, #2
+; ARMV4-NEXT: bl __atomic_load
+; ARMV4-NEXT: ldr r0, [sp, #4]
+; ARMV4-NEXT: add sp, sp, #8
+; ARMV4-NEXT: pop {r11, lr}
+; ARMV4-NEXT: mov pc, lr
+;
+; ARMV6-LABEL: atomic_vec1_ptr:
+; ARMV6: @ %bb.0:
+; ARMV6-NEXT: ldr r0, [r0]
+; ARMV6-NEXT: mov r1, #0
+; ARMV6-NEXT: mcr p15, #0, r1, c7, c10, #5
+; ARMV6-NEXT: bx lr
+;
+; THUMBM-LABEL: atomic_vec1_ptr:
+; THUMBM: @ %bb.0:
+; THUMBM-NEXT: ldr r0, [r0]
+; THUMBM-NEXT: dmb sy
+; THUMBM-NEXT: bx lr
+ %ret = load atomic <1 x ptr>, ptr %x acquire, align 4
+ ret <1 x ptr> %ret
+}
diff --git a/llvm/test/CodeGen/X86/atomic-load-store.ll b/llvm/test/CodeGen/X86/atomic-load-store.ll
index 3157e25a3eee20..e92d9ab4add1bc 100644
--- a/llvm/test/CodeGen/X86/atomic-load-store.ll
+++ b/llvm/test/CodeGen/X86/atomic-load-store.ll
@@ -399,17 +399,58 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
ret <2 x i32> %ret
}
+define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
+; CHECK3-LABEL: atomic_vec2_ptr_align:
+; CHECK3: ## %bb.0:
+; CHECK3-NEXT: subq $24, %rsp
+; CHECK3-NEXT: movq %rdi, %rsi
+; CHECK3-NEXT: movq %rsp, %rdx
+; CHECK3-NEXT: movl $16, %edi
+; CHECK3-NEXT: movl $2, %ecx
+; CHECK3-NEXT: callq ___atomic_load
+; CHECK3-NEXT: movaps (%rsp), %xmm0
+; CHECK3-NEXT: addq $24, %rsp
+; CHECK3-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec2_ptr_align:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: subq $24, %rsp
+; CHECK0-NEXT: movq %rdi, %rsi
+; CHECK0-NEXT: movl $16, %edi
+; CHECK0-NEXT: movq %rsp, %rdx
+; CHECK0-NEXT: movl $2, %ecx
+; CHECK0-NEXT: callq ___atomic_load
+; CHECK0-NEXT: movdqa (%rsp), %xmm0
+; CHECK0-NEXT: addq $24, %rsp
+; CHECK0-NEXT: retq
+ %ret = load atomic <2 x ptr>, ptr %x acquire, align 16
+ ret <2 x ptr> %ret
+}
+
define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
-; CHECK-LABEL: atomic_vec4_float_align:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: pushq %rax
-; CHECK-NEXT: movl $2, %esi
-; CHECK-NEXT: callq ___atomic_load_16
-; CHECK-NEXT: movq %rdx, %xmm1
-; CHECK-NEXT: movq %rax, %xmm0
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; CHECK-NEXT: popq %rax
-; CHECK-NEXT: retq
+; CHECK3-LABEL: atomic_vec4_float_align:
+; CHECK3: ## %bb.0:
+; CHECK3-NEXT: subq $24, %rsp
+; CHECK3-NEXT: movq %rdi, %rsi
+; CHECK3-NEXT: movq %rsp, %rdx
+; CHECK3-NEXT: movl $16, %edi
+; CHECK3-NEXT: movl $2, %ecx
+; CHECK3-NEXT: callq ___atomic_load
+; CHECK3-NEXT: movaps (%rsp), %xmm0
+; CHECK3-NEXT: addq $24, %rsp
+; CHECK3-NEXT: retq
+;
+; CHECK0-LABEL: atomic_vec4_float_align:
+; CHECK0: ## %bb.0:
+; CHECK0-NEXT: subq $24, %rsp
+; CHECK0-NEXT: movq %rdi, %rsi
+; CHECK0-NEXT: movl $16, %edi
+; CHECK0-NEXT: movq %rsp, %rdx
+; CHECK0-NEXT: movl $2, %ecx
+; CHECK0-NEXT: callq ___atomic_load
+; CHECK0-NEXT: movaps (%rsp), %xmm0
+; CHECK0-NEXT: addq $24, %rsp
+; CHECK0-NEXT: retq
%ret = load atomic <4 x float>, ptr %x acquire, align 16
ret <4 x float> %ret
}
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
761d4d9
to
6506acb
Compare
241b7c7
to
1a9eae9
Compare
6506acb
to
db674f8
Compare
bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL); | ||
const bool IsAtomic = | ||
isa<LoadInst>(I) ? cast<LoadInst>(I)->isAtomic() : false; | ||
const bool UseSizedLibcall = !(I->getType()->isVectorTy() && IsAtomic) && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This shouldn't need to consider whether it's atomic or the type. The sized call should work fine. Some casts may be necessary
db674f8
to
13ea377
Compare
61ec1ef
to
dc2032f
Compare
13ea377
to
e11194d
Compare
45e7035
to
dd2c034
Compare
751b5eb
to
20bbd6e
Compare
0cb57c2
to
ba2a301
Compare
; | ||
%ret = load atomic <2 x ptr>, ptr %x acquire, align 16 | ||
ret <2 x ptr> %ret | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Check addrspace(270) vector. Also FP and int cases?
llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
Outdated
Show resolved
Hide resolved
ba2a301
to
f129a3c
Compare
3696420
to
9fde3b5
Compare
bec6d57
to
b36e103
Compare
9fde3b5
to
4205f3f
Compare
1ac2df7
to
064b5a9
Compare
30bd2fc
to
f4d372c
Compare
064b5a9
to
a14a025
Compare
I->getType()->isVectorTy() && !Result->getType()->isVectorTy()) { | ||
TypeSize Size = Result->getType()->getPrimitiveSizeInBits(); | ||
assert((unsigned)Size % 2 == 0); | ||
unsigned HalfSize = (unsigned)Size / 2; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
bitcast i128 to <2 x ptr> and then inttoptr <2 x i64> to <2 x ptr>?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hi, yes. Getting around to doing so. Thank you.
a14a025
to
65ac272
Compare
f4d372c
to
48963d8
Compare
65ac272
to
db0706b
Compare
48963d8
to
62a7386
Compare
db0706b
to
d70bc21
Compare
45ccff2
to
b46ebe3
Compare
d70bc21
to
7cb68e7
Compare
b46ebe3
to
f208701
Compare
7cb68e7
to
dca36ef
Compare
f208701
to
ceafc2d
Compare
fbe99a2
to
1c10a4c
Compare
ceafc2d
to
d1a877b
Compare
AtomicExpand fails for aligned `load atomic <n x T>` because it does not find a compatible library call. This change adds appropriate bitcasts so that the call can be lowered. commit-id:f430c1af
d1a877b
to
8dc96d0
Compare
1c10a4c
to
d7d7c3b
Compare
AtomicExpand fails for aligned
load atomic <n x T>
because itdoes not find a compatible library call. This change adds appropriate
bitcasts so that the call can be lowered.
Stack: