Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 1a9eae9

Browse files
committed Dec 20, 2024 ·
[AtomicExpand] Avoid sized call when expanding load atomic vector
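AtomicExpand fails for aligned `load atomic <n x T>` because it does not find a compatible library call. This change makes vector-typed atomic loads skip the sized libcalls (e.g. `__atomic_load_16`) and instead fall back to the generic `__atomic_load` libcall (which appears as `___atomic_load` in the X86 test output). commit-id:f430c1af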
1 parent db674f8 commit 1a9eae9

File tree

3 files changed

+112
-11
lines changed

3 files changed

+112
-11
lines changed
 

‎llvm/lib/CodeGen/AtomicExpandPass.cpp

+4-1
Original file line numberDiff line numberDiff line change
@@ -1884,7 +1884,10 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
18841884
IRBuilder<> Builder(I);
18851885
IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
18861886

1887-
bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
1887+
const bool IsAtomic =
1888+
isa<LoadInst>(I) ? cast<LoadInst>(I)->isAtomic() : false;
1889+
const bool UseSizedLibcall = !(I->getType()->isVectorTy() && IsAtomic) &&
1890+
canUseSizedAtomicCall(Size, Alignment, DL);
18881891
Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
18891892

18901893
const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);

‎llvm/test/CodeGen/ARM/atomic-load-store.ll

+57
Original file line numberDiff line numberDiff line change
@@ -983,3 +983,60 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
983983
store atomic double %val1, ptr %ptr seq_cst, align 8
984984
ret void
985985
}
986+
987+
define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
988+
; ARM-LABEL: atomic_vec1_ptr:
989+
; ARM: @ %bb.0:
990+
; ARM-NEXT: ldr r0, [r0]
991+
; ARM-NEXT: dmb ish
992+
; ARM-NEXT: bx lr
993+
;
994+
; ARMOPTNONE-LABEL: atomic_vec1_ptr:
995+
; ARMOPTNONE: @ %bb.0:
996+
; ARMOPTNONE-NEXT: ldr r0, [r0]
997+
; ARMOPTNONE-NEXT: dmb ish
998+
; ARMOPTNONE-NEXT: bx lr
999+
;
1000+
; THUMBTWO-LABEL: atomic_vec1_ptr:
1001+
; THUMBTWO: @ %bb.0:
1002+
; THUMBTWO-NEXT: ldr r0, [r0]
1003+
; THUMBTWO-NEXT: dmb ish
1004+
; THUMBTWO-NEXT: bx lr
1005+
;
1006+
; THUMBONE-LABEL: atomic_vec1_ptr:
1007+
; THUMBONE: @ %bb.0:
1008+
; THUMBONE-NEXT: push {r7, lr}
1009+
; THUMBONE-NEXT: movs r1, #0
1010+
; THUMBONE-NEXT: mov r2, r1
1011+
; THUMBONE-NEXT: bl __sync_val_compare_and_swap_4
1012+
; THUMBONE-NEXT: pop {r7, pc}
1013+
;
1014+
; ARMV4-LABEL: atomic_vec1_ptr:
1015+
; ARMV4: @ %bb.0:
1016+
; ARMV4-NEXT: push {r11, lr}
1017+
; ARMV4-NEXT: sub sp, sp, #8
1018+
; ARMV4-NEXT: add r2, sp, #4
1019+
; ARMV4-NEXT: mov r1, r0
1020+
; ARMV4-NEXT: mov r0, #4
1021+
; ARMV4-NEXT: mov r3, #2
1022+
; ARMV4-NEXT: bl __atomic_load
1023+
; ARMV4-NEXT: ldr r0, [sp, #4]
1024+
; ARMV4-NEXT: add sp, sp, #8
1025+
; ARMV4-NEXT: pop {r11, lr}
1026+
; ARMV4-NEXT: mov pc, lr
1027+
;
1028+
; ARMV6-LABEL: atomic_vec1_ptr:
1029+
; ARMV6: @ %bb.0:
1030+
; ARMV6-NEXT: ldr r0, [r0]
1031+
; ARMV6-NEXT: mov r1, #0
1032+
; ARMV6-NEXT: mcr p15, #0, r1, c7, c10, #5
1033+
; ARMV6-NEXT: bx lr
1034+
;
1035+
; THUMBM-LABEL: atomic_vec1_ptr:
1036+
; THUMBM: @ %bb.0:
1037+
; THUMBM-NEXT: ldr r0, [r0]
1038+
; THUMBM-NEXT: dmb sy
1039+
; THUMBM-NEXT: bx lr
1040+
%ret = load atomic <1 x ptr>, ptr %x acquire, align 4
1041+
ret <1 x ptr> %ret
1042+
}

‎llvm/test/CodeGen/X86/atomic-load-store.ll

+51-10
Original file line numberDiff line numberDiff line change
@@ -399,17 +399,58 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
399399
ret <2 x i32> %ret
400400
}
401401

402+
define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
403+
; CHECK3-LABEL: atomic_vec2_ptr_align:
404+
; CHECK3: ## %bb.0:
405+
; CHECK3-NEXT: subq $24, %rsp
406+
; CHECK3-NEXT: movq %rdi, %rsi
407+
; CHECK3-NEXT: movq %rsp, %rdx
408+
; CHECK3-NEXT: movl $16, %edi
409+
; CHECK3-NEXT: movl $2, %ecx
410+
; CHECK3-NEXT: callq ___atomic_load
411+
; CHECK3-NEXT: movaps (%rsp), %xmm0
412+
; CHECK3-NEXT: addq $24, %rsp
413+
; CHECK3-NEXT: retq
414+
;
415+
; CHECK0-LABEL: atomic_vec2_ptr_align:
416+
; CHECK0: ## %bb.0:
417+
; CHECK0-NEXT: subq $24, %rsp
418+
; CHECK0-NEXT: movq %rdi, %rsi
419+
; CHECK0-NEXT: movl $16, %edi
420+
; CHECK0-NEXT: movq %rsp, %rdx
421+
; CHECK0-NEXT: movl $2, %ecx
422+
; CHECK0-NEXT: callq ___atomic_load
423+
; CHECK0-NEXT: movdqa (%rsp), %xmm0
424+
; CHECK0-NEXT: addq $24, %rsp
425+
; CHECK0-NEXT: retq
426+
%ret = load atomic <2 x ptr>, ptr %x acquire, align 16
427+
ret <2 x ptr> %ret
428+
}
429+
402430
define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
403-
; CHECK-LABEL: atomic_vec4_float_align:
404-
; CHECK: ## %bb.0:
405-
; CHECK-NEXT: pushq %rax
406-
; CHECK-NEXT: movl $2, %esi
407-
; CHECK-NEXT: callq ___atomic_load_16
408-
; CHECK-NEXT: movq %rdx, %xmm1
409-
; CHECK-NEXT: movq %rax, %xmm0
410-
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
411-
; CHECK-NEXT: popq %rax
412-
; CHECK-NEXT: retq
431+
; CHECK3-LABEL: atomic_vec4_float_align:
432+
; CHECK3: ## %bb.0:
433+
; CHECK3-NEXT: subq $24, %rsp
434+
; CHECK3-NEXT: movq %rdi, %rsi
435+
; CHECK3-NEXT: movq %rsp, %rdx
436+
; CHECK3-NEXT: movl $16, %edi
437+
; CHECK3-NEXT: movl $2, %ecx
438+
; CHECK3-NEXT: callq ___atomic_load
439+
; CHECK3-NEXT: movaps (%rsp), %xmm0
440+
; CHECK3-NEXT: addq $24, %rsp
441+
; CHECK3-NEXT: retq
442+
;
443+
; CHECK0-LABEL: atomic_vec4_float_align:
444+
; CHECK0: ## %bb.0:
445+
; CHECK0-NEXT: subq $24, %rsp
446+
; CHECK0-NEXT: movq %rdi, %rsi
447+
; CHECK0-NEXT: movl $16, %edi
448+
; CHECK0-NEXT: movq %rsp, %rdx
449+
; CHECK0-NEXT: movl $2, %ecx
450+
; CHECK0-NEXT: callq ___atomic_load
451+
; CHECK0-NEXT: movaps (%rsp), %xmm0
452+
; CHECK0-NEXT: addq $24, %rsp
453+
; CHECK0-NEXT: retq
413454
%ret = load atomic <4 x float>, ptr %x acquire, align 16
414455
ret <4 x float> %ret
415456
}

0 commit comments

Comments
 (0)
Please sign in to comment.