Skip to content

Commit 45e7035

Browse files
committed
[AtomicExpand] Add bitcasts when expanding load atomic vector
AtomicExpand fails for an aligned `load atomic <n x T>` because it does not find a compatible library call. This change adds the appropriate bitcasts so that the call can be lowered.
commit-id: f430c1af
1 parent e11194d commit 45e7035

File tree

3 files changed

+89
-3
lines changed

3 files changed

+89
-3
lines changed

llvm/lib/CodeGen/AtomicExpandPass.cpp

+22-3
Original file line numberDiff line numberDiff line change
@@ -2060,9 +2060,28 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
20602060
I->replaceAllUsesWith(V);
20612061
} else if (HasResult) {
20622062
Value *V;
2063-
if (UseSizedLibcall)
2064-
V = Builder.CreateBitOrPointerCast(Result, I->getType());
2065-
else {
2063+
if (UseSizedLibcall) {
2064+
// Result is a scalar here: split it into low and high halves, insert them as
// elements 0 and 1 of a vector, then cast that vector to I's vector type.
2065+
if (I->getType()->getScalarType()->isIntOrPtrTy() &&
2066+
I->getType()->isVectorTy() && !Result->getType()->isVectorTy()) {
2067+
TypeSize Size = Result->getType()->getPrimitiveSizeInBits();
2068+
assert((unsigned)Size % 2 == 0);
2069+
unsigned HalfSize = (unsigned)Size / 2;
2070+
Value *Lo =
2071+
Builder.CreateTrunc(Result, IntegerType::get(Ctx, HalfSize));
2072+
Value *RS = Builder.CreateLShr(
2073+
Result, ConstantInt::get(IntegerType::get(Ctx, Size), HalfSize));
2074+
Value *Hi = Builder.CreateTrunc(RS, IntegerType::get(Ctx, HalfSize));
2075+
Value *Vec = Builder.CreateInsertElement(
2076+
VectorType::get(IntegerType::get(Ctx, HalfSize),
2077+
cast<VectorType>(I->getType())->getElementCount()),
2078+
Lo, ConstantInt::get(IntegerType::get(Ctx, 32), 0));
2079+
Vec = Builder.CreateInsertElement(
2080+
Vec, Hi, ConstantInt::get(IntegerType::get(Ctx, 32), 1));
2081+
V = Builder.CreateBitOrPointerCast(Vec, I->getType());
2082+
} else
2083+
V = Builder.CreateBitOrPointerCast(Result, I->getType());
2084+
} else {
20662085
V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
20672086
AllocaAlignment);
20682087
Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);

llvm/test/CodeGen/ARM/atomic-load-store.ll

+52
Original file line numberDiff line numberDiff line change
@@ -983,3 +983,55 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
983983
store atomic double %val1, ptr %ptr seq_cst, align 8
984984
ret void
985985
}
986+
987+
define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
988+
; ARM-LABEL: atomic_vec1_ptr:
989+
; ARM: @ %bb.0:
990+
; ARM-NEXT: ldr r0, [r0]
991+
; ARM-NEXT: dmb ish
992+
; ARM-NEXT: bx lr
993+
;
994+
; ARMOPTNONE-LABEL: atomic_vec1_ptr:
995+
; ARMOPTNONE: @ %bb.0:
996+
; ARMOPTNONE-NEXT: ldr r0, [r0]
997+
; ARMOPTNONE-NEXT: dmb ish
998+
; ARMOPTNONE-NEXT: bx lr
999+
;
1000+
; THUMBTWO-LABEL: atomic_vec1_ptr:
1001+
; THUMBTWO: @ %bb.0:
1002+
; THUMBTWO-NEXT: ldr r0, [r0]
1003+
; THUMBTWO-NEXT: dmb ish
1004+
; THUMBTWO-NEXT: bx lr
1005+
;
1006+
; THUMBONE-LABEL: atomic_vec1_ptr:
1007+
; THUMBONE: @ %bb.0:
1008+
; THUMBONE-NEXT: push {r7, lr}
1009+
; THUMBONE-NEXT: movs r1, #0
1010+
; THUMBONE-NEXT: mov r2, r1
1011+
; THUMBONE-NEXT: bl __sync_val_compare_and_swap_4
1012+
; THUMBONE-NEXT: pop {r7, pc}
1013+
;
1014+
; ARMV4-LABEL: atomic_vec1_ptr:
1015+
; ARMV4: @ %bb.0:
1016+
; ARMV4-NEXT: push {r11, lr}
1017+
; ARMV4-NEXT: mov r1, #2
1018+
; ARMV4-NEXT: bl __atomic_load_4
1019+
; ARMV4-NEXT: mov r0, #0
1020+
; ARMV4-NEXT: pop {r11, lr}
1021+
; ARMV4-NEXT: mov pc, lr
1022+
;
1023+
; ARMV6-LABEL: atomic_vec1_ptr:
1024+
; ARMV6: @ %bb.0:
1025+
; ARMV6-NEXT: ldr r0, [r0]
1026+
; ARMV6-NEXT: mov r1, #0
1027+
; ARMV6-NEXT: mcr p15, #0, r1, c7, c10, #5
1028+
; ARMV6-NEXT: bx lr
1029+
;
1030+
; THUMBM-LABEL: atomic_vec1_ptr:
1031+
; THUMBM: @ %bb.0:
1032+
; THUMBM-NEXT: ldr r0, [r0]
1033+
; THUMBM-NEXT: dmb sy
1034+
; THUMBM-NEXT: bx lr
1035+
%ret = load atomic <1 x ptr>, ptr %x acquire, align 4
1036+
ret <1 x ptr> %ret
1037+
}

llvm/test/CodeGen/X86/atomic-load-store.ll

+15
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
354354
ret <2 x i32> %ret
355355
}
356356

357+
define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
358+
; CHECK-LABEL: atomic_vec2_ptr_align:
359+
; CHECK: ## %bb.0:
360+
; CHECK-NEXT: pushq %rax
361+
; CHECK-NEXT: movl $2, %esi
362+
; CHECK-NEXT: callq ___atomic_load_16
363+
; CHECK-NEXT: movq %rdx, %xmm1
364+
; CHECK-NEXT: movq %rax, %xmm0
365+
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
366+
; CHECK-NEXT: popq %rax
367+
; CHECK-NEXT: retq
368+
%ret = load atomic <2 x ptr>, ptr %x acquire, align 16
369+
ret <2 x ptr> %ret
370+
}
371+
357372
define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
358373
; CHECK-LABEL: atomic_vec4_float_align:
359374
; CHECK: ## %bb.0:

0 commit comments

Comments
 (0)