Skip to content

Commit 20bbd6e

Browse files
committed
[AtomicExpand] Add bitcasts when expanding load atomic vector
AtomicExpand fails for an aligned `load atomic <n x T>` because it does not find a compatible library call. This change adds the appropriate bitcasts, converting the sized libcall's scalar result to the vector type, so that the load can be lowered. commit-id:f430c1af
1 parent ba2a301 commit 20bbd6e

File tree

4 files changed

+103
-3
lines changed

4 files changed

+103
-3
lines changed

llvm/lib/CodeGen/AtomicExpandPass.cpp

+22-3
Original file line numberDiff line numberDiff line change
@@ -2060,9 +2060,28 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
20602060
I->replaceAllUsesWith(V);
20612061
} else if (HasResult) {
20622062
Value *V;
2063-
if (UseSizedLibcall)
2064-
V = Builder.CreateBitOrPointerCast(Result, I->getType());
2065-
else {
2063+
if (UseSizedLibcall) {
2064+
// Add bitcasts from Result's T scalar type to I's <2 x T/2> vector type
2065+
if (I->getType()->getScalarType()->isIntOrPtrTy() &&
2066+
I->getType()->isVectorTy() && !Result->getType()->isVectorTy()) {
2067+
TypeSize Size = Result->getType()->getPrimitiveSizeInBits();
2068+
assert((unsigned)Size % 2 == 0);
2069+
unsigned HalfSize = (unsigned)Size / 2;
2070+
Value *Lo =
2071+
Builder.CreateTrunc(Result, IntegerType::get(Ctx, HalfSize));
2072+
Value *RS = Builder.CreateLShr(
2073+
Result, ConstantInt::get(IntegerType::get(Ctx, Size), HalfSize));
2074+
Value *Hi = Builder.CreateTrunc(RS, IntegerType::get(Ctx, HalfSize));
2075+
Value *Vec = Builder.CreateInsertElement(
2076+
VectorType::get(IntegerType::get(Ctx, HalfSize),
2077+
cast<VectorType>(I->getType())->getElementCount()),
2078+
Lo, ConstantInt::get(IntegerType::get(Ctx, 32), 0));
2079+
Vec = Builder.CreateInsertElement(
2080+
Vec, Hi, ConstantInt::get(IntegerType::get(Ctx, 32), 1));
2081+
V = Builder.CreateBitOrPointerCast(Vec, I->getType());
2082+
} else
2083+
V = Builder.CreateBitOrPointerCast(Result, I->getType());
2084+
} else {
20662085
V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
20672086
AllocaAlignment);
20682087
Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);

llvm/test/CodeGen/ARM/atomic-load-store.ll

+52
Original file line numberDiff line numberDiff line change
@@ -983,3 +983,55 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
983983
store atomic double %val1, ptr %ptr seq_cst, align 8
984984
ret void
985985
}
986+
987+
define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
988+
; ARM-LABEL: atomic_vec1_ptr:
989+
; ARM: @ %bb.0:
990+
; ARM-NEXT: ldr r0, [r0]
991+
; ARM-NEXT: dmb ish
992+
; ARM-NEXT: bx lr
993+
;
994+
; ARMOPTNONE-LABEL: atomic_vec1_ptr:
995+
; ARMOPTNONE: @ %bb.0:
996+
; ARMOPTNONE-NEXT: ldr r0, [r0]
997+
; ARMOPTNONE-NEXT: dmb ish
998+
; ARMOPTNONE-NEXT: bx lr
999+
;
1000+
; THUMBTWO-LABEL: atomic_vec1_ptr:
1001+
; THUMBTWO: @ %bb.0:
1002+
; THUMBTWO-NEXT: ldr r0, [r0]
1003+
; THUMBTWO-NEXT: dmb ish
1004+
; THUMBTWO-NEXT: bx lr
1005+
;
1006+
; THUMBONE-LABEL: atomic_vec1_ptr:
1007+
; THUMBONE: @ %bb.0:
1008+
; THUMBONE-NEXT: push {r7, lr}
1009+
; THUMBONE-NEXT: movs r1, #0
1010+
; THUMBONE-NEXT: mov r2, r1
1011+
; THUMBONE-NEXT: bl __sync_val_compare_and_swap_4
1012+
; THUMBONE-NEXT: pop {r7, pc}
1013+
;
1014+
; ARMV4-LABEL: atomic_vec1_ptr:
1015+
; ARMV4: @ %bb.0:
1016+
; ARMV4-NEXT: push {r11, lr}
1017+
; ARMV4-NEXT: mov r1, #2
1018+
; ARMV4-NEXT: bl __atomic_load_4
1019+
; ARMV4-NEXT: mov r0, #0
1020+
; ARMV4-NEXT: pop {r11, lr}
1021+
; ARMV4-NEXT: mov pc, lr
1022+
;
1023+
; ARMV6-LABEL: atomic_vec1_ptr:
1024+
; ARMV6: @ %bb.0:
1025+
; ARMV6-NEXT: mov r1, #0
1026+
; ARMV6-NEXT: mcr p15, #0, r1, c7, c10, #5
1027+
; ARMV6-NEXT: ldr r0, [r0]
1028+
; ARMV6-NEXT: bx lr
1029+
;
1030+
; THUMBM-LABEL: atomic_vec1_ptr:
1031+
; THUMBM: @ %bb.0:
1032+
; THUMBM-NEXT: ldr r0, [r0]
1033+
; THUMBM-NEXT: dmb sy
1034+
; THUMBM-NEXT: bx lr
1035+
%ret = load atomic <1 x ptr>, ptr %x acquire, align 4
1036+
ret <1 x ptr> %ret
1037+
}

llvm/test/CodeGen/X86/atomic-load-store.ll

+15
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
362362
ret <2 x i32> %ret
363363
}
364364

365+
define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
366+
; CHECK-LABEL: atomic_vec2_ptr_align:
367+
; CHECK: ## %bb.0:
368+
; CHECK-NEXT: pushq %rax
369+
; CHECK-NEXT: movl $2, %esi
370+
; CHECK-NEXT: callq ___atomic_load_16
371+
; CHECK-NEXT: movq %rdx, %xmm1
372+
; CHECK-NEXT: movq %rax, %xmm0
373+
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
374+
; CHECK-NEXT: popq %rax
375+
; CHECK-NEXT: retq
376+
%ret = load atomic <2 x ptr>, ptr %x acquire, align 16
377+
ret <2 x ptr> %ret
378+
}
379+
365380
define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
366381
; CHECK3-LABEL: atomic_vec4_i8:
367382
; CHECK3: ## %bb.0:

llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll

+14
Original file line numberDiff line numberDiff line change
@@ -151,3 +151,17 @@ define void @pointer_cmpxchg_expand6(ptr addrspace(1) %ptr,
151151
ret void
152152
}
153153

154+
define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
155+
; CHECK-LABEL: @atomic_vec2_ptr_align(
156+
; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X:%.*]], i32 2)
157+
; CHECK-NEXT: [[TMP2:%.*]] = trunc i128 [[TMP1]] to i64
158+
; CHECK-NEXT: [[TMP3:%.*]] = lshr i128 [[TMP1]], 64
159+
; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
160+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
161+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> [[TMP5]], i64 [[TMP4]], i32 1
162+
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr <2 x i64> [[TMP6]] to <2 x ptr>
163+
; CHECK-NEXT: ret <2 x ptr> [[TMP7]]
164+
;
165+
%ret = load atomic <2 x ptr>, ptr %x acquire, align 16
166+
ret <2 x ptr> %ret
167+
}

0 commit comments

Comments
 (0)