Skip to content

Commit 8dc96d0

Browse files
committed
[AtomicExpand] Add bitcasts when expanding load atomic vector
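AtomicExpand fails for aligned `load atomic <n x T>` because it does not find a compatible library call. This change adds the appropriate casts so that the call can be lowered: when the result type is a vector of pointers (`<n x ptr>`), the scalar integer returned by the sized libcall is first bitcast to a vector of pointer-width integers and then converted with `inttoptr`; all other result types keep the existing single bit-or-pointer cast. commit-id:f430c1af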
1 parent d7d7c3b commit 8dc96d0

File tree

4 files changed

+163
-3
lines changed

4 files changed

+163
-3
lines changed

llvm/lib/CodeGen/AtomicExpandPass.cpp

+17-3
Original file line numberDiff line numberDiff line change
@@ -2062,9 +2062,23 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
20622062
I->replaceAllUsesWith(V);
20632063
} else if (HasResult) {
20642064
Value *V;
2065-
if (UseSizedLibcall)
2066-
V = Builder.CreateBitOrPointerCast(Result, I->getType());
2067-
else {
2065+
if (UseSizedLibcall) {
2066+
// Add bitcasts from Result's scalar type to I's <n x ptr> vector type
2067+
if (I->getType()->getScalarType()->isPointerTy() &&
2068+
I->getType()->isVectorTy() && !Result->getType()->isVectorTy()) {
2069+
unsigned AS =
2070+
cast<PointerType>(I->getType()->getScalarType())->getAddressSpace();
2071+
ElementCount EC = cast<VectorType>(I->getType())->getElementCount();
2072+
Value *BC = Builder.CreateBitCast(
2073+
Result,
2074+
VectorType::get(IntegerType::get(Ctx, DL.getPointerSizeInBits(AS)),
2075+
EC));
2076+
Value *IntToPtr = Builder.CreateIntToPtr(
2077+
BC, VectorType::get(PointerType::get(Ctx, AS), EC));
2078+
V = Builder.CreateBitOrPointerCast(IntToPtr, I->getType());
2079+
} else
2080+
V = Builder.CreateBitOrPointerCast(Result, I->getType());
2081+
} else {
20682082
V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
20692083
AllocaAlignment);
20702084
Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);

llvm/test/CodeGen/ARM/atomic-load-store.ll

+51
Original file line numberDiff line numberDiff line change
@@ -983,3 +983,54 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
983983
store atomic double %val1, ptr %ptr seq_cst, align 8
984984
ret void
985985
}
986+
987+
define <1 x ptr> @atomic_vec1_ptr(ptr %x) #0 {
988+
; ARM-LABEL: atomic_vec1_ptr:
989+
; ARM: @ %bb.0:
990+
; ARM-NEXT: ldr r0, [r0]
991+
; ARM-NEXT: dmb ish
992+
; ARM-NEXT: bx lr
993+
;
994+
; ARMOPTNONE-LABEL: atomic_vec1_ptr:
995+
; ARMOPTNONE: @ %bb.0:
996+
; ARMOPTNONE-NEXT: ldr r0, [r0]
997+
; ARMOPTNONE-NEXT: dmb ish
998+
; ARMOPTNONE-NEXT: bx lr
999+
;
1000+
; THUMBTWO-LABEL: atomic_vec1_ptr:
1001+
; THUMBTWO: @ %bb.0:
1002+
; THUMBTWO-NEXT: ldr r0, [r0]
1003+
; THUMBTWO-NEXT: dmb ish
1004+
; THUMBTWO-NEXT: bx lr
1005+
;
1006+
; THUMBONE-LABEL: atomic_vec1_ptr:
1007+
; THUMBONE: @ %bb.0:
1008+
; THUMBONE-NEXT: push {r7, lr}
1009+
; THUMBONE-NEXT: movs r1, #0
1010+
; THUMBONE-NEXT: mov r2, r1
1011+
; THUMBONE-NEXT: bl __sync_val_compare_and_swap_4
1012+
; THUMBONE-NEXT: pop {r7, pc}
1013+
;
1014+
; ARMV4-LABEL: atomic_vec1_ptr:
1015+
; ARMV4: @ %bb.0:
1016+
; ARMV4-NEXT: push {r11, lr}
1017+
; ARMV4-NEXT: mov r1, #2
1018+
; ARMV4-NEXT: bl __atomic_load_4
1019+
; ARMV4-NEXT: pop {r11, lr}
1020+
; ARMV4-NEXT: mov pc, lr
1021+
;
1022+
; ARMV6-LABEL: atomic_vec1_ptr:
1023+
; ARMV6: @ %bb.0:
1024+
; ARMV6-NEXT: mov r1, #0
1025+
; ARMV6-NEXT: mcr p15, #0, r1, c7, c10, #5
1026+
; ARMV6-NEXT: ldr r0, [r0]
1027+
; ARMV6-NEXT: bx lr
1028+
;
1029+
; THUMBM-LABEL: atomic_vec1_ptr:
1030+
; THUMBM: @ %bb.0:
1031+
; THUMBM-NEXT: ldr r0, [r0]
1032+
; THUMBM-NEXT: dmb sy
1033+
; THUMBM-NEXT: bx lr
1034+
%ret = load atomic <1 x ptr>, ptr %x acquire, align 4
1035+
ret <1 x ptr> %ret
1036+
}

llvm/test/CodeGen/X86/atomic-load-store.ll

+30
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,21 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
371371
ret <2 x i32> %ret
372372
}
373373

374+
define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
375+
; CHECK-LABEL: atomic_vec2_ptr_align:
376+
; CHECK: ## %bb.0:
377+
; CHECK-NEXT: pushq %rax
378+
; CHECK-NEXT: movl $2, %esi
379+
; CHECK-NEXT: callq ___atomic_load_16
380+
; CHECK-NEXT: movq %rdx, %xmm1
381+
; CHECK-NEXT: movq %rax, %xmm0
382+
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
383+
; CHECK-NEXT: popq %rax
384+
; CHECK-NEXT: retq
385+
%ret = load atomic <2 x ptr>, ptr %x acquire, align 16
386+
ret <2 x ptr> %ret
387+
}
388+
374389
define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
375390
; CHECK3-LABEL: atomic_vec4_i8:
376391
; CHECK3: ## %bb.0:
@@ -394,6 +409,21 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
394409
ret <4 x i16> %ret
395410
}
396411

412+
define <4 x ptr addrspace(270)> @atomic_vec4_ptr270(ptr %x) nounwind {
413+
; CHECK-LABEL: atomic_vec4_ptr270:
414+
; CHECK: ## %bb.0:
415+
; CHECK-NEXT: pushq %rax
416+
; CHECK-NEXT: movl $2, %esi
417+
; CHECK-NEXT: callq ___atomic_load_16
418+
; CHECK-NEXT: movq %rdx, %xmm1
419+
; CHECK-NEXT: movq %rax, %xmm0
420+
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
421+
; CHECK-NEXT: popq %rax
422+
; CHECK-NEXT: retq
423+
%ret = load atomic <4 x ptr addrspace(270)>, ptr %x acquire, align 16
424+
ret <4 x ptr addrspace(270)> %ret
425+
}
426+
397427
define <4 x half> @atomic_vec4_half(ptr %x) nounwind {
398428
; CHECK-LABEL: atomic_vec4_half:
399429
; CHECK: ## %bb.0:

llvm/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll

+65
Original file line numberDiff line numberDiff line change
@@ -151,3 +151,68 @@ define void @pointer_cmpxchg_expand6(ptr addrspace(1) %ptr,
151151
ret void
152152
}
153153

154+
define <2 x ptr> @atomic_vec2_ptr_align(ptr %x) nounwind {
155+
; CHECK-LABEL: define <2 x ptr> @atomic_vec2_ptr_align(
156+
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0:[0-9]+]] {
157+
; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
158+
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i128 [[TMP1]] to <2 x i64>
159+
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr <2 x i64> [[TMP6]] to <2 x ptr>
160+
; CHECK-NEXT: ret <2 x ptr> [[TMP7]]
161+
;
162+
%ret = load atomic <2 x ptr>, ptr %x acquire, align 16
163+
ret <2 x ptr> %ret
164+
}
165+
166+
define <4 x ptr addrspace(270)> @atomic_vec4_ptr_align(ptr %x) nounwind {
167+
; CHECK-LABEL: define <4 x ptr addrspace(270)> @atomic_vec4_ptr_align(
168+
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
169+
; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
170+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32>
171+
; CHECK-NEXT: [[TMP3:%.*]] = inttoptr <4 x i32> [[TMP2]] to <4 x ptr addrspace(270)>
172+
; CHECK-NEXT: ret <4 x ptr addrspace(270)> [[TMP3]]
173+
;
174+
%ret = load atomic <4 x ptr addrspace(270)>, ptr %x acquire, align 16
175+
ret <4 x ptr addrspace(270)> %ret
176+
}
177+
178+
define <2 x i16> @atomic_vec2_i16(ptr %x) nounwind {
179+
; CHECK-LABEL: define <2 x i16> @atomic_vec2_i16(
180+
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
181+
; CHECK-NEXT: [[RET:%.*]] = load atomic <2 x i16>, ptr [[X]] acquire, align 8
182+
; CHECK-NEXT: ret <2 x i16> [[RET]]
183+
;
184+
%ret = load atomic <2 x i16>, ptr %x acquire, align 8
185+
ret <2 x i16> %ret
186+
}
187+
188+
define <2 x half> @atomic_vec2_half(ptr %x) nounwind {
189+
; CHECK-LABEL: define <2 x half> @atomic_vec2_half(
190+
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
191+
; CHECK-NEXT: [[RET:%.*]] = load atomic <2 x half>, ptr [[X]] acquire, align 8
192+
; CHECK-NEXT: ret <2 x half> [[RET]]
193+
;
194+
%ret = load atomic <2 x half>, ptr %x acquire, align 8
195+
ret <2 x half> %ret
196+
}
197+
198+
define <4 x i32> @atomic_vec4_i32(ptr %x) nounwind {
199+
; CHECK-LABEL: define <4 x i32> @atomic_vec4_i32(
200+
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
201+
; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
202+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x i32>
203+
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
204+
;
205+
%ret = load atomic <4 x i32>, ptr %x acquire, align 16
206+
ret <4 x i32> %ret
207+
}
208+
209+
define <4 x float> @atomic_vec4_float(ptr %x) nounwind {
210+
; CHECK-LABEL: define <4 x float> @atomic_vec4_float(
211+
; CHECK-SAME: ptr [[X:%.*]]) #[[ATTR0]] {
212+
; CHECK-NEXT: [[TMP1:%.*]] = call i128 @__atomic_load_16(ptr [[X]], i32 2)
213+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[TMP1]] to <4 x float>
214+
; CHECK-NEXT: ret <4 x float> [[TMP2]]
215+
;
216+
%ret = load atomic <4 x float>, ptr %x acquire, align 16
217+
ret <4 x float> %ret
218+
}

0 commit comments

Comments
 (0)