
Commit f2b9675

[X86] Lowering of load atomic float via cast
The X86 backend does not lower atomic floating-point loads directly, so cast them to an integer of the same width before lowering.
1 parent af5cac7 commit f2b9675
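
The new hook is consumed by the AtomicExpand IR pass: when shouldCastAtomicLoadInIR returns AtomicExpansionKind::CastToInteger, the pass replaces the floating-point atomic load with an integer atomic load of the same bit width followed by a bitcast, which is exactly the rewrite checked in the AtomicExpand test below. What follows is a minimal sketch of that rewrite using the public IRBuilder API; castAtomicLoadToInteger is a hypothetical helper name for illustration, not the pass's actual code.

// Minimal sketch of the CastToInteger expansion performed by AtomicExpand.
// Simplified illustration only; the upstream pass has more bookkeeping.
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

using namespace llvm;

static LoadInst *castAtomicLoadToInteger(LoadInst *LI) {
  IRBuilder<> Builder(LI);
  const DataLayout &DL = LI->getModule()->getDataLayout();

  // Build an integer type of the same bit width, e.g. <1 x float> -> i32.
  Type *IntTy =
      Builder.getIntNTy(DL.getTypeSizeInBits(LI->getType()).getFixedValue());

  // Re-issue the load as an atomic integer load with the same ordering,
  // sync scope, alignment, and volatility as the original.
  LoadInst *NewLI = Builder.CreateLoad(IntTy, LI->getPointerOperand());
  NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
  NewLI->setAlignment(LI->getAlign());
  NewLI->setVolatile(LI->isVolatile());

  // Bitcast the integer back to the original floating-point type and
  // retire the old load.
  Value *FP = Builder.CreateBitCast(NewLI, LI->getType());
  LI->replaceAllUsesWith(FP);
  LI->eraseFromParent();
  return NewLI;
}

Keeping the memory operation itself on an integer type lets the existing X86 atomic-load lowering apply unchanged; only a bitcast remains on the floating-point side.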

File tree

4 files changed: +86 −0 lines


Diff for: llvm/lib/Target/X86/X86ISelLowering.cpp (+8)

@@ -31158,6 +31158,14 @@ bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const {
   return false;
 }
 
+TargetLoweringBase::AtomicExpansionKind
+X86TargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const {
+  if (const auto VT = dyn_cast<VectorType>(LI->getType()))
+    if (VT->getElementType()->isFloatingPointTy())
+      return AtomicExpansionKind::CastToInteger;
+  return TargetLowering::shouldCastAtomicLoadInIR(LI);
+}
+
 TargetLoweringBase::AtomicExpansionKind
 X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
   Type *MemType = SI->getValueOperand()->getType();
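
Note that the override only adds the vector case: a scalar float already reaches the cast path through the default implementation, which the final return defers to. For reference, the base hook in llvm/include/llvm/CodeGen/TargetLowering.h behaves roughly like the following; this is a paraphrase of the upstream default, and the exact code may differ between LLVM versions.

// Paraphrased default: scalar floating-point atomic loads are already
// requested to be cast to integer; everything else is left alone.
virtual AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const {
  if (LI->getType()->isFloatingPointTy())
    return AtomicExpansionKind::CastToInteger;
  return AtomicExpansionKind::None;
}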

Diff for: llvm/lib/Target/X86/X86ISelLowering.h (+2)

@@ -1808,6 +1808,8 @@ namespace llvm {
     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
     ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
 
+    TargetLoweringBase::AtomicExpansionKind
+    shouldCastAtomicLoadInIR(LoadInst *LI) const override;
     TargetLoweringBase::AtomicExpansionKind
     shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
     TargetLoweringBase::AtomicExpansionKind

Diff for: llvm/test/CodeGen/X86/atomicvec-float.ll (new file, +38)

@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s --mtriple=x86_64 | FileCheck %s
+
+define float @load_atomic_float(ptr %src) {
+; CHECK-LABEL: load_atomic_float:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    retq
+  %ret = load atomic float, ptr %src acquire, align 4
+  ret float %ret
+}
+
+define <1 x float> @load_atomic_vector_float1(ptr %src) {
+; CHECK-LABEL: load_atomic_vector_float1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    retq
+  %ret = load atomic <1 x float>, ptr %src acquire, align 4
+  ret <1 x float> %ret
+}
+
+define <2 x float> @load_atomic_vector_float2(ptr %src) {
+; CHECK-LABEL: load_atomic_vector_float2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    movq %rdi, %rsi
+; CHECK-NEXT:    movq %rsp, %rdx
+; CHECK-NEXT:    movl $8, %edi
+; CHECK-NEXT:    movl $2, %ecx
+; CHECK-NEXT:    callq __atomic_load@PLT
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+  %ret = load atomic <2 x float>, ptr %src acquire, align 4
+  ret <2 x float> %ret
+}
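
The <2 x float> case does not become an inline load: the access is 8 bytes wide but only 4-byte aligned, and an under-aligned atomic is routed to the generic libatomic helper instead of being expanded in place. That is what the call to __atomic_load above encodes; under the SysV x86-64 calling convention, rdi carries the size (8), rsi the source pointer, rdx the destination buffer on the stack, and ecx the memory order (2 = __ATOMIC_ACQUIRE, matching the acquire ordering in the IR). For reference, the generic libatomic entry point is declared roughly as follows; parameter names are illustrative.

// Generic libatomic routine used when an atomic access cannot be inlined
// (here: an 8-byte load with only 4-byte alignment). Declaration shown for
// reference only; memorder 2 corresponds to __ATOMIC_ACQUIRE.
#include <cstddef>
extern "C" void __atomic_load(size_t size, void *src, void *dest, int memorder);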

Diff for: llvm/test/Transforms/AtomicExpand/atomicvec-float.ll (new file, +38)

@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s --mtriple=x86_64 --passes=atomic-expand -S -o - | FileCheck %s
+
+define float @load_atomic_float(ptr %src) {
+; CHECK-LABEL: define float @load_atomic_float(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i32, ptr [[SRC]] acquire, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float
+; CHECK-NEXT:    ret float [[TMP2]]
+;
+  %ret = load atomic float, ptr %src acquire, align 4
+  ret float %ret
+}
+
+define <1 x float> @load_atomic_vector_float1(ptr %src) {
+; CHECK-LABEL: define <1 x float> @load_atomic_vector_float1(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i32, ptr [[SRC]] acquire, align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[TMP1]] to <1 x float>
+; CHECK-NEXT:    ret <1 x float> [[TMP2]]
+;
+  %ret = load atomic <1 x float>, ptr %src acquire, align 4
+  ret <1 x float> %ret
+}
+
+define <2 x float> @load_atomic_vector_float2(ptr %src) {
+; CHECK-LABEL: define <2 x float> @load_atomic_vector_float2(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = alloca <2 x float>, align 8
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 8, ptr [[TMP1]])
+; CHECK-NEXT:    call void @__atomic_load(i64 8, ptr [[SRC]], ptr [[TMP1]], i32 2)
+; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[TMP1]], align 8
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP1]])
+; CHECK-NEXT:    ret <2 x float> [[TMP2]]
+;
+  %ret = load atomic <2 x float>, ptr %src acquire, align 4
+  ret <2 x float> %ret
+}
