Skip to content

Commit 4d3fcb3

Browse files
committed
[SelectionDAG] Legalize <1 x T> vector types for atomic load
`load atomic <1 x T>` is not valid. This change legalizes vector types of atomic loads via scalarization in SelectionDAG so that, for example, a `v1i32` atomic load is translated to an `i32` atomic load. commit-id: 5c36cc8c
1 parent 4282e9f commit 4d3fcb3

File tree

3 files changed

+117
-2
lines changed

3 files changed

+117
-2
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

+1
Original file line numberDiff line numberDiff line change
@@ -861,6 +861,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
861861
SDValue ScalarizeVecRes_ExpOp(SDNode *N);
862862
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
863863
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
864+
SDValue ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N);
864865
SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
865866
SDValue ScalarizeVecRes_VSELECT(SDNode *N);
866867
SDValue ScalarizeVecRes_SELECT(SDNode *N);

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

+15
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
6060
case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
6161
case ISD::FPOWI: R = ScalarizeVecRes_ExpOp(N); break;
6262
case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
63+
case ISD::ATOMIC_LOAD:
64+
R = ScalarizeVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
65+
break;
6366
case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
6467
case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
6568
case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
@@ -451,6 +454,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
451454
return Op;
452455
}
453456

457+
/// Scalarize the vector result of an ATOMIC_LOAD node.
///
/// Builds a replacement ISD::ATOMIC_LOAD through DAG.getAtomic whose memory
/// type is the element type of N's memory VT and whose result type is the
/// element type of N's value 0. The chain, base pointer and memory operand
/// are taken unchanged from N.
///
/// \param N the atomic load node whose result is being scalarized.
/// \returns the newly created atomic load node.
SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
458+
// Re-create the load on the element types; every other operand is reused
// from N as-is.
SDValue Result = DAG.getAtomic(
459+
ISD::ATOMIC_LOAD, SDLoc(N), N->getMemoryVT().getVectorElementType(),
460+
N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(),
461+
N->getMemOperand());
462+
463+
// Legalize the chain result - switch anything that used the old chain to
464+
// use the new one.
465+
ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
466+
// Hand the new node back to the caller as the scalarized result.
return Result;
467+
}
468+
454469
SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
455470
assert(N->isUnindexed() && "Indexed vector load?");
456471

llvm/test/CodeGen/X86/atomic-load-store.ll

+101-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s
3-
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s
2+
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK3
3+
; RUN: llc < %s -mtriple=x86_64-apple-macosx10.7.0 -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=CHECK,CHECK0
44

55
define void @test1(ptr %ptr, i32 %val1) {
66
; CHECK-LABEL: test1:
@@ -28,3 +28,102 @@ define i32 @test3(ptr %ptr) {
2828
%val = load atomic i32, ptr %ptr seq_cst, align 4
2929
ret i32 %val
3030
}
31+
32+
define <1 x i32> @atomic_vec1_i32(ptr %x) {
33+
; CHECK-LABEL: atomic_vec1_i32:
34+
; CHECK: ## %bb.0:
35+
; CHECK-NEXT: movl (%rdi), %eax
36+
; CHECK-NEXT: retq
37+
%ret = load atomic <1 x i32>, ptr %x acquire, align 4
38+
ret <1 x i32> %ret
39+
}
40+
41+
define <1 x i8> @atomic_vec1_i8(ptr %x) {
42+
; CHECK3-LABEL: atomic_vec1_i8:
43+
; CHECK3: ## %bb.0:
44+
; CHECK3-NEXT: movzbl (%rdi), %eax
45+
; CHECK3-NEXT: retq
46+
;
47+
; CHECK0-LABEL: atomic_vec1_i8:
48+
; CHECK0: ## %bb.0:
49+
; CHECK0-NEXT: movb (%rdi), %al
50+
; CHECK0-NEXT: retq
51+
%ret = load atomic <1 x i8>, ptr %x acquire, align 4
52+
ret <1 x i8> %ret
53+
}
54+
55+
define <1 x i16> @atomic_vec1_i16(ptr %x) {
56+
; CHECK3-LABEL: atomic_vec1_i16:
57+
; CHECK3: ## %bb.0:
58+
; CHECK3-NEXT: movzwl (%rdi), %eax
59+
; CHECK3-NEXT: retq
60+
;
61+
; CHECK0-LABEL: atomic_vec1_i16:
62+
; CHECK0: ## %bb.0:
63+
; CHECK0-NEXT: movw (%rdi), %ax
64+
; CHECK0-NEXT: retq
65+
%ret = load atomic <1 x i16>, ptr %x acquire, align 4
66+
ret <1 x i16> %ret
67+
}
68+
69+
define <1 x i32> @atomic_vec1_i8_zext(ptr %x) {
70+
; CHECK3-LABEL: atomic_vec1_i8_zext:
71+
; CHECK3: ## %bb.0:
72+
; CHECK3-NEXT: movzbl (%rdi), %eax
73+
; CHECK3-NEXT: movzbl %al, %eax
74+
; CHECK3-NEXT: retq
75+
;
76+
; CHECK0-LABEL: atomic_vec1_i8_zext:
77+
; CHECK0: ## %bb.0:
78+
; CHECK0-NEXT: movb (%rdi), %al
79+
; CHECK0-NEXT: movzbl %al, %eax
80+
; CHECK0-NEXT: retq
81+
%ret = load atomic <1 x i8>, ptr %x acquire, align 4
82+
%zret = zext <1 x i8> %ret to <1 x i32>
83+
ret <1 x i32> %zret
84+
}
85+
86+
define <1 x i64> @atomic_vec1_i16_sext(ptr %x) {
87+
; CHECK3-LABEL: atomic_vec1_i16_sext:
88+
; CHECK3: ## %bb.0:
89+
; CHECK3-NEXT: movzwl (%rdi), %eax
90+
; CHECK3-NEXT: movswq %ax, %rax
91+
; CHECK3-NEXT: retq
92+
;
93+
; CHECK0-LABEL: atomic_vec1_i16_sext:
94+
; CHECK0: ## %bb.0:
95+
; CHECK0-NEXT: movw (%rdi), %ax
96+
; CHECK0-NEXT: movswq %ax, %rax
97+
; CHECK0-NEXT: retq
98+
%ret = load atomic <1 x i16>, ptr %x acquire, align 4
99+
%sret = sext <1 x i16> %ret to <1 x i64>
100+
ret <1 x i64> %sret
101+
}
102+
103+
define <1 x ptr addrspace(270)> @atomic_vec1_ptr270(ptr %x) {
104+
; CHECK-LABEL: atomic_vec1_ptr270:
105+
; CHECK: ## %bb.0:
106+
; CHECK-NEXT: movl (%rdi), %eax
107+
; CHECK-NEXT: retq
108+
%ret = load atomic <1 x ptr addrspace(270)>, ptr %x acquire, align 4
109+
ret <1 x ptr addrspace(270)> %ret
110+
}
111+
112+
define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
113+
; CHECK3-LABEL: atomic_vec1_bfloat:
114+
; CHECK3: ## %bb.0:
115+
; CHECK3-NEXT: movzwl (%rdi), %eax
116+
; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
117+
; CHECK3-NEXT: retq
118+
;
119+
; CHECK0-LABEL: atomic_vec1_bfloat:
120+
; CHECK0: ## %bb.0:
121+
; CHECK0-NEXT: movw (%rdi), %cx
122+
; CHECK0-NEXT: ## implicit-def: $eax
123+
; CHECK0-NEXT: movw %cx, %ax
124+
; CHECK0-NEXT: ## implicit-def: $xmm0
125+
; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
126+
; CHECK0-NEXT: retq
127+
%ret = load atomic <1 x bfloat>, ptr %x acquire, align 4
128+
ret <1 x bfloat> %ret
129+
}

0 commit comments

Comments
 (0)