Skip to content

Commit 04f0fd9

Browse files
committed
[SelectionDAG][X86] Widen <2 x T> vector types for atomic load
Vector types of 2 elements must be widened. This change does so for the vector result types of atomic loads in SelectionDAG, so that aligned atomic loads of vectors with more than one element can be translated. It also adds Pats to remove an extra MOV. commit-id:2894ccd1
1 parent 6d84ebe commit 04f0fd9

File tree

5 files changed

+118
-7
lines changed

5 files changed

+118
-7
lines changed

Diff for: llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -1046,6 +1046,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
10461046
SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
10471047
SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N);
10481048
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
1049+
SDValue WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N);
10491050
SDValue WidenVecRes_LOAD(SDNode* N);
10501051
SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
10511052
SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
@@ -1129,8 +1130,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
11291130
/// resulting wider type. It takes:
11301131
/// LdChain: list of chains for the load to be generated.
11311132
/// Ld: load to widen
1132-
SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
1133-
LoadSDNode *LD);
1133+
SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, MemSDNode *LD,
1134+
bool IsAtomic = false);
11341135

11351136
/// Helper function to generate a set of extension loads to load a vector with
11361137
/// a resulting wider type. It takes:

Diff for: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

+35-3
Original file line numberDiff line numberDiff line change
@@ -4521,6 +4521,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
45214521
break;
45224522
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
45234523
case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
4524+
case ISD::ATOMIC_LOAD:
4525+
Res = WidenVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
4526+
break;
45244527
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
45254528
case ISD::STEP_VECTOR:
45264529
case ISD::SPLAT_VECTOR:
@@ -5907,6 +5910,30 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
59075910
N->getOperand(1), N->getOperand(2));
59085911
}
59095912

5913+
// Widens the vector result of an atomic load node N.
//
// The work is delegated to GenWidenVectorLoads with IsAtomic set to true. On
// success, users of N's chain result (value 1) are redirected to the chain of
// the newly generated load(s) and the widened value is returned. If
// GenWidenVectorLoads yields no value, a fatal error is reported.
SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
5914+
SmallVector<SDValue, 16> LdChain; // Chains of the loads generated below.
5915+
SDValue Result = GenWidenVectorLoads(LdChain, N, /*IsAtomic=*/true);
5916+
5917+
if (Result) {
5918+
// If a single load was generated, its chain can be used directly.
5919+
// Otherwise, build a TokenFactor node to record that the multiple loads
5920+
// are independent, and chain to that.
5921+
SDValue NewChain;
5922+
if (LdChain.size() == 1)
5923+
NewChain = LdChain[0];
5924+
else
5925+
NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, LdChain);
5926+
5927+
// The chain has been modified - switch anything that used the old chain
5928+
// (result value 1 of N) to use the new one.
5929+
ReplaceValueWith(SDValue(N, 1), NewChain);
5930+
5931+
return Result;
5932+
}
5933+
5934+
// GenWidenVectorLoads produced no value; there is no fallback on this path.
report_fatal_error("Unable to widen atomic vector load");
5935+
}
5936+
59105937
SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
59115938
LoadSDNode *LD = cast<LoadSDNode>(N);
59125939
ISD::LoadExtType ExtType = LD->getExtensionType();
@@ -7706,7 +7733,7 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
77067733
}
77077734

77087735
SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
7709-
LoadSDNode *LD) {
7736+
MemSDNode *LD, bool IsAtomic) {
77107737
// The strategy assumes that we can efficiently load power-of-two widths.
77117738
// The routine chops the vector into the largest vector loads with the same
77127739
// element type or scalar loads and then recombines it to the widen vector
@@ -7763,8 +7790,13 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
77637790
} while (TypeSize::isKnownGT(RemainingWidth, NewVTWidth));
77647791
}
77657792

7766-
SDValue LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),
7767-
LD->getOriginalAlign(), MMOFlags, AAInfo);
7793+
SDValue LdOp;
7794+
if (IsAtomic)
7795+
LdOp = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, *FirstVT, *FirstVT, Chain,
7796+
BasePtr, LD->getMemOperand());
7797+
else
7798+
LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),
7799+
LD->getOriginalAlign(), MMOFlags, AAInfo);
77687800
LdChain.push_back(LdOp.getValue(1));
77697801

77707802
// Check if we can load the element with one instruction.

Diff for: llvm/lib/Target/X86/X86InstrCompiler.td

+7
Original file line numberDiff line numberDiff line change
@@ -1198,6 +1198,13 @@ def : Pat<(i16 (atomic_load_16 addr:$src)), (MOV16rm addr:$src)>;
11981198
def : Pat<(i32 (atomic_load_32 addr:$src)), (MOV32rm addr:$src)>;
11991199
def : Pat<(i64 (atomic_load_64 addr:$src)), (MOV64rm addr:$src)>;
12001200

1201+
def : Pat<(v4i32 (scalar_to_vector (i32 (anyext (i16 (atomic_load_16 addr:$src)))))),
1202+
(MOVDI2PDIrm addr:$src)>; // load atomic <2 x i8>
1203+
def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src)))),
1204+
(MOVDI2PDIrm addr:$src)>; // load atomic <2 x i16>
1205+
def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src)))),
1206+
(MOV64toPQIrm addr:$src)>; // load atomic <2 x i32,float>
1207+
12011208
// Floating point loads/stores.
12021209
def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst),
12031210
(MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>;

Diff for: llvm/test/CodeGen/X86/atomic-load-store.ll

+72
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,55 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
146146
ret <1 x i64> %ret
147147
}
148148

149+
define <2 x i8> @atomic_vec2_i8(ptr %x) {
150+
; CHECK3-LABEL: atomic_vec2_i8:
151+
; CHECK3: ## %bb.0:
152+
; CHECK3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
153+
; CHECK3-NEXT: retq
154+
;
155+
; CHECK0-LABEL: atomic_vec2_i8:
156+
; CHECK0: ## %bb.0:
157+
; CHECK0-NEXT: movw (%rdi), %cx
158+
; CHECK0-NEXT: ## implicit-def: $eax
159+
; CHECK0-NEXT: movw %cx, %ax
160+
; CHECK0-NEXT: movd %eax, %xmm0
161+
; CHECK0-NEXT: retq
162+
%ret = load atomic <2 x i8>, ptr %x acquire, align 4
163+
ret <2 x i8> %ret
164+
}
165+
166+
define <2 x i16> @atomic_vec2_i16(ptr %x) {
167+
; CHECK3-LABEL: atomic_vec2_i16:
168+
; CHECK3: ## %bb.0:
169+
; CHECK3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
170+
; CHECK3-NEXT: retq
171+
;
172+
; CHECK0-LABEL: atomic_vec2_i16:
173+
; CHECK0: ## %bb.0:
174+
; CHECK0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
175+
; CHECK0-NEXT: retq
176+
%ret = load atomic <2 x i16>, ptr %x acquire, align 4
177+
ret <2 x i16> %ret
178+
}
179+
180+
define <2 x i32> @atomic_vec2_i32_align(ptr %x) {
181+
; CHECK-LABEL: atomic_vec2_i32_align:
182+
; CHECK: ## %bb.0:
183+
; CHECK-NEXT: movq (%rdi), %xmm0
184+
; CHECK-NEXT: retq
185+
%ret = load atomic <2 x i32>, ptr %x acquire, align 8
186+
ret <2 x i32> %ret
187+
}
188+
189+
define <2 x float> @atomic_vec2_float_align(ptr %x) {
190+
; CHECK-LABEL: atomic_vec2_float_align:
191+
; CHECK: ## %bb.0:
192+
; CHECK-NEXT: movq (%rdi), %xmm0
193+
; CHECK-NEXT: retq
194+
%ret = load atomic <2 x float>, ptr %x acquire, align 8
195+
ret <2 x float> %ret
196+
}
197+
149198
define <1 x ptr> @atomic_vec1_ptr(ptr %x) nounwind {
150199
; CHECK3-LABEL: atomic_vec1_ptr:
151200
; CHECK3: ## %bb.0:
@@ -295,6 +344,29 @@ define <2 x i32> @atomic_vec2_i32(ptr %x) nounwind {
295344
ret <2 x i32> %ret
296345
}
297346

347+
define <4 x i8> @atomic_vec4_i8(ptr %x) nounwind {
348+
; CHECK3-LABEL: atomic_vec4_i8:
349+
; CHECK3: ## %bb.0:
350+
; CHECK3-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
351+
; CHECK3-NEXT: retq
352+
;
353+
; CHECK0-LABEL: atomic_vec4_i8:
354+
; CHECK0: ## %bb.0:
355+
; CHECK0-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
356+
; CHECK0-NEXT: retq
357+
%ret = load atomic <4 x i8>, ptr %x acquire, align 4
358+
ret <4 x i8> %ret
359+
}
360+
361+
define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
362+
; CHECK-LABEL: atomic_vec4_i16:
363+
; CHECK: ## %bb.0:
364+
; CHECK-NEXT: movq (%rdi), %xmm0
365+
; CHECK-NEXT: retq
366+
%ret = load atomic <4 x i16>, ptr %x acquire, align 8
367+
ret <4 x i16> %ret
368+
}
369+
298370
define <4 x float> @atomic_vec4_float_align(ptr %x) nounwind {
299371
; CHECK-LABEL: atomic_vec4_float_align:
300372
; CHECK: ## %bb.0:

Diff for: llvm/test/CodeGen/X86/atomic-unordered.ll

+1-2
Original file line numberDiff line numberDiff line change
@@ -2275,8 +2275,7 @@ define i64 @load_i16_anyext_i64(ptr %ptr) {
22752275
;
22762276
; CHECK-O3-LABEL: load_i16_anyext_i64:
22772277
; CHECK-O3: # %bb.0:
2278-
; CHECK-O3-NEXT: movzwl (%rdi), %eax
2279-
; CHECK-O3-NEXT: vmovd %eax, %xmm0
2278+
; CHECK-O3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
22802279
; CHECK-O3-NEXT: vmovq %xmm0, %rax
22812280
; CHECK-O3-NEXT: retq
22822281
%v = load atomic i16, ptr %ptr unordered, align 8

0 commit comments

Comments
 (0)