@@ -109,6 +109,34 @@ define <1 x ptr addrspace(270)> @atomic_vec1_ptr270(ptr %x) {
109
109
ret <1 x ptr addrspace (270 )> %ret
110
110
}
111
111
112
; Test: acquire atomic load of a <1 x ptr> vector through %x, where the load
; is declared with only 4-byte alignment.
; Both sets of expected assembly below lower the load to a call to
; ___atomic_load: the incoming pointer is moved from %rdi to %rsi, %rsp is
; passed in %rdx, $8 goes in %edi and $2 goes in %ecx. After the call the
; result is read back from (%rsp) into %rax. The two sets differ only in the
; order in which %rdx and %edi are set up.
; NOTE(review): $8 is presumably the access size in bytes and $2 the acquire
; ordering constant; the libcall is presumably chosen because the access is
; under-aligned (align 4) - confirm before changing the alignment.
+ define <1 x ptr > @atomic_vec1_ptr (ptr %x ) nounwind {
113
+ ; CHECK3-LABEL: atomic_vec1_ptr:
114
+ ; CHECK3: ## %bb.0:
115
+ ; CHECK3-NEXT: pushq %rax
116
+ ; CHECK3-NEXT: movq %rdi, %rsi
117
+ ; CHECK3-NEXT: movq %rsp, %rdx
118
+ ; CHECK3-NEXT: movl $8, %edi
119
+ ; CHECK3-NEXT: movl $2, %ecx
120
+ ; CHECK3-NEXT: callq ___atomic_load
121
+ ; CHECK3-NEXT: movq (%rsp), %rax
122
+ ; CHECK3-NEXT: popq %rcx
123
+ ; CHECK3-NEXT: retq
124
+ ;
125
+ ; CHECK0-LABEL: atomic_vec1_ptr:
126
+ ; CHECK0: ## %bb.0:
127
+ ; CHECK0-NEXT: pushq %rax
128
+ ; CHECK0-NEXT: movq %rdi, %rsi
129
+ ; CHECK0-NEXT: movl $8, %edi
130
+ ; CHECK0-NEXT: movq %rsp, %rdx
131
+ ; CHECK0-NEXT: movl $2, %ecx
132
+ ; CHECK0-NEXT: callq ___atomic_load
133
+ ; CHECK0-NEXT: movq (%rsp), %rax
134
+ ; CHECK0-NEXT: popq %rcx
135
+ ; CHECK0-NEXT: retq
136
+ %ret = load atomic <1 x ptr >, ptr %x acquire , align 4
137
+ ret <1 x ptr > %ret
138
+ }
139
+
112
140
define <1 x half > @atomic_vec1_half (ptr %x ) {
113
141
; CHECK3-LABEL: atomic_vec1_half:
114
142
; CHECK3: ## %bb.0:
@@ -155,3 +183,213 @@ define <1 x bfloat> @atomic_vec1_bfloat(ptr %x) {
155
183
%ret = load atomic <1 x bfloat>, ptr %x acquire , align 4
156
184
ret <1 x bfloat> %ret
157
185
}
186
+
187
; Test: acquire atomic load of a <1 x i64> vector through %x, where the load
; is declared with only 4-byte alignment.
; Both sets of expected assembly below lower the load to a call to
; ___atomic_load: the incoming pointer is moved from %rdi to %rsi, %rsp is
; passed in %rdx, $8 goes in %edi and $2 goes in %ecx. After the call the
; result is read back from (%rsp) into %rax. The two sets differ only in the
; order in which %rdx and %edi are set up.
; NOTE(review): $8 is presumably the access size in bytes and $2 the acquire
; ordering constant; the libcall is presumably chosen because the access is
; under-aligned (align 4) - confirm before changing the alignment.
+ define <1 x i64 > @atomic_vec1_i64 (ptr %x ) nounwind {
188
+ ; CHECK3-LABEL: atomic_vec1_i64:
189
+ ; CHECK3: ## %bb.0:
190
+ ; CHECK3-NEXT: pushq %rax
191
+ ; CHECK3-NEXT: movq %rdi, %rsi
192
+ ; CHECK3-NEXT: movq %rsp, %rdx
193
+ ; CHECK3-NEXT: movl $8, %edi
194
+ ; CHECK3-NEXT: movl $2, %ecx
195
+ ; CHECK3-NEXT: callq ___atomic_load
196
+ ; CHECK3-NEXT: movq (%rsp), %rax
197
+ ; CHECK3-NEXT: popq %rcx
198
+ ; CHECK3-NEXT: retq
199
+ ;
200
+ ; CHECK0-LABEL: atomic_vec1_i64:
201
+ ; CHECK0: ## %bb.0:
202
+ ; CHECK0-NEXT: pushq %rax
203
+ ; CHECK0-NEXT: movq %rdi, %rsi
204
+ ; CHECK0-NEXT: movl $8, %edi
205
+ ; CHECK0-NEXT: movq %rsp, %rdx
206
+ ; CHECK0-NEXT: movl $2, %ecx
207
+ ; CHECK0-NEXT: callq ___atomic_load
208
+ ; CHECK0-NEXT: movq (%rsp), %rax
209
+ ; CHECK0-NEXT: popq %rcx
210
+ ; CHECK0-NEXT: retq
211
+ %ret = load atomic <1 x i64 >, ptr %x acquire , align 4
212
+ ret <1 x i64 > %ret
213
+ }
214
+
215
; Test: acquire atomic load of a <1 x double> vector through %x, where the
; load is declared with only 4-byte alignment.
; Both sets of expected assembly below lower the load to a call to
; ___atomic_load: the incoming pointer is moved from %rdi to %rsi, %rsp is
; passed in %rdx, $8 goes in %edi and $2 goes in %ecx. After the call the
; result is loaded from memory into %xmm0 with movsd, and the stack slot is
; released with popq %rax. The two sets differ only in the order in which
; %rdx and %edi are set up.
; NOTE(review): $8 is presumably the access size in bytes and $2 the acquire
; ordering constant; the libcall is presumably chosen because the access is
; under-aligned (align 4) - confirm before changing the alignment.
+ define <1 x double > @atomic_vec1_double (ptr %x ) nounwind {
216
+ ; CHECK3-LABEL: atomic_vec1_double:
217
+ ; CHECK3: ## %bb.0:
218
+ ; CHECK3-NEXT: pushq %rax
219
+ ; CHECK3-NEXT: movq %rdi, %rsi
220
+ ; CHECK3-NEXT: movq %rsp, %rdx
221
+ ; CHECK3-NEXT: movl $8, %edi
222
+ ; CHECK3-NEXT: movl $2, %ecx
223
+ ; CHECK3-NEXT: callq ___atomic_load
224
+ ; CHECK3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
225
+ ; CHECK3-NEXT: popq %rax
226
+ ; CHECK3-NEXT: retq
227
+ ;
228
+ ; CHECK0-LABEL: atomic_vec1_double:
229
+ ; CHECK0: ## %bb.0:
230
+ ; CHECK0-NEXT: pushq %rax
231
+ ; CHECK0-NEXT: movq %rdi, %rsi
232
+ ; CHECK0-NEXT: movl $8, %edi
233
+ ; CHECK0-NEXT: movq %rsp, %rdx
234
+ ; CHECK0-NEXT: movl $2, %ecx
235
+ ; CHECK0-NEXT: callq ___atomic_load
236
+ ; CHECK0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
237
+ ; CHECK0-NEXT: popq %rax
238
+ ; CHECK0-NEXT: retq
239
+ %ret = load atomic <1 x double >, ptr %x acquire , align 4
240
+ ret <1 x double > %ret
241
+ }
242
+
243
; Test: acquire atomic load of a <2 x i32> vector through %x, where the load
; is declared with only 4-byte alignment.
; Both sets of expected assembly below lower the load to a call to
; ___atomic_load: the incoming pointer is moved from %rdi to %rsi, %rsp is
; passed in %rdx, $8 goes in %edi and $2 goes in %ecx. After the call the
; result is loaded from memory into %xmm0 (movsd in the first set, movq in
; the second), and the stack slot is released with popq %rax. The sets also
; differ in the order in which %rdx and %edi are set up.
; NOTE(review): $8 is presumably the access size in bytes and $2 the acquire
; ordering constant; the libcall is presumably chosen because the access is
; under-aligned (align 4) - confirm before changing the alignment.
+ define <2 x i32 > @atomic_vec2_i32 (ptr %x ) nounwind {
244
+ ; CHECK3-LABEL: atomic_vec2_i32:
245
+ ; CHECK3: ## %bb.0:
246
+ ; CHECK3-NEXT: pushq %rax
247
+ ; CHECK3-NEXT: movq %rdi, %rsi
248
+ ; CHECK3-NEXT: movq %rsp, %rdx
249
+ ; CHECK3-NEXT: movl $8, %edi
250
+ ; CHECK3-NEXT: movl $2, %ecx
251
+ ; CHECK3-NEXT: callq ___atomic_load
252
+ ; CHECK3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
253
+ ; CHECK3-NEXT: popq %rax
254
+ ; CHECK3-NEXT: retq
255
+ ;
256
+ ; CHECK0-LABEL: atomic_vec2_i32:
257
+ ; CHECK0: ## %bb.0:
258
+ ; CHECK0-NEXT: pushq %rax
259
+ ; CHECK0-NEXT: movq %rdi, %rsi
260
+ ; CHECK0-NEXT: movl $8, %edi
261
+ ; CHECK0-NEXT: movq %rsp, %rdx
262
+ ; CHECK0-NEXT: movl $2, %ecx
263
+ ; CHECK0-NEXT: callq ___atomic_load
264
+ ; CHECK0-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
265
+ ; CHECK0-NEXT: popq %rax
266
+ ; CHECK0-NEXT: retq
267
+ %ret = load atomic <2 x i32 >, ptr %x acquire , align 4
268
+ ret <2 x i32 > %ret
269
+ }
270
+
271
; Test: acquire atomic load of a <4 x float> vector through %x, where the
; load is declared with only 4-byte alignment.
; Both sets of expected assembly below reserve 24 bytes of stack and lower
; the load to a call to ___atomic_load: the incoming pointer is moved from
; %rdi to %rsi, %rsp is passed in %rdx, $16 goes in %edi and $2 goes in %ecx.
; After the call the result is read from (%rsp) into %xmm0 with movaps and
; the stack is restored. The two sets differ only in the order in which %rdx
; and %edi are set up.
; NOTE(review): $16 is presumably the access size in bytes and $2 the acquire
; ordering constant - confirm against the __atomic_load libcall signature.
+ define <4 x float > @atomic_vec4_float (ptr %x ) nounwind {
272
+ ; CHECK3-LABEL: atomic_vec4_float:
273
+ ; CHECK3: ## %bb.0:
274
+ ; CHECK3-NEXT: subq $24, %rsp
275
+ ; CHECK3-NEXT: movq %rdi, %rsi
276
+ ; CHECK3-NEXT: movq %rsp, %rdx
277
+ ; CHECK3-NEXT: movl $16, %edi
278
+ ; CHECK3-NEXT: movl $2, %ecx
279
+ ; CHECK3-NEXT: callq ___atomic_load
280
+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
281
+ ; CHECK3-NEXT: addq $24, %rsp
282
+ ; CHECK3-NEXT: retq
283
+ ;
284
+ ; CHECK0-LABEL: atomic_vec4_float:
285
+ ; CHECK0: ## %bb.0:
286
+ ; CHECK0-NEXT: subq $24, %rsp
287
+ ; CHECK0-NEXT: movq %rdi, %rsi
288
+ ; CHECK0-NEXT: movl $16, %edi
289
+ ; CHECK0-NEXT: movq %rsp, %rdx
290
+ ; CHECK0-NEXT: movl $2, %ecx
291
+ ; CHECK0-NEXT: callq ___atomic_load
292
+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
293
+ ; CHECK0-NEXT: addq $24, %rsp
294
+ ; CHECK0-NEXT: retq
295
+ %ret = load atomic <4 x float >, ptr %x acquire , align 4
296
+ ret <4 x float > %ret
297
+ }
298
+
299
; Test: acquire atomic load of an <8 x double> vector through %x, where the
; load is declared with only 4-byte alignment.
; Both sets of expected assembly below reserve 72 bytes of stack and lower
; the load to a call to ___atomic_load: the incoming pointer is moved from
; %rdi to %rsi, %rsp is passed in %rdx, $64 goes in %edi and $2 goes in %ecx.
; After the call the result is read from the stack into %xmm0 through %xmm3
; (movaps in the first set, movapd in the second) and the stack is restored.
; The sets also differ in the order in which %rdx and %edi are set up.
; NOTE(review): $64 is presumably the access size in bytes and $2 the acquire
; ordering constant - confirm against the __atomic_load libcall signature.
+ define <8 x double > @atomic_vec8_double (ptr %x ) nounwind {
300
+ ; CHECK3-LABEL: atomic_vec8_double:
301
+ ; CHECK3: ## %bb.0:
302
+ ; CHECK3-NEXT: subq $72, %rsp
303
+ ; CHECK3-NEXT: movq %rdi, %rsi
304
+ ; CHECK3-NEXT: movq %rsp, %rdx
305
+ ; CHECK3-NEXT: movl $64, %edi
306
+ ; CHECK3-NEXT: movl $2, %ecx
307
+ ; CHECK3-NEXT: callq ___atomic_load
308
+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
309
+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
310
+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
311
+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
312
+ ; CHECK3-NEXT: addq $72, %rsp
313
+ ; CHECK3-NEXT: retq
314
+ ;
315
+ ; CHECK0-LABEL: atomic_vec8_double:
316
+ ; CHECK0: ## %bb.0:
317
+ ; CHECK0-NEXT: subq $72, %rsp
318
+ ; CHECK0-NEXT: movq %rdi, %rsi
319
+ ; CHECK0-NEXT: movl $64, %edi
320
+ ; CHECK0-NEXT: movq %rsp, %rdx
321
+ ; CHECK0-NEXT: movl $2, %ecx
322
+ ; CHECK0-NEXT: callq ___atomic_load
323
+ ; CHECK0-NEXT: movapd (%rsp), %xmm0
324
+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm1
325
+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm2
326
+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm3
327
+ ; CHECK0-NEXT: addq $72, %rsp
328
+ ; CHECK0-NEXT: retq
329
+ %ret = load atomic <8 x double >, ptr %x acquire , align 4
330
+ ret <8 x double > %ret
331
+ }
332
+
333
; Test: acquire atomic load of a <16 x bfloat> vector through %x, where the
; load is declared with only 4-byte alignment.
; Both sets of expected assembly below reserve 40 bytes of stack and lower
; the load to a call to ___atomic_load: the incoming pointer is moved from
; %rdi to %rsi, %rsp is passed in %rdx, $32 goes in %edi and $2 goes in %ecx.
; After the call the result is read from the stack into %xmm0 and %xmm1 with
; movaps and the stack is restored. The two sets differ only in the order in
; which %rdx and %edi are set up.
; NOTE(review): $32 is presumably the access size in bytes and $2 the acquire
; ordering constant - confirm against the __atomic_load libcall signature.
+ define <16 x bfloat> @atomic_vec16_bfloat (ptr %x ) nounwind {
334
+ ; CHECK3-LABEL: atomic_vec16_bfloat:
335
+ ; CHECK3: ## %bb.0:
336
+ ; CHECK3-NEXT: subq $40, %rsp
337
+ ; CHECK3-NEXT: movq %rdi, %rsi
338
+ ; CHECK3-NEXT: movq %rsp, %rdx
339
+ ; CHECK3-NEXT: movl $32, %edi
340
+ ; CHECK3-NEXT: movl $2, %ecx
341
+ ; CHECK3-NEXT: callq ___atomic_load
342
+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
343
+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
344
+ ; CHECK3-NEXT: addq $40, %rsp
345
+ ; CHECK3-NEXT: retq
346
+ ;
347
+ ; CHECK0-LABEL: atomic_vec16_bfloat:
348
+ ; CHECK0: ## %bb.0:
349
+ ; CHECK0-NEXT: subq $40, %rsp
350
+ ; CHECK0-NEXT: movq %rdi, %rsi
351
+ ; CHECK0-NEXT: movl $32, %edi
352
+ ; CHECK0-NEXT: movq %rsp, %rdx
353
+ ; CHECK0-NEXT: movl $2, %ecx
354
+ ; CHECK0-NEXT: callq ___atomic_load
355
+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
356
+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
357
+ ; CHECK0-NEXT: addq $40, %rsp
358
+ ; CHECK0-NEXT: retq
359
+ %ret = load atomic <16 x bfloat>, ptr %x acquire , align 4
360
+ ret <16 x bfloat> %ret
361
+ }
362
+
363
; Test: acquire atomic load of a <32 x half> vector through %x, where the
; load is declared with only 4-byte alignment.
; Both sets of expected assembly below reserve 72 bytes of stack and lower
; the load to a call to ___atomic_load: the incoming pointer is moved from
; %rdi to %rsi, %rsp is passed in %rdx, $64 goes in %edi and $2 goes in %ecx.
; After the call the result is read from the stack into %xmm0 through %xmm3
; with movaps and the stack is restored. The two sets differ only in the
; order in which %rdx and %edi are set up.
; NOTE(review): $64 is presumably the access size in bytes and $2 the acquire
; ordering constant - confirm against the __atomic_load libcall signature.
+ define <32 x half > @atomic_vec32_half (ptr %x ) nounwind {
364
+ ; CHECK3-LABEL: atomic_vec32_half:
365
+ ; CHECK3: ## %bb.0:
366
+ ; CHECK3-NEXT: subq $72, %rsp
367
+ ; CHECK3-NEXT: movq %rdi, %rsi
368
+ ; CHECK3-NEXT: movq %rsp, %rdx
369
+ ; CHECK3-NEXT: movl $64, %edi
370
+ ; CHECK3-NEXT: movl $2, %ecx
371
+ ; CHECK3-NEXT: callq ___atomic_load
372
+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
373
+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
374
+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
375
+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
376
+ ; CHECK3-NEXT: addq $72, %rsp
377
+ ; CHECK3-NEXT: retq
378
+ ;
379
+ ; CHECK0-LABEL: atomic_vec32_half:
380
+ ; CHECK0: ## %bb.0:
381
+ ; CHECK0-NEXT: subq $72, %rsp
382
+ ; CHECK0-NEXT: movq %rdi, %rsi
383
+ ; CHECK0-NEXT: movl $64, %edi
384
+ ; CHECK0-NEXT: movq %rsp, %rdx
385
+ ; CHECK0-NEXT: movl $2, %ecx
386
+ ; CHECK0-NEXT: callq ___atomic_load
387
+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
388
+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
389
+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
390
+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
391
+ ; CHECK0-NEXT: addq $72, %rsp
392
+ ; CHECK0-NEXT: retq
393
+ %ret = load atomic <32 x half >, ptr %x acquire , align 4
394
+ ret <32 x half > %ret
395
+ }
0 commit comments