@@ -146,6 +146,34 @@ define <1 x i64> @atomic_vec1_i64_align(ptr %x) nounwind {
146
146
ret <1 x i64 > %ret
147
147
}
148
148
; Acquire atomic load of a <1 x ptr> vector with align 4, returned by value.
; Both check prefixes expect a call to ___atomic_load with 8 in edi, the
; incoming pointer (rdi) moved to rsi, the stack slot address (rsp) in rdx and
; the constant 2 in ecx; the result is then reloaded from (rsp) into rax.
; NOTE(review): 2 is presumably the acquire memory-order argument, and the
; CHECK3 / CHECK0 prefixes presumably map to RUN lines outside this hunk - confirm.
149
+ define <1 x ptr > @atomic_vec1_ptr (ptr %x ) nounwind {
150
+ ; CHECK3-LABEL: atomic_vec1_ptr:
151
+ ; CHECK3: ## %bb.0:
152
+ ; CHECK3-NEXT: pushq %rax
153
+ ; CHECK3-NEXT: movq %rdi, %rsi
154
+ ; CHECK3-NEXT: movq %rsp, %rdx
155
+ ; CHECK3-NEXT: movl $8, %edi
156
+ ; CHECK3-NEXT: movl $2, %ecx
157
+ ; CHECK3-NEXT: callq ___atomic_load
158
+ ; CHECK3-NEXT: movq (%rsp), %rax
159
+ ; CHECK3-NEXT: popq %rcx
160
+ ; CHECK3-NEXT: retq
161
+ ;
162
+ ; CHECK0-LABEL: atomic_vec1_ptr:
163
+ ; CHECK0: ## %bb.0:
164
+ ; CHECK0-NEXT: pushq %rax
165
+ ; CHECK0-NEXT: movq %rdi, %rsi
166
+ ; CHECK0-NEXT: movl $8, %edi
167
+ ; CHECK0-NEXT: movq %rsp, %rdx
168
+ ; CHECK0-NEXT: movl $2, %ecx
169
+ ; CHECK0-NEXT: callq ___atomic_load
170
+ ; CHECK0-NEXT: movq (%rsp), %rax
171
+ ; CHECK0-NEXT: popq %rcx
172
+ ; CHECK0-NEXT: retq
173
+ %ret = load atomic <1 x ptr >, ptr %x acquire , align 4
174
+ ret <1 x ptr > %ret
175
+ }
176
+
149
177
define <1 x half > @atomic_vec1_half (ptr %x ) {
150
178
; CHECK3-LABEL: atomic_vec1_half:
151
179
; CHECK3: ## %bb.0:
@@ -182,3 +210,228 @@ define <1 x double> @atomic_vec1_double_align(ptr %x) nounwind {
182
210
%ret = load atomic <1 x double >, ptr %x acquire , align 8
183
211
ret <1 x double > %ret
184
212
}
213
+
; Acquire atomic load of a <1 x i64> vector with align 4, returned by value.
; Both check prefixes expect a ___atomic_load call with 8 in edi, the source
; pointer in rsi, the stack slot address (rsp) in rdx and 2 in ecx, followed
; by a reload of the result from (rsp) into rax.
; NOTE(review): the two prefixes differ only in the order of the argument
; setup moves; 2 is presumably the acquire ordering constant - confirm.
214
+ define <1 x i64 > @atomic_vec1_i64 (ptr %x ) nounwind {
215
+ ; CHECK3-LABEL: atomic_vec1_i64:
216
+ ; CHECK3: ## %bb.0:
217
+ ; CHECK3-NEXT: pushq %rax
218
+ ; CHECK3-NEXT: movq %rdi, %rsi
219
+ ; CHECK3-NEXT: movq %rsp, %rdx
220
+ ; CHECK3-NEXT: movl $8, %edi
221
+ ; CHECK3-NEXT: movl $2, %ecx
222
+ ; CHECK3-NEXT: callq ___atomic_load
223
+ ; CHECK3-NEXT: movq (%rsp), %rax
224
+ ; CHECK3-NEXT: popq %rcx
225
+ ; CHECK3-NEXT: retq
226
+ ;
227
+ ; CHECK0-LABEL: atomic_vec1_i64:
228
+ ; CHECK0: ## %bb.0:
229
+ ; CHECK0-NEXT: pushq %rax
230
+ ; CHECK0-NEXT: movq %rdi, %rsi
231
+ ; CHECK0-NEXT: movl $8, %edi
232
+ ; CHECK0-NEXT: movq %rsp, %rdx
233
+ ; CHECK0-NEXT: movl $2, %ecx
234
+ ; CHECK0-NEXT: callq ___atomic_load
235
+ ; CHECK0-NEXT: movq (%rsp), %rax
236
+ ; CHECK0-NEXT: popq %rcx
237
+ ; CHECK0-NEXT: retq
238
+ %ret = load atomic <1 x i64 >, ptr %x acquire , align 4
239
+ ret <1 x i64 > %ret
240
+ }
241
+
; Acquire atomic load of a <1 x double> vector with align 4, returned by value.
; Both check prefixes expect a ___atomic_load call with 8 in edi, the source
; pointer in rsi, the stack slot address (rsp) in rdx and 2 in ecx. The result
; is then loaded into xmm0 with movsd (memory operand matched by regex) and
; the stack slot is released with popq into rax.
; NOTE(review): 2 is presumably the acquire ordering constant - confirm.
242
+ define <1 x double > @atomic_vec1_double (ptr %x ) nounwind {
243
+ ; CHECK3-LABEL: atomic_vec1_double:
244
+ ; CHECK3: ## %bb.0:
245
+ ; CHECK3-NEXT: pushq %rax
246
+ ; CHECK3-NEXT: movq %rdi, %rsi
247
+ ; CHECK3-NEXT: movq %rsp, %rdx
248
+ ; CHECK3-NEXT: movl $8, %edi
249
+ ; CHECK3-NEXT: movl $2, %ecx
250
+ ; CHECK3-NEXT: callq ___atomic_load
251
+ ; CHECK3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
252
+ ; CHECK3-NEXT: popq %rax
253
+ ; CHECK3-NEXT: retq
254
+ ;
255
+ ; CHECK0-LABEL: atomic_vec1_double:
256
+ ; CHECK0: ## %bb.0:
257
+ ; CHECK0-NEXT: pushq %rax
258
+ ; CHECK0-NEXT: movq %rdi, %rsi
259
+ ; CHECK0-NEXT: movl $8, %edi
260
+ ; CHECK0-NEXT: movq %rsp, %rdx
261
+ ; CHECK0-NEXT: movl $2, %ecx
262
+ ; CHECK0-NEXT: callq ___atomic_load
263
+ ; CHECK0-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
264
+ ; CHECK0-NEXT: popq %rax
265
+ ; CHECK0-NEXT: retq
266
+ %ret = load atomic <1 x double >, ptr %x acquire , align 4
267
+ ret <1 x double > %ret
268
+ }
269
+
; Acquire atomic load of a <2 x i32> vector with align 4, returned by value.
; Both check prefixes expect a ___atomic_load call with 8 in edi, the source
; pointer in rsi, the stack slot address (rsp) in rdx and 2 in ecx.
; The prefixes differ in the reload into xmm0: the CHECK3 lines expect movsd
; while the CHECK0 lines expect movq.
; NOTE(review): 2 is presumably the acquire ordering constant - confirm.
270
+ define <2 x i32 > @atomic_vec2_i32 (ptr %x ) nounwind {
271
+ ; CHECK3-LABEL: atomic_vec2_i32:
272
+ ; CHECK3: ## %bb.0:
273
+ ; CHECK3-NEXT: pushq %rax
274
+ ; CHECK3-NEXT: movq %rdi, %rsi
275
+ ; CHECK3-NEXT: movq %rsp, %rdx
276
+ ; CHECK3-NEXT: movl $8, %edi
277
+ ; CHECK3-NEXT: movl $2, %ecx
278
+ ; CHECK3-NEXT: callq ___atomic_load
279
+ ; CHECK3-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
280
+ ; CHECK3-NEXT: popq %rax
281
+ ; CHECK3-NEXT: retq
282
+ ;
283
+ ; CHECK0-LABEL: atomic_vec2_i32:
284
+ ; CHECK0: ## %bb.0:
285
+ ; CHECK0-NEXT: pushq %rax
286
+ ; CHECK0-NEXT: movq %rdi, %rsi
287
+ ; CHECK0-NEXT: movl $8, %edi
288
+ ; CHECK0-NEXT: movq %rsp, %rdx
289
+ ; CHECK0-NEXT: movl $2, %ecx
290
+ ; CHECK0-NEXT: callq ___atomic_load
291
+ ; CHECK0-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
292
+ ; CHECK0-NEXT: popq %rax
293
+ ; CHECK0-NEXT: retq
294
+ %ret = load atomic <2 x i32 >, ptr %x acquire , align 4
295
+ ret <2 x i32 > %ret
296
+ }
297
+
; Acquire atomic load of a <4 x float> vector with align 16, returned by value.
; A single shared check prefix is used here. It expects a call to
; ___atomic_load_16 with 2 in esi (the pointer argument is left untouched in
; rdi), after which rax and rdx are moved into xmm0 and xmm1 and merged into
; xmm0 with punpcklqdq.
; NOTE(review): 2 is presumably the acquire ordering constant - confirm.
298
+ define <4 x float > @atomic_vec4_float_align (ptr %x ) nounwind {
299
+ ; CHECK-LABEL: atomic_vec4_float_align:
300
+ ; CHECK: ## %bb.0:
301
+ ; CHECK-NEXT: pushq %rax
302
+ ; CHECK-NEXT: movl $2, %esi
303
+ ; CHECK-NEXT: callq ___atomic_load_16
304
+ ; CHECK-NEXT: movq %rdx, %xmm1
305
+ ; CHECK-NEXT: movq %rax, %xmm0
306
+ ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
307
+ ; CHECK-NEXT: popq %rax
308
+ ; CHECK-NEXT: retq
309
+ %ret = load atomic <4 x float >, ptr %x acquire , align 16
310
+ ret <4 x float > %ret
311
+ }
312
+
; Acquire atomic load of a <4 x float> vector with align 4, returned by value.
; Both check prefixes expect a 24-byte stack adjustment, then a ___atomic_load
; call with 16 in edi, the source pointer in rsi, the stack buffer address
; (rsp) in rdx and 2 in ecx. The result is reloaded from (rsp) into xmm0 with
; movaps before the stack is restored.
; NOTE(review): 2 is presumably the acquire ordering constant - confirm.
313
+ define <4 x float > @atomic_vec4_float (ptr %x ) nounwind {
314
+ ; CHECK3-LABEL: atomic_vec4_float:
315
+ ; CHECK3: ## %bb.0:
316
+ ; CHECK3-NEXT: subq $24, %rsp
317
+ ; CHECK3-NEXT: movq %rdi, %rsi
318
+ ; CHECK3-NEXT: movq %rsp, %rdx
319
+ ; CHECK3-NEXT: movl $16, %edi
320
+ ; CHECK3-NEXT: movl $2, %ecx
321
+ ; CHECK3-NEXT: callq ___atomic_load
322
+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
323
+ ; CHECK3-NEXT: addq $24, %rsp
324
+ ; CHECK3-NEXT: retq
325
+ ;
326
+ ; CHECK0-LABEL: atomic_vec4_float:
327
+ ; CHECK0: ## %bb.0:
328
+ ; CHECK0-NEXT: subq $24, %rsp
329
+ ; CHECK0-NEXT: movq %rdi, %rsi
330
+ ; CHECK0-NEXT: movl $16, %edi
331
+ ; CHECK0-NEXT: movq %rsp, %rdx
332
+ ; CHECK0-NEXT: movl $2, %ecx
333
+ ; CHECK0-NEXT: callq ___atomic_load
334
+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
335
+ ; CHECK0-NEXT: addq $24, %rsp
336
+ ; CHECK0-NEXT: retq
337
+ %ret = load atomic <4 x float >, ptr %x acquire , align 4
338
+ ret <4 x float > %ret
339
+ }
340
+
; Acquire atomic load of an <8 x double> vector with align 4, returned by value.
; Both check prefixes expect a 72-byte stack adjustment, then a ___atomic_load
; call with 64 in edi, the source pointer in rsi, the stack buffer address
; (rsp) in rdx and 2 in ecx. The result is reloaded from the stack buffer into
; xmm0 through xmm3; the CHECK3 lines expect movaps for those reloads while
; the CHECK0 lines expect movapd.
; NOTE(review): 2 is presumably the acquire ordering constant - confirm.
341
+ define <8 x double > @atomic_vec8_double (ptr %x ) nounwind {
342
+ ; CHECK3-LABEL: atomic_vec8_double:
343
+ ; CHECK3: ## %bb.0:
344
+ ; CHECK3-NEXT: subq $72, %rsp
345
+ ; CHECK3-NEXT: movq %rdi, %rsi
346
+ ; CHECK3-NEXT: movq %rsp, %rdx
347
+ ; CHECK3-NEXT: movl $64, %edi
348
+ ; CHECK3-NEXT: movl $2, %ecx
349
+ ; CHECK3-NEXT: callq ___atomic_load
350
+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
351
+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
352
+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
353
+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
354
+ ; CHECK3-NEXT: addq $72, %rsp
355
+ ; CHECK3-NEXT: retq
356
+ ;
357
+ ; CHECK0-LABEL: atomic_vec8_double:
358
+ ; CHECK0: ## %bb.0:
359
+ ; CHECK0-NEXT: subq $72, %rsp
360
+ ; CHECK0-NEXT: movq %rdi, %rsi
361
+ ; CHECK0-NEXT: movl $64, %edi
362
+ ; CHECK0-NEXT: movq %rsp, %rdx
363
+ ; CHECK0-NEXT: movl $2, %ecx
364
+ ; CHECK0-NEXT: callq ___atomic_load
365
+ ; CHECK0-NEXT: movapd (%rsp), %xmm0
366
+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm1
367
+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm2
368
+ ; CHECK0-NEXT: movapd {{[0-9]+}}(%rsp), %xmm3
369
+ ; CHECK0-NEXT: addq $72, %rsp
370
+ ; CHECK0-NEXT: retq
371
+ %ret = load atomic <8 x double >, ptr %x acquire , align 4
372
+ ret <8 x double > %ret
373
+ }
374
+
; Acquire atomic load of a <16 x bfloat> vector with align 4, returned by value.
; Both check prefixes expect a 40-byte stack adjustment, then a ___atomic_load
; call with 32 in edi, the source pointer in rsi, the stack buffer address
; (rsp) in rdx and 2 in ecx. The result is reloaded from the stack buffer into
; xmm0 and xmm1 with movaps before the stack is restored.
; NOTE(review): 2 is presumably the acquire ordering constant - confirm.
375
+ define <16 x bfloat> @atomic_vec16_bfloat (ptr %x ) nounwind {
376
+ ; CHECK3-LABEL: atomic_vec16_bfloat:
377
+ ; CHECK3: ## %bb.0:
378
+ ; CHECK3-NEXT: subq $40, %rsp
379
+ ; CHECK3-NEXT: movq %rdi, %rsi
380
+ ; CHECK3-NEXT: movq %rsp, %rdx
381
+ ; CHECK3-NEXT: movl $32, %edi
382
+ ; CHECK3-NEXT: movl $2, %ecx
383
+ ; CHECK3-NEXT: callq ___atomic_load
384
+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
385
+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
386
+ ; CHECK3-NEXT: addq $40, %rsp
387
+ ; CHECK3-NEXT: retq
388
+ ;
389
+ ; CHECK0-LABEL: atomic_vec16_bfloat:
390
+ ; CHECK0: ## %bb.0:
391
+ ; CHECK0-NEXT: subq $40, %rsp
392
+ ; CHECK0-NEXT: movq %rdi, %rsi
393
+ ; CHECK0-NEXT: movl $32, %edi
394
+ ; CHECK0-NEXT: movq %rsp, %rdx
395
+ ; CHECK0-NEXT: movl $2, %ecx
396
+ ; CHECK0-NEXT: callq ___atomic_load
397
+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
398
+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
399
+ ; CHECK0-NEXT: addq $40, %rsp
400
+ ; CHECK0-NEXT: retq
401
+ %ret = load atomic <16 x bfloat>, ptr %x acquire , align 4
402
+ ret <16 x bfloat> %ret
403
+ }
404
+
; Acquire atomic load of a <32 x half> vector with align 4, returned by value.
; Both check prefixes expect a 72-byte stack adjustment, then a ___atomic_load
; call with 64 in edi, the source pointer in rsi, the stack buffer address
; (rsp) in rdx and 2 in ecx. The result is reloaded from the stack buffer into
; xmm0 through xmm3 with movaps before the stack is restored.
; NOTE(review): 2 is presumably the acquire ordering constant - confirm.
405
+ define <32 x half > @atomic_vec32_half (ptr %x ) nounwind {
406
+ ; CHECK3-LABEL: atomic_vec32_half:
407
+ ; CHECK3: ## %bb.0:
408
+ ; CHECK3-NEXT: subq $72, %rsp
409
+ ; CHECK3-NEXT: movq %rdi, %rsi
410
+ ; CHECK3-NEXT: movq %rsp, %rdx
411
+ ; CHECK3-NEXT: movl $64, %edi
412
+ ; CHECK3-NEXT: movl $2, %ecx
413
+ ; CHECK3-NEXT: callq ___atomic_load
414
+ ; CHECK3-NEXT: movaps (%rsp), %xmm0
415
+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
416
+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
417
+ ; CHECK3-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
418
+ ; CHECK3-NEXT: addq $72, %rsp
419
+ ; CHECK3-NEXT: retq
420
+ ;
421
+ ; CHECK0-LABEL: atomic_vec32_half:
422
+ ; CHECK0: ## %bb.0:
423
+ ; CHECK0-NEXT: subq $72, %rsp
424
+ ; CHECK0-NEXT: movq %rdi, %rsi
425
+ ; CHECK0-NEXT: movl $64, %edi
426
+ ; CHECK0-NEXT: movq %rsp, %rdx
427
+ ; CHECK0-NEXT: movl $2, %ecx
428
+ ; CHECK0-NEXT: callq ___atomic_load
429
+ ; CHECK0-NEXT: movaps (%rsp), %xmm0
430
+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1
431
+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm2
432
+ ; CHECK0-NEXT: movaps {{[0-9]+}}(%rsp), %xmm3
433
+ ; CHECK0-NEXT: addq $72, %rsp
434
+ ; CHECK0-NEXT: retq
435
+ %ret = load atomic <32 x half >, ptr %x acquire , align 4
436
+ ret <32 x half > %ret
437
+ }
0 commit comments