@@ -205,63 +205,19 @@ define <2 x float> @atomic_vec2_float_align(ptr %x) {
205
205
}
206
206
207
207
define <2 x half> @atomic_vec2_half(ptr %x) {
208
- ; CHECK3-LABEL: atomic_vec2_half:
209
- ; CHECK3: ## %bb.0:
210
- ; CHECK3-NEXT: movl (%rdi), %eax
211
- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
212
- ; CHECK3-NEXT: shrl $16, %eax
213
- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
214
- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
215
- ; CHECK3-NEXT: retq
216
- ;
217
- ; CHECK0-LABEL: atomic_vec2_half:
218
- ; CHECK0: ## %bb.0:
219
- ; CHECK0-NEXT: movl (%rdi), %eax
220
- ; CHECK0-NEXT: movl %eax, %ecx
221
- ; CHECK0-NEXT: shrl $16, %ecx
222
- ; CHECK0-NEXT: movw %cx, %dx
223
- ; CHECK0-NEXT: ## implicit-def: $ecx
224
- ; CHECK0-NEXT: movw %dx, %cx
225
- ; CHECK0-NEXT: ## implicit-def: $xmm1
226
- ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm1
227
- ; CHECK0-NEXT: movw %ax, %cx
228
- ; CHECK0-NEXT: ## implicit-def: $eax
229
- ; CHECK0-NEXT: movw %cx, %ax
230
- ; CHECK0-NEXT: ## implicit-def: $xmm0
231
- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
232
- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
233
- ; CHECK0-NEXT: retq
208
+ ; CHECK-LABEL: atomic_vec2_half:
209
+ ; CHECK: ## %bb.0:
210
+ ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
211
+ ; CHECK-NEXT: retq
234
212
%ret = load atomic <2 x half>, ptr %x acquire, align 4
235
213
ret <2 x half> %ret
236
214
}
237
215
238
216
define <2 x bfloat> @atomic_vec2_bfloat(ptr %x) {
239
- ; CHECK3-LABEL: atomic_vec2_bfloat:
240
- ; CHECK3: ## %bb.0:
241
- ; CHECK3-NEXT: movl (%rdi), %eax
242
- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
243
- ; CHECK3-NEXT: shrl $16, %eax
244
- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
245
- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
246
- ; CHECK3-NEXT: retq
247
- ;
248
- ; CHECK0-LABEL: atomic_vec2_bfloat:
249
- ; CHECK0: ## %bb.0:
250
- ; CHECK0-NEXT: movl (%rdi), %eax
251
- ; CHECK0-NEXT: movl %eax, %ecx
252
- ; CHECK0-NEXT: shrl $16, %ecx
253
- ; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
254
- ; CHECK0-NEXT: movw %ax, %dx
255
- ; CHECK0-NEXT: ## implicit-def: $eax
256
- ; CHECK0-NEXT: movw %dx, %ax
257
- ; CHECK0-NEXT: ## implicit-def: $xmm0
258
- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
259
- ; CHECK0-NEXT: ## implicit-def: $eax
260
- ; CHECK0-NEXT: movw %cx, %ax
261
- ; CHECK0-NEXT: ## implicit-def: $xmm1
262
- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
263
- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
264
- ; CHECK0-NEXT: retq
217
+ ; CHECK-LABEL: atomic_vec2_bfloat:
218
+ ; CHECK: ## %bb.0:
219
+ ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
220
+ ; CHECK-NEXT: retq
265
221
%ret = load atomic <2 x bfloat>, ptr %x acquire, align 4
266
222
ret <2 x bfloat> %ret
267
223
}
@@ -439,110 +395,19 @@ define <4 x i16> @atomic_vec4_i16(ptr %x) nounwind {
439
395
}
440
396
441
397
define <4 x half> @atomic_vec4_half(ptr %x) nounwind {
442
- ; CHECK3-LABEL: atomic_vec4_half:
443
- ; CHECK3: ## %bb.0:
444
- ; CHECK3-NEXT: movq (%rdi), %rax
445
- ; CHECK3-NEXT: movl %eax, %ecx
446
- ; CHECK3-NEXT: shrl $16, %ecx
447
- ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm1
448
- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
449
- ; CHECK3-NEXT: movq %rax, %rcx
450
- ; CHECK3-NEXT: shrq $32, %rcx
451
- ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm2
452
- ; CHECK3-NEXT: shrq $48, %rax
453
- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm3
454
- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3]
455
- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
456
- ; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
457
- ; CHECK3-NEXT: retq
458
- ;
459
- ; CHECK0-LABEL: atomic_vec4_half:
460
- ; CHECK0: ## %bb.0:
461
- ; CHECK0-NEXT: movq (%rdi), %rax
462
- ; CHECK0-NEXT: movl %eax, %ecx
463
- ; CHECK0-NEXT: shrl $16, %ecx
464
- ; CHECK0-NEXT: movw %cx, %dx
465
- ; CHECK0-NEXT: ## implicit-def: $ecx
466
- ; CHECK0-NEXT: movw %dx, %cx
467
- ; CHECK0-NEXT: ## implicit-def: $xmm2
468
- ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm2
469
- ; CHECK0-NEXT: movw %ax, %dx
470
- ; CHECK0-NEXT: ## implicit-def: $ecx
471
- ; CHECK0-NEXT: movw %dx, %cx
472
- ; CHECK0-NEXT: ## implicit-def: $xmm0
473
- ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm0
474
- ; CHECK0-NEXT: movq %rax, %rcx
475
- ; CHECK0-NEXT: shrq $32, %rcx
476
- ; CHECK0-NEXT: movw %cx, %dx
477
- ; CHECK0-NEXT: ## implicit-def: $ecx
478
- ; CHECK0-NEXT: movw %dx, %cx
479
- ; CHECK0-NEXT: ## implicit-def: $xmm1
480
- ; CHECK0-NEXT: pinsrw $0, %ecx, %xmm1
481
- ; CHECK0-NEXT: shrq $48, %rax
482
- ; CHECK0-NEXT: movw %ax, %cx
483
- ; CHECK0-NEXT: ## implicit-def: $eax
484
- ; CHECK0-NEXT: movw %cx, %ax
485
- ; CHECK0-NEXT: ## implicit-def: $xmm3
486
- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm3
487
- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
488
- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
489
- ; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
490
- ; CHECK0-NEXT: retq
398
+ ; CHECK-LABEL: atomic_vec4_half:
399
+ ; CHECK: ## %bb.0:
400
+ ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
401
+ ; CHECK-NEXT: retq
491
402
%ret = load atomic <4 x half>, ptr %x acquire, align 8
492
403
ret <4 x half> %ret
493
404
}
494
405
495
406
define <4 x bfloat> @atomic_vec4_bfloat(ptr %x) nounwind {
496
- ; CHECK3-LABEL: atomic_vec4_bfloat:
497
- ; CHECK3: ## %bb.0:
498
- ; CHECK3-NEXT: movq (%rdi), %rax
499
- ; CHECK3-NEXT: movq %rax, %rcx
500
- ; CHECK3-NEXT: movq %rax, %rdx
501
- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm0
502
- ; CHECK3-NEXT: ## kill: def $eax killed $eax killed $rax
503
- ; CHECK3-NEXT: shrl $16, %eax
504
- ; CHECK3-NEXT: shrq $32, %rcx
505
- ; CHECK3-NEXT: shrq $48, %rdx
506
- ; CHECK3-NEXT: pinsrw $0, %edx, %xmm1
507
- ; CHECK3-NEXT: pinsrw $0, %ecx, %xmm2
508
- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
509
- ; CHECK3-NEXT: pinsrw $0, %eax, %xmm1
510
- ; CHECK3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
511
- ; CHECK3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
512
- ; CHECK3-NEXT: retq
513
- ;
514
- ; CHECK0-LABEL: atomic_vec4_bfloat:
515
- ; CHECK0: ## %bb.0:
516
- ; CHECK0-NEXT: movq (%rdi), %rax
517
- ; CHECK0-NEXT: movl %eax, %ecx
518
- ; CHECK0-NEXT: shrl $16, %ecx
519
- ; CHECK0-NEXT: ## kill: def $cx killed $cx killed $ecx
520
- ; CHECK0-NEXT: movw %ax, %dx
521
- ; CHECK0-NEXT: movq %rax, %rsi
522
- ; CHECK0-NEXT: shrq $32, %rsi
523
- ; CHECK0-NEXT: ## kill: def $si killed $si killed $rsi
524
- ; CHECK0-NEXT: shrq $48, %rax
525
- ; CHECK0-NEXT: movw %ax, %di
526
- ; CHECK0-NEXT: ## implicit-def: $eax
527
- ; CHECK0-NEXT: movw %di, %ax
528
- ; CHECK0-NEXT: ## implicit-def: $xmm0
529
- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
530
- ; CHECK0-NEXT: ## implicit-def: $eax
531
- ; CHECK0-NEXT: movw %si, %ax
532
- ; CHECK0-NEXT: ## implicit-def: $xmm1
533
- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm1
534
- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
535
- ; CHECK0-NEXT: ## implicit-def: $eax
536
- ; CHECK0-NEXT: movw %dx, %ax
537
- ; CHECK0-NEXT: ## implicit-def: $xmm0
538
- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm0
539
- ; CHECK0-NEXT: ## implicit-def: $eax
540
- ; CHECK0-NEXT: movw %cx, %ax
541
- ; CHECK0-NEXT: ## implicit-def: $xmm2
542
- ; CHECK0-NEXT: pinsrw $0, %eax, %xmm2
543
- ; CHECK0-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
544
- ; CHECK0-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
545
- ; CHECK0-NEXT: retq
407
+ ; CHECK-LABEL: atomic_vec4_bfloat:
408
+ ; CHECK: ## %bb.0:
409
+ ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
410
+ ; CHECK-NEXT: retq
546
411
%ret = load atomic <4 x bfloat>, ptr %x acquire, align 8
547
412
ret <4 x bfloat> %ret
548
413
}
0 commit comments