@@ -172,8 +172,9 @@ define <8 x half> @test_insert_v8f16_insert_1(half %a) {
172
172
; CHECK-LABEL: test_insert_v8f16_insert_1:
173
173
; CHECK: // %bb.0:
174
174
; CHECK-NEXT: // kill: def $h0 killed $h0 def $q0
175
+ ; CHECK-NEXT: movi d1, #0000000000000000
175
176
; CHECK-NEXT: dup.8h v0, v0[0]
176
- ; CHECK-NEXT: mov.h v0[7], wzr
177
+ ; CHECK-NEXT: mov.h v0[7], v1[0]
177
178
; CHECK-NEXT: ret
178
179
%v.0 = insertelement <8 x half > <half undef , half undef , half undef , half undef , half undef , half undef , half undef , half 0 .0 >, half %a , i32 0
179
180
%v.1 = insertelement <8 x half > %v.0 , half %a , i32 1
@@ -278,8 +279,9 @@ define <4 x float> @test_insert_3_f32_undef_zero_vector(float %a) {
278
279
; CHECK-LABEL: test_insert_3_f32_undef_zero_vector:
279
280
; CHECK: // %bb.0:
280
281
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
282
+ ; CHECK-NEXT: movi d1, #0000000000000000
281
283
; CHECK-NEXT: dup.4s v0, v0[0]
282
- ; CHECK-NEXT: mov.s v0[3], wzr
284
+ ; CHECK-NEXT: mov.s v0[3], v1[0]
283
285
; CHECK-NEXT: ret
284
286
%v.0 = insertelement <4 x float > <float undef , float undef , float undef , float 0 .000000e+00 >, float %a , i32 0
285
287
%v.1 = insertelement <4 x float > %v.0 , float %a , i32 1
@@ -347,12 +349,12 @@ define <8 x i16> @test_insert_v8i16_i16_zero(<8 x i16> %a) {
347
349
ret <8 x i16 > %v.0
348
350
}
349
351
350
- ; TODO: This should jsut be a mov.s v0[3], wzr
351
352
define <4 x half > @test_insert_v4f16_f16_zero (<4 x half > %a ) {
352
353
; CHECK-LABEL: test_insert_v4f16_f16_zero:
353
354
; CHECK: // %bb.0:
355
+ ; CHECK-NEXT: movi d1, #0000000000000000
354
356
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
355
- ; CHECK-NEXT: mov.h v0[0], wzr
357
+ ; CHECK-NEXT: mov.h v0[0], v1[0]
356
358
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
357
359
; CHECK-NEXT: ret
358
360
%v.0 = insertelement <4 x half > %a , half 0 .000000e+00 , i32 0
@@ -362,7 +364,8 @@ define <4 x half> @test_insert_v4f16_f16_zero(<4 x half> %a) {
362
364
define <8 x half > @test_insert_v8f16_f16_zero (<8 x half > %a ) {
363
365
; CHECK-LABEL: test_insert_v8f16_f16_zero:
364
366
; CHECK: // %bb.0:
365
- ; CHECK-NEXT: mov.h v0[6], wzr
367
+ ; CHECK-NEXT: movi d1, #0000000000000000
368
+ ; CHECK-NEXT: mov.h v0[6], v1[0]
366
369
; CHECK-NEXT: ret
367
370
%v.0 = insertelement <8 x half > %a , half 0 .000000e+00 , i32 6
368
371
ret <8 x half > %v.0
@@ -371,8 +374,9 @@ define <8 x half> @test_insert_v8f16_f16_zero(<8 x half> %a) {
371
374
define <2 x float > @test_insert_v2f32_f32_zero (<2 x float > %a ) {
372
375
; CHECK-LABEL: test_insert_v2f32_f32_zero:
373
376
; CHECK: // %bb.0:
377
+ ; CHECK-NEXT: movi d1, #0000000000000000
374
378
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
375
- ; CHECK-NEXT: mov.s v0[0], wzr
379
+ ; CHECK-NEXT: mov.s v0[0], v1[0]
376
380
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
377
381
; CHECK-NEXT: ret
378
382
%v.0 = insertelement <2 x float > %a , float 0 .000000e+00 , i32 0
@@ -382,7 +386,8 @@ define <2 x float> @test_insert_v2f32_f32_zero(<2 x float> %a) {
382
386
define <4 x float > @test_insert_v4f32_f32_zero (<4 x float > %a ) {
383
387
; CHECK-LABEL: test_insert_v4f32_f32_zero:
384
388
; CHECK: // %bb.0:
385
- ; CHECK-NEXT: mov.s v0[3], wzr
389
+ ; CHECK-NEXT: movi d1, #0000000000000000
390
+ ; CHECK-NEXT: mov.s v0[3], v1[0]
386
391
; CHECK-NEXT: ret
387
392
%v.0 = insertelement <4 x float > %a , float 0 .000000e+00 , i32 3
388
393
ret <4 x float > %v.0
@@ -391,8 +396,60 @@ define <4 x float> @test_insert_v4f32_f32_zero(<4 x float> %a) {
391
396
+ ; Insert +0.0 into lane 1 of a <2 x double>. No attribute group is attached
+ ; to this function; the expected code materialises zero in d1 with movi and
+ ; copies it into the lane with a vector-to-vector mov.
define <2 x double> @test_insert_v2f64_f64_zero(<2 x double> %a) {
392
397
; CHECK-LABEL: test_insert_v2f64_f64_zero:
393
398
; CHECK: // %bb.0:
399
+ ; CHECK-NEXT: movi d1, #0000000000000000
400
+ ; CHECK-NEXT: mov.d v0[1], v1[0]
401
+ ; CHECK-NEXT: ret
402
+ %v.0 = insertelement <2 x double> %a, double 0.000000e+00, i32 1
403
+ ret <2 x double> %v.0
404
+ }
405
+
406
+ ; Same zero insert into lane 0 of a <4 x half> as test_insert_v4f16_f16_zero,
+ ; but with attribute group #1 ("tune-cpu"="cortex-a55") attached. Here the
+ ; expected code writes the lane directly from wzr, with no movi.
+ define <4 x half> @test_insert_v4f16_f16_zero_wzr(<4 x half> %a) #1 {
407
+ ; CHECK-LABEL: test_insert_v4f16_f16_zero_wzr:
408
+ ; CHECK: // %bb.0:
409
+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
410
+ ; CHECK-NEXT: mov.h v0[0], wzr
411
+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
412
+ ; CHECK-NEXT: ret
413
+ %v.0 = insertelement <4 x half> %a, half 0.000000e+00, i32 0
414
+ ret <4 x half> %v.0
415
+ }
416
+
417
+ ; Same zero insert into lane 6 of an <8 x half> as test_insert_v8f16_f16_zero,
+ ; but with attribute group #1 ("tune-cpu"="cortex-a55") attached. Here the
+ ; expected code writes the lane directly from wzr, with no movi.
+ define <8 x half> @test_insert_v8f16_f16_zero_wzr(<8 x half> %a) #1 {
418
+ ; CHECK-LABEL: test_insert_v8f16_f16_zero_wzr:
419
+ ; CHECK: // %bb.0:
420
+ ; CHECK-NEXT: mov.h v0[6], wzr
421
+ ; CHECK-NEXT: ret
422
+ %v.0 = insertelement <8 x half> %a, half 0.000000e+00, i32 6
423
+ ret <8 x half> %v.0
424
+ }
425
+
426
+ ; Same zero insert into lane 0 of a <2 x float> as test_insert_v2f32_f32_zero,
+ ; but with attribute group #1 ("tune-cpu"="cortex-a55") attached. Here the
+ ; expected code writes the lane directly from wzr, with no movi.
+ define <2 x float> @test_insert_v2f32_f32_zero_wzr(<2 x float> %a) #1 {
427
+ ; CHECK-LABEL: test_insert_v2f32_f32_zero_wzr:
428
+ ; CHECK: // %bb.0:
429
+ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
430
+ ; CHECK-NEXT: mov.s v0[0], wzr
431
+ ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
432
+ ; CHECK-NEXT: ret
433
+ %v.0 = insertelement <2 x float> %a, float 0.000000e+00, i32 0
434
+ ret <2 x float> %v.0
435
+ }
436
+
437
+ ; Same zero insert into lane 3 of a <4 x float> as test_insert_v4f32_f32_zero,
+ ; but with attribute group #1 ("tune-cpu"="cortex-a55") attached. Here the
+ ; expected code writes the lane directly from wzr, with no movi.
+ define <4 x float> @test_insert_v4f32_f32_zero_wzr(<4 x float> %a) #1 {
438
+ ; CHECK-LABEL: test_insert_v4f32_f32_zero_wzr:
439
+ ; CHECK: // %bb.0:
440
+ ; CHECK-NEXT: mov.s v0[3], wzr
441
+ ; CHECK-NEXT: ret
442
+ %v.0 = insertelement <4 x float> %a, float 0.000000e+00, i32 3
443
+ ret <4 x float> %v.0
444
+ }
445
+
446
+ ; Same zero insert into lane 1 of a <2 x double> as test_insert_v2f64_f64_zero,
+ ; but with attribute group #1 ("tune-cpu"="cortex-a55") attached. Here the
+ ; expected code writes the lane directly from xzr, with no movi.
+ define <2 x double> @test_insert_v2f64_f64_zero_xzr(<2 x double> %a) #1 {
447
+ ; CHECK-LABEL: test_insert_v2f64_f64_zero_xzr:
448
+ ; CHECK: // %bb.0:
394
449
; CHECK-NEXT: mov.d v0[1], xzr
395
450
; CHECK-NEXT: ret
396
451
%v.0 = insertelement <2 x double> %a, double 0.000000e+00, i32 1
397
452
ret <2 x double> %v.0
398
453
}
454
+
455
+ attributes #1 = {"tune-cpu" ="cortex-a55" } ; attribute group referenced as #1 by the *_zero_wzr / *_zero_xzr test functions above
0 commit comments