@@ -400,33 +400,21 @@ declare void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64>, <vscale x 1
declare <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)


- ; TODO: Make the step loop variant to reflect what the loop vectorizer will emit
- ; in an EVL tail folding configuration.
-
define <vscale x 1 x i64> @vp_gather(ptr %a, i32 %len) {
; CHECK-LABEL: @vp_gather(
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
- ; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
- ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP0]], i64 0
- ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
- ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[TMP1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
- ; CHECK-NEXT: [[ELEMS:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[VEC_IND_SCALAR]]
- ; CHECK-NEXT: [[EVL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[ELEMS]], i32 1, i1 true)
- ; CHECK-NEXT: [[ODD:%.*]] = and <vscale x 1 x i64> [[VEC_IND]], splat (i64 1)
- ; CHECK-NEXT: [[MASK:%.*]] = icmp ne <vscale x 1 x i64> [[ODD]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
- ; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP2]], i64 16, <vscale x 1 x i1> [[MASK]], i32 [[EVL]])
+ ; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP2]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 42)
; CHECK-NEXT: [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[TMP0]]
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[TMP0]]
- ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK: for.cond.cleanup:
@@ -444,15 +432,8 @@ vector.body: ; preds = %vector.body, %vecto
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
%accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]
-
- %elems = sub i64 %wide.trip.count, %index
- %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %elems, i32 1, i1 true)
-
- %odd = and <vscale x 1 x i64> %vec.ind, splat (i64 1)
- %mask = icmp ne <vscale x 1 x i64> %odd, splat (i64 0)
-
%2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
- %gather = call <vscale x 1 x i64> @llvm.vp.gather(<vscale x 1 x ptr> %2, <vscale x 1 x i1> %mask, i32 %evl)
+ %gather = call <vscale x 1 x i64> @llvm.vp.gather(<vscale x 1 x ptr> %2, <vscale x 1 x i1> splat (i1 true), i32 42)
%accum.next = add <vscale x 1 x i64> %accum, %gather
%index.next = add nuw i64 %index, %0
%vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
@@ -463,31 +444,19 @@ for.cond.cleanup: ; preds = %vector.body
ret <vscale x 1 x i64> %accum.next
}

- ; TODO: Make the step loop variant to reflect what the loop vectorizer will emit
- ; in an EVL tail folding configuration.
-
define void @vp_scatter(ptr %a, i32 %len) {
; CHECK-LABEL: @vp_scatter(
; CHECK-NEXT: vector.ph:
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
- ; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
- ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP0]], i64 0
- ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
- ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[TMP1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
- ; CHECK-NEXT: [[ELEMS:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[VEC_IND_SCALAR]]
- ; CHECK-NEXT: [[EVL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[ELEMS]], i32 1, i1 true)
- ; CHECK-NEXT: [[ODD:%.*]] = and <vscale x 1 x i64> [[VEC_IND]], splat (i64 1)
- ; CHECK-NEXT: [[MASK:%.*]] = icmp ne <vscale x 1 x i64> [[ODD]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
- ; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP2]], i64 16, <vscale x 1 x i1> [[MASK]], i32 [[EVL]])
+ ; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP2]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 42)
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[TMP0]]
; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[TMP0]]
- ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK: for.cond.cleanup:
@@ -504,17 +473,120 @@ vector.ph:
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
+ %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
+ tail call void @llvm.vp.scatter(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> %2, <vscale x 1 x i1> splat (i1 true), i32 42)
+ %index.next = add nuw i64 %index, %0
+ %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
+ %3 = icmp ne i64 %index.next, %wide.trip.count
+ br i1 %3, label %for.cond.cleanup, label %vector.body
+
+ for.cond.cleanup: ; preds = %vector.body
+ ret void
+ }
+
+ ; Test that reflects what the loop vectorizer will generate for an EVL tail
+ ; folded loop
+
+ define <vscale x 1 x i64> @evl_gather(ptr %a, i32 %len) {
+ ; CHECK-LABEL: @evl_gather(
+ ; CHECK-NEXT: vector.ph:
+ ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
+ ; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
+ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+ ; CHECK: vector.body:
+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+ ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[TMP1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+ ; CHECK-NEXT: [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
+ ; CHECK-NEXT: [[ELEMS:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[INDEX]]
+ ; CHECK-NEXT: [[EVL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[ELEMS]], i32 1, i1 true)
+ ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[A:%.*]], <vscale x 1 x i64> [[VEC_IND]], i32 3
+ ; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[TMP2]], <vscale x 1 x i1> splat (i1 true), i32 [[EVL]])
+ ; CHECK-NEXT: [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
+ ; CHECK-NEXT: [[EVL_ZEXT:%.*]] = zext i32 [[EVL]] to i64
+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[EVL_ZEXT]]
+ ; CHECK-NEXT: [[EVL_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[EVL_ZEXT]], i64 0
+ ; CHECK-NEXT: [[EVL_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[EVL_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[VEC_IND]], [[EVL_SPLAT]]
+ ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[WIDE_TRIP_COUNT]]
+ ; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
+ ; CHECK: for.cond.cleanup:
+ ; CHECK-NEXT: ret <vscale x 1 x i64> [[ACCUM_NEXT]]
+ ;
+ vector.ph:
+ %wide.trip.count = zext i32 %len to i64
+ %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
+ br label %vector.body
+
+ vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
+ %accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]

%elems = sub i64 %wide.trip.count, %index
%evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %elems, i32 1, i1 true)

- %odd = and <vscale x 1 x i64> %vec.ind, splat (i64 1)
- %mask = icmp ne <vscale x 1 x i64> %odd, splat (i64 0)
+ %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
+ %gather = call <vscale x 1 x i64> @llvm.vp.gather(<vscale x 1 x ptr> %2, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ %accum.next = add <vscale x 1 x i64> %accum, %gather
+
+ %evl.zext = zext i32 %evl to i64
+ %index.next = add nuw i64 %index, %evl.zext
+ %evl.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %evl.zext, i64 0
+ %evl.splat = shufflevector <vscale x 1 x i64> %evl.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %evl.splat
+ %3 = icmp ne i64 %index.next, %wide.trip.count
+ br i1 %3, label %for.cond.cleanup, label %vector.body
+
+ for.cond.cleanup: ; preds = %vector.body
+ ret <vscale x 1 x i64> %accum.next
+ }
+
+ ; Test that reflects what the loop vectorizer will generate for an EVL tail
+ ; folded loop
+
+ define void @evl_scatter(ptr %a, i32 %len) {
+ ; CHECK-LABEL: @evl_scatter(
+ ; CHECK-NEXT: vector.ph:
+ ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
+ ; CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
+ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+ ; CHECK: vector.body:
+ ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
+ ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[TMP0]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+ ; CHECK-NEXT: [[ELEMS:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[VEC_IND_SCALAR]]
+ ; CHECK-NEXT: [[EVL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[ELEMS]], i32 1, i1 true)
+ ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[A:%.*]], <vscale x 1 x i64> [[VEC_IND]], i32 3
+ ; CHECK-NEXT: tail call void @llvm.vp.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> [[TMP1]], <vscale x 1 x i1> splat (i1 true), i32 [[EVL]])
+ ; CHECK-NEXT: [[EVL_ZEXT:%.*]] = zext i32 [[EVL]] to i64
+ ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[EVL_ZEXT]]
+ ; CHECK-NEXT: [[EVL_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[EVL_ZEXT]], i64 0
+ ; CHECK-NEXT: [[EVL_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[EVL_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[VEC_IND]], [[EVL_SPLAT]]
+ ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
+ ; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
+ ; CHECK: for.cond.cleanup:
+ ; CHECK-NEXT: ret void
+ ;
+ vector.ph:
+ %wide.trip.count = zext i32 %len to i64
+ %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
+ br label %vector.body
+
+ vector.body: ; preds = %vector.body, %vector.ph
+ %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+ %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
+
+ %elems = sub i64 %wide.trip.count, %index
+ %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %elems, i32 1, i1 true)

%2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
- tail call void @llvm.vp.scatter(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> %2, <vscale x 1 x i1> %mask, i32 %evl)
- %index.next = add nuw i64 %index, %0
- %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
+ tail call void @llvm.vp.scatter(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> %2, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+
+ %evl.zext = zext i32 %evl to i64
+ %index.next = add nuw i64 %index, %evl.zext
+ %evl.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %evl.zext, i64 0
+ %evl.splat = shufflevector <vscale x 1 x i64> %evl.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %evl.splat
%3 = icmp ne i64 %index.next, %wide.trip.count
br i1 %3, label %for.cond.cleanup, label %vector.body
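
For context, a lowering test like this is exercised by piping opt into FileCheck, with the CHECK lines regenerated by llvm/utils/update_test_checks.py. A minimal sketch of such an invocation, assuming the file's RUN line (which sits above these hunks and is authoritative) names the riscv-gather-scatter-lowering pass; the file name, triple, and attributes below are likewise assumptions:

    # Hypothetical re-run of this test; pass name, triple, -mattr, and file
    # name are assumptions -- defer to the test's own RUN line.
    opt -S -passes=riscv-gather-scatter-lowering -mtriple=riscv64 -mattr=+m,+v \
        strided-load-store.ll | FileCheck strided-load-store.ll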