Commit 02403f4
[RISCV] Split strided-load-store.ll tests into EVL and VP. NFC
None of the changes in #122232 or the upcoming #122244 are specific to EVL tail folding, so split the EVL tail-folded loops out into separate "integration tests" that reflect the output of the loop vectorizer.
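
For orientation, the two shapes now tested side by side differ in how the effective vector length (EVL) is chosen and how the induction variable is stepped. A minimal sketch, condensed from the test bodies in this diff (%ptrs is a stand-in name for the gathered pointer vector; everything else appears verbatim below):

; VP form: loop-invariant all-true mask and a fixed EVL (42 in these tests)
%gather.vp = call <vscale x 1 x i64> @llvm.vp.gather(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat (i1 true), i32 42)

; EVL tail-folded form: the EVL is recomputed each iteration from the
; remaining trip count, and the induction variable advances by it
%elems = sub i64 %wide.trip.count, %index
%evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %elems, i32 1, i1 true)
%gather.evl = call <vscale x 1 x i64> @llvm.vp.gather(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> splat (i1 true), i32 %evl)
%evl.zext = zext i32 %evl to i64
%index.next = add nuw i64 %index, %evl.zext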

1 file changed: +111 −39

llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll

@@ -400,33 +400,21 @@ declare void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64>, <vscale x 1
 declare <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)
 
 
-; TODO: Make the step loop variant to reflect what the loop vectorizer will emit
-; in an EVL tail folding configuration.
-
 define <vscale x 1 x i64> @vp_gather(ptr %a, i32 %len) {
 ; CHECK-LABEL: @vp_gather(
 ; CHECK-NEXT: vector.ph:
 ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
 ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP0]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[TMP1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[ELEMS:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[VEC_IND_SCALAR]]
-; CHECK-NEXT: [[EVL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[ELEMS]], i32 1, i1 true)
-; CHECK-NEXT: [[ODD:%.*]] = and <vscale x 1 x i64> [[VEC_IND]], splat (i64 1)
-; CHECK-NEXT: [[MASK:%.*]] = icmp ne <vscale x 1 x i64> [[ODD]], zeroinitializer
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
-; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP2]], i64 16, <vscale x 1 x i1> [[MASK]], i32 [[EVL]])
+; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP2]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 42)
 ; CHECK-NEXT: [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[TMP0]]
 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[TMP0]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
 ; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
 ; CHECK: for.cond.cleanup:
@@ -444,15 +432,8 @@ vector.body: ; preds = %vector.body, %vecto
 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
 %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
 %accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]
-
-%elems = sub i64 %wide.trip.count, %index
-%evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %elems, i32 1, i1 true)
-
-%odd = and <vscale x 1 x i64> %vec.ind, splat (i64 1)
-%mask = icmp ne <vscale x 1 x i64> %odd, splat (i64 0)
-
 %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
-%gather = call <vscale x 1 x i64> @llvm.vp.gather(<vscale x 1 x ptr> %2, <vscale x 1 x i1> %mask, i32 %evl)
+%gather = call <vscale x 1 x i64> @llvm.vp.gather(<vscale x 1 x ptr> %2, <vscale x 1 x i1> splat (i1 true), i32 42)
 %accum.next = add <vscale x 1 x i64> %accum, %gather
 %index.next = add nuw i64 %index, %0
 %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
@@ -463,31 +444,19 @@ for.cond.cleanup: ; preds = %vector.body
 ret <vscale x 1 x i64> %accum.next
 }
 
-; TODO: Make the step loop variant to reflect what the loop vectorizer will emit
-; in an EVL tail folding configuration.
-
 define void @vp_scatter(ptr %a, i32 %len) {
 ; CHECK-LABEL: @vp_scatter(
 ; CHECK-NEXT: vector.ph:
 ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
 ; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[TMP0]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[DOTSPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[TMP1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[ELEMS:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[VEC_IND_SCALAR]]
-; CHECK-NEXT: [[EVL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[ELEMS]], i32 1, i1 true)
-; CHECK-NEXT: [[ODD:%.*]] = and <vscale x 1 x i64> [[VEC_IND]], splat (i64 1)
-; CHECK-NEXT: [[MASK:%.*]] = icmp ne <vscale x 1 x i64> [[ODD]], zeroinitializer
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
-; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP2]], i64 16, <vscale x 1 x i1> [[MASK]], i32 [[EVL]])
+; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP2]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 42)
 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[TMP0]]
 ; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[TMP0]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[VEC_IND]], [[DOTSPLAT]]
 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
 ; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
 ; CHECK: for.cond.cleanup:
@@ -504,17 +473,120 @@ vector.ph:
 vector.body: ; preds = %vector.body, %vector.ph
 %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
 %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
+%2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
+tail call void @llvm.vp.scatter(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> %2, <vscale x 1 x i1> splat (i1 true), i32 42)
+%index.next = add nuw i64 %index, %0
+%vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
+%3 = icmp ne i64 %index.next, %wide.trip.count
+br i1 %3, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup: ; preds = %vector.body
+ret void
+}
+
+; Test that reflects what the loop vectorizer will generate for an EVL tail
+; folded loop
+
+define <vscale x 1 x i64> @evl_gather(ptr %a, i32 %len) {
+; CHECK-LABEL: @evl_gather(
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[TMP1]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[ELEMS:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[INDEX]]
+; CHECK-NEXT: [[EVL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[ELEMS]], i32 1, i1 true)
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[A:%.*]], <vscale x 1 x i64> [[VEC_IND]], i32 3
+; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[TMP2]], <vscale x 1 x i1> splat (i1 true), i32 [[EVL]])
+; CHECK-NEXT: [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
+; CHECK-NEXT: [[EVL_ZEXT:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[EVL_ZEXT]]
+; CHECK-NEXT: [[EVL_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[EVL_ZEXT]], i64 0
+; CHECK-NEXT: [[EVL_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[EVL_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[VEC_IND]], [[EVL_SPLAT]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret <vscale x 1 x i64> [[ACCUM_NEXT]]
+;
+vector.ph:
+%wide.trip.count = zext i32 %len to i64
+%1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
+br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+%vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
+%accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]
 
 %elems = sub i64 %wide.trip.count, %index
 %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %elems, i32 1, i1 true)
 
-%odd = and <vscale x 1 x i64> %vec.ind, splat (i64 1)
-%mask = icmp ne <vscale x 1 x i64> %odd, splat (i64 0)
+%2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
+%gather = call <vscale x 1 x i64> @llvm.vp.gather(<vscale x 1 x ptr> %2, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+%accum.next = add <vscale x 1 x i64> %accum, %gather
+
+%evl.zext = zext i32 %evl to i64
+%index.next = add nuw i64 %index, %evl.zext
+%evl.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %evl.zext, i64 0
+%evl.splat = shufflevector <vscale x 1 x i64> %evl.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+%vec.ind.next = add <vscale x 1 x i64> %vec.ind, %evl.splat
+%3 = icmp ne i64 %index.next, %wide.trip.count
+br i1 %3, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup: ; preds = %vector.body
+ret <vscale x 1 x i64> %accum.next
+}
+
+; Test that reflects what the loop vectorizer will generate for an EVL tail
+; folded loop
+
+define void @evl_scatter(ptr %a, i32 %len) {
+; CHECK-LABEL: @evl_scatter(
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 1 x i64> [ [[TMP0]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[ELEMS:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[VEC_IND_SCALAR]]
+; CHECK-NEXT: [[EVL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[ELEMS]], i32 1, i1 true)
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[A:%.*]], <vscale x 1 x i64> [[VEC_IND]], i32 3
+; CHECK-NEXT: tail call void @llvm.vp.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> [[TMP1]], <vscale x 1 x i1> splat (i1 true), i32 [[EVL]])
+; CHECK-NEXT: [[EVL_ZEXT:%.*]] = zext i32 [[EVL]] to i64
+; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[EVL_ZEXT]]
+; CHECK-NEXT: [[EVL_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[EVL_ZEXT]], i64 0
+; CHECK-NEXT: [[EVL_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[EVL_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 1 x i64> [[VEC_IND]], [[EVL_SPLAT]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+;
+vector.ph:
+%wide.trip.count = zext i32 %len to i64
+%1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
+br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+%vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
+
+%elems = sub i64 %wide.trip.count, %index
+%evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %elems, i32 1, i1 true)
 
 %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
-tail call void @llvm.vp.scatter(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> %2, <vscale x 1 x i1> %mask, i32 %evl)
-%index.next = add nuw i64 %index, %0
-%vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
+tail call void @llvm.vp.scatter(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> %2, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+
+%evl.zext = zext i32 %evl to i64
+%index.next = add nuw i64 %index, %evl.zext
+%evl.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %evl.zext, i64 0
+%evl.splat = shufflevector <vscale x 1 x i64> %evl.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+%vec.ind.next = add <vscale x 1 x i64> %vec.ind, %evl.splat
 %3 = icmp ne i64 %index.next, %wide.trip.count
 br i1 %3, label %for.cond.cleanup, label %vector.body
 