From f9d47850f364e39f2e9f530c80aa0ae051386406 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Wed, 15 Jan 2025 19:55:43 +0800
Subject: [PATCH 1/2] Precommit test

---
 .../test/CodeGen/RISCV/rvv/vp-reverse-loop.ll | 47 +++++++++++++++++++
 1 file changed, 47 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/vp-reverse-loop.ll

diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-loop.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-loop.ll
new file mode 100644
index 0000000000000..576d4196bf4d1
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-loop.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s
+
+define void @reverse(ptr %p) {
+; CHECK-LABEL: reverse:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: li a1, 0
+; CHECK-NEXT: li a2, 1024
+; CHECK-NEXT: .LBB0_1: # %loop
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: sub a3, a2, a1
+; CHECK-NEXT: slli a4, a1, 3
+; CHECK-NEXT: vsetvli a3, a3, e64, m1, ta, ma
+; CHECK-NEXT: add a4, a0, a4
+; CHECK-NEXT: vle64.v v8, (a4)
+; CHECK-NEXT: addi a5, a3, -1
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vrsub.vx v9, v9, a5
+; CHECK-NEXT: vrgather.vv v10, v8, v9
+; CHECK-NEXT: add a1, a1, a3
+; CHECK-NEXT: vse64.v v10, (a4)
+; CHECK-NEXT: bltu a1, a2, .LBB0_1
+; CHECK-NEXT: # %bb.2: # %exit
+; CHECK-NEXT: ret
+entry:
+  br label %loop
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+
+  %cnt = sub i64 1024, %iv
+  %evl = call i32 @llvm.experimental.get.vector.length(i64 %cnt, i32 1, i1 true)
+
+  %p.gep = getelementptr i64, ptr %p, i64 %iv
+  %v = call <vscale x 1 x i64> @llvm.vp.load(ptr %p.gep, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+
+  %w = call <vscale x 1 x i64> @llvm.experimental.vp.reverse(<vscale x 1 x i64> %v, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+
+  call void @llvm.vp.store(<vscale x 1 x i64> %w, ptr %p.gep, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+
+  %evl.zext = zext i32 %evl to i64
+  %iv.next = add i64 %iv, %evl.zext
+  %done = icmp uge i64 %iv.next, 1024
+  br i1 %done, label %exit, label %loop
+
+exit:
+  ret void
+}

From 275076d09d2421a9f0db0f06e2cb0197be4a2979 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Wed, 15 Jan 2025 20:09:43 +0800
Subject: [PATCH 2/2] [RISCV] Don't use EVL/Mask for vid when lowering vp.reverse

vp.reverse intrinsics are emitted by the loop vectorizer when EVL tail
folding is enabled, and currently end up generating code like this:

.LBB0_1: # %loop
        # =>This Inner Loop Header: Depth=1
        sub a3, a2, a1
        slli a4, a1, 3
        vsetvli a3, a3, e64, m1, ta, ma
        add a4, a0, a4
        vle64.v v8, (a4)
        addi a5, a3, -1
        vid.v v9
        vrsub.vx v9, v9, a5
        vrgather.vv v10, v8, v9
        add a1, a1, a3
        vse64.v v10, (a4)
        bltu a1, a2, .LBB0_1

The vid.v needed for the indices is recalculated on every iteration of the
loop, but because its AVL is set to the EVL computed by get.vector.length
within the loop, it isn't hoisted out. This changes the AVL used to VLMAX so
that the vid.v can be made loop invariant:

        vsetvli a3, zero, e64, m1, ta, ma
        vid.v v8
.LBB0_1: # %loop
        # =>This Inner Loop Header: Depth=1
        sub a3, a2, a1
        slli a4, a1, 3
        vsetvli a3, a3, e64, m1, ta, ma
        add a4, a0, a4
        vle64.v v9, (a4)
        addi a5, a3, -1
        vrsub.vx v10, v8, a5
        vrgather.vv v11, v9, v10
        add a1, a1, a3
        vse64.v v11, (a4)
        bltu a1, a2, .LBB0_1

Now that we have RISCVVLOptimizer, it shouldn't increase the number of
vsetvlis for straight-line code. This also removes the mask, which isn't
needed, in case it too prevents hoisting.
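
For illustration only (this sketch is not part of the patch, and the helper
name is made up): vp.reverse of the first EVL lanes is lowered as a vrgather
whose indices are (EVL - 1) - vid. A scalar C++ model of what the
vid/vrsub/vrgather sequence computes:

#include <cstdint>

// Reference model: Dst[I] = Src[EVL - 1 - I] for every active lane I < EVL.
// vid.v only has to produce the identity sequence {0, 1, 2, ...}; lanes at or
// past EVL are never read by the gather, so computing vid with a VLMAX AVL
// (rather than the loop-varying EVL) yields the same result while letting the
// vid be hoisted out of the loop.
static void vpReverseReference(const int64_t *Src, int64_t *Dst, unsigned EVL) {
  for (unsigned I = 0; I < EVL; ++I)
    Dst[I] = Src[(EVL - 1) - I];
}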
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 6 +- .../rvv/vp-reverse-float-fixed-vectors.ll | 12 +-- .../CodeGen/RISCV/rvv/vp-reverse-float.ll | 48 +++++----- .../RISCV/rvv/vp-reverse-int-fixed-vectors.ll | 24 ++--- llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll | 90 +++++++++---------- .../test/CodeGen/RISCV/rvv/vp-reverse-loop.ll | 13 +-- .../rvv/vp-reverse-mask-fixed-vectors.ll | 56 +++++++----- .../test/CodeGen/RISCV/rvv/vp-reverse-mask.ll | 84 +++++++++-------- 8 files changed, 180 insertions(+), 153 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index b25cb128bce9f..32a94f3dee22c 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -12430,7 +12430,11 @@ RISCVTargetLowering::lowerVPReverseExperimental(SDValue Op, GatherOpc = RISCVISD::VRGATHEREI16_VV_VL; } - SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, Mask, EVL); + // Don't use EVL or Mask for vid so it can be hoisted out of loops. + auto [TrueMask, VLMAX] = + getDefaultScalableVLOps(IndicesVT, DL, DAG, Subtarget); + SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IndicesVT, TrueMask, VLMAX); + SDValue VecLen = DAG.getNode(ISD::SUB, DL, XLenVT, EVL, DAG.getConstant(1, DL, XLenVT)); SDValue VecLenSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IndicesVT, diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float-fixed-vectors.ll index 136f6e7bc9990..887edafe9c88a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float-fixed-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float-fixed-vectors.ll @@ -5,10 +5,10 @@ define <2 x double> @test_vp_reverse_v2f64_masked(<2 x double> %src, <2 x i1> %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_v2f64_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vid.v v9, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a1, v0.t ; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret @@ -34,10 +34,10 @@ define <2 x double> @test_vp_reverse_v2f64(<2 x double> %src, i32 zeroext %evl) define <4 x float> @test_vp_reverse_v4f32_masked(<4 x float> %src, <4 x i1> %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_v4f32_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vid.v v9, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a1, v0.t ; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float.ll index b235990ab5dd0..194eb222be01f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-float.ll @@ -4,10 +4,10 @@ define @test_vp_reverse_nxv1f64_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv1f64_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vid.v v9, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a1, v0.t ; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, 
v9 ; CHECK-NEXT: ret @@ -33,10 +33,10 @@ define @test_vp_reverse_nxv1f64( %src define @test_vp_reverse_nxv2f32_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv2f32_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vid.v v9, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a1, v0.t ; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret @@ -62,10 +62,10 @@ define @test_vp_reverse_nxv2f32( %src, define @test_vp_reverse_nxv2f64_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv2f64_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vid.v v10, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v12, v10, a0, v0.t +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vx v12, v10, a1, v0.t ; CHECK-NEXT: vrgather.vv v10, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -91,10 +91,10 @@ define @test_vp_reverse_nxv2f64( %src define @test_vp_reverse_nxv4f32_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv4f32_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vid.v v10, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v12, v10, a0, v0.t +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vx v12, v10, a1, v0.t ; CHECK-NEXT: vrgather.vv v10, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -120,10 +120,10 @@ define @test_vp_reverse_nxv4f32( %src, define @test_vp_reverse_nxv4f64_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv4f64_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vid.v v12, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v16, v12, a0, v0.t +; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vrsub.vx v16, v12, a1, v0.t ; CHECK-NEXT: vrgather.vv v12, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret @@ -149,10 +149,10 @@ define @test_vp_reverse_nxv4f64( %src define @test_vp_reverse_nxv8f32_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv8f32_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vid.v v12, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v16, v12, a0, v0.t +; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vrsub.vx v16, v12, a1, v0.t ; CHECK-NEXT: vrgather.vv v12, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret @@ -178,10 +178,10 @@ define @test_vp_reverse_nxv8f32( %src, define @test_vp_reverse_nxv8f64_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv8f64_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vid.v v16, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v24, v16, a0, v0.t +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vrsub.vx v24, v16, a1, v0.t ; CHECK-NEXT: vrgather.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret @@ -207,10 +207,10 @@ define @test_vp_reverse_nxv8f64( %src define @test_vp_reverse_nxv16f32_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv16f32_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; 
CHECK-NEXT: vid.v v16, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v24, v16, a0, v0.t +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vrsub.vx v24, v16, a1, v0.t ; CHECK-NEXT: vrgather.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int-fixed-vectors.ll index 27f16f0285e12..33fa3539ade93 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int-fixed-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int-fixed-vectors.ll @@ -5,10 +5,10 @@ define <2 x i64> @test_vp_reverse_v2i64_masked(<2 x i64> %src, <2 x i1> %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_v2i64_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vid.v v9, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a1, v0.t ; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret @@ -34,10 +34,10 @@ define <2 x i64> @test_vp_reverse_v2i64(<2 x i64> %src, i32 zeroext %evl) { define <4 x i32> @test_vp_reverse_v4i32_masked(<4 x i32> %src, <4 x i1> %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_v4i32_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vid.v v9, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a1, v0.t ; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret @@ -63,10 +63,10 @@ define <4 x i32> @test_vp_reverse_v4i32(<4 x i32> %src, i32 zeroext %evl) { define <8 x i16> @test_vp_reverse_v8i16_masked(<8 x i16> %src, <8 x i1> %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_v8i16_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vid.v v9, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a1, v0.t ; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret @@ -92,10 +92,10 @@ define <8 x i16> @test_vp_reverse_v8i16(<8 x i16> %src, i32 zeroext %evl) { define <16 x i8> @test_vp_reverse_v16i8_masked(<16 x i8> %src, <16 x i1> %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_v16i8_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vid.v v10, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vx v10, v10, a1, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vrgatherei16.vv v9, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v9 diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll index 507f5154cf1ac..ab37e5f27bcef 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll @@ -4,10 +4,10 @@ define @test_vp_reverse_nxv1i64_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv1i64_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vid.v v9, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: 
vrsub.vx v10, v9, a1, v0.t ; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret @@ -33,10 +33,10 @@ define @test_vp_reverse_nxv1i64( %src, i32 define @test_vp_reverse_nxv2i32_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv2i32_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vid.v v9, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a1, v0.t ; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret @@ -62,10 +62,10 @@ define @test_vp_reverse_nxv2i32( %src, i32 define @test_vp_reverse_nxv4i16_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv4i16_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vid.v v9, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v10, v9, a0, v0.t +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vx v10, v9, a1, v0.t ; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: ret @@ -91,10 +91,10 @@ define @test_vp_reverse_nxv4i16( %src, i32 define @test_vp_reverse_nxv8i8_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv8i8_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vid.v v10, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vx v10, v10, a1, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vrgatherei16.vv v9, v8, v10, v0.t ; CHECK-NEXT: vmv.v.v v8, v9 @@ -122,10 +122,10 @@ define @test_vp_reverse_nxv8i8( %src, i32 zer define @test_vp_reverse_nxv2i64_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv2i64_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vid.v v10, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v12, v10, a0, v0.t +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vx v12, v10, a1, v0.t ; CHECK-NEXT: vrgather.vv v10, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -151,10 +151,10 @@ define @test_vp_reverse_nxv2i64( %src, i32 define @test_vp_reverse_nxv4i32_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv4i32_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vid.v v10, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v12, v10, a0, v0.t +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vx v12, v10, a1, v0.t ; CHECK-NEXT: vrgather.vv v10, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -180,10 +180,10 @@ define @test_vp_reverse_nxv4i32( %src, i32 define @test_vp_reverse_nxv8i16_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv8i16_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vid.v v10, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v12, v10, a0, v0.t +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vx v12, v10, a1, v0.t ; CHECK-NEXT: vrgather.vv v10, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -209,10 +209,10 @@ define @test_vp_reverse_nxv8i16( %src, i32 define @test_vp_reverse_nxv16i8_masked( %src, %mask, i32 zeroext 
%evl) { ; CHECK-LABEL: test_vp_reverse_nxv16i8_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vid.v v12, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v12, v12, a0, v0.t +; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vrsub.vx v12, v12, a1, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 @@ -240,10 +240,10 @@ define @test_vp_reverse_nxv16i8( %src, i32 define @test_vp_reverse_nxv4i64_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv4i64_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vid.v v12, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v16, v12, a0, v0.t +; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vrsub.vx v16, v12, a1, v0.t ; CHECK-NEXT: vrgather.vv v12, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret @@ -269,10 +269,10 @@ define @test_vp_reverse_nxv4i64( %src, i32 define @test_vp_reverse_nxv8i32_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv8i32_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vid.v v12, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v16, v12, a0, v0.t +; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vrsub.vx v16, v12, a1, v0.t ; CHECK-NEXT: vrgather.vv v12, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret @@ -298,10 +298,10 @@ define @test_vp_reverse_nxv8i32( %src, i32 define @test_vp_reverse_nxv16i16_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv16i16_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vid.v v12, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v16, v12, a0, v0.t +; CHECK-NEXT: vid.v v12 +; CHECK-NEXT: vrsub.vx v16, v12, a1, v0.t ; CHECK-NEXT: vrgather.vv v12, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret @@ -327,10 +327,10 @@ define @test_vp_reverse_nxv16i16( %src, i define @test_vp_reverse_nxv32i8_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv32i8_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vid.v v16, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v16, v16, a0, v0.t +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vrsub.vx v16, v16, a1, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; CHECK-NEXT: vrgatherei16.vv v12, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 @@ -358,10 +358,10 @@ define @test_vp_reverse_nxv32i8( %src, i32 define @test_vp_reverse_nxv8i64_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv8i64_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vid.v v16, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v24, v16, a0, v0.t +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vrsub.vx v24, v16, a1, v0.t ; CHECK-NEXT: vrgather.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret @@ -387,10 +387,10 @@ define @test_vp_reverse_nxv8i64( %src, i32 define @test_vp_reverse_nxv16i32_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv16i32_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vid.v v16, v0.t -; 
CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v24, v16, a0, v0.t +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vrsub.vx v24, v16, a1, v0.t ; CHECK-NEXT: vrgather.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret @@ -416,10 +416,10 @@ define @test_vp_reverse_nxv16i32( %src, i define @test_vp_reverse_nxv32i16_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv32i16_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vid.v v16, v0.t -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v24, v16, a0, v0.t +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vrsub.vx v24, v16, a1, v0.t ; CHECK-NEXT: vrgather.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-loop.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-loop.ll index 576d4196bf4d1..54f9670e62460 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-loop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-loop.ll @@ -1,24 +1,27 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s +; Test that we hoist the vid.v out of the loop by not using the EVL for AVL. + define void @reverse(ptr %p) { ; CHECK-LABEL: reverse: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: vsetvli a3, zero, e64, m1, ta, ma +; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: .LBB0_1: # %loop ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: sub a3, a2, a1 ; CHECK-NEXT: slli a4, a1, 3 ; CHECK-NEXT: vsetvli a3, a3, e64, m1, ta, ma ; CHECK-NEXT: add a4, a0, a4 -; CHECK-NEXT: vle64.v v8, (a4) +; CHECK-NEXT: vle64.v v9, (a4) ; CHECK-NEXT: addi a5, a3, -1 -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vrsub.vx v9, v9, a5 -; CHECK-NEXT: vrgather.vv v10, v8, v9 +; CHECK-NEXT: vrsub.vx v10, v8, a5 +; CHECK-NEXT: vrgather.vv v11, v9, v10 ; CHECK-NEXT: add a1, a1, a3 -; CHECK-NEXT: vse64.v v10, (a4) +; CHECK-NEXT: vse64.v v11, (a4) ; CHECK-NEXT: bltu a1, a2, .LBB0_1 ; CHECK-NEXT: # %bb.2: # %exit ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll index 09d92c3c039f9..f07720698cfe1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll @@ -5,15 +5,17 @@ define <2 x i1> @test_vp_reverse_v2i1_masked(<2 x i1> %src, <2 x i1> %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_v2i1_masked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v9, v11, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vrgatherei16.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmsne.vi v0, v11, 0, v0.t ; CHECK-NEXT: ret @@ -44,15 +46,17 @@ define <2 x i1> @test_vp_reverse_v2i1(<2 x i1> %src, i32 zeroext %evl) { define <4 x i1> @test_vp_reverse_v4i1_masked(<4 x i1> %src, 
<4 x i1> %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_v4i1_masked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v9, v11, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vrgatherei16.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmsne.vi v0, v11, 0, v0.t ; CHECK-NEXT: ret @@ -83,15 +87,17 @@ define <4 x i1> @test_vp_reverse_v4i1(<4 x i1> %src, i32 zeroext %evl) { define <8 x i1> @test_vp_reverse_v8i1_masked(<8 x i1> %src, <8 x i1> %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_v8i1_masked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v9, v11, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vrgatherei16.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmsne.vi v0, v11, 0, v0.t ; CHECK-NEXT: ret @@ -122,15 +128,17 @@ define <8 x i1> @test_vp_reverse_v8i1(<8 x i1> %src, i32 zeroext %evl) { define <16 x i1> @test_vp_reverse_v16i1_masked(<16 x i1> %src, <16 x i1> %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_v16i1_masked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v9, v12, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vrgatherei16.vv v12, v9, v10, v0.t ; CHECK-NEXT: vmsne.vi v0, v12, 0, v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll index 8e44d76e7010f..c0b0d3d8d0717 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll @@ -4,15 +4,17 @@ define @test_vp_reverse_nxv1i1_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv1i1_masked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi 
a0, a0, -1 +; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v9, v11, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vrgatherei16.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmsne.vi v0, v11, 0, v0.t ; CHECK-NEXT: ret @@ -43,15 +45,17 @@ define @test_vp_reverse_nxv1i1( %src, i32 zer define @test_vp_reverse_nxv2i1_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv2i1_masked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v9, v11, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vrgatherei16.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmsne.vi v0, v11, 0, v0.t ; CHECK-NEXT: ret @@ -82,15 +86,17 @@ define @test_vp_reverse_nxv2i1( %src, i32 zer define @test_vp_reverse_nxv4i1_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv4i1_masked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v9, v11, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vrgatherei16.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmsne.vi v0, v11, 0, v0.t ; CHECK-NEXT: ret @@ -121,15 +127,17 @@ define @test_vp_reverse_nxv4i1( %src, i32 zer define @test_vp_reverse_nxv8i1_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv8i1_masked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v9, v12, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vrgatherei16.vv v12, v9, v10, v0.t ; CHECK-NEXT: vmsne.vi v0, v12, 0, v0.t ; CHECK-NEXT: ret @@ -160,15 +168,17 @@ define @test_vp_reverse_nxv8i1( %src, i32 zer define @test_vp_reverse_nxv16i1_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv16i1_masked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma 
+; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vid.v v12, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: vrsub.vx v12, v12, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vrgatherei16.vv v16, v10, v12, v0.t ; CHECK-NEXT: vmsne.vi v8, v16, 0, v0.t ; CHECK-NEXT: vmv1r.v v0, v8 @@ -200,15 +210,17 @@ define @test_vp_reverse_nxv16i1( %src, i32 define @test_vp_reverse_nxv32i1_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv32i1_masked: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmv.v.i v12, 0 -; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma -; CHECK-NEXT: vid.v v16, v0.t +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: vrsub.vx v16, v16, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vrgatherei16.vv v24, v12, v16, v0.t ; CHECK-NEXT: vmsne.vi v8, v24, 0, v0.t ; CHECK-NEXT: vmv1r.v v0, v8