[DAGCombine] Invert vselect to make TrueValue is binop #167499
Conversation
ChunyuLiao commented on Nov 11, 2025
Before:
  binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
After, for the special case where Cond is a setcc:
  binop N0, (vselect Cond, IDC, FVal) --> vselect (inverted Cond), (binop N0, FVal), N0

Inverting the condition makes the binop the true value of the vselect, which lets RISC-V fold the select into the masked form of the binop and eliminate the vmerge instruction. I haven't seen any regression on other targets.
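As a concrete illustration (a hypothetical instance using add, whose identity constant is 0):

  add N0, (vselect Cond, 0, FVal)
    before: vselect Cond, N0, (add N0, FVal)
    after:  vselect (not Cond), (add N0, FVal), N0

In the inverted form the false value of the select is N0, the same value the binop already reads, so on RISC-V the whole expression can lower to a single masked vadd with N0 as the passthru, as the new vadd-sdnode.ll test below shows.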
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-risc-v

Author: Liao Chunyu (ChunyuLiao)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/167499.diff

3 Files Affected:
- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
- llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
- llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index df353c4d91b1a..12e5e2ba1f811 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2508,6 +2508,15 @@ static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
FVal)) {
SDValue F0 = DAG.getFreeze(N0);
SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
+ // For RISC-V, prefer the inverted form: make the binop the true value and N0 the false value of the select.
+ if (Cond.getOpcode() == ISD::SETCC) {
+ EVT CVT = Cond->getValueType(0);
+ ISD::CondCode NotCC = ISD::getSetCCInverse(
+ cast<CondCodeSDNode>(Cond.getOperand(2))->get(), CVT);
+ SDValue NCond = DAG.getSetCC(SDLoc(N), CVT, Cond.getOperand(0),
+ Cond.getOperand(1), NotCC);
+ return DAG.getSelect(SDLoc(N), VT, NCond, NewBO, F0);
+ }
return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
}
// binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
diff --git a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
index a06c7505d543d..b258ad0f0bca0 100644
--- a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
+++ b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll
@@ -11,14 +11,13 @@ define i32 @ctz_nxv4i32(<vscale x 4 x i32> %a) #0 {
; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; RV32-NEXT: vid.v v10
; RV32-NEXT: li a1, -1
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmsne.vi v0, v8, 0
; RV32-NEXT: srli a0, a0, 1
+; RV32-NEXT: vmv.v.x v11, a0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vmseq.vi v0, v8, 0
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV32-NEXT: vmv.v.x v8, a0
-; RV32-NEXT: vmadd.vx v10, a1, v8
-; RV32-NEXT: vmv.v.i v8, 0
-; RV32-NEXT: vmerge.vvm v8, v8, v10, v0
+; RV32-NEXT: vmadd.vx v10, a1, v11
+; RV32-NEXT: vmerge.vim v8, v10, 0, v0
; RV32-NEXT: vredmaxu.vs v8, v8, v8
; RV32-NEXT: vmv.x.s a1, v8
; RV32-NEXT: sub a0, a0, a1
@@ -32,14 +31,13 @@ define i32 @ctz_nxv4i32(<vscale x 4 x i32> %a) #0 {
; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; RV64-NEXT: vid.v v10
; RV64-NEXT: li a1, -1
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vmsne.vi v0, v8, 0
; RV64-NEXT: srli a0, a0, 1
+; RV64-NEXT: vmv.v.x v11, a0
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-NEXT: vmseq.vi v0, v8, 0
; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vmadd.vx v10, a1, v8
-; RV64-NEXT: vmv.v.i v8, 0
-; RV64-NEXT: vmerge.vvm v8, v8, v10, v0
+; RV64-NEXT: vmadd.vx v10, a1, v11
+; RV64-NEXT: vmerge.vim v8, v10, 0, v0
; RV64-NEXT: vredmaxu.vs v8, v8, v8
; RV64-NEXT: vmv.x.s a1, v8
; RV64-NEXT: sub a0, a0, a1
@@ -113,13 +111,12 @@ define i64 @ctz_nxv8i1_no_range(<vscale x 8 x i16> %a) {
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vid.v v16
; RV64-NEXT: li a1, -1
+; RV64-NEXT: vmv.v.x v24, a0
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64-NEXT: vmsne.vi v0, v8, 0
+; RV64-NEXT: vmseq.vi v0, v8, 0
; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; RV64-NEXT: vmv.v.x v8, a0
-; RV64-NEXT: vmadd.vx v16, a1, v8
-; RV64-NEXT: vmv.v.i v8, 0
-; RV64-NEXT: vmerge.vvm v8, v8, v16, v0
+; RV64-NEXT: vmadd.vx v16, a1, v24
+; RV64-NEXT: vmerge.vim v8, v16, 0, v0
; RV64-NEXT: vredmaxu.vs v8, v8, v8
; RV64-NEXT: vmv.x.s a1, v8
; RV64-NEXT: sub a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll
index a9a13147f5c9b..3292c064bb4e9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll
@@ -814,6 +814,21 @@ define <vscale x 8 x i32> @vadd_vv_mask_nxv8i32(<vscale x 8 x i32> %va, <vscale
ret <vscale x 8 x i32> %vc
}
+define <vscale x 8 x i32> @vadd_vv_mask_nxv8i32_novmerge(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb,
+; CHECK-LABEL: vadd_vv_mask_nxv8i32_novmerge:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu
+; CHECK-NEXT: vmsne.vi v0, v8, 1
+; CHECK-NEXT: vadd.vv v16, v16, v12, v0.t
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+ <vscale x 8 x i32> %vc) {
+ %mask = icmp eq <vscale x 8 x i32> %va, splat (i32 1)
+ %vs = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> zeroinitializer, <vscale x 8 x i32> %vb
+ %vr = add nsw <vscale x 8 x i32> %vc, %vs
+ ret <vscale x 8 x i32> %vr
+}
+
define <vscale x 8 x i32> @vadd_vx_mask_nxv8i32(<vscale x 8 x i32> %va, i32 signext %b, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: vadd_vx_mask_nxv8i32:
; CHECK: # %bb.0:
RKSimon left a comment
I suspect X86 AVX512 predicates might benefit from this as well - I can't recall if we have existing code to canonicalize this.