-
Notifications
You must be signed in to change notification settings - Fork 15.7k
[RISCV][llvm] Support logical comparison codegen for P extension #174626
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
The result type of the P extension's comparison instructions is the same as the operand type, and each result element is either all 1s or all 0s, so we need to set ZeroOrNegativeOneBooleanContent so that sext(setcc) is combined automatically.
|
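@llvm/pr-subscribers-backend-risc-v Author: Brandon Wu (4vtomat) Changes: The result type of the P extension's comparison instructions is the same as the operand type, and each result element is either all 1s or all 0s, so we need to set ZeroOrNegativeOneBooleanContent so that sext(setcc) is combined automatically. Patch is 30.55 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/174626.diff 4 Files Affected: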
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d6b62736bdf60..43598eec5137d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -546,6 +546,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::SHL, ISD::SRL, ISD::SRA}, VTs, Custom);
setOperationAction(ISD::BITCAST, VTs, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VTs, Custom);
+ setOperationAction(ISD::SETCC, VTs, Legal);
+ // P extension vector comparisons produce all 1s for true, all 0s for false
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
}
if (Subtarget.hasStdExtZfbfmin()) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 6830b476f5cd3..7a68707336050 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1584,6 +1584,54 @@ let Predicates = [HasStdExtP] in {
// // splat pattern
def: Pat<(XLenVecI8VT (splat_vector (XLenVT GPR:$rs2))), (PADD_BS (XLenVT X0), GPR:$rs2)>;
def: Pat<(XLenVecI16VT (splat_vector (XLenVT GPR:$rs2))), (PADD_HS (XLenVT X0), GPR:$rs2)>;
+
+ // 8/16-bit comparison patterns (result is all 1s or all 0s per element)
+ // a == b
+ def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETEQ)),
+ (PMSEQ_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETEQ)),
+ (PMSEQ_H GPR:$rs1, GPR:$rs2)>;
+ // a != b => !(a == b)
+ def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETNE)),
+ (XORI (PMSEQ_B GPR:$rs1, GPR:$rs2), -1)>;
+ def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETNE)),
+ (XORI (PMSEQ_H GPR:$rs1, GPR:$rs2), -1)>;
+ // a < b
+ def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETLT)),
+ (PMSLT_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETULT)),
+ (PMSLTU_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETLT)),
+ (PMSLT_H GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETULT)),
+ (PMSLTU_H GPR:$rs1, GPR:$rs2)>;
+ // a <= b => !(b < a)
+ def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETLE)),
+ (XORI (PMSLT_B GPR:$rs2, GPR:$rs1), -1)>;
+ def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETULE)),
+ (XORI (PMSLTU_B GPR:$rs2, GPR:$rs1), -1)>;
+ def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETLE)),
+ (XORI (PMSLT_H GPR:$rs2, GPR:$rs1), -1)>;
+ def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETULE)),
+ (XORI (PMSLTU_H GPR:$rs2, GPR:$rs1), -1)>;
+ // a > b => b < a
+ def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETGT)),
+ (PMSLT_B GPR:$rs2, GPR:$rs1)>;
+ def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETUGT)),
+ (PMSLTU_B GPR:$rs2, GPR:$rs1)>;
+ def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETGT)),
+ (PMSLT_H GPR:$rs2, GPR:$rs1)>;
+ def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETUGT)),
+ (PMSLTU_H GPR:$rs2, GPR:$rs1)>;
+ // a >= b => !(a < b)
+ def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETGE)),
+ (XORI (PMSLT_B GPR:$rs1, GPR:$rs2), -1)>;
+ def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETUGE)),
+ (XORI (PMSLTU_B GPR:$rs1, GPR:$rs2), -1)>;
+ def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETGE)),
+ (XORI (PMSLT_H GPR:$rs1, GPR:$rs2), -1)>;
+ def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETUGE)),
+ (XORI (PMSLTU_H GPR:$rs1, GPR:$rs2), -1)>;
} // Predicates = [HasStdExtP]
let Predicates = [HasStdExtP, IsRV32] in {
@@ -1643,6 +1691,34 @@ let Predicates = [HasStdExtP, IsRV64] in {
// splat pattern
def: Pat<(v2i32 (splat_vector (XLenVT GPR:$rs2))), (PADD_WS (XLenVT X0), GPR:$rs2)>;
+ // 32-bit comparison patterns (result is all 1s or all 0s per element)
+ // a == b
+ def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETEQ)),
+ (PMSEQ_W GPR:$rs1, GPR:$rs2)>;
+ // a != b => !(a == b)
+ def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETNE)),
+ (XORI (PMSEQ_W GPR:$rs1, GPR:$rs2), -1)>;
+ // a < b
+ def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETLT)),
+ (PMSLT_W GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETULT)),
+ (PMSLTU_W GPR:$rs1, GPR:$rs2)>;
+ // a <= b => !(b < a)
+ def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETLE)),
+ (XORI (PMSLT_W GPR:$rs2, GPR:$rs1), -1)>;
+ def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETULE)),
+ (XORI (PMSLTU_W GPR:$rs2, GPR:$rs1), -1)>;
+ // a > b => b < a
+ def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETGT)),
+ (PMSLT_W GPR:$rs2, GPR:$rs1)>;
+ def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETUGT)),
+ (PMSLTU_W GPR:$rs2, GPR:$rs1)>;
+ // a >= b => !(a < b)
+ def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETGE)),
+ (XORI (PMSLT_W GPR:$rs1, GPR:$rs2), -1)>;
+ def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETUGE)),
+ (XORI (PMSLTU_W GPR:$rs1, GPR:$rs2), -1)>;
+
// 32-bit logical shift left/right patterns
def: Pat<(v2i32 (shl GPR:$rs1, (v2i32 (splat_vector uimm5:$shamt)))),
(PSLLI_W GPR:$rs1, uimm5:$shamt)>;
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
index 2836cda16b6d9..a1728e72ce3b9 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
@@ -1118,3 +1118,335 @@ define void @test_pmulhsu_h_commuted(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
store <2 x i16> %res, ptr %ret_ptr
ret void
}
+
+; Comparison operations for v2i16
+define void @test_eq_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_eq_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmseq.h a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %cmp = icmp eq <2 x i16> %a, %b
+ %res = sext <2 x i1> %cmp to <2 x i16>
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_ne_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ne_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmseq.h a1, a1, a2
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %cmp = icmp ne <2 x i16> %a, %b
+ %res = sext <2 x i1> %cmp to <2 x i16>
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_slt_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_slt_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmslt.h a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %cmp = icmp slt <2 x i16> %a, %b
+ %res = sext <2 x i1> %cmp to <2 x i16>
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_sle_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sle_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmslt.h a1, a2, a1
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %cmp = icmp sle <2 x i16> %a, %b
+ %res = sext <2 x i1> %cmp to <2 x i16>
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_sgt_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sgt_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmslt.h a1, a2, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %cmp = icmp sgt <2 x i16> %a, %b
+ %res = sext <2 x i1> %cmp to <2 x i16>
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_sge_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sge_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmslt.h a1, a1, a2
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %cmp = icmp sge <2 x i16> %a, %b
+ %res = sext <2 x i1> %cmp to <2 x i16>
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_ult_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ult_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmsltu.h a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %cmp = icmp ult <2 x i16> %a, %b
+ %res = sext <2 x i1> %cmp to <2 x i16>
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_ule_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ule_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmsltu.h a1, a2, a1
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %cmp = icmp ule <2 x i16> %a, %b
+ %res = sext <2 x i1> %cmp to <2 x i16>
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_ugt_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ugt_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmsltu.h a1, a2, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %cmp = icmp ugt <2 x i16> %a, %b
+ %res = sext <2 x i1> %cmp to <2 x i16>
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_uge_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_uge_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmsltu.h a1, a1, a2
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %cmp = icmp uge <2 x i16> %a, %b
+ %res = sext <2 x i1> %cmp to <2 x i16>
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+; Comparison operations for v4i8
+define void @test_eq_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_eq_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmseq.b a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %cmp = icmp eq <4 x i8> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i8>
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_ne_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ne_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmseq.b a1, a1, a2
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %cmp = icmp ne <4 x i8> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i8>
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_slt_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_slt_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmslt.b a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %cmp = icmp slt <4 x i8> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i8>
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_sle_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sle_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmslt.b a1, a2, a1
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %cmp = icmp sle <4 x i8> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i8>
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_sgt_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sgt_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmslt.b a1, a2, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %cmp = icmp sgt <4 x i8> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i8>
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_sge_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sge_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmslt.b a1, a1, a2
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %cmp = icmp sge <4 x i8> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i8>
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_ult_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ult_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmsltu.b a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %cmp = icmp ult <4 x i8> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i8>
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_ule_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ule_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmsltu.b a1, a2, a1
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %cmp = icmp ule <4 x i8> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i8>
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_ugt_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ugt_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmsltu.b a1, a2, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %cmp = icmp ugt <4 x i8> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i8>
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_uge_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_uge_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmsltu.b a1, a1, a2
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %cmp = icmp uge <4 x i8> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i8>
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
index dfa1b242e656f..e08871b4e63bf 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
@@ -1148,3 +1148,501 @@ define void @test_pmulhsu_w_commuted(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
store <2 x i32> %res, ptr %ret_ptr
ret void
}
+
+; Comparison operations for v4i16
+define void @test_eq_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_eq_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmseq.h a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %cmp = icmp eq <4 x i16> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i16>
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_ne_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ne_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmseq.h a1, a1, a2
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %cmp = icmp ne <4 x i16> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i16>
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_slt_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_slt_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmslt.h a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %cmp = icmp slt <4 x i16> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i16>
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_sle_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sle_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmslt.h a1, a2, a1
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %cmp = icmp sle <4 x i16> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i16>
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_sgt_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sgt_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmslt.h a1, a2, a1
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %cmp = icmp sgt <4 x i16> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i16>
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_sge_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sge_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmslt.h a1, a1, a2
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %cmp = icmp sge <4 x i16> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i16>
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_ult_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ult_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmsltu.h a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %cmp = icmp ult <4 x i16> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x i16>
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_ule_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ule_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmsltu.h a1, a2, a1
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %cmp = icmp ule <4 x i16> %a, %b
+ %res = sext <4 x i1> %cmp to <4 x...
[truncated]
|
| (PMSLTU_H GPR:$rs1, GPR:$rs2)>; | ||
| // a <= b => !(b < a) | ||
| def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETLE)), | ||
| (XORI (PMSLT_B GPR:$rs2, GPR:$rs1), -1)>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The rs1 and rs2 names belong to the instruction. They should always be (PMSLT_B GPR:$rs1, GPR:$rs2). The input pattern is where the swap should occur.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Uh I see
🪟 Windows x64 Test Results
✅ The build succeeded and all tests passed. |
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VTs, Custom); | ||
| setOperationAction(ISD::SETCC, VTs, Legal); | ||
| // P extension vector comparisons produce all 1s for true, all 0s for false | ||
| setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This will affect scalable vectors too, right? I think this could turn into a problem
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it's only a problem if P and V are enabled at the same time. They each have their own setBooleanVectorContents calls.
I think we can change V to use ZeroOrOneBooleanContent with minimal impact.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you mean changing P to use ZeroOrOneBooleanContent and adding a custom combine for it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oops I meant to write "I think we can change V to use ZeroOrNegativeOneBooleanContent with minimal impact." I wrote that patch several years ago because of the old P extension proposal. I think I posted it, but I can't find it. It might have been in phabricator. It was not approved.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh I found it by searching my email https://reviews.llvm.org/D103758
topperc
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
| setOperationAction({ISD::SHL, ISD::SRL, ISD::SRA}, VTs, Custom); | ||
| setOperationAction(ISD::BITCAST, VTs, Custom); | ||
| setOperationAction(ISD::EXTRACT_VECTOR_ELT, VTs, Custom); | ||
| setOperationAction(ISD::SETCC, VTs, Legal); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we do
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 43598eec5137..aa3248b9cf5f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -547,6 +547,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, VTs, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VTs, Custom);
setOperationAction(ISD::SETCC, VTs, Legal);
+ setCondCodeAction({ISD::SETNE, ISD::SETGT, ISD::SETGE, ISD::SETUGT, ISD::SETUGE, ISD::SETULE, ISD::SETLE}, VTs, Expand);
// P extension vector comparisons produce all 1s for true, all 0s for false
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
}
And remove all of the patterns except EQ, LT, LTU? We'll need the bitwise operation patch too.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, we can, but I first need to get #174598 merged so that the xor pattern can be selected lol
The result type of the P extension's comparison instructions is the same as
the operand type, and each result element is either all 1s or all 0s, so we
need to set ZeroOrNegativeOneBooleanContent so that sext(setcc) is combined automatically.