diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index b23bffba4a0e0..db1973fc27326 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -554,6 +554,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,
                         ISD::SDIVREM, ISD::UDIVREM},
                        VTs, Expand);
+    setOperationAction(ISD::SETCC, VTs, Legal);
+    // Only SETEQ, SETLT and SETULT have isel patterns; expand the rest.
+    setCondCodeAction({ISD::SETNE, ISD::SETGT, ISD::SETGE, ISD::SETUGT,
+                       ISD::SETUGE, ISD::SETULE, ISD::SETLE},
+                      VTs, Expand);
+    // P extension vector comparisons produce all 1s for true, all 0s for false
+    // NOTE(review): this setting is target-wide, not per-type; confirm it does
+    // not conflict with the boolean vector contents used when RVV is enabled.
+    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
   }
 
   if (Subtarget.hasStdExtZfbfmin()) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 6603ece86c7b6..d2b122df62264 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1596,6 +1596,22 @@ let Predicates = [HasStdExtP] in {
   // // splat pattern
   def: Pat<(XLenVecI8VT (splat_vector (XLenVT GPR:$rs2))), (PADD_BS (XLenVT X0), GPR:$rs2)>;
   def: Pat<(XLenVecI16VT (splat_vector (XLenVT GPR:$rs2))), (PADD_HS (XLenVT X0), GPR:$rs2)>;
+
+  // 8/16-bit comparison patterns (result is all 1s or all 0s per element)
+  // a == b
+  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETEQ)),
+           (PMSEQ_B GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETEQ)),
+           (PMSEQ_H GPR:$rs1, GPR:$rs2)>;
+  // a < b
+  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETLT)),
+           (PMSLT_B GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETULT)),
+           (PMSLTU_B GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETLT)),
+           (PMSLT_H GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETULT)),
+           (PMSLTU_H GPR:$rs1, GPR:$rs2)>;
 } // Predicates = [HasStdExtP]
 
 let Predicates = [HasStdExtP, IsRV32] in {
@@ -1679,6 +1695,18 @@ let Predicates = [HasStdExtP, IsRV64] in {
   // splat pattern
   def: Pat<(v2i32 (splat_vector (XLenVT GPR:$rs2))), (PADD_WS (XLenVT X0), GPR:$rs2)>;
 
+  // 32-bit comparison patterns (result is all 1s or all 0s per element)
+  // a == b
+  def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETEQ)),
+           (PMSEQ_W GPR:$rs1, GPR:$rs2)>;
+  // a < b
+  def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETLT)),
+           (PMSLT_W GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETULT)),
+           (PMSLTU_W GPR:$rs1, GPR:$rs2)>;
+  // The remaining condition codes are marked Expand in RISCVISelLowering.cpp
+  // and are legalized onto the patterns above, so they need no patterns here.
+
   // 32-bit logical shift left/right patterns
   def: Pat<(v2i32 (shl GPR:$rs1, (v2i32 (splat_vector uimm5:$shamt)))),
            (PSLLI_W GPR:$rs1, uimm5:$shamt)>;
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
index 6497203c70c73..10964a64b576a 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
@@ -1687,3 +1687,335 @@ define void @test_purem_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
   store <4 x i8> %res, ptr %ret_ptr
   ret void
 }
+
+; Comparison operations for v2i16
+define void @test_eq_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_eq_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmseq.h a1, a1, a2
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i16>, ptr %a_ptr
+  %b = load <2 x i16>, ptr %b_ptr
+  %cmp = icmp eq <2 x i16> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i16>
+  store <2 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ne_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ne_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmseq.h a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i16>, ptr %a_ptr
+  %b = load <2 x i16>, ptr %b_ptr
+  %cmp = icmp ne <2 x i16> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i16>
+  store <2 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_slt_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_slt_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmslt.h a1, a1, a2
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i16>, ptr %a_ptr
+  %b = load <2 x i16>, ptr %b_ptr
+  %cmp = icmp slt <2 x i16> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i16>
+  store <2 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sle_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sle_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmslt.h a1, a2, a1
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i16>, ptr %a_ptr
+  %b = load <2 x i16>, ptr %b_ptr
+  %cmp = icmp sle <2 x i16> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i16>
+  store <2 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sgt_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sgt_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmslt.h a1, a2, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i16>, ptr %a_ptr
+  %b = load <2 x i16>, ptr %b_ptr
+  %cmp = icmp sgt <2 x i16> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i16>
+  store <2 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sge_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sge_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmslt.h a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i16>, ptr %a_ptr
+  %b = load <2 x i16>, ptr %b_ptr
+  %cmp = icmp sge <2 x i16> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i16>
+  store <2 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ult_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ult_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmsltu.h a1, a1, a2
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i16>, ptr %a_ptr
+  %b = load <2 x i16>, ptr %b_ptr
+  %cmp = icmp ult <2 x i16> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i16>
+  store <2 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ule_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ule_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmsltu.h a1, a2, a1
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i16>, ptr %a_ptr
+  %b = load <2 x i16>, ptr %b_ptr
+  %cmp = icmp ule <2 x i16> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i16>
+  store <2 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ugt_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ugt_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmsltu.h a1, a2, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i16>, ptr %a_ptr
+  %b = load <2 x i16>, ptr %b_ptr
+  %cmp = icmp ugt <2 x i16> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i16>
+  store <2 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_uge_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_uge_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmsltu.h a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i16>, ptr %a_ptr
+  %b = load <2 x i16>, ptr %b_ptr
+  %cmp = icmp uge <2 x i16> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i16>
+  store <2 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+; Comparison operations for v4i8
+define void @test_eq_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_eq_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmseq.b a1, a1, a2
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i8>, ptr %a_ptr
+  %b = load <4 x i8>, ptr %b_ptr
+  %cmp = icmp eq <4 x i8> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i8>
+  store <4 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ne_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ne_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmseq.b a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i8>, ptr %a_ptr
+  %b = load <4 x i8>, ptr %b_ptr
+  %cmp = icmp ne <4 x i8> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i8>
+  store <4 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_slt_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_slt_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmslt.b a1, a1, a2
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i8>, ptr %a_ptr
+  %b = load <4 x i8>, ptr %b_ptr
+  %cmp = icmp slt <4 x i8> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i8>
+  store <4 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sle_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sle_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmslt.b a1, a2, a1
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i8>, ptr %a_ptr
+  %b = load <4 x i8>, ptr %b_ptr
+  %cmp = icmp sle <4 x i8> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i8>
+  store <4 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sgt_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sgt_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmslt.b a1, a2, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i8>, ptr %a_ptr
+  %b = load <4 x i8>, ptr %b_ptr
+  %cmp = icmp sgt <4 x i8> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i8>
+  store <4 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sge_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sge_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmslt.b a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i8>, ptr %a_ptr
+  %b = load <4 x i8>, ptr %b_ptr
+  %cmp = icmp sge <4 x i8> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i8>
+  store <4 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ult_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ult_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmsltu.b a1, a1, a2
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i8>, ptr %a_ptr
+  %b = load <4 x i8>, ptr %b_ptr
+  %cmp = icmp ult <4 x i8> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i8>
+  store <4 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ule_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ule_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmsltu.b a1, a2, a1
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i8>, ptr %a_ptr
+  %b = load <4 x i8>, ptr %b_ptr
+  %cmp = icmp ule <4 x i8> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i8>
+  store <4 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ugt_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ugt_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmsltu.b a1, a2, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i8>, ptr %a_ptr
+  %b = load <4 x i8>, ptr %b_ptr
+  %cmp = icmp ugt <4 x i8> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i8>
+  store <4 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_uge_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_uge_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmsltu.b a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i8>, ptr %a_ptr
+  %b = load <4 x i8>, ptr %b_ptr
+  %cmp = icmp uge <4 x i8> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i8>
+  store <4 x i8> %res, ptr %ret_ptr
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
index db5a510ae422b..48eda85f375cb 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
@@ -1812,3 +1812,501 @@ define void @test_purem_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
   store <2 x i32> %res, ptr %ret_ptr
   ret void
 }
+
+; Comparison operations for v4i16
+define void @test_eq_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_eq_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmseq.h a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, ptr %a_ptr
+  %b = load <4 x i16>, ptr %b_ptr
+  %cmp = icmp eq <4 x i16> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i16>
+  store <4 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ne_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ne_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmseq.h a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, ptr %a_ptr
+  %b = load <4 x i16>, ptr %b_ptr
+  %cmp = icmp ne <4 x i16> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i16>
+  store <4 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_slt_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_slt_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmslt.h a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, ptr %a_ptr
+  %b = load <4 x i16>, ptr %b_ptr
+  %cmp = icmp slt <4 x i16> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i16>
+  store <4 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sle_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sle_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmslt.h a1, a2, a1
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, ptr %a_ptr
+  %b = load <4 x i16>, ptr %b_ptr
+  %cmp = icmp sle <4 x i16> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i16>
+  store <4 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sgt_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sgt_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmslt.h a1, a2, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, ptr %a_ptr
+  %b = load <4 x i16>, ptr %b_ptr
+  %cmp = icmp sgt <4 x i16> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i16>
+  store <4 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sge_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sge_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmslt.h a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, ptr %a_ptr
+  %b = load <4 x i16>, ptr %b_ptr
+  %cmp = icmp sge <4 x i16> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i16>
+  store <4 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ult_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ult_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmsltu.h a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, ptr %a_ptr
+  %b = load <4 x i16>, ptr %b_ptr
+  %cmp = icmp ult <4 x i16> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i16>
+  store <4 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ule_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ule_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmsltu.h a1, a2, a1
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, ptr %a_ptr
+  %b = load <4 x i16>, ptr %b_ptr
+  %cmp = icmp ule <4 x i16> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i16>
+  store <4 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ugt_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ugt_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmsltu.h a1, a2, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, ptr %a_ptr
+  %b = load <4 x i16>, ptr %b_ptr
+  %cmp = icmp ugt <4 x i16> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i16>
+  store <4 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_uge_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_uge_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmsltu.h a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, ptr %a_ptr
+  %b = load <4 x i16>, ptr %b_ptr
+  %cmp = icmp uge <4 x i16> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i16>
+  store <4 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+; Comparison operations for v8i8
+define void @test_eq_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_eq_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmseq.b a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, ptr %a_ptr
+  %b = load <8 x i8>, ptr %b_ptr
+  %cmp = icmp eq <8 x i8> %a, %b
+  %res = sext <8 x i1> %cmp to <8 x i8>
+  store <8 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ne_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ne_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmseq.b a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, ptr %a_ptr
+  %b = load <8 x i8>, ptr %b_ptr
+  %cmp = icmp ne <8 x i8> %a, %b
+  %res = sext <8 x i1> %cmp to <8 x i8>
+  store <8 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_slt_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_slt_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmslt.b a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, ptr %a_ptr
+  %b = load <8 x i8>, ptr %b_ptr
+  %cmp = icmp slt <8 x i8> %a, %b
+  %res = sext <8 x i1> %cmp to <8 x i8>
+  store <8 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sle_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sle_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmslt.b a1, a2, a1
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, ptr %a_ptr
+  %b = load <8 x i8>, ptr %b_ptr
+  %cmp = icmp sle <8 x i8> %a, %b
+  %res = sext <8 x i1> %cmp to <8 x i8>
+  store <8 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sgt_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sgt_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmslt.b a1, a2, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, ptr %a_ptr
+  %b = load <8 x i8>, ptr %b_ptr
+  %cmp = icmp sgt <8 x i8> %a, %b
+  %res = sext <8 x i1> %cmp to <8 x i8>
+  store <8 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sge_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sge_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmslt.b a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, ptr %a_ptr
+  %b = load <8 x i8>, ptr %b_ptr
+  %cmp = icmp sge <8 x i8> %a, %b
+  %res = sext <8 x i1> %cmp to <8 x i8>
+  store <8 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ult_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ult_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmsltu.b a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, ptr %a_ptr
+  %b = load <8 x i8>, ptr %b_ptr
+  %cmp = icmp ult <8 x i8> %a, %b
+  %res = sext <8 x i1> %cmp to <8 x i8>
+  store <8 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ule_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ule_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmsltu.b a1, a2, a1
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, ptr %a_ptr
+  %b = load <8 x i8>, ptr %b_ptr
+  %cmp = icmp ule <8 x i8> %a, %b
+  %res = sext <8 x i1> %cmp to <8 x i8>
+  store <8 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ugt_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ugt_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmsltu.b a1, a2, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, ptr %a_ptr
+  %b = load <8 x i8>, ptr %b_ptr
+  %cmp = icmp ugt <8 x i8> %a, %b
+  %res = sext <8 x i1> %cmp to <8 x i8>
+  store <8 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_uge_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_uge_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmsltu.b a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, ptr %a_ptr
+  %b = load <8 x i8>, ptr %b_ptr
+  %cmp = icmp uge <8 x i8> %a, %b
+  %res = sext <8 x i1> %cmp to <8 x i8>
+  store <8 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+; Comparison operations for v2i32
+define void @test_eq_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_eq_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmseq.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %cmp = icmp eq <2 x i32> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i32>
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ne_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ne_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmseq.w a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %cmp = icmp ne <2 x i32> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i32>
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_slt_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_slt_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmslt.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %cmp = icmp slt <2 x i32> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i32>
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sle_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sle_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmslt.w a1, a2, a1
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %cmp = icmp sle <2 x i32> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i32>
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sgt_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sgt_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmslt.w a1, a2, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %cmp = icmp sgt <2 x i32> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i32>
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sge_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sge_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmslt.w a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %cmp = icmp sge <2 x i32> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i32>
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ult_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ult_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmsltu.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %cmp = icmp ult <2 x i32> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i32>
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ule_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ule_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmsltu.w a1, a2, a1
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %cmp = icmp ule <2 x i32> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i32>
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ugt_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ugt_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmsltu.w a1, a2, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %cmp = icmp ugt <2 x i32> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i32>
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_uge_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_uge_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmsltu.w a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %cmp = icmp uge <2 x i32> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i32>
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}