From 54b10f79a2b78908de368fecff50fd2b9ad5bb63 Mon Sep 17 00:00:00 2001 From: Brandon Wu Date: Tue, 6 Jan 2026 06:13:00 -0800 Subject: [PATCH 1/2] [RISCV][llvm] Support bitwise operation for XLEN fixed vectors These patterns don't strictly require the P extension, since they select ordinary scalar instructions, but they are normally used together with other P extension instructions, so they are grouped under the same predicate. --- llvm/lib/Target/RISCV/RISCVInstrInfoP.td | 18 +++ llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll | 118 +++++++++++++++ llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll | 177 +++++++++++++++++++++++ 3 files changed, 313 insertions(+) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td index 6830b476f5cd3..541da81cce58d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td @@ -1489,6 +1489,18 @@ let Predicates = [HasStdExtP] in { def: Pat<(XLenVecI16VT (add GPR:$rs1, GPR:$rs2)), (PADD_H GPR:$rs1, GPR:$rs2)>; def: Pat<(XLenVecI16VT (sub GPR:$rs1, GPR:$rs2)), (PSUB_H GPR:$rs1, GPR:$rs2)>; + // 8-bit bitwise operation patterns + def: Pat<(XLenVecI8VT (and GPR:$rs1, GPR:$rs2)), (AND GPR:$rs1, GPR:$rs2)>; + def: Pat<(XLenVecI8VT (or GPR:$rs1, GPR:$rs2)), (OR GPR:$rs1, GPR:$rs2)>; + def: Pat<(XLenVecI8VT (xor GPR:$rs1, GPR:$rs2)), (XOR GPR:$rs1, GPR:$rs2)>; + def: Pat<(XLenVecI8VT (xor GPR:$rs1, immAllOnesV)), (XORI GPR:$rs1, -1)>; + + // 16-bit bitwise operation patterns + def: Pat<(XLenVecI16VT (and GPR:$rs1, GPR:$rs2)), (AND GPR:$rs1, GPR:$rs2)>; + def: Pat<(XLenVecI16VT (or GPR:$rs1, GPR:$rs2)), (OR GPR:$rs1, GPR:$rs2)>; + def: Pat<(XLenVecI16VT (xor GPR:$rs1, GPR:$rs2)), (XOR GPR:$rs1, GPR:$rs2)>; + def: Pat<(XLenVecI16VT (xor GPR:$rs1, immAllOnesV)), (XORI GPR:$rs1, -1)>; + // 8-bit saturating add/sub patterns def: Pat<(XLenVecI8VT (saddsat GPR:$rs1, GPR:$rs2)), (PSADD_B GPR:$rs1, GPR:$rs2)>; def: Pat<(XLenVecI8VT (uaddsat GPR:$rs1, GPR:$rs2)), (PSADDU_B GPR:$rs1, GPR:$rs2)>; @@ -1611,6 +1623,12 @@ let Predicates = 
[HasStdExtP, IsRV64] in { def: Pat<(v2i32 (add GPR:$rs1, GPR:$rs2)), (PADD_W GPR:$rs1, GPR:$rs2)>; def: Pat<(v2i32 (sub GPR:$rs1, GPR:$rs2)), (PSUB_W GPR:$rs1, GPR:$rs2)>; + // 32-bit bitwise operation patterns + def: Pat<(v2i32 (and GPR:$rs1, GPR:$rs2)), (AND GPR:$rs1, GPR:$rs2)>; + def: Pat<(v2i32 (or GPR:$rs1, GPR:$rs2)), (OR GPR:$rs1, GPR:$rs2)>; + def: Pat<(v2i32 (xor GPR:$rs1, GPR:$rs2)), (XOR GPR:$rs1, GPR:$rs2)>; + def: Pat<(v2i32 (xor GPR:$rs1, immAllOnesV)), (XORI GPR:$rs1, -1)>; + // 32-bit saturating add/sub patterns def: Pat<(v2i32 (saddsat GPR:$rs1, GPR:$rs2)), (PSADD_W GPR:$rs1, GPR:$rs2)>; def: Pat<(v2i32 (uaddsat GPR:$rs1, GPR:$rs2)), (PSADDU_W GPR:$rs1, GPR:$rs2)>; diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll index 2836cda16b6d9..4665c77b6dcf8 100644 --- a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll @@ -64,6 +64,124 @@ define void @test_psub_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { ret void } +; Test bitwise operations for v2i16 (use scalar instructions) +define void @test_and_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { +; CHECK-LABEL: test_and_h: +; CHECK: # %bb.0: +; CHECK-NEXT: lw a1, 0(a1) +; CHECK-NEXT: lw a2, 0(a2) +; CHECK-NEXT: and a1, a1, a2 +; CHECK-NEXT: sw a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i16>, ptr %a_ptr + %b = load <2 x i16>, ptr %b_ptr + %res = and <2 x i16> %a, %b + store <2 x i16> %res, ptr %ret_ptr + ret void +} + +define void @test_or_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { +; CHECK-LABEL: test_or_h: +; CHECK: # %bb.0: +; CHECK-NEXT: lw a1, 0(a1) +; CHECK-NEXT: lw a2, 0(a2) +; CHECK-NEXT: or a1, a1, a2 +; CHECK-NEXT: sw a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i16>, ptr %a_ptr + %b = load <2 x i16>, ptr %b_ptr + %res = or <2 x i16> %a, %b + store <2 x i16> %res, ptr %ret_ptr + ret void +} + +define void @test_xor_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { +; CHECK-LABEL: test_xor_h: +; CHECK: # %bb.0: +; CHECK-NEXT: lw 
a1, 0(a1) +; CHECK-NEXT: lw a2, 0(a2) +; CHECK-NEXT: xor a1, a1, a2 +; CHECK-NEXT: sw a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i16>, ptr %a_ptr + %b = load <2 x i16>, ptr %b_ptr + %res = xor <2 x i16> %a, %b + store <2 x i16> %res, ptr %ret_ptr + ret void +} + +; Test bitwise operations for v4i8 (use scalar instructions) +define void @test_and_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { +; CHECK-LABEL: test_and_b: +; CHECK: # %bb.0: +; CHECK-NEXT: lw a1, 0(a1) +; CHECK-NEXT: lw a2, 0(a2) +; CHECK-NEXT: and a1, a1, a2 +; CHECK-NEXT: sw a1, 0(a0) +; CHECK-NEXT: ret + %a = load <4 x i8>, ptr %a_ptr + %b = load <4 x i8>, ptr %b_ptr + %res = and <4 x i8> %a, %b + store <4 x i8> %res, ptr %ret_ptr + ret void +} + +define void @test_or_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { +; CHECK-LABEL: test_or_b: +; CHECK: # %bb.0: +; CHECK-NEXT: lw a1, 0(a1) +; CHECK-NEXT: lw a2, 0(a2) +; CHECK-NEXT: or a1, a1, a2 +; CHECK-NEXT: sw a1, 0(a0) +; CHECK-NEXT: ret + %a = load <4 x i8>, ptr %a_ptr + %b = load <4 x i8>, ptr %b_ptr + %res = or <4 x i8> %a, %b + store <4 x i8> %res, ptr %ret_ptr + ret void +} + +define void @test_xor_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { +; CHECK-LABEL: test_xor_b: +; CHECK: # %bb.0: +; CHECK-NEXT: lw a1, 0(a1) +; CHECK-NEXT: lw a2, 0(a2) +; CHECK-NEXT: xor a1, a1, a2 +; CHECK-NEXT: sw a1, 0(a0) +; CHECK-NEXT: ret + %a = load <4 x i8>, ptr %a_ptr + %b = load <4 x i8>, ptr %b_ptr + %res = xor <4 x i8> %a, %b + store <4 x i8> %res, ptr %ret_ptr + ret void +} + +define void @test_not_h(ptr %ret_ptr, ptr %a_ptr) { +; CHECK-LABEL: test_not_h: +; CHECK: # %bb.0: +; CHECK-NEXT: lw a1, 0(a1) +; CHECK-NEXT: not a1, a1 +; CHECK-NEXT: sw a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i16>, ptr %a_ptr + %res = xor <2 x i16> %a, splat(i16 -1) + store <2 x i16> %res, ptr %ret_ptr + ret void +} + +define void @test_not_b(ptr %ret_ptr, ptr %a_ptr) { +; CHECK-LABEL: test_not_b: +; CHECK: # %bb.0: +; CHECK-NEXT: lw a1, 0(a1) +; CHECK-NEXT: not a1, a1 +; CHECK-NEXT: 
sw a1, 0(a0) +; CHECK-NEXT: ret + %a = load <4 x i8>, ptr %a_ptr + %res = xor <4 x i8> %a, splat(i8 -1) + store <4 x i8> %res, ptr %ret_ptr + ret void +} + ; Test saturating add operations for v2i16 define void @test_psadd_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { ; CHECK-LABEL: test_psadd_h: diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll index dfa1b242e656f..d510fa0841192 100644 --- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll @@ -63,6 +63,183 @@ define void @test_psub_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { ret void } +; Test bitwise operations for v4i16 (use scalar instructions) +define void @test_and_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { +; CHECK-LABEL: test_and_h: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: ld a2, 0(a2) +; CHECK-NEXT: and a1, a1, a2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <4 x i16>, ptr %a_ptr + %b = load <4 x i16>, ptr %b_ptr + %res = and <4 x i16> %a, %b + store <4 x i16> %res, ptr %ret_ptr + ret void +} + +define void @test_or_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { +; CHECK-LABEL: test_or_h: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: ld a2, 0(a2) +; CHECK-NEXT: or a1, a1, a2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <4 x i16>, ptr %a_ptr + %b = load <4 x i16>, ptr %b_ptr + %res = or <4 x i16> %a, %b + store <4 x i16> %res, ptr %ret_ptr + ret void +} + +define void @test_xor_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { +; CHECK-LABEL: test_xor_h: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: ld a2, 0(a2) +; CHECK-NEXT: xor a1, a1, a2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <4 x i16>, ptr %a_ptr + %b = load <4 x i16>, ptr %b_ptr + %res = xor <4 x i16> %a, %b + store <4 x i16> %res, ptr %ret_ptr + ret void +} + +; Test bitwise operations for v8i8 (use scalar instructions) +define void @test_and_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { +; 
CHECK-LABEL: test_and_b: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: ld a2, 0(a2) +; CHECK-NEXT: and a1, a1, a2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <8 x i8>, ptr %a_ptr + %b = load <8 x i8>, ptr %b_ptr + %res = and <8 x i8> %a, %b + store <8 x i8> %res, ptr %ret_ptr + ret void +} + +define void @test_or_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { +; CHECK-LABEL: test_or_b: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: ld a2, 0(a2) +; CHECK-NEXT: or a1, a1, a2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <8 x i8>, ptr %a_ptr + %b = load <8 x i8>, ptr %b_ptr + %res = or <8 x i8> %a, %b + store <8 x i8> %res, ptr %ret_ptr + ret void +} + +define void @test_xor_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { +; CHECK-LABEL: test_xor_b: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: ld a2, 0(a2) +; CHECK-NEXT: xor a1, a1, a2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <8 x i8>, ptr %a_ptr + %b = load <8 x i8>, ptr %b_ptr + %res = xor <8 x i8> %a, %b + store <8 x i8> %res, ptr %ret_ptr + ret void +} + +; Test bitwise operations for v2i32 (use scalar instructions) +define void @test_and_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { +; CHECK-LABEL: test_and_w: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: ld a2, 0(a2) +; CHECK-NEXT: and a1, a1, a2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i32>, ptr %a_ptr + %b = load <2 x i32>, ptr %b_ptr + %res = and <2 x i32> %a, %b + store <2 x i32> %res, ptr %ret_ptr + ret void +} + +define void @test_or_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { +; CHECK-LABEL: test_or_w: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: ld a2, 0(a2) +; CHECK-NEXT: or a1, a1, a2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i32>, ptr %a_ptr + %b = load <2 x i32>, ptr %b_ptr + %res = or <2 x i32> %a, %b + store <2 x i32> %res, ptr %ret_ptr + ret void +} + +define void @test_xor_w(ptr %ret_ptr, ptr 
%a_ptr, ptr %b_ptr) { +; CHECK-LABEL: test_xor_w: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: ld a2, 0(a2) +; CHECK-NEXT: xor a1, a1, a2 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i32>, ptr %a_ptr + %b = load <2 x i32>, ptr %b_ptr + %res = xor <2 x i32> %a, %b + store <2 x i32> %res, ptr %ret_ptr + ret void +} + +define void @test_not_h(ptr %ret_ptr, ptr %a_ptr) { +; CHECK-LABEL: test_not_h: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: not a1, a1 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <4 x i16>, ptr %a_ptr + %res = xor <4 x i16> %a, splat(i16 -1) + store <4 x i16> %res, ptr %ret_ptr + ret void +} + +define void @test_not_b(ptr %ret_ptr, ptr %a_ptr) { +; CHECK-LABEL: test_not_b: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: not a1, a1 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <8 x i8>, ptr %a_ptr + %res = xor <8 x i8> %a, splat(i8 -1) + store <8 x i8> %res, ptr %ret_ptr + ret void +} + +define void @test_not_w(ptr %ret_ptr, ptr %a_ptr) { +; CHECK-LABEL: test_not_w: +; CHECK: # %bb.0: +; CHECK-NEXT: ld a1, 0(a1) +; CHECK-NEXT: not a1, a1 +; CHECK-NEXT: sd a1, 0(a0) +; CHECK-NEXT: ret + %a = load <2 x i32>, ptr %a_ptr + %res = xor <2 x i32> %a, splat(i32 -1) + store <2 x i32> %res, ptr %ret_ptr + ret void +} + ; Test saturating add operations for v4i16 define void @test_psadd_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) { ; CHECK-LABEL: test_psadd_h: From 04e77d5466e81d89d819abf8a2bab1c8222bb723 Mon Sep 17 00:00:00 2001 From: Brandon Wu Date: Tue, 6 Jan 2026 09:27:41 -0800 Subject: [PATCH 2/2] fixup! 
use vnot patfrag --- llvm/lib/Target/RISCV/RISCVInstrInfoP.td | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td index 541da81cce58d..d0e90ad0f3526 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td @@ -1493,13 +1493,13 @@ let Predicates = [HasStdExtP] in { def: Pat<(XLenVecI8VT (and GPR:$rs1, GPR:$rs2)), (AND GPR:$rs1, GPR:$rs2)>; def: Pat<(XLenVecI8VT (or GPR:$rs1, GPR:$rs2)), (OR GPR:$rs1, GPR:$rs2)>; def: Pat<(XLenVecI8VT (xor GPR:$rs1, GPR:$rs2)), (XOR GPR:$rs1, GPR:$rs2)>; - def: Pat<(XLenVecI8VT (xor GPR:$rs1, immAllOnesV)), (XORI GPR:$rs1, -1)>; + def: Pat<(XLenVecI8VT (vnot GPR:$rs1)), (XORI GPR:$rs1, -1)>; // 16-bit bitwise operation patterns def: Pat<(XLenVecI16VT (and GPR:$rs1, GPR:$rs2)), (AND GPR:$rs1, GPR:$rs2)>; def: Pat<(XLenVecI16VT (or GPR:$rs1, GPR:$rs2)), (OR GPR:$rs1, GPR:$rs2)>; def: Pat<(XLenVecI16VT (xor GPR:$rs1, GPR:$rs2)), (XOR GPR:$rs1, GPR:$rs2)>; - def: Pat<(XLenVecI16VT (xor GPR:$rs1, immAllOnesV)), (XORI GPR:$rs1, -1)>; + def: Pat<(XLenVecI16VT (vnot GPR:$rs1)), (XORI GPR:$rs1, -1)>; // 8-bit saturating add/sub patterns def: Pat<(XLenVecI8VT (saddsat GPR:$rs1, GPR:$rs2)), (PSADD_B GPR:$rs1, GPR:$rs2)>; @@ -1627,7 +1627,7 @@ let Predicates = [HasStdExtP, IsRV64] in { def: Pat<(v2i32 (and GPR:$rs1, GPR:$rs2)), (AND GPR:$rs1, GPR:$rs2)>; def: Pat<(v2i32 (or GPR:$rs1, GPR:$rs2)), (OR GPR:$rs1, GPR:$rs2)>; def: Pat<(v2i32 (xor GPR:$rs1, GPR:$rs2)), (XOR GPR:$rs1, GPR:$rs2)>; - def: Pat<(v2i32 (xor GPR:$rs1, immAllOnesV)), (XORI GPR:$rs1, -1)>; + def: Pat<(v2i32 (vnot GPR:$rs1)), (XORI GPR:$rs1, -1)>; // 32-bit saturating add/sub patterns def: Pat<(v2i32 (saddsat GPR:$rs1, GPR:$rs2)), (PSADD_W GPR:$rs1, GPR:$rs2)>;