-
Notifications
You must be signed in to change notification settings. Fork 15.7k
[RISCV][llvm] Support bitwise operation for XLEN fixed vectors #174598
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This doesn't require p extension since it's just normal scalar instructions, but they're normally used with other p extension instructions so I just put them together.
|
@llvm/pr-subscribers-backend-risc-v Author: Brandon Wu (4vtomat) Changes: This doesn't require p extension since it's just normal scalar instructions, but they're normally used with other p extension instructions so I just put them together. Full diff: https://github.com/llvm/llvm-project/pull/174598.diff 3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 6830b476f5cd3..541da81cce58d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1489,6 +1489,18 @@ let Predicates = [HasStdExtP] in {
def: Pat<(XLenVecI16VT (add GPR:$rs1, GPR:$rs2)), (PADD_H GPR:$rs1, GPR:$rs2)>;
def: Pat<(XLenVecI16VT (sub GPR:$rs1, GPR:$rs2)), (PSUB_H GPR:$rs1, GPR:$rs2)>;
+ // 8-bit bitwise operation patterns
+ def: Pat<(XLenVecI8VT (and GPR:$rs1, GPR:$rs2)), (AND GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (or GPR:$rs1, GPR:$rs2)), (OR GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (xor GPR:$rs1, GPR:$rs2)), (XOR GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (xor GPR:$rs1, immAllOnesV)), (XORI GPR:$rs1, -1)>;
+
+ // 16-bit bitwise operation patterns
+ def: Pat<(XLenVecI16VT (and GPR:$rs1, GPR:$rs2)), (AND GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (or GPR:$rs1, GPR:$rs2)), (OR GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (xor GPR:$rs1, GPR:$rs2)), (XOR GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (xor GPR:$rs1, immAllOnesV)), (XORI GPR:$rs1, -1)>;
+
// 8-bit saturating add/sub patterns
def: Pat<(XLenVecI8VT (saddsat GPR:$rs1, GPR:$rs2)), (PSADD_B GPR:$rs1, GPR:$rs2)>;
def: Pat<(XLenVecI8VT (uaddsat GPR:$rs1, GPR:$rs2)), (PSADDU_B GPR:$rs1, GPR:$rs2)>;
@@ -1611,6 +1623,12 @@ let Predicates = [HasStdExtP, IsRV64] in {
def: Pat<(v2i32 (add GPR:$rs1, GPR:$rs2)), (PADD_W GPR:$rs1, GPR:$rs2)>;
def: Pat<(v2i32 (sub GPR:$rs1, GPR:$rs2)), (PSUB_W GPR:$rs1, GPR:$rs2)>;
+ // 32-bit bitwise operation patterns
+ def: Pat<(v2i32 (and GPR:$rs1, GPR:$rs2)), (AND GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i32 (or GPR:$rs1, GPR:$rs2)), (OR GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i32 (xor GPR:$rs1, GPR:$rs2)), (XOR GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i32 (xor GPR:$rs1, immAllOnesV)), (XORI GPR:$rs1, -1)>;
+
// 32-bit saturating add/sub patterns
def: Pat<(v2i32 (saddsat GPR:$rs1, GPR:$rs2)), (PSADD_W GPR:$rs1, GPR:$rs2)>;
def: Pat<(v2i32 (uaddsat GPR:$rs1, GPR:$rs2)), (PSADDU_W GPR:$rs1, GPR:$rs2)>;
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
index 2836cda16b6d9..4665c77b6dcf8 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
@@ -64,6 +64,124 @@ define void @test_psub_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
ret void
}
+; Test bitwise operations for v2i16 (use scalar instructions)
+define void @test_and_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_and_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %res = and <2 x i16> %a, %b
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_or_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_or_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: or a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %res = or <2 x i16> %a, %b
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_xor_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_xor_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: xor a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %res = xor <2 x i16> %a, %b
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test bitwise operations for v4i8 (use scalar instructions)
+define void @test_and_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_and_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %res = and <4 x i8> %a, %b
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_or_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_or_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: or a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %res = or <4 x i8> %a, %b
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_xor_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_xor_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: xor a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %res = xor <4 x i8> %a, %b
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_not_h(ptr %ret_ptr, ptr %a_ptr) {
+; CHECK-LABEL: test_not_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %res = xor <2 x i16> %a, splat(i16 -1)
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_not_b(ptr %ret_ptr, ptr %a_ptr) {
+; CHECK-LABEL: test_not_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %res = xor <4 x i8> %a, splat(i8 -1)
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
; Test saturating add operations for v2i16
define void @test_psadd_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_psadd_h:
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
index dfa1b242e656f..d510fa0841192 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
@@ -63,6 +63,183 @@ define void @test_psub_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
ret void
}
+; Test bitwise operations for v4i16 (use scalar instructions)
+define void @test_and_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_and_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %res = and <4 x i16> %a, %b
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_or_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_or_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: or a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %res = or <4 x i16> %a, %b
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_xor_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_xor_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: xor a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %res = xor <4 x i16> %a, %b
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test bitwise operations for v8i8 (use scalar instructions)
+define void @test_and_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_and_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, ptr %a_ptr
+ %b = load <8 x i8>, ptr %b_ptr
+ %res = and <8 x i8> %a, %b
+ store <8 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_or_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_or_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: or a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, ptr %a_ptr
+ %b = load <8 x i8>, ptr %b_ptr
+ %res = or <8 x i8> %a, %b
+ store <8 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_xor_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_xor_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: xor a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, ptr %a_ptr
+ %b = load <8 x i8>, ptr %b_ptr
+ %res = xor <8 x i8> %a, %b
+ store <8 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test bitwise operations for v2i32 (use scalar instructions)
+define void @test_and_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_and_w:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: and a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i32>, ptr %a_ptr
+ %b = load <2 x i32>, ptr %b_ptr
+ %res = and <2 x i32> %a, %b
+ store <2 x i32> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_or_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_or_w:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: or a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i32>, ptr %a_ptr
+ %b = load <2 x i32>, ptr %b_ptr
+ %res = or <2 x i32> %a, %b
+ store <2 x i32> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_xor_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_xor_w:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: xor a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i32>, ptr %a_ptr
+ %b = load <2 x i32>, ptr %b_ptr
+ %res = xor <2 x i32> %a, %b
+ store <2 x i32> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_not_h(ptr %ret_ptr, ptr %a_ptr) {
+; CHECK-LABEL: test_not_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %res = xor <4 x i16> %a, splat(i16 -1)
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_not_b(ptr %ret_ptr, ptr %a_ptr) {
+; CHECK-LABEL: test_not_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, ptr %a_ptr
+ %res = xor <8 x i8> %a, splat(i8 -1)
+ store <8 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_not_w(ptr %ret_ptr, ptr %a_ptr) {
+; CHECK-LABEL: test_not_w:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: not a1, a1
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i32>, ptr %a_ptr
+ %res = xor <2 x i32> %a, splat(i32 -1)
+ store <2 x i32> %res, ptr %ret_ptr
+ ret void
+}
+
; Test saturating add operations for v4i16
define void @test_psadd_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-LABEL: test_psadd_h:
|
topperc
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
[RISCV][llvm] Support bitwise operation for XLEN fixed vectors (#174598) This doesn't require p extension since it's just normal scalar instructions, but they're normally used with other p extension instructions so I just put them together.
This doesn't require p extension since it's just normal scalar
instructions, but they're normally used with other p extension
instructions so I just put them together.