Skip to content

Commit 9688df2

Browse files
Ana Mihajlovic
authored and committed
[isel] (x or y) xor -1 -> x nor y
1 parent 6477945 commit 9688df2

File tree

3 files changed

+150
-0
lines changed

3 files changed

+150
-0
lines changed

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1925,6 +1925,20 @@ def : ScalarNot2Pat<S_ORN2_B32, or, v2i16>;
19251925
def : ScalarNot2Pat<S_ORN2_B64, or, v4i16>;
19261926
def : ScalarNot2Pat<S_ORN2_B64, or, v2i32>;
19271927

1928+
// (x or y) xor -1 -> x nor y, applied when the isWave32 predicate holds:
// a 'not' of an i1 'or' of $src0/$src1 is selected as one S_NOR_B32 on the
// same two operands.
// Assumption: or_oneuse limits the match to an 'or' whose result has no other
// user, so the 'or' itself need not be kept -- confirm against its definition.
let WaveSizePredicate = isWave32 in {
1929+
def : GCNPat<
1930+
(i1 (not (or_oneuse i1:$src0, i1:$src1))),
1931+
(S_NOR_B32 i1:$src0, i1:$src1)
1932+
>;
1933+
}
1934+
1935+
// (x or y) xor -1 -> x nor y, applied when the isWave64 predicate holds:
// a 'not' of an i1 'or' of $src0/$src1 is selected as one S_NOR_B64 on the
// same two operands.
// Assumption: or_oneuse limits the match to an 'or' whose result has no other
// user, so the 'or' itself need not be kept -- confirm against its definition.
let WaveSizePredicate = isWave64 in {
1936+
def : GCNPat<
1937+
(i1 (not (or_oneuse i1:$src0, i1:$src1))),
1938+
(S_NOR_B64 i1:$src0, i1:$src1)
1939+
>;
1940+
}
1941+
19281942
//===----------------------------------------------------------------------===//
19291943
// Target-specific instruction encodings.
19301944
//===----------------------------------------------------------------------===//
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr="+wavefrontsize32,-wavefrontsize64" -o - < %s | FileCheck %s
3+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -mattr="+wavefrontsize32,-wavefrontsize64" -o - < %s | FileCheck %s
4+
5+
; Wave32 coverage for selecting a negated i1 'or' as a single scalar NOR.
; In %MergeCF, %cmp = or i1 %w, %z feeds a conditional branch; the expected
; output combines the two 32-bit masks with one s_nor_b32 right before
; s_and_saveexec_b32.
; Assumption: the negation is introduced while lowering the branch on %cmp
; (it is not explicit in this IR) -- confirm.
define amdgpu_ps void @divergent_i1_phi_if_else(ptr addrspace(1) %out, i32 %tid, i32 %a, i32 %b, i32 %c, i32 %d) {
6+
; CHECK-LABEL: divergent_i1_phi_if_else:
7+
; CHECK: ; %bb.0: ; %entry
8+
; CHECK-NEXT: v_cmp_le_u32_e64 s0, v3, v4
9+
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
10+
; CHECK-NEXT: s_mov_b32 s2, s0
11+
; CHECK-NEXT: s_and_saveexec_b32 s1, s0
12+
; CHECK-NEXT: ; %bb.1: ; %C
13+
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v5
14+
; CHECK-NEXT: s_and_not1_b32 s2, s0, exec_lo
15+
; CHECK-NEXT: s_and_b32 s3, vcc_lo, exec_lo
16+
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
17+
; CHECK-NEXT: s_or_b32 s2, s2, s3
18+
; CHECK-NEXT: ; %bb.2: ; %MergeCF
19+
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s1
20+
; CHECK-NEXT: s_nor_b32 s1, s0, s2
21+
; CHECK-NEXT: ; implicit-def: $sgpr0
22+
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
23+
; CHECK-NEXT: s_and_saveexec_b32 s2, s1
24+
; CHECK-NEXT: s_xor_b32 s1, exec_lo, s2
25+
; CHECK-NEXT: ; %bb.3: ; %B
26+
; CHECK-NEXT: v_cmp_gt_u32_e64 s0, 2, v2
27+
; CHECK-NEXT: ; implicit-def: $vgpr2
28+
; CHECK-NEXT: ; %bb.4: ; %Flow
29+
; CHECK-NEXT: s_and_not1_saveexec_b32 s1, s1
30+
; CHECK-NEXT: ; %bb.5: ; %A
31+
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2
32+
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
33+
; CHECK-NEXT: s_and_not1_b32 s0, s0, exec_lo
34+
; CHECK-NEXT: s_and_b32 s2, vcc_lo, exec_lo
35+
; CHECK-NEXT: s_or_b32 s0, s0, s2
36+
; CHECK-NEXT: ; %bb.6: ; %exit
37+
; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s1
38+
; CHECK-NEXT: v_cndmask_b32_e64 v2, 2, 1, s0
39+
; CHECK-NEXT: global_store_b32 v[0:1], v2, off
40+
; CHECK-NEXT: s_endpgm
41+
entry:
42+
%x = icmp ule i32 %a, %b
43+
br i1 %x, label %C, label %MergeCF
44+
45+
C:
46+
%y = icmp eq i32 %a, %c
47+
br label %MergeCF
48+
49+
MergeCF:
50+
%z = phi i1 [ %x, %entry ], [ %y, %C ]
51+
%w = icmp ule i32 %a, %b
52+
%cmp = or i1 %w, %z
53+
br i1 %cmp, label %A, label %B
54+
55+
A:
56+
%val_A = icmp uge i32 %tid, 1
57+
br label %exit
58+
59+
B:
60+
%val_B = icmp ult i32 %tid, 2
61+
br label %exit
62+
63+
exit:
64+
%phi = phi i1 [ %val_A, %A ], [ %val_B, %B ]
65+
%sel = select i1 %phi, i32 1, i32 2
66+
store i32 %sel, ptr addrspace(1) %out
67+
ret void
68+
}
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr="-wavefrontsize32,+wavefrontsize64" -o - < %s | FileCheck %s
3+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -mattr="-wavefrontsize32,+wavefrontsize64" -o - < %s | FileCheck %s
4+
5+
; Wave64 coverage for selecting a negated i1 'or' as a single scalar NOR.
; In %MergeCF, %cmp = or i1 %w, %z feeds a conditional branch; the expected
; output combines the two 64-bit masks with one s_nor_b64 right before
; s_and_saveexec_b64.
; Assumption: the negation is introduced while lowering the branch on %cmp
; (it is not explicit in this IR) -- confirm.
define amdgpu_ps void @divergent_i1_phi_if_else(ptr addrspace(1) %out, i32 %tid, i32 %a, i32 %b, i32 %c, i32 %d) {
6+
; CHECK-LABEL: divergent_i1_phi_if_else:
7+
; CHECK: ; %bb.0: ; %entry
8+
; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], v3, v4
9+
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
10+
; CHECK-NEXT: s_mov_b64 s[4:5], s[0:1]
11+
; CHECK-NEXT: s_and_saveexec_b64 s[2:3], s[0:1]
12+
; CHECK-NEXT: ; %bb.1: ; %C
13+
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
14+
; CHECK-NEXT: s_and_not1_b64 s[4:5], s[0:1], exec
15+
; CHECK-NEXT: s_and_b64 s[6:7], vcc, exec
16+
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
17+
; CHECK-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
18+
; CHECK-NEXT: ; %bb.2: ; %MergeCF
19+
; CHECK-NEXT: s_or_b64 exec, exec, s[2:3]
20+
; CHECK-NEXT: s_nor_b64 s[2:3], s[0:1], s[4:5]
21+
; CHECK-NEXT: ; implicit-def: $sgpr0_sgpr1
22+
; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
23+
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], s[2:3]
24+
; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[4:5]
25+
; CHECK-NEXT: ; %bb.3: ; %B
26+
; CHECK-NEXT: v_cmp_gt_u32_e64 s[0:1], 2, v2
27+
; CHECK-NEXT: ; implicit-def: $vgpr2
28+
; CHECK-NEXT: ; %bb.4: ; %Flow
29+
; CHECK-NEXT: s_and_not1_saveexec_b64 s[2:3], s[2:3]
30+
; CHECK-NEXT: ; %bb.5: ; %A
31+
; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
32+
; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
33+
; CHECK-NEXT: s_and_not1_b64 s[0:1], s[0:1], exec
34+
; CHECK-NEXT: s_and_b64 s[4:5], vcc, exec
35+
; CHECK-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
36+
; CHECK-NEXT: ; %bb.6: ; %exit
37+
; CHECK-NEXT: s_or_b64 exec, exec, s[2:3]
38+
; CHECK-NEXT: v_cndmask_b32_e64 v2, 2, 1, s[0:1]
39+
; CHECK-NEXT: global_store_b32 v[0:1], v2, off
40+
; CHECK-NEXT: s_endpgm
41+
entry:
42+
%x = icmp ule i32 %a, %b
43+
br i1 %x, label %C, label %MergeCF
44+
45+
C:
46+
%y = icmp eq i32 %a, %c
47+
br label %MergeCF
48+
49+
MergeCF:
50+
%z = phi i1 [ %x, %entry ], [ %y, %C ]
51+
%w = icmp ule i32 %a, %b
52+
%cmp = or i1 %w, %z
53+
br i1 %cmp, label %A, label %B
54+
55+
A:
56+
%val_A = icmp uge i32 %tid, 1
57+
br label %exit
58+
59+
B:
60+
%val_B = icmp ult i32 %tid, 2
61+
br label %exit
62+
63+
exit:
64+
%phi = phi i1 [ %val_A, %A ], [ %val_B, %B ]
65+
%sel = select i1 %phi, i32 1, i32 2
66+
store i32 %sel, ptr addrspace(1) %out
67+
ret void
68+
}

0 commit comments

Comments
 (0)