Skip to content

Commit 7eb1a32

Browse files
authored
[AMDGPU] Update EXECZ retention in SIPreEmitPeephole for GFX10/12 (#97676)
The check to maintain EXECZ branches only checks S_WAITCNT. Add handling for new waitcnt instructions in GFX10 and GFX12.
1 parent 2217933 commit 7eb1a32

File tree

4 files changed

+512
-1
lines changed

4 files changed

+512
-1
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -967,6 +967,29 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
967967
}
968968
}
969969

970+
/// Returns true if \p Opcode is one of the wait-counter instructions listed in
/// the switch below (the S_WAITCNT* forms, the S_WAIT_*CNT forms and
/// S_WAIT_IDLE), and false for every other opcode.
///
/// The opcode is first passed through getNonSoftWaitcntOpcode(), so the match
/// is made against whatever that helper returns rather than the raw opcode.
/// NOTE(review): presumably that helper maps "soft" waitcnt pseudo-opcodes to
/// their hardware equivalents -- confirm against its definition.
///
/// SIPreEmitPeephole::mustRetainExeczBranch calls this in place of a direct
/// comparison against AMDGPU::S_WAITCNT.
bool isWaitcnt(unsigned Opcode) const {
971+
switch (getNonSoftWaitcntOpcode(Opcode)) {
972+
case AMDGPU::S_WAITCNT:
973+
case AMDGPU::S_WAITCNT_VSCNT:
974+
case AMDGPU::S_WAITCNT_VMCNT:
975+
case AMDGPU::S_WAITCNT_EXPCNT:
976+
case AMDGPU::S_WAITCNT_LGKMCNT:
977+
case AMDGPU::S_WAIT_LOADCNT:
978+
case AMDGPU::S_WAIT_LOADCNT_DSCNT:
979+
case AMDGPU::S_WAIT_STORECNT:
980+
case AMDGPU::S_WAIT_STORECNT_DSCNT:
981+
case AMDGPU::S_WAIT_SAMPLECNT:
982+
case AMDGPU::S_WAIT_BVHCNT:
983+
case AMDGPU::S_WAIT_EXPCNT:
984+
case AMDGPU::S_WAIT_DSCNT:
985+
case AMDGPU::S_WAIT_KMCNT:
986+
case AMDGPU::S_WAIT_IDLE:
987+
return true;
988+
default:
989+
return false;
990+
}
991+
}
992+
970993
bool isVGPRCopy(const MachineInstr &MI) const {
971994
assert(isCopyInstr(MI));
972995
Register Dest = MI.getOperand(0).getReg();

llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ bool SIPreEmitPeephole::mustRetainExeczBranch(
328328

329329
// These instructions are potentially expensive even if EXEC = 0.
330330
if (TII->isSMRD(MI) || TII->isVMEM(MI) || TII->isFLAT(MI) ||
331-
TII->isDS(MI) || MI.getOpcode() == AMDGPU::S_WAITCNT)
331+
TII->isDS(MI) || TII->isWaitcnt(MI.getOpcode()))
332332
return true;
333333

334334
++NumInstr;
Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass si-pre-emit-peephole -amdgpu-skip-threshold=10 -verify-machineinstrs %s -o - | FileCheck %s
3+
4+
---
5+
name: skip_waitcnt_vscnt
6+
body: |
7+
; CHECK-LABEL: name: skip_waitcnt_vscnt
8+
; CHECK: bb.0:
9+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
10+
; CHECK-NEXT: {{ $}}
11+
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
12+
; CHECK-NEXT: {{ $}}
13+
; CHECK-NEXT: bb.1:
14+
; CHECK-NEXT: successors: %bb.2(0x80000000)
15+
; CHECK-NEXT: {{ $}}
16+
; CHECK-NEXT: V_NOP_e32 implicit $exec
17+
; CHECK-NEXT: S_WAITCNT_VSCNT $sgpr_null, 0
18+
; CHECK-NEXT: {{ $}}
19+
; CHECK-NEXT: bb.2:
20+
; CHECK-NEXT: S_ENDPGM 0
21+
bb.0:
22+
successors: %bb.1, %bb.2
23+
S_CBRANCH_EXECZ %bb.2, implicit $exec
24+
25+
bb.1:
26+
successors: %bb.2
27+
V_NOP_e32 implicit $exec
28+
S_WAITCNT_VSCNT $sgpr_null, 0
29+
30+
bb.2:
31+
S_ENDPGM 0
32+
...
33+
34+
---
35+
name: skip_waitcnt_expcnt
36+
body: |
37+
; CHECK-LABEL: name: skip_waitcnt_expcnt
38+
; CHECK: bb.0:
39+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
40+
; CHECK-NEXT: {{ $}}
41+
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
42+
; CHECK-NEXT: {{ $}}
43+
; CHECK-NEXT: bb.1:
44+
; CHECK-NEXT: successors: %bb.2(0x80000000)
45+
; CHECK-NEXT: {{ $}}
46+
; CHECK-NEXT: V_NOP_e32 implicit $exec
47+
; CHECK-NEXT: S_WAITCNT_EXPCNT $sgpr_null, 0
48+
; CHECK-NEXT: {{ $}}
49+
; CHECK-NEXT: bb.2:
50+
; CHECK-NEXT: S_ENDPGM 0
51+
bb.0:
52+
successors: %bb.1, %bb.2
53+
S_CBRANCH_EXECZ %bb.2, implicit $exec
54+
55+
bb.1:
56+
successors: %bb.2
57+
V_NOP_e32 implicit $exec
58+
S_WAITCNT_EXPCNT $sgpr_null, 0
59+
60+
bb.2:
61+
S_ENDPGM 0
62+
...
63+
64+
---
65+
name: skip_waitcnt_vmcnt
66+
body: |
67+
; CHECK-LABEL: name: skip_waitcnt_vmcnt
68+
; CHECK: bb.0:
69+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
70+
; CHECK-NEXT: {{ $}}
71+
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
72+
; CHECK-NEXT: {{ $}}
73+
; CHECK-NEXT: bb.1:
74+
; CHECK-NEXT: successors: %bb.2(0x80000000)
75+
; CHECK-NEXT: {{ $}}
76+
; CHECK-NEXT: V_NOP_e32 implicit $exec
77+
; CHECK-NEXT: S_WAITCNT_VMCNT $sgpr_null, 0
78+
; CHECK-NEXT: {{ $}}
79+
; CHECK-NEXT: bb.2:
80+
; CHECK-NEXT: S_ENDPGM 0
81+
bb.0:
82+
successors: %bb.1, %bb.2
83+
S_CBRANCH_EXECZ %bb.2, implicit $exec
84+
85+
bb.1:
86+
successors: %bb.2
87+
V_NOP_e32 implicit $exec
88+
S_WAITCNT_VMCNT $sgpr_null, 0
89+
90+
bb.2:
91+
S_ENDPGM 0
92+
...
93+
94+
---
95+
name: skip_waitcnt_lgkmcnt
96+
body: |
97+
; CHECK-LABEL: name: skip_waitcnt_lgkmcnt
98+
; CHECK: bb.0:
99+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
100+
; CHECK-NEXT: {{ $}}
101+
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
102+
; CHECK-NEXT: {{ $}}
103+
; CHECK-NEXT: bb.1:
104+
; CHECK-NEXT: successors: %bb.2(0x80000000)
105+
; CHECK-NEXT: {{ $}}
106+
; CHECK-NEXT: V_NOP_e32 implicit $exec
107+
; CHECK-NEXT: S_WAITCNT_LGKMCNT $sgpr_null, 0
108+
; CHECK-NEXT: {{ $}}
109+
; CHECK-NEXT: bb.2:
110+
; CHECK-NEXT: S_ENDPGM 0
111+
bb.0:
112+
successors: %bb.1, %bb.2
113+
S_CBRANCH_EXECZ %bb.2, implicit $exec
114+
115+
bb.1:
116+
successors: %bb.2
117+
V_NOP_e32 implicit $exec
118+
S_WAITCNT_LGKMCNT $sgpr_null, 0
119+
120+
bb.2:
121+
S_ENDPGM 0
122+
...
123+
124+
---
125+
name: skip_wait_idle
126+
body: |
127+
; CHECK-LABEL: name: skip_wait_idle
128+
; CHECK: bb.0:
129+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
130+
; CHECK-NEXT: {{ $}}
131+
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
132+
; CHECK-NEXT: {{ $}}
133+
; CHECK-NEXT: bb.1:
134+
; CHECK-NEXT: successors: %bb.2(0x80000000)
135+
; CHECK-NEXT: {{ $}}
136+
; CHECK-NEXT: V_NOP_e32 implicit $exec
137+
; CHECK-NEXT: S_WAIT_IDLE
138+
; CHECK-NEXT: {{ $}}
139+
; CHECK-NEXT: bb.2:
140+
; CHECK-NEXT: S_ENDPGM 0
141+
bb.0:
142+
successors: %bb.1, %bb.2
143+
S_CBRANCH_EXECZ %bb.2, implicit $exec
144+
145+
bb.1:
146+
successors: %bb.2
147+
V_NOP_e32 implicit $exec
148+
S_WAIT_IDLE
149+
150+
bb.2:
151+
S_ENDPGM 0
152+
...
153+
154+
---
155+
name: skip_bvh
156+
body: |
157+
; CHECK-LABEL: name: skip_bvh
158+
; CHECK: bb.0:
159+
; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
160+
; CHECK-NEXT: {{ $}}
161+
; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
162+
; CHECK-NEXT: {{ $}}
163+
; CHECK-NEXT: bb.1:
164+
; CHECK-NEXT: successors: %bb.2(0x80000000)
165+
; CHECK-NEXT: {{ $}}
166+
; CHECK-NEXT: V_NOP_e32 implicit $exec
167+
; CHECK-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14 = IMPLICIT_DEF
168+
; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
169+
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx11 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
170+
; CHECK-NEXT: {{ $}}
171+
; CHECK-NEXT: bb.2:
172+
; CHECK-NEXT: S_ENDPGM 0
173+
bb.0:
174+
successors: %bb.1, %bb.2
175+
S_CBRANCH_EXECZ %bb.2, implicit $exec
176+
177+
bb.1:
178+
successors: %bb.2
179+
V_NOP_e32 implicit $exec
180+
$vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14 = IMPLICIT_DEF
181+
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
182+
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx11 $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
183+
184+
bb.2:
185+
S_ENDPGM 0
186+
...

0 commit comments

Comments
 (0)