Skip to content

Commit 4c5074e

Browse files
authored
Add AMD stage support to /rerun-stage command and fix related bugs (#14463)
1 parent 41429a8 commit 4c5074e

File tree

3 files changed

+282
-112
lines changed

3 files changed

+282
-112
lines changed

.github/workflows/pr-test-amd.yml

Lines changed: 106 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,12 @@ on:
2020
- "sgl-kernel/**"
2121
- ".github/workflows/pr-test-amd.yml"
2222
workflow_dispatch:
23+
inputs:
24+
target_stage:
25+
description: "Specific stage to run (optional, for quick testing)"
26+
required: false
27+
type: string
28+
default: ""
2329

2430
concurrency:
2531
group: pr-test-amd-${{ github.ref }}
@@ -54,7 +60,15 @@ jobs:
5460
# =============================================== sgl-kernel ====================================================
5561
sgl-kernel-unit-test-amd:
5662
needs: [check-changes]
57-
if: needs.check-changes.outputs.sgl_kernel == 'true'
63+
if: |
64+
always() &&
65+
(
66+
(inputs.target_stage == 'sgl-kernel-unit-test-amd') ||
67+
(
68+
!inputs.target_stage &&
69+
needs.check-changes.outputs.sgl_kernel == 'true'
70+
)
71+
)
5872
strategy:
5973
fail-fast: false
6074
matrix:
@@ -90,8 +104,16 @@ jobs:
90104

91105
stage-a-test-1-amd:
92106
needs: [check-changes]
93-
if: always() && !failure() && !cancelled() &&
94-
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
107+
if: |
108+
always() &&
109+
(
110+
(inputs.target_stage == 'stage-a-test-1-amd') ||
111+
(
112+
!inputs.target_stage &&
113+
(!failure() && !cancelled()) &&
114+
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
115+
)
116+
)
95117
strategy:
96118
fail-fast: false
97119
matrix:
@@ -120,8 +142,16 @@ jobs:
120142
121143
unit-test-backend-1-gpu-amd:
122144
needs: [check-changes, stage-a-test-1-amd]
123-
if: always() && !failure() && !cancelled() &&
124-
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
145+
if: |
146+
always() &&
147+
(
148+
(inputs.target_stage == 'unit-test-backend-1-gpu-amd') ||
149+
(
150+
!inputs.target_stage &&
151+
(!failure() && !cancelled()) &&
152+
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
153+
)
154+
)
125155
strategy:
126156
fail-fast: false
127157
matrix:
@@ -150,8 +180,16 @@ jobs:
150180
151181
unit-test-backend-2-gpu-amd:
152182
needs: [check-changes, stage-a-test-1-amd]
153-
if: always() && !failure() && !cancelled() &&
154-
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
183+
if: |
184+
always() &&
185+
(
186+
(inputs.target_stage == 'unit-test-backend-2-gpu-amd') ||
187+
(
188+
!inputs.target_stage &&
189+
(!failure() && !cancelled()) &&
190+
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
191+
)
192+
)
155193
strategy:
156194
fail-fast: false
157195
matrix:
@@ -181,8 +219,17 @@ jobs:
181219
unit-test-backend-8-gpu-amd:
182220
needs: [check-changes, unit-test-backend-2-gpu-amd]
183221
# Temporarily disabled for automatic runs - remove the `false &&` guard below to re-enable (a matching target_stage still runs this job)
184-
if: false && always() && !failure() && !cancelled() &&
185-
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
222+
if: |
223+
always() &&
224+
(
225+
(inputs.target_stage == 'unit-test-backend-8-gpu-amd') ||
226+
(
227+
false &&
228+
!inputs.target_stage &&
229+
(!failure() && !cancelled()) &&
230+
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
231+
)
232+
)
186233
env:
187234
RUNNER_LABELS: linux-mi300-gpu-8
188235
strategy:
@@ -219,8 +266,16 @@ jobs:
219266
220267
performance-test-1-gpu-part-1-amd:
221268
needs: [check-changes, stage-a-test-1-amd]
222-
if: always() && !failure() && !cancelled() &&
223-
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
269+
if: |
270+
always() &&
271+
(
272+
(inputs.target_stage == 'performance-test-1-gpu-part-1-amd') ||
273+
(
274+
!inputs.target_stage &&
275+
(!failure() && !cancelled()) &&
276+
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
277+
)
278+
)
224279
strategy:
225280
fail-fast: false
226281
matrix:
@@ -264,8 +319,16 @@ jobs:
264319
265320
performance-test-1-gpu-part-2-amd:
266321
needs: [check-changes, stage-a-test-1-amd]
267-
if: always() && !failure() && !cancelled() &&
268-
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
322+
if: |
323+
always() &&
324+
(
325+
(inputs.target_stage == 'performance-test-1-gpu-part-2-amd') ||
326+
(
327+
!inputs.target_stage &&
328+
(!failure() && !cancelled()) &&
329+
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
330+
)
331+
)
269332
strategy:
270333
fail-fast: false
271334
matrix:
@@ -303,8 +366,16 @@ jobs:
303366
304367
performance-test-2-gpu-amd:
305368
needs: [check-changes, unit-test-backend-2-gpu-amd]
306-
if: always() && !failure() && !cancelled() &&
307-
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
369+
if: |
370+
always() &&
371+
(
372+
(inputs.target_stage == 'performance-test-2-gpu-amd') ||
373+
(
374+
!inputs.target_stage &&
375+
(!failure() && !cancelled()) &&
376+
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
377+
)
378+
)
308379
strategy:
309380
fail-fast: false
310381
matrix:
@@ -352,8 +423,16 @@ jobs:
352423
353424
accuracy-test-1-gpu-amd:
354425
needs: [check-changes, stage-a-test-1-amd]
355-
if: always() && !failure() && !cancelled() &&
356-
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
426+
if: |
427+
always() &&
428+
(
429+
(inputs.target_stage == 'accuracy-test-1-gpu-amd') ||
430+
(
431+
!inputs.target_stage &&
432+
(!failure() && !cancelled()) &&
433+
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
434+
)
435+
)
357436
strategy:
358437
fail-fast: false
359438
matrix:
@@ -383,8 +462,16 @@ jobs:
383462
384463
accuracy-test-2-gpu-amd:
385464
needs: [check-changes, accuracy-test-1-gpu-amd]
386-
if: always() && !failure() && !cancelled() &&
387-
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
465+
if: |
466+
always() &&
467+
(
468+
(inputs.target_stage == 'accuracy-test-2-gpu-amd') ||
469+
(
470+
!inputs.target_stage &&
471+
(!failure() && !cancelled()) &&
472+
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
473+
)
474+
)
388475
strategy:
389476
fail-fast: false
390477
matrix:

0 commit comments

Comments
 (0)