PR Test #622
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: PR Test

# The run name is only customised for /rerun-stage dispatches, so a run can be located by URL lookup:
#   "[stage-name] sha"  when both target_stage and pr_head_sha are set (fork PRs)
#   "[stage-name]"      when only target_stage is set (non-fork PRs)
#   ""                  otherwise, which leaves the default run name in place
run-name: >-
  ${{ inputs.target_stage
  && (inputs.pr_head_sha && format('[{0}] {1}', inputs.target_stage, inputs.pr_head_sha) || format('[{0}]', inputs.target_stage))
  || '' }}
on:
  schedule:
    # Three runs per day at 01:00 / 09:00 / 17:00 UTC, i.e. 6pm / 2am / 10am Pacific (PDT).
    - cron: "0 1,9,17 * * *"

  pull_request:
    branches:
      - main

  # Manual / slash-command entry point (/rerun-stage and friends).
  workflow_dispatch:
    inputs:
      target_stage:
        description: "Specific stage to run (optional, for quick testing)"
        type: string
        required: false
        default: ""
      force_continue_on_error:
        description: "Force continue-on-error (test scheduled CI behavior)"
        type: boolean
        required: false
        default: false
      pr_head_sha:
        description: "PR head SHA to checkout (for /rerun-stage on fork PRs)"
        type: string
        required: false
        default: ""
      include_wheel_build:
        description: "When set with target_stage, also run sgl-kernel-build-wheels so the target stage uses the freshly-built kernel (for /rerun-stage on PRs that modify sgl-kernel/)"
        type: boolean
        required: false
        default: false
      test_parallel_dispatch:
        description: "Test parallel dispatch behavior (simulates scheduled run)"
        type: boolean
        required: false
        default: false

  # Entry point for other workflows that reuse this one.
  workflow_call:
    inputs:
      git_ref:
        description: "Git ref (branch, tag, or SHA) to test. If not provided, uses the default branch."
        type: string
        required: false
        default: ""
      run_all_tests:
        description: "Run all tests (for releasing or testing purpose)"
        type: boolean
        required: false
        default: false
      skip_stage_health_check:
        description: "Skip stage health check fast-fail (e.g. for release branch cuts)"
        type: boolean
        required: false
        default: false
concurrency:
  # Group layout: pr-test-{event}-{branch}-{pr_sha}-{stage}
  #   - event:  github.event_name keeps scheduled runs from colliding with fork PRs whose branch is
  #             named 'main' (without it both resolve the branch segment to 'main' and block each other).
  #   - branch: github.head_ref (pull_request) or github.ref_name (workflow_dispatch) normalizes to
  #             the branch name.
  #   - pr_sha: pr_head_sha isolates /rerun-stage from main-branch runs.
  #   - stage:  target_stage lets parallel stage dispatches run independently.
  group: pr-test-${{ github.event_name }}-${{ github.head_ref || github.ref_name || 'default' }}-${{ inputs.pr_head_sha || 'current' }}-${{ inputs.target_stage || inputs.git_ref || 'all' }}
  # Runs started through workflow_call must not be cancelled by a newer run in the same group.
  # Inside a called workflow the github context belongs to the CALLER, so github.event_name is never
  # 'workflow_call' and the former check `github.event_name != 'workflow_call'` was always true.
  # A called run is recognised by the workflow_call-only inputs instead; they are unset for
  # schedule, pull_request and workflow_dispatch runs, which stay cancellable as before.
  # NOTE(review): a caller that passes none of these inputs cannot be told apart and stays cancellable.
  cancel-in-progress: ${{ !inputs.git_ref && !inputs.run_all_tests && !inputs.skip_stage_health_check }}
# Workflow-wide environment. Values are written as strings, which is how jobs receive them anyway.
env:
  SGLANG_IS_IN_CI: "true"
  SGLANG_CUDA_COREDUMP: "1"
  SGLANG_JIT_DEEPGEMM_FAST_WARMUP: "true"
  # 'true' only when the skip_stage_health_check input is set, otherwise 'false'.
  SKIP_STAGE_HEALTH_CHECK: ${{ inputs.skip_stage_health_check == true && 'true' || 'false' }}
  # Schedule, main-branch dispatch and workflow_call from main run on refs/heads/main;
  # PR events run on refs/pull/*/merge and therefore get 'false' here.
  PR_TEST_BYPASS_MAINTENANCE_ON_MAIN: ${{ github.ref == 'refs/heads/main' && 'true' || 'false' }}
  USE_VENV: "false"
# Token scopes granted to every job in this workflow: write access to Actions only,
# read-only access to repository contents, issues and pull requests.
permissions:
  actions: write
  contents: read
  issues: read
  pull-requests: read
| jobs: | |
# =============================================== check changes ====================================================
# Decides which components this run has to test and publishes the settings (parallelism, runner
# tags, retry, continue-on-error) that later jobs read through needs.check-changes.outputs.
check-changes:
  runs-on: ubuntu-latest
  outputs:
    # Component flags prefer the API-based result (filter-api, used in target_stage mode), then the
    # dorny/paths-filter result (filter), then the run-all-tests flag.
    main_package: ${{ steps.filter-api.outputs.main_package || steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }}
    # sgl_kernel is forced to false when target_stage is set AND include_wheel_build is NOT set,
    # since sgl-kernel-build-wheels normally skips in target_stage mode. When include_wheel_build
    # is true, the real value is kept so the wheel build runs and the target stage downloads its
    # artifact (used by /rerun-stage on PRs that modify sgl-kernel/).
    # This prevents CUSTOM_BUILD_SGL_KERNEL=true when the wheel artifacts aren't available.
    # Note: if the PR has kernel changes AND target_stage is set AND include_wheel_build is NOT set,
    # the "Validate target_stage with kernel changes" step below fails this job.
    sgl_kernel: ${{ (!inputs.target_stage || inputs.include_wheel_build) && (steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel) }}
    # Raw sgl_kernel value before the target_stage override (used for validation).
    sgl_kernel_raw: ${{ steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel }}
    jit_kernel: ${{ steps.filter-api.outputs.jit_kernel || steps.filter.outputs.jit_kernel || steps.run-mode.outputs.run_all_tests }}
    multimodal_gen: ${{ steps.filter-api.outputs.multimodal_gen || steps.filter.outputs.multimodal_gen || steps.run-mode.outputs.run_all_tests }}
    max_parallel: ${{ steps.set-parallel.outputs.max_parallel }}
    max_parallel_small: ${{ steps.set-parallel.outputs.max_parallel_small }}
    max_parallel_2gpu: ${{ steps.set-parallel.outputs.max_parallel_2gpu }}
    b200_runner: ${{ steps.set-runner.outputs.b200_runner }}
    b200_low_disk_runner: ${{ steps.set-runner.outputs.b200_low_disk_runner }}
    enable_retry: ${{ steps.set-retry.outputs.enable_retry }}
    continue_on_error: ${{ steps.set-continue-on-error.outputs.continue_on_error }}
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    - uses: ./.github/actions/check-maintenance

    - name: Determine run mode
      id: run-mode
      run: |
        # Run all tests for scheduled runs and for callers that set run_all_tests.
        # Note: github.event_name is inherited from the caller, so a workflow_call run cannot be
        # recognised by its event name.
        if [[ "${{ github.event_name }}" == "schedule" || "${{ inputs.run_all_tests }}" == "true" ]]; then
          echo "run_all_tests=true" >> $GITHUB_OUTPUT
          echo "Run mode: ALL TESTS (schedule=${{ github.event_name == 'schedule' }}, run_all_tests=${{ inputs.run_all_tests }})"
        else
          echo "run_all_tests=false" >> $GITHUB_OUTPUT
          echo "Run mode: FILTERED (triggered by ${{ github.event_name }})"
        fi

    - name: Detect file changes
      id: filter
      uses: dorny/paths-filter@v3
      # Only use paths-filter for pull_request events (where it works correctly).
      # For workflow_dispatch with target_stage, the GitHub API step below is used instead.
      if: steps.run-mode.outputs.run_all_tests != 'true' && !inputs.target_stage
      with:
        filters: |
          main_package:
            - ".github/workflows/pr-test.yml"
            - ".github/workflows/pr-gate.yml"
            - ".github/actions/**"
            - "python/pyproject.toml"
            - "python/sglang/!(multimodal_gen|jit_kernel/diffusion|jit_kernel/tests/diffusion|jit_kernel/benchmark/diffusion|cli)/**/!(*.md)"
            - "scripts/ci/cuda/*"
            - "scripts/ci/utils/*"
            - "test/**/!(*.md)"
          multimodal_gen:
            - ".github/workflows/pr-test.yml"
            - ".github/workflows/pr-test-multimodal-gen.yml"
            - "python/pyproject.toml"
            - "python/sglang/multimodal_gen/**/!(*.md|*.ipynb)"
            - "python/sglang/jit_kernel/diffusion/**"
            - "python/sglang/jit_kernel/tests/diffusion/**"
            - "python/sglang/jit_kernel/benchmark/diffusion/**"
            - "python/sglang/cli/**"
          jit_kernel:
            - ".github/workflows/pr-test.yml"
            - ".github/workflows/pr-test-jit-kernel.yml"
            - "python/pyproject.toml"
            - "python/sglang/jit_kernel/**"
          sgl_kernel:
            - ".github/workflows/pr-test-sgl-kernel.yml"
            - "sgl-kernel/**/!(*.md|THIRDPARTYNOTICES.txt|LICENSE)"

    # For /rerun-stage (workflow_dispatch with target_stage), dorny/paths-filter doesn't work
    # correctly because it falls back to "last commit" detection which breaks for merge commits.
    # Instead, the GitHub API is used to compare the PR commit against main.
    - name: Detect file changes via API (for target_stage)
      id: filter-api
      if: inputs.target_stage && inputs.pr_head_sha
      env:
        GH_TOKEN: ${{ github.token }}
        # pr_head_sha is a free-form dispatch input: hand it to the script through the environment
        # instead of expanding it into the script text, so its content is never parsed as shell.
        PR_HEAD_SHA: ${{ inputs.pr_head_sha }}
      run: |
        echo "Detecting file changes via GitHub API for target_stage mode..."
        echo "PR head SHA: ${PR_HEAD_SHA}"
        # Get the list of changed files by comparing the PR commit against main.
        # This correctly handles merge commits by looking at the actual PR diff.
        CHANGED_FILES=$(gh api "repos/${GITHUB_REPOSITORY}/compare/main...${PR_HEAD_SHA}" \
          --jq '[.files[].filename] | .[]' 2>/dev/null || echo "")
        if [ -z "$CHANGED_FILES" ]; then
          # Best effort: an API failure is reported as "nothing changed" rather than failing the job.
          echo "Warning: Could not fetch changed files from API, assuming no changes"
          echo "sgl_kernel=false" >> $GITHUB_OUTPUT
          echo "main_package=false" >> $GITHUB_OUTPUT
          echo "jit_kernel=false" >> $GITHUB_OUTPUT
          echo "multimodal_gen=false" >> $GITHUB_OUTPUT
          exit 0
        fi
        echo "Changed files:"
        echo "$CHANGED_FILES" | head -20
        echo "..."
        # Check for sgl-kernel changes
        if echo "$CHANGED_FILES" | grep -qE "^(sgl-kernel/|\.github/workflows/pr-test-sgl-kernel\.yml)"; then
          echo "sgl_kernel=true" >> $GITHUB_OUTPUT
          echo "Detected sgl-kernel changes"
        else
          echo "sgl_kernel=false" >> $GITHUB_OUTPUT
        fi
        # Check for main_package changes (excluding multimodal_gen, jit_kernel/diffusion, jit_kernel/tests/diffusion, jit_kernel/benchmark/diffusion, cli)
        # Note: the excluded paths are filtered out first and the remainder is tested for emptiness,
        # instead of piping into grep -q.
        MAIN_PKG_FILES=$(echo "$CHANGED_FILES" | grep -E "^(python/sglang/|python/pyproject\.toml|scripts/ci/cuda/|scripts/ci/utils/|test/|\.github/workflows/pr-test\.yml|\.github/workflows/pr-gate\.yml|\.github/actions/)" | grep -v -E "^(python/sglang/multimodal_gen/|python/sglang/jit_kernel/diffusion/|python/sglang/jit_kernel/tests/diffusion/|python/sglang/jit_kernel/benchmark/diffusion/|python/sglang/cli/)" || true)
        if [ -n "$MAIN_PKG_FILES" ]; then
          echo "main_package=true" >> $GITHUB_OUTPUT
          echo "Detected main_package changes"
        else
          echo "main_package=false" >> $GITHUB_OUTPUT
        fi
        # Check for jit_kernel changes
        if echo "$CHANGED_FILES" | grep -qE "^(python/sglang/jit_kernel/|python/pyproject\.toml|\.github/workflows/pr-test\.yml|\.github/workflows/pr-test-jit-kernel\.yml)"; then
          echo "jit_kernel=true" >> $GITHUB_OUTPUT
          echo "Detected jit_kernel changes"
        else
          echo "jit_kernel=false" >> $GITHUB_OUTPUT
        fi
        # Check for multimodal_gen changes, including diffusion-specific jit_kernel coverage
        if echo "$CHANGED_FILES" | grep -qE "^(python/sglang/multimodal_gen/|python/sglang/cli/|python/sglang/jit_kernel/diffusion/|python/sglang/jit_kernel/tests/diffusion/|python/sglang/jit_kernel/benchmark/diffusion/|python/pyproject\.toml|\.github/workflows/pr-test\.yml|\.github/workflows/pr-test-multimodal-gen\.yml)"; then
          echo "multimodal_gen=true" >> $GITHUB_OUTPUT
          echo "Detected multimodal_gen changes"
        else
          echo "multimodal_gen=false" >> $GITHUB_OUTPUT
        fi

    - name: Set max-parallel based on run type
      id: set-parallel
      env:
        GH_TOKEN: ${{ github.token }}
        # Free-form dispatch inputs are passed through the environment, never spliced into the script.
        TARGET_STAGE: ${{ inputs.target_stage }}
        PR_HEAD_SHA: ${{ inputs.pr_head_sha }}
      run: |
        # Determine if this run gets full parallelism (scheduled / high priority)
        FULL=false
        if [[ "${{ github.event_name }}" == "schedule" ]]; then
          FULL=true
          echo "Scheduled run detected, using full parallelism"
        elif [[ "${{ github.event_name }}" == "pull_request" && "${{ contains(github.event.pull_request.labels.*.name, 'high priority') }}" == "true" ]]; then
          FULL=true
          echo "High priority PR detected, using full parallelism"
        elif [[ -n "$TARGET_STAGE" ]]; then
          # /rerun-stage (workflow_dispatch): query PR labels via GitHub API
          # Try SHA lookup first (fork PRs), fallback to branch name (non-fork PRs)
          LABELS=""
          if [[ -n "$PR_HEAD_SHA" ]]; then
            LABELS=$(gh api "repos/${GITHUB_REPOSITORY}/commits/${PR_HEAD_SHA}/pulls" \
              --jq '.[0].labels[].name' 2>/dev/null || true)
          fi
          if [[ -z "$LABELS" ]]; then
            # GITHUB_REF_NAME is the runner-provided equivalent of github.ref_name.
            LABELS=$(gh pr list --head "${GITHUB_REF_NAME}" --repo "${GITHUB_REPOSITORY}" \
              --json labels --jq '.[0].labels[].name' 2>/dev/null || true)
          fi
          echo "PR labels: ${LABELS:-"(none)"}"
          if echo "$LABELS" | grep -Fxq "high priority"; then
            FULL=true
            echo "High priority PR detected via API (/rerun-stage), using full parallelism"
          fi
        fi
        # Set max-parallel for each runner type
        # 1-gpu-h100: 14 partitions, 1-gpu-5090: 8 partitions, 2-gpu-h100: 4 partitions
        if [[ "$FULL" == "true" ]]; then
          LEVEL=full
          echo "max_parallel=14" >> $GITHUB_OUTPUT
          echo "max_parallel_small=8" >> $GITHUB_OUTPUT
          echo "max_parallel_2gpu=4" >> $GITHUB_OUTPUT
        else
          LEVEL=low
          echo "max_parallel=3" >> $GITHUB_OUTPUT
          echo "max_parallel_small=3" >> $GITHUB_OUTPUT
          echo "max_parallel_2gpu=2" >> $GITHUB_OUTPUT
        fi
        echo "parallel_level=$LEVEL" >> $GITHUB_OUTPUT
        echo "Parallelism level: $LEVEL"

    - name: Set B200 runner tag
      id: set-runner
      env:
        # API-based detection (filter-api) wins in target_stage mode, otherwise dorny/paths-filter (filter).
        SGL_KERNEL_CHANGED: ${{ steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel }}
        TARGET_STAGE: ${{ inputs.target_stage }}
      run: |
        # Use the kernel-build runner only when sgl_kernel changes are detected AND we're not in
        # target_stage mode.
        # NOTE(review): this ignores include_wheel_build, so /rerun-stage keeps the regular B200
        # runners even when the wheels are rebuilt - confirm that is intended.
        if [[ "$SGL_KERNEL_CHANGED" == "true" && -z "$TARGET_STAGE" ]]; then
          echo "b200_runner=4-gpu-b200-kernel" >> $GITHUB_OUTPUT
          echo "b200_low_disk_runner=4-gpu-b200-kernel-low-disk" >> $GITHUB_OUTPUT
        else
          echo "b200_runner=4-gpu-b200" >> $GITHUB_OUTPUT
          echo "b200_low_disk_runner=4-gpu-b200-low-disk" >> $GITHUB_OUTPUT
        fi

    - name: Enable retry for CI
      id: set-retry
      run: |
        echo "enable_retry=true" >> $GITHUB_OUTPUT
        echo "Retry logic enabled for CI"

    - name: Set continue-on-error for full test runs
      id: set-continue-on-error
      run: |
        if [[ "${{ steps.run-mode.outputs.run_all_tests }}" == "true" || "${{ inputs.force_continue_on_error }}" == "true" ]]; then
          echo "continue_on_error=true" >> $GITHUB_OUTPUT
          echo "Full test run or force flag detected, enabling continue-on-error to run all tests"
        else
          echo "continue_on_error=false" >> $GITHUB_OUTPUT
          echo "Filtered run, continue-on-error disabled"
        fi

    - name: Validate target_stage with kernel changes
      # Fail only when the PR has sgl-kernel changes AND the caller didn't opt into include_wheel_build.
      # include_wheel_build=true means sgl-kernel-build-wheels runs alongside the target stage
      # (see the sgl_kernel output above and the sgl-kernel-build-wheels if-condition), so it is
      # safe to proceed in that case.
      if: inputs.target_stage && !inputs.include_wheel_build && (steps.filter-api.outputs.sgl_kernel == 'true' || steps.filter.outputs.sgl_kernel == 'true')
      run: |
        echo "::error::Cannot use /rerun-stage when PR has sgl-kernel changes without include_wheel_build."
        echo "::error::The sgl-kernel-build-wheels job is skipped in target_stage mode by default, but this PR modifies sgl-kernel/ files."
        echo "::error::The slash-command handler should have set include_wheel_build=true automatically; falling back to /tag-and-rerun-ci."
        echo ""
        echo "ERROR: Cannot use /rerun-stage when PR has sgl-kernel changes without include_wheel_build."
        echo ""
        echo "This PR modifies files in sgl-kernel/, which requires building custom kernel wheels."
        echo "Running the target stage without rebuilding the kernel would use the wrong (PyPI)"
        echo "version of sgl-kernel instead of your changes."
        echo ""
        echo "The /rerun-stage handler sets include_wheel_build=true automatically when it detects"
        echo "sgl-kernel/ changes on the PR. If you see this error, the handler may be outdated."
        echo ""
        echo "Alternatives:"
        echo " /tag-and-rerun-ci - Re-run the full workflow including kernel builds"
        echo " /rerun-ci - Re-run the full workflow"
        echo ""
        exit 1

    - name: Show filter results in summary (table)
      env:
        # Free-form input: rendered through the environment instead of being spliced into the script.
        TARGET_STAGE_LABEL: ${{ inputs.target_stage || '(none)' }}
        # Report the detector that actually ran; both are skipped for run-all-tests runs and for
        # target_stage dispatches without pr_head_sha.
        DETECTION_METHOD: ${{ steps.filter-api.conclusion == 'success' && 'GitHub API' || steps.filter.conclusion == 'success' && 'dorny/paths-filter' || 'none' }}
      run: |
        {
          echo "## Change Detection"
          echo ""
          echo "| Component | Changed |"
          echo "|----------------------|---------|"
          echo "| main_package | ${{ steps.filter-api.outputs.main_package || steps.filter.outputs.main_package || steps.run-mode.outputs.run_all_tests }} |"
          echo "| sgl_kernel (raw) | ${{ steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel }} |"
          echo "| sgl_kernel (used) | ${{ (!inputs.target_stage || inputs.include_wheel_build) && (steps.filter-api.outputs.sgl_kernel || steps.filter.outputs.sgl_kernel) }} |"
          echo "| jit_kernel | ${{ steps.filter-api.outputs.jit_kernel || steps.filter.outputs.jit_kernel || steps.run-mode.outputs.run_all_tests }} |"
          echo "| multimodal_gen | ${{ steps.filter-api.outputs.multimodal_gen || steps.filter.outputs.multimodal_gen || steps.run-mode.outputs.run_all_tests }} |"
          echo "| target_stage | ${TARGET_STAGE_LABEL} |"
          echo "| detection_method | ${DETECTION_METHOD} |"
          echo "| max_parallel | ${{ steps.set-parallel.outputs.parallel_level }} (h100=${{ steps.set-parallel.outputs.max_parallel }}, 5090=${{ steps.set-parallel.outputs.max_parallel_small }}, 2gpu=${{ steps.set-parallel.outputs.max_parallel_2gpu }}) |"
          echo "| b200_runner | ${{ steps.set-runner.outputs.b200_runner }} |"
          echo "| b200_low_disk_runner | ${{ steps.set-runner.outputs.b200_low_disk_runner }} |"
          echo "| enable_retry | ${{ steps.set-retry.outputs.enable_retry }} |"
          echo "| continue_on_error | ${{ steps.set-continue-on-error.outputs.continue_on_error }} |"
        } >> $GITHUB_STEP_SUMMARY
# =============================================== Wait Jobs for Sequential PR Execution ====================================================
# The wait jobs poll the GitHub API until the previous stage has finished.
#   - PR runs:        the wait jobs run and enforce sequential execution via polling.
#   - Scheduled runs: the wait jobs are skipped, so stages run in parallel and are easier to retry.
wait-for-stage-a:
  needs:
    - check-changes
    - call-gate
  # Pull-request runs only (no target_stage, no parallel-dispatch test), and only when main_package
  # or sgl_kernel changed and the gate either passed or was skipped.
  if: |
    always() &&
    !cancelled() &&
    github.event_name == 'pull_request' &&
    !inputs.target_stage &&
    inputs.test_parallel_dispatch != true &&
    (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true') &&
    (needs.call-gate.result == 'success' || needs.call-gate.result == 'skipped')
  runs-on: ubuntu-latest
  outputs:
    stage_a_result: ${{ steps.wait.outputs.result }}
  steps:
    - uses: actions/checkout@v4
    - uses: ./.github/actions/check-maintenance
    - id: wait
      uses: ./.github/actions/wait-for-jobs
      with:
        stage-name: stage-a
        # One single job plus the four stage-a-test-cpu partitions.
        jobs: '["stage-a-test-1-gpu-small", {"prefix": "stage-a-test-cpu", "expected_count": 4}]'
        max-wait-minutes: '240'
# Polls until every stage-b job has finished; runs after wait-for-stage-a on pull-request runs.
wait-for-stage-b:
  needs:
    - check-changes
    - call-gate
    - wait-for-stage-a
  # Same gating as wait-for-stage-a, plus: the stage-a wait must have succeeded or been skipped.
  if: |
    always() &&
    !cancelled() &&
    github.event_name == 'pull_request' &&
    !inputs.target_stage &&
    inputs.test_parallel_dispatch != true &&
    (needs.check-changes.outputs.main_package == 'true' || needs.check-changes.outputs.sgl_kernel == 'true') &&
    (needs.wait-for-stage-a.result == 'success' || needs.wait-for-stage-a.result == 'skipped') &&
    (needs.call-gate.result == 'success' || needs.call-gate.result == 'skipped')
  runs-on: ubuntu-latest
  outputs:
    stage_b_result: ${{ steps.wait.outputs.result }}
  steps:
    - uses: actions/checkout@v4
    - uses: ./.github/actions/check-maintenance
    - id: wait
      uses: ./.github/actions/wait-for-jobs
      with:
        stage-name: stage-b
        # Job-name prefixes and the number of matrix jobs expected under each prefix.
        jobs: |
          [
            {"prefix": "stage-b-test-1-gpu-small", "expected_count": 8},
            {"prefix": "stage-b-test-1-gpu-large", "expected_count": 14},
            {"prefix": "stage-b-test-2-gpu-large", "expected_count": 4},
            {"prefix": "stage-b-test-4-gpu-b200", "expected_count": 1}
          ]
        max-wait-minutes: '480'
# =============================================== PR Gate ====================================================
# Runs the reusable pr-gate workflow before any test stage is allowed to start.
call-gate:
  needs:
    - check-changes
  # Not used for scheduled runs (they run all tests), parallel-dispatch tests or target_stage
  # dispatches; otherwise required as soon as any tracked component changed.
  if: |
    github.event_name != 'schedule' &&
    inputs.test_parallel_dispatch != true &&
    !inputs.target_stage &&
    (
      needs.check-changes.outputs.main_package == 'true' ||
      needs.check-changes.outputs.sgl_kernel == 'true' ||
      needs.check-changes.outputs.jit_kernel == 'true' ||
      needs.check-changes.outputs.multimodal_gen == 'true'
    )
  uses: ./.github/workflows/pr-gate.yml
  secrets: inherit
# =============================================== sgl-kernel ====================================================
# Builds the x86_64 sgl-kernel wheels (one matrix cell per CUDA version) and uploads them as
# artifacts for the test stages.
sgl-kernel-build-wheels:
  needs: [check-changes, call-gate]
  # Skip for scheduled runs (they run stages independently). Runs in target_stage mode only when
  # include_wheel_build is true (i.e. /rerun-stage on a PR with sgl-kernel changes), so the
  # target stage can download the freshly-built wheel.
  #
  # `always()` lets this job run when call-gate is skipped (which it always is in target_stage mode
  # by design). The explicit needs.<x>.result checks preserve the old gating for the normal PR path.
  if: |
    always() &&
    github.event_name != 'schedule' &&
    inputs.test_parallel_dispatch != true &&
    needs.check-changes.result == 'success' &&
    needs.check-changes.outputs.sgl_kernel == 'true' &&
    (
      (!inputs.target_stage && needs.call-gate.result == 'success') ||
      (inputs.target_stage && inputs.include_wheel_build)
    )
  runs-on: x64-kernel-build-node
  timeout-minutes: 240
  strategy:
    matrix:
      include:
        - python-version: "3.10"
          cuda-version: "13.0"
        - python-version: "3.10"
          cuda-version: "12.9"
  name: Build Wheel
  steps:
    - name: Cleanup
      # Guarded and quoted like the ARM job: with an unset or missing workspace the bare
      # `rm -rf $GITHUB_WORKSPACE/*` would expand to `rm -rf /*` under sudo.
      run: |
        if [ -d "$GITHUB_WORKSPACE" ]; then
          sudo rm -rf "$GITHUB_WORKSPACE"/* || true
        else
          echo "$GITHUB_WORKSPACE does not exist, nothing to clean"
        fi

    - uses: actions/checkout@v4
      with:
        submodules: "recursive"
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    - uses: ./.github/actions/check-maintenance

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

    - name: Free Docker disk space
      run: |
        set -x
        # build.sh retags sgl-kernel-deps:cuda${CUDA_VERSION}-${PY_TAG}-${ARCH}
        # on every run, leaving the previous image as a dangling <none>:<none>
        # entry (~16-23 GB each). Prune them before building so the runner
        # doesn't fill up. The local buildx cache at ~/.cache/sgl-kernel/buildx
        # and the tagged sgl-kernel-deps image are not affected.
        # `until=12h` avoids racing with a sibling matrix cell (cuda 12.9 vs
        # 13.0) that may have just orphaned an image seconds ago.
        docker image prune -f --filter "until=12h"
        # Drop orphaned buildx builder volumes from past `docker buildx create`
        # invocations. The active `sgl-kernel-builder` volume is held open and
        # would fail to remove anyway, but skip it explicitly for clarity.
        for v in $(docker volume ls -q | grep '^buildx_buildkit_' | grep -v '^buildx_buildkit_sgl-kernel-builder' || true); do
          echo "Removing orphan buildx volume: $v"
          docker volume rm "$v" || true
        done
        df -h /

    - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
      run: |
        cd sgl-kernel
        ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
      env:
        USE_CCACHE: 1

    - name: Verify wheel artifacts
      # The second ls fails the step when no .whl file was produced.
      run: |
        ls -alh sgl-kernel/dist
        ls -alh sgl-kernel/dist/*.whl

    - name: Upload artifacts
      uses: actions/upload-artifact@v4
      with:
        name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
        path: sgl-kernel/dist/*
        if-no-files-found: error
# aarch64 counterpart of sgl-kernel-build-wheels: same matrix and steps, built on the ARM build
# node and uploaded with an "-aarch64" artifact suffix.
sgl-kernel-build-wheels-arm:
  name: Build Wheel Arm
  needs:
    - check-changes
    - call-gate
  # Not run for scheduled runs (they run stages independently). In target_stage mode it runs only
  # when include_wheel_build is true (i.e. /rerun-stage on a PR with sgl-kernel changes).
  # always() is required because call-gate is skipped in target_stage mode; the explicit result
  # checks keep the normal PR path gated on check-changes and call-gate succeeding.
  if: |
    always() &&
    github.event_name != 'schedule' &&
    inputs.test_parallel_dispatch != true &&
    needs.check-changes.result == 'success' &&
    needs.check-changes.outputs.sgl_kernel == 'true' &&
    (
      (!inputs.target_stage && needs.call-gate.result == 'success') ||
      (inputs.target_stage && inputs.include_wheel_build)
    )
  runs-on: arm-kernel-build-node
  timeout-minutes: 240
  strategy:
    matrix:
      include:
        - python-version: "3.10"
          cuda-version: "13.0"
        - python-version: "3.10"
          cuda-version: "12.9"
  steps:
    - name: Cleanup
      # Only wipe the workspace when the directory actually exists.
      run: |
        if [ -d "$GITHUB_WORKSPACE" ]; then
          sudo rm -rf "$GITHUB_WORKSPACE"/* || true
        else
          echo "$GITHUB_WORKSPACE does not exist, nothing to clean"
        fi

    - uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}
        submodules: "recursive"

    - uses: ./.github/actions/check-maintenance

    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v5
      with:
        python-version: ${{ matrix.python-version }}

    - name: Free Docker disk space
      run: |
        set -x
        # Prune dangling images older than 12h, then drop every buildx builder volume except the
        # one belonging to sgl-kernel-builder.
        docker image prune -f --filter "until=12h"
        for v in $(docker volume ls -q | grep '^buildx_buildkit_' | grep -v '^buildx_buildkit_sgl-kernel-builder' || true); do
          echo "Removing orphan buildx volume: $v"
          docker volume rm "$v" || true
        done
        df -h /

    - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
      env:
        USE_CCACHE: 1
      run: |
        cd sgl-kernel
        ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"

    - name: Verify wheel artifacts
      # The second ls fails the step when no .whl file was produced.
      run: |
        ls -alh sgl-kernel/dist
        ls -alh sgl-kernel/dist/*.whl

    - name: Upload artifacts
      uses: actions/upload-artifact@v4
      with:
        name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}-aarch64
        path: sgl-kernel/dist/*
        if-no-files-found: error
# Hands over to the reusable sgl-kernel test workflow once the x86_64 wheels have been built.
# Only used on the regular path: not for scheduled runs, parallel-dispatch tests or target_stage.
call-sgl-kernel-tests:
  needs:
    - check-changes
    - call-gate
    - sgl-kernel-build-wheels
  if: |
    github.event_name != 'schedule' &&
    inputs.test_parallel_dispatch != true &&
    !inputs.target_stage &&
    needs.check-changes.outputs.sgl_kernel == 'true'
  uses: ./.github/workflows/pr-test-sgl-kernel.yml
  secrets: inherit
  with:
    sgl_kernel: ${{ needs.check-changes.outputs.sgl_kernel }}
    b200_runner: ${{ needs.check-changes.outputs.b200_runner }}
    # Empty strings are forwarded when the corresponding input is not set.
    pr_head_sha: ${{ inputs.pr_head_sha || '' }}
    git_ref: ${{ inputs.git_ref || '' }}
    skip_stage_health_check: ${{ inputs.skip_stage_health_check == true }}
# =============================================== jit-kernel ====================================================
# Hands over to the reusable jit-kernel test workflow when jit_kernel is flagged.
call-jit-kernel-tests:
  needs: [check-changes, call-gate]
  # call-gate is skipped for scheduled runs, for test_parallel_dispatch and in target_stage mode.
  # An `if` without a status-check function is implicitly `success() && ...`, so with the bare
  # jit_kernel check this job was skipped together with call-gate - i.e. in exactly the modes whose
  # inputs (target_stage, test_parallel_dispatch) are forwarded below. always() plus explicit
  # result checks keeps the normal PR path gated on call-gate succeeding while letting the job run
  # when the gate is skipped by design.
  # NOTE(review): this makes the jit-kernel workflow run on scheduled runs and /rerun-stage
  # dispatches; confirm pr-test-jit-kernel.yml filters its jobs by target_stage as expected.
  if: |
    always() &&
    !cancelled() &&
    needs.check-changes.result == 'success' &&
    (needs.call-gate.result == 'success' || needs.call-gate.result == 'skipped') &&
    needs.check-changes.outputs.jit_kernel == 'true'
  uses: ./.github/workflows/pr-test-jit-kernel.yml
  with:
    jit_kernel: ${{ needs.check-changes.outputs.jit_kernel }}
    b200_runner: ${{ needs.check-changes.outputs.b200_runner }}
    pr_head_sha: ${{ inputs.pr_head_sha || '' }}
    git_ref: ${{ inputs.git_ref || '' }}
    target_stage: ${{ inputs.target_stage || '' }}
    # Forwarded as the strings 'true' / 'false'.
    test_parallel_dispatch: ${{ inputs.test_parallel_dispatch == true && 'true' || 'false' }}
    skip_stage_health_check: ${{ inputs.skip_stage_health_check == true }}
  secrets: inherit
# =============================================== primary ====================================================
# Runs on 5090 (32GB, SM120)
stage-a-test-1-gpu-small:
  needs: [check-changes, call-gate, sgl-kernel-build-wheels]
  # Two ways in:
  #   1. /rerun-stage targeting this stage. always() alone would start the job even when
  #      check-changes failed (including its sgl-kernel validation), the wheel build failed, or the
  #      run was cancelled, so this branch also requires !failure() && !cancelled(). Skipped needs
  #      (call-gate, and the wheel build without include_wheel_build) do not count as failures.
  #   2. A normal run with main_package or sgl_kernel changes. Scheduled and parallel-dispatch runs
  #      start regardless of earlier results; everything else requires no failure/cancellation.
  if: |
    always() &&
    (
      (inputs.target_stage == 'stage-a-test-1-gpu-small' && !failure() && !cancelled()) ||
      (
        !inputs.target_stage &&
        ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
      )
    )
  runs-on: 1-gpu-5090
  timeout-minutes: 240
  steps:
    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    - uses: ./.github/actions/check-stage-health

    - uses: ./.github/actions/check-maintenance

    - name: Download artifacts
      # Only needed when this run uses custom-built sgl-kernel wheels.
      if: needs.check-changes.outputs.sgl_kernel == 'true'
      uses: actions/download-artifact@v4
      with:
        path: sgl-kernel/dist/
        merge-multiple: true
        pattern: wheel-python3.10-cuda*

    - name: Install dependencies
      timeout-minutes: 20
      run: |
        CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh

    - name: Run test
      timeout-minutes: 10
      env:
        # Expands to '--continue-on-error' or to nothing, depending on check-changes.
        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: |
        cd test/
        python3 run_suite.py --hw cuda --suite stage-a-test-1-gpu-small $CONTINUE_ON_ERROR_FLAG

    - uses: ./.github/actions/upload-cuda-coredumps
      if: failure()

    - name: Cleanup venv
      if: always()
      run: bash scripts/ci/cuda/ci_cleanup_venv.sh
# CPU-only stage-a suite, split into four partitions on GitHub-hosted runners.
stage-a-test-cpu:
  needs: [check-changes, call-gate]
  # Two ways in:
  #   1. /rerun-stage targeting this stage. always() alone would start the job even when
  #      check-changes failed or the run was cancelled, so this branch also requires
  #      !failure() && !cancelled(). A skipped call-gate does not count as a failure.
  #   2. A normal run with main_package changes. Scheduled and parallel-dispatch runs start
  #      regardless of earlier results; everything else requires no failure/cancellation.
  if: |
    always() &&
    (
      (inputs.target_stage == 'stage-a-test-cpu' && !failure() && !cancelled()) ||
      (
        !inputs.target_stage &&
        ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) &&
        (needs.check-changes.outputs.main_package == 'true')
      )
    )
  runs-on: ubuntu-latest
  timeout-minutes: 240
  strategy:
    fail-fast: false
    matrix:
      # Must stay in sync with --auto-partition-size in the "Run test" step.
      partition: [0, 1, 2, 3]
  steps:
    - name: Free disk space
      run: |
        sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc
        df -h

    - name: Checkout code
      uses: actions/checkout@v4
      with:
        ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }}

    - uses: ./.github/actions/check-stage-health

    - uses: ./.github/actions/check-maintenance

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.10'

    - name: Install uv
      uses: astral-sh/setup-uv@v5

    # Needed by setuptools-rust to build the bundled native gRPC extension
    # (rust/sglang-grpc) when installing the main `sglang` wheel from source.
    - name: Install protoc
      run: sudo bash scripts/ci/utils/install_protoc.sh

    - name: Install Rust toolchain
      run: bash scripts/ci/utils/install_rustup.sh

    # uv pip targets a venv by default; setup-python has no venv, so install into that interpreter
    # (see UV_SYSTEM_PYTHON in https://docs.astral.sh/uv/guides/integration/github/).
    - name: Install dependencies
      timeout-minutes: 20
      env:
        UV_SYSTEM_PYTHON: "1"
      run: |
        uv pip install -e "python[dev]" --index-strategy unsafe-best-match --prerelease allow

    - name: Run test
      timeout-minutes: 10
      env:
        # Expands to '--continue-on-error' or to nothing, depending on check-changes.
        CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }}
      run: |
        cd test/
        python3 run_suite.py --hw cpu --suite stage-a-test-cpu --auto-partition-id ${{ matrix.partition }} --auto-partition-size 4 $CONTINUE_ON_ERROR_FLAG
| # Runs on 5090 (32GB, SM120) | |
| # Stage B single-GPU "small" suite: 8 partitions of `--suite stage-b-test-1-gpu-small` on the | |
| # `1-gpu-5090` runner label. Listed after wait-for-stage-a and the sgl-kernel wheel build in `needs`. | |
| stage-b-test-1-gpu-small: | |
| needs: [check-changes, call-gate, wait-for-stage-a, sgl-kernel-build-wheels] | |
| # Runs when explicitly targeted via inputs.target_stage, or - with no target stage - when | |
| # main_package or sgl_kernel changed and either the run is scheduled / test_parallel_dispatch | |
| # or no needed job failed or was cancelled. | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'stage-b-test-1-gpu-small') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| ) | |
| ) | |
| runs-on: 1-gpu-5090 | |
| timeout-minutes: 240 | |
| strategy: | |
| fail-fast: false | |
| # Concurrency cap comes from check-changes; fromJson converts the string output into a number. | |
| max-parallel: ${{ fromJson(needs.check-changes.outputs.max_parallel_small) }} | |
| matrix: | |
| # 8 partitions; must match --auto-partition-size in the "Run test" step. | |
| partition: [0, 1, 2, 3, 4, 5, 6, 7] | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Precedence: explicit PR head SHA, then caller-supplied git ref, then the triggering commit. | |
| ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }} | |
| - uses: ./.github/actions/check-stage-health | |
| - uses: ./.github/actions/check-maintenance | |
| # Only when sgl_kernel changed: fetch the artifacts matching the wheel pattern into sgl-kernel/dist/. | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v4 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda* | |
| # CUSTOM_BUILD_SGL_KERNEL forwards the sgl_kernel change flag to the install script. | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh | |
| - name: Run test | |
| timeout-minutes: 30 | |
| env: | |
| # Expands to '--continue-on-error' when check-changes sets continue_on_error to 'true', else to an empty string. | |
| CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }} | |
| run: | | |
| cd test/ | |
| python3 run_suite.py --hw cuda --suite stage-b-test-1-gpu-small --auto-partition-id ${{ matrix.partition }} --auto-partition-size 8 $CONTINUE_ON_ERROR_FLAG | |
| # Runs only if an earlier step failed; the partition id is passed as the artifact suffix. | |
| - uses: ./.github/actions/upload-cuda-coredumps | |
| if: failure() | |
| with: | |
| artifact-suffix: ${{ matrix.partition }} | |
| # Always runs, regardless of the outcome of earlier steps. | |
| - name: Cleanup venv | |
| if: always() | |
| run: bash scripts/ci/cuda/ci_cleanup_venv.sh | |
| # Runs on H100 (80GB, SM90) - tests that don't pass on 5090 (FA3, FP8, high VRAM, etc.) | |
| # Stage B single-GPU "large" suite: 14 partitions of `--suite stage-b-test-1-gpu-large` on the | |
| # `1-gpu-h100` runner label, with a per-file timeout of 1800 passed to run_suite.py. | |
| stage-b-test-1-gpu-large: | |
| needs: [check-changes, call-gate, wait-for-stage-a, sgl-kernel-build-wheels] | |
| # Runs when explicitly targeted via inputs.target_stage, or - with no target stage - when | |
| # main_package or sgl_kernel changed and either the run is scheduled / test_parallel_dispatch | |
| # or no needed job failed or was cancelled. | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'stage-b-test-1-gpu-large') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| ) | |
| ) | |
| runs-on: 1-gpu-h100 | |
| timeout-minutes: 240 | |
| strategy: | |
| fail-fast: false | |
| # Concurrency cap comes from check-changes; fromJson converts the string output into a number. | |
| max-parallel: ${{ fromJson(needs.check-changes.outputs.max_parallel) }} | |
| matrix: | |
| # 14 partitions; must match --auto-partition-size in the "Run test" step. | |
| partition: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Precedence: explicit PR head SHA, then caller-supplied git ref, then the triggering commit. | |
| ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }} | |
| - uses: ./.github/actions/check-stage-health | |
| - uses: ./.github/actions/check-maintenance | |
| # Only when sgl_kernel changed: fetch the artifacts matching the wheel pattern into sgl-kernel/dist/. | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v4 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda* | |
| # CUSTOM_BUILD_SGL_KERNEL forwards the sgl_kernel change flag to the install script. | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh | |
| - name: Run test | |
| timeout-minutes: 30 | |
| env: | |
| # Expands to '--continue-on-error' when check-changes sets continue_on_error to 'true', else to an empty string. | |
| CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }} | |
| run: | | |
| cd test/ | |
| python3 run_suite.py --hw cuda --suite stage-b-test-1-gpu-large --auto-partition-id ${{ matrix.partition }} --auto-partition-size 14 --timeout-per-file 1800 $CONTINUE_ON_ERROR_FLAG | |
| # Runs only if an earlier step failed; the partition id is passed as the artifact suffix. | |
| - uses: ./.github/actions/upload-cuda-coredumps | |
| if: failure() | |
| with: | |
| artifact-suffix: ${{ matrix.partition }} | |
| # Always runs, regardless of the outcome of earlier steps. | |
| - name: Cleanup venv | |
| if: always() | |
| run: bash scripts/ci/cuda/ci_cleanup_venv.sh | |
| # Stage B two-GPU suite: 4 partitions of `--suite stage-b-test-2-gpu-large` on the `2-gpu-h100` runner label. | |
| stage-b-test-2-gpu-large: | |
| needs: [check-changes, call-gate, wait-for-stage-a, sgl-kernel-build-wheels] | |
| # Runs when explicitly targeted via inputs.target_stage, or - with no target stage - when | |
| # main_package or sgl_kernel changed and either the run is scheduled / test_parallel_dispatch | |
| # or no needed job failed or was cancelled. | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'stage-b-test-2-gpu-large') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| ) | |
| ) | |
| runs-on: 2-gpu-h100 | |
| timeout-minutes: 240 | |
| strategy: | |
| fail-fast: false | |
| # Concurrency cap comes from check-changes; fromJson converts the string output into a number. | |
| max-parallel: ${{ fromJson(needs.check-changes.outputs.max_parallel_2gpu) }} | |
| matrix: | |
| # 4 partitions; must match --auto-partition-size in the "Run test" step. | |
| partition: [0, 1, 2, 3] | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Precedence: explicit PR head SHA, then caller-supplied git ref, then the triggering commit. | |
| ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }} | |
| - uses: ./.github/actions/check-stage-health | |
| - uses: ./.github/actions/check-maintenance | |
| # Only when sgl_kernel changed: fetch the artifacts matching the wheel pattern into sgl-kernel/dist/. | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v4 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda* | |
| # CUSTOM_BUILD_SGL_KERNEL forwards the sgl_kernel change flag to the install script. | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh | |
| - name: Run test | |
| timeout-minutes: 30 | |
| env: | |
| # Expands to '--continue-on-error' when check-changes sets continue_on_error to 'true', else to an empty string. | |
| CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }} | |
| run: | | |
| cd test/ | |
| python3 run_suite.py --hw cuda --suite stage-b-test-2-gpu-large --auto-partition-id ${{ matrix.partition }} --auto-partition-size 4 $CONTINUE_ON_ERROR_FLAG | |
| # Runs only if an earlier step failed; the partition id is passed as the artifact suffix. | |
| - uses: ./.github/actions/upload-cuda-coredumps | |
| if: failure() | |
| with: | |
| artifact-suffix: ${{ matrix.partition }} | |
| # Always runs, regardless of the outcome of earlier steps. | |
| - name: Cleanup venv | |
| if: always() | |
| run: bash scripts/ci/cuda/ci_cleanup_venv.sh | |
| # Stage B four-GPU B200 suite: a single (unpartitioned) run of `--suite stage-b-test-4-gpu-b200`, | |
| # followed by the FA4 jit_kernel pytest file. The runner label is resolved by check-changes. | |
| stage-b-test-4-gpu-b200: | |
| needs: [check-changes, call-gate, wait-for-stage-a, sgl-kernel-build-wheels] | |
| # Runs when explicitly targeted via inputs.target_stage, or - with no target stage - when | |
| # main_package or sgl_kernel changed and either the run is scheduled / test_parallel_dispatch | |
| # or no needed job failed or was cancelled. | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'stage-b-test-4-gpu-b200') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| ) | |
| ) | |
| # The `*-low-disk` label (resolved by `set-runner` to `4-gpu-b200-low-disk` or | |
| # `4-gpu-b200-kernel-low-disk`) is advertised by both the existing large-disk B200 | |
| # runners and the new low-disk runner, so this job can land on either pool. | |
| runs-on: ${{ needs.check-changes.outputs.b200_low_disk_runner }} | |
| timeout-minutes: 240 | |
| # NOTE(review): no matrix is defined for this job, so fail-fast has nothing to act on here. | |
| strategy: | |
| fail-fast: false | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Precedence: explicit PR head SHA, then caller-supplied git ref, then the triggering commit. | |
| ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }} | |
| - uses: ./.github/actions/check-stage-health | |
| - uses: ./.github/actions/check-maintenance | |
| # Only when sgl_kernel changed: fetch the artifacts matching the wheel pattern into sgl-kernel/dist/. | |
| # NOTE(review): this job pins download-artifact@v6 while the H100/H200/H20/5090 jobs pin @v4 - confirm this is intentional. | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v6 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda* | |
| # CUSTOM_BUILD_SGL_KERNEL forwards the sgl_kernel change flag to the install script. | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh | |
| - name: Run test | |
| timeout-minutes: 30 | |
| env: | |
| # Expands to '--continue-on-error' when check-changes sets continue_on_error to 'true', else to an empty string. | |
| CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }} | |
| run: | | |
| cd test | |
| python3 run_suite.py --hw cuda --suite stage-b-test-4-gpu-b200 $CONTINUE_ON_ERROR_FLAG | |
| # Each `run` step starts in the workspace root (the `cd test` above does not carry over), | |
| # so the pytest path below is relative to the repository root. | |
| - name: Run FA4 jit_kernel tests (SM100+) | |
| timeout-minutes: 10 | |
| run: | | |
| python3 -m pytest -q python/sglang/jit_kernel/tests/test_flash_attention_4.py | |
| # Runs only if an earlier step failed. | |
| - uses: ./.github/actions/upload-cuda-coredumps | |
| if: failure() | |
| # Always runs, regardless of the outcome of earlier steps. | |
| - name: Cleanup venv | |
| if: always() | |
| run: bash scripts/ci/cuda/ci_cleanup_venv.sh | |
| # Delegates the multimodal-gen stages to the reusable workflow pr-test-multimodal-gen.yml, | |
| # forwarding change-detection outputs and the dispatch inputs of this workflow. | |
| call-multimodal-gen-tests: | |
| needs: [check-changes, call-gate, sgl-kernel-build-wheels] | |
| # Never runs on a cancelled workflow. Otherwise runs when inputs.target_stage names one of the | |
| # multimodal-gen stages listed below, or - with no target stage - when check-changes reports | |
| # multimodal_gen == 'true' and either the run is scheduled / test_parallel_dispatch or no | |
| # needed job failed or was cancelled. | |
| if: | | |
| always() && | |
| !cancelled() && | |
| ( | |
| inputs.target_stage == 'multimodal-gen-test-1-gpu' || | |
| inputs.target_stage == 'multimodal-gen-test-2-gpu' || | |
| inputs.target_stage == 'multimodal-gen-component-accuracy' || | |
| inputs.target_stage == 'multimodal-gen-component-accuracy-1-gpu' || | |
| inputs.target_stage == 'multimodal-gen-component-accuracy-2-gpu' || | |
| inputs.target_stage == 'multimodal-gen-test-1-b200' || | |
| inputs.target_stage == 'multimodal-gen-unit-test' || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| needs.check-changes.outputs.multimodal_gen == 'true' | |
| ) | |
| ) | |
| uses: ./.github/workflows/pr-test-multimodal-gen.yml | |
| with: | |
| multimodal_gen: ${{ needs.check-changes.outputs.multimodal_gen }} | |
| sgl_kernel: ${{ needs.check-changes.outputs.sgl_kernel }} | |
| b200_runner: ${{ needs.check-changes.outputs.b200_runner }} | |
| continue_on_error: ${{ needs.check-changes.outputs.continue_on_error }} | |
| # `|| ''` turns an unset input into an empty string. | |
| pr_head_sha: ${{ inputs.pr_head_sha || '' }} | |
| git_ref: ${{ inputs.git_ref || '' }} | |
| target_stage: ${{ inputs.target_stage || '' }} | |
| # Boolean-valued inputs are forwarded as the literal strings 'true' / 'false'. | |
| test_parallel_dispatch: ${{ inputs.test_parallel_dispatch == true && 'true' || 'false' }} | |
| # 'true' when call-gate, sgl-kernel-build-wheels or check-changes finished with result 'failure'. | |
| caller_needs_failure: ${{ (needs.call-gate.result == 'failure' || needs.sgl-kernel-build-wheels.result == 'failure' || needs.check-changes.result == 'failure') && 'true' || 'false' }} | |
| skip_stage_health_check: ${{ inputs.skip_stage_health_check == true && 'true' || 'false' }} | |
| # Make all of this workflow's secrets available to the called workflow. | |
| secrets: inherit | |
| # Stage C four-GPU suite: 3 partitions of `--suite stage-c-test-4-gpu-h100` on the `4-gpu-h100` | |
| # runner label. Listed after wait-for-stage-b and the sgl-kernel wheel build in `needs`. | |
| stage-c-test-4-gpu-h100: | |
| needs: [check-changes, call-gate, wait-for-stage-b, sgl-kernel-build-wheels] | |
| # Runs when explicitly targeted via inputs.target_stage, or - with no target stage - when | |
| # main_package or sgl_kernel changed and either the run is scheduled / test_parallel_dispatch | |
| # or no needed job failed or was cancelled. | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'stage-c-test-4-gpu-h100') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| ) | |
| ) | |
| runs-on: 4-gpu-h100 | |
| timeout-minutes: 240 | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| # 3 partitions; must match --auto-partition-size in the "Run test" step. | |
| part: [0, 1, 2] | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Precedence: explicit PR head SHA, then caller-supplied git ref, then the triggering commit. | |
| ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }} | |
| - uses: ./.github/actions/check-stage-health | |
| - uses: ./.github/actions/check-maintenance | |
| # Only when sgl_kernel changed: fetch the artifacts matching the wheel pattern into sgl-kernel/dist/. | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v4 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda* | |
| # CUSTOM_BUILD_SGL_KERNEL forwards the sgl_kernel change flag to the install script. | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh | |
| - name: Run test | |
| timeout-minutes: 30 | |
| env: | |
| # Expands to '--continue-on-error' when check-changes sets continue_on_error to 'true', else to an empty string. | |
| CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }} | |
| run: | | |
| cd test | |
| python3 run_suite.py --hw cuda --suite stage-c-test-4-gpu-h100 --auto-partition-id ${{ matrix.part }} --auto-partition-size 3 $CONTINUE_ON_ERROR_FLAG | |
| # Runs only if an earlier step failed; the partition id is passed as the artifact suffix. | |
| - uses: ./.github/actions/upload-cuda-coredumps | |
| if: failure() | |
| with: | |
| artifact-suffix: ${{ matrix.part }} | |
| # Always runs, regardless of the outcome of earlier steps. | |
| - name: Cleanup venv | |
| if: always() | |
| run: bash scripts/ci/cuda/ci_cleanup_venv.sh | |
| # Stage C eight-GPU suite: 4 partitions of `--suite stage-c-test-8-gpu-h200` on the `8-gpu-h200` | |
| # runner label. Two warmup steps (DeepGEMM JIT, server CUDA graphs) run before the tests. | |
| stage-c-test-8-gpu-h200: | |
| needs: [check-changes, call-gate, wait-for-stage-b, sgl-kernel-build-wheels] | |
| # Runs when explicitly targeted via inputs.target_stage, or - with no target stage - when | |
| # main_package or sgl_kernel changed and either the run is scheduled / test_parallel_dispatch | |
| # or no needed job failed or was cancelled. | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'stage-c-test-8-gpu-h200') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| ) | |
| ) | |
| runs-on: 8-gpu-h200 | |
| timeout-minutes: 240 | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| # 4 partitions; must match --auto-partition-size in the "Run test" step. | |
| part: [0, 1, 2, 3] | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Precedence: explicit PR head SHA, then caller-supplied git ref, then the triggering commit. | |
| ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }} | |
| - uses: ./.github/actions/check-stage-health | |
| - uses: ./.github/actions/check-maintenance | |
| # Only when sgl_kernel changed: fetch the artifacts matching the wheel pattern into sgl-kernel/dist/. | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v4 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda* | |
| # CUSTOM_BUILD_SGL_KERNEL forwards the sgl_kernel change flag to the install script. | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh | |
| # Sources the venv activate script and env.sh only if they exist under SGLANG_CI_VENV_PATH | |
| # (an unset variable falls back to /dev/null, so both checks are skipped), then runs the | |
| # DeepGEMM warmup script with the listed model arguments. | |
| # NOTE(review): the ":8" suffix on each model presumably is the GPU / TP count - confirm in warmup_deep_gemm.py. | |
| - name: Warmup DeepGEMM JIT Compilation | |
| timeout-minutes: 25 | |
| run: | | |
| # Activate venv if available (GITHUB_ENV may have failed to propagate) | |
| [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/bin/activate" ] && source "${SGLANG_CI_VENV_PATH}/bin/activate" | |
| [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/env.sh" ] && source "${SGLANG_CI_VENV_PATH}/env.sh" | |
| python3 scripts/ci/cuda/warmup_deep_gemm.py \ | |
| deepseek-ai/DeepSeek-V3-0324:8 \ | |
| deepseek-ai/DeepSeek-V3.2-Exp:8 | |
| # Same optional venv activation as above, then runs the server warmup script. | |
| - name: Warmup Server CUDA Graphs | |
| timeout-minutes: 25 | |
| run: | | |
| [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/bin/activate" ] && source "${SGLANG_CI_VENV_PATH}/bin/activate" | |
| [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/env.sh" ] && source "${SGLANG_CI_VENV_PATH}/env.sh" | |
| python3 scripts/ci/cuda/warmup_server.py \ | |
| deepseek-ai/DeepSeek-V3-0324:8 \ | |
| inclusionAI/Ring-2.5-1T:8 | |
| - name: Run test | |
| timeout-minutes: 30 | |
| env: | |
| # Expands to '--continue-on-error' when check-changes sets continue_on_error to 'true', else to an empty string. | |
| CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }} | |
| run: | | |
| cd test | |
| python3 run_suite.py --hw cuda --suite stage-c-test-8-gpu-h200 --auto-partition-id ${{ matrix.part }} --auto-partition-size 4 $CONTINUE_ON_ERROR_FLAG | |
| # Runs only if an earlier step failed; the partition id is passed as the artifact suffix. | |
| - uses: ./.github/actions/upload-cuda-coredumps | |
| if: failure() | |
| with: | |
| artifact-suffix: ${{ matrix.part }} | |
| # Always runs, regardless of the outcome of earlier steps. | |
| - name: Cleanup venv | |
| if: always() | |
| run: bash scripts/ci/cuda/ci_cleanup_venv.sh | |
| # Stage C eight-GPU H20 suite: 2 partitions of `--suite stage-c-test-8-gpu-h20` on the `8-gpu-h20` | |
| # runner label. Dependencies are installed with ci_install_deepep.sh rather than ci_install_dependency.sh. | |
| stage-c-test-8-gpu-h20: | |
| needs: [check-changes, call-gate, wait-for-stage-b, sgl-kernel-build-wheels] | |
| # Runs when explicitly targeted via inputs.target_stage, or - with no target stage - when | |
| # main_package or sgl_kernel changed and either the run is scheduled / test_parallel_dispatch | |
| # or no needed job failed or was cancelled. | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'stage-c-test-8-gpu-h20') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| ) | |
| ) | |
| runs-on: 8-gpu-h20 | |
| timeout-minutes: 240 | |
| # Job-level environment, visible to every step below. | |
| # NOTE(review): the consumers of these two variables are not in this file - presumably the | |
| # install and test scripts; confirm before changing the values. | |
| env: | |
| SGLANG_CI_RDMA_ALL_DEVICES: "mlx5_1,mlx5_2,mlx5_3,mlx5_4" | |
| CU_VERSION: cu129 | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| # 2 partitions; must match --auto-partition-size in the "Run test" step. | |
| part: [0, 1] | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Precedence: explicit PR head SHA, then caller-supplied git ref, then the triggering commit. | |
| ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }} | |
| - uses: ./.github/actions/check-stage-health | |
| - uses: ./.github/actions/check-maintenance | |
| # Only when sgl_kernel changed: fetch the artifacts matching the wheel pattern into sgl-kernel/dist/. | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v4 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda* | |
| # CUSTOM_BUILD_SGL_KERNEL forwards the sgl_kernel change flag to the DeepEP install script. | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_deepep.sh | |
| - name: Run test | |
| timeout-minutes: 30 | |
| env: | |
| # Expands to '--continue-on-error' when check-changes sets continue_on_error to 'true', else to an empty string. | |
| CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }} | |
| run: | | |
| cd test | |
| python3 run_suite.py --hw cuda --suite stage-c-test-8-gpu-h20 --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 $CONTINUE_ON_ERROR_FLAG | |
| # Runs only if an earlier step failed; the partition id is passed as the artifact suffix. | |
| - uses: ./.github/actions/upload-cuda-coredumps | |
| if: failure() | |
| with: | |
| artifact-suffix: ${{ matrix.part }} | |
| # Always runs, regardless of the outcome of earlier steps. | |
| - name: Cleanup venv | |
| if: always() | |
| run: bash scripts/ci/cuda/ci_cleanup_venv.sh | |
| # Stage C DeepEP four-GPU suite: a single (unpartitioned) run of `--suite stage-c-test-deepep-4-gpu-h100` | |
| # on the `4-gpu-h100` runner label, installed via ci_install_deepep.sh and preceded by two warmup steps. | |
| stage-c-test-deepep-4-gpu-h100: | |
| needs: [check-changes, call-gate, wait-for-stage-b, sgl-kernel-build-wheels] | |
| # Runs when explicitly targeted via inputs.target_stage, or - with no target stage - when | |
| # main_package or sgl_kernel changed and either the run is scheduled / test_parallel_dispatch | |
| # or no needed job failed or was cancelled. | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'stage-c-test-deepep-4-gpu-h100') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| ) | |
| ) | |
| runs-on: 4-gpu-h100 | |
| timeout-minutes: 240 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Precedence: explicit PR head SHA, then caller-supplied git ref, then the triggering commit. | |
| ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }} | |
| - uses: ./.github/actions/check-stage-health | |
| - uses: ./.github/actions/check-maintenance | |
| # Only when sgl_kernel changed: fetch the artifacts matching the wheel pattern into sgl-kernel/dist/. | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v4 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda* | |
| # CUSTOM_BUILD_SGL_KERNEL forwards the sgl_kernel change flag to the DeepEP install script. | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_deepep.sh | |
| # Sources the venv activate script and env.sh only if they exist under SGLANG_CI_VENV_PATH | |
| # (an unset variable falls back to /dev/null, so both checks are skipped), then runs the | |
| # DeepGEMM warmup script for the CI test model. | |
| - name: Warmup DeepGEMM JIT Compilation | |
| timeout-minutes: 25 | |
| run: | | |
| # Activate venv if available (GITHUB_ENV may have failed to propagate) | |
| [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/bin/activate" ] && source "${SGLANG_CI_VENV_PATH}/bin/activate" | |
| [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/env.sh" ] && source "${SGLANG_CI_VENV_PATH}/env.sh" | |
| python3 scripts/ci/cuda/warmup_deep_gemm.py \ | |
| lmsys/sglang-ci-dsv3-test:4 | |
| # Same optional venv activation as above, then runs the server warmup script. | |
| - name: Warmup Server CUDA Graphs | |
| timeout-minutes: 25 | |
| run: | | |
| [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/bin/activate" ] && source "${SGLANG_CI_VENV_PATH}/bin/activate" | |
| [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/env.sh" ] && source "${SGLANG_CI_VENV_PATH}/env.sh" | |
| python3 scripts/ci/cuda/warmup_server.py \ | |
| lmsys/sglang-ci-dsv3-test:4 | |
| - name: Run test | |
| timeout-minutes: 30 | |
| env: | |
| # Expands to '--continue-on-error' when check-changes sets continue_on_error to 'true', else to an empty string. | |
| CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }} | |
| run: | | |
| cd test | |
| python3 run_suite.py --hw cuda --suite stage-c-test-deepep-4-gpu-h100 $CONTINUE_ON_ERROR_FLAG | |
| # Runs only if an earlier step failed. | |
| - uses: ./.github/actions/upload-cuda-coredumps | |
| if: failure() | |
| # Always runs, regardless of the outcome of earlier steps. | |
| - name: Cleanup venv | |
| if: always() | |
| run: bash scripts/ci/cuda/ci_cleanup_venv.sh | |
| # Stage C DeepEP eight-GPU suite: a single (unpartitioned) run of `--suite stage-c-test-deepep-8-gpu-h200` | |
| # on the `8-gpu-h200-deepep` runner label, installed via ci_install_deepep.sh and preceded by two warmup steps. | |
| stage-c-test-deepep-8-gpu-h200: | |
| needs: [check-changes, call-gate, wait-for-stage-b, sgl-kernel-build-wheels] | |
| # Runs when explicitly targeted via inputs.target_stage, or - with no target stage - when | |
| # main_package or sgl_kernel changed and either the run is scheduled / test_parallel_dispatch | |
| # or no needed job failed or was cancelled. | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'stage-c-test-deepep-8-gpu-h200') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| ) | |
| ) | |
| runs-on: 8-gpu-h200-deepep | |
| timeout-minutes: 240 | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Precedence: explicit PR head SHA, then caller-supplied git ref, then the triggering commit. | |
| ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }} | |
| - uses: ./.github/actions/check-stage-health | |
| - uses: ./.github/actions/check-maintenance | |
| # Only when sgl_kernel changed: fetch the artifacts matching the wheel pattern into sgl-kernel/dist/. | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v4 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda* | |
| # CUSTOM_BUILD_SGL_KERNEL forwards the sgl_kernel change flag to the DeepEP install script. | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_deepep.sh | |
| # Sources the venv activate script and env.sh only if they exist under SGLANG_CI_VENV_PATH | |
| # (an unset variable falls back to /dev/null, so both checks are skipped), then runs the | |
| # DeepGEMM warmup script with the listed model arguments. | |
| - name: Warmup DeepGEMM JIT Compilation | |
| timeout-minutes: 25 | |
| run: | | |
| # Activate venv if available (GITHUB_ENV may have failed to propagate) | |
| [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/bin/activate" ] && source "${SGLANG_CI_VENV_PATH}/bin/activate" | |
| [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/env.sh" ] && source "${SGLANG_CI_VENV_PATH}/env.sh" | |
| python3 scripts/ci/cuda/warmup_deep_gemm.py \ | |
| deepseek-ai/DeepSeek-V3-0324:8 \ | |
| deepseek-ai/DeepSeek-V3.2-Exp:8 | |
| # Same optional venv activation as above, then runs the server warmup script. | |
| - name: Warmup Server CUDA Graphs | |
| timeout-minutes: 25 | |
| run: | | |
| [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/bin/activate" ] && source "${SGLANG_CI_VENV_PATH}/bin/activate" | |
| [ -f "${SGLANG_CI_VENV_PATH:-/dev/null}/env.sh" ] && source "${SGLANG_CI_VENV_PATH}/env.sh" | |
| python3 scripts/ci/cuda/warmup_server.py \ | |
| deepseek-ai/DeepSeek-V3-0324:8 | |
| # This step gets 45 minutes, versus 30 for the "Run test" step of the other Stage C jobs in this file. | |
| - name: Run test | |
| timeout-minutes: 45 | |
| env: | |
| # Expands to '--continue-on-error' when check-changes sets continue_on_error to 'true', else to an empty string. | |
| CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }} | |
| run: | | |
| cd test | |
| python3 run_suite.py --hw cuda --suite stage-c-test-deepep-8-gpu-h200 $CONTINUE_ON_ERROR_FLAG | |
| # Runs only if an earlier step failed. | |
| - uses: ./.github/actions/upload-cuda-coredumps | |
| if: failure() | |
| # Always runs, regardless of the outcome of earlier steps. | |
| - name: Cleanup venv | |
| if: always() | |
| run: bash scripts/ci/cuda/ci_cleanup_venv.sh | |
| # Stage C four-GPU B200 suite: 3 partitions of `--suite stage-c-test-4-gpu-b200`, with a per-file | |
| # timeout of 1800 passed to run_suite.py. The runner label comes from check-changes (b200_runner). | |
| stage-c-test-4-gpu-b200: | |
| needs: [check-changes, call-gate, wait-for-stage-b, sgl-kernel-build-wheels] | |
| # Runs when explicitly targeted via inputs.target_stage, or - with no target stage - when | |
| # main_package or sgl_kernel changed and either the run is scheduled / test_parallel_dispatch | |
| # or no needed job failed or was cancelled. | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'stage-c-test-4-gpu-b200') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| ) | |
| ) | |
| runs-on: ${{ needs.check-changes.outputs.b200_runner }} | |
| timeout-minutes: 240 | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| # 3 partitions; must match --auto-partition-size in the "Run test" step. | |
| part: [0, 1, 2] | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Precedence: explicit PR head SHA, then caller-supplied git ref, then the triggering commit. | |
| ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }} | |
| - uses: ./.github/actions/check-stage-health | |
| - uses: ./.github/actions/check-maintenance | |
| # Only when sgl_kernel changed: fetch the artifacts matching the wheel pattern into sgl-kernel/dist/. | |
| # NOTE(review): this job pins download-artifact@v6 while the H100/H200/H20/5090 jobs pin @v4 - confirm this is intentional. | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v6 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda* | |
| # CUSTOM_BUILD_SGL_KERNEL forwards the sgl_kernel change flag to the install script. | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh | |
| - name: Run test | |
| timeout-minutes: 30 | |
| env: | |
| # Expands to '--continue-on-error' when check-changes sets continue_on_error to 'true', else to an empty string. | |
| CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }} | |
| run: | | |
| cd test | |
| python3 run_suite.py --hw cuda --suite stage-c-test-4-gpu-b200 --auto-partition-id ${{ matrix.part }} --auto-partition-size 3 --timeout-per-file 1800 $CONTINUE_ON_ERROR_FLAG | |
| # Runs only if an earlier step failed; the partition id is passed as the artifact suffix. | |
| - uses: ./.github/actions/upload-cuda-coredumps | |
| if: failure() | |
| with: | |
| artifact-suffix: ${{ matrix.part }} | |
| # Always runs, regardless of the outcome of earlier steps. | |
| - name: Cleanup venv | |
| if: always() | |
| run: bash scripts/ci/cuda/ci_cleanup_venv.sh | |
| # Stage C four-GPU B200 "small" suite: 3 partitions of `--suite stage-c-test-4-gpu-b200-small`, with a | |
| # per-file timeout of 1800 passed to run_suite.py. Uses the low-disk B200 runner label from check-changes. | |
| stage-c-test-4-gpu-b200-small: | |
| needs: [check-changes, call-gate, wait-for-stage-b, sgl-kernel-build-wheels] | |
| # Runs when explicitly targeted via inputs.target_stage, or - with no target stage - when | |
| # main_package or sgl_kernel changed and either the run is scheduled / test_parallel_dispatch | |
| # or no needed job failed or was cancelled. | |
| if: | | |
| always() && | |
| ( | |
| (inputs.target_stage == 'stage-c-test-4-gpu-b200-small') || | |
| ( | |
| !inputs.target_stage && | |
| ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| ) | |
| ) | |
| # The `*-low-disk` label (resolved by `set-runner` to `4-gpu-b200-low-disk` or | |
| # `4-gpu-b200-kernel-low-disk`) is advertised by both the existing large-disk B200 | |
| # runners and the new low-disk runner, so this job can land on either pool. | |
| runs-on: ${{ needs.check-changes.outputs.b200_low_disk_runner }} | |
| timeout-minutes: 240 | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| # 3 partitions; must match --auto-partition-size in the "Run test" step. | |
| part: [0, 1, 2] | |
| steps: | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| with: | |
| # Precedence: explicit PR head SHA, then caller-supplied git ref, then the triggering commit. | |
| ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }} | |
| - uses: ./.github/actions/check-stage-health | |
| - uses: ./.github/actions/check-maintenance | |
| # Only when sgl_kernel changed: fetch the artifacts matching the wheel pattern into sgl-kernel/dist/. | |
| # NOTE(review): this job pins download-artifact@v6 while the H100/H200/H20/5090 jobs pin @v4 - confirm this is intentional. | |
| - name: Download artifacts | |
| if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| uses: actions/download-artifact@v6 | |
| with: | |
| path: sgl-kernel/dist/ | |
| merge-multiple: true | |
| pattern: wheel-python3.10-cuda* | |
| # CUSTOM_BUILD_SGL_KERNEL forwards the sgl_kernel change flag to the install script. | |
| - name: Install dependencies | |
| timeout-minutes: 20 | |
| run: | | |
| CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/cuda/ci_install_dependency.sh | |
| - name: Run test | |
| timeout-minutes: 30 | |
| env: | |
| # Expands to '--continue-on-error' when check-changes sets continue_on_error to 'true', else to an empty string. | |
| CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }} | |
| run: | | |
| cd test | |
| python3 run_suite.py --hw cuda --suite stage-c-test-4-gpu-b200-small --auto-partition-id ${{ matrix.part }} --auto-partition-size 3 --timeout-per-file 1800 $CONTINUE_ON_ERROR_FLAG | |
| # Runs only if an earlier step failed; the partition id is passed as the artifact suffix. | |
| - uses: ./.github/actions/upload-cuda-coredumps | |
| if: failure() | |
| with: | |
| artifact-suffix: ${{ matrix.part }} | |
| # Always runs, regardless of the outcome of earlier steps. | |
| - name: Cleanup venv | |
| if: always() | |
| run: bash scripts/ci/cuda/ci_cleanup_venv.sh | |
| # NOTE: GB200 stage temporarily disabled — no company-owned GB200 runner available yet. | |
| # Re-enable when a 4-gpu-gb200 runner is provisioned. | |
| # stage-c-test-4-gpu-gb200: | |
| # needs: [check-changes, call-gate, wait-for-stage-b, sgl-kernel-build-wheels-arm] | |
| # if: | | |
| # always() && | |
| # ( | |
| # (inputs.target_stage == 'stage-c-test-4-gpu-gb200') || | |
| # ( | |
| # !inputs.target_stage && | |
| # ((github.event_name == 'schedule' || inputs.test_parallel_dispatch == true) || (!failure() && !cancelled())) && | |
| # ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) | |
| # ) | |
| # ) | |
| # runs-on: 4-gpu-gb200 | |
| # timeout-minutes: 240 | |
| # strategy: | |
| # fail-fast: false | |
| # steps: | |
| # - uses: ./.github/actions/check-maintenance | |
| # with: | |
| # github-token: ${{ github.token }} | |
| # | |
| # - name: Checkout code | |
| # uses: actions/checkout@v4 | |
| # with: | |
| # ref: ${{ inputs.pr_head_sha || inputs.git_ref || github.sha }} | |
| # | |
| # - name: Download artifacts | |
| # if: needs.check-changes.outputs.sgl_kernel == 'true' | |
| # uses: actions/download-artifact@v4 | |
| # with: | |
| # path: sgl-kernel/dist/ | |
| # merge-multiple: true | |
| # pattern: wheel-python3.10-cuda13.0-aarch64 | |
| # | |
| # - name: Install dependencies | |
| # timeout-minutes: 20 | |
| # run: | | |
| # CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} GRACE_BLACKWELL=1 bash scripts/ci/cuda/ci_install_deepep.sh | |
| # | |
| # - name: Run test | |
| # timeout-minutes: 45 | |
| # env: | |
| # CONTINUE_ON_ERROR_FLAG: ${{ needs.check-changes.outputs.continue_on_error == 'true' && '--continue-on-error' || '' }} | |
| # run: | | |
| # cd test | |
| # python3 run_suite.py --hw cuda --suite stage-c-test-4-gpu-gb200 --timeout-per-file 3600 $CONTINUE_ON_ERROR_FLAG | |
| # | |
| # - uses: ./.github/actions/upload-cuda-coredumps | |
| # if: failure() | |
| # Final gate job: aggregates the results of every job listed in `needs` into one pass/fail status. | |
| # Fails when any needed job ended as 'failure' or 'cancelled'; 'success' and 'skipped' both pass. | |
| pr-test-finish: | |
| needs: | |
| [ | |
| call-gate, | |
| check-changes, | |
| sgl-kernel-build-wheels, | |
| sgl-kernel-build-wheels-arm, | |
| call-sgl-kernel-tests, | |
| wait-for-stage-a, | |
| wait-for-stage-b, | |
| call-jit-kernel-tests, | |
| call-multimodal-gen-tests, | |
| stage-a-test-1-gpu-small, | |
| stage-a-test-cpu, | |
| stage-b-test-1-gpu-small, | |
| stage-b-test-1-gpu-large, | |
| stage-b-test-2-gpu-large, | |
| stage-b-test-4-gpu-b200, | |
| stage-c-test-4-gpu-h100, | |
| stage-c-test-8-gpu-h20, | |
| stage-c-test-8-gpu-h200, | |
| stage-c-test-deepep-4-gpu-h100, | |
| stage-c-test-deepep-8-gpu-h200, | |
| stage-c-test-4-gpu-b200, | |
| stage-c-test-4-gpu-b200-small, | |
| # stage-c-test-4-gpu-gb200, # Temporarily disabled — no GB200 runner | |
| ] | |
| # Always evaluate the gate, even when needed jobs failed, were cancelled or were skipped. | |
| if: always() | |
| runs-on: ubuntu-latest | |
| steps: | |
| - name: Check all dependent job statuses | |
| env: | |
| # Hand the `needs` context to the script through the environment instead of pasting it into | |
| # the script text: the JSON also carries upstream job outputs, and a single quote inside any | |
| # of them would otherwise terminate the shell string literal (breaking or injecting into the script). | |
| NEEDS_JSON: ${{ toJson(needs) }} | |
| run: | | |
| # Get a list of all job names from the JSON keys | |
| job_names=$(jq -r 'keys_unsorted[]' <<< "$NEEDS_JSON") | |
| failed_jobs=() | |
| for job in $job_names; do | |
| # For each job, extract its result | |
| result=$(jq -r --arg j "$job" '.[$j].result' <<< "$NEEDS_JSON") | |
| # Print the job name and its result | |
| echo "$job: $result" | |
| # Remember failures and cancellations, but keep going so every result is printed | |
| if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then | |
| failed_jobs+=("$job") | |
| fi | |
| done | |
| # Fail once, after the full report, naming every offending job | |
| if (( ${#failed_jobs[@]} > 0 )); then | |
| echo "The following jobs failed or were cancelled: ${failed_jobs[*]}" | |
| exit 1 | |
| fi | |
| # No failure or cancellation was found (skipped jobs count as passing) | |
| echo "All jobs completed successfully" | |
| exit 0 | |