Skip to content

Commit 65a1e1b

Browse files
authored
ci: conditional backend github workflow (#3141)
Signed-off-by: Anant Sharma <[email protected]>
1 parent a69efbd commit 65a1e1b

File tree

5 files changed

+251
-84
lines changed

5 files changed

+251
-84
lines changed
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
# Composite action that builds a Dynamo container image with
# ./container/build.sh and publishes the tag it used as the `image_tag` output.
#
# All caller-supplied inputs are handed to the shell through step-level `env:`
# and referenced as quoted shell variables. They are never spliced into the
# script text with `${{ ... }}`, so an input containing quotes, spaces or shell
# metacharacters cannot alter the commands that run.
name: 'Docker Build'
description: 'Build Dynamo container images'
inputs:
  framework:
    description: 'Framework to build'
    required: true
    default: 'vllm'
  target:
    description: 'Target to build'
    required: false
    default: 'runtime'
  image_tag:
    description: 'Custom image tag (optional, defaults to framework:latest)'
    required: false
  ngc_ci_access_token:
    description: 'NGC CI Access Token'
    required: false
  ci_token:
    description: 'CI Token'
    required: false
  aws_default_region:
    description: 'AWS Default Region'
    required: false
  sccache_s3_bucket:
    description: 'SCCache S3 Bucket'
    required: false
  aws_access_key_id:
    description: 'AWS Access Key ID'
    required: false
  aws_secret_access_key:
    description: 'AWS Secret Access Key'
    required: false

outputs:
  image_tag:
    description: 'Image Tag'
    # Written to $GITHUB_OUTPUT by the step with id `build` below.
    value: ${{ steps.build.outputs.image_tag }}

runs:
  using: "composite"
  steps:
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3

    # Only attempted for pushes and for pull requests whose head repository is
    # this repository.
    - name: Login to NGC
      if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name == 'push'
      shell: bash
      env:
        NGC_CI_ACCESS_TOKEN: ${{ inputs.ngc_ci_access_token }}
      run: |
        # '$oauthtoken' is a literal user name, hence the single quotes.
        echo "$NGC_CI_ACCESS_TOKEN" | docker login nvcr.io -u '$oauthtoken' --password-stdin

    # Prunes all unused Docker data on the runner before the build starts.
    - name: Cleanup
      if: always()
      shell: bash
      run: |
        docker system prune -af

    - name: Build image
      id: build
      shell: bash
      env:
        GITHUB_TOKEN: ${{ inputs.ci_token }}
        AWS_DEFAULT_REGION: ${{ inputs.aws_default_region }}
        SCCACHE_S3_BUCKET: ${{ inputs.sccache_s3_bucket }}
        AWS_ACCESS_KEY_ID: ${{ inputs.aws_access_key_id }}
        AWS_SECRET_ACCESS_KEY: ${{ inputs.aws_secret_access_key }}
        # Prefixed names so they are unlikely to collide with variables that
        # build.sh may read from its environment.
        ACTION_INPUT_IMAGE_TAG: ${{ inputs.image_tag }}
        ACTION_INPUT_FRAMEWORK: ${{ inputs.framework }}
        ACTION_INPUT_TARGET: ${{ inputs.target }}
      run: |
        # Determine image tag: an explicit input wins, otherwise <framework>:latest.
        if [ -n "$ACTION_INPUT_IMAGE_TAG" ]; then
          IMAGE_TAG="$ACTION_INPUT_IMAGE_TAG"
        else
          IMAGE_TAG="${ACTION_INPUT_FRAMEWORK}:latest"
        fi
        echo "image_tag=$IMAGE_TAG" >> "$GITHUB_OUTPUT"

        ./container/build.sh --tag "$IMAGE_TAG" \
          --target "$ACTION_INPUT_TARGET" \
          --framework "$ACTION_INPUT_FRAMEWORK" \
          --use-sccache \
          --sccache-bucket "$SCCACHE_S3_BUCKET" \
          --sccache-region "$AWS_DEFAULT_REGION"

.github/actions/pytest/action.yml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
# Composite action that runs pytest inside an already-built container image.
#
# Inputs are passed to the shell through `env:` and then into the container as
# positional arguments of `bash -c`, so a marker expression containing quotes
# or other shell metacharacters is delivered to pytest verbatim instead of
# being re-parsed as script text.
name: 'Pytest'
description: 'Run pytest on pre-built container images'
inputs:
  pytest_marks:
    description: 'Pytest marks'
    required: true
    default: 'e2e and vllm and gpu_1 and not slow'
  image_tag:
    description: 'Image Tag to run tests on'
    required: true

runs:
  using: "composite"
  steps:
    - name: Run tests
      shell: bash
      env:
        # Combines run id, run attempt and job id; used to name the container.
        CONTAINER_ID: test_${{ github.run_id }}_${{ github.run_attempt }}_${{ github.job }}
        PYTEST_XML_FILE: pytest_test_report.xml
        # NOTE(review): HF_HOME is set for this step's shell only; it is not
        # forwarded into the container (no `-e`/`-v` below) - confirm intent.
        HF_HOME: /runner/_work/_temp
        IMAGE_TAG: ${{ inputs.image_tag }}
        PYTEST_MARKS: ${{ inputs.pytest_marks }}
      run: |
        # Inside the container: $1 is the JUnit XML path, $2 the marker expression.
        docker run --runtime=nvidia --rm --gpus all -w /workspace \
          --network host \
          --name "${CONTAINER_ID}_pytest" \
          "$IMAGE_TAG" \
          bash -c 'pytest -xsv --basetemp=/tmp --junitxml="$1" -m "$2"' _ "$PYTEST_XML_FILE" "$PYTEST_MARKS"

.github/filters.yaml

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
# Path filter rules, loaded by dorny/paths-filter via `filters: .github/filters.yaml`.
# Each top-level key is a filter name; its value is the list of glob patterns
# that make the filter match.

# Documentation-only paths.
docs: &docs
  - 'docs/**'
  - '**/*.md'
  - '**/*.rst'

# CI definitions: workflows, this filter file and local actions.
ci: &ci
  - '.github/workflows/**'
  - '.github/filters.yaml'
  - '.github/actions/**'

# Any change that should trigger the backend build-and-test jobs.
has_code_changes:
  # Re-uses the `ci` pattern list through its anchor.
  - *ci
  - 'benchmarks/**'
  - 'components/**'
  - 'container/**'
  - 'deploy/**'
  - 'examples/**'
  - 'launch/**'
  - 'lib/**'
  - 'recipes/**'
  - 'tests/**'
  - '*.toml'
  - '*.lock'
  - '*.py'
  - '*.rs'

# Per-backend filters.
vllm: &vllm
  - 'container/Dockerfile.vllm'
  - 'container/deps/requirements.vllm.txt'
  - 'container/deps/vllm/**'
  - 'components/backends/vllm/**'
  - 'tests/serve/test_vllm.py'

sglang: &sglang
  - 'container/Dockerfile.sglang'
  - 'container/Dockerfile.sglang-wideep'
  - 'components/backends/sglang/**'
  - 'container/build.sh'
  - 'tests/serve/test_sglang.py'

trtllm: &trtllm
  - 'container/Dockerfile.trtllm'
  - 'components/backends/trtllm/**'
  - 'container/build.sh'
  - 'container/build_trtllm_wheel.sh'
  - 'container/deps/**'
  - 'tests/serve/test_trtllm.py'

sdk:
  - 'deploy/**'
Lines changed: 96 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,78 +1,112 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Builds each backend container image and runs its pytest suite, but only when
# the `has_code_changes` path filter matches. `backend-status-check` is a
# single aggregate job summarising the result of all the others.
name: Docker Build and Test

on:
  push:
    branches:
      - main
      - "pull-request/[0-9]+"

# One run per ref; superseded runs are cancelled except on main.
concurrency:
  group: ${{ github.workflow }}-build-test-${{ github.ref_name || github.run_id }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

jobs:
  # Evaluates .github/filters.yaml and exposes the `has_code_changes` result.
  changed-files:
    runs-on: ubuntu-latest
    outputs:
      has_code_changes: ${{ steps.filter.outputs.has_code_changes }}
    steps:
      - name: Checkout code
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
      - name: Check for changes
        uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36  # v3.0.2
        id: filter
        with:
          filters: .github/filters.yaml

  # Aggregate gate: succeeds only if every job in `needs` succeeded or was
  # skipped. `changed-files` is listed too, because if it fails the backend
  # jobs are skipped, and a gate that only looked at them would pass.
  backend-status-check:
    runs-on: ubuntu-latest
    needs: [changed-files, vllm, sglang, trtllm]
    if: always()
    steps:
      - name: "Check all dependent jobs"
        env:
          # Passed via the environment so quotes inside the JSON cannot break
          # out of a shell string literal.
          NEEDS_JSON: ${{ toJson(needs) }}
        run: |
          printf '%s\n' "$NEEDS_JSON" | jq -e 'to_entries | map(.value.result) | all(. as $result | ["success", "skipped"] | any($result == .))'

  vllm:
    runs-on: gpu-l40-amd64
    needs: changed-files
    if: needs.changed-files.outputs.has_code_changes == 'true'
    steps:
      - name: Checkout code
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
      - name: Build Container
        id: build-image
        uses: ./.github/actions/docker-build
        with:
          framework: vllm
          target: runtime
          ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
          ci_token: ${{ secrets.CI_TOKEN }}
          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
          sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
          aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
      - name: Run tests
        uses: ./.github/actions/pytest
        with:
          image_tag: ${{ steps.build-image.outputs.image_tag }}
          pytest_marks: "e2e and vllm and gpu_1 and not slow"

  sglang:
    runs-on: gpu-l40-amd64
    needs: changed-files
    if: needs.changed-files.outputs.has_code_changes == 'true'
    steps:
      - name: Checkout code
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
      - name: Build Container
        id: build-image
        uses: ./.github/actions/docker-build
        with:
          framework: sglang
          target: runtime
          ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
          ci_token: ${{ secrets.CI_TOKEN }}
          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
          sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
          aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
      - name: Run tests
        uses: ./.github/actions/pytest
        with:
          image_tag: ${{ steps.build-image.outputs.image_tag }}
          # NOTE(review): unlike vllm and trtllm, this expression has no
          # "and not slow" clause - confirm that slow sglang tests are meant
          # to run here.
          pytest_marks: "e2e and sglang and gpu_1"

  trtllm:
    runs-on: gpu-l40-amd64
    needs: changed-files
    if: needs.changed-files.outputs.has_code_changes == 'true'
    steps:
      - name: Checkout code
        uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
      - name: Build Container
        id: build-image
        uses: ./.github/actions/docker-build
        with:
          framework: trtllm
          target: runtime
          ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
          ci_token: ${{ secrets.CI_TOKEN }}
          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
          sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
          aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
      - name: Run tests
        uses: ./.github/actions/pytest
        with:
          image_tag: ${{ steps.build-image.outputs.image_tag }}
          pytest_marks: "e2e and trtllm_marker and gpu_1 and not slow"

.github/workflows/trigger_ci.yml

Lines changed: 1 addition & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -51,28 +51,7 @@ jobs:
5151
id: src_changes
5252
uses: dorny/paths-filter@v3
5353
with:
54-
filters: |
55-
vllm:
56-
- 'container/Dockerfile.vllm'
57-
- 'container/deps/requirements.vllm.txt'
58-
- 'container/deps/vllm/**'
59-
- 'components/backends/vllm/**'
60-
- 'tests/serve/test_vllm.py'
61-
trtllm:
62-
- 'container/Dockerfile.trtllm'
63-
- 'components/backends/trtllm/**'
64-
- 'container/build.sh'
65-
- 'container/build_trtllm_wheel.sh'
66-
- 'container/deps/**'
67-
- 'tests/serve/test_trtllm.py'
68-
sdk:
69-
- 'deploy/**'
70-
sglang:
71-
- 'container/Dockerfile.sglang'
72-
- 'container/Dockerfile.sglang-deepep'
73-
- 'components/backends/sglang/**'
74-
- 'container/build.sh'
75-
- 'tests/serve/test_sglang.py'
54+
filters: .github/filters.yaml
7655
- name: Check if Validation Workflow has run
7756
id: check_workflow
7857
uses: actions/github-script@v6

0 commit comments

Comments
 (0)