Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
548ba0c
add composite workflow for backend testing
nv-anants Sep 17, 2025
1821690
temp: run from branch
nv-anants Sep 17, 2025
39fd0e9
test bash
nv-anants Sep 17, 2025
b2e96ae
test env
nv-anants Sep 17, 2025
e725e3c
split build and test actions
nv-anants Sep 18, 2025
5a8af8b
remove pytest in docker
nv-anants Sep 18, 2025
bb4ee32
test filter
nv-anants Sep 18, 2025
e7c29b3
test
nv-anants Sep 18, 2025
dc0d5fe
add path filter
nv-anants Sep 19, 2025
5fabb86
debug
nv-anants Sep 19, 2025
1d47885
debug 2
nv-anants Sep 19, 2025
81f77e9
test
nv-anants Sep 19, 2025
faf227f
test2
nv-anants Sep 19, 2025
754e3ca
test
nv-anants Sep 19, 2025
5b56480
add trtllm
nv-anants Sep 19, 2025
c40ffb6
cleanup
nv-anants Sep 19, 2025
4172c4c
Merge branch 'main' into anants/reorg-workflows
nv-anants Sep 19, 2025
77ffaaa
fixes
nv-anants Sep 19, 2025
983bf59
fix output
nv-anants Sep 19, 2025
882cbc2
naming
nv-anants Sep 19, 2025
ee33318
Merge branch 'main' into anants/reorg-workflows
nv-anants Sep 19, 2025
3f59c16
updates
nv-anants Sep 19, 2025
2fe00ac
exit errort
nv-anants Sep 19, 2025
c123fb4
Merge branch 'main' into anants/reorg-workflows
nv-anants Sep 19, 2025
1a3bbbe
add more files
nv-anants Sep 22, 2025
f8957cd
Merge branch 'main' into anants/reorg-workflows
nv-anants Sep 22, 2025
6dd5e18
fix
nv-anants Sep 22, 2025
4b0b3af
temp test
nv-anants Sep 22, 2025
3c29048
update rules
nv-anants Sep 22, 2025
69bd84b
Revert "temp test"
nv-anants Sep 22, 2025
0125b1d
minor
nv-anants Sep 22, 2025
d024fcc
use image tag as input
nv-anants Sep 22, 2025
c5ed6f3
Merge branch 'main' into anants/reorg-workflows
nv-anants Sep 22, 2025
b974954
Merge branch 'main' into anants/reorg-workflows
nv-anants Sep 23, 2025
2b2e788
update filter
nv-anants Sep 23, 2025
622a235
ai fixes
nv-anants Sep 23, 2025
528d192
review comments
nv-anants Sep 23, 2025
c492214
Merge branch 'main' into anants/reorg-workflows
nv-anants Sep 25, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions .github/actions/docker-build/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Composite action: builds a Dynamo container image with ./container/build.sh
# and exposes the tag that was built as the `image_tag` output.
#
# All caller-supplied inputs are handed to the shell through step-level `env:`
# variables instead of being interpolated into the script text with
# `${{ ... }}`. Expression interpolation happens before the shell parses the
# script, so an input containing quotes, `$(...)` or `;` would otherwise be
# executed as shell code.
name: 'Docker Build'
description: 'Build Dynamo container images'
inputs:
  framework:
    description: 'Framework to build'
    required: true
    default: 'vllm'
  target:
    description: 'Target to build'
    required: false
    default: 'runtime'
  image_tag:
    description: 'Custom image tag (optional, defaults to framework:latest)'
    required: false
  ngc_ci_access_token:
    description: 'NGC CI Access Token'
    required: false
  ci_token:
    description: 'CI Token'
    required: false
  aws_default_region:
    description: 'AWS Default Region'
    required: false
  sccache_s3_bucket:
    description: 'SCCache S3 Bucket'
    required: false
  aws_access_key_id:
    description: 'AWS Access Key ID'
    required: false
  aws_secret_access_key:
    description: 'AWS Secret Access Key'
    required: false

outputs:
  image_tag:
    description: 'Image Tag'
    value: ${{ steps.build.outputs.image_tag }}

runs:
  using: "composite"
  steps:
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    - name: Login to NGC
      # Only attempted for pushes and for pull requests whose head repository
      # is this repository.
      if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name == 'push'
      shell: bash
      env:
        NGC_CI_ACCESS_TOKEN: ${{ inputs.ngc_ci_access_token }}
      run: |
        # '$oauthtoken' is a literal user name; the single quotes keep the
        # shell from expanding it.
        echo "$NGC_CI_ACCESS_TOKEN" | docker login nvcr.io -u '$oauthtoken' --password-stdin
    - name: Cleanup
      # Runs before the build, even if an earlier step failed.
      if: always()
      shell: bash
      run: |
        docker system prune -af
    - name: Build image
      id: build
      shell: bash
      env:
        GITHUB_TOKEN: ${{ inputs.ci_token }}
        AWS_DEFAULT_REGION: ${{ inputs.aws_default_region }}
        SCCACHE_S3_BUCKET: ${{ inputs.sccache_s3_bucket }}
        AWS_ACCESS_KEY_ID: ${{ inputs.aws_access_key_id }}
        AWS_SECRET_ACCESS_KEY: ${{ inputs.aws_secret_access_key }}
        # Caller inputs, read by the script below as ordinary shell variables.
        INPUT_IMAGE_TAG: ${{ inputs.image_tag }}
        INPUT_FRAMEWORK: ${{ inputs.framework }}
        INPUT_TARGET: ${{ inputs.target }}
      run: |
        # Determine image tag: an explicit input wins, otherwise <framework>:latest
        if [ -n "$INPUT_IMAGE_TAG" ]; then
          IMAGE_TAG="$INPUT_IMAGE_TAG"
        else
          IMAGE_TAG="${INPUT_FRAMEWORK}:latest"
        fi
        # Publish the tag before building so it backs the action's `image_tag` output.
        echo "image_tag=$IMAGE_TAG" >> "$GITHUB_OUTPUT"

        ./container/build.sh --tag "$IMAGE_TAG" \
          --target "$INPUT_TARGET" \
          --framework "$INPUT_FRAMEWORK" \
          --use-sccache \
          --sccache-bucket "$SCCACHE_S3_BUCKET" \
          --sccache-region "$AWS_DEFAULT_REGION"
27 changes: 27 additions & 0 deletions .github/actions/pytest/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Composite action: runs pytest inside an already-built container image.
#
# Caller inputs are passed through environment variables rather than being
# interpolated into the script with `${{ ... }}`, so a marker expression or
# image tag containing quotes or shell metacharacters cannot alter the command.
name: 'Pytest'
description: 'Run pytest on pre-built container images'
inputs:
  pytest_marks:
    description: 'Pytest marks'
    required: true
    default: 'e2e and vllm and gpu_1 and not slow'
  image_tag:
    description: 'Image Tag to run tests on'
    required: true


runs:
  using: "composite"
  steps:
    - name: Run tests
      shell: bash
      env:
        CONTAINER_ID: test_${{ github.run_id }}_${{ github.run_attempt }}_${{ github.job }}
        PYTEST_XML_FILE: pytest_test_report.xml
        # NOTE(review): HF_HOME is set for this step's shell only; it is not
        # forwarded to the container (no `-e HF_HOME`, no volume mount).
        # Confirm whether the tests are expected to see it.
        HF_HOME: /runner/_work/_temp
        IMAGE_TAG: ${{ inputs.image_tag }}
        PYTEST_MARKS: ${{ inputs.pytest_marks }}
      run: |
        # PYTEST_XML_FILE and PYTEST_MARKS are forwarded with `-e` and expanded
        # by the shell inside the container; the single-quoted command string
        # keeps the runner's shell from touching them.
        # NOTE(review): the junit XML is written inside a `--rm` container with
        # no volume mount, so it presumably does not survive the run - confirm.
        docker run --runtime=nvidia --rm --gpus all -w /workspace \
          --network host \
          --name "${CONTAINER_ID}_pytest" \
          -e PYTEST_XML_FILE \
          -e PYTEST_MARKS \
          "$IMAGE_TAG" \
          bash -c 'pytest -xsv --basetemp=/tmp --junitxml="$PYTEST_XML_FILE" -m "$PYTEST_MARKS"'
50 changes: 50 additions & 0 deletions .github/filters.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Path filters referenced via `filters: .github/filters.yaml` by the
# dorny/paths-filter steps in the workflows. Each top-level key is a filter
# name; its value is the list of glob patterns belonging to that filter.

# Documentation-only paths. The &docs anchor is defined here but is not
# aliased by any other filter in this file.
docs: &docs
- 'docs/**'
- '**/*.md'
- '**/*.rst'

# CI definitions; aliased into has_code_changes below.
ci: &ci
- '.github/workflows/**'
- '.github/filters.yaml'
- '.github/actions/**'

# Read as the `has_code_changes` output of the changed-files job in
# container-validation-backends.yml.
# NOTE(review): the last four patterns ('*.toml', '*.lock', '*.py', '*.rs')
# have no directory component; presumably they only match files at the
# repository root - confirm against the paths-filter glob semantics.
has_code_changes:
- *ci
- 'benchmarks/**'
- 'components/**'
- 'container/**'
- 'deploy/**'
- 'examples/**'
- 'launch/**'
- 'lib/**'
- 'recipes/**'
- 'tests/**'
- '*.toml'
- '*.lock'
- '*.py'
- '*.rs'

# Per-backend filters.
# NOTE(review): unlike sglang and trtllm, vllm does not list
# 'container/build.sh' - confirm this is intentional.
vllm: &vllm
- 'container/Dockerfile.vllm'
- 'container/deps/requirements.vllm.txt'
- 'container/deps/vllm/**'
- 'components/backends/vllm/**'
- 'tests/serve/test_vllm.py'

# NOTE(review): the inline filter removed from trigger_ci.yml listed
# 'container/Dockerfile.sglang-deepep' where this file lists
# 'container/Dockerfile.sglang-wideep' - confirm the rename is intended.
sglang: &sglang
- 'container/Dockerfile.sglang'
- 'container/Dockerfile.sglang-wideep'
- 'components/backends/sglang/**'
- 'container/build.sh'
- 'tests/serve/test_sglang.py'

trtllm: &trtllm
- 'container/Dockerfile.trtllm'
- 'components/backends/trtllm/**'
- 'container/build.sh'
- 'container/build_trtllm_wheel.sh'
- 'container/deps/**'
- 'tests/serve/test_trtllm.py'

sdk:
- 'deploy/**'
158 changes: 96 additions & 62 deletions .github/workflows/container-validation-backends.yml
Original file line number Diff line number Diff line change
@@ -1,78 +1,112 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

name: NVIDIA Dynamo Backends Github Validation
name: Docker Build and Test

on:
push:
branches:
- main
- "pull-request/[0-9]+"

concurrency:
group: ${{ github.workflow }}-build-test-${{ github.ref_name || github.run_id }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

jobs:
build-test:
runs-on: gpu-l40-amd64
strategy:
fail-fast: false
matrix:
framework: [vllm, sglang, trtllm]
include:
- framework: vllm
target: runtime
pytest_marks: "e2e and vllm and gpu_1 and not slow"
- framework: sglang
target: runtime
pytest_marks: "e2e and sglang and gpu_1 and not slow"
- framework: trtllm
target: runtime
pytest_marks: "e2e and trtllm_marker and gpu_1 and not slow"
changed-files:
  # Gate job: evaluates the shared path filters and publishes whether the
  # `has_code_changes` filter matched, so the backend jobs can skip themselves.
  runs-on: ubuntu-latest
  outputs:
    has_code_changes: ${{ steps.filter.outputs.has_code_changes }}
  steps:
    - name: Checkout code
      uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
    - name: Check for changes
      id: filter
      uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36  # v3.0.2
      with:
        # Filter definitions live in a file shared with trigger_ci.yml.
        filters: .github/filters.yaml

# Do not cancel main branch runs
concurrency:
group: ${{ github.workflow }}-${{ matrix.framework }}-build-test-${{ github.ref_name || github.run_id }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
backend-status-check:
  # Single aggregate status for the backend jobs. It always runs and passes
  # only when every job it depends on finished as `success` or `skipped`.
  runs-on: ubuntu-latest
  # changed-files is listed explicitly: if it fails, the backend jobs are
  # skipped, and without this dependency an all-`skipped` result would let the
  # gate pass even though nothing was validated.
  needs: [changed-files, vllm, sglang, trtllm]
  if: always()
  steps:
    - name: "Check all dependent jobs"
      env:
        # Passed through the environment rather than interpolated into the
        # script, so quotes inside the JSON cannot break the shell command.
        NEEDS_JSON: ${{ toJson(needs) }}
      run: |
        echo "$NEEDS_JSON" | jq -e 'to_entries | map(.value.result) | all(. as $result | ["success", "skipped"] | any($result == .))'

name: Build and Test - ${{ matrix.framework }}
env:
CONTAINER_ID: test_${{ github.run_id }}_${{ github.run_attempt }}_${{ github.job }}_${{ matrix.framework }}
PYTEST_XML_FILE: pytest_test_report.xml
FRAMEWORK: ${{ matrix.framework }}
TARGET: ${{ matrix.target }}
PYTEST_MARKS: ${{ matrix.pytest_marks }}
vllm:
  # Builds the vllm runtime image with the local docker-build action, then runs
  # the marked pytest selection against the image tag that action reports.
  # Skipped unless the changed-files job reported code changes.
  needs: changed-files
  if: needs.changed-files.outputs.has_code_changes == 'true'
  runs-on: gpu-l40-amd64
  steps:
    - name: Checkout code
      uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
    - name: Build Container
      id: build-image
      uses: ./.github/actions/docker-build
      with:
        framework: vllm
        target: runtime
        ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
        ci_token: ${{ secrets.CI_TOKEN }}
        aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
        sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
        aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
        aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
    - name: Run tests
      uses: ./.github/actions/pytest
      with:
        pytest_marks: 'e2e and vllm and gpu_1 and not slow'
        image_tag: ${{ steps.build-image.outputs.image_tag }}

sglang:
runs-on: gpu-l40-amd64
needs: changed-files
if: needs.changed-files.outputs.has_code_changes == 'true'
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to NGC
if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name == 'push'
run: |
echo "${{ secrets.NGC_CI_ACCESS_TOKEN }}" | docker login nvcr.io -u '$oauthtoken' --password-stdin
- name: Cleanup
if: always()
run: |
docker system prune -af
- name: Build image
env:
GITHUB_TOKEN: ${{ secrets.CI_TOKEN }}
AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
SCCACHE_S3_BUCKET: ${{ secrets.SCCACHE_S3_BUCKET }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
run: |
./container/build.sh --tag ${{ matrix.framework }}:latest \
--target ${{ matrix.target }} \
--framework ${{ matrix.framework }} \
--use-sccache \
--sccache-bucket "$SCCACHE_S3_BUCKET" \
--sccache-region "$AWS_DEFAULT_REGION"
- name: Run pytest
env:
HF_HOME: /runner/_work/_temp
run: |
docker run --runtime=nvidia --rm --gpus all -w /workspace \
--network host \
--name ${{ env.CONTAINER_ID }}_pytest \
${{ matrix.framework }}:latest \
bash -c "pytest -xsv --basetemp=/tmp --junitxml=${{ env.PYTEST_XML_FILE }} -m \"${{ env.PYTEST_MARKS }}\""
uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955 # v4.3.0
# Build the sglang runtime image with the local docker-build action.
- name: Build Container
id: build-image
uses: ./.github/actions/docker-build
with:
framework: sglang
target: runtime
ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
ci_token: ${{ secrets.CI_TOKEN }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
# Run pytest against the image tag reported by the build step.
- name: Run tests
uses: ./.github/actions/pytest
with:
image_tag: ${{ steps.build-image.outputs.image_tag }}
# NOTE(review): unlike the vllm and trtllm jobs, this marker expression has
# no 'and not slow' clause - confirm that slow sglang tests are meant to run.
pytest_marks: "e2e and sglang and gpu_1"

trtllm:
  # Builds the trtllm runtime image with the local docker-build action, then
  # runs the marked pytest selection against the image tag that action reports.
  # Skipped unless the changed-files job reported code changes.
  needs: changed-files
  if: needs.changed-files.outputs.has_code_changes == 'true'
  runs-on: gpu-l40-amd64
  steps:
    - name: Checkout code
      uses: actions/checkout@08eba0b27e820071cde6df949e0beb9ba4906955  # v4.3.0
    - name: Build Container
      id: build-image
      uses: ./.github/actions/docker-build
      with:
        framework: trtllm
        target: runtime
        ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
        ci_token: ${{ secrets.CI_TOKEN }}
        aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
        sccache_s3_bucket: ${{ secrets.SCCACHE_S3_BUCKET }}
        aws_access_key_id: ${{ secrets.AWS_ACCESS_KEY_ID }}
        aws_secret_access_key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
    - name: Run tests
      uses: ./.github/actions/pytest
      with:
        pytest_marks: 'e2e and trtllm_marker and gpu_1 and not slow'
        image_tag: ${{ steps.build-image.outputs.image_tag }}
23 changes: 1 addition & 22 deletions .github/workflows/trigger_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,28 +51,7 @@ jobs:
id: src_changes
uses: dorny/paths-filter@v3
with:
filters: |
vllm:
- 'container/Dockerfile.vllm'
- 'container/deps/requirements.vllm.txt'
- 'container/deps/vllm/**'
- 'components/backends/vllm/**'
- 'tests/serve/test_vllm.py'
trtllm:
- 'container/Dockerfile.trtllm'
- 'components/backends/trtllm/**'
- 'container/build.sh'
- 'container/build_trtllm_wheel.sh'
- 'container/deps/**'
- 'tests/serve/test_trtllm.py'
sdk:
- 'deploy/**'
sglang:
- 'container/Dockerfile.sglang'
- 'container/Dockerfile.sglang-deepep'
- 'components/backends/sglang/**'
- 'container/build.sh'
- 'tests/serve/test_sglang.py'
filters: .github/filters.yaml
- name: Check if Validation Workflow has run
id: check_workflow
uses: actions/github-script@v6
Expand Down
Loading