Skip to content

Commit e4aef71

Browse files
committed
ci: add support for vllm sanity testing on Github
Signed-off-by: Anant Sharma <[email protected]>
1 parent 1ed877f commit e4aef71

File tree

563 files changed

+55722
-14428
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

563 files changed

+55722
-14428
lines changed

.devcontainer/README.md

Lines changed: 259 additions & 40 deletions
Large diffs are not rendered by default.

.devcontainer/devcontainer.json

Lines changed: 25 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,10 @@
11
{
2-
"$schema": "https://json-schema.org/draft-07/schema#",
2+
"$schema": "https://raw.githubusercontent.com/devcontainers/spec/main/schemas/devContainer.schema.json",
33
"copyright": [
44
"SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.",
5-
"SPDX-License-Identifier: Apache-2.0",
6-
"Licensed under the Apache License, Version 2.0 (the \"License\");",
7-
"you may not use this file except in compliance with the License.",
8-
"You may obtain a copy of the License at",
9-
"http://www.apache.org/licenses/LICENSE-2.0",
10-
"Unless required by applicable law or agreed to in writing, software",
11-
"distributed under the License is distributed on an \"AS IS\" BASIS,",
12-
"WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.",
13-
"See the License for the specific language governing permissions and",
14-
"limitations under the License."
5+
"SPDX-License-Identifier: Apache-2.0"
156
],
16-
"name": "NVIDIA Dynamo Development",
7+
"name": "NVIDIA Dynamo Dev Container Development",
178
"remoteUser": "ubuntu", // Matches our container user
189
"updateRemoteUserUID": true, // Updates the UID of the remote user to match the host user, avoids permission errors
1910
"image": "dynamo:latest-vllm-local-dev", // Use the latest VLLM local dev image
@@ -27,7 +18,6 @@
2718
"--ulimit=stack=67108864",
2819
"--ulimit=nofile=65536:65536"
2920
],
30-
"service": "dynamo",
3121
"customizations": {
3222
"vscode": {
3323
"extensions": [
@@ -36,6 +26,8 @@
3626
"rust-lang.rust-analyzer"
3727
],
3828
"settings": {
29+
// Disable automatic copying of .gitconfig to avoid errors
30+
"dev.containers.copyGitConfig": false,
3931
"terminal.integrated.defaultProfile.linux": "bash",
4032
"terminal.integrated.cwd": "/home/ubuntu/dynamo",
4133

@@ -52,36 +44,40 @@
5244

5345
// Enhanced rust-analyzer configuration
5446
"rust-analyzer.linkedProjects": [
55-
"dynamo/Cargo.toml",
56-
"dynamo/lib/runtime/Cargo.toml",
57-
"dynamo/lib/llm/Cargo.toml",
58-
"dynamo/lib/tokens/Cargo.toml",
59-
"dynamo/lib/bindings/python/Cargo.toml",
60-
"dynamo/launch/dynamo-run/Cargo.toml"
47+
"Cargo.toml",
48+
"lib/runtime/Cargo.toml",
49+
"lib/llm/Cargo.toml",
50+
"lib/tokens/Cargo.toml",
51+
"lib/bindings/python/Cargo.toml",
52+
"launch/dynamo-run/Cargo.toml"
6153
],
62-
6354
"files.trimTrailingWhitespace": true,
6455
"files.insertFinalNewline": true
6556
}
6657
}
6758
},
68-
"workspaceFolder": "/home/ubuntu",
59+
"workspaceFolder": "/home/ubuntu/dynamo",
6960
"workspaceMount": "source=${localWorkspaceFolder},target=/home/ubuntu/dynamo,type=bind,consistency=cached",
7061
"userEnvProbe": "interactiveShell",
7162
"postCreateCommand": "/bin/bash /home/ubuntu/dynamo/.devcontainer/post-create.sh", // Runs cargo build and pip installs packages
7263
"containerEnv": {
73-
"GITHUB_TOKEN": "${localEnv:GITHUB_TOKEN}",
74-
"HF_TOKEN": "${localEnv:HF_TOKEN}",
64+
"DYNAMO_HOME": "/home/ubuntu/dynamo",
7565
"CARGO_HOME": "/home/ubuntu/dynamo/.build/.cargo",
76-
"RUSTUP_HOME": "/home/ubuntu/dynamo/.build/.rustup"
66+
"RUSTUP_HOME": "/home/ubuntu/dynamo/.build/.rustup",
67+
"CARGO_TARGET_DIR": "/home/ubuntu/dynamo/.build/target"
68+
},
69+
"remoteEnv": {
70+
// Optional convenience tokens passed from host. SSH agent is forwarded via your IDE setting, not here by default.
71+
"GITHUB_TOKEN": "${localEnv:GITHUB_TOKEN}",
72+
"HF_TOKEN": "${localEnv:HF_TOKEN}"
73+
// "SSH_AUTH_SOCK": "${localEnv:SSH_AUTH_SOCK}" // Optional: only if you also bind-mount the socket path
7774
},
7875
"mounts": [
76+
// These are for convenience, so that the history and pre-commit cache are persisted between sessions
77+
"source=dynamo-bashhistory,target=/home/ubuntu/.commandhistory,type=volume",
78+
"source=dynamo-precommit-cache,target=/home/ubuntu/.cache/pre-commit,type=volume",
7979
// Default mounts
8080
"source=/tmp/,target=/tmp/,type=bind",
81-
"source=dynamo-bashhistory,target=/home/ubuntu/.commandhistory,type=volume", // For bash history
82-
"source=dynamo-precommit-cache,target=/home/ubuntu/.cache/pre-commit,type=volume" // For pre-commit cache
83-
84-
// Uncomment for additional functionality
85-
// "source=${localEnv:HF_HOME},target=/home/ubuntu/.cache/huggingface,type=bind", // Uncomment to enable HF Cache Mount. Make sure to set HF_HOME env var in your .bashrc
81+
"source=${localEnv:HOME}/.cache/huggingface,target=/home/ubuntu/.cache/huggingface,type=bind"
8682
]
8783
}

.devcontainer/post-create.sh

Lines changed: 79 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,24 @@
11
#!/bin/bash
2-
32
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
43
# SPDX-License-Identifier: Apache-2.0
5-
#
6-
# Licensed under the Apache License, Version 2.0 (the "License");
7-
# you may not use this file except in compliance with the License.
8-
# You may obtain a copy of the License at
9-
#
10-
# http://www.apache.org/licenses/LICENSE-2.0
11-
#
12-
# Unless required by applicable law or agreed to in writing, software
13-
# distributed under the License is distributed on an "AS IS" BASIS,
14-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15-
# See the License for the specific language governing permissions and
16-
# limitations under the License.
4+
5+
set -eu
6+
7+
# Ensure we're not running as root
8+
if [ "$(id -u)" -eq 0 ]; then
9+
echo "❌ ERROR: This script should not be run as root!"
10+
echo "The script should run as the 'ubuntu' user, not root."
11+
echo "Current user: $(whoami) (UID: $(id -u))"
12+
exit 1
13+
fi
14+
15+
# Verify we're running as the expected user
16+
if [ "$(whoami)" != "ubuntu" ]; then
17+
echo "⚠️ WARNING: Expected to run as 'ubuntu' user, but running as '$(whoami)'"
18+
echo "This might cause permission issues."
19+
fi
20+
21+
echo "Running post-create script as user: $(whoami) (UID: $(id -u))"
1722

1823
trap 'echo "❌ ERROR: Command failed at line $LINENO: $BASH_COMMAND"; echo "⚠️ This was unexpected and setup was not completed. Can try to resolve yourself and then manually run the rest of the commands in this file or file a bug."' ERR
1924

@@ -35,36 +40,84 @@ retry() {
3540
return 0
3641
}
3742

38-
set -xe
43+
set -x
3944

4045
# Changing permission to match local user since volume mounts default to root ownership
41-
sudo chown -R ubuntu:ubuntu ~/.cache/pre-commit
46+
# Note: sudo is used here because the volume mount may have root ownership
47+
mkdir -p $HOME/.cache
48+
sudo chown -R ubuntu:ubuntu $HOME/.cache $HOME/dynamo
4249

4350
# Pre-commit hooks
4451
cd $HOME/dynamo && pre-commit install && retry pre-commit install-hooks
4552
pre-commit run --all-files || true # don't fail the build if pre-commit hooks fail
4653

4754
# Set build directory
48-
mkdir -p $HOME/dynamo/.build/target
49-
export CARGO_TARGET_DIR=$HOME/dynamo/.build/target
55+
export CARGO_TARGET_DIR=${CARGO_TARGET_DIR:-$HOME/dynamo/.build/target}
56+
mkdir -p $CARGO_TARGET_DIR
5057

51-
# build project, it will be saved at $HOME/dynamo/.build/target
58+
uv pip uninstall --yes ai-dynamo ai-dynamo-runtime 2>/dev/null || true
59+
60+
# Build the project; with the `dev` profile the output is saved in $CARGO_TARGET_DIR/debug
5261
cargo build --locked --profile dev --features mistralrs
53-
cargo doc --no-deps
5462

5563
# install the python bindings
56-
cd $HOME/dynamo/lib/bindings/python && retry maturin develop
64+
(cd $HOME/dynamo/lib/bindings/python && retry maturin develop)
5765

5866
# Install the overall Python packages; binaries are taken from $CARGO_TARGET_DIR/debug (default: .build/target/debug)
59-
cd $HOME/dynamo && retry env DYNAMO_BIN_PATH=$HOME/dynamo/.build/target/debug uv pip install -e .
60-
61-
export PYTHONPATH=/home/ubuntu/dynamo/components/planner/src:$PYTHONPATH
67+
cd $HOME/dynamo && retry env DYNAMO_BIN_PATH=$CARGO_TARGET_DIR/debug uv pip install -e .
6268

63-
# TODO: Deprecated except vLLM v0
64-
if ! grep -q "export VLLM_KV_CAPI_PATH=" ~/.bashrc; then
65-
echo "export VLLM_KV_CAPI_PATH=$HOME/dynamo/.build/target/debug/libdynamo_llm_capi.so" >> ~/.bashrc
69+
# Extract the PYTHONPATH line from README.md
70+
PYTHONPATH_LINE=$(grep "^export PYTHONPATH=" $DYNAMO_HOME/README.md | head -n1)
71+
if [ -n "$PYTHONPATH_LINE" ]; then
72+
# Remove the ${PYTHONPATH}: prefix if it exists, then replace $(pwd) with the actual path
73+
MODIFIED_LINE=$(echo "$PYTHONPATH_LINE" | sed 's/\${PYTHONPATH}://g' | sed "s|\$(pwd)|$DYNAMO_HOME|g")
74+
eval "$MODIFIED_LINE"
75+
# Also add to .bashrc for persistence (with expanded path)
76+
if ! grep -q "export PYTHONPATH=" ~/.bashrc; then
77+
# MODIFIED_LINE already has $DYNAMO_HOME expanded to /home/ubuntu/dynamo
78+
echo "$MODIFIED_LINE" >> ~/.bashrc
79+
fi
80+
else
81+
# Fallback used when no "export PYTHONPATH=" line is found in README.md. This is the version from 2025-08-19
82+
export PYTHONPATH=$DYNAMO_HOME/components/frontend/src:$DYNAMO_HOME/components/planner/src:$DYNAMO_HOME/components/backends/vllm/src:$DYNAMO_HOME/components/backends/sglang/src:$DYNAMO_HOME/components/backends/trtllm/src:$DYNAMO_HOME/components/backends/llama_cpp/src:$DYNAMO_HOME/components/backends/mocker/src
6683
fi
6784

6885
if ! grep -q "export GPG_TTY=" ~/.bashrc; then
6986
echo "export GPG_TTY=$(tty)" >> ~/.bashrc
7087
fi
88+
89+
# Unset empty tokens/variables to avoid issues with authentication and SSH
90+
if ! grep -q "# Unset empty tokens" ~/.bashrc; then
91+
echo -e "\n# Unset empty tokens and environment variables" >> ~/.bashrc
92+
echo '[ -z "$HF_TOKEN" ] && unset HF_TOKEN' >> ~/.bashrc
93+
echo '[ -z "$GITHUB_TOKEN" ] && unset GITHUB_TOKEN' >> ~/.bashrc
94+
echo '[ -z "$SSH_AUTH_SOCK" ] && unset SSH_AUTH_SOCK' >> ~/.bashrc
95+
fi
96+
97+
$HOME/dynamo/deploy/dynamo_check.py --import-check-only
98+
99+
{ set +x; } 2>/dev/null
100+
101+
# Check SSH agent forwarding status
102+
if [ -n "$SSH_AUTH_SOCK" ]; then
103+
if ssh-add -l > /dev/null 2>&1; then
104+
echo "SSH agent forwarding is working - found $(ssh-add -l | wc -l) key(s):"
105+
ssh-add -l
106+
else
107+
echo "⚠️ SSH_AUTH_SOCK is set but ssh-add failed - agent may not be accessible"
108+
fi
109+
else
110+
echo "⚠️ SSH agent forwarding not configured - SSH_AUTH_SOCK is not set"
111+
fi
112+
113+
cat <<EOF
114+
115+
✅ SUCCESS: Built cargo project, installed Python bindings, configured pre-commit hooks
116+
117+
Example commands:
118+
cargo build --locked --profile dev # Build Rust project in $CARGO_TARGET_DIR
119+
cd lib/bindings/python && maturin develop --uv # Update Python bindings (if you changed them)
120+
cargo fmt && cargo clippy # Format and lint code before committing
121+
cargo doc --no-deps # Generate documentation
122+
uv pip install -e . # Install various Python packages Dynamo depends on
123+
EOF
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
name: NVIDIA Github Validation
5+
6+
on:
7+
push:
8+
branches:
9+
- main
10+
- "pull-request/[0-9]+"
11+
12+
jobs:
13+
build-test:
14+
runs-on: gpu-l40-runners
15+
strategy:
16+
matrix:
17+
framework: [vllm]
18+
include:
19+
- framework: vllm
20+
target: runtime
21+
pytest_marks: "e2e and vllm and gpu_1 and not slow"
22+
# Do not cancel main branch runs
23+
concurrency:
24+
group: ${{ matrix.framework }}-build-test-${{ github.ref_name || github.run_id }}
25+
cancel-in-progress: ${{ github.ref_name != 'main' }}
26+
27+
name: Build and Test - ${{ matrix.framework }}
28+
env:
29+
CONTAINER_ID: test_${{ github.run_id }}_${{ github.run_attempt }}_${{ github.job }}_${{ matrix.framework }}
30+
PYTEST_XML_FILE: pytest_test_report.xml
31+
FRAMEWORK: ${{ matrix.framework }}
32+
TARGET: ${{ matrix.target }}
33+
PYTEST_MARKS: ${{ matrix.pytest_marks }}
34+
35+
steps:
36+
- name: Checkout repository
37+
uses: actions/checkout@v4
38+
- name: Set up Docker Buildx
39+
uses: docker/setup-buildx-action@v3
40+
- name: Login to NGC
41+
if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name == 'push'
42+
run: |
43+
echo "${{ secrets.NGC_CI_ACCESS_TOKEN }}" | docker login nvcr.io -u '$oauthtoken' --password-stdin
44+
- name: Cleanup
45+
if: always()
46+
run: |
47+
docker system prune -af
48+
- name: Debug
49+
run: |
50+
lsmod | grep nvidia
51+
sudo dmesg | grep -i nvrm || true
52+
nvidia-smi
53+
- name: Build image
54+
env:
55+
GITHUB_TOKEN: ${{ secrets.CI_TOKEN }}
56+
AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
57+
SCCACHE_S3_BUCKET: ${{ secrets.SCCACHE_S3_BUCKET }}
58+
run: |
59+
./container/build.sh --tag ${{ matrix.framework }}:latest \
60+
--target ${{ matrix.target }} \
61+
--framework ${{ matrix.framework }} \
62+
--use-sccache \
63+
--sccache-bucket "$SCCACHE_S3_BUCKET" \
64+
--sccache-region "$AWS_DEFAULT_REGION"
65+
- name: Run pytest
66+
run: |
67+
docker run --rm --gpus all -w /workspace \
68+
--name ${{ env.CONTAINER_ID }}_pytest \
69+
${{ matrix.framework }}:latest \
70+
bash -c "pytest -xsv --basetemp=/tmp --junitxml=${{ env.PYTEST_XML_FILE }} -m \"${{ env.PYTEST_MARKS }}\""

.github/workflows/build-and-test.yml renamed to .github/workflows/container-validation-dynamo.yml

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,19 @@
11
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
# SPDX-License-Identifier: Apache-2.0
3-
#
4-
# Licensed under the Apache License, Version 2.0 (the "License");
5-
# you may not use this file except in compliance with the License.
6-
# You may obtain a copy of the License at
7-
#
8-
# http://www.apache.org/licenses/LICENSE-2.0
9-
#
10-
# Unless required by applicable law or agreed to in writing, software
11-
# distributed under the License is distributed on an "AS IS" BASIS,
12-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13-
# See the License for the specific language governing permissions and
14-
# limitations under the License.
153

16-
name: NVIDIA Test Github Validation
4+
name: NVIDIA Github Validation
175

186
on:
197
push:
208
branches:
219
- main
2210
pull_request:
2311

12+
# Do not cancel main branch runs
13+
concurrency:
14+
group: dynamo-build-test-${{ github.ref_name || github.run_id }}
15+
cancel-in-progress: ${{ github.ref_name != 'main' }}
16+
2417
jobs:
2518
build-test:
2619
runs-on:
@@ -53,7 +46,14 @@ jobs:
5346
docker compose up -d nats-server etcd-server
5447
- name: Run Rust checks (block-manager + integration tests)
5548
run: |
56-
docker run -v ${{ github.workspace }}:/workspace -w /workspace/lib/llm --name ${{ env.CONTAINER_ID }}_rust_checks ${{ steps.define_image_tag.outputs.image_tag }} bash -ec 'rustup component add rustfmt clippy && cargo fmt -- --check && cargo clippy --features block-manager --no-deps --all-targets -- -D warnings && cargo test --locked --all-targets --features=block-manager && cargo test --locked --features integration -- --nocapture'
49+
docker run --rm -v ${{ github.workspace }}:/workspace -w /workspace/lib/llm \
50+
--name ${{ env.CONTAINER_ID }}_rust_checks \
51+
${{ steps.define_image_tag.outputs.image_tag }} \
52+
bash -ec 'rustup component add rustfmt clippy && \
53+
cargo fmt -- --check && \
54+
cargo clippy --features block-manager --no-deps --all-targets -- -D warnings && \
55+
cargo test --locked --all-targets --features=block-manager && \
56+
cargo test --locked --features integration -- --nocapture'
5757
- name: Cleanup services
5858
if: always()
5959
working-directory: ./deploy
@@ -63,7 +63,10 @@ jobs:
6363
env:
6464
PYTEST_MARKS: "pre_merge or mypy"
6565
run: |
66-
docker run -v ${{ github.workspace }}:/workspace -w /workspace --name ${{ env.CONTAINER_ID }}_pytest ${{ steps.define_image_tag.outputs.image_tag }} bash -c "pytest --basetemp=/tmp --junitxml=${{ env.PYTEST_XML_FILE }} -m \"${{ env.PYTEST_MARKS }}\""
66+
docker run -v ${{ github.workspace }}:/workspace -w /workspace \
67+
--name ${{ env.CONTAINER_ID }}_pytest \
68+
${{ steps.define_image_tag.outputs.image_tag }} \
69+
bash -c "pytest --basetemp=/tmp --junitxml=${{ env.PYTEST_XML_FILE }} -m \"${{ env.PYTEST_MARKS }}\""
6770
- name: Copy test report from test Container
6871
if: always()
6972
run: |

.github/workflows/docs-link-check.yml

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,23 @@ jobs:
5151
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
5252
run: |
5353
set -euo pipefail
54+
55+
# Set offline mode for pull requests, full check for pushes to main
56+
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
57+
echo "Running lychee in offline mode (internal links only) for PR check"
58+
OFFLINE_FLAG="--offline"
59+
else
60+
echo "Running lychee in full mode (all links) for main branch"
61+
OFFLINE_FLAG=""
62+
fi
63+
5464
# Run lychee against all files in repo
5565
lychee \
5666
--cache \
5767
--no-progress \
58-
--exclude-path "ATTRIBUTIONS.*" \
68+
--root-dir "${{ github.workspace }}" \
69+
--exclude-path ".*ATTRIBUTIONS.*" \
5970
--accept "200..=299, 403, 429" \
6071
--exclude-all-private --exclude 0.0.0.0 \
72+
$OFFLINE_FLAG \
6173
.

0 commit comments

Comments
 (0)