Commit 5f57ea5 (parent: 07cfc3a)
chore: Finish vllm upgrade to 0.10.1 + cleanup (#2528)

4 files changed (+18, -15 lines)

components/backends/vllm/src/dynamo/vllm/args.py
Lines changed: 1 addition & 1 deletion

@@ -170,7 +170,7 @@ async def configure_ports_with_etcd(config: Config, etcd_client):
     logger.info(f"Allocated ZMQ KV events port: {kv_port} (worker_id={worker_id})")

     # Allocate side channel ports
-    # https://github.com/vllm-project/vllm/blob/releases/v0.10.0/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py#L372
+    # https://github.com/vllm-project/vllm/blob/releases/v0.10.1/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py#L443
     # NIXL calculates ports as: base_port + (dp_rank * tp_size) + tp_rank
     # For dp_rank, we need to reserve tp_size consecutive ports
     tp_size = config.engine_args.tensor_parallel_size or 1
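The reference link moves from the v0.10.0 to the v0.10.1 source tree (the connector code shifted from line 372 to line 443), but the port math itself is unchanged. A minimal sketch of the layout the two comments describe, with invented values for base_port, tp_size, and dp_rank (none of these literals appear in the code above):

# NIXL side-channel port formula: base_port + (dp_rank * tp_size) + tp_rank,
# so each dp_rank needs tp_size consecutive ports reserved.
base_port=5600; tp_size=4
for dp_rank in 0 1; do
  for tp_rank in $(seq 0 $((tp_size - 1))); do
    echo "dp_rank=${dp_rank} tp_rank=${tp_rank} -> port $((base_port + dp_rank * tp_size + tp_rank))"
  done
done
# dp_rank=0 uses ports 5600-5603, dp_rank=1 uses 5604-5607: no overlap.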

container/Dockerfile.vllm
Lines changed: 4 additions & 4 deletions

@@ -13,15 +13,15 @@ ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
 ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"

 # Make sure to update the dependency version in pyproject.toml when updating this
-ARG VLLM_REF="77a6bf07aedf132aad2b6719f6d87abc5d3311ab"
+ARG VLLM_REF="aab549870df50edf0512f0a59b574f692f546465" # from v0.10.1
 ARG TORCH_BACKEND="cu128"

-# Match 0.10.0 vLLM release
-# https://github.com/vllm-project/vllm/releases/tag/v0.10.0
+# Match 0.10.1 vLLM release
+# https://github.com/vllm-project/vllm/releases/tag/v0.10.1
 # Pinned to commit before https://github.com/deepseek-ai/DeepGEMM/pull/112 for DeepGEMM which seems to break on H100:
 # "RuntimeError: Failed: CUDA runtime error csrc/jit/kernel_runtime.hpp:108 '98'"
 ARG DEEPGEMM_REF="f85ec64"
-ARG FLASHINF_REF="v0.2.8rc1"
+ARG FLASHINF_REF="v0.2.11"

 # Define general architecture ARGs for supporting both x86 and aarch64 builds.
 # ARCH: Used for package suffixes (e.g., amd64, arm64)
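Since these pins are plain build ARGs, they can also be overridden at build time without editing the Dockerfile. A hedged example of pinning a future bump from the command line (the image tag is invented for illustration; the ARG names are the ones defined above):

docker build -f container/Dockerfile.vllm \
  --build-arg VLLM_REF=aab549870df50edf0512f0a59b574f692f546465 \
  --build-arg FLASHINF_REF=v0.2.11 \
  -t dynamo-vllm:0.10.1 .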

container/deps/vllm/install_vllm.sh
Lines changed: 12 additions & 9 deletions

@@ -20,13 +20,16 @@ set -euo pipefail

 # Parse arguments
 EDITABLE=true
-VLLM_REF="77a6bf07aedf132aad2b6719f6d87abc5d3311ab"
+VLLM_REF="aab549870df50edf0512f0a59b574f692f546465" # from v0.10.1
+# When updating above VLLM_REF make sure precompiled wheel file URL is correct. Run this command:
+# aws s3 ls s3://vllm-wheels/${VLLM_REF}/ --region us-west-2 --no-sign-request
+VLLM_PRECOMPILED_WHEEL_LOCATION="https://vllm-wheels.s3.us-west-2.amazonaws.com/${VLLM_REF}/vllm-0.10.1-cp38-abi3-manylinux1_x86_64.whl"
 VLLM_GIT_URL="https://github.com/vllm-project/vllm.git"
 MAX_JOBS=16
 INSTALLATION_DIR=/tmp
 ARCH=$(uname -m)
 DEEPGEMM_REF="f85ec64"
-FLASHINF_REF="v0.2.8rc1"
+FLASHINF_REF="v0.2.11"
 TORCH_BACKEND="cu128"

 # Convert x86_64 to amd64 for consistency with Docker ARG
@@ -83,13 +86,13 @@ while [[ $# -gt 0 ]]; do
             echo "Options:"
             echo "  --editable              Install vllm in editable mode (default)"
             echo "  --no-editable           Install vllm in non-editable mode"
-            echo "  --vllm-ref REF          Git reference to checkout (default: f4135232b9a8c4845f8961fb1cd17581c56ae2ce)"
-            echo "  --max-jobs NUM          Maximum number of parallel jobs (default: 16)"
+            echo "  --vllm-ref REF          Git reference to checkout (default: ${VLLM_REF})"
+            echo "  --max-jobs NUM          Maximum number of parallel jobs (default: ${MAX_JOBS})"
             echo "  --arch ARCH             Architecture (amd64|arm64, default: auto-detect)"
-            echo "  --installation-dir DIR  Directory to install vllm (default: /tmp/vllm)"
-            echo "  --deepgemm-ref REF      Git reference for DeepGEMM (default: 1876566)"
-            echo "  --flashinf-ref REF      Git reference for Flash Infer (default: v0.2.8rc1)"
-            echo "  --torch-backend BACKEND Torch backend to use (default: cu128)"
+            echo "  --installation-dir DIR  Directory to install vllm (default: ${INSTALLATION_DIR})"
+            echo "  --deepgemm-ref REF      Git reference for DeepGEMM (default: ${DEEPGEMM_REF})"
+            echo "  --flashinf-ref REF      Git reference for Flash Infer (default: ${FLASHINF_REF})"
+            echo "  --torch-backend BACKEND Torch backend to use (default: ${TORCH_BACKEND})"
             exit 0
             ;;
         *)
@@ -154,7 +157,7 @@ else
     exit 1
 fi

-export VLLM_PRECOMPILED_WHEEL_LOCATION=https://vllm-wheels.s3.us-west-2.amazonaws.com/${VLLM_REF}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl
+export VLLM_PRECOMPILED_WHEEL_LOCATION="${VLLM_PRECOMPILED_WHEEL_LOCATION}"

 if [ "$EDITABLE" = "true" ]; then
     uv pip install -e . --torch-backend=$TORCH_BACKEND
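With the wheel URL now derived from VLLM_REF at the top of the script, a future ref bump becomes a one-line edit plus a wheel check. A sketch of that workflow, assuming the script is run from the repository root (the verification command is the one quoted in the new comment; the flags are those listed in the --help text above):

# 1. Confirm a precompiled wheel exists for the new ref:
aws s3 ls s3://vllm-wheels/aab549870df50edf0512f0a59b574f692f546465/ \
  --region us-west-2 --no-sign-request

# 2. Install with pins matching Dockerfile.vllm:
./container/deps/vllm/install_vllm.sh \
  --no-editable \
  --vllm-ref aab549870df50edf0512f0a59b574f692f546465 \
  --flashinf-ref v0.2.11 \
  --torch-backend cu128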

pyproject.toml
Lines changed: 1 addition & 1 deletion

@@ -56,7 +56,7 @@ trtllm =[
 vllm = [
     "uvloop",
     "nixl<=0.4.1",
-    "vllm==0.10.0",
+    "vllm[flashinfer]==0.10.1",
 ]

 sglang = [
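The new spec both bumps the version and pulls in vLLM's flashinfer extra, matching the FLASHINF_REF bump in the Dockerfile. A quick sanity check that the pin resolves (assuming a uv-managed environment, as the install script uses uv):

uv pip install "vllm[flashinfer]==0.10.1"
python -c "import vllm; print(vllm.__version__)"  # expect 0.10.1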
