components/src/dynamo/vllm/protocol.py (3 changes: 2 additions & 1 deletion)

@@ -4,8 +4,9 @@
 from typing import Any, List, Optional

 from pydantic import BaseModel, ConfigDict
+from vllm.logprobs import PromptLogprobs
 from vllm.outputs import CompletionOutput
-from vllm.sequence import PromptLogprobs, RequestMetrics
+from vllm.sequence import RequestMetrics


 class MyRequestOutput(BaseModel):
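Note on the protocol.py hunk: vLLM v0.11.0 moves PromptLogprobs from vllm.sequence to vllm.logprobs, while RequestMetrics stays in vllm.sequence, and the PR simply targets the new location. If a single code path ever had to work against both the old and new pins, a guarded import along the lines below would cover both layouts; this is an illustrative sketch, not part of the diff.

# Illustrative compatibility shim (assumption, not part of this PR):
# v0.11.0 exports PromptLogprobs from vllm.logprobs, earlier releases
# exported it from vllm.sequence; RequestMetrics is unchanged.
try:
    from vllm.logprobs import PromptLogprobs  # vLLM >= 0.11.0
except ImportError:
    from vllm.sequence import PromptLogprobs  # vLLM <= 0.10.x
from vllm.sequence import RequestMetrics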
container/Dockerfile.vllm (4 changes: 2 additions & 2 deletions)

@@ -15,9 +15,9 @@ ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
 ARG CUDA_VERSION="12.8"

 # Make sure to update the dependency version in pyproject.toml when updating this
-ARG VLLM_REF="v0.10.2"
+ARG VLLM_REF="v0.11.0"
 # FlashInfer only respected when building vLLM from source, ie when VLLM_REF does not start with 'v' or for arm64 builds
-ARG FLASHINF_REF="v0.3.0"
+ARG FLASHINF_REF="v0.3.1"
 ARG TORCH_BACKEND="cu128"

 # If left blank, then we will fallback to vLLM defaults
container/deps/vllm/install_vllm.sh (12 changes: 5 additions & 7 deletions)

@@ -13,7 +13,7 @@

 set -euo pipefail

-VLLM_REF="v0.10.2"
+VLLM_REF="v0.11.0"

 # Basic Configurations
 ARCH=$(uname -m)

@@ -29,7 +29,7 @@ CUDA_VERSION="12.8" # For DEEPGEMM
 # These flags are applicable when installing vLLM from source code
 EDITABLE=true
 VLLM_GIT_URL="https://github.com/vllm-project/vllm.git"
-FLASHINF_REF="v0.3.0"
+FLASHINF_REF="v0.3.1"

 while [[ $# -gt 0 ]]; do
     case $1 in

@@ -131,10 +131,8 @@ git clone $VLLM_GIT_URL vllm
 cd vllm
 git checkout $VLLM_REF

-# TODO remove in future vLLM release, re-instate ignore torch script
-# https://github.com/vllm-project/vllm/pull/24729
-GIT_COMMITTER_NAME="Container Build" GIT_COMMITTER_EMAIL="[email protected]" git cherry-pick 740f064
-
+# TODO leave this here in case we need to do cherry-picks in future
+# GIT_COMMITTER_NAME="Container Build" GIT_COMMITTER_EMAIL="[email protected]" git cherry-pick 740f064

 echo "\n=== Installing vLLM & FlashInfer ==="
@@ -243,4 +241,4 @@ echo "\n=== Installing EP Kernels (PPLX and DeepEP) ==="
 cd ep_kernels/
 TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST" bash install_python_libraries.sh

-echo "\n✅ All installations completed successfully!"
\ No newline at end of file
+echo "\n✅ All installations completed successfully!"