@@ -20,13 +20,16 @@ set -euo pipefail
20
20
21
21
# Parse arguments
# Default values for the installer. The usage text further down advertises
# --vllm-ref, --max-jobs, --installation-dir, --deepgemm-ref, --flashinf-ref
# and --torch-backend as overrides for these. Values must not carry
# leading/trailing whitespace: they are used as git refs, URLs and CLI values.
22
22
EDITABLE=true
23
- VLLM_REF=" 77a6bf07aedf132aad2b6719f6d87abc5d3311ab"
23
+ VLLM_REF="aab549870df50edf0512f0a59b574f692f546465" # from v0.10.1
24
+ # When updating VLLM_REF above, make sure the precompiled wheel URL below still exists. List the available wheels with:
25
+ # aws s3 ls s3://vllm-wheels/${VLLM_REF}/ --region us-west-2 --no-sign-request
26
+ VLLM_PRECOMPILED_WHEEL_LOCATION="https://vllm-wheels.s3.us-west-2.amazonaws.com/${VLLM_REF}/vllm-0.10.1-cp38-abi3-manylinux1_x86_64.whl"
24
27
VLLM_GIT_URL="https://github.com/vllm-project/vllm.git"
25
28
MAX_JOBS=16
26
29
INSTALLATION_DIR=/tmp
27
30
ARCH=$(uname -m)
28
31
DEEPGEMM_REF="f85ec64"
29
- FLASHINF_REF=" v0.2.8rc1 "
32
+ FLASHINF_REF="v0.2.11"
30
33
TORCH_BACKEND="cu128"
31
34
32
35
# Convert x86_64 to amd64 for consistency with Docker ARG
@@ -83,13 +86,13 @@ while [[ $# -gt 0 ]]; do
83
86
# Usage text. The defaults shown are interpolated from the script's variables.
# Bash has no f-string prefix: `echo f "..."` passes a literal "f" as an extra
# argument and prints it, so plain double-quoted strings are used.
echo " Options:"
84
87
echo " --editable Install vllm in editable mode (default)"
85
88
echo " --no-editable Install vllm in non-editable mode"
86
- echo " --vllm-ref REF Git reference to checkout (default: f4135232b9a8c4845f8961fb1cd17581c56ae2ce )"
87
- echo " --max-jobs NUM Maximum number of parallel jobs (default: 16 )"
89
+ echo " --vllm-ref REF Git reference to checkout (default: ${VLLM_REF} )"
90
+ echo " --max-jobs NUM Maximum number of parallel jobs (default: ${MAX_JOBS} )"
88
91
echo " --arch ARCH Architecture (amd64|arm64, default: auto-detect)"
89
- echo " --installation-dir DIR Directory to install vllm (default: /tmp/vllm )"
90
- echo " --deepgemm-ref REF Git reference for DeepGEMM (default: 1876566 )"
91
- echo " --flashinf-ref REF Git reference for Flash Infer (default: v0.2.8rc1 )"
92
- echo " --torch-backend BACKEND Torch backend to use (default: cu128 )"
92
+ echo " --installation-dir DIR Directory to install vllm (default: ${INSTALLATION_DIR} )"
93
+ echo " --deepgemm-ref REF Git reference for DeepGEMM (default: ${DEEPGEMM_REF} )"
94
+ echo " --flashinf-ref REF Git reference for Flash Infer (default: ${FLASHINF_REF} )"
95
+ echo " --torch-backend BACKEND Torch backend to use (default: ${TORCH_BACKEND} )"
93
96
exit 0
94
97
;;
95
98
* )
154
157
exit 1
155
158
fi
156
159
157
- export VLLM_PRECOMPILED_WHEEL_LOCATION=https://vllm-wheels.s3.us-west-2.amazonaws.com/ ${VLLM_REF} /vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl
160
# Export the wheel URL defined with the other defaults, without padding it
# with spaces.
+ export VLLM_PRECOMPILED_WHEEL_LOCATION="${VLLM_PRECOMPILED_WHEEL_LOCATION}"
158
161
159
162
# Compare the exact values: any padding inside the quotes becomes part of the
# compared strings and can make this branch unreachable.
if [ "${EDITABLE}" = "true" ]; then
160
163
uv pip install -e . --torch-backend="${TORCH_BACKEND}"
0 commit comments