Skip to content

Commit 1bed7e4

Browse files
author
Ylang Tsou
committed
Add workflow to build vLLM-TPU wheel using PyPI tpu-inference
Signed-off-by: Ylang Tsou <[email protected]>
1 parent ca0914d commit 1bed7e4

File tree

6 files changed

+202
-1
lines changed

6 files changed

+202
-1
lines changed

.buildkite/pipeline_pypi.yml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
steps:
  # -----------------------------------------------------------------
  # TEST STEPS - Calling wrapper
  # -----------------------------------------------------------------
  # Nightly-only delay so upstream artifacts settle before benchmarking.
  - label: "Wait for 20 mins"
    key: "wait_20_minutes"
    if: build.env("NIGHTLY") == "1"
    depends_on: "record_verified_commit_hashes"
    agents:
      queue: cpu
    commands:
      - "echo 'Starting 20 minute delay...'"
      - "sleep 1200"
      - "echo 'Delay finished, starting benchmarks.'"

  # Nightly throughput benchmark: Llama-3.1-8B-Instruct on a single v6e chip.
  - label: "Performance benchmarks for meta-llama/Llama-3.1-8B-Instruct"
    key: "meta-llama_Llama-3_1-8B-Instruct_Benchmark"
    if: build.env("NIGHTLY") == "1"
    depends_on: "wait_20_minutes"
    agents:
      queue: tpu_v6e_queue
    env:
      TEST_MODEL: meta-llama/Llama-3.1-8B-Instruct
      TENSOR_PARALLEL_SIZE: 1
      MINIMUM_THROUGHPUT_THRESHOLD: 10.77
      INPUT_LEN: 1800
      OUTPUT_LEN: 128
      PREFIX_LEN: 0
      MAX_MODEL_LEN: 2048
      MAX_NUM_SEQS: 256
      MAX_NUM_BATCHED_TOKENS: 1024
    commands:
      - |
        .buildkite/scripts/run_with_pypi.sh bash /workspace/tpu_inference/tests/e2e/benchmarking/benchmark.sh

  # Nightly throughput benchmark: Qwen3-4B on a single v6e chip.
  - label: "Performance benchmarks for Qwen/Qwen3-4B"
    key: "Qwen_Qwen3-4B_Benchmark"
    if: build.env("NIGHTLY") == "1"
    depends_on: "wait_20_minutes"
    agents:
      queue: tpu_v6e_queue
    env:
      TEST_MODEL: Qwen/Qwen3-4B
      TENSOR_PARALLEL_SIZE: 1
      MINIMUM_THROUGHPUT_THRESHOLD: 11.00
      INPUT_LEN: 1800
      OUTPUT_LEN: 128
      PREFIX_LEN: 0
      MAX_MODEL_LEN: 2048
      MAX_NUM_SEQS: 94
      MAX_NUM_BATCHED_TOKENS: 4096
    commands:
      - |
        .buildkite/scripts/run_with_pypi.sh bash /workspace/tpu_inference/tests/e2e/benchmarking/benchmark.sh

.buildkite/scripts/bootstrap.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ upload_pipeline() {
2929
# buildkite-agent pipeline upload .buildkite/pipeline_torch.yml
3030
buildkite-agent pipeline upload .buildkite/main.yml
3131
buildkite-agent pipeline upload .buildkite/nightly_releases.yml
32+
buildkite-agent pipeline upload .buildkite/pipeline_pypi.yml
3233
}
3334

3435
echo "--- Starting Buildkite Bootstrap ---"
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
#!/bin/bash
#
# Build a vLLM-TPU wheel pinned to a specific tpu-inference release from PyPI.
#
# Clones vLLM, checks out the requested branch/tag, pins tpu-inference in
# requirements/tpu.txt, runs the upstream build script, then restores the
# requirements file (even on failure, via an EXIT trap).
#
# Usage: build_vllm_tpu.sh <tpu-inference-version> <vllm-tpu-version> [vllm-branch-or-tag]

set -euo pipefail

# --- Argument Validation ---
# Validate BEFORE reading $1/$2: under 'set -u' an unset positional would
# otherwise abort with an unhelpful "unbound variable" error.
if [ "$#" -lt 2 ]; then
  echo "Usage: $0 <tpu-inference-version> <vllm-tpu-version> [vllm-branch-or-tag]" >&2
  echo "  [vllm-branch-or-tag] is optional, defaults to 'main'." >&2
  exit 1
fi

# --- Script Configuration ---
TPU_INFERENCE_VERSION=$1
VLLM_TPU_VERSION=$2
VLLM_BRANCH=${3:-"main"}
readonly VLLM_REPO="https://github.com/vllm-project/vllm.git"
readonly REPO_DIR="vllm"

echo "--- Starting vLLM-TPU wheel build ---"
echo "TPU Inference Version: ${TPU_INFERENCE_VERSION}"
echo "vLLM-TPU Version: ${VLLM_TPU_VERSION}"
echo "vLLM Branch/Tag: ${VLLM_BRANCH}"

# --- Step 1: Clone vLLM repository ---
if [ -d "$REPO_DIR" ]; then
  echo "Repository '$REPO_DIR' already exists. Skipping clone."
else
  echo "Cloning vLLM repository..."
  git clone -- "${VLLM_REPO}"
fi
cd "${REPO_DIR}"

# --- Step 1.5: Checkout the specified vLLM branch/tag ---
echo "Checking out vLLM branch/tag: ${VLLM_BRANCH}..."
if ! git checkout "${VLLM_BRANCH}"; then
  echo "ERROR: Failed to checkout branch/tag '${VLLM_BRANCH}'. Please check the branch/tag name." >&2
  exit 1
fi
echo "Successfully checked out ${VLLM_BRANCH}."
git pull || echo "Warning: Failed to pull updates (may be on a tag)."

# --- Step 2: Update tpu-inference version in requirements ---
REQUIRED_LINE="tpu-inference==${TPU_INFERENCE_VERSION}"
REQUIREMENTS_FILE="requirements/tpu.txt"
BACKUP_FILE="${REQUIREMENTS_FILE}.bak"

# Revert the requirements file on ANY exit. Doing the revert inline at the
# end of the script (as before) never ran when the build failed, because
# 'set -e' exits first and leaves the checkout dirty; an EXIT trap fires on
# both success and failure.
cleanup() {
  echo "--- Cleaning up local changes ---"
  if [ -f "$BACKUP_FILE" ]; then
    echo "Reverting $REQUIREMENTS_FILE from backup."
    # Remove the modified file, then restore the pristine backup.
    rm -f -- "$REQUIREMENTS_FILE"
    mv -- "$BACKUP_FILE" "$REQUIREMENTS_FILE"
  else
    echo "Warning: Backup file $BACKUP_FILE not found. Skipping revert."
  fi
  echo "Cleanup complete. Script finished."
}
trap cleanup EXIT

echo "Updating tpu-inference version in $REQUIREMENTS_FILE..."

if [ -f "$REQUIREMENTS_FILE" ]; then
  # $(tail -c 1 ...) strips a trailing newline, so a NON-empty result means
  # the file's last byte is not a newline — append one before editing.
  if [ "$(tail -c 1 "$REQUIREMENTS_FILE")" != "" ]; then
    echo "" >> "$REQUIREMENTS_FILE"
    echo "(Action: Added missing newline to the end of $REQUIREMENTS_FILE for safety.)"
  fi
fi

if grep -q "^tpu-inference==" "$REQUIREMENTS_FILE"; then
  # Replace the existing pin; 'sed -i.bak' snapshots the pre-edit file as
  # the backup that cleanup() restores.
  echo "(Action: Existing version found. Replacing.)"
  sed -i.bak "s/^tpu-inference==.*/$REQUIRED_LINE/" "$REQUIREMENTS_FILE"
else
  # Pin not present -> snapshot the pre-edit file FIRST, then append.
  # (Previously an empty .bak was 'touch'ed here, so the later revert
  # replaced requirements/tpu.txt with an empty file.)
  echo "(Action: Line not found. Appending new dependency.)"
  cp -- "$REQUIREMENTS_FILE" "$BACKUP_FILE"
  echo "$REQUIRED_LINE" >> "$REQUIREMENTS_FILE"
fi

# --- Step 3: Execute the vLLM TPU build script ---
echo "Ensuring 'build' package is installed..."
pip install build
echo "Executing the vLLM TPU build script..."
bash tools/vllm-tpu/build.sh "${VLLM_TPU_VERSION}"

echo "--- Build complete! ---"
echo "The wheel file can be found in the 'vllm/dist' directory."

# --- Step 4: Cleanup runs automatically via the EXIT trap above. ---
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#!/bin/bash
# Thin wrapper around run_in_docker.sh that switches the Docker build to the
# PyPI-based image (docker/Dockerfile.pypi) instead of building from source.
# Exit on error, exit on unset variable, fail on pipe errors.
set -euo pipefail

# Flag consumed by setup_docker_env.sh to select docker/Dockerfile.pypi.
export RUN_WITH_PYPI="true"

# Resolve this script's own directory so the wrapper works from any CWD.
script_dir=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)

# shellcheck disable=SC1091
source "$script_dir/run_in_docker.sh"

.buildkite/scripts/setup_docker_env.sh

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,16 @@ setup_environment() {
77
local image_name_param=${1:-"vllm-tpu"}
88
IMAGE_NAME="$image_name_param"
99

10+
local DOCKERFILE_NAME="Dockerfile"
11+
12+
# Determine whether to build from PyPI packages or source.
13+
if [[ "${RUN_WITH_PYPI:-false}" == "true" ]]; then
14+
DOCKERFILE_NAME="Dockerfile.pypi"
15+
echo "Building from PyPI packages. Using docker/${DOCKERFILE_NAME}"
16+
else
17+
echo "Building from source. Using docker/${DOCKERFILE_NAME}"
18+
fi
19+
1020
if ! grep -q "^HF_TOKEN=" /etc/environment; then
1121
gcloud secrets versions access latest --secret=bm-agent-hf-token --quiet | \
1222
sudo tee -a /etc/environment > /dev/null <<< "HF_TOKEN=$(cat)"
@@ -60,5 +70,5 @@ setup_environment() {
6070
docker build \
6171
--build-arg VLLM_COMMIT_HASH="${VLLM_COMMIT_HASH}" \
6272
--build-arg IS_FOR_V7X="${IS_FOR_V7X:-false}" \
63-
--no-cache -f docker/Dockerfile -t "${IMAGE_NAME}:${BUILDKITE_COMMIT}" .
73+
--no-cache -f docker/${DOCKERFILE_NAME} -t "${IMAGE_NAME}:${BUILDKITE_COMMIT}" .
6474
}

docker/Dockerfile.pypi

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Build a vLLM-TPU image whose wheel is pinned to the latest tpu-inference
# pre-release on PyPI (used by the PyPI pipeline instead of docker/Dockerfile).
ARG NIGHTLY_DATE="20250714"
ARG BASE_IMAGE="us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:nightly_3.12_tpuvm_$NIGHTLY_DATE"
# The latest main will be used if arg unspecified
ARG VLLM_COMMIT_HASH=""

FROM $BASE_IMAGE

# Remove existing versions of dependencies
RUN pip uninstall -y torch torch_xla torchvision

# Install some basic utilities. Skip recommended packages and drop the apt
# cache afterwards to keep the image layer small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    libopenblas-base libopenmpi-dev libomp-dev \
    && rm -rf /var/lib/apt/lists/*

# Copy the tpu_inference repo into the image (benchmarking deps + build scripts).
WORKDIR /workspace/tpu_inference
COPY requirements_benchmarking.txt .
# These are needed for the E2E benchmarking tests (i.e. tests/e2e/benchmarking/mlperf.sh)
RUN pip install -r requirements_benchmarking.txt --retries 3
COPY . .

# Build vllm-tpu wheel
WORKDIR /workspace
# Re-declare so the pre-FROM ARG is visible in this build stage.
ARG VLLM_COMMIT_HASH
# Resolve the newest tpu-inference dev version from PyPI and build the wheel
# against it; the vllm-tpu wheel version mirrors the tpu-inference version.
RUN TPU_INFERENCE_VERSION=$(pip index versions tpu-inference --pre 2>/dev/null | grep -oE "[0-9]+\.[0-9]+\.[0-9]+\.dev[0-9]+" | head -n 1) && \
    VLLM_TPU_VERSION=${TPU_INFERENCE_VERSION} && \
    bash tpu_inference/.buildkite/scripts/build_vllm_tpu.sh "${TPU_INFERENCE_VERSION}" "${VLLM_TPU_VERSION}" "${VLLM_COMMIT_HASH}"

# Install vllm-tpu wheel
RUN pip install --no-cache-dir vllm/dist/*.whl

CMD ["/bin/bash"]

0 commit comments

Comments
 (0)