Commit 87a31c2

Author: Ylang Tsou (committed)
Add Dockerfile to verify vllm-tpu wheel
Signed-off-by: Ylang Tsou <[email protected]>
1 parent cf4f845 commit 87a31c2

File tree: 5 files changed, +288 -3 lines changed


.buildkite/pipeline_test_pypi.yml

Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
+steps:
+  # -----------------------------------------------------------------
+  # TEST STEPS - Calling wrapper
+  # -----------------------------------------------------------------
+  - label: "Performance benchmarks for meta-llama/Llama-3.1-8B-Instruct"
+    key: "meta-llama_Llama-3_1-8B-Instruct_Benchmark"
+    agents:
+      queue: tpu_v6e_queue
+    env:
+      TEST_MODEL: meta-llama/Llama-3.1-8B-Instruct
+      TENSOR_PARALLEL_SIZE: 1
+      MINIMUM_THROUGHPUT_THRESHOLD: 10.77
+      INPUT_LEN: 1800
+      OUTPUT_LEN: 128
+      PREFIX_LEN: 0
+      MAX_MODEL_LEN: 2048
+      MAX_NUM_SEQS: 256
+      MAX_NUM_BATCHED_TOKENS: 1024
+    commands:
+      - |
+        .buildkite/scripts/run_with_pypi.sh bash /workspace/tpu_inference/tests/e2e/benchmarking/benchmark.sh
+
+  - label: "Performance benchmarks for Qwen/Qwen3-4B"
+    key: "Qwen_Qwen3-4B_Benchmark"
+    agents:
+      queue: tpu_v6e_queue
+    env:
+      TEST_MODEL: Qwen/Qwen3-4B
+      TENSOR_PARALLEL_SIZE: 1
+      MINIMUM_THROUGHPUT_THRESHOLD: 11.00
+      INPUT_LEN: 1800
+      OUTPUT_LEN: 128
+      PREFIX_LEN: 0
+      MAX_MODEL_LEN: 2048
+      MAX_NUM_SEQS: 94
+      MAX_NUM_BATCHED_TOKENS: 4096
+    commands:
+      - |
+        .buildkite/scripts/run_with_pypi.sh bash /workspace/tpu_inference/tests/e2e/benchmarking/benchmark.sh
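The two steps above only differ in the model and its tuning values. For local debugging, either one can be reproduced outside Buildkite by exporting the same variables and calling the wrapper directly. A minimal sketch, assuming a TPU v6e host with Docker and this repository checked out at its root (BUILDKITE_COMMIT is normally injected by the agent, so it is faked here for a manual run):

    # Values copied from the first benchmark step above.
    export TEST_MODEL=meta-llama/Llama-3.1-8B-Instruct
    export TENSOR_PARALLEL_SIZE=1
    export MINIMUM_THROUGHPUT_THRESHOLD=10.77
    export INPUT_LEN=1800 OUTPUT_LEN=128 PREFIX_LEN=0
    export MAX_MODEL_LEN=2048 MAX_NUM_SEQS=256 MAX_NUM_BATCHED_TOKENS=1024
    # Any unique tag works here; the wrapper only uses it to tag the Docker image.
    export BUILDKITE_COMMIT=$(git rev-parse HEAD)
    .buildkite/scripts/run_with_pypi.sh bash /workspace/tpu_inference/tests/e2e/benchmarking/benchmark.sh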

.buildkite/scripts/bootstrap.sh

Lines changed: 3 additions & 3 deletions
@@ -24,10 +24,10 @@ upload_pipeline() {
   VLLM_COMMIT_HASH=$(git ls-remote https://github.com/vllm-project/vllm.git HEAD | awk '{ print $1}')
   buildkite-agent meta-data set "VLLM_COMMIT_HASH" "${VLLM_COMMIT_HASH}"
   echo "Using vllm commit hash: $(buildkite-agent meta-data get "VLLM_COMMIT_HASH")"
-  buildkite-agent pipeline upload .buildkite/pipeline_jax.yml
+  buildkite-agent pipeline upload .buildkite/pipeline_test_pypi.yml
   # buildkite-agent pipeline upload .buildkite/pipeline_torch.yml
-  buildkite-agent pipeline upload .buildkite/main.yml
-  buildkite-agent pipeline upload .buildkite/nightly_releases.yml
+  # buildkite-agent pipeline upload .buildkite/main.yml
+  # buildkite-agent pipeline upload .buildkite/nightly_releases.yml
 }

 echo "--- Starting Buildkite Bootstrap ---"
.buildkite/scripts/build_vllm_tpu.sh

Lines changed: 93 additions & 0 deletions
@@ -0,0 +1,93 @@
+#!/bin/bash
+
+set -e
+
+# --- Script Configuration ---
+TPU_INFERENCE_VERSION=$1
+VLLM_TPU_VERSION=$2
+VLLM_BRANCH=${3:-"main"}
+VLLM_REPO="https://github.com/vllm-project/vllm.git"
+REPO_DIR="vllm"
+
+# --- Argument Validation ---
+if [ "$#" -lt 2 ]; then
+  echo "Usage: $0 <tpu-inference-version> <vllm-tpu-version> [vllm-branch-or-tag]"
+  echo "       [vllm-branch-or-tag] is optional, defaults to 'main'."
+  exit 1
+fi
+
+echo "--- Starting vLLM-TPU wheel build ---"
+echo "TPU Inference Version: ${TPU_INFERENCE_VERSION}"
+echo "vLLM-TPU Version: ${VLLM_TPU_VERSION}"
+echo "vLLM Branch/Tag: ${VLLM_BRANCH}"
+
+# --- Step 1: Clone vLLM repository ---
+if [ -d "$REPO_DIR" ]; then
+  echo "Repository '$REPO_DIR' already exists. Skipping clone."
+else
+  echo "Cloning vLLM repository..."
+  git clone ${VLLM_REPO}
+fi
+cd ${REPO_DIR}
+
+# --- Step 1.5: Checkout the specified vLLM branch/tag ---
+echo "Checking out vLLM branch/tag: ${VLLM_BRANCH}..."
+if ! git checkout "${VLLM_BRANCH}"; then
+  echo "ERROR: Failed to checkout branch/tag '${VLLM_BRANCH}'. Please check the branch/tag name."
+  exit 1
+fi
+echo "Successfully checked out ${VLLM_BRANCH}."
+git pull || echo "Warning: Failed to pull updates (may be on a tag)."
+
+# --- Step 2: Update tpu-inference version in requirements ---
+REQUIRED_LINE="tpu-inference==${TPU_INFERENCE_VERSION}"
+REQUIREMENTS_FILE="requirements/tpu.txt"
+BACKUP_FILE="${REQUIREMENTS_FILE}.bak"
+
+echo "Updating tpu-inference version in $REQUIREMENTS_FILE..."
+
+if [ -f "$REQUIREMENTS_FILE" ]; then
+  # Check if the last character is NOT a newline. If not, append one.
+  if [ "$(tail -c 1 "$REQUIREMENTS_FILE")" != "" ]; then
+    echo "" >> "$REQUIREMENTS_FILE"
+    echo "(Action: Added missing newline to the end of $REQUIREMENTS_FILE for safety.)"
+  fi
+fi
+
+if grep -q "^tpu-inference==" "$REQUIREMENTS_FILE"; then
+  # Replace the existing version using sed, which creates the .bak file
+  echo "(Action: Existing version found. Replacing.)"
+  sed -i.bak "s/^tpu-inference==.*/$REQUIRED_LINE/" "$REQUIREMENTS_FILE"
+
+else
+  # Line not found -> Append the new line to the file end, and manually create .bak
+  echo "(Action: Line not found. Appending new dependency.)"
+  echo "$REQUIRED_LINE" >> "$REQUIREMENTS_FILE"
+
+  # Create an empty .bak file for consistency, so cleanup works later.
+  touch "$BACKUP_FILE"
+fi
+
+# --- Step 3: Execute the vLLM TPU build script ---
+echo "Ensuring 'build' package is installed..."
+pip install build
+echo "Executing the vLLM TPU build script..."
+bash tools/vllm-tpu/build.sh "${VLLM_TPU_VERSION}"
+
+echo "--- Build complete! ---"
+echo "The wheel file can be found in the 'vllm/dist' directory."
+
+# --- Step 4: Cleanup and Revert Requirements File ---
+echo "--- Cleaning up local changes ---"
+
+if [ -f "$BACKUP_FILE" ]; then
+  echo "Reverting $REQUIREMENTS_FILE from backup."
+  # Remove the modified file
+  rm -f "$REQUIREMENTS_FILE"
+  # Rename the backup file back to the original name
+  mv "$BACKUP_FILE" "$REQUIREMENTS_FILE"
+else
+  echo "Warning: Backup file $BACKUP_FILE not found. Skipping revert."
+fi
+
+echo "Cleanup complete. Script finished."
.buildkite/scripts/run_with_pypi.sh

Lines changed: 123 additions & 0 deletions
@@ -0,0 +1,123 @@
+#!/bin/bash
+#
+# .buildkite/run_with_pypi.sh
+# ---------------------------
+
+# Exit on error, exit on unset variable, fail on pipe errors.
+set -euo pipefail
+
+if [ "$#" -eq 0 ]; then
+  echo "ERROR: Usage: $0 <command_and_args_to_run_with_pypi...>"
+  exit 1
+fi
+
+ENV_VARS=(
+  -e TEST_MODEL="${TEST_MODEL:-}"
+  -e MINIMUM_ACCURACY_THRESHOLD="${MINIMUM_ACCURACY_THRESHOLD:-}"
+  -e MINIMUM_THROUGHPUT_THRESHOLD="${MINIMUM_THROUGHPUT_THRESHOLD:-}"
+  -e TENSOR_PARALLEL_SIZE="${TENSOR_PARALLEL_SIZE:-}"
+  -e INPUT_LEN="${INPUT_LEN:-}"
+  -e OUTPUT_LEN="${OUTPUT_LEN:-}"
+  -e PREFIX_LEN="${PREFIX_LEN:-}"
+  -e MAX_MODEL_LEN="${MAX_MODEL_LEN:-}"
+  -e MAX_NUM_SEQS="${MAX_NUM_SEQS:-}"
+  -e MAX_NUM_BATCHED_TOKENS="${MAX_NUM_BATCHED_TOKENS:-}"
+)
+
+if ! grep -q "^HF_TOKEN=" /etc/environment; then
+  gcloud secrets versions access latest --secret=bm-agent-hf-token --quiet | \
+    sudo tee -a /etc/environment > /dev/null <<< "HF_TOKEN=$(cat)"
+  echo "Added HF_TOKEN to /etc/environment."
+else
+  echo "HF_TOKEN already exists in /etc/environment."
+fi
+
+# shellcheck disable=1091
+source /etc/environment
+
+if [ -z "${BUILDKITE_COMMIT:-}" ]; then
+  echo "ERROR: BUILDKITE_COMMIT environment variable is not set." >&2
+  echo "This script expects BUILDKITE_COMMIT to tag the Docker image." >&2
+  exit 1
+fi
+
+if [ -z "${MODEL_IMPL_TYPE:-}" ]; then
+  MODEL_IMPL_TYPE=flax_nnx
+fi
+
+# Try to cache HF models
+persist_cache_dir="/mnt/disks/persist/models"
+
+if ( mkdir -p "$persist_cache_dir" ); then
+  LOCAL_HF_HOME="$persist_cache_dir"
+else
+  echo "Error: Failed to create $persist_cache_dir"
+  exit 1
+fi
+DOCKER_HF_HOME="/tmp/hf_home"
+
+# (TODO): Consider creating a remote registry to cache and share between agents.
+# Subsequent builds on the same host should be cached.
+
+# Cleanup of existing containers and images.
+echo "Starting cleanup for vllm-tpu..."
+# Get all unique image IDs for the repository 'vllm-tpu'
+old_images=$(docker images vllm-tpu -q | uniq)
+total_containers=""
+
+if [ -n "$old_images" ]; then
+  echo "Found old vllm-tpu images. Checking for dependent containers..."
+  # Loop through each image ID and find any containers (running or not) using it.
+  for img_id in $old_images; do
+    total_containers="$total_containers $(docker ps -a -q --filter "ancestor=$img_id")"
+  done
+
+  # Remove any found containers
+  if [ -n "$total_containers" ]; then
+    echo "Removing leftover containers using vllm-tpu image(s)..."
+    echo "$total_containers" | xargs -n1 | sort -u | xargs -r docker rm -f
+  fi
+
+  echo "Removing old vllm-tpu image(s)..."
+  docker rmi -f "$old_images"
+else
+  echo "No vllm-tpu images found to clean up."
+fi
+
+echo "Pruning old Docker build cache..."
+docker builder prune -f
+
+echo "Cleanup complete."
+
+echo "Installing Python dependencies"
+python3 -m pip install --progress-bar off buildkite-test-collector==0.1.9
+echo "Python dependencies installed"
+
+
+echo "--- Displaying current disk usage (df -h) ---"
+df -h
+echo "-----------------------------------------------"
+
+IMAGE_NAME="vllm-tpu"
+docker build --no-cache -f docker/Dockerfile.pypi -t "${IMAGE_NAME}:${BUILDKITE_COMMIT}" .
+
+exec docker run \
+  --privileged \
+  --net host \
+  --shm-size=16G \
+  --rm \
+  -v "$LOCAL_HF_HOME":"$DOCKER_HF_HOME" \
+  "${ENV_VARS[@]}" \
+  -e HF_HOME="$DOCKER_HF_HOME" \
+  -e MODEL_IMPL_TYPE="$MODEL_IMPL_TYPE" \
+  -e HF_TOKEN="$HF_TOKEN" \
+  -e VLLM_XLA_CACHE_PATH="$DOCKER_HF_HOME/.cache/jax_cache" \
+  -e VLLM_XLA_CHECK_RECOMPILATION=1 \
+  ${QUANTIZATION:+-e QUANTIZATION="$QUANTIZATION"} \
+  ${NEW_MODEL_DESIGN:+-e NEW_MODEL_DESIGN="$NEW_MODEL_DESIGN"} \
+  ${USE_V6E8_QUEUE:+-e USE_V6E8_QUEUE="$USE_V6E8_QUEUE"} \
+  ${SKIP_ACCURACY_TESTS:+-e SKIP_ACCURACY_TESTS="$SKIP_ACCURACY_TESTS"} \
+  ${VLLM_MLA_DISABLE:+-e VLLM_MLA_DISABLE="$VLLM_MLA_DISABLE"} \
+  "${IMAGE_NAME}:${BUILDKITE_COMMIT}" \
+  "$@" # Pass all script arguments as the command to run in the container
echo "docker run complete"

docker/Dockerfile.pypi

Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
+ARG NIGHTLY_DATE="20250714"
+ARG BASE_IMAGE="us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:nightly_3.12_tpuvm_$NIGHTLY_DATE"
+
+FROM $BASE_IMAGE
+
+# Remove existing versions of dependencies
+RUN pip uninstall -y torch torch_xla torchvision
+
+# Install some basic utilities
+RUN apt-get update && apt-get install -y \
+    git \
+    libopenblas-base libopenmpi-dev libomp-dev
+
+# Install tpu_inference
+WORKDIR /workspace/tpu_inference
+COPY . .
+RUN export TPU_INFERENCE_VERSION=$(pip index versions tpu-inference --pre 2>/dev/null | grep -oE "[0-9]+\.[0-9]+\.[0-9]+\.dev[0-9]+" | head -n 1) && \
+    echo -n "${TPU_INFERENCE_VERSION}" > /tmp/tpu_inference_version
+
+# Build vllm-tpu wheel
+WORKDIR /workspace
+RUN export VLLM_TPU_VERSION=$(cat /tmp/tpu_inference_version) && \
+    bash tpu_inference/.buildkite/scripts/build_vllm_tpu.sh ${VLLM_TPU_VERSION} ${VLLM_TPU_VERSION}
+
+# Install vllm-tpu wheel
+WORKDIR /workspace/vllm
+RUN pip install --no-cache-dir dist/*.whl
+
+
+CMD ["/bin/bash"]
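This image is what run_with_pypi.sh builds and runs. A manual sketch of the same flow from the repository root on a TPU VM (the tag is arbitrary, and the smoke test assumes the installed wheel exposes vllm.__version__):

    # Build the verification image; the build pulls the newest tpu-inference pre-release
    # from PyPI and compiles a matching vllm-tpu wheel inside the image.
    docker build -f docker/Dockerfile.pypi -t vllm-tpu:local .
    # Quick smoke test of the installed wheel.
    docker run --rm --privileged --net host vllm-tpu:local \
      python3 -c "import vllm; print(vllm.__version__)"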
