Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions .buildkite/pipeline_pypi.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
steps:
  # -----------------------------------------------------------------
  # TEST STEPS - Calling wrapper
  # -----------------------------------------------------------------
  - label: "Performance benchmarks for meta-llama/Llama-3.1-8B-Instruct"
    key: "meta-llama_Llama-3_1-8B-Instruct_Benchmark"
    #if: build.env("NIGHTLY") == "1"
    depends_on: "record_verified_commit_hashes"
    agents:
      queue: tpu_v6e_queue
    env:
      TEST_MODEL: meta-llama/Llama-3.1-8B-Instruct
      TENSOR_PARALLEL_SIZE: 1
      MINIMUM_THROUGHPUT_THRESHOLD: 10.77
      INPUT_LEN: 1800
      OUTPUT_LEN: 128
      PREFIX_LEN: 0
      MAX_MODEL_LEN: 2048
      MAX_NUM_SEQS: 256
      MAX_NUM_BATCHED_TOKENS: 1024
    commands:
      - |
        .buildkite/scripts/run_with_pypi.sh bash /workspace/tpu_inference/tests/e2e/benchmarking/benchmark.sh

  - label: "Performance benchmarks for Qwen/Qwen3-4B"
    #if: build.env("NIGHTLY") == "1"
    key: "Qwen_Qwen3-4B_Benchmark"
    depends_on: "record_verified_commit_hashes"
    agents:
      queue: tpu_v6e_queue
    env:
      TEST_MODEL: Qwen/Qwen3-4B
      TENSOR_PARALLEL_SIZE: 1
      MINIMUM_THROUGHPUT_THRESHOLD: 11.00
      INPUT_LEN: 1800
      OUTPUT_LEN: 128
      PREFIX_LEN: 0
      MAX_MODEL_LEN: 2048
      MAX_NUM_SEQS: 94
      MAX_NUM_BATCHED_TOKENS: 4096
    commands:
      - |
        .buildkite/scripts/run_with_pypi.sh bash /workspace/tpu_inference/tests/e2e/benchmarking/benchmark.sh
7 changes: 4 additions & 3 deletions .buildkite/scripts/bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,12 @@ upload_pipeline() {
# Resolve the current HEAD commit of upstream vLLM and record it in
# Buildkite meta-data so every later step builds against the same pin.
VLLM_COMMIT_HASH=$(git ls-remote https://github.com/vllm-project/vllm.git HEAD | awk '{ print $1}')
buildkite-agent meta-data set "VLLM_COMMIT_HASH" "${VLLM_COMMIT_HASH}"
echo "Using vllm commit hash: $(buildkite-agent meta-data get "VLLM_COMMIT_HASH")"
# NOTE(review): the JAX, tpu7x, and nightly-release pipeline uploads are
# commented out here — presumably temporary while the PyPI pipeline is
# being tested; confirm this is intended before merging.
#buildkite-agent pipeline upload .buildkite/pipeline_jax.yml
#buildkite-agent pipeline upload .buildkite/pipeline_jax_tpu7x.yml
# buildkite-agent pipeline upload .buildkite/pipeline_torch.yml
buildkite-agent pipeline upload .buildkite/main.yml
#buildkite-agent pipeline upload .buildkite/nightly_releases.yml
# Upload the new PyPI-based benchmark pipeline.
buildkite-agent pipeline upload .buildkite/pipeline_pypi.yml
}

echo "--- Starting Buildkite Bootstrap ---"
Expand Down
93 changes: 93 additions & 0 deletions .buildkite/scripts/build_vllm_tpu.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#!/bin/bash
# Build a vllm-tpu wheel against a specific tpu-inference release.
#
# Usage: build_vllm_tpu.sh <tpu-inference-version> <vllm-tpu-version> [vllm-branch-or-tag]
#   <tpu-inference-version>  version pin written into requirements/tpu.txt
#   <vllm-tpu-version>       version passed through to tools/vllm-tpu/build.sh
#   [vllm-branch-or-tag]     optional vLLM ref to build from (default: main)
#
# The resulting wheel is left in the vllm/dist directory. The temporary
# edit to requirements/tpu.txt is reverted on exit — success OR failure.

set -euo pipefail

# --- Argument Validation ---
# Validate BEFORE reading $1/$2: with `set -u` an unset positional would
# abort with an unhelpful error instead of this usage message.
if [ "$#" -lt 2 ]; then
    echo "Usage: $0 <tpu-inference-version> <vllm-tpu-version> [vllm-branch-or-tag]"
    echo "  [vllm-branch-or-tag] is optional, defaults to 'main'."
    exit 1
fi

# --- Script Configuration ---
TPU_INFERENCE_VERSION=$1
VLLM_TPU_VERSION=$2
VLLM_BRANCH=${3:-"main"}
VLLM_REPO="https://github.com/vllm-project/vllm.git"
REPO_DIR="vllm"

echo "--- Starting vLLM-TPU wheel build ---"
echo "TPU Inference Version: ${TPU_INFERENCE_VERSION}"
echo "vLLM-TPU Version: ${VLLM_TPU_VERSION}"
echo "vLLM Branch/Tag: ${VLLM_BRANCH}"

# --- Step 1: Clone vLLM repository ---
if [ -d "$REPO_DIR" ]; then
    echo "Repository '$REPO_DIR' already exists. Skipping clone."
else
    echo "Cloning vLLM repository..."
    git clone "${VLLM_REPO}"
fi
cd "${REPO_DIR}"

# --- Step 1.5: Checkout the specified vLLM branch/tag ---
echo "Checking out vLLM branch/tag: ${VLLM_BRANCH}..."
if ! git checkout "${VLLM_BRANCH}"; then
    echo "ERROR: Failed to checkout branch/tag '${VLLM_BRANCH}'. Please check the branch/tag name." >&2
    exit 1
fi
echo "Successfully checked out ${VLLM_BRANCH}."
git pull || echo "Warning: Failed to pull updates (may be on a tag)."

# --- Step 2: Update tpu-inference version in requirements ---
REQUIRED_LINE="tpu-inference==${TPU_INFERENCE_VERSION}"
REQUIREMENTS_FILE="requirements/tpu.txt"
BACKUP_FILE="${REQUIREMENTS_FILE}.bak"

# Revert the requirements file from its backup on ANY exit path. The
# previous version only cleaned up after a successful build (leaving the
# checkout dirty on failure) and, worse, created an EMPTY .bak in the
# append branch — so "reverting" truncated requirements/tpu.txt.
restore_requirements() {
    if [ -f "$BACKUP_FILE" ]; then
        echo "Reverting $REQUIREMENTS_FILE from backup."
        rm -f -- "$REQUIREMENTS_FILE"
        mv -- "$BACKUP_FILE" "$REQUIREMENTS_FILE"
    fi
    echo "Cleanup complete. Script finished."
}
trap restore_requirements EXIT

echo "Updating tpu-inference version in $REQUIREMENTS_FILE..."

if [ -f "$REQUIREMENTS_FILE" ]; then
    # Snapshot the pristine file BEFORE any edit so the EXIT trap always
    # restores real content.
    cp -- "$REQUIREMENTS_FILE" "$BACKUP_FILE"
    # $(tail -c 1 ...) is empty iff the last byte is a newline (command
    # substitution strips it); otherwise append one so a new dependency
    # line is not glued onto the last existing line.
    if [ "$(tail -c 1 "$REQUIREMENTS_FILE")" != "" ]; then
        echo "" >> "$REQUIREMENTS_FILE"
        echo "(Action: Added missing newline to the end of $REQUIREMENTS_FILE for safety.)"
    fi
fi

if grep -q "^tpu-inference==" "$REQUIREMENTS_FILE"; then
    # Replace the existing pin in place (backup already taken above).
    echo "(Action: Existing version found. Replacing.)"
    sed -i "s/^tpu-inference==.*/$REQUIRED_LINE/" "$REQUIREMENTS_FILE"
else
    # Pin not present -> append it to the end of the file.
    echo "(Action: Line not found. Appending new dependency.)"
    echo "$REQUIRED_LINE" >> "$REQUIREMENTS_FILE"
fi

# --- Step 3: Execute the vLLM TPU build script ---
echo "Ensuring 'build' package is installed..."
pip install build
echo "Executing the vLLM TPU build script..."
bash tools/vllm-tpu/build.sh "${VLLM_TPU_VERSION}"

echo "--- Build complete! ---"
echo "The wheel file can be found in the 'vllm/dist' directory."
46 changes: 46 additions & 0 deletions .buildkite/scripts/run_with_pypi.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/bin/bash
# Wait for the nightly tpu-inference wheel to appear on PyPI, then hand off
# to run_in_docker.sh with RUN_WITH_PYPI=true so the image is built from
# PyPI packages (docker/Dockerfile.pypi) instead of from source.
#
# Must run inside a git checkout whose vX.Y.Z tags define the stable line.

# Exit on error, exit on unset variable, fail on pipe errors.
set -euo pipefail

# Derive the nightly version: latest stable tag (vX.Y.Z) + ".dev<YYYYMMDD>".
# `|| true` keeps `set -e`/pipefail from aborting silently when grep finds
# no matching tag; we report a clear error instead.
LATEST_STABLE_TAG=$(git tag --sort=-v:refname | grep -E '^v[0-9]+\.[0-9]+\.[0-9]+$' | head -n 1) || true
if [ -z "$LATEST_STABLE_TAG" ]; then
    echo "ERROR: no stable tag matching vX.Y.Z found in this checkout." >&2
    exit 1
fi
BASE_VERSION=${LATEST_STABLE_TAG#v}
# TODO: Temporary logic for testing. Remove 'yesterday' before merging.
DATETIME_STR=$(date -d 'yesterday' +%Y%m%d)
TPU_INFERENCE_VERSION="${BASE_VERSION}.dev${DATETIME_STR}"

echo "Target Nightly Version: ${TPU_INFERENCE_VERSION}"

# Configuration
PACKAGE_NAME="tpu-inference"
MAX_RETRIES=20
SLEEP_SEC=60
FOUND_VERSION=false

echo "Checking PyPI for ${PACKAGE_NAME} == ${TPU_INFERENCE_VERSION}..."

# Poll PyPI until the version shows up or the retry budget is exhausted.
for ((i=1; i<=MAX_RETRIES; i++)); do
    # -F matches the version string literally: the dots would otherwise be
    # regex wildcards and could match an unrelated version.
    if pip index versions "${PACKAGE_NAME}" --pre 2>/dev/null | grep -qF "${TPU_INFERENCE_VERSION}"; then
        echo "Success! Found version ${TPU_INFERENCE_VERSION} on PyPI."
        FOUND_VERSION=true
        break
    fi

    echo "[Attempt $i/$MAX_RETRIES] Version not found yet. Waiting ${SLEEP_SEC} seconds..."
    if [ "$i" -lt "$MAX_RETRIES" ]; then
        sleep "$SLEEP_SEC"
    fi
done

if [ "$FOUND_VERSION" = "false" ]; then
    echo "The version ${TPU_INFERENCE_VERSION} was not found on PyPI." >&2
    exit 1
fi

# Build vllm-tpu with nightly tpu-inference from PyPI (using docker/Dockerfile.pypi instead of docker/Dockerfile).
export RUN_WITH_PYPI="true"
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)

# shellcheck disable=SC1091
source "$SCRIPT_DIR/run_in_docker.sh"
12 changes: 11 additions & 1 deletion .buildkite/scripts/setup_docker_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,16 @@ setup_environment() {
local image_name_param=${1:-"vllm-tpu"}
IMAGE_NAME="$image_name_param"

local DOCKERFILE_NAME="Dockerfile"

# Determine whether to build from PyPI packages or source.
if [[ "${RUN_WITH_PYPI:-false}" == "true" ]]; then
DOCKERFILE_NAME="Dockerfile.pypi"
echo "Building from PyPI packages. Using docker/${DOCKERFILE_NAME}"
else
echo "Building from source. Using docker/${DOCKERFILE_NAME}"
fi

if ! grep -q "^HF_TOKEN=" /etc/environment; then
gcloud secrets versions access latest --secret=bm-agent-hf-token --quiet | \
sudo tee -a /etc/environment > /dev/null <<< "HF_TOKEN=$(cat)"
Expand Down Expand Up @@ -60,5 +70,5 @@ setup_environment() {
docker build \
--build-arg VLLM_COMMIT_HASH="${VLLM_COMMIT_HASH}" \
--build-arg IS_FOR_V7X="${IS_FOR_V7X:-false}" \
--no-cache -f docker/Dockerfile -t "${IMAGE_NAME}:${BUILDKITE_COMMIT}" .
--no-cache -f docker/${DOCKERFILE_NAME} -t "${IMAGE_NAME}:${BUILDKITE_COMMIT}" .
}
39 changes: 39 additions & 0 deletions docker/Dockerfile.pypi
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
ARG NIGHTLY_DATE="20250714"
ARG BASE_IMAGE="us-central1-docker.pkg.dev/tpu-pytorch-releases/docker/xla:nightly_3.12_tpuvm_$NIGHTLY_DATE"
# The latest main will be used if arg unspecified
ARG VLLM_COMMIT_HASH=""

FROM $BASE_IMAGE

ARG IS_FOR_V7X="false"

# Remove existing versions of dependencies
RUN pip uninstall -y torch torch_xla torchvision

# Install some basic utilities. Skip recommended packages and drop the apt
# lists afterwards to keep the image layer small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    libopenblas-base libopenmpi-dev libomp-dev \
    && rm -rf /var/lib/apt/lists/*

# Install tpu_inference
WORKDIR /workspace/tpu_inference
COPY requirements_benchmarking.txt .
# These are needed for the E2E benchmarking tests (i.e. tests/e2e/benchmarking/mlperf.sh)
RUN --mount=type=cache,target=/root/.cache/pip pip install -r requirements_benchmarking.txt --retries 3
COPY requirements_v7x.txt .
RUN --mount=type=cache,target=/root/.cache/pip if [ "$IS_FOR_V7X" = "true" ]; then \
        pip install -r requirements_v7x.txt; \
    fi
COPY . .

# Build the vllm-tpu wheel against the newest tpu-inference pre-release
# found on PyPI (the vllm-tpu wheel version mirrors it).
WORKDIR /workspace
ARG VLLM_COMMIT_HASH
# Fail fast with a clear message if no dev version could be resolved —
# otherwise build_vllm_tpu.sh would be invoked with empty arguments and
# die with a confusing usage error deep inside the build.
RUN TPU_INFERENCE_VERSION=$(pip index versions tpu-inference --pre 2>/dev/null | grep -oE "[0-9]+\.[0-9]+\.[0-9]+\.dev[0-9]+" | head -n 1) && \
    if [ -z "${TPU_INFERENCE_VERSION}" ]; then echo "ERROR: could not resolve a tpu-inference dev version from PyPI" >&2; exit 1; fi && \
    VLLM_TPU_VERSION=${TPU_INFERENCE_VERSION} && \
    bash tpu_inference/.buildkite/scripts/build_vllm_tpu.sh "${TPU_INFERENCE_VERSION}" "${VLLM_TPU_VERSION}" "${VLLM_COMMIT_HASH}"

# Install vllm-tpu wheel
RUN pip install --no-cache-dir vllm/dist/*.whl


CMD ["/bin/bash"]