add check pypi version

Ylang Tsou · Ylang Tsou · commit 981ba3bda330 · 2025-12-12T11:32:41.000+08:00
Signed-off-by: Ylang Tsou <ylangt@google.com>
diff --git a/.buildkite/pipeline_pypi.yml b/.buildkite/pipeline_pypi.yml
@@ -2,21 +2,10 @@ steps:
   # -----------------------------------------------------------------
   # TEST STEPS - Calling wrapper
   # -----------------------------------------------------------------
-   - label: "Wait for 20 mins"
-     if: build.env("NIGHTLY") == "1"
-     key: "wait_20_minutes"
-     depends_on: "record_verified_commit_hashes"
-     agents:
-      queue: cpu
-     commands:
-      - "echo 'Starting 20 minute delay...'"
-      - "sleep 1200"
-      - "echo 'Delay finished, starting benchmarks.'"
-
    - label: "Performance benchmarks for meta-llama/Llama-3.1-8B-Instruct"
      key: "meta-llama_Llama-3_1-8B-Instruct_Benchmark"
-     if: build.env("NIGHTLY") == "1"
-     depends_on: "wait_20_minutes"
+     #if: build.env("NIGHTLY") == "1"
+     depends_on: "record_verified_commit_hashes"
      agents:
       queue: tpu_v6e_queue
      env:
@@ -34,9 +23,9 @@ steps:
         .buildkite/scripts/run_with_pypi.sh bash /workspace/tpu_inference/tests/e2e/benchmarking/benchmark.sh
 
    - label: "Performance benchmarks for Qwen/Qwen3-4B"
-     if: build.env("NIGHTLY") == "1"
+     #if: build.env("NIGHTLY") == "1"
      key: "Qwen_Qwen3-4B_Benchmark"
-     depends_on: "wait_20_minutes"
+     depends_on: "record_verified_commit_hashes"
      agents:
       queue: tpu_v6e_queue
      env:
diff --git a/.buildkite/scripts/bootstrap.sh b/.buildkite/scripts/bootstrap.sh
@@ -24,11 +24,11 @@ upload_pipeline() {
     VLLM_COMMIT_HASH=$(git ls-remote https://github.com/vllm-project/vllm.git HEAD | awk '{ print $1}')
     buildkite-agent meta-data set "VLLM_COMMIT_HASH" "${VLLM_COMMIT_HASH}"
     echo "Using vllm commit hash: $(buildkite-agent meta-data get "VLLM_COMMIT_HASH")"
-    buildkite-agent pipeline upload .buildkite/pipeline_jax.yml
-    buildkite-agent pipeline upload .buildkite/pipeline_jax_tpu7x.yml
+    #buildkite-agent pipeline upload .buildkite/pipeline_jax.yml
+    #buildkite-agent pipeline upload .buildkite/pipeline_jax_tpu7x.yml
     # buildkite-agent pipeline upload .buildkite/pipeline_torch.yml
     buildkite-agent pipeline upload .buildkite/main.yml
-    buildkite-agent pipeline upload .buildkite/nightly_releases.yml
+    #buildkite-agent pipeline upload .buildkite/nightly_releases.yml
     buildkite-agent pipeline upload .buildkite/pipeline_pypi.yml
 }
 
diff --git a/.buildkite/scripts/run_with_pypi.sh b/.buildkite/scripts/run_with_pypi.sh
@@ -2,6 +2,42 @@
 # Exit on error, exit on unset variable, fail on pipe errors.
 set -euo pipefail
 
+# Get the nightly TPU_INFERENCE_VERSION based on the latest stable tag and current date.
+LATEST_STABLE_TAG=$(git tag --sort=-v:refname | grep -E '^v[0-9]+\.[0-9]+\.[0-9]+$' | head -n 1)
+BASE_VERSION=${LATEST_STABLE_TAG#v}
+# TODO: Temporary logic for testing. Remove 'yesterday' before merging.
+DATETIME_STR=$(date -d 'yesterday' +%Y%m%d)
+TPU_INFERENCE_VERSION="${BASE_VERSION}.dev${DATETIME_STR}"
+
+echo "Target Nightly Version: ${TPU_INFERENCE_VERSION}"
+
+# Configuration
+PACKAGE_NAME="tpu-inference"
+MAX_RETRIES=20
+SLEEP_SEC=60
+FOUND_VERSION=false
+
+echo "Checking PyPI for ${PACKAGE_NAME} == ${TPU_INFERENCE_VERSION}..."
+
+# Retry logic to check if the version is available on PyPI
+for ((i=1; i<=MAX_RETRIES; i++)); do
+    # Capture pip's output before matching: with pipefail, `pip | grep -q` can fail on a real hit (SIGPIPE to pip).
+    AVAILABLE_VERSIONS=$(pip index versions "${PACKAGE_NAME}" --pre 2>/dev/null || true)
+    # -F: dots are literal, not regex wildcards; -w: match the whole version, not part of a longer one.
+    if grep -qwF -- "${TPU_INFERENCE_VERSION}" <<<"${AVAILABLE_VERSIONS}"; then
+        echo "Success! Found version ${TPU_INFERENCE_VERSION} on PyPI."
+        FOUND_VERSION=true
+        break
+    fi
+    echo "[Attempt $i/$MAX_RETRIES] Version not found yet. Waiting ${SLEEP_SEC} seconds..."
+    [ "$i" -ge "$MAX_RETRIES" ] || sleep "$SLEEP_SEC"  # no sleep after the final attempt
+done
+
+if [ "$FOUND_VERSION" = "false" ]; then
+    echo "The version ${TPU_INFERENCE_VERSION} was not found on PyPI."
+    exit 1
+fi
+
 # Build vllm-tpu with nightly tpu-inference from PyPI (using docker/Dockerfile.pypi instead of docker/Dockerfile).
 export RUN_WITH_PYPI="true"
 SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)
diff --git a/docker/Dockerfile.pypi b/docker/Dockerfile.pypi
@@ -5,6 +5,8 @@ ARG VLLM_COMMIT_HASH=""
 
 FROM $BASE_IMAGE
 
+ARG IS_FOR_V7X="false"
+
 # Remove existing versions of dependencies
 RUN pip uninstall -y torch torch_xla torchvision
 
@@ -17,7 +19,11 @@ RUN apt-get update && apt-get install -y \
 WORKDIR /workspace/tpu_inference
 COPY requirements_benchmarking.txt .
 # These are needed for the E2E benchmarking tests (i.e. tests/e2e/benchmarking/mlperf.sh)
-RUN pip install -r requirements_benchmarking.txt --retries 3
+RUN --mount=type=cache,target=/root/.cache/pip pip install -r requirements_benchmarking.txt --retries 3
+COPY requirements_v7x.txt .
+RUN --mount=type=cache,target=/root/.cache/pip if [ "$IS_FOR_V7X" = "true" ]; then \
+        pip install -r requirements_v7x.txt; \
+    fi
 COPY . .
 
 # Build vllm-tpu wheel