add 1st iteration of pre-merge pipeline

jcyang43 · jcyang43 · commit 245920984b6a · 2025-11-25T14:55:01.000-08:00
Signed-off-by: Johnny Yang <johnnyyang@google.com>
diff --git a/.buildkite/pipeline_jax.yml b/.buildkite/pipeline_jax.yml
@@ -107,25 +107,8 @@ steps:
            exit 0
          fi
 
-
-   - label: "JAX unit tests"
-     key: test_7
-     soft_fail: true
-     agents:
-       queue: tpu_v6e_queue
-     commands:
-       - |
-         .buildkite/scripts/run_in_docker.sh \
-           python3 -m pytest -s -v -x /workspace/tpu_inference/tests/ \
-           --ignore=/workspace/tpu_inference/tests/kernels \
-           --ignore=/workspace/tpu_inference/tests/lora \
-           --ignore=/workspace/tpu_inference/tests/e2e \
-           --ignore=/workspace/tpu_inference/tpu_inference/mock \
-           --ignore=/workspace/tpu_inference/tests/layers/vllm/test_compressed_tensors_moe.py \
-           --cov-config=/workspace/tpu_inference/.coveragerc --cov tpu_inference --cov-report term-missing --cov-fail-under=69
-
    - label: "JAX unit tests - kernels"
-     key: test_8
+     key: test_7
      soft_fail: true
      agents:
        queue: tpu_v6e_queue
@@ -144,7 +127,7 @@ steps:
          fi
 
    - label: "JAX unit tests - collective kernels"
-     key: test_9
+     key: test_8
      soft_fail: true
      agents:
        queue: tpu_v6e_8_queue
@@ -159,7 +142,7 @@ steps:
          fi
 
    - label: "lora e2e tests for JAX + vLLM models single chip"
-     key: test_10
+     key: test_9
      soft_fail: true
      agents:
        queue: tpu_v6e_queue
@@ -174,7 +157,7 @@ steps:
          fi
 
    - label: "E2E MLPerf tests for JAX + vLLM models on multiple chips"
-     key: test_11
+     key: test_10
      soft_fail: true
      env:
        MODEL_IMPL_TYPE: "vllm"
@@ -190,7 +173,7 @@ steps:
          fi
 
    - label: "E2E MLperf tests for DeepSeek-R1 (no accuracy, 12-decoder layers only)"
-     key: test_12
+     key: test_11
      soft_fail: true
      env:
        NEW_MODEL_DESIGN: "True"
@@ -209,7 +192,7 @@ steps:
          fi
 
    - label: "lora e2e tests for JAX + vLLM models multi chips"
-     key: test_13
+     key: test_12
      soft_fail: true
      env:
        USE_V6E8_QUEUE: "True"
@@ -227,7 +210,7 @@ steps:
          fi
 
    - label: "E2E data parallelism test"
-     key: test_14
+     key: test_13
      soft_fail: true
      env:
        NEW_MODEL_DESIGN: "True"
@@ -239,7 +222,7 @@ steps:
            bash -c 'python3 -m pytest -s -v -x /workspace/tpu_inference/tests/e2e/test_data_parallel.py'
 
    - label: "lora unit tests on single chip"
-     key: test_15
+     key: test_14
      soft_fail: true
      agents:
        queue: tpu_v6e_queue
@@ -250,7 +233,7 @@ steps:
            python3 -m pytest -s -v -x /workspace/tpu_inference/tests/lora/test_layers.py'
 
    - label: "lora unit tests on multi chips"
-     key: test_16
+     key: test_15
      soft_fail: true
      env:
        USE_V6E8_QUEUE: "True"
@@ -283,10 +266,9 @@ steps:
        - test_13
        - test_14
        - test_15
-       - test_16
      agents:
        queue: cpu
      commands:
        - |
          .buildkite/scripts/check_results.sh \
-           "TPU JAX Tests Failed" test_0 test_1 test_2 test_3 test_4 test_5 test_6 test_7 test_8 test_9 test_10 test_11 test_12 test_13 test_14 test_15 test_16
+           "TPU JAX Tests Failed" test_0 test_1 test_2 test_3 test_4 test_5 test_6 test_7 test_8 test_9 test_10 test_11 test_12 test_13 test_14 test_15
diff --git a/.buildkite/pipeline_pre_merge.yml b/.buildkite/pipeline_pre_merge.yml
@@ -0,0 +1,52 @@
+# Pre-merge pipeline. bootstrap.sh uploads it on its own for PRs without the 'ready' label
+# and as the last part of upload_pipeline.
+steps:
+  - label: "JAX unit tests"
+    soft_fail: true
+    agents:
+      queue: tpu_v6e_queue
+    commands:
+      - |
+        .buildkite/scripts/run_in_docker.sh \
+          python3 -m pytest -s -v -x /workspace/tpu_inference/tests/ \
+          --ignore=/workspace/tpu_inference/tests/kernels \
+          --ignore=/workspace/tpu_inference/tests/lora \
+          --ignore=/workspace/tpu_inference/tests/e2e \
+          --ignore=/workspace/tpu_inference/tpu_inference/mock \
+          --cov-config=/workspace/tpu_inference/.coveragerc --cov tpu_inference --cov-report term-missing --cov-fail-under=69
+
+  - label: "Upload modified feature(s)/model(s) introduced in this PR"
+    soft_fail: true
+    # nightly run automatically uploads all features & models, so the next line is needed to dedup
+    if: build.env("NIGHTLY") != "1"
+    agents:
+      queue: cpu
+    # NOTE: 'buildkite-agent pipeline upload' substitutes dollar-prefixed variables when this
+    # file is uploaded. Every dollar sign in the script below is therefore doubled so that
+    # expansion is deferred to the shell that actually runs the step.
+    command: |
+      # This pipeline is also uploaded outside of the PR-only path, where no base branch is set.
+      if [ -z "$$BUILDKITE_PULL_REQUEST_BASE_BRANCH" ]; then
+        echo "Not a pull request build; skipping models/features upload"
+        exit 0
+      fi
+
+      git fetch origin "$$BUILDKITE_PULL_REQUEST_BASE_BRANCH"
+      # FETCH_HEAD is the base-branch tip that was just fetched; a local branch of that name may not exist on the agent.
+      BASE_REF="FETCH_HEAD"
+      HEAD_REF="$$BUILDKITE_COMMIT"
+
+      # Get newly added or modified yml files in '.buildkite/models/' OR '.buildkite/features/'.
+      # The three-dot form diffs against the merge base, so only changes made by this PR are listed.
+      MODIFIED_YML_PATHS=$$(git diff --name-only --diff-filter=AM "$${BASE_REF}...$${HEAD_REF}" | grep -E '^\.buildkite\/(models|features)\/.*\.yml$$' | tr '\n' ' ')
+
+      if [ -n "$$MODIFIED_YML_PATHS" ]; then
+        echo "Detected new models/features yml files: $$MODIFIED_YML_PATHS"
+        for FILE_PATH in $$MODIFIED_YML_PATHS; do
+          echo "Processing and uploading pipeline: $${FILE_PATH}"
+          buildkite-agent pipeline upload "$${FILE_PATH}"
+          echo "Successfully uploaded $${FILE_PATH}"
+        done
+      else
+        echo "No new models/features yml files detected"
+      fi
diff --git a/.buildkite/scripts/bootstrap.sh b/.buildkite/scripts/bootstrap.sh
@@ -20,18 +20,29 @@ else
   echo "Code files changed. Proceeding with pipeline upload."
 fi
 
+upload_pre_merge_pipeline() {
+    buildkite-agent pipeline upload .buildkite/pipeline_pre_merge.yml  # adds the steps defined in pipeline_pre_merge.yml to this build
+}
+
 upload_pipeline() {
-    VLLM_COMMIT_HASH=$(git ls-remote https://github.com/vllm-project/vllm.git HEAD | awk '{ print $1}')
-    buildkite-agent meta-data set "VLLM_COMMIT_HASH" "${VLLM_COMMIT_HASH}"
-    echo "Using vllm commit hash: $(buildkite-agent meta-data get "VLLM_COMMIT_HASH")"
     buildkite-agent pipeline upload .buildkite/pipeline_jax.yml
     # buildkite-agent pipeline upload .buildkite/pipeline_torch.yml
     buildkite-agent pipeline upload .buildkite/main.yml
     buildkite-agent pipeline upload .buildkite/nightly_releases.yml
+    upload_pre_merge_pipeline  # the full upload also includes the pre-merge steps
 }
 
-echo "--- Starting Buildkite Bootstrap ---"
+fetch_latest_upstream_vllm_commit() {
+    # Resolve the current upstream vllm HEAD once and store it in Buildkite meta-data, to:
+    #    1. Let all jobs in this CI run use the same upstream vllm commit for consistency
+    #    2. Record which upstream commit this CI run is using (helps with debugging)
+    VLLM_COMMIT_HASH=$(git ls-remote https://github.com/vllm-project/vllm.git HEAD | awk '{ print $1}')  # first column of the ls-remote output
+    buildkite-agent meta-data set "VLLM_COMMIT_HASH" "${VLLM_COMMIT_HASH}"
+    echo "Using vllm commit hash: $(buildkite-agent meta-data get "VLLM_COMMIT_HASH")"  # read back from meta-data rather than the local variable
+}
 
+echo "--- Starting Buildkite Bootstrap ---"
+fetch_latest_upstream_vllm_commit
 # Check if the current build is a pull request
 if [ "$BUILDKITE_PULL_REQUEST" != "false" ]; then
   echo "This is a Pull Request build."
@@ -42,8 +53,8 @@ if [ "$BUILDKITE_PULL_REQUEST" != "false" ]; then
     echo "Found 'ready' label on PR. Uploading main pipeline..."
     upload_pipeline
   else
-    echo "No 'ready' label found on PR. Skipping main pipeline upload."
-    exit 0 # Exit with 0 to indicate success (no error, just skipped)
+    echo "No 'ready' label found on PR. Uploading fast check pipeline"
+    upload_pre_merge_pipeline
   fi
 else
   # If it's NOT a Pull Request (e.g., branch push, tag, manual build)