Commit 6e6d2f4

grpo runs of new hardware

1 parent: a82a1b4

File tree

4 files changed: +44 -20 lines


docker_build_dependency_image.sh

Lines changed: 6 additions & 5 deletions

@@ -142,14 +142,15 @@ if [[ ${INSTALL_POST_TRAINING} -eq 1 ]] ; then
     exit 1
   fi

-  # To install from local paths, we copy vllm and tpu_commons into the build context.
-  # This assumes vllm and tpu_commons are sibling directories to the current one (maxtext).
-  echo "Copying local vllm and tpu_commons directories into the build context..."
-  rsync -a --exclude='__pycache__' ../tpu_commons .
+  # To install from local paths, we copy vllm and tpu-inference into the build context.
+  # This assumes vllm and tpu-inference are sibling directories to the current one (maxtext).
+  echo "Copying local vllm and tpu-inference directories into the build context..."
+  rsync -a --exclude='__pycache__' ../tunix .
+  rsync -a --exclude='__pycache__' ../tpu-inference .
   rsync -a --exclude='__pycache__' ../vllm .

   # The cleanup is set to run even if the build fails to remove the copied directories.
-  trap "echo 'Cleaning up copied directories...' && rm -rf ./tpu_commons ./vllm" EXIT INT TERM
+  trap "echo 'Cleaning up copied directories...' && rm -rf ./tpu-inference ./vllm" EXIT INT TERM

   docker build \
     --network host \
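
The `trap ... EXIT INT TERM` line is what makes the cleanup run whether the build succeeds, fails, or is interrupted. For readers more at home in Python, a minimal sketch of the same copy-into-context-then-always-clean-up pattern (the function and paths are illustrative, not part of this commit):

# Illustrative Python equivalent of the script's copy + trap-cleanup pattern.
import shutil
import subprocess
from pathlib import Path

def build_with_copied_context(siblings=("tunix", "tpu-inference", "vllm")):
  copied = []
  try:
    for name in siblings:
      dst = Path(".") / name
      shutil.copytree(Path("..") / name, dst, ignore=shutil.ignore_patterns("__pycache__"))
      copied.append(dst)
    subprocess.run(["docker", "build", "--network", "host", "."], check=True)
  finally:
    # Like the shell trap: runs on success, failure, or KeyboardInterrupt.
    for dst in copied:
      shutil.rmtree(dst, ignore_errors=True)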

maxtext_post_training_dependencies.Dockerfile

Lines changed: 28 additions & 6 deletions

@@ -28,23 +28,27 @@ RUN echo "Installing Post-Training dependencies (vLLM, tpu-common, tunix) with M
 RUN --mount=type=cache,target=/root/.cache/pip pip install \
     aiohttp==3.12.15\
     keyring \
-    keyrings.google-artifactregistry-auth \
+    keyrings.google-artifactregistry-auth
+
+RUN --mount=type=cache,target=/root/.cache/pip pip install \
     numba==0.61.2

+# RUN VLLM_TARGET_DEVICE="tpu" pip install vllm
 # --- STAGE 2: Install Project Dependencies (The Main Cached Layer) ---

 # Copy *only* the dependency definition files.
-# This assumes vllm and tpu_commons are in the build context, copied from the parent directory.
+# This assumes vllm and tpu-inference are in the build context, copied from the parent directory.
 COPY vllm/requirements/tpu.txt /tmp/
 COPY vllm/requirements/build.txt /tmp/
 COPY vllm/requirements/common.txt /tmp/
-COPY tpu_commons/requirements.txt /tmp/
+COPY tpu-inference/requirements.txt /tmp/

 # Run the full dependency installation.
 # This entire layer is cached and will *only* be rebuilt if
 # these .txt files change.
-RUN --mount=type=cache,target=/root/.cache/pip bash -c ' \
+RUN --mount=type=cache,target=/root/.cache/pip bash -c ' \
     # Set the target device so pip installs the right JAX/libtpu
+    # Install tpu-inference dependencies
     export VLLM_TARGET_DEVICE="tpu" && \
     pip install -r /tmp/tpu.txt -r /tmp/build.txt -r /tmp/common.txt -r /tmp/requirements.txt --no-cache-dir --pre \
     --extra-index-url https://pypi.org/simple/ \

@@ -56,16 +60,34 @@ RUN --mount=type=cache,target=/root/.cache/pip bash -c ' \
     --find-links https://storage.googleapis.com/jax-releases/jax_nightly_releases.html \
     --find-links https://storage.googleapis.com/jax-releases/jaxlib_nightly_releases.html'

+# Install tpu-inference dependencies
+RUN --mount=type=cache,target=/root/.cache/pip bash -c ' \
+    pip install -r /tmp/requirements.txt --no-cache-dir --pre \
+    --extra-index-url https://pypi.org/simple/ \
+    --extra-index-url https://us-python.pkg.dev/ml-oss-artifacts-published/jax/simple/ \
+    --extra-index-url https://download.pytorch.org/whl/nightly/cpu \
+    --find-links https://storage.googleapis.com/jax-releases/libtpu_releases.html \
+    --find-links https://storage.googleapis.com/libtpu-wheels/index.html \
+    --find-links https://storage.googleapis.com/libtpu-releases/index.html \
+    --find-links https://storage.googleapis.com/jax-releases/jax_nightly_releases.html \
+    --find-links https://storage.googleapis.com/jax-releases/jaxlib_nightly_releases.html'
+
 # --- STAGE 3: Install Project Source Code ---

 # Now, copy the full source code. This invalidates cache frequently,
 # but the next step is fast.
 COPY vllm /vllm/
-COPY tpu_commons /tpu_commons/
+COPY tpu-inference /tpu-inference/
+COPY tunix /tunix
+

 # Install in editable mode. This is lightning-fast because all
 # dependencies were installed and cached in STAGE 2.
-RUN --mount=type=cache,target=/root/.cache/pip VLLM_TARGET_DEVICE="tpu" pip install -e /vllm/ -e /tpu_commons/
+RUN --mount=type=cache,target=/root/.cache/pip VLLM_TARGET_DEVICE="tpu" pip install -e /vllm/
+RUN --mount=type=cache,target=/root/.cache/pip pip install -e /tpu-inference/
+
+RUN --mount=type=cache,target=/root/.cache/pip pip install --no-deps /tunix/
+# RUN --mount=type=cache,target=/root/.cache/pip VLLM_TARGET_DEVICE="tpu" pip install -e /tpu-inference/

 RUN if [ "$MODE" = "post-training-experimental" ]; then \
     echo "MODE=grpo-experimental: Re-installing JAX/libtpu"; \

src/MaxText/examples/grpo_llama3_1_70b_demo_pw.py

Lines changed: 3 additions & 3 deletions

@@ -195,12 +195,12 @@
 # ====== Training ======
 BATCH_SIZE = 1
 # Increase `NUM_BATCHES` and `MAX_STEPS` for better results.
-# NUM_BATCHES = 3738
-NUM_BATCHES = 4 # 200
+NUM_BATCHES = 3738
+# NUM_BATCHES = 4 # 200
 # Keep `NUM_TEST_BATCHES` low so that evaluation runs quickly. It can be
 # increased to a max. of 330 (if batch size is 4).
 NUM_TEST_BATCHES = 330
-NUM_TEST_BATCHES = 5 # 200
+# NUM_TEST_BATCHES = 5 # 200

 EVAL_EVERY_N_STEPS = 10 # this doesn't matter if `TRAIN_FRACTION = 1.0`.
 NUM_EPOCHS = 1 # can potentially train for more epochs
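
This flips the demo back from smoke-test values to the full-dataset settings: NUM_BATCHES = 3738 and NUM_TEST_BATCHES = 330 become active, with the small values kept as comments. The "max. of 330 (if batch size is 4)" ceiling is just the test-split size divided by the batch size, rounding up for a final partial batch; a sketch of the arithmetic, assuming a 1319-example test split (GSM8K's test-set size, which these numbers are consistent with):

import math

TEST_EXAMPLES = 1319  # assumed test-split size (matches GSM8K)
BATCH_SIZE = 4

# 1319 / 4 = 329.75, so 330 batches if the last, partial batch is kept.
print(math.ceil(TEST_EXAMPLES / BATCH_SIZE))  # -> 330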

src/MaxText/examples/grpo_llama3_1_8b_demo_pw.py

Lines changed: 7 additions & 6 deletions

@@ -142,20 +142,20 @@


 # ====== Input Checkpoint directory =====
-MODEL_CHECKPOINT_PATH = "/path/to/scanned/model/ckpt_load_dir/"
+MODEL_CHECKPOINT_PATH = "gs://mazumdera-test-bucket-europe-west4/llama3.1-8b-Instruct/scanned-pathways/0/items"

 # ====== Checkpoint directory =====
 LOG_DIR = f"{HOME}/content/tensorboard/grpo/logs_llama3/"
 if not os.path.exists(LOG_DIR):
   os.makedirs(LOG_DIR)

 # ===== Profiling =====
-PROFILE_DIR = f"/path/to/profile_dir/{run_id}/profiles_llama3/"
+PROFILE_DIR = f"gs://mazumdera-test-bucket-us-central2/grpo/v5p-64/llama3-1-8b/profile_dir/{run_id}/profiles_llama3/"
 if not epath.Path(PROFILE_DIR).exists():
   epath.Path(PROFILE_DIR).mkdir(parents=True)

 # ====== Checkpoint saving ======
-CKPT_DIR = f"/path/to/ckpt_save_dir/{run_id}/ckpts_llama3/"
+CKPT_DIR = f"gs://mazumdera-test-bucket-us-central2/grpo/v5p-64/llama3-1-8b/ckpt_save_dir/{run_id}/ckpts_llama3/"

 if not epath.Path(CKPT_DIR).exists():
   epath.Path(CKPT_DIR).mkdir(parents=True)

@@ -195,11 +195,12 @@
 # ====== Training ======
 BATCH_SIZE = 1
 # Increase `NUM_BATCHES` and `MAX_STEPS` for better results.
-# NUM_BATCHES = 3738
-NUM_BATCHES = 4 # 200
+NUM_BATCHES = 3738
+# NUM_BATCHES = 4 # 200
 # Keep `NUM_TEST_BATCHES` low so that evaluation runs quickly. It can be
 # increased to a max. of 330 (if batch size is 4).
-NUM_TEST_BATCHES = 5 # 200
+NUM_TEST_BATCHES = 330
+# NUM_TEST_BATCHES = 5 # 200

 EVAL_EVERY_N_STEPS = 10 # this doesn't matter if `TRAIN_FRACTION = 1.0`.
 NUM_EPOCHS = 1 # can potentially train for more epochs
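
Besides the same NUM_BATCHES/NUM_TEST_BATCHES flip as in the 70B demo, the placeholder paths are replaced here with concrete gs:// locations. No other change is needed because the existence checks already go through etils' epath, which accepts Cloud Storage URIs as well as local paths, whereas the os.path/os.makedirs calls used for LOG_DIR are local-only. A minimal sketch of that distinction, with a hypothetical bucket name:

import os
from etils import epath

# os.path / os.makedirs handle local filesystem paths only.
os.makedirs("/tmp/grpo_logs/", exist_ok=True)

# epath.Path works for both local paths and gs:// URIs, so the same
# mkdir logic covers placeholders and real buckets alike.
ckpt_dir = epath.Path("gs://example-bucket/grpo/ckpts/")  # hypothetical bucket
if not ckpt_dir.exists():
  ckpt_dir.mkdir(parents=True)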
