# Primus-Benchmark #89
# NOTE(review): the original paste carried GitHub web-UI chrome here ("This
# file contains hidden or bidirectional Unicode text that may be interpreted
# or compiled differently…" / "Learn more about bidirectional Unicode
# characters") — that is viewer boilerplate, not workflow content; preserved
# as comments so the file stays valid YAML.
name: Primus-Benchmark

# Triggered manually or on a daily schedule (16:00 UTC).
on:
  workflow_dispatch:
  schedule:
    - cron: '0 16 * * *'

env:
  # Pinned Primus-Turbo commit — feat(mxfp4): refactor gemm mxfp4 and mxfp8.
  # fuse transpose, hadamard transform and quantization. (#195)
  # Quoted so a digits-only SHA can never be parsed as a number.
  PRIMUS_TURBO_COMMIT: "5233748e9c5c5795a6484ab31ece47c442d29ec2"
  # Shared root directory for dated benchmark logs and summary reports.
  BENCHMARK_ROOT_DIR: /wekafs/primus/benchmark
jobs:
  # Part 1 of the torch benchmark suite: Megatron cases 1-20 of 33.
  # Split into two sequential jobs (part2 has `needs: part1`) so each stays
  # within the per-job step timeouts; each benchmark step is best-effort
  # (continue-on-error) so one failing model does not stop the sweep.
  run-benchmark-torch-part1:
    env:
      PRIMUS_WORKDIR: /wekafs/primus-data/primus_safe_ci/torch
      GPU_NAME: MI325  # Change this to your GPU model
      DATA_PATH: /wekafs/primus-data
      HSA_NO_SCRATCH_RECLAIM: 1
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
    runs-on: [primus-lm-bench-torch-rwskq]
    steps:
      - run: echo "🎉 Begin Primus-Turbo Checkout."
      # Copies the workflow-level pin into $GITHUB_ENV for later shell steps;
      # the checkout below reads it from the env context directly.
      - name: Set commit hash to env
        run: echo "PRIMUS_TURBO_COMMIT=${PRIMUS_TURBO_COMMIT}" >> $GITHUB_ENV
      - name: Checkout Repo Primus-Turbo
        uses: actions/checkout@v4
        with:
          repository: AMD-AIG-AIMA/Primus-Turbo
          submodules: "recursive"
          path: Primus-Turbo
          ref: ${{ env.PRIMUS_TURBO_COMMIT }}
      - run: echo "Begin Primus-Turbo Install."
      # Moves the checkout to /tmp, installs requirements (cached under
      # PRIMUS_WORKDIR), then builds primus-turbo in editable mode; the
      # start/end timestamps bracket each phase for the job log.
      - name: Install Primus-Turbo
        run: |
          mv Primus-Turbo /tmp/
          echo "Primus-Turbo dir: /tmp/Primus-Turbo"
          git config --global --add safe.directory /tmp/Primus-Turbo
          cd /tmp/Primus-Turbo
          start_time=$(date +%s)
          echo "✅ [Pip install requirements] started at: $(date)"
          mkdir -p ${PRIMUS_WORKDIR}/primus-cache
          MAX_JOBS=128 pip install --cache-dir=${PRIMUS_WORKDIR}/primus-cache --no-build-isolation --no-clean -r requirements.txt
          end_time=$(date +%s)
          elapsed=$((end_time - start_time))
          echo "✅ [Pip install requirements] ended at: $(date)"
          echo "⏱️ [Pip install requirements] Total elapsed time: ${elapsed} seconds"
          start_time=$(date +%s)
          echo "✅ [build primus-turbo] started at: $(date)"
          pip3 install --no-build-isolation -e . -v
          end_time=$(date +%s)
          elapsed=$((end_time - start_time))
          echo "✅ [build primus-turbo] ended at: $(date)"
          echo "⏱️ [build primus-turbo] Total elapsed time: ${elapsed} seconds"
      - run: echo "🎉 Begin Primus Benchmark."
      # Checks out this repository (Primus) into the workspace.
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Show Environment Info
        run: |
          echo "Hostname: $(hostname)"
          echo "PWD: $(pwd)"
          echo "HOME: $HOME"
          echo "GITHUB_WORKSPACE: $GITHUB_WORKSPACE"
          echo "Runner Temp Dir: $RUNNER_TEMP"
          echo "Runner Tool Cache: $RUNNER_TOOL_CACHE"
      - name: Install Primus
        run: |
          pip install -r requirements.txt
      # Logs land under <BENCHMARK_ROOT_DIR>/<YYYYMMDD>/<GPU_NAME>/; both
      # paths are exported to $GITHUB_ENV for the benchmark steps below.
      - name: Set BENCHMARK_PATH
        run: |
          BENCHMARK_DATE=$(date +%Y%m%d)
          BENCHMARK_DATE_DIR="${BENCHMARK_ROOT_DIR}/${BENCHMARK_DATE}"
          BENCHMARK_LOG_DIR="${BENCHMARK_DATE_DIR}/${GPU_NAME}"
          mkdir -p "${BENCHMARK_LOG_DIR}"
          echo "BENCHMARK_DATE_DIR=${BENCHMARK_DATE_DIR}" >> $GITHUB_ENV
          echo "BENCHMARK_LOG_DIR=${BENCHMARK_LOG_DIR}" >> $GITHUB_ENV
      # Megatron-LM Benchmarks
      - name: "[1/33][Megatron] llama3.3_70B-BF16"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama3.3_70B-BF16.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/llama3.3_70B-BF16-pretrain.yaml
      - name: "[2/33][Megatron] llama3.3_70B-FP8"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama3.3_70B-FP8.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/llama3.3_70B-FP8-pretrain.yaml
      - name: "[3/33][Megatron] llama2_7B-BF16"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama2_7B-BF16.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/llama2_7B-BF16-pretrain.yaml
      - name: "[4/33][Megatron] llama2_7B-FP8"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama2_7B-FP8.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/llama2_7B-FP8-pretrain.yaml
      - name: "[5/33][Megatron] llama2_70B-BF16"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama2_70B-BF16.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/llama2_70B-BF16-pretrain.yaml
      - name: "[6/33][Megatron] llama2_70B-FP8"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama2_70B-FP8.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/llama2_70B-FP8-pretrain.yaml
      - name: "[7/33][Megatron] llama3.1_8B-BF16"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama3.1_8B-BF16.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/llama3.1_8B-BF16-pretrain.yaml
      - name: "[8/33][Megatron] llama3.1_8B-FP8"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama3.1_8B-FP8.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/llama3.1_8B-FP8-pretrain.yaml
      - name: "[9/33][Megatron] qwen2.5_7B-BF16"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/qwen2.5_7B-BF16.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/qwen2.5_7B-BF16-pretrain.yaml
      - name: "[10/33][Megatron] qwen2.5_7B-FP8"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/qwen2.5_7B-FP8.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/qwen2.5_7B-FP8-pretrain.yaml
      - name: "[11/33][Megatron] mixtral_8x7B_v0.1-BF16"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/mixtral_8x7B_v0.1-BF16.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/mixtral_8x7B_v0.1-BF16-pretrain.yaml
      - name: "[12/33][Megatron] mixtral_8x7B_v0.1-FP8"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/mixtral_8x7B_v0.1-FP8.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/mixtral_8x7B_v0.1-FP8-pretrain.yaml
      - name: "[13/33][Megatron] llama3.1_70B-BF16"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama3.1_70B-BF16.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/llama3.1_70B-BF16-pretrain.yaml
      - name: "[14/33][Megatron] llama3.1_70B-FP8"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama3.1_70B-FP8.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/llama3.1_70B-FP8-pretrain.yaml
      - name: "[15/33][Megatron] qwen2.5_72B-BF16"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/qwen2.5_72B-BF16.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/qwen2.5_72B-BF16-pretrain.yaml
      - name: "[16/33][Megatron] qwen2.5_72B-FP8"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/qwen2.5_72B-FP8.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/qwen2.5_72B-FP8-pretrain.yaml
      - name: "[17/33][Megatron] deepseek_v2_lite-BF16"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/deepseek_v2_lite-BF16.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/deepseek_v2_lite-BF16-pretrain.yaml
      - name: "[18/33][Megatron] deepseek_v2_lite-FP8"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/deepseek_v2_lite-FP8.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/deepseek_v2_lite-FP8-pretrain.yaml
      - name: "[19/33][Megatron] gpt_oss_20B-BF16"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/gpt_oss_20B-BF16.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/gpt_oss_20B-BF16-pretrain.yaml
      - name: "[20/33][Megatron] gpt_oss_20B-FP8"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/gpt_oss_20B-FP8.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/gpt_oss_20B-FP8-pretrain.yaml
      - name: Clean
        run: |
          rm -rf ${PRIMUS_WORKDIR}/Primus-Turbo
          rm -rf ${PRIMUS_WORKDIR}/Primus
| run-benchmark-torch-part2: | |
| env: | |
| PRIMUS_WORKDIR: /wekafs/primus-data/primus_safe_ci/torch | |
| GPU_NAME: MI325 # Change this to your GPU model | |
| DATA_PATH: /wekafs/primus-data | |
| HSA_NO_SCRATCH_RECLAIM: 1 | |
| HF_TOKEN: ${{secrets.HF_TOKEN}} | |
| needs: run-benchmark-torch-part1 | |
| runs-on: [primus-lm-bench-torch-rwskq] | |
| steps: | |
| - run: echo "🎉 Begin Primus-Turbo Checkout." | |
| - name: Set commit hash to env | |
| run: echo "PRIMUS_TURBO_COMMIT=${PRIMUS_TURBO_COMMIT}" >> $GITHUB_ENV | |
| - name: Checkout Repo Primus-Turbo | |
| uses: actions/checkout@v4 | |
| with: | |
| repository: AMD-AIG-AIMA/Primus-Turbo | |
| submodules: "recursive" | |
| path: Primus-Turbo | |
| ref: ${{ env.PRIMUS_TURBO_COMMIT }} | |
| - run: echo "Begin Primus-Turbo Install." | |
| - name: Install Primus-Turbo | |
| run: | | |
| mv Primus-Turbo /tmp/ | |
| echo "Primus-Turbo dir: /tmp/Primus-Turbo" | |
| git config --global --add safe.directory /tmp/Primus-Turbo | |
| cd /tmp/Primus-Turbo | |
| start_time=$(date +%s) | |
| echo "✅ [Pip install requirements] started at: $(date)" | |
| mkdir -p ${PRIMUS_WORKDIR}/primus-cache | |
| MAX_JOBS=128 pip install --cache-dir=${PRIMUS_WORKDIR}/primus-cache --no-build-isolation --no-clean -r requirements.txt | |
| end_time=$(date +%s) | |
| elapsed=$((end_time - start_time)) | |
| echo "✅ [Pip install requirements] ended at: $(date)" | |
| echo "⏱️ [Pip install requirements] Total elapsed time: ${elapsed} seconds" | |
| start_time=$(date +%s) | |
| echo "✅ [build primus-turbo] started at: $(date)" | |
| pip3 install --no-build-isolation -e . -v | |
| end_time=$(date +%s) | |
| elapsed=$((end_time - start_time)) | |
| echo "✅ [build primus-turbo] ended at: $(date)" | |
| echo "⏱️ [build primus-turbo] Total elapsed time: ${elapsed} seconds" | |
| - run: echo "🎉 Begin Primus Benchmark." | |
| - uses: actions/checkout@v4 | |
| with: | |
| submodules: recursive | |
| - name: Show Environment Info | |
| run: | | |
| echo "Hostname: $(hostname)" | |
| echo "PWD: $(pwd)" | |
| echo "HOME: $HOME" | |
| echo "GITHUB_WORKSPACE: $GITHUB_WORKSPACE" | |
| echo "Runner Temp Dir: $RUNNER_TEMP" | |
| echo "Runner Tool Cache: $RUNNER_TOOL_CACHE" | |
| - name: Install Primus | |
| run: | | |
| pip install -r requirements.txt | |
| - name: Set BENCHMARK_PATH | |
| run: | | |
| BENCHMARK_DATE=$(date +%Y%m%d) | |
| BENCHMARK_DATE_DIR="${BENCHMARK_ROOT_DIR}/${BENCHMARK_DATE}" | |
| BENCHMARK_LOG_DIR="${BENCHMARK_DATE_DIR}/${GPU_NAME}" | |
| mkdir -p "${BENCHMARK_LOG_DIR}" | |
| echo "BENCHMARK_DATE_DIR=${BENCHMARK_DATE_DIR}" >> $GITHUB_ENV | |
| echo "BENCHMARK_LOG_DIR=${BENCHMARK_LOG_DIR}" >> $GITHUB_ENV | |
| - name: "[21/33][Megatron] llama3_70B-BF16" | |
| timeout-minutes: 60 | |
| continue-on-error: true | |
| run: | | |
| ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama3_70B-BF16.log" \ | |
| -- train pretrain --config examples/megatron/configs/MI300X/llama3_70B-BF16-pretrain.yaml | |
| - name: "[22/33][Megatron] llama3_70B-FP8" | |
| timeout-minutes: 60 | |
| continue-on-error: true | |
| run: | | |
| ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama3_70B-FP8.log" \ | |
| -- train pretrain --config examples/megatron/configs/MI300X/llama3_70B-FP8-pretrain.yaml | |
| - name: "[23/33][Megatron] llama3_8B-BF16" | |
| timeout-minutes: 60 | |
| continue-on-error: true | |
| run: | | |
| ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama3_8B-BF16.log" \ | |
| -- train pretrain --config examples/megatron/configs/MI300X/llama3_8B-BF16-pretrain.yaml | |
| - name: "[24/33][Megatron] llama3_8B-FP8" | |
| timeout-minutes: 60 | |
| continue-on-error: true | |
| run: | | |
| ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama3_8B-FP8.log" \ | |
| -- train pretrain --config examples/megatron/configs/MI300X/llama3_8B-FP8-pretrain.yaml | |
| # TorchTitan Benchmarks | |
| - name: "[25/33][TorchTitan] llama3.1_8B-BF16" | |
| timeout-minutes: 60 | |
| continue-on-error: true | |
| run: | | |
| ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/torchtitan/llama3.1_8B-BF16.log" \ | |
| -- train pretrain --config examples/torchtitan/configs/MI300X/llama3.1_8B-BF16-pretrain.yaml | |
| - name: "[26/33][TorchTitan] llama3.1_8B-FP8" | |
| timeout-minutes: 60 | |
| continue-on-error: true | |
| run: | | |
| ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/torchtitan/llama3.1_8B-FP8.log" \ | |
| -- train pretrain --config examples/torchtitan/configs/MI300X/llama3.1_8B-FP8-pretrain.yaml | |
| - name: "[27/33][TorchTitan] deepseek_v3_16b-BF16" | |
| timeout-minutes: 60 | |
| continue-on-error: true | |
| run: | | |
| ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/torchtitan/deepseek_v3_16b-BF16.log" \ | |
| -- train pretrain --config examples/torchtitan/configs/MI300X/deepseek_v3_16b-BF16-pretrain.yaml | |
| - name: "[28/33][TorchTitan] deepseek_v3_16b-FP8" | |
| timeout-minutes: 60 | |
| continue-on-error: true | |
| run: | | |
| ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/torchtitan/deepseek_v3_16b-FP8.log" \ | |
| -- train pretrain --config examples/torchtitan/configs/MI300X/deepseek_v3_16b-FP8-pretrain.yaml | |
| - name: "[29/33][TorchTitan] qwen3_1.7B" | |
| timeout-minutes: 60 | |
| continue-on-error: true | |
| run: | | |
| ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/torchtitan/qwen3_1.7B-pretrain.log" \ | |
| -- train pretrain --config examples/torchtitan/configs/MI300X/qwen3_1.7B-pretrain.yaml | |
| - name: "[30/33][TorchTitan] llama3.1_70B-BF16" | |
| timeout-minutes: 60 | |
| continue-on-error: true | |
| run: | | |
| ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/torchtitan/llama3.1_70B-BF16.log" \ | |
| -- train pretrain --config examples/torchtitan/configs/MI300X/llama3.1_70B-BF16-pretrain.yaml | |
| - name: "[31/33][TorchTitan] llama3.1_70B-FP8" | |
| timeout-minutes: 60 | |
| continue-on-error: true | |
| run: | | |
| ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/torchtitan/llama3.1_70B-FP8.log" \ | |
| -- train pretrain --config examples/torchtitan/configs/MI300X/llama3.1_70B-FP8-pretrain.yaml | |
| - name: "[32/33][TorchTitan] qwen3_0.6B" | |
| timeout-minutes: 60 | |
| continue-on-error: true | |
| run: | | |
| ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/torchtitan/qwen3_0.6B.log" \ | |
| -- train pretrain --config examples/torchtitan/configs/MI300X/qwen3_0.6B-pretrain.yaml | |
| - name: "[33/33][TorchTitan] qwen3_32B" | |
| timeout-minutes: 60 | |
| continue-on-error: true | |
| run: | | |
| ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/torchtitan/qwen3_32B.log" \ | |
| -- train pretrain --config examples/torchtitan/configs/MI300X/qwen3_32B-pretrain.yaml | |
| - name: Generate Summary Report | |
| run: | | |
| echo "Generate Summary Report" | |
| python3 tools/daily/daily_report.py --report-csv-path "${BENCHMARK_DATE_DIR}/summary.csv" --benchmark-log-dir "${BENCHMARK_LOG_DIR}" | |
| - name: Clean | |
| run: | | |
| rm -rf ${PRIMUS_WORKDIR}/Primus-Turbo | |
| rm -rf ${PRIMUS_WORKDIR}/Primus | |
| run-benchmark-jax: | |
| env: | |
| PRIMUS_WORKDIR: /wekafs/primus-data/primus_safe_ci/jax | |
| GPU_NAME: MI325 # Change this to your GPU model | |
| DATA_PATH: /wekafs/primus-data | |
| HF_TOKEN: ${{secrets.HF_TOKEN}} | |
| needs: run-benchmark-torch-part2 | |
| runs-on: [primus-lm-bench-jax-hhhf5] | |
| steps: | |
| - run: echo "🎉 Begin Primus-Turbo Checkout." | |
| - name: Set commit hash to env | |
| run: echo "PRIMUS_TURBO_COMMIT=${PRIMUS_TURBO_COMMIT}" >> $GITHUB_ENV | |
| - name: Checkout Repo Primus-Turbo | |
| uses: actions/checkout@v4 | |
| with: | |
| repository: AMD-AIG-AIMA/Primus-Turbo | |
| submodules: "recursive" | |
| path: Primus-Turbo | |
| ref: ${{ env.PRIMUS_TURBO_COMMIT }} | |
| - run: echo "Begin Primus-Turbo Install." | |
| - name: Install Primus-Turbo | |
| run: | | |
| mv Primus-Turbo /tmp/ | |
| echo "Primus-Turbo dir: /tmp/Primus-Turbo" | |
| git config --global --add safe.directory /tmp/Primus-Turbo | |
| cd /tmp/Primus-Turbo | |
| start_time=$(date +%s) | |
| echo "✅ [Pip install requirements] started at: $(date)" | |
| mkdir -p ${PRIMUS_WORKDIR}/primus-cache | |
| python3 -m pip install --upgrade pip setuptools | |
| pip3 install --cache-dir=${PRIMUS_WORKDIR}/primus-cache --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/rocm7.0 | |
| MAX_JOBS=128 pip3 install --cache-dir=${PRIMUS_WORKDIR}/primus-cache --no-build-isolation --no-clean -r requirements.txt | |
| end_time=$(date +%s) | |
| elapsed=$((end_time - start_time)) | |
| echo "✅ [Pip install requirements] ended at: $(date)" | |
| echo "⏱️ [Pip install requirements] Total elapsed time: ${elapsed} seconds" | |
| start_time=$(date +%s) | |
| echo "✅ [build primus-turbo] started at: $(date)" | |
| PRIMUS_TURBO_FRAMEWORK="JAX" pip3 install --no-build-isolation -e . -v | |
| end_time=$(date +%s) | |
| elapsed=$((end_time - start_time)) | |
| echo "✅ [build primus-turbo] ended at: $(date)" | |
| echo "⏱️ [build primus-turbo] Total elapsed time: ${elapsed} seconds" | |
| - run: echo "🎉 Begin Primus Benchmark." | |
| - uses: actions/checkout@v4 | |
| with: | |
| submodules: recursive | |
| - name: Show Environment Info | |
| run: | | |
| echo "Hostname: $(hostname)" | |
| echo "PWD: $(pwd)" | |
| echo "HOME: $HOME" | |
| echo "GITHUB_WORKSPACE: $GITHUB_WORKSPACE" | |
| echo "Runner Temp Dir: $RUNNER_TEMP" | |
| echo "Runner Tool Cache: $RUNNER_TOOL_CACHE" | |
| - name: Install Primus | |
| run: | | |
| pip install -r requirements-jax.txt | |
| - name: Set BENCHMARK_PATH | |
| run: | | |
| BENCHMARK_DATE=$(date +%Y%m%d) | |
| BENCHMARK_DATE_DIR="${BENCHMARK_ROOT_DIR}/${BENCHMARK_DATE}" | |
| BENCHMARK_LOG_DIR="${BENCHMARK_DATE_DIR}/${GPU_NAME}" | |
| mkdir -p "${BENCHMARK_LOG_DIR}" | |
| echo "BENCHMARK_DATE_DIR=${BENCHMARK_DATE_DIR}" >> $GITHUB_ENV | |
| echo "BENCHMARK_LOG_DIR=${BENCHMARK_LOG_DIR}" >> $GITHUB_ENV | |
| - name: "[1/7][MaxText] deepseek_v2_16B" | |
| timeout-minutes: 120 | |
| continue-on-error: true | |
| env: | |
| IP_INTERFACE: "eth0" | |
| NCCL_SOCKET_IFNAME: "eth0" | |
| GLOO_SOCKET_IFNAME: "eth0" | |
| run: | | |
| ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/maxtext/deepseek_v2_16B.log" \ | |
| -- train pretrain --config examples/maxtext/configs/MI300X/deepseek_v2_16B-pretrain.yaml | |
| - name: "[2/7][MaxText] llama2_70B" | |
| timeout-minutes: 120 | |
| continue-on-error: true | |
| run: | | |
| ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/maxtext/llama2_70B.log" \ | |
| -- train pretrain --config examples/maxtext/configs/MI300X/llama2_70B-pretrain.yaml | |
| - name: "[3/7][MaxText] llama2_7B" | |
| timeout-minutes: 120 | |
| continue-on-error: true | |
| run: | | |
| ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/maxtext/llama2_7B.log" \ | |
| -- train pretrain --config examples/maxtext/configs/MI300X/llama2_7B-pretrain.yaml | |
| - name: "[4/7][MaxText] llama3.3_70B" | |
| timeout-minutes: 120 | |
| continue-on-error: true | |
| run: | | |
| ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/maxtext/llama3.3_70B.log" \ | |
| -- train pretrain --config examples/maxtext/configs/MI300X/llama3.3_70B-pretrain.yaml | |
| - name: "[5/7][MaxText] llama3_70B" | |
| timeout-minutes: 120 | |
| continue-on-error: true | |
| run: | | |
| ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/maxtext/llama3_70B.log" \ | |
| -- train pretrain --config examples/maxtext/configs/MI300X/llama3_70B-pretrain.yaml | |
| - name: "[6/7][MaxText] llama3_8B" | |
| timeout-minutes: 120 | |
| continue-on-error: true | |
| run: | | |
| ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/maxtext/llama3_8B.log" \ | |
| -- train pretrain --config examples/maxtext/configs/MI300X/llama3_8B-pretrain.yaml | |
| - name: "[7/7][MaxText] mixtral_8x7B" | |
| timeout-minutes: 120 | |
| continue-on-error: true | |
| run: | | |
| ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/maxtext/mixtral_8x7B.log" \ | |
| -- train pretrain --config examples/maxtext/configs/MI300X/mixtral_8x7B-pretrain.yaml | |
| - name: Generate Summary Report | |
| run: | | |
| echo "Generate Summary Report" | |
| python3 tools/daily/daily_report.py --report-csv-path "${BENCHMARK_DATE_DIR}/summary.csv" --benchmark-log-dir "${BENCHMARK_LOG_DIR}" | |
| - name: Clean | |
| run: | | |
| rm -rf ${PRIMUS_WORKDIR}/Primus-Turbo | |
| rm -rf ${PRIMUS_WORKDIR}/Primus |