# Primus-Benchmark: daily pretraining benchmark workflow
# (Megatron-LM / TorchTitan on torch runners, MaxText on a JAX runner).
# NOTE(review): this file was pasted from the GitHub web UI; the UI chrome
# lines were removed and the YAML indentation reconstructed.
---
name: Primus-Benchmark

# Run on manual trigger or daily at 16:00 UTC.
on:
  workflow_dispatch:
  schedule:
    - cron: '0 16 * * *'

env:
  # Pinned Primus-Turbo commit checked out by every job below.
  PRIMUS_TURBO_COMMIT: 5233748e9c5c5795a6484ab31ece47c442d29ec2 # feat(mxfp4): refactor gemm mxfp4 and mxfp8. fuse transpose, hadamard transform and quantization. (#195)
  # Root directory on shared storage where dated benchmark logs/reports go.
  BENCHMARK_ROOT_DIR: /wekafs/primus/benchmark
jobs:
  # Part 1 of the torch benchmarks (Megatron configs 1-20). Split across two
  # jobs so a single job stays within runner/time limits; part2 `needs` this.
  run-benchmark-torch-part1:
    env:
      PRIMUS_WORKDIR: /wekafs/primus-data/primus_safe_ci/torch
      GPU_NAME: MI325 # Change this to your GPU model
      DATA_PATH: /wekafs/primus-data
      HSA_NO_SCRATCH_RECLAIM: "1"
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
    runs-on: [primus-lm-bench-torch-rwskq]
    steps:
      - run: echo "🎉 Begin Primus-Turbo Checkout."
      - name: Set commit hash to env
        run: echo "PRIMUS_TURBO_COMMIT=${PRIMUS_TURBO_COMMIT}" >> $GITHUB_ENV
      - name: Checkout Repo Primus-Turbo
        uses: actions/checkout@v4
        with:
          repository: AMD-AIG-AIMA/Primus-Turbo
          submodules: "recursive"
          path: Primus-Turbo
          ref: ${{ env.PRIMUS_TURBO_COMMIT }}
      - run: echo "Begin Primus-Turbo Install."
      - name: Install Primus-Turbo
        run: |
          mv Primus-Turbo /tmp/
          echo "Primus-Turbo dir: /tmp/Primus-Turbo"
          git config --global --add safe.directory /tmp/Primus-Turbo
          cd /tmp/Primus-Turbo
          start_time=$(date +%s)
          echo "✅ [Pip install requirements] started at: $(date)"
          mkdir -p ${PRIMUS_WORKDIR}/primus-cache
          MAX_JOBS=128 pip install --cache-dir=${PRIMUS_WORKDIR}/primus-cache --no-build-isolation --no-clean -r requirements.txt
          end_time=$(date +%s)
          elapsed=$((end_time - start_time))
          echo "✅ [Pip install requirements] ended at: $(date)"
          echo "⏱️ [Pip install requirements] Total elapsed time: ${elapsed} seconds"
          start_time=$(date +%s)
          echo "✅ [build primus-turbo] started at: $(date)"
          pip3 install --no-build-isolation -e . -v
          end_time=$(date +%s)
          elapsed=$((end_time - start_time))
          echo "✅ [build primus-turbo] ended at: $(date)"
          echo "⏱️ [build primus-turbo] Total elapsed time: ${elapsed} seconds"
      - run: echo "🎉 Begin Primus Benchmark."
      # Checkout of the Primus repo itself (this workflow's repository).
      - uses: actions/checkout@v4
        with:
          submodules: "recursive"
      - name: Show Environment Info
        run: |
          echo "Hostname: $(hostname)"
          echo "PWD: $(pwd)"
          echo "HOME: $HOME"
          echo "GITHUB_WORKSPACE: $GITHUB_WORKSPACE"
          echo "Runner Temp Dir: $RUNNER_TEMP"
          echo "Runner Tool Cache: $RUNNER_TOOL_CACHE"
      - name: Install Primus
        run: |
          pip install -r requirements.txt
      # Exposes BENCHMARK_DATE_DIR / BENCHMARK_LOG_DIR for all later steps.
      - name: Set BENCHMARK_PATH
        run: |
          BENCHMARK_DATE=$(date +%Y%m%d)
          BENCHMARK_DATE_DIR="${BENCHMARK_ROOT_DIR}/${BENCHMARK_DATE}"
          BENCHMARK_LOG_DIR="${BENCHMARK_DATE_DIR}/${GPU_NAME}"
          mkdir -p "${BENCHMARK_LOG_DIR}"
          echo "BENCHMARK_DATE_DIR=${BENCHMARK_DATE_DIR}" >> $GITHUB_ENV
          echo "BENCHMARK_LOG_DIR=${BENCHMARK_LOG_DIR}" >> $GITHUB_ENV
      # Megatron-LM Benchmarks
      - name: "[1/33][Megatron] llama3.3_70B-BF16"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama3.3_70B-BF16.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/llama3.3_70B-BF16-pretrain.yaml
      - name: "[2/33][Megatron] llama3.3_70B-FP8"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama3.3_70B-FP8.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/llama3.3_70B-FP8-pretrain.yaml
      - name: "[3/33][Megatron] llama2_7B-BF16"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama2_7B-BF16.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/llama2_7B-BF16-pretrain.yaml
      - name: "[4/33][Megatron] llama2_7B-FP8"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama2_7B-FP8.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/llama2_7B-FP8-pretrain.yaml
      - name: "[5/33][Megatron] llama2_70B-BF16"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama2_70B-BF16.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/llama2_70B-BF16-pretrain.yaml
      - name: "[6/33][Megatron] llama2_70B-FP8"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama2_70B-FP8.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/llama2_70B-FP8-pretrain.yaml
      - name: "[7/33][Megatron] llama3.1_8B-BF16"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama3.1_8B-BF16.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/llama3.1_8B-BF16-pretrain.yaml
      - name: "[8/33][Megatron] llama3.1_8B-FP8"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama3.1_8B-FP8.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/llama3.1_8B-FP8-pretrain.yaml
      - name: "[9/33][Megatron] qwen2.5_7B-BF16"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/qwen2.5_7B-BF16.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/qwen2.5_7B-BF16-pretrain.yaml
      - name: "[10/33][Megatron] qwen2.5_7B-FP8"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/qwen2.5_7B-FP8.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/qwen2.5_7B-FP8-pretrain.yaml
      - name: "[11/33][Megatron] mixtral_8x7B_v0.1-BF16"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/mixtral_8x7B_v0.1-BF16.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/mixtral_8x7B_v0.1-BF16-pretrain.yaml
      - name: "[12/33][Megatron] mixtral_8x7B_v0.1-FP8"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/mixtral_8x7B_v0.1-FP8.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/mixtral_8x7B_v0.1-FP8-pretrain.yaml
      - name: "[13/33][Megatron] llama3.1_70B-BF16"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama3.1_70B-BF16.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/llama3.1_70B-BF16-pretrain.yaml
      - name: "[14/33][Megatron] llama3.1_70B-FP8"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama3.1_70B-FP8.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/llama3.1_70B-FP8-pretrain.yaml
      - name: "[15/33][Megatron] qwen2.5_72B-BF16"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/qwen2.5_72B-BF16.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/qwen2.5_72B-BF16-pretrain.yaml
      - name: "[16/33][Megatron] qwen2.5_72B-FP8"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/qwen2.5_72B-FP8.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/qwen2.5_72B-FP8-pretrain.yaml
      - name: "[17/33][Megatron] deepseek_v2_lite-BF16"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/deepseek_v2_lite-BF16.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/deepseek_v2_lite-BF16-pretrain.yaml
      - name: "[18/33][Megatron] deepseek_v2_lite-FP8"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/deepseek_v2_lite-FP8.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/deepseek_v2_lite-FP8-pretrain.yaml
      - name: "[19/33][Megatron] gpt_oss_20B-BF16"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/gpt_oss_20B-BF16.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/gpt_oss_20B-BF16-pretrain.yaml
      - name: "[20/33][Megatron] gpt_oss_20B-FP8"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/gpt_oss_20B-FP8.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/gpt_oss_20B-FP8-pretrain.yaml
      - name: Clean
        run: |
          rm -rf ${PRIMUS_WORKDIR}/Primus-Turbo
          rm -rf ${PRIMUS_WORKDIR}/Primus

  # Part 2 of the torch benchmarks (Megatron 21-24 + TorchTitan 25-33);
  # runs on the same runner label after part1 and writes the summary report.
  run-benchmark-torch-part2:
    env:
      PRIMUS_WORKDIR: /wekafs/primus-data/primus_safe_ci/torch
      GPU_NAME: MI325 # Change this to your GPU model
      DATA_PATH: /wekafs/primus-data
      HSA_NO_SCRATCH_RECLAIM: "1"
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
    needs: run-benchmark-torch-part1
    runs-on: [primus-lm-bench-torch-rwskq]
    steps:
      - run: echo "🎉 Begin Primus-Turbo Checkout."
      - name: Set commit hash to env
        run: echo "PRIMUS_TURBO_COMMIT=${PRIMUS_TURBO_COMMIT}" >> $GITHUB_ENV
      - name: Checkout Repo Primus-Turbo
        uses: actions/checkout@v4
        with:
          repository: AMD-AIG-AIMA/Primus-Turbo
          submodules: "recursive"
          path: Primus-Turbo
          ref: ${{ env.PRIMUS_TURBO_COMMIT }}
      - run: echo "Begin Primus-Turbo Install."
      - name: Install Primus-Turbo
        run: |
          mv Primus-Turbo /tmp/
          echo "Primus-Turbo dir: /tmp/Primus-Turbo"
          git config --global --add safe.directory /tmp/Primus-Turbo
          cd /tmp/Primus-Turbo
          start_time=$(date +%s)
          echo "✅ [Pip install requirements] started at: $(date)"
          mkdir -p ${PRIMUS_WORKDIR}/primus-cache
          MAX_JOBS=128 pip install --cache-dir=${PRIMUS_WORKDIR}/primus-cache --no-build-isolation --no-clean -r requirements.txt
          end_time=$(date +%s)
          elapsed=$((end_time - start_time))
          echo "✅ [Pip install requirements] ended at: $(date)"
          echo "⏱️ [Pip install requirements] Total elapsed time: ${elapsed} seconds"
          start_time=$(date +%s)
          echo "✅ [build primus-turbo] started at: $(date)"
          pip3 install --no-build-isolation -e . -v
          end_time=$(date +%s)
          elapsed=$((end_time - start_time))
          echo "✅ [build primus-turbo] ended at: $(date)"
          echo "⏱️ [build primus-turbo] Total elapsed time: ${elapsed} seconds"
      - run: echo "🎉 Begin Primus Benchmark."
      # Checkout of the Primus repo itself (this workflow's repository).
      - uses: actions/checkout@v4
        with:
          submodules: "recursive"
      - name: Show Environment Info
        run: |
          echo "Hostname: $(hostname)"
          echo "PWD: $(pwd)"
          echo "HOME: $HOME"
          echo "GITHUB_WORKSPACE: $GITHUB_WORKSPACE"
          echo "Runner Temp Dir: $RUNNER_TEMP"
          echo "Runner Tool Cache: $RUNNER_TOOL_CACHE"
      - name: Install Primus
        run: |
          pip install -r requirements.txt
      # Exposes BENCHMARK_DATE_DIR / BENCHMARK_LOG_DIR for all later steps.
      - name: Set BENCHMARK_PATH
        run: |
          BENCHMARK_DATE=$(date +%Y%m%d)
          BENCHMARK_DATE_DIR="${BENCHMARK_ROOT_DIR}/${BENCHMARK_DATE}"
          BENCHMARK_LOG_DIR="${BENCHMARK_DATE_DIR}/${GPU_NAME}"
          mkdir -p "${BENCHMARK_LOG_DIR}"
          echo "BENCHMARK_DATE_DIR=${BENCHMARK_DATE_DIR}" >> $GITHUB_ENV
          echo "BENCHMARK_LOG_DIR=${BENCHMARK_LOG_DIR}" >> $GITHUB_ENV
      - name: "[21/33][Megatron] llama3_70B-BF16"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama3_70B-BF16.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/llama3_70B-BF16-pretrain.yaml
      - name: "[22/33][Megatron] llama3_70B-FP8"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama3_70B-FP8.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/llama3_70B-FP8-pretrain.yaml
      - name: "[23/33][Megatron] llama3_8B-BF16"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama3_8B-BF16.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/llama3_8B-BF16-pretrain.yaml
      - name: "[24/33][Megatron] llama3_8B-FP8"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/megatron/llama3_8B-FP8.log" \
            -- train pretrain --config examples/megatron/configs/MI300X/llama3_8B-FP8-pretrain.yaml
      # TorchTitan Benchmarks
      - name: "[25/33][TorchTitan] llama3.1_8B-BF16"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/torchtitan/llama3.1_8B-BF16.log" \
            -- train pretrain --config examples/torchtitan/configs/MI300X/llama3.1_8B-BF16-pretrain.yaml
      - name: "[26/33][TorchTitan] llama3.1_8B-FP8"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/torchtitan/llama3.1_8B-FP8.log" \
            -- train pretrain --config examples/torchtitan/configs/MI300X/llama3.1_8B-FP8-pretrain.yaml
      - name: "[27/33][TorchTitan] deepseek_v3_16b-BF16"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/torchtitan/deepseek_v3_16b-BF16.log" \
            -- train pretrain --config examples/torchtitan/configs/MI300X/deepseek_v3_16b-BF16-pretrain.yaml
      - name: "[28/33][TorchTitan] deepseek_v3_16b-FP8"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/torchtitan/deepseek_v3_16b-FP8.log" \
            -- train pretrain --config examples/torchtitan/configs/MI300X/deepseek_v3_16b-FP8-pretrain.yaml
      - name: "[29/33][TorchTitan] qwen3_1.7B"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/torchtitan/qwen3_1.7B-pretrain.log" \
            -- train pretrain --config examples/torchtitan/configs/MI300X/qwen3_1.7B-pretrain.yaml
      - name: "[30/33][TorchTitan] llama3.1_70B-BF16"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/torchtitan/llama3.1_70B-BF16.log" \
            -- train pretrain --config examples/torchtitan/configs/MI300X/llama3.1_70B-BF16-pretrain.yaml
      - name: "[31/33][TorchTitan] llama3.1_70B-FP8"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/torchtitan/llama3.1_70B-FP8.log" \
            -- train pretrain --config examples/torchtitan/configs/MI300X/llama3.1_70B-FP8-pretrain.yaml
      - name: "[32/33][TorchTitan] qwen3_0.6B"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/torchtitan/qwen3_0.6B.log" \
            -- train pretrain --config examples/torchtitan/configs/MI300X/qwen3_0.6B-pretrain.yaml
      - name: "[33/33][TorchTitan] qwen3_32B"
        timeout-minutes: 60
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/torchtitan/qwen3_32B.log" \
            -- train pretrain --config examples/torchtitan/configs/MI300X/qwen3_32B-pretrain.yaml
      - name: Generate Summary Report
        run: |
          echo "Generate Summary Report"
          python3 tools/daily/daily_report.py --report-csv-path "${BENCHMARK_DATE_DIR}/summary.csv" --benchmark-log-dir "${BENCHMARK_LOG_DIR}"
      - name: Clean
        run: |
          rm -rf ${PRIMUS_WORKDIR}/Primus-Turbo
          rm -rf ${PRIMUS_WORKDIR}/Primus

  # JAX/MaxText benchmarks; runs on a separate JAX runner after the torch jobs.
  run-benchmark-jax:
    env:
      PRIMUS_WORKDIR: /wekafs/primus-data/primus_safe_ci/jax
      GPU_NAME: MI325 # Change this to your GPU model
      DATA_PATH: /wekafs/primus-data
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
    needs: run-benchmark-torch-part2
    runs-on: [primus-lm-bench-jax-hhhf5]
    steps:
      - run: echo "🎉 Begin Primus-Turbo Checkout."
      - name: Set commit hash to env
        run: echo "PRIMUS_TURBO_COMMIT=${PRIMUS_TURBO_COMMIT}" >> $GITHUB_ENV
      - name: Checkout Repo Primus-Turbo
        uses: actions/checkout@v4
        with:
          repository: AMD-AIG-AIMA/Primus-Turbo
          submodules: "recursive"
          path: Primus-Turbo
          ref: ${{ env.PRIMUS_TURBO_COMMIT }}
      - run: echo "Begin Primus-Turbo Install."
      - name: Install Primus-Turbo
        run: |
          mv Primus-Turbo /tmp/
          echo "Primus-Turbo dir: /tmp/Primus-Turbo"
          git config --global --add safe.directory /tmp/Primus-Turbo
          cd /tmp/Primus-Turbo
          start_time=$(date +%s)
          echo "✅ [Pip install requirements] started at: $(date)"
          mkdir -p ${PRIMUS_WORKDIR}/primus-cache
          python3 -m pip install --upgrade pip setuptools
          pip3 install --cache-dir=${PRIMUS_WORKDIR}/primus-cache --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/rocm7.0
          MAX_JOBS=128 pip3 install --cache-dir=${PRIMUS_WORKDIR}/primus-cache --no-build-isolation --no-clean -r requirements.txt
          end_time=$(date +%s)
          elapsed=$((end_time - start_time))
          echo "✅ [Pip install requirements] ended at: $(date)"
          echo "⏱️ [Pip install requirements] Total elapsed time: ${elapsed} seconds"
          start_time=$(date +%s)
          echo "✅ [build primus-turbo] started at: $(date)"
          PRIMUS_TURBO_FRAMEWORK="JAX" pip3 install --no-build-isolation -e . -v
          end_time=$(date +%s)
          elapsed=$((end_time - start_time))
          echo "✅ [build primus-turbo] ended at: $(date)"
          echo "⏱️ [build primus-turbo] Total elapsed time: ${elapsed} seconds"
      - run: echo "🎉 Begin Primus Benchmark."
      # Checkout of the Primus repo itself (this workflow's repository).
      - uses: actions/checkout@v4
        with:
          submodules: "recursive"
      - name: Show Environment Info
        run: |
          echo "Hostname: $(hostname)"
          echo "PWD: $(pwd)"
          echo "HOME: $HOME"
          echo "GITHUB_WORKSPACE: $GITHUB_WORKSPACE"
          echo "Runner Temp Dir: $RUNNER_TEMP"
          echo "Runner Tool Cache: $RUNNER_TOOL_CACHE"
      - name: Install Primus
        run: |
          pip install -r requirements-jax.txt
      # Exposes BENCHMARK_DATE_DIR / BENCHMARK_LOG_DIR for all later steps.
      - name: Set BENCHMARK_PATH
        run: |
          BENCHMARK_DATE=$(date +%Y%m%d)
          BENCHMARK_DATE_DIR="${BENCHMARK_ROOT_DIR}/${BENCHMARK_DATE}"
          BENCHMARK_LOG_DIR="${BENCHMARK_DATE_DIR}/${GPU_NAME}"
          mkdir -p "${BENCHMARK_LOG_DIR}"
          echo "BENCHMARK_DATE_DIR=${BENCHMARK_DATE_DIR}" >> $GITHUB_ENV
          echo "BENCHMARK_LOG_DIR=${BENCHMARK_LOG_DIR}" >> $GITHUB_ENV
      - name: "[1/7][MaxText] deepseek_v2_16B"
        timeout-minutes: 120
        continue-on-error: true
        env:
          IP_INTERFACE: "eth0"
          NCCL_SOCKET_IFNAME: "eth0"
          GLOO_SOCKET_IFNAME: "eth0"
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/maxtext/deepseek_v2_16B.log" \
            -- train pretrain --config examples/maxtext/configs/MI300X/deepseek_v2_16B-pretrain.yaml
      - name: "[2/7][MaxText] llama2_70B"
        timeout-minutes: 120
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/maxtext/llama2_70B.log" \
            -- train pretrain --config examples/maxtext/configs/MI300X/llama2_70B-pretrain.yaml
      - name: "[3/7][MaxText] llama2_7B"
        timeout-minutes: 120
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/maxtext/llama2_7B.log" \
            -- train pretrain --config examples/maxtext/configs/MI300X/llama2_7B-pretrain.yaml
      - name: "[4/7][MaxText] llama3.3_70B"
        timeout-minutes: 120
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/maxtext/llama3.3_70B.log" \
            -- train pretrain --config examples/maxtext/configs/MI300X/llama3.3_70B-pretrain.yaml
      - name: "[5/7][MaxText] llama3_70B"
        timeout-minutes: 120
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/maxtext/llama3_70B.log" \
            -- train pretrain --config examples/maxtext/configs/MI300X/llama3_70B-pretrain.yaml
      - name: "[6/7][MaxText] llama3_8B"
        timeout-minutes: 120
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/maxtext/llama3_8B.log" \
            -- train pretrain --config examples/maxtext/configs/MI300X/llama3_8B-pretrain.yaml
      - name: "[7/7][MaxText] mixtral_8x7B"
        timeout-minutes: 120
        continue-on-error: true
        run: |
          ./runner/primus-cli direct --log_file "${BENCHMARK_LOG_DIR}/maxtext/mixtral_8x7B.log" \
            -- train pretrain --config examples/maxtext/configs/MI300X/mixtral_8x7B-pretrain.yaml
      - name: Generate Summary Report
        run: |
          echo "Generate Summary Report"
          python3 tools/daily/daily_report.py --report-csv-path "${BENCHMARK_DATE_DIR}/summary.csv" --benchmark-log-dir "${BENCHMARK_LOG_DIR}"
      - name: Clean
        run: |
          rm -rf ${PRIMUS_WORKDIR}/Primus-Turbo
          rm -rf ${PRIMUS_WORKDIR}/Primus