From 549b9d84330c327e6791fa812a7d60c0cf63572e Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sun, 24 May 2026 18:20:10 +0300 Subject: [PATCH] ci : update build-self-hosted.yml (#23616) --- .github/workflows/build-self-hosted.yml | 86 ++++++++++++++++--- .github/workflows/build.yml | 102 ++++++++++++----------- .github/workflows/server-self-hosted.yml | 77 +++++++++-------- ci/run.sh | 2 +- 4 files changed, 167 insertions(+), 100 deletions(-) diff --git a/.github/workflows/build-self-hosted.yml b/.github/workflows/build-self-hosted.yml index 2851c45601f..c247222eb4b 100644 --- a/.github/workflows/build-self-hosted.yml +++ b/.github/workflows/build-self-hosted.yml @@ -57,7 +57,7 @@ env: jobs: determine-tag: name: Determine tag name - runs-on: ubuntu-slim + runs-on: [self-hosted, fast] outputs: tag_name: ${{ steps.tag.outputs.name }} steps: @@ -86,7 +86,7 @@ jobs: HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }} run: | nvidia-smi - GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp + GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp ggml-ci-nvidia-vulkan-cm: needs: determine-tag @@ -103,7 +103,7 @@ jobs: HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }} run: | vulkaninfo --summary - GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp + GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp ggml-ci-nvidia-vulkan-cm2: needs: determine-tag @@ -120,10 +120,11 @@ jobs: HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }} run: | vulkaninfo --summary - GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp + GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp ggml-ci-nvidia-webgpu: - runs-on: [self-hosted, Linux, NVIDIA] + needs: determine-tag + runs-on: [self-hosted, Linux, NVIDIA, X64] steps: - name: Clone @@ -149,10 +150,11 @@ jobs: GG_BUILD_WEBGPU=1 \ GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \ GG_BUILD_WEBGPU_DAWN_DIR="$GITHUB_WORKSPACE/dawn/lib64/cmake/Dawn" \ - bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp + bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp # TODO: provision AMX-compatible machine #ggml-ci-cpu-amx: + # needs: determine-tag # runs-on: [self-hosted, Linux, CPU, AMX] # steps: @@ -163,10 +165,11 @@ jobs: # - name: Test # id: ggml-ci # run: | - # bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp + # bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp # TODO: provision AMD GPU machine # ggml-ci-amd-vulkan: + # needs: determine-tag # runs-on: [self-hosted, Linux, AMD] # steps: @@ -178,10 +181,11 @@ jobs: # id: ggml-ci # run: | # vulkaninfo --summary - # GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp + # GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp # TODO: provision AMD GPU machine # ggml-ci-amd-rocm: + # needs: determine-tag # runs-on: [self-hosted, Linux, AMD] # steps: @@ -193,7 +197,7 @@ jobs: # id: ggml-ci # run: | # amd-smi static - # GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp + # GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp ggml-ci-mac-metal: needs: determine-tag @@ -337,4 +341,66 @@ jobs: HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }} run: | source ./openvino_toolkit/setupvars.sh - GG_BUILD_OPENVINO=1 GGML_OPENVINO_DEVICE=GPU GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt + GG_BUILD_OPENVINO=1 GGML_OPENVINO_DEVICE=GPU GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp + + ggml-ci-arm64-cpu-low-perf: + needs: determine-tag + runs-on: [self-hosted, Linux, ARM64] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Test + id: ggml-ci + run: | + LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp + + ggml-ci-arm64-cpu-high-perf: + needs: determine-tag + runs-on: [self-hosted, Linux, ARM64] + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + + - name: Test + id: ggml-ci + run: | + LLAMA_ARG_THREADS=$(nproc) GG_BUILD_HIGH_PERF=1 GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp + +# TODO: not sure how to detect ARM flags on DGX Spark. currently get this error during cmake: +# CMake Warning at ggml/src/ggml-cpu/CMakeLists.txt:147 (message): +# ARM -march/-mcpu not found, -mcpu=native will be used +# +# if we resolve this, we should be able to offload these jobs to the self-hosted runners +# +# ggml-ci-arm64-cpu-high-perf-sve: +# needs: determine-tag +# runs-on: [self-hosted, Linux, NVIDIA, ARM64] +# +# steps: +# - name: Clone +# id: checkout +# uses: actions/checkout@v6 +# +# - name: Test +# id: ggml-ci +# run: | +# LLAMA_ARG_THREADS=$(nproc) GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp +# +# ggml-ci-arm64-cpu-kleidiai: +# needs: determine-tag +# runs-on: [self-hosted, Linux, NVIDIA, ARM64] +# +# steps: +# - name: Clone +# id: checkout +# uses: actions/checkout@v6 +# +# - name: Test +# id: ggml-ci +# run: | +# GG_BUILD_KLEIDIAI=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 65fa24f4468..47b377ff72b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -931,31 +931,32 @@ jobs: run: | LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt - ggml-ci-arm64-cpu-low-perf: - runs-on: ubuntu-22.04-arm - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.21 - with: - key: ggml-ci-arm64-cpu-low-perf - evict-old-files: 1d - save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get install build-essential - - - name: Test - id: ggml-ci - run: | - LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt +# note: moved to build-self-hosted.yml - can remove from here when everything is stable +# ggml-ci-arm64-cpu-low-perf: +# runs-on: ubuntu-22.04-arm +# +# steps: +# - name: Clone +# id: checkout +# uses: actions/checkout@v6 +# +# - name: ccache +# uses: ggml-org/ccache-action@v1.2.21 +# with: +# key: ggml-ci-arm64-cpu-low-perf +# evict-old-files: 1d +# save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} +# +# - name: Dependencies +# id: depends +# run: | +# sudo apt-get update +# sudo apt-get install build-essential +# +# - name: Test +# id: ggml-ci +# run: | +# LLAMA_ARG_THREADS=$(nproc) GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt ggml-ci-x64-cpu-high-perf: runs-on: ubuntu-22.04 @@ -983,31 +984,32 @@ jobs: run: | LLAMA_ARG_THREADS=$(nproc) GG_BUILD_HIGH_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt - ggml-ci-arm64-cpu-high-perf: - runs-on: ubuntu-22.04-arm - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v6 - - - name: ccache - uses: ggml-org/ccache-action@v1.2.21 - with: - key: ggml-ci-arm64-cpu-high-perf - evict-old-files: 1d - save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get install build-essential - - - name: Test - id: ggml-ci - run: | - LLAMA_ARG_THREADS=$(nproc) GG_BUILD_HIGH_PERF=1 GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt +# note: moved to build-self-hosted.yml - can remove from here when everything is stable +# ggml-ci-arm64-cpu-high-perf: +# runs-on: ubuntu-22.04-arm +# +# steps: +# - name: Clone +# id: checkout +# uses: actions/checkout@v6 +# +# - name: ccache +# uses: ggml-org/ccache-action@v1.2.21 +# with: +# key: ggml-ci-arm64-cpu-high-perf +# evict-old-files: 1d +# save: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} +# +# - name: Dependencies +# id: depends +# run: | +# sudo apt-get update +# sudo apt-get install build-essential +# +# - name: Test +# id: ggml-ci +# run: | +# LLAMA_ARG_THREADS=$(nproc) GG_BUILD_HIGH_PERF=1 GG_BUILD_NO_SVE=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt ggml-ci-arm64-cpu-high-perf-sve: runs-on: ubuntu-22.04-arm diff --git a/.github/workflows/server-self-hosted.yml b/.github/workflows/server-self-hosted.yml index 857c72a4619..91e0653943c 100644 --- a/.github/workflows/server-self-hosted.yml +++ b/.github/workflows/server-self-hosted.yml @@ -91,45 +91,44 @@ jobs: export ${{ matrix.extra_args }} pytest -v -x -m "not slow" - # TODO: provision CUDA runner - # server-cuda: - # runs-on: [self-hosted, llama-server, Linux, NVIDIA] - # - # name: server-cuda (${{ matrix.wf_name }}) - # strategy: - # matrix: - # build_type: [Release] - # wf_name: ["GPUx1"] - # include: - # - build_type: Release - # extra_args: "LLAMA_ARG_BACKEND_SAMPLING=1" - # wf_name: "GPUx1, backend-sampling" - # fail-fast: false - # - # steps: - # - name: Clone - # id: checkout - # uses: actions/checkout@v6 - # with: - # fetch-depth: 0 - # ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} - # - # - name: Build - # id: cmake_build - # run: | - # cmake -B build -DGGML_SCHED_NO_REALLOC=ON - # cmake --build build --config ${{ matrix.build_type }} -j $(sysctl -n hw.logicalcpu) --target llama-server - # - # - name: Tests - # id: server_integration_tests - # if: ${{ (!matrix.disabled_on_pr || !github.event.pull_request) }} - # run: | - # cd tools/server/tests - # python3 -m venv venv - # source venv/bin/activate - # pip install -r requirements.txt - # export ${{ matrix.extra_args }} - # pytest -v -x -m "not slow" + server-cuda: + runs-on: [self-hosted, llama-server, Linux, NVIDIA] + + name: server-cuda (${{ matrix.wf_name }}) + strategy: + matrix: + build_type: [Release] + wf_name: ["GPUx1"] + include: + - build_type: Release + extra_args: "LLAMA_ARG_BACKEND_SAMPLING=1" + wf_name: "GPUx1, backend-sampling" + fail-fast: false + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v6 + with: + fetch-depth: 0 + ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} + + - name: Build + id: cmake_build + run: | + cmake -B build -DGGML_CUDA=ON -DGGML_SCHED_NO_REALLOC=ON + cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server + + - name: Tests + id: server_integration_tests + if: ${{ (!matrix.disabled_on_pr || !github.event.pull_request) }} + run: | + cd tools/server/tests + python3 -m venv venv + source venv/bin/activate + pip install -r requirements.txt + export ${{ matrix.extra_args }} + pytest -v -x -m "not slow" server-kleidiai: runs-on: ah-ubuntu_22_04-c8g_8x diff --git a/ci/run.sh b/ci/run.sh index b096dc23b66..4acf4375267 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -238,7 +238,7 @@ function gg_run_ctest_debug { (cmake -G "${CMAKE_GENERATOR}" -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log (time cmake --build . --config Debug -j$(nproc)) 2>&1 | tee -a $OUT/${ci}-make.log - (time ctest -C Debug --output-on-failure -L main -E "test-opt|test-backend-ops" ${CTEST_EXTRA}) 2>&1 | tee -a $OUT/${ci}-ctest.log + (time ctest -C Debug --output-on-failure -L main -E "test-opt|test-backend-ops|test-llama-archs" ${CTEST_EXTRA}) 2>&1 | tee -a $OUT/${ci}-ctest.log set +e }