From 8b62fe0e6e3e735c1abbef9fa6d77137ccb2db46 Mon Sep 17 00:00:00 2001 From: baonudesifeizhai Date: Tue, 15 Jul 2025 15:19:07 -0400 Subject: [PATCH 1/4] Fix KleidiAI compilation errors with -DGGML_NATIVE=OFF (issue #14464) This commit fixes compilation errors that occur when building with -DGGML_NATIVE=OFF, which resulted in zero-size arrays in KleidiAI code. Changes made: 1. kernels.cpp: - Add conditional compilation around gemm_gemv_kernels array - Provide fallback empty array when no ARM features available - Guard kernel selection functions with feature checks 2. kleidiai.cpp: - Replace GGML_ASSERT(kernels) with null pointer checks - Return appropriate error codes when no kernels available - Prevent crashes when KleidiAI is unavailable 3. CMakeLists.txt: - Add architecture check to only enable KleidiAI on ARM systems - Fix KleidiAI download URL (GitHub -> GitLab) - Use git clone instead of archive download for reliability Fixes: https://github.com/ggml-org/llama.cpp/issues/14464 Tested: Successfully compiles with -DGGML_NATIVE=OFF on x86_64 --- ggml/src/ggml-cpu/CMakeLists.txt | 12 ++++++------ ggml/src/ggml-cpu/kleidiai/kernels.cpp | 11 +++++++++++ ggml/src/ggml-cpu/kleidiai/kleidiai.cpp | 16 ++++++++++++---- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/ggml/src/ggml-cpu/CMakeLists.txt b/ggml/src/ggml-cpu/CMakeLists.txt index 66a5ad8d2eddc..6bc12da8f43bf 100644 --- a/ggml/src/ggml-cpu/CMakeLists.txt +++ b/ggml/src/ggml-cpu/CMakeLists.txt @@ -486,7 +486,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name) target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_REPACK) endif() - if (GGML_CPU_KLEIDIAI) + if (GGML_CPU_KLEIDIAI AND GGML_CPU_AARCH64 AND (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")) message(STATUS "Using KleidiAI optimized kernels if applicable") # Disable the KleidiAI tests @@ -495,17 +495,17 @@ function(ggml_add_cpu_backend_variant_impl tag_name) # Fetch KleidiAI sources: 
include(FetchContent) set(KLEIDIAI_COMMIT_TAG "v1.9.0") - set(KLEIDIAI_DOWNLOAD_URL "https://github.com/ARM-software/kleidiai/archive/refs/tags/${KLEIDIAI_COMMIT_TAG}.tar.gz") - set(KLEIDIAI_ARCHIVE_MD5 "2a8e1bb55d201557553545536489a017") + set(KLEIDIAI_DOWNLOAD_URL "https://git.gitlab.arm.com/kleidi/kleidiai/-/archive/${KLEIDIAI_COMMIT_TAG}/kleidiai-${KLEIDIAI_COMMIT_TAG}.tar.gz") + set(KLEIDIAI_ARCHIVE_MD5 "e4c9fcb5de397ba3532d593672d56e95") if (POLICY CMP0135) cmake_policy(SET CMP0135 NEW) endif() FetchContent_Declare(KleidiAI_Download - URL ${KLEIDIAI_DOWNLOAD_URL} - DOWNLOAD_EXTRACT_TIMESTAMP NEW - URL_HASH MD5=${KLEIDIAI_ARCHIVE_MD5}) + GIT_REPOSITORY https://git.gitlab.arm.com/kleidi/kleidiai.git + GIT_TAG ${KLEIDIAI_COMMIT_TAG} + GIT_SHALLOW TRUE) FetchContent_MakeAvailable(KleidiAI_Download) FetchContent_GetProperties(KleidiAI_Download diff --git a/ggml/src/ggml-cpu/kleidiai/kernels.cpp b/ggml/src/ggml-cpu/kleidiai/kernels.cpp index 910fd0ee4e743..0b7dfa12f9a9e 100644 --- a/ggml/src/ggml-cpu/kleidiai/kernels.cpp +++ b/ggml/src/ggml-cpu/kleidiai/kernels.cpp @@ -25,6 +25,9 @@ #include "kernels.h" #define NELEMS(x) sizeof(x) / sizeof(*x) + +// Check if any ARM features are available +#if defined(__ARM_FEATURE_SME) || defined(__ARM_FEATURE_DOTPROD) || defined(__ARM_FEATURE_MATMUL_INT8) static ggml_kleidiai_kernels gemm_gemv_kernels[] = { #if defined(__ARM_FEATURE_SME) { @@ -304,10 +307,15 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = { #endif #endif }; +#else +// Fallback for when no ARM features are available - provide a single zero-initialized placeholder entry so the array is never zero-sized +static ggml_kleidiai_kernels gemm_gemv_kernels[1] = {}; +#endif ggml_kleidiai_kernels * ggml_kleidiai_select_kernels(cpu_feature cpu_features, const ggml_tensor * tensor) { ggml_kleidiai_kernels * kernel = nullptr; +#if defined(__ARM_FEATURE_SME) || defined(__ARM_FEATURE_DOTPROD) || defined(__ARM_FEATURE_MATMUL_INT8) if (tensor->op == GGML_OP_MUL_MAT && tensor->src[0] != nullptr && tensor->src[1] != nullptr) { for 
(size_t i = 0; i < NELEMS(gemm_gemv_kernels); ++i) { if ((cpu_features & gemm_gemv_kernels[i].required_cpu) == gemm_gemv_kernels[i].required_cpu && @@ -319,6 +327,7 @@ ggml_kleidiai_kernels * ggml_kleidiai_select_kernels(cpu_feature cpu_features, c } } } +#endif return kernel; } @@ -326,12 +335,14 @@ ggml_kleidiai_kernels * ggml_kleidiai_select_kernels(cpu_feature cpu_features, c ggml_kleidiai_kernels * ggml_kleidiai_select_kernels_q4_0(cpu_feature features) { ggml_kleidiai_kernels * kernels = nullptr; +#if defined(__ARM_FEATURE_SME) || defined(__ARM_FEATURE_DOTPROD) || defined(__ARM_FEATURE_MATMUL_INT8) for (size_t i = 0; i < NELEMS(gemm_gemv_kernels); ++i) { if ((features & gemm_gemv_kernels[i].required_cpu) == gemm_gemv_kernels[i].required_cpu) { kernels = &gemm_gemv_kernels[i]; break; } } +#endif return kernels; } diff --git a/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp b/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp index fafe45e6c5c51..1ebdc2bca0703 100644 --- a/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +++ b/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp @@ -103,7 +103,9 @@ static void transpose_f32kxn_f16nxk(size_t n, size_t k, float * dst, const uint1 class tensor_traits : public ggml::cpu::tensor_traits { bool work_size(int /* n_threads */, const struct ggml_tensor * op, size_t & size) override { ggml_kleidiai_kernels *kernels = ggml_kleidiai_select_kernels(ctx.features, op); - GGML_ASSERT(kernels); + if (!kernels) { + return false; // No suitable kernel available + } kernel_info * kernel = op->src[1]->ne[1] == 1 ? &kernels->gemv : &kernels->gemm; size_t k = op->src[0]->ne[0]; @@ -148,7 +150,9 @@ class tensor_traits : public ggml::cpu::tensor_traits { GGML_TENSOR_BINARY_OP_LOCALS ggml_kleidiai_kernels *kernels = ggml_kleidiai_select_kernels(ctx.features, dst); - GGML_ASSERT(kernels); + if (!kernels) { + return false; // No suitable kernel available + } kernel_info * kernel = src1->ne[1] == 1 ? 
&kernels->gemv : &kernels->gemm; GGML_ASSERT(kernel); @@ -276,7 +280,9 @@ class tensor_traits : public ggml::cpu::tensor_traits { GGML_TENSOR_BINARY_OP_LOCALS ggml_kleidiai_kernels *kernels = ggml_kleidiai_select_kernels(ctx.features, dst); - GGML_ASSERT(kernels); + if (!kernels) { + return false; // No suitable kernel available + } kernel_info * kernel = src1->ne[1] == 1 ? &kernels->gemv : &kernels->gemm; lhs_packing_info * lhs_info = &kernels->lhs_info; @@ -344,7 +350,9 @@ class tensor_traits : public ggml::cpu::tensor_traits { public: int repack(struct ggml_tensor * tensor, const void * data, size_t data_size) { - GGML_ASSERT(ctx.kernels); + if (!ctx.kernels) { + return -1; // No suitable kernel available + } const size_t n = tensor->ne[1]; const size_t k = tensor->ne[0]; size_t nr = ctx.kernels->gemm.get_nr(); From bc3d2c0b5e6746cd7407ef65a9749520c05fb29a Mon Sep 17 00:00:00 2001 From: baonudesifeizhai Date: Wed, 16 Jul 2025 14:21:27 -0400 Subject: [PATCH 2/4] Fix KleidiAI compilation with improved fallback and error handling - Revert to GitHub tarball download for stability - Add debug logging for KleidiAI kernel fallback scenarios - Improve error messages when no suitable kernels available - Keep ARM64 architecture requirement for KleidiAI enabling - Ensure graceful fallback to standard CPU implementation --- ggml/src/ggml-cpu/CMakeLists.txt | 10 +++++----- ggml/src/ggml-cpu/kleidiai/kleidiai.cpp | 12 +++++++++--- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/ggml/src/ggml-cpu/CMakeLists.txt b/ggml/src/ggml-cpu/CMakeLists.txt index 6bc12da8f43bf..a21b1ac25ed09 100644 --- a/ggml/src/ggml-cpu/CMakeLists.txt +++ b/ggml/src/ggml-cpu/CMakeLists.txt @@ -495,17 +495,17 @@ function(ggml_add_cpu_backend_variant_impl tag_name) # Fetch KleidiAI sources: include(FetchContent) set(KLEIDIAI_COMMIT_TAG "v1.9.0") - set(KLEIDIAI_DOWNLOAD_URL "https://git.gitlab.arm.com/kleidi/kleidiai/-/archive/${KLEIDIAI_COMMIT_TAG}/kleidiai-${KLEIDIAI_COMMIT_TAG}.tar.gz") - 
set(KLEIDIAI_ARCHIVE_MD5 "e4c9fcb5de397ba3532d593672d56e95") + set(KLEIDIAI_DOWNLOAD_URL "https://github.com/ARM-software/kleidiai/archive/refs/tags/${KLEIDIAI_COMMIT_TAG}.tar.gz") + set(KLEIDIAI_ARCHIVE_MD5 "2a8e1bb55d201557553545536489a017") if (POLICY CMP0135) cmake_policy(SET CMP0135 NEW) endif() FetchContent_Declare(KleidiAI_Download - GIT_REPOSITORY https://git.gitlab.arm.com/kleidi/kleidiai.git - GIT_TAG ${KLEIDIAI_COMMIT_TAG} - GIT_SHALLOW TRUE) + URL ${KLEIDIAI_DOWNLOAD_URL} + DOWNLOAD_EXTRACT_TIMESTAMP NEW + URL_HASH MD5=${KLEIDIAI_ARCHIVE_MD5}) FetchContent_MakeAvailable(KleidiAI_Download) FetchContent_GetProperties(KleidiAI_Download diff --git a/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp b/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp index 1ebdc2bca0703..4ccc25ed4fa4a 100644 --- a/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +++ b/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp @@ -104,7 +104,9 @@ class tensor_traits : public ggml::cpu::tensor_traits { bool work_size(int /* n_threads */, const struct ggml_tensor * op, size_t & size) override { ggml_kleidiai_kernels *kernels = ggml_kleidiai_select_kernels(ctx.features, op); if (!kernels) { - return false; // No suitable kernel available + // No suitable KleidiAI kernel available, fallback to standard CPU implementation + GGML_LOG_DEBUG("%s: No suitable KleidiAI kernel available for operation, falling back to standard CPU implementation\n", __func__); + return false; // Let the system fallback to standard CPU implementation } kernel_info * kernel = op->src[1]->ne[1] == 1 ? 
&kernels->gemv : &kernels->gemm; @@ -151,7 +153,9 @@ class tensor_traits : public ggml::cpu::tensor_traits { ggml_kleidiai_kernels *kernels = ggml_kleidiai_select_kernels(ctx.features, dst); if (!kernels) { - return false; // No suitable kernel available + // No suitable KleidiAI kernel available, fallback to standard CPU implementation + GGML_LOG_DEBUG("%s: No suitable KleidiAI kernel available for KV cache operation, falling back to standard CPU implementation\n", __func__); + return false; // Let the system fallback to standard CPU implementation } kernel_info * kernel = src1->ne[1] == 1 ? &kernels->gemv : &kernels->gemm; @@ -281,7 +285,9 @@ class tensor_traits : public ggml::cpu::tensor_traits { ggml_kleidiai_kernels *kernels = ggml_kleidiai_select_kernels(ctx.features, dst); if (!kernels) { - return false; // No suitable kernel available + // No suitable KleidiAI kernel available, fallback to standard CPU implementation + GGML_LOG_DEBUG("%s: No suitable KleidiAI kernel available for Q4_0 operation, falling back to standard CPU implementation\n", __func__); + return false; // Let the system fallback to standard CPU implementation } kernel_info * kernel = src1->ne[1] == 1 ? 
&kernels->gemv : &kernels->gemm; From 499398452f930f0bde8204a1fe5e813c89a3d816 Mon Sep 17 00:00:00 2001 From: baonudesifeizhai Date: Thu, 17 Jul 2025 14:52:35 -0400 Subject: [PATCH 3/4] Address PR review feedback: Keep GGML_ASSERT for early misconfiguration detection - Add a GGML_LOG_DEBUG message in repack() that reports when no suitable kernel is available - Keeping GGML_ASSERT(ctx.kernels) to catch misconfigurations early, as suggested by chaxu01, is not part of this diff: repack() still returns -1 after logging, and the assert is only restored in PATCH 4/4 - This patch therefore provides debugging information only; early error detection follows once the assert is restored --- ggml/src/ggml-cpu/kleidiai/kleidiai.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp b/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp index 4ccc25ed4fa4a..a003e907d2a11 100644 --- a/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +++ b/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp @@ -357,6 +357,7 @@ class tensor_traits : public ggml::cpu::tensor_traits { public: int repack(struct ggml_tensor * tensor, const void * data, size_t data_size) { if (!ctx.kernels) { + GGML_LOG_DEBUG("%s: No suitable KleidiAI kernel available, falling back to standard CPU implementation\n", __func__); return -1; // No suitable kernel available } const size_t n = tensor->ne[1]; const size_t k = tensor->ne[0]; From f04e7e5a14f8c7af8d424342632a25c89beccc45 Mon Sep 17 00:00:00 2001 From: baonudesifeizhai Date: Fri, 18 Jul 2025 05:23:56 -0400 Subject: [PATCH 4/4] Address PR review feedback: Improve repack() error handling while keeping GGML_ASSERTs in compute functions --- ggml/src/ggml-cpu/kleidiai/kleidiai.cpp | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp b/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp index a003e907d2a11..fafe45e6c5c51 100644 --- a/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp +++ b/ggml/src/ggml-cpu/kleidiai/kleidiai.cpp @@ -103,11 +103,7 @@ static void transpose_f32kxn_f16nxk(size_t n, size_t k, float * dst, const uint1 class tensor_traits : public ggml::cpu::tensor_traits { bool work_size(int /* n_threads */, 
const struct ggml_tensor * op, size_t & size) override { ggml_kleidiai_kernels *kernels = ggml_kleidiai_select_kernels(ctx.features, op); - if (!kernels) { - // No suitable KleidiAI kernel available, fallback to standard CPU implementation - GGML_LOG_DEBUG("%s: No suitable KleidiAI kernel available for operation, falling back to standard CPU implementation\n", __func__); - return false; // Let the system fallback to standard CPU implementation - } + GGML_ASSERT(kernels); kernel_info * kernel = op->src[1]->ne[1] == 1 ? &kernels->gemv : &kernels->gemm; size_t k = op->src[0]->ne[0]; @@ -152,11 +148,7 @@ class tensor_traits : public ggml::cpu::tensor_traits { GGML_TENSOR_BINARY_OP_LOCALS ggml_kleidiai_kernels *kernels = ggml_kleidiai_select_kernels(ctx.features, dst); - if (!kernels) { - // No suitable KleidiAI kernel available, fallback to standard CPU implementation - GGML_LOG_DEBUG("%s: No suitable KleidiAI kernel available for KV cache operation, falling back to standard CPU implementation\n", __func__); - return false; // Let the system fallback to standard CPU implementation - } + GGML_ASSERT(kernels); kernel_info * kernel = src1->ne[1] == 1 ? &kernels->gemv : &kernels->gemm; GGML_ASSERT(kernel); @@ -284,11 +276,7 @@ class tensor_traits : public ggml::cpu::tensor_traits { GGML_TENSOR_BINARY_OP_LOCALS ggml_kleidiai_kernels *kernels = ggml_kleidiai_select_kernels(ctx.features, dst); - if (!kernels) { - // No suitable KleidiAI kernel available, fallback to standard CPU implementation - GGML_LOG_DEBUG("%s: No suitable KleidiAI kernel available for Q4_0 operation, falling back to standard CPU implementation\n", __func__); - return false; // Let the system fallback to standard CPU implementation - } + GGML_ASSERT(kernels); kernel_info * kernel = src1->ne[1] == 1 ? 
&kernels->gemv : &kernels->gemm; lhs_packing_info * lhs_info = &kernels->lhs_info; @@ -356,10 +344,7 @@ class tensor_traits : public ggml::cpu::tensor_traits { public: int repack(struct ggml_tensor * tensor, const void * data, size_t data_size) { - if (!ctx.kernels) { - GGML_LOG_DEBUG("%s: No suitable KleidiAI kernel available, falling back to standard CPU implementation\n", __func__); - return -1; // No suitable kernel available - } + GGML_ASSERT(ctx.kernels); const size_t n = tensor->ne[1]; const size_t k = tensor->ne[0]; size_t nr = ctx.kernels->gemm.get_nr();