3
3
Date: Fri, 14 Jun 2024 14:55:47 +0100
4
4
Subject: [PATCH] Use KleidiAI Int4 Matmul micro-kernels in llama.cpp
5
5
6
- - Update CMake file to fetch the Int4 micro-kernels from the KleidiAI
6
+ * Update CMake file to fetch the Int4 micro-kernels from the KleidiAI
7
7
repository
8
- - Implement a KleidiAI backend for llama.cpp
8
+ * Implement a KleidiAI backend for llama.cpp
9
9
10
10
Signed-off-by: Gian Marco Iodice <[email protected]>
11
11
---
20
20
create mode 100644 ggml-kleidiai.h
21
21
22
22
diff --git a/CMakeLists.txt b/CMakeLists.txt
23
- index 08481334..5c0458e9 100644
23
+ index 08481334..99382573 100644
24
24
--- a/CMakeLists.txt
25
25
+++ b/CMakeLists.txt
26
26
@@ -548,6 +548,53 @@ if (LLAMA_VULKAN)
@@ -31,9 +31,9 @@ index 08481334..5c0458e9 100644
31
31
+
32
32
+ # Fetch KleidiAI sources:
33
33
+ include(FetchContent)
34
- + set(KLEIDIAI_COMMIT_SHA "b0911c80b35e41dc9c22075a63e83c217fd0a106")
34
+ + set(KLEIDIAI_COMMIT_SHA "d6c3b987e445e5e1daeda94e3c2888efaa07ca50")
35
35
+ set(KLEIDIAI_DOWNLOAD_URL "https://gitlab.arm.com/kleidi/kleidiai/-/archive/${KLEIDIAI_COMMIT_SHA}/kleidiai-${KLEIDIAI_COMMIT_SHA}.tar.gz")
36
- + set(KLEIDIAI_ARCHIVE_MD5 "8b54226586eb18957c374a6d1434f4f2")
36
+ + set(KLEIDIAI_ARCHIVE_MD5 "8e94e73bfa00ea038fd6e3d13f59080f")
37
37
+
38
38
+ if (POLICY CMP0135)
39
39
+ cmake_policy(SET CMP0135 NEW)
@@ -86,7 +86,7 @@ index 08481334..5c0458e9 100644
86
86
87
87
target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES})
88
88
diff --git a/ggml-alloc.c b/ggml-alloc.c
89
- index bd367c42..ac099392 100644
89
+ index bd367c42..ed4ce0ae 100644
90
90
--- a/ggml-alloc.c
91
91
+++ b/ggml-alloc.c
92
92
@@ -9,6 +9,10 @@
@@ -105,12 +105,12 @@ index bd367c42..ac099392 100644
105
105
if (t->data == NULL && t->view_src == NULL) {
106
106
this_size = GGML_PAD(ggml_backend_buft_get_alloc_size(buft, t), alignment);
107
107
+ #if defined(GGML_USE_KLEIDIAI)
108
- + // Temporary solution to allocate more memore if needed for packing the weights.
108
+ + // Temporary solution to allocate more memory if needed for packing the weights.
109
109
+ // This method is not sufficient as we assume that the weights are for matmul only.
110
110
+ // However, weights could belong to other operations
111
- + const int64_t iai_diff = (ggml_kai_get_const_workspace_size_matmul(t) - this_size);
112
- + if (iai_diff > 0) {
113
- + this_size += iai_diff;
111
+ + const int64_t kai_diff = (ggml_kai_get_const_workspace_size_matmul(t) - this_size);
112
+ + if (kai_diff > 0) {
113
+ + this_size += kai_diff;
114
114
+ }
115
115
+ #endif
116
116
}
0 commit comments