1
- From 453e52a763043e95b23c88176792e065377189ad Mon Sep 17 00:00:00 2001
2
- From: Charles Xu <chaxu01@e125126.arm.com>
3
- Date: Tue, 9 Jul 2024 08:49:27 +0200
4
- Subject: [PATCH] Updated to be able to build on Linux
1
+ From 617486784d5394fbb54f4d99a4860a050318a4e8 Mon Sep 17 00:00:00 2001
2
+ From: Gian Marco Iodice <gianmarco.iodice@arm.com>
3
+ Date: Tue, 16 Jul 2024 17:28:50 +0100
4
+ Subject: [PATCH] Use KleidiAI Int4 Matmul micro-kernels in llama.cpp
5
5
6
+ - Update CMake file to fetch the Int4 micro-kernels from the KleidiAI
7
+ repository
8
+ - Implement a KleidiAI backend for llama.cpp
9
+
10
+ Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
6
11
---
7
12
CMakeLists.txt | 48 ++++
8
13
ggml-alloc.c | 13 ++
@@ -15,7 +20,7 @@ Subject: [PATCH] Updated to be able to build on Linux
15
20
create mode 100644 ggml-kleidiai.h
16
21
17
22
diff --git a/CMakeLists.txt b/CMakeLists.txt
18
- index 08481334..99382573 100644
23
+ index 08481334..22504ad2 100644
19
24
--- a/CMakeLists.txt
20
25
+++ b/CMakeLists.txt
21
26
@@ -548,6 +548,53 @@ if (LLAMA_VULKAN)
@@ -26,9 +31,9 @@ index 08481334..99382573 100644
26
31
+
27
32
+ # Fetch KleidiAI sources:
28
33
+ include(FetchContent)
29
- + set(KLEIDIAI_COMMIT_SHA "d6c3b987e445e5e1daeda94e3c2888efaa07ca50")
34
+ + set(KLEIDIAI_COMMIT_SHA "187d9aacddfb678c09f0831b18f87401b1b353c3")
30
35
+ set(KLEIDIAI_DOWNLOAD_URL "https://gitlab.arm.com/kleidi/kleidiai/-/archive/${KLEIDIAI_COMMIT_SHA}/kleidiai-${KLEIDIAI_COMMIT_SHA}.tar.gz")
31
- + set(KLEIDIAI_ARCHIVE_MD5 "8e94e73bfa00ea038fd6e3d13f59080f")
36
+ + set(KLEIDIAI_ARCHIVE_MD5 "4a1eee013cb20464b534cb01212d19c9")
32
37
+
33
38
+ if (POLICY CMP0135)
34
39
+ cmake_policy(SET CMP0135 NEW)
@@ -113,7 +118,7 @@ index bd367c42..ed4ce0ae 100644
113
118
if (this_size > max_size) {
114
119
diff --git a/ggml-kleidiai.cpp b/ggml-kleidiai.cpp
115
120
new file mode 100644
116
- index 00000000..aa53086d
121
+ index 00000000..6800f63e
117
122
--- /dev/null
118
123
+++ b/ggml-kleidiai.cpp
119
124
@@ -0,0 +1,560 @@
@@ -171,8 +176,8 @@ index 00000000..aa53086d
171
176
+ static bool g_kai_loaded = false;
172
177
+
173
178
+ // Basic backend memory allocator
174
- + static uint8_t* extra_mem [MAX_EXTRA_BUFFERS];
175
- + static int32_t extra_mem_idx = 0;
179
+ + static uint8_t* g_extra_mem [MAX_EXTRA_BUFFERS];
180
+ + static int32_t g_extra_mem_idx = 0;
176
181
+
177
182
+ typedef void (*kai_matmul_func_t)(const struct ggml_compute_params * params, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
178
183
+
@@ -546,7 +551,7 @@ index 00000000..aa53086d
546
551
+ free(reshaped_data);
547
552
+ cur->extra = cur->data;
548
553
+ #else
549
- + extra_mem[extra_mem_idx ++] = reshaped_data;
554
+ + g_extra_mem[g_extra_mem_idx ++] = reshaped_data;
550
555
+ cur->extra = reshaped_data;
551
556
+ #endif
552
557
+ } else {
@@ -671,10 +676,10 @@ index 00000000..aa53086d
671
676
+ }
672
677
+
673
678
+ GGML_CALL void ggml_kai_free_extra_mem(void) {
674
- + for(int32_t i = extra_mem_idx - 1; i >= 0; i--) {
675
- + free(extra_mem [i]);
679
+ + for(int32_t i = g_extra_mem_idx - 1; i >= 0; i--) {
680
+ + free(g_extra_mem [i]);
676
681
+ }
677
- + extra_mem_idx = 0;
682
+ + g_extra_mem_idx = 0;
678
683
+ }
679
684
+ #endif // defined(__aarch64__) && (defined(__ANDROID__) || defined(__linux__))
680
685
diff --git a/ggml-kleidiai.h b/ggml-kleidiai.h
@@ -840,5 +845,5 @@ index 05591aa4..735dde04 100644
840
845
}
841
846
842
847
- -
843
- 2.34.1
848
+ 2.25.1
844
849
0 commit comments