Skip to content

Commit beee073

Browse files
Merge pull request #139 from gmiodice/fix_oob
Update KleidiAI example for llama.cpp
2 parents 8799390 + 1ee4c88 commit beee073

File tree

2 files changed

+22
-17
lines changed

2 files changed

+22
-17
lines changed

kleidiai-examples/llama_cpp/0001-Use-KleidiAI-Int4-Matmul-micro-kernels-in-llama.cpp.patch

Lines changed: 20 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,13 @@
1-
From 453e52a763043e95b23c88176792e065377189ad Mon Sep 17 00:00:00 2001
2-
From: Charles Xu <chaxu01@e125126.arm.com>
3-
Date: Tue, 9 Jul 2024 08:49:27 +0200
4-
Subject: [PATCH] Updated to be able to build on Linux
1+
From 617486784d5394fbb54f4d99a4860a050318a4e8 Mon Sep 17 00:00:00 2001
2+
From: Gian Marco Iodice <gianmarco.iodice@arm.com>
3+
Date: Tue, 16 Jul 2024 17:28:50 +0100
4+
Subject: [PATCH] Use KleidiAI Int4 Matmul micro-kernels in llama.cpp
55

6+
- Update CMake file to fetch the Int4 micro-kernels from the KleidiAI
7+
repository
8+
- Implement a KleidiAI backend for llama.cpp
9+
10+
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
611
---
712
CMakeLists.txt | 48 ++++
813
ggml-alloc.c | 13 ++
@@ -15,7 +20,7 @@ Subject: [PATCH] Updated to be able to build on Linux
1520
create mode 100644 ggml-kleidiai.h
1621

1722
diff --git a/CMakeLists.txt b/CMakeLists.txt
18-
index 08481334..99382573 100644
23+
index 08481334..22504ad2 100644
1924
--- a/CMakeLists.txt
2025
+++ b/CMakeLists.txt
2126
@@ -548,6 +548,53 @@ if (LLAMA_VULKAN)
@@ -26,9 +31,9 @@ index 08481334..99382573 100644
2631
+
2732
+ # Fetch KleidiAI sources:
2833
+ include(FetchContent)
29-
+ set(KLEIDIAI_COMMIT_SHA "d6c3b987e445e5e1daeda94e3c2888efaa07ca50")
34+
+ set(KLEIDIAI_COMMIT_SHA "187d9aacddfb678c09f0831b18f87401b1b353c3")
3035
+ set(KLEIDIAI_DOWNLOAD_URL "https://gitlab.arm.com/kleidi/kleidiai/-/archive/${KLEIDIAI_COMMIT_SHA}/kleidiai-${KLEIDIAI_COMMIT_SHA}.tar.gz")
31-
+ set(KLEIDIAI_ARCHIVE_MD5 "8e94e73bfa00ea038fd6e3d13f59080f")
36+
+ set(KLEIDIAI_ARCHIVE_MD5 "4a1eee013cb20464b534cb01212d19c9")
3237
+
3338
+ if (POLICY CMP0135)
3439
+ cmake_policy(SET CMP0135 NEW)
@@ -113,7 +118,7 @@ index bd367c42..ed4ce0ae 100644
113118
if (this_size > max_size) {
114119
diff --git a/ggml-kleidiai.cpp b/ggml-kleidiai.cpp
115120
new file mode 100644
116-
index 00000000..aa53086d
121+
index 00000000..6800f63e
117122
--- /dev/null
118123
+++ b/ggml-kleidiai.cpp
119124
@@ -0,0 +1,560 @@
@@ -171,8 +176,8 @@ index 00000000..aa53086d
171176
+static bool g_kai_loaded = false;
172177
+
173178
+// Basic backend memory allocator
174-
+static uint8_t* extra_mem[MAX_EXTRA_BUFFERS];
175-
+static int32_t extra_mem_idx = 0;
179+
+static uint8_t* g_extra_mem[MAX_EXTRA_BUFFERS];
180+
+static int32_t g_extra_mem_idx = 0;
176181
+
177182
+typedef void (*kai_matmul_func_t)(const struct ggml_compute_params * params, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
178183
+
@@ -546,7 +551,7 @@ index 00000000..aa53086d
546551
+ free(reshaped_data);
547552
+ cur->extra = cur->data;
548553
+#else
549-
+ extra_mem[extra_mem_idx++] = reshaped_data;
554+
+ g_extra_mem[g_extra_mem_idx++] = reshaped_data;
550555
+ cur->extra = reshaped_data;
551556
+#endif
552557
+ } else {
@@ -671,10 +676,10 @@ index 00000000..aa53086d
671676
+}
672677
+
673678
+GGML_CALL void ggml_kai_free_extra_mem(void) {
674-
+ for(int32_t i = extra_mem_idx - 1; i >= 0; i--) {
675-
+ free(extra_mem[i]);
679+
+ for(int32_t i = g_extra_mem_idx - 1; i >= 0; i--) {
680+
+ free(g_extra_mem[i]);
676681
+ }
677-
+ extra_mem_idx = 0;
682+
+ g_extra_mem_idx = 0;
678683
+}
679684
+#endif // defined(__aarch64__) && (defined(__ANDROID__) || defined(__linux__))
680685
diff --git a/ggml-kleidiai.h b/ggml-kleidiai.h
@@ -840,5 +845,5 @@ index 05591aa4..735dde04 100644
840845
}
841846

842847
--
843-
2.34.1
848+
2.25.1
844849

kleidiai-examples/llama_cpp/README.md

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -101,12 +101,12 @@ cmake -DLLAMA_KLEIDIAI=ON -DCMAKE_TOOLCHAIN_FILE=${NDK_PATH}/build/cmake/android
101101

102102
make -j4
103103
```
104-
Build the llama.cpp project for Linux:
104+
Build the llama.cpp project for Linux®:
105105

106106
```bash
107107
mkdir build && cd build
108108

109-
cmake -DCMAKE_C_FLAGS=-march=armv8.2-a+dotprod+i8mm -DCMAKE_CXX_FLAGS=-march=armv8.2-a+dotprod+i8mm -DLLAMA_KLEIDIAI=ON
109+
cmake -DLLAMA_KLEIDIAI=ON -DCMAKE_C_FLAGS=-march=armv8.2-a+dotprod+i8mm -DCMAKE_CXX_FLAGS=-march=armv8.2-a+dotprod+i8mm ..
110110

111111
make -j4
112112
```

0 commit comments

Comments
 (0)