Skip to content

Commit 2321def

Browse files
Merge pull request #142 from chaxu01/feature/my-ML-examples
Update with latest KleidiAI release
2 parents 307bed2 + f9360f1 commit 2321def

File tree

1 file changed

+11
-11
lines changed

1 file changed

+11
-11
lines changed

kleidiai-examples/llama_cpp/0001-Use-KleidiAI-Int4-Matmul-micro-kernels-in-llama.cpp.patch

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
From 25ba8dfa43e2b4b101b890c88464b638427d3d42 Mon Sep 17 00:00:00 2001
1+
From 8d4bc83e2144cbbe5e634a53ac07a2c6a709b9c0 Mon Sep 17 00:00:00 2001
22
From: Charles Xu <[email protected]>
3-
Date: Wed, 17 Jul 2024 13:28:18 +0200
3+
Date: Wed, 21 Aug 2024 07:31:51 +0200
44
Subject: [PATCH] Use KleidiAI Int4 Matmul micro-kernels in llama.cpp
55

66
- Update CMake file to fetch the Int4 micro-kernels from the KleidiAI
@@ -21,7 +21,7 @@ Signed-off-by: Charles Xu <[email protected]>
2121
create mode 100644 ggml-kleidiai.h
2222

2323
diff --git a/CMakeLists.txt b/CMakeLists.txt
24-
index 08481334..07f8f601 100644
24+
index 08481334..6aed4fc6 100644
2525
--- a/CMakeLists.txt
2626
+++ b/CMakeLists.txt
2727
@@ -548,6 +548,57 @@ if (LLAMA_VULKAN)
@@ -32,9 +32,9 @@ index 08481334..07f8f601 100644
3232
+
3333
+ # Fetch KleidiAI sources:
3434
+ include(FetchContent)
35-
+ set(KLEIDIAI_COMMIT_SHA "187d9aacddfb678c09f0831b18f87401b1b353c3")
35+
+ set(KLEIDIAI_COMMIT_SHA "cb27bbe4cd47bb15d8236df3250ff105ef64e65b")
3636
+ set(KLEIDIAI_DOWNLOAD_URL "https://gitlab.arm.com/kleidi/kleidiai/-/archive/${KLEIDIAI_COMMIT_SHA}/kleidiai-${KLEIDIAI_COMMIT_SHA}.tar.gz")
37-
+ set(KLEIDIAI_ARCHIVE_MD5 "4a1eee013cb20464b534cb01212d19c9")
37+
+ set(KLEIDIAI_ARCHIVE_MD5 "f4fa5d1070d9f0ab96f5c021d292dde3")
3838
+
3939
+ if (POLICY CMP0135)
4040
+ cmake_policy(SET CMP0135 NEW)
@@ -66,7 +66,7 @@ index 08481334..07f8f601 100644
6666
+ ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/)
6767
+
6868
+ list(APPEND GGML_SOURCES_KLEIDIAI ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32.c)
69-
+ list(APPEND GGML_SOURCES_KLEIDIAI ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32f16scalep_qsu4c32s16s0.c)
69+
+ list(APPEND GGML_SOURCES_KLEIDIAI ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.c)
7070
+ list(APPEND GGML_SOURCES_KLEIDIAI ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c)
7171
+ list(APPEND GGML_SOURCES_KLEIDIAI ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm.c)
7272
+
@@ -123,7 +123,7 @@ index bd367c42..ed4ce0ae 100644
123123
if (this_size > max_size) {
124124
diff --git a/ggml-kleidiai.cpp b/ggml-kleidiai.cpp
125125
new file mode 100644
126-
index 00000000..257a0d4c
126+
index 00000000..9129ea99
127127
--- /dev/null
128128
+++ b/ggml-kleidiai.cpp
129129
@@ -0,0 +1,675 @@
@@ -176,7 +176,7 @@ index 00000000..257a0d4c
176176
+// KleidiAI micro-kernels
177177
+#include "kai_matmul_clamp_f32_qsi8d32p_qsi4c32p_interface.h"
178178
+#include "kai_lhs_quant_pack_qsi8d32p_f32.h"
179-
+#include "kai_rhs_pack_nxk_qsi4c32f16scalep_qsu4c32s16s0.h"
179+
+#include "kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.h"
180180
+#include "kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.h"
181181
+#include "kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm.h"
182182
+
@@ -473,7 +473,7 @@ index 00000000..257a0d4c
473473
+ v.nr = ukernel->get_nr();
474474
+ v.kr = ukernel->get_kr();
475475
+ v.sr = ukernel->get_sr();
476-
+ v.packed_size = kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32f16scalep_qsu4c32s16s0(n, k, v.nr, v.kr, k_q4_0_block_size /* 32 */);
476+
+ v.packed_size = kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0(n, k, v.nr, v.kr, k_q4_0_block_size /* 32 */);
477477
+
478478
+ return v;
479479
+}
@@ -638,11 +638,11 @@ index 00000000..257a0d4c
638638
+ // Temporary memory for the computation.
639639
+ uint8_t *reshaped_data = (uint8_t*)malloc(reshaped_data_sz);
640640
+
641-
+ struct kai_rhs_pack_nxk_qsi4c32f16scalep_qsu4c32s16s0_params params;
641+
+ struct kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0_params params;
642642
+ params.lhs_zero_point = 1;
643643
+ params.rhs_zero_point = 8;
644644
+
645-
+ kai_run_rhs_pack_nxk_qsi4c32f16scalep_qsu4c32s16s0(
645+
+ kai_run_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0(
646646
+ 1, n, k, // Dimensions
647647
+ rhs_packing_params.nr, // Nr
648648
+ rhs_packing_params.kr, // Kr

0 commit comments

Comments
 (0)