1
- From 25ba8dfa43e2b4b101b890c88464b638427d3d42 Mon Sep 17 00:00:00 2001
1
+ From 8d4bc83e2144cbbe5e634a53ac07a2c6a709b9c0 Mon Sep 17 00:00:00 2001
2
2
From: Charles Xu <[email protected]>
3
- Date: Wed, 17 Jul 2024 13:28:18 +0200
3
+ Date: Wed, 21 Aug 2024 07:31:51 +0200
4
4
Subject: [PATCH] Use KleidiAI Int4 Matmul micro-kernels in llama.cpp
5
5
6
6
- Update CMake file to fetch the Int4 micro-kernels from the KleidiAI
21
21
create mode 100644 ggml-kleidiai.h
22
22
23
23
diff --git a/CMakeLists.txt b/CMakeLists.txt
24
- index 08481334..07f8f601 100644
24
+ index 08481334..6aed4fc6 100644
25
25
--- a/CMakeLists.txt
26
26
+++ b/CMakeLists.txt
27
27
@@ -548,6 +548,57 @@ if (LLAMA_VULKAN)
@@ -32,9 +32,9 @@ index 08481334..07f8f601 100644
32
32
+
33
33
+ # Fetch KleidiAI sources:
34
34
+ include(FetchContent)
35
- + set(KLEIDIAI_COMMIT_SHA "187d9aacddfb678c09f0831b18f87401b1b353c3")
35
+ + set(KLEIDIAI_COMMIT_SHA "cb27bbe4cd47bb15d8236df3250ff105ef64e65b")
36
36
+ set(KLEIDIAI_DOWNLOAD_URL "https://gitlab.arm.com/kleidi/kleidiai/-/archive/${KLEIDIAI_COMMIT_SHA}/kleidiai-${KLEIDIAI_COMMIT_SHA}.tar.gz")
37
- + set(KLEIDIAI_ARCHIVE_MD5 "4a1eee013cb20464b534cb01212d19c9")
37
+ + set(KLEIDIAI_ARCHIVE_MD5 "f4fa5d1070d9f0ab96f5c021d292dde3")
38
38
+
39
39
+ if (POLICY CMP0135)
40
40
+ cmake_policy(SET CMP0135 NEW)
@@ -66,7 +66,7 @@ index 08481334..07f8f601 100644
66
66
+ ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/)
67
67
+
68
68
+ list(APPEND GGML_SOURCES_KLEIDIAI ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_lhs_quant_pack_qsi8d32p_f32.c)
69
- + list(APPEND GGML_SOURCES_KLEIDIAI ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32f16scalep_qsu4c32s16s0.c)
69
+ + list(APPEND GGML_SOURCES_KLEIDIAI ${KLEIDIAI_SRC}/kai/ukernels/matmul/pack/kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.c)
70
70
+ list(APPEND GGML_SOURCES_KLEIDIAI ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.c)
71
71
+ list(APPEND GGML_SOURCES_KLEIDIAI ${KLEIDIAI_SRC}/kai/ukernels/matmul/matmul_clamp_f32_qsi8d32p_qsi4c32p/kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm.c)
72
72
+
@@ -123,7 +123,7 @@ index bd367c42..ed4ce0ae 100644
123
123
if (this_size > max_size) {
124
124
diff --git a/ggml-kleidiai.cpp b/ggml-kleidiai.cpp
125
125
new file mode 100644
126
- index 00000000..257a0d4c
126
+ index 00000000..9129ea99
127
127
--- /dev/null
128
128
+++ b/ggml-kleidiai.cpp
129
129
@@ -0,0 +1,675 @@
@@ -176,7 +176,7 @@ index 00000000..257a0d4c
176
176
+ // KleidiAI micro-kernels
177
177
+ #include "kai_matmul_clamp_f32_qsi8d32p_qsi4c32p_interface.h"
178
178
+ #include "kai_lhs_quant_pack_qsi8d32p_f32.h"
179
- + #include "kai_rhs_pack_nxk_qsi4c32f16scalep_qsu4c32s16s0.h"
179
+ + #include "kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0.h"
180
180
+ #include "kai_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod.h"
181
181
+ #include "kai_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_8x4x32_neon_i8mm.h"
182
182
+
@@ -473,7 +473,7 @@ index 00000000..257a0d4c
473
473
+ v.nr = ukernel->get_nr();
474
474
+ v.kr = ukernel->get_kr();
475
475
+ v.sr = ukernel->get_sr();
476
- + v.packed_size = kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32f16scalep_qsu4c32s16s0(n, k, v.nr, v.kr, k_q4_0_block_size /* 32 */);
476
+ + v.packed_size = kai_get_rhs_packed_size_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0(n, k, v.nr, v.kr, k_q4_0_block_size /* 32 */);
477
477
+
478
478
+ return v;
479
479
+ }
@@ -638,11 +638,11 @@ index 00000000..257a0d4c
638
638
+ // Temporary memory for the computation.
639
639
+ uint8_t *reshaped_data = (uint8_t*)malloc(reshaped_data_sz);
640
640
+
641
- + struct kai_rhs_pack_nxk_qsi4c32f16scalep_qsu4c32s16s0_params params;
641
+ + struct kai_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0_params params;
642
642
+ params.lhs_zero_point = 1;
643
643
+ params.rhs_zero_point = 8;
644
644
+
645
- + kai_run_rhs_pack_nxk_qsi4c32f16scalep_qsu4c32s16s0(
645
+ + kai_run_rhs_pack_nxk_qsi4c32pscalef16_qsu4c32s16s0(
646
646
+ 1, n, k, // Dimensions
647
647
+ rhs_packing_params.nr, // Nr
648
648
+ rhs_packing_params.kr, // Kr
0 commit comments