artalis-io
diff --git a/Makefile b/Makefile
Lines changed: 30 additions & 3 deletions
diff --git a/README.md b/README.md
Lines changed: 74 additions & 154 deletions
diff --git a/docs/roadmap.md b/docs/roadmap.md
Lines changed: 10 additions & 3 deletions
diff --git a/include/gguf.h b/include/gguf.h
Lines changed: 22 additions & 13 deletions
@@ -28,12 +28,21 @@ ifneq ($(filter arm% aarch%,$(UNAME_M)),)
     src/quant/tq1_neon_sdot.c src/quant/tq1_neon.c src/quant/tq1_scalar.c \
     src/quant/q8_neon.c src/quant/q8_scalar.c \
     src/quant/q4_neon_sdot.c src/quant/q4_neon.c src/quant/q4_scalar.c \
+    src/quant/q4_1_neon.c src/quant/q4_1_scalar.c \
+    src/quant/bf16_neon.c src/quant/bf16_scalar.c \
     src/quant/q6k_neon.c src/quant/q6k_scalar.c \
     src/quant/q8k_neon.c src/quant/q8k_scalar.c \
     src/quant/q4k_neon.c src/quant/q4k_scalar.c \
     src/quant/q5k_neon.c src/quant/q5k_scalar.c \
     src/quant/q3k_neon.c src/quant/q3k_scalar.c \
-    src/quant/q2k_neon.c src/quant/q2k_scalar.c
+    src/quant/q2k_neon.c src/quant/q2k_scalar.c \
+    src/quant/iq4nl_neon.c src/quant/iq4nl_scalar.c \
+    src/quant/iq4xs_neon.c src/quant/iq4xs_scalar.c \
+    src/quant/iq3xxs_neon.c src/quant/iq3xxs_scalar.c \
+    src/quant/iq3s_neon.c src/quant/iq3s_scalar.c \
+    src/quant/iq2xxs_neon.c src/quant/iq2xxs_scalar.c \
+    src/quant/iq2xs_neon.c src/quant/iq2xs_scalar.c \
+    src/quant/iq2s_neon.c src/quant/iq2s_scalar.c
 
   TRANSFORMER_BACKEND = src/transformer/rmsnorm_neon.c src/transformer/rmsnorm_scalar.c \
     src/transformer/gqa_neon.c src/transformer/gqa_scalar.c \
@@ -45,12 +54,21 @@ else
     src/quant/tq2_scalar.c src/quant/tq1_scalar.c \
     src/quant/q8_avx2.c src/quant/q8_scalar.c \
     src/quant/q4_avx2.c src/quant/q4_scalar.c \
+    src/quant/q4_1_avx2.c src/quant/q4_1_scalar.c \
+    src/quant/bf16_avx2.c src/quant/bf16_scalar.c \
     src/quant/q6k_avx2.c src/quant/q6k_scalar.c \
     src/quant/q8k_avx2.c src/quant/q8k_scalar.c \
     src/quant/q4k_avx2.c src/quant/q4k_scalar.c \
     src/quant/q5k_avx2.c src/quant/q5k_scalar.c \
     src/quant/q3k_avx2.c src/quant/q3k_scalar.c \
-    src/quant/q2k_avx2.c src/quant/q2k_scalar.c
+    src/quant/q2k_avx2.c src/quant/q2k_scalar.c \
+    src/quant/iq4nl_avx2.c src/quant/iq4nl_scalar.c \
+    src/quant/iq4xs_avx2.c src/quant/iq4xs_scalar.c \
+    src/quant/iq3xxs_avx2.c src/quant/iq3xxs_scalar.c \
+    src/quant/iq3s_avx2.c src/quant/iq3s_scalar.c \
+    src/quant/iq2xxs_avx2.c src/quant/iq2xxs_scalar.c \
+    src/quant/iq2xs_avx2.c src/quant/iq2xs_scalar.c \
+    src/quant/iq2s_avx2.c src/quant/iq2s_scalar.c
 
   TRANSFORMER_BACKEND = src/transformer/rmsnorm_avx2.c src/transformer/rmsnorm_scalar.c \
     src/transformer/gqa_avx2.c src/transformer/gqa_scalar.c \
@@ -158,12 +176,21 @@ AVX2_QUANT_SRCS = $(QUANT_COMMON) \
     src/quant/tq2_scalar.c src/quant/tq1_scalar.c \
     src/quant/q8_avx2.c src/quant/q8_scalar.c \
     src/quant/q4_avx2.c src/quant/q4_scalar.c \
+    src/quant/q4_1_avx2.c src/quant/q4_1_scalar.c \
+    src/quant/bf16_avx2.c src/quant/bf16_scalar.c \
     src/quant/q6k_avx2.c src/quant/q6k_scalar.c \
     src/quant/q8k_avx2.c src/quant/q8k_scalar.c \
     src/quant/q4k_avx2.c src/quant/q4k_scalar.c \
     src/quant/q5k_avx2.c src/quant/q5k_scalar.c \
     src/quant/q3k_avx2.c src/quant/q3k_scalar.c \
-    src/quant/q2k_avx2.c src/quant/q2k_scalar.c
+    src/quant/q2k_avx2.c src/quant/q2k_scalar.c \
+    src/quant/iq4nl_avx2.c src/quant/iq4nl_scalar.c \
+    src/quant/iq4xs_avx2.c src/quant/iq4xs_scalar.c \
+    src/quant/iq3xxs_avx2.c src/quant/iq3xxs_scalar.c \
+    src/quant/iq3s_avx2.c src/quant/iq3s_scalar.c \
+    src/quant/iq2xxs_avx2.c src/quant/iq2xxs_scalar.c \
+    src/quant/iq2xs_avx2.c src/quant/iq2xs_scalar.c \
+    src/quant/iq2s_avx2.c src/quant/iq2s_scalar.c
 
 AVX2_TRANSFORMER_BACKEND = src/transformer/rmsnorm_avx2.c src/transformer/rmsnorm_scalar.c \
     src/transformer/gqa_avx2.c src/transformer/gqa_scalar.c \
 
@@ -73,11 +73,18 @@ Development roadmap for bitnet.c.
 - [x] Q3_K (3-bit k-quant) with NEON/AVX2/WASM kernels
 - [x] Non-tied output weights (separate output projection matrix)
 
-## Phase 8: Extended Format Coverage
+## Phase 8: Extended Format Coverage — Done
 
 - [x] Q2_K (2-bit k-quant) with NEON/AVX2/WASM kernels
-- [ ] IQ2_XXS, IQ2_XS, IQ2_S (codebook-based ~2 bpw, designed)
-- [ ] F16/BF16 weight types (unquantized dense matvec, designed)
+- [x] Q4_1 (4-bit with min) with NEON/AVX2/WASM kernels
+- [x] BF16 weight type with NEON/AVX2/WASM kernels
+- [x] IQ4_NL (4-bit non-linear codebook) with NEON/AVX2/WASM kernels
+- [x] IQ4_XS (4-bit non-linear with sub-block scales) with NEON/AVX2/WASM kernels
+- [x] IQ3_XXS (3-bit codebook) with NEON/AVX2/WASM kernels
+- [x] IQ3_S (3-bit codebook with separate signs) with NEON/AVX2/WASM kernels
+- [x] IQ2_XXS (2-bit codebook) with NEON/AVX2/WASM kernels
+- [x] IQ2_XS (2-bit codebook with scales) with NEON/AVX2/WASM kernels
+- [x] IQ2_S (2-bit codebook, 1024-entry grid) with NEON/AVX2/WASM kernels
 
 ## Performance Analysis (M1 Max, bitnet-b1.58-2B-4T)
 
 
@@ -29,19 +29,28 @@ enum {
 
 // GGUF tensor types we care about
 enum {
-    BN_GGUF_TENSOR_F32   = 0,
-    BN_GGUF_TENSOR_F16   = 1,
-    BN_GGUF_TENSOR_Q4_0  = 2,
-    BN_GGUF_TENSOR_Q8_0  = 8,
-    BN_GGUF_TENSOR_Q2_K  = 10,
-    BN_GGUF_TENSOR_Q3_K  = 11,
-    BN_GGUF_TENSOR_Q4_K  = 12,
-    BN_GGUF_TENSOR_Q5_K  = 13,
-    BN_GGUF_TENSOR_Q6_K  = 14,
-    BN_GGUF_TENSOR_Q8_K  = 15,
-    BN_GGUF_TENSOR_TQ1_0 = 34,
-    BN_GGUF_TENSOR_TQ2_0 = 35,
-    BN_GGUF_TENSOR_I2_S  = 36,
+    BN_GGUF_TENSOR_F32      = 0,
+    BN_GGUF_TENSOR_F16      = 1,
+    BN_GGUF_TENSOR_Q4_0     = 2,
+    BN_GGUF_TENSOR_Q4_1     = 3,
+    BN_GGUF_TENSOR_Q8_0     = 8,
+    BN_GGUF_TENSOR_Q2_K     = 10,
+    BN_GGUF_TENSOR_Q3_K     = 11,
+    BN_GGUF_TENSOR_Q4_K     = 12,
+    BN_GGUF_TENSOR_Q5_K     = 13,
+    BN_GGUF_TENSOR_Q6_K     = 14,
+    BN_GGUF_TENSOR_Q8_K     = 15,
+    BN_GGUF_TENSOR_IQ2_XXS  = 16,
+    BN_GGUF_TENSOR_IQ2_XS   = 17,
+    BN_GGUF_TENSOR_IQ3_XXS  = 18,
+    BN_GGUF_TENSOR_IQ4_NL   = 20,
+    BN_GGUF_TENSOR_IQ3_S    = 21,
+    BN_GGUF_TENSOR_IQ2_S    = 22,
+    BN_GGUF_TENSOR_IQ4_XS   = 23,
+    BN_GGUF_TENSOR_BF16     = 30,
+    BN_GGUF_TENSOR_TQ1_0    = 34,
+    BN_GGUF_TENSOR_TQ2_0    = 35,
+    BN_GGUF_TENSOR_I2_S     = 36,
 };
 
 typedef struct {