vllm-project · RobMulla · Dec 3, 2025 · Dec 5, 2025 · Dec 5, 2025
@@ -24,3 +24,26 @@ These tables show the models currently tested for accuracy and performance.
 This table shows the features currently tested for accuracy and performance.
 
 {{ read_csv('../support_matrices/feature_support_matrix.csv', keep_default_na=False) }}
+
+## Kernel Support
+
+This table shows the current support status of each kernel, broken down by correctness and performance testing.
+
+{{ read_csv('../support_matrices/kernel_support_matrix.csv', keep_default_na=False) }}
+
+## Parallelism Support
+
+This table shows the current support status of each parallelism strategy, broken down by correctness and performance testing.
+
+{{ read_csv('../support_matrices/parallelism_support_matrix.csv', keep_default_na=False) }}
+
+## Quantization Support
+
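+This table shows the current support status of each quantization scheme, broken down by correctness and performance testing, along with the TPU generations recommended for it.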
+
+{{ read_csv('../support_matrices/quantization_support_matrix.csv', keep_default_na=False) }}
+
+!!! info "Legend"
+    *   ✅ Supported
+    *   🚧 Coming Soon
+    *   ❌ Not Supported
@@ -1,11 +1,16 @@
 Feature,CorrectnessTest,PerformanceTest
-"Collective Communication Matmul",✅,N/A
-"Prefix Caching",✅,✅
-"Multimodal Inputs",✅,✅
-"Quantized Matmul Attention and KV Cache",✅,✅
 "Chunked Prefill",✅,✅
-"JAX-Path Qxix Quantization",✅,✅
+"DCN-based P/D disaggregation",🚧,🚧
+"KV cache host offloading",🚧,🚧
+"Llama 4 Maverick",🚧,🚧
+"LoRA_Torch",✅,🚧
+"Multimodal Inputs",✅,✅
+"Out-of-tree model support",✅,✅
+"Prefix Caching",✅,✅
 "Single Program Multi Data",✅,✅
+"Speculative Decoding: Eagle3",✅,✅
 "Speculative Decoding: Ngram",✅,✅
-"Structured Decoding",✅,N/A
-"Ragged Paged Attention V3",✅,✅
+"async scheduler",✅,✅
+"runai_model_streamer_loader",✅,N/A
+"sampling_params",✅,N/A
+"structured_decoding",✅,N/A
@@ -0,0 +1,8 @@
+Feature,CorrectnessTest,PerformanceTest
+"Collective Communication Matmul",✅,🚧
+"MLA",🚧,🚧
+"MoE",🚧,🚧
+"Quantized Attention",🚧,🚧
+"Quantized KV Cache",🚧,🚧
+"Quantized Matmul",🚧,🚧
+"Ragged Paged Attention V3",✅,✅
@@ -1,8 +1,8 @@
 Feature,CorrectnessTest,PerformanceTest
 "Chunked Prefill",✅,✅
-"DCN-based P/D disaggregation",to be added,to be added
-"KV cache host offloading",to be added,to be added
-"Llama 4 Maverick",to be added,to be added
+"DCN-based P/D disaggregation",🚧,🚧
+"KV cache host offloading",🚧,🚧
+"Llama 4 Maverick",🚧,🚧
 "LoRA_Torch",❌,N/A
 "Multimodal Inputs",✅,❌
 "Out-of-tree model support",✅,❌

@@ -1,8 +1,8 @@
 Feature,CorrectnessTest,PerformanceTest
-"Collective Communication Matmul",✅,to be added
-"MLA",to be added,to be added
-"MoE",to be added,to be added
-"Quantized Attention",to be added,to be added
-"Quantized KV Cache",to be added,to be added
-"Quantized Matmul",to be added,to be added
+"Collective Communication Matmul",✅,🚧
+"MLA",🚧,🚧
+"MoE",🚧,🚧
+"Quantized Attention",🚧,🚧
+"Quantized KV Cache",🚧,🚧
+"Quantized Matmul",🚧,🚧
 "Ragged Paged Attention V3",✅,✅
@@ -1,7 +1,7 @@
 Feature,CorrectnessTest,PerformanceTest
-"CP",to be added,to be added
-"DP",❌,N/A
-"EP",to be added,to be added
+"CP",🚧,🚧
+"DP",❌,🚧
+"EP",🚧,🚧
 "PP",✅,✅
-"SP",to be added,to be added
-"TP",to be added,to be added
+"SP",🚧,🚧
+"TP",🚧,🚧
@@ -1,7 +1,7 @@
 Feature,Recommended TPU Generations,CorrectnessTest,PerformanceTest
-"AWQ INT4","v5, v6",to be added,to be added
-"FP4 W4A16",v7,to be added,to be added
-"FP8 W8A8",v7,to be added,to be added
-"FP8 W8A16",v7,to be added,to be added
-"INT4 W4A16","v5, v6",to be added,to be added
-"INT8 W8A8","v5, v6",to be added,to be added
+"AWQ INT4","v5, v6",🚧,🚧
+"FP4 W4A16",v7,🚧,🚧
+"FP8 W8A8",v7,🚧,🚧
+"FP8 W8A16",v7,🚧,🚧
+"INT4 W4A16","v5, v6",🚧,🚧
+"INT8 W8A8","v5, v6",🚧,🚧
@@ -0,0 +1,7 @@
+Feature,CorrectnessTest,PerformanceTest
+"CP",🚧,🚧
+"DP",❌,N/A
+"EP",🚧,🚧
+"PP",✅,✅
+"SP",🚧,🚧
+"TP",🚧,🚧
@@ -0,0 +1,7 @@
+Feature,Recommended TPU Generations,CorrectnessTest,PerformanceTest
+"AWQ INT4","v5, v6",🚧,🚧
+"FP4 W4A16",v7,🚧,🚧
+"FP8 W8A8",v7,🚧,🚧
+"FP8 W8A16",v7,🚧,🚧
+"INT4 W4A16","v5, v6",🚧,🚧
+"INT8 W8A8","v5, v6",🚧,🚧
@@ -1,7 +1,8 @@
 Model,UnitTest,IntegrationTest,Benchmark
 "meta-llama/Llama-3.3-70B-Instruct",✅,✅,✅
-"Qwen/Qwen3-32B",✅,✅,✅
+"Qwen/Qwen3-4B",✅,✅,✅
 "google/gemma-3-27b-it",✅,✅,✅
+"Qwen/Qwen3-32B",✅,✅,✅
+"meta-llama/Llama-Guard-4-12B",✅,✅,✅
 "meta-llama/Llama-3.1-8B-Instruct",✅,✅,✅
 "Qwen/Qwen3-30B-A3B",✅,✅,✅
-"Qwen/Qwen3-4B",✅,✅,✅