From ddb16245b7cb7c1f41ff328a408cd113a95917f5 Mon Sep 17 00:00:00 2001 From: Charlie Ruan <53290280+CharlieFRuan@users.noreply.github.com> Date: Sun, 3 Nov 2024 22:19:34 -0800 Subject: [PATCH] [Model] Add SmolLM2, deprecate SmolLM1 (#623) This PR adds the following SmolLM2 prebuilt models: - SmolLM2-1.7B-Instruct-q4f16_1-MLC - SmolLM2-1.7B-Instruct-q4f32_1-MLC - SmolLM2-360M-Instruct-q0f16-MLC - SmolLM2-360M-Instruct-q0f32-MLC - SmolLM2-360M-Instruct-q4f16_1-MLC - SmolLM2-360M-Instruct-q4f32_1-MLC - SmolLM2-135M-Instruct-q0f16-MLC - SmolLM2-135M-Instruct-q0f32-MLC Also removes SmolLM 1 models --- src/config.ts | 136 +++++++++++++++----------------------------------- 1 file changed, 41 insertions(+), 95 deletions(-) diff --git a/src/config.ts b/src/config.ts index 202289b2..c478eccc 100644 --- a/src/config.ts +++ b/src/config.ts @@ -717,168 +717,114 @@ export const prebuiltAppConfig: AppConfig = { sliding_window_size: -1, }, }, - // SmolLM + // SmolLM2 { - model: "https://huggingface.co/mlc-ai/SmolLM-1.7B-Instruct-q0f16-MLC", - model_id: "SmolLM-1.7B-Instruct-q0f16-MLC", + model: "https://huggingface.co/mlc-ai/SmolLM2-1.7B-Instruct-q4f16_1-MLC", + model_id: "SmolLM2-1.7B-Instruct-q4f16_1-MLC", model_lib: modelLibURLPrefix + modelVersion + - "/SmolLM-1.7B-Instruct-q0f16-ctx2k_cs1k-webgpu.wasm", - vram_required_MB: 3736.19, + "/SmolLM2-1.7B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm", + vram_required_MB: 1774.19, low_resource_required: true, required_features: ["shader-f16"], overrides: { - context_window_size: 2048, - }, - }, - { - model: "https://huggingface.co/mlc-ai/SmolLM-1.7B-Instruct-q0f32-MLC", - model_id: "SmolLM-1.7B-Instruct-q0f32-MLC", - model_lib: - modelLibURLPrefix + - modelVersion + - "/SmolLM-1.7B-Instruct-q0f32-ctx2k_cs1k-webgpu.wasm", - vram_required_MB: 7432.38, - low_resource_required: false, - overrides: { - context_window_size: 2048, - }, - }, - { - model: "https://huggingface.co/mlc-ai/SmolLM-1.7B-Instruct-q4f16_1-MLC", - model_id: 
"SmolLM-1.7B-Instruct-q4f16_1-MLC", - model_lib: - modelLibURLPrefix + - modelVersion + - "/SmolLM-1.7B-Instruct-q4f16_1-ctx2k_cs1k-webgpu.wasm", - vram_required_MB: 1390.19, - low_resource_required: true, - required_features: ["shader-f16"], - overrides: { - context_window_size: 2048, + context_window_size: 4096, }, }, { - model: "https://huggingface.co/mlc-ai/SmolLM-1.7B-Instruct-q4f32_1-MLC", - model_id: "SmolLM-1.7B-Instruct-q4f32_1-MLC", + model: "https://huggingface.co/mlc-ai/SmolLM2-1.7B-Instruct-q4f32_1-MLC", + model_id: "SmolLM2-1.7B-Instruct-q4f32_1-MLC", model_lib: modelLibURLPrefix + modelVersion + - "/SmolLM-1.7B-Instruct-q4f32_1-ctx2k_cs1k-webgpu.wasm", - vram_required_MB: 1924.38, + "/SmolLM2-1.7B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", + vram_required_MB: 2692.38, low_resource_required: true, overrides: { - context_window_size: 2048, + context_window_size: 4096, }, }, { - model: "https://huggingface.co/mlc-ai/SmolLM-360M-Instruct-q0f16-MLC", - model_id: "SmolLM-360M-Instruct-q0f16-MLC", - model_lib: - modelLibURLPrefix + - modelVersion + - "/SmolLM-360M-Instruct-q0f16-ctx2k_cs1k-webgpu.wasm", - vram_required_MB: 791.99, - low_resource_required: true, - required_features: ["shader-f16"], - overrides: { - context_window_size: 2048, - }, - }, - { - model: "https://huggingface.co/mlc-ai/SmolLM-360M-Instruct-q0f32-MLC", - model_id: "SmolLM-360M-Instruct-q0f32-MLC", + model: "https://huggingface.co/mlc-ai/SmolLM2-360M-Instruct-q0f16-MLC", + model_id: "SmolLM2-360M-Instruct-q0f16-MLC", model_lib: modelLibURLPrefix + modelVersion + - "/SmolLM-360M-Instruct-q0f32-ctx2k_cs1k-webgpu.wasm", - vram_required_MB: 1583.99, - low_resource_required: true, - overrides: { - context_window_size: 2048, - }, - }, - { - model: "https://huggingface.co/mlc-ai/SmolLM-360M-Instruct-q4f16_1-MLC", - model_id: "SmolLM-360M-Instruct-q4f16_1-MLC", - model_lib: - modelLibURLPrefix + - modelVersion + - "/SmolLM-360M-Instruct-q4f16_1-ctx2k_cs1k-webgpu.wasm", - vram_required_MB: 
296.06, + "/SmolLM2-360M-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm", + vram_required_MB: 871.99, low_resource_required: true, required_features: ["shader-f16"], overrides: { - context_window_size: 2048, + context_window_size: 4096, }, }, { - model: "https://huggingface.co/mlc-ai/SmolLM-360M-Instruct-q4f32_1-MLC", - model_id: "SmolLM-360M-Instruct-q4f32_1-MLC", + model: "https://huggingface.co/mlc-ai/SmolLM2-360M-Instruct-q0f32-MLC", + model_id: "SmolLM2-360M-Instruct-q0f32-MLC", model_lib: modelLibURLPrefix + modelVersion + - "/SmolLM-360M-Instruct-q4f32_1-ctx2k_cs1k-webgpu.wasm", - vram_required_MB: 419.61, + "/SmolLM2-360M-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm", + vram_required_MB: 1743.99, low_resource_required: true, overrides: { - context_window_size: 2048, + context_window_size: 4096, }, }, { - model: "https://huggingface.co/mlc-ai/SmolLM-135M-Instruct-q0f16-MLC", - model_id: "SmolLM-135M-Instruct-q0f16-MLC", + model: "https://huggingface.co/mlc-ai/SmolLM2-360M-Instruct-q4f16_1-MLC", + model_id: "SmolLM2-360M-Instruct-q4f16_1-MLC", model_lib: modelLibURLPrefix + modelVersion + - "/SmolLM-135M-Instruct-q0f16-ctx2k_cs1k-webgpu.wasm", - vram_required_MB: 314.69, + "/SmolLM2-360M-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm", + vram_required_MB: 376.06, low_resource_required: true, required_features: ["shader-f16"], overrides: { - context_window_size: 2048, + context_window_size: 4096, }, }, { - model: "https://huggingface.co/mlc-ai/SmolLM-135M-Instruct-q0f32-MLC", - model_id: "SmolLM-135M-Instruct-q0f32-MLC", + model: "https://huggingface.co/mlc-ai/SmolLM2-360M-Instruct-q4f32_1-MLC", + model_id: "SmolLM2-360M-Instruct-q4f32_1-MLC", model_lib: modelLibURLPrefix + modelVersion + - "/SmolLM-135M-Instruct-q0f32-ctx2k_cs1k-webgpu.wasm", - vram_required_MB: 629.38, + "/SmolLM2-360M-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", + vram_required_MB: 579.61, low_resource_required: true, overrides: { - context_window_size: 2048, + context_window_size: 4096, }, }, { - model: 
"https://huggingface.co/mlc-ai/SmolLM-135M-Instruct-q4f16_1-MLC", - model_id: "SmolLM-135M-Instruct-q4f16_1-MLC", + model: "https://huggingface.co/mlc-ai/SmolLM2-135M-Instruct-q0f16-MLC", + model_id: "SmolLM2-135M-Instruct-q0f16-MLC", model_lib: modelLibURLPrefix + modelVersion + - "/SmolLM-135M-Instruct-q4f16_1-ctx2k_cs1k-webgpu.wasm", - vram_required_MB: 130.33, + "/SmolLM2-135M-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm", + vram_required_MB: 359.69, low_resource_required: true, required_features: ["shader-f16"], overrides: { - context_window_size: 2048, + context_window_size: 4096, }, }, { - model: "https://huggingface.co/mlc-ai/SmolLM-135M-Instruct-q4f32_1-MLC", - model_id: "SmolLM-135M-Instruct-q4f32_1-MLC", + model: "https://huggingface.co/mlc-ai/SmolLM2-135M-Instruct-q0f32-MLC", + model_id: "SmolLM2-135M-Instruct-q0f32-MLC", model_lib: modelLibURLPrefix + modelVersion + - "/SmolLM-135M-Instruct-q4f32_1-ctx2k_cs1k-webgpu.wasm", - vram_required_MB: 196.54, + "/SmolLM2-135M-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm", + vram_required_MB: 719.38, low_resource_required: true, overrides: { - context_window_size: 2048, + context_window_size: 4096, }, }, // Gemma2