From ddb16245b7cb7c1f41ff328a408cd113a95917f5 Mon Sep 17 00:00:00 2001 From: Charlie Ruan <53290280+CharlieFRuan@users.noreply.github.com> Date: Sun, 3 Nov 2024 22:19:34 -0800 Subject: [PATCH] [Model] Add SmolLM2, deprecate SmolLM1 (#623) This PR adds the following SmolLM2 prebuilt models: - SmolLM2-1.7B-Instruct-q4f16_1-MLC - SmolLM2-1.7B-Instruct-q4f32_1-MLC - SmolLM2-360M-Instruct-q0f16-MLC - SmolLM2-360M-Instruct-q0f32-MLC - SmolLM2-360M-Instruct-q4f16_1-MLC - SmolLM2-360M-Instruct-q4f32_1-MLC - SmolLM2-135M-Instruct-q0f16-MLC - SmolLM2-135M-Instruct-q0f32-MLC Also removes SmolLM 1 models --- src/config.ts | 136 +++++++++++++++----------------------------------- 1 file changed, 41 insertions(+), 95 deletions(-) diff --git a/src/config.ts b/src/config.ts index 202289b2..c478eccc 100644 --- a/src/config.ts +++ b/src/config.ts @@ -717,168 +717,114 @@ export const prebuiltAppConfig: AppConfig = { sliding_window_size: -1, }, }, - // SmolLM + // SmolLM2 { - model: "https://huggingface.co/mlc-ai/SmolLM-1.7B-Instruct-q0f16-MLC", - model_id: "SmolLM-1.7B-Instruct-q0f16-MLC", + model: "https://huggingface.co/mlc-ai/SmolLM2-1.7B-Instruct-q4f16_1-MLC", + model_id: "SmolLM2-1.7B-Instruct-q4f16_1-MLC", model_lib: modelLibURLPrefix + modelVersion + - "/SmolLM-1.7B-Instruct-q0f16-ctx2k_cs1k-webgpu.wasm", - vram_required_MB: 3736.19, + "/SmolLM2-1.7B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm", + vram_required_MB: 1774.19, low_resource_required: true, required_features: ["shader-f16"], overrides: { - context_window_size: 2048, - }, - }, - { - model: "https://huggingface.co/mlc-ai/SmolLM-1.7B-Instruct-q0f32-MLC", - model_id: "SmolLM-1.7B-Instruct-q0f32-MLC", - model_lib: - modelLibURLPrefix + - modelVersion + - "/SmolLM-1.7B-Instruct-q0f32-ctx2k_cs1k-webgpu.wasm", - vram_required_MB: 7432.38, - low_resource_required: false, - overrides: { - context_window_size: 2048, - }, - }, - { - model: "https://huggingface.co/mlc-ai/SmolLM-1.7B-Instruct-q4f16_1-MLC", - model_id: 
"SmolLM-1.7B-Instruct-q4f16_1-MLC", - model_lib: - modelLibURLPrefix + - modelVersion + - "/SmolLM-1.7B-Instruct-q4f16_1-ctx2k_cs1k-webgpu.wasm", - vram_required_MB: 1390.19, - low_resource_required: true, - required_features: ["shader-f16"], - overrides: { - context_window_size: 2048, + context_window_size: 4096, }, }, { - model: "https://huggingface.co/mlc-ai/SmolLM-1.7B-Instruct-q4f32_1-MLC", - model_id: "SmolLM-1.7B-Instruct-q4f32_1-MLC", + model: "https://huggingface.co/mlc-ai/SmolLM2-1.7B-Instruct-q4f32_1-MLC", + model_id: "SmolLM2-1.7B-Instruct-q4f32_1-MLC", model_lib: modelLibURLPrefix + modelVersion + - "/SmolLM-1.7B-Instruct-q4f32_1-ctx2k_cs1k-webgpu.wasm", - vram_required_MB: 1924.38, + "/SmolLM2-1.7B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", + vram_required_MB: 2692.38, low_resource_required: true, overrides: { - context_window_size: 2048, + context_window_size: 4096, }, }, { - model: "https://huggingface.co/mlc-ai/SmolLM-360M-Instruct-q0f16-MLC", - model_id: "SmolLM-360M-Instruct-q0f16-MLC", - model_lib: - modelLibURLPrefix + - modelVersion + - "/SmolLM-360M-Instruct-q0f16-ctx2k_cs1k-webgpu.wasm", - vram_required_MB: 791.99, - low_resource_required: true, - required_features: ["shader-f16"], - overrides: { - context_window_size: 2048, - }, - }, - { - model: "https://huggingface.co/mlc-ai/SmolLM-360M-Instruct-q0f32-MLC", - model_id: "SmolLM-360M-Instruct-q0f32-MLC", + model: "https://huggingface.co/mlc-ai/SmolLM2-360M-Instruct-q0f16-MLC", + model_id: "SmolLM2-360M-Instruct-q0f16-MLC", model_lib: modelLibURLPrefix + modelVersion + - "/SmolLM-360M-Instruct-q0f32-ctx2k_cs1k-webgpu.wasm", - vram_required_MB: 1583.99, - low_resource_required: true, - overrides: { - context_window_size: 2048, - }, - }, - { - model: "https://huggingface.co/mlc-ai/SmolLM-360M-Instruct-q4f16_1-MLC", - model_id: "SmolLM-360M-Instruct-q4f16_1-MLC", - model_lib: - modelLibURLPrefix + - modelVersion + - "/SmolLM-360M-Instruct-q4f16_1-ctx2k_cs1k-webgpu.wasm", - vram_required_MB: 
296.06, + "/SmolLM2-360M-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm", + vram_required_MB: 871.99, low_resource_required: true, required_features: ["shader-f16"], overrides: { - context_window_size: 2048, + context_window_size: 4096, }, }, { - model: "https://huggingface.co/mlc-ai/SmolLM-360M-Instruct-q4f32_1-MLC", - model_id: "SmolLM-360M-Instruct-q4f32_1-MLC", + model: "https://huggingface.co/mlc-ai/SmolLM2-360M-Instruct-q0f32-MLC", + model_id: "SmolLM2-360M-Instruct-q0f32-MLC", model_lib: modelLibURLPrefix + modelVersion + - "/SmolLM-360M-Instruct-q4f32_1-ctx2k_cs1k-webgpu.wasm", - vram_required_MB: 419.61, + "/SmolLM2-360M-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm", + vram_required_MB: 1743.99, low_resource_required: true, overrides: { - context_window_size: 2048, + context_window_size: 4096, }, }, { - model: "https://huggingface.co/mlc-ai/SmolLM-135M-Instruct-q0f16-MLC", - model_id: "SmolLM-135M-Instruct-q0f16-MLC", + model: "https://huggingface.co/mlc-ai/SmolLM2-360M-Instruct-q4f16_1-MLC", + model_id: "SmolLM2-360M-Instruct-q4f16_1-MLC", model_lib: modelLibURLPrefix + modelVersion + - "/SmolLM-135M-Instruct-q0f16-ctx2k_cs1k-webgpu.wasm", - vram_required_MB: 314.69, + "/SmolLM2-360M-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm", + vram_required_MB: 376.06, low_resource_required: true, required_features: ["shader-f16"], overrides: { - context_window_size: 2048, + context_window_size: 4096, }, }, { - model: "https://huggingface.co/mlc-ai/SmolLM-135M-Instruct-q0f32-MLC", - model_id: "SmolLM-135M-Instruct-q0f32-MLC", + model: "https://huggingface.co/mlc-ai/SmolLM2-360M-Instruct-q4f32_1-MLC", + model_id: "SmolLM2-360M-Instruct-q4f32_1-MLC", model_lib: modelLibURLPrefix + modelVersion + - "/SmolLM-135M-Instruct-q0f32-ctx2k_cs1k-webgpu.wasm", - vram_required_MB: 629.38, + "/SmolLM2-360M-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", + vram_required_MB: 579.61, low_resource_required: true, overrides: { - context_window_size: 2048, + context_window_size: 4096, }, }, { - model: 
"https://huggingface.co/mlc-ai/SmolLM-135M-Instruct-q4f16_1-MLC", - model_id: "SmolLM-135M-Instruct-q4f16_1-MLC", + model: "https://huggingface.co/mlc-ai/SmolLM2-135M-Instruct-q0f16-MLC", + model_id: "SmolLM2-135M-Instruct-q0f16-MLC", model_lib: modelLibURLPrefix + modelVersion + - "/SmolLM-135M-Instruct-q4f16_1-ctx2k_cs1k-webgpu.wasm", - vram_required_MB: 130.33, + "/SmolLM2-135M-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm", + vram_required_MB: 359.69, low_resource_required: true, required_features: ["shader-f16"], overrides: { - context_window_size: 2048, + context_window_size: 4096, }, }, { - model: "https://huggingface.co/mlc-ai/SmolLM-135M-Instruct-q4f32_1-MLC", - model_id: "SmolLM-135M-Instruct-q4f32_1-MLC", + model: "https://huggingface.co/mlc-ai/SmolLM2-135M-Instruct-q0f32-MLC", + model_id: "SmolLM2-135M-Instruct-q0f32-MLC", model_lib: modelLibURLPrefix + modelVersion + - "/SmolLM-135M-Instruct-q4f32_1-ctx2k_cs1k-webgpu.wasm", - vram_required_MB: 196.54, + "/SmolLM2-135M-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm", + vram_required_MB: 719.38, low_resource_required: true, overrides: { - context_window_size: 2048, + context_window_size: 4096, }, }, // Gemma2