exo-explore · giuseppegambino92 · Nov 12, 2024
diff --git a/exo/models.py b/exo/models.py
@@ -45,7 +45,8 @@
   "qwen-2.5-coder-3b": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-Coder-3B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=36),},
   "qwen-2.5-coder-7b": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-Coder-7B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=28),},
   "qwen-2.5-coder-14b": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-Coder-14B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=48),},
-  "qwen-2.5-coder-32b": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-Coder-32B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=64),},
+  "qwen-2.5-coder-32b-4bit": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-Coder-32B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=64),},
+  "qwen-2.5-coder-32b-8bit": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-Coder-32B-Instruct-8bit", start_layer=0, end_layer=0, n_layers=64),},
   "qwen-2.5-7b": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-7B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=28),},
   "qwen-2.5-math-7b": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-Math-7B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=28),},
   "qwen-2.5-14b": {"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-14B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=48),},

diff --git a/exo/tinychat/index.html b/exo/tinychat/index.html
@@ -50,8 +50,9 @@
 <option value="qwen-2.5-coder-3b">Qwen 2.5 Coder 3B</option>
 <option value="qwen-2.5-coder-7b">Qwen 2.5 Coder 7B</option>
 <option value="qwen-2.5-coder-14b">Qwen 2.5 Coder 14B</option>
-<option value="qwen-2.5-coder-32b">Qwen 2.5 Coder 32B</option>
+<option value="qwen-2.5-coder-32b-4bit">Qwen 2.5 Coder 32B (4-bit)</option>
+<option value="qwen-2.5-coder-32b-8bit">Qwen 2.5 Coder 32B (8-bit)</option>
 <option value="qwen-2.5-7b">Qwen 2.5 7B</option>
 <option value="qwen-2.5-math-7b">Qwen 2.5 7B (Math)</option>
 <option value="qwen-2.5-14b">Qwen 2.5 14B</option>
 <option value="qwen-2.5-72b">Qwen 2.5 72B</option>