doc: clarify that --quantize is not needed for pre-quantized models
danieldk authored Sep 19, 2024
1 parent c103760 commit abd24dd
Showing 3 changed files with 9 additions and 2 deletions.
4 changes: 3 additions & 1 deletion docs/source/reference/launcher.md
@@ -55,7 +55,9 @@ Options:
## QUANTIZE
```shell
--quantize <QUANTIZE>
-          Whether you want the model to be quantized
+          Quantization method to use for the model. It is not necessary to specify this option for pre-quantized models, since the quantization method is read from the model configuration.
+
+          Marlin kernels will be used automatically for GPTQ/AWQ models.

[env: QUANTIZE=]

1 change: 1 addition & 0 deletions flake.nix
@@ -157,6 +157,7 @@
pyright
pytest
pytest-asyncio
+redocly
ruff
syrupy
]);
6 changes: 5 additions & 1 deletion launcher/src/main.rs
@@ -367,7 +367,11 @@ struct Args {
#[clap(long, env)]
num_shard: Option<usize>,

-/// Whether you want the model to be quantized.
+/// Quantization method to use for the model. It is not necessary to specify this option
+/// for pre-quantized models, since the quantization method is read from the model
+/// configuration.
+///
+/// Marlin kernels will be used automatically for GPTQ/AWQ models.
#[clap(long, env, value_enum)]
quantize: Option<Quantization>,

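The help text updated in this commit implies that a pre-quantized checkpoint can be served without passing `--quantize` at all. A minimal launch sketch (the model id and port below are illustrative examples, not taken from this commit):

```shell
# No --quantize flag: the quantization method (here GPTQ) is read from
# the model configuration, and Marlin kernels are selected automatically
# for GPTQ/AWQ models.
text-generation-launcher \
    --model-id TheBloke/Llama-2-7B-GPTQ \
    --port 8080
```

Passing `--quantize gptq` explicitly would still work, but per the new documentation it is redundant for models that already ship a quantization config.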
