@@ -29,8 +29,12 @@ struct Args {
2929 quantize : bool ,
3030 #[ clap( default_value = "128" , long, env) ]
3131 max_concurrent_requests : usize ,
32+ #[ clap( default_value = "4" , long, env) ]
33+ max_stop_sequences : usize ,
3234 #[ clap( default_value = "1000" , long, env) ]
3335 max_input_length : usize ,
36+ #[ clap( default_value = "1512" , long, env) ]
37+ max_total_tokens : usize ,
3438 #[ clap( default_value = "32" , long, env) ]
3539 max_batch_size : usize ,
3640 #[ clap( default_value = "20" , long, env) ]
@@ -79,7 +83,9 @@ fn main() -> ExitCode {
7983 num_shard,
8084 quantize,
8185 max_concurrent_requests,
86+ max_stop_sequences,
8287 max_input_length,
88+ max_total_tokens,
8389 max_batch_size,
8490 max_waiting_tokens,
8591 port,
@@ -299,8 +305,12 @@ fn main() -> ExitCode {
299305 "text-generation-router" . to_string( ) ,
300306 "--max-concurrent-requests" . to_string( ) ,
301307 max_concurrent_requests. to_string( ) ,
308+ "--max-stop-sequences" . to_string( ) ,
309+ max_stop_sequences. to_string( ) ,
302310 "--max-input-length" . to_string( ) ,
303311 max_input_length. to_string( ) ,
312+ "--max-total-tokens" . to_string( ) ,
313+ max_total_tokens. to_string( ) ,
304314 "--max-batch-size" . to_string( ) ,
305315 max_batch_size. to_string( ) ,
306316 "--max-waiting-tokens" . to_string( ) ,
0 commit comments