From fb81c0d1c479d39454b397cd2a254eb77760bfed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrien=20Gallou=C3=ABt?=
Date: Thu, 6 Feb 2025 10:53:57 +0100
Subject: [PATCH] Thanks clippy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Adrien Gallouët
---
 backends/llamacpp/src/backend.rs | 28 ++++++++++++----------------
 backends/llamacpp/src/main.rs    | 12 ++++++------
 2 files changed, 18 insertions(+), 22 deletions(-)

diff --git a/backends/llamacpp/src/backend.rs b/backends/llamacpp/src/backend.rs
index bed7d2bde7a..5d5eab43cb7 100644
--- a/backends/llamacpp/src/backend.rs
+++ b/backends/llamacpp/src/backend.rs
@@ -38,7 +38,7 @@ impl FromStr for LlamacppSplitMode {
             "row" => Ok(LlamacppSplitMode::Row),
             _ => match s.parse::<usize>() {
                 Ok(n) => Ok(LlamacppSplitMode::GPU(n)),
-                Err(_) => Err(format!("Choose a GPU number or `layer` or `row`")),
+                Err(_) => Err("Choose a GPU number or `layer` or `row`".to_string()),
             }
         }
     }
@@ -176,8 +176,7 @@ impl LlamacppRequest {
         from: &ValidGenerateRequest,
         tx: UnboundedSender<Result<InferStreamResponse, InferError>>,
     ) -> Option<Self> {
-        if let Some(input_ids) = from.input_ids.as_ref() {
-            Some(LlamacppRequest {
+        from.input_ids.as_ref().map(|input_ids| LlamacppRequest {
                 input_ids: input_ids.iter().map(|&x| x as i32).collect(),
                 top_k: from.parameters.top_k as _,
                 top_p: from.parameters.top_p as _,
@@ -190,12 +189,9 @@ impl LlamacppRequest {
                 penalty_freq: from.parameters.frequency_penalty as _,
                 penalty_present: 0.0, // disabled
                 max_new_tokens: from.stopping_parameters.max_new_tokens as _,
-                tx: tx,
+                tx,
                 time: Instant::now(),
             })
-        } else {
-            None
-        }
     }
 }
 
@@ -404,7 +400,7 @@ impl LlamacppSampler {
         for (token, logprob) in llamacpp.logprobs.iter_mut().enumerate() {
             *logprob = llamacpp::llama_token_data {
                 id: token as _,
-                logit: unsafe { *logits.offset(token as _) },
+                logit: unsafe { *logits.add(token) },
                 p: 0.0,
             };
         }
@@ -484,7 +480,7 @@ impl LlamacppBackend {
                 Ok(Some(request)) => {
                     let n_tokens_to_add = request.input_ids.len();
 
-                    if n_tokens + n_tokens_to_add > conf.max_batch_total_tokens as usize {
+                    if n_tokens + n_tokens_to_add > conf.max_batch_total_tokens {
                         flush(&mut requests, &mut n_tokens);
                     }
                     n_tokens += n_tokens_to_add;
@@ -511,7 +507,7 @@ impl LlamacppBackend {
         let _ = status_tx.send(true);
 
         while let Ok(requests) = sync_rx.recv() {
-            if shutdown_rx.borrow().clone() {
+            if *shutdown_rx.borrow() {
                 break;
             }
             let start_time = Instant::now();
@@ -521,7 +517,7 @@ impl LlamacppBackend {
 
             for (seq_id, request) in requests.iter().enumerate() {
                 debug!("Request: {:?}", request); // TODO remove this
-                let sampler = match LlamacppSampler::new(&request) {
+                let sampler = match LlamacppSampler::new(request) {
                     Some(sampler) => sampler,
                     _ => {
                         let _ = request.tx.send(Err(InferError::IncompleteGeneration));
@@ -543,7 +539,7 @@ impl LlamacppBackend {
                     batch_pos: llamacpp.batch.n_tokens as usize - 1,
                     token: llamacpp::LLAMA_TOKEN_NULL,
                     pos: last_pos as llamacpp::llama_pos + 1,
-                    sampler: sampler,
+                    sampler,
                     text: String::with_capacity(1024),
                     n_new_tokens: 0,
                     running: true,
@@ -584,8 +580,8 @@ impl LlamacppBackend {
                 let token = Token {
                     id: next as _,
                     text: piece,
-                    logprob: logprob,
-                    special: special,
+                    logprob,
+                    special,
                 };
                 let finish: Option<FinishReason> = {
                     if unsafe { llamacpp::vocab_is_eog(llamacpp.vocab, next) } {
@@ -598,7 +594,7 @@ impl LlamacppBackend {
                 };
                 if let Some(reason) = finish {
                     let _ = requests[seq.id].tx.send(Ok(InferStreamResponse::End {
-                        token: token,
+                        token,
                         top_tokens: vec![],
                         generated_text: GeneratedText {
                             text: seq.text.clone(),
@@ -613,7 +609,7 @@ impl LlamacppBackend {
                     continue;
                 }
                 let _ = requests[seq.id].tx.send(Ok(InferStreamResponse::Intermediate {
-                    token: token,
+                    token,
                     top_tokens: vec![],
                 }));
             }
diff --git a/backends/llamacpp/src/main.rs b/backends/llamacpp/src/main.rs
index df15189b272..762764a709f 100644
--- a/backends/llamacpp/src/main.rs
+++ b/backends/llamacpp/src/main.rs
@@ -215,7 +215,7 @@ async fn main() -> Result<(), RouterError> {
         .ok();
     let params = FromPretrainedParameters {
         revision: args.revision.clone(),
-        token: token,
+        token,
         ..Default::default()
     };
     Tokenizer::from_pretrained(
@@ -227,8 +227,8 @@ async fn main() -> Result<(), RouterError> {
     let (backend, ok, shutdown) = LlamacppBackend::new(
         LlamacppConfig {
             model_gguf: args.model_gguf,
-            n_threads: n_threads,
-            n_threads_batch: n_threads_batch,
+            n_threads,
+            n_threads_batch,
             n_gpu_layers: args.n_gpu_layers,
             split_mode: args.split_mode,
             defrag_threshold: args.defrag_threshold,
@@ -239,9 +239,9 @@ async fn main() -> Result<(), RouterError> {
             type_k: args.type_k,
             type_v: args.type_v,
             offload_kqv: args.offload_kqv,
-            max_batch_total_tokens: max_batch_total_tokens,
-            max_physical_batch_total_tokens: max_physical_batch_total_tokens,
-            max_batch_size: max_batch_size,
+            max_batch_total_tokens,
+            max_physical_batch_total_tokens,
+            max_batch_size,
             batch_timeout: tokio::time::Duration::from_millis(5),
         },
         tokenizer,
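
For context on the lints involved: most of the hunks are mechanical clippy fixes (redundant field names in struct literals, `format!` on a plain string literal, a needless borrow, `.clone()` on a `Copy` value behind a `borrow()`, and `pointer::add` instead of `offset` with a cast). The only structural change is the `if let Some(..) { Some(..) } else { None }` rewrite in `LlamacppRequest::new`. A minimal standalone sketch of that pattern, using a hypothetical `Req` type rather than the real request struct:

    // Sketch only: `Req` and `before`/`after` are illustrative, not part of the patch.
    struct Req {
        ids: Vec<i32>,
    }

    // The shape clippy flags: an if-let that wraps one branch in `Some` and
    // returns `None` in the other.
    fn before(input: Option<&Vec<u32>>) -> Option<Req> {
        if let Some(ids) = input {
            Some(Req {
                ids: ids.iter().map(|&x| x as i32).collect(),
            })
        } else {
            None
        }
    }

    // The suggested rewrite: same behavior, expressed as a single `Option::map`.
    fn after(input: Option<&Vec<u32>>) -> Option<Req> {
        input.map(|ids| Req {
            ids: ids.iter().map(|&x| x as i32).collect(),
        })
    }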