Skip to content

Commit

Permalink
fix: Telemetry (#2957)
Browse files Browse the repository at this point in the history
* fix: add telemetry regular pings and fix unhandled errors avoid not sending telemetry stop events.

* fix: simplify error handling

* fix: update ping delay and update doc.

* fix: clippy

* doc: Rephrase properly.
  • Loading branch information
Hugoch authored Jan 28, 2025
1 parent db922eb commit c690da5
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 12 deletions.
2 changes: 1 addition & 1 deletion docs/source/usage_statistics.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

Text Generation Inference collects anonymous usage statistics to help us improve the service. The collected data is used to improve TGI and to understand what causes failures. The data is collected transparently and any sensitive information is omitted.

Data is sent twice, once on server startup and once when server stops. Also, usage statistics are only enabled when TGI is running in docker to avoid collecting data then TGI runs directly on the host machine.
Usage statistics are collected only when TGI is running in a Docker container. This prevents data collection when TGI is run directly on the host machine. The collected data includes startup and shutdown events, as well as a heartbeat signal sent every 15 minutes.

## What data is collected

Expand Down
47 changes: 37 additions & 10 deletions router/src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ use std::fs::File;
use std::io::BufReader;
use std::net::{IpAddr, Ipv4Addr, SocketAddr};
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::time::Duration;
use thiserror::Error;
use tokio::select;
use tokio::signal;
Expand Down Expand Up @@ -1819,9 +1822,9 @@ pub async fn run(
HubTokenizerConfig::default()
});

let tokenizer: Tokenizer = {
let tokenizer: Result<Tokenizer, WebServerError> = {
use pyo3::prelude::*;
pyo3::Python::with_gil(|py| -> PyResult<()> {
Python::with_gil(|py| -> PyResult<()> {
py_resolve_tokenizer(py, &tokenizer_name, revision.as_deref(), trust_remote_code)?;
Ok(())
})
Expand All @@ -1832,16 +1835,16 @@ pub async fn run(
let out = legacy_tokenizer_handle(config_filename.as_ref());
out.ok_or(err)
})
.expect("We cannot load a tokenizer");
.map_err(|_| WebServerError::Tokenizer("Unable to load tokenizer.".to_string()))?;
let filename = "out/tokenizer.json";
if let Ok(tok) = tokenizers::Tokenizer::from_file(filename) {
Tokenizer::Rust(tok)
Ok(Tokenizer::Rust(tok))
} else {
Tokenizer::Python {
Ok(Tokenizer::Python {
tokenizer_name: tokenizer_name.clone(),
revision: revision.clone(),
trust_remote_code,
}
})
}
};

Expand Down Expand Up @@ -1901,11 +1904,27 @@ pub async fn run(
_ => None,
};

if let Some(ref ua) = user_agent {
let stop_usage_thread = Arc::new(AtomicBool::new(false));
let stop_usage_thread_clone = stop_usage_thread.clone();
if let Some(ua) = user_agent.clone() {
let start_event =
usage_stats::UsageStatsEvent::new(ua.clone(), usage_stats::EventType::Start, None);
tokio::spawn(async move {
// send start event
start_event.send().await;
let mut last_report = Instant::now();
while !stop_usage_thread_clone.load(Ordering::Relaxed) {
if last_report.elapsed() > Duration::from_secs(900) {
let report_event = usage_stats::UsageStatsEvent::new(
ua.clone(),
usage_stats::EventType::Ping,
None,
);
report_event.send().await;
last_report = Instant::now();
}
tokio::time::sleep(Duration::from_secs(1)).await;
}
});
};
let compat_return_full_text = match &model_info.pipeline_tag {
Expand All @@ -1926,7 +1945,7 @@ pub async fn run(
validation_workers,
api_key,
config,
(tokenizer, tokenizer_config),
(tokenizer?, tokenizer_config),
(preprocessor_config, processor_config),
hostname,
port,
Expand All @@ -1943,6 +1962,7 @@ pub async fn run(
.await;

if let Some(ua) = user_agent {
stop_usage_thread.store(true, Ordering::Relaxed);
match result {
Ok(_) => {
let stop_event = usage_stats::UsageStatsEvent::new(
Expand Down Expand Up @@ -2419,8 +2439,13 @@ async fn start(
}
} else {
// Run server

let listener = tokio::net::TcpListener::bind(&addr).await.unwrap();
let listener = match tokio::net::TcpListener::bind(&addr).await {
Ok(listener) => listener,
Err(e) => {
tracing::error!("Failed to bind to {addr}: {e}");
return Err(WebServerError::Axum(Box::new(e)));
}
};
axum::serve(listener, app)
.with_graceful_shutdown(shutdown_signal())
.await
Expand Down Expand Up @@ -2535,4 +2560,6 @@ impl From<InferError> for Event {
pub enum WebServerError {
#[error("Axum error: {0}")]
Axum(#[from] axum::BoxError),
#[error("Tokenizer error: {0}")]
Tokenizer(String),
}
3 changes: 2 additions & 1 deletion router/src/usage_stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ pub enum EventType {
Start,
Stop,
Error,
Ping,
}

#[derive(Debug, Serialize)]
Expand Down Expand Up @@ -70,7 +71,7 @@ impl UsageStatsEvent {
.post(TELEMETRY_URL)
.headers(headers)
.body(body)
.timeout(Duration::from_secs(5))
.timeout(Duration::from_secs(10))
.send()
.await;
}
Expand Down

0 comments on commit c690da5

Please sign in to comment.