Skip to content

Commit 7a707c7

Browse files
committed
refactor: make Prometheus metric naming conventions more consistent
- Rename connections_total to current_connections (gauge for active connections)
- Rename client_disconnects_total to disconnected_clients_total (better ordering)
- Rename PROCESSING_TIME_MS_TOTAL to PROCESSING_MS_TOTAL (more concise)
- Apply unit_aggregation pattern: AVG_PROCESSING_MS -> PROCESSING_MS_AVG
- Sync ComponentNatsServerPrometheusMetrics variable names with metric constants
- Update documentation with comprehensive naming transformation rules
- Add units _messages and _connections to naming conventions
- Update all code references, documentation, and test comments consistently

These changes follow Prometheus best practices by distinguishing gauge vs counter metrics and using consistent {unit}_{aggregation} naming patterns.

Signed-off-by: Keiven Chang <[email protected]>
1 parent 2d39f1b commit 7a707c7

File tree

7 files changed

+85
-67
lines changed

7 files changed

+85
-67
lines changed

deploy/metrics/README.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ Some components expose additional metrics specific to their functionality:
7070

7171
When using Dynamo HTTP Frontend (`--framework VLLM` or `--framework TRTLLM`), these metrics are automatically exposed with the `dynamo_frontend_*` prefix and include `model` labels containing the model name:
7272

73-
- `dynamo_frontend_inflight_requests_total`: Inflight requests (gauge)
73+
- `dynamo_frontend_inflight_requests`: Inflight requests (gauge)
7474
- `dynamo_frontend_queued_requests_total`: Number of requests in HTTP processing queue (gauge)
7575
- `dynamo_frontend_input_sequence_tokens`: Input sequence length (histogram)
7676
- `dynamo_frontend_inter_token_latency_seconds`: Inter-token latency (histogram)
@@ -79,7 +79,7 @@ When using Dynamo HTTP Frontend (`--framework VLLM` or `--framework TRTLLM`), th
7979
- `dynamo_frontend_requests_total`: Total LLM requests (counter)
8080
- `dynamo_frontend_time_to_first_token_seconds`: Time to first token (histogram)
8181

82-
**Note**: The `dynamo_frontend_inflight_requests_total` metric tracks requests from HTTP handler start until the complete response is finished, while `dynamo_frontend_queued_requests_total` tracks requests from HTTP handler start until first token generation begins (including prefill time). HTTP queue time is a subset of inflight time.
82+
**Note**: The `dynamo_frontend_inflight_requests` metric tracks requests from HTTP handler start until the complete response is finished, while `dynamo_frontend_queued_requests_total` tracks requests from HTTP handler start until first token generation begins (including prefill time). HTTP queue time is a subset of inflight time.
8383

8484
#### Request Processing Flow
8585

@@ -125,10 +125,10 @@ Try launching a frontend and a Mocker backend that allows 3 concurrent requests:
125125
$ python -m dynamo.frontend --http-port 8000
126126
$ python -m dynamo.mocker --model-path Qwen/Qwen3-0.6B --max-num-seqs 3
127127
# Launch your 10 concurrent clients here
128-
# Then check the queued_requests_total and inflight_requests_total metrics from the frontend:
128+
# Then check the queued_requests_total and inflight_requests metrics from the frontend:
129129
$ curl -s localhost:8000/metrics|grep -v '^#'|grep -E 'queue|inflight'
130130
dynamo_frontend_queued_requests_total{model="qwen/qwen3-0.6b"} 7
131-
dynamo_frontend_inflight_requests_total{model="qwen/qwen3-0.6b"} 10
131+
dynamo_frontend_inflight_requests{model="qwen/qwen3-0.6b"} 10
132132
```
133133

134134
**Real setup using vLLM (instead of Mocker):**
@@ -271,8 +271,8 @@ let component = namespace.component("my_component")?;
271271
let endpoint = component.endpoint("my_endpoint")?;
272272

273273
// Create endpoint-level counters (this is a Prometheus Counter type)
274-
let total_requests = endpoint.create_counter(
275-
"total_requests",
274+
let requests_total = endpoint.create_counter(
275+
"requests_total",
276276
"Total requests across all namespaces",
277277
&[]
278278
)?;
@@ -449,8 +449,8 @@ let latency = endpoint.create_histogram(
449449

450450
```rust
451451
// Aggregate metrics across multiple endpoints
452-
let total_requests = namespace.create_counter(
453-
"total_requests",
452+
let requests_total = namespace.create_counter(
453+
"requests_total",
454454
"Total requests across all endpoints",
455455
&[]
456456
)?;

lib/llm/src/http/service/metrics.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,8 @@ impl Metrics {
117117
///
118118
/// The following metrics will be created with the configured prefix:
119119
/// - `{prefix}_requests_total` - IntCounterVec for the total number of requests processed
120-
/// - `{prefix}_inflight_requests` - IntGaugeVec for the number of inflight requests
120+
/// - `{prefix}_inflight_requests` - IntGaugeVec for the number of inflight/concurrent requests
121+
/// - `{prefix}_disconnected_clients_total` - IntGauge for the number of connections dropped by clients
121122
/// - `{prefix}_request_duration_seconds` - HistogramVec for the duration of requests
122123
/// - `{prefix}_input_sequence_tokens` - HistogramVec for input sequence length in tokens
123124
/// - `{prefix}_output_sequence_tokens` - HistogramVec for output sequence length in tokens
@@ -148,15 +149,15 @@ impl Metrics {
148149

149150
let inflight_gauge = IntGaugeVec::new(
150151
Opts::new(
151-
frontend_metric_name(frontend_service::INFLIGHT_REQUESTS_TOTAL),
152+
frontend_metric_name(frontend_service::INFLIGHT_REQUESTS),
152153
"Number of inflight requests",
153154
),
154155
&["model"],
155156
)
156157
.unwrap();
157158

158159
let client_disconnect_gauge = prometheus::IntGauge::new(
159-
frontend_metric_name("client_disconnects"),
160+
frontend_metric_name(frontend_service::DISCONNECTED_CLIENTS_TOTAL),
160161
"Number of connections dropped by clients",
161162
)
162163
.unwrap();

lib/runtime/examples/system_metrics/tests/integration_test.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ async fn test_backend_with_metrics() -> Result<()> {
106106
println!("{}", metrics_content);
107107
println!("=== END METRICS CONTENT ===");
108108

109-
// Parse and verify ingress metrics are greater than 0 (except inflight_requests)
109+
// Parse and verify ingress metrics are greater than 0 (except inflight_requests, a gauge that can be 0)
110110
verify_ingress_metrics_greater_than_0(&metrics_content);
111111

112112
println!("Successfully retrieved and verified metrics!");
@@ -131,7 +131,7 @@ async fn test_backend_with_metrics() -> Result<()> {
131131
}
132132

133133
fn verify_ingress_metrics_greater_than_0(metrics_content: &str) {
134-
// Define the work handler metrics we want to verify (excluding inflight_requests which can be 0)
134+
// Define the work handler metrics we want to verify (excluding inflight_requests, a gauge which can be 0)
135135
let metrics_to_verify = [
136136
"my_custom_bytes_processed_total",
137137
"requests_total",

lib/runtime/src/metrics.rs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1176,8 +1176,8 @@ dynamo_component_nats_client_connection_state 1
11761176
# TYPE dynamo_component_latency histogram
11771177
dynamo_component_latency_bucket{le="0.1"} 10
11781178
dynamo_component_latency_bucket{le="0.5"} 25
1179-
dynamo_component_nats_service_total_requests 100
1180-
dynamo_component_nats_service_total_errors 5"#;
1179+
dynamo_component_nats_service_requests_total 100
1180+
dynamo_component_nats_service_errors_total 5"#;
11811181

11821182
// Test remove_nats_lines (excludes NATS lines but keeps help/type)
11831183
let filtered_out = super::test_helpers::remove_nats_lines(test_input);
@@ -1421,7 +1421,7 @@ mod test_metricsregistry_nats {
14211421
1.0,
14221422
1.0,
14231423
), // Should be connected
1424-
(build_component_metric_name(nats_client::CONNECTS), 1.0, 1.0), // Should have 1 connection
1424+
(build_component_metric_name(nats_client::CURRENT_CONNECTIONS), 1.0, 1.0), // Should have 1 connection
14251425
(
14261426
build_component_metric_name(nats_client::IN_TOTAL_BYTES),
14271427
800.0,
@@ -1449,17 +1449,17 @@ mod test_metricsregistry_nats {
14491449
0.0,
14501450
), // No processing yet
14511451
(
1452-
build_component_metric_name(nats_service::TOTAL_ERRORS),
1452+
build_component_metric_name(nats_service::ERRORS_TOTAL),
14531453
0.0,
14541454
0.0,
14551455
), // No errors yet
14561456
(
1457-
build_component_metric_name(nats_service::TOTAL_REQUESTS),
1457+
build_component_metric_name(nats_service::REQUESTS_TOTAL),
14581458
0.0,
14591459
0.0,
14601460
), // No requests yet
14611461
(
1462-
build_component_metric_name(nats_service::TOTAL_PROCESSING_MS),
1462+
build_component_metric_name(nats_service::PROCESSING_MS_TOTAL),
14631463
0.0,
14641464
0.0,
14651465
), // No processing yet
@@ -1550,7 +1550,7 @@ mod test_metricsregistry_nats {
15501550
1.0,
15511551
1.0,
15521552
), // Connected
1553-
(build_component_metric_name(nats_client::CONNECTS), 1.0, 1.0), // 1 connection
1553+
(build_component_metric_name(nats_client::CURRENT_CONNECTIONS), 1.0, 1.0), // 1 connection
15541554
(
15551555
build_component_metric_name(nats_client::IN_TOTAL_BYTES),
15561556
20000.0,
@@ -1578,17 +1578,17 @@ mod test_metricsregistry_nats {
15781578
1.0,
15791579
), // Low processing time
15801580
(
1581-
build_component_metric_name(nats_service::TOTAL_ERRORS),
1581+
build_component_metric_name(nats_service::ERRORS_TOTAL),
15821582
0.0,
15831583
0.0,
15841584
), // No errors
15851585
(
1586-
build_component_metric_name(nats_service::TOTAL_REQUESTS),
1586+
build_component_metric_name(nats_service::REQUESTS_TOTAL),
15871587
0.0,
15881588
0.0,
15891589
), // No work handler requests
15901590
(
1591-
build_component_metric_name(nats_service::TOTAL_PROCESSING_MS),
1591+
build_component_metric_name(nats_service::PROCESSING_MS_TOTAL),
15921592
0.0,
15931593
5.0,
15941594
), // Low total processing time

lib/runtime/src/metrics/prometheus_names.rs

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,26 +13,37 @@
1313
//! **Prefix**: Component identifier (`dynamo_component_`, `dynamo_frontend_`, etc.)
1414
//! **Name**: Descriptive snake_case name indicating what is measured
1515
//! **Suffix**:
16-
//! - Units: `_seconds`, `_bytes`, `_ms`, `_percent`
16+
//! - Units: `_seconds`, `_bytes`, `_ms`, `_percent`, `_messages`, `_connections`
1717
//! - Counters: `_total` (not `total_` prefix)
1818
//! - Note: Do not use `_counter`, `_gauge`, `_time`, or `_size` in Prometheus names (too vague)
1919
//!
2020
//! **Common Transformations**:
2121
//! - ❌ `_counter` → ✅ `_total`
22+
//! - ❌ `_sum` → ✅ `_total`
23+
//! - ❌ `_gauge` → ✅ (no suffix needed for current values)
2224
//! - ❌ `_time` → ✅ `_seconds`, `_ms`, `_hours`, `_duration_seconds`
25+
//! - ❌ `_time_total` → ✅ `_seconds_total`, `_ms_total`, `_hours_total`
26+
//! - ❌ `_total_time` → ✅ `_seconds_total`, `_ms_total`, `_hours_total`
27+
//! - ❌ `_total_time_seconds` → ✅ `_seconds_total`
28+
//! - ❌ `_average_time` → ✅ `_seconds_avg`, `_ms_avg`
2329
//! - ❌ `_size` → ✅ `_bytes`, `_total`, `_length`
24-
//! - ❌ `_gauge` → ✅ (no suffix needed for current values)
30+
//! - ❌ `_some_request_size` → ✅ `_some_request_bytes_avg`
2531
//! - ❌ `_rate` → ✅ `_per_second`, `_per_minute`
32+
//! - ❌ `client_disconnects_total` → ✅ `disconnected_clients_total`
33+
//! - ❌ `inflight_requests_total` → ✅ `inflight_requests` (gauge, not counter)
34+
//! - ❌ `connections_total` → ✅ `current_connections` (gauge, not counter)
2635
//!
2736
//! **Examples**:
2837
//! - ✅ `dynamo_frontend_requests_total` - Total request counter (not `incoming_requests`)
2938
//! - ✅ `dynamo_frontend_request_duration_seconds` - Request duration histogram (not `response_time`)
3039
//! - ✅ `dynamo_component_errors_total` - Total error counter (not `total_errors`)
3140
//! - ✅ `dynamo_component_memory_usage_bytes` - Memory usage gauge
32-
//! - ✅ `dynamo_frontend_inflight_requests_total` - Current inflight requests gauge
41+
//! - ✅ `dynamo_frontend_inflight_requests` - Current inflight requests gauge
3342
//! - ✅ `nats_client_connection_duration_ms` - Connection time in milliseconds
3443
//! - ✅ `dynamo_component_cpu_usage_percent` - CPU usage percentage
3544
//! - ✅ `dynamo_frontend_tokens_per_second` - Token generation rate
45+
//! - ✅ `nats_client_current_connections` - Current active connections gauge
46+
//! - ✅ `nats_client_in_messages` - Total messages received counter
3647
//!
3748
//! ## Key Differences: Prometheus Metric Names vs Prometheus Label Names
3849
//!
@@ -76,8 +87,12 @@ pub mod frontend_service {
7687
/// Number of requests waiting in HTTP queue before receiving the first response.
7788
pub const QUEUED_REQUESTS_TOTAL: &str = "queued_requests_total";
7889

79-
/// Number of inflight requests going to the engine (vLLM, SGLang, ...)
80-
pub const INFLIGHT_REQUESTS_TOTAL: &str = "inflight_requests_total";
90+
/// Number of inflight/concurrent requests going to the engine (vLLM, SGLang, ...)
91+
/// Note: Gauge metric measuring current requests, not cumulative total
92+
pub const INFLIGHT_REQUESTS: &str = "inflight_requests";
93+
94+
/// Number of connections dropped by clients
95+
pub const DISCONNECTED_CLIENTS_TOTAL: &str = "disconnected_clients_total";
8196

8297
/// Duration of LLM requests
8398
pub const REQUEST_DURATION_SECONDS: &str = "request_duration_seconds";
@@ -124,7 +139,8 @@ pub mod work_handler {
124139
/// Total number of bytes sent in responses by work handler
125140
pub const RESPONSE_BYTES_TOTAL: &str = "response_bytes_total";
126141

127-
/// Number of requests currently being processed by work handler
142+
/// Number of inflight/concurrent requests currently being processed by work handler
143+
/// Note: Gauge metric measuring current requests, not cumulative total
128144
pub const INFLIGHT_REQUESTS: &str = "inflight_requests";
129145

130146
/// Time spent processing requests by work handler (histogram)
@@ -182,8 +198,9 @@ pub mod nats_client {
182198
/// Total number of messages sent by NATS client
183199
pub const OUT_MESSAGES: &str = nats_client_name!("out_messages");
184200

185-
/// Total number of connections established by NATS client
186-
pub const CONNECTS: &str = nats_client_name!("connects");
201+
/// Current number of active connections for NATS client
202+
/// Note: Gauge metric measuring current connections, not cumulative total
203+
pub const CURRENT_CONNECTIONS: &str = nats_client_name!("current_connections");
187204

188205
/// Current connection state of NATS client (0=disconnected, 1=connected, 2=reconnecting)
189206
pub const CONNECTION_STATE: &str = nats_client_name!("connection_state");
@@ -202,16 +219,16 @@ pub mod nats_service {
202219
pub const PREFIX: &str = nats_service_name!("");
203220

204221
/// Average processing time in milliseconds (maps to: average_processing_time in ms)
205-
pub const AVG_PROCESSING_MS: &str = nats_service_name!("avg_processing_time_ms");
222+
pub const PROCESSING_MS_AVG: &str = nats_service_name!("processing_ms_avg");
206223

207224
/// Total errors across all endpoints (maps to: num_errors)
208-
pub const TOTAL_ERRORS: &str = nats_service_name!("total_errors");
225+
pub const ERRORS_TOTAL: &str = nats_service_name!("errors_total");
209226

210227
/// Total requests across all endpoints (maps to: num_requests)
211-
pub const TOTAL_REQUESTS: &str = nats_service_name!("total_requests");
228+
pub const REQUESTS_TOTAL: &str = nats_service_name!("requests_total");
212229

213230
/// Total processing time in milliseconds (maps to: processing_time in ms)
214-
pub const TOTAL_PROCESSING_MS: &str = nats_service_name!("total_processing_time_ms");
231+
pub const PROCESSING_MS_TOTAL: &str = nats_service_name!("processing_ms_total");
215232

216233
/// Number of active services (derived from ServiceSet.services)
217234
pub const ACTIVE_SERVICES: &str = nats_service_name!("active_services");
@@ -223,7 +240,7 @@ pub mod nats_service {
223240
/// All NATS client Prometheus metric names as an array for iteration/validation
224241
pub const DRT_NATS_METRICS: &[&str] = &[
225242
nats_client::CONNECTION_STATE,
226-
nats_client::CONNECTS,
243+
nats_client::CURRENT_CONNECTIONS,
227244
nats_client::IN_TOTAL_BYTES,
228245
nats_client::IN_MESSAGES,
229246
nats_client::OUT_OVERHEAD_BYTES,
@@ -233,10 +250,10 @@ pub const DRT_NATS_METRICS: &[&str] = &[
233250
/// All component service Prometheus metric names as an array for iteration/validation
234251
/// (ordered to match NatsStatsMetrics fields)
235252
pub const COMPONENT_NATS_METRICS: &[&str] = &[
236-
nats_service::AVG_PROCESSING_MS, // maps to: average_processing_time (nanoseconds)
237-
nats_service::TOTAL_ERRORS, // maps to: num_errors
238-
nats_service::TOTAL_REQUESTS, // maps to: num_requests
239-
nats_service::TOTAL_PROCESSING_MS, // maps to: processing_time (nanoseconds)
253+
nats_service::PROCESSING_MS_AVG, // maps to: average_processing_time (nanoseconds)
254+
nats_service::ERRORS_TOTAL, // maps to: num_errors
255+
nats_service::REQUESTS_TOTAL, // maps to: num_requests
256+
nats_service::PROCESSING_MS_TOTAL, // maps to: processing_time (nanoseconds)
240257
nats_service::ACTIVE_SERVICES, // derived from ServiceSet.services
241258
nats_service::ACTIVE_ENDPOINTS, // derived from ServiceInfo.endpoints
242259
];

0 commit comments

Comments
 (0)