Skip to content

Commit 6298aff

Browse files
committed
refactor: standardize Prometheus metric naming conventions (part 1)
Updated metric names to use a consistent `_total` suffix for gauge metrics:
- `inflight_requests` -> `inflight_requests_total`
- Added `client_disconnects_total` to prometheus_names.rs
- Updated documentation and test comments to reflect the new naming

This is one of many PRs to address the naming convention. Only the Prometheus external names are updated in this PR. Not addressed in this PR: NATS naming (in the next PR) and variable name changes (another PR).

Signed-off-by: Keiven Chang <[email protected]>
1 parent 2bb1b26 commit 6298aff

File tree

7 files changed

+16
-12
lines changed

7 files changed

+16
-12
lines changed

deploy/metrics/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ As of Q2 2025, Dynamo HTTP Frontend metrics are exposed when you build container
4040

4141
The core Dynamo backend system automatically exposes metrics with the `dynamo_component_*` prefix for all components that use the `DistributedRuntime` framework:
4242

43-
- `dynamo_component_inflight_requests`: Requests currently being processed (gauge)
43+
- `dynamo_component_inflight_requests_total`: Requests currently being processed (gauge)
4444
- `dynamo_component_request_bytes_total`: Total bytes received in requests (counter)
4545
- `dynamo_component_request_duration_seconds`: Request processing time (histogram)
4646
- `dynamo_component_requests_total`: Total requests processed (counter)

lib/llm/src/http/service/metrics.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,8 @@ impl Metrics {
117117
///
118118
/// The following metrics will be created with the configured prefix:
119119
/// - `{prefix}_requests_total` - IntCounterVec for the total number of requests processed
120-
/// - `{prefix}_inflight_requests` - IntGaugeVec for the number of inflight requests
120+
/// - `{prefix}_inflight_requests_total` - IntGaugeVec for the number of inflight requests
121+
/// - `{prefix}_client_disconnects_total` - IntGauge for the number of connections dropped by clients
121122
/// - `{prefix}_request_duration_seconds` - HistogramVec for the duration of requests
122123
/// - `{prefix}_input_sequence_tokens` - HistogramVec for input sequence length in tokens
123124
/// - `{prefix}_output_sequence_tokens` - HistogramVec for output sequence length in tokens
@@ -156,7 +157,7 @@ impl Metrics {
156157
.unwrap();
157158

158159
let client_disconnect_gauge = prometheus::IntGauge::new(
159-
frontend_metric_name("client_disconnects"),
160+
frontend_metric_name(frontend_service::CLIENT_DISCONNECTS_TOTAL),
160161
"Number of connections dropped by clients",
161162
)
162163
.unwrap();

lib/runtime/examples/system_metrics/README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ The `dynamo_component_errors_total` metric includes the following error types:
6262
- `dynamo_component_request_duration_seconds` - Request processing time
6363

6464
### Gauges
65-
- `dynamo_component_inflight_requests` - Number of requests currently being processed
65+
- `dynamo_component_inflight_requests_total` - Number of requests currently being processed
6666

6767
### Custom Metrics (Optional)
6868
- `dynamo_component_bytes_processed_total` - Total data bytes processed by system handler (example)
@@ -80,9 +80,9 @@ These labels are prefixed with "dynamo_" to avoid collisions with Kubernetes and
8080
When the system is running, you'll see metrics from http://<ip>:<port>/metrics like this:
8181

8282
```prometheus
83-
# HELP dynamo_component_inflight_requests Number of requests currently being processed by component endpoint
84-
# TYPE dynamo_component_inflight_requests gauge
85-
dynamo_component_inflight_requests{dynamo_component="example_component",dynamo_endpoint="example_endpoint9881",dynamo_namespace="example_namespace"} 0
83+
# HELP dynamo_component_inflight_requests_total Number of requests currently being processed by component endpoint
84+
# TYPE dynamo_component_inflight_requests_total gauge
85+
dynamo_component_inflight_requests_total{dynamo_component="example_component",dynamo_endpoint="example_endpoint9881",dynamo_namespace="example_namespace"} 0
8686
8787
# HELP dynamo_component_bytes_processed_total Example of a custom metric. Total number of data bytes processed by system handler
8888
# TYPE dynamo_component_bytes_processed_total counter

lib/runtime/examples/system_metrics/tests/integration_test.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ async fn test_backend_with_metrics() -> Result<()> {
106106
println!("{}", metrics_content);
107107
println!("=== END METRICS CONTENT ===");
108108

109-
// Parse and verify ingress metrics are greater than 0 (except inflight_requests)
109+
// Parse and verify ingress metrics are greater than 0 (except inflight_requests_total)
110110
verify_ingress_metrics_greater_than_0(&metrics_content);
111111

112112
println!("Successfully retrieved and verified metrics!");
@@ -131,7 +131,7 @@ async fn test_backend_with_metrics() -> Result<()> {
131131
}
132132

133133
fn verify_ingress_metrics_greater_than_0(metrics_content: &str) {
134-
// Define the work handler metrics we want to verify (excluding inflight_requests which can be 0)
134+
// Define the work handler metrics we want to verify (excluding inflight_requests_total which can be 0)
135135
let metrics_to_verify = [
136136
"my_custom_bytes_processed_total",
137137
"requests_total",

lib/runtime/src/metrics.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1619,7 +1619,7 @@ mod test_metricsregistry_nats {
16191619
23000.0,
16201620
), // ~75-125% of 20660
16211621
(
1622-
build_component_metric_name(work_handler::INFLIGHT_REQUESTS),
1622+
build_component_metric_name(work_handler::INFLIGHT_REQUESTS_TOTAL),
16231623
0.0,
16241624
1.0,
16251625
), // 0 or very low

lib/runtime/src/metrics/prometheus_names.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,9 @@ pub mod frontend_service {
7979
/// Number of inflight requests going to the engine (vLLM, SGLang, ...)
8080
pub const INFLIGHT_REQUESTS_TOTAL: &str = "inflight_requests_total";
8181

82+
/// Number of connections dropped by clients
83+
pub const CLIENT_DISCONNECTS_TOTAL: &str = "client_disconnects_total";
84+
8285
/// Duration of LLM requests
8386
pub const REQUEST_DURATION_SECONDS: &str = "request_duration_seconds";
8487

@@ -125,7 +128,7 @@ pub mod work_handler {
125128
pub const RESPONSE_BYTES_TOTAL: &str = "response_bytes_total";
126129

127130
/// Number of requests currently being processed by work handler
128-
pub const INFLIGHT_REQUESTS: &str = "inflight_requests";
131+
pub const INFLIGHT_REQUESTS_TOTAL: &str = "inflight_requests_total";
129132

130133
/// Time spent processing requests by work handler (histogram)
131134
pub const REQUEST_DURATION_SECONDS: &str = "request_duration_seconds";

lib/runtime/src/pipeline/network/ingress/push_handler.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ impl WorkHandlerMetrics {
6161
)?;
6262

6363
let inflight_requests = endpoint.create_intgauge(
64-
work_handler::INFLIGHT_REQUESTS,
64+
work_handler::INFLIGHT_REQUESTS_TOTAL,
6565
"Number of requests currently being processed by work handler",
6666
metrics_labels,
6767
)?;

0 commit comments

Comments
 (0)