Skip to content

Commit 6f7f6b1

Browse files
authored
feat: Rename dynamo_component_concurrent_requests (#2515)
Signed-off-by: Tzu-Ling Kan <[email protected]>
1 parent 199b9a3 commit 6f7f6b1

File tree

7 files changed

+21
-25
lines changed

7 files changed

+21
-25
lines changed

deploy/metrics/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ As of Q2 2025, Dynamo HTTP Frontend metrics are exposed when you build container
4040

4141
The core Dynamo backend system automatically exposes metrics with the `dynamo_component_*` prefix for all components that use the `DistributedRuntime` framework:
4242

43-
- `dynamo_component_concurrent_requests`: Requests currently being processed (gauge)
43+
- `dynamo_component_inflight_requests`: Requests currently being processed (gauge)
4444
- `dynamo_component_request_bytes_total`: Total bytes received in requests (counter)
4545
- `dynamo_component_request_duration_seconds`: Request processing time (histogram)
4646
- `dynamo_component_requests_total`: Total requests processed (counter)

lib/runtime/examples/system_metrics/README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ The `dynamo_component_errors_total` metric includes the following error types:
6262
- `dynamo_component_request_duration_seconds` - Request processing time
6363

6464
### Gauges
65-
- `dynamo_component_concurrent_requests` - Number of requests currently being processed
65+
- `dynamo_component_inflight_requests` - Number of requests currently being processed
6666

6767
### Custom Metrics (Optional)
6868
- `dynamo_component_bytes_processed_total` - Total data bytes processed by system handler (example)
@@ -80,9 +80,9 @@ These labels are prefixed with "dynamo_" to avoid collisions with Kubernetes and
8080
When the system is running, the endpoint at http://<ip>:<port>/metrics returns metrics like this:
8181

8282
```prometheus
83-
# HELP dynamo_component_concurrent_requests Number of requests currently being processed by component endpoint
84-
# TYPE dynamo_component_concurrent_requests gauge
85-
dynamo_component_concurrent_requests{dynamo_component="example_component",dynamo_endpoint="example_endpoint9881",dynamo_namespace="example_namespace"} 0
83+
# HELP dynamo_component_inflight_requests Number of requests currently being processed by component endpoint
84+
# TYPE dynamo_component_inflight_requests gauge
85+
dynamo_component_inflight_requests{dynamo_component="example_component",dynamo_endpoint="example_endpoint9881",dynamo_namespace="example_namespace"} 0
8686
8787
# HELP dynamo_component_bytes_processed_total Example of a custom metric. Total number of data bytes processed by system handler
8888
# TYPE dynamo_component_bytes_processed_total counter

lib/runtime/examples/system_metrics/tests/integration_test.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ async fn test_backend_with_metrics() -> Result<()> {
118118
println!("{}", metrics_content);
119119
println!("=== END METRICS CONTENT ===");
120120

121-
// Parse and verify ingress metrics are greater than 0 (except concurrent_requests)
121+
// Parse and verify ingress metrics are greater than 0 (except inflight_requests)
122122
verify_ingress_metrics_greater_than_0(&metrics_content);
123123

124124
println!("Successfully retrieved and verified metrics!");
@@ -143,7 +143,7 @@ async fn test_backend_with_metrics() -> Result<()> {
143143
}
144144

145145
fn verify_ingress_metrics_greater_than_0(metrics_content: &str) {
146-
// Define the work handler metrics we want to verify (excluding concurrent_requests which can be 0)
146+
// Define the work handler metrics we want to verify (excluding inflight_requests which can be 0)
147147
let metrics_to_verify = [
148148
"my_custom_bytes_processed_total",
149149
"requests_total",

lib/runtime/src/metrics.rs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1651,11 +1651,7 @@ mod test_metricsregistry_nats {
16511651
18000.0,
16521652
23000.0,
16531653
), // ~75-125% of 20660
1654-
(
1655-
build_metric_name(work_handler::CONCURRENT_REQUESTS),
1656-
0.0,
1657-
1.0,
1658-
), // 0 or very low
1654+
(build_metric_name(work_handler::INFLIGHT_REQUESTS), 0.0, 1.0), // 0 or very low
16591655
// Histograms have _{count,sum} suffixes
16601656
(
16611657
format!(

lib/runtime/src/metrics/prometheus_names.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ pub mod work_handler {
127127
pub const RESPONSE_BYTES_TOTAL: &str = "response_bytes_total";
128128

129129
/// Number of requests currently being processed by work handler
130-
pub const CONCURRENT_REQUESTS: &str = "concurrent_requests";
130+
pub const INFLIGHT_REQUESTS: &str = "inflight_requests";
131131

132132
/// Time spent processing requests by work handler (histogram)
133133
pub const REQUEST_DURATION_SECONDS: &str = "request_duration_seconds";

lib/runtime/src/pipeline/network/ingress/push_handler.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ use tracing::Instrument;
2626
pub struct WorkHandlerMetrics {
2727
pub request_counter: IntCounter,
2828
pub request_duration: Histogram,
29-
pub concurrent_requests: IntGauge,
29+
pub inflight_requests: IntGauge,
3030
pub request_bytes: IntCounter,
3131
pub response_bytes: IntCounter,
3232
pub error_counter: IntCounterVec,
@@ -36,15 +36,15 @@ impl WorkHandlerMetrics {
3636
pub fn new(
3737
request_counter: IntCounter,
3838
request_duration: Histogram,
39-
concurrent_requests: IntGauge,
39+
inflight_requests: IntGauge,
4040
request_bytes: IntCounter,
4141
response_bytes: IntCounter,
4242
error_counter: IntCounterVec,
4343
) -> Self {
4444
Self {
4545
request_counter,
4646
request_duration,
47-
concurrent_requests,
47+
inflight_requests,
4848
request_bytes,
4949
response_bytes,
5050
error_counter,
@@ -68,8 +68,8 @@ impl WorkHandlerMetrics {
6868
None,
6969
)?;
7070

71-
let concurrent_requests = endpoint.create_intgauge(
72-
"concurrent_requests",
71+
let inflight_requests = endpoint.create_intgauge(
72+
"inflight_requests",
7373
"Number of requests currently being processed by work handler",
7474
&[],
7575
)?;
@@ -96,7 +96,7 @@ impl WorkHandlerMetrics {
9696
Ok(Self::new(
9797
request_counter,
9898
request_duration,
99-
concurrent_requests,
99+
inflight_requests,
100100
request_bytes,
101101
response_bytes,
102102
error_counter,
@@ -121,7 +121,7 @@ where
121121

122122
if let Some(m) = self.metrics() {
123123
m.request_counter.inc();
124-
m.concurrent_requests.inc();
124+
m.inflight_requests.inc();
125125
m.request_bytes.inc_by(payload.len() as u64);
126126
}
127127

@@ -289,7 +289,7 @@ where
289289
if let Some(m) = self.metrics() {
290290
let duration = start_time.elapsed();
291291
m.request_duration.observe(duration.as_secs_f64());
292-
m.concurrent_requests.dec();
292+
m.inflight_requests.dec();
293293
}
294294

295295
Ok(())

tests/router/test_router_e2e_with_mockers.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ async def send_request_with_retry(url: str, payload: dict, max_retries: int = 4)
131131
return False
132132

133133

134-
async def send_concurrent_requests(urls: list, payload: dict, num_requests: int):
134+
async def send_inflight_requests(urls: list, payload: dict, num_requests: int):
135135
"""Send multiple requests concurrently, alternating between URLs if multiple provided"""
136136

137137
# First, send test requests with retry to ensure all systems are ready
@@ -228,7 +228,7 @@ def test_mocker_kv_router(request, runtime_services):
228228

229229
# Use async to send requests concurrently for better performance
230230
asyncio.run(
231-
send_concurrent_requests(
231+
send_inflight_requests(
232232
[
233233
f"http://localhost:{frontend_port}/v1/chat/completions"
234234
], # Pass as list
@@ -301,7 +301,7 @@ def test_mocker_two_kv_router(request, runtime_services):
301301

302302
# Use async to send requests concurrently, alternating between routers
303303
asyncio.run(
304-
send_concurrent_requests(
304+
send_inflight_requests(
305305
router_urls,
306306
TEST_PAYLOAD,
307307
NUM_REQUESTS,
@@ -404,7 +404,7 @@ def test_mocker_kv_router_overload_503(request, runtime_services):
404404

405405
# First, send one request with retry to ensure the system is ready
406406
logger.info("Sending initial request to ensure system is ready...")
407-
asyncio.run(send_concurrent_requests([url], test_payload_503, 1))
407+
asyncio.run(send_inflight_requests([url], test_payload_503, 1))
408408

409409
# Now send 50 concurrent requests to exhaust resources, then verify 503
410410
logger.info("Sending 50 concurrent requests to exhaust resources...")

0 commit comments

Comments
 (0)