deploy/metrics/README.md (2 changes: 1 addition & 1 deletion)
@@ -40,7 +40,7 @@ As of Q2 2025, Dynamo HTTP Frontend metrics are exposed when you build container

The core Dynamo backend system automatically exposes metrics with the `dynamo_component_*` prefix for all components that use the `DistributedRuntime` framework:

-- `dynamo_component_concurrent_requests`: Requests currently being processed (gauge)
+- `dynamo_component_inflight_requests`: Requests currently being processed (gauge)
- `dynamo_component_request_bytes_total`: Total bytes received in requests (counter)
- `dynamo_component_request_duration_seconds`: Request processing time (histogram)
- `dynamo_component_requests_total`: Total requests processed (counter)
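A minimal sketch of the gauge semantics behind the renamed metric, using the `prometheus` crate directly; in Dynamo the metric is created and named by the `DistributedRuntime` framework, so the hard-coded name and registry below are illustrative only:

```rust
use prometheus::{Encoder, IntGauge, Registry, TextEncoder};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let registry = Registry::new();

    // Name mirrors the renamed metric from the README above (illustrative).
    let inflight = IntGauge::new(
        "dynamo_component_inflight_requests",
        "Requests currently being processed",
    )?;
    registry.register(Box::new(inflight.clone()))?;

    // A gauge moves both ways: up when a request starts, down when it completes,
    // so an idle component legitimately reports 0.
    inflight.inc(); // request arrives
    inflight.dec(); // response sent

    // Render the exposition text that a /metrics endpoint would serve.
    let mut buffer = Vec::new();
    TextEncoder::new().encode(&registry.gather(), &mut buffer)?;
    println!("{}", String::from_utf8(buffer)?);
    Ok(())
}
```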
lib/runtime/examples/system_metrics/README.md (8 changes: 4 additions & 4 deletions)
@@ -62,7 +62,7 @@ The `dynamo_component_errors_total` metric includes the following error types:
- `dynamo_component_request_duration_seconds` - Request processing time

### Gauges
-- `dynamo_component_concurrent_requests` - Number of requests currently being processed
+- `dynamo_component_inflight_requests` - Number of requests currently being processed

### Custom Metrics (Optional)
- `dynamo_component_bytes_processed_total` - Total data bytes processed by system handler (example)
@@ -80,9 +80,9 @@ These labels are prefixed with "dynamo_" to avoid collisions with Kubernetes and
When the system is running, you'll see metrics from http://<ip>:<port>/metrics like this:

```prometheus
-# HELP dynamo_component_concurrent_requests Number of requests currently being processed by component endpoint
-# TYPE dynamo_component_concurrent_requests gauge
-dynamo_component_concurrent_requests{dynamo_component="example_component",dynamo_endpoint="example_endpoint9881",dynamo_namespace="example_namespace"} 0
+# HELP dynamo_component_inflight_requests Number of requests currently being processed by component endpoint
+# TYPE dynamo_component_inflight_requests gauge
+dynamo_component_inflight_requests{dynamo_component="example_component",dynamo_endpoint="example_endpoint9881",dynamo_namespace="example_namespace"} 0

# HELP dynamo_component_bytes_processed_total Example of a custom metric. Total number of data bytes processed by system handler
# TYPE dynamo_component_bytes_processed_total counter
(file name not shown in this diff view)
@@ -118,7 +118,7 @@ async fn test_backend_with_metrics() -> Result<()> {
println!("{}", metrics_content);
println!("=== END METRICS CONTENT ===");

// Parse and verify ingress metrics are greater than 0 (except concurrent_requests)
// Parse and verify ingress metrics are greater than 0 (except inflight_requests)
verify_ingress_metrics_greater_than_0(&metrics_content);

println!("Successfully retrieved and verified metrics!");
@@ -143,7 +143,7 @@ async fn test_backend_with_metrics() -> Result<()> {
}

fn verify_ingress_metrics_greater_than_0(metrics_content: &str) {
-    // Define the work handler metrics we want to verify (excluding concurrent_requests which can be 0)
+    // Define the work handler metrics we want to verify (excluding inflight_requests which can be 0)
    let metrics_to_verify = [
        "my_custom_bytes_processed_total",
        "requests_total",
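The helper above excludes the in-flight gauge because it returns to 0 once requests finish, while the counters only ever grow. A hedged sketch of what such a check might look like (the function below is an assumed shape for illustration, not the test's actual helper):

```rust
// Scan prometheus exposition text and assert that some sample of the named
// metric has a value greater than zero. Skips HELP/TYPE comment lines.
fn assert_metric_greater_than_0(metrics_content: &str, metric_name: &str) {
    let found = metrics_content
        .lines()
        .filter(|line| !line.starts_with('#'))
        .filter(|line| line.contains(metric_name))
        .filter_map(|line| line.rsplit(' ').next()) // the sample value is the last field
        .filter_map(|value| value.parse::<f64>().ok())
        .any(|value| value > 0.0);
    assert!(found, "expected {metric_name} > 0 in metrics output");
}
```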
lib/runtime/src/metrics.rs (6 changes: 1 addition & 5 deletions)
@@ -1651,11 +1651,7 @@ mod test_metricsregistry_nats {
        18000.0,
        23000.0,
    ), // ~75-125% of 20660
-    (
-        build_metric_name(work_handler::CONCURRENT_REQUESTS),
-        0.0,
-        1.0,
-    ), // 0 or very low
+    (build_metric_name(work_handler::INFLIGHT_REQUESTS), 0.0, 1.0), // 0 or very low
    // Histograms have _{count,sum} suffixes
    (
        format!(
lib/runtime/src/metrics/prometheus_names.rs (2 changes: 1 addition & 1 deletion)
@@ -127,7 +127,7 @@ pub mod work_handler {
    pub const RESPONSE_BYTES_TOTAL: &str = "response_bytes_total";

    /// Number of requests currently being processed by work handler
-    pub const CONCURRENT_REQUESTS: &str = "concurrent_requests";
+    pub const INFLIGHT_REQUESTS: &str = "inflight_requests";

    /// Time spent processing requests by work handler (histogram)
    pub const REQUEST_DURATION_SECONDS: &str = "request_duration_seconds";
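For orientation, the short constant renamed above is what ultimately surfaces as the `dynamo_component_inflight_requests` name shown in the README hunks. A tiny sketch of that composition (the prefix constant and helper here are assumptions for illustration, not the crate's actual API):

```rust
// Assumed for the sketch: in the real code the prefix is applied by the
// metrics registry when a component registers its metrics.
const COMPONENT_PREFIX: &str = "dynamo_component_";

fn full_metric_name(suffix: &str) -> String {
    format!("{COMPONENT_PREFIX}{suffix}")
}

fn main() {
    // Mirrors work_handler::INFLIGHT_REQUESTS from prometheus_names.rs.
    assert_eq!(
        full_metric_name("inflight_requests"),
        "dynamo_component_inflight_requests"
    );
}
```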
lib/runtime/src/pipeline/network/ingress/push_handler.rs (16 changes: 8 additions & 8 deletions)
@@ -26,7 +26,7 @@ use tracing::Instrument;
pub struct WorkHandlerMetrics {
    pub request_counter: IntCounter,
    pub request_duration: Histogram,
-    pub concurrent_requests: IntGauge,
+    pub inflight_requests: IntGauge,
    pub request_bytes: IntCounter,
    pub response_bytes: IntCounter,
    pub error_counter: IntCounterVec,
@@ -36,15 +36,15 @@ impl WorkHandlerMetrics {
    pub fn new(
        request_counter: IntCounter,
        request_duration: Histogram,
-        concurrent_requests: IntGauge,
+        inflight_requests: IntGauge,
        request_bytes: IntCounter,
        response_bytes: IntCounter,
        error_counter: IntCounterVec,
    ) -> Self {
        Self {
            request_counter,
            request_duration,
-            concurrent_requests,
+            inflight_requests,
            request_bytes,
            response_bytes,
            error_counter,
@@ -68,8 +68,8 @@ impl WorkHandlerMetrics {
            None,
        )?;

-        let concurrent_requests = endpoint.create_intgauge(
-            "concurrent_requests",
+        let inflight_requests = endpoint.create_intgauge(
+            "inflight_requests",
            "Number of requests currently being processed by work handler",
            &[],
        )?;
@@ -96,7 +96,7 @@ impl WorkHandlerMetrics {
        Ok(Self::new(
            request_counter,
            request_duration,
-            concurrent_requests,
+            inflight_requests,
            request_bytes,
            response_bytes,
            error_counter,
@@ -121,7 +121,7 @@ where

        if let Some(m) = self.metrics() {
            m.request_counter.inc();
-            m.concurrent_requests.inc();
+            m.inflight_requests.inc();
            m.request_bytes.inc_by(payload.len() as u64);
        }

@@ -289,7 +289,7 @@ where
        if let Some(m) = self.metrics() {
            let duration = start_time.elapsed();
            m.request_duration.observe(duration.as_secs_f64());
-            m.concurrent_requests.dec();
+            m.inflight_requests.dec();
        }

        Ok(())
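The push handler hunks show the pattern behind the gauge: increment when a request enters the handler, decrement once the response is done, which is why the tests accept 0 as a valid value. A self-contained sketch of that pattern using a drop guard (the guard type is illustrative; the handler above does the inc/dec manually):

```rust
use prometheus::IntGauge;

// Drop guard so the gauge is decremented on every exit path, including errors.
struct InflightGuard(IntGauge);

impl InflightGuard {
    fn enter(gauge: &IntGauge) -> Self {
        gauge.inc();
        InflightGuard(gauge.clone())
    }
}

impl Drop for InflightGuard {
    fn drop(&mut self) {
        self.0.dec();
    }
}

fn handle_request(inflight: &IntGauge, payload: &[u8]) -> usize {
    let _guard = InflightGuard::enter(inflight); // gauge goes up here
    payload.len() // stand-in for the real work
} // gauge goes back down when `_guard` drops

fn main() {
    let inflight = IntGauge::new("inflight_requests", "requests in flight").unwrap();
    assert_eq!(inflight.get(), 0);
    handle_request(&inflight, b"hello");
    assert_eq!(inflight.get(), 0); // back to zero once the request completes
}
```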
tests/router/test_router_e2e_with_mockers.py (8 changes: 4 additions & 4 deletions)
@@ -131,7 +131,7 @@ async def send_request_with_retry(url: str, payload: dict, max_retries: int = 4)
    return False


-async def send_concurrent_requests(urls: list, payload: dict, num_requests: int):
+async def send_inflight_requests(urls: list, payload: dict, num_requests: int):
    """Send multiple requests concurrently, alternating between URLs if multiple provided"""

    # First, send test requests with retry to ensure all systems are ready
@@ -228,7 +228,7 @@ def test_mocker_kv_router(request, runtime_services):

    # Use async to send requests concurrently for better performance
    asyncio.run(
-        send_concurrent_requests(
+        send_inflight_requests(
            [
                f"http://localhost:{frontend_port}/v1/chat/completions"
            ], # Pass as list
@@ -301,7 +301,7 @@ def test_mocker_two_kv_router(request, runtime_services):

    # Use async to send requests concurrently, alternating between routers
    asyncio.run(
-        send_concurrent_requests(
+        send_inflight_requests(
            router_urls,
            TEST_PAYLOAD,
            NUM_REQUESTS,
@@ -404,7 +404,7 @@ def test_mocker_kv_router_overload_503(request, runtime_services):

    # First, send one request with retry to ensure system is ready
    logger.info("Sending initial request to ensure system is ready...")
-    asyncio.run(send_concurrent_requests([url], test_payload_503, 1))
+    asyncio.run(send_inflight_requests([url], test_payload_503, 1))

    # Now send 50 concurrent requests to exhaust resources, then verify 503
    logger.info("Sending 50 concurrent requests to exhaust resources...")