Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 13 additions & 109 deletions deploy/metrics/grafana_dashboards/grafana-kvbm-dashboard.json
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,8 @@
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "dynamo_component_matched_tokens{dynamo_namespace=\"kvbm_connector_leader\"}",
"editorMode": "code",
"expr": "kvbm_matched_tokens",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
Expand Down Expand Up @@ -227,8 +227,8 @@
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "dynamo_component_offload_requests{dynamo_namespace=\"kvbm_connector_leader\"}",
"editorMode": "code",
"expr": "kvbm_offload_requests",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
Expand Down Expand Up @@ -323,8 +323,8 @@
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "dynamo_component_offload_blocks_d2h{dynamo_namespace=\"kvbm_connector_leader\"}",
"editorMode": "code",
"expr": "kvbm_offload_blocks_d2h",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
Expand All @@ -336,102 +336,6 @@
"title": "Offload Blocks - Device to Host",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 18
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.0.1",
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "dynamo_component_save_kv_layer_requests{dynamo_namespace=\"kvbm_connector_worker\"}",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
"range": true,
"refId": "A",
"useBackend": false
}
],
"title": "Save KV Layer Requests",
"type": "timeseries"
},
{
"collapsed": false,
"gridPos": {
Expand Down Expand Up @@ -528,8 +432,8 @@
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "dynamo_component_onboard_requests{dynamo_namespace=\"kvbm_connector_leader\"}",
"editorMode": "code",
"expr": "kvbm_onboard_requests",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
Expand Down Expand Up @@ -624,8 +528,8 @@
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "dynamo_component_onboard_blocks_h2d{dynamo_namespace=\"kvbm_connector_leader\"}",
"editorMode": "code",
"expr": "kvbm_onboard_blocks_h2d",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
Expand Down Expand Up @@ -720,8 +624,8 @@
"targets": [
{
"disableTextWrap": false,
"editorMode": "builder",
"expr": "dynamo_component_onboard_blocks_d2d{dynamo_namespace=\"kvbm_connector_leader\"}",
"editorMode": "code",
"expr": "kvbm_onboard_blocks_d2d",
"fullMetaSearch": false,
"includeNullMetadata": true,
"legendFormat": "__auto",
Expand Down Expand Up @@ -750,4 +654,4 @@
"title": "KVBM Dashboard",
"uid": "3f679257-70a5-402c-92b4-05382337b548",
"version": 7
}
}
10 changes: 2 additions & 8 deletions deploy/metrics/prometheus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,16 +59,10 @@ scrape_configs:
- targets: ['host.docker.internal:9091'] # metrics aggregation service on host

# KVBM metrics
- job_name: 'kvbm-leader-metrics'
- job_name: 'kvbm-metrics'
scrape_interval: 2s
static_configs:
- targets: ['host.docker.internal:6882']

# KVBM worker related metrics
- job_name: 'kvbm-worker-metrics'
scrape_interval: 2s
static_configs:
- targets: ['host.docker.internal:6881']
- targets: ['host.docker.internal:6880']

# Uncomment to see its own Prometheus metrics
# - job_name: 'prometheus'
Expand Down
10 changes: 4 additions & 6 deletions docs/guides/run_kvbm_in_trtllm.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,18 +109,16 @@ Follow below steps to enable metrics collection and view via Grafana dashboard:
# Start the basic services (etcd & natsd), along with Prometheus and Grafana
docker compose -f deploy/docker-compose.yml --profile metrics up -d

# set env var DYN_SYSTEM_ENABLED to true, DYN_SYSTEM_PORT to 6880, DYN_KVBM_SLEEP to 5, when launch via dynamo
# NOTE: Make sure port 6881 (for KVBM worker metrics) and port 6882 (for KVBM leader metrics) are available.
# NOTE: DYN_KVBM_SLEEP is needed to avoid metrics port conflict between KVBM leader and worker
DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=6880 DYN_KVBM_SLEEP=5 \
# Set the env var DYN_KVBM_METRICS to true when launching via dynamo.
# Optionally set DYN_KVBM_METRICS_PORT to choose the /metrics port (default: 6880).
DYN_KVBM_METRICS=true \
python3 -m dynamo.trtllm \
--model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
--served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
--extra-engine-args /tmp/kvbm_llm_api_config.yaml &

# Optional: open the KVBM metrics port if a firewall prevents Prometheus from scraping it
sudo ufw allow 6881/tcp
sudo ufw allow 6882/tcp
sudo ufw allow 6880/tcp
```

View grafana metrics via http://localhost:3001 (default login: dynamo/dynamo) and look for KVBM Dashboard
9 changes: 4 additions & 5 deletions docs/guides/run_kvbm_in_vllm.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,16 +77,15 @@ Follow below steps to enable metrics collection and view via Grafana dashboard:
# Start the basic services (etcd & natsd), along with Prometheus and Grafana
docker compose -f deploy/docker-compose.yml --profile metrics up -d

# set env var DYN_SYSTEM_ENABLED to true, DYN_SYSTEM_PORT to 6880, DYN_KVBM_SLEEP to 5, when launch via dynamo
# NOTE: Make sure port 6881 (for KVBM worker metrics) and port 6882 (for KVBM leader metrics) are available.
DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=6880 \
# Set the env var DYN_KVBM_METRICS to true when launching via dynamo.
# Optionally set DYN_KVBM_METRICS_PORT to choose the /metrics port (default: 6880).
DYN_KVBM_METRICS=true \
python -m dynamo.vllm \
--model deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
--connector kvbm &

# Optional: open the KVBM metrics port if a firewall prevents Prometheus from scraping it
sudo ufw allow 6881/tcp
sudo ufw allow 6882/tcp
sudo ufw allow 6880/tcp
```

View grafana metrics via http://localhost:3001 (default login: dynamo/dynamo) and look for KVBM Dashboard
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ pub mod recorder;
pub mod slot;

use super::*;
use dynamo_llm::block_manager::metrics_kvbm::KvbmMetrics;
use dynamo_llm::block_manager::metrics_kvbm::{KvbmMetrics, KvbmMetricsRegistry};
use dynamo_runtime::DistributedRuntime;
use slot::{ConnectorSlotManager, SlotError, SlotManager, SlotState};

Expand All @@ -15,7 +15,6 @@ use crate::llm::block_manager::{
VllmBlockManager, distributed::KvbmLeader as PyKvbmLeader, vllm::KvbmRequest,
vllm::connector::leader::slot::VllmConnectorSlot,
};
use dynamo_runtime::metrics::prometheus_names::kvbm_connector;

use dynamo_llm::block_manager::{
BasicMetadata, DiskStorage, ImmutableBlock, PinnedStorage,
Expand Down Expand Up @@ -103,11 +102,11 @@ impl KvConnectorLeader {
let drt = drt.inner().clone();
let handle: Handle = drt.runtime().primary();

let ns = drt
.namespace(kvbm_connector::KVBM_CONNECTOR_LEADER)
.unwrap();

let kvbm_metrics = KvbmMetrics::new(&ns);
let kvbm_metrics = KvbmMetrics::new(
&KvbmMetricsRegistry::default(),
kvbm_metrics_endpoint_enabled(),
parse_kvbm_metrics_port(),
);
let kvbm_metrics_clone = kvbm_metrics.clone();

let slot_manager_cell = Arc::new(OnceLock::new());
Expand Down Expand Up @@ -615,3 +614,30 @@ impl PyKvConnectorLeader {
.map_err(to_pyerr)
}
}

/// Reports whether the KVBM metrics endpoint was requested through the
/// `DYN_KVBM_METRICS` environment variable.
///
/// The value is trimmed of surrounding whitespace and then accepted when it
/// is `1` or any ASCII-case variant of `true`. Trimming mirrors how
/// `DYN_KVBM_METRICS_PORT` is parsed, so a stray space or trailing newline
/// (common when values come from env files) does not silently disable
/// metrics.
///
/// # Returns
///
/// `true` for an accepted value; `false` when the variable is unset, is not
/// valid Unicode, or holds any other value.
pub fn kvbm_metrics_endpoint_enabled() -> bool {
    std::env::var("DYN_KVBM_METRICS")
        .map(|raw| {
            let flag = raw.trim();
            flag == "1" || flag.eq_ignore_ascii_case("true")
        })
        .unwrap_or(false)
}

/// Resolves the port for the KVBM metrics endpoint from the
/// `DYN_KVBM_METRICS_PORT` environment variable.
///
/// Surrounding whitespace is ignored. The default port `6880` is returned
/// when the variable is unset, is not valid Unicode, or does not parse as a
/// `u16`.
///
/// An unset variable is the ordinary configuration, so it falls back
/// quietly. A warning is logged only when a value was supplied but cannot be
/// used, which keeps the log free of noise on every default start-up.
///
/// # Returns
///
/// The configured port, or `6880` as the fallback.
pub fn parse_kvbm_metrics_port() -> u16 {
    // Single source of truth for the fallback used by every branch below.
    const DEFAULT_PORT: u16 = 6880;

    match std::env::var("DYN_KVBM_METRICS_PORT") {
        Ok(val) => val.trim().parse::<u16>().unwrap_or_else(|_| {
            tracing::warn!(
                "[kvbm] Invalid DYN_KVBM_METRICS_PORT='{}', falling back to {}",
                val,
                DEFAULT_PORT
            );
            DEFAULT_PORT
        }),
        // Not configured: the default applies without a warning.
        Err(std::env::VarError::NotPresent) => DEFAULT_PORT,
        // Configured, but the bytes are not valid Unicode: worth flagging.
        Err(std::env::VarError::NotUnicode(raw)) => {
            tracing::warn!(
                "[kvbm] DYN_KVBM_METRICS_PORT={:?} is not valid Unicode, falling back to {}",
                raw,
                DEFAULT_PORT
            );
            DEFAULT_PORT
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,11 @@ impl KvConnectorLeaderRecorder {
let drt = drt.inner().clone();
let handle: Handle = drt.runtime().primary();

let ns = drt
.namespace(kvbm_connector::KVBM_CONNECTOR_LEADER)
.unwrap();

let kvbm_metrics = KvbmMetrics::new(&ns);
let kvbm_metrics = KvbmMetrics::new(
&KvbmMetricsRegistry::default(),
kvbm_metrics_endpoint_enabled(),
parse_kvbm_metrics_port(),
);
let kvbm_metrics_clone = kvbm_metrics.clone();

let token = CancellationToken::new();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@ use crate::llm::block_manager::BlockManagerBuilder;
use crate::llm::block_manager::vllm::connector::leader::slot::{
ConnectorSlotManager, SlotManager, SlotState,
};
use crate::llm::block_manager::vllm::connector::leader::{
kvbm_metrics_endpoint_enabled, parse_kvbm_metrics_port,
};
use crate::llm::block_manager::{distributed::KvbmLeader as PyKvbmLeader, vllm::KvbmRequest};
use anyhow;
use dynamo_llm::block_manager::metrics_kvbm::KvbmMetrics;
use dynamo_runtime::metrics::prometheus_names::kvbm_connector;
use dynamo_llm::block_manager::metrics_kvbm::{KvbmMetrics, KvbmMetricsRegistry};
use std::collections::HashSet;
use std::sync::{Arc, OnceLock};
use tokio::runtime::Handle;
Expand Down Expand Up @@ -76,11 +78,12 @@ impl KvConnectorLeader {
let drt = drt.inner().clone();
let handle: Handle = drt.runtime().primary();

let ns = drt
.namespace(kvbm_connector::KVBM_CONNECTOR_LEADER)
.unwrap();
let kvbm_metrics = KvbmMetrics::new(
&KvbmMetricsRegistry::default(),
kvbm_metrics_endpoint_enabled(),
parse_kvbm_metrics_port(),
);

let kvbm_metrics = KvbmMetrics::new(&ns);
let kvbm_metrics_clone = kvbm_metrics.clone();

let slot_manager_cell = Arc::new(OnceLock::new());
Expand Down
Loading
Loading