diff --git a/Cargo.lock b/Cargo.lock index 42094da04f..2dea698746 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2091,6 +2091,28 @@ dependencies = [ "uuid 1.18.1", ] +[[package]] +name = "dynamo-codegen" +version = "0.1.0" +dependencies = [ + "anyhow", + "proc-macro2", + "quote", + "syn 2.0.106", +] + +[[package]] +name = "dynamo-engine-llamacpp" +version = "0.5.1" +dependencies = [ + "async-stream", + "dynamo-llm", + "dynamo-runtime", + "llama-cpp-2", + "tokio", + "tracing", +] + [[package]] name = "dynamo-engine-mistralrs" version = "0.5.1" diff --git a/Cargo.toml b/Cargo.toml index 3512e76bc1..d9d7c4a7a3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,7 @@ members = [ "lib/async-openai", "lib/parsers", "lib/bindings/c", + "lib/bindings/python/codegen", "lib/engines/*", ] # Exclude certain packages that are slow to build and we don't ship as flagship diff --git a/components/src/dynamo/planner/utils/prometheus.py b/components/src/dynamo/planner/utils/prometheus.py index 8a9fab6d4b..1aced71b54 100644 --- a/components/src/dynamo/planner/utils/prometheus.py +++ b/components/src/dynamo/planner/utils/prometheus.py @@ -19,7 +19,7 @@ from prometheus_api_client import PrometheusConnect from pydantic import BaseModel, ValidationError -from dynamo._core import prometheus_names +from dynamo import prometheus_names from dynamo.runtime.logging import configure_dynamo_logging configure_dynamo_logging() @@ -94,7 +94,7 @@ def _get_average_metric( def get_avg_inter_token_latency(self, interval: str, model_name: str): return self._get_average_metric( - prometheus_names.frontend.inter_token_latency_seconds, + prometheus_names.frontend_service.INTER_TOKEN_LATENCY_SECONDS, interval, "avg inter token latency", model_name, @@ -102,7 +102,7 @@ def get_avg_inter_token_latency(self, interval: str, model_name: str): def get_avg_time_to_first_token(self, interval: str, model_name: str): return self._get_average_metric( - prometheus_names.frontend.time_to_first_token_seconds, + prometheus_names.frontend_service.TIME_TO_FIRST_TOKEN_SECONDS, interval, "avg time to first token", model_name, @@ -110,7 +110,7 @@ def get_avg_time_to_first_token(self, interval: str, model_name: str): def get_avg_request_duration(self, interval: str, model_name: str): return self._get_average_metric( - prometheus_names.frontend.request_duration_seconds, + prometheus_names.frontend_service.REQUEST_DURATION_SECONDS, interval, "avg request duration", model_name, @@ -119,7 +119,7 @@ def get_avg_request_duration(self, interval: str, model_name: str): def get_avg_request_count(self, interval: str, model_name: str): # This function follows a different query pattern than the other metrics try: - requests_total_metric = prometheus_names.frontend.requests_total + requests_total_metric = prometheus_names.frontend_service.REQUESTS_TOTAL raw_res = self.prom.custom_query( query=f"increase({requests_total_metric}[{interval}])" ) @@ -138,7 +138,7 @@ def get_avg_request_count(self, interval: str, model_name: str): def get_avg_input_sequence_tokens(self, interval: str, model_name: str): return self._get_average_metric( - prometheus_names.frontend.input_sequence_tokens, + prometheus_names.frontend_service.INPUT_SEQUENCE_TOKENS, interval, "avg input sequence tokens", model_name, @@ -146,7 +146,7 @@ def get_avg_input_sequence_tokens(self, interval: str, model_name: str): def get_avg_output_sequence_tokens(self, interval: str, model_name: str): return self._get_average_metric( - prometheus_names.frontend.output_sequence_tokens, + 
prometheus_names.frontend_service.OUTPUT_SEQUENCE_TOKENS, interval, "avg output sequence tokens", model_name, diff --git a/lib/bindings/python/codegen/Cargo.toml b/lib/bindings/python/codegen/Cargo.toml new file mode 100644 index 0000000000..2b96155188 --- /dev/null +++ b/lib/bindings/python/codegen/Cargo.toml @@ -0,0 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +[package] +name = "dynamo-codegen" +version = "0.1.0" +edition = "2021" +license = "Apache-2.0" + +[dependencies] +syn = { version = "2.0", features = ["full", "extra-traits"] } +quote = "1.0" +proc-macro2 = "1.0" +anyhow = "1.0" + +[[bin]] +name = "gen-python-prometheus-names" +path = "src/gen_python_prometheus_names.rs" diff --git a/lib/bindings/python/codegen/README.md b/lib/bindings/python/codegen/README.md new file mode 100644 index 0000000000..09c5d1c4e7 --- /dev/null +++ b/lib/bindings/python/codegen/README.md @@ -0,0 +1,38 @@ +# Dynamo Codegen + +Python code generator for Dynamo Python bindings. + +## gen-python-prometheus-names + +Generates `prometheus_names.py` from Rust source `lib/runtime/src/metrics/prometheus_names.rs`. + +### Usage + +```bash +cargo run -p dynamo-codegen --bin gen-python-prometheus-names +``` + +### What it does + +- Parses Rust AST from `lib/runtime/src/metrics/prometheus_names.rs` +- Generates Python classes with constants at `lib/bindings/python/src/dynamo/prometheus_names.py` +- Handles macro-generated constants (e.g., `kvstats_name!("active_blocks")` → `"kvstats_active_blocks"`) + +### Example + +**Rust input:** +```rust +pub mod kvstats { + pub const ACTIVE_BLOCKS: &str = kvstats_name!("active_blocks"); +} +``` + +**Python output:** +```python +class kvstats: + ACTIVE_BLOCKS = "kvstats_active_blocks" +``` + +### When to run + +Run after modifying `lib/runtime/src/metrics/prometheus_names.rs` to regenerate the Python file. diff --git a/lib/bindings/python/codegen/src/gen_python_prometheus_names.rs b/lib/bindings/python/codegen/src/gen_python_prometheus_names.rs new file mode 100644 index 0000000000..a9f55dbdaa --- /dev/null +++ b/lib/bindings/python/codegen/src/gen_python_prometheus_names.rs @@ -0,0 +1,218 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Binary to generate Python prometheus_names from Rust source + +use anyhow::{Context, Result}; +use dynamo_codegen::prometheus_parser::{ModuleDef, PrometheusParser}; +use std::collections::HashMap; +use std::path::PathBuf; + +/// Generates Python module code from parsed Rust prometheus_names modules. +/// Converts Rust const declarations into Python class attributes with deterministic ordering. 
+struct PythonGenerator<'a> { + modules: &'a HashMap<String, ModuleDef>, +} + +impl<'a> PythonGenerator<'a> { + fn new(parser: &'a PrometheusParser) -> Self { + Self { + modules: &parser.modules, + } + } + + fn load_template(template_name: &str) -> String { + let template_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("templates") + .join(template_name); + + std::fs::read_to_string(&template_path) + .unwrap_or_else(|_| panic!("Failed to read template: {}", template_path.display())) + } + + fn generate_python_file(&self) -> String { + let mut output = Self::load_template("prometheus_names.py.template"); + + // Append generated classes + output.push_str(&self.generate_classes()); + + output + } + + fn generate_classes(&self) -> String { + let mut lines = Vec::new(); + + // Sort module names to ensure deterministic output + let mut module_names: Vec<&String> = self.modules.keys().collect(); + module_names.sort(); + + // Generate simple classes with constants as class attributes + for module_name in module_names { + let module = &self.modules[module_name]; + lines.push(format!("class {}:", module_name)); + + // Use doc comment from module if available + if !module.doc_comment.is_empty() { + let first_line = module.doc_comment.lines().next().unwrap_or("").trim(); + if !first_line.is_empty() { + lines.push(format!(" \"\"\"{}\"\"\"", first_line)); + } + } + lines.push("".to_string()); + + for constant in &module.constants { + if !constant.doc_comment.is_empty() { + for comment_line in constant.doc_comment.lines() { + lines.push(format!(" # {}", comment_line)); + } + } + lines.push(format!(" {} = \"{}\"", constant.name, constant.value)); + } + + lines.push("".to_string()); + } + + lines.join("\n") + } +} + +fn main() -> Result<()> { + let args: Vec<String> = std::env::args().collect(); + + let mut source_path: Option<PathBuf> = None; + let mut output_path: Option<PathBuf> = None; + + let mut i = 1; + while i < args.len() { + match args[i].as_str() { + "--source" => { + i += 1; + if i < args.len() { + source_path = Some(PathBuf::from(&args[i])); + } + } + "--output" => { + i += 1; + if i < args.len() { + output_path = Some(PathBuf::from(&args[i])); + } + } + "--help" | "-h" => { + print_usage(); + return Ok(()); + } + _ => { + eprintln!("Unknown argument: {}", args[i]); + print_usage(); + std::process::exit(1); + } + } + i += 1; + } + + // Determine paths relative to codegen directory + let codegen_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + + let source = source_path.unwrap_or_else(|| { + // From: lib/bindings/python/codegen + // To: lib/runtime/src/metrics/prometheus_names.rs + codegen_dir + .join("../../../runtime/src/metrics/prometheus_names.rs") + .canonicalize() + .expect("Failed to resolve source path") + }); + + let output = output_path.unwrap_or_else(|| { + // From: lib/bindings/python/codegen + // To: lib/bindings/python/src/dynamo/prometheus_names.py + codegen_dir + .join("../src/dynamo/prometheus_names.py") + .canonicalize() + .unwrap_or_else(|_| { + // If file doesn't exist yet, resolve the parent directory + let dir = codegen_dir + .join("../src/dynamo") + .canonicalize() + .expect("Failed to resolve output directory"); + dir.join("prometheus_names.py") + }) + }); + + println!("Generating Python prometheus_names from Rust source"); + println!("Source: {}", source.display()); + println!("Output: {}", output.display()); + println!(); + + let content = std::fs::read_to_string(&source) + .with_context(|| format!("Failed to read source file: {}", source.display()))?; + + println!("Parsing Rust AST..."); + let parser 
= PrometheusParser::parse_file(&content)?; + + println!("Found {} modules:", parser.modules.len()); + let mut module_names: Vec<&String> = parser.modules.keys().collect(); + module_names.sort(); + for name in module_names.iter() { + let module = &parser.modules[name.as_str()]; + println!( + " - {}: {} constants{}", + name, + module.constants.len(), + if module.is_macro_generated { + " (macro-generated)" + } else { + "" + } + ); + } + + println!("\nGenerating Python prometheus_names module..."); + let generator = PythonGenerator::new(&parser); + let python_code = generator.generate_python_file(); + + // Ensure output directory exists + if let Some(parent) = output.parent() { + std::fs::create_dir_all(parent) + .with_context(|| format!("Failed to create output directory: {}", parent.display()))?; + } + + std::fs::write(&output, python_code) + .with_context(|| format!("Failed to write output file: {}", output.display()))?; + + println!("✓ Generated Python prometheus_names: {}", output.display()); + println!("\nSuccess! Python module ready for import."); + + Ok(()) +} + +fn print_usage() { + println!( + r#" +gen-python-prometheus-names - Generate Python prometheus_names from Rust source + +Usage: gen-python-prometheus-names [OPTIONS] + +Parses lib/runtime/src/metrics/prometheus_names.rs and generates a pure Python +module with 1:1 constant mappings at lib/bindings/python/src/dynamo/prometheus_names.py + +This allows Python code to import Prometheus metric constants without Rust bindings: + from dynamo.prometheus_names import frontend_service, kvstats + +OPTIONS: + --source PATH Path to Rust source file + (default: lib/runtime/src/metrics/prometheus_names.rs) + + --output PATH Path to Python output file + (default: lib/bindings/python/src/dynamo/prometheus_names.py) + + --help, -h Print this help message + +EXAMPLES: + # Generate with default paths + cargo run -p dynamo-codegen --bin gen-python-prometheus-names + + # Generate with custom output + cargo run -p dynamo-codegen --bin gen-python-prometheus-names -- --output /tmp/test.py +"# + ); +} diff --git a/lib/bindings/python/codegen/src/lib.rs b/lib/bindings/python/codegen/src/lib.rs new file mode 100644 index 0000000000..59d74d2395 --- /dev/null +++ b/lib/bindings/python/codegen/src/lib.rs @@ -0,0 +1,8 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Code generation utilities for Dynamo project +//! +//! This crate provides tools to generate code from Rust sources to other languages. + +pub mod prometheus_parser; diff --git a/lib/bindings/python/codegen/src/prometheus_parser.rs b/lib/bindings/python/codegen/src/prometheus_parser.rs new file mode 100644 index 0000000000..ad120e4c9f --- /dev/null +++ b/lib/bindings/python/codegen/src/prometheus_parser.rs @@ -0,0 +1,228 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! 
Parser for prometheus_names.rs to extract constants and modules + +use anyhow::{Context, Result}; +use std::collections::HashMap; +use syn::{File, Item, ItemConst, ItemMacro, ItemMod}; + +#[derive(Debug, Clone)] +pub struct ConstantDef { + pub name: String, + pub value: String, + pub doc_comment: String, +} + +#[derive(Debug, Clone)] +pub struct ModuleDef { + pub name: String, + pub constants: Vec<ConstantDef>, + pub doc_comment: String, + pub is_macro_generated: bool, + pub macro_prefix: Option<String>, +} + +pub struct PrometheusParser { + pub modules: HashMap<String, ModuleDef>, +} + +impl PrometheusParser { + pub fn parse_file(content: &str) -> Result<Self> { + let ast: File = syn::parse_str(content).context("Failed to parse Rust file")?; + + let mut modules = HashMap::new(); + + for item in ast.items { + if let Item::Mod(module) = item { + if let Some(parsed_module) = Self::parse_module(&module)? { + modules.insert(parsed_module.name.clone(), parsed_module); + } + } + } + + Ok(Self { modules }) + } + + fn parse_module(module: &ItemMod) -> Result<Option<ModuleDef>> { + // Only process public modules + if !matches!(module.vis, syn::Visibility::Public(_)) { + return Ok(None); + } + + let module_name = module.ident.to_string(); + let doc_comment = Self::extract_doc_comment(&module.attrs); + + let (_, items) = match &module.content { + Some(content) => content, + None => return Ok(None), + }; + + let mut constants = Vec::new(); + let mut is_macro_generated = false; + let mut macro_prefix = None; + + for item in items { + match item { + Item::Const(const_item) => { + if let Some(const_def) = Self::parse_const(const_item)? { + constants.push(const_def); + } + } + Item::Macro(macro_item) => { + // Check if this is a macro_rules! that generates names with a prefix + if let Some(prefix) = Self::extract_macro_prefix(macro_item) { + is_macro_generated = true; + macro_prefix = Some(prefix); + } + } + _ => {} + } + } + + // Apply macro prefix to constants if needed + if is_macro_generated && macro_prefix.is_some() { + let prefix = macro_prefix.as_ref().unwrap(); + for constant in &mut constants { + // Only apply if the constant doesn't already have the prefix + if constant.name == "PREFIX" { + // PREFIX constant should be just the prefix with trailing underscore + continue; + } + // Check if value looks like it should have prefix applied + // (doesn't already start with the prefix) + if !constant.value.starts_with(prefix) { + constant.value = format!("{}_{}", prefix, constant.value); + } + } + } + + Ok(Some(ModuleDef { + name: module_name, + constants, + doc_comment, + is_macro_generated, + macro_prefix, + })) + } + + fn parse_const(const_item: &ItemConst) -> Result<Option<ConstantDef>> { + // Only process public constants + if !matches!(const_item.vis, syn::Visibility::Public(_)) { + return Ok(None); + } + + // Only process &str constants + let is_str_type = matches!(&*const_item.ty, syn::Type::Reference(type_ref) + if matches!(&*type_ref.elem, syn::Type::Path(path) + if path.path.segments.last().map(|s| s.ident == "str").unwrap_or(false))); + + if !is_str_type { + return Ok(None); + } + + let name = const_item.ident.to_string(); + let doc_comment = Self::extract_doc_comment(&const_item.attrs); + + // Extract the string value + let value = Self::extract_string_value(&const_item.expr)?; + + Ok(Some(ConstantDef { + name, + value, + doc_comment, + })) + } + + fn extract_string_value(expr: &syn::Expr) -> Result<String> { + match expr { + // Direct string literal: "value" + syn::Expr::Lit(lit_expr) => { + if let syn::Lit::Str(lit_str) = &lit_expr.lit { + Ok(lit_str.value()) + } else { + 
anyhow::bail!("Expected string literal") + } + } + // Macro invocation: some_macro!("value") + syn::Expr::Macro(macro_expr) => { + // Try to extract the string from macro arguments + Self::extract_from_macro_tokens(&macro_expr.mac.tokens) + } + // Method call: "value".to_string() + syn::Expr::MethodCall(method_call) => Self::extract_string_value(&method_call.receiver), + _ => anyhow::bail!("Unsupported expression type for constant value"), + } + } + + fn extract_from_macro_tokens(tokens: &proc_macro2::TokenStream) -> Result<String> { + // Parse the tokens to find string literals + let tokens_str = tokens.to_string(); + + // Look for string literals in the token stream + // This handles cases like: concat!("prefix_", "value") + let parts: Vec<&str> = tokens_str + .split('"') + .enumerate() + .filter(|(i, _)| i % 2 == 1) + .map(|(_, s)| s) + .collect(); + + if parts.is_empty() { + anyhow::bail!("No string literals found in macro"); + } + + // Concatenate all string parts (for concat! macro) + Ok(parts.join("")) + } + + fn extract_macro_prefix(macro_item: &ItemMacro) -> Option<String> { + // Check if this is a macro_rules! with a name ending in "_name" + let macro_name = macro_item.ident.as_ref()?.to_string(); + if !macro_name.ends_with("_name") { + return None; + } + + // Try to extract the prefix from the macro body + // Looking for patterns like: concat!("prefix_", $name) + let tokens_str = macro_item.mac.tokens.to_string(); + + // Look for concat! with a string literal + // Pattern: concat ! ( "prefix_" , ... + if let Some(concat_start) = tokens_str.find("concat !") { + let after_concat = &tokens_str[concat_start..]; + // Find the first string literal after concat! + if let Some(quote_start) = after_concat.find('"') { + let after_quote = &after_concat[quote_start + 1..]; + if let Some(quote_end) = after_quote.find('"') { + let prefix = &after_quote[..quote_end]; + // Remove trailing underscore if present + return Some(prefix.trim_end_matches('_').to_string()); + } + } + } + + None + } + + fn extract_doc_comment(attrs: &[syn::Attribute]) -> String { + let mut doc_lines = Vec::new(); + + for attr in attrs { + if attr.path().is_ident("doc") { + if let syn::Meta::NameValue(meta) = &attr.meta { + if let syn::Expr::Lit(lit) = &meta.value { + if let syn::Lit::Str(lit_str) = &lit.lit { + let line = lit_str.value().trim().to_string(); + if !line.is_empty() { + doc_lines.push(line); + } + } + } + } + } + } + + doc_lines.join("\n") + } +} diff --git a/lib/bindings/python/codegen/templates/prometheus_names.py.template b/lib/bindings/python/codegen/templates/prometheus_names.py.template new file mode 100644 index 0000000000..0976726457 --- /dev/null +++ b/lib/bindings/python/codegen/templates/prometheus_names.py.template @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Python constants for Prometheus metric names + +AUTO-GENERATED from lib/runtime/src/metrics/prometheus_names.rs +DO NOT EDIT THIS FILE MANUALLY + +To regenerate this file after modifying lib/runtime/src/metrics/prometheus_names.rs: + cargo run -p dynamo-codegen --bin gen-python-prometheus-names + +This module provides pure Python access to Prometheus metric name constants +without requiring Rust bindings. 
+ +Usage (both patterns supported): + # Pattern 1: Import module + from dynamo import prometheus_names + print(prometheus_names.frontend_service.REQUESTS_TOTAL) # "requests_total" + print(prometheus_names.kvstats.ACTIVE_BLOCKS) # "kvstats_active_blocks" + + # Pattern 2: Import specific classes + from dynamo.prometheus_names import frontend_service, kvstats + print(frontend_service.REQUESTS_TOTAL) # "requests_total" + print(kvstats.ACTIVE_BLOCKS) # "kvstats_active_blocks" +""" + +from __future__ import annotations + + diff --git a/lib/bindings/python/rust/lib.rs b/lib/bindings/python/rust/lib.rs index 3af5f5eddc..08f2cad512 100644 --- a/lib/bindings/python/rust/lib.rs +++ b/lib/bindings/python/rust/lib.rs @@ -56,7 +56,6 @@ mod llm; mod parsers; mod planner; mod prometheus_metrics; -mod prometheus_names; type JsonServerStreamingIngress = Ingress, ManyOut>>; @@ -185,7 +184,6 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> { engine::add_to_module(m)?; parsers::add_to_module(m)?; - prometheus_names::add_to_module(m)?; m.add_class::()?; let prometheus_metrics = PyModule::new(m.py(), "prometheus_metrics")?; diff --git a/lib/bindings/python/rust/prometheus_names.rs b/lib/bindings/python/rust/prometheus_names.rs deleted file mode 100644 index a636fa68ad..0000000000 --- a/lib/bindings/python/rust/prometheus_names.rs +++ /dev/null @@ -1,361 +0,0 @@ -// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -// SPDX-License-Identifier: Apache-2.0 - -//! Python bindings for Prometheus metric name constants -//! -//! ⚠️ **CRITICAL: SYNC WITH RUST SOURCE AND PYTHON TYPE STUBS** ⚠️ -//! This file exposes constants from `lib/runtime/src/metrics/prometheus_names.rs` to Python. -//! When the source file is modified, you MUST update BOTH files to match: -//! -//! 1. **This Rust file** - Update the actual Python bindings implementation -//! 2. **Python type stubs** - Update `lib/bindings/python/src/dynamo/_core.pyi` -//! The .pyi file provides type hints for IDEs and static type checkers. -//! Without updating it, IDEs won't recognize new classes/methods for autocomplete. -//! -//! The constants here should mirror the structure and values from the Rust source. -//! Any changes to metric names in the source must be reflected here immediately. -//! -//! Files to sync: -//! - Source: `lib/runtime/src/metrics/prometheus_names.rs` -//! - This file: `lib/bindings/python/rust/prometheus_names.rs` -//! - Type stubs: `lib/bindings/python/src/dynamo/_core.pyi` -//! -//! ## Python Usage Example -//! -//! ```python -//! from dynamo._core import prometheus_names -//! -//! # Access metrics directly (no constructor call needed!) -//! frontend = prometheus_names.frontend -//! print(frontend.requests_total) # "dynamo_frontend_requests_total" -//! print(frontend.queued_requests) # "dynamo_frontend_queued_requests" -//! print(frontend.inflight_requests) # "dynamo_frontend_inflight_requests" -//! print(frontend.disconnected_clients) # "dynamo_frontend_disconnected_clients" -//! print(frontend.request_duration_seconds) # "dynamo_frontend_request_duration_seconds" -//! print(frontend.input_sequence_tokens) # "dynamo_frontend_input_sequence_tokens" -//! print(frontend.output_sequence_tokens) # "dynamo_frontend_output_sequence_tokens" -//! print(frontend.time_to_first_token_seconds) # "dynamo_frontend_time_to_first_token_seconds" -//! print(frontend.inter_token_latency_seconds) # "dynamo_frontend_inter_token_latency_seconds" -//! 
print(frontend.model_context_length) # "dynamo_frontend_model_context_length" -//! print(frontend.model_kv_cache_block_size) # "dynamo_frontend_model_kv_cache_block_size" -//! print(frontend.model_migration_limit) # "dynamo_frontend_model_migration_limit" -//! -//! work_handler = prometheus_names.work_handler -//! print(work_handler.requests_total) # "dynamo_component_requests_total" -//! print(work_handler.request_bytes_total) # "dynamo_component_request_bytes_total" -//! print(work_handler.response_bytes_total) # "dynamo_component_response_bytes_total" -//! print(work_handler.inflight_requests) # "dynamo_component_inflight_requests" -//! print(work_handler.request_duration_seconds) # "dynamo_component_request_duration_seconds" -//! print(work_handler.errors_total) # "dynamo_component_errors_total" -//! -//! kvstats = prometheus_names.kvstats -//! print(kvstats.active_blocks) # "kvstats_active_blocks" -//! print(kvstats.total_blocks) # "kvstats_total_blocks" -//! print(kvstats.gpu_cache_usage_percent) # "kvstats_gpu_cache_usage_percent" -//! print(kvstats.gpu_prefix_cache_hit_rate) # "kvstats_gpu_prefix_cache_hit_rate" -//! -//! # Use in Prometheus queries -//! query = f"rate({frontend.requests_total}[5m])" -//! pattern = rf'{work_handler.requests_total}\{{[^}}]*model="[^"]*"[^}}]*\}}' -//! ``` - -use dynamo_runtime::metrics::prometheus_names::*; -use pyo3::prelude::*; - -/// Main container for all Prometheus metric name constants -#[pyclass] -pub struct PrometheusNames; - -#[pymethods] -impl PrometheusNames { - /// Frontend service metrics - #[getter] - fn frontend(&self) -> FrontendService { - FrontendService - } - - /// Work handler metrics - #[getter] - fn work_handler(&self) -> WorkHandler { - WorkHandler - } - - /// KV stats metrics - #[getter] - fn kvstats(&self) -> KvStatsMetrics { - KvStatsMetrics - } -} - -/// Frontend service metrics (LLM HTTP service) -/// These methods return the full metric names with the "dynamo_frontend_" prefix -/// -/// Note: We use instance methods instead of static methods for better Python ergonomics -/// - The `concat!` macro only accepts string literals, not const references -/// - We need to combine `name_prefix::FRONTEND` + `frontend_service::*` constants at runtime -/// - This ensures we use actual Rust constants rather than hardcoded literals -#[pyclass] -pub struct FrontendService; - -#[pymethods] -impl FrontendService { - /// Total number of LLM requests processed - #[getter] - fn requests_total(&self) -> String { - format!( - "{}_{}", - name_prefix::FRONTEND, - frontend_service::REQUESTS_TOTAL - ) - } - - /// Number of requests waiting in HTTP queue before receiving the first response - #[getter] - fn queued_requests(&self) -> String { - format!( - "{}_{}", - name_prefix::FRONTEND, - frontend_service::QUEUED_REQUESTS - ) - } - - /// Number of inflight requests going to the engine (vLLM, SGLang, ...) 
- #[getter] - fn inflight_requests(&self) -> String { - format!( - "{}_{}", - name_prefix::FRONTEND, - frontend_service::INFLIGHT_REQUESTS - ) - } - - /// Duration of LLM requests - #[getter] - fn request_duration_seconds(&self) -> String { - format!( - "{}_{}", - name_prefix::FRONTEND, - frontend_service::REQUEST_DURATION_SECONDS - ) - } - - /// Input sequence length in tokens - #[getter] - fn input_sequence_tokens(&self) -> String { - format!( - "{}_{}", - name_prefix::FRONTEND, - frontend_service::INPUT_SEQUENCE_TOKENS - ) - } - - /// Output sequence length in tokens - #[getter] - fn output_sequence_tokens(&self) -> String { - format!( - "{}_{}", - name_prefix::FRONTEND, - frontend_service::OUTPUT_SEQUENCE_TOKENS - ) - } - - /// Time to first token in seconds - #[getter] - fn time_to_first_token_seconds(&self) -> String { - format!( - "{}_{}", - name_prefix::FRONTEND, - frontend_service::TIME_TO_FIRST_TOKEN_SECONDS - ) - } - - /// Inter-token latency in seconds - #[getter] - fn inter_token_latency_seconds(&self) -> String { - format!( - "{}_{}", - name_prefix::FRONTEND, - frontend_service::INTER_TOKEN_LATENCY_SECONDS - ) - } - - /// Number of disconnected clients - #[getter] - fn disconnected_clients(&self) -> String { - format!( - "{}_{}", - name_prefix::FRONTEND, - frontend_service::DISCONNECTED_CLIENTS - ) - } - - /// Model total KV blocks - #[getter] - fn model_total_kv_blocks(&self) -> String { - format!( - "{}_{}", - name_prefix::FRONTEND, - frontend_service::MODEL_TOTAL_KV_BLOCKS - ) - } - - /// Model max number of sequences - #[getter] - fn model_max_num_seqs(&self) -> String { - format!( - "{}_{}", - name_prefix::FRONTEND, - frontend_service::MODEL_MAX_NUM_SEQS - ) - } - - /// Model max number of batched tokens - #[getter] - fn model_max_num_batched_tokens(&self) -> String { - format!( - "{}_{}", - name_prefix::FRONTEND, - frontend_service::MODEL_MAX_NUM_BATCHED_TOKENS - ) - } - - /// Model context length - #[getter] - fn model_context_length(&self) -> String { - format!( - "{}_{}", - name_prefix::FRONTEND, - frontend_service::MODEL_CONTEXT_LENGTH - ) - } - - /// Model KV cache block size - #[getter] - fn model_kv_cache_block_size(&self) -> String { - format!( - "{}_{}", - name_prefix::FRONTEND, - frontend_service::MODEL_KV_CACHE_BLOCK_SIZE - ) - } - - /// Model migration limit - #[getter] - fn model_migration_limit(&self) -> String { - format!( - "{}_{}", - name_prefix::FRONTEND, - frontend_service::MODEL_MIGRATION_LIMIT - ) - } -} - -/// Work handler metrics (component request processing) -/// These methods return the full metric names with the "dynamo_component_" prefix -#[pyclass] -pub struct WorkHandler; - -#[pymethods] -impl WorkHandler { - /// Total number of requests processed by work handler - #[getter] - fn requests_total(&self) -> String { - format!( - "{}_{}", - name_prefix::COMPONENT, - work_handler::REQUESTS_TOTAL - ) - } - - /// Total number of bytes received in requests by work handler - #[getter] - fn request_bytes_total(&self) -> String { - format!( - "{}_{}", - name_prefix::COMPONENT, - work_handler::REQUEST_BYTES_TOTAL - ) - } - - /// Total number of bytes sent in responses by work handler - #[getter] - fn response_bytes_total(&self) -> String { - format!( - "{}_{}", - name_prefix::COMPONENT, - work_handler::RESPONSE_BYTES_TOTAL - ) - } - - /// Number of requests currently being processed by work handler - #[getter] - fn inflight_requests(&self) -> String { - format!( - "{}_{}", - name_prefix::COMPONENT, - work_handler::INFLIGHT_REQUESTS - ) - } - - /// Time 
spent processing requests by work handler (histogram) - #[getter] - fn request_duration_seconds(&self) -> String { - format!( - "{}_{}", - name_prefix::COMPONENT, - work_handler::REQUEST_DURATION_SECONDS - ) - } - - /// Total number of errors in work handler processing - #[getter] - fn errors_total(&self) -> String { - format!("{}_{}", name_prefix::COMPONENT, work_handler::ERRORS_TOTAL) - } -} - -/// KV stats metrics (KV cache statistics) -/// These methods return the metric names with the "kvstats_" prefix -#[pyclass] -pub struct KvStatsMetrics; - -#[pymethods] -impl KvStatsMetrics { - /// Number of active KV cache blocks currently in use - #[getter] - fn active_blocks(&self) -> String { - kvstats::ACTIVE_BLOCKS.to_string() - } - - /// Total number of KV cache blocks available - #[getter] - fn total_blocks(&self) -> String { - kvstats::TOTAL_BLOCKS.to_string() - } - - /// GPU cache usage as a percentage (0.0-1.0) - #[getter] - fn gpu_cache_usage_percent(&self) -> String { - kvstats::GPU_CACHE_USAGE_PERCENT.to_string() - } - - /// GPU prefix cache hit rate as a percentage (0.0-1.0) - #[getter] - fn gpu_prefix_cache_hit_rate(&self) -> String { - kvstats::GPU_PREFIX_CACHE_HIT_RATE.to_string() - } -} - -/// Add prometheus_names module to the Python bindings -pub fn add_to_module(m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - - // Add a module-level singleton instance for convenience - let prometheus_names_instance = PrometheusNames; - m.add("prometheus_names", prometheus_names_instance)?; - - Ok(()) -} diff --git a/lib/bindings/python/src/dynamo/_prometheus_names.pyi b/lib/bindings/python/src/dynamo/_prometheus_names.pyi deleted file mode 100644 index 94cafc1225..0000000000 --- a/lib/bindings/python/src/dynamo/_prometheus_names.pyi +++ /dev/null @@ -1,235 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 - -""" -Python type stubs for Prometheus metric name constants - -⚠️ **CRITICAL: SYNC WITH RUST SOURCE** ⚠️ -This file must stay in sync with: -- Source: `lib/runtime/src/metrics/prometheus_names.rs` -- Bindings: `lib/bindings/python/rust/prometheus_names.rs` - -When the Rust source is modified, update all three files immediately. -""" - -class PrometheusNames: - """ - Main container for all Prometheus metric name constants - """ - - @property - def frontend(self) -> FrontendService: - """ - Frontend service metrics - """ - ... - - @property - def work_handler(self) -> WorkHandler: - """ - Work handler metrics - """ - ... - - @property - def kvstats(self) -> KvStatsMetrics: - """ - KV stats metrics - """ - ... - -class FrontendService: - """ - Frontend service metrics (LLM HTTP service) - These methods return the full metric names with the "dynamo_frontend_" prefix - """ - - @property - def requests_total(self) -> str: - """ - Total number of LLM requests processed - """ - ... - - @property - def queued_requests(self) -> str: - """ - Number of requests waiting in HTTP queue before receiving the first response - """ - ... - - @property - def inflight_requests(self) -> str: - """ - Number of inflight requests going to the engine (vLLM, SGLang, ...) - """ - ... - - @property - def request_duration_seconds(self) -> str: - """ - Duration of LLM requests - """ - ... - - @property - def input_sequence_tokens(self) -> str: - """ - Input sequence length in tokens - """ - ... 
- - @property - def output_sequence_tokens(self) -> str: - """ - Output sequence length in tokens - """ - ... - - @property - def time_to_first_token_seconds(self) -> str: - """ - Time to first token in seconds - """ - ... - - @property - def inter_token_latency_seconds(self) -> str: - """ - Inter-token latency in seconds - """ - ... - - @property - def disconnected_clients(self) -> str: - """ - Number of disconnected clients - """ - ... - - @property - def model_total_kv_blocks(self) -> str: - """ - Model total KV blocks - """ - ... - - @property - def model_max_num_seqs(self) -> str: - """ - Model max number of sequences - """ - ... - - @property - def model_max_num_batched_tokens(self) -> str: - """ - Model max number of batched tokens - """ - ... - - @property - def model_context_length(self) -> str: - """ - Model context length - """ - ... - - @property - def model_kv_cache_block_size(self) -> str: - """ - Model KV cache block size - """ - ... - - @property - def model_migration_limit(self) -> str: - """ - Model migration limit - """ - ... - -class WorkHandler: - """ - Work handler metrics (component request processing) - These methods return the full metric names with the "dynamo_component_" prefix - """ - - @property - def requests_total(self) -> str: - """ - Total number of requests processed by work handler - """ - ... - - @property - def request_bytes_total(self) -> str: - """ - Total number of bytes received in requests by work handler - """ - ... - - @property - def response_bytes_total(self) -> str: - """ - Total number of bytes sent in responses by work handler - """ - ... - - @property - def inflight_requests(self) -> str: - """ - Number of requests currently being processed by work handler - """ - ... - - @property - def request_duration_seconds(self) -> str: - """ - Time spent processing requests by work handler (histogram) - """ - ... - - @property - def errors_total(self) -> str: - """ - Total number of errors in work handler processing - """ - ... - -class KvStatsMetrics: - """ - KV stats metrics (KV cache statistics) - These methods return the metric names with the "kvstats_" prefix - """ - - @property - def active_blocks(self) -> str: - """ - Number of active KV cache blocks currently in use - """ - ... - - @property - def total_blocks(self) -> str: - """ - Total number of KV cache blocks available - """ - ... - - @property - def gpu_cache_usage_percent(self) -> str: - """ - GPU cache usage as a percentage (0.0-1.0) - """ - ... - - @property - def gpu_prefix_cache_hit_rate(self) -> str: - """ - GPU prefix cache hit rate as a percentage (0.0-1.0) - """ - ... - -# Module-level singleton instance for convenient access -prometheus_names: PrometheusNames diff --git a/lib/bindings/python/src/dynamo/prometheus_names.py b/lib/bindings/python/src/dynamo/prometheus_names.py new file mode 100644 index 0000000000..a380b3e65b --- /dev/null +++ b/lib/bindings/python/src/dynamo/prometheus_names.py @@ -0,0 +1,201 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Python constants for Prometheus metric names + +AUTO-GENERATED from lib/runtime/src/metrics/prometheus_names.rs +DO NOT EDIT THIS FILE MANUALLY + +To regenerate this file after modifying lib/runtime/src/metrics/prometheus_names.rs: + cargo run -p dynamo-codegen --bin gen-python-prometheus-names + +This module provides pure Python access to Prometheus metric name constants +without requiring Rust bindings. 
+ +Usage (both patterns supported): + # Pattern 1: Import module + from dynamo import prometheus_names + print(prometheus_names.frontend_service.REQUESTS_TOTAL) # "requests_total" + print(prometheus_names.kvstats.ACTIVE_BLOCKS) # "kvstats_active_blocks" + + # Pattern 2: Import specific classes + from dynamo.prometheus_names import frontend_service, kvstats + print(frontend_service.REQUESTS_TOTAL) # "requests_total" + print(kvstats.ACTIVE_BLOCKS) # "kvstats_active_blocks" +""" + +from __future__ import annotations + + +class distributed_runtime: + """DistributedRuntime core metrics""" + + # Total uptime of the DistributedRuntime in seconds + UPTIME_SECONDS = "uptime_seconds" + + +class frontend_service: + """Frontend service metrics (LLM HTTP service)""" + + # Environment variable that overrides the default metric prefix + METRICS_PREFIX_ENV = "DYN_METRICS_PREFIX" + # Total number of LLM requests processed + REQUESTS_TOTAL = "requests_total" + # Number of requests waiting in HTTP queue before receiving the first response (gauge) + QUEUED_REQUESTS = "queued_requests" + # Number of inflight/concurrent requests going to the engine (vLLM, SGLang, ...) + # Note: This is a gauge metric (current state) that can go up and down, so no _total suffix + INFLIGHT_REQUESTS = "inflight_requests" + # Number of disconnected clients (gauge that can go up and down) + DISCONNECTED_CLIENTS = "disconnected_clients" + # Duration of LLM requests + REQUEST_DURATION_SECONDS = "request_duration_seconds" + # Input sequence length in tokens + INPUT_SEQUENCE_TOKENS = "input_sequence_tokens" + # Output sequence length in tokens + OUTPUT_SEQUENCE_TOKENS = "output_sequence_tokens" + # Time to first token in seconds + TIME_TO_FIRST_TOKEN_SECONDS = "time_to_first_token_seconds" + # Inter-token latency in seconds + INTER_TOKEN_LATENCY_SECONDS = "inter_token_latency_seconds" + # Model configuration metrics + # Runtime config metrics (from ModelRuntimeConfig): + # Total KV blocks available for a worker serving the model + MODEL_TOTAL_KV_BLOCKS = "model_total_kv_blocks" + # Maximum number of sequences for a worker serving the model (runtime config) + MODEL_MAX_NUM_SEQS = "model_max_num_seqs" + # Maximum number of batched tokens for a worker serving the model (runtime config) + MODEL_MAX_NUM_BATCHED_TOKENS = "model_max_num_batched_tokens" + # MDC metrics (from ModelDeploymentCard): + # Maximum context length for a worker serving the model (MDC) + MODEL_CONTEXT_LENGTH = "model_context_length" + # KV cache block size for a worker serving the model (MDC) + MODEL_KV_CACHE_BLOCK_SIZE = "model_kv_cache_block_size" + # Request migration limit for a worker serving the model (MDC) + MODEL_MIGRATION_LIMIT = "model_migration_limit" + + +class kvbm_connector: + """KVBM connector""" + + # KVBM connector leader + KVBM_CONNECTOR_LEADER = "kvbm_connector_leader" + # KVBM connector worker + KVBM_CONNECTOR_WORKER = "kvbm_connector_worker" + + +class kvrouter: + # Number of KV cache events applied to the index (including status) + KV_CACHE_EVENTS_APPLIED = "kv_cache_events_applied" + + +class kvstats: + """KvStats metrics from LLM workers""" + + # Prefix for all KvStats metrics + PREFIX = "" + # Number of active KV cache blocks currently in use + ACTIVE_BLOCKS = "kvstats_active_blocks" + # Total number of KV cache blocks available + TOTAL_BLOCKS = "kvstats_total_blocks" + # GPU cache usage as a percentage (0.0-1.0) + GPU_CACHE_USAGE_PERCENT = "kvstats_gpu_cache_usage_percent" + # GPU prefix cache hit rate as a percentage (0.0-1.0) + 
GPU_PREFIX_CACHE_HIT_RATE = "kvstats_gpu_prefix_cache_hit_rate" + + +class labels: + """Automatically inserted Prometheus label names used across the metrics system""" + + # Label for component identification + COMPONENT = "dynamo_component" + # Label for namespace identification + NAMESPACE = "dynamo_namespace" + # Label for endpoint identification + ENDPOINT = "dynamo_endpoint" + + +class name_prefix: + """Metric name prefixes used across the metrics system""" + + # Prefix for all Prometheus metric names. + COMPONENT = "dynamo_component" + # Prefix for frontend service metrics + FRONTEND = "dynamo_frontend" + + +class nats_client: + """NATS client metrics. DistributedRuntime contains a NATS client shared by all children)""" + + # Prefix for all NATS client metrics + PREFIX = "" + # Total number of bytes received by NATS client + IN_TOTAL_BYTES = "nats_client_in_total_bytes" + # Total number of bytes sent by NATS client + OUT_OVERHEAD_BYTES = "nats_client_out_overhead_bytes" + # Total number of messages received by NATS client + IN_MESSAGES = "nats_client_in_messages" + # Total number of messages sent by NATS client + OUT_MESSAGES = "nats_client_out_messages" + # Current number of active connections for NATS client + # Note: Gauge metric measuring current connections, not cumulative total + CURRENT_CONNECTIONS = "nats_client_current_connections" + # Current connection state of NATS client (0=disconnected, 1=connected, 2=reconnecting) + CONNECTION_STATE = "nats_client_connection_state" + + +class nats_service: + """NATS service metrics, from the $SRV.STATS. requests on NATS server""" + + # Prefix for all NATS service metrics + PREFIX = "" + # Average processing time in milliseconds (maps to: average_processing_time in ms) + PROCESSING_MS_AVG = "nats_service_processing_ms_avg" + # Total errors across all endpoints (maps to: num_errors) + ERRORS_TOTAL = "nats_service_errors_total" + # Total requests across all endpoints (maps to: num_requests) + REQUESTS_TOTAL = "nats_service_requests_total" + # Total processing time in milliseconds (maps to: processing_time in ms) + PROCESSING_MS_TOTAL = "nats_service_processing_ms_total" + # Number of active services (derived from ServiceSet.services) + ACTIVE_SERVICES = "nats_service_active_services" + # Number of active endpoints (derived from ServiceInfo.endpoints) + ACTIVE_ENDPOINTS = "nats_service_active_endpoints" + + +class task_tracker: + """Task tracker Prometheus metric name suffixes""" + + # Total number of tasks issued/submitted + TASKS_ISSUED_TOTAL = "tasks_issued_total" + # Total number of tasks started + TASKS_STARTED_TOTAL = "tasks_started_total" + # Total number of successfully completed tasks + TASKS_SUCCESS_TOTAL = "tasks_success_total" + # Total number of cancelled tasks + TASKS_CANCELLED_TOTAL = "tasks_cancelled_total" + # Total number of failed tasks + TASKS_FAILED_TOTAL = "tasks_failed_total" + # Total number of rejected tasks + TASKS_REJECTED_TOTAL = "tasks_rejected_total" + + +class work_handler: + """Work handler Prometheus metric names""" + + # Total number of requests processed by work handler + REQUESTS_TOTAL = "requests_total" + # Total number of bytes received in requests by work handler + REQUEST_BYTES_TOTAL = "request_bytes_total" + # Total number of bytes sent in responses by work handler + RESPONSE_BYTES_TOTAL = "response_bytes_total" + # Number of requests currently being processed by work handler + # Note: This is a gauge metric (current state) that can go up and down, so no _total suffix + INFLIGHT_REQUESTS = 
"inflight_requests" + # Time spent processing requests by work handler (histogram) + REQUEST_DURATION_SECONDS = "request_duration_seconds" + # Total number of errors in work handler processing + ERRORS_TOTAL = "errors_total" + # Label name for error type classification + ERROR_TYPE_LABEL = "error_type" diff --git a/lib/runtime/src/metrics/prometheus_names.rs b/lib/runtime/src/metrics/prometheus_names.rs index de4a786916..5f6af41e4d 100644 --- a/lib/runtime/src/metrics/prometheus_names.rs +++ b/lib/runtime/src/metrics/prometheus_names.rs @@ -6,12 +6,12 @@ //! This module provides centralized Prometheus metric name constants and sanitization functions //! for various components to ensure consistency and avoid duplication across the codebase. //! -//! ⚠️ **CRITICAL: SYNC WITH PYTHON BINDINGS** ⚠️ -//! When modifying constants in this file, you MUST also update: -//! `lib/bindings/python/rust/prometheus_names.rs` +//! ⚠️ **CRITICAL: REGENERATE PYTHON FILE AFTER CHANGES** ⚠️ +//! When modifying constants in this file, regenerate the Python module: +//! cargo run -p dynamo-codegen --bin gen-python-prometheus-names //! -//! The Python bindings expose these constants to Python code and must stay in sync. -//! Any changes here should be reflected in the Python bindings immediately. +//! This generates `lib/bindings/python/src/dynamo/prometheus_names.py` +//! with pure Python constants (no Rust bindings needed). //! //! ## Naming Conventions //! @@ -84,8 +84,7 @@ pub mod labels { /// Frontend service metrics (LLM HTTP service) /// -/// ⚠️ SYNC ALERT: These constants are exposed to Python via: -/// `lib/bindings/python/rust/prometheus_names.rs` - FrontendService class +/// ⚠️ Python codegen: Run gen-python-prometheus-names after changes pub mod frontend_service { // TODO: Move DYN_METRICS_PREFIX and other environment variable names to environment_names.rs // for centralized environment variable constant management across the codebase diff --git a/tests/utils/payloads.py b/tests/utils/payloads.py index e7b547e576..a7a7bdc551 100644 --- a/tests/utils/payloads.py +++ b/tests/utils/payloads.py @@ -20,7 +20,7 @@ from dataclasses import dataclass from typing import Any, Dict, List -from dynamo._core import prometheus_names +from dynamo import prometheus_names logger = logging.getLogger(__name__) @@ -206,7 +206,7 @@ def response_handler(self, response: Any) -> str: return response.text def validate(self, response: Any, content: str) -> None: - requests_total_name = prometheus_names.work_handler.requests_total + requests_total_name = prometheus_names.work_handler.REQUESTS_TOTAL pattern = ( rf'{re.escape(requests_total_name)}\{{[^}}]*model="[^"]*"[^}}]*\}}\s+(\d+)' )