Implement HTTP endpoint for debug config

Aphoh · Aphoh · commit d77290403d47 · 2025-10-07T10:40:00.000-07:00
Signed-off-by: William Arnold <7565007+Aphoh@users.noreply.github.com>
diff --git a/components/src/dynamo/common/config_dump/__init__.py b/components/src/dynamo/common/config_dump/__init__.py
@@ -12,6 +12,7 @@
     add_config_dump_args,
     dump_config,
     get_config_dump,
+    get_config_endpoint,
     register_encoder,
 )
 from dynamo.common.config_dump.environment import get_environment_vars
@@ -25,6 +26,7 @@
     "add_config_dump_args",
     "dump_config",
     "get_config_dump",
+    "get_config_endpoint",
     "get_environment_vars",
     "get_gpu_info",
     "get_runtime_info",
diff --git a/components/src/dynamo/common/config_dump/config_dumper.py b/components/src/dynamo/common/config_dump/config_dumper.py
@@ -8,7 +8,7 @@
 import logging
 import pathlib
 from enum import Enum
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Union
 
 from dynamo.common._version import __version__
 
@@ -77,6 +77,16 @@ def _get_vllm_version() -> Optional[str]:
         return None
 
 
+async def get_config_endpoint(config: Any, request=None):
+    try:
+        # Yield the config dump as a JSON string. TODO: a dict payload doesn't
+        # get sent through the endpoint correctly, so the string is used.
+        yield {"status": "success", "message": get_config_dump(config)}
+    except Exception as e:
+        logger.exception("Unexpected error dumping config")
+        yield {"status": "error", "message": str(e)}
+
+
 def dump_config(dump_config_to: Optional[str], config: Any) -> None:
     """
     Dump the configuration to a file or stdout.
@@ -112,6 +122,15 @@ def dump_config(dump_config_to: Optional[str], config: Any) -> None:
 def get_config_dump(config: Any, extra_info: Optional[Dict[str, Any]] = None) -> str:
     """
     Collect comprehensive config information about a backend instance.
+    """
+    return canonical_json_encoder.encode(_get_config_dump_data(config, extra_info))
+
+
+def _get_config_dump_data(
+    config: Any, extra_info: Optional[Dict[str, Any]] = None
+) -> Dict[str, Any]:
+    """
+    Collect comprehensive config information about a backend instance.
 
     Args:
         config: Any JSON-serializable object containing the backend configuration.
@@ -148,7 +167,7 @@ def get_config_dump(config: Any, extra_info: Optional[Dict[str, Any]] = None) ->
         if extra_info:
             config_dump.update(extra_info)
 
-        return canonical_json_encoder.encode(config_dump)
+        return config_dump
 
     except Exception as e:
         logger.error(f"Error collecting config dump: {e}")
@@ -157,7 +176,7 @@ def get_config_dump(config: Any, extra_info: Optional[Dict[str, Any]] = None) ->
             "error": f"Failed to collect config dump: {str(e)}",
             "system_info": get_system_info(),  # Always try to include basic system info
         }
-        return canonical_json_encoder.encode(error_info)
+        return error_info
 
 
 def add_config_dump_args(parser: argparse.ArgumentParser):
@@ -176,13 +195,15 @@ def add_config_dump_args(parser: argparse.ArgumentParser):
 
 
 @functools.singledispatch
-def _preprocess_for_encode(obj: object) -> object:
+def _preprocess_for_encode(obj: object) -> Union[Dict[str, Any], str]:
     """
     Single dispatch function for preprocessing objects before JSON encoding.
 
     This function should be extended using @register_encoder decorator
     for backend-specific types.
     """
+    if isinstance(obj, dict):
+        return obj
     if dataclasses.is_dataclass(obj) and not isinstance(obj, type):
         return dataclasses.asdict(obj)
     logger.warning(f"Unknown type {type(obj)}, using __dict__ or str(obj)")
diff --git a/components/src/dynamo/sglang/main.py b/components/src/dynamo/sglang/main.py
@@ -2,6 +2,8 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import asyncio
+import functools
+import json
 import logging
 import signal
 import sys
@@ -10,7 +12,7 @@
 import uvloop
 
 from dynamo.llm import ModelInput, ModelType
-from dynamo.common.config_dump import dump_config
+from dynamo.common.config_dump import dump_config, get_config_endpoint
 from dynamo.runtime import DistributedRuntime, dynamo_worker
 from dynamo.runtime.logging import configure_dynamo_logging
 from dynamo.sglang.args import Config, DisaggregationMode, parse_args
@@ -75,6 +77,7 @@ async def init(runtime: DistributedRuntime, config: Config):
     await component.create_service()
 
     generate_endpoint = component.endpoint(dynamo_args.endpoint)
+    dump_config_endpoint = component.endpoint("dump_config")
 
     prefill_client = None
     if config.serving_mode == DisaggregationMode.DECODE:
@@ -115,6 +118,10 @@ async def init(runtime: DistributedRuntime, config: Config):
                 dynamo_args,
                 readiness_gate=ready_event,
             ),
+            dump_config_endpoint.serve_endpoint(
+                functools.partial(get_config_endpoint, config),
+                metrics_labels=[("model", server_args.served_model_name)],
+            ),
         )
     except Exception as e:
         logging.error(f"Failed to serve endpoints: {e}")
@@ -140,6 +147,7 @@ async def init_prefill(runtime: DistributedRuntime, config: Config):
     await component.create_service()
 
     generate_endpoint = component.endpoint(dynamo_args.endpoint)
+    dump_config_endpoint = component.endpoint("dump_config")
 
     handler = PrefillWorkerHandler(component, engine, config)
 
@@ -151,7 +159,11 @@ async def init_prefill(runtime: DistributedRuntime, config: Config):
             graceful_shutdown=True,
             metrics_labels=[("model", server_args.served_model_name)],
             health_check_payload=health_check_payload,
-        )
+        ),
+        dump_config_endpoint.serve_endpoint(
+            functools.partial(get_config_endpoint, config),
+            metrics_labels=[("model", server_args.served_model_name)],
+        ),
     ]
 
     try:
@@ -206,6 +218,10 @@ async def init_embedding(runtime: DistributedRuntime, config: Config):
                 output_type=ModelType.Embedding,
                 readiness_gate=ready_event,
             ),
+            dump_config_endpoint.serve_endpoint(
+                functools.partial(get_config_endpoint, config),
+                metrics_labels=[("model", server_args.served_model_name)],
+            ),
         )
     except Exception as e:
         logging.error(f"Failed to serve embedding endpoints: {e}")
@@ -229,6 +245,7 @@ async def init_multimodal_processor(runtime: DistributedRuntime, config: Config)
     await component.create_service()
 
     generate_endpoint = component.endpoint(dynamo_args.endpoint)
+    dump_config_endpoint = component.endpoint("dump_config")
 
     # For processor, we need to connect to the encode worker
     encode_worker_client = (
@@ -260,6 +277,10 @@ async def init_multimodal_processor(runtime: DistributedRuntime, config: Config)
                 input_type=ModelInput.Text,
                 readiness_gate=ready_event,
             ),
+            dump_config_endpoint.serve_endpoint(
+                functools.partial(get_config_endpoint, config),
+                metrics_labels=[("model", server_args.served_model_name)],
+            ),
         )
     except Exception as e:
         logging.error(f"Failed to serve endpoints: {e}")
@@ -278,6 +299,7 @@ async def init_multimodal_encode_worker(runtime: DistributedRuntime, config: Con
     await component.create_service()
 
     generate_endpoint = component.endpoint(dynamo_args.endpoint)
+    dump_config_endpoint = component.endpoint("dump_config")
 
     # For encode worker, we need to connect to the downstream LLM worker
     pd_worker_client = (
@@ -297,7 +319,11 @@ async def init_multimodal_encode_worker(runtime: DistributedRuntime, config: Con
             handler.generate,
             graceful_shutdown=True,
             metrics_labels=[("model", server_args.served_model_name)],
-        )
+        ),
+        dump_config_endpoint.serve_endpoint(
+            functools.partial(get_config_endpoint, config),
+            metrics_labels=[("model", server_args.served_model_name)],
+        ),
     ]
 
     try:
@@ -319,6 +345,7 @@ async def init_multimodal_worker(runtime: DistributedRuntime, config: Config):
     await component.create_service()
 
     generate_endpoint = component.endpoint(dynamo_args.endpoint)
+    dump_config_endpoint = component.endpoint("dump_config")
 
     engine = sgl.Engine(server_args=server_args)
 
@@ -337,10 +364,16 @@ async def init_multimodal_worker(runtime: DistributedRuntime, config: Config):
     await handler.async_init()
 
     try:
-        await generate_endpoint.serve_endpoint(
-            handler.generate,
-            metrics_labels=[("model", server_args.served_model_name)],
-            graceful_shutdown=True,
+        await asyncio.gather(
+            generate_endpoint.serve_endpoint(
+                handler.generate,
+                metrics_labels=[("model", server_args.served_model_name)],
+                graceful_shutdown=True,
+            ),
+            dump_config_endpoint.serve_endpoint(
+                functools.partial(get_config_endpoint, config),
+                metrics_labels=[("model", server_args.served_model_name)],
+            ),
         )
     except Exception as e:
         logging.error(f"Failed to serve endpoints: {e}")
@@ -361,6 +394,7 @@ async def init_multimodal_prefill_worker(runtime: DistributedRuntime, config: Co
     await component.create_service()
 
     generate_endpoint = component.endpoint(dynamo_args.endpoint)
+    dump_config_endpoint = component.endpoint("dump_config")
 
     handler = MultimodalPrefillWorkerHandler(component, engine, config)
     await handler.async_init()
@@ -374,7 +408,11 @@ async def init_multimodal_prefill_worker(runtime: DistributedRuntime, config: Co
                 graceful_shutdown=True,
                 metrics_labels=[("model", server_args.served_model_name)],
                 health_check_payload=health_check_payload,
-            )
+            ),
+            dump_config_endpoint.serve_endpoint(
+                functools.partial(get_config_endpoint, config),
+                metrics_labels=[("model", server_args.served_model_name)],
+            ),
         )
     except Exception as e:
         logging.error(f"Failed to serve endpoints: {e}")
diff --git a/lib/llm/src/entrypoint/input/http.rs b/lib/llm/src/entrypoint/input/http.rs
@@ -57,7 +57,7 @@ pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Resul
             let distributed_runtime = DistributedRuntime::from_settings(runtime.clone()).await?;
             let etcd_client = distributed_runtime.etcd_client();
             // This allows the /health endpoint to query etcd for active instances
-            http_service_builder = http_service_builder.with_etcd_client(etcd_client.clone());
+            http_service_builder = http_service_builder.with_drt(Some(distributed_runtime.clone()));
             let http_service = http_service_builder.build()?;
             match etcd_client {
                 Some(ref etcd_client) => {
diff --git a/lib/llm/src/http/service.rs b/lib/llm/src/http/service.rs
@@ -25,6 +25,7 @@ pub mod error;
 pub mod health;
 pub mod metrics;
 pub mod service_v2;
+pub mod config;
 
 pub use axum;
 pub use metrics::Metrics;
diff --git a/lib/llm/src/http/service/config.rs b/lib/llm/src/http/service/config.rs
diff --git a/lib/llm/src/http/service/service_v2.rs b/lib/llm/src/http/service/service_v2.rs