Skip to content

Commit d772904

Browse files
committed
Implement http endpoint for debug config
Signed-off-by: William Arnold <[email protected]>
1 parent 33c6e2f commit d772904

File tree

7 files changed

+321
-19
lines changed

7 files changed

+321
-19
lines changed

components/src/dynamo/common/config_dump/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
add_config_dump_args,
1313
dump_config,
1414
get_config_dump,
15+
get_config_endpoint,
1516
register_encoder,
1617
)
1718
from dynamo.common.config_dump.environment import get_environment_vars
@@ -25,6 +26,7 @@
2526
"add_config_dump_args",
2627
"dump_config",
2728
"get_config_dump",
29+
"get_config_endpoint",
2830
"get_environment_vars",
2931
"get_gpu_info",
3032
"get_runtime_info",

components/src/dynamo/common/config_dump/config_dumper.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import logging
99
import pathlib
1010
from enum import Enum
11-
from typing import Any, Dict, Optional
11+
from typing import Any, Dict, Optional, Union
1212

1313
from dynamo.common._version import __version__
1414

@@ -77,6 +77,16 @@ def _get_vllm_version() -> Optional[str]:
7777
return None
7878

7979

80+
async def get_config_endpoint(config: Any, request=None):
    """
    Endpoint handler that yields a single response carrying the config dump.

    Args:
        config: Backend configuration object, passed through to
            ``get_config_dump``.
        request: Incoming request payload. It is not read by this handler.

    Yields:
        A dict with ``"status": "success"`` and the encoded config dump under
        ``"message"``, or ``"status": "error"`` and the exception text under
        ``"message"`` if building the dump raised.
    """
    try:
        # TODO: Putting the dict instead of the string doesn't get sent
        # through the endpoint correctly...
        response = {"status": "success", "message": get_config_dump(config)}
    except Exception as e:
        logger.exception("Unexpected error dumping config")
        response = {"status": "error", "message": str(e)}
    # Yield outside the try block so that an exception raised into the
    # generator at the yield point is not mistaken for a dump failure
    # (which would log a misleading error and emit a second response).
    yield response
88+
89+
8090
def dump_config(dump_config_to: Optional[str], config: Any) -> None:
8191
"""
8292
Dump the configuration to a file or stdout.
@@ -112,6 +122,15 @@ def dump_config(dump_config_to: Optional[str], config: Any) -> None:
112122
def get_config_dump(config: Any, extra_info: Optional[Dict[str, Any]] = None) -> str:
    """
    Build the config dump for a backend instance and return it in encoded form.

    Args:
        config: Backend configuration object to include in the dump.
        extra_info: Optional extra entries, forwarded to
            ``_get_config_dump_data``.

    Returns:
        The collected dump data encoded with ``canonical_json_encoder``.
    """
    dump_data = _get_config_dump_data(config, extra_info)
    return canonical_json_encoder.encode(dump_data)
127+
128+
129+
def _get_config_dump_data(
130+
config: Any, extra_info: Optional[Dict[str, Any]] = None
131+
) -> Dict[str, Any]:
132+
"""
133+
Collect comprehensive config information about a backend instance.
115134
116135
Args:
117136
config: Any JSON-serializable object containing the backend configuration.
@@ -148,7 +167,7 @@ def get_config_dump(config: Any, extra_info: Optional[Dict[str, Any]] = None) ->
148167
if extra_info:
149168
config_dump.update(extra_info)
150169

151-
return canonical_json_encoder.encode(config_dump)
170+
return config_dump
152171

153172
except Exception as e:
154173
logger.error(f"Error collecting config dump: {e}")
@@ -157,7 +176,7 @@ def get_config_dump(config: Any, extra_info: Optional[Dict[str, Any]] = None) ->
157176
"error": f"Failed to collect config dump: {str(e)}",
158177
"system_info": get_system_info(), # Always try to include basic system info
159178
}
160-
return canonical_json_encoder.encode(error_info)
179+
return error_info
161180

162181

163182
def add_config_dump_args(parser: argparse.ArgumentParser):
@@ -176,13 +195,15 @@ def add_config_dump_args(parser: argparse.ArgumentParser):
176195

177196

178197
@functools.singledispatch
179-
def _preprocess_for_encode(obj: object) -> object:
198+
def _preprocess_for_encode(obj: object) -> Union[Dict[str, Any], str]:
180199
"""
181200
Single dispatch function for preprocessing objects before JSON encoding.
182201
183202
This function should be extended using @register_encoder decorator
184203
for backend-specific types.
185204
"""
205+
if isinstance(obj, dict):
206+
return obj
186207
if dataclasses.is_dataclass(obj) and not isinstance(obj, type):
187208
return dataclasses.asdict(obj)
188209
logger.warning(f"Unknown type {type(obj)}, using __dict__ or str(obj)")

components/src/dynamo/sglang/main.py

Lines changed: 46 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
# SPDX-License-Identifier: Apache-2.0
33

44
import asyncio
5+
import functools
6+
import json
57
import logging
68
import signal
79
import sys
@@ -10,7 +12,7 @@
1012
import uvloop
1113

1214
from dynamo.llm import ModelInput, ModelType
13-
from dynamo.common.config_dump import dump_config
15+
from dynamo.common.config_dump import dump_config, get_config_endpoint
1416
from dynamo.runtime import DistributedRuntime, dynamo_worker
1517
from dynamo.runtime.logging import configure_dynamo_logging
1618
from dynamo.sglang.args import Config, DisaggregationMode, parse_args
@@ -75,6 +77,7 @@ async def init(runtime: DistributedRuntime, config: Config):
7577
await component.create_service()
7678

7779
generate_endpoint = component.endpoint(dynamo_args.endpoint)
80+
dump_config_endpoint = component.endpoint("dump_config")
7881

7982
prefill_client = None
8083
if config.serving_mode == DisaggregationMode.DECODE:
@@ -115,6 +118,10 @@ async def init(runtime: DistributedRuntime, config: Config):
115118
dynamo_args,
116119
readiness_gate=ready_event,
117120
),
121+
dump_config_endpoint.serve_endpoint(
122+
functools.partial(get_config_endpoint, config),
123+
metrics_labels=[("model", server_args.served_model_name)],
124+
),
118125
)
119126
except Exception as e:
120127
logging.error(f"Failed to serve endpoints: {e}")
@@ -140,6 +147,7 @@ async def init_prefill(runtime: DistributedRuntime, config: Config):
140147
await component.create_service()
141148

142149
generate_endpoint = component.endpoint(dynamo_args.endpoint)
150+
dump_config_endpoint = component.endpoint("dump_config")
143151

144152
handler = PrefillWorkerHandler(component, engine, config)
145153

@@ -151,7 +159,11 @@ async def init_prefill(runtime: DistributedRuntime, config: Config):
151159
graceful_shutdown=True,
152160
metrics_labels=[("model", server_args.served_model_name)],
153161
health_check_payload=health_check_payload,
154-
)
162+
),
163+
dump_config_endpoint.serve_endpoint(
164+
functools.partial(get_config_endpoint, config),
165+
metrics_labels=[("model", server_args.served_model_name)],
166+
),
155167
]
156168

157169
try:
@@ -206,6 +218,10 @@ async def init_embedding(runtime: DistributedRuntime, config: Config):
206218
output_type=ModelType.Embedding,
207219
readiness_gate=ready_event,
208220
),
221+
dump_config_endpoint.serve_endpoint(
222+
functools.partial(get_config_endpoint, config),
223+
metrics_labels=[("model", server_args.served_model_name)],
224+
),
209225
)
210226
except Exception as e:
211227
logging.error(f"Failed to serve embedding endpoints: {e}")
@@ -229,6 +245,7 @@ async def init_multimodal_processor(runtime: DistributedRuntime, config: Config)
229245
await component.create_service()
230246

231247
generate_endpoint = component.endpoint(dynamo_args.endpoint)
248+
dump_config_endpoint = component.endpoint("dump_config")
232249

233250
# For processor, we need to connect to the encode worker
234251
encode_worker_client = (
@@ -260,6 +277,10 @@ async def init_multimodal_processor(runtime: DistributedRuntime, config: Config)
260277
input_type=ModelInput.Text,
261278
readiness_gate=ready_event,
262279
),
280+
dump_config_endpoint.serve_endpoint(
281+
functools.partial(get_config_endpoint, config),
282+
metrics_labels=[("model", server_args.served_model_name)],
283+
),
263284
)
264285
except Exception as e:
265286
logging.error(f"Failed to serve endpoints: {e}")
@@ -278,6 +299,7 @@ async def init_multimodal_encode_worker(runtime: DistributedRuntime, config: Con
278299
await component.create_service()
279300

280301
generate_endpoint = component.endpoint(dynamo_args.endpoint)
302+
dump_config_endpoint = component.endpoint("dump_config")
281303

282304
# For encode worker, we need to connect to the downstream LLM worker
283305
pd_worker_client = (
@@ -297,7 +319,11 @@ async def init_multimodal_encode_worker(runtime: DistributedRuntime, config: Con
297319
handler.generate,
298320
graceful_shutdown=True,
299321
metrics_labels=[("model", server_args.served_model_name)],
300-
)
322+
),
323+
dump_config_endpoint.serve_endpoint(
324+
functools.partial(get_config_endpoint, config),
325+
metrics_labels=[("model", server_args.served_model_name)],
326+
),
301327
]
302328

303329
try:
@@ -319,6 +345,7 @@ async def init_multimodal_worker(runtime: DistributedRuntime, config: Config):
319345
await component.create_service()
320346

321347
generate_endpoint = component.endpoint(dynamo_args.endpoint)
348+
dump_config_endpoint = component.endpoint("config")
322349

323350
engine = sgl.Engine(server_args=server_args)
324351

@@ -337,10 +364,16 @@ async def init_multimodal_worker(runtime: DistributedRuntime, config: Config):
337364
await handler.async_init()
338365

339366
try:
340-
await generate_endpoint.serve_endpoint(
341-
handler.generate,
342-
metrics_labels=[("model", server_args.served_model_name)],
343-
graceful_shutdown=True,
367+
await asyncio.gather(
368+
generate_endpoint.serve_endpoint(
369+
handler.generate,
370+
metrics_labels=[("model", server_args.served_model_name)],
371+
graceful_shutdown=True,
372+
),
373+
dump_config_endpoint.serve_endpoint(
374+
functools.partial(get_config_endpoint, config),
375+
metrics_labels=[("model", server_args.served_model_name)],
376+
),
344377
)
345378
except Exception as e:
346379
logging.error(f"Failed to serve endpoints: {e}")
@@ -361,6 +394,7 @@ async def init_multimodal_prefill_worker(runtime: DistributedRuntime, config: Co
361394
await component.create_service()
362395

363396
generate_endpoint = component.endpoint(dynamo_args.endpoint)
397+
dump_config_endpoint = component.endpoint("dump_config")
364398

365399
handler = MultimodalPrefillWorkerHandler(component, engine, config)
366400
await handler.async_init()
@@ -374,7 +408,11 @@ async def init_multimodal_prefill_worker(runtime: DistributedRuntime, config: Co
374408
graceful_shutdown=True,
375409
metrics_labels=[("model", server_args.served_model_name)],
376410
health_check_payload=health_check_payload,
377-
)
411+
),
412+
dump_config_endpoint.serve_endpoint(
413+
functools.partial(get_config_endpoint, config),
414+
metrics_labels=[("model", server_args.served_model_name)],
415+
),
378416
)
379417
except Exception as e:
380418
logging.error(f"Failed to serve endpoints: {e}")

lib/llm/src/entrypoint/input/http.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ pub async fn run(runtime: Runtime, engine_config: EngineConfig) -> anyhow::Resul
5757
let distributed_runtime = DistributedRuntime::from_settings(runtime.clone()).await?;
5858
let etcd_client = distributed_runtime.etcd_client();
5959
// This allows the /health endpoint to query etcd for active instances
60-
http_service_builder = http_service_builder.with_etcd_client(etcd_client.clone());
60+
http_service_builder = http_service_builder.with_drt(Some(distributed_runtime.clone()));
6161
let http_service = http_service_builder.build()?;
6262
match etcd_client {
6363
Some(ref etcd_client) => {

lib/llm/src/http/service.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ pub mod error;
2525
pub mod health;
2626
pub mod metrics;
2727
pub mod service_v2;
28+
pub mod config;
2829

2930
pub use axum;
3031
pub use metrics::Metrics;

0 commit comments

Comments
 (0)