22# SPDX-License-Identifier: Apache-2.0 
33
44import  asyncio 
5+ import  functools 
6+ import  json 
57import  logging 
68import  signal 
79import  sys 
1012import  uvloop 
1113
1214from  dynamo .llm  import  ModelInput , ModelType 
13- from  dynamo .common .config_dump  import  dump_config 
15+ from  dynamo .common .config_dump  import  dump_config ,  get_config_endpoint 
1416from  dynamo .runtime  import  DistributedRuntime , dynamo_worker 
1517from  dynamo .runtime .logging  import  configure_dynamo_logging 
1618from  dynamo .sglang .args  import  Config , DisaggregationMode , parse_args 
@@ -75,6 +77,7 @@ async def init(runtime: DistributedRuntime, config: Config):
7577    await  component .create_service ()
7678
7779    generate_endpoint  =  component .endpoint (dynamo_args .endpoint )
80+     dump_config_endpoint  =  component .endpoint ("dump_config" )
7881
7982    prefill_client  =  None 
8083    if  config .serving_mode  ==  DisaggregationMode .DECODE :
@@ -115,6 +118,10 @@ async def init(runtime: DistributedRuntime, config: Config):
115118                dynamo_args ,
116119                readiness_gate = ready_event ,
117120            ),
121+             dump_config_endpoint .serve_endpoint (
122+                 functools .partial (get_config_endpoint , config ),
123+                 metrics_labels = [("model" , server_args .served_model_name )],
124+             ),
118125        )
119126    except  Exception  as  e :
120127        logging .error (f"Failed to serve endpoints: { e }  )
@@ -140,6 +147,7 @@ async def init_prefill(runtime: DistributedRuntime, config: Config):
140147    await  component .create_service ()
141148
142149    generate_endpoint  =  component .endpoint (dynamo_args .endpoint )
150+     dump_config_endpoint  =  component .endpoint ("dump_config" )
143151
144152    handler  =  PrefillWorkerHandler (component , engine , config )
145153
@@ -151,7 +159,11 @@ async def init_prefill(runtime: DistributedRuntime, config: Config):
151159            graceful_shutdown = True ,
152160            metrics_labels = [("model" , server_args .served_model_name )],
153161            health_check_payload = health_check_payload ,
154-         )
162+         ),
163+         dump_config_endpoint .serve_endpoint (
164+             functools .partial (get_config_endpoint , config ),
165+             metrics_labels = [("model" , server_args .served_model_name )],
166+         ),
155167    ]
156168
157169    try :
@@ -206,6 +218,10 @@ async def init_embedding(runtime: DistributedRuntime, config: Config):
206218                output_type = ModelType .Embedding ,
207219                readiness_gate = ready_event ,
208220            ),
221+             dump_config_endpoint .serve_endpoint (
222+                 functools .partial (get_config_endpoint , config ),
223+                 metrics_labels = [("model" , server_args .served_model_name )],
224+             ),
209225        )
210226    except  Exception  as  e :
211227        logging .error (f"Failed to serve embedding endpoints: { e }  )
@@ -229,6 +245,7 @@ async def init_multimodal_processor(runtime: DistributedRuntime, config: Config)
229245    await  component .create_service ()
230246
231247    generate_endpoint  =  component .endpoint (dynamo_args .endpoint )
248+     dump_config_endpoint  =  component .endpoint ("dump_config" )
232249
233250    # For processor, we need to connect to the encode worker 
234251    encode_worker_client  =  (
@@ -260,6 +277,10 @@ async def init_multimodal_processor(runtime: DistributedRuntime, config: Config)
260277                input_type = ModelInput .Text ,
261278                readiness_gate = ready_event ,
262279            ),
280+             dump_config_endpoint .serve_endpoint (
281+                 functools .partial (get_config_endpoint , config ),
282+                 metrics_labels = [("model" , server_args .served_model_name )],
283+             ),
263284        )
264285    except  Exception  as  e :
265286        logging .error (f"Failed to serve endpoints: { e }  )
@@ -278,6 +299,7 @@ async def init_multimodal_encode_worker(runtime: DistributedRuntime, config: Con
278299    await  component .create_service ()
279300
280301    generate_endpoint  =  component .endpoint (dynamo_args .endpoint )
302+     dump_config_endpoint  =  component .endpoint ("dump_config" )
281303
282304    # For encode worker, we need to connect to the downstream LLM worker 
283305    pd_worker_client  =  (
@@ -297,7 +319,11 @@ async def init_multimodal_encode_worker(runtime: DistributedRuntime, config: Con
297319            handler .generate ,
298320            graceful_shutdown = True ,
299321            metrics_labels = [("model" , server_args .served_model_name )],
300-         )
322+         ),
323+         dump_config_endpoint .serve_endpoint (
324+             functools .partial (get_config_endpoint , config ),
325+             metrics_labels = [("model" , server_args .served_model_name )],
326+         ),
301327    ]
302328
303329    try :
@@ -319,6 +345,7 @@ async def init_multimodal_worker(runtime: DistributedRuntime, config: Config):
319345    await  component .create_service ()
320346
321347    generate_endpoint  =  component .endpoint (dynamo_args .endpoint )
348+     dump_config_endpoint  =  component .endpoint ("config" )
322349
323350    engine  =  sgl .Engine (server_args = server_args )
324351
@@ -337,10 +364,16 @@ async def init_multimodal_worker(runtime: DistributedRuntime, config: Config):
337364    await  handler .async_init ()
338365
339366    try :
340-         await  generate_endpoint .serve_endpoint (
341-             handler .generate ,
342-             metrics_labels = [("model" , server_args .served_model_name )],
343-             graceful_shutdown = True ,
367+         await  asyncio .gather (
368+             generate_endpoint .serve_endpoint (
369+                 handler .generate ,
370+                 metrics_labels = [("model" , server_args .served_model_name )],
371+                 graceful_shutdown = True ,
372+             ),
373+             dump_config_endpoint .serve_endpoint (
374+                 functools .partial (get_config_endpoint , config ),
375+                 metrics_labels = [("model" , server_args .served_model_name )],
376+             ),
344377        )
345378    except  Exception  as  e :
346379        logging .error (f"Failed to serve endpoints: { e }  )
@@ -361,6 +394,7 @@ async def init_multimodal_prefill_worker(runtime: DistributedRuntime, config: Co
361394    await  component .create_service ()
362395
363396    generate_endpoint  =  component .endpoint (dynamo_args .endpoint )
397+     dump_config_endpoint  =  component .endpoint ("dump_config" )
364398
365399    handler  =  MultimodalPrefillWorkerHandler (component , engine , config )
366400    await  handler .async_init ()
@@ -374,7 +408,11 @@ async def init_multimodal_prefill_worker(runtime: DistributedRuntime, config: Co
374408                graceful_shutdown = True ,
375409                metrics_labels = [("model" , server_args .served_model_name )],
376410                health_check_payload = health_check_payload ,
377-             )
411+             ),
412+             dump_config_endpoint .serve_endpoint (
413+                 functools .partial (get_config_endpoint , config ),
414+                 metrics_labels = [("model" , server_args .served_model_name )],
415+             ),
378416        )
379417    except  Exception  as  e :
380418        logging .error (f"Failed to serve endpoints: { e }  )
0 commit comments