@@ -65,6 +65,7 @@ class VariableInterface:
6565 # following is for tool parsers
6666 tool_parser : Optional [ToolParser ] = None
6767 allow_terminate_by_client : bool = False
68+ enable_abort_handling : bool = False
6869
6970
7071router = APIRouter ()
@@ -1152,6 +1153,11 @@ async def free_cache(cache_free_request: DistServeCacheFreeRequest) -> JSONRespo
11521153@router .post ('/abort_request' )
11531154async def abort_request (request : AbortRequest , raw_request : Request = None ):
11541155 """Abort an ongoing request."""
1156+ if not VariableInterface .enable_abort_handling :
1157+ return Response (
1158+ status_code = 501 ,
1159+ content = 'This server does not support abort requests. Enable with --enable-abort-handling flag.' )
1160+
11551161 if request .abort_all :
11561162 await VariableInterface .async_engine .stop_all_session ()
11571163 else :
@@ -1323,6 +1329,7 @@ def serve(model_path: str,
13231329 reasoning_parser : Optional [str ] = None ,
13241330 tool_call_parser : Optional [str ] = None ,
13251331 allow_terminate_by_client : bool = False ,
1332+ enable_abort_handling : bool = False ,
13261333 ** kwargs ):
13271334 """An example to perform model inference through the command line
13281335 interface.
@@ -1381,6 +1388,7 @@ def serve(model_path: str,
13811388 logger .setLevel (log_level )
13821389
13831390 VariableInterface .allow_terminate_by_client = allow_terminate_by_client
1391+ VariableInterface .enable_abort_handling = enable_abort_handling
13841392 if api_keys is not None :
13851393 if isinstance (api_keys , str ):
13861394 api_keys = api_keys .split (',' )
0 commit comments