
Commit 5a3c52f

fix access None req_metrics when sending abort_request

1 parent: 0051af7

File tree: 4 files changed, +11 -16 lines

lmdeploy/metrics/metrics_processor.py

Lines changed: 3 additions & 1 deletion

```diff
@@ -122,7 +122,9 @@ async def _run_metrics_handler(self):
             outputs, req_state, iteration_stats = update_data
 
             # update request state according the engine events
-            req_state.update_from_events(outputs.req_metrics.engine_events)
+            if outputs and outputs.req_metrics:
+                # when users visit "/abort_request" endpoint, `req_metrics` might be None
+                req_state.update_from_events(outputs.req_metrics.engine_events)
 
             # update iteration stats based on outputs and request state.
             # some attributes of req_state will also be updated, e.g., lastest_token_time
```
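For context, the guard prevents an AttributeError on `outputs.req_metrics.engine_events` when an aborted request reaches the handler with no metrics attached. A minimal sketch of the pattern, using simplified stand-in classes rather than the real lmdeploy types:

```python
from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
class ReqMetrics:
    """Stand-in for the request metrics carried by an engine output."""
    engine_events: List[str] = field(default_factory=list)


@dataclass
class EngineOutput:
    """Stand-in: only the field the handler touches."""
    req_metrics: Optional[ReqMetrics] = None


def handle(outputs: EngineOutput) -> None:
    # Guard first: an aborted request may carry no metrics at all.
    if outputs and outputs.req_metrics:
        for event in outputs.req_metrics.engine_events:
            print('event:', event)


handle(EngineOutput(req_metrics=None))                        # no-op, no AttributeError
handle(EngineOutput(req_metrics=ReqMetrics(['scheduled'])))   # prints the event
```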

lmdeploy/metrics/stats.py

Lines changed: 3 additions & 0 deletions

```diff
@@ -198,6 +198,9 @@ def update_from_output(self, outputs: EngineOutput, req_state: RequestState):
             outputs (EngineOutput): The output from the engine containing information about the current iteration.
             req_state (RequestState): The state of the request, including timestamps and token counts.
         """
+        if outputs.req_metrics is None:
+            # when users visit "/abort_request" endpoint, `req_metrics` might be None
+            return
         new_generation_tokens = len(outputs.token_ids)
         if new_generation_tokens == 0:
             return
```
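A rough regression-style check of the early return, under the assumption that `update_from_output` would otherwise dereference `req_metrics` further down. The class below is a simplified stand-in, not the real lmdeploy `IterationStats`:

```python
from typing import List, Optional


class IterationStats:
    """Simplified stand-in for the class patched above."""

    def __init__(self) -> None:
        self.generation_tokens = 0

    def update_from_output(self, req_metrics: Optional[object], token_ids: List[int]) -> None:
        if req_metrics is None:
            # aborted requests may carry no metrics; skip the whole update
            return
        new_generation_tokens = len(token_ids)
        if new_generation_tokens == 0:
            return
        self.generation_tokens += new_generation_tokens


stats = IterationStats()
stats.update_from_output(req_metrics=None, token_ids=[1, 2, 3])
assert stats.generation_tokens == 0  # early return: nothing counted
stats.update_from_output(req_metrics=object(), token_ids=[1, 2, 3])
assert stats.generation_tokens == 3
```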

lmdeploy/serve/openai/api_server.py

Lines changed: 2 additions & 12 deletions

```diff
@@ -955,18 +955,8 @@ async def generate(request: GenerateReqInput, raw_request: Request = None):
         do_preprocess=False,
     )
 
-    def create_finish_reason(finish_reason):
-        # TODO: add detail info
-        if not finish_reason:
-            return None
-        if finish_reason == 'length':
-            return dict(type='length')
-        if finish_reason == 'stop':
-            return dict(type='stop')
-        return dict(type='abort')
-
     def create_generate_response_json(res, text, output_ids, logprobs, finish_reason):
-        meta = GenerateReqMetaOutput(finish_reason=create_finish_reason(finish_reason),
+        meta = GenerateReqMetaOutput(finish_reason=dict(type=finish_reason) if finish_reason else None,
                                      output_token_logprobs=logprobs or None,
                                      prompt_tokens=res.input_token_len,
                                      completion_tokens=res.generate_token_len)
@@ -1005,7 +995,7 @@ async def _inner_call():
             for tok, tok_logprobs in zip(res.token_ids, res.logprobs):
                 logprobs.append((tok_logprobs[tok], tok))
             nonlocal response
-            meta = GenerateReqMetaOutput(finish_reason=create_finish_reason(res.finish_reason),
+            meta = GenerateReqMetaOutput(finish_reason=dict(type=res.finish_reason) if res.finish_reason else None,
                                          output_token_logprobs=logprobs or None,
                                          prompt_tokens=res.input_token_len,
                                          completion_tokens=res.generate_token_len)
```
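For reference, the removed helper and the new inline conditional agree on None, 'stop', 'length', and 'abort'; the observable difference is that other values such as 'error' now pass through as {'type': 'error'} instead of collapsing to {'type': 'abort'}. A standalone comparison (the helper is reproduced from the deleted code above):

```python
def create_finish_reason(finish_reason):
    # The helper removed by this commit, reproduced for comparison.
    if not finish_reason:
        return None
    if finish_reason == 'length':
        return dict(type='length')
    if finish_reason == 'stop':
        return dict(type='stop')
    return dict(type='abort')


for reason in (None, 'stop', 'length', 'abort', 'error'):
    old = create_finish_reason(reason)
    new = dict(type=reason) if reason else None
    marker = '' if old == new else '  <-- behavior change'
    print(f'{reason!r:>8}: old={old} new={new}{marker}')
```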

lmdeploy/serve/openai/protocol.py

Lines changed: 3 additions & 3 deletions

```diff
@@ -256,7 +256,7 @@ class ChatCompletionResponseStreamChoice(BaseModel):
     index: int
     delta: DeltaMessage
     logprobs: Optional[ChoiceLogprobs] = None
-    finish_reason: Optional[Literal['stop', 'length', 'tool_calls', 'error']] = None
+    finish_reason: Optional[Literal['stop', 'length', 'tool_calls', 'error', 'abort']] = None
 
 
 class ChatCompletionStreamResponse(BaseModel):
@@ -314,7 +314,7 @@ class CompletionResponseChoice(BaseModel):
     text: str
     logprobs: Optional[LogProbs] = None
     gen_tokens: Optional[List[int]] = None
-    finish_reason: Optional[Literal['stop', 'length', 'tool_calls', 'error']] = None
+    finish_reason: Optional[Literal['stop', 'length', 'tool_calls', 'error', 'abort']] = None
 
 
 class CompletionResponse(BaseModel):
@@ -430,7 +430,7 @@ class GenerateResponse(BaseModel):
     tokens: int
     input_tokens: int
     history_tokens: int
-    finish_reason: Optional[Literal['stop', 'length', 'tool_calls', 'error']] = None
+    finish_reason: Optional[Literal['stop', 'length', 'tool_calls', 'error', 'abort']] = None
 
 
 class UpdateParamsRequest(BaseModel):
```
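The protocol change is what makes the new 'abort' value representable at the API boundary: pydantic validates Literal fields, so before this commit a response carrying finish_reason='abort' would fail validation. A minimal check, with the model pared down to the one field this commit touches:

```python
from typing import Literal, Optional

from pydantic import BaseModel, ValidationError


class Choice(BaseModel):
    """Pared down to the finish_reason field from protocol.py."""
    finish_reason: Optional[Literal['stop', 'length', 'tool_calls', 'error', 'abort']] = None


print(Choice(finish_reason='abort'))    # now validates
try:
    Choice(finish_reason='timeout')     # still rejected: not in the Literal
except ValidationError as exc:
    print('rejected:', exc.errors()[0]['type'])
```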
