
Commit 33f96ff

LiqinruiG and liqinrui authored
[BugFix] rollback max_tokens and min_tokens when continue to infer (#5052)
Co-authored-by: liqinrui <[email protected]>
1 parent ff26158 · commit 33f96ff

File tree

3 files changed: +2 -152 lines changed


fastdeploy/entrypoints/engine_client.py

Lines changed: 1 addition & 8 deletions
@@ -210,14 +210,7 @@ async def add_requests(self, task):
         task["prompt_token_ids_len"] = len(task["prompt_token_ids"])
         input_ids_len = task["prompt_token_ids_len"]

-        completion_token_len = len(task["completion_token_ids"]) if task.get("completion_token_ids") else 0
-        task["max_tokens"] = min(
-            self.max_model_len - input_ids_len, max(0, task.get("max_tokens") - completion_token_len)
-        )
-
-        if task.get("min_tokens") is not None:
-            task["min_tokens"] = max(1, task["min_tokens"] - completion_token_len)
-
+        task["max_tokens"] = min(self.max_model_len - input_ids_len, task.get("max_tokens"))
         min_tokens = task.get("min_tokens", 1)
         if "messages" in task:
             del task["messages"]
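
Note (not part of the commit): a minimal sketch contrasting the rolled-back clamping with the restored one; the max_model_len, input_ids_len, and task values below are hypothetical.

# Sketch only: hypothetical values, not FastDeploy code.
def old_max_tokens(task, max_model_len, input_ids_len):
    # Removed behavior: also subtract the tokens already generated in a previous turn.
    completion_token_len = len(task["completion_token_ids"]) if task.get("completion_token_ids") else 0
    return min(max_model_len - input_ids_len, max(0, task.get("max_tokens") - completion_token_len))

def new_max_tokens(task, max_model_len, input_ids_len):
    # Restored behavior: cap max_tokens only by the remaining context window.
    return min(max_model_len - input_ids_len, task.get("max_tokens"))

task = {"max_tokens": 512, "completion_token_ids": [0] * 100}
print(old_max_tokens(task, max_model_len=8192, input_ids_len=1000))  # 412
print(new_max_tokens(task, max_model_len=8192, input_ids_len=1000))  # 512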

fastdeploy/input/ernie4_5_vl_processor/ernie4_5_vl_processor.py

Lines changed: 1 addition & 5 deletions
@@ -252,9 +252,7 @@ def process_request_dict(self, request, max_model_len=None):
         else:
             raise ValueError(f"Request must contain 'prompt', or 'messages': {request}")

-        completion_token_len = 0
         if request.get("completion_token_ids"):
-            completion_token_len = len(request.get("completion_token_ids"))
             self.append_completion_tokens(outputs, request["completion_token_ids"])

         outputs = self.pack_outputs(outputs)
@@ -271,9 +269,7 @@ def process_request_dict(self, request, max_model_len=None):
             request["max_tokens"] = max(1, max_model_len - len(request["prompt_token_ids"]))
             tmp_max_tokens = request["max_tokens"]
         else:
-            tmp_max_tokens = min(
-                max_model_len - len(request["prompt_token_ids"]), max(0, request["max_tokens"] - completion_token_len)
-            )
+            request["max_tokens"] = min(max_model_len - len(request["prompt_token_ids"]), request["max_tokens"])
         if request.get("reasoning_max_tokens") is None:
             request["reasoning_max_tokens"] = max(int(tmp_max_tokens * 0.8), 1)
         data_processor_logger.info(f"Processed request {request}")
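
Note (not part of the commit): in the unchanged context above, reasoning_max_tokens defaults to 80% of the computed max_tokens budget; a minimal sketch with a hypothetical budget:

# Sketch only: illustrates the default shown in the context lines above.
tmp_max_tokens = 412  # hypothetical budget after capping by the context window
reasoning_max_tokens = max(int(tmp_max_tokens * 0.8), 1)
print(reasoning_max_tokens)  # 329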

tests/entrypoints/openai/test_max_and_min_tokens.py

Lines changed: 0 additions & 139 deletions
This file was deleted.
