Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion nanovllm/engine/llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,18 @@
from tqdm.auto import tqdm
from transformers import AutoTokenizer
import torch.multiprocessing as mp
from typing import TypedDict

from nanovllm.config import Config
from nanovllm.sampling_params import SamplingParams
from nanovllm.engine.sequence import Sequence
from nanovllm.engine.scheduler import Scheduler
from nanovllm.engine.model_runner import ModelRunner

class GenerateOutput(TypedDict):
    """Typed shape of one element of the list returned by ``generate``.

    Both keys are required. At runtime this is a plain ``dict``; the class
    exists only so type checkers know the keys and value types.
    """

    # NOTE(review): presumably the decoded completion text for one prompt --
    # confirm against the body of ``LLMEngine.generate``.
    text: str
    # NOTE(review): presumably the generated token ids that ``text`` was
    # decoded from -- confirm against the body of ``LLMEngine.generate``.
    token_ids: list[int]


class LLMEngine:

Expand Down Expand Up @@ -62,7 +67,7 @@ def generate(
prompts: list[str] | list[list[int]],
sampling_params: SamplingParams | list[SamplingParams],
use_tqdm: bool = True,
) -> list[str]:
) -> list[GenerateOutput]:
pbar = tqdm(total=len(prompts), desc="Generating", dynamic_ncols=True, disable=not use_tqdm)
if not isinstance(sampling_params, list):
sampling_params = [sampling_params] * len(prompts)
Expand Down