Skip to content

Commit

Permalink
upgrade infinity and vllm
Browse files Browse the repository at this point in the history
  • Loading branch information
hiyouga committed Apr 6, 2024
1 parent 775a8a7 commit 4caabfc
Show file tree
Hide file tree
Showing 7 changed files with 21 additions and 15 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
check_dirs := src tests

quality:
ruff $(check_dirs)
ruff check $(check_dirs)
ruff format --check $(check_dirs)

style:
ruff $(check_dirs) --fix
ruff check $(check_dirs) --fix
ruff format $(check_dirs)
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ A unified language model server built upon [vllm](https://github.com/vllm-project/vllm)
### Install

```bash
pip install -U imitater
pip install packaging
pip install -e .
```

### Launch Server
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
infinity-emb[torch]==0.0.17
infinity-emb[torch,optimum]>=0.0.31
openai>=1.5.0
sse-starlette
vllm>=0.3.3
vllm @ git+https://github.com/vllm-project/vllm.git@54951ac4bfb7f4224cb8f5ffc89b214c950107d8
2 changes: 1 addition & 1 deletion src/imitater/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.2.3"
__version__ = "0.2.4.dev0"
8 changes: 4 additions & 4 deletions src/imitater/model/embed_model.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from dataclasses import dataclass, fields
from typing import TYPE_CHECKING, List, Tuple

from infinity_emb import AsyncEmbeddingEngine
from infinity_emb import AsyncEmbeddingEngine, EngineArgs
from typing_extensions import Self

from ..utils.modelscope import try_download_model_from_ms
Expand Down Expand Up @@ -62,13 +62,13 @@ def __init__(self, config: "EmbedConfig") -> None:

def _init_infinity_engine(self) -> None:
if len(self.config.device) != 1:
raise ValueError("Embedding model only accepts one device.")
raise ValueError("Embedding model does not support multi-GPUs yet.")

self._engine = AsyncEmbeddingEngine(
engine_args = EngineArgs(
model_name_or_path=self.config.path,
batch_size=self.config.batch_size,
device="cuda",
)
self._engine = AsyncEmbeddingEngine.from_args(engine_args)

async def startup(self) -> None:
r"""
Expand Down
2 changes: 1 addition & 1 deletion src/imitater/service/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ async def create_embeddings(request: "EmbeddingsRequest"):
return await _create_openai_embeddings(request, embed_models[request.model])

for process in processes:
thread = Thread(target=print_subprocess_stdout, args=[process])
thread = Thread(target=print_subprocess_stdout, args=[process], daemon=True)
thread.start()

uvicorn.run(app, host="0.0.0.0", port=port)
Expand Down
13 changes: 9 additions & 4 deletions src/imitater/service/common.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
from subprocess import Popen
from typing import Optional
from typing import TYPE_CHECKING, Optional

from ..utils.generic import jsonify
from .protocol import ChatCompletionMessage, ChatCompletionStreamResponse, ChatCompletionStreamResponseChoice, Finish
from .protocol import ChatCompletionStreamResponse, ChatCompletionStreamResponseChoice


if TYPE_CHECKING:
from subprocess import Popen

from .protocol import ChatCompletionMessage, Finish


def print_subprocess_stdout(process: "Popen") -> None:
Expand All @@ -20,7 +25,7 @@ def create_stream_chunk(
model: str,
delta: "ChatCompletionMessage",
index: Optional[int] = 0,
finish_reason: Optional[Finish] = None,
finish_reason: Optional["Finish"] = None,
) -> str:
choice = ChatCompletionStreamResponseChoice(index=index, delta=delta, finish_reason=finish_reason)
chunk = ChatCompletionStreamResponse(id=request_id, model=model, choices=[choice])
Expand Down

0 comments on commit 4caabfc

Please sign in to comment.