
Commit c8c850a

Release 0.0.15

Signed-off-by: kerthcet <[email protected]>

1 parent 592d835 commit c8c850a

10 files changed: +66 -40 lines changed

README.md

Lines changed: 45 additions & 14 deletions
@@ -1,6 +1,16 @@
 # llmlite

-A library helps to communicate with all kinds of LLMs consistently.
+**llmlite** is a library that helps you communicate with all kinds of LLMs consistently.
+
+## Features
+
+- State-of-the-art LLMs support
+- Continuous Batching via [vLLM](https://github.com/vllm-project/vllm)
+- Quantization ([issue#37](https://github.com/InftyAI/llmlite/issues/37))
+- Adapter support ([issue#51](https://github.com/InftyAI/llmlite/issues/51))
+- Streaming support ([issue#52](https://github.com/InftyAI/llmlite/issues/52))
+
 ### Model Support

 | Model | State | System Prompt | Note |
 | ---- | ---- | ---- | ---- |
@@ -14,28 +24,26 @@ A library helps to communicate with all kinds of LLMs consistently.
 | Falcon | RoadMap 📋 | | [issue#8](https://github.com/InftyAI/ChatLLM/issues/8)
 | StableLM | RoadMap 📋 | | [issue#11](https://github.com/InftyAI/ChatLLM/issues/11) |
 | Baichuan2 | RoadMap 📋 | | [issue#34](https://github.com/InftyAI/llmlite/issues/34)
-| ... | ... | ... | ... |

-We're also planning to support different inference backends as below:
+### Backend Support

 | backend | State | Note |
 | ---- | ---- | ---- |
 | [huggingface](https://github.com/huggingface) | Done ✅ | Support by huggingface pipeline |
 | [vLLM](https://github.com/vllm-project/vllm) | Done ✅ | |
-| ... | ... | ... |

 ## How to install

 ```cmd
-pip install llmlite==0.0.9
+pip install llmlite==0.0.15
 ```

 ## How to use

 ### Chat

 ```python
-from llmlite.apis import ChatLLM, ChatMessage
+from llmlite import ChatLLM, ChatMessage

 chat = ChatLLM(
     model_name_or_path="meta-llama/Llama-2-7b-chat-hf", # required
@@ -53,6 +61,35 @@ result = chat.completion(

 ```

+### Continuous Batching
+
+_This is mostly supported by vLLM; you can enable it by configuring the **backend**._
+
+```python
+from llmlite import ChatLLM, ChatMessage
+
+chat = ChatLLM(
+    model_name_or_path="meta-llama/Llama-2-7b-chat-hf",
+    backend="vllm",
+)
+
+results = chat.completion(
+    messages=[
+        [
+            ChatMessage(role="system", content="You're an honest assistant."),
+            ChatMessage(role="user", content="There's a llama in my garden, what should I do?"),
+        ],
+        [
+            ChatMessage(role="user", content="What's the population of the world?"),
+        ],
+    ],
+    max_tokens=2048,
+)
+
+for result in results:
+    print(f"RESULT: \n{result}\n\n")
+```
+
 `llmlite` also supports other parameters like `temperature`, `max_length`, `do_sample`, `top_k`, `top_p` to help control the length, randomness and diversity of the generated text.

 See **[examples](./examples/)** for reference.
@@ -62,14 +99,14 @@ See **[examples](./examples/)** for reference.
 You can use `llmlite` to help you generate full prompts, for instance:

 ```python
-from llmlite.apis import ChatMessage, LlamaChat
+from llmlite import ChatLLM, ChatMessage

 messages = [
     ChatMessage(role="system", content="You're a honest assistant."),
     ChatMessage(role="user", content="There's a llama in my garden, what should I do?"),
 ]

-LlamaChat.prompt(messages)
+ChatLLM.prompt("meta-llama/Llama-2-7b-chat-hf", messages)

 # Output:
 # <s>[INST] <<SYS>>
@@ -83,12 +120,6 @@ LlamaChat.prompt(messages)

 Set the env variable `LOG_LEVEL` for log configuration, default to `INFO`, others like DEBUG, INFO, WARNING etc..

-## Roadmap
-
-- Adapter support
-- Quantization
-- Streaming
-
 ## Contributions

 🚀 All kinds of contributions are welcomed ! Please follow [Contributing](/CONTRIBUTING.md).
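The README's prompt-generation example shows output beginning with `<s>[INST] <<SYS>>`. For readers unfamiliar with that format, here is a minimal standalone sketch of the Llama-2 chat template; the helper below is illustrative, not llmlite's implementation:

```python
def llama2_prompt(system: str, user: str) -> str:
    # Llama-2 chat format: the system prompt sits inside <<SYS>> markers,
    # and the whole first turn is wrapped in [INST] ... [/INST].
    return f"<s>[INST] <<SYS>>\n{system}\n<</SYS>>\n\n{user} [/INST]"

print(llama2_prompt(
    "You're an honest assistant.",
    "There's a llama in my garden, what should I do?",
))
```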

examples/chatglm2.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-from llmlite.apis import ChatLLM, ChatMessage
+from llmlite import ChatLLM, ChatMessage

 chat = ChatLLM(
     model_name_or_path="THUDM/chatglm2-6b",

examples/chatgpt.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-from llmlite.apis import ChatLLM, ChatMessage
+from llmlite import ChatLLM, ChatMessage

 # You should set the OPENAI_API_KEY first.

examples/codellama.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-from llmlite.apis import ChatLLM, ChatMessage
+from llmlite import ChatLLM, ChatMessage

 chat = ChatLLM(
     model_name_or_path="codellama/CodeLlama-13b-instruct-hf",

examples/llama2.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-from llmlite.apis import ChatLLM, ChatMessage
+from llmlite import ChatLLM, ChatMessage

 chat = ChatLLM(
     model_name_or_path="meta-llama/Llama-2-7b-chat-hf",

examples/vllm.py

Lines changed: 5 additions & 6 deletions
@@ -1,12 +1,11 @@
-from llmlite.apis import ChatLLM, ChatMessage
+from llmlite import ChatLLM, ChatMessage

 chat = ChatLLM(
     model_name_or_path="meta-llama/Llama-2-7b-chat-hf",
-    task="text-generation",
     backend="vllm",
 )

-result = chat.completion(
+results = chat.completion(
     messages=[
         [
             ChatMessage(role="system", content="You're a honest assistant."),
@@ -15,8 +14,7 @@
             ),
         ],
         [
-            ChatMessage(role="system", content="You're a honest assistant."),
-            ChatMessage(role="user", content="How many people are their in China?"),
+            ChatMessage(role="user", content="How many people are there in China?"),
         ],
     ],
     max_tokens=2048,
@@ -25,4 +23,5 @@
     # top_k=3,
 )

-print(result)
+for result in results:
+    print(f"RESULT: \n{result}\n\n")
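The `result` to `results` rename here reflects that, with the vLLM backend, `completion` takes a batch of conversations and returns one completion per conversation. A toy sketch of consuming such a return, assuming results come back in input order (stand-in data, not real model output):

```python
# Stand-in questions and completions, shaped like the example above.
questions = [
    "There's a llama in my garden, what should I do?",
    "How many people are there in China?",
]
results = [
    "Stay calm; llamas are usually docile. Contact local animal control.",
    "China's population is roughly 1.4 billion.",
]

# One completion per conversation, in the order they were submitted.
for question, result in zip(questions, results):
    print(f"Q: {question}\nA: {result}\n")
```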

llmlite/__init__.py

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
+from llmlite.apis.chatllm import ChatLLM
+from llmlite.llms.messages import ChatMessage
+
+__version__ = "0.0.15"
+
+__all__ = [
+    "ChatLLM",
+    "ChatMessage",
+]

llmlite/apis/__init__.py

Lines changed: 0 additions & 15 deletions
@@ -1,15 +0,0 @@
-from llmlite.apis.chatllm import ChatLLM
-from llmlite.llms.messages import ChatMessage
-from llmlite.llms.chatglm import ChatGLM
-from llmlite.llms.llama import Llama
-from llmlite.llms.chatgpt import ChatGPT
-
-__version__ = "0.0.7"
-
-__all__ = [
-    "ChatLLM",
-    "ChatMessage",
-    "Llama",
-    "ChatGLM",
-    "ChatGPT",
-]

llmlite/backends/vllm_backend.py

Lines changed: 2 additions & 0 deletions
@@ -13,6 +13,8 @@ def __init__(
         **kwargs,
     ):
         trust_remote_code = kwargs.pop("trust_remote_code", True)
+        # 'task' is an unexpected keyword argument to vLLM.
+        _ = kwargs.pop("task", None)

         self._model = vllm(
             model=model_name_or_path,
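The two added lines follow a common defensive pattern: pop keys out of `**kwargs` that the downstream constructor does not accept before forwarding the rest. A minimal self-contained sketch of the pattern; the `Engine` class and `build_engine` helper below are hypothetical stand-ins, not llmlite's or vLLM's code:

```python
class Engine:
    # Hypothetical stand-in: accepts only the arguments it knows about.
    def __init__(self, model: str, trust_remote_code: bool = True):
        self.model = model
        self.trust_remote_code = trust_remote_code


def build_engine(model_name_or_path: str, **kwargs) -> Engine:
    trust_remote_code = kwargs.pop("trust_remote_code", True)
    # 'task' is meaningful to the huggingface pipeline backend but not here;
    # forwarding it would raise "TypeError: __init__() got an unexpected
    # keyword argument 'task'", so it is discarded.
    kwargs.pop("task", None)
    return Engine(
        model=model_name_or_path,
        trust_remote_code=trust_remote_code,
        **kwargs,
    )


# Callers can keep passing 'task' (as older examples did) without breaking.
engine = build_engine("meta-llama/Llama-2-7b-chat-hf", task="text-generation")
```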

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "llmlite"
-version = "0.0.9"
+version = "0.0.15"
 description = "A library helps to chat with all kinds of LLMs consistently."
 authors = ["InftyAI"]
 license = "MIT License"
