Skip to content

Commit 4139054

Browse files
v1.4.1 (#1568)
1 parent 0f2daad commit 4139054

File tree

13 files changed

+1302
-763
lines changed

13 files changed

+1302
-763
lines changed

Cargo.lock

Lines changed: 176 additions & 127 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@ members = [
99
resolver = "2"
1010

1111
[workspace.package]
12-
version = "1.4.0"
12+
version = "1.4.1"
1313
edition = "2021"
1414
authors = ["Olivier Dehaene"]
1515
homepage = "https://github.com/huggingface/text-generation-inference"

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -225,7 +225,7 @@ COPY server/Makefile server/Makefile
225225
RUN cd server && \
226226
make gen-server && \
227227
pip install -r requirements_cuda.txt && \
228-
pip install ".[bnb, accelerate, quantize, peft]" --no-cache-dir
228+
pip install ".[bnb, accelerate, quantize, peft, outlines]" --no-cache-dir
229229

230230
# Install benchmarker
231231
COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark

Dockerfile_amd

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -150,7 +150,7 @@ COPY server/Makefile server/Makefile
150150
RUN cd server && \
151151
make gen-server && \
152152
pip install -r requirements_rocm.txt && \
153-
pip install ".[accelerate, peft]" --no-cache-dir
153+
pip install ".[accelerate, peft, outlines]" --no-cache-dir
154154

155155
# Install benchmarker
156156
COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark

docs/openapi.json

Lines changed: 35 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
"name": "Apache 2.0",
1111
"url": "https://www.apache.org/licenses/LICENSE-2.0"
1212
},
13-
"version": "1.4.0"
13+
"version": "1.4.1"
1414
},
1515
"paths": {
1616
"/": {
@@ -590,8 +590,11 @@
590590
"minimum": 0
591591
},
592592
"logprobs": {
593-
"type": "number",
594-
"format": "float",
593+
"allOf": [
594+
{
595+
"$ref": "#/components/schemas/ChatCompletionLogprobs"
596+
}
597+
],
595598
"nullable": true
596599
}
597600
}
@@ -710,7 +713,7 @@
710713
"presence_penalty": {
711714
"type": "number",
712715
"format": "float",
713-
"description": "UNUSED\nNumber between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far,\nincreasing the model's likelihood to talk about new topics",
716+
"description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far,\nincreasing the model's likelihood to talk about new topics",
714717
"example": 0.1,
715718
"nullable": true
716719
},
@@ -734,7 +737,7 @@
734737
"top_logprobs": {
735738
"type": "integer",
736739
"format": "int32",
737-
"description": "UNUSED\nAn integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with\nan associated log probability. logprobs must be set to true if this parameter is used.",
740+
"description": "An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with\nan associated log probability. logprobs must be set to true if this parameter is used.",
738741
"example": "5",
739742
"nullable": true,
740743
"minimum": 0
@@ -870,6 +873,22 @@
870873
"default": "false",
871874
"example": true
872875
},
876+
"frequency_penalty": {
877+
"type": "number",
878+
"format": "float",
879+
"default": "null",
880+
"example": 0.1,
881+
"nullable": true,
882+
"exclusiveMinimum": -2
883+
},
884+
"grammar": {
885+
"allOf": [
886+
{
887+
"$ref": "#/components/schemas/GrammarType"
888+
}
889+
],
890+
"nullable": true
891+
},
873892
"max_new_tokens": {
874893
"type": "integer",
875894
"format": "int32",
@@ -1026,6 +1045,12 @@
10261045
"example": "null",
10271046
"nullable": true
10281047
},
1048+
"max_batch_size": {
1049+
"type": "integer",
1050+
"example": "null",
1051+
"nullable": true,
1052+
"minimum": 0
1053+
},
10291054
"max_batch_total_tokens": {
10301055
"type": "integer",
10311056
"format": "int32",
@@ -1119,6 +1144,11 @@
11191144
"type": "string",
11201145
"example": "My name is David and I"
11211146
},
1147+
"name": {
1148+
"type": "string",
1149+
"example": "\"David\"",
1150+
"nullable": true
1151+
},
11221152
"role": {
11231153
"type": "string",
11241154
"example": "user"

integration-tests/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "text-generation-integration-tests"
3-
version = "1.4.0"
3+
version = "1.4.1"
44
description = "Text Generation Inference integration tests"
55
authors = ["Nicolas Patry <[email protected]>"]
66

server/Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ install-megablocks:
2323
install: gen-server
2424
pip install pip --upgrade
2525
pip install -r requirements_cuda.txt
26-
pip install -e ".[bnb, accelerate, quantize, peft]"
26+
pip install -e ".[bnb, accelerate, quantize, peft, outlines]"
2727

2828
run-dev:
2929
SAFETENSORS_FAST_GPU=1 python -m torch.distributed.run --nproc_per_node=2 text_generation_server/cli.py serve bigscience/bloom-560m --sharded

0 commit comments

Comments
 (0)