Commit f9ee2c4

Upgrading all versions. (#1759)
1 parent 06c3d4b commit f9ee2c4

File tree

11 files changed: +781 -754 lines changed


Cargo.lock

Lines changed: 103 additions & 137 deletions
Some generated files are not rendered by default.

Cargo.toml

Lines changed: 4 additions & 0 deletions
@@ -14,6 +14,10 @@ edition = "2021"
 authors = ["Olivier Dehaene"]
 homepage = "https://github.com/huggingface/text-generation-inference"
 
+[workspace.dependencies]
+tokenizers = { version = "0.19.1", features = ["http"] }
+hf-hub = { version = "0.3.1", features = ["tokio"] }
+
 [profile.release]
 debug = 1
 incremental = true
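
With tokenizers and hf-hub pinned once at the workspace root, member crates can inherit the version and feature set instead of repeating them, which is exactly what the benchmark and router manifests below switch to. A minimal sketch of the member-crate side (limited to the two dependencies this commit touches; any other manifest contents are omitted):

# In a member crate's Cargo.toml: inherit version and features from the
# root [workspace.dependencies] table added above.
[dependencies]
tokenizers = { workspace = true }
hf-hub = { workspace = true }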

benchmark/Cargo.toml

Lines changed: 2 additions & 2 deletions
@@ -23,9 +23,9 @@ serde_json = "1.0"
 tabled = "0.14.0"
 text-generation-client = { path = "../router/client" }
 thiserror = "1.0.48"
-tokenizers = { version = "0.14.0", features = ["http"] }
+tokenizers = { workspace = true }
 tokio = { version = "1.32.0", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync", "macros"] }
 tui = {package = "ratatui", version = "0.23", default-features = false, features = ["crossterm"]}
 tracing = "0.1.37"
 tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] }
-hf-hub = "0.3.1"
+hf-hub = { workspace = true }

docs/openapi.json

Lines changed: 29 additions & 9 deletions
@@ -408,9 +408,14 @@
         },
         "responses": {
           "200": {
-            "description": "Generated Text",
+            "description": "Generated Chat Completion",
             "content": {
               "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ChatCompletion"
+                }
+              },
+              "text/event-stream": {
                 "schema": {
                   "$ref": "#/components/schemas/ChatCompletionChunk"
                 }
@@ -492,11 +497,16 @@
         },
         "responses": {
           "200": {
-            "description": "Generated Text",
+            "description": "Generated Chat Completion",
             "content": {
               "application/json": {
                 "schema": {
-                  "$ref": "#/components/schemas/ChatCompletionChunk"
+                  "$ref": "#/components/schemas/Completion"
+                }
+              },
+              "text/event-stream": {
+                "schema": {
+                  "$ref": "#/components/schemas/CompletionCompleteChunk"
                 }
               }
             }
@@ -930,7 +940,7 @@
         "tool_prompt": {
           "type": "string",
           "description": "A prompt to be appended before the tools",
-          "example": "\"Based on the conversation, please choose the most appropriate tool to use: \"",
+          "example": "\"You will be presented with a JSON schema representing a set of tools.\nIf the user request lacks of sufficient information to make a precise tool selection: Do not invent any tool's properties, instead notify with an error message.\n\nJSON Schema:\n\"",
           "nullable": true
         },
         "tools": {
@@ -1071,7 +1081,10 @@
           "example": "mistralai/Mistral-7B-Instruct-v0.2"
         },
         "prompt": {
-          "type": "string",
+          "type": "array",
+          "items": {
+            "type": "string"
+          },
           "description": "The prompt to generate completions for.",
           "example": "What is Deep Learning?"
         },
@@ -1234,17 +1247,17 @@
         "type": "object",
         "required": [
           "name",
-          "parameters"
+          "arguments"
         ],
         "properties": {
+          "arguments": {},
          "description": {
            "type": "string",
            "nullable": true
          },
          "name": {
            "type": "string"
-          },
-          "parameters": {}
+          }
         }
       },
       "GenerateParameters": {
@@ -1260,7 +1273,7 @@
         },
         "decoder_input_details": {
           "type": "boolean",
-          "default": "true"
+          "default": "false"
         },
         "details": {
           "type": "boolean",
@@ -1285,6 +1298,7 @@
             "$ref": "#/components/schemas/GrammarType"
           }
         ],
+        "default": "null",
         "nullable": true
       },
       "max_new_tokens": {
@@ -1478,6 +1492,7 @@
         "max_batch_total_tokens",
         "max_waiting_tokens",
         "validation_workers",
+        "max_client_batch_size",
         "version"
       ],
       "properties": {
@@ -1503,6 +1518,11 @@
         "example": "2",
         "minimum": 0
       },
+      "max_client_batch_size": {
+        "type": "integer",
+        "example": "32",
+        "minimum": 0
+      },
       "max_concurrent_requests": {
         "type": "integer",
         "description": "Router Parameters",

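Two of the changes above affect what a completions client sends and receives: the Completion schema's prompt is now an array of strings, and the route documents both an application/json response (Completion) and a text/event-stream response (CompletionCompleteChunk). A minimal client sketch, assuming the route is /v1/completions on a local server and that reqwest (with the json feature), tokio, and serde_json are available; only "model" and "prompt" are taken from the schema excerpt above:

// Sketch: send a completions request whose "prompt" is an array of strings,
// matching the updated Completion schema. Endpoint path and address are assumptions.
use serde_json::json;

#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    let body = json!({
        "model": "mistralai/Mistral-7B-Instruct-v0.2",
        "prompt": ["What is Deep Learning?", "Define backpropagation."]
    });

    let text = reqwest::Client::new()
        .post("http://localhost:3000/v1/completions")
        .json(&body)
        .send()
        .await?
        .text()
        .await?;

    // Expect a Completion object here; the same route also documents a
    // text/event-stream body of CompletionCompleteChunk events for streaming.
    println!("{text}");
    Ok(())
}
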
router/Cargo.toml

Lines changed: 2 additions & 2 deletions
@@ -21,7 +21,7 @@ axum-tracing-opentelemetry = "0.14.1"
 text-generation-client = { path = "client" }
 clap = { version = "4.4.5", features = ["derive", "env"] }
 futures = "0.3.28"
-hf-hub = { version = "0.3.0", features = ["tokio"] }
+hf-hub = { workspace = true }
 jsonschema = { version = "0.17.1", features = ["draft202012"] }
 metrics = "0.21.1"
 metrics-exporter-prometheus = { version = "0.12.1", features = [] }
@@ -33,7 +33,7 @@ reqwest = { version = "0.11.20", features = [] }
 serde = "1.0.188"
 serde_json = "1.0.107"
 thiserror = "1.0.48"
-tokenizers = { version = "0.15.1", features = ["http"] }
+tokenizers = { workspace = true}
 tokio = { version = "1.32.0", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync"] }
 tokio-stream = "0.1.14"
 tower-http = { version = "0.4.4", features = ["cors"] }

router/src/lib.rs

Lines changed: 2 additions & 1 deletion
@@ -238,7 +238,7 @@ pub(crate) struct GenerateParameters {
     #[schema(default = "true")]
     pub details: bool,
     #[serde(default)]
-    #[schema(default = "true")]
+    #[schema(default = "false")]
     pub decoder_input_details: bool,
     #[serde(default)]
     #[schema(
@@ -252,6 +252,7 @@ pub(crate) struct GenerateParameters {
     #[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 5)]
     pub top_n_tokens: Option<u32>,
     #[serde(default)]
+    #[schema(nullable = true, default = "null", example = "null")]
     pub grammar: Option<GrammarType>,
 }
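
The #[schema(...)] edits only change the generated OpenAPI documentation; the runtime defaults come from #[serde(default)], which already deserializes a missing decoder_input_details to false and a missing grammar to None. A self-contained sketch of that behavior, with the struct pared down to the two fields in these hunks and GrammarType stood in by String so the example compiles on its own (serde with derive and serde_json assumed):

// Illustration of the defaults the updated #[schema(...)] attributes document:
// #[serde(default)] fills omitted fields with false / None at deserialization.
use serde::Deserialize;

#[derive(Deserialize, Debug)]
struct GenerateParameters {
    #[serde(default)]
    decoder_input_details: bool,
    // GrammarType is replaced by String here purely to keep the sketch standalone.
    #[serde(default)]
    grammar: Option<String>,
}

fn main() {
    let params: GenerateParameters = serde_json::from_str("{}").unwrap();
    // Prints: decoder_input_details = false, grammar = None
    println!(
        "decoder_input_details = {}, grammar = {:?}",
        params.decoder_input_details, params.grammar
    );
}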
