diff --git a/Cargo.lock b/Cargo.lock index 915de0d582e..547cff9b78f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -24,7 +24,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", - "getrandom", + "getrandom 0.2.15", "once_cell", "serde", "version_check", @@ -48,9 +48,9 @@ checksum = "4aa90d7ce82d4be67b64039a3d588d38dbcc6736577de4a847025ce5b0c468d1" [[package]] name = "allocator-api2" -version = "0.2.20" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45862d1c77f2228b9e10bc609d5bc203d86ebc9b87ad8d5d5167a6c9abf739d9" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" [[package]] name = "android-tzdata" @@ -108,19 +108,20 @@ dependencies = [ [[package]] name = "anstyle-wincon" -version = "3.0.6" +version = "3.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" +checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e" dependencies = [ "anstyle", + "once_cell", "windows-sys 0.59.0", ] [[package]] name = "anyhow" -version = "1.0.93" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" +checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" [[package]] name = "arbitrary" @@ -142,7 +143,7 @@ checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] @@ -181,18 +182,18 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] name = "async-trait" -version = "0.1.83" +version = "0.1.85" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" +checksum = "3f934833b4b7233644e5848f235df3f57ed8c80f1528a26c3dfa13d2147fa056" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] @@ -266,28 +267,26 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.11.0" +version = "1.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe7c2840b66236045acd2607d5866e274380afd87ef99d6226e961e2cb47df45" +checksum = "4c2b7ddaa2c56a367ad27a094ad8ef4faacf8a617c2575acb2ba88949df999ca" dependencies = [ "aws-lc-sys", - "mirai-annotations", "paste", "zeroize", ] [[package]] name = "aws-lc-sys" -version = "0.23.0" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad3a619a9de81e1d7de1f1186dcba4506ed661a0e483d84410fdef0ee87b2f96" +checksum = "71b2ddd3ada61a305e1d8bb6c005d1eaa7d14d903681edfc400406d523a9b491" dependencies = [ - "bindgen", + "bindgen 0.69.5", "cc", "cmake", "dunce", "fs_extra", - "libc", "paste", ] @@ -304,7 +303,7 @@ dependencies = [ "futures-util", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.31", + "hyper 0.14.32", "itoa", "matchit", "memchr", @@ -333,10 +332,10 @@ dependencies = [ "axum-core 0.4.5", "bytes", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", "http-body-util", - "hyper 1.5.1", + "hyper 1.6.0", "hyper-util", "itoa", "matchit", @@ -351,7 +350,7 @@ dependencies = [ "serde_urlencoded", "sync_wrapper 1.0.2", "tokio", - "tower 0.5.1", + "tower 0.5.2", "tower-layer", "tower-service", "tracing", @@ -383,7 +382,7 @@ dependencies = [ "async-trait", "bytes", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", "http-body-util", "mime", @@ -404,7 +403,7 @@ dependencies = [ "axum 0.7.9", "futures-core", "futures-util", - "http 1.1.0", + "http 1.2.0", "opentelemetry 0.21.0", "pin-project-lite", "tower 0.4.13", @@ -452,7 +451,7 @@ 
version = "0.69.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.8.0", "cexpr", "clang-sys", "itertools 0.12.1", @@ -463,12 +462,32 @@ dependencies = [ "proc-macro2", "quote", "regex", - "rustc-hash", + "rustc-hash 1.1.0", "shlex", - "syn 2.0.89", + "syn 2.0.96", "which", ] +[[package]] +name = "bindgen" +version = "0.71.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" +dependencies = [ + "bitflags 2.8.0", + "cexpr", + "clang-sys", + "itertools 0.13.0", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash 2.1.0", + "shlex", + "syn 2.0.96", +] + [[package]] name = "bit-set" version = "0.8.0" @@ -498,9 +517,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.6.0" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" [[package]] name = "bitstream-io" @@ -531,9 +550,9 @@ checksum = "c360505aed52b7ec96a3636c3f039d99103c37d1d9b4f7a8c743d3ea9ffcd03b" [[package]] name = "bumpalo" -version = "3.16.0" +version = "3.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" +checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" [[package]] name = "bytecount" @@ -543,9 +562,9 @@ checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" [[package]] name = "bytemuck" -version = "1.20.0" +version = "1.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8b37c88a63ffd85d15b406896cc343916d7cf57838a847b3a6f2ca5d39a5695a" +checksum = "ef657dfab802224e671f5818e9a4935f9b1957ed18e58292690cc39e7a4092a3" [[package]] name = "byteorder" @@ -561,9 +580,9 @@ checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" [[package]] name = "bytes" -version = "1.8.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da" +checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" [[package]] name = "camino" @@ -576,9 +595,9 @@ dependencies = [ [[package]] name = "cargo-platform" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24b1f0365a6c6bb4020cd05806fd0d33c44d38046b8bd7f0e40814b9763cabfc" +checksum = "e35af189006b9c0f00a064685c727031e3ed2d8020f7ba284d78cc2671bd36ea" dependencies = [ "serde", ] @@ -594,7 +613,7 @@ dependencies = [ "semver", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -620,9 +639,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.1" +version = "1.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd9de9f2205d5ef3fd67e685b0df337994ddd4495e2a28d185500d0e1edfea47" +checksum = "13208fcbb66eaeffe09b99fffbe1af420f00a7b35aa99ad683dfc1aa76145229" dependencies = [ "jobserver", "libc", @@ -704,9 +723,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.21" +version = "4.5.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb3b4b9e5a7c7514dfa52869339ee98b3156b0bfb4e8a77c4ff4babb64b1604f" +checksum = "769b0145982b4b48713e01ec42d61614425f27b7058bda7180a3a41f30104796" dependencies = [ "clap_builder", "clap_derive", @@ -714,9 +733,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.21" +version = "4.5.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b17a95aa67cc7b5ebd32aa5370189aa0d79069ef1c64ce893bd30fb24bff20ec" +checksum = "1b26884eb4b57140e4d2d93652abfa49498b938b3c9179f9fc487b0acc3edad7" dependencies = [ "anstream", "anstyle", @@ -726,27 +745,27 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.18" +version = "4.5.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab" +checksum = "54b755194d6389280185988721fffba69495eed5ee9feeee9a599b53db80318c" dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] name = "clap_lex" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afb84c814227b90d6895e01398aee0d8033c00e7466aca416fb6a8e0eb19d8a7" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" [[package]] name = "cmake" -version = "0.1.51" +version = "0.1.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb1e43aa7fd152b1f968787f7dbcdeb306d1867ff373c69955211876c053f91a" +checksum = "e24a03c8b52922d68a1589ad61032f2c1aa5a8158d2aa0d93c6e9534944bbad6" dependencies = [ "cc", ] @@ -775,9 +794,9 @@ checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" [[package]] name = "compact_str" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6050c3a16ddab2e412160b31f2c871015704239bca62f72f6e5f0be631d3f644" +checksum = "3b79c4069c6cad78e2e0cdfcbd26275770669fb39fd308a752dc110e83b9af32" dependencies = [ "castaway", "cfg-if", @@ -789,15 +808,15 @@ dependencies = [ [[package]] name = "console" -version = "0.15.8" +version = "0.15.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" +checksum = "ea3c6ecd8059b57859df5c69830340ed3c41d30e3da0c1cbed90a96ac853041b" dependencies = [ 
"encode_unicode", - "lazy_static", "libc", - "unicode-width 0.1.14", - "windows-sys 0.52.0", + "once_cell", + "unicode-width 0.2.0", + "windows-sys 0.59.0", ] [[package]] @@ -828,9 +847,9 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "cpufeatures" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16b80225097f2e5ae4e7179dd2266824648f3e2f49d9134d584b76389d31c4c3" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" dependencies = [ "libc", ] @@ -882,18 +901,18 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.13" +version = "0.5.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +checksum = "06ba6d68e24814cb8de6bb986db8222d3a027d15872cabc0d18817bc3c0e4471" dependencies = [ "crossbeam-utils", ] [[package]] name = "crossbeam-deque" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" dependencies = [ "crossbeam-epoch", "crossbeam-utils", @@ -910,9 +929,9 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.20" +version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "crossterm" @@ -920,7 +939,7 @@ version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.8.0", "crossterm_winapi", "mio", "parking_lot", @@ -941,9 +960,9 @@ 
dependencies = [ [[package]] name = "crunchy" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" +checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" [[package]] name = "crypto-common" @@ -988,46 +1007,61 @@ dependencies = [ [[package]] name = "cxx" -version = "1.0.130" +version = "1.0.137" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23c042a0ba58aaff55299632834d1ea53ceff73d62373f62c9ae60890ad1b942" +checksum = "0fc894913dccfed0f84106062c284fa021c3ba70cb1d78797d6f5165d4492e45" dependencies = [ "cc", + "cxxbridge-cmd", "cxxbridge-flags", "cxxbridge-macro", + "foldhash", "link-cplusplus", ] [[package]] name = "cxx-build" -version = "1.0.130" +version = "1.0.137" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45dc1c88d0fdac57518a9b1f6c4f4fb2aca8f3c30c0d03d7d8518b47ca0bcea6" +checksum = "503b2bfb6b3e8ce7f95d865a67419451832083d3186958290cee6c53e39dfcfe" dependencies = [ "cc", "codespan-reporting", "proc-macro2", "quote", "scratch", - "syn 2.0.89", + "syn 2.0.96", +] + +[[package]] +name = "cxxbridge-cmd" +version = "1.0.137" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0d2cb64a95b4b5a381971482235c4db2e0208302a962acdbe314db03cbbe2fb" +dependencies = [ + "clap 4.5.27", + "codespan-reporting", + "proc-macro2", + "quote", + "syn 2.0.96", ] [[package]] name = "cxxbridge-flags" -version = "1.0.130" +version = "1.0.137" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa7ed7d30b289e2592cc55bc2ccd89803a63c913e008e6eb59f06cddf45bb52f" +checksum = "5f797b0206463c9c2a68ed605ab28892cca784f1ef066050f4942e3de26ad885" [[package]] name = "cxxbridge-macro" -version = "1.0.130" +version = "1.0.137" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0b8c465d22de46b851c04630a5fc749a26005b263632ed2e0d9cc81518ead78d" +checksum = "e79010a2093848e65a3e0f7062d3f02fb2ef27f866416dfe436fccfa73d3bb59" dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] @@ -1051,7 +1085,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] @@ -1062,7 +1096,7 @@ checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ "darling_core", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] @@ -1092,7 +1126,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] @@ -1102,15 +1136,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ "derive_builder_core", - "syn 2.0.89", + "syn 2.0.96", ] -[[package]] -name = "diff" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" - [[package]] name = "digest" version = "0.10.7" @@ -1150,7 +1178,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] @@ -1161,9 +1189,9 @@ checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" [[package]] name = "easy-cast" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10936778145f3bea71fd9bf61332cce28c28e96a380714f7ab34838b80733fd6" +checksum = "72852736692ec862655eca398c9bb1b476161b563c9f80f45f4808b9629750d6" dependencies = [ "libm", ] @@ -1185,9 +1213,9 @@ dependencies = [ [[package]] name = "encode_unicode" -version = "0.3.6" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" [[package]] name = "encoding_rs" @@ -1206,12 +1234,12 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.9" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -1251,15 +1279,15 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.2.0" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "fdeflate" -version = "0.3.6" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07c6f4c64c1d33a3111c4466f7365ebdcc37c5bd1ea0d62aae2e3d722aacbedb" +checksum = "1e6853b52649d4ac5c0bd02320cddc5ba956bdb407c4b75a2c6b75bf51500f8c" dependencies = [ "simd-adler32", ] @@ -1311,9 +1339,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "foldhash" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f81ec6369c545a7d40e4589b5597581fa1c441fe1cce96dd1de43159910a36a2" +checksum = "a0d2fde1f7b3d48b8395d5f2de76c18a528bd6a9cdde438df747bfcba3e05d6f" [[package]] name = "foreign-types" @@ -1411,7 +1439,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] @@ -1471,7 +1499,19 @@ checksum = 
"c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", "libc", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.13.3+wasi-0.2.2", + "windows-targets 0.52.6", ] [[package]] @@ -1492,9 +1532,9 @@ checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "glob" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" [[package]] name = "grpc-metadata" @@ -1518,7 +1558,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.6.0", + "indexmap 2.7.1", "slab", "tokio", "tokio-util", @@ -1536,8 +1576,8 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http 1.1.0", - "indexmap 2.6.0", + "http 1.2.0", + "indexmap 2.7.1", "slab", "tokio", "tokio-util", @@ -1577,9 +1617,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.1" +version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" dependencies = [ "allocator-api2", "equivalent", @@ -1629,18 +1669,18 @@ dependencies = [ "reqwest 0.11.27", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", "tokio", "ureq", ] [[package]] name = "home" -version = "0.5.9" +version = "0.5.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +checksum = 
"589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -1667,9 +1707,9 @@ dependencies = [ [[package]] name = "http" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" +checksum = "f16ca2af56261c99fba8bac40a10251ce8188205a4c448fbb745a2e4daa76fea" dependencies = [ "bytes", "fnv", @@ -1694,7 +1734,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http 1.1.0", + "http 1.2.0", ] [[package]] @@ -1705,16 +1745,16 @@ checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" dependencies = [ "bytes", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", "pin-project-lite", ] [[package]] name = "httparse" -version = "1.9.5" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d71d3574edd2771538b901e6549113b4006ece66150fb69c0fb6d9a2adae946" +checksum = "f2d708df4e7140240a16cd6ab0ab65c972d7433ab77819ea693fde9c43811e2a" [[package]] name = "httpdate" @@ -1724,9 +1764,9 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" [[package]] name = "hyper" -version = "0.14.31" +version = "0.14.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c08302e8fa335b151b788c775ff56e7a03ae64ff85c548ee820fecb70356e85" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" dependencies = [ "bytes", "futures-channel", @@ -1748,15 +1788,15 @@ dependencies = [ [[package]] name = "hyper" -version = "1.5.1" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97818827ef4f364230e16705d4706e2897df2bb60617d6ca15d598025a3c481f" +checksum = 
"cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80" dependencies = [ "bytes", "futures-channel", "futures-util", "h2 0.4.7", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", "httparse", "httpdate", @@ -1769,16 +1809,16 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.27.3" +version = "0.27.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333" +checksum = "2d191583f3da1305256f22463b9bb0471acad48a4e534a5218b9963e9c1f59b2" dependencies = [ "futures-util", - "http 1.1.0", - "hyper 1.5.1", + "http 1.2.0", + "hyper 1.6.0", "hyper-util", "log", - "rustls 0.23.17", + "rustls 0.23.21", "rustls-native-certs", "rustls-pki-types", "tokio", @@ -1792,7 +1832,7 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1" dependencies = [ - "hyper 0.14.31", + "hyper 0.14.32", "pin-project-lite", "tokio", "tokio-io-timeout", @@ -1805,7 +1845,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" dependencies = [ "bytes", - "hyper 0.14.31", + "hyper 0.14.32", "native-tls", "tokio", "tokio-native-tls", @@ -1820,9 +1860,9 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", - "hyper 1.5.1", + "hyper 1.6.0", "pin-project-lite", "socket2", "tokio", @@ -1968,7 +2008,7 @@ checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] @@ -2023,9 +2063,9 @@ dependencies = [ [[package]] name = "image-webp" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e031e8e3d94711a9ccb5d6ea357439ef3dcbed361798bd4071dc4d9793fbe22f" +checksum = 
"b77d01e822461baa8409e156015a1d91735549f0f2c17691bd2d996bef238f7f" dependencies = [ "byteorder-lite", "quick-error", @@ -2049,20 +2089,20 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.6.0" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" +checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" dependencies = [ "equivalent", - "hashbrown 0.15.1", + "hashbrown 0.15.2", "serde", ] [[package]] name = "indicatif" -version = "0.17.9" +version = "0.17.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbf675b85ed934d3c67b5c5469701eec7db22689d0a2139d856e0925fa28b281" +checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" dependencies = [ "console", "number_prefix", @@ -2085,23 +2125,22 @@ checksum = "94bd26b1b737bc11f183620072e188d1c6ede67e0e78682228d66b49ec510e17" dependencies = [ "opentelemetry 0.20.0", "opentelemetry-otlp", - "thiserror", + "thiserror 1.0.69", "tracing", "tracing-opentelemetry 0.21.0", ] [[package]] name = "instability" -version = "0.3.3" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b829f37dead9dc39df40c2d3376c179fdfd2ac771f53f55d3c30dc096a3c0c6e" +checksum = "0bf9fed6d91cfb734e7476a06bde8300a1b94e217e1b523b6f0cd1a01998c71d" dependencies = [ "darling", "indoc", - "pretty_assertions", "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] @@ -2112,14 +2151,14 @@ checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] name = "ipnet" -version = "2.10.1" +version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddc24109865250148c2e0f3d25d4f0f479571723792d3802153c60922a4fb708" +checksum = 
"469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" [[package]] name = "is_terminal_polyfill" @@ -2165,9 +2204,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "540654e97a3f4470a492cd30ff187bc95d89557a903a2bbf112e2fae98104ef2" +checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" [[package]] name = "jobserver" @@ -2186,18 +2225,19 @@ checksum = "f5d4a7da358eff58addd2877a45865158f0d78c911d43a5784ceb7bbf52833b0" [[package]] name = "js-sys" -version = "0.3.72" +version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" dependencies = [ + "once_cell", "wasm-bindgen", ] [[package]] name = "jsonschema" -version = "0.28.0" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74d8eb539cdb4222da29bb658cc9881aa2477b33fb1a74c5c31450395fc1a4b2" +checksum = "4b8f66fe41fa46a5c83ed1c717b7e0b4635988f427083108c8cf0a882cc13441" dependencies = [ "ahash", "base64 0.22.1", @@ -2212,7 +2252,7 @@ dependencies = [ "percent-encoding", "referencing", "regex-syntax 0.8.5", - "reqwest 0.12.9", + "reqwest 0.12.12", "serde", "serde_json", "uuid-simd", @@ -2244,9 +2284,9 @@ checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" [[package]] name = "libfuzzer-sys" -version = "0.4.8" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b9569d2f74e257076d8c6bfa73fb505b46b851e51ddaecc825944aa3bed17fa" +checksum = "cf78f52d400cf2d84a3a973a78a592b4adc535739e0a5597a0da6f0c357adc75" dependencies = [ "arbitrary", "cc", @@ -2254,9 +2294,9 @@ dependencies = [ [[package]] name = "libloading" -version = "0.8.5" +version = "0.8.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" +checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" dependencies = [ "cfg-if", "windows-targets 0.52.6", @@ -2274,7 +2314,7 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.8.0", "libc", ] @@ -2289,15 +2329,15 @@ dependencies = [ [[package]] name = "linux-raw-sys" -version = "0.4.14" +version = "0.4.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" [[package]] name = "litemap" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" +checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" [[package]] name = "lock_api" @@ -2311,9 +2351,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.22" +version = "0.4.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f" [[package]] name = "loop9" @@ -2330,7 +2370,7 @@ version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" dependencies = [ - "hashbrown 0.15.1", + "hashbrown 0.15.2", ] [[package]] @@ -2413,15 +2453,15 @@ checksum = "b4f0c8427b39666bf970460908b213ec09b3b350f20c0c2eabcbba51704a08e6" dependencies = [ "base64 0.22.1", "http-body-util", - "hyper 1.5.1", + "hyper 1.6.0", "hyper-rustls", 
"hyper-util", - "indexmap 2.6.0", + "indexmap 2.7.1", "ipnet", "metrics", "metrics-util", "quanta", - "thiserror", + "thiserror 1.0.69", "tokio", "tracing", ] @@ -2459,9 +2499,9 @@ dependencies = [ [[package]] name = "minijinja" -version = "2.5.0" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c37e1b517d1dcd0e51dc36c4567b9d5a29262b3ec8da6cb5d35e27a8fb529b5" +checksum = "cff7b8df5e85e30b87c2b0b3f58ba3a87b68e133738bf512a7713769326dbca9" dependencies = [ "serde", "serde_json", @@ -2469,9 +2509,9 @@ dependencies = [ [[package]] name = "minijinja-contrib" -version = "2.5.0" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fe51f1a6a8285f03fcd1544d834234fe8db285f29e1c2253600c93b3ae19242" +checksum = "7ac3e47a9006ed0500425a092c9f8b2e56d10f8aeec8ce870c5e8a7c6ef2d7c3" dependencies = [ "minijinja", "serde", @@ -2485,9 +2525,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.8.0" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +checksum = "b8402cab7aefae129c6977bb0ff1b8fd9a04eb5b51efc50a70bea51cda0c7924" dependencies = [ "adler2", "simd-adler32", @@ -2495,23 +2535,16 @@ dependencies = [ [[package]] name = "mio" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" dependencies = [ - "hermit-abi 0.3.9", "libc", "log", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", "windows-sys 0.52.0", ] -[[package]] -name = "mirai-annotations" -version = "1.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c9be0862c1b3f26a88803c4a49de6889c10e608b3ee9344e6ef5b45fb37ad3d1" - [[package]] name = "monostate" version = "0.1.13" @@ -2530,7 +2563,7 @@ checksum = "a7ce64b975ed4f123575d11afd9491f2e37bbd5813fbfbc0f09ae1fbddea74e0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] @@ -2552,7 +2585,7 @@ dependencies = [ "futures", "pin-project", "rand", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-util", "tracing", @@ -2560,9 +2593,9 @@ dependencies = [ [[package]] name = "native-tls" -version = "0.2.12" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8614eb2c83d59d1c8cc974dd3f920198647674a0a035e1af1fa58707e317466" +checksum = "0dab59f8e050d5df8e4dd87d9206fb6f65a483e20ac9fda365ade4fab353196c" dependencies = [ "libc", "log", @@ -2596,7 +2629,7 @@ dependencies = [ "bytes", "futures", "hostname", - "hyper 0.14.31", + "hyper 0.14.32", "muxado", "once_cell", "parking_lot", @@ -2604,7 +2637,7 @@ dependencies = [ "rustls-pemfile", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-retry", "tokio-util", @@ -2618,7 +2651,7 @@ version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.8.0", "cfg-if", "cfg_aliases 0.1.1", "libc", @@ -2630,7 +2663,7 @@ version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.8.0", "cfg-if", "cfg_aliases 0.2.1", "libc", @@ -2730,7 +2763,7 @@ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] @@ -2801,9 +2834,9 @@ checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] name = "object" 
-version = "0.36.5" +version = "0.36.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" dependencies = [ "memchr", ] @@ -2844,11 +2877,11 @@ checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" [[package]] name = "openssl" -version = "0.10.68" +version = "0.10.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6174bc48f102d208783c2c84bf931bb75927a617866870de8a4ea85597f871f5" +checksum = "f5e534d133a060a3c19daec1eb3e98ec6f4685978834f2dbadfe2ec215bab64e" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.8.0", "cfg-if", "foreign-types", "libc", @@ -2865,14 +2898,14 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] name = "openssl-probe" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" +checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "openssl-sys" @@ -2904,11 +2937,11 @@ checksum = "1e32339a5dc40459130b3bd269e9892439f55b33e772d2a9d402a789baaf4e8a" dependencies = [ "futures-core", "futures-sink", - "indexmap 2.6.0", + "indexmap 2.7.1", "js-sys", "once_cell", "pin-project-lite", - "thiserror", + "thiserror 1.0.69", "urlencoding", ] @@ -2926,7 +2959,7 @@ dependencies = [ "opentelemetry_api", "opentelemetry_sdk 0.20.0", "prost 0.11.9", - "thiserror", + "thiserror 1.0.69", "tokio", "tonic 0.9.2", ] @@ -2964,7 +2997,7 @@ dependencies = [ "js-sys", "once_cell", "pin-project-lite", - "thiserror", + "thiserror 1.0.69", "urlencoding", ] @@ -2986,7 +3019,7 @@ dependencies = [ "rand", "regex", "serde_json", - "thiserror", + "thiserror 1.0.69", "tokio", 
"tokio-stream", ] @@ -3005,10 +3038,10 @@ dependencies = [ "glob", "once_cell", "opentelemetry 0.21.0", - "ordered-float 4.5.0", + "ordered-float 4.6.0", "percent-encoding", "rand", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -3028,9 +3061,9 @@ dependencies = [ [[package]] name = "ordered-float" -version = "4.5.0" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c65ee1f9701bf938026630b455d5315f490640234259037edb259798b3bcf85e" +checksum = "7bb71e1b3fa6ca1c61f383464aaf2bb0e2f8e772a1f01d486832464de363b951" dependencies = [ "num-traits", ] @@ -3048,9 +3081,9 @@ dependencies = [ [[package]] name = "outref" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a" +checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" [[package]] name = "overload" @@ -3111,34 +3144,34 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ "fixedbitset", - "indexmap 2.6.0", + "indexmap 2.7.1", ] [[package]] name = "pin-project" -version = "1.1.7" +version = "1.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be57f64e946e500c8ee36ef6331845d40a93055567ec57e8fae13efd33759b95" +checksum = "1e2ec53ad785f4d35dac0adea7f7dc6f1bb277ad84a680c7afefeae05d1f5916" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.7" +version = "1.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c0f5fad0874fc7abcd4d750e76917eaebbecaa2c20bde22e1dbeeba8beb758c" +checksum = "d56a66c0c55993aa927429d0f8a0abfd74f084e4d9c192cffed01e418d83eefb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] name = "pin-project-lite" -version = "0.2.15" +version = "0.2.16" source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" [[package]] name = "pin-utils" @@ -3182,9 +3215,9 @@ dependencies = [ [[package]] name = "png" -version = "0.17.14" +version = "0.17.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52f9d46a34a05a6a57566bc2bfae066ef07585a6e3fa30fbbdff5936380623f0" +checksum = "82151a2fc869e011c153adc57cf2789ccb8d9906ce52c0b39a6b5697749d7526" dependencies = [ "bitflags 1.3.2", "crc32fast", @@ -3195,9 +3228,9 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc9c68a3f6da06753e9335d63e27f6b9754dd1920d941135b7ea8224f141adb2" +checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" [[package]] name = "powerfmt" @@ -3214,24 +3247,14 @@ dependencies = [ "zerocopy", ] -[[package]] -name = "pretty_assertions" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d" -dependencies = [ - "diff", - "yansi", -] - [[package]] name = "prettyplease" -version = "0.2.25" +version = "0.2.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64d1ec885c64d0457d564db4ec299b2dae3f9c02808b8ad9c3a089c591b18033" +checksum = "6924ced06e1f7dfe3fa48d57b9f74f55d8915f5036121bef647ef4b204895fac" dependencies = [ "proc-macro2", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] @@ -3260,9 +3283,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.92" +version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" +checksum = 
"60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" dependencies = [ "unicode-ident", ] @@ -3283,7 +3306,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a65f2e60fbf1063868558d69c6beacf412dc755f9fc020f514b7955fc914fe30" dependencies = [ "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] @@ -3323,7 +3346,7 @@ dependencies = [ "prost 0.12.6", "prost-types", "regex", - "syn 2.0.89", + "syn 2.0.96", "tempfile", ] @@ -3350,7 +3373,7 @@ dependencies = [ "itertools 0.12.1", "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] @@ -3409,7 +3432,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] @@ -3422,7 +3445,7 @@ dependencies = [ "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] @@ -3436,15 +3459,15 @@ dependencies = [ [[package]] name = "quanta" -version = "0.12.3" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5167a477619228a0b284fac2674e3c388cba90631d7b7de620e6f1fcd08da5" +checksum = "3bd1fe6824cea6538803de3ff1bc0cf3949024db3d43c9643024bfb33a807c0e" dependencies = [ "crossbeam-utils", "libc", "once_cell", "raw-cpuid", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", "web-sys", "winapi", ] @@ -3457,9 +3480,9 @@ checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" [[package]] name = "quote" -version = "1.0.37" +version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" dependencies = [ "proc-macro2", ] @@ -3491,7 +3514,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom", + "getrandom 0.2.15", ] 
[[package]] @@ -3500,7 +3523,7 @@ version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdef7f9be5c0122f890d58bdf4d964349ba6a6161f705907526d891efabba57d" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.8.0", "cassowary", "compact_str", "crossterm", @@ -3545,7 +3568,7 @@ dependencies = [ "rand_chacha", "simd_helpers", "system-deps", - "thiserror", + "thiserror 1.0.69", "v_frame", "wasm-bindgen", ] @@ -3567,11 +3590,11 @@ dependencies = [ [[package]] name = "raw-cpuid" -version = "11.2.0" +version = "11.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ab240315c661615f2ee9f0f2cd32d5a7343a84d5ebcccb99d46e6637565e7b0" +checksum = "c6928fa44c097620b706542d428957635951bade7143269085389d42c8a4927e" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.8.0", ] [[package]] @@ -3607,11 +3630,11 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.7" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" +checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.8.0", ] [[package]] @@ -3620,9 +3643,9 @@ version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ - "getrandom", + "getrandom 0.2.15", "libredox", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -3642,14 +3665,14 @@ checksum = "bcc303e793d3734489387d205e9b186fac9c6cfacedd98cbb2e8a5943595f3e6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] name = "referencing" -version = "0.28.0" +version = "0.28.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "093a875008827c0ae15c746189966e162faa05bf347719d06302c548ac63630f" +checksum = 
"d0dcb5ab28989ad7c91eb1b9531a37a1a137cc69a0499aee4117cae4a107c464" dependencies = [ "ahash", "fluent-uri", @@ -3716,7 +3739,7 @@ dependencies = [ "h2 0.3.26", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.31", + "hyper 0.14.32", "hyper-tls", "ipnet", "js-sys", @@ -3744,19 +3767,19 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.9" +version = "0.12.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f" +checksum = "43e734407157c3c2034e0258f5e4473ddb361b1e85f95a66690d67264d7cd1da" dependencies = [ "base64 0.22.1", "bytes", "futures-channel", "futures-core", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", "http-body-util", - "hyper 1.5.1", + "hyper 1.6.0", "hyper-util", "ipnet", "js-sys", @@ -3770,6 +3793,7 @@ dependencies = [ "serde_urlencoded", "sync_wrapper 1.0.2", "tokio", + "tower 0.5.2", "tower-service", "url", "wasm-bindgen", @@ -3807,7 +3831,7 @@ checksum = "c17fa4cb658e3583423e915b9f3acc01cceaee1860e33d59ebae66adc3a2dc0d" dependencies = [ "cc", "cfg-if", - "getrandom", + "getrandom 0.2.15", "libc", "spin 0.9.8", "untrusted 0.9.0", @@ -3834,7 +3858,7 @@ dependencies = [ "proc-macro2", "quote", "rust-embed-utils", - "syn 2.0.89", + "syn 2.0.96", "walkdir", ] @@ -3860,6 +3884,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "rustc-hash" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497" + [[package]] name = "rustc_version" version = "0.4.1" @@ -3871,15 +3901,15 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.41" +version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.8.0", "errno", "libc", "linux-raw-sys", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -3910,9 +3940,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.17" +version = "0.23.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f1a745511c54ba6d4465e8d5dfbd81b45791756de28d4981af70d6dca128f1e" +checksum = "8f287924602bf649d949c63dc8ac8b235fa5387d394020705b80c4eb597ce5b8" dependencies = [ "aws-lc-rs", "log", @@ -3932,7 +3962,7 @@ dependencies = [ "openssl-probe", "rustls-pki-types", "schannel", - "security-framework 3.0.1", + "security-framework 3.2.0", ] [[package]] @@ -3946,9 +3976,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" +checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c" [[package]] name = "rustls-webpki" @@ -3964,15 +3994,15 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.18" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" +checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" [[package]] name = "ryu" -version = "1.0.18" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd" [[package]] name = "same-file" @@ -4020,7 +4050,7 @@ version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.8.0", "core-foundation 0.9.4", "core-foundation-sys", "libc", @@ -4029,11 +4059,11 @@ dependencies = [ [[package]] name = "security-framework" -version = "3.0.1" +version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1415a607e92bec364ea2cf9264646dcce0f91e6d65281bd6f2819cca3bf39c8" +checksum = "271720403f46ca04f7ba6f55d438f8bd878d6b8ca0a1046e8228c4145bcbb316" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.8.0", "core-foundation 0.10.0", "core-foundation-sys", "libc", @@ -4042,9 +4072,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.12.1" +version = "2.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa39c7303dc58b5543c94d22c1766b0d31f2ee58306363ea622b10bbc075eaa2" +checksum = "49db231d56a190491cb4aeda9527f1ad45345af50b0851622a7adb8c03b01c32" dependencies = [ "core-foundation-sys", "libc", @@ -4052,18 +4082,18 @@ dependencies = [ [[package]] name = "semver" -version = "1.0.23" +version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" +checksum = "f79dfe2d285b0488816f30e700a7438c5a73d816b5b7d3ac72fbc48b0d185e03" dependencies = [ "serde", ] [[package]] name = "serde" -version = "1.0.215" +version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" +checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" dependencies = [ "serde_derive", ] @@ -4090,22 +4120,22 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.215" +version = "1.0.217" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" +checksum = 
"5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] name = "serde_json" -version = "1.0.133" +version = "1.0.138" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" +checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949" dependencies = [ - "indexmap 2.6.0", + "indexmap 2.7.1", "itoa", "memchr", "ryu", @@ -4246,9 +4276,9 @@ checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" [[package]] name = "socket2" -version = "0.5.7" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" dependencies = [ "libc", "windows-sys 0.52.0", @@ -4315,7 +4345,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] @@ -4337,9 +4367,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.89" +version = "2.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d46482f1c1c87acd84dea20c1bf5ebff4c757009ed6bf19cfd36fb10e92c4e" +checksum = "d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80" dependencies = [ "proc-macro2", "quote", @@ -4369,7 +4399,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] @@ -4453,12 +4483,13 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "tempfile" -version = "3.14.0" +version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" +checksum = 
"38c246215d7d24f48ae091a2902398798e05d978b24315d6efbc00ede9a8bb91" dependencies = [ "cfg-if", "fastrand", + "getrandom 0.3.1", "once_cell", "rustix", "windows-sys 0.59.0", @@ -4478,16 +4509,16 @@ name = "text-generation-backends-trtllm" version = "3.1.1-dev0" dependencies = [ "async-trait", - "clap 4.5.21", + "clap 4.5.27", "cmake", "cxx", "cxx-build", - "hashbrown 0.15.1", + "hashbrown 0.15.2", "hf-hub", "pkg-config", "pyo3", "text-generation-router", - "thiserror", + "thiserror 1.0.69", "tokenizers", "tokio", "tokio-stream", @@ -4499,7 +4530,7 @@ name = "text-generation-benchmark" version = "3.1.1-dev0" dependencies = [ "average", - "clap 4.5.21", + "clap 4.5.27", "float-ord", "hf-hub", "ratatui", @@ -4507,7 +4538,7 @@ dependencies = [ "serde_json", "tabled", "text-generation-client", - "thiserror", + "thiserror 1.0.69", "tokenizers", "tokio", "tracing", @@ -4524,7 +4555,7 @@ dependencies = [ "grpc-metadata", "prost 0.12.6", "prost-build", - "thiserror", + "thiserror 1.0.69", "tokio", "tonic 0.10.2", "tonic-build", @@ -4536,7 +4567,7 @@ dependencies = [ name = "text-generation-launcher" version = "3.1.1-dev0" dependencies = [ - "clap 4.5.21", + "clap 4.5.27", "ctrlc", "float_eq", "hf-hub", @@ -4547,7 +4578,7 @@ dependencies = [ "reqwest 0.11.27", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", "tracing", "tracing-subscriber", "vergen", @@ -4564,7 +4595,7 @@ dependencies = [ "axum-tracing-opentelemetry", "base64 0.22.1", "chrono", - "clap 4.5.21", + "clap 4.5.27", "csv", "futures", "futures-util", @@ -4590,7 +4621,7 @@ dependencies = [ "serde", "serde_json", "sysinfo", - "thiserror", + "thiserror 1.0.69", "tokenizers", "tokio", "tokio-stream", @@ -4605,6 +4636,23 @@ dependencies = [ "vergen", ] +[[package]] +name = "text-generation-router-llamacpp" +version = "3.1.1-dev0" +dependencies = [ + "async-trait", + "bindgen 0.71.1", + "clap 4.5.27", + "num_cpus", + "pkg-config", + "text-generation-router", + "thiserror 2.0.11", + "tokenizers", + "tokio", + 
"tokio-stream", + "tracing", +] + [[package]] name = "text-generation-router-v2" version = "3.1.1-dev0" @@ -4614,7 +4662,7 @@ dependencies = [ "axum 0.7.9", "axum-tracing-opentelemetry", "base64 0.22.1", - "clap 4.5.21", + "clap 4.5.27", "futures", "futures-util", "grpc-metadata", @@ -4639,7 +4687,7 @@ dependencies = [ "serde_json", "slotmap", "text-generation-router", - "thiserror", + "thiserror 1.0.69", "tokenizers", "tokio", "tokio-stream", @@ -4663,7 +4711,7 @@ dependencies = [ "axum 0.7.9", "axum-tracing-opentelemetry", "base64 0.22.1", - "clap 4.5.21", + "clap 4.5.27", "criterion", "futures", "futures-util", @@ -4690,7 +4738,7 @@ dependencies = [ "serde_json", "slotmap", "text-generation-router", - "thiserror", + "thiserror 1.0.69", "tokenizers", "tokio", "tokio-stream", @@ -4720,7 +4768,16 @@ version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc" +dependencies = [ + "thiserror-impl 2.0.11", ] [[package]] @@ -4731,7 +4788,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.96", ] [[package]] @@ -4757,9 +4825,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.36" +version = "0.3.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" +checksum = "35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21" dependencies = [ "deranged", "itoa", @@ -4780,9 +4848,9 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" -version = "0.2.18" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" +checksum = "2834e6017e3e5e4b9834939793b282bc03b37a3336245fa820e35e233e2a85de" dependencies = [ "num-conv", "time-core", @@ -4810,14 +4878,14 @@ dependencies = [ [[package]] name = "tokenizers" -version = "0.20.3" +version = "0.20.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b67c92f6d705e2a1d106fb0b28c696f9074901a9c656ee5d9f5de204c39bf7" +checksum = "3b08cc37428a476fc9e20ac850132a513a2e1ce32b6a31addf2b74fa7033b905" dependencies = [ "aho-corasick", "derive_builder", "esaxx-rs", - "getrandom", + "getrandom 0.2.15", "hf-hub", "indicatif", "itertools 0.12.1", @@ -4835,7 +4903,7 @@ dependencies = [ "serde", "serde_json", "spm_precompiled", - "thiserror", + "thiserror 1.0.69", "unicode-normalization-alignments", "unicode-segmentation", "unicode_categories", @@ -4877,7 +4945,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] @@ -4903,12 +4971,11 @@ dependencies = [ [[package]] name = "tokio-rustls" -version = "0.26.0" +version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" +checksum = "5f6d0975eaace0cf0fcadee4e4aaa5da15b5c079146f2cffb67c113be122bf37" dependencies = [ - "rustls 0.23.17", - "rustls-pki-types", + "rustls 0.23.21", "tokio", ] @@ -4925,9 +4992,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = 
"0.7.12" +version = "0.7.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" +checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078" dependencies = [ "bytes", "futures-core", @@ -4964,7 +5031,7 @@ version = "0.22.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" dependencies = [ - "indexmap 2.6.0", + "indexmap 2.7.1", "serde", "serde_spanned", "toml_datetime", @@ -4986,7 +5053,7 @@ dependencies = [ "h2 0.3.26", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.31", + "hyper 0.14.32", "hyper-timeout", "percent-encoding", "pin-project", @@ -5013,7 +5080,7 @@ dependencies = [ "h2 0.3.26", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.31", + "hyper 0.14.32", "hyper-timeout", "percent-encoding", "pin-project", @@ -5036,7 +5103,7 @@ dependencies = [ "proc-macro2", "prost-build", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] @@ -5061,14 +5128,14 @@ dependencies = [ [[package]] name = "tower" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2873938d487c3cfb9aed7546dc9f2711d867c9f90c46b889989a2cb84eba6b4f" +checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" dependencies = [ "futures-core", "futures-util", "pin-project-lite", - "sync_wrapper 0.1.2", + "sync_wrapper 1.0.2", "tokio", "tower-layer", "tower-service", @@ -5081,9 +5148,9 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.8.0", "bytes", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", "http-body-util", "pin-project-lite", @@ -5105,9 +5172,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = 
"tracing" -version = "0.1.40" +version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ "log", "pin-project-lite", @@ -5117,20 +5184,20 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.27" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] name = "tracing-core" -version = "0.1.32" +version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" dependencies = [ "once_cell", "valuable", @@ -5198,7 +5265,7 @@ version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9920abb6a3ee3a2af7d30c9ff02900f8481935d36723c3da95cf807468218e8c" dependencies = [ - "http 1.1.0", + "http 1.2.0", "opentelemetry 0.21.0", "tracing", "tracing-opentelemetry 0.22.0", @@ -5206,9 +5273,9 @@ dependencies = [ [[package]] name = "tracing-serde" -version = "0.1.3" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc6b213177105856957181934e4920de57730fc69bf42c37ee5bb664d406d9e1" +checksum = "704b1aeb7be0d0a84fc9828cae51dab5970fee5088f83d1dd7ee6f6246fc6ff1" dependencies = [ "serde", "tracing-core", @@ -5216,9 +5283,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.18" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" dependencies = [ "matchers", "nu-ansi-term", @@ -5249,15 +5316,15 @@ checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" [[package]] name = "unicase" -version = "2.8.0" +version = "2.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e51b68083f157f853b6379db119d1c1be0e6e4dec98101079dec41f6f5cf6df" +checksum = "75b844d17643ee918803943289730bec8aac480150456169e647ed0b576ba539" [[package]] name = "unicode-ident" -version = "1.0.14" +version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" +checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034" [[package]] name = "unicode-normalization-alignments" @@ -5343,9 +5410,9 @@ dependencies = [ [[package]] name = "url" -version = "2.5.3" +version = "2.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d157f1b96d14500ffdc1f10ba712e780825526c03d9a49b4d0324b0d9113ada" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" dependencies = [ "form_urlencoded", "idna", @@ -5382,7 +5449,7 @@ version = "4.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c5afb1a60e207dca502682537fefcfd9921e71d0b83e9576060f09abc6efab23" dependencies = [ - "indexmap 2.6.0", + "indexmap 2.7.1", "serde", "serde_json", "utoipa-gen", @@ -5398,7 +5465,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] @@ -5419,24 +5486,24 @@ dependencies = [ [[package]] name = "uuid" -version = "1.11.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" +checksum = 
"b3758f5e68192bb96cc8f9b7e2c2cfdabb435499a28499a42f8f984092adad4b" dependencies = [ - "getrandom", + "getrandom 0.2.15", "rand", "uuid-macro-internal", ] [[package]] name = "uuid-macro-internal" -version = "1.11.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b91f57fe13a38d0ce9e28a03463d8d3c2468ed03d75375110ec71d93b449a08" +checksum = "f8a86d88347b61a0e17b9908a67efcc594130830bf1045653784358dd023e294" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] @@ -5463,9 +5530,9 @@ dependencies = [ [[package]] name = "valuable" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" [[package]] name = "vcpkg" @@ -5532,49 +5599,59 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasi" +version = "0.13.3+wasi-0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +dependencies = [ + "wit-bindgen-rt", +] + [[package]] name = "wasm-bindgen" -version = "0.2.95" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" dependencies = [ "cfg-if", "once_cell", + "rustversion", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.95" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" +checksum = 
"2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" dependencies = [ "bumpalo", "log", - "once_cell", "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.45" +version = "0.4.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc7ec4f8827a71586374db3e87abdb5a2bb3a15afed140221307c3ec06b1f63b" +checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" dependencies = [ "cfg-if", "js-sys", + "once_cell", "wasm-bindgen", "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.95" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -5582,28 +5659,31 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.95" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.95" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] [[package]] name = "web-sys" -version = "0.3.72" +version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" +checksum = 
"33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" dependencies = [ "js-sys", "wasm-bindgen", @@ -5962,9 +6042,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.6.20" +version = "0.6.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36c1fec1a2bb5866f07c25f68c26e565c4c200aebb96d7e55710c19d3e8ac49b" +checksum = "ad699df48212c6cc6eb4435f35500ac6fd3b9913324f938aea302022ce19d310" dependencies = [ "memchr", ] @@ -5979,6 +6059,15 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "wit-bindgen-rt" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" +dependencies = [ + "bitflags 2.8.0", +] + [[package]] name = "write16" version = "1.0.0" @@ -5991,17 +6080,11 @@ version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" -[[package]] -name = "yansi" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" - [[package]] name = "yoke" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" +checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" dependencies = [ "serde", "stable_deref_trait", @@ -6011,13 +6094,13 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + 
"syn 2.0.96", "synstructure", ] @@ -6039,27 +6122,27 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] name = "zerofrom" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", "synstructure", ] @@ -6088,7 +6171,7 @@ checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.96", ] [[package]] @@ -6120,9 +6203,9 @@ dependencies = [ [[package]] name = "zune-jpeg" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16099418600b4d8f028622f73ff6e3deaabdff330fb9a2a131dea781ee8b0768" +checksum = "99a5bab8d7dedf81405c4bb1f2b83ea057643d9cb28778cea9eecddeedd2e028" dependencies = [ "zune-core", ] diff --git a/Cargo.toml b/Cargo.toml index 6fd4b51d7e4..df7f2a73e56 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ members = [ "backends/v3", "backends/grpc-metadata", "backends/trtllm", + "backends/llamacpp", "launcher", "router" ] diff --git a/Dockerfile_llamacpp b/Dockerfile_llamacpp new file mode 100644 index 00000000000..2eb62a1f66c --- /dev/null +++ b/Dockerfile_llamacpp @@ -0,0 +1,76 @@ +FROM nvidia/cuda:12.8.0-cudnn-devel-ubuntu24.04 AS deps + +ARG llamacpp_version=b4651 +ARG llamacpp_cuda=OFF 
+ARG cuda_arch=75-real;80-real;86-real;89-real;90-real + +WORKDIR /opt/src + +ENV DEBIAN_FRONTEND=noninteractive +RUN apt update && apt install -y \ + clang \ + cmake \ + curl \ + git \ + python3-dev \ + libssl-dev \ + pkg-config \ + tar + +ADD https://github.com/ggerganov/llama.cpp/archive/refs/tags/${llamacpp_version}.tar.gz /opt/src/ +RUN tar -xzf ${llamacpp_version}.tar.gz \ + && cd llama.cpp-${llamacpp_version} \ + && cmake -B build \ + -DCMAKE_INSTALL_PREFIX=/usr \ + -DCMAKE_INSTALL_LIBDIR=/usr/lib \ + -DCMAKE_C_COMPILER=clang \ + -DCMAKE_CXX_COMPILER=clang++ \ + -DCMAKE_CUDA_ARCHITECTURES=${cuda_arch} \ + -DGGML_CUDA=${llamacpp_cuda} \ + -DLLAMA_BUILD_COMMON=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_SERVER=OFF \ + && cmake --build build --parallel --config Release \ + && cmake --install build + +WORKDIR /app +COPY rust-toolchain.toml rust-toolchain.toml +RUN curl -sSf https://sh.rustup.rs | sh -s -- -y --no-modify-path --default-toolchain none +ENV PATH="/root/.cargo/bin:$PATH" +RUN cargo install cargo-chef --locked + +FROM deps AS planner +COPY . . +RUN cargo chef prepare --recipe-path recipe.json + +FROM deps AS builder +COPY --from=planner /app/recipe.json recipe.json +RUN cargo chef cook \ + --recipe-path recipe.json \ + --profile release-opt \ + --package text-generation-router-llamacpp +COPY . . 
+RUN cargo build \ + --profile release-opt \ + --package text-generation-router-llamacpp --frozen + +FROM nvidia/cuda:12.8.0-cudnn-runtime-ubuntu24.04 + +RUN apt update && apt install -y \ + python3-venv \ + python3-pip + +RUN python3 -m venv /venv +ENV PATH="/venv/bin:$PATH" + +COPY backends/llamacpp/requirements.txt requirements.txt +RUN pip3 install --no-cache-dir -r requirements.txt + +COPY --from=builder /usr/lib/libllama.so /usr/lib/ +COPY --from=builder /usr/lib/libggml*.so /usr/lib/ +COPY --from=builder /app/target/release-opt/text-generation-router-llamacpp /usr/bin/ + +ENV HF_HUB_ENABLE_HF_TRANSFER=1 + +ENTRYPOINT ["text-generation-router-llamacpp"] diff --git a/backends/llamacpp/Cargo.toml b/backends/llamacpp/Cargo.toml new file mode 100644 index 00000000000..18c2ed0a80b --- /dev/null +++ b/backends/llamacpp/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "text-generation-router-llamacpp" +version.workspace = true +edition.workspace = true +authors.workspace = true +homepage.workspace = true + +[build-dependencies] +bindgen = "0.71.1" +pkg-config = "0.3.31" + +[dependencies] +async-trait = "0.1.85" +clap = "4.5.27" +num_cpus = "1.16.0" +text-generation-router = { path = "../../router" } +thiserror = "2.0.11" +tokenizers.workspace = true +tokio = "1.43.0" +tokio-stream = "0.1.17" +tracing = "0.1.41" diff --git a/backends/llamacpp/README.md b/backends/llamacpp/README.md new file mode 100644 index 00000000000..0971efc5a39 --- /dev/null +++ b/backends/llamacpp/README.md @@ -0,0 +1,24 @@ +# Llamacpp backend + +If all your dependencies are installed at the system level, running +cargo build should be sufficient. However, if you want to experiment +with different versions of llama.cpp, some additional setup is required. 
+ +## Install llama.cpp + + LLAMACPP_PREFIX=$(pwd)/llama.cpp.out + + git clone https://github.com/ggerganov/llama.cpp + cd llama.cpp + cmake -B build \ + -DCMAKE_INSTALL_PREFIX="$LLAMACPP_PREFIX" \ + -DLLAMA_BUILD_COMMON=OFF \ + -DLLAMA_BUILD_TESTS=OFF \ + -DLLAMA_BUILD_EXAMPLES=OFF \ + -DLLAMA_BUILD_SERVER=OFF + cmake --build build --config Release -j + cmake --install build + +## Build TGI + + PKG_CONFIG_PATH="$LLAMACPP_PREFIX/lib/pkgconfig" cargo build diff --git a/backends/llamacpp/build.rs b/backends/llamacpp/build.rs new file mode 100644 index 00000000000..499583cd445 --- /dev/null +++ b/backends/llamacpp/build.rs @@ -0,0 +1,48 @@ +use bindgen::callbacks::{ItemInfo, ParseCallbacks}; +use std::env; +use std::path::PathBuf; + +#[derive(Debug)] +struct PrefixStripper; + +impl ParseCallbacks for PrefixStripper { + fn generated_name_override(&self, item_info: ItemInfo<'_>) -> Option { + item_info.name.strip_prefix("llama_").map(str::to_string) + } +} + +fn main() { + if let Some(cuda_version) = option_env!("CUDA_VERSION") { + let mut version: Vec<&str> = cuda_version.split('.').collect(); + if version.len() > 2 { + version.pop(); + } + let cuda_version = format!("cuda-{}", version.join(".")); + pkg_config::Config::new().probe(&cuda_version).unwrap(); + } + let llama = pkg_config::Config::new().probe("llama").unwrap(); + + for path in &llama.link_paths { + println!("cargo:rustc-link-arg=-Wl,-rpath,{}", path.display()); + } + println!("cargo:rustc-link-arg=-Wl,--disable-new-dtags"); + + let bindings = bindgen::Builder::default() + .clang_args( + llama + .include_paths + .iter() + .map(|p| format!("-I{}", p.display())), + ) + .header_contents("llama_bindings.h", "#include ") + .prepend_enum_name(false) + .parse_callbacks(Box::new(PrefixStripper)) + .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) + .generate() + .expect("Unable to generate bindings"); + + let out_path = PathBuf::from(env::var("OUT_DIR").unwrap()); + bindings + 
.write_to_file(out_path.join("llamacpp.rs")) + .expect("Couldn't write bindings!"); +} diff --git a/backends/llamacpp/requirements.txt b/backends/llamacpp/requirements.txt new file mode 100644 index 00000000000..5c5d0cc7f11 --- /dev/null +++ b/backends/llamacpp/requirements.txt @@ -0,0 +1,3 @@ +transformers==4.48.2 +huggingface-hub==0.28.1 +hf-transfer==0.1.9 diff --git a/backends/llamacpp/src/backend.rs b/backends/llamacpp/src/backend.rs new file mode 100644 index 00000000000..1566e1bf968 --- /dev/null +++ b/backends/llamacpp/src/backend.rs @@ -0,0 +1,679 @@ +mod llamacpp { + #![allow(non_upper_case_globals)] + #![allow(non_camel_case_types)] + #![allow(non_snake_case)] + #![allow(dead_code)] + include!(concat!(env!("OUT_DIR"), "/llamacpp.rs")); +} +use async_trait::async_trait; +use std::ffi::CString; +use std::mem::replace; +use std::str::FromStr; +use std::sync::{mpsc, Once}; +use text_generation_router::infer::{Backend, GeneratedText, InferError, InferStreamResponse}; +use text_generation_router::validation::ValidGenerateRequest; +use text_generation_router::{FinishReason, Token}; +use thiserror::Error; +use tokenizers::Tokenizer; +use tokio::sync::mpsc::{unbounded_channel, UnboundedSender}; +use tokio::sync::{oneshot, watch}; +use tokio::task::{spawn, spawn_blocking}; +use tokio::time::{timeout, Duration, Instant}; +use tokio_stream::wrappers::UnboundedReceiverStream; +use tracing::instrument; +use tracing::{debug, error, info, trace, warn}; + +#[derive(Debug, Clone, Copy)] +pub enum LlamacppSplitMode { + GPU(usize), + Layer, + Row, +} + +impl FromStr for LlamacppSplitMode { + type Err = String; + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "layer" => Ok(LlamacppSplitMode::Layer), + "row" => Ok(LlamacppSplitMode::Row), + _ => match s.parse::() { + Ok(n) => Ok(LlamacppSplitMode::GPU(n)), + Err(_) => Err("Choose a GPU number or `layer` or `row`".to_string()), + }, + } + } +} + +#[derive(Debug, Clone, Copy, clap::ValueEnum)] +pub enum 
LlamacppNuma { + Disabled, + Distribute, + Isolate, + Numactl, + Mirror, +} + +#[allow(non_camel_case_types)] +#[derive(Debug, Clone, Copy, clap::ValueEnum)] +pub enum LlamacppGGMLType { + F32, + F16, + Q4_0, + Q4_1, + Q5_0, + Q5_1, + Q8_0, + Q8_1, + Q2_K, + Q3_K, + Q4_K, + Q5_K, + Q6_K, + Q8_K, + IQ2_XXS, + IQ2_XS, + IQ3_XXS, + IQ1_S, + IQ4_NL, + IQ3_S, + IQ2_S, + IQ4_XS, + I8, + I16, + I32, + I64, + F64, + IQ1_M, + BF16, + TQ1_0, + TQ2_0, +} + +// TODO: macro +impl LlamacppGGMLType { + fn to_ggml_type(self) -> llamacpp::ggml_type { + match self { + LlamacppGGMLType::F32 => llamacpp::GGML_TYPE_F32, + LlamacppGGMLType::F16 => llamacpp::GGML_TYPE_F16, + LlamacppGGMLType::Q4_0 => llamacpp::GGML_TYPE_Q4_0, + LlamacppGGMLType::Q4_1 => llamacpp::GGML_TYPE_Q4_1, + LlamacppGGMLType::Q5_0 => llamacpp::GGML_TYPE_Q5_0, + LlamacppGGMLType::Q5_1 => llamacpp::GGML_TYPE_Q5_1, + LlamacppGGMLType::Q8_0 => llamacpp::GGML_TYPE_Q8_0, + LlamacppGGMLType::Q8_1 => llamacpp::GGML_TYPE_Q8_1, + LlamacppGGMLType::Q2_K => llamacpp::GGML_TYPE_Q2_K, + LlamacppGGMLType::Q3_K => llamacpp::GGML_TYPE_Q3_K, + LlamacppGGMLType::Q4_K => llamacpp::GGML_TYPE_Q4_K, + LlamacppGGMLType::Q5_K => llamacpp::GGML_TYPE_Q5_K, + LlamacppGGMLType::Q6_K => llamacpp::GGML_TYPE_Q6_K, + LlamacppGGMLType::Q8_K => llamacpp::GGML_TYPE_Q8_K, + LlamacppGGMLType::IQ2_XXS => llamacpp::GGML_TYPE_IQ2_XXS, + LlamacppGGMLType::IQ2_XS => llamacpp::GGML_TYPE_IQ2_XS, + LlamacppGGMLType::IQ3_XXS => llamacpp::GGML_TYPE_IQ3_XXS, + LlamacppGGMLType::IQ1_S => llamacpp::GGML_TYPE_IQ1_S, + LlamacppGGMLType::IQ4_NL => llamacpp::GGML_TYPE_IQ4_NL, + LlamacppGGMLType::IQ3_S => llamacpp::GGML_TYPE_IQ3_S, + LlamacppGGMLType::IQ2_S => llamacpp::GGML_TYPE_IQ2_S, + LlamacppGGMLType::IQ4_XS => llamacpp::GGML_TYPE_IQ4_XS, + LlamacppGGMLType::I8 => llamacpp::GGML_TYPE_I8, + LlamacppGGMLType::I16 => llamacpp::GGML_TYPE_I16, + LlamacppGGMLType::I32 => llamacpp::GGML_TYPE_I32, + LlamacppGGMLType::I64 => llamacpp::GGML_TYPE_I64, + LlamacppGGMLType::F64 
=> llamacpp::GGML_TYPE_F64, + LlamacppGGMLType::IQ1_M => llamacpp::GGML_TYPE_IQ1_M, + LlamacppGGMLType::BF16 => llamacpp::GGML_TYPE_BF16, + LlamacppGGMLType::TQ1_0 => llamacpp::GGML_TYPE_TQ1_0, + LlamacppGGMLType::TQ2_0 => llamacpp::GGML_TYPE_TQ2_0, + } + } +} + +pub struct LlamacppConfig { + pub model_gguf: String, + pub max_batch_total_tokens: usize, + pub max_physical_batch_total_tokens: usize, + pub max_batch_size: usize, + pub batch_timeout: Duration, + pub n_threads: usize, + pub n_threads_batch: usize, + pub n_gpu_layers: usize, + pub split_mode: LlamacppSplitMode, + pub numa: LlamacppNuma, + pub defrag_threshold: f32, + pub use_mmap: bool, + pub use_mlock: bool, + pub offload_kqv: bool, + pub flash_attention: bool, + pub type_k: LlamacppGGMLType, + pub type_v: LlamacppGGMLType, +} + +#[derive(Debug)] +struct LlamacppRequest { + input_ids: Vec, + top_k: i32, + top_p: f32, + typical_p: f32, + min_keep: usize, + temp: f32, + seed: u32, + penalty_last_n: i32, + penalty_repeat: f32, + penalty_freq: f32, + penalty_present: f32, + max_new_tokens: usize, + tx: UnboundedSender>, + time: Instant, +} + +pub struct LlamacppBackend { + tx: UnboundedSender, + status: watch::Receiver, +} + +impl LlamacppRequest { + fn new( + from: &ValidGenerateRequest, + tx: UnboundedSender>, + ) -> Option { + from.input_ids.as_ref().map(|input_ids| LlamacppRequest { + input_ids: input_ids.iter().map(|&x| x as i32).collect(), + top_k: from.parameters.top_k as _, + top_p: from.parameters.top_p as _, + typical_p: from.parameters.typical_p as _, + min_keep: 0, // disabled + temp: from.parameters.temperature as _, + seed: from.parameters.seed as _, + penalty_last_n: 64, // 0 = disabled, -1 = context size + penalty_repeat: from.parameters.repetition_penalty as _, + penalty_freq: from.parameters.frequency_penalty as _, + penalty_present: 0.0, // disabled + max_new_tokens: from.stopping_parameters.max_new_tokens as _, + tx, + time: Instant::now(), + }) + } +} + +struct Llamacpp { + model: *mut 
llamacpp::llama_model, + ctx: *mut llamacpp::llama_context, + vocab: *const llamacpp::llama_vocab, + logprobs: Vec, + batch: llamacpp::llama_batch, +} + +extern "C" fn llamacpp_log_callback( + level: llamacpp::ggml_log_level, + msg: *const std::os::raw::c_char, + _user_data: *mut std::os::raw::c_void, +) { + let cmsg = unsafe { std::ffi::CStr::from_ptr(msg) }; + let rmsg = cmsg.to_string_lossy().trim_end_matches('\n').to_string(); + + match level { + llamacpp::GGML_LOG_LEVEL_DEBUG => debug!(target: "llamacpp", "{}", rmsg), + llamacpp::GGML_LOG_LEVEL_INFO => info!(target: "llamacpp", "{}", rmsg), + llamacpp::GGML_LOG_LEVEL_WARN => warn!(target: "llamacpp", "{}", rmsg), + llamacpp::GGML_LOG_LEVEL_ERROR => error!(target: "llamacpp", "{}", rmsg), + _ => trace!(target: "llamacpp", "{}", rmsg), + } +} + +impl Llamacpp { + fn new(conf: LlamacppConfig) -> Result { + let gguf = CString::new(conf.model_gguf)?; + + let model = unsafe { + let mut params = llamacpp::model_default_params(); + params.n_gpu_layers = conf.n_gpu_layers as _; + params.split_mode = match conf.split_mode { + LlamacppSplitMode::GPU(_) => llamacpp::LLAMA_SPLIT_MODE_NONE, + LlamacppSplitMode::Layer => llamacpp::LLAMA_SPLIT_MODE_LAYER, + LlamacppSplitMode::Row => llamacpp::LLAMA_SPLIT_MODE_ROW, + }; + params.main_gpu = match conf.split_mode { + LlamacppSplitMode::GPU(n) => n as _, + _ => 0, + }; + params.use_mmap = conf.use_mmap; + params.use_mlock = conf.use_mlock; + llamacpp::model_load_from_file(gguf.as_ptr(), params) + }; + if model.is_null() { + return Err(BackendError::Llamacpp("Failed to load model".to_string())); + } + let ctx = unsafe { + let mut params = llamacpp::context_default_params(); + params.n_ctx = conf.max_batch_total_tokens as _; + params.n_batch = conf.max_batch_total_tokens as _; + params.n_ubatch = conf.max_physical_batch_total_tokens as _; + params.n_seq_max = conf.max_batch_size as _; + params.n_threads = conf.n_threads as _; + params.n_threads_batch = conf.n_threads_batch as _; + 
params.defrag_thold = conf.defrag_threshold; + params.offload_kqv = conf.offload_kqv; + params.flash_attn = conf.flash_attention; + params.type_k = conf.type_k.to_ggml_type(); + params.type_v = conf.type_v.to_ggml_type(); + params.no_perf = true; + llamacpp::init_from_model(model, params) + }; + if ctx.is_null() { + return Err(BackendError::Llamacpp("Failed to init context".to_string())); + } + let vocab = unsafe { llamacpp::model_get_vocab(model) }; + if vocab.is_null() { + return Err(BackendError::Llamacpp("Failed to get vocab".to_string())); + } + let n_tokens = unsafe { llamacpp::vocab_n_tokens(vocab) }; + let mut logprobs = Vec::with_capacity(n_tokens as usize); + + for token in 0..n_tokens { + logprobs.push(llamacpp::llama_token_data { + id: token, + logit: 0.0, + p: 0.0, + }); + } + let batch = unsafe { llamacpp::batch_init(conf.max_batch_total_tokens as _, 0, 1) }; + Ok(Llamacpp { + model, + ctx, + vocab, + logprobs, + batch, + }) + } + + fn decode(&mut self) -> i32 { + unsafe { llamacpp::decode(self.ctx, self.batch) } + } + + fn clear_kv_cache(&mut self, seq_id: llamacpp::llama_seq_id) { + unsafe { + llamacpp::kv_cache_seq_rm(self.ctx, seq_id, -1, -1); + } + } + + fn batch_push( + &mut self, + token: llamacpp::llama_token, + pos: llamacpp::llama_pos, + seq_id: llamacpp::llama_seq_id, + logits: bool, + ) -> usize { + let n = self.batch.n_tokens as usize; + unsafe { + *self.batch.token.add(n) = token; + *self.batch.pos.add(n) = pos; + *self.batch.n_seq_id.add(n) = 1; + *(*self.batch.seq_id.add(n)).add(0) = seq_id; + *self.batch.logits.add(n) = logits as i8; + } + self.batch.n_tokens += 1; + n + } +} + +impl Drop for Llamacpp { + fn drop(&mut self) { + if !self.ctx.is_null() { + unsafe { llamacpp::free(self.ctx) }; + } + if !self.model.is_null() { + unsafe { llamacpp::model_free(self.model) }; + } + unsafe { llamacpp::batch_free(self.batch) }; + } +} + +struct LlamacppSampler { + chain: *mut llamacpp::llama_sampler, +} + +impl LlamacppSampler { + fn new(req: 
&LlamacppRequest) -> Option { + let chain = unsafe { + let params = llamacpp::sampler_chain_default_params(); + llamacpp::sampler_chain_init(params) + }; + if chain.is_null() { + error!("Failed to init sampler"); + return None; + } + let (top_k, top_p, typical_p, temp, penalties, dist) = unsafe { + ( + llamacpp::sampler_init_top_k(req.top_k), + llamacpp::sampler_init_top_p(req.top_p, req.min_keep), + llamacpp::sampler_init_typical(req.typical_p, req.min_keep), + llamacpp::sampler_init_temp(req.temp), + llamacpp::sampler_init_penalties( + req.penalty_last_n, + req.penalty_repeat, + req.penalty_freq, + req.penalty_present, + ), + llamacpp::sampler_init_dist(req.seed), + ) + }; + let all = &[ + ("top_k", top_k), + ("top_p", top_p), + ("typical_p", typical_p), + ("temp", temp), + ("penalties", penalties), + ("dist", dist), + ]; + let mut failed = false; + + for (k, v) in all { + if v.is_null() { + error!("Failed to init {k} sampler"); + failed = true; + } else { + unsafe { llamacpp::sampler_chain_add(chain, *v) }; + } + } + if failed { + unsafe { llamacpp::sampler_free(chain) }; + None + } else { + Some(LlamacppSampler { chain }) + } + } + + fn sample(&self, llamacpp: &mut Llamacpp, idx: usize) -> (llamacpp::llama_token, f32) { + let logits = unsafe { llamacpp::get_logits_ith(llamacpp.ctx, idx as _) }; + for (token, logprob) in llamacpp.logprobs.iter_mut().enumerate() { + *logprob = llamacpp::llama_token_data { + id: token as _, + logit: unsafe { *logits.add(token) }, + p: 0.0, + }; + } + let mut view = llamacpp::llama_token_data_array { + data: llamacpp.logprobs.as_mut_ptr(), + size: llamacpp.logprobs.len(), + selected: -1, + sorted: false, + }; + unsafe { + llamacpp::sampler_apply(self.chain, &mut view); + let logprob = *view.data.offset(view.selected as _); + llamacpp::sampler_accept(self.chain, logprob.id); + (logprob.id, logprob.p.ln()) + } + } +} + +impl Drop for LlamacppSampler { + fn drop(&mut self) { + if !self.chain.is_null() { + unsafe { 
llamacpp::sampler_free(self.chain) }; + } + } +} + +struct LlamacppSeq { + id: usize, + batch_pos: usize, + token: llamacpp::llama_token, + pos: llamacpp::llama_pos, + sampler: LlamacppSampler, + text: String, + n_new_tokens: usize, + running: bool, +} + +static INIT: Once = Once::new(); + +impl LlamacppBackend { + pub fn new( + conf: LlamacppConfig, + tokenizer: Tokenizer, + ) -> ( + Self, + oneshot::Receiver>, + watch::Sender, + ) { + // Setup llama & export logs, once and for all + INIT.call_once(|| unsafe { + llamacpp::log_set(Some(llamacpp_log_callback), std::ptr::null_mut()); + llamacpp::backend_init(); + llamacpp::numa_init(match conf.numa { + LlamacppNuma::Disabled => llamacpp::GGML_NUMA_STRATEGY_DISABLED, + LlamacppNuma::Distribute => llamacpp::GGML_NUMA_STRATEGY_DISTRIBUTE, + LlamacppNuma::Isolate => llamacpp::GGML_NUMA_STRATEGY_ISOLATE, + LlamacppNuma::Numactl => llamacpp::GGML_NUMA_STRATEGY_NUMACTL, + LlamacppNuma::Mirror => llamacpp::GGML_NUMA_STRATEGY_MIRROR, + }); + }); + + let (status_tx, status_rx) = watch::channel(false); + let (shutdown_tx, shutdown_rx) = watch::channel(false); + let (ok_tx, ok_rx) = oneshot::channel(); + let (tx, mut rx) = unbounded_channel::(); + let (sync_tx, sync_rx) = mpsc::channel(); + + spawn(async move { + let mut n_tokens = 0; + let mut requests = Vec::with_capacity(conf.max_batch_size); + + let flush = |requests: &mut Vec<_>, n_tokens: &mut usize| { + if !requests.is_empty() { + let _ = + sync_tx.send(replace(requests, Vec::with_capacity(conf.max_batch_size))); + *n_tokens = 0; + } + }; + loop { + match timeout(conf.batch_timeout, rx.recv()).await { + Ok(Some(request)) => { + let n_tokens_to_add = request.input_ids.len(); + + if n_tokens + n_tokens_to_add > conf.max_batch_total_tokens { + flush(&mut requests, &mut n_tokens); + } + n_tokens += n_tokens_to_add; + requests.push(request); + + if requests.len() == conf.max_batch_size { + flush(&mut requests, &mut n_tokens); + } + } + Ok(None) => break, // closed + Err(_) => 
flush(&mut requests, &mut n_tokens), // timeout + } + } + }); + + spawn_blocking(move || { + let mut llamacpp = match Llamacpp::new(conf) { + Ok(v) => { + let _ = ok_tx.send(Ok(())); + v + } + Err(e) => { + let _ = ok_tx.send(Err(e)); + return; + } + }; + let vocab = tokenizer.get_added_vocabulary(); + + // health() returns true + let _ = status_tx.send(true); + + while let Ok(requests) = sync_rx.recv() { + if *shutdown_rx.borrow() { + break; + } + let start_time = Instant::now(); + let mut seqs: Vec = Vec::with_capacity(requests.len()); + llamacpp.batch.n_tokens = 0; + + for (seq_id, request) in requests.iter().enumerate() { + debug!("Request: {:?}", request); + // TODO remove this + let sampler = match LlamacppSampler::new(request) { + Some(sampler) => sampler, + _ => { + let _ = request.tx.send(Err(InferError::IncompleteGeneration)); + continue; + } + }; + let last_pos = request.input_ids.len() - 1; + + for (pos, &token_id) in request.input_ids.iter().enumerate() { + llamacpp.batch_push( + token_id as llamacpp::llama_token, + pos as llamacpp::llama_pos, + seq_id as llamacpp::llama_seq_id, + pos == last_pos, // check samplers + ); + } + seqs.push(LlamacppSeq { + id: seq_id, + batch_pos: llamacpp.batch.n_tokens as usize - 1, + token: llamacpp::LLAMA_TOKEN_NULL, + pos: last_pos as llamacpp::llama_pos + 1, + sampler, + text: String::with_capacity(1024), + n_new_tokens: 0, + running: true, + }); + } + while llamacpp.batch.n_tokens > 0 { + if llamacpp.decode() != 0 { + warn!("llama_decode failed, clearing kv cache"); + llamacpp.clear_kv_cache(-1); + for seq in seqs.iter_mut() { + let _ = requests[seq.id] + .tx + .send(Err(InferError::IncompleteGeneration)); + seq.running = false; + } + break; + } + for seq in seqs.iter_mut() { + if !seq.running { + continue; + } + let (next, logprob) = seq.sampler.sample(&mut llamacpp, seq.batch_pos); + seq.n_new_tokens += 1; + seq.token = next; + + let piece = match tokenizer.decode(&[next as u32], false) { + Ok(piece) => piece, + 
Err(e) => { + error!("Failed to decode token: {e}"); + let _ = requests[seq.id] + .tx + .send(Err(InferError::IncompleteGeneration)); + seq.running = false; + continue; + } + }; + let special = vocab.is_special_token(&piece); + + if !special { + seq.text.push_str(&piece); + } + let token = Token { + id: next as _, + text: piece, + logprob, + special, + }; + let finish: Option = { + if unsafe { llamacpp::vocab_is_eog(llamacpp.vocab, next) } { + Some(FinishReason::EndOfSequenceToken) + } else if seq.n_new_tokens == requests[seq.id].max_new_tokens { + Some(FinishReason::Length) + } else { + None + } + }; + if let Some(reason) = finish { + let _ = requests[seq.id].tx.send(Ok(InferStreamResponse::End { + token, + top_tokens: vec![], + generated_text: GeneratedText { + text: seq.text.clone(), + generated_tokens: seq.n_new_tokens as _, + finish_reason: reason, + seed: Some(requests[seq.id].seed as _), + }, + start: start_time, + queued: requests[seq.id].time, + })); + seq.running = false; + continue; + } + let _ = requests[seq.id] + .tx + .send(Ok(InferStreamResponse::Intermediate { + token, + top_tokens: vec![], + })); + } + // generate a new batch + llamacpp.batch.n_tokens = 0; + + for seq in seqs.iter_mut() { + if seq.running { + seq.batch_pos = + llamacpp.batch_push(seq.token, seq.pos, seq.id as _, true); + seq.pos += 1; + } else { + llamacpp.clear_kv_cache(seq.id as _); + } + } + } + } + }); + ( + Self { + tx, + status: status_rx, + }, + ok_rx, + shutdown_tx, + ) + } +} + +#[async_trait] +impl Backend for LlamacppBackend { + #[instrument(skip_all)] + fn schedule( + &self, + request: ValidGenerateRequest, + ) -> Result>, InferError> { + debug!(?request); + let (tx, rx) = unbounded_channel::>(); + match LlamacppRequest::new(&request, tx) { + Some(v) => match self.tx.send(v) { + Err(e) => Err(InferError::GenerationError(e.to_string())), + _ => Ok(UnboundedReceiverStream::new(rx)), + }, + _ => Err(InferError::GenerationError("Bad request".to_string())), + } + } + + async 
fn health(&self, _: bool) -> bool { + *self.status.borrow() + } + + fn name(&self) -> &'static str { + "llamacpp" + } +} + +#[derive(Debug, Error)] +pub enum BackendError { + #[error("CString error: {0}")] + CStringError(#[from] std::ffi::NulError), + #[error("Llamacpp error: {0}")] + Llamacpp(String), +} diff --git a/backends/llamacpp/src/main.rs b/backends/llamacpp/src/main.rs new file mode 100644 index 00000000000..5a07acdcde9 --- /dev/null +++ b/backends/llamacpp/src/main.rs @@ -0,0 +1,284 @@ +mod backend; + +use backend::{ + BackendError, LlamacppBackend, LlamacppConfig, LlamacppGGMLType, LlamacppNuma, + LlamacppSplitMode, +}; +use clap::Parser; +use text_generation_router::{logging, server, usage_stats}; +use thiserror::Error; +use tokenizers::{FromPretrainedParameters, Tokenizer}; +use tokio::sync::oneshot::error::RecvError; +use tracing::{error, warn}; + +/// Backend Configuration +#[derive(Parser, Debug)] +#[clap(author, version, about, long_about = None)] +struct Args { + /// Name of the model to load. + #[clap(long, env)] + model_id: String, + + /// Revision of the model. + #[clap(default_value = "main", long, env)] + revision: String, + + /// Path to the GGUF model file for inference. + #[clap(long, env)] + model_gguf: String, // TODO Option() with hf->gguf & quantize + + /// Number of threads to use for generation. + #[clap(long, env)] + n_threads: Option, + + /// Number of threads to use for batch processing. + #[clap(long, env)] + n_threads_batch: Option, + + /// Number of layers to store in VRAM. + #[clap(default_value = "0", long, env)] + n_gpu_layers: usize, + + /// Split the model across multiple GPUs. + #[clap(default_value = "layer", long, env)] + split_mode: LlamacppSplitMode, + + /// Defragment the KV cache if holes/size > threshold. + #[clap(default_value = "-1.0", long, env)] + defrag_threshold: f32, + + /// Enable NUMA optimizations. 
+ #[clap(default_value = "disabled", value_enum, long, env)] + numa: LlamacppNuma, + + /// Use memory mapping for the model. + #[clap(long, env)] + use_mmap: bool, + + /// Use memory locking to prevent swapping. + #[clap(long, env)] + use_mlock: bool, + + /// Enable offloading of KQV operations to the GPU. + #[clap(long, env)] + offload_kqv: bool, + + /// Enable flash attention for faster inference. (EXPERIMENTAL) + #[clap(long, env)] + flash_attention: bool, + + /// Data type used for K cache. + #[clap(default_value = "f16", value_enum, long, env)] + type_k: LlamacppGGMLType, + + /// Data type used for V cache. + #[clap(default_value = "f16", value_enum, long, env)] + type_v: LlamacppGGMLType, + + /// Number of tokenizer workers used for payload validation and truncation. + #[clap(default_value = "2", long, env)] + validation_workers: usize, + + /// Maximum number of concurrent requests. + #[clap(long, env)] + max_concurrent_requests: Option, + + /// Maximum number of input tokens per request. + #[clap(default_value = "1024", long, env)] + max_input_tokens: usize, + + /// Maximum number of total tokens (input + output) per request. + #[clap(default_value = "2048", long, env)] + max_total_tokens: usize, + + /// Maximum number of tokens in a batch. + #[clap(long, env)] + max_batch_total_tokens: Option, + + /// Maximum number of tokens in a physical batch. + #[clap(long, env)] + max_physical_batch_total_tokens: Option, + + /// Maximum number of requests per batch. + #[clap(long, env)] + max_batch_size: Option, + + /// IP address to listen on. + #[clap(default_value = "0.0.0.0", long)] + hostname: String, + + /// Port to listen on. + #[clap(default_value = "3000", long, short, env)] + port: u16, + + /// Enable JSON output format. + #[clap(long, env)] + json_output: bool, + + /// OTLP endpoint for telemetry data. + #[clap(long, env)] + otlp_endpoint: Option, + + /// Service name for OTLP telemetry. 
+ #[clap(default_value = "text-generation-inference.router", long, env)] + otlp_service_name: String, + + /// Allowed origins for CORS. + #[clap(long, env)] + cors_allow_origin: Option>, + + /// Path to the tokenizer configuration file. + #[clap(long, env)] + tokenizer_config_path: Option, + + /// Disable grammar support. + #[clap(long, env)] + disable_grammar_support: bool, + + /// Maximum number of inputs per request. + #[clap(default_value = "4", long, env)] + max_client_batch_size: usize, + + /// Level of usage statistics collection. + #[clap(default_value = "on", long, env)] + usage_stats: usage_stats::UsageStatsLevel, + + /// Maximum payload size in bytes. + #[clap(default_value = "2000000", long, env)] + payload_limit: usize, +} + +#[tokio::main] +async fn main() -> Result<(), RouterError> { + let args = Args::parse(); + + logging::init_logging(args.otlp_endpoint, args.otlp_service_name, args.json_output); + + let n_threads = match args.n_threads { + Some(0) | None => num_cpus::get(), + Some(threads) => threads, + }; + let n_threads_batch = match args.n_threads_batch { + Some(0) | None => n_threads, + Some(threads) => threads, + }; + let max_batch_size = match args.max_batch_size { + Some(0) | None => n_threads_batch, + Some(threads) => threads, + }; + let max_batch_total_tokens = match args.max_batch_total_tokens { + None => max_batch_size * args.max_total_tokens, + Some(size) => size, + }; + let max_physical_batch_total_tokens = match args.max_physical_batch_total_tokens { + None => max_batch_total_tokens, + Some(size) => size, + }; + let max_concurrent_requests = match args.max_concurrent_requests { + None => max_batch_size * 2, + Some(size) => size, + }; + if args.max_input_tokens >= args.max_total_tokens { + return Err(RouterError::ArgumentValidation( + "`max_input_tokens` must be < `max_total_tokens`".to_string(), + )); + } + if args.max_total_tokens > max_batch_total_tokens { + return Err(RouterError::ArgumentValidation( + "`max_total_tokens` must be 
<= `max_batch_total_tokens`".to_string(), + )); + } + if max_batch_size * args.max_total_tokens > max_batch_total_tokens { + return Err(RouterError::ArgumentValidation( + "`max_batch_size` * `max_total_tokens` must be <= `max_batch_total_tokens`".to_string(), + )); + } + + // TODO: check if we use the same cache of Server + // check if llamacpp is faster + let tokenizer = { + let token = std::env::var("HF_TOKEN") + .or_else(|_| std::env::var("HUGGING_FACE_HUB_TOKEN")) + .ok(); + let params = FromPretrainedParameters { + revision: args.revision.clone(), + token, + ..Default::default() + }; + Tokenizer::from_pretrained(args.model_id.clone(), Some(params))? + }; + + let (backend, ok, shutdown) = LlamacppBackend::new( + LlamacppConfig { + model_gguf: args.model_gguf, + n_threads, + n_threads_batch, + n_gpu_layers: args.n_gpu_layers, + split_mode: args.split_mode, + defrag_threshold: args.defrag_threshold, + numa: args.numa, + use_mmap: args.use_mmap, + use_mlock: args.use_mlock, + flash_attention: args.flash_attention, + type_k: args.type_k, + type_v: args.type_v, + offload_kqv: args.offload_kqv, + max_batch_total_tokens, + max_physical_batch_total_tokens, + max_batch_size, + batch_timeout: tokio::time::Duration::from_millis(5), + }, + tokenizer, + ); + ok.await??; + + if cfg!(debug_assertions) { + warn!("Graceful shutdown disabled!"); + let _ = tokio::task::spawn(async move { + let _ = tokio::signal::ctrl_c().await; + let _ = shutdown.send(true); + }); + } + + server::run( + backend, + max_concurrent_requests, + 0, // max_best_of + 0, // max_stop_sequences + 0, // max_top_n_tokens + args.max_input_tokens, + args.max_total_tokens, + args.validation_workers, + None, // api_key + args.model_id, // tokenizer_name + args.tokenizer_config_path, + Some(args.revision), + false, // trust_remote_code + args.hostname, + args.port, + args.cors_allow_origin, + false, // ngrok, + None, // ngrok_authtoken, + None, // ngrok_edge, + args.disable_grammar_support, + 
args.max_client_batch_size, + args.usage_stats, + args.payload_limit, + ) + .await?; + Ok(()) +} + +#[derive(Debug, Error)] +enum RouterError { + #[error("Argument validation error: {0}")] + ArgumentValidation(String), + #[error("Tokenizer error: {0}")] + Tokenizer(#[from] tokenizers::Error), + #[error("Backend error: {0}")] + Backend(#[from] BackendError), + #[error("WebServer error: {0}")] + WebServer(#[from] server::WebServerError), + #[error("Recv error: {0}")] + RecvError(#[from] RecvError), +} diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml index 8fcba516bd2..e073353fca9 100644 --- a/docs/source/_toctree.yml +++ b/docs/source/_toctree.yml @@ -52,6 +52,8 @@ - sections: - local: backends/trtllm title: TensorRT-LLM + - local: backends/llamacpp + title: Llamacpp title: Backends - sections: - local: reference/launcher diff --git a/docs/source/backends/llamacpp.md b/docs/source/backends/llamacpp.md new file mode 100644 index 00000000000..dbd93e86606 --- /dev/null +++ b/docs/source/backends/llamacpp.md @@ -0,0 +1,120 @@ +# Llamacpp Backend + +The llamacpp backend facilitates the deployment of large language models +(LLMs) by integrating [llama.cpp][llama.cpp], an advanced inference engine +optimized for both CPU and GPU computation. This backend is a component +of Hugging Face’s **Text Generation Inference (TGI)** suite, +specifically designed to streamline the deployment of LLMs in production +environments. + +## Key Capabilities + +- Full compatibility with GGUF format and all quantization formats + (GGUF-related constraints may be mitigated dynamically by on-the-fly + generation in future updates) +- Optimized inference on CPU and GPU architectures +- Containerized deployment, eliminating dependency complexity +- Seamless interoperability with the Hugging Face ecosystem + +## Model Compatibility + +This backend leverages models formatted in **GGUF**, providing an +optimized balance between computational efficiency and model accuracy. 
+You will find the best models on [Hugging Face][GGUF]. + +## Build Docker image + +For optimal performance, the Docker image is compiled with native CPU +instructions, thus it's highly recommended to execute the container on +the host used during the build process. Efforts are ongoing to enhance +portability while maintaining high computational efficiency. + +```bash +docker build \ + -t tgi-llamacpp \ + https://github.com/huggingface/text-generation-inference.git \ + -f Dockerfile_llamacpp +``` + +### Build parameters + +| Parameter | Description | +| ------------------------------------ | --------------------------------- | +| `--build-arg llamacpp_version=bXXXX` | Specific version of llama.cpp | +| `--build-arg llamacpp_cuda=ON` | Enables CUDA acceleration | +| `--build-arg cuda_arch=ARCH` | Defines target CUDA architecture | + +## Model preparation + +Retrieve a GGUF model and store it in a specific directory, for example: + +```bash +mkdir -p ~/models +cd ~/models +curl -LOJ "https://huggingface.co/Qwen/Qwen2.5-3B-Instruct-GGUF/resolve/main/qwen2.5-3b-instruct-q4_0.gguf?download=true" +``` + +## Run Docker image + +### CPU-based inference + +```bash +docker run \ + -p 3000:3000 \ + -e "HF_TOKEN=$HF_TOKEN" \ + -v "$HOME/models:/models" \ + tgi-llamacpp \ + --model-id "Qwen/Qwen2.5-3B-Instruct" \ + --model-gguf "/models/qwen2.5-3b-instruct-q4_0.gguf" +``` + +### GPU-Accelerated inference + +```bash +docker run \ + --gpus all \ + -p 3000:3000 \ + -e "HF_TOKEN=$HF_TOKEN" \ + -v "$HOME/models:/models" \ + tgi-llamacpp \ + --n-gpu-layers 99 \ + --model-id "Qwen/Qwen2.5-3B-Instruct" \ + --model-gguf "/models/qwen2.5-3b-instruct-q4_0.gguf" +``` + +## Advanced parameters + +A full listing of configurable parameters is available in the `--help`: + +```bash +docker run tgi-llamacpp --help + +``` + +The table below summarizes key options: + +| Parameter | Description |
+|-------------------------------------|------------------------------------------------------------------------| +| `--n-threads` | Number of threads to use for generation | +| `--n-threads-batch` | Number of threads to use for batch processing | +| `--n-gpu-layers` | Number of layers to store in VRAM | +| `--split-mode` | Split the model across multiple GPUs | +| `--defrag-threshold` | Defragment the KV cache if holes/size > threshold | +| `--numa` | Enable NUMA optimizations | +| `--use-mmap` | Use memory mapping for the model | +| `--use-mlock` | Use memory locking to prevent swapping | +| `--offload-kqv` | Enable offloading of KQV operations to the GPU | +| `--flash-attention` | Enable flash attention for faster inference | +| `--type-k` | Data type used for K cache | +| `--type-v` | Data type used for V cache | +| `--validation-workers` | Number of tokenizer workers used for payload validation and truncation | +| `--max-concurrent-requests` | Maximum number of concurrent requests | +| `--max-input-tokens` | Maximum number of input tokens per request | +| `--max-total-tokens` | Maximum number of total tokens (input + output) per request | +| `--max-batch-total-tokens` | Maximum number of tokens in a batch | +| `--max-physical-batch-total-tokens` | Maximum number of tokens in a physical batch | +| `--max-batch-size` | Maximum number of requests per batch | + +--- +[llama.cpp]: https://github.com/ggerganov/llama.cpp +[GGUF]: https://huggingface.co/models?library=gguf&sort=trending diff --git a/docs/source/multi_backend_support.md b/docs/source/multi_backend_support.md index c4df15bc2ca..03d6d30be55 100644 --- a/docs/source/multi_backend_support.md +++ b/docs/source/multi_backend_support.md @@ -11,3 +11,5 @@ TGI remains consistent across backends, allowing you to switch between them seam * **[TGI TRTLLM backend](./backends/trtllm)**: This backend leverages NVIDIA's TensorRT library to accelerate LLM inference. 
It utilizes specialized optimizations and custom kernels for enhanced performance. However, it requires a model-specific compilation step for each GPU architecture. +* **[TGI Llamacpp backend](./backends/llamacpp)**: This backend facilitates the deployment of large language models + (LLMs) by integrating [llama.cpp](https://github.com/ggerganov/llama.cpp), an advanced inference engine optimized for both CPU and GPU computation.