- ARG llama_version=b4623
- ARG llama_hardware_target=cpu
+ FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04 AS deps
+
+ ARG llama_version=b4628
  ARG llama_cuda_arch=75-real;80-real;86-real;89-real;90-real

- FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04 AS base
+ WORKDIR /opt/src

  ENV DEBIAN_FRONTEND=noninteractive
  RUN apt update && apt install -y \
-     python3-venv \
-     python3-pip
-
- RUN python3 -m venv /venv
- ENV PATH="/venv/bin:$PATH"
- RUN pip3 install --no-cache-dir transformers
-
- FROM base AS deps
- WORKDIR /opt/src
-
- RUN apt install -y \
      clang \
      cmake \
      curl \
      git \
+     python3-dev \
      libssl-dev \
      pkg-config \
      tar

- FROM deps AS llamacpp-builder
- ARG llama_version
- ARG llama_cuda_arch
- ENV LLAMA_VERSION=${llama_version}
-
- ADD https://github.com/ggerganov/llama.cpp/archive/refs/tags/${LLAMA_VERSION}.tar.gz /opt/src/
- RUN tar -xzf ${LLAMA_VERSION}.tar.gz && \
-     cd llama.cpp-${LLAMA_VERSION} && \
-     cmake \
-         -B build \
-         -DCMAKE_INSTALL_PREFIX=/usr/llama \
+ ADD https://github.com/ggerganov/llama.cpp/archive/refs/tags/${llama_version}.tar.gz /opt/src/
+ RUN tar -xzf ${llama_version}.tar.gz \
+     && cd llama.cpp-${llama_version} \
+     && cmake -B build \
+         -DCMAKE_INSTALL_PREFIX=/usr \
+         -DCMAKE_INSTALL_LIBDIR=/usr/lib \
          -DCMAKE_C_COMPILER=clang \
          -DCMAKE_CXX_COMPILER=clang++ \
          -DCMAKE_CUDA_ARCHITECTURES=${llama_cuda_arch} \
@@ -44,44 +30,49 @@ RUN tar -xzf ${LLAMA_VERSION}.tar.gz && \
          -DLLAMA_BUILD_TESTS=OFF \
          -DLLAMA_BUILD_EXAMPLES=OFF \
          -DLLAMA_BUILD_SERVER=OFF \
-     && cmake --build build --parallel --config Release -j \
+     && cmake --build build --parallel --config Release \
      && cmake --install build

- FROM deps AS rust-builder
+ WORKDIR /app
  COPY rust-toolchain.toml rust-toolchain.toml
  RUN curl -sSf https://sh.rustup.rs | sh -s -- -y --no-modify-path --default-toolchain none
  ENV PATH="/root/.cargo/bin:$PATH"
+ RUN cargo install cargo-chef --locked

+ FROM deps AS planner
  COPY . .
- COPY --from=llamacpp-builder /usr/llama/lib/ /usr/lib/
- COPY --from=llamacpp-builder /usr/llama/include/ /usr/include/
-
+ RUN cargo chef prepare --recipe-path recipe.json

- ARG llama_hardware_target
- ENV TGI_LLAMA_HARDWARE_TARGET=${llama_hardware_target}
- RUN export TGI_LIB_SEARCH_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs && \
-     ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
-     cargo build \
+ FROM deps AS builder
+ COPY --from=planner /app/recipe.json recipe.json
+ RUN cargo chef cook \
+     --recipe-path recipe.json \
      --profile release-opt \
      --package text-generation-router-llamacpp
+ COPY . .
+ ENV TGI_LLAMA_PKG_CUDA=cuda-12.6
+ RUN cargo build \
+     --profile release-opt \
+     --package text-generation-router-llamacpp --frozen

- FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04
- WORKDIR /usr/bin
+ # fix libcuda.so.1 ?
+ RUN cp "$(pkg-config --variable=libdir cuda-12.6)"/stubs/libcuda.so /usr/lib/libcuda.so.1

- ENV DEBIAN_FRONTEND=noninteractive
- ENV PATH="/venv/bin:$PATH"
+ FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04

  RUN apt update && apt install -y \
-     openssl \
      python3-venv \
      python3-pip

- RUN python3 -m venv /venv && \
-     pip3 install --no-cache-dir -r transformers
+ RUN python3 -m venv /venv
+ ENV PATH="/venv/bin:$PATH"
+
+ COPY backends/llamacpp/requirements.txt requirements.txt
+ RUN pip3 install --no-cache-dir -r requirements.txt

- COPY --from=llamacpp-builder /usr/llama/lib/ /usr/lib/
- COPY --from=llamacpp-builder /usr/llama/include/ /usr/include/
- COPY --from=llamacpp-builder /usr/llama/bin/ /usr/bin/
- COPY --from=rust-builder /opt/src/target/release-opt/text-generation-router-llamacpp /usr/bin/text-generation-launcher
+ COPY --from=builder /usr/lib/libllama.so /usr/lib/
+ COPY --from=builder /usr/lib/libggml*.so /usr/lib/
+ COPY --from=builder /usr/lib/libcuda.so.1 /usr/lib/
+ COPY --from=builder /app/target/release-opt/text-generation-router-llamacpp /usr/bin/

- ENTRYPOINT ["text-generation-launcher"]
+ # ENTRYPOINT ["text-generation-router-llamacpp"]
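
For trying this locally, a minimal build sketch using the build args declared at the top of the file; it assumes the file lands as Dockerfile_llamacpp at the repository root, and the image tag is illustrative:

    # build the CUDA llamacpp backend image (tag name is an example)
    docker build \
        -f Dockerfile_llamacpp \
        --build-arg llama_version=b4628 \
        --build-arg llama_cuda_arch="75-real;80-real;86-real;89-real;90-real" \
        -t tgi-llamacpp .

The llama_cuda_arch value must stay quoted, since the shell would otherwise split the semicolon-separated architecture list into separate commands.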