
Commit 76516da

Update Dockerfile_llamacpp
Signed-off-by: Adrien Gallouët <[email protected]>
1 parent 1ee3ea7 commit 76516da

1 file changed (+38, -47 lines)


Diff for: Dockerfile_llamacpp

@@ -1,41 +1,27 @@
-ARG llama_version=b4623
-ARG llama_hardware_target=cpu
+FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04 AS deps
+
+ARG llama_version=b4628
 ARG llama_cuda_arch=75-real;80-real;86-real;89-real;90-real
 
-FROM nvidia/cuda:12.6.3-cudnn-devel-ubuntu24.04 AS base
+WORKDIR /opt/src
 
 ENV DEBIAN_FRONTEND=noninteractive
 RUN apt update && apt install -y \
-    python3-venv \
-    python3-pip
-
-RUN python3 -m venv /venv
-ENV PATH="/venv/bin:$PATH"
-RUN pip3 install --no-cache-dir transformers
-
-FROM base AS deps
-WORKDIR /opt/src
-
-RUN apt install -y \
     clang \
     cmake \
     curl \
     git \
+    python3-dev \
     libssl-dev \
     pkg-config \
     tar
 
-FROM deps AS llamacpp-builder
-ARG llama_version
-ARG llama_cuda_arch
-ENV LLAMA_VERSION=${llama_version}
-
-ADD https://github.com/ggerganov/llama.cpp/archive/refs/tags/${LLAMA_VERSION}.tar.gz /opt/src/
-RUN tar -xzf ${LLAMA_VERSION}.tar.gz && \
-    cd llama.cpp-${LLAMA_VERSION} && \
-    cmake \
-        -B build \
-        -DCMAKE_INSTALL_PREFIX=/usr/llama \
+ADD https://github.com/ggerganov/llama.cpp/archive/refs/tags/${llama_version}.tar.gz /opt/src/
+RUN tar -xzf ${llama_version}.tar.gz \
+    && cd llama.cpp-${llama_version} \
+    && cmake -B build \
+        -DCMAKE_INSTALL_PREFIX=/usr \
+        -DCMAKE_INSTALL_LIBDIR=/usr/lib \
         -DCMAKE_C_COMPILER=clang \
         -DCMAKE_CXX_COMPILER=clang++ \
         -DCMAKE_CUDA_ARCHITECTURES=${llama_cuda_arch} \
@@ -44,44 +30,49 @@ RUN tar -xzf ${LLAMA_VERSION}.tar.gz && \
         -DLLAMA_BUILD_TESTS=OFF \
        -DLLAMA_BUILD_EXAMPLES=OFF \
         -DLLAMA_BUILD_SERVER=OFF \
-    && cmake --build build --parallel --config Release -j \
+    && cmake --build build --parallel --config Release \
     && cmake --install build
 
-FROM deps AS rust-builder
+WORKDIR /app
 COPY rust-toolchain.toml rust-toolchain.toml
 RUN curl -sSf https://sh.rustup.rs | sh -s -- -y --no-modify-path --default-toolchain none
 ENV PATH="/root/.cargo/bin:$PATH"
+RUN cargo install cargo-chef --locked
 
+FROM deps AS planner
 COPY . .
-COPY --from=llamacpp-builder /usr/llama/lib/ /usr/lib/
-COPY --from=llamacpp-builder /usr/llama/include/ /usr/include/
-
+RUN cargo chef prepare --recipe-path recipe.json
 
-ARG llama_hardware_target
-ENV TGI_LLAMA_HARDWARE_TARGET=${llama_hardware_target}
-RUN export TGI_LIB_SEARCH_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs && \
-    ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1 && \
-    cargo build \
+FROM deps AS builder
+COPY --from=planner /app/recipe.json recipe.json
+RUN cargo chef cook \
+    --recipe-path recipe.json \
     --profile release-opt \
     --package text-generation-router-llamacpp
+COPY . .
+ENV TGI_LLAMA_PKG_CUDA=cuda-12.6
+RUN cargo build \
+    --profile release-opt \
+    --package text-generation-router-llamacpp --frozen
 
-FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04
-WORKDIR /usr/bin
+# fix libcuda.so.1 ?
+RUN cp "$(pkg-config --variable=libdir cuda-12.6)"/stubs/libcuda.so /usr/lib/libcuda.so.1
 
-ENV DEBIAN_FRONTEND=noninteractive
-ENV PATH="/venv/bin:$PATH"
+FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04
 
 RUN apt update && apt install -y \
-    openssl \
     python3-venv \
     python3-pip
 
-RUN python3 -m venv /venv && \
-    pip3 install --no-cache-dir -r transformers
+RUN python3 -m venv /venv
+ENV PATH="/venv/bin:$PATH"
+
+COPY backends/llamacpp/requirements.txt requirements.txt
+RUN pip3 install --no-cache-dir -r requirements.txt
 
-COPY --from=llamacpp-builder /usr/llama/lib/ /usr/lib/
-COPY --from=llamacpp-builder /usr/llama/include/ /usr/include/
-COPY --from=llamacpp-builder /usr/llama/bin/ /usr/bin/
-COPY --from=rust-builder /opt/src/target/release-opt/text-generation-router-llamacpp /usr/bin/text-generation-launcher
+COPY --from=builder /usr/lib/libllama.so /usr/lib/
+COPY --from=builder /usr/lib/libggml*.so /usr/lib/
+COPY --from=builder /usr/lib/libcuda.so.1 /usr/lib/
+COPY --from=builder /app/target/release-opt/text-generation-router-llamacpp /usr/bin/
 
-ENTRYPOINT ["text-generation-launcher"]
+#ENTRYPOINT ["text-generation-router-llamacpp"]
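
Not part of the commit, but for context: a minimal sketch of how this Dockerfile might be built and exercised locally. The image tag (tgi-llamacpp) and the single CUDA arch are placeholders, the build args are the ones declared at the top of the file, and running with GPUs assumes the NVIDIA container toolkit is installed on the host.

```sh
# Build from the repository root; tag and arch selection are examples only.
# (Semicolon-separated arch lists such as the default need quoting in a shell.)
docker build \
    -f Dockerfile_llamacpp \
    --build-arg llama_version=b4628 \
    --build-arg "llama_cuda_arch=86-real" \
    -t tgi-llamacpp .

# The final stage no longer sets an ENTRYPOINT, so invoke the router binary explicitly.
docker run --rm --gpus all tgi-llamacpp \
    text-generation-router-llamacpp --help
```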
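The `# fix libcuda.so.1 ?` step copies the CUDA stub driver library into /usr/lib so the freshly built binary and the GGML CUDA backend can be loaded in the builder image, where no real driver is present; at run time the NVIDIA container runtime is expected to provide the real libcuda.so.1. As a hedged sanity check (again not part of the commit, and assuming the tgi-llamacpp tag from the sketch above):

```sh
# List the shared libraries the router resolves inside a GPU-enabled container;
# libllama, libggml* and libcuda.so.1 are the ones copied or stubbed in above.
docker run --rm --gpus all tgi-llamacpp \
    ldd /usr/bin/text-generation-router-llamacpp | grep -E 'libllama|libggml|libcuda'
```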
