Skip to content

Fix sagemaker-entrypoint* & remove SageMaker and Vertex from Dockerfile* #699

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 0 additions & 9 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -110,14 +110,5 @@ FROM base AS http

COPY --from=http-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router

# Amazon SageMaker compatible image
FROM http AS sagemaker
COPY --chmod=775 sagemaker-entrypoint.sh entrypoint.sh

ENTRYPOINT ["./entrypoint.sh"]

# Default image
FROM http

ENTRYPOINT ["text-embeddings-router"]
CMD ["--json-output"]
60 changes: 7 additions & 53 deletions Dockerfile-cuda-all
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ FROM base-builder AS builder

ARG GIT_SHA
ARG DOCKER_LABEL
ARG VERTEX="false"

# sccache specific variables
ARG SCCACHE_GHA_ENABLED
Expand All @@ -51,39 +50,19 @@ COPY --from=planner /usr/src/recipe.json recipe.json

RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
if [ $VERTEX = "true" ]; \
then \
cargo chef cook --release --features google --recipe-path recipe.json && sccache -s; \
else \
cargo chef cook --release --recipe-path recipe.json && sccache -s; \
fi;
cargo chef cook --release --recipe-path recipe.json && sccache -s;

RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
if [ $VERTEX = "true" ]; \
then \
CUDA_COMPUTE_CAP=75 cargo chef cook --release --features google --features candle-cuda-turing --recipe-path recipe.json && sccache -s; \
else \
CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --recipe-path recipe.json && sccache -s; \
fi;
CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --recipe-path recipe.json && sccache -s;

RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
if [ $VERTEX = "true" ]; \
then \
CUDA_COMPUTE_CAP=80 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s; \
else \
CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; \
fi;
CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s;

RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
if [ $VERTEX = "true" ]; \
then \
CUDA_COMPUTE_CAP=90 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s; \
else \
CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; \
fi;
CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s;

COPY backends backends
COPY core core
Expand All @@ -93,34 +72,19 @@ COPY Cargo.lock ./

RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
if [ $VERTEX = "true" ]; \
then \
CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F google && sccache -s; \
else \
CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing && sccache -s; \
fi;
CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing && sccache -s;

RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-75

RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
if [ $VERTEX = "true" ]; \
then \
CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s; \
else \
CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; \
fi;
CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s;

RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-80

RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \
--mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \
if [ $VERTEX = "true" ]; \
then \
CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s; \
else \
CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; \
fi;
CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s;

RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-90

Expand All @@ -142,16 +106,6 @@ COPY --from=builder /usr/src/target/release/text-embeddings-router-75 /usr/local
COPY --from=builder /usr/src/target/release/text-embeddings-router-80 /usr/local/bin/text-embeddings-router-80
COPY --from=builder /usr/src/target/release/text-embeddings-router-90 /usr/local/bin/text-embeddings-router-90

# Amazon SageMaker compatible image
FROM base AS sagemaker

COPY --chmod=775 sagemaker-entrypoint-cuda-all.sh entrypoint.sh

ENTRYPOINT ["./entrypoint.sh"]

# Default image
FROM base

COPY --chmod=775 cuda-all-entrypoint.sh entrypoint.sh

ENTRYPOINT ["./entrypoint.sh"]
Expand Down
14 changes: 6 additions & 8 deletions cuda-all-entrypoint.sh
Original file line number Diff line number Diff line change
@@ -1,21 +1,19 @@
#!/bin/bash

if ! command -v nvidia-smi &> /dev/null; then
if ! command -v nvidia-smi &>/dev/null; then
echo "Error: 'nvidia-smi' command not found."
exit 1
fi

compute_cap=$(nvidia-smi --query-gpu=compute_cap --format=csv | sed -n '2p' | sed 's/\.//g')

if [ ${compute_cap} -eq 75 ]
then
if [ ${compute_cap} -eq 75 ]; then
exec text-embeddings-router-75 "$@"
elif [ ${compute_cap} -ge 80 -a ${compute_cap} -lt 90 ]
then
elif [ ${compute_cap} -ge 80 -a ${compute_cap} -lt 90 ]; then
exec text-embeddings-router-80 "$@"
elif [ ${compute_cap} -eq 90 ]
then
elif [ ${compute_cap} -eq 90 ]; then
exec text-embeddings-router-90 "$@"
else
echo "cuda compute cap ${compute_cap} is not supported"; exit 1
echo "cuda compute cap ${compute_cap} is not supported"
exit 1
fi
81 changes: 22 additions & 59 deletions sagemaker-entrypoint-cuda-all.sh
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
#!/bin/bash

if ! command -v nvidia-smi &>/dev/null; then
echo "Error: 'nvidia-smi' command not found."
exit 1
fi

# Function to compare version numbers
verlte() {
    # Compare two version strings using version-aware ordering (`sort -V`).
    #   $1, $2 - the version strings to compare.
    # Exit status:
    #   0 when $2 is the one that sorts first (i.e. $2 is strictly lower than $1),
    #   non-zero otherwise. Note that, despite the name, identical versions
    #   yield 1 because the equality guard runs first.
    # Nothing is written to stdout; callers must test the exit status.
    if [ "$1" = "$2" ]; then
        return 1
    fi

    local lowest
    # `echo -e` expands the literal "\n" so sort sees one version per line.
    lowest="$(echo -e "$1\n$2" | sort -V | head -n1)"

    [ "$2" = "${lowest}" ]
}

# CUDA compat libs logic
if [ -f /usr/local/cuda/compat/libcuda.so.1 ]; then
CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink /usr/local/cuda/compat/libcuda.so.1 | cut -d"." -f 3-)
echo "CUDA compat package requires Nvidia driver ≤${CUDA_COMPAT_MAX_DRIVER_VERSION}"
cat /proc/driver/nvidia/version
NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true)
NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module \([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true)
echo "Current installed Nvidia driver version is ${NVIDIA_DRIVER_VERSION}"
if [ $(verlte "$CUDA_COMPAT_MAX_DRIVER_VERSION" "$NVIDIA_DRIVER_VERSION") ]; then
echo "Setup CUDA compatibility libs path to LD_LIBRARY_PATH"
Expand All @@ -21,71 +28,27 @@ else
echo "Skip CUDA compat libs setup as package not found"
fi

# Model variables check
if [[ -z "${HF_MODEL_ID}" ]]; then
echo "HF_MODEL_ID must be set"
exit 1
echo "HF_MODEL_ID must be set"
exit 1
fi
export MODEL_ID="${HF_MODEL_ID}"

if [[ -n "${HF_MODEL_REVISION}" ]]; then
export REVISION="${HF_MODEL_REVISION}"
fi

if ! command -v nvidia-smi &> /dev/null; then
echo "Error: 'nvidia-smi' command not found."
exit 1
fi

# Query GPU name using nvidia-smi
gpu_name=$(nvidia-smi --query-gpu=gpu_name --format=csv | awk 'NR==2')
if [ $? -ne 0 ]; then
echo "Error: $gpu_name"
echo "Query gpu_name failed"
else
echo "Query gpu_name succeeded. Printing output: $gpu_name"
export REVISION="${HF_MODEL_REVISION}"
fi

# Function to get compute capability based on GPU name
get_compute_cap() {
    # Print a two-digit CUDA compute capability chosen from the GPU name in $1.
    # The name is matched by substring; the first matching pattern wins, so the
    # order of the arms below matters (e.g. "A10G" is tested before "A100").
    # Unrecognised names fall back to "80".
    # Side effect: assigns the global variable `gpu_name` (it is not `local`).
    gpu_name="$1"

    case "$gpu_name" in
        *"A10G"*) echo "86" ;;   # Nvidia A10G
        *"A100"*) echo "80" ;;   # Nvidia A100
        *"H100"*) echo "90" ;;   # Nvidia H100
        *"T4"*)   echo "75" ;;   # Nvidia T4
        *"L4"*)   echo "89" ;;   # Nvidia L4
        *)        echo "80" ;;   # Default compute capability
    esac
}

if [[ -z "${CUDA_COMPUTE_CAP}" ]]
then
compute_cap=$(get_compute_cap "$gpu_name")
echo "the compute_cap is $compute_cap"
else
compute_cap=$CUDA_COMPUTE_CAP
fi
compute_cap=$(nvidia-smi --query-gpu=compute_cap --format=csv | sed -n '2p' | sed 's/\.//g')

if [[ ${compute_cap} -eq 75 ]]
then
text-embeddings-router-75 --port 8080 --json-output
elif [[ ${compute_cap} -ge 80 && ${compute_cap} -lt 90 ]]
then
text-embeddings-router-80 --port 8080 --json-output
elif [[ ${compute_cap} -eq 90 ]]
then
text-embeddings-router-90 --port 8080 --json-output
# Router selection logic
if [ ${compute_cap} -eq 75 ]; then
exec text-embeddings-router-75 --port 8080 --json-output
elif [ ${compute_cap} -ge 80 -a ${compute_cap} -lt 90 ]; then
exec text-embeddings-router-80 --port 8080 --json-output
elif [ ${compute_cap} -eq 90 ]; then
exec text-embeddings-router-90 --port 8080 --json-output
else
echo "cuda compute cap ${compute_cap} is not supported"; exit 1
echo "cuda compute cap ${compute_cap} is not supported"
exit 1
fi
8 changes: 4 additions & 4 deletions sagemaker-entrypoint.sh
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
#!/bin/bash

if [[ -z "${HF_MODEL_ID}" ]]; then
echo "HF_MODEL_ID must be set"
exit 1
echo "HF_MODEL_ID must be set"
exit 1
fi
export MODEL_ID="${HF_MODEL_ID}"

if [[ -n "${HF_MODEL_REVISION}" ]]; then
export REVISION="${HF_MODEL_REVISION}"
export REVISION="${HF_MODEL_REVISION}"
fi

text-embeddings-router --port 8080 --json-output
exec text-embeddings-router --port 8080 --json-output
Loading