diff --git a/Dockerfile b/Dockerfile index 97d3c4e9..2bd8e491 100644 --- a/Dockerfile +++ b/Dockerfile @@ -110,14 +110,5 @@ FROM base AS http COPY --from=http-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router -# Amazon SageMaker compatible image -FROM http AS sagemaker -COPY --chmod=775 sagemaker-entrypoint.sh entrypoint.sh - -ENTRYPOINT ["./entrypoint.sh"] - -# Default image -FROM http - ENTRYPOINT ["text-embeddings-router"] CMD ["--json-output"] diff --git a/Dockerfile-cuda-all b/Dockerfile-cuda-all index 5dca432a..c1e9e2ec 100644 --- a/Dockerfile-cuda-all +++ b/Dockerfile-cuda-all @@ -35,7 +35,6 @@ FROM base-builder AS builder ARG GIT_SHA ARG DOCKER_LABEL -ARG VERTEX="false" # sccache specific variables ARG SCCACHE_GHA_ENABLED @@ -51,39 +50,19 @@ COPY --from=planner /usr/src/recipe.json recipe.json RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - if [ $VERTEX = "true" ]; \ - then \ - cargo chef cook --release --features google --recipe-path recipe.json && sccache -s; \ - else \ - cargo chef cook --release --recipe-path recipe.json && sccache -s; \ - fi; + cargo chef cook --release --recipe-path recipe.json && sccache -s; RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - if [ $VERTEX = "true" ]; \ - then \ - CUDA_COMPUTE_CAP=75 cargo chef cook --release --features google --features candle-cuda-turing --recipe-path recipe.json && sccache -s; \ - else \ - CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --recipe-path recipe.json && sccache -s; \ - fi; + CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --recipe-path recipe.json && sccache -s; RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - if [ $VERTEX = "true" ]; \ - then \ - CUDA_COMPUTE_CAP=80 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s; \ - else \ - CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; \ - fi; + CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - if [ $VERTEX = "true" ]; \ - then \ - CUDA_COMPUTE_CAP=90 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s; \ - else \ - CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; \ - fi; + CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --recipe-path recipe.json && sccache -s; COPY backends backends COPY core core @@ -93,34 +72,19 @@ COPY Cargo.lock ./ RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - if [ $VERTEX = "true" ]; \ - then \ - CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F google && sccache -s; \ - else \ - CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing && sccache -s; \ - fi; + CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing && sccache -s; RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-75 RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - if [ $VERTEX = "true" ]; \ - then \ - CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s; \ - else \ - CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; \ - fi; + CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-80 RUN --mount=type=secret,id=actions_results_url,env=ACTIONS_RESULTS_URL \ --mount=type=secret,id=actions_runtime_token,env=ACTIONS_RUNTIME_TOKEN \ - if [ $VERTEX = "true" ]; \ - then \ - CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s; \ - else \ - CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; \ - fi; + CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda && sccache -s; RUN mv /usr/src/target/release/text-embeddings-router /usr/src/target/release/text-embeddings-router-90 @@ -142,16 +106,6 @@ COPY --from=builder /usr/src/target/release/text-embeddings-router-75 /usr/local COPY --from=builder /usr/src/target/release/text-embeddings-router-80 /usr/local/bin/text-embeddings-router-80 COPY --from=builder /usr/src/target/release/text-embeddings-router-90 /usr/local/bin/text-embeddings-router-90 -# Amazon SageMaker compatible image -FROM base AS sagemaker - -COPY --chmod=775 sagemaker-entrypoint-cuda-all.sh entrypoint.sh - -ENTRYPOINT ["./entrypoint.sh"] - -# Default image -FROM base - COPY --chmod=775 cuda-all-entrypoint.sh entrypoint.sh ENTRYPOINT ["./entrypoint.sh"] diff --git a/cuda-all-entrypoint.sh b/cuda-all-entrypoint.sh index d9be21ea..6f1c909b 100644 --- a/cuda-all-entrypoint.sh +++ b/cuda-all-entrypoint.sh @@ -1,21 +1,19 @@ #!/bin/bash -if ! command -v nvidia-smi &> /dev/null; then +if ! command -v nvidia-smi &>/dev/null; then echo "Error: 'nvidia-smi' command not found." exit 1 fi compute_cap=$(nvidia-smi --query-gpu=compute_cap --format=csv | sed -n '2p' | sed 's/\.//g') -if [ ${compute_cap} -eq 75 ] -then +if [ ${compute_cap} -eq 75 ]; then exec text-embeddings-router-75 "$@" -elif [ ${compute_cap} -ge 80 -a ${compute_cap} -lt 90 ] -then +elif [ ${compute_cap} -ge 80 -a ${compute_cap} -lt 90 ]; then exec text-embeddings-router-80 "$@" -elif [ ${compute_cap} -eq 90 ] -then +elif [ ${compute_cap} -eq 90 ]; then exec text-embeddings-router-90 "$@" else - echo "cuda compute cap ${compute_cap} is not supported"; exit 1 + echo "cuda compute cap ${compute_cap} is not supported" + exit 1 fi diff --git a/sagemaker-entrypoint-cuda-all.sh b/sagemaker-entrypoint-cuda-all.sh index a3c63cbb..0d89ce28 100644 --- a/sagemaker-entrypoint-cuda-all.sh +++ b/sagemaker-entrypoint-cuda-all.sh @@ -1,14 +1,21 @@ #!/bin/bash +if ! command -v nvidia-smi &>/dev/null; then + echo "Error: 'nvidia-smi' command not found." + exit 1 +fi + +# Function to compare version numbers verlte() { [ "$1" = "$2" ] && return 1 || [ "$2" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] } +# CUDA compat libs logic if [ -f /usr/local/cuda/compat/libcuda.so.1 ]; then CUDA_COMPAT_MAX_DRIVER_VERSION=$(readlink /usr/local/cuda/compat/libcuda.so.1 | cut -d"." -f 3-) echo "CUDA compat package requires Nvidia driver ≤${CUDA_COMPAT_MAX_DRIVER_VERSION}" cat /proc/driver/nvidia/version - NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module *\([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true) + NVIDIA_DRIVER_VERSION=$(sed -n 's/^NVRM.*Kernel Module \([0-9.]*\).*$/\1/p' /proc/driver/nvidia/version 2>/dev/null || true) echo "Current installed Nvidia driver version is ${NVIDIA_DRIVER_VERSION}" if [ $(verlte "$CUDA_COMPAT_MAX_DRIVER_VERSION" "$NVIDIA_DRIVER_VERSION") ]; then echo "Setup CUDA compatibility libs path to LD_LIBRARY_PATH" @@ -21,71 +28,27 @@ else echo "Skip CUDA compat libs setup as package not found" fi +# Model variables check if [[ -z "${HF_MODEL_ID}" ]]; then - echo "HF_MODEL_ID must be set" - exit 1 + echo "HF_MODEL_ID must be set" + exit 1 fi export MODEL_ID="${HF_MODEL_ID}" if [[ -n "${HF_MODEL_REVISION}" ]]; then - export REVISION="${HF_MODEL_REVISION}" -fi - -if ! command -v nvidia-smi &> /dev/null; then - echo "Error: 'nvidia-smi' command not found." - exit 1 -fi - -# Query GPU name using nvidia-smi -gpu_name=$(nvidia-smi --query-gpu=gpu_name --format=csv | awk 'NR==2') -if [ $? -ne 0 ]; then - echo "Error: $gpu_name" - echo "Query gpu_name failed" -else - echo "Query gpu_name succeeded. Printing output: $gpu_name" + export REVISION="${HF_MODEL_REVISION}" fi -# Function to get compute capability based on GPU name -get_compute_cap() { - gpu_name="$1" - - # Check if the GPU name contains "A10G" - if [[ "$gpu_name" == *"A10G"* ]]; then - echo "86" - # Check if the GPU name contains "A100" - elif [[ "$gpu_name" == *"A100"* ]]; then - echo "80" - # Check if the GPU name contains "H100" - elif [[ "$gpu_name" == *"H100"* ]]; then - echo "90" - # Cover Nvidia T4 - elif [[ "$gpu_name" == *"T4"* ]]; then - echo "75" - # Cover Nvidia L4 - elif [[ "$gpu_name" == *"L4"* ]]; then - echo "89" - else - echo "80" # Default compute capability - fi -} - -if [[ -z "${CUDA_COMPUTE_CAP}" ]] -then - compute_cap=$(get_compute_cap "$gpu_name") - echo "the compute_cap is $compute_cap" -else - compute_cap=$CUDA_COMPUTE_CAP -fi +compute_cap=$(nvidia-smi --query-gpu=compute_cap --format=csv | sed -n '2p' | sed 's/\.//g') -if [[ ${compute_cap} -eq 75 ]] -then - text-embeddings-router-75 --port 8080 --json-output -elif [[ ${compute_cap} -ge 80 && ${compute_cap} -lt 90 ]] -then - text-embeddings-router-80 --port 8080 --json-output -elif [[ ${compute_cap} -eq 90 ]] -then - text-embeddings-router-90 --port 8080 --json-output +# Router selection logic +if [ ${compute_cap} -eq 75 ]; then + exec text-embeddings-router-75 --port 8080 --json-output +elif [ ${compute_cap} -ge 80 -a ${compute_cap} -lt 90 ]; then + exec text-embeddings-router-80 --port 8080 --json-output +elif [ ${compute_cap} -eq 90 ]; then + exec text-embeddings-router-90 --port 8080 --json-output else - echo "cuda compute cap ${compute_cap} is not supported"; exit 1 + echo "cuda compute cap ${compute_cap} is not supported" + exit 1 fi diff --git a/sagemaker-entrypoint.sh b/sagemaker-entrypoint.sh index b6cec7bb..1f3a4a9d 100644 --- a/sagemaker-entrypoint.sh +++ b/sagemaker-entrypoint.sh @@ -1,13 +1,13 @@ #!/bin/bash if [[ -z "${HF_MODEL_ID}" ]]; then - echo "HF_MODEL_ID must be set" - exit 1 + echo "HF_MODEL_ID must be set" + exit 1 fi export MODEL_ID="${HF_MODEL_ID}" if [[ -n "${HF_MODEL_REVISION}" ]]; then - export REVISION="${HF_MODEL_REVISION}" + export REVISION="${HF_MODEL_REVISION}" fi -text-embeddings-router --port 8080 --json-output +exec text-embeddings-router --port 8080 --json-output