diff --git a/.github/workflows/container-validation-backends.yml b/.github/workflows/container-validation-backends.yml new file mode 100644 index 0000000000..ed8d8e3d84 --- /dev/null +++ b/.github/workflows/container-validation-backends.yml @@ -0,0 +1,70 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +name: NVIDIA Github Validation + +on: + push: + branches: + - main + - "pull-request/[0-9]+" + +jobs: + build-test: + runs-on: gpu-l40-runners + strategy: + matrix: + framework: [vllm] + include: + - framework: vllm + target: runtime + pytest_marks: "e2e and vllm and gpu_1 and not slow" + # Do not cancel main branch runs + concurrency: + group: ${{ matrix.framework }}-build-test-${{ github.ref_name || github.run_id }} + cancel-in-progress: ${{ github.ref_name != 'main' }} + + name: Build and Test - ${{ matrix.framework }} + env: + CONTAINER_ID: test_${{ github.run_id }}_${{ github.run_attempt }}_${{ github.job }}_${{ matrix.framework }} + PYTEST_XML_FILE: pytest_test_report.xml + FRAMEWORK: ${{ matrix.framework }} + TARGET: ${{ matrix.target }} + PYTEST_MARKS: ${{ matrix.pytest_marks }} + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Login to NGC + if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name == 'push' + run: | + echo "${{ secrets.NGC_CI_ACCESS_TOKEN }}" | docker login nvcr.io -u '$oauthtoken' --password-stdin + - name: Cleanup + if: always() + run: | + docker system prune -af + - name: Debug + run: | + lsmod | grep nvidia + sudo dmesg | grep -i nvrm || true + nvidia-smi + - name: Build image + env: + GITHUB_TOKEN: ${{ secrets.CI_TOKEN }} + AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} + SCCACHE_S3_BUCKET: ${{ secrets.SCCACHE_S3_BUCKET }} + run: | + ./container/build.sh --tag ${{ matrix.framework }}:latest \ + --target ${{ matrix.target }} \ + --framework ${{ matrix.framework }} \ + --use-sccache \ + --sccache-bucket "$SCCACHE_S3_BUCKET" \ + --sccache-region "$AWS_DEFAULT_REGION" + - name: Run pytest + run: | + docker run --rm --gpus all -w /workspace \ + --name ${{ env.CONTAINER_ID }}_pytest \ + ${{ matrix.framework }}:latest \ + bash -c "pytest -xsv --basetemp=/tmp --junitxml=${{ env.PYTEST_XML_FILE }} -m \"${{ env.PYTEST_MARKS }}\"" diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/container-validation-dynamo.yml similarity index 67% rename from .github/workflows/build-and-test.yml rename to .github/workflows/container-validation-dynamo.yml index 75fe74a922..1f9d593352 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/container-validation-dynamo.yml @@ -1,19 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
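For local debugging, the containerized pytest step above can be reproduced outside of GitHub Actions. A minimal sketch, assuming the image was already built with `./container/build.sh --framework vllm --target runtime` as in the "Build image" step; the container name here is an illustrative placeholder, not a value the workflow defines:

```bash
# Re-run the CI pytest invocation against a locally built image.
FRAMEWORK=vllm
PYTEST_MARKS="e2e and vllm and gpu_1 and not slow"

docker run --rm --gpus all -w /workspace \
    --name "local_${FRAMEWORK}_pytest" \
    "${FRAMEWORK}:latest" \
    bash -c "pytest -xsv --basetemp=/tmp -m \"${PYTEST_MARKS}\""
```

The marker expression mirrors `matrix.pytest_marks`: only tests tagged `e2e`, `vllm`, and `gpu_1` are selected, and anything marked `slow` is excluded.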
-name: NVIDIA Test Github Validation +name: NVIDIA Github Validation on: push: @@ -21,6 +9,11 @@ on: - main pull_request: +# Do not cancel main branch runs +concurrency: + group: dynamo-build-test-${{ github.ref_name || github.run_id }} + cancel-in-progress: ${{ github.ref_name != 'main' }} + jobs: build-test: runs-on: @@ -53,7 +46,14 @@ jobs: docker compose up -d nats-server etcd-server - name: Run Rust checks (block-manager + integration tests) run: | - docker run -v ${{ github.workspace }}:/workspace -w /workspace/lib/llm --name ${{ env.CONTAINER_ID }}_rust_checks ${{ steps.define_image_tag.outputs.image_tag }} bash -ec 'rustup component add rustfmt clippy && cargo fmt -- --check && cargo clippy --features block-manager --no-deps --all-targets -- -D warnings && cargo test --locked --all-targets --features=block-manager && cargo test --locked --features integration -- --nocapture' + docker run --rm -v ${{ github.workspace }}:/workspace -w /workspace/lib/llm \ + --name ${{ env.CONTAINER_ID }}_rust_checks \ + ${{ steps.define_image_tag.outputs.image_tag }} \ + bash -ec 'rustup component add rustfmt clippy && \ + cargo fmt -- --check && \ + cargo clippy --features block-manager --no-deps --all-targets -- -D warnings && \ + cargo test --locked --all-targets --features=block-manager && \ + cargo test --locked --features integration -- --nocapture' - name: Cleanup services if: always() working-directory: ./deploy @@ -63,7 +63,10 @@ jobs: env: PYTEST_MARKS: "pre_merge or mypy" run: | - docker run -v ${{ github.workspace }}:/workspace -w /workspace --name ${{ env.CONTAINER_ID }}_pytest ${{ steps.define_image_tag.outputs.image_tag }} bash -c "pytest --basetemp=/tmp --junitxml=${{ env.PYTEST_XML_FILE }} -m \"${{ env.PYTEST_MARKS }}\"" + docker run -v ${{ github.workspace }}:/workspace -w /workspace \ + --name ${{ env.CONTAINER_ID }}_pytest \ + ${{ steps.define_image_tag.outputs.image_tag }} \ + bash -c "pytest --basetemp=/tmp --junitxml=${{ env.PYTEST_XML_FILE }} -m \"${{ env.PYTEST_MARKS }}\"" - name: Copy test report from test Container if: always() run: | diff --git a/container/Dockerfile b/container/Dockerfile index cc30a18ba4..7a7b051c2f 100644 --- a/container/Dockerfile +++ b/container/Dockerfile @@ -1,14 +1,22 @@ # SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 +################################## +########## Build Arguments ######## +################################## + +# Base image configuration ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base" # TODO OPS-612: NCCL will hang with 25.03, so use 25.01 for now # Please check https://github.com/ai-dynamo/dynamo/pull/1065 # for details and reproducer to manually test if the image # can be updated to later versions. ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04" + +# Build configuration ARG RELEASE_BUILD=false ARG ENABLE_KVBM=false +ARG CARGO_BUILD_JOBS # Define general architecture ARGs for supporting both x86 and aarch64 builds. 
# ARCH: Used for package suffixes (e.g., amd64, arm64) @@ -23,6 +31,17 @@ ARG ENABLE_KVBM=false ARG ARCH=amd64 ARG ARCH_ALT=x86_64 +# SCCACHE configuration +ARG USE_SCCACHE +ARG SCCACHE_BUCKET="" +ARG SCCACHE_REGION="" + +# NIXL configuration +ARG NIXL_UCX_REF=v1.19.0 +ARG NIXL_REF=0.4.1 + +# Python configuration +ARG PYTHON_VERSION=3.12 ################################## ########## Base Image ############ @@ -30,44 +49,66 @@ ARG ARCH_ALT=x86_64 FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base -# Redeclare ARCH and ARCH_ALT so they're available in this stage +# Redeclare ARGs for this stage ARG ARCH ARG ARCH_ALT -ARG CARGO_BUILD_JOBS - -ARG NIXL_UCX_REF=v1.19.0 -ARG NIXL_REF=0.4.1 - -# Environment variables for NIXL -ENV NIXL_SRC_DIR=/opt/nixl \ - NIXL_PREFIX=/opt/nvidia/nvda_nixl \ - NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu \ - NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugins +ARG PYTHON_VERSION +ARG USE_SCCACHE +ARG SCCACHE_BUCKET +ARG SCCACHE_REGION +ARG NIXL_UCX_REF +ARG NIXL_REF USER root -ARG PYTHON_VERSION=3.12 +WORKDIR /opt/dynamo + +################################## +########## Tool Installation ##### +################################## +# Install uv package manager COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ +# Install SCCACHE if requested +COPY container/use-sccache.sh /tmp/use-sccache.sh +RUN if [ "$USE_SCCACHE" = "true" ]; then \ + /tmp/use-sccache.sh install; \ + fi + +# Set SCCACHE environment variables +ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \ + SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \ + SCCACHE_S3_KEY_PREFIX=${USE_SCCACHE:+${ARCH}} \ + RUSTC_WRAPPER=${USE_SCCACHE:+sccache} \ + CMAKE_C_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache} \ + CMAKE_CXX_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache} \ + CMAKE_CUDA_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache} + +################################## +########## Rust Setup ############ +################################## + # Rust environment setup ENV RUSTUP_HOME=/usr/local/rustup \ CARGO_HOME=/usr/local/cargo \ PATH=/usr/local/cargo/bin:$PATH \ RUST_VERSION=1.89.0 -WORKDIR /opt/dynamo - # Define Rust target based on ARCH_ALT ARG ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu -# Install Rust using RUSTARCH derived from ARCH_ALT +# Install Rust RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \ - # TODO OPS-591: Add SHA check back based on RUSTARCH chmod +x rustup-init && \ ./rustup-init -y --no-modify-path --profile minimal --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \ rm rustup-init && \ chmod -R a+w $RUSTUP_HOME $CARGO_HOME +################################## +########## System Dependencies ### +################################## + +# Install system packages RUN apt-get update -y \ && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ # NIXL build dependencies @@ -96,12 +137,17 @@ RUN apt-get update -y \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -# Download external dependencies in parallel for better performance +################################## +########## External Services ##### +################################## + +# Install NATS server ENV NATS_VERSION="v2.10.28" RUN --mount=type=cache,target=/var/cache/apt \ wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/${NATS_VERSION}/nats-server-${NATS_VERSION}-${ARCH}.deb && \ dpkg -i nats-server-${NATS_VERSION}-${ARCH}.deb && rm nats-server-${NATS_VERSION}-${ARCH}.deb +# 
Install etcd ENV ETCD_VERSION="v3.5.21" RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \ mkdir -p /usr/local/bin/etcd && \ @@ -109,13 +155,21 @@ RUN wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/downlo rm /tmp/etcd.tar.gz ENV PATH=/usr/local/bin/etcd/:$PATH -### UCX EFA Setup ### +################################## +########## UCX Build ############# +################################## + +# Build and install UCX RUN rm -rf /opt/hpcx/ucx && \ rm -rf /usr/local/ucx && \ echo "Building UCX with reference $NIXL_UCX_REF" && \ cd /usr/local/src && \ git clone https://github.com/openucx/ucx.git && \ cd ucx && git checkout $NIXL_UCX_REF && \ + CC=${USE_SCCACHE:+sccache gcc} && \ + CXX=${USE_SCCACHE:+sccache g++} && \ + export CC=${CC} && \ + export CXX=${CXX} && \ ./autogen.sh && \ ./configure \ --prefix=/usr/local/ucx \ @@ -133,6 +187,7 @@ RUN rm -rf /opt/hpcx/ucx && \ --enable-mt && \ make -j$(nproc) && \ make -j$(nproc) install-strip && \ + /tmp/use-sccache.sh show-stats "UCX" && \ echo "/usr/local/ucx/lib" > /etc/ld.so.conf.d/ucx.conf && \ echo "/usr/local/ucx/lib/ucx" >> /etc/ld.so.conf.d/ucx.conf && \ ldconfig && \ @@ -144,8 +199,17 @@ ENV CPATH=/usr/include:$CPATH \ PATH=/usr/bin:$PATH \ PKG_CONFIG_PATH=/usr/lib/pkgconfig:$PKG_CONFIG_PATH -### NIXL SETUP ### -# Clone nixl source with shallow clone for faster download +################################## +########## NIXL Setup ############ +################################## + +# NIXL environment setup +ENV NIXL_SRC_DIR=/opt/nixl \ + NIXL_PREFIX=/opt/nvidia/nvda_nixl \ + NIXL_LIB_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu \ + NIXL_PLUGIN_DIR=/opt/nvidia/nvda_nixl/lib/${ARCH_ALT}-linux-gnu/plugins + +# Build and install NIXL RUN git clone --depth 1 --branch ${NIXL_REF} "https://github.com/ai-dynamo/nixl.git" ${NIXL_SRC_DIR} && \ cd ${NIXL_SRC_DIR} && \ if [ "$ARCH" = "arm64" ]; then \ @@ -154,13 +218,13 @@ RUN git clone --depth 1 --branch ${NIXL_REF} "https://github.com/ai-dynamo/nixl. nixl_build_args=""; \ fi && \ meson setup build/ --buildtype=release --prefix=$NIXL_PREFIX $nixl_build_args && \ - ninja -C build/ -j$(nproc) && \ - ninja -C build/ install && \ + ninja -C build/ -j$(nproc) && ninja -C build/ install && \ + /tmp/use-sccache.sh show-stats "NIXL" && \ echo "$NIXL_LIB_DIR" > /etc/ld.so.conf.d/nixl.conf && \ echo "$NIXL_PLUGIN_DIR" >> /etc/ld.so.conf.d/nixl.conf && \ ldconfig -# Install NIXL Python module +# Build NIXL Python module # TODO OPS-590: Move gds_path selection based on arch into NIXL build and re-enable gds backend for arm64 RUN if [ "$ARCH" = "arm64" ]; then \ cd ${NIXL_SRC_DIR} && uv build . --out-dir /opt/dynamo/wheelhouse/nixl \ @@ -169,11 +233,15 @@ RUN if [ "$ARCH" = "arm64" ]; then \ cd ${NIXL_SRC_DIR} && uv build . 
--out-dir /opt/dynamo/wheelhouse/nixl; \ fi -# Create virtual environment +################################## +########## Python Environment #### +################################## + +# Create and activate virtual environment +ARG PYTHON_VERSION RUN mkdir -p /opt/dynamo/venv && \ - uv venv /opt/dynamo/venv --python 3.12 + uv venv /opt/dynamo/venv --python $PYTHON_VERSION -# Activate virtual environment ENV VIRTUAL_ENV=/opt/dynamo/venv \ PATH="/opt/dynamo/venv/bin:${PATH}" @@ -191,43 +259,58 @@ ARG ARCH_ALT FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder +# Redeclare ARGs for this stage +ARG ARCH +ARG ARCH_ALT ARG CARGO_BUILD_JOBS -# Set CARGO_BUILD_JOBS to 16 if not provided -# This is to prevent cargo from building $(nproc) jobs in parallel, -# which might exceed the number of opened files limit. -ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} -# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12. ARG RELEASE_BUILD -# Use arg ENABLE_KVBM = true to turn on the block-manager feature ARG ENABLE_KVBM +ARG USE_SCCACHE +ARG SCCACHE_BUCKET +ARG SCCACHE_REGION WORKDIR /opt/dynamo -RUN dnf update -y \ - && dnf install -y llvm-toolset protobuf-compiler python3.12-devel \ - && dnf clean all \ - && rm -rf /var/cache/dnf - -ENV RUSTUP_HOME=/usr/local/rustup \ +# Set environment variables +ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} \ + RUSTUP_HOME=/usr/local/rustup \ CARGO_HOME=/usr/local/cargo \ CARGO_TARGET_DIR=/opt/dynamo/target \ VIRTUAL_ENV=/opt/dynamo/venv \ - NIXL_PREFIX=/opt/nvidia/nvda_nixl + NIXL_PREFIX=/opt/nvidia/nvda_nixl \ + PATH=/usr/local/cargo/bin:/opt/dynamo/venv/bin:$PATH + +# Install system dependencies +ARG PYTHON_VERSION +RUN dnf update -y \ + && dnf install -y llvm-toolset protobuf-compiler python${PYTHON_VERSION}-devel wget \ + && dnf clean all \ + && rm -rf /var/cache/dnf +# Copy artifacts from base stage COPY --from=base $RUSTUP_HOME $RUSTUP_HOME COPY --from=base $CARGO_HOME $CARGO_HOME COPY --from=base $NIXL_PREFIX $NIXL_PREFIX COPY --from=base $VIRTUAL_ENV $VIRTUAL_ENV -ENV PATH=$CARGO_HOME/bin:$VIRTUAL_ENV/bin:$PATH -# Copy configuration files first for better layer caching -COPY pyproject.toml README.md LICENSE Cargo.toml Cargo.lock rust-toolchain.toml hatch_build.py /opt/dynamo/ +# Install SCCACHE if requested +COPY container/use-sccache.sh /tmp/use-sccache.sh +RUN if [ "$USE_SCCACHE" = "true" ]; then \ + /tmp/use-sccache.sh install; \ + fi + +# Set SCCACHE environment variables +ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \ + SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \ + SCCACHE_S3_KEY_PREFIX=${USE_SCCACHE:+${ARCH}} \ + RUSTC_WRAPPER=${USE_SCCACHE:+sccache} -# Copy source code +# Copy source code (order matters for layer caching) +COPY pyproject.toml README.md LICENSE Cargo.toml Cargo.lock rust-toolchain.toml hatch_build.py /opt/dynamo/ COPY lib/ /opt/dynamo/lib/ COPY components/ /opt/dynamo/components/ -# Build dynamo wheel +# Build wheels RUN uv build --wheel --out-dir /opt/dynamo/dist && \ cd /opt/dynamo/lib/bindings/python && \ uv pip install maturin[patchelf] && \ @@ -237,14 +320,15 @@ RUN uv build --wheel --out-dir /opt/dynamo/dist && \ maturin build --release --out /opt/dynamo/dist; \ fi && \ if [ "$RELEASE_BUILD" = "true" ]; then \ - # do not enable KVBM feature, ensure compatibility with lower glibc uv run --python 3.11 maturin build --release --out /opt/dynamo/dist && \ uv run --python 3.10 maturin build --release --out /opt/dynamo/dist; \ - fi + fi && \ + /tmp/use-sccache.sh show-stats "Dynamo" 
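The sccache `ENV` blocks above (here and in the base stage) lean on `${VAR:+word}` parameter expansion: each variable receives its value only when `USE_SCCACHE` is set and non-empty, and collapses to an empty string otherwise, so a single `ENV` block serves both cached and uncached builds. A small standalone illustration of the expansion rule:

```bash
#!/bin/bash
# ${VAR:+word} expands to word only when VAR is set AND non-empty.
USE_SCCACHE=true
echo "RUSTC_WRAPPER=${USE_SCCACHE:+sccache}"   # -> RUSTC_WRAPPER=sccache

unset USE_SCCACHE
echo "RUSTC_WRAPPER=${USE_SCCACHE:+sccache}"   # -> RUSTC_WRAPPER= (empty)

# Caveat: any non-empty value, including "false", still triggers the
# expansion -- which is why build.sh only ever passes USE_SCCACHE=true.
USE_SCCACHE=false
echo "RUSTC_WRAPPER=${USE_SCCACHE:+sccache}"   # -> RUSTC_WRAPPER=sccache
```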
############################################## ########## Dev entrypoint image ############## ############################################## + FROM base AS dev # Application environment variables @@ -254,16 +338,13 @@ ENV DYNAMO_HOME=/opt/dynamo \ WORKDIR /opt/dynamo +# Copy built artifacts COPY --from=wheel_builder /opt/dynamo/dist/*.whl /opt/dynamo/wheelhouse/ COPY --from=wheel_builder $CARGO_TARGET_DIR $CARGO_TARGET_DIR - -# Copy Cargo cache to avoid re-downloading dependencies COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME -# Temporarily copy benchmarks folder for installation +# Install Python packages COPY benchmarks/ /opt/dynamo/benchmarks/ - -# Install all python packages RUN uv pip install \ /opt/dynamo/wheelhouse/ai_dynamo_runtime*cp312*.whl \ /opt/dynamo/wheelhouse/ai_dynamo*any.whl \ @@ -271,10 +352,10 @@ RUN uv pip install \ /opt/dynamo/benchmarks && \ rm -rf /opt/dynamo/benchmarks -# Copy launch banner +# Setup launch banner RUN --mount=type=bind,source=./container/launch_message.txt,target=/opt/dynamo/launch_message.txt \ sed '/^#\s/d' /opt/dynamo/launch_message.txt > ~/.launch_screen && \ echo "cat ~/.launch_screen" >> ~/.bashrc ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] -CMD [] +CMD [] \ No newline at end of file diff --git a/container/Dockerfile.vllm b/container/Dockerfile.vllm index 4341c63e31..8440261465 100644 --- a/container/Dockerfile.vllm +++ b/container/Dockerfile.vllm @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 ARG BASE_IMAGE="nvcr.io/nvidia/cuda-dl-base" -# FIXME: NCCL will hang with 25.03, so use 25.01 for now +# TODO OPS-612: NCCL will hang with 25.03, so use 25.01 for now # Please check https://github.com/ai-dynamo/dynamo/pull/1065 # for details and reproducer to manually test if the image # can be updated to later versions. @@ -16,6 +16,11 @@ ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04" ARG VLLM_REF="1da94e673c257373280026f75ceb4effac80e892" # from v0.10.1.1 ARG TORCH_BACKEND="cu128" +# sccache configuration - inherit from base build +ARG USE_SCCACHE +ARG SCCACHE_BUCKET="" +ARG SCCACHE_REGION="" + # Match 0.10.1.1 vLLM release # https://github.com/vllm-project/vllm/releases/tag/v0.10.1.1 # Pinned to commit before https://github.com/deepseek-ai/DeepGEMM/pull/112 for DeepGEMM which seems to break on H100: @@ -37,187 +42,172 @@ ARG FLASHINF_REF="v0.2.11" # without adding if statements everywhere, so just define both as ARGs for now. 
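The `ARG` defaults for the two naming schemes follow below. For illustration, `build.sh` (later in this diff) derives both values from `--platform`; a sketch of that mapping, with the arm64 values assumed from their usage elsewhere in the diff:

```bash
# ARCH feeds package suffixes (amd64/arm64); ARCH_ALT feeds toolchain
# triples and download URLs (x86_64/aarch64).
PLATFORM="${PLATFORM:-linux/amd64}"
ARCH="amd64"
ARCH_ALT="x86_64"
if [[ "$PLATFORM" == *"linux/arm64"* ]]; then
    ARCH="arm64"
    ARCH_ALT="aarch64"
fi
echo "--build-arg ARCH=${ARCH} --build-arg ARCH_ALT=${ARCH_ALT}"
```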
ARG ARCH=amd64 ARG ARCH_ALT=x86_64 - -################################## -########## Base Image ############ -################################## - -FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base - -# Redeclare ARCH, ARCH_ALT, TORCH_BACKEND so they're available in this stage -ARG ARCH -ARG ARCH_ALT -ARG TORCH_BACKEND - -USER root +# Python configuration ARG PYTHON_VERSION=3.12 -RUN apt-get update -y && \ - apt-get install -y --no-install-recommends \ - # NIXL build dependencies - cmake \ - meson \ - ninja-build \ - pybind11-dev \ - # These headers are missing with the hpcx installer, required - # by UCX to find RDMA devices - libibverbs-dev rdma-core ibverbs-utils libibumad-dev \ - libnuma-dev librdmacm-dev ibverbs-providers \ - # Rust build dependencies - clang \ - libclang-dev \ - git \ - build-essential \ - protobuf-compiler \ - libssl-dev \ - pkg-config \ - # Install utilities - nvtop \ - tmux \ - vim \ - autoconf \ - automake \ - libtool \ - net-tools \ - # For Prometheus - curl tar ca-certificates && \ - rm -rf /var/lib/apt/lists/* - -ARG NIXL_UCX_REF=v1.19.0 -ARG NIXL_REF=0.4.1 +ARG DYNAMO_BASE_IMAGE="dynamo:latest-none" +FROM ${DYNAMO_BASE_IMAGE} AS dynamo_base -ENV NIXL_SRC_DIR=/opt/nixl -ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl -ARG ARCH_ALT -ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu -ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins -ENV LD_LIBRARY_PATH=$NIXL_LIB_DIR:$NIXL_PLUGIN_DIR:$LD_LIBRARY_PATH - -WORKDIR /workspace - -### UCX EFA Setup ### -RUN rm -rf /opt/hpcx/ucx && \ - rm -rf /usr/local/ucx && \ - echo "Building UCX with reference $NIXL_UCX_REF" && \ - cd /usr/local/src && \ - git clone https://github.com/openucx/ucx.git && \ - cd ucx && \ - git checkout $NIXL_UCX_REF && \ - ./autogen.sh && ./configure \ - --prefix=/usr/local/ucx \ - --enable-shared \ - --disable-static \ - --disable-doxygen-doc \ - --enable-optimizations \ - --enable-cma \ - --enable-devel-headers \ - --with-cuda=/usr/local/cuda \ - --with-verbs \ - --with-efa \ - --with-dm \ - --with-gdrcopy=/usr/local \ - --enable-mt && \ - make -j && \ - make -j install-strip && \ - ldconfig - -ENV LD_LIBRARY_PATH=\ -/usr/lib:/usr/local/ucx/lib:\ -/usr/local/ucx/lib/ucx:\ -$LD_LIBRARY_PATH -ENV CPATH=/usr/include -ENV PATH=/usr/bin:$PATH -ENV PKG_CONFIG_PATH=/usr/lib/pkgconfig -SHELL ["/bin/bash", "-c"] - -WORKDIR /workspace - -### NIXL SETUP ### -# Clone nixl source -# TEMP: disable gds backend for arm64 -RUN git clone "https://github.com/ai-dynamo/nixl.git" ${NIXL_SRC_DIR} && \ - cd ${NIXL_SRC_DIR} && \ - git checkout ${NIXL_REF} && \ - if [ "$ARCH" = "arm64" ]; then \ - nixl_build_args="-Ddisable_gds_backend=true"; \ - else \ - nixl_build_args=""; \ - fi && \ - mkdir build && \ - meson setup build/ --buildtype=release --prefix=$NIXL_PREFIX $nixl_build_args && \ - cd build/ && \ - ninja && \ - ninja install; - -### NATS & ETCD SETUP ### -ENV ETCD_VERSION="v3.5.21" -RUN wget --tries=3 --waitretry=5 https://github.com/nats-io/nats-server/releases/download/v2.10.28/nats-server-v2.10.28-${ARCH}.deb && \ - dpkg -i nats-server-v2.10.28-${ARCH}.deb && rm nats-server-v2.10.28-${ARCH}.deb && \ - wget --tries=3 --waitretry=5 https://github.com/etcd-io/etcd/releases/download/$ETCD_VERSION/etcd-$ETCD_VERSION-linux-${ARCH}.tar.gz -O /tmp/etcd.tar.gz && \ - mkdir -p /usr/local/bin/etcd && \ - tar -xvf /tmp/etcd.tar.gz -C /usr/local/bin/etcd --strip-components=1 && \ - rm /tmp/etcd.tar.gz -ENV PATH=/usr/local/bin/etcd/:$PATH +######################################################## +########## Framework Development Image 
################ +######################################################## +# +# PURPOSE: Framework development and vLLM compilation +# +# This stage builds and compiles framework dependencies including: +# - vLLM inference engine with CUDA support +# - DeepGEMM and FlashInfer optimizations +# - All necessary build tools and compilation dependencies +# - Framework-level Python packages and extensions +# +# Use this stage when you need to: +# - Build vLLM from source with custom modifications +# - Develop or debug framework-level components +# - Create custom builds with specific optimization flags +# +# Use dynamo base image (see /container/Dockerfile for more details) +FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS framework + +RUN apt-get update -y \ + && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + # vLLM build dependencies + cmake \ + ibverbs-providers \ + ibverbs-utils \ + libibumad-dev \ + libibverbs-dev \ + libnuma-dev \ + librdmacm-dev \ + rdma-core \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* ### VIRTUAL ENVIRONMENT SETUP ### -# Install uv and create virtualenv -ENV VIRTUAL_ENV=/opt/dynamo/venv COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ -RUN mkdir /opt/dynamo && \ - uv venv ${VIRTUAL_ENV} --python 3.12 +ARG PYTHON_VERSION +# Create virtual environment +RUN mkdir -p /opt/dynamo/venv && \ + uv venv /opt/dynamo/venv --python $PYTHON_VERSION # Activate virtual environment -ENV PATH="${VIRTUAL_ENV}/bin:${PATH}" - -# Install NIXL Python module -# TODO: Move gds_path selection based on arch into NIXL build -# TEMP: disable gds backend for arm64 -RUN if [ "$ARCH" = "arm64" ]; then \ - cd ${NIXL_SRC_DIR} && uv build . --out-dir /workspace/wheels/nixl \ - --config-settings=setup-args="-Ddisable_gds_backend=true"; \ - else \ - cd ${NIXL_SRC_DIR} && uv build . 
--out-dir /workspace/wheels/nixl; \ - fi && \ - # Install the wheel - # TODO: Move NIXL wheel install to the wheel_builder stage - uv pip install /workspace/wheels/nixl/*.whl +ENV VIRTUAL_ENV=/opt/dynamo/venv \ + PATH="/opt/dynamo/venv/bin:${PATH}" +ARG ARCH # Install vllm - keep this early in Dockerfile to avoid # rebuilds from unrelated source code changes ARG VLLM_REF ARG VLLM_GIT_URL ARG DEEPGEMM_REF ARG FLASHINF_REF +ARG TORCH_BACKEND ARG MAX_JOBS=16 ENV MAX_JOBS=$MAX_JOBS ENV CUDA_HOME=/usr/local/cuda +# Install sccache if requested +COPY container/use-sccache.sh /tmp/use-sccache.sh +# Install sccache if requested +ARG USE_SCCACHE +ARG ARCH_ALT +ARG SCCACHE_BUCKET +ARG SCCACHE_REGION + +ENV ARCH_ALT=${ARCH_ALT} +RUN if [ "$USE_SCCACHE" = "true" ]; then \ + /tmp/use-sccache.sh install; \ + fi + +# Set environment variables - they'll be empty strings if USE_SCCACHE=false +ENV SCCACHE_BUCKET=${USE_SCCACHE:+${SCCACHE_BUCKET}} \ + SCCACHE_REGION=${USE_SCCACHE:+${SCCACHE_REGION}} \ + SCCACHE_S3_KEY_PREFIX=${USE_SCCACHE:+${ARCH}} \ + CMAKE_C_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache} \ + CMAKE_CXX_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache} \ + CMAKE_CUDA_COMPILER_LAUNCHER=${USE_SCCACHE:+sccache} +# Install VLLM and related dependencies RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \ --mount=type=cache,target=/root/.cache/uv \ # TODO - split vllm, DeepEP, DeepGeMM, PPLX installs # Should be able to select how you want your build to go cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \ chmod +x /tmp/install_vllm.sh && \ - /tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt --deepgemm-ref $DEEPGEMM_REF --flashinf-ref $FLASHINF_REF --torch-backend $TORCH_BACKEND; + /tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt --deepgemm-ref $DEEPGEMM_REF --flashinf-ref $FLASHINF_REF --torch-backend $TORCH_BACKEND && \ + /tmp/use-sccache.sh show-stats "vLLM"; ENV LD_LIBRARY_PATH=\ /opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\ $LD_LIBRARY_PATH -# Common dependencies -RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \ - uv pip install --requirement /tmp/requirements.txt -### MISC UTILITY SETUP ### +################################################## +########## Runtime Image ######################## +################################################## +# +# PURPOSE: Production runtime environment +# +# This stage creates a lightweight production-ready image containing: +# - Pre-compiled vLLM and framework dependencies +# - Dynamo runtime libraries and Python packages +# - Essential runtime dependencies and configurations +# - Optimized for inference workloads and deployment +# +# Use this stage when you need: +# - Production deployment of Dynamo with vLLM +# - Minimal runtime footprint without build tools +# - Ready-to-run inference server environment +# - Base for custom application containers +# + +FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime -# Install test dependencies -RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \ - uv pip install --requirement /tmp/requirements.txt && \ - pyright --help > /dev/null 2>&1 && \ - printf "[safe]\n directory=/workspace\n" > /root/.gitconfig +WORKDIR /workspace +ENV DYNAMO_HOME=/opt/dynamo +ENV VIRTUAL_ENV=/opt/dynamo/venv +ENV PATH="${VIRTUAL_ENV}/bin:${PATH}" + +ARG ARCH_ALT +ENV 
NIXL_PREFIX=/opt/nvidia/nvda_nixl +ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu +ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins + +# Install Python, build-essential and python3-dev as apt dependencies +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + # Python runtime - CRITICAL for virtual environment to work + python3-dev \ + build-essential \ + # jq and curl for polling various endpoints and health checks + jq \ + curl \ + # Libraries required by UCX to find RDMA devices + libibverbs1 rdma-core ibverbs-utils libibumad3 \ + libnuma1 librdmacm1 ibverbs-providers \ + # JIT Kernel Compilation, flashinfer + ninja-build \ + g++ \ + # prometheus dependencies + ca-certificates && \ + rm -rf /var/lib/apt/lists/* + +# Copy CUDA development tools (nvcc, headers, dependencies, etc.) from base devel image +COPY --from=framework /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc +COPY --from=framework /usr/local/cuda/bin/cudafe++ /usr/local/cuda/bin/cudafe++ +COPY --from=framework /usr/local/cuda/bin/ptxas /usr/local/cuda/bin/ptxas +COPY --from=framework /usr/local/cuda/bin/fatbinary /usr/local/cuda/bin/fatbinary +COPY --from=framework /usr/local/cuda/include/ /usr/local/cuda/include/ +COPY --from=framework /usr/local/cuda/nvvm /usr/local/cuda/nvvm +COPY --from=framework /usr/local/cuda/lib64/libcudart.so* /usr/local/cuda/lib64/ + +### COPY NATS & ETCD ### +# Copy nats and etcd from dev image +COPY --from=dynamo_base /usr/bin/nats-server /usr/bin/nats-server +COPY --from=dynamo_base /usr/local/bin/etcd/ /usr/local/bin/etcd/ +# Add ETCD and CUDA binaries to PATH so cicc and other CUDA tools are accessible +ENV PATH=/usr/local/bin/etcd/:/usr/local/cuda/nvvm/bin:$PATH # Install prometheus ARG PROM_VERSION=3.4.1 @@ -227,41 +217,122 @@ RUN ARCH=$(dpkg --print-architecture) && \ arm64) PLATFORM=linux-arm64 ;; \ *) echo "Unsupported architecture: $ARCH" && exit 1 ;; \ esac && \ - curl -fsSL https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/prometheus-${PROM_VERSION}.${PLATFORM}.tar.gz \ + curl -fsSL "https://github.com/prometheus/prometheus/releases/download/v${PROM_VERSION}/prometheus-${PROM_VERSION}.${PLATFORM}.tar.gz" \ | tar -xz -C /tmp && \ - mv /tmp/prometheus-${PROM_VERSION}.${PLATFORM}/prometheus /usr/local/bin/ && \ + mv "/tmp/prometheus-${PROM_VERSION}.${PLATFORM}/prometheus" /usr/local/bin/ && \ chmod +x /usr/local/bin/prometheus && \ - rm -rf /tmp/prometheus-${PROM_VERSION}.${PLATFORM} + rm -rf "/tmp/prometheus-${PROM_VERSION}.${PLATFORM}" -### BUILDS ### +# Copy UCX from dev image as plugin for NIXL +# Copy NIXL source from devr image +# Copy dynamo wheels for gitlab artifacts +COPY --from=dynamo_base /usr/local/ucx /usr/local/ucx +COPY --from=dynamo_base $NIXL_PREFIX $NIXL_PREFIX -ENV RUSTUP_HOME=/usr/local/rustup \ - CARGO_HOME=/usr/local/cargo \ - PATH=/usr/local/cargo/bin:$PATH \ - RUST_VERSION=1.89.0 - -# Define Rust target based on ARCH_ALT ARG -ARG RUSTARCH=${ARCH_ALT}-unknown-linux-gnu - -# Install Rust using RUSTARCH derived from ARCH_ALT -RUN wget --tries=3 --waitretry=5 "https://static.rust-lang.org/rustup/archive/1.28.1/${RUSTARCH}/rustup-init" && \ - # TODO: Add SHA check back based on RUSTARCH - chmod +x rustup-init && \ - ./rustup-init -y --no-modify-path --profile default --default-toolchain $RUST_VERSION --default-host ${RUSTARCH} && \ - rm rustup-init && \ - chmod -R a+w $RUSTUP_HOME $CARGO_HOME - -ARG CARGO_BUILD_JOBS -# Set CARGO_BUILD_JOBS to 16 if not provided -# This is to prevent cargo 
from building $(nproc) jobs in parallel, -# which might exceed the number of opened files limit. -ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} +# Copies vllm, DeepEP, DeepGEMM, PPLX repos (all editable installs) and nvshmem binaries +COPY --from=framework /opt/vllm /opt/vllm + +ENV LD_LIBRARY_PATH=\ +/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\ +$NIXL_LIB_DIR:\ +$NIXL_PLUGIN_DIR:\ +/usr/local/ucx/lib:\ +/usr/local/ucx/lib/ucx:\ +$LD_LIBRARY_PATH + +### VIRTUAL ENVIRONMENT SETUP ### + +COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ +ARG PYTHON_VERSION +RUN uv venv ${VIRTUAL_ENV} --python $PYTHON_VERSION + +# Copy virtual environment from framework image to avoid re-installing framework + vllm dependencies +COPY --from=framework \ + /opt/dynamo/venv/lib/python${PYTHON_VERSION}/site-packages \ + /opt/dynamo/venv/lib/python${PYTHON_VERSION}/site-packages + +# Install dynamo, NIXL, and dynamo-specific dependencies +COPY benchmarks/ /opt/dynamo/benchmarks/ +COPY --from=dynamo_base /opt/dynamo/wheelhouse/ /opt/dynamo/wheelhouse/ +RUN uv pip install \ + /opt/dynamo/wheelhouse/ai_dynamo_runtime*cp312*.whl \ + /opt/dynamo/wheelhouse/ai_dynamo*any.whl \ + /opt/dynamo/wheelhouse/nixl/nixl*.whl \ + /opt/dynamo/benchmarks && \ + rm -rf /opt/dynamo/benchmarks + +# Install common and test dependencies +RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \ + --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.test.txt \ + uv pip install --requirement /tmp/requirements.txt --requirement /tmp/requirements.test.txt + +# Copy benchmarks, examples, and tests for CI +COPY . /workspace/ + +# Copy attribution files +COPY ATTRIBUTION* LICENSE /workspace/ +# Copy launch banner +RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \ + sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \ + echo "cat ~/.launch_screen" >> ~/.bashrc && \ + echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc + +ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] +CMD [] ####################################### -########## Local Development ########## +########## Local Development ####### ####################################### +# +# PURPOSE: Local development +# +# This stage adds development tools, utilities, and dependencies specifically +# needed for: +# - Local development and debugging +# - vscode/cursor development +# +# Use this stage when you need a full development environment with additional +# tooling beyond the base runtime image. 
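As the diff below shows, this stage is now layered on top of `runtime` rather than `base`. A hedged example of selecting it through the build script, assuming `--target` works here as it does for `runtime` in the CI workflow:

```bash
# Build the vLLM local development image; with the 2-step flow added to
# build.sh below, this first builds the dynamo base image, then the
# Dockerfile.vllm stages, tagging the result dynamo:latest-vllm-local-dev.
./container/build.sh --framework vllm --target local-dev
```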
-FROM base AS local-dev
+FROM runtime AS local-dev
+
+# Install development utilities and build dependencies
+RUN apt-get update -y && \
+    apt-get install -y --no-install-recommends \
+    # Utilities
+    nvtop \
+    wget \
+    tmux \
+    vim \
+    net-tools \
+    # Build dependencies
+    autoconf \
+    automake \
+    cmake \
+    git \
+    libtool \
+    meson \
+    ninja-build \
+    pybind11-dev \
+    # Rust build dependencies
+    clang \
+    libclang-dev \
+    protobuf-compiler && \
+    rm -rf /var/lib/apt/lists/*
+
+# Rust environment setup
+ENV RUSTUP_HOME=/usr/local/rustup \
+    CARGO_HOME=/usr/local/cargo \
+    CARGO_TARGET_DIR=/opt/dynamo/target \
+    PATH=/usr/local/cargo/bin:$PATH
+
+COPY --from=dynamo_base $RUSTUP_HOME $RUSTUP_HOME
+COPY --from=dynamo_base $CARGO_HOME $CARGO_HOME
 
 # https://code.visualstudio.com/remote/advancedcontainers/add-nonroot-user
 # Will use the default ubuntu user, but give sudo access
@@ -282,9 +353,9 @@ RUN apt-get update && apt-get install -y sudo gnupg2 gnupg1 \
 
 # This is a slow operation (~40s on my cpu)
 # Much better than chown -R $USERNAME:$USERNAME /opt/dynamo/venv (~10min on my cpu)
-COPY --from=base --chown=$USER_UID:$USER_GID ${VIRTUAL_ENV} ${VIRTUAL_ENV}
+COPY --from=runtime --chown=$USER_UID:$USER_GID ${VIRTUAL_ENV} ${VIRTUAL_ENV}
 RUN chown $USERNAME:$USERNAME ${VIRTUAL_ENV}
-COPY --from=base --chown=$USERNAME:$USERNAME /usr/local/bin /usr/local/bin
+COPY --from=runtime --chown=$USERNAME:$USERNAME /usr/local/bin /usr/local/bin
 
 # so we can use maturin develop
 RUN uv pip install maturin[patchelf]
@@ -303,208 +374,5 @@ RUN SNIPPET="export PROMPT_COMMAND='history -a' && export HISTFILE=$HOME/.comman
 
 RUN mkdir -p /home/$USERNAME/.cache/
 
-ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
-
-##################################
-##### Wheel Build Image ##########
-##################################
-
-# Redeclare ARCH_ALT ARG so it's available for interpolation in the FROM instruction
-ARG ARCH_ALT
-
-FROM quay.io/pypa/manylinux_2_28_${ARCH_ALT} AS wheel_builder
-
-ARG CARGO_BUILD_JOBS
-# Set CARGO_BUILD_JOBS to 16 if not provided
-# This is to prevent cargo from building $(nproc) jobs in parallel,
-# which might exceed the number of opened files limit.
-ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16}
-# Use build arg RELEASE_BUILD = true to generate wheels for Python 3.10, 3.11 and 3.12.
-ARG RELEASE_BUILD
-# Use arg ENABLE_KVBM = true to turn on the block-manager feature
-ARG ENABLE_KVBM
-
-# Keep in sync with the base image.
-ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl - -WORKDIR /workspace - -RUN yum update -y \ - && yum install -y llvm-toolset \ - && yum install -y python3.12-devel \ - && yum install -y protobuf-compiler \ - && yum clean all \ - && rm -rf /var/cache/yum - -ENV RUSTUP_HOME=/usr/local/rustup \ - CARGO_HOME=/usr/local/cargo \ - CARGO_TARGET_DIR=/workspace/target \ - VIRTUAL_ENV=/opt/dynamo/venv - -COPY --from=base $RUSTUP_HOME $RUSTUP_HOME -COPY --from=base $CARGO_HOME $CARGO_HOME -COPY --from=base $NIXL_PREFIX $NIXL_PREFIX -COPY --from=base /workspace /workspace -COPY --from=base $VIRTUAL_ENV $VIRTUAL_ENV -ENV PATH=$CARGO_HOME/bin:$VIRTUAL_ENV/bin:$PATH - -# Copy configuration files -COPY pyproject.toml /workspace/ -COPY README.md /workspace/ -COPY LICENSE /workspace/ -COPY Cargo.toml /workspace/ -COPY Cargo.lock /workspace/ -COPY rust-toolchain.toml /workspace/ -COPY hatch_build.py /workspace/ - -# Copy source code -COPY lib/ /workspace/lib/ -COPY components /workspace/components -COPY launch /workspace/launch - -RUN cargo build \ - --release \ - --locked \ - --features dynamo-llm/block-manager \ - --workspace - -# Build dynamo wheel -RUN uv build --wheel --out-dir /workspace/dist && \ - cd /workspace/lib/bindings/python && \ - uv pip install maturin[patchelf] && \ - if [ "$ENABLE_KVBM" = "true" ]; then \ - maturin build --release --features block-manager --out /workspace/dist; \ - else \ - maturin build --release --out /workspace/dist; \ - fi && \ - if [ "$RELEASE_BUILD" = "true" ]; then \ - # do not enable KVBM feature, ensure compatibility with lower glibc - uv run --python 3.11 maturin build --release --out /workspace/dist && \ - uv run --python 3.10 maturin build --release --out /workspace/dist; \ - fi - -####################################### -########## CI Minimum Image ########### -####################################### -FROM base AS ci_minimum - -ENV DYNAMO_HOME=/workspace -ENV CARGO_TARGET_DIR=/workspace/target - -WORKDIR /workspace - -COPY --from=wheel_builder /workspace /workspace -COPY --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX - -# Copy Cargo cache to avoid re-downloading dependencies -COPY --from=wheel_builder $CARGO_HOME $CARGO_HOME - -# Copy rest of the code -COPY . /workspace - -# Package the bindings -RUN mkdir -p /opt/dynamo/bindings/wheels && \ - mkdir /opt/dynamo/bindings/lib && \ - cp dist/ai_dynamo*cp312*.whl /opt/dynamo/bindings/wheels/. 
&& \ - cp target/release/metrics /usr/local/bin - -RUN uv pip install /workspace/dist/ai_dynamo_runtime*cp312*.whl && \ - uv pip install /workspace/dist/ai_dynamo*any.whl - -RUN uv pip install /workspace/benchmarks - -# Copy launch banner -RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \ - sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \ - echo "cat ~/.launch_screen" >> ~/.bashrc - -######################################## -########## Development Image ########### -######################################## -FROM ci_minimum AS dev - -ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] - -CMD [] - -#################################### -########## Runtime Image ########### -#################################### - -FROM ${RUNTIME_IMAGE}:${RUNTIME_IMAGE_TAG} AS runtime - -WORKDIR /workspace -ENV DYNAMO_HOME=/workspace -ENV VIRTUAL_ENV=/opt/dynamo/venv -ENV PATH="${VIRTUAL_ENV}/bin:${PATH}" - -ARG ARCH_ALT -ENV NIXL_PREFIX=/opt/nvidia/nvda_nixl -ENV NIXL_LIB_DIR=$NIXL_PREFIX/lib/${ARCH_ALT}-linux-gnu -ENV NIXL_PLUGIN_DIR=$NIXL_LIB_DIR/plugins - -# Install build-essential and python3-dev as apt dependencies -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ - build-essential \ - python3-dev \ - # jq and curl for polling various endpoints and health checks - jq \ - curl \ - # For debugging - vim \ - # Libraries required by UCX to find RDMA devices - libibverbs1 rdma-core ibverbs-utils libibumad3 \ - libnuma1 librdmacm1 ibverbs-providers \ - # JIT Kernel Compilation, flashinfer - ninja-build \ - g++ \ - cuda-toolkit-12-8 && \ - rm -rf /var/lib/apt/lists/* - -### COPY NATS & ETCD & PROMETHEUS ### -# Copy nats and etcd from base image -COPY --from=base /usr/bin/nats-server /usr/bin/nats-server -COPY --from=base /usr/local/bin/etcd/ /usr/local/bin/etcd/ -ENV PATH=/usr/local/bin/etcd/:$PATH - -# Copy prometheus from base image -COPY --from=base /usr/local/bin/prometheus /usr/local/bin/prometheus - -# Copy UCX from base image as plugin for NIXL -# Copy NIXL source from wheel_builder image -# Copy dynamo wheels for gitlab artifacts -COPY --from=base /usr/local/ucx /usr/local/ucx -COPY --from=wheel_builder $NIXL_PREFIX $NIXL_PREFIX -COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/ - -# Copies vllm, DeepEP, DeepGEMM, PPLX repos (all editable installs) and nvshmem binaries -COPY --from=base /opt/vllm /opt/vllm - -ENV LD_LIBRARY_PATH=\ -/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\ -$NIXL_LIB_DIR:\ -$NIXL_PLUGIN_DIR:\ -/usr/local/ucx/lib:\ -/usr/local/ucx/lib/ucx:\ -$LD_LIBRARY_PATH - -# Copy entire venv -# Theres a lot of stuff we'd have to re-compile (for arm64) -# TODO: use pip ai-dynamo[vllm] in venv to replicate end user environment -# Copy metrics binary from wheel_builder image, not part of ai-dynamo wheel -COPY --from=ci_minimum /workspace/target/release/metrics /usr/local/bin/metrics -COPY --from=ci_minimum ${VIRTUAL_ENV} ${VIRTUAL_ENV} - -# Keep everything from ci_minimum for mypy and other pre-merge tests -# TODO: Remove this once we have a functional CI image built on top of the runtime image -COPY --from=ci_minimum /workspace/ /workspace/ - -# Copy launch banner -RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \ - sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \ - echo "cat ~/.launch_screen" >> ~/.bashrc && \ - echo "source $VIRTUAL_ENV/bin/activate" >> ~/.bashrc - ENTRYPOINT 
["/opt/nvidia/nvidia_entrypoint.sh"] CMD [] diff --git a/container/build.sh b/container/build.sh index 71646c6e40..c72ad87269 100755 --- a/container/build.sh +++ b/container/build.sh @@ -121,6 +121,11 @@ NIXL_UCX_EFA_REF=9d2b88a1f67faf9876f267658bd077b379b8bb76 NO_CACHE="" +# sccache configuration for S3 +USE_SCCACHE="" +SCCACHE_BUCKET="" +SCCACHE_REGION="" + get_options() { while :; do case $1 in @@ -282,9 +287,25 @@ get_options() { --make-efa) NIXL_UCX_REF=$NIXL_UCX_EFA_REF ;; - --) - shift - break + --use-sccache) + USE_SCCACHE=true + ;; + --sccache-bucket) + if [ "$2" ]; then + SCCACHE_BUCKET=$2 + shift + else + missing_requirement "$1" + fi + ;; + + --sccache-region) + if [ "$2" ]; then + SCCACHE_REGION=$2 + shift + else + missing_requirement "$1" + fi ;; -?*) error 'ERROR: Unknown option: ' "$1" @@ -345,6 +366,16 @@ get_options() { else TARGET_STR="--target dev" fi + + # Validate sccache configuration + if [ "$USE_SCCACHE" = true ]; then + if [ -z "$SCCACHE_BUCKET" ]; then + error "ERROR: --sccache-bucket is required when --use-sccache is specified" + fi + if [ -z "$SCCACHE_REGION" ]; then + error "ERROR: --sccache-region is required when --use-sccache is specified" + fi + fi } @@ -360,6 +391,15 @@ show_image_options() { echo " Build Context: '${BUILD_CONTEXT}'" echo " Build Arguments: '${BUILD_ARGS}'" echo " Framework: '${FRAMEWORK}'" + if [ "$USE_SCCACHE" = true ]; then + echo " sccache: Enabled" + echo " sccache Bucket: '${SCCACHE_BUCKET}'" + echo " sccache Region: '${SCCACHE_REGION}'" + + if [ -n "$SCCACHE_S3_KEY_PREFIX" ]; then + echo " sccache S3 Key Prefix: '${SCCACHE_S3_KEY_PREFIX}'" + fi + fi echo "" } @@ -386,6 +426,9 @@ show_help() { echo " [--make-efa Enables EFA support for NIXL]" echo " [--enable-kvbm Enables KVBM support in Python 3.12]" echo " [--trtllm-use-nixl-kvcache-experimental Enables NIXL KVCACHE experimental support for TensorRT-LLM]" + echo " [--use-sccache enable sccache for Rust/C/C++ compilation caching]" + echo " [--sccache-bucket S3 bucket name for sccache (required with --use-sccache)]" + echo " [--sccache-region S3 region for sccache (required with --use-sccache)]" exit 0 } @@ -400,6 +443,7 @@ error() { get_options "$@" + # Automatically set ARCH and ARCH_ALT if PLATFORM is linux/arm64 ARCH="amd64" if [[ "$PLATFORM" == *"linux/arm64"* ]]; then @@ -547,6 +591,15 @@ if [ -n "${NIXL_UCX_REF}" ]; then BUILD_ARGS+=" --build-arg NIXL_UCX_REF=${NIXL_UCX_REF} " fi +# Add sccache build arguments +if [ "$USE_SCCACHE" = true ]; then + BUILD_ARGS+=" --build-arg USE_SCCACHE=true" + BUILD_ARGS+=" --build-arg SCCACHE_BUCKET=${SCCACHE_BUCKET}" + BUILD_ARGS+=" --build-arg SCCACHE_REGION=${SCCACHE_REGION}" + + +fi + LATEST_TAG="--tag dynamo:latest-${FRAMEWORK,,}" if [ -n "${TARGET}" ]; then LATEST_TAG="${LATEST_TAG}-${TARGET}" @@ -558,6 +611,24 @@ if [ -z "$RUN_PREFIX" ]; then set -x fi -$RUN_PREFIX docker build -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE +# TODO: Follow 2-step build process for all frameworks once necessary changes are made to the sglang and TRT-LLM backend Dockerfiles. 
+if [[ $FRAMEWORK == "VLLM" ]]; then + # Define base image tag before using it + DYNAMO_BASE_IMAGE="dynamo-base:${VERSION}" + # Start base image build + echo "======================================" + echo "Starting Build 1: Base Image" + echo "======================================" + $RUN_PREFIX docker build -f "${SOURCE_DIR}/Dockerfile" --target dev $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO --tag $DYNAMO_BASE_IMAGE $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE + # Start framework build + echo "======================================" + echo "Starting Build 2: Framework Image" + echo "======================================" + BUILD_ARGS+=" --build-arg DYNAMO_BASE_IMAGE=${DYNAMO_BASE_IMAGE}" + $RUN_PREFIX docker build -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE +else + $RUN_PREFIX docker build -f $DOCKERFILE $TARGET_STR $PLATFORM $BUILD_ARGS $CACHE_FROM $CACHE_TO $TAG $LATEST_TAG $BUILD_CONTEXT_ARG $BUILD_CONTEXT $NO_CACHE +fi + { set +x; } 2>/dev/null diff --git a/container/use-sccache.sh b/container/use-sccache.sh new file mode 100755 index 0000000000..0fd4098818 --- /dev/null +++ b/container/use-sccache.sh @@ -0,0 +1,86 @@ +#!/bin/bash +# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +# sccache management script +# This script handles sccache installation, environment setup, and statistics display + +SCCACHE_VERSION="v0.8.2" + + +usage() { + cat << EOF +Usage: $0 [COMMAND] [OPTIONS] + +Commands: + install Install sccache binary (requires ARCH_ALT environment variable) + show-stats Display sccache statistics with optional build name + help Show this help message + +Environment variables: + USE_SCCACHE Set to 'true' to enable sccache + SCCACHE_BUCKET S3 bucket name (fallback if not passed as parameter) + SCCACHE_REGION S3 region (fallback if not passed as parameter) + ARCH Architecture for S3 key prefix (fallback if not passed as parameter) + ARCH_ALT Alternative architecture name for downloads (e.g., x86_64, aarch64) + +Examples: + # Install sccache (requires ARCH_ALT to be set) + ARCH_ALT=x86_64 $0 install + # Show stats with build name + $0 show-stats "UCX" +EOF +} + +install_sccache() { + if [ -z "${ARCH_ALT:-}" ]; then + echo "Error: ARCH_ALT environment variable is required for sccache installation" + exit 1 + fi + echo "Installing sccache ${SCCACHE_VERSION} for architecture ${ARCH_ALT}..." 
+    # Download and install sccache
+    wget --tries=3 --waitretry=5 \
+        "https://github.com/mozilla/sccache/releases/download/${SCCACHE_VERSION}/sccache-${SCCACHE_VERSION}-${ARCH_ALT}-unknown-linux-musl.tar.gz"
+    tar -xzf "sccache-${SCCACHE_VERSION}-${ARCH_ALT}-unknown-linux-musl.tar.gz"
+    mv "sccache-${SCCACHE_VERSION}-${ARCH_ALT}-unknown-linux-musl/sccache" /usr/local/bin/
+    # Cleanup
+    rm -rf sccache*
+    echo "sccache installed successfully"
+}
+
+show_stats() {
+    # The build name is optional; a default keeps "$1" from tripping set -u
+    local build_name="${1:-build}"
+    if command -v sccache >/dev/null 2>&1; then
+        echo "=== sccache statistics AFTER ${build_name} ==="
+        sccache --show-stats
+    else
+        echo "sccache is not available"
+    fi
+}
+
+main() {
+    case "${1:-help}" in
+        install)
+            install_sccache
+            ;;
+        show-stats)
+            shift                # Remove the command from arguments
+            show_stats "$@"      # Pass all remaining arguments
+            ;;
+        help|--help|-h)
+            usage
+            ;;
+        *)
+            echo "Unknown command: $1"
+            usage
+            exit 1
+            ;;
+    esac
+}
+
+main "$@"
diff --git a/tests/serve/test_vllm.py b/tests/serve/test_vllm.py
index f58d67ec52..41affb405e 100644
--- a/tests/serve/test_vllm.py
+++ b/tests/serve/test_vllm.py
@@ -263,7 +263,6 @@ def vllm_config_test(request):
 
 @pytest.mark.e2e
-@pytest.mark.slow
 def test_serve_deployment(vllm_config_test, request, runtime_services):
     """
     Test dynamo serve deployments with different graph configurations.
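Taken together, a typical invocation of the new sccache path looks like the following; the bucket and region are placeholders, and AWS credentials are expected in the environment, exactly as the workflow provides them:

```bash
# Build the vLLM runtime image with sccache-backed compilation caching.
# build.sh validates that both values accompany --use-sccache.
./container/build.sh \
    --framework vllm \
    --target runtime \
    --use-sccache \
    --sccache-bucket my-sccache-bucket \
    --sccache-region us-west-2
```

Per the `LATEST_TAG` logic in `build.sh`, the result is additionally tagged `dynamo:latest-vllm-runtime`, and `use-sccache.sh show-stats` prints cache statistics after each major component build (UCX, NIXL, vLLM, and the Dynamo wheels).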