Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# pre-commit configuration: hooks run on the pre-commit, pre-push and manual
# stages unless a hook overrides its own stages.
default_stages: [pre-commit, pre-push, manual]

repos:
  # Generic repository hygiene checks.
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      - id: check-symlinks
      - id: destroyed-symlinks
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
        args: [--allow-multiple-documents]
      # - id: check-toml
      - id: check-ast
      - id: check-added-large-files
      - id: check-merge-conflict
      - id: check-shebang-scripts-are-executable
      - id: detect-private-key
      - id: debug-statements
      - id: no-commit-to-branch
  # Import sorting, using the black-compatible profile.
  - repo: https://github.com/PyCQA/isort
    rev: 5.13.2
    hooks:
      - id: isort
        args:
          - "--profile"
          - "black"
          # Must be passed as an option; a bare "filter-files" would be
          # interpreted by isort as a path to sort.
          - "--filter-files"
  # Code formatting for Python sources and Jupyter notebooks.
  - repo: https://github.com/psf/black
    rev: 24.10.0
    hooks:
      - id: black-jupyter
  # Strip notebook metadata while keeping cell outputs.
  - repo: https://github.com/kynan/nbstripout
    rev: 0.8.1
    hooks:
      - id: nbstripout
        args:
          - '--keep-output'
          - '--extra-keys=metadata.kernelspec metadata.language_info.version'
23 changes: 22 additions & 1 deletion build_and_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,34 @@ ROCM_IDX_URL=${4:-https://rocm.prereleases.amd.com/whl/gfx94X-dcgpu}
# The default for THEROCK_BASE_IMAGE is current, but may change. Make sure to track TheRock's dockerfile.
THEROCK_BASE_IMAGE=${5:-quay.io/pypa/manylinux_2_28_x86_64@sha256:d632b5e68ab39e59e128dcf0e59e438b26f122d7f2d45f3eea69ffd2877ab017}

echo "TARGET : $TARGET"

if [[ $TARGET != cuda* && $TARGET != rocm* && $TARGET != "therock" ]]; then
echo "Usage: $0 [cuda|rocm|therock] [all|rdma|p2p|efa|ep] [py_version] [rocm_index_url] [therock_base_image]" >&2
exit 1
fi

ARCH_SUFFIX=$(uname -m)
./build.sh $TARGET $BUILD_TYPE $PY_VER $ROCM_IDX_URL $THEROCK_BASE_IMAGE

echo "ARCH_SUFFIX : $ARCH_SUFFIX"

# Succeeds (status 0) when the script appears to be running inside a Docker
# container, judged by either of two markers checked below.
is_docker_container() {
  # First marker: the /.dockerenv file exists.
  if [ -f /.dockerenv ]; then
    return 0
  fi
  # Second marker: the cgroup listing of PID 1 mentions "docker". The exit
  # status of grep becomes the function's status; read errors are silenced.
  grep -q docker /proc/1/cgroup 2>/dev/null
}

# Succeeds only when a `docker` command can be resolved AND `docker info`
# itself succeeds. All output of both probes is discarded.
has_docker_command() {
  if ! command -v docker >/dev/null 2>&1; then
    return 1
  fi
  # The status of `docker info` is the function's status.
  docker info >/dev/null 2>&1
}

# Pick the build driver:
#   - already inside a container, or no usable docker command on this host:
#     build directly with build_insider_docker.sh;
#   - otherwise: let build.sh drive the build.
# The space after `!` is required: `!has_docker_command` is parsed as the
# name of a (non-existent) command, so that test could never succeed.
if is_docker_container || ! has_docker_command; then
  echo "Running inside the docker : ./build_insider_docker.sh $TARGET $BUILD_TYPE $PY_VER $ROCM_IDX_URL $THEROCK_BASE_IMAGE"
  ./build_insider_docker.sh "$TARGET" "$BUILD_TYPE" "$PY_VER" "$ROCM_IDX_URL" "$THEROCK_BASE_IMAGE"
else
  echo "Running with the docker"
  ./build.sh "$TARGET" "$BUILD_TYPE" "$PY_VER" "$ROCM_IDX_URL" "$THEROCK_BASE_IMAGE"
fi

pip install -r requirements.txt
pip uninstall uccl -y || true
if [[ $TARGET != "therock" ]]; then
Expand Down
243 changes: 243 additions & 0 deletions build_insider_docker.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
#!/bin/bash
set -e

# -----------------------
# Build uccl wheels for CUDA (NVIDIA) and ROCm (AMD) backends/targets.
# This script does not start a Docker container itself: it runs the build
# steps directly in the current environment, so the CUDA/ROCm toolchain must
# already be available there (for example inside the project's build image).
#
# Usage:
# ./build_insider_docker.sh [cuda|rocm|therock] [all|rdma|p2p|efa|ep] [py_version] [rocm_index_url] [therock_base_image]
#
# The wheels are written to wheelhouse-[cuda|rocm|therock]
# -----------------------
# Absolute path of the directory containing this script.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Positional arguments, each falling back to a default when absent or empty.
TARGET=${1:-cuda}
BUILD_TYPE=${2:-all}
PY_VER=${3:-$(python3 -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')")}
ARCH="$(uname -m)"
# The default for ROCM_IDX_URL depends on the gfx architecture of your GPU and the index URLs may change.
ROCM_IDX_URL=${4:-https://rocm.prereleases.amd.com/whl/gfx94X-dcgpu}
# The default for THEROCK_BASE_IMAGE is current, but may change. Make sure to track TheRock's dockerfile.
THEROCK_BASE_IMAGE=${5:-quay.io/pypa/manylinux_2_28_x86_64@sha256:d632b5e68ab39e59e128dcf0e59e438b26f122d7f2d45f3eea69ffd2877ab017}
# IS_EFA captures the text "EFA support: true" when an rdmap* device is listed
# under /sys/class/infiniband/; otherwise it stays empty and the "false"
# message is printed to stdout instead.
if ! IS_EFA=$( [ -d "/sys/class/infiniband/" ] && ls /sys/class/infiniband/ 2>/dev/null | grep -q rdmap && echo "EFA support: true" ); then
  echo "EFA support: false"
fi


# Reject any target other than cuda*, rocm* or therock.
case "$TARGET" in
  cuda* | rocm* | therock) ;;
  *)
    echo "Usage: $0 [cuda|rocm|therock] [all|rdma|p2p|efa|ep|eccl] [py_version] [rocm_index_url]" >&2
    exit 1
    ;;
esac

# ROCm-based targets are refused on Arm64 hosts.
if [[ $ARCH == "aarch64" ]]; then
  case "$TARGET" in
    rocm* | therock)
      echo "Skipping ROCm build on Arm64 (no ROCm toolchain)."
      exit 1
      ;;
  esac
fi

# Remove leftovers of earlier builds (failures are ignored on purpose), then
# recreate an empty wheel output directory.
WHEEL_DIR="wheelhouse-${TARGET}"
for stale_path in uccl.egg-info dist build "${WHEEL_DIR}"; do
  rm -r "${stale_path}" >/dev/null 2>&1 || true
done
mkdir -p "${WHEEL_DIR}"

# Generate nccl.h from the bundled rccl sources when it is not there yet.
# Only the cmake configure step is run (with hipcc from /opt/rocm as CXX);
# its output is discarded and its failure is deliberately ignored.
build_rccl_nccl_h() {
  # Unlike CUDA, ROCM does not include nccl.h. So we need to build rccl to get nccl.h.
  local nccl_header="thirdparty/rccl/build/release/include/nccl.h"

  # Header already present: nothing to do.
  if [[ -f "$nccl_header" ]]; then
    return
  fi

  cd thirdparty/rccl
  # Just to get nccl.h, not the whole library
  CXX=/opt/rocm/bin/hipcc cmake -B build/release -S . -DCMAKE_EXPORT_COMPILE_COMMANDS=OFF >/dev/null 2>&1 || true
  cd ../..
}

# Build the RDMA network plugin for the requested backend and copy the
# resulting shared object into uccl/lib/.
#
# Arguments:
#   $1 - target: cuda*, rocm* or therock
#   $2 - machine architecture (as printed by `uname -m`)
#   $3 - EFA flag (accepted for a uniform signature; not used here)
# Returns:
#   0 on success or when the build is skipped (ROCm targets on aarch64),
#   non-zero when a build step fails or the target is unknown.
#
# Each build runs in a subshell so the caller's working directory is never
# changed, and its status is checked explicitly: inside a chain such as
# `cd dir && make clean && make && cd ..` a failing `make` is not the last
# element of the list, so `set -e` alone would not stop the script.
build_rdma() {
  local TARGET="$1"
  local ARCH="$2"
  local IS_EFA="$3"
  local target_so

  set -euo pipefail
  echo "[container] build_rdma Target: $TARGET"

  if [[ "$TARGET" == cuda* ]]; then
    (cd collective/rdma && make clean && make -j"$(nproc)") || return 1
    target_so=collective/rdma/libnccl-net-uccl.so
  elif [[ "$TARGET" == rocm* ]]; then
    if [[ "$ARCH" == "aarch64" ]]; then
      echo "Skipping ROCm build on Arm64 (no ROCm toolchain)."
      return 0
    fi
    (cd collective/rdma && make clean -f Makefile.rocm && make -j"$(nproc)" -f Makefile.rocm) || return 1
    target_so=collective/rdma/librccl-net-uccl.so
  elif [[ "$TARGET" == "therock" ]]; then
    if [[ "$ARCH" == "aarch64" ]]; then
      echo "Skipping ROCm build on Arm64 (no ROCm toolchain)."
      return 0
    fi
    # Unlike CUDA, ROCM does not include nccl.h. So we need to build rccl to get nccl.h.
    if [[ ! -f "thirdparty/rccl/build/release/include/nccl.h" ]]; then
      (
        cd thirdparty/rccl || exit 1
        # Just to get nccl.h, not the whole library; a failing configure
        # step is tolerated, exactly as before.
        CXX=hipcc cmake -B build/release -S . -DCMAKE_EXPORT_COMPILE_COMMANDS=OFF -DCMAKE_PREFIX_PATH="$(rocm-sdk path --cmake)" -DROCM_PATH="$(rocm-sdk path --root)" -DHIP_PLATFORM=amd >/dev/null 2>&1 || true
      ) || return 1
    fi
    (cd collective/rdma && make clean -f Makefile.therock && make -j"$(nproc)" -f Makefile.therock HIP_HOME="$(rocm-sdk path --root)" CONDA_LIB_HOME="$VIRTUAL_ENV/lib") || return 1
    target_so=collective/rdma/librccl-net-uccl.so
  else
    # Previously this case only surfaced later as an unbound-variable error.
    echo "build_rdma: unsupported target '$TARGET'" >&2
    return 1
  fi

  echo "[container] Copying RDMA .so to uccl/lib/"
  mkdir -p uccl/lib
  cp "${target_so}" uccl/lib/
}

# Build the EFA network plugin together with the custom NCCL it requires,
# then copy both shared objects into uccl/lib/.
#
# Arguments:
#   $1 - target: cuda*, rocm* or therock
#   $2 - machine architecture (as printed by `uname -m`)
#   $3 - EFA flag (accepted for a uniform signature; not used here)
# Returns:
#   0 on success or when the build is skipped (aarch64, rocm*, therock),
#   non-zero when a build or copy step fails.
#
# Builds run in subshells with an explicit status check: in a chain such as
# `cd dir && make clean && make && cd ..` a failing `make` is not the last
# element of the list, so `set -e` would not abort and the function would
# carry on from the wrong directory.
build_efa() {
  local TARGET="$1"
  local ARCH="$2"
  local IS_EFA="$3"

  set -euo pipefail
  echo "[container] build_efa Target: $TARGET"

  if [[ "$ARCH" == "aarch64" || "$TARGET" == rocm* || "$TARGET" == "therock" ]]; then
    echo "Skipping EFA build on Arm64 (no EFA installer) or ROCm (no CUDA)."
    return 0
  fi
  (cd collective/efa && make clean && make -j"$(nproc)") || return 1

  # EFA requires a custom NCCL.
  (cd thirdparty/nccl-sg && make src.build -j"$(nproc)" NVCC_GENCODE="-gencode=arch=compute_80,code=sm_80") || return 1

  echo "[container] Copying EFA .so to uccl/lib/"
  mkdir -p uccl/lib
  cp collective/efa/libnccl-net-efa.so uccl/lib/ || return 1
  # The custom NCCL is installed under a distinct name.
  cp thirdparty/nccl-sg/build/lib/libnccl.so uccl/lib/libnccl-efa.so
}

# Build the p2p extension with the Makefile matching the target, then copy
# its runtime files into uccl/ unless USE_TCPX is "1".
#
# Arguments:
#   $1 - target: cuda*, rocm* or therock
#   $2 - machine architecture (not used here)
#   $3 - EFA flag (not used here)
build_p2p() {
  local TARGET="$1"
  local ARCH="$2"
  local IS_EFA="$3"
  local py_module

  set -euo pipefail
  echo "[container] build_p2p Target: $TARGET"

  cd p2p
  case "$TARGET" in
    cuda*)
      make clean && make -j$(nproc)
      ;;
    rocm*)
      make clean -f Makefile.rocm && make -j$(nproc) -f Makefile.rocm
      ;;
    therock)
      make clean -f Makefile.therock && make -j$(nproc) -f Makefile.therock HIP_HOME=$(rocm-sdk path --root) CONDA_LIB_HOME=$VIRTUAL_ENV/lib
      ;;
  esac
  cd ..

  echo "[container] Copying P2P .so, collective.py and utils.py to uccl/"
  mkdir -p uccl uccl/lib
  if [[ "${USE_TCPX:-}" == "1" ]]; then
    echo "[container] USE_TCPX=1, skipping copying p2p runtime files"
  else
    cp p2p/p2p.*.so uccl/
    for py_module in collective.py transfer.py utils.py; do
      cp "p2p/${py_module}" uccl/
    done
  fi
}

# Build the GPU-driven (ep) extension and copy the produced .so files into
# uccl/. rocm* targets build through setup.py, cuda* targets through make,
# and therock is skipped with a message.
#
# Arguments:
#   $1 - target: cuda*, rocm* or therock
#   $2 - machine architecture (not used here)
#   $3 - EFA flag (not used here)
build_ep() {
  local TARGET="$1"
  local ARCH="$2"
  local IS_EFA="$3"

  set -euo pipefail
  echo "[container] build_ep Target: $TARGET"

  case "$TARGET" in
    therock)
      echo "Skipping GPU-driven build on therock (no GPU-driven support yet)."
      ;;
    rocm*)
      cd ep
      python3 setup.py build
      cd ..
      echo "[container] Copying GPU-driven .so to uccl/"
      mkdir -p uccl/lib
      cp ep/build/**/*.so uccl/
      ;;
    cuda*)
      cd ep
      make clean && make -j$(nproc) all
      cd ..
      echo "[container] Copying GPU-driven .so to uccl/"
      mkdir -p uccl/lib
      cp ep/*.so uccl/
      ;;
  esac
}

# Build eccl for ROCm targets; CUDA targets are skipped.
#
# Arguments:
#   $1 - target: cuda*, rocm* or therock
#   $2 - machine architecture (not used here)
#   $3 - EFA flag (not used here)
# Returns:
#   0 on success or skip, non-zero when the ROCm build fails.
#
# The caller's working directory is never changed: the skip decision is made
# before entering eccl/, and the build itself runs in a subshell. (Returning
# from the CUDA branch after `cd eccl` used to leave the caller inside eccl/.)
build_eccl() {
  local TARGET="$1"
  local ARCH="$2"
  local IS_EFA="$3"

  set -euo pipefail
  echo "[container] build_eccl Target: $TARGET"

  if [[ "$TARGET" == cuda* ]]; then
    echo "Skipping eccl build on Cuda."
    return 0
  fi

  if [[ "$TARGET" == rocm* ]]; then
    (cd eccl && make clean -f Makefile.rocm && make -j"$(nproc)" -f Makefile.rocm) || return 1
  fi

  # The copy step itself is currently disabled; only the message is printed.
  echo "[container] Copying eccl .so to uccl/"
  # mkdir -p uccl/lib
  # cp eccl/eccl.*.so uccl/
}

# Build (contains toolchain + CUDA/ROCm)
echo "[2/3] Building..."

export USE_TCPX="${USE_TCPX:-0}"
export MAKE_NORMAL_MODE="${MAKE_NORMAL_MODE:-}"
# Serialized definitions of the build helpers; evaluating them below simply
# re-defines the same functions in this shell.
export FUNCTION_DEF="$(declare -f build_rccl_nccl_h build_rdma build_efa build_p2p build_ep build_eccl)"

set -euo pipefail

eval "$FUNCTION_DEF"

echo "BUILD_TYPE : ${BUILD_TYPE}"

# Only the plain "cuda" target on x86_64 is handled by this script so far.
if [[ $TARGET == "cuda" && "$ARCH" == "x86_64" ]]; then

  export CUDA_HOME=/usr/local/cuda
  export PATH=$PATH:$CUDA_HOME/bin

  # install dependencies (-y: never wait for confirmation, so the script
  # also works without a terminal attached)
  apt-get install -y libelf-dev

  # default to BUILD_TYPE all: every component is built regardless of the
  # value of BUILD_TYPE
  build_rdma "$TARGET" "$ARCH" "$IS_EFA"
  build_efa "$TARGET" "$ARCH" "$IS_EFA"
  build_p2p "$TARGET" "$ARCH" "$IS_EFA"
  build_ep "$TARGET" "$ARCH" "$IS_EFA"
  # NOTE (yiakwy) : eccl is skipped on CUDA platform
  build_eccl "$TARGET" "$ARCH" "$IS_EFA"

else

  echo "$TARGET is not supported yet."
  exit 1

fi

# Package from the repository root no matter where the helpers left us.
cd "$ROOT"

"python${PY_VER}" -m build

# Repair the wheel, leaving these libraries out of the bundle, and write the
# result to the wheel directory.
auditwheel repair dist/uccl-*.whl --exclude "libtorch*.so" --exclude "libc10*.so" --exclude "libibverbs.so.1" --exclude "libcudart.so.12" --exclude "libamdhip64.so.*" --exclude "libcuda.so.1" -w "$ROOT/${WHEEL_DIR}"
auditwheel show "$ROOT/${WHEEL_DIR}"/*.whl

# 3. Done
echo "[3/3] Wheel built successfully (stored in ${WHEEL_DIR}):"
ls -lh "$ROOT/${WHEEL_DIR}"/uccl-*.whl || true
2 changes: 1 addition & 1 deletion collective/rdma/nccl_plugin.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#include "nccl_net.h"
#include <nccl_net.h>
#include "transport.h"
#include "transport_config.h"
#include "util_rdma.h"
Expand Down
6 changes: 3 additions & 3 deletions docker/Dockerfile.cuda
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ RUN apt-get update && \
build-essential cmake git ninja-build g++ make patchelf \
rdma-core libibverbs-dev \
libgoogle-glog-dev libgflags-dev libgtest-dev libelf-dev \
libnuma-dev libdrm-dev libdrm-amdgpu1 \
libnuma-dev \
pkg-config zlib1g-dev curl && \
\
# ───── Add Python ${PY_VER} PPA & install Python ${PY_VER} + setuptools ─────
Expand Down Expand Up @@ -47,7 +47,7 @@ RUN python${PY_VER} -m pip install --no-cache-dir --upgrade setuptools
# ───── Set Python ${PY_VER} as default python3 and python3-config ─────
RUN ln -sf /usr/bin/python${PY_VER} /usr/local/bin/python3 && \
ln -sf /usr/bin/python${PY_VER}-config /usr/local/bin/python3-config

WORKDIR /io

CMD ["bash"]
CMD ["bash"]