Skip to content

Commit

Permalink
update build process
Browse files Browse the repository at this point in the history
  • Loading branch information
epwalsh committed Oct 30, 2024
1 parent c46ae3b commit 3cf1d75
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 7 deletions.
10 changes: 7 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# NOTE: make sure CUDA versions match across these variables
# Base image handed to 'docker build' as the BASE build arg.
BASE_IMAGE = ghcr.io/allenai/pytorch:2.5.1-cuda12.1-python3.11-v2024.10.29
# Version of the conda 'cuda-toolkit' package installed in the Docker build stage.
CUDA_TOOLKIT_VERSION = 12.1.0
# Tag of the PyTorch wheel index. The Dockerfile appends it verbatim to
# https://download.pytorch.org/whl/ and https://download.pytorch.org/whl/nightly/,
# so it must carry the 'cu' prefix: the index is published as 'cu121', not '121'.
TORCH_CUDA_VERSION = cu121

# NOTE: when upgrading the nightly version you also need to upgrade the torch version specification
# in 'pyproject.toml' to include that nightly version.
# NOTE(review): the next two assignments are overridden by the two that follow them
# (Make uses the last definition); they look like the pre-change lines of this diff.
NIGHTLY_VERSION = "2.6.0.dev20241009+cu121 --index-url https://download.pytorch.org/whl/nightly/cu121"
TORCHAO_VERSION = "torchao==0.5.0 --extra-index-url https://download.pytorch.org/whl/cu121"
# Bare version / requirement strings; the index URLs are supplied inside the Dockerfile.
NIGHTLY_VERSION = "2.6.0.dev20241009+cu121"
TORCHAO_VERSION = "torchao==0.5.0"
# pip direct reference to the megablocks fork, pinned to the 'epwalsh/deps' ref.
MEGABLOCKS_VERSION = "megablocks[gg] @ git+https://git@github.com/epwalsh/megablocks.git@epwalsh/deps"
# NOTE(review): repeats the identical CUDA_TOOLKIT_VERSION assignment above.
CUDA_TOOLKIT_VERSION = 12.1.0

# Version strings obtained by running the project's version script at parse time.
VERSION = $(shell python src/olmo_core/version.py)
VERSION_SHORT = $(shell python src/olmo_core/version.py short)
Expand Down Expand Up @@ -49,6 +51,7 @@ stable-image :
--build-arg BUILDKIT_INLINE_CACHE=1 \
--build-arg BASE=$(BASE_IMAGE) \
--build-arg CUDA_TOOLKIT_VERSION=$(CUDA_TOOLKIT_VERSION) \
--build-arg TORCH_CUDA_VERSION=$(TORCH_CUDA_VERSION) \
--build-arg MEGABLOCKS_VERSION=$(MEGABLOCKS_VERSION) \
--build-arg TORCHAO_VERSION=$(TORCHAO_VERSION) \
--target stable \
Expand All @@ -62,6 +65,7 @@ nightly-image :
--build-arg BUILDKIT_INLINE_CACHE=1 \
--build-arg BASE=$(BASE_IMAGE) \
--build-arg CUDA_TOOLKIT_VERSION=$(CUDA_TOOLKIT_VERSION) \
--build-arg TORCH_CUDA_VERSION=$(TORCH_CUDA_VERSION) \
--build-arg MEGABLOCKS_VERSION=$(MEGABLOCKS_VERSION) \
--build-arg TORCHAO_VERSION=$(TORCHAO_VERSION) \
--build-arg NIGHTLY_VERSION=$(NIGHTLY_VERSION) \
Expand Down
23 changes: 19 additions & 4 deletions src/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,33 @@ WORKDIR /app/build
ARG CUDA_TOOLKIT_VERSION
RUN conda install -y -c nvidia cuda-toolkit==${CUDA_TOOLKIT_VERSION}

ARG TORCH_CUDA_VERSION

# Build megablocks and grouped-gemm.
ENV TORCH_CUDA_ARCH_LIST="8.0 9.0"
ENV GROUPED_GEMM_CUTLASS=1
ARG MEGABLOCKS_VERSION
RUN pip wheel --no-build-isolation --no-cache-dir "${MEGABLOCKS_VERSION}" \
&& rm -rf torch-*.whl numpy-*.whl triton-*.whl
RUN pip wheel --no-build-isolation --no-cache-dir \
--extra-index-url https://download.pytorch.org/whl/${TORCH_CUDA_VERSION} \
"${MEGABLOCKS_VERSION}" \
&& rm -rf torch-*.whl numpy-*.whl triton-*.whl nvidia*.whl

# Flash-attn from pre-built wheel (can't get this to work at the moment)
#RUN wget https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.3/flash_attn-2.6.3+cu123torch2.4cxx11abiTRUE-cp311-cp311-linux_x86_64.whl

#########################################################################
# Stable image
#########################################################################

FROM ${BASE} as stable

ARG TORCH_CUDA_VERSION

# Install torchao.
ARG TORCHAO_VERSION
RUN pip install --no-cache-dir ${TORCHAO_VERSION}
RUN pip install --no-cache-dir \
--extra-index-url https://download.pytorch.org/whl/${TORCH_CUDA_VERSION} \
${TORCHAO_VERSION}

# Copy and install wheels from build image.
COPY --from=build /app/build /app/build
Expand All @@ -50,5 +61,9 @@ WORKDIR /app/olmo-core

# Nightly stage: starts from the 'stable' stage and installs a pinned
# pre-release torch build on top of it.
FROM stable as nightly

# Tag substituted verbatim into the nightly wheel index URL below, so the value
# must name an index that actually exists (e.g. 'cu121', not '121') -- TODO
# confirm against the value the Makefile passes via --build-arg.
ARG TORCH_CUDA_VERSION

# Exact torch nightly version to pin.
ARG NIGHTLY_VERSION
# NOTE(review): this single-line install and the multi-line one after it install
# the same package; presumably this is the pre-change line shown by the diff view.
RUN pip install --no-cache-dir --pre torch==${NIGHTLY_VERSION}
# Use the PyTorch nightly wheel index for the selected CUDA tag as pip's package index.
RUN pip install --no-cache-dir --pre \
--index-url https://download.pytorch.org/whl/nightly/${TORCH_CUDA_VERSION} \
torch==${NIGHTLY_VERSION}

0 comments on commit 3cf1d75

Please sign in to comment.