-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add support for MoE models with megablocks (#60)
- Loading branch information
Showing
35 changed files
with
1,281 additions
and
205 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -109,10 +109,27 @@ jobs: | |
matrix: | ||
task: | ||
- name: Test (GPU) | ||
run: pytest -v --color=yes --durations=3 -m gpu src/test/ --ignore-glob='src/test/distributed/checkpoint*' | ||
image: olmo-core | ||
gpus: 2 | ||
run: | | ||
pytest -v --color=yes --durations=3 -m gpu \ | ||
--ignore-glob='src/test/distributed/checkpoint*' \ | ||
--ignore-glob='src/test/nn/moe*' \ | ||
src/test/ | ||
- name: Test checkpoint (GPU) | ||
run: pytest -v --color=yes --durations=3 -m gpu src/test/distributed/checkpoint* | ||
image: olmo-core | ||
gpus: 2 | ||
run: | | ||
pytest -v --color=yes --durations=3 -m gpu \ | ||
src/test/distributed/checkpoint* | ||
- name: Test MoE (GPU) | ||
image: olmo-core-nightly | ||
gpus: 1 | ||
run: | | ||
pytest -v --color=yes --durations=3 -m gpu \ | ||
src/test/nn/moe* | ||
steps: | ||
- uses: actions/checkout@v3 | ||
|
||
|
@@ -142,7 +159,7 @@ jobs: | |
- name: Get full image name | ||
if: env.BEAKER_TOKEN != '' | ||
run: | ||
echo "BEAKER_IMAGE=$(make get-full-beaker-image-name)" >> $GITHUB_ENV | ||
echo "BEAKER_IMAGE=$(make get-full-beaker-image-name IMAGE_NAME=${{ matrix.task.image }})" >> $GITHUB_ENV | ||
|
||
- name: GPU Tests | ||
uses: allenai/[email protected] | ||
|
@@ -160,7 +177,7 @@ jobs: | |
priority: low | ||
preemptible: true | ||
resources: | ||
gpuCount: 2 | ||
gpuCount: ${{ matrix.task.gpus }} | ||
constraints: | ||
cluster: | ||
- ai2/allennlp-cirrascale | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,11 @@ | ||
BASE_IMAGE = ghcr.io/allenai/pytorch:2.4.1-cuda12.1-python3.11 | ||
|
||
# NOTE: when upgrading the nightly version you also need to upgrade the torch version specification | ||
# in 'pyproject.toml' to include that nightly version. | ||
NIGHTLY_VERSION = "2.6.0.dev20241009+cu121 --index-url https://download.pytorch.org/whl/nightly/cu121" | ||
TORCHAO_VERSION = "0.5.0 --extra-index-url https://download.pytorch.org/whl/cu121" | ||
TORCHAO_VERSION = "torchao==0.5.0 --extra-index-url https://download.pytorch.org/whl/cu121" | ||
MEGABLOCKS_VERSION = "megablocks[gg] @ git+https://[email protected]/epwalsh/megablocks.git@epwalsh/deps" | ||
CUDA_TOOLKIT_VERSION = 12.1.0 | ||
|
||
VERSION = $(shell python src/olmo_core/version.py) | ||
VERSION_SHORT = $(shell python src/olmo_core/version.py short) | ||
|
@@ -45,25 +48,33 @@ stable-image : | |
docker build -f src/Dockerfile \ | ||
--build-arg BUILDKIT_INLINE_CACHE=1 \ | ||
--build-arg BASE=$(BASE_IMAGE) \ | ||
--build-arg CUDA_TOOLKIT_VERSION=$(CUDA_TOOLKIT_VERSION) \ | ||
--build-arg MEGABLOCKS_VERSION=$(MEGABLOCKS_VERSION) \ | ||
--build-arg TORCHAO_VERSION=$(TORCHAO_VERSION) \ | ||
--target stable \ | ||
--progress plain \ | ||
-t $(IMAGE_BASENAME) . | ||
|
||
.PHONY : beaker-image-stable | ||
beaker-image-stable : stable-image | ||
./src/scripts/beaker/create_beaker_image.sh $(IMAGE_BASENAME) $(IMAGE_BASENAME) $(BEAKER_WORKSPACE) | ||
./src/scripts/beaker/create_beaker_image.sh $(IMAGE_BASENAME) $(IMAGE_BASENAME)-v$(VERSION_SHORT) $(BEAKER_WORKSPACE) | ||
./src/scripts/beaker/create_beaker_image.sh $(IMAGE_BASENAME) $(IMAGE_BASENAME)-v$(VERSION) $(BEAKER_WORKSPACE) | ||
echo "Built image '$(IMAGE_BASENAME)', size: $$(docker inspect -f '{{ .Size }}' $(IMAGE_BASENAME) | numfmt --to=si)" | ||
|
||
.PHONY : nightly-image | ||
nightly-image : | ||
docker build -f src/Dockerfile \ | ||
--build-arg BUILDKIT_INLINE_CACHE=1 \ | ||
--build-arg BASE=$(BASE_IMAGE) \ | ||
--build-arg CUDA_TOOLKIT_VERSION=$(CUDA_TOOLKIT_VERSION) \ | ||
--build-arg MEGABLOCKS_VERSION=$(MEGABLOCKS_VERSION) \ | ||
--build-arg TORCHAO_VERSION=$(TORCHAO_VERSION) \ | ||
--build-arg NIGHTLY_VERSION=$(NIGHTLY_VERSION) \ | ||
--target nightly \ | ||
--progress plain \ | ||
-t $(IMAGE_BASENAME)-nightly . | ||
echo "Built image '$(IMAGE_BASENAME)-nightly', size: $$(docker inspect -f '{{ .Size }}' $(IMAGE_BASENAME)-nightly | numfmt --to=si)" | ||
|
||
.PHONY : beaker-image-stable | ||
beaker-image-stable : stable-image | ||
./src/scripts/beaker/create_beaker_image.sh $(IMAGE_BASENAME) $(IMAGE_BASENAME) $(BEAKER_WORKSPACE) | ||
./src/scripts/beaker/create_beaker_image.sh $(IMAGE_BASENAME) $(IMAGE_BASENAME)-v$(VERSION_SHORT) $(BEAKER_WORKSPACE) | ||
./src/scripts/beaker/create_beaker_image.sh $(IMAGE_BASENAME) $(IMAGE_BASENAME)-v$(VERSION) $(BEAKER_WORKSPACE) | ||
|
||
.PHONY : beaker-image-nightly | ||
beaker-image-nightly : nightly-image | ||
|
@@ -77,4 +88,4 @@ get-beaker-workspace : | |
|
||
.PHONY : get-full-beaker-image-name | ||
get-full-beaker-image-name : | ||
@./src/scripts/beaker/get_full_image_name.sh $(IMAGE_BASENAME) $(BEAKER_WORKSPACE) | ||
@./src/scripts/beaker/get_full_image_name.sh $(IMAGE_NAME) $(BEAKER_WORKSPACE) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
``nn.attention`` | ||
================ | ||
|
||
.. automodule:: olmo_core.nn.attention | ||
:members: | ||
:member-order: bysource |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
``nn.feed_forward`` | ||
=================== | ||
|
||
.. automodule:: olmo_core.nn.feed_forward | ||
:members: | ||
:member-order: bysource |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
``nn.layer_norm`` | ||
================= | ||
|
||
.. automodule:: olmo_core.nn.layer_norm | ||
:members: | ||
:member-order: bysource |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
``nn.moe`` | ||
========== | ||
|
||
.. automodule:: olmo_core.nn.moe | ||
:members: | ||
:member-order: bysource |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
``nn.rope`` | ||
=========== | ||
|
||
.. automodule:: olmo_core.nn.rope | ||
:members: | ||
:member-order: bysource |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
from typing import TypeVar | ||
|
||
T = TypeVar("T") | ||
|
||
|
||
def beta_feature(f: T) -> T:
    """
    Mark a class or function as a beta feature.

    A reStructuredText ``warning`` admonition is appended to the docstring of ``f``
    so that the rendered API docs flag the object as unstable. The object itself is
    returned unchanged otherwise, which makes this usable as a plain decorator.

    :param f: The class or function to mark. Its ``__doc__`` attribute is rewritten in place.

    :returns: The same object that was passed in.
    """
    # Local import keeps this block self-contained; ``inspect`` is part of the standard library.
    import inspect

    warning = (
        ".. warning::\n"
        "    This is a beta feature! The API is subject to change even with minor and patch releases.\n"
        "    If you choose to use this feature please read the `CHANGELOG <https://github.com/allenai/OLMo-core/blob/main/CHANGELOG.md>`_\n"
        "    before upgrading your version of this library.\n"
    )

    # ``__doc__`` is ``None`` for undocumented objects (and under ``python -OO``).
    # ``cleandoc`` strips the docstring's common leading indentation so the appended
    # directive lines up with the existing text no matter how deeply ``f`` is nested.
    existing = inspect.cleandoc(f.__doc__ or "")

    # reST only recognises a directive that is separated from the preceding paragraph
    # by a blank line, so always insert one when there is existing text.
    f.__doc__ = f"{existing}\n\n{warning}" if existing else warning

    return f
Oops, something went wrong.