diff --git a/.github/workflows/build-docker-images-release.yml b/.github/workflows/build-docker-images-release.yml
index 2b4ddbedefd..3a5c5052fac 100644
--- a/.github/workflows/build-docker-images-release.yml
+++ b/.github/workflows/build-docker-images-release.yml
@@ -82,3 +82,25 @@ jobs:
           push: true
           tags: huggingface/accelerate:gpu-deepspeed-release-${{needs.get-version.outputs.version}}
 
+  # Builds the FP8 TransformerEngine benchmark image and pushes it tagged with the release version.
+  version-cuda-fp8-transformerengine:
+    name: "Latest Accelerate GPU FP8 TransformerEngine [version]"
+    runs-on:
+      group: aws-g6-4xlarge-plus
+    needs: get-version
+    steps:
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+      - name: Login to DockerHub
+        uses: docker/login-action@v2
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+
+      - name: Build and Push GPU
+        uses: docker/build-push-action@v4
+        with:
+          # Same Dockerfile as the nightly fp8 job, so release and nightly tags ship the same image.
+          file: benchmarks/fp8/Dockerfile
+          push: true
+          tags: huggingface/accelerate:gpu-fp8-transformerengine-release-${{needs.get-version.outputs.version}}
diff --git a/.github/workflows/build_docker_images.yml b/.github/workflows/build_docker_images.yml
index 50020fd8d00..2050bbba714 100644
--- a/.github/workflows/build_docker_images.yml
+++ b/.github/workflows/build_docker_images.yml
@@ -86,3 +86,29 @@ jobs:
             huggingface/accelerate:gpu-deepspeed-nightly
             huggingface/accelerate:gpu-deepspeed-nightly-${{ env.date }}
 
+  # Nightly build of the FP8 TransformerEngine benchmark image (benchmarks/fp8/Dockerfile).
+  latest-cuda-fp8-transformerengine:
+    name: "Latest Accelerate GPU FP8 TransformerEngine [dev]"
+    runs-on:
+      group: aws-g6-4xlarge-plus
+    steps:
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+      - name: Login to DockerHub
+        uses: docker/login-action@v2
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_PASSWORD }}
+      - name: Get current date
+        id: date
+        run: |
+          echo "date=$(date '+%Y-%m-%d')" >> "$GITHUB_ENV"
+      - name: Build and Push GPU
+        uses: docker/build-push-action@v4
+        with:
+          file: benchmarks/fp8/Dockerfile
+          push: true
+          # Push the rolling tag referenced by benchmarks/fp8/README.md plus a dated snapshot.
+          tags: |
+            huggingface/accelerate:gpu-fp8-transformerengine-nightly
+            huggingface/accelerate:gpu-fp8-transformerengine-nightly-${{ env.date }}
diff --git a/benchmarks/fp8/README.md b/benchmarks/fp8/README.md
index a91336ac956..71fb42dbdc1 100644
--- a/benchmarks/fp8/README.md
+++ b/benchmarks/fp8/README.md
@@ -15,6 +15,8 @@ To run them, it's recommended to use a docker image (see the attached `Dockerfil
 
 ## Running:
 
+An official Docker image is published at `huggingface/accelerate:gpu-fp8-transformerengine-nightly` and can be used to run these benchmarks.
+
 You can run all scripts using the core `accelerate launch` command without any `accelerate config` being needed.
 
 For single GPU, run it via `python`:
diff --git a/docker/README.md b/docker/README.md
index 6cd5efbc4c1..2e19e6caa4a 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -33,6 +33,7 @@ huggingface/accelerate:{accelerator}-{nightly,release}
 * `cpu`: Comes compiled off of `python:3.9-slim` and is designed for non-CUDA based workloads.
 * More to come soon
 * `gpu-deepspeed`: Comes compiled off of the `nvidia/cuda` image and includes core parts like `bitsandbytes` as well as the latest `deepspeed` version. Runs off python 3.10.
+* `gpu-fp8-transformerengine`: Comes compiled off of `nvcr.io/nvidia/pytorch` and is specifically for running the `benchmarks/fp8` scripts on devices which support FP8 operations using the `TransformerEngine` library (RTX 4090, H100, etc.).
 
 ## Nightlies vs Releases
 