Skip to content

Commit 02fa028

Browse files
authored
Merge pull request #108 from filecoin-project/galargh-patch-1
ci: test gpu on self-hosted runners
2 parents: 0e64ce8 + c1a5959 · commit 02fa028

File tree

1 file changed

+31
-8
lines changed

1 file changed

+31
-8
lines changed

.github/workflows/ci.yml

Lines changed: 31 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,12 @@
11
name: CI
22

3-
on: [pull_request, push]
3+
on:
4+
pull_request:
5+
push:
6+
branches:
7+
- master
48

5-
# Cancel a job if there's a new on on the same branch started.
9+
# Cancel a job if there's a new one on the same branch started.
610
# Based on https://stackoverflow.com/questions/58895283/stop-already-running-workflow-job-in-github-actions/67223051#67223051
711
concurrency:
812
group: ${{ github.ref }}
@@ -14,8 +18,7 @@ env:
1418
# Faster crates.io index checkout.
1519
CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse
1620
RUST_LOG: debug
17-
# Build the kernel only for the single architecture . This should reduce
18-
# the overall compile-time significantly.
21+
# Build the kernel only for the single architecture. This should reduce the overall compile-time significantly.
1922
EC_GPU_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
2023
BELLMAN_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
2124
NEPTUNE_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
@@ -27,7 +30,9 @@ jobs:
2730
steps:
2831
- uses: actions/checkout@v4
2932
- name: Install required packages
30-
run: sudo apt install --no-install-recommends --yes libhwloc-dev nvidia-cuda-toolkit ocl-icd-opencl-dev
33+
run: |
34+
sudo apt-get update
35+
sudo apt-get install --no-install-recommends --yes libhwloc-dev nvidia-cuda-toolkit ocl-icd-opencl-dev
3136
- name: Install cargo clippy
3237
run: rustup component add clippy
3338
- name: Run cargo clippy
@@ -44,13 +49,31 @@ jobs:
4449
run: cargo fmt --all -- --check
4550

4651
test:
47-
runs-on: ubuntu-24.04
52+
runs-on: ['self-hosted', 'linux', 'x64', '2xlarge+gpu']
4853
name: Test
4954
steps:
5055
- uses: actions/checkout@v4
56+
# TODO: Move the driver installation to the AMI.
57+
# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/install-nvidia-driver.html
58+
# https://www.nvidia.com/en-us/drivers/
59+
- name: Install CUDA drivers
60+
run: |
61+
curl -L --fail -o nvidia-driver-local-repo-ubuntu2404-570.148.08_1.0-1_amd64.deb https://us.download.nvidia.com/tesla/570.148.08/nvidia-driver-local-repo-ubuntu2404-570.148.08_1.0-1_amd64.deb
62+
echo "26188e02a028874c653a6072666fd267d597a3fd3db67cdfb66b1398626a512f" nvidia-driver-local-repo-ubuntu2404-570.148.08_1.0-1_amd64.deb | sha256sum --check
63+
sudo dpkg -i nvidia-driver-local-repo-ubuntu2404-570.148.08_1.0-1_amd64.deb
64+
sudo cp /var/nvidia-driver-local-repo-ubuntu2404-570.148.08/nvidia-driver-local-*-keyring.gpg /usr/share/keyrings/
65+
sudo apt-get update
66+
sudo apt-get install --no-install-recommends --yes cuda-drivers
67+
rm nvidia-driver-local-repo-ubuntu2404-570.148.08_1.0-1_amd64.deb
5168
- name: Install required packages
52-
run: sudo apt install --no-install-recommends --yes libhwloc-dev nvidia-cuda-toolkit ocl-icd-opencl-dev
53-
# In case no GPUs are available, it's using the CPU fallback.
69+
run: |
70+
sudo apt-get update
71+
sudo apt-get install --no-install-recommends --yes libhwloc-dev nvidia-cuda-toolkit ocl-icd-opencl-dev
72+
# TODO: Remove this and other rust installation directives from jobs running
73+
# on self-hosted runners once rust is available on these machines by default
74+
- uses: dtolnay/rust-toolchain@21dc36fb71dd22e3317045c0c31a3f4249868b17
75+
with:
76+
toolchain: 1.83
5477
- name: Test
5578
run: cargo test --verbose
5679

0 commit comments

Comments
 (0)