ci: test gpu on self-hosted runners #108

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged: 6 commits, Aug 20, 2025
Changes from all commits:
39 changes: 31 additions & 8 deletions .github/workflows/ci.yml
@@ -1,8 +1,12 @@
 name: CI

-on: [pull_request, push]
+on:
+  pull_request:
+  push:
+    branches:
+      - master

-# Cancel a job if there's a new on on the same branch started.
+# Cancel a job if there's a new one on the same branch started.
 # Based on https://stackoverflow.com/questions/58895283/stop-already-running-workflow-job-in-github-actions/67223051#67223051
 concurrency:
   group: ${{ github.ref }}
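(The diff view collapses the rest of this hunk. As an aside, the usual shape of such a block pairs the group key with cancel-in-progress; the sketch below shows that general pattern only, not the collapsed lines of this file:)

```yaml
# General pattern only (the actual lines are collapsed in this diff):
# cancel an in-flight run when a new one starts on the same ref.
concurrency:
  group: ${{ github.ref }}
  cancel-in-progress: true
```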
@@ -14,8 +18,7 @@ env:
   # Faster crates.io index checkout.
   CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse
   RUST_LOG: debug
-  # Build the kernel only for the single architecture . This should reduce
-  # the overall compile-time significantly.
+  # Build the kernel only for the single architecture. This should reduce the overall compile-time significantly.
   EC_GPU_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
   BELLMAN_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
   NEPTUNE_CUDA_NVCC_ARGS: --fatbin --gpu-architecture=sm_75 --generate-code=arch=compute_75,code=sm_75
@@ -27,7 +30,9 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Install required packages
-        run: sudo apt install --no-install-recommends --yes libhwloc-dev nvidia-cuda-toolkit ocl-icd-opencl-dev
+        run: |
+          sudo apt-get update
+          sudo apt-get install --no-install-recommends --yes libhwloc-dev nvidia-cuda-toolkit ocl-icd-opencl-dev
       - name: Install cargo clippy
         run: rustup component add clippy
       - name: Run cargo clippy
@@ -44,13 +49,31 @@
         run: cargo fmt --all -- --check

   test:
-    runs-on: ubuntu-24.04
+    runs-on: ['self-hosted', 'linux', 'x64', '2xlarge+gpu']
     name: Test
     steps:
       - uses: actions/checkout@v4
+      # TODO: Move the driver installation to the AMI.
+      # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/install-nvidia-driver.html
+      # https://www.nvidia.com/en-us/drivers/
+      - name: Install CUDA drivers
+        run: |
+          curl -L --fail -o nvidia-driver-local-repo-ubuntu2404-570.148.08_1.0-1_amd64.deb https://us.download.nvidia.com/tesla/570.148.08/nvidia-driver-local-repo-ubuntu2404-570.148.08_1.0-1_amd64.deb
+          echo "26188e02a028874c653a6072666fd267d597a3fd3db67cdfb66b1398626a512f" nvidia-driver-local-repo-ubuntu2404-570.148.08_1.0-1_amd64.deb | sha256sum --check
+          sudo dpkg -i nvidia-driver-local-repo-ubuntu2404-570.148.08_1.0-1_amd64.deb
+          sudo cp /var/nvidia-driver-local-repo-ubuntu2404-570.148.08/nvidia-driver-local-*-keyring.gpg /usr/share/keyrings/
+          sudo apt-get update
+          sudo apt-get install --no-install-recommends --yes cuda-drivers
+          rm nvidia-driver-local-repo-ubuntu2404-570.148.08_1.0-1_amd64.deb
       - name: Install required packages
-        run: sudo apt install --no-install-recommends --yes libhwloc-dev nvidia-cuda-toolkit ocl-icd-opencl-dev
+        # In case no GPUs are available, it's using the CPU fallback.
+        run: |
+          sudo apt-get update
+          sudo apt-get install --no-install-recommends --yes libhwloc-dev nvidia-cuda-toolkit ocl-icd-opencl-dev
+      # TODO: Remove this and other rust installation directives from jobs running
+      # on self-hosted runners once rust is available on these machines by default
+      - uses: dtolnay/rust-toolchain@21dc36fb71dd22e3317045c0c31a3f4249868b17
+        with:
+          toolchain: 1.83
Comment on lines +74 to +76

Member:

this kind of sucks that we can't just use the rust-toolchain file for versioning, but note from https://github.com/dtolnay/rust-toolchain?tab=readme-ov-file#inputs about versioning:

> Rustup toolchain specifier e.g. stable, nightly, 1.42.0, nightly-2022-01-01. Important: the default is to match the @rev as described above. When passing an explicit toolchain as an input instead of @rev, you'll want to use "dtolnay/rust-toolchain@master" as the revision of the action.

i.e. it wants you to use dtolnay/[email protected] instead.

(I also notice other people are annoyed by this gap.)
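For illustration, a sketch of the two forms the quoted readme describes; neither is what this PR merged (the PR pins a full commit sha instead), and the 1.83 value is copied from the diff above:

```yaml
# Illustrative sketch of the readme's two documented forms.

# Form 1: encode the toolchain in the action's rev.
- uses: dtolnay/[email protected]

# Form 2: pass the toolchain as an explicit input; the readme then
# wants @master as the action's revision.
- uses: dtolnay/rust-toolchain@master
  with:
    toolchain: "1.83"
```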

Contributor Author:

I'd stick with using a pinned sha of rust-toolchain despite the suggestion from the action authors, as this is a more secure alternative.

I do like the idea of using the version from the toml config file! I'll check it out and update where applicable if it looks solid.
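For reference, a minimal sketch of that toml approach, assuming a hypothetical rust-toolchain.toml at the repository root; rustup reads this file automatically, so cargo invocations on the runner would pick up the pinned version (the components list is illustrative):

```toml
# Hypothetical rust-toolchain.toml (not part of this PR); rustup reads
# it automatically for any cargo invocation inside the repository.
[toolchain]
channel = "1.83"
components = ["clippy", "rustfmt"]
```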

       - name: Test
         run: cargo test --verbose
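As an aside, not part of this PR: on a GPU runner like this, a hypothetical sanity-check step before the tests could confirm the driver installation using nvidia-smi (which ships with the cuda-drivers package installed above):

```yaml
# Hypothetical extra step (not in this PR): fail fast if the driver
# cannot see a GPU on the self-hosted runner.
- name: Check GPU visibility
  run: nvidia-smi
```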
