Skip to content
Open
14 changes: 12 additions & 2 deletions .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,13 @@ jobs:
test-gpu:
needs: build-gpu-image
if: ${{ always() && needs.build-gpu-image.result == 'success' }}
runs-on: [self-hosted, gpu-local]
# Only one GPU job at a time across all workflow runs — our AWS quota
# allows a single GPU instance. Note: GitHub queues at most one pending
# job per group; a third arrival cancels the pending one.
concurrency:
group: gpu-tests
runs-on:
- "runs-on=${{ github.run_id }}/family=g4dn.xlarge/image=ubuntu24-gpu-x64/spot=true"
container:
image: ${{ needs.build-gpu-image.outputs.image }}
credentials:
Expand Down Expand Up @@ -232,7 +238,10 @@ jobs:
test-cudatoolkit:
needs: build-gpu-image
if: ${{ always() && needs.build-gpu-image.result == 'success' }}
runs-on: [self-hosted, gpu-local]
concurrency:
group: gpu-tests
runs-on:
- "runs-on=${{ github.run_id }}/family=g4dn.xlarge/image=ubuntu24-gpu-x64/spot=true"
container:
image: ${{ needs.build-gpu-image.outputs.image }}
credentials:
Expand All @@ -245,6 +254,7 @@ jobs:
TORCH_TEST: 1
TORCH_TEST_CUDA: 1
CUDA: "12.8"
TORCH_SKIP_SLOW_TESTS: 1

steps:
- name: Verify GPU access
Expand Down
6 changes: 6 additions & 0 deletions tests/testthat/helper-tensor.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@ skip_if_cuda_not_available <- function() {
}
}

# Skip the current testthat test when slow tests are disabled via the
# TORCH_SKIP_SLOW_TESTS environment variable (set by CI for GPU runners).
#
# Accepts the common truthy spellings "1", "true", and "yes" (any case)
# for robustness; an unset variable or any other value runs the test.
# Calls testthat::skip(), which aborts the enclosing test with a skip
# message rather than a failure.
skip_slow_tests <- function() {
  flag <- tolower(Sys.getenv("TORCH_SKIP_SLOW_TESTS", "0"))
  if (flag %in% c("1", "true", "yes")) {
    skip("Skipping slow test (TORCH_SKIP_SLOW_TESTS=1)")
  }
}

skip_if_not_m1_mac <- function() {
if (!grepl("darwin", R.version$os)) {
skip("Not on MacOS")
Expand Down
3 changes: 2 additions & 1 deletion tests/testthat/test-autocast.R
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,9 @@ test_that("internal cpp_amp_check works", {
})

test_that("grad scalers work correctly", {

skip_if_cuda_not_available()
skip_slow_tests()

make_model <- function(in_size, out_size, num_layers) {
layers <- list()
Expand Down
6 changes: 4 additions & 2 deletions tests/testthat/test-cuda.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,13 @@ test_that("cuda is really available", {

# Verifies that CUDA memory-history recording can be enabled and that a
# snapshot can be captured as a non-trivial raw vector. Skipped when no
# CUDA device is available or slow tests are disabled.
test_that("cuda memory snapshot works", {
  skip_if_cuda_not_available()
  skip_slow_tests()

  # Restore the default (disabled) recorder state even if the test fails,
  # so later tests are not affected by an active recorder.
  withr::defer(cuda_record_memory_history(enabled = NULL))
  cuda_record_memory_history(enabled = "all", max_entries = 1e3)

  # Allocate on the GPU so the recorder has at least one event to capture.
  x <- torch_randn(16, device="cuda")
  memory <- cuda_memory_snapshot()

  # is.raw() is the robust type check: `class(memory) == "raw"` compares
  # against a possibly multi-element class vector, which warns (and, since
  # R 4.2, errors in `if`) when the length exceeds one.
  expect_true(is.raw(memory))
  expect_true(length(memory) > 100)
})
Loading