Skip to content

Commit 823a63f

Browse files
Merge pull request #2496 from AI-Hypercomputer:tests_improved
PiperOrigin-RevId: 823644326
2 parents 58dba0a + e7b334a commit 823a63f

File tree

4 files changed

+32
-9
lines changed

4 files changed

+32
-9
lines changed

.github/workflows/RunTests.yml

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -65,17 +65,22 @@ jobs:
6565

6666
cpu_unit_tests:
6767
needs: tpu_image
68+
strategy:
69+
fail-fast: false
70+
matrix:
71+
worker_group: [1, 2, 3, 4]
6872
uses: ./.github/workflows/run_tests_internal.yml
6973
with:
7074
device_type: cpu
7175
device_name: X64
72-
cloud_runner: linux-x86-n2-16
7376
image_type: tpu
7477
pytest_marker: 'cpu_only'
7578
xla_python_client_mem_fraction: 0.75
7679
tf_force_gpu_allow_growth: false
7780
container_resource_option: "--privileged"
7881
is_scheduled_run: ${{ github.event_name == 'schedule' }}
82+
worker_group: ${{ matrix.worker_group }}
83+
total_workers: 4
7984

8085
tpu_unit_tests:
8186
needs: tpu_image

.github/workflows/build_and_test_maxtext.yml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,7 @@ jobs:
5151
fail-fast: false # don't cancel all jobs on failure
5252
matrix:
5353
image_type: ["py312"]
54+
worker_group: [1, 2, 3, 4]
5455
with:
5556
device_type: cpu
5657
device_name: X64
@@ -61,6 +62,8 @@ jobs:
6162
tf_force_gpu_allow_growth: false
6263
container_resource_option: "--privileged"
6364
is_scheduled_run: ${{ github.event_name == 'schedule' }}
65+
worker_group: ${{ matrix.worker_group }}
66+
total_workers: 4
6467

6568
maxtext_tpu_unit_tests:
6669
needs: build_and_upload_maxtext_package

.github/workflows/run_tests_against_package.yml

Lines changed: 11 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -31,10 +31,6 @@ on:
3131
pytest_marker:
3232
required: true
3333
type: string
34-
pytest_addopts:
35-
required: false
36-
type: string
37-
default: ''
3834
is_scheduled_run:
3935
required: true
4036
type: string
@@ -50,12 +46,20 @@ on:
5046
cloud_runner:
5147
required: false
5248
type: string
49+
worker_group:
50+
required: false
51+
type: number
52+
default: 1
53+
total_workers:
54+
required: false
55+
type: number
56+
default: 1
5357

5458
permissions:
5559
contents: read
5660
jobs:
5761
run:
58-
runs-on: ${{ inputs.cloud_runner }}
62+
runs-on: ${{ inputs.cloud_runner != '' && inputs.cloud_runner || fromJson(format('["self-hosted", "{0}", "{1}"]', inputs.device_type, inputs.device_name)) }}
5963
container:
6064
image: gcr.io/tpu-prod-env-multipod/maxtext-unit-test-${{ inputs.device_type == 'cpu' && 'tpu' || inputs.device_type }}:${{ inputs.image_type != '' && inputs.image_type }}
6165
env:
@@ -102,4 +106,5 @@ jobs:
102106
export LIBTPU_INIT_ARGS='--xla_tpu_scoped_vmem_limit_kib=65536'
103107
fi
104108
# TODO: Fix the skipped tests and remove the deselect flags
105-
.venv/bin/python3 -m pytest ${{ inputs.pytest_addopts }} -v -m "${FINAL_PYTEST_MARKER}" --durations=0 --deselect "tests/aot_hlo_identical_test.py::AotHloIdenticalTest::test_default_hlo_match" --deselect "tests/tokenizer_test.py::TokenizerTest::test_detokenize"
109+
[ "${{ inputs.total_workers }}" -gt 1 ] && .venv/bin/python3 -m pip install --quiet pytest-split && SPLIT_ARGS="--splits ${{ inputs.total_workers }} --group ${{ inputs.worker_group }}" || SPLIT_ARGS=""
110+
.venv/bin/python3 -m pytest -v -m "${FINAL_PYTEST_MARKER}" --durations=0 --deselect "tests/aot_hlo_identical_test.py::AotHloIdenticalTest::test_default_hlo_match" --deselect "tests/tokenizer_test.py::TokenizerTest::test_detokenize" $SPLIT_ARGS

.github/workflows/run_tests_internal.yml

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,14 @@ on:
5050
cloud_runner:
5151
required: false
5252
type: string
53+
worker_group:
54+
required: false
55+
type: number
56+
default: 1
57+
total_workers:
58+
required: false
59+
type: number
60+
default: 1
5361

5462
jobs:
5563
run:
@@ -70,5 +78,7 @@ jobs:
7078
else
7179
FINAL_PYTEST_MARKER="${{ inputs.pytest_marker }} and not scheduled_only"
7280
fi
73-
python3 -m pip install -e . --no-dependencies &&
74-
LIBTPU_INIT_ARGS='--xla_tpu_scoped_vmem_limit_kib=65536' python3 -m pytest ${{ inputs.pytest_addopts }} -v -m "${FINAL_PYTEST_MARKER}" --durations=0
81+
python3 -m pip install -e . --no-dependencies
82+
[ "${{ inputs.total_workers }}" -gt 1 ] && python3 -m pip install --quiet pytest-split && SPLIT_ARGS="--splits ${{ inputs.total_workers }} --group ${{ inputs.worker_group }}" || SPLIT_ARGS=""
83+
export LIBTPU_INIT_ARGS='--xla_tpu_scoped_vmem_limit_kib=65536'
84+
python3 -m pytest ${{ inputs.pytest_addopts }} -v -m "${FINAL_PYTEST_MARKER}" --durations=0 $SPLIT_ARGS

0 commit comments

Comments
 (0)