(GitHub Actions) Build All, Run NoGPU #34

Workflow file for this run

.github/workflows/test-ga-nogpu.yml at a91f711

	# GitHub Actions hosted runners workflow
	# Builds all compiler/MPI combinations, runs only nogpu configurations
	#
	# Started manually through workflow_dispatch. Both inputs are optional; with
	# the empty defaults the build job checks out this repository.
	name: (GitHub Actions) Build All, Run NoGPU

	on:
	  workflow_dispatch:
	    inputs:
	      # owner/name of the repository whose sources are built.
	      mpas-repository:
	        description: 'MPAS source repo (e.g. MPAS-Dev/MPAS-Model). Leave empty to use this repo.'
	        required: false
	        default: ''
	      # Ref to check out in that repository.
	      mpas-ref:
	        description: 'Git ref (branch, tag, or SHA) in the MPAS source repo'
	        required: false
	        default: ''
	    # NOTE(review): workflow_dispatch documents only `inputs`; this branch
	    # list probably has no effect here. Confirm whether a push or
	    # pull_request trigger was intended for master/develop.
	    branches:
	      - master
	      - develop

	# Container image pattern: ncarcisl/cisldev-x86_64-almalinux9-{compiler}-{mpi}:devel
	# MPI flags: openmpi needs --allow-run-as-root --oversubscribe (handled by run-mpas)

	jobs:
	#===========================================================================
	# BUILD: All compiler/MPI combinations (nogpu only for GitHub runners)
	#
	# One job per (compiler, mpi, io) combination, each inside the matching
	# ncarcisl container image. The resulting atmosphere_model binary is
	# uploaded as a one-day artifact named exe-<compiler>-<mpi>-nogpu-<io>.
	#===========================================================================
	  build:
	    name: Build (${{ matrix.compiler }}, ${{ matrix.mpi }}, ${{ matrix.io }})
	    runs-on: ubuntu-latest
	    container:
	      image: ncarcisl/cisldev-x86_64-almalinux9-${{ matrix.compiler }}-${{ matrix.mpi }}:devel

	    strategy:
	      # Let every combination finish even when another one fails.
	      fail-fast: false
	      matrix:
	        compiler: [gcc, nvhpc, oneapi]
	        mpi: [mpich3, mpich, openmpi]
	        io: [smiol, pio]

	    steps:
	      # Source under test: the repository named by the mpas-repository
	      # input, or this repository when that input is empty. An empty ref
	      # leaves the choice of ref to actions/checkout.
	      - uses: actions/checkout@v4
	        with:
	          repository: ${{ inputs.mpas-repository || github.repository }}
	          ref: ${{ inputs.mpas-ref || '' }}
	          submodules: 'true'

	      # Only when an external source repository was requested: fetch just
	      # the .github directory into _ci with a second checkout.
	      - uses: actions/checkout@v4
	        if: ${{ inputs.mpas-repository != '' }}
	        with:
	          path: _ci
	          sparse-checkout: .github

	      # Copy that .github directory over the source tree so the local
	      # action used below (./.github/actions/build-mpas) is present.
	      - name: Overlay CI infrastructure
	        if: ${{ inputs.mpas-repository != '' }}
	        shell: bash
	        run: cp -r _ci/.github . && rm -rf _ci

	      - name: Build MPAS-A
	        uses: ./.github/actions/build-mpas
	        with:
	          compiler: ${{ matrix.compiler }}
	          # Evaluates to true only for the pio leg of the matrix.
	          use-pio: ${{ matrix.io == 'pio' }}

	      - name: Upload executable
	        uses: actions/upload-artifact@v4
	        with:
	          name: exe-${{ matrix.compiler }}-${{ matrix.mpi }}-nogpu-${{ matrix.io }}
	          path: atmosphere_model
	          retention-days: 1

	#===========================================================================
	# RUN: NoGPU configurations, 1 and 4 MPI ranks
	#
	# Runs each built executable with 1 and with 4 ranks and uploads the
	# log.* files from the run directory. A combination whose executable
	# artifact cannot be downloaded skips the run and upload steps.
	#===========================================================================
	  run:
	    name: Run ${{ matrix.num-procs }}proc (${{ matrix.compiler }}, ${{ matrix.mpi }}, ${{ matrix.io }})
	    needs: build
	    # Start even if some build legs failed, but not after a cancellation.
	    if: always() && needs.build.result != 'cancelled'
	    runs-on: ubuntu-latest
	    container:
	      image: ncarcisl/cisldev-x86_64-almalinux9-${{ matrix.compiler }}-${{ matrix.mpi }}:devel

	    strategy:
	      fail-fast: false
	      matrix:
	        num-procs: [1, 4]
	        compiler: [gcc, nvhpc, oneapi]
	        mpi: [mpich3, mpich, openmpi]
	        io: [smiol, pio]

	    steps:
	      - uses: actions/checkout@v4

	      # continue-on-error keeps a missing artifact from failing the job;
	      # the later steps test steps.download.outcome instead.
	      - name: Download executable
	        id: download
	        uses: actions/download-artifact@v4
	        continue-on-error: true
	        with:
	          name: exe-${{ matrix.compiler }}-${{ matrix.mpi }}-nogpu-${{ matrix.io }}

	      - name: Run MPAS-A
	        if: steps.download.outcome == 'success'
	        uses: ./.github/actions/run-mpas
	        with:
	          executable: ./atmosphere_model
	          num-procs: ${{ matrix.num-procs }}
	          # Every matrix value other than openmpi is passed on as 'mpich'.
	          mpi-impl: ${{ matrix.mpi != 'openmpi' && 'mpich' || 'openmpi' }}
	          working-dir: run-${{ matrix.num-procs }}proc-${{ matrix.compiler }}-${{ matrix.mpi }}-${{ matrix.io }}

	      # always() so logs are kept when the run step itself fails.
	      - name: Upload logs
	        uses: actions/upload-artifact@v4
	        if: always() && steps.download.outcome == 'success'
	        with:
	          name: logs-${{ matrix.num-procs }}proc-${{ matrix.compiler }}-${{ matrix.mpi }}-nogpu-${{ matrix.io }}
	          path: run-${{ matrix.num-procs }}proc-${{ matrix.compiler }}-${{ matrix.mpi }}-${{ matrix.io }}/log.*
	          retention-days: 5

	#===========================================================================
	# VALIDATE: Compare logs against reference and across MPI ranks
	#
	# Downloads every logs-* artifact, then (1) checks the 1-proc logs against
	# the 240km reference log and (2) compares 4-proc logs with 1-proc logs.
	#===========================================================================
	  validate:
	    name: Validate Results
	    needs: run
	    if: always()
	    runs-on: ubuntu-latest

	    steps:
	      - uses: actions/checkout@v4
	        with:
	          sparse-checkout: .github

	      - uses: actions/setup-python@v5
	        with:
	          python-version: '3.11'

	      - name: Download all logs
	        uses: actions/download-artifact@v4
	        with:
	          pattern: logs-*
	          path: logs

	      # Builds two comma-separated lists covering every
	      # compiler/MPI/IO combination and exposes them as the step outputs
	      # `ref` and `decomp`. The three arrays repeat the job matrices of
	      # this workflow by hand and must be edited together with them.
	      - name: Generate expected config lists
	        id: expected
	        shell: bash
	        run: |
	          COMPILERS=(gcc nvhpc oneapi)
	          MPIS=(mpich3 mpich openmpi)
	          IOS=(smiol pio)

	          REF_LIST=""
	          DECOMP_LIST=""
	          for compiler in "${COMPILERS[@]}"; do
	            for mpi in "${MPIS[@]}"; do
	              for io in "${IOS[@]}"; do
	                # ${VAR:+${VAR},} adds the separator only once the list is non-empty.
	                REF_LIST="${REF_LIST:+${REF_LIST},}logs-1proc-${compiler}-${mpi}-nogpu-${io}"
	                DECOMP_LIST="${DECOMP_LIST:+${DECOMP_LIST},}4proc vs 1proc: ${compiler}-${mpi}-nogpu-${io}"
	              done
	            done
	          done

	          echo "ref=${REF_LIST}" >> "$GITHUB_OUTPUT"
	          echo "decomp=${DECOMP_LIST}" >> "$GITHUB_OUTPUT"

	      - name: Validate 1-proc logs against reference
	        uses: ./.github/actions/validate-logs
	        with:
	          logs-path: logs
	          log-filter: 1proc
	          reference-log: .github/test-cases/240km/reference_log.atmosphere.0000.out
	          expected-configs: ${{ steps.expected.outputs.ref }}

	      # The expected list reaches the script through an environment
	      # variable rather than being expanded into the script text.
	      - name: Decomposition test — compare 4-proc logs against 1-proc logs
	        shell: bash
	        env:
	          EXPECTED_DECOMP: ${{ steps.expected.outputs.decomp }}
	        run: |
	          python .github/actions/validate-logs/compare_logs.py \
	            logs \
	            --decomposition-test \
	            --allow-missing \
	            --summary-file "$GITHUB_STEP_SUMMARY" \
	            --expected "${EXPECTED_DECOMP}"

	#===========================================================================
	# ECT RUN: 3 perturbed ensemble members per matrix combination (120km)
	#
	# For each built executable and rank count: fetch the ect-120km test case,
	# try to fetch a spin-up restart file, run members 0..2 and upload the
	# resulting history files. Combinations without an executable artifact
	# skip everything after the download step.
	#===========================================================================
	  ect-run:
	    name: ECT ${{ matrix.num-procs }}proc (${{ matrix.compiler }}, ${{ matrix.mpi }}, ${{ matrix.io }})
	    needs: build
	    if: always() && needs.build.result != 'cancelled'
	    runs-on: ubuntu-latest
	    container:
	      image: ncarcisl/cisldev-x86_64-almalinux9-${{ matrix.compiler }}-${{ matrix.mpi }}:devel

	    strategy:
	      fail-fast: false
	      matrix:
	        compiler: [gcc, nvhpc, oneapi]
	        mpi: [mpich3, mpich, openmpi]
	        io: [smiol, pio]
	        num-procs: [1, 4]

	    steps:
	      - uses: actions/checkout@v4

	      - name: Download executable
	        id: download
	        uses: actions/download-artifact@v4
	        continue-on-error: true
	        with:
	          name: exe-${{ matrix.compiler }}-${{ matrix.mpi }}-nogpu-${{ matrix.io }}

	      - name: Download test case
	        if: steps.download.outcome == 'success'
	        uses: ./.github/actions/download-testdata
	        with:
	          test-case: ect-120km
	          dest-dir: base-case

	      # Sets the outputs `available` (true/false) and, on success, `file`.
	      # ECT_SUMMARY_PREFIX and DATA_REPO are expected to come from the
	      # sourced config.env (not visible here - verify).
	      - name: Download spin-up restart
	        if: steps.download.outcome == 'success'
	        id: restart
	        shell: bash
	        run: |
	          source .github/test-cases/ect-120km/config.env
	          RESTART="${ECT_SUMMARY_PREFIX}_restart.nc"
	          echo "Downloading ${RESTART} from ${DATA_REPO}..."
	          # `shell: bash` runs with errexit, so a curl transport failure
	          # (DNS, timeout, ...) would abort the step here. Map it to a
	          # non-200 code instead so the cold-start fallback below applies.
	          HTTP_CODE=$(curl -sL --retry 5 --retry-delay 5 -w "%{http_code}" \
	            "https://github.com/${DATA_REPO}/raw/main/${RESTART}" \
	            -o "${RESTART}") || HTTP_CODE="000"
	          if [ "${HTTP_CODE}" = "200" ]; then
	            echo "Downloaded restart: $(du -h "${RESTART}")"
	            echo "available=true" >> "$GITHUB_OUTPUT"
	            echo "file=${RESTART}" >> "$GITHUB_OUTPUT"
	          else
	            # Without -f curl saves the error response body; do not leave
	            # it behind under the restart file name.
	            rm -f "${RESTART}"
	            echo "::warning::Spin-up restart not available (HTTP ${HTTP_CODE}), running from cold-start init.nc"
	            echo "available=false" >> "$GITHUB_OUTPUT"
	          fi

	      - name: Run perturbed MPAS-A
	        if: steps.download.outcome == 'success'
	        uses: ./.github/actions/run-perturb-mpas
	        with:
	          base-dir: base-case
	          executable: ./atmosphere_model
	          member-start: '0'
	          member-end: '2'
	          num-ranks: ${{ matrix.num-procs }}
	          mpi-impl: ${{ matrix.mpi != 'openmpi' && 'mpich' || 'openmpi' }}
	          # Empty string when no restart file was downloaded.
	          restart-file: ${{ steps.restart.outputs.available == 'true' && steps.restart.outputs.file || '' }}

	      - name: Upload history files
	        uses: actions/upload-artifact@v4
	        if: always() && steps.download.outcome == 'success'
	        with:
	          name: ect-history-${{ matrix.compiler }}-${{ matrix.mpi }}-${{ matrix.io }}-${{ matrix.num-procs }}proc
	          path: history-output/history.*.nc
	          retention-days: 1

	#===========================================================================
	# ECT VALIDATE: PyCECT validation per matrix combination
	#
	# Downloads the history files of one combination, runs the validate-ect
	# action on them and uploads ect-result.txt. Not run inside the compiler
	# container; only Python 3.11 is set up.
	#===========================================================================
	  ect-validate:
	    name: ECT Validate ${{ matrix.num-procs }}proc (${{ matrix.compiler }}, ${{ matrix.mpi }}, ${{ matrix.io }})
	    needs: ect-run
	    if: always() && needs.ect-run.result != 'cancelled'
	    runs-on: ubuntu-latest

	    strategy:
	      fail-fast: false
	      matrix:
	        compiler: [gcc, nvhpc, oneapi]
	        mpi: [mpich3, mpich, openmpi]
	        io: [smiol, pio]
	        num-procs: [1, 4]

	    steps:
	      - uses: actions/checkout@v4
	        with:
	          sparse-checkout: .github

	      - uses: actions/setup-python@v5
	        with:
	          python-version: '3.11'

	      # A missing artifact is tolerated; validation is skipped then.
	      - name: Download history files
	        id: download
	        continue-on-error: true
	        uses: actions/download-artifact@v4
	        with:
	          name: ect-history-${{ matrix.compiler }}-${{ matrix.mpi }}-${{ matrix.io }}-${{ matrix.num-procs }}proc
	          path: ect-test-files

	      - name: Run ECT validation
	        if: steps.download.outcome == 'success'
	        uses: ./.github/actions/validate-ect
	        with:
	          history-dir: ect-test-files
	          label: ${{ matrix.compiler }}/${{ matrix.mpi }}/${{ matrix.io }}/${{ matrix.num-procs }}proc
	          # One key=value pair per line.
	          dimensions: |
	            compiler=${{ matrix.compiler }}
	            mpi=${{ matrix.mpi }}
	            io=${{ matrix.io }}
	            ranks=${{ matrix.num-procs }}proc

	      # Attempted unconditionally, including when validation was skipped.
	      - name: Upload result
	        if: always()
	        uses: actions/upload-artifact@v4
	        with:
	          name: ect-result-${{ matrix.compiler }}-${{ matrix.mpi }}-${{ matrix.io }}-${{ matrix.num-procs }}proc
	          path: ect-result.txt
	          retention-days: 1

	#===========================================================================
	# ECT SUMMARY: Consolidated ECT results table
	#
	# Collects every ect-result-* artifact into ect-results/ and passes that
	# directory to the ect-summary action.
	#===========================================================================
	  ect-summary:
	    name: ECT Summary
	    runs-on: ubuntu-latest
	    needs: ect-validate
	    # Runs regardless of how the validation jobs ended.
	    if: always()

	    steps:
	      # Only .github is needed, for the local action used below.
	      - uses: actions/checkout@v4
	        with:
	          sparse-checkout: .github

	      - name: Download all ECT results
	        uses: actions/download-artifact@v4
	        with:
	          path: ect-results
	          pattern: ect-result-*

	      - name: Generate summary
	        uses: ./.github/actions/ect-summary
	        with:
	          results-path: ect-results

	#===========================================================================
	# CLEANUP: Remove executable artifacts
	#
	# Deletes the intermediate exe-* and ect-history-* artifacts once every
	# job that reads them has finished.
	#===========================================================================
	  cleanup:
	    name: Cleanup
	    needs: [run, validate, ect-run, ect-validate, ect-summary]
	    if: always()
	    runs-on: ubuntu-latest
	    # Deleting artifacts through the REST API requires write access to
	    # Actions. Without it, and with failOnError disabled below, the
	    # deletion could fail without any visible error.
	    permissions:
	      actions: write

	    steps:
	      - name: Delete executable artifacts
	        uses: geekyeggo/delete-artifact@v5
	        with:
	          # One artifact name pattern per line.
	          name: |
	            exe-*
	            ect-history-*
	          # Best effort: a failed deletion must not fail the workflow.
	          failOnError: false

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

(GitHub Actions) Build All, Run NoGPU #34

Workflow file

(GitHub Actions) Build All, Run NoGPU #34

Uh oh!

Workflow file for this run