(GitHub Actions) Build All, Run NoGPU #34
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # GitHub Actions hosted runners workflow | |
| # Builds all compiler/MPI combinations, runs only nogpu configurations | |
| # | |
| name: (GitHub Actions) Build All, Run NoGPU | |
| on: | |
| # Manual trigger. Both inputs are optional: when left empty, the build job's | |
| # checkout falls back to github.repository and an empty ref. | |
| workflow_dispatch: | |
| inputs: | |
| mpas-repository: | |
| description: 'MPAS source repo (e.g. MPAS-Dev/MPAS-Model). Leave empty to use this repo.' | |
| required: false | |
| default: '' | |
| mpas-ref: | |
| description: 'Git ref (branch, tag, or SHA) in the MPAS source repo' | |
| required: false | |
| default: '' | |
| # NOTE(review): indentation is not visible in this view, so the trigger that | |
| # owns this branch filter cannot be determined. `branches` is a filter for | |
| # push / pull_request style triggers - confirm which event it belongs to. | |
| branches: | |
| - master | |
| - develop | |
| # Container image pattern: ncarcisl/cisldev-x86_64-almalinux9-{compiler}-{mpi}:devel | |
| # MPI flags: openmpi needs --allow-run-as-root --oversubscribe (handled by run-mpas) | |
| jobs: | |
#===========================================================================
# BUILD: one job per compiler/MPI/IO combination (nogpu only on GitHub
# hosted runners). Each job publishes its executable as a short-lived
# artifact for the downstream run jobs.
#===========================================================================
build:
  name: Build (${{ matrix.compiler }}, ${{ matrix.mpi }}, ${{ matrix.io }})
  runs-on: ubuntu-latest
  container:
    image: ncarcisl/cisldev-x86_64-almalinux9-${{ matrix.compiler }}-${{ matrix.mpi }}:devel
  strategy:
    # Let every combination finish so one broken toolchain does not hide others.
    fail-fast: false
    matrix:
      compiler:
        - gcc
        - nvhpc
        - oneapi
      mpi:
        - mpich3
        - mpich
        - openmpi
      io:
        - smiol
        - pio
  steps:
    # Model source: the repository/ref supplied as dispatch inputs, otherwise
    # this repository at the triggering ref.
    - uses: actions/checkout@v4
      with:
        repository: ${{ inputs.mpas-repository || github.repository }}
        ref: ${{ inputs.mpas-ref || '' }}
        submodules: 'true'

    # When building an external repository, fetch this repository's .github
    # directory separately so the local composite actions are available.
    - uses: actions/checkout@v4
      if: inputs.mpas-repository != ''
      with:
        path: _ci
        sparse-checkout: .github

    - name: Overlay CI infrastructure
      if: inputs.mpas-repository != ''
      shell: bash
      run: cp -r _ci/.github . && rm -rf _ci

    - name: Build MPAS-A
      uses: ./.github/actions/build-mpas
      with:
        compiler: ${{ matrix.compiler }}
        use-pio: ${{ matrix.io == 'pio' }}

    # Artifact name must match the one the run / ect-run jobs download.
    - name: Upload executable
      uses: actions/upload-artifact@v4
      with:
        name: exe-${{ matrix.compiler }}-${{ matrix.mpi }}-nogpu-${{ matrix.io }}
        path: atmosphere_model
        retention-days: 1
#===========================================================================
# RUN: NoGPU configurations, 1 and 4 MPI ranks
#
# Runs even when some build jobs failed (only a cancelled build skips it).
# A combination whose executable artifact is missing skips its run and log
# upload steps instead of failing.
#===========================================================================
run:
  name: Run ${{ matrix.num-procs }}proc (${{ matrix.compiler }}, ${{ matrix.mpi }}, ${{ matrix.io }})
  needs: build
  if: always() && needs.build.result != 'cancelled'
  runs-on: ubuntu-latest
  container:
    image: ncarcisl/cisldev-x86_64-almalinux9-${{ matrix.compiler }}-${{ matrix.mpi }}:devel
  strategy:
    fail-fast: false
    matrix:
      num-procs: [1, 4]
      compiler: [gcc, nvhpc, oneapi]
      mpi: [mpich3, mpich, openmpi]
      io: [smiol, pio]
  steps:
    # Needed for the local composite action under .github/actions.
    - uses: actions/checkout@v4

    # continue-on-error: a failed build leaves no artifact; later steps are
    # gated on this step's outcome.
    - name: Download executable
      id: download
      uses: actions/download-artifact@v4
      continue-on-error: true
      with:
        name: exe-${{ matrix.compiler }}-${{ matrix.mpi }}-nogpu-${{ matrix.io }}

    # upload-artifact does not preserve file permissions, so the downloaded
    # binary comes back without its execute bit. Restoring it here is
    # idempotent and keeps ./atmosphere_model runnable.
    - name: Restore execute permission
      if: steps.download.outcome == 'success'
      shell: bash
      run: chmod +x atmosphere_model

    - name: Run MPAS-A
      if: steps.download.outcome == 'success'
      uses: ./.github/actions/run-mpas
      with:
        executable: ./atmosphere_model
        num-procs: ${{ matrix.num-procs }}
        # Both mpich3 and mpich map to the 'mpich' implementation flag set.
        mpi-impl: ${{ matrix.mpi != 'openmpi' && 'mpich' || 'openmpi' }}
        working-dir: run-${{ matrix.num-procs }}proc-${{ matrix.compiler }}-${{ matrix.mpi }}-${{ matrix.io }}

    # always(): keep the logs even when the model run itself failed.
    - name: Upload logs
      uses: actions/upload-artifact@v4
      if: always() && steps.download.outcome == 'success'
      with:
        name: logs-${{ matrix.num-procs }}proc-${{ matrix.compiler }}-${{ matrix.mpi }}-nogpu-${{ matrix.io }}
        path: run-${{ matrix.num-procs }}proc-${{ matrix.compiler }}-${{ matrix.mpi }}-${{ matrix.io }}/log.*
        retention-days: 5
#===========================================================================
# VALIDATE: Compare logs against reference and across MPI ranks
#
# Runs regardless of the run job's result so that missing or failed
# configurations are reported rather than silently dropped.
#===========================================================================
validate:
  name: Validate Results
  needs: run
  if: always()
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
      with:
        sparse-checkout: .github

    - uses: actions/setup-python@v5
      with:
        python-version: '3.11'

    - name: Download all logs
      uses: actions/download-artifact@v4
      with:
        pattern: logs-*
        path: logs

    # Builds two comma-separated lists of the configurations that should have
    # produced logs. The compiler/MPI/IO values below duplicate the run job's
    # matrix and must be kept in sync with it.
    - name: Generate expected config lists
      id: expected
      shell: bash
      run: |
        COMPILERS=(gcc nvhpc oneapi)
        MPIS=(mpich3 mpich openmpi)
        IOS=(smiol pio)
        REF_LIST=""
        DECOMP_LIST=""
        for compiler in "${COMPILERS[@]}"; do
          for mpi in "${MPIS[@]}"; do
            for io in "${IOS[@]}"; do
              # ${VAR:+${VAR},} emits the separator only once the list is non-empty.
              REF_LIST="${REF_LIST:+${REF_LIST},}logs-1proc-${compiler}-${mpi}-nogpu-${io}"
              DECOMP_LIST="${DECOMP_LIST:+${DECOMP_LIST},}4proc vs 1proc: ${compiler}-${mpi}-nogpu-${io}"
            done
          done
        done
        echo "ref=${REF_LIST}" >> "$GITHUB_OUTPUT"
        echo "decomp=${DECOMP_LIST}" >> "$GITHUB_OUTPUT"

    - name: Validate 1-proc logs against reference
      uses: ./.github/actions/validate-logs
      with:
        logs-path: logs
        log-filter: 1proc
        reference-log: .github/test-cases/240km/reference_log.atmosphere.0000.out
        expected-configs: ${{ steps.expected.outputs.ref }}

    # The expected list is handed over through the environment rather than
    # being expanded into the script text, so its contents are never parsed
    # as shell syntax.
    - name: Decomposition test — compare 4-proc logs against 1-proc logs
      shell: bash
      env:
        EXPECTED_DECOMP: ${{ steps.expected.outputs.decomp }}
      run: |
        python .github/actions/validate-logs/compare_logs.py \
          logs \
          --decomposition-test \
          --allow-missing \
          --summary-file "$GITHUB_STEP_SUMMARY" \
          --expected "$EXPECTED_DECOMP"
#===========================================================================
# ECT RUN: 3 perturbed ensemble members per matrix combination (120km)
#
# Like the run job, this proceeds after partial build failures and skips
# its work for combinations whose executable artifact is missing.
#===========================================================================
ect-run:
  name: ECT ${{ matrix.num-procs }}proc (${{ matrix.compiler }}, ${{ matrix.mpi }}, ${{ matrix.io }})
  needs: build
  if: always() && needs.build.result != 'cancelled'
  runs-on: ubuntu-latest
  container:
    image: ncarcisl/cisldev-x86_64-almalinux9-${{ matrix.compiler }}-${{ matrix.mpi }}:devel
  strategy:
    fail-fast: false
    matrix:
      compiler: [gcc, nvhpc, oneapi]
      mpi: [mpich3, mpich, openmpi]
      io: [smiol, pio]
      num-procs: [1, 4]
  steps:
    - uses: actions/checkout@v4

    - name: Download executable
      id: download
      uses: actions/download-artifact@v4
      continue-on-error: true
      with:
        name: exe-${{ matrix.compiler }}-${{ matrix.mpi }}-nogpu-${{ matrix.io }}

    # upload-artifact does not preserve file permissions, so the downloaded
    # binary comes back without its execute bit. Restoring it is idempotent.
    - name: Restore execute permission
      if: steps.download.outcome == 'success'
      shell: bash
      run: chmod +x atmosphere_model

    - name: Download test case
      if: steps.download.outcome == 'success'
      uses: ./.github/actions/download-testdata
      with:
        test-case: ect-120km
        dest-dir: base-case

    # Best-effort fetch of a spin-up restart file. Outputs:
    #   available - 'true' when the file was downloaded, otherwise 'false'
    #   file      - restart file name (only set when available is 'true')
    - name: Download spin-up restart
      if: steps.download.outcome == 'success'
      id: restart
      shell: bash
      run: |
        # config.env is expected to define ECT_SUMMARY_PREFIX and DATA_REPO,
        # both of which are used below.
        source .github/test-cases/ect-120km/config.env
        RESTART="${ECT_SUMMARY_PREFIX}_restart.nc"
        echo "Downloading ${RESTART} from ${DATA_REPO}..."
        # The step shell runs with -e, so a curl transport failure would abort
        # the step here. '|| true' lets it fall through to the cold-start
        # branch, as the warning below intends; curl then reports code 000.
        HTTP_CODE=$(curl -sL --retry 5 --retry-delay 5 -w "%{http_code}" \
          "https://github.com/${DATA_REPO}/raw/main/${RESTART}" \
          -o "${RESTART}") || true
        if [ "${HTTP_CODE}" = "200" ]; then
          echo "Downloaded restart: $(du -h "${RESTART}")"
          echo "available=true" >> "$GITHUB_OUTPUT"
          echo "file=${RESTART}" >> "$GITHUB_OUTPUT"
        else
          echo "::warning::Spin-up restart not available (HTTP ${HTTP_CODE}), running from cold-start init.nc"
          echo "available=false" >> "$GITHUB_OUTPUT"
        fi

    - name: Run perturbed MPAS-A
      if: steps.download.outcome == 'success'
      uses: ./.github/actions/run-perturb-mpas
      with:
        base-dir: base-case
        executable: ./atmosphere_model
        # Members 0 through 2, i.e. three ensemble members.
        member-start: '0'
        member-end: '2'
        num-ranks: ${{ matrix.num-procs }}
        mpi-impl: ${{ matrix.mpi != 'openmpi' && 'mpich' || 'openmpi' }}
        # Empty string when no restart was downloaded.
        restart-file: ${{ steps.restart.outputs.available == 'true' && steps.restart.outputs.file || '' }}

    # Artifact name must match the one ect-validate downloads.
    - name: Upload history files
      uses: actions/upload-artifact@v4
      if: always() && steps.download.outcome == 'success'
      with:
        name: ect-history-${{ matrix.compiler }}-${{ matrix.mpi }}-${{ matrix.io }}-${{ matrix.num-procs }}proc
        path: history-output/history.*.nc
        retention-days: 1
#===========================================================================
# ECT VALIDATE: PyCECT validation, one job per matrix combination.
# The matrix mirrors ect-run so each job picks up exactly one history
# artifact and publishes exactly one result artifact.
#===========================================================================
ect-validate:
  name: ECT Validate ${{ matrix.num-procs }}proc (${{ matrix.compiler }}, ${{ matrix.mpi }}, ${{ matrix.io }})
  needs: ect-run
  if: always() && needs.ect-run.result != 'cancelled'
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    matrix:
      compiler:
        - gcc
        - nvhpc
        - oneapi
      mpi:
        - mpich3
        - mpich
        - openmpi
      io:
        - smiol
        - pio
      num-procs:
        - 1
        - 4
  steps:
    # Only the CI infrastructure is needed here, not the model source.
    - uses: actions/checkout@v4
      with:
        sparse-checkout: .github

    - uses: actions/setup-python@v5
      with:
        python-version: '3.11'

    # A combination that never produced history files has no artifact; the
    # validation step below is skipped in that case instead of failing.
    - name: Download history files
      id: download
      uses: actions/download-artifact@v4
      continue-on-error: true
      with:
        name: ect-history-${{ matrix.compiler }}-${{ matrix.mpi }}-${{ matrix.io }}-${{ matrix.num-procs }}proc
        path: ect-test-files

    - name: Run ECT validation
      if: steps.download.outcome == 'success'
      uses: ./.github/actions/validate-ect
      with:
        history-dir: ect-test-files
        label: ${{ matrix.compiler }}/${{ matrix.mpi }}/${{ matrix.io }}/${{ matrix.num-procs }}proc
        dimensions: |
          compiler=${{ matrix.compiler }}
          mpi=${{ matrix.mpi }}
          io=${{ matrix.io }}
          ranks=${{ matrix.num-procs }}proc

    # Attempted unconditionally so the summary job sees whatever result file
    # exists, including after a failed validation.
    - name: Upload result
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: ect-result-${{ matrix.compiler }}-${{ matrix.mpi }}-${{ matrix.io }}-${{ matrix.num-procs }}proc
        path: ect-result.txt
        retention-days: 1
#===========================================================================
# ECT SUMMARY: gather every per-combination ECT result into one table.
# Runs unconditionally after ect-validate so partial results are reported.
#===========================================================================
ect-summary:
  name: ECT Summary
  runs-on: ubuntu-latest
  needs: ect-validate
  if: always()
  steps:
    # Only the local composite actions are required.
    - uses: actions/checkout@v4
      with:
        sparse-checkout: .github

    # Collects all ect-result-* artifacts under ect-results/.
    - name: Download all ECT results
      uses: actions/download-artifact@v4
      with:
        pattern: ect-result-*
        path: ect-results

    - name: Generate summary
      uses: ./.github/actions/ect-summary
      with:
        results-path: ect-results
#===========================================================================
# CLEANUP: Remove intermediate artifacts (executables and ECT history files)
#
# Runs after every consumer of those artifacts has finished, whatever
# their result.
#===========================================================================
cleanup:
  name: Cleanup
  needs: [run, validate, ect-run, ect-validate, ect-summary]
  if: always()
  runs-on: ubuntu-latest
  # Deleting artifacts requires a token with actions: write. Without it the
  # delete is rejected, and because failOnError is false that failure would
  # go unnoticed. This job performs no checkout, so no other scope is needed.
  permissions:
    actions: write
  steps:
    - name: Delete executable and ECT history artifacts
      uses: geekyeggo/delete-artifact@v5
      with:
        name: |
          exe-*
          ect-history-*
        # Best effort: a failed delete should not fail the workflow.
        failOnError: false