From 75aec90bfdc4485816d610d237b479030743fe67 Mon Sep 17 00:00:00 2001
From: Jian Sun
Date: Thu, 26 Feb 2026 21:16:43 -0700
Subject: [PATCH 1/2] add CI workflow on CIRRUS

Add a GitHub Actions workflow that builds and runs a single CAM-SIMA
test (SMS_Ln2.ne3pg3_ne3pg3_mg37.FPHYStest, nvhpc compiler, testmods
cam-outfrq_rrtmgp_cirrus_gpu) inside a container on the
gha-runner-gpu-camsima runner, checks the overall cs.status result and
uploads the cesm/atm logs when the job fails.

Add the outfrq_rrtmgp_cirrus_gpu testmods directory used by that test
and move ccs_config from ccs_config_cesm1.0.72 to ccs_config_cesm1.0.76.

The container `options` expression ends in `|| ''` so that a runner
other than gha-runner-gpu-camsima receives an empty option string;
a bare `cond && '--gpus all'` evaluates to `false` for such a runner.
---
Reviewer notes (this section is ignored by `git am`):

* This message was rebuilt from a copy in which line breaks and
  indentation had been collapsed.  YAML indentation is restored assuming
  the usual 2-space GitHub Actions layout - TODO confirm against the
  repository before applying.
* The whitespace-only difference on the .gitmodules `url` line is not
  recoverable from that copy.  It is written here as 8 spaces (old) vs.
  a tab (new); this is an assumption - verify.
* The `index` blob ids are those of the original commit and were not
  regenerated after the `options` change.

 .github/workflows/build_and_run_cirrus.yml    | 93 +++++++++++++++++++
 .gitmodules                                   |  4 +-
 ccs_config                                    |  2 +-
 .../outfrq_rrtmgp_cirrus_gpu/shell_commands   | 13 +++
 .../cam/outfrq_rrtmgp_cirrus_gpu/user_nl_cam  | 25 +++++
 .../cam/outfrq_rrtmgp_cirrus_gpu/user_nl_cpl  |  5 +
 6 files changed, 139 insertions(+), 3 deletions(-)
 create mode 100644 .github/workflows/build_and_run_cirrus.yml
 create mode 100644 cime_config/testdefs/testmods_dirs/cam/outfrq_rrtmgp_cirrus_gpu/shell_commands
 create mode 100644 cime_config/testdefs/testmods_dirs/cam/outfrq_rrtmgp_cirrus_gpu/user_nl_cam
 create mode 100644 cime_config/testdefs/testmods_dirs/cam/outfrq_rrtmgp_cirrus_gpu/user_nl_cpl

diff --git a/.github/workflows/build_and_run_cirrus.yml b/.github/workflows/build_and_run_cirrus.yml
new file mode 100644
index 00000000..1d437a7c
--- /dev/null
+++ b/.github/workflows/build_and_run_cirrus.yml
@@ -0,0 +1,93 @@
+name: Build and run CAM-SIMA on CIRRUS
+
+on:
+  push:
+    branches:
+      - development
+  pull_request:
+    branches:
+      - development
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  build-and-run-on-CIRRUS:
+    name: Run ${{ matrix.image }} on ${{ matrix.runner }}
+    permissions:
+      contents: read
+    env:
+      TMP_DIR: tmp
+      TMP_OUTPUT: case_output.log
+    strategy:
+      fail-fast: false
+      matrix:
+        test_type: [ SMS_Ln2 ]
+        compset: [ FPHYStest ]
+        res: [ ne3pg3_ne3pg3_mg37 ]
+        test_config: [ outfrq_rrtmgp_cirrus_gpu ]
+        image: [ ubuntu24.04_nvhpc25.7_openmpi5.0.8_cuda12.9:e9f55f1 ]
+        include:
+          - image: ubuntu24.04_nvhpc25.7_openmpi5.0.8_cuda12.9:e9f55f1
+            runner: gha-runner-gpu-camsima
+            compiler: nvhpc
+    runs-on: ${{ matrix.runner }}
+    environment: CI-tests-on-CIRRUS
+    container:
+      image: hub.k8s.ucar.edu/cam-sima/${{ matrix.image }}
+      options: ${{ matrix.runner == 'gha-runner-gpu-camsima' && '--gpus all' || '' }}
+      env:
+        NVIDIA_DRIVER_CAPABILITIES: compute,utility
+      credentials:
+        username: ${{ secrets.hub_user || 'dummy-user' }}
+        password: ${{ secrets.hub_password || 'dummy-password' }}
+    defaults:
+      run:
+        shell: bash
+    steps:
+      - name: Checkout push code
+        uses: actions/checkout@v4
+
+      - name: Set up some git configurations
+        run: |
+          git config --global user.email "example_user@example.com"
+          git config --global user.name "example_user"
+
+      - name: Checkout individual components
+        run: |
+          ./bin/git-fleximod -C ${GITHUB_WORKSPACE} update
+
+      - name: Build and run ${{ matrix.test_type }} test of ${{ matrix.compset }} at ${{ matrix.res }} resolution
+        run: |
+          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PIO/lib:$NETCDF_FORTRAN_PATH/lib:$NETCDF_C_PATH/lib:$LAPACK/lib:$LAPACK/lib64:$PNETCDF/lib
+          if [ "${{ matrix.runner }}" == "gha-runner-gpu-camsima" ]; then
+            export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CUDA_ROOT/lib64:$CUDA_ROOT/lib64/stubs
+          fi
+          cd cime/scripts
+          ./create_test ${{ matrix.test_type }}.${{ matrix.res }}.${{ matrix.compset }}.cirrus_${{ matrix.compiler }}.cam-${{ matrix.test_config }} --output-root /$TMP_DIR/ci_test --no-batch 2>&1 | tee "$TMP_OUTPUT"
+
+      - name: Check the test PASS/FAIL status
+        if: always()
+        run: |
+          cd /$TMP_DIR/ci_test
+          STATUS_OUTPUT=$(./cs.status.*)
+          echo "$STATUS_OUTPUT"
+          OVERALL_STATUS=$(echo "$STATUS_OUTPUT" | grep -oP 'Overall:\s*\K\w+')
+          if [ "$OVERALL_STATUS" == "PASS" ]; then
+            echo "Test passed"
+          else
+            echo "Test FAILED (Overall status: ${OVERALL_STATUS:-not found})"
+            exit 911
+          fi
+
+      - name: Upload logs to GitHub artifacts when the CI test fails
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: logs.${{ matrix.test_type }}.${{ matrix.compset }}.${{ matrix.compiler }}.${{ matrix.runner }}
+          path: |
+            /$TMP_DIR/ci_test/${{ matrix.test_type }}.${{ matrix.res }}.${{ matrix.compset }}.cirrus_${{ matrix.compiler }}.cam-${{ matrix.test_config }}.*/cesm.log*
+            /$TMP_DIR/ci_test/${{ matrix.test_type }}.${{ matrix.res }}.${{ matrix.compset }}.cirrus_${{ matrix.compiler }}.cam-${{ matrix.test_config }}.*/atm.log*
+          retention-days: 7
\ No newline at end of file
diff --git a/.gitmodules b/.gitmodules
index 6bb16a2c..e71d78f1 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -31,8 +31,8 @@
         fxDONOTUSEurl = https://github.com/earth-system-radiation/rrtmgp-data.git
 [submodule "ccs_config"]
         path = ccs_config
-        url = https://github.com/ESMCI/ccs_config_cesm.git
-        fxtag = ccs_config_cesm1.0.72
+	url = https://github.com/ESMCI/ccs_config_cesm.git
+	fxtag = ccs_config_cesm1.0.76
         fxrequired = ToplevelRequired
         fxDONOTUSEurl = https://github.com/ESMCI/ccs_config_cesm.git
 [submodule "cdeps"]
diff --git a/ccs_config b/ccs_config
index bd399012..ee6e502c 160000
--- a/ccs_config
+++ b/ccs_config
@@ -1 +1 @@
-Subproject commit bd39901220166f8f8d7368cdf5aad8b704df3602
+Subproject commit ee6e502c79ce4aef6fefa13293f0b8823e5a7a84
diff --git a/cime_config/testdefs/testmods_dirs/cam/outfrq_rrtmgp_cirrus_gpu/shell_commands b/cime_config/testdefs/testmods_dirs/cam/outfrq_rrtmgp_cirrus_gpu/shell_commands
new file mode 100644
index 00000000..fd8c2878
--- /dev/null
+++ b/cime_config/testdefs/testmods_dirs/cam/outfrq_rrtmgp_cirrus_gpu/shell_commands
@@ -0,0 +1,13 @@
+./xmlchange NTASKS=4
+./xmlchange NTHRDS=1
+./xmlchange ROOTPE='0'
+./xmlchange ROF_NCPL=`./xmlquery --value ATM_NCPL`
+./xmlchange GLC_NCPL=`./xmlquery --value ATM_NCPL`
+./xmlchange TIMER_DETAIL='6'
+./xmlchange TIMER_LEVEL='999'
+./xmlchange GPU_TYPE=a10
+./xmlchange OPENACC_GPU_OFFLOAD=TRUE
+./xmlchange OVERSUBSCRIBE_GPU=TRUE
+./xmlchange NGPUS_PER_NODE=1
+./xmlchange CAM_CONFIG_OPTS="--dyn none --physics-suites rrtmgp"
+./xmlchange RUN_STARTDATE=1979-01-01
diff --git a/cime_config/testdefs/testmods_dirs/cam/outfrq_rrtmgp_cirrus_gpu/user_nl_cam b/cime_config/testdefs/testmods_dirs/cam/outfrq_rrtmgp_cirrus_gpu/user_nl_cam
new file mode 100644
index 00000000..f1be88f3
--- /dev/null
+++ b/cime_config/testdefs/testmods_dirs/cam/outfrq_rrtmgp_cirrus_gpu/user_nl_cam
@@ -0,0 +1,25 @@
+! these are CPU FHISTC_LTso snapshots
+ncdata = '/glade/sima_baselines/cam_sima_test_snapshots/cam_ne3pg3_fhistc_ltso_rrtmgp_derecho_gnu_before_c20251013.nc'
+ncdata_check = '/glade/sima_baselines/cam_sima_test_snapshots/cam_ne3pg3_fhistc_ltso_rrtmgp_derecho_gnu_after_c20251013.nc'
+
+! tolerances for testing (currently have high tolerance due to CPU snapshots)
+ncdata_check_err = .true.
+min_difference = 1e-08
+
+! vertical levels in snapshot
+pver = 58
+
+! Do radiation on every timestep we're testing
+irad_always=3
+
+! diagnostic output
+hist_output_frequency;h1: 1*nsteps
+hist_precision;h1: REAL64
+hist_add_inst_fields;h1: HR
+! Cloud output
+hist_add_inst_fields;h1: TOT_CLD_VISTAU,TOT_ICLD_VISTAU,ICE_ICLD_VISTAU,LIQ_ICLD_VISTAU
+! Longwave diagnostic output
+hist_add_inst_fields;h1: QRL,QRLC,FLNT,FLNTC,FLUT,FLUTC,LWCF,FLN200,FLN200C,FLNR,FLNS,FLNSC,FLDS,FLDSC,FUL,FDL,FULC,FDLC
+! Shortwave diagnostic fields
+hist_add_inst_fields;h1: SOLIN,QRS,QRSC,FSNT,FSNTC,FSNTOA,FSNTOAC,SWCF,FSUTOA,FSN200,FSN200C,FSNR,SOLL,SOLS,SOLLD,SOLSD
+hist_add_inst_fields;h1: FSNS,FSNSC,FSDS,FSDSC,FUS,FDS,FUSC,FDSC
diff --git a/cime_config/testdefs/testmods_dirs/cam/outfrq_rrtmgp_cirrus_gpu/user_nl_cpl b/cime_config/testdefs/testmods_dirs/cam/outfrq_rrtmgp_cirrus_gpu/user_nl_cpl
new file mode 100644
index 00000000..08311045
--- /dev/null
+++ b/cime_config/testdefs/testmods_dirs/cam/outfrq_rrtmgp_cirrus_gpu/user_nl_cpl
@@ -0,0 +1,5 @@
+! Set fixed orbital parameters
+orb_mode='fixed_parameters'
+orb_eccen = 0.
+orb_obliq = 0.
+orb_mvelp = 0.
From 5d8754c7629430eccec9e2bd792ed7858a174540 Mon Sep 17 00:00:00 2001
From: Jian Sun
Date: Wed, 11 Mar 2026 16:20:07 -0600
Subject: [PATCH 2/2] address kuanchi's comments

Workflow (.github/workflows/build_and_run_cirrus.yml):
- move `permissions: contents: read` from the job to the workflow level
- use actions/checkout@v6 and actions/upload-artifact@v7
- quote the shell expansions in the run steps and compare strings with
  `=` inside `[ ]`
- exit with status 123 instead of 911 when the overall status is not
  PASS (911 does not fit in an 8-bit exit status)
- upload the build logs (bld/*.bldlog*) in addition to the run logs
  (run/*.log*)

The upload-artifact `path` entries are written with `${{ env.TMP_DIR }}`
and without quote characters.  `with:` inputs are handed to the action
as text, not run through a shell, so `$TMP_DIR` would stay unexpanded
and `"` characters would become part of the glob pattern.

Testmods (outfrq_rrtmgp_cirrus_gpu/shell_commands):
- use $(...) instead of backticks for the ATM_NCPL queries

.gitmodules: whitespace-only change to the ccs_config url/fxtag lines.
---
Reviewer notes (this section is ignored by `git am`):

* This message was rebuilt from a copy in which line breaks and
  indentation had been collapsed.  YAML indentation is restored assuming
  the usual 2-space GitHub Actions layout - TODO confirm against the
  repository before applying.
* The .gitmodules hunk is whitespace-only and the original whitespace is
  not recoverable from that copy.  It is written here as a tab (old) vs.
  8 spaces (new); this is an assumption - verify.
* The `index` blob ids are those of the original commit and were not
  regenerated after the `path` change.

 .github/workflows/build_and_run_cirrus.yml    | 29 ++++++++++++++++----------
 .gitmodules                                   |  4 +--
 .../outfrq_rrtmgp_cirrus_gpu/shell_commands   |  4 +--
 3 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/build_and_run_cirrus.yml b/.github/workflows/build_and_run_cirrus.yml
index 1d437a7c..2d59be7b 100644
--- a/.github/workflows/build_and_run_cirrus.yml
+++ b/.github/workflows/build_and_run_cirrus.yml
@@ -1,5 +1,8 @@
 name: Build and run CAM-SIMA on CIRRUS
 
+permissions:
+  contents: read
+
 on:
   push:
     branches:
@@ -16,8 +19,6 @@ concurrency:
 jobs:
   build-and-run-on-CIRRUS:
     name: Run ${{ matrix.image }} on ${{ matrix.runner }}
-    permissions:
-      contents: read
     env:
       TMP_DIR: tmp
       TMP_OUTPUT: case_output.log
@@ -48,7 +49,7 @@ jobs:
         shell: bash
     steps:
       - name: Checkout push code
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
 
       - name: Set up some git configurations
         run: |
@@ -57,16 +58,16 @@ jobs:
 
       - name: Checkout individual components
         run: |
-          ./bin/git-fleximod -C ${GITHUB_WORKSPACE} update
+          ./bin/git-fleximod -C "${GITHUB_WORKSPACE}" update
 
       - name: Build and run ${{ matrix.test_type }} test of ${{ matrix.compset }} at ${{ matrix.res }} resolution
         run: |
-          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PIO/lib:$NETCDF_FORTRAN_PATH/lib:$NETCDF_C_PATH/lib:$LAPACK/lib:$LAPACK/lib64:$PNETCDF/lib
-          if [ "${{ matrix.runner }}" == "gha-runner-gpu-camsima" ]; then
-            export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CUDA_ROOT/lib64:$CUDA_ROOT/lib64/stubs
+          export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$PIO/lib:$NETCDF_FORTRAN_PATH/lib:$NETCDF_C_PATH/lib:$LAPACK/lib:$LAPACK/lib64:$PNETCDF/lib"
+          if [ "${{ matrix.runner }}" = "gha-runner-gpu-camsima" ]; then
+            export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$CUDA_ROOT/lib64:$CUDA_ROOT/lib64/stubs"
           fi
           cd cime/scripts
-          ./create_test ${{ matrix.test_type }}.${{ matrix.res }}.${{ matrix.compset }}.cirrus_${{ matrix.compiler }}.cam-${{ matrix.test_config }} --output-root /$TMP_DIR/ci_test --no-batch 2>&1 | tee "$TMP_OUTPUT"
+          ./create_test "${{ matrix.test_type }}.${{ matrix.res }}.${{ matrix.compset }}.cirrus_${{ matrix.compiler }}.cam-${{ matrix.test_config }}" --output-root "/$TMP_DIR/ci_test" --no-batch 2>&1 | tee "$TMP_OUTPUT"
 
       - name: Check the test PASS/FAIL status
         if: always()
@@ -75,19 +76,21 @@ jobs:
           STATUS_OUTPUT=$(./cs.status.*)
           echo "$STATUS_OUTPUT"
           OVERALL_STATUS=$(echo "$STATUS_OUTPUT" | grep -oP 'Overall:\s*\K\w+')
-          if [ "$OVERALL_STATUS" == "PASS" ]; then
+          if [ "$OVERALL_STATUS" = "PASS" ]; then
             echo "Test passed"
           else
             echo "Test FAILED (Overall status: ${OVERALL_STATUS:-not found})"
-            exit 911
+            exit 123
           fi
 
       - name: Upload logs to GitHub artifacts when the CI test fails
         if: failure()
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-artifact@v7
         with:
           name: logs.${{ matrix.test_type }}.${{ matrix.compset }}.${{ matrix.compiler }}.${{ matrix.runner }}
           path: |
-            /$TMP_DIR/ci_test/${{ matrix.test_type }}.${{ matrix.res }}.${{ matrix.compset }}.cirrus_${{ matrix.compiler }}.cam-${{ matrix.test_config }}.*/cesm.log*
-            /$TMP_DIR/ci_test/${{ matrix.test_type }}.${{ matrix.res }}.${{ matrix.compset }}.cirrus_${{ matrix.compiler }}.cam-${{ matrix.test_config }}.*/atm.log*
+            /${{ env.TMP_DIR }}/ci_test/${{ matrix.test_type }}.${{ matrix.res }}.${{ matrix.compset }}.cirrus_${{ matrix.compiler }}.cam-${{ matrix.test_config }}.*/bld/atm.bldlog*
+            /${{ env.TMP_DIR }}/ci_test/${{ matrix.test_type }}.${{ matrix.res }}.${{ matrix.compset }}.cirrus_${{ matrix.compiler }}.cam-${{ matrix.test_config }}.*/bld/cesm.bldlog*
+            /${{ env.TMP_DIR }}/ci_test/${{ matrix.test_type }}.${{ matrix.res }}.${{ matrix.compset }}.cirrus_${{ matrix.compiler }}.cam-${{ matrix.test_config }}.*/run/cesm.log*
+            /${{ env.TMP_DIR }}/ci_test/${{ matrix.test_type }}.${{ matrix.res }}.${{ matrix.compset }}.cirrus_${{ matrix.compiler }}.cam-${{ matrix.test_config }}.*/run/atm.log*
           retention-days: 7
\ No newline at end of file
diff --git a/.gitmodules b/.gitmodules
index 13aa7f1b..ed73b06c 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -31,8 +31,8 @@
         fxDONOTUSEurl = https://github.com/earth-system-radiation/rrtmgp-data.git
 [submodule "ccs_config"]
         path = ccs_config
-	url = https://github.com/ESMCI/ccs_config_cesm.git
-	fxtag = ccs_config_cesm1.0.76
+        url = https://github.com/ESMCI/ccs_config_cesm.git
+        fxtag = ccs_config_cesm1.0.76
         fxrequired = ToplevelRequired
         fxDONOTUSEurl = https://github.com/ESMCI/ccs_config_cesm.git
 [submodule "cdeps"]
diff --git a/cime_config/testdefs/testmods_dirs/cam/outfrq_rrtmgp_cirrus_gpu/shell_commands b/cime_config/testdefs/testmods_dirs/cam/outfrq_rrtmgp_cirrus_gpu/shell_commands
index fd8c2878..fa42c41f 100644
--- a/cime_config/testdefs/testmods_dirs/cam/outfrq_rrtmgp_cirrus_gpu/shell_commands
+++ b/cime_config/testdefs/testmods_dirs/cam/outfrq_rrtmgp_cirrus_gpu/shell_commands
@@ -1,8 +1,8 @@
 ./xmlchange NTASKS=4
 ./xmlchange NTHRDS=1
 ./xmlchange ROOTPE='0'
-./xmlchange ROF_NCPL=`./xmlquery --value ATM_NCPL`
-./xmlchange GLC_NCPL=`./xmlquery --value ATM_NCPL`
+./xmlchange ROF_NCPL=$(./xmlquery --value ATM_NCPL)
+./xmlchange GLC_NCPL=$(./xmlquery --value ATM_NCPL)
 ./xmlchange TIMER_DETAIL='6'
 ./xmlchange TIMER_LEVEL='999'
 ./xmlchange GPU_TYPE=a10