Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
6e47296
Consolidating OpenACC device-host memory transfers
abishekg7 May 13, 2025
ac59b66
Fixing bug associated with rho_zz_2 not being copied out at the end o…
abishekg7 May 22, 2025
2ccf89d
Moving some OpenACC data movements to subroutines
abishekg7 Jun 14, 2025
f55d650
Removing acc data xfer timers for device variables using create/delete
abishekg7 Jul 3, 2025
cf373f5
Using acc declare create for rho_zz_int and corresponding cleanup
abishekg7 Jul 8, 2025
34d4c8c
Removing atm_advance_scalars_mono ACC_data_xfer timers around create/…
abishekg7 Jul 8, 2025
9d3c3fc
Simplifying OpenACC data transfers around the call to mpas_reconstruc…
abishekg7 Jul 8, 2025
4b7137d
Need to copyout u_2 and w_2 at the end of dynamics
abishekg7 Aug 14, 2025
70c1e42
Fixes to produce correct results with CURVATURE
abishekg7 Aug 22, 2025
c694751
Adding option to enable GPU execution of mpas_reconstruct_2d
abishekg7 Oct 3, 2025
905f1a0
fixes needed with intel compiler
abishekg7 Oct 13, 2025
168d949
Add data movement for some fields under the mpas_halo_groups
May 7, 2025
a8bd6ad
Add a data region and acc kernels to the 2D packing code
May 7, 2025
f1f7814
Add the update directives that should have been part of the last commit
May 7, 2025
fd2b66c
Comment out data present region, see if this causes an error
May 7, 2025
897d711
Expand the data managed on the GPU for the halo exchange
May 7, 2025
0ed1de3
Remove the OpenACC management of recvBuf
May 7, 2025
841f26e
Add update host(sendBuf) back, address answer diff
May 7, 2025
904937f
Expand to other packing kernels, only update sendBuf after packing fi…
May 7, 2025
e4e8c53
Change to simple integers to access the buffers and the field arrays
May 7, 2025
8184e5e
Add kernels to unpacking loops and use a data present region to try t…
May 8, 2025
e854038
Change from data copyin regions to enter/exit directives for the r?ar…
May 8, 2025
b33a379
Re-enable update host for sendBuf, add update device recvBuf
May 8, 2025
caa7ac6
Remove update directives, use acc host_data use_device(...) near MPI …
May 8, 2025
9ef2669
checkpoints: acc pack + cuda aware mpi working
abishekg7 Aug 6, 2025
06ed6ba
seems to be working
abishekg7 Aug 7, 2025
c20312b
Optimized packing and unpacking loops. Adding timers and other cleanup
abishekg7 Aug 7, 2025
6ef38cd
Working savepoint
abishekg7 Aug 12, 2025
4ccddf7
u_2 and w_2 need to be copied out after dynamics + cleanup
abishekg7 Aug 13, 2025
708b97c
using attach in a directive instead of the acc_attach library call
abishekg7 Aug 13, 2025
5cbee6b
Using attach clause in parallel region will also auto detach at end o…
abishekg7 Aug 13, 2025
1404a2f
Reverting the indexing in loops and comment cleanup
abishekg7 Aug 13, 2025
75fea6c
New namelist option to switch on or off GPU-Aware MPI
abishekg7 Aug 13, 2025
1e08917
Adding a dependency to mpas_timer.o in mpas_halo.o
abishekg7 Sep 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/core_atmosphere/Registry.xml
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,10 @@
units="-"
description="Method to use for exchanging halos"
possible_values="`mpas_dmpar', `mpas_halo'"/>
<nml_option name="config_gpu_aware_mpi" type="logical" default_value="false"
units="-"
description="Whether to use GPU-aware MPI for halo exchanges"
possible_values=".true. or .false."/>
</nml_record>

<!-- **************************************************************************************** -->
Expand Down
32 changes: 0 additions & 32 deletions src/core_atmosphere/dynamics/mpas_atm_boundaries.F
Original file line number Diff line number Diff line change
Expand Up @@ -395,18 +395,14 @@ subroutine mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t
nullify(tend)
call mpas_pool_get_array(lbc, 'lbc_'//trim(field), tend, 1)

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_tend [ACC_data_xfer]')
if (associated(tend)) then
!$acc enter data copyin(tend)
else
call mpas_pool_get_array(lbc, 'lbc_scalars', tend_scalars, 1)
!$acc enter data copyin(tend_scalars)

! Ensure the integer pointed to by idx_ptr is copied to the gpu device
call mpas_pool_get_dimension(lbc, 'index_'//trim(field), idx_ptr)
idx = idx_ptr
end if
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_tend [ACC_data_xfer]')

!$acc parallel default(present)
if (associated(tend)) then
Expand All @@ -426,13 +422,6 @@ subroutine mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t
end if
!$acc end parallel

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_tend [ACC_data_xfer]')
if (associated(tend)) then
!$acc exit data delete(tend)
else
!$acc exit data delete(tend_scalars)
end if
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_tend [ACC_data_xfer]')

end subroutine mpas_atm_get_bdy_tend

Expand Down Expand Up @@ -533,9 +522,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del
! query the field as a scalar constituent
!
if (associated(tend) .and. associated(state)) then
MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
!$acc enter data copyin(tend, state)
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')

!$acc parallel default(present)
!$acc loop gang vector collapse(2)
Expand All @@ -546,20 +532,13 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del
end do
!$acc end parallel

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
!$acc exit data delete(tend, state)
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
else
call mpas_pool_get_array(lbc, 'lbc_scalars', tend_scalars, 1)
call mpas_pool_get_array(lbc, 'lbc_scalars', state_scalars, 2)
call mpas_pool_get_dimension(lbc, 'index_'//trim(field), idx_ptr)

idx=idx_ptr ! Avoid non-array pointer for OpenACC

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
!$acc enter data copyin(tend_scalars, state_scalars)
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')

!$acc parallel default(present)
!$acc loop gang vector collapse(2)
do i=1, horizDim+1
Expand All @@ -569,9 +548,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del
end do
!$acc end parallel

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
!$acc exit data delete(tend_scalars, state_scalars)
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
end if

end subroutine mpas_atm_get_bdy_state_2d
Expand Down Expand Up @@ -652,10 +628,6 @@ subroutine mpas_atm_get_bdy_state_3d(clock, block, innerDim, vertDim, horizDim,
call mpas_pool_get_array(lbc, 'lbc_'//trim(field), tend, 1)
call mpas_pool_get_array(lbc, 'lbc_'//trim(field), state, 2)

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_3d [ACC_data_xfer]')
!$acc enter data copyin(tend, state)
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_3d [ACC_data_xfer]')

!$acc parallel default(present)
!$acc loop gang vector collapse(3)
do i=1, horizDim+1
Expand All @@ -667,10 +639,6 @@ subroutine mpas_atm_get_bdy_state_3d(clock, block, innerDim, vertDim, horizDim,
end do
!$acc end parallel

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_3d [ACC_data_xfer]')
!$acc exit data delete(tend, state)
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_3d [ACC_data_xfer]')

end subroutine mpas_atm_get_bdy_state_3d


Expand Down
47 changes: 45 additions & 2 deletions src/core_atmosphere/dynamics/mpas_atm_iau.F
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@
! Additional copyright and license information can be found in the LICENSE file
! distributed with this code, or at http://mpas-dev.github.com/license.html
!

#ifdef MPAS_OPENACC
#define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X)
#define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X)
#else
#define MPAS_ACC_TIMER_START(X)
#define MPAS_ACC_TIMER_STOP(X)
#endif

module mpas_atm_iau

use mpas_derived_types
Expand All @@ -13,9 +22,10 @@ module mpas_atm_iau
use mpas_dmpar
use mpas_constants
use mpas_log, only : mpas_log_write
use mpas_timer

!public :: atm_compute_iau_coef, atm_add_tend_anal_incr

!public :: atm_compute_iau_coef, atm_add_tend_anal_incr

contains

!==================================================================================================
Expand Down Expand Up @@ -76,6 +86,39 @@ real (kind=RKIND) function atm_iau_coef(configs, itimestep, dt) result(wgt_iau)
end if

end function atm_iau_coef

!==================================================================================================
subroutine update_d2h_pre_add_tend_anal_incr(configs,structs)
!==================================================================================================
!
! Refreshes the host copies of a fixed set of fields from their device copies
! using OpenACC "update self" directives:
!
!    state pool : theta_m (time level 1), scalars (time level 1), rho_zz (time level 2)
!    diag pool  : rho_edge
!    tend pool  : scalars_tend
!
! Arguments:
!    configs - configuration pool; accepted for interface purposes but not
!              referenced anywhere in this routine
!    structs - pool from which the 'tend', 'state' and 'diag' subpools are retrieved
!
! When MPAS_OPENACC is defined, the pool lookups and the updates are timed under
! the 'atm_srk3: physics ACC_data_xfer' timer; otherwise the timer macros expand
! to nothing.
!
! NOTE(review): judging by the routine name, this is intended to be called before
! atm_add_tend_anal_incr -- confirm against the call site.
! NOTE(review): assumes all five arrays already have device copies -- confirm
! against the data-region setup elsewhere in the dynamics.
!

   implicit none

   type (mpas_pool_type), intent(in)    :: configs
   type (mpas_pool_type), intent(inout) :: structs

   ! Subpools looked up from structs
   type (mpas_pool_type), pointer :: state_pool
   type (mpas_pool_type), pointer :: diag_pool
   type (mpas_pool_type), pointer :: tend_pool

   ! Fields whose host copies are refreshed
   real (kind=RKIND), dimension(:,:),   pointer :: theta_m
   real (kind=RKIND), dimension(:,:),   pointer :: rho_zz
   real (kind=RKIND), dimension(:,:),   pointer :: rho_edge
   real (kind=RKIND), dimension(:,:,:), pointer :: scalars
   real (kind=RKIND), dimension(:,:,:), pointer :: tend_scalars

   call mpas_pool_get_subpool(structs, 'tend',  tend_pool)
   call mpas_pool_get_subpool(structs, 'state', state_pool)
   call mpas_pool_get_subpool(structs, 'diag',  diag_pool)

   MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer')

   ! Resolve every field pointer first; note that rho_zz is taken from time
   ! level 2 while theta_m and scalars come from time level 1.
   call mpas_pool_get_array(state_pool, 'theta_m', theta_m, 1)
   call mpas_pool_get_array(state_pool, 'scalars', scalars, 1)
   call mpas_pool_get_array(state_pool, 'rho_zz', rho_zz, 2)
   call mpas_pool_get_array(diag_pool, 'rho_edge', rho_edge)
   call mpas_pool_get_array(tend_pool, 'scalars_tend', tend_scalars)

   ! Device -> host: state and diagnostic fields, then the scalar tendencies
   !$acc update self(theta_m, scalars, rho_zz, rho_edge)
   !$acc update self(tend_scalars)

   MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer')

end subroutine update_d2h_pre_add_tend_anal_incr

!==================================================================================================
subroutine atm_add_tend_anal_incr (configs, structs, itimestep, dt, tend_ru, tend_rtheta, tend_rho)
Expand Down
Loading