Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
6e47296
Consolidating OpenACC device-host memory transfers
abishekg7 May 13, 2025
ac59b66
Fixing bug associated with rho_zz_2 not being copied out at the end o…
abishekg7 May 22, 2025
2ccf89d
Moving some OpenACC data movements to subroutines
abishekg7 Jun 14, 2025
f55d650
Removing acc data xfer timers for device variables using create/delete
abishekg7 Jul 3, 2025
cf373f5
Using acc declare create for rho_zz_int and corresponding cleanup
abishekg7 Jul 8, 2025
34d4c8c
Removing atm_advance_scalars_mono ACC_data_xfer timers around create/…
abishekg7 Jul 8, 2025
9d3c3fc
Simplifying OpenACC data transfers around the call to mpas_reconstruc…
abishekg7 Jul 8, 2025
4b7137d
Need to copyout u_2 and w_2 at the end of dynamics
abishekg7 Aug 14, 2025
7be0315
Add data movement for some fields under the mpas_halo_groups
gdicker1 May 7, 2025
9126128
Add a data region and acc kernels to the 2D packing code
gdicker1 May 7, 2025
38ee36f
Add the update directives that should have been part of the last commit
gdicker1 May 7, 2025
fa984fe
Comment out data present region, see if this causes an error
gdicker1 May 7, 2025
ec79911
Expand the data managed on the GPU for the halo exchange
gdicker1 May 7, 2025
e98a57e
Remove the OpenACC management of recvBuf
gdicker1 May 7, 2025
4ee67d5
Add update host(sendBuf) back, address answer diff
gdicker1 May 7, 2025
40aecd6
Expand to other packing kernels, only update sendBuf after packing fi…
gdicker1 May 7, 2025
dc6eae7
Change to simple integers to access the buffers and the field arrays
gdicker1 May 7, 2025
130d75e
Add kernels to unpacking loops and use a data present region to try t…
gdicker1 May 8, 2025
e27a75f
Change from data copyin regions to enter/exit directives for the r?ar…
gdicker1 May 8, 2025
52b3450
Re-enable update host for sendBuf, add update device recvBuf
gdicker1 May 8, 2025
4285b92
Remove update directives, use acc host_data use_device(...) near MPI …
gdicker1 May 8, 2025
fda45c4
checkpoints: acc pack + cuda aware mpi working
abishekg7 Aug 6, 2025
250c4d9
seems to be working
abishekg7 Aug 7, 2025
0d3709e
Optimized packing and unpacking loops. Adding timers and other cleanup
abishekg7 Aug 7, 2025
f51f57a
Working savepoint
abishekg7 Aug 12, 2025
7b1ff2c
u_2 and w_2 need to be copied out after dynamics + cleanup
abishekg7 Aug 13, 2025
ef09e0b
using attach in a directive instead of the acc_attach library call
abishekg7 Aug 13, 2025
3af6f9d
Using attach clause in parallel region will also auto detach at end o…
abishekg7 Aug 13, 2025
00c92b0
Reverting the indexing in loops and comment cleanup
abishekg7 Aug 13, 2025
a8fda92
New namelist option to switch on or off GPU-Aware MPI
abishekg7 Aug 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/core_atmosphere/Registry.xml
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,10 @@
units="-"
description="Method to use for exchanging halos"
possible_values="`mpas_dmpar', `mpas_halo'"/>
<nml_option name="config_gpu_aware_mpi" type="logical" default_value="false"
units="-"
description="Whether to use GPU-aware MPI for halo exchanges"
possible_values=".true. or .false."/>
</nml_record>

<!-- **************************************************************************************** -->
Expand Down
32 changes: 0 additions & 32 deletions src/core_atmosphere/dynamics/mpas_atm_boundaries.F
Original file line number Diff line number Diff line change
Expand Up @@ -395,18 +395,14 @@ subroutine mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t
nullify(tend)
call mpas_pool_get_array(lbc, 'lbc_'//trim(field), tend, 1)

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_tend [ACC_data_xfer]')
if (associated(tend)) then
!$acc enter data copyin(tend)
else
call mpas_pool_get_array(lbc, 'lbc_scalars', tend_scalars, 1)
!$acc enter data copyin(tend_scalars)

! Ensure the integer pointed to by idx_ptr is copied to the gpu device
call mpas_pool_get_dimension(lbc, 'index_'//trim(field), idx_ptr)
idx = idx_ptr
end if
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_tend [ACC_data_xfer]')

!$acc parallel default(present)
if (associated(tend)) then
Expand All @@ -426,13 +422,6 @@ subroutine mpas_atm_get_bdy_tend(clock, block, vertDim, horizDim, field, delta_t
end if
!$acc end parallel

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_tend [ACC_data_xfer]')
if (associated(tend)) then
!$acc exit data delete(tend)
else
!$acc exit data delete(tend_scalars)
end if
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_tend [ACC_data_xfer]')

end subroutine mpas_atm_get_bdy_tend

Expand Down Expand Up @@ -533,9 +522,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del
! query the field as a scalar constituent
!
if (associated(tend) .and. associated(state)) then
MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
!$acc enter data copyin(tend, state)
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')

!$acc parallel default(present)
!$acc loop gang vector collapse(2)
Expand All @@ -546,20 +532,13 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del
end do
!$acc end parallel

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
!$acc exit data delete(tend, state)
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
else
call mpas_pool_get_array(lbc, 'lbc_scalars', tend_scalars, 1)
call mpas_pool_get_array(lbc, 'lbc_scalars', state_scalars, 2)
call mpas_pool_get_dimension(lbc, 'index_'//trim(field), idx_ptr)

idx=idx_ptr ! Avoid non-array pointer for OpenACC

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
!$acc enter data copyin(tend_scalars, state_scalars)
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')

!$acc parallel default(present)
!$acc loop gang vector collapse(2)
do i=1, horizDim+1
Expand All @@ -569,9 +548,6 @@ subroutine mpas_atm_get_bdy_state_2d(clock, block, vertDim, horizDim, field, del
end do
!$acc end parallel

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
!$acc exit data delete(tend_scalars, state_scalars)
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_2d [ACC_data_xfer]')
end if

end subroutine mpas_atm_get_bdy_state_2d
Expand Down Expand Up @@ -652,10 +628,6 @@ subroutine mpas_atm_get_bdy_state_3d(clock, block, innerDim, vertDim, horizDim,
call mpas_pool_get_array(lbc, 'lbc_'//trim(field), tend, 1)
call mpas_pool_get_array(lbc, 'lbc_'//trim(field), state, 2)

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_3d [ACC_data_xfer]')
!$acc enter data copyin(tend, state)
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_3d [ACC_data_xfer]')

!$acc parallel default(present)
!$acc loop gang vector collapse(3)
do i=1, horizDim+1
Expand All @@ -667,10 +639,6 @@ subroutine mpas_atm_get_bdy_state_3d(clock, block, innerDim, vertDim, horizDim,
end do
!$acc end parallel

MPAS_ACC_TIMER_START('mpas_atm_get_bdy_state_3d [ACC_data_xfer]')
!$acc exit data delete(tend, state)
MPAS_ACC_TIMER_STOP('mpas_atm_get_bdy_state_3d [ACC_data_xfer]')

end subroutine mpas_atm_get_bdy_state_3d


Expand Down
47 changes: 45 additions & 2 deletions src/core_atmosphere/dynamics/mpas_atm_iau.F
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@
! Additional copyright and license information can be found in the LICENSE file
! distributed with this code, or at http://mpas-dev.github.com/license.html
!

! Timer wrapper macros. When MPAS_OPENACC is defined they expand to calls to
! mpas_timer_start and mpas_timer_stop with the given timer name; otherwise they
! expand to nothing, so the wrapped timer calls disappear from the compiled code.
#ifdef MPAS_OPENACC
#define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X)
#define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X)
#else
#define MPAS_ACC_TIMER_START(X)
#define MPAS_ACC_TIMER_STOP(X)
#endif

module mpas_atm_iau

use mpas_derived_types
Expand All @@ -13,9 +22,10 @@ module mpas_atm_iau
use mpas_dmpar
use mpas_constants
use mpas_log, only : mpas_log_write
use mpas_timer

!public :: atm_compute_iau_coef, atm_add_tend_anal_incr

!public :: atm_compute_iau_coef, atm_add_tend_anal_incr

contains

!==================================================================================================
Expand Down Expand Up @@ -76,6 +86,39 @@ real (kind=RKIND) function atm_iau_coef(configs, itimestep, dt) result(wgt_iau)
end if

end function atm_iau_coef

!==================================================================================================
subroutine update_d2h_pre_add_tend_anal_incr(configs, structs)
!==================================================================================================
!
!  Brings the host copies of theta_m, scalars, rho_zz, rho_edge and scalars_tend
!  up to date with an OpenACC update directive. The arrays are looked up in the
!  state, diag and tend subpools of structs.
!
!  NOTE(review): judging by the routine name, this is meant to be called before
!  atm_add_tend_anal_incr -- confirm against the call site.
!
!  Arguments:
!     configs  - configuration pool; not referenced in this routine
!     structs  - pool from which the state, diag and tend subpools are obtained
!
   implicit none

   type (mpas_pool_type), intent(in)    :: configs
   type (mpas_pool_type), intent(inout) :: structs

   type (mpas_pool_type), pointer :: state, diag, tend

   real (kind=RKIND), dimension(:,:),   pointer :: theta_m, rho_zz, rho_edge
   real (kind=RKIND), dimension(:,:,:), pointer :: scalars, tend_scalars

   call mpas_pool_get_subpool(structs, 'state', state)
   call mpas_pool_get_subpool(structs, 'diag', diag)
   call mpas_pool_get_subpool(structs, 'tend', tend)

   MPAS_ACC_TIMER_START('atm_srk3: physics ACC_data_xfer')

   ! Resolve every array pointer first ...
   call mpas_pool_get_array(state, 'theta_m', theta_m, 1)
   call mpas_pool_get_array(state, 'rho_zz', rho_zz, 2)
   call mpas_pool_get_array(state, 'scalars', scalars, 1)
   call mpas_pool_get_array(diag, 'rho_edge', rho_edge)
   call mpas_pool_get_array(tend, 'scalars_tend', tend_scalars)

   ! ... then refresh the host copies: rank-2 fields, followed by rank-3 fields
   !$acc update self(theta_m, rho_zz, rho_edge)
   !$acc update self(scalars, tend_scalars)

   MPAS_ACC_TIMER_STOP('atm_srk3: physics ACC_data_xfer')

end subroutine update_d2h_pre_add_tend_anal_incr

!==================================================================================================
subroutine atm_add_tend_anal_incr (configs, structs, itimestep, dt, tend_ru, tend_rtheta, tend_rho)
Expand Down
Loading