17
17
!> communicating the halos of all fields in a group.
18
18
!
19
19
!-----------------------------------------------------------------------
20
+
21
+ #ifdef MPAS_OPENACC
22
+ #define MPAS_ACC_TIMER_START(X) call mpas_timer_start(X)
23
+ #define MPAS_ACC_TIMER_STOP(X) call mpas_timer_stop(X)
24
+ #else
25
+ #define MPAS_ACC_TIMER_START(X)
26
+ #define MPAS_ACC_TIMER_STOP(X)
27
+ #endif
28
+
20
29
module mpas_halo
21
30
22
31
implicit none
@@ -281,9 +290,8 @@ subroutine mpas_halo_exch_group_complete(domain, groupName, iErr)
281
290
call refactor_lists(domain, groupName, iErr)
282
291
283
292
if ( newGroup% nGroupSendNeighbors <= 0 ) then
284
- !call mpas_log_write(' No send neighbors for halo exchange group ' // trim (groupName))
285
293
return
286
- end if
294
+ end if
287
295
288
296
289
297
! Always copy in the main data member first
@@ -547,7 +555,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
547
555
use mpas_derived_types, only : domain_type, mpas_halo_group, MPAS_HALO_REAL, MPAS_LOG_CRIT
548
556
use mpas_pool_routines, only : mpas_pool_get_array
549
557
use mpas_log, only : mpas_log_write
550
- use mpas_kind_types, only : RKIND
558
+ use mpas_timer, only : mpas_timer_start, mpas_timer_stop
559
+
551
560
552
561
! Parameters
553
562
#ifdef MPAS_USE_MPI_F08
@@ -595,12 +604,13 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
595
604
integer :: maxNRecvList
596
605
integer , dimension (:,:,:), CONTIGUOUS pointer :: recvListSrc, recvListDst
597
606
integer , dimension (:), CONTIGUOUS pointer :: unpackOffsets
598
- real (kind = RKIND), dimension (:), pointer :: sendBufptr, recvBufptr
607
+
599
608
600
609
if (present (iErr)) then
601
610
iErr = 0
602
611
end if
603
612
613
+
604
614
!
605
615
! Find this halo exhange group in the list of groups
606
616
!
@@ -618,10 +628,11 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
618
628
messageType= MPAS_LOG_CRIT)
619
629
end if
620
630
621
- if ( group% nGroupSendNeighbors <= 0 ) then
622
- !call mpas_log_write(' group has no halo exchanges: ' // trim (groupName))
631
+ if ( group% nGroupSendNeighbors <= 0 ) then
623
632
return
624
- end if
633
+ end if
634
+
635
+ call mpas_timer_start(' full_halo_exch' )
625
636
!
626
637
! Get the rank of this task and the MPI communicator to use from the first field in
627
638
! the group; all fields should be using the same communicator, so this should not
@@ -634,11 +645,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
634
645
#endif
635
646
rank = group % fields(1 ) % compactHaloInfo(8 )
636
647
637
- sendBufptr = > group % sendBuf
638
- recvBufptr = > group % recvBuf
639
-
640
- !!!$acc data present (group % recvBuf(:), group % sendBuf(:))
641
- !$acc data present (sendBufptr,recvBufptr)
648
+ !$acc data present (group % recvBuf(:), group % sendBuf(:))
642
649
643
650
!
644
651
! Initiate non- blocking MPI receives for all neighbors
@@ -648,12 +655,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
648
655
bufstart = group % groupRecvOffsets(i)
649
656
bufend = group % groupRecvOffsets(i) + group % groupRecvCounts(i) - 1
650
657
!TO DO : how do we determine appropriate type here?
651
- ! !$acc host_data use_device(group % recvBuf)
652
- ! call MPI_Irecv(group % recvBuf(bufstart:bufend), group % groupRecvCounts(i), MPI_REALKIND, &
653
- ! group % groupRecvNeighbors(i), group % groupRecvNeighbors(i), comm, &
654
- ! group % recvRequests(i), mpi_ierr)
655
- !$acc host_data use_device(recvBufptr)
656
- call MPI_Irecv(recvBufptr(bufstart:bufend), group % groupRecvCounts(i), MPI_REALKIND, &
658
+ !$acc host_data use_device(group % recvBuf)
659
+ call MPI_Irecv(group % recvBuf(bufstart:bufend), group % groupRecvCounts(i), MPI_REALKIND, &
657
660
group % groupRecvNeighbors(i), group % groupRecvNeighbors(i), comm, &
658
661
group % recvRequests(i), mpi_ierr)
659
662
!$acc end host_data
@@ -695,14 +698,18 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
695
698
call mpas_pool_get_array(domain % blocklist % allFields, trim (group % fields(i) % fieldName), &
696
699
group % fields(i) % r1arr, timeLevel= group % fields(i) % timeLevel)
697
700
698
- ! !$acc data copyin(group % fields(i) % r1arr(:) )
701
+ MPAS_ACC_TIMER_START( ' halo_exch [ACC_data_xfer] ' )
699
702
!$acc enter data copyin(group % fields(i) % r1arr(:))
703
+ MPAS_ACC_TIMER_STOP(' halo_exch [ACC_data_xfer]' )
700
704
!
701
705
! Pack send buffer for all neighbors for current field
702
706
!
703
- !$acc kernels default(present)
707
+ call mpas_timer_start(' packing_halo_exch' )
708
+ !$acc parallel default(present)
709
+ !$acc loop gang collapse(2 )
704
710
do iEndp = 1 , nSendEndpts
705
711
do iHalo = 1 , nHalos
712
+ !$acc loop vector
706
713
do j = 1 , maxNSendList
707
714
if (j <= nSendLists(iHalo,iEndp)) then
708
715
idxBuf = packOffsets(iEndp) + sendListDst(j,iHalo,iEndp)
@@ -712,9 +719,9 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
712
719
end do
713
720
end do
714
721
end do
715
- !$acc end kernels
716
- ! !$acc end data
717
- !!$acc update device(group % sendBuf(:))
722
+ !$acc end parallel
723
+ call mpas_timer_stop( ' packing_halo_exch ' )
724
+
718
725
!
719
726
! Packing code for 2 - d real - valued fields
720
727
!
@@ -725,18 +732,23 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
725
732
!
726
733
! Pack send buffer for all neighbors for current field
727
734
!
728
-
735
+
729
736
! Use data regions for specificity and so the reference or attachment counters are easier to make sense of
730
737
! Present should also cause an attach action. OpenACC Spec2.7 Section 2.7 .2 describes ' attach action'
731
738
! !$acc data present (group) present (group % fields(i)) present (group % sendBuf(:), group % fields(i) % sendListSrc(:,:,:))
732
739
! !$acc data copyin(group % fields(i) % r2arr(:,:))
740
+ MPAS_ACC_TIMER_START(' halo_exch [ACC_data_xfer]' )
733
741
!$acc enter data copyin(group % fields(i) % r2arr(:,:))
742
+ MPAS_ACC_TIMER_STOP(' halo_exch [ACC_data_xfer]' )
734
743
744
+ call mpas_timer_start(' packing_halo_exch' )
735
745
! Kernels is good enough, use default present to force a run- time error if programmer forgot something
736
- !$acc kernels default(present)
746
+ !$acc parallel default(present)
747
+ !$acc loop gang collapse(3 )
737
748
do iEndp = 1 , nSendEndpts
738
749
do iHalo = 1 , nHalos
739
750
do j = 1 , maxNSendList
751
+ !$acc loop vector
740
752
do i1 = 1 , dim1
741
753
if (j <= nSendLists(iHalo,iEndp)) then
742
754
idxBuf = packOffsets(iEndp) + dim1 * (sendListDst(j,iHalo,iEndp) - 1 ) + i1
@@ -747,27 +759,30 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
747
759
end do
748
760
end do
749
761
end do
750
- !$acc end kernels
751
- ! !$acc end data
752
- ! !$acc end data
753
- !!$acc update device(group % sendBuf(:))
762
+ !$acc end parallel
763
+ call mpas_timer_stop(' packing_halo_exch' )
764
+
754
765
!
755
766
! Packing code for 3 - d real - valued fields
756
767
!
757
768
case (3 )
758
769
call mpas_pool_get_array(domain % blocklist % allFields, trim (group % fields(i) % fieldName), &
759
770
group % fields(i) % r3arr, group % fields(i) % timeLevel)
760
- ! !$acc data copyin(group % fields(i) % r3arr(:,:,:) )
771
+ MPAS_ACC_TIMER_START( ' halo_exch [ACC_data_xfer] ' )
761
772
!$acc enter data copyin(group % fields(i) % r3arr(:,:,:))
773
+ MPAS_ACC_TIMER_STOP(' halo_exch [ACC_data_xfer]' )
762
774
763
775
!
764
776
! Pack send buffer for all neighbors for current field
765
777
!
766
- !$acc kernels default(present)
778
+ call mpas_timer_start(' packing_halo_exch' )
779
+ !$acc parallel default(present)
780
+ !$acc loop gang collapse(4 )
767
781
do iEndp = 1 , nSendEndpts
768
782
do iHalo = 1 , nHalos
769
783
do j = 1 , maxNSendList
770
784
do i2 = 1 , dim2
785
+ !$acc loop vector
771
786
do i1 = 1 , dim1
772
787
if (j <= nSendLists(iHalo,iEndp)) then
773
788
idxBuf = packOffsets(iEndp) + dim1* dim2* (sendListDst(j,iHalo,iEndp) - 1 ) &
@@ -780,9 +795,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
780
795
end do
781
796
end do
782
797
end do
783
- !$acc end kernels
784
- ! !$acc end data
785
- !!$acc update device(group % sendBuf(:))
798
+ !$acc end parallel
799
+ call mpas_timer_stop(' packing_halo_exch' )
786
800
787
801
end select
788
802
end if
@@ -796,12 +810,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
796
810
bufstart = group % groupSendOffsets(i)
797
811
bufend = group % groupSendOffsets(i) + group % groupSendCounts(i) - 1
798
812
!TO DO : how do we determine appropriate type here?
799
- ! !$acc host_data use_device(group % sendBuf)
800
- ! call MPI_Isend(group % sendBuf(bufstart:bufend), group % groupSendCounts(i), MPI_REALKIND, &
801
- ! group % groupSendNeighbors(i), rank, comm, &
802
- ! group % sendRequests(i), mpi_ierr)
803
- !$acc host_data use_device(sendBufptr)
804
- call MPI_Isend(sendBufptr(bufstart:bufend), group % groupSendCounts(i), MPI_REALKIND, &
813
+ !$acc host_data use_device(group % sendBuf)
814
+ call MPI_Isend(group % sendBuf(bufstart:bufend), group % groupSendCounts(i), MPI_REALKIND, &
805
815
group % groupSendNeighbors(i), rank, comm, &
806
816
group % sendRequests(i), mpi_ierr)
807
817
!$acc end host_data
@@ -859,10 +869,11 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
859
869
!
860
870
! Unpack recv buffer from all neighbors for current field
861
871
!
862
- !!$acc update host(group % recvBuf(:) )
863
- !! $acc wait
864
- !$acc kernels default(present)
872
+ call mpas_timer_start( ' unpacking_halo_exch ' )
873
+ !$acc parallel default(present)
874
+ !$acc loop gang
865
875
do iHalo = 1 , nHalos
876
+ !$acc loop vector
866
877
do j = 1 , maxNRecvList
867
878
if (j <= nRecvLists(iHalo,iEndp)) then
868
879
idxArr = recvListDst(j,iHalo,iEndp)
@@ -871,8 +882,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
871
882
end if
872
883
end do
873
884
end do
874
- !$acc end kernels
875
- !!$acc exit data copyout(group % fields(i) % r1arr(:) )
885
+ !$acc end parallel
886
+ call mpas_timer_stop( ' unpacking_halo_exch ' )
876
887
877
888
!
878
889
! Unpacking code for 2 - d real - valued fields
@@ -881,11 +892,13 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
881
892
!
882
893
! Unpack recv buffer from all neighbors for current field
883
894
!
884
- !!$acc update host(group % recvBuf(:) )
885
- !! $acc wait
886
- !$acc kernels default(present)
895
+ call mpas_timer_start( ' unpacking_halo_exch ' )
896
+ !$acc parallel default(present)
897
+ !$acc loop gang
887
898
do iHalo = 1 , nHalos
899
+ !$acc loop worker
888
900
do j = 1 , maxNRecvList
901
+ !$acc loop vector
889
902
do i1 = 1 , dim1
890
903
if (j <= nRecvLists(iHalo,iEndp)) then
891
904
idxArr = recvListDst(j,iHalo,iEndp)
@@ -895,8 +908,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
895
908
end do
896
909
end do
897
910
end do
898
- !$acc end kernels
899
- !!$acc exit data copyout(group % fields(i) % r2arr(:,:) )
911
+ !$acc end parallel
912
+ call mpas_timer_stop( ' unpacking_halo_exch ' )
900
913
901
914
!
902
915
! Unpacking code for 3 - d real - valued fields
@@ -905,11 +918,12 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
905
918
!
906
919
! Unpack recv buffer from all neighbors for current field
907
920
!
908
- !!$acc update host(group % recvBuf(:) )
909
- !! $acc wait
910
- !$acc kernels default(present )
921
+ call mpas_timer_start( ' unpacking_halo_exch ' )
922
+ !$acc parallel default(present)
923
+ !$acc loop gang collapse( 2 )
911
924
do iHalo = 1 , nHalos
912
925
do j = 1 , maxNRecvList
926
+ !$acc loop vector collapse(2 )
913
927
do i2 = 1 , dim2
914
928
do i1 = 1 , dim1
915
929
if (j <= nRecvLists(iHalo,iEndp)) then
@@ -922,14 +936,15 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
922
936
end do
923
937
end do
924
938
end do
925
- !$acc end kernels
926
- !!$acc exit data copyout(group % fields(i) % r3arr(:,:,:) )
939
+ !$acc end parallel
940
+ call mpas_timer_stop( ' unpacking_halo_exch ' )
927
941
928
942
end select
929
943
end if
930
944
end do
931
945
end do
932
946
947
+ MPAS_ACC_TIMER_START(' halo_exch [ACC_data_xfer]' )
933
948
do i = 1 , group % nFields
934
949
if (group % fields(i) % fieldType == MPAS_HALO_REAL) then
935
950
select case (group % fields(i) % nDims)
@@ -958,20 +973,7 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
958
973
959
974
! For the present (group % recvBuf(:), group % sendBuf(:))
960
975
!$acc end data
961
- ! !$acc wait
962
- ! do i = 1 , group % nFields
963
- ! if (group % fields(i) % fieldType == MPAS_HALO_REAL) then
964
- ! select case (group % fields(i) % nDims)
965
- ! case (1 )
966
- ! !$acc exit data copyout(group % fields(i) % r1arr(:))
967
- ! case (2 )
968
- ! !$acc exit data copyout(group % fields(i) % r2arr(:,:))
969
- ! case (3 )
970
- ! !$acc exit data copyout(group % fields(i) % r3arr(:,:,:))
971
- ! end select
972
- ! end if
973
- ! end do
974
- ! !$acc wait
976
+ MPAS_ACC_TIMER_STOP(' halo_exch [ACC_data_xfer]' )
975
977
976
978
!
977
979
! Nullify array pointers - not necessary for correctness, but helpful when debugging
@@ -992,6 +994,8 @@ subroutine mpas_halo_exch_group_full_halo_exch(domain, groupName, iErr)
992
994
!
993
995
call MPI_Waitall(group % nGroupSendNeighbors, group % sendRequests, MPI_STATUSES_IGNORE, mpi_ierr)
994
996
997
+ call mpas_timer_stop(' full_halo_exch' )
998
+
995
999
end subroutine mpas_halo_exch_group_full_halo_exch
996
1000
997
1001
0 commit comments