diff --git a/src/gen_modules_diag.F90 b/src/gen_modules_diag.F90 index 0fc940790..c466c7e7d 100644 --- a/src/gen_modules_diag.F90 +++ b/src/gen_modules_diag.F90 @@ -3266,7 +3266,11 @@ subroutine dvd_add_difflux_bhvisc(do_SDdvd, tr_num, dvd_tot, tr, trstar, gamma0_ ! first round !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(edge, nz, ednodes, edelem, elnodes_l, elnodes_r, & !$OMP nu1, nl1, du, dv, dt, len, vi) -!$OMP DO +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else +!$OMP DO +#endif do edge=1, myDim_edge2D!+eDim_edge2D ! skip boundary edges only consider inner edges if (myList_edge2D(edge) > edge2D_in) cycle @@ -3313,7 +3317,11 @@ subroutine dvd_add_difflux_bhvisc(do_SDdvd, tr_num, dvd_tot, tr, trstar, gamma0_ !___________________________________________________________________________ ! second round: +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do edge=1, myDim_edge2D!+eDim_edge2D ! skip boundary edges only consider inner edges if (myList_edge2D(edge) > edge2D_in) cycle diff --git a/src/gen_support.F90 b/src/gen_support.F90 index 01ef9c0c6..93345156c 100644 --- a/src/gen_support.F90 +++ b/src/gen_support.F90 @@ -337,13 +337,16 @@ subroutine integrate_nod_2D(data, int2D, partit, mesh) #if !defined(__openmp_reproducible) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(row) !$OMP DO REDUCTION (+: lval) -#endif do row=1, myDim_nod2D lval=lval+data(row)*areasvol(ulevels_nod2D(row),row) end do -#if !defined(__openmp_reproducible) !$OMP END DO !$OMP END PARALLEL +#else + ! 
Use serial computation for reproducible results + do row=1, myDim_nod2D + lval=lval+data(row)*areasvol(ulevels_nod2D(row),row) + end do #endif int2D=0.0_WP call MPI_AllREDUCE(lval, int2D, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & @@ -374,7 +377,12 @@ subroutine integrate_nod_3D(data, int3D, partit, mesh) #include "associate_mesh_ass.h" lval=0.0_WP +#if defined(__openmp_reproducible) +!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(row, k, lval_row) +!$OMP DO ORDERED +#else !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(row, k, lval_row) REDUCTION(+: lval) +#endif do row=1, myDim_nod2D lval_row = 0. do k=ulevels_nod2D(row), nlevels_nod2D(row)-1 @@ -382,13 +390,19 @@ subroutine integrate_nod_3D(data, int3D, partit, mesh) end do #if defined(__openmp_reproducible) !$OMP ORDERED -#endif +!$OMP ATOMIC UPDATE lval = lval + lval_row -#if defined(__openmp_reproducible) !$OMP END ORDERED +#else + lval = lval + lval_row #endif end do +#if defined(__openmp_reproducible) +!$OMP END DO +!$OMP END PARALLEL +#else !$OMP END PARALLEL DO +#endif int3D=0.0_WP call MPI_AllREDUCE(lval, int3D, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & @@ -539,6 +553,7 @@ FUNCTION omp_min_max_sum1(arr, pos1, pos2, what, partit, nan) CASE ('min') val=arr(1) +#if !defined(__openmp_reproducible) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(n) !$OMP DO REDUCTION(min: val) do n=pos1, pos2 @@ -546,9 +561,13 @@ FUNCTION omp_min_max_sum1(arr, pos1, pos2, what, partit, nan) end do !$OMP END DO !$OMP END PARALLEL +#else + val = minval(arr(pos1:pos2)) +#endif CASE ('max') val=arr(1) +#if !defined(__openmp_reproducible) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(n) !$OMP DO REDUCTION(max: val) do n=pos1, pos2 @@ -556,6 +575,9 @@ FUNCTION omp_min_max_sum1(arr, pos1, pos2, what, partit, nan) end do !$OMP END DO !$OMP END PARALLEL +#else + val = maxval(arr(pos1:pos2)) +#endif CASE DEFAULT if (partit%mype==0) write(*,*) trim(what), ' is not implemented in omp_min_max_sum case!' 
@@ -575,7 +597,7 @@ FUNCTION omp_min_max_sum2(arr, pos11, pos12, pos21, pos22, what, partit, nan) character(3), intent(in) :: what real(kind=WP), optional :: nan !to be implemented upon the need (for masked arrays) real(kind=WP) :: omp_min_max_sum2 - real(kind=WP) :: val, vmasked, val_part(pos11:pos12) + real(kind=WP) :: val, vmasked, val_part(pos21:pos22) integer :: i, j @@ -588,6 +610,7 @@ FUNCTION omp_min_max_sum2(arr, pos11, pos12, pos21, pos22, what, partit, nan) CASE ('min') if (.not. present(nan)) vmasked=huge(vmasked) !just some crazy number val=arr(1,1) +#if !defined(__openmp_reproducible) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i, j) !$OMP DO REDUCTION(min: val) do j=pos21, pos22 @@ -597,10 +620,14 @@ FUNCTION omp_min_max_sum2(arr, pos11, pos12, pos21, pos22, what, partit, nan) end do !$OMP END DO !$OMP END PARALLEL +#else + val = minval(arr(pos11:pos12,pos21:pos22), mask=(arr(pos11:pos12,pos21:pos22)/=vmasked)) +#endif CASE ('max') if (.not. present(nan)) vmasked=tiny(vmasked) !just some crazy number val=arr(1,1) +#if !defined(__openmp_reproducible) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(i, j) !$OMP DO REDUCTION(max: val) do j=pos21, pos22 @@ -610,6 +637,9 @@ FUNCTION omp_min_max_sum2(arr, pos11, pos12, pos21, pos22, what, partit, nan) end do !$OMP END DO !$OMP END PARALLEL +#else + val = maxval(arr(pos11:pos12,pos21:pos22), mask=(arr(pos11:pos12,pos21:pos22)/=vmasked)) +#endif CASE ('sum') if (.not. present(nan)) vmasked=huge(vmasked) !just some crazy number @@ -663,7 +693,12 @@ subroutine integrate_elem_3D(data, int3D, partit, mesh) #include "associate_mesh_ass.h" lval=0.0_WP +#if defined(__openmp_reproducible) +!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(row, k, lval_row) +!$OMP DO ORDERED +#else !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(row, k, lval_row) REDUCTION(+: lval) +#endif do row=1, myDim_elem2D if(elem2D_nodes(1, row) > myDim_nod2D) cycle lval_row = 0. 
@@ -672,13 +707,19 @@ subroutine integrate_elem_3D(data, int3D, partit, mesh) end do #if defined(__openmp_reproducible) !$OMP ORDERED -#endif +!$OMP ATOMIC UPDATE lval = lval + lval_row -#if defined(__openmp_reproducible) !$OMP END ORDERED +#else + lval = lval + lval_row #endif end do +#if defined(__openmp_reproducible) +!$OMP END DO +!$OMP END PARALLEL +#else !$OMP END PARALLEL DO +#endif int3D=0.0_WP call MPI_AllREDUCE(lval, int3D, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & @@ -707,7 +748,12 @@ subroutine integrate_elem_2D(data, int2D, partit, mesh) #include "associate_mesh_ass.h" lval=0.0_WP +#if defined(__openmp_reproducible) +!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(row) +!$OMP DO ORDERED +#else !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(row) REDUCTION(+: lval) +#endif do row=1, myDim_elem2D if(elem2D_nodes(1, row) > myDim_nod2D) cycle #if defined(__openmp_reproducible) @@ -718,7 +764,12 @@ subroutine integrate_elem_2D(data, int2D, partit, mesh) !$OMP END ORDERED #endif end do +#if defined(__openmp_reproducible) +!$OMP END DO +!$OMP END PARALLEL +#else !$OMP END PARALLEL DO +#endif int2D=0.0_WP call MPI_AllREDUCE(lval, int2D, 1, MPI_DOUBLE_PRECISION, MPI_SUM, & diff --git a/src/ice_EVP.F90 b/src/ice_EVP.F90 index 21ed55fe2..beff8f684 100755 --- a/src/ice_EVP.F90 +++ b/src/ice_EVP.F90 @@ -227,7 +227,11 @@ subroutine stress2rhs(ice, partit, mesh) #endif #ifndef ENABLE_OPENACC +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) #if !defined(DISABLE_OPENACC_ATOMICS) @@ -716,7 +720,11 @@ subroutine EVPdynamics(ice, partit, mesh) ! apply sea ice velocity boundary condition #ifndef ENABLE_OPENACC +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else ! 
With the binary data of np2 goes only inside the first if !$ACC PARALLEL LOOP GANG VECTOR DEFAULT(PRESENT) diff --git a/src/ice_fct.F90 b/src/ice_fct.F90 index ce4564fa6..44ac6a72d 100755 --- a/src/ice_fct.F90 +++ b/src/ice_fct.F90 @@ -759,7 +759,11 @@ subroutine ice_fem_fct(tr_array_id, ice, partit, mesh) #endif #ifndef ENABLE_OPENACC +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else #if !defined(DISABLE_OPENACC_ATOMICS) !$ACC PARALLEL LOOP GANG VECTOR PRIVATE(elnodes) DEFAULT(PRESENT) @@ -901,7 +905,11 @@ subroutine ice_fem_fct(tr_array_id, ice, partit, mesh) #endif #ifndef ENABLE_OPENACC +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else #if !defined(DISABLE_OPENACC_ATOMICS) !$ACC PARALLEL LOOP GANG VECTOR PRIVATE(elnodes) DEFAULT(PRESENT) @@ -959,7 +967,11 @@ subroutine ice_fem_fct(tr_array_id, ice, partit, mesh) end do #ifndef ENABLE_OPENACC !$OMP END DO +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else !$ACC END PARALLEL LOOP #if !defined(DISABLE_OPENACC_ATOMICS) @@ -1018,7 +1030,11 @@ subroutine ice_fem_fct(tr_array_id, ice, partit, mesh) end do #ifndef ENABLE_OPENACC !$OMP END DO +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else !$ACC END PARALLEL LOOP #if !defined(DISABLE_OPENACC_ATOMICS) @@ -1078,7 +1094,11 @@ subroutine ice_fem_fct(tr_array_id, ice, partit, mesh) end do #ifndef ENABLE_OPENACC !$OMP END DO +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else !$ACC END PARALLEL LOOP #endif @@ -1166,8 +1186,12 @@ SUBROUTINE ice_mass_matrix_fill(ice, partit, mesh) mass_matrix => ice%work%fct_massmatrix(:) ! ! 
a) -!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(n, k, row, elem, elnodes, q, offset, ipos, aa) +!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(n, k, row, elem, elnodes, q, offset, ipos, aa, flag, iflag) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif DO elem=1,myDim_elem2D elnodes=elem2D_nodes(:,elem) @@ -1326,7 +1350,11 @@ subroutine ice_TG_rhs_div(ice, partit, mesh) #ifndef ENABLE_OPENACC !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(diff, entries, um, vm, vol, dx, dy, n, q, row, elem, elnodes, c1, c2, c3, c4, cx1, cx2, cx3, cx4, entries2) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else #if !defined(DISABLE_OPENACC_ATOMICS) !$ACC PARALLEL LOOP GANG VECTOR PRIVATE(elnodes, dx, dy, entries, entries2) DEFAULT(PRESENT) diff --git a/src/ice_maEVP.F90 b/src/ice_maEVP.F90 index 791de938e..7a9ed277a 100644 --- a/src/ice_maEVP.F90 +++ b/src/ice_maEVP.F90 @@ -245,7 +245,11 @@ subroutine ssh2rhs(ice, partit, mesh) !_____________________________________________________________________________ ! use floating sea ice for zlevel and zstar if (use_floatice .and. .not. 
trim(which_ale)=='linfs') then +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do elem=1,myDim_elem2d elnodes=elem2D_nodes(:,elem) !_______________________________________________________________________ @@ -285,7 +289,11 @@ subroutine ssh2rhs(ice, partit, mesh) end do !$OMP END DO else +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do elem=1,myDim_elem2d elnodes=elem2D_nodes(:,elem) !_______________________________________________________________________ @@ -371,7 +379,11 @@ subroutine stress2rhs_m(ice, partit, mesh) !$OMP END PARALLEL DO !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(elem, elnodes, k, row, dx, dy, vol, mf, aa, bb, mass, cluster_area, elevation_elem) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do elem=1,myDim_elem2d elnodes=elem2D_nodes(:,elem) !_______________________________________________________________________ @@ -545,7 +557,11 @@ subroutine EVPdynamics_m(ice, partit, mesh) ! use floating sea ice for zlevel and zstar !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(el, elnodes, vol, dx, dy, p_ice, n, bb, aa) if (use_floatice .and. .not. trim(which_ale)=='linfs') then +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do el=1,myDim_elem2d elnodes=elem2D_nodes(:,el) @@ -588,7 +604,11 @@ subroutine EVPdynamics_m(ice, partit, mesh) !_____________________________________________________________________________ ! use levitating sea ice for linfs, zlevel and zstar else +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do el=1,myDim_elem2d elnodes=elem2D_nodes(:,el) !_______________________________________________________________________ @@ -685,7 +705,11 @@ subroutine EVPdynamics_m(ice, partit, mesh) ! 
SD, 30.07.2014 !_______________________________________________________________________ !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(el, i, ed, row, elnodes, dx, dy, meancos, eps1, eps2, delta, pressure, umod, drag, rhsu, rhsv, det, n) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do el=1,myDim_elem2D if (ulevels(el)>1) cycle diff --git a/src/oce_adv_tra_driver.F90 b/src/oce_adv_tra_driver.F90 index 9b9606ea8..07251812f 100644 --- a/src/oce_adv_tra_driver.F90 +++ b/src/oce_adv_tra_driver.F90 @@ -131,7 +131,11 @@ subroutine do_oce_adv_tra(dt, vel, w, wi, we, tr_num, dynamics, tracers, partit, #endif #ifndef ENABLE_OPENACC +#if defined(__openmp_reproducible) +!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(e, enodes, el, nl1, nu1, nl2, nu2, nu12, nl12, nz) ORDERED +#else !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(e, enodes, el, nl1, nu1, nl2, nu2, nu12, nl12, nz) +#endif #else #if !defined(DISABLE_OPENACC_ATOMICS) !$ACC PARALLEL LOOP GANG PRIVATE(enodes, el) DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) @@ -493,7 +497,11 @@ subroutine oce_tra_adv_flux2dtracer(dt, dttf_h, dttf_v, flux_h, flux_v, partit, #endif ! 
Horizontal #ifndef ENABLE_OPENACC +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else #if !defined(DISABLE_OPENACC_ATOMICS) !$ACC PARALLEL LOOP GANG PRIVATE(enodes, el) DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) diff --git a/src/oce_adv_tra_fct.F90 b/src/oce_adv_tra_fct.F90 index 218622e3c..751bdd0cc 100644 --- a/src/oce_adv_tra_fct.F90 +++ b/src/oce_adv_tra_fct.F90 @@ -267,7 +267,7 @@ subroutine oce_tra_adv_fct(dt, ttf, lo, adf_h, adf_v, fct_ttf_min, fct_ttf_max, #endif !Vertical #ifndef ENABLE_OPENACC -!$OMP DO +!$OMP DO #else !$ACC PARALLEL LOOP GANG DEFAULT(PRESENT) VECTOR_LENGTH(acc_vl) #endif @@ -288,7 +288,11 @@ subroutine oce_tra_adv_fct(dt, ttf, lo, adf_h, adf_v, fct_ttf_min, fct_ttf_max, #endif #ifndef ENABLE_OPENACC +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif #else !Horizontal #if !defined(DISABLE_OPENACC_ATOMICS) diff --git a/src/oce_ale.F90 b/src/oce_ale.F90 index b86c7a87f..841e63aa2 100644 --- a/src/oce_ale.F90 +++ b/src/oce_ale.F90 @@ -1709,8 +1709,11 @@ subroutine update_stiff_mat_ale(partit, mesh) ! to finite volumes" --> stiff matrix part ! loop over lcal edges factor=g*dt*alpha*theta - +#if defined(__openmp_reproducible) +!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(n, i, j, k, row, ed, n2, enodes, elnodes, el, elem, npos, offset, nini, nend, fx, fy) ORDERED +#else !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(n, i, j, k, row, ed, n2, enodes, elnodes, el, elem, npos, offset, nini, nend, fx, fy) +#endif do ed=1,myDim_edge2D !! Attention ! enodes ... local node indices of nodes that edge ed enodes=edges(:,ed) @@ -1840,7 +1843,11 @@ subroutine compute_ssh_rhs_ale(dynamics, partit, mesh) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(ed, el, enodes, n, nz, nzmin, nzmax, c1, c2, deltaX1, deltaX2, deltaY1, deltaY2, & !$OMP dumc1_1, dumc1_2, dumc2_1, dumc2_2) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do ed=1, myDim_edge2D ! 
local indice of nodes that span up edge ed enodes=edges(:,ed) @@ -1992,7 +1999,11 @@ subroutine compute_hbar_ale(dynamics, partit, mesh) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(ed, el, enodes, elem, elnodes, n, nz, nzmin, nzmax, & !$OMP c1, c2, deltaX1, deltaX2, deltaY1, deltaY2) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do ed=1, myDim_edge2D ! local indice of nodes that span up edge ed enodes=edges(:,ed) @@ -2171,7 +2182,11 @@ subroutine vert_vel_ale(dynamics, partit, mesh) !$OMP END PARALLEL DO !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(ed, enodes, el, deltaX1, deltaY1, nz, nzmin, nzmax, deltaX2, deltaY2, c1, c2) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do ed=1, myDim_edge2D ! local indice of nodes that span up edge ed enodes=edges(:,ed) @@ -2725,7 +2740,11 @@ subroutine compute_vert_vel_transpv(dynamics, partit, mesh) !___________________________________________________________________________ !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(ed, ednodes, edelem, nz, nzmin, nzmax, & !$OMP deltaX1, deltaY1, deltaX2, deltaY2, c1, c2) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do ed=1, myDim_edge2D ! local indice of nodes that span up edge ed ednodes=edges(:,ed) diff --git a/src/oce_ale_pressure_bv.F90 b/src/oce_ale_pressure_bv.F90 index e66abfe85..0ec89c50d 100644 --- a/src/oce_ale_pressure_bv.F90 +++ b/src/oce_ale_pressure_bv.F90 @@ -232,6 +232,7 @@ subroutine pressure_bv(tracers, partit, mesh) !___________________________________________________________________________ ! Screen salinity a =0.0_WP +#if !defined(__openmp_reproducible) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(node, nz, nzmin, nzmax) !$OMP DO REDUCTION(min: a) do node=1, myDim_nod2D+eDim_nod2D @@ -243,6 +244,10 @@ subroutine pressure_bv(tracers, partit, mesh) enddo !$OMP END DO !$OMP END PARALLEL +#else + ! 
Reproducible sequential minval + a = minval(salt(1:nl-1, 1:myDim_nod2D+eDim_nod2D)) +#endif !___________________________________________________________________________ ! model explodes, no OpenMP parallelization ! @@ -3238,6 +3243,7 @@ subroutine init_ref_density_advanced(tracers, partit, mesh) vol1D=0.0_WP ref_temp1D=0.0_WP ref_salt1D=0.0_WP +#if !defined(__openmp_reproducible) !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(node, nz, nzmin, nzmax) REDUCTION(+:vol1D) do node=1,myDim_nod2d x=geo_coord_nod2D(1,node)/rad @@ -3253,6 +3259,22 @@ subroutine init_ref_density_advanced(tracers, partit, mesh) end do end do !$OMP END PARALLEL DO +#else +! Reproducible sequential summation +do node=1,myDim_nod2d + x=geo_coord_nod2D(1,node)/rad + y=geo_coord_nod2D(2,node)/rad + if ((x>=-6.) .AND. (x<=42) .AND. (y>=30.15) .AND. (y<=42)) CYCLE + if ((x>= 2.) .AND. (x<=42) .AND. (y>=41) .AND. (y<=48)) CYCLE + nzmin = 1 + nzmax = nlevels_nod2d(node)-1 + do nz=nzmin,nzmax + ref_temp1D(nz)=ref_temp1D(nz)+areasvol(nz,node)*temp(nz,node) + ref_salt1D(nz)=ref_salt1D(nz)+areasvol(nz,node)*salt(nz,node) + vol1D(nz)=vol1D(nz)+areasvol(nz,node) + end do +end do +#endif call MPI_Allreduce(MPI_IN_PLACE, ref_temp1D, mesh%nl-1, MPI_DOUBLE, MPI_SUM, partit%MPI_COMM_FESOM, MPIerr) call MPI_Allreduce(MPI_IN_PLACE, ref_salt1D, mesh%nl-1, MPI_DOUBLE, MPI_SUM, partit%MPI_COMM_FESOM, MPIerr) call MPI_Allreduce(MPI_IN_PLACE, vol1D, mesh%nl-1, MPI_DOUBLE, MPI_SUM, partit%MPI_COMM_FESOM, MPIerr) diff --git a/src/oce_ale_ssh_splitexpl_subcycl.F90 b/src/oce_ale_ssh_splitexpl_subcycl.F90 index e1069027f..1441a8317 100644 --- a/src/oce_ale_ssh_splitexpl_subcycl.F90 +++ b/src/oce_ale_ssh_splitexpl_subcycl.F90 @@ -110,7 +110,11 @@ subroutine momentum_adv_scalar_transpv(dynamics, partit, mesh) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(node, elem, ed, nz, nl1, ul1, nl2, ul2, nl12, ul12, & !$OMP uv1, uv2, uv12, qc, qu, qd, wu, wv, & !$OMP ednodes, edelem, un1, un2) -!$OMP DO +#if defined(__openmp_reproducible) +!$OMP DO 
ORDERED +#else +!$OMP DO +#endif do node=1, myDim_nod2D ul1 = ulevels_nod2D(node) nl1 = nlevels_nod2D(node) @@ -179,7 +183,11 @@ subroutine momentum_adv_scalar_transpv(dynamics, partit, mesh) !___________________________________________________________________________ ! 2nd. compute horizontal advection component: u*du/dx, u*dv/dx & v*du/dy, v*dv/dy ! loop over triangle edges -!$OMP DO +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else +!$OMP DO +#endif do ed=1, myDim_edge2D ! local indice of nodes that span up edge ed ednodes = edges(:,ed) @@ -199,6 +207,13 @@ subroutine momentum_adv_scalar_transpv(dynamics, partit, mesh) un1(ul1:nl1) = ( UVh(2, ul1:nl1, edelem(1))*edge_cross_dxdy(1,ed) & - UVh(1, ul1:nl1, edelem(1))*edge_cross_dxdy(2,ed)) + !___________________________________________________________________ + ! ensure openmp numerical reproducability + ! block had to be moved outside of if statement, otherwise the else + ! part was missing the !$OMP ORDERED +#if defined(__openmp_reproducible) +!$OMP ORDERED +#endif !_______________________________________________________________________ ! if edelem(2)==0 than edge is boundary edge if(edelem(2)>0) then @@ -221,11 +236,6 @@ subroutine momentum_adv_scalar_transpv(dynamics, partit, mesh) ul12 = max(ul1, ul2) nl12 = min(nl1, nl2) - !___________________________________________________________________ - ! ensure openmp numerical reproducability -#if defined(__openmp_reproducible) -!$OMP ORDERED -#endif !___________________________________________________________________ !NR add contribution to first edge node --> ednodes(1) !NR Do not calculate on Halo nodes, as the result will not be used. @@ -303,7 +313,7 @@ subroutine momentum_adv_scalar_transpv(dynamics, partit, mesh) !_______________________________________________________________________ ! if edelem(2)==0 than edge is boundary edge - else ! 
--> if(edelem(2)>0) then + else !___________________________________________________________________ !NR add contribution to first edge node --> ednodes(1) !NR Do not calculate on Halo nodes, as the result will not be used. @@ -338,13 +348,13 @@ subroutine momentum_adv_scalar_transpv(dynamics, partit, mesh) call omp_unset_lock(partit%plock(ednodes(2))) #endif end if - end if ! --> if(edelem(2)>0) then + end if #if defined(__openmp_reproducible) !$OMP END ORDERED #endif - end do ! --> do ed=1, myDim_edge2D + end do !$OMP END DO !___________________________________________________________________________ @@ -847,7 +857,11 @@ subroutine compute_BT_step_SE_ale(dynamics, partit, mesh) ! remove viscosity !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(edge, edelem, ednodes, hh, len, & !$OMP vi, update_ubt, update_vbt) -!$OMP DO +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else +!$OMP DO +#endif do edge=1, myDim_edge2D+eDim_edge2D ! if ed is an outer boundary edge, skip it @@ -902,7 +916,11 @@ subroutine compute_BT_step_SE_ale(dynamics, partit, mesh) ! remove bottom drag if (dynamics%se_bottdrag) then !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(elem, elnodes, nzmax, hh) -!$OMP DO +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else +!$OMP DO +#endif do elem=1, myDim_elem2D elnodes= elem2D_nodes(:,elem) nzmax = nlevels(elem) @@ -952,7 +970,11 @@ subroutine compute_BT_step_SE_ale(dynamics, partit, mesh) !___________________________________________________________________ !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(edge, edelem, ednodes, hh, len, & !$OMP vi, update_ubt, update_vbt) -!$OMP DO +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else +!$OMP DO +#endif do edge=1, myDim_edge2D+eDim_edge2D ! if ed is an outer boundary edge, skip it @@ -1138,7 +1160,11 @@ subroutine compute_BT_step_SE_ale(dynamics, partit, mesh) ! and advance ssh --> eta^(n+(m+1)/M) = eta^(n+(m)/M) - dt/M * div_H * [...] 
!$OMP PARALLEL DEFAULT(SHARED) PRIVATE(edge, ednodes, edelem, c1, c2, & !$OMP deltaX1, deltaX2, deltaY1, deltaY2) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do edge=1, myDim_edge2D ednodes = edges(:,edge) edelem = edge_tri(:,edge) diff --git a/src/oce_ale_tracer.F90 b/src/oce_ale_tracer.F90 index dd4ce9101..79233acf5 100644 --- a/src/oce_ale_tracer.F90 +++ b/src/oce_ale_tracer.F90 @@ -1204,7 +1204,11 @@ subroutine diff_part_hor_redi(tracers, partit, mesh) !$OMP nl1, ul1, nl2, ul2, nl12, ul12, nz, el, elnodes, enodes, & !$OMP c, Fx, Fy, Tx, Ty, Tx_z, Ty_z, SxTz, SyTz, Tz, & !$OMP rhs1, rhs2, Kh, dz) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do edge=1, myDim_edge2D rhs1=0.0_WP rhs2=0.0_WP @@ -1377,7 +1381,11 @@ SUBROUTINE diff_part_bh(tr_num, dynamics, tracers, partit, mesh) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(n, nz, ed, el, en, k, elem, nzmin, nzmax, u1, v1, len, vi, tt, ww, & !$OMP elnodes1, elnodes2) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif DO ed=1, myDim_edge2D!+eDim_edge2D if (myList_edge2D(ed) > edge2D_in) cycle el=edge_tri(:,ed) @@ -1423,7 +1431,11 @@ SUBROUTINE diff_part_bh(tr_num, dynamics, tracers, partit, mesh) ! =========== ! Second round: ! =========== +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif DO ed=1, myDim_edge2D!+eDim_edge2D if (myList_edge2D(ed)>edge2D_in) cycle el=edge_tri(:,ed) diff --git a/src/oce_ale_vel_rhs.F90 b/src/oce_ale_vel_rhs.F90 index 2cf0ab2b0..2f19e7959 100644 --- a/src/oce_ale_vel_rhs.F90 +++ b/src/oce_ale_vel_rhs.F90 @@ -423,7 +423,11 @@ subroutine momentum_adv_scalar(dynamics, partit, mesh) !___________________________________________________________________________ ! 2nd. compute horizontal advection component: u*du/dx, u*dv/dx & v*du/dy, v*dv/dy ! 
loop over triangle edges +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do ed=1, myDim_edge2D nod = edges(:,ed) el1 = edge_tri(1,ed) @@ -454,6 +458,9 @@ subroutine momentum_adv_scalar(dynamics, partit, mesh) ! compute horizontal normal velocity with respect to the edge from triangle ! centroid towards triangel edge mid-pointe for element el2 when it is valid ! --> if its a boundary triangle el2 will be not valid +#if defined(__openmp_reproducible) +!$OMP ORDERED +#endif if (el2>0) then ! --> el2 is valid element nl2 = nlevels(el2)-1 ul2 = ulevels(el2) @@ -469,10 +476,6 @@ subroutine momentum_adv_scalar(dynamics, partit, mesh) un1(1:ul1-1) = 0._WP un2(1:ul2-1) = 0._WP -#if defined(__openmp_reproducible) -!$OMP ORDERED -#endif - ! first edge node ! Do not calculate on Halo nodes, as the result will not be used. ! The "if" is cheaper than the avoided computiations. diff --git a/src/oce_dyn.F90 b/src/oce_dyn.F90 index a265851bf..d995bc087 100755 --- a/src/oce_dyn.F90 +++ b/src/oce_dyn.F90 @@ -323,7 +323,11 @@ SUBROUTINE visc_filt_bcksct(dynamics, partit, mesh) !___________________________________________________________________________ !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(u1, v1, len, vi, nz, ed, el, nelem, k, elem, nzmin, nzmax, update_u, update_v) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif DO ed=1, myDim_edge2D+eDim_edge2D if(myList_edge2D(ed)>edge2D_in) cycle el=edge_tri(:,ed) @@ -473,7 +477,11 @@ SUBROUTINE visc_filt_bilapl(dynamics, partit, mesh) !___________________________________________________________________________ ! 
Sum up velocity differences over edge with respect to elemtnal index !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(u1, v1, len, vi, ed, el, nz, nzmin, nzmax) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif DO ed=1, myDim_edge2D+eDim_edge2D if(myList_edge2D(ed)>edge2D_in) cycle el = edge_tri(:,ed) @@ -533,7 +541,11 @@ SUBROUTINE visc_filt_bilapl(dynamics, partit, mesh) !$OMP BARRIER !___________________________________________________________________________ +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif DO ed=1, myDim_edge2D+eDim_edge2D if(myList_edge2D(ed)>edge2D_in) cycle el=edge_tri(:,ed) @@ -627,7 +639,11 @@ SUBROUTINE visc_filt_bidiff(dynamics, partit, mesh) !___________________________________________________________________________ !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(u1, v1, len, vi, ed, el, nz, nzmin, nzmax, update_u, update_v) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif DO ed=1, myDim_edge2D+eDim_edge2D if(myList_edge2D(ed)>edge2D_in) cycle el=edge_tri(:,ed) @@ -675,7 +691,11 @@ SUBROUTINE visc_filt_bidiff(dynamics, partit, mesh) !$OMP BARRIER !___________________________________________________________________________ +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif DO ed=1, myDim_edge2D+eDim_edge2D if(myList_edge2D(ed)>edge2D_in) cycle el=edge_tri(:,ed) diff --git a/src/oce_mesh.F90 b/src/oce_mesh.F90 index c61ae3374..ee21d83af 100755 --- a/src/oce_mesh.F90 +++ b/src/oce_mesh.F90 @@ -2510,6 +2510,7 @@ SUBROUTINE mesh_auxiliary_arrays(partit, mesh) center_x(n)=ax center_y(n)=ay mesh%elem_cos(n)=cos(ay) + if (metric_factor_zero) then mesh%metric_factor(n)=0.0_WP else diff --git a/src/oce_muscl_adv.F90 b/src/oce_muscl_adv.F90 index cb3c0638a..67075689a 100755 --- a/src/oce_muscl_adv.F90 +++ b/src/oce_muscl_adv.F90 @@ -58,6 +58,7 @@ subroutine muscl_adv_init(twork, partit, mesh) 
!___________________________________________________________________________ nn_size=0 k=0 +#if !defined(__openmp_reproducible) !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(n) !$OMP DO REDUCTION(max: k) do n=1, myDim_nod2D @@ -73,6 +74,10 @@ subroutine muscl_adv_init(twork, partit, mesh) end do !$OMP END DO !$OMP END PARALLEL +#else + ! Reproducible sequential maxval + k = maxval(SSH_stiff%rowptr(2:myDim_nod2D+1) - SSH_stiff%rowptr(1:myDim_nod2D)) +#endif nn_size=k !___________________________________________________________________________ allocate(mesh%nn_num(myDim_nod2D), mesh%nn_pos(nn_size,myDim_nod2D)) @@ -92,7 +97,11 @@ subroutine muscl_adv_init(twork, partit, mesh) allocate(twork%nboundary_lay(myDim_nod2D+eDim_nod2D)) !node n becomes a boundary node after layer twork%nboundary_lay(n) twork%nboundary_lay=nl-1 !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(n, k, n1, n2) +#if defined(__openmp_reproducible) +!$OMP DO ORDERED +#else !$OMP DO +#endif do n=1, myDim_edge2D ! n1 and n2 are local indices n1=edges(1,n) diff --git a/src/solver.F90 b/src/solver.F90 index 2a1a95af1..3554513f9 100644 --- a/src/solver.F90 +++ b/src/solver.F90 @@ -252,7 +252,7 @@ subroutine ssh_solve_cg(x, rhs, solverinfo, partit, mesh) !$OMP END PARALLEL DO #else sprod(1) = sum(rr(1:myDim_nod2D) * zz(1:myDim_nod2D)) - sprod(1) = sum(rr(1:myDim_nod2D) * rr(1:myDim_nod2D)) + sprod(2) = sum(rr(1:myDim_nod2D) * rr(1:myDim_nod2D)) #endif call MPI_Allreduce(MPI_IN_PLACE, sprod, 2, MPI_DOUBLE, MPI_SUM, partit%MPI_COMM_FESOM, MPIerr)