diff --git a/radiation/CMakeLists.txt b/radiation/CMakeLists.txt index db5ae2fd..b032f0f2 100644 --- a/radiation/CMakeLists.txt +++ b/radiation/CMakeLists.txt @@ -49,6 +49,8 @@ set( radiation_SOURCES radiation_liquid_optics_socrates.F90 radiation_lw_derivatives.F90 radiation_matrix.F90 + radiation_mcica_omp_lw.F90 + radiation_mcica_omp_sw.F90 radiation_mcica_acc_lw.F90 radiation_mcica_acc_sw.F90 radiation_mcica_lw.F90 diff --git a/radiation/radiation_adding_ica_lw.F90 b/radiation/radiation_adding_ica_lw.F90 index 4fdced20..990d5b69 100644 --- a/radiation/radiation_adding_ica_lw.F90 +++ b/radiation/radiation_adding_ica_lw.F90 @@ -21,6 +21,8 @@ module radiation_adding_ica_lw public + !$omp declare target(fast_adding_ica_lw_omp) + !$omp declare target(calc_fluxes_no_scattering_lw_omp) contains !--------------------------------------------------------------------- @@ -281,6 +283,125 @@ subroutine fast_adding_ica_lw(ncol, nlev, & end subroutine fast_adding_ica_lw + !--------------------------------------------------------------------- + ! Use the scalar "adding" method to compute longwave flux profiles, + ! including scattering in cloudy layers only. + subroutine fast_adding_ica_lw_omp(jg, ng, nlev, & + & reflectance, transmittance, source_up, source_dn, emission_surf, albedo_surf, & + & is_clear_sky_layer, i_cloud_top, flux_dn_clear, & + & flux_up, flux_dn, albedo, inv_denominator, source) + + use parkind1, only : jprb + implicit none + + ! Inputs + integer, intent(in) :: jg ! spectral index + integer, intent(in) :: ng ! number of spectral bands + integer, intent(in) :: nlev ! number of levels + + ! Surface emission (W m-2) and albedo + real(jprb), intent(in), dimension(ng) :: emission_surf, albedo_surf + + ! Diffuse reflectance and transmittance of each layer + real(jprb), intent(in), dimension(ng, nlev) :: reflectance, transmittance + + ! Emission from each layer in an upward and downward direction + real(jprb), intent(in), dimension(ng, nlev) :: source_up, source_dn + + ! Determine which layers are cloud-free + logical, intent(in) :: is_clear_sky_layer(nlev) + + ! Index to highest cloudy layer + integer, intent(in) :: i_cloud_top + + ! Pre-computed clear-sky downwelling fluxes (W m-2) at half-levels + real(jprb), intent(in), dimension(ng, nlev+1) :: flux_dn_clear + + ! Resulting fluxes (W m-2) at half-levels: diffuse upwelling and + ! downwelling + real(jprb), intent(out), dimension(ng, nlev+1) :: flux_up, flux_dn + real(jprb), intent(out), dimension(ng, nlev+1) :: albedo, inv_denominator + + ! Upwelling radiation at each half-level due to emission below + ! that half-level (W m-2) + real(jprb), intent(out), dimension(ng, nlev+1) :: source + + ! Loop index for model level and column + integer :: jlev + + ! + ! Ideally, we would use the associate statement below, however this doesn't + ! work with NVCOMPILER. Thus, we pass albedo and inv_denominator in from the + ! calling code. However, we pass it in as flux up and flux_dn. That is, the + ! flux_up and flux_dn are temporarilly reusable. + ! + !associate(albedo=>flux_up, inv_denominator=>flux_dn) + + albedo(jg,nlev+1) = albedo_surf(jg) + + ! At the surface, the source is thermal emission + source(jg,nlev+1) = emission_surf(jg) + + ! Work back up through the atmosphere and compute the albedo of + ! the entire earth/atmosphere system below that half-level, and + ! also the "source", which is the upwelling flux due to emission + ! below that level + do jlev = nlev,i_cloud_top,-1 + if (is_clear_sky_layer(jlev)) then + ! Reflectance of this layer is zero, simplifying the expression + albedo(jg,jlev) = transmittance(jg,jlev)*transmittance(jg,jlev)*albedo(jg,jlev+1) + source(jg,jlev) = source_up(jg,jlev) & + & + transmittance(jg,jlev) * (source(jg,jlev+1) & + & + albedo(jg,jlev+1)*source_dn(jg,jlev)) + else + ! Loop over columns; explicit loop seems to be faster + ! Lacis and Hansen (1974) Eq 33, Shonk & Hogan (2008) Eq 10: + inv_denominator(jg,jlev+1) = 1.0_jprb & + & / (1.0_jprb-albedo(jg,jlev+1)*reflectance(jg,jlev)) + ! Shonk & Hogan (2008) Eq 9, Petty (2006) Eq 13.81: + albedo(jg,jlev) = reflectance(jg,jlev) + transmittance(jg,jlev)*transmittance(jg,jlev) & + & * albedo(jg,jlev+1) * inv_denominator(jg,jlev+1) + ! Shonk & Hogan (2008) Eq 11: + source(jg,jlev) = source_up(jg,jlev) & + & + transmittance(jg,jlev) * (source(jg,jlev+1) & + & + albedo(jg,jlev+1)*source_dn(jg,jlev)) & + & * inv_denominator(jg,jlev+1) + end if + end do + + ! Copy over downwelling fluxes above cloud from clear sky + do jlev = 1,i_cloud_top + flux_dn(jg,jlev) = flux_dn_clear(jg,jlev) + enddo + + ! Compute the fluxes above the highest cloud + flux_up(jg,i_cloud_top) = source(jg,i_cloud_top) & + & + albedo(jg,i_cloud_top)*flux_dn(jg,i_cloud_top) + do jlev = i_cloud_top-1,1,-1 + flux_up(jg,jlev) = transmittance(jg,jlev)*flux_up(jg,jlev+1) + source_up(jg,jlev) + end do + + ! Work back down through the atmosphere from cloud top computing + ! the fluxes at each half-level + do jlev = i_cloud_top,nlev + if (is_clear_sky_layer(jlev)) then + flux_dn(jg,jlev+1) = transmittance(jg,jlev)*flux_dn(jg,jlev) & + & + source_dn(jg,jlev) + flux_up(jg,jlev+1) = albedo(jg,jlev+1)*flux_dn(jg,jlev+1) & + & + source(jg,jlev+1) + else + ! Shonk & Hogan (2008) Eq 14 (after simplification): + flux_dn(jg,jlev+1) & + & = (transmittance(jg,jlev)*flux_dn(jg,jlev) & + & + reflectance(jg,jlev)*source(jg,jlev+1) & + & + source_dn(jg,jlev)) * inv_denominator(jg,jlev+1) + ! Shonk & Hogan (2008) Eq 12: + flux_up(jg,jlev+1) = albedo(jg,jlev+1)*flux_dn(jg,jlev+1) & + & + source(jg,jlev+1) + end if + end do + + end subroutine fast_adding_ica_lw_omp !--------------------------------------------------------------------- ! If there is no scattering then fluxes may be computed simply by @@ -370,4 +491,61 @@ subroutine calc_fluxes_no_scattering_lw(ncol, nlev, & end subroutine calc_fluxes_no_scattering_lw + + !--------------------------------------------------------------------- + ! If there is no scattering then fluxes may be computed simply by + ! passing down through the atmosphere computing the downwelling + ! fluxes from the transmission and emission of each layer, and then + ! passing back up through the atmosphere to compute the upwelling + ! fluxes in the same way. + subroutine calc_fluxes_no_scattering_lw_omp(jg, ng, nlev, & + & transmittance, source_up, source_dn, emission_surf, albedo_surf, flux_up, flux_dn) + + use parkind1, only : jprb + implicit none + + ! Inputs + integer, intent(in) :: jg, ng + integer, intent(in) :: nlev ! number of levels + + ! Surface emission (W m-2) and albedo + real(jprb), intent(in), dimension(ng) :: emission_surf, albedo_surf + + ! Diffuse reflectance and transmittance of each layer + real(jprb), intent(in), dimension(ng, nlev) :: transmittance + + ! Emission from each layer in an upward and downward direction + real(jprb), intent(in), dimension(ng, nlev) :: source_up, source_dn + + ! Resulting fluxes (W m-2) at half-levels: diffuse upwelling and + ! downwelling + real(jprb), intent(out), dimension(ng, nlev+1) :: flux_up, flux_dn + + ! Loop index for model level + integer :: jlev, jcol + + ! At top-of-atmosphere there is no diffuse downwelling radiation + flux_dn(jg,1) = 0.0_jprb + + ! Work down through the atmosphere computing the downward fluxes + ! at each half-level +! Added for DWD (2020) +!NEC$ outerloop_unroll(8) + do jlev = 1,nlev + flux_dn(jg,jlev+1) = transmittance(jg,jlev)*flux_dn(jg,jlev) + source_dn(jg,jlev) + end do + + ! Surface reflection and emission + flux_up(jg,nlev+1) = emission_surf(jg) + albedo_surf(jg) * flux_dn(jg,nlev+1) + + ! Work back up through the atmosphere computing the upward fluxes + ! at each half-level +! Added for DWD (2020) +!NEC$ outerloop_unroll(8) + do jlev = nlev,1,-1 + flux_up(jg,jlev) = transmittance(jg,jlev)*flux_up(jg,jlev+1) + source_up(jg,jlev) + end do + + end subroutine calc_fluxes_no_scattering_lw_omp + end module radiation_adding_ica_lw diff --git a/radiation/radiation_adding_ica_sw.F90 b/radiation/radiation_adding_ica_sw.F90 index 90db91ad..67eb8d11 100644 --- a/radiation/radiation_adding_ica_sw.F90 +++ b/radiation/radiation_adding_ica_sw.F90 @@ -19,6 +19,7 @@ module radiation_adding_ica_sw public + !$omp declare target(adding_ica_sw_omp) contains subroutine adding_ica_sw(ncol, nlev, incoming_toa, & @@ -162,4 +163,128 @@ subroutine adding_ica_sw(ncol, nlev, incoming_toa, & end subroutine adding_ica_sw + subroutine adding_ica_sw_omp(jg, ng, nlev, incoming_toa, & + & albedo_surf_diffuse, albedo_surf_direct, cos_sza, & + & reflectance, transmittance, ref_dir, trans_dir_diff, trans_dir_dir, & + & flux_up, flux_dn_diffuse, flux_dn_direct, albedo, inv_denominator,& + & source) + + use parkind1, only : jprb + implicit none + + ! Inputs + integer, intent(in) :: jg, ng ! number of columns (may be spectral intervals) + integer, intent(in) :: nlev ! number of levels + + ! Incoming downwelling solar radiation at top-of-atmosphere (W m-2) + real(jprb), intent(in), dimension(ng) :: incoming_toa + + ! Surface albedo to diffuse and direct radiation + real(jprb), intent(in), dimension(ng) :: albedo_surf_diffuse, & + & albedo_surf_direct + + ! Cosine of the solar zenith angle + real(jprb), intent(in) :: cos_sza + + ! Diffuse reflectance and transmittance of each layer + real(jprb), intent(in), dimension(ng, nlev) :: reflectance, transmittance + + ! Fraction of direct-beam solar radiation entering the top of a + ! layer that is reflected back up or scattered forward into the + ! diffuse stream at the base of the layer + real(jprb), intent(in), dimension(ng, nlev) :: ref_dir, trans_dir_diff + + ! Direct transmittance, i.e. fraction of direct beam that + ! penetrates a layer without being scattered or absorbed + real(jprb), intent(in), dimension(ng, nlev) :: trans_dir_dir + + ! Resulting fluxes (W m-2) at half-levels: diffuse upwelling, + ! diffuse downwelling and direct downwelling + real(jprb), intent(out), dimension(ng, nlev+1) :: flux_up, flux_dn_diffuse, & + & flux_dn_direct + + real(jprb), intent(out), dimension(ng, nlev+1) :: albedo, inv_denominator + + ! Upwelling radiation at each half-level due to scattering of the + ! direct beam below that half-level (W m-2) + real(jprb), intent(out), dimension(ng, nlev+1) :: source + + ! Loop index for model level and column + integer :: jlev + + ! Compute profile of direct (unscattered) solar fluxes at each + ! half-level by working down through the atmosphere + flux_dn_direct(jg,1) = incoming_toa(jg) + do jlev = 1,nlev + flux_dn_direct(jg,jlev+1) = flux_dn_direct(jg,jlev)*trans_dir_dir(jg,jlev) + end do + + ! + ! Ideally, we would use the associate statement below, however this doesn't + ! work with NVCOMPILER. Thus, we pass albedo and inv_denominator in from the + ! calling code however, we pass it in as flux up and flux_dn_diffuse. That is, + ! the flux_up and flux_dn_diffuse are temporarilly reusable. + ! + !associate(albedo=>flux_up, inv_denominator=>flux_dn_diffuse) + + albedo(jg,nlev+1) = albedo_surf_diffuse(jg) + + ! At the surface, the direct solar beam is reflected back into the + ! diffuse stream + source(jg,nlev+1) = albedo_surf_direct(jg) * flux_dn_direct(jg,nlev+1) * cos_sza + + ! Work back up through the atmosphere and compute the albedo of + ! the entire earth/atmosphere system below that half-level, and + ! also the "source", which is the upwelling flux due to direct + ! radiation that is scattered below that level +! Added for DWD (2020) +!NEC$ outerloop_unroll(8) + do jlev = nlev,1,-1 + ! Next loop over columns. We could do this by indexing the + ! entire inner dimension as follows, e.g. for the first line: + ! inv_denominator(jg,jlev) = 1.0_jprb / (1.0_jprb-albedo(jg,jlev+1)*reflectance(jg,jlev)) + ! and similarly for subsequent lines, but this slows down the + ! routine by a factor of 2! Rather, we do it with an explicit + ! loop. + + ! Lacis and Hansen (1974) Eq 33, Shonk & Hogan (2008) Eq 10: + inv_denominator(jg,jlev+1) = 1.0_jprb / (1.0_jprb-albedo(jg,jlev+1)*reflectance(jg,jlev)) + ! Shonk & Hogan (2008) Eq 9, Petty (2006) Eq 13.81: + albedo(jg,jlev) = reflectance(jg,jlev) + transmittance(jg,jlev) * transmittance(jg,jlev) & + & * albedo(jg,jlev+1) * inv_denominator(jg,jlev+1) + ! Shonk & Hogan (2008) Eq 11: + source(jg,jlev) = ref_dir(jg,jlev)*flux_dn_direct(jg,jlev) & + & + transmittance(jg,jlev)*(source(jg,jlev+1) & + & + albedo(jg,jlev+1)*trans_dir_diff(jg,jlev)*flux_dn_direct(jg,jlev)) & + & * inv_denominator(jg,jlev+1) + end do + + ! At top-of-atmosphere there is no diffuse downwelling radiation + flux_dn_diffuse(jg,1) = 0.0_jprb + + ! At top-of-atmosphere, all upwelling radiation is due to + ! scattering by the direct beam below that level + flux_up(jg,1) = source(jg,1) + + ! Work back down through the atmosphere computing the fluxes at + ! each half-level +! Added for DWD (2020) +!NEC$ outerloop_unroll(8) + do jlev = 1,nlev + ! Shonk & Hogan (2008) Eq 14 (after simplification): + flux_dn_diffuse(jg,jlev+1) & + & = (transmittance(jg,jlev)*flux_dn_diffuse(jg,jlev) & + & + reflectance(jg,jlev)*source(jg,jlev+1) & + & + trans_dir_diff(jg,jlev)*flux_dn_direct(jg,jlev)) * inv_denominator(jg,jlev+1) + ! Shonk & Hogan (2008) Eq 12: + flux_up(jg,jlev+1) = albedo(jg,jlev+1)*flux_dn_diffuse(jg,jlev+1) & + & + source(jg,jlev+1) + flux_dn_direct(jg,jlev) = flux_dn_direct(jg,jlev)*cos_sza + end do + + flux_dn_direct(jg,nlev+1) = flux_dn_direct(jg,nlev+1)*cos_sza + !end associate + + end subroutine adding_ica_sw_omp + end module radiation_adding_ica_sw diff --git a/radiation/radiation_cloud_cover.F90 b/radiation/radiation_cloud_cover.F90 index c0a81b81..af0aa55e 100644 --- a/radiation/radiation_cloud_cover.F90 +++ b/radiation/radiation_cloud_cover.F90 @@ -42,6 +42,7 @@ module radiation_cloud_cover ! Maximum cloud fraction distinguishable from 1 real(jprb), parameter :: MaxCloudFrac = 1.0_jprb-epsilon(1.0_jprb)*10.0_jprb + !$omp declare target(beta2alpha) contains diff --git a/radiation/radiation_cloud_generator_acc.F90 b/radiation/radiation_cloud_generator_acc.F90 index 39cf8cb7..9b54e19b 100644 --- a/radiation/radiation_cloud_generator_acc.F90 +++ b/radiation/radiation_cloud_generator_acc.F90 @@ -23,6 +23,10 @@ module radiation_cloud_generator_acc + implicit none + + !$omp declare target(cloud_generator_omp) + public contains @@ -261,4 +265,228 @@ subroutine cloud_generator_acc(ng, nlev, & end subroutine cloud_generator_acc + + !--------------------------------------------------------------------- + ! Generate scaling factors for the cloud optical depth to represent + ! cloud overlap, the overlap of internal cloud inhomogeneities and + ! the fractional standard deviation of these inhomogeneities, for + ! use in a Monte Carlo Independent Column Approximation radiation + ! scheme. All returned profiles contain cloud, and the total cloud + ! cover is also returned, so the calling function can then do a + ! weighted average of clear and cloudy skies; this is a way to + ! reduce the Monte Carlo noise in profiles with low cloud cover. + subroutine cloud_generator_omp(jg, ng, nlev, & + & iseed, frac_threshold, & + & frac, overlap_param, decorrelation_scaling, & + & fractional_std, & + & sample_ncdf, sample_nfsd, sample_fsd1, & + & sample_inv_fsd_interval, sample_val, & + & od_scaling, total_cloud_cover, & + & ibegin, iend, & + & cum_cloud_cover, & + & pair_cloud_cover) + + use parkind1, only : jprb, jpib + use radiation_random_numbers, only : IMinstdA0, IMinstdA, IMinstdM, uniform_distribution_omp + implicit none + + ! Inputs + integer, intent(in) :: jg ! index into the g direction + integer, intent(in) :: ng ! number of g points + integer, intent(in) :: nlev ! number of model levels + integer, intent(in) :: iseed ! seed for random number generator + + ! Only cloud fractions above this threshold are considered to be + ! clouds + real(jprb), intent(in) :: frac_threshold + + ! Cloud fraction on full levels + real(jprb), intent(in) :: frac(nlev) + + ! Cloud overlap parameter for interfaces between model layers, + ! where 0 indicates random overlap and 1 indicates maximum-random + ! overlap + real(jprb), intent(in) :: overlap_param(nlev-1) + + ! Overlap parameter for internal inhomogeneities + real(jprb), intent(in) :: decorrelation_scaling + + ! Fractional standard deviation at each layer + real(jprb), intent(in) :: fractional_std(nlev) + + ! Object for sampling from a lognormal or gamma distribution + integer, intent(in) :: sample_ncdf, sample_nfsd + real(jprb), intent(in) :: sample_fsd1, sample_inv_fsd_interval + real(jprb), intent(in), dimension(:,:) :: sample_val + + ! Outputs + + ! Cloud optical depth scaling factor, with 0 indicating clear sky + real(jprb), intent(out) :: od_scaling(ng,nlev) + + ! Total cloud cover using cloud fraction and overlap parameter + real(jprb), intent(in) :: total_cloud_cover + + ! Local variables + + ! Cumulative cloud cover from TOA to the base of each layer + real(jprb), intent(in) :: cum_cloud_cover(nlev) + + ! First and last cloudy layers + integer, intent(in) :: ibegin, iend + + ! Scaled random number for finding cloud + real(jprb) :: trigger + + ! Uniform deviates between 0 and 1 + real(jprb) :: rand_top + + ! Overlap parameter of inhomogeneities + real(jprb) :: overlap_param_inhom + + real(jprb) :: rand_cloud, rand_inhom1, rand_inhom2 + + integer :: itrigger + + ! Loop index for model level and g-point + integer :: jlev + + ! Cloud cover of a pair of layers, and amount by which cloud at + ! next level increases total cloud cover as seen from above + real(jprb), intent(inout), dimension(nlev-1) :: pair_cloud_cover + real(jprb) :: overhang + + integer :: jcloud + ! Number of contiguous cloudy layers for which to compute optical + ! depth scaling + integer :: n_layers_to_scale + + ! Is it time to fill the od_scaling variable? + logical :: do_fill_od_scaling + + ! variables from manual inlining of sample_vec_from_pdf + ! Index to look-up table + integer :: ifsd, icdf + ! Weights in bilinear interpolation + real(jprb) :: wfsd, wcdf + + ! local variable for the acc rng + integer(kind=jpib) :: istate + + if (total_cloud_cover >= frac_threshold) then + ! Loop over ng columns (this loop should be paralized as soon as acc RNG is available) + do jlev = 1,nlev + od_scaling(jg,jlev) = 0.0_jprb + end do + + ! begin manuel inline of istate = initialize_acc(iseed, jg) + istate = REAL(ABS(iseed),jprb) + ! Use a modified (and vectorized) C++ minstd_rand0 algorithm to populate the state + istate = nint(mod( istate*jg*(1._jprb-0.05_jprb*jg+0.005_jprb*jg**2)*IMinstdA0, IMinstdM)) + + ! One warmup of the C++ minstd_rand algorithm + istate = mod(IMinstdA * istate, IMinstdM) + ! end manuel inline of istate = initialize_acc(iseed, jg) + call uniform_distribution_omp(istate, rand_top) + + ! Find the cloud top height corresponding to the current + ! random number, and store in itrigger + trigger = rand_top * total_cloud_cover + itrigger = iend + !$ACC LOOP SEQ + do jlev = ibegin,iend + if (trigger <= cum_cloud_cover(jlev)) then + itrigger = min(jlev, itrigger) + end if + end do + ! manual inline: call generate_column_exp_ran >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + ! So far our vertically contiguous cloud contains only one layer + n_layers_to_scale = 1 + + ! Locate the clouds below this layer: first generate some more + ! random numbers + ! Loop from the layer below the local cloud top down to the + ! bottom-most cloudy layer + do jlev = itrigger+1,iend+1 + do_fill_od_scaling = .false. + if (jlev <= iend) then + call uniform_distribution_omp(istate, rand_cloud) + if (n_layers_to_scale > 0) then + ! There is a cloud above, in which case the probability + ! of cloud in the layer below is as follows + if (rand_cloud*frac(jlev-1) & + & < frac(jlev) + frac(jlev-1) - pair_cloud_cover(jlev-1)) then + ! Add another cloudy layer + n_layers_to_scale = n_layers_to_scale + 1 + else + ! Reached the end of a contiguous set of cloudy layers and + ! will compute the optical depth scaling immediately. + do_fill_od_scaling = .true. + end if + else + overhang = cum_cloud_cover(jlev)-cum_cloud_cover(jlev-1) + ! There is clear-sky above, in which case the + ! probability of cloud in the layer below is as follows + if (rand_cloud*(cum_cloud_cover(jlev-1) - frac(jlev-1)) & + & < pair_cloud_cover(jlev-1) - overhang - frac(jlev-1)) then + ! A new cloud top + n_layers_to_scale = 1 + end if + end if + else + ! We are at the bottom of the cloudy layers in the model, + ! so in a moment need to populate the od_scaling array + do_fill_od_scaling = .true. + end if ! (jlev <= iend) + + if (do_fill_od_scaling) then + call uniform_distribution_omp(istate, rand_inhom1) + ! Loop through the sequence of cloudy layers + !$ACC LOOP SEQ PRIVATE(rand_inhom2, overlap_param_inhom, wcdf, icdf, & + !$ACC wfsd, ifsd) + do jcloud = max(2,jlev-n_layers_to_scale),jlev-1 + + call uniform_distribution_omp(istate, rand_inhom2) + + ! Set overlap parameter of inhomogeneities + overlap_param_inhom = overlap_param(jcloud-1) + if ( ibegin<=jcloud-1 .and. jcloud-1< iend .and. & + & overlap_param(jcloud-1) > 0.0_jprb) then + overlap_param_inhom = & + & overlap_param(jcloud-1)**(1.0_jprb/decorrelation_scaling) + end if + + ! Use second random number, and inhomogeneity overlap + ! parameter, to decide whether the first random number + ! should be repeated (corresponding to maximum overlap) + ! or not (corresponding to random overlap) + if ( jcloud > jlev-n_layers_to_scale .and. & + & rand_inhom2 >= overlap_param_inhom) then + call uniform_distribution_omp(istate, rand_inhom1) + end if + ! manual inline >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> + ! Bilinear interpolation with bounds + wcdf = rand_inhom1 * (sample_ncdf-1) + 1.0_jprb + icdf = max(1, min(int(wcdf), sample_ncdf-1)) + wcdf = max(0.0_jprb, min(wcdf - icdf, 1.0_jprb)) + + wfsd = (fractional_std(jcloud)-sample_fsd1) * sample_inv_fsd_interval + 1.0_jprb + ifsd = max(1, min(int(wfsd), sample_nfsd-1)) + wfsd = max(0.0_jprb, min(wfsd - ifsd, 1.0_jprb)) + + od_scaling(jg,jcloud) = (1.0_jprb-wcdf)*(1.0_jprb-wfsd) * sample_val(icdf ,ifsd) & + & + (1.0_jprb-wcdf)* wfsd * sample_val(icdf ,ifsd+1) & + & + wcdf *(1.0_jprb-wfsd) * sample_val(icdf+1,ifsd) & + & + wcdf * wfsd * sample_val(icdf+1,ifsd+1) + ! manual inline <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + end do + + n_layers_to_scale = 0 + + end if ! (do_fill_od_scaling) + end do ! jlev = itrigger+1,iend+1 + end if ! (total_cloud_cover < frac_threshold) + + end subroutine cloud_generator_omp + end module radiation_cloud_generator_acc diff --git a/radiation/radiation_interface.F90 b/radiation/radiation_interface.F90 index 97cbf5ec..694b6aff 100644 --- a/radiation/radiation_interface.F90 +++ b/radiation/radiation_interface.F90 @@ -225,6 +225,8 @@ subroutine radiation(ncol, nlev, istartcol, iendcol, config, & use radiation_mcica_lw, only : solver_mcica_lw use radiation_mcica_acc_sw, only : solver_mcica_acc_sw use radiation_mcica_acc_lw, only : solver_mcica_acc_lw + use radiation_mcica_omp_sw, only : solver_mcica_omp_sw + use radiation_mcica_omp_lw, only : solver_mcica_omp_lw use radiation_cloudless_sw, only : solver_cloudless_sw use radiation_cloudless_lw, only : solver_cloudless_lw use radiation_homogeneous_sw, only : solver_homogeneous_sw @@ -319,6 +321,10 @@ subroutine radiation(ncol, nlev, istartcol, iendcol, config, & !$ACC planck_hl, lw_emission, lw_albedo, sw_albedo_direct, & !$ACC sw_albedo_diffuse, incoming_sw) + !$OMP TARGET ENTER DATA MAP(ALLOC: od_lw, ssa_lw, g_lw, od_lw_cloud, ssa_lw_cloud, g_lw_cloud, & + !$OMP od_sw, ssa_sw, g_sw, od_sw_cloud, ssa_sw_cloud, g_sw_cloud, & + !$OMP planck_hl, lw_emission, lw_albedo, sw_albedo_direct, & + !$OMP sw_albedo_diffuse, incoming_sw) if (thermodynamics%pressure_hl(istartcol,2) & & < thermodynamics%pressure_hl(istartcol,1)) then ! Input arrays are arranged in order of decreasing pressure / @@ -407,6 +413,7 @@ subroutine radiation(ncol, nlev, istartcol, iendcol, config, & & od_lw, ssa_lw, g_lw, od_sw, ssa_sw, g_sw) end if else + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) !$ACC PARALLEL DEFAULT(PRESENT) ASYNC(1) !$ACC LOOP GANG VECTOR COLLAPSE(2) do jcol = istartcol,iendcol @@ -425,6 +432,7 @@ subroutine radiation(ncol, nlev, istartcol, iendcol, config, & end do end do !$ACC END PARALLEL + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO end if ! For diagnostic purposes, save these intermediate variables to @@ -436,14 +444,17 @@ subroutine radiation(ncol, nlev, istartcol, iendcol, config, & write(rad_prop_file_name,'(a,a,i4.4,a,i4.4,a)') & & rad_prop_base_file_name, '_', istartcol, '-',iendcol,'.nc' end if -#ifdef _OPENACC +#if defined(_OPENACC) || defined(OMPGPU) + !$OMP TARGET UPDATE FROM(od_lw, ssa_lw, g_lw, od_lw_cloud, ssa_lw_cloud, g_lw_cloud, & + !$OMP planck_hl, lw_emission, lw_albedo, sw_albedo_direct, sw_albedo_diffuse, & + !$OMP incoming_sw, od_sw, ssa_sw, g_sw, od_sw_cloud, ssa_sw_cloud, g_sw_cloud) !$ACC UPDATE HOST(od_lw, ssa_lw, g_lw, od_lw_cloud, ssa_lw_cloud, g_lw_cloud, & !$ACC planck_hl, lw_emission, lw_albedo, sw_albedo_direct, sw_albedo_diffuse, & !$ACC incoming_sw, od_sw, ssa_sw, g_sw, od_sw_cloud, ssa_sw_cloud, g_sw_cloud) call cloud%update_host(cloud) call flux%update_host(flux) #endif - call save_radiative_properties(trim(rad_prop_file_name), & + call save_radiative_properties(trim(rad_prop_file_name), & & nlev, istartcol, iendcol, & & config, single_level, thermodynamics, cloud, & & planck_hl, lw_emission, lw_albedo, & @@ -460,7 +471,9 @@ subroutine radiation(ncol, nlev, istartcol, iendcol, config, & !$ACC WAIT if (config%i_solver_lw == ISolverMcICA) then -#ifdef _OPENACC +#if defined(_OPENACC) || defined(OMPGPU) + !$OMP TARGET UPDATE FROM(od_lw, ssa_lw, g_lw, od_lw_cloud, ssa_lw_cloud, g_lw_cloud, planck_hl, & + !$OMP& lw_emission, lw_albedo) !$ACC UPDATE HOST(od_lw, ssa_lw, g_lw, od_lw_cloud, ssa_lw_cloud, g_lw_cloud, planck_hl, lw_emission, lw_albedo) !$ACC WAIT(1) call cloud%update_host(cloud) @@ -472,17 +485,24 @@ subroutine radiation(ncol, nlev, istartcol, iendcol, config, & & config, single_level, cloud, & & od_lw, ssa_lw, g_lw, od_lw_cloud, ssa_lw_cloud, & & g_lw_cloud, planck_hl, lw_emission, lw_albedo, flux) -#ifdef _OPENACC +#if defined(_OPENACC) || defined(OMPGPU) !$ACC WAIT(1) call flux%update_device(flux) !$ACC WAIT(1) #endif else if (config%i_solver_lw == ISolverMcICAACC) then ! Compute fluxes using the McICA ACC longwave solver +#if defined(OMPGPU) + call solver_mcica_omp_lw(nlev,istartcol,iendcol, & + & config, single_level, cloud, & + & od_lw, ssa_lw, g_lw, od_lw_cloud, ssa_lw_cloud, & + & g_lw_cloud, planck_hl, lw_emission, lw_albedo, flux) +#else call solver_mcica_acc_lw(nlev,istartcol,iendcol, & & config, single_level, cloud, & & od_lw, ssa_lw, g_lw, od_lw_cloud, ssa_lw_cloud, & & g_lw_cloud, planck_hl, lw_emission, lw_albedo, flux) +#endif else if (config%i_solver_lw == ISolverSPARTACUS) then ! Compute fluxes using the SPARTACUS longwave solver call solver_spartacus_lw(nlev,istartcol,iendcol, & @@ -516,7 +536,9 @@ subroutine radiation(ncol, nlev, istartcol, iendcol, config, & !$ACC WAIT if (config%i_solver_sw == ISolverMcICA) then -#ifdef _OPENACC +#if defined(_OPENACC) || defined(OMPGPU) + !$OMP TARGET UPDATE FROM(od_sw, ssa_sw, g_sw, od_sw_cloud, ssa_sw_cloud, g_sw_cloud, & + !$OMP& sw_albedo_direct, sw_albedo_diffuse, incoming_sw) !$ACC UPDATE HOST(od_sw, ssa_sw, g_sw, od_sw_cloud, ssa_sw_cloud, g_sw_cloud, sw_albedo_direct, sw_albedo_diffuse, incoming_sw) !$ACC WAIT(1) call cloud%update_host(cloud) @@ -529,18 +551,26 @@ subroutine radiation(ncol, nlev, istartcol, iendcol, config, & & od_sw, ssa_sw, g_sw, od_sw_cloud, ssa_sw_cloud, & & g_sw_cloud, sw_albedo_direct, sw_albedo_diffuse, & & incoming_sw, flux) -#ifdef _OPENACC +#if defined(_OPENACC) || defined(OMPGPU) !$ACC WAIT(1) call flux%update_device(flux) !$ACC WAIT(1) #endif else if (config%i_solver_sw == ISolverMcICAACC) then ! Compute fluxes using the McICA ACC shortwave solver +#if defined(OMPGPU) + call solver_mcica_omp_sw(nlev,istartcol,iendcol, & + & config, single_level, cloud, & + & od_sw, ssa_sw, g_sw, od_sw_cloud, ssa_sw_cloud, & + & g_sw_cloud, sw_albedo_direct, sw_albedo_diffuse, & + & incoming_sw, flux) +#else call solver_mcica_acc_sw(nlev,istartcol,iendcol, & & config, single_level, cloud, & & od_sw, ssa_sw, g_sw, od_sw_cloud, ssa_sw_cloud, & & g_sw_cloud, sw_albedo_direct, sw_albedo_diffuse, & & incoming_sw, flux) +#endif else if (config%i_solver_sw == ISolverSPARTACUS) then ! Compute fluxes using the SPARTACUS shortwave solver call solver_spartacus_sw(nlev,istartcol,iendcol, & @@ -581,6 +611,10 @@ subroutine radiation(ncol, nlev, istartcol, iendcol, config, & !$ACC WAIT !$ACC END DATA + !$OMP TARGET EXIT DATA MAP(DELETE:od_lw, ssa_lw, g_lw, od_lw_cloud, ssa_lw_cloud, g_lw_cloud, & + !$OMP od_sw, ssa_sw, g_sw, od_sw_cloud, ssa_sw_cloud, g_sw_cloud, & + !$OMP planck_hl, lw_emission, lw_albedo, sw_albedo_direct, & + !$OMP sw_albedo_diffuse, incoming_sw) if (lhook) call dr_hook('radiation_interface:radiation',1,hook_handle) end subroutine radiation diff --git a/radiation/radiation_lw_derivatives.F90 b/radiation/radiation_lw_derivatives.F90 index 1fd123fb..12b0e5f6 100644 --- a/radiation/radiation_lw_derivatives.F90 +++ b/radiation/radiation_lw_derivatives.F90 @@ -34,6 +34,11 @@ module radiation_lw_derivatives + implicit none + + !$omp declare target(calc_lw_derivatives_ica_omp) + !$omp declare target(modify_lw_derivatives_ica_omp) + public contains @@ -184,7 +189,109 @@ subroutine modify_lw_derivatives_ica(ng, nlev, icol, transmittance, & end subroutine modify_lw_derivatives_ica + !--------------------------------------------------------------------- + ! Calculation for the Independent Column Approximation + ! uses global memory to for writing out the results of the reduction + subroutine calc_lw_derivatives_ica_omp(ng, nlev, icol, transmittance, flux_up_surf, lw_derivatives, lw_derivatives_g) + + use parkind1, only : jprb + implicit none + + ! Inputs + integer, intent(in) :: ng ! number of spectral intervals + integer, intent(in) :: nlev ! number of levels + integer, intent(in) :: icol ! Index of column for output + real(jprb), intent(in) :: transmittance(ng,nlev) + real(jprb), intent(in) :: flux_up_surf(ng) ! Upwelling surface spectral flux (W m-2) + + ! Output + real(jprb), intent(out) :: lw_derivatives(:,:) ! dimensioned (ncol,nlev+1) + + ! Rate of change of spectral flux at a given height with respect + ! to the surface value + real(jprb), intent(inout) :: lw_derivatives_g(ng) + + real(jprb) :: sum_flux_up_surf, sum_lw_derivatives_g + + integer :: jlev, jg + + sum_flux_up_surf = 0.0_jprb + do jg = 1,ng + sum_flux_up_surf = sum_flux_up_surf + flux_up_surf(jg) + end do + ! Initialize the derivatives at the surface + do jg = 1,ng + lw_derivatives_g(jg) = flux_up_surf(jg) / sum_flux_up_surf + end do + lw_derivatives(icol, nlev+1) = 1.0_jprb + + ! Move up through the atmosphere computing the derivatives at each + ! half-level + do jlev = nlev,1,-1 + sum_lw_derivatives_g = 0.0_jprb + do jg = 1,ng + lw_derivatives_g(jg) = lw_derivatives_g(jg) * transmittance(jg,jlev) + sum_lw_derivatives_g = sum_lw_derivatives_g + lw_derivatives_g(jg) + end do + lw_derivatives(icol,jlev) = sum_lw_derivatives_g + end do + + end subroutine calc_lw_derivatives_ica_omp + + !--------------------------------------------------------------------- + ! Calculation for the Independent Column Approximation + ! uses global memory to for writing out the results of the reduction + subroutine modify_lw_derivatives_ica_omp(ng, nlev, icol, transmittance, flux_up_surf, & + & weight, lw_derivatives, lw_derivatives_g) + + use parkind1, only : jprb + + implicit none + + ! Inputs + integer, intent(in) :: ng ! number of spectral intervals + integer, intent(in) :: nlev ! number of levels + integer, intent(in) :: icol ! Index of column for output + real(jprb), intent(in) :: transmittance(ng,nlev) + real(jprb), intent(in) :: flux_up_surf(ng) ! Upwelling surface spectral flux (W m-2) + real(jprb), intent(in) :: weight ! Weight new values against existing + + ! Output + real(jprb), intent(inout) :: lw_derivatives(:,:) ! dimensioned (ncol,nlev+1) + + ! Rate of change of spectral flux at a given height with respect + ! to the surface value + real(jprb), intent(inout) :: lw_derivatives_g(ng) + + real(jprb) :: sum_flux_up_surf, sum_lw_derivatives_g + + integer :: jlev, jg + + sum_flux_up_surf = 0.0_jprb + do jg = 1,ng + sum_flux_up_surf = sum_flux_up_surf + flux_up_surf(jg) + end do + ! Initialize the derivatives at the surface + do jg = 1,ng + lw_derivatives_g(jg) = flux_up_surf(jg) / sum_flux_up_surf + end do + + ! This value must be 1 so no weighting applied + lw_derivatives(icol, nlev+1) = 1.0_jprb + + ! Move up through the atmosphere computing the derivatives at each + ! half-level + do jlev = nlev,1,-1 + sum_lw_derivatives_g = 0.0_jprb + do jg = 1,ng + lw_derivatives_g(jg) = lw_derivatives_g(jg) * transmittance(jg,jlev) + sum_lw_derivatives_g = sum_lw_derivatives_g + lw_derivatives_g(jg) + end do + lw_derivatives(icol,jlev) = (1.0_jprb - weight) * lw_derivatives(icol,jlev) & + & + weight * sum_lw_derivatives_g + end do + end subroutine modify_lw_derivatives_ica_omp !--------------------------------------------------------------------- ! Calculation for solvers involving multiple regions and matrices diff --git a/radiation/radiation_mcica_omp_lw.F90 b/radiation/radiation_mcica_omp_lw.F90 new file mode 100644 index 00000000..215f45e5 --- /dev/null +++ b/radiation/radiation_mcica_omp_lw.F90 @@ -0,0 +1,545 @@ +! radiation_mcica_omp_lw.F90 - Monte-Carlo Independent Column Approximation longtwave solver +! +! (C) Copyright 2015- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! +! Author: Robin Hogan +! Email: r.j.hogan@ecmwf.int +! +! Modifications +! 2017-04-11 R. Hogan Receive emission/albedo rather than planck/emissivity +! 2017-04-22 R. Hogan Store surface fluxes at all g-points +! 2017-07-12 R. Hogan Call fast adding method if only clouds scatter +! 2017-10-23 R. Hogan Renamed single-character variables + +#include "ecrad_config.h" + +module radiation_mcica_omp_lw + + use radiation_io, only : nulout + public + +contains + + !--------------------------------------------------------------------- + ! Longwave Monte Carlo Independent Column Approximation + ! (McICA). This implementation performs a clear-sky and a cloudy-sky + ! calculation, and then weights the two to get the all-sky fluxes + ! according to the total cloud cover. This method reduces noise for + ! low cloud cover situations, and exploits the clear-sky + ! calculations that are usually performed for diagnostic purposes + ! simultaneously. The cloud generator has been carefully written + ! such that the stochastic cloud field satisfies the prescribed + ! overlap parameter accounting for this weighting. + subroutine solver_mcica_omp_lw(nlev,istartcol,iendcol, & + & config, single_level, cloud, & + & od, ssa, g, od_cloud, ssa_cloud, g_cloud, planck_hl, & + & emission, albedo, & + & flux) + + use parkind1, only : jprb + use yomhook, only : lhook, dr_hook, jphook + + use radiation_io, only : nulerr, radiation_abort + use radiation_config, only : config_type + use radiation_single_level, only : single_level_type + use radiation_cloud, only : cloud_type + use radiation_flux, only : flux_type + use radiation_two_stream, only : calc_ref_trans_lw_single_level_omp, & + & calc_no_scattering_transmittance_lw_omp, & + & calc_no_scattering_transmittance_lw_single_cell_omp + use radiation_adding_ica_lw, only : fast_adding_ica_lw_omp, calc_fluxes_no_scattering_lw_omp + + use radiation_lw_derivatives, only : calc_lw_derivatives_ica_omp, modify_lw_derivatives_ica_omp + use radiation_cloud_generator_acc, only: cloud_generator_omp + use radiation_cloud_cover, only : beta2alpha, MaxCloudFrac + +#ifdef HAVE_ROCTX + use roctx_profiling, only: roctxstartrange, roctxendrange + use iso_c_binding, only: c_null_char +#endif + + implicit none + + ! Inputs + integer, intent(in) :: nlev ! number of model levels + integer, intent(in) :: istartcol, iendcol ! range of columns to process + type(config_type), intent(in) :: config + type(single_level_type), intent(in) :: single_level + type(cloud_type), intent(in) :: cloud + + ! Gas and aerosol optical depth, single-scattering albedo and + ! asymmetry factor at each longwave g-point + real(jprb), intent(in), dimension(config%n_g_lw, nlev, istartcol:iendcol) :: & + & od + real(jprb), intent(in), dimension(config%n_g_lw_if_scattering, nlev, istartcol:iendcol) :: & + & ssa, g + + ! Cloud and precipitation optical depth, single-scattering albedo and + ! asymmetry factor in each longwave band + real(jprb), intent(in), dimension(config%n_bands_lw,nlev,istartcol:iendcol) :: & + & od_cloud + real(jprb), intent(in), dimension(config%n_bands_lw_if_scattering, & + & nlev,istartcol:iendcol) :: ssa_cloud, g_cloud + + ! Planck function at each half-level and the surface + real(jprb), intent(in), dimension(config%n_g_lw,nlev+1,istartcol:iendcol) :: & + & planck_hl + + ! Emission (Planck*emissivity) and albedo (1-emissivity) at the + ! surface at each longwave g-point + real(jprb), intent(in), dimension(config%n_g_lw, istartcol:iendcol) :: emission, albedo + + ! Output + type(flux_type), intent(inout):: flux + + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + ! Local variables : Mapped into Global Memory + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + ! Fluxes per g point + real(jprb), dimension(config%n_g_lw, nlev+1, istartcol:iendcol) :: flux_up, flux_dn + real(jprb), dimension(config%n_g_lw, nlev+1, istartcol:iendcol) :: flux_up_clear, flux_dn_clear + + ! Identify clear-sky layers + logical :: is_clear_sky_layer(nlev, istartcol:iendcol) + + ! workaround that allows inling of cloud generator + real(jprb), dimension(config%pdf_sampler%ncdf, config%pdf_sampler%nfsd) :: sample_val + + ! temporary arrays to increase performance + real(jprb), dimension(nlev, istartcol:iendcol) :: frac, frac_std + real(jprb), dimension(nlev-1, istartcol:iendcol) :: overlap_param + + ! temporary arrays + real(jprb), dimension(nlev, istartcol:iendcol) :: cum_cloud_cover + real(jprb), dimension(nlev-1, istartcol:iendcol) :: pair_cloud_cover + + ! Cumulative product needed in computation of total_cloud_cover + real(jprb) :: cum_product(istartcol:iendcol) + + ! First and last cloudy layers + integer :: ibegin(istartcol:iendcol), iend(istartcol:iendcol) + + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + ! Local variables : Was stack and is now in Global Memory + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + ! Diffuse reflectance and transmittance for each layer in clear + ! and all skies + real(jprb), dimension(config%n_g_lw, nlev, istartcol:iendcol) :: trans_clear, reflectance, transmittance + + ! Emission by a layer into the upwelling or downwelling diffuse + ! streams, in clear and all skies + real(jprb), dimension(config%n_g_lw, nlev, istartcol:iendcol) :: source_up, source_dn + + ! Optical depth scaling from the cloud generator, zero indicating + ! clear skies + real(jprb), dimension(config%n_g_lw,nlev, istartcol:iendcol) :: od_scaling + + ! Temporary working array + real(jprb), dimension(config%n_g_lw,nlev+1, istartcol:iendcol) :: tmp_work_source + real(jprb), dimension(config%n_g_lw, istartcol:iendcol) :: tmp_derivatives + + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + ! Local variables : Stack + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + ! Modified optical depth after McICA scaling to represent cloud + ! inhomogeneity + real(jprb) :: od_cloud_new + + ! Combined gas+aerosol+cloud optical depth, single scattering + ! albedo and asymmetry factor + real(jprb) :: od_total, ssa_total, g_total + + ! Combined scattering optical depth + real(jprb) :: scat_od, scat_od_total(config%n_g_lw) + + ! Total cloud cover output from the cloud generator + real(jprb) :: total_cloud_cover + + ! "Alpha" overlap parameter + real(jprb) :: overlap_alpha + + ! Temporary storage for more efficient summation + real(jprb) :: sum_up, sum_dn, sum_up_clr, sum_dn_clr + + ! Index of the highest cloudy layer + integer :: i_cloud_top + + ! Number of g points + integer :: ng + + ! Loop indices for level, column and g point + integer :: jlev, jcol, jg + + real(jphook) :: hook_handle + !real(jprb) totalMem + integer :: file_idx, fidx1, fidx2, fidx3 + +#ifdef HAVE_ROCTX + call roctxStartRange("radiation::mcica_omp_lw"//c_null_char) +#endif + + if (lhook) call dr_hook('radiation_mcica_omp_lw:solver_mcica_omp_lw',0,hook_handle) + + if (.not. config%do_clear) then + write(nulerr,'(a)') '*** Error: longwave McICA OMP requires clear-sky calculation to be performed' + call radiation_abort() + end if + + ng = config%n_g_lw + + !totalMem = 6*ng * nlev + !totalMem = totalMem+5*(ng)*(nlev+1) !flux_up,dn,up_clear,dn_clear,source + !totalmem = totalMem*(iendcol-istartcol)*SIZEOF((real(jprb)))/1.e9 + !write(nulout,'(a,a,i0,a,g0.5)') __FILE__, " : LINE = ", __LINE__, " total_memory=",totalMem + + !$OMP TARGET ENTER DATA MAP(ALLOC: flux_up, flux_dn, flux_up_clear, flux_dn_clear, & + !$OMP is_clear_sky_layer, & + !$OMP sample_val, frac, frac_std, overlap_param, cum_cloud_cover, & + !$OMP pair_cloud_cover, cum_product, ibegin, iend) +#if defined(__amdflang__) + !$OMP TARGET DATA MAP(PRESENT, ALLOC: config, single_level, cloud, od, ssa, g, od_cloud, ssa_cloud, & + !$OMP g_cloud, planck_hl, emission, albedo, flux) +#endif + + ! + ! These allocations need to moved into a TEAMS PRIVATE clause, I think. + ! + + !$OMP TARGET ENTER DATA MAP(ALLOC: trans_clear, od_scaling, & + !$OMP reflectance, transmittance, source_up, source_dn, tmp_work_source, & + !$OMP tmp_derivatives) + + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) + do jlev = 1,config%pdf_sampler%nfsd + do jcol = 1,config%pdf_sampler%ncdf + sample_val(jcol,jlev) = config%pdf_sampler%val(jcol,jlev) + end do + end do + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) + do jcol = istartcol,iendcol + do jlev = 1, nlev + frac(jlev, jcol) = cloud%fraction(jcol,jlev) + frac_std(jlev, jcol) = cloud%fractional_std(jcol,jlev) + is_clear_sky_layer(jlev,jcol) = .true. + end do + end do + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) + do jcol = istartcol,iendcol + do jlev = 1, nlev-1 + overlap_param(jlev, jcol) = cloud%overlap_param(jcol,jlev) + end do + end do + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(overlap_alpha) + do jcol = istartcol,iendcol + !--------------------------------------------------------------------- + ! manual inline from cum_cloud_cover_exp_ran >>>>>>>>>>>>>>>>>>>>>>>> + ! Loop to compute total cloud cover and the cumulative cloud cover + ! down to the base of each layer + do jlev = 1,nlev-1 + ! Convert to "alpha" overlap parameter if necessary + if (config%use_beta_overlap) then + overlap_alpha = beta2alpha(overlap_param(jlev,jcol), & + & frac(jlev,jcol), frac(jlev+1,jcol)) + else + overlap_alpha = overlap_param(jlev,jcol) + end if + + ! Compute the combined cloud cover of layers jlev and jlev+1 + pair_cloud_cover(jlev, jcol) = overlap_alpha*max(frac(jlev,jcol),frac(jlev+1,jcol)) & + & + (1.0_jprb - overlap_alpha) & + & * (frac(jlev,jcol)+frac(jlev+1,jcol)-frac(jlev,jcol)*frac(jlev+1,jcol)) + end do + end do + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO + do jcol = istartcol,iendcol + cum_cloud_cover(1, jcol) = frac(1,jcol) + cum_product(jcol) = 1.0_jprb - frac(1,jcol) + do jlev = 1,nlev-1 + if (frac(jlev,jcol) >= MaxCloudFrac) then + ! Cloud cover has reached one + cum_product(jcol) = 0.0_jprb + else + cum_product(jcol) = cum_product(jcol) * (1.0_jprb - pair_cloud_cover(jlev, jcol)) & + & / (1.0_jprb - frac(jlev,jcol)) + end if + cum_cloud_cover(jlev+1, jcol) = 1.0_jprb - cum_product(jcol) + end do + flux%cloud_cover_lw(jcol) = cum_cloud_cover(nlev,jcol); + if (flux%cloud_cover_lw(jcol) < config%cloud_fraction_threshold) then + ! Treat column as clear sky: calling function therefore will not + ! use od_scaling so we don't need to calculate it + flux%cloud_cover_lw(jcol) = 0.0_jprb + end if + end do + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO + do jcol = istartcol,iendcol + if (flux%cloud_cover_lw(jcol) >= config%cloud_fraction_threshold) then + ! Cloud is present: need to calculate od_scaling + ! Find range of cloudy layers + ibegin(jcol) = nlev + do jlev = 1, nlev + if( frac(jlev,jcol) > 0.0_jprb ) then + ibegin(jcol) = min(jlev, ibegin(jcol)) + end if + end do + + iend(jcol) = ibegin(jcol) + do jlev = ibegin(jcol)+1,nlev + if (frac(jlev,jcol) > 0.0_jprb) then + iend(jcol) = max(jlev, iend(jcol)) + end if + end do + end if + end do + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3) + do jcol = istartcol,iendcol + do jlev = 1, nlev+1 + do jg = 1, ng + flux_up_clear(jg,jlev,jcol) = 0.0_jprb + flux_dn_clear(jg,jlev,jcol) = 0.0_jprb + flux_up(jg,jlev,jcol) = 0.0_jprb + flux_dn(jg,jlev,jcol) = 0.0_jprb + end do + end do + end do + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + + ! + ! This kernel does band independent computations. Some computation is done across veritical levels + ! + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(jcol, jg) FIRSTPRIVATE(istartcol, iendcol, ng, nlev) & + !$OMP& THREAD_LIMIT(256) + do jcol = istartcol,iendcol + do jg = 1, ng + call cloud_generator_omp(jg, ng, nlev, & + & single_level%iseed(jcol) + 997, & + & config%cloud_fraction_threshold, & + & frac(:,jcol), overlap_param(:,jcol), & + & config%cloud_inhom_decorr_scaling, frac_std(:,jcol), & + & config%pdf_sampler%ncdf, config%pdf_sampler%nfsd, & + & config%pdf_sampler%fsd1, config%pdf_sampler%inv_fsd_interval, & + & sample_val, & + & od_scaling(:,:,jcol), flux%cloud_cover_lw(jcol)+0.0_jprb, & ! Workaround for nvhpc-24.1 + & ibegin(jcol), iend(jcol), & + & cum_cloud_cover=cum_cloud_cover(:,jcol), & + & pair_cloud_cover=pair_cloud_cover(:,jcol)) + enddo + enddo + + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(od_cloud_new, od_total, ssa_total, g_total, scat_od, & + !$OMP& jcol, jg, jlev, i_cloud_top) FIRSTPRIVATE(istartcol, iendcol, ng, nlev) THREAD_LIMIT(1024) + do jcol = istartcol,iendcol + do jg = 1, ng + ! Clear-sky calculation + ! Non-scattering case: use simpler functions for + ! transmission and emission + call calc_no_scattering_transmittance_lw_omp(jg, ng, nlev, od(:,:,jcol), & + & planck_hl(:,:,jcol), trans_clear(:,:,jcol), source_up(:,:,jcol), & + & source_dn(:,:,jcol)) + + ! Simpler down-then-up method to compute fluxes + call calc_fluxes_no_scattering_lw_omp(jg, ng, nlev, & + & trans_clear(:,:,jcol), source_up(:,:,jcol), source_dn(:,:,jcol), & + & emission(:,jcol), albedo(:,jcol), & + & flux_up_clear(:,:,jcol), flux_dn_clear(:,:,jcol)) + + ! Store surface spectral downwelling fluxes + flux%lw_dn_surf_clear_g(jg,jcol) = flux_dn_clear(jg,nlev+1,jcol) + ! Do cloudy-sky calculation; add a prime number to the seed in + ! the longwave + + if (flux%cloud_cover_lw(jcol) >= config%cloud_fraction_threshold) then + ! Total-sky calculation + i_cloud_top = nlev+1 + + do jlev = 1,nlev + ! Compute combined gas+aerosol+cloud optical properties + if (frac(jlev,jcol) >= config%cloud_fraction_threshold) then + is_clear_sky_layer(jlev,jcol) = .false. + ! Get index to the first cloudy layer from the top + if (i_cloud_top > jlev) then + i_cloud_top = jlev + end if + + od_cloud_new = od_scaling(jg,jlev,jcol) & + & * od_cloud(config%i_band_from_reordered_g_lw(jg),jlev,jcol) + od_total = od(jg,jlev,jcol) + od_cloud_new + ssa_total = 0.0_jprb + g_total = 0.0_jprb + + if (config%do_lw_cloud_scattering) then + ! Scattering case: calculate reflectance and + ! transmittance at each model level + if (od_total > 0.0_jprb) then + scat_od = ssa_cloud(config%i_band_from_reordered_g_lw(jg),jlev,jcol) & + & * od_cloud_new + ssa_total = scat_od / od_total + if (scat_od > 0.0_jprb) then + g_total = g_cloud(config%i_band_from_reordered_g_lw(jg),jlev,jcol) & + & * ssa_cloud(config%i_band_from_reordered_g_lw(jg),jlev,jcol) & + & * od_cloud_new / scat_od + end if + end if + + ! Compute cloudy-sky reflectance, transmittance etc at + ! each model level + call calc_ref_trans_lw_single_level_omp(jg, ng, & + & od_total, ssa_total, g_total, & + & planck_hl(:,jlev:jlev+1,jcol), & + & reflectance(:,jlev,jcol), transmittance(:,jlev,jcol), & + & source_up(:,jlev,jcol), source_dn(:,jlev,jcol)) + else + ! No-scattering case: use simpler functions for + ! transmission and emission + call calc_no_scattering_transmittance_lw_single_cell_omp(od_total, & + & planck_hl(jg,jlev,jcol), planck_hl(jg,jlev+1,jcol), transmittance(jg,jlev,jcol), & + & source_up(jg,jlev,jcol), source_dn(jg,jlev,jcol)) + end if + + else + ! Clear-sky layer: copy over clear-sky values + reflectance(jg,jlev,jcol) = 0.0 + transmittance(jg,jlev,jcol) = trans_clear(jg,jlev,jcol) + end if + end do + + if(config%do_lw_cloud_scattering) then + ! Use adding method to compute fluxes but optimize for the + ! presence of clear-sky layers + call fast_adding_ica_lw_omp(jg, ng, nlev, reflectance(:,:,jcol), transmittance(:,:,jcol), & + & source_up(:,:,jcol), source_dn(:,:,jcol), & + & emission(:,jcol), albedo(:,jcol), & + & is_clear_sky_layer(:,jcol), i_cloud_top, flux_dn_clear(:,:,jcol), & + & flux_up(:,:,jcol), flux_dn(:,:,jcol), & + & flux_up(:,:,jcol), flux_dn(:,:,jcol), & + & source=tmp_work_source(:,:,jcol)) + else + ! Simpler down-then-up method to compute fluxes + call calc_fluxes_no_scattering_lw_omp(jg, ng, nlev, & + & transmittance(:,:,jcol), source_up(:,:,jcol), source_dn(:,:,jcol), & + & emission(:,jcol), albedo(:,jcol), & + & flux_up(:,:,jcol), flux_dn(:,:,jcol)) + end if + + ! Cloudy flux profiles currently assume completely overcast + ! skies; perform weighted average with clear-sky profile + ! Store surface spectral downwelling fluxes + flux%lw_dn_surf_g(jg,jcol) = flux%cloud_cover_lw(jcol)*flux_dn(jg,nlev+1,jcol) & + & + (1.0_jprb - flux%cloud_cover_lw(jcol))*flux%lw_dn_surf_clear_g(jg,jcol) + else + ! No cloud in profile and clear-sky fluxes already + ! calculated: copy them over + flux%lw_dn_surf_g(jg,jcol) = flux%lw_dn_surf_clear_g(jg,jcol) + end if + end do + end do + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + + ! + ! Separate out the derivative computation, which has reductions across spectral bands + ! + if (config%do_lw_derivatives) then + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO THREAD_LIMIT(64) + do jcol = istartcol,iendcol + if (flux%cloud_cover_lw(jcol) >= config%cloud_fraction_threshold) then + call calc_lw_derivatives_ica_omp(ng, nlev, jcol, transmittance(:,:,jcol), flux_up(:,nlev+1,jcol), & + & flux%lw_derivatives, tmp_derivatives(:,jcol)) + if (flux%cloud_cover_lw(jcol) < 1.0_jprb - config%cloud_fraction_threshold) then + ! Modify the existing derivative with the contribution from the clear sky + call modify_lw_derivatives_ica_omp(ng, nlev, jcol, trans_clear(:,:,jcol), flux_up_clear(:,nlev+1,jcol), & + & 1.0_jprb-flux%cloud_cover_lw(jcol), flux%lw_derivatives, tmp_derivatives(:,jcol)) + end if + else + call calc_lw_derivatives_ica_omp(ng, nlev, jcol, trans_clear(:,:,jcol), flux_up_clear(:,nlev+1,jcol), & + & flux%lw_derivatives, tmp_derivatives(:,jcol)) + endif + enddo + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + endif + + ! Loop through columns + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(total_cloud_cover, sum_up, sum_dn, sum_up_clr, sum_dn_clr) +#if defined(__amdflang__) + !$OMP TILE SIZES(64,1) +#endif + do jcol = istartcol,iendcol + do jlev = 1,nlev+1 + + sum_up_clr = 0._jprb + sum_dn_clr = 0._jprb + do jg = 1,ng + sum_up_clr = sum_up_clr + flux_up_clear(jg,jlev,jcol) + sum_dn_clr = sum_dn_clr + flux_dn_clear(jg,jlev,jcol) + end do + flux%lw_up_clear(jcol,jlev) = sum_up_clr + flux%lw_dn_clear(jcol,jlev) = sum_dn_clr + + total_cloud_cover = flux%cloud_cover_lw(jcol) + if (total_cloud_cover >= config%cloud_fraction_threshold) then + + ! Store overcast broadband fluxes + sum_up = 0._jprb + sum_dn = 0._jprb + do jg = 1,ng + sum_up = sum_up + flux_up(jg,jlev,jcol) + sum_dn = sum_dn + flux_dn(jg,jlev,jcol) + end do + flux%lw_up(jcol,jlev) = total_cloud_cover*sum_up + (1.0_jprb - total_cloud_cover)*sum_up_clr + flux%lw_dn(jcol,jlev) = total_cloud_cover*sum_dn + (1.0_jprb - total_cloud_cover)*sum_dn_clr + + else + + flux%lw_up(jcol,jlev) = sum_up_clr + flux%lw_dn(jcol,jlev) = sum_dn_clr + + end if + end do + end do +#if defined(__amdflang__) + !$OMP END TILE +#endif + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + + !$OMP TARGET EXIT DATA MAP(DELETE: trans_clear, od_scaling, & + !$OMP reflectance, transmittance, source_up, source_dn, tmp_work_source, & + !$OMP tmp_derivatives) + + !$OMP TARGET EXIT DATA MAP(DELETE: flux_up, flux_dn, flux_up_clear, flux_dn_clear, & + !$OMP is_clear_sky_layer, & + !$OMP sample_val, frac, frac_std, overlap_param, cum_cloud_cover, & + !$OMP pair_cloud_cover, cum_product, ibegin, iend) + +#if defined(__amdflang__) + !$OMP END TARGET DATA +#endif + +#ifdef HAVE_ROCTX + call roctxEndRange +#endif + if (lhook) call dr_hook('radiation_mcica_omp_lw:solver_mcica_omp_lw',1,hook_handle) + + end subroutine solver_mcica_omp_lw + +end module radiation_mcica_omp_lw diff --git a/radiation/radiation_mcica_omp_sw.F90 b/radiation/radiation_mcica_omp_sw.F90 new file mode 100644 index 00000000..c26291a9 --- /dev/null +++ b/radiation/radiation_mcica_omp_sw.F90 @@ -0,0 +1,592 @@ +! radiation_mcica_omp_sw.F90 - Monte-Carlo Independent Column Approximation shortwave solver +! +! (C) Copyright 2015- ECMWF. +! +! This software is licensed under the terms of the Apache Licence Version 2.0 +! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +! +! In applying this licence, ECMWF does not waive the privileges and immunities +! granted to it by virtue of its status as an intergovernmental organisation +! nor does it submit to any jurisdiction. +! +! Author: Robin Hogan +! Email: r.j.hogan@ecmwf.int +! +! Modifications +! 2017-04-11 R. Hogan Receive albedos at g-points +! 2017-04-22 R. Hogan Store surface fluxes at all g-points +! 2017-10-23 R. Hogan Renamed single-character variables + +#include "ecrad_config.h" + +module radiation_mcica_omp_sw + + use radiation_io, only : nulout + public + +contains + + ! Provides elemental function "delta_eddington" +#include "radiation_delta_eddington.h" + + !--------------------------------------------------------------------- + ! Shortwave Monte Carlo Independent Column Approximation + ! (McICA). This implementation performs a clear-sky and a cloudy-sky + ! calculation, and then weights the two to get the all-sky fluxes + ! according to the total cloud cover. This method reduces noise for + ! low cloud cover situations, and exploits the clear-sky + ! calculations that are usually performed for diagnostic purposes + ! simultaneously. The cloud generator has been carefully written + ! such that the stochastic cloud field satisfies the prescribed + ! overlap parameter accounting for this weighting. + subroutine solver_mcica_omp_sw(nlev,istartcol,iendcol, & + & config, single_level, cloud, & + & od, ssa, g, od_cloud, ssa_cloud, g_cloud, & + & albedo_direct, albedo_diffuse, incoming_sw, & + & flux) + + use parkind1, only : jprb + use yomhook, only : lhook, dr_hook, jphook + + use radiation_io, only : nulerr, radiation_abort + use radiation_config, only : config_type + use radiation_single_level, only : single_level_type + use radiation_cloud, only : cloud_type + use radiation_flux, only : flux_type + use radiation_two_stream, only : calc_two_stream_gammas_sw_single_band_omp, & + & calc_reflectance_transmittance_sw_single_band_omp, & + & calc_ref_trans_sw_omp, calc_ref_trans_sw_single_level_omp + + use radiation_adding_ica_sw, only : adding_ica_sw_omp + use radiation_cloud_generator_acc, only: cloud_generator_omp + use radiation_cloud_cover, only : beta2alpha, MaxCloudFrac + +#ifdef HAVE_ROCTX + use roctx_profiling, only: roctxstartrange, roctxendrange + use iso_c_binding, only: c_null_char +#endif + + implicit none + + ! Inputs + integer, intent(in) :: nlev ! number of model levels + integer, intent(in) :: istartcol, iendcol ! range of columns to process + type(config_type), intent(in) :: config + type(single_level_type), intent(in) :: single_level + type(cloud_type), intent(in) :: cloud + + ! Gas and aerosol optical depth, single-scattering albedo and + ! asymmetry factor at each shortwave g-point + real(jprb), intent(in), dimension(config%n_g_sw, nlev, istartcol:iendcol) :: & + & od, ssa, g + + ! Cloud and precipitation optical depth, single-scattering albedo and + ! asymmetry factor in each shortwave band + real(jprb), intent(in), dimension(config%n_bands_sw,nlev,istartcol:iendcol) :: & + & od_cloud, ssa_cloud, g_cloud + + ! Direct and diffuse surface albedos, and the incoming shortwave + ! flux into a plane perpendicular to the incoming radiation at + ! top-of-atmosphere in each of the shortwave g points + real(jprb), intent(in), dimension(config%n_g_sw,istartcol:iendcol) :: & + & albedo_direct, albedo_diffuse, incoming_sw + + ! Output + type(flux_type), intent(inout):: flux + + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + ! Local variables : Mapped into Global Memory + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + ! Fluxes per g point + real(jprb), dimension(config%n_g_sw, nlev+1, istartcol:iendcol) :: flux_up, flux_dn_diffuse, flux_dn_direct + real(jprb), dimension(config%n_g_sw, nlev+1, istartcol:iendcol) :: flux_up_clear, flux_dn_diffuse_clear, flux_dn_direct_clear + + ! workaround that allows inling of cloud generator + real(jprb), dimension(config%pdf_sampler%ncdf, config%pdf_sampler%nfsd) :: sample_val + + ! copies to increase performance + real(jprb), dimension(nlev, istartcol:iendcol) :: frac, frac_std + real(jprb), dimension(nlev-1, istartcol:iendcol) :: overlap_param + + real(jprb), dimension(nlev, istartcol:iendcol) :: cum_cloud_cover + real(jprb), dimension(nlev-1, istartcol:iendcol) :: pair_cloud_cover + + ! Cumulative product needed in computation of total_cloud_cover + real(jprb) :: cum_product(istartcol:iendcol) + + ! First and last cloudy layers + integer :: ibegin(istartcol:iendcol), iend(istartcol:iendcol) + + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + ! Local variables : Was stack and is now in Global Memory + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + ! Transmittance for the direct beam in clear and all skies + real(jprb), dimension(config%n_g_sw, nlev, istartcol:iendcol) :: ref_clear, trans_clear, ref_dir_clear + !real(jprb), dimension(config%n_g_sw, nlev, istartcol:iendcol) :: trans_dir_diff_clear, trans_dir_dir_clear + + ! Fraction of direct beam scattered by a layer into the upwelling + ! or downwelling diffuse streams, in clear and all skies + real(jprb), dimension(config%n_g_sw, nlev, istartcol:iendcol) :: ref_dir, trans_dir_diff + + ! Transmittance for the direct beam in clear and all skies + real(jprb), dimension(config%n_g_sw, nlev, istartcol:iendcol) :: trans_dir_dir + + ! Diffuse reflectance and transmittance for each layer in clear + ! and all skies + real(jprb), dimension(config%n_g_sw, nlev, istartcol:iendcol) :: reflectance, transmittance + + ! Optical depth scaling from the cloud generator, zero indicating + ! clear skies + real(jprb), dimension(config%n_g_sw,nlev,istartcol:iendcol) :: od_scaling + + ! Temporary working array + real(jprb), dimension(config%n_g_sw,nlev+1,istartcol:iendcol) :: tmp_work_source + + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + ! Local variables : Stack + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + + ! Cosine of solar zenith angle + real(jprb) :: cos_sza + + ! Combined gas+aerosol+cloud optical depth, single scattering + ! albedo and asymmetry factor + real(jprb) :: od_total, ssa_total, g_total + + ! Combined scattering optical depth + real(jprb) :: scat_od + + ! Two-stream coefficients + real(jprb) :: gamma1, gamma2, gamma3 + + ! Modified optical depth after McICA scaling to represent cloud + ! inhomogeneity + real(jprb) :: od_cloud_new + + ! "Alpha" overlap parameter + real(jprb) :: overlap_alpha + + ! Auxiliary for more efficient summation + real(jprb) :: sum_up, sum_dn_direct, sum_dn_diffuse + + ! Number of g points + integer :: ng + + ! Loop indices for level, column and g point + integer :: jlev, jcol, jg + !real(jprb) totalMem + real(jphook) :: hook_handle + +#ifdef HAVE_ROCTX + call roctxStartRange("radiation::mcica_omp_sw"//c_null_char) +#endif + + if (lhook) call dr_hook('radiation_mcica_omp_sw:solver_mcica_omp_sw',0,hook_handle) + + if (.not. config%do_clear) then + write(nulerr,'(a)') '*** Error: shortwave McICA OMP requires clear-sky calculation to be performed' + call radiation_abort() + end if + + !$OMP TARGET ENTER DATA MAP(ALLOC: ref_clear, trans_clear, ref_dir_clear, & + !$OMP& od_scaling, tmp_work_source,& + !$OMP& ref_dir, trans_dir_diff, trans_dir_dir, reflectance, transmittance) + !$OMP TARGET ENTER DATA MAP(ALLOC: flux_up, flux_dn_diffuse, flux_dn_direct, & + !$OMP flux_up_clear, flux_dn_diffuse_clear, flux_dn_direct_clear, & + !$OMP sample_val, frac, frac_std, overlap_param, & + !$OMP cum_cloud_cover, pair_cloud_cover, cum_product, ibegin, iend) +#if defined(__amdflang__) + !$OMP TARGET DATA MAP(PRESENT, ALLOC: config, single_level, cloud, od, ssa, g, od_cloud, & + !$OMP ssa_cloud, g_cloud, albedo_direct, albedo_diffuse, & + !$OMP incoming_sw, flux) +#endif + + ng = config%n_g_sw + + !totalMem = 9*ng * nlev + !totalMem = totalMem+7*(ng)*(nlev+1) !flux_up,dn,up_clear,dn_clear,source + !totalmem = totalMem*(iendcol-istartcol)*SIZEOF((real(jprb)))/1.e9 + !write(nulout,'(a,a,i0,a,g0.5)') __FILE__, " : LINE = ", __LINE__, " total_memory=",totalMem + + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) + do jlev = 1,config%pdf_sampler%nfsd + do jcol = 1,config%pdf_sampler%ncdf + sample_val(jcol,jlev) = config%pdf_sampler%val(jcol,jlev) + end do + end do + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) + do jcol = istartcol,iendcol + do jlev = 1, nlev + frac(jlev, jcol) = cloud%fraction(jcol,jlev) + frac_std(jlev, jcol) = cloud%fractional_std(jcol,jlev) + end do + end do + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) + do jcol = istartcol,iendcol + do jlev = 1, nlev-1 + overlap_param(jlev, jcol) = cloud%overlap_param(jcol,jlev) + end do + end do + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(overlap_alpha) + do jcol = istartcol,iendcol + !Only perform calculation if sun above the horizon + !--------------------------------------------------------------------- + ! manual inline from cum_cloud_cover_exp_ran >>>>>>>>>>>>>>>>>>>>>>>> + ! Loop to compute total cloud cover and the cumulative cloud cover + ! down to the base of each layer + do jlev = 1,nlev-1 + if (single_level%cos_sza(jcol) > 0.0_jprb ) then + ! Convert to "alpha" overlap parameter if necessary + if (config%use_beta_overlap) then + overlap_alpha = beta2alpha(overlap_param(jlev,jcol), & + & frac(jlev,jcol), frac(jlev+1,jcol)) + else + overlap_alpha = overlap_param(jlev,jcol) + end if + ! Compute the combined cloud cover of layers jlev and jlev+1 + pair_cloud_cover(jlev, jcol) = overlap_alpha*max(frac(jlev,jcol),frac(jlev+1,jcol)) & + & + (1.0_jprb - overlap_alpha) & + & * (frac(jlev,jcol)+frac(jlev+1,jcol)-frac(jlev,jcol)*frac(jlev+1,jcol)) + end if + end do + end do + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO + do jcol = istartcol,iendcol + !Only perform calculation if sun above the horizon + if (single_level%cos_sza(jcol) > 0.0_jprb ) then + cum_cloud_cover(1, jcol) = frac(1,jcol) + cum_product(jcol) = 1.0_jprb - frac(1,jcol) + do jlev = 1,nlev-1 + if (frac(jlev,jcol) >= MaxCloudFrac) then + ! Cloud cover has reached one + cum_product(jcol) = 0.0_jprb + else + cum_product(jcol) = cum_product(jcol) * (1.0_jprb - pair_cloud_cover(jlev, jcol)) & + & / (1.0_jprb - frac(jlev,jcol)) + end if + cum_cloud_cover(jlev+1, jcol) = 1.0_jprb - cum_product(jcol) + end do + flux%cloud_cover_sw(jcol) = cum_cloud_cover(nlev,jcol); + if (flux%cloud_cover_sw(jcol) < config%cloud_fraction_threshold) then + ! Treat column as clear sky: calling function therefore will not + ! use od_scaling so we don't need to calculate it + flux%cloud_cover_sw(jcol) = 0.0_jprb + end if + end if + end do + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO + do jcol = istartcol,iendcol + !Only perform calculation if sun above the horizon + if (single_level%cos_sza(jcol) > 0.0_jprb .and. flux%cloud_cover_sw(jcol) >= config%cloud_fraction_threshold) then + ! Cloud is present: need to calculate od_scaling + ! Find range of cloudy layers + ibegin(jcol) = nlev + do jlev = 1, nlev + if( frac(jlev,jcol) > 0.0_jprb ) then + ibegin(jcol) = min(jlev, ibegin(jcol)) + end if + end do + + iend(jcol) = ibegin(jcol) + do jlev = ibegin(jcol)+1,nlev + if (frac(jlev,jcol) > 0.0_jprb) then + iend(jcol) = max(jlev, iend(jcol)) + end if + end do + end if + end do + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(jcol, jg) FIRSTPRIVATE(istartcol, iendcol, ng, nlev) & + !$OMP& THREAD_LIMIT(512) + do jcol = istartcol,iendcol + do jg = 1, ng + ! Do cloudy-sky calculation + call cloud_generator_omp(jg, ng, nlev, & + & single_level%iseed(jcol) + 0, & ! Workaround for nvhpc-24.1 + & config%cloud_fraction_threshold, & + & frac(:,jcol), overlap_param(:,jcol), & + & config%cloud_inhom_decorr_scaling, frac_std(:,jcol), & + & config%pdf_sampler%ncdf, config%pdf_sampler%nfsd, & + & config%pdf_sampler%fsd1, config%pdf_sampler%inv_fsd_interval, & + & sample_val, & + & od_scaling(:,:,jcol), flux%cloud_cover_sw(jcol)+0.0_jprb, & ! Workaround for nvhpc-24.1 + & ibegin(jcol), iend(jcol), & + & cum_cloud_cover=cum_cloud_cover(:,jcol), & + & pair_cloud_cover=pair_cloud_cover(:,jcol)) + enddo + enddo + + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(cos_sza, gamma1, gamma2, gamma3, od_cloud_new, od_total, & + !$OMP& ssa_total, g_total, scat_od, jcol, jg, jlev) FIRSTPRIVATE(istartcol, iendcol, ng, nlev) THREAD_LIMIT(512) + do jcol = istartcol,iendcol + do jg = 1, ng + ! Only perform calculation if sun above the horizon + if (single_level%cos_sza(jcol) > 0.0_jprb) then + cos_sza = single_level%cos_sza(jcol) + + ! Clear-sky calculation - first compute clear-sky reflectance, + ! transmittance etc at each model level + if (.not. config%do_sw_delta_scaling_with_gases) then + ! Delta-Eddington scaling has already been performed to the + ! aerosol part of od, ssa and g + call calc_ref_trans_sw_omp(jg, ng, nlev, & + & cos_sza, od(:,:,jcol), ssa(:,:,jcol), g(:,:,jcol), & + & ref_clear(:,:,jcol), trans_clear(:,:,jcol), & + & ref_dir_clear(:,:,jcol), trans_dir_diff(:,:,jcol), & + & trans_dir_dir(:,:,jcol)) + else + ! Apply delta-Eddington scaling to the aerosol-gas mixture + do jlev = 1,nlev + od_total = od(jg,jlev,jcol) + ssa_total = ssa(jg,jlev,jcol) + g_total = g(jg,jlev,jcol) + call delta_eddington(od_total, ssa_total, g_total) + call calc_two_stream_gammas_sw_single_band_omp(jg, & + & cos_sza, ssa_total, g_total, & + & gamma1, gamma2, gamma3) + + call calc_reflectance_transmittance_sw_single_band_omp(jg, ng, & + & cos_sza, od_total, ssa_total, & + & gamma1, gamma2, gamma3, & + & ref_clear(:,jlev,jcol), trans_clear(:,jlev,jcol), & + & ref_dir_clear(:,jlev,jcol), trans_dir_diff(:,jlev,jcol), & + & trans_dir_dir(:,jlev,jcol) ) + end do + end if + + ! Use adding method to compute fluxes + call adding_ica_sw_omp(jg, ng, nlev, incoming_sw(:,jcol), & + & albedo_diffuse(:,jcol), albedo_direct(:,jcol), cos_sza, & + & ref_clear(:,:,jcol), trans_clear(:,:,jcol), ref_dir_clear(:,:,jcol), trans_dir_diff(:,:,jcol), & + & trans_dir_dir(:,:,jcol), flux_up(:,:,jcol), flux_dn_diffuse(:,:,jcol), flux_dn_direct(:,:,jcol), & + & flux_up(:,:,jcol), flux_dn_diffuse(:,:,jcol), source=tmp_work_source(:,:,jcol)) + + ! save temporarily clear-sky broadband fluxes + do jlev = 1,nlev+1 + flux_up_clear(jg,jlev,jcol) = flux_up(jg,jlev,jcol) + flux_dn_direct_clear(jg,jlev,jcol) = flux_dn_direct(jg,jlev,jcol) + flux_dn_diffuse_clear(jg,jlev,jcol) = flux_dn_diffuse(jg,jlev,jcol) + end do + + ! Store spectral downwelling fluxes at surface + flux%sw_dn_diffuse_surf_clear_g(jg,jcol) = flux_dn_diffuse(jg,nlev+1,jcol) + flux%sw_dn_direct_surf_clear_g(jg,jcol) = flux_dn_direct(jg,nlev+1,jcol) + + if (flux%cloud_cover_sw(jcol) >= config%cloud_fraction_threshold) then + ! Total-sky calculation + do jlev = 1,nlev + ! Compute combined gas+aerosol+cloud optical properties + if (frac(jlev,jcol) >= config%cloud_fraction_threshold) then + od_cloud_new = od_scaling(jg,jlev,jcol) & + & * od_cloud(config%i_band_from_reordered_g_sw(jg),jlev,jcol) + od_total = od(jg,jlev,jcol) + od_cloud_new + ssa_total = 0.0_jprb + g_total = 0.0_jprb + + ! In single precision we need to protect against the + ! case that od_total > 0.0 and ssa_total > 0.0 but + ! od_total*ssa_total == 0 due to underflow + if (od_total > 0.0_jprb) then + scat_od = ssa(jg,jlev,jcol)*od(jg,jlev,jcol) & + & + ssa_cloud(config%i_band_from_reordered_g_sw(jg),jlev,jcol) & + & * od_cloud_new + ssa_total = scat_od / od_total + if (scat_od > 0.0_jprb) then + g_total = (g(jg,jlev,jcol)*ssa(jg,jlev,jcol)*od(jg,jlev,jcol) & + & + g_cloud(config%i_band_from_reordered_g_sw(jg),jlev,jcol) & + & * ssa_cloud(config%i_band_from_reordered_g_sw(jg),jlev,jcol) & + & * od_cloud_new) & + & / scat_od + end if + end if + ! Apply delta-Eddington scaling to the cloud-aerosol-gas + ! mixture + if (config%do_sw_delta_scaling_with_gases) then + call delta_eddington(od_total, ssa_total, g_total) + end if + + ! Compute cloudy-sky reflectance, transmittance etc at + ! each model level + call calc_ref_trans_sw_single_level_omp(jg, ng, & + & cos_sza, od_total, ssa_total, g_total, & + & reflectance(:,jlev,jcol), transmittance(:,jlev,jcol), & + & ref_dir(:,jlev,jcol), trans_dir_diff(:,jlev,jcol), & + & trans_dir_dir(:,jlev,jcol)) + else + ! Clear-sky layer: copy over clear-sky values + reflectance(jg,jlev,jcol) = ref_clear(jg,jlev,jcol) + transmittance(jg,jlev,jcol) = trans_clear(jg,jlev,jcol) + ref_dir(jg,jlev,jcol) = ref_dir_clear(jg,jlev,jcol) + !trans_dir_diff(jg,jlev,jcol) = trans_dir_diff_clear(jg,jlev,jcol) + !trans_dir_dir(jg,jlev,jcol) = trans_dir_dir_clear(jg,jlev,jcol) + end if + end do + + ! Use adding method to compute fluxes for an overcast sky + call adding_ica_sw_omp(jg, ng, nlev, incoming_sw(:,jcol), & + & albedo_diffuse(:,jcol), albedo_direct(:,jcol), cos_sza, & + & reflectance(:,:,jcol), transmittance(:,:,jcol), ref_dir(:,:,jcol), trans_dir_diff(:,:,jcol), & + & trans_dir_dir(:,:,jcol), flux_up(:,:,jcol), flux_dn_diffuse(:,:,jcol), flux_dn_direct(:,:,jcol), & + & flux_up(:,:,jcol), flux_dn_diffuse(:,:,jcol), source=tmp_work_source(:,:,jcol)) + + ! Likewise for surface spectral fluxes + flux%sw_dn_diffuse_surf_g(jg,jcol) = flux_dn_diffuse(jg,nlev+1,jcol) + flux%sw_dn_direct_surf_g(jg,jcol) = flux_dn_direct(jg,nlev+1,jcol) + flux%sw_dn_diffuse_surf_g(jg,jcol) = flux%cloud_cover_sw(jcol) *flux%sw_dn_diffuse_surf_g(jg,jcol) & + & + (1.0_jprb - flux%cloud_cover_sw(jcol))*flux%sw_dn_diffuse_surf_clear_g(jg,jcol) + flux%sw_dn_direct_surf_g(jg,jcol) = flux%cloud_cover_sw(jcol) *flux%sw_dn_direct_surf_g(jg,jcol) & + & + (1.0_jprb - flux%cloud_cover_sw(jcol))*flux%sw_dn_direct_surf_clear_g(jg,jcol) + + else + ! No cloud in profile and clear-sky fluxes already + ! calculated: copy them over + flux%sw_dn_diffuse_surf_g(jg,jcol) = flux%sw_dn_diffuse_surf_clear_g(jg,jcol) + flux%sw_dn_direct_surf_g(jg,jcol) = flux%sw_dn_direct_surf_clear_g(jg,jcol) + + end if ! Cloud is present in profile + else + flux%sw_dn_diffuse_surf_g(jg,jcol) = 0.0_jprb + flux%sw_dn_direct_surf_g(jg,jcol) = 0.0_jprb + flux%sw_dn_diffuse_surf_clear_g(jg,jcol) = 0.0_jprb + flux%sw_dn_direct_surf_clear_g(jg,jcol) = 0.0_jprb + end if ! Sun above horizon + end do !loop over spectral bands + end do ! Loop over columns + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + + ! Loop through columns + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(cos_sza, sum_dn_diffuse, sum_dn_direct, sum_up) +#if defined(__amdflang__) + !$OMP TILE SIZES(64,1) +#endif + do jcol = istartcol,iendcol + do jlev = 1, nlev+1 + + ! Only perform calculation if sun above the horizon + if (single_level%cos_sza(jcol) > 0.0_jprb) then + cos_sza = single_level%cos_sza(jcol) + + ! Sum over g-points to compute and save clear-sky broadband + ! fluxes + sum_up = 0.0_jprb + sum_dn_direct = 0.0_jprb + sum_dn_diffuse = 0.0_jprb + + do jg = 1,ng + sum_up = sum_up + flux_up_clear(jg,jlev,jcol) + sum_dn_direct = sum_dn_direct + flux_dn_direct_clear(jg,jlev,jcol) + sum_dn_diffuse = sum_dn_diffuse + flux_dn_diffuse_clear(jg,jlev,jcol) + end do + flux%sw_up_clear(jcol,jlev) = sum_up + if (allocated(flux%sw_dn_direct_clear)) then + flux%sw_dn_direct_clear(jcol,jlev) = sum_dn_direct + end if + flux%sw_dn_clear(jcol,jlev) = sum_dn_diffuse + sum_dn_direct + + if (flux%cloud_cover_sw(jcol) >= config%cloud_fraction_threshold) then + ! Store overcast broadband fluxes + sum_up = 0.0_jprb + sum_dn_direct = 0.0_jprb + sum_dn_diffuse = 0.0_jprb + do jg = 1,ng + sum_up = sum_up + flux_up(jg,jlev,jcol) + sum_dn_direct = sum_dn_direct + flux_dn_direct(jg,jlev,jcol) + sum_dn_diffuse = sum_dn_diffuse + flux_dn_diffuse(jg,jlev,jcol) + end do + flux%sw_up(jcol,jlev) = sum_up + if (allocated(flux%sw_dn_direct)) then + flux%sw_dn_direct(jcol,jlev) = sum_dn_direct + end if + flux%sw_dn(jcol,jlev) = sum_dn_diffuse + sum_dn_direct + + end if + end if ! Sun above horizon + end do ! Loop over columns + end do +#if defined(__amdflang__) + !$OMP END TILE +#endif + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + + ! Loop through columns + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(cos_sza) + do jlev = 1, nlev+1 + do jcol = istartcol,iendcol + ! Only perform calculation if sun above the horizon + if (single_level%cos_sza(jcol) > 0.0_jprb) then + cos_sza = single_level%cos_sza(jcol) + + if (flux%cloud_cover_sw(jcol) >= config%cloud_fraction_threshold) then + ! Cloudy flux profiles currently assume completely overcast + ! skies; perform weighted average with clear-sky profile + flux%sw_up(jcol,jlev) = flux%cloud_cover_sw(jcol) *flux%sw_up(jcol,jlev) & + & + (1.0_jprb - flux%cloud_cover_sw(jcol))*flux%sw_up_clear(jcol,jlev) + flux%sw_dn(jcol,jlev) = flux%cloud_cover_sw(jcol) *flux%sw_dn(jcol,jlev) & + & + (1.0_jprb - flux%cloud_cover_sw(jcol))*flux%sw_dn_clear(jcol,jlev) + if (allocated(flux%sw_dn_direct)) then + flux%sw_dn_direct(jcol,jlev) = flux%cloud_cover_sw(jcol) *flux%sw_dn_direct(jcol,jlev) & + & + (1.0_jprb - flux%cloud_cover_sw(jcol))*flux%sw_dn_direct_clear(jcol,jlev) + end if + + else + ! No cloud in profile and clear-sky fluxes already + ! calculated: copy them over + flux%sw_up(jcol,jlev) = flux%sw_up_clear(jcol,jlev) + flux%sw_dn(jcol,jlev) = flux%sw_dn_clear(jcol,jlev) + if (allocated(flux%sw_dn_direct)) then + flux%sw_dn_direct(jcol,jlev) = flux%sw_dn_direct_clear(jcol,jlev) + end if + + end if ! Cloud is present in profile + else + ! Set fluxes to zero if sun is below the horizon + flux%sw_up(jcol,jlev) = 0.0_jprb + flux%sw_dn(jcol,jlev) = 0.0_jprb + if (allocated(flux%sw_dn_direct)) then + flux%sw_dn_direct(jcol,jlev) = 0.0_jprb + end if + flux%sw_up_clear(jcol,jlev) = 0.0_jprb + flux%sw_dn_clear(jcol,jlev) = 0.0_jprb + if (allocated(flux%sw_dn_direct_clear)) then + flux%sw_dn_direct_clear(jcol,jlev) = 0.0_jprb + end if + end if ! Sun above horizon + end do ! Loop over columns + end do ! Loop over levels + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + + !$OMP TARGET EXIT DATA MAP(DELETE: flux_up, flux_dn_diffuse, flux_dn_direct, & + !$OMP flux_up_clear, flux_dn_diffuse_clear, flux_dn_direct_clear, & + !$OMP sample_val, frac, frac_std, overlap_param, & + !$OMP cum_cloud_cover, pair_cloud_cover, cum_product, ibegin, iend) + + !$OMP TARGET EXIT DATA MAP(DELETE: ref_clear, trans_clear, ref_dir_clear, & + !$OMP& od_scaling, tmp_work_source,& + !$OMP& ref_dir, trans_dir_diff, trans_dir_dir, reflectance, transmittance) + +#if defined(__amdflang__) + !$OMP END TARGET DATA +#endif + +#ifdef HAVE_ROCTX + call roctxEndRange +#endif + + if (lhook) call dr_hook('radiation_mcica_omp_sw:solver_mcica_omp_sw',1,hook_handle) + + end subroutine solver_mcica_omp_sw + +end module radiation_mcica_omp_sw diff --git a/radiation/radiation_random_numbers.F90 b/radiation/radiation_random_numbers.F90 index 6bd674f2..964b6495 100644 --- a/radiation/radiation_random_numbers.F90 +++ b/radiation/radiation_random_numbers.F90 @@ -45,8 +45,15 @@ module radiation_random_numbers implicit none + !$omp declare target(uniform_distribution_omp) + +#if defined(__INTEL_COMPILER) || (defined(__NVCOMPILER) && defined(OMPGPU)) + ! This is needed in order for the intel llvm ifx compiler to build this successfully + public +#else public :: rng_type, IRngMinstdVector, IRngNative, initialize_acc, & - & uniform_distribution_acc, IMinstdA0, IMinstdA, IMinstdM + & uniform_distribution_acc, IMinstdA0, IMinstdA, IMinstdM, IMinstdScale, uniform_distribution_omp +#endif enum, bind(c) enumerator IRngNative, & ! Built-in Fortran-90 RNG @@ -343,5 +350,21 @@ function uniform_distribution_acc(istate) result(randnum) end function uniform_distribution_acc + !--------------------------------------------------------------------- + ! Populate vector "randnum" with pseudo-random numbers; if rannum is + ! of length greater than nmaxstreams (specified when the generator + ! was initialized) then only the first nmaxstreams elements will be + ! assigned. + subroutine uniform_distribution_omp(istate,randnum) + + integer(kind=jpib), intent(inout) :: istate + real(kind=jprb), intent(out) :: randnum + + ! C++ minstd_rand algorithm + istate = mod(IMinstdA * istate, IMinstdM) + randnum = IMinstdScale * istate + + end subroutine uniform_distribution_omp + end module radiation_random_numbers diff --git a/radiation/radiation_two_stream.F90 b/radiation/radiation_two_stream.F90 index 69372b52..26bb2d4f 100644 --- a/radiation/radiation_two_stream.F90 +++ b/radiation/radiation_two_stream.F90 @@ -43,6 +43,15 @@ module radiation_two_stream ! Uncomment the following to turn Dr Hook on. !#define DO_DR_HOOK_TWO_STREAM + !$omp declare target(calc_two_stream_gammas_sw_single_band_omp) + !$omp declare target(calc_reflectance_transmittance_sw_single_band_omp) + !$omp declare target(calc_ref_trans_sw_omp) + !$omp declare target(calc_ref_trans_sw_single_level_omp) + + !$omp declare target(calc_ref_trans_lw_single_level_omp) + !$omp declare target(calc_no_scattering_transmittance_lw_omp) + !$omp declare target(calc_no_scattering_transmittance_lw_single_cell_omp) + contains !--------------------------------------------------------------------- @@ -1078,4 +1087,594 @@ subroutine calc_frac_scattered_diffuse_sw(ng, od, & end subroutine calc_frac_scattered_diffuse_sw + !--------------------------------------------------------------------- + ! + ! OpenMP routines + ! + !--------------------------------------------------------------------- + + + !--------------------------------------------------------------------- + ! OpenMP-optimized variant that avoids the local allocation of gamma + ! coefficients + subroutine calc_ref_trans_lw_single_level_omp(jg, ng, & + & od, ssa, asymmetry, planck, & + & reflectance, transmittance, source_up, source_dn) + + integer, intent(in) :: jg, ng + + ! Optical depth and single scattering albedo + real(jprb), intent(in) :: od + + ! Single scattering albedo and asymmetry factor + real(jprb), intent(in) :: ssa, asymmetry + + ! The Planck terms (functions of temperature) at the top and + ! bottom of the layer + real(jprb), intent(in), dimension(ng,2) :: planck + + ! The diffuse reflectance and transmittance, i.e. the fraction of + ! diffuse radiation incident on a layer from either top or bottom + ! that is reflected back or transmitted through + real(jprb), intent(out), dimension(ng) :: reflectance, transmittance + + ! The upward emission at the top of the layer and the downward + ! emission at its base, due to emission from within the layer + real(jprb), intent(out), dimension(ng) :: source_up, source_dn + + real(jprb) :: gamma1, gamma2 + + ! The two transfer coefficients from the two-stream + ! differential equations + !real(jprb), dimension(ng) :: k_exponent, exponential + real(jprb) :: reftrans_factor + real(jprb) :: exponential2 ! = exp(-2*k_exponent*od) + real(jprb) :: coeff, coeff_up_top, coeff_up_bot, coeff_dn_top, coeff_dn_bot, factor + + ! + ! Ideally, we would use the associate statement below, however this doesn't + ! work with NVCOMPILER with OpenMP offload. Thus, we just use the more the array + ! that is pointed to :( + ! +#if defined(__amdflang__) + associate (exponential=>transmittance, k_exponent=>source_up) +#endif + + factor = (LwDiffusivityWP * 0.5_jprb) * ssa + gamma1 = LwDiffusivityWP - factor*(1.0_jprb + asymmetry) + gamma2 = factor * (1.0_jprb - asymmetry) +#if defined(__amdflang__) + k_exponent(jg) = sqrt(max((gamma1 - gamma2) * (gamma1 + gamma2), & + 1.0e-12_jprb)) ! Eq 18 of Meador & Weaver (1980) + + exponential(jg) = exp(-k_exponent(jg)*od) +#else + source_up(jg) = sqrt(max((gamma1 - gamma2) * (gamma1 + gamma2), & + 1.0e-12_jprb)) ! Eq 18 of Meador & Weaver (1980) + + transmittance(jg) = exp(-source_up(jg)*od) +#endif + + if (od > 1.0e-3_jprb) then +#if defined(__amdflang__) + exponential2 = exponential(jg)*exponential(jg) + reftrans_factor = 1.0 / (k_exponent(jg) + gamma1 + (k_exponent(jg) - gamma1)*exponential2) +#else + exponential2 = transmittance(jg)*transmittance(jg) + reftrans_factor = 1.0 / (source_up(jg) + gamma1 + (source_up(jg) - gamma1)*exponential2) +#endif + ! Meador & Weaver (1980) Eq. 25 + reflectance(jg) = gamma2 * (1.0_jprb - exponential2) * reftrans_factor + ! Meador & Weaver (1980) Eq. 26 +#if defined(__amdflang__) + transmittance(jg) = 2.0_jprb * k_exponent(jg) * exponential(jg) * reftrans_factor +#else + transmittance(jg) = 2.0_jprb * source_up(jg) * transmittance(jg) * reftrans_factor +#endif + ! Compute upward and downward emission assuming the Planck + ! function to vary linearly with optical depth within the layer + ! (e.g. Wiscombe , JQSRT 1976). + + ! Stackhouse and Stephens (JAS 1991) Eqs 5 & 12 + coeff = (planck(jg,2)-planck(jg,1)) / (od*(gamma1+gamma2)) + coeff_up_top = coeff + planck(jg,1) + coeff_up_bot = coeff + planck(jg,2) + coeff_dn_top = -coeff + planck(jg,1) + coeff_dn_bot = -coeff + planck(jg,2) + source_up(jg) = coeff_up_top - reflectance(jg) * coeff_dn_top - transmittance(jg) * coeff_up_bot + source_dn(jg) = coeff_dn_bot - reflectance(jg) * coeff_up_bot - transmittance(jg) * coeff_dn_top + else if (od < 1.0e-8_jprb) then + source_up(jg) = 0.0_jprb + source_dn(jg) = 0.0_jprb + else + reflectance(jg) = gamma2 * od +#if defined(__amdflang__) + transmittance(jg) = (1.0_jprb - k_exponent(jg)*od) / (1.0_jprb + od*(gamma1-k_exponent(jg))) +#else + transmittance(jg) = (1.0_jprb - source_up(jg)*od) / (1.0_jprb + od*(gamma1-source_up(jg))) +#endif + source_up(jg) = (1.0_jprb - reflectance(jg) - transmittance(jg)) & + & * 0.5 * (planck(jg,1) + planck(jg,2)) + source_dn(jg) = source_up(jg) + end if + +#if defined(__amdflang__) + end associate +#endif + + end subroutine calc_ref_trans_lw_single_level_omp + + !--------------------------------------------------------------------- + ! Compute the longwave transmittance to diffuse radiation in the + ! no-scattering case, as well as the upward flux at the top and the + ! downward flux at the base of the layer due to emission from within + ! the layer assuming a linear variation of Planck function within + ! the layer. + subroutine calc_no_scattering_transmittance_lw_omp(jg, ng, nlev, & + & od, planck, transmittance, source_up, source_dn) + + integer, intent(in) :: jg, ng, nlev + + ! Optical depth and single scattering albedo + real(jprb), intent(in), dimension(ng,nlev) :: od + + ! The Planck terms (functions of temperature) at the top and + ! bottom of the layer + real(jprb), intent(in), dimension(ng,nlev+1) :: planck + + ! The diffuse transmittance, i.e. the fraction of diffuse + ! radiation incident on a layer from either top or bottom that is + ! reflected back or transmitted through + real(jprb), intent(out), dimension(ng,nlev) :: transmittance + + ! The upward emission at the top of the layer and the downward + ! emission at its base, due to emission from within the layer + real(jprb), intent(out), dimension(ng,nlev) :: source_up, source_dn + + real(jprb) :: coeff, coeff_up_top, coeff_up_bot, coeff_dn_top, coeff_dn_bot !, planck_mean + + integer :: jlev + + do jlev = 1, nlev + ! Compute upward and downward emission assuming the Planck + ! function to vary linearly with optical depth within the layer + ! (e.g. Wiscombe , JQSRT 1976). + coeff = LwDiffusivityWP*od(jg,jlev) +!#ifdef DWD_TWO_STREAM_OPTIMIZATIONS + transmittance(jg,jlev) = exp(-coeff) + !#endif + if (od(jg,jlev) > 1.0e-3_jprb) then + ! Simplified from calc_reflectance_transmittance_lw above + coeff = (planck(jg,jlev+1)-planck(jg,jlev)) / coeff + coeff_up_top = coeff + planck(jg,jlev) + coeff_up_bot = coeff + planck(jg,jlev+1) + coeff_dn_top = -coeff + planck(jg,jlev) + coeff_dn_bot = -coeff + planck(jg,jlev+1) + source_up(jg,jlev) = coeff_up_top - transmittance(jg,jlev) * coeff_up_bot + source_dn(jg,jlev) = coeff_dn_bot - transmittance(jg,jlev) * coeff_dn_top + else + ! Linear limit at low optical depth + source_up(jg,jlev) = coeff * 0.5_jprb * (planck(jg,jlev)+planck(jg,jlev+1)) + source_dn(jg,jlev) = source_up(jg,jlev) + end if + end do + + end subroutine calc_no_scattering_transmittance_lw_omp + + !--------------------------------------------------------------------- + ! Compute the longwave transmittance to diffuse radiation in the + ! no-scattering case, as well as the upward flux at the top and the + ! downward flux at the base of the layer due to emission from within + ! the layer assuming a linear variation of Planck function within + ! the layer. + subroutine calc_no_scattering_transmittance_lw_single_cell_omp( & + & od, planck_top, planck_bot, transmittance, source_up, source_dn) + + ! Optical depth and single scattering albedo as scalar + real(jprb), intent(in) :: od + + ! The Planck terms (functions of temperature) at the top and + ! bottom of the layer + real(jprb), intent(in) :: planck_top, planck_bot + + ! The diffuse transmittance, i.e. the fraction of diffuse + ! radiation incident on a layer from either top or bottom that is + ! reflected back or transmitted through + real(jprb), intent(out) :: transmittance + + ! The upward emission at the top of the layer and the downward + ! emission at its base, due to emission from within the layer + real(jprb), intent(out) :: source_up, source_dn + + real(jprb) :: coeff, coeff_up_top, coeff_up_bot, coeff_dn_top, coeff_dn_bot !, planck_mean + + ! Compute upward and downward emission assuming the Planck + ! function to vary linearly with optical depth within the layer + ! (e.g. Wiscombe , JQSRT 1976). + coeff = LwDiffusivityWP*od + transmittance = exp(-coeff) + if (od > 1.0e-3_jprb) then + ! Simplified from calc_reflectance_transmittance_lw above + coeff = (planck_bot-planck_top) / coeff + coeff_up_top = coeff + planck_top + coeff_up_bot = coeff + planck_bot + coeff_dn_top = -coeff + planck_top + coeff_dn_bot = -coeff + planck_bot + source_up = coeff_up_top - transmittance * coeff_up_bot + source_dn = coeff_dn_bot - transmittance * coeff_dn_top + else + ! Linear limit at low optical depth + source_up = coeff * 0.5_jprb * (planck_top+planck_bot) + source_dn = source_up + end if + + end subroutine calc_no_scattering_transmittance_lw_single_cell_omp + + !--------------------------------------------------------------------- + ! Calculate the two-stream coefficients gamma1-gamma4 in the + ! shortwave + subroutine calc_two_stream_gammas_sw_single_band_omp(jg, mu0, ssa, g, & + & gamma1, gamma2, gamma3) + + integer, intent(in) :: jg + ! Cosine of solar zenith angle, single scattering albedo and + ! asymmetry factor: + real(jprb), intent(in) :: mu0 + real(jprb), intent(in) :: ssa, g + real(jprb), intent(out) :: gamma1, gamma2, gamma3 + + real(jprb) :: factor + + ! Zdunkowski "PIFM" (Zdunkowski et al., 1980; Contributions to + ! Atmospheric Physics 53, 147-66) +! Added for DWD (2020) +!NEC$ shortloop + ! gamma1 = 2.0_jprb - ssa * (1.25_jprb + 0.75_jprb*g) + ! gamma2 = 0.75_jprb *(ssa * (1.0_jprb - g)) + ! gamma3 = 0.5_jprb - (0.75_jprb*mu0)*g + ! Optimized version: + factor = 0.75_jprb*g + gamma1 = 2.0_jprb - ssa * (1.25_jprb + factor) + gamma2 = ssa * (0.75_jprb - factor) + gamma3 = 0.5_jprb - mu0*factor + + end subroutine calc_two_stream_gammas_sw_single_band_omp + + !--------------------------------------------------------------------- + ! Compute the shortwave reflectance and transmittance to diffuse + ! radiation using the Meador & Weaver formulas, as well as the + ! "direct" reflection and transmission, which really means the rate + ! of transfer of direct solar radiation (into a plane perpendicular + ! to the direct beam) into diffuse upward and downward streams at + ! the top and bottom of the layer, respectively. Finally, + ! trans_dir_dir is the transmittance of the atmosphere to direct + ! radiation with no scattering. + subroutine calc_reflectance_transmittance_sw_single_band_omp(jg, ng, mu0, od, ssa, & + & gamma1, gamma2, gamma3, ref_diff, trans_diff, & + & ref_dir, trans_dir_diff, trans_dir_dir) + + integer, intent(in) :: jg, ng + + ! Cosine of solar zenith angle + real(jprb), intent(in) :: mu0 + + ! Optical depth and single scattering albedo + real(jprb), intent(in) :: od, ssa + + ! The three transfer coefficients from the two-stream + ! differentiatial equations (computed by calc_two_stream_gammas) + real(jprb), intent(in) :: gamma1, gamma2, gamma3 + + ! The direct reflectance and transmittance, i.e. the fraction of + ! incoming direct solar radiation incident at the top of a layer + ! that is either reflected back (ref_dir) or scattered but + ! transmitted through the layer to the base (trans_dir_diff) + real(jprb), intent(out), dimension(ng) :: ref_dir, trans_dir_diff + + ! The diffuse reflectance and transmittance, i.e. the fraction of + ! diffuse radiation incident on a layer from either top or bottom + ! that is reflected back or transmitted through + real(jprb), intent(out), dimension(ng) :: ref_diff, trans_diff + + ! Transmittance of the direct been with no scattering + real(jprb), intent(out), dimension(ng) :: trans_dir_dir + + real(jprd) :: gamma4, alpha1, alpha2, k_exponent, reftrans_factor + real(jprb) :: exponential0 ! = exp(-od/mu0) + real(jprd) :: exponential ! = exp(-k_exponent*od) + real(jprd) :: exponential2 ! = exp(-2*k_exponent*od) + real(jprd) :: k_mu0, k_gamma3, k_gamma4 + real(jprd) :: k_2_exponential, od_over_mu0 + + ! Local value of cosine of solar zenith angle, in case it needs to be + ! tweaked to avoid near division by zero. This is intentionally in working + ! precision (jprb) rather than fixing at double precision (jprd). + real(jprb) :: mu0_local + +! Added for DWD (2020) +!NEC$ shortloop + + gamma4 = 1.0_jprd - gamma3 + alpha1 = gamma1*gamma4 + gamma2*gamma3 ! Eq. 16 + alpha2 = gamma1*gamma3 + gamma2*gamma4 ! Eq. 17 + + k_exponent = sqrt(max((gamma1 - gamma2) * (gamma1 + gamma2), & + & 1.0e-12_jprd)) ! Eq 18 + + ! We had a rare crash where k*mu0 was within around 1e-13 of 1, + ! leading to ref_dir and trans_dir_diff being well outside the range + ! 0-1. The following approach is appropriate when k_exponent is double + ! precision and mu0_local is single precision, although work is needed + ! to make this entire routine secure in single precision. + mu0_local = mu0 + if (abs(1.0_jprd - k_exponent*mu0) < 1000.0_jprd * epsilon(1.0_jprd)) then + mu0_local = mu0 * (1.0_jprb + SIGN(1._jprd,k_exponent*mu0-1._jprd)*10.0_jprb*epsilon(1.0_jprb)) + end if + + od_over_mu0 = max(od / mu0_local, 0.0_jprd) + + ! Note that if the minimum value is reduced (e.g. to 1.0e-24) + ! then noise starts to appear as a function of solar zenith + ! angle + k_mu0 = k_exponent*mu0_local + k_gamma3 = k_exponent*gamma3 + k_gamma4 = k_exponent*gamma4 + ! Check for mu0 <= 0! + exponential0 = exp(-od_over_mu0) + trans_dir_dir(jg) = exponential0 + exponential = exp(-k_exponent*od) + + exponential2 = exponential*exponential + k_2_exponential = 2.0_jprd * k_exponent * exponential + + reftrans_factor = 1.0_jprd / (k_exponent + gamma1 + (k_exponent - gamma1)*exponential2) + + ! Meador & Weaver (1980) Eq. 25 + ref_diff(jg) = gamma2 * (1.0_jprd - exponential2) * reftrans_factor + + ! Meador & Weaver (1980) Eq. 26 + trans_diff(jg) = k_2_exponential * reftrans_factor + + ! Here we need mu0 even though it wasn't in Meador and Weaver + ! because we are assuming the incoming direct flux is defined + ! to be the flux into a plane perpendicular to the direction of + ! the sun, not into a horizontal plane + reftrans_factor = mu0_local * ssa * reftrans_factor / (1.0_jprd - k_mu0*k_mu0) + + ! Meador & Weaver (1980) Eq. 14, multiplying top & bottom by + ! exp(-k_exponent*od) in case of very high optical depths + ref_dir(jg) = reftrans_factor & + & * ( (1.0_jprd - k_mu0) * (alpha2 + k_gamma3) & + & -(1.0_jprd + k_mu0) * (alpha2 - k_gamma3)*exponential2 & + & -k_2_exponential*(gamma3 - alpha2*mu0_local)*exponential0) + + ! Meador & Weaver (1980) Eq. 15, multiplying top & bottom by + ! exp(-k_exponent*od), minus the 1*exp(-od/mu0) term representing direct + ! unscattered transmittance. + trans_dir_diff(jg) = reftrans_factor * ( k_2_exponential*(gamma4 + alpha1*mu0_local) & + & - exponential0 & + & * ( (1.0_jprd + k_mu0) * (alpha1 + k_gamma4) & + & -(1.0_jprd - k_mu0) * (alpha1 - k_gamma4) * exponential2) ) + + ! Final check that ref_dir + trans_dir_diff <= 1 + ref_dir(jg) = max(0.0_jprb, min(ref_dir(jg), 1.0_jprb)) + trans_dir_diff(jg) = max(0.0_jprb, min(trans_dir_diff(jg), 1.0_jprb-ref_dir(jg))) + + end subroutine calc_reflectance_transmittance_sw_single_band_omp + + !--------------------------------------------------------------------- + ! OpenMP-optimized variant that avoids the local allocation of gamma + ! coefficients + subroutine calc_ref_trans_sw_omp(jg, ng, nlev, mu0, od, ssa, & + & asymmetry, ref_diff, trans_diff, & + & ref_dir, trans_dir_diff, trans_dir_dir) + + implicit none + + integer, intent(in) :: jg, ng, nlev + + ! Cosine of solar zenith angle + real(jprb), intent(in) :: mu0 + + ! Optical depth and single scattering albedo + real(jprb), intent(in), dimension(ng, nlev) :: od, ssa, asymmetry + + ! The direct reflectance and transmittance, i.e. the fraction of + ! incoming direct solar radiation incident at the top of a layer + ! that is either reflected back (ref_dir) or scattered but + ! transmitted through the layer to the base (trans_dir_diff) + real(jprb), intent(out), dimension(ng, nlev) :: ref_dir, trans_dir_diff + + ! The diffuse reflectance and transmittance, i.e. the fraction of + ! diffuse radiation incident on a layer from either top or bottom + ! that is reflected back or transmitted through + real(jprb), intent(out), dimension(ng, nlev) :: ref_diff, trans_diff + + ! Transmittance of the direct been with no scattering + real(jprb), intent(out), dimension(ng, nlev) :: trans_dir_dir + + real(jprb) :: gamma1, gamma2, gamma3, gamma4 + real(jprb) :: alpha1, alpha2, k_exponent + real(jprb) :: exponential ! = exp(-k_exponent*od) + + real(jprb) :: reftrans_factor, factor + real(jprb) :: exponential2 ! = exp(-2*k_exponent*od) + real(jprb) :: k_mu0, k_gamma3, k_gamma4 + real(jprb) :: k_2_exponential, one_minus_kmu0_sqr + integer :: jlev + + ! GPU-capable and vector-optimized version for ICON + do jlev = 1, nlev + + trans_dir_dir(jg,jlev) = max(-max(od(jg,jlev) * (1.0_jprb/mu0),0.0_jprb),-1000.0_jprb) + trans_dir_dir(jg,jlev) = exp(trans_dir_dir(jg,jlev)) + + ! Zdunkowski "PIFM" (Zdunkowski et al., 1980; Contributions to + ! Atmospheric Physics 53, 147-66) + factor = 0.75_jprb*asymmetry(jg,jlev) + + gamma1 = 2.0_jprb - ssa(jg,jlev) * (1.25_jprb + factor) + gamma2 = ssa(jg,jlev) * (0.75_jprb - factor) + gamma3 = 0.5_jprb - mu0*factor + gamma4 = 1.0_jprb - gamma3 + + alpha1 = gamma1*gamma4 + gamma2*gamma3 ! Eq. 16 + alpha2 = gamma1*gamma3 + gamma2*gamma4 ! Eq. 17 +#ifdef PARKIND1_SINGLE + k_exponent = sqrt(max((gamma1 - gamma2) * (gamma1 + gamma2), 1.0e-6_jprb)) ! Eq 18 +#else + k_exponent = sqrt(max((gamma1 - gamma2) * (gamma1 + gamma2), 1.0e-12_jprb)) ! Eq 18 +#endif + + exponential = exp(-k_exponent*od(jg,jlev)) + + k_mu0 = k_exponent*mu0 + one_minus_kmu0_sqr = 1.0_jprb - k_mu0*k_mu0 + k_gamma3 = k_exponent*gamma3 + k_gamma4 = k_exponent*gamma4 + exponential2 = exponential*exponential + k_2_exponential = 2.0_jprb * k_exponent * exponential + reftrans_factor = 1.0_jprb / (k_exponent + gamma1 + (k_exponent - gamma1)*exponential2) + + ! Meador & Weaver (1980) Eq. 25 + ref_diff(jg,jlev) = gamma2 * (1.0_jprb - exponential2) * reftrans_factor + + ! Meador & Weaver (1980) Eq. 26 + trans_diff(jg,jlev) = k_2_exponential * reftrans_factor + + ! Here we need mu0 even though it wasn't in Meador and Weaver + ! because we are assuming the incoming direct flux is defined to + ! be the flux into a plane perpendicular to the direction of the + ! sun, not into a horizontal plane + reftrans_factor = mu0 * ssa(jg,jlev) * reftrans_factor & + & / merge(one_minus_kmu0_sqr, epsilon(1.0_jprb), abs(one_minus_kmu0_sqr) > epsilon(1.0_jprb)) + + ! Meador & Weaver (1980) Eq. 14, multiplying top & bottom by + ! exp(-k_exponent*od) in case of very high optical depths + ref_dir(jg,jlev) = reftrans_factor & + & * ( (1.0_jprb - k_mu0) * (alpha2 + k_gamma3) & + & -(1.0_jprb + k_mu0) * (alpha2 - k_gamma3)*exponential2 & + & -k_2_exponential*(gamma3 - alpha2*mu0)*trans_dir_dir(jg,jlev) ) + + ! Meador & Weaver (1980) Eq. 15, multiplying top & bottom by + ! exp(-k_exponent*od), minus the 1*exp(-od/mu0) term + ! representing direct unscattered transmittance. + trans_dir_diff(jg,jlev) = reftrans_factor * ( k_2_exponential*(gamma4 + alpha1*mu0) & + & - trans_dir_dir(jg,jlev) & + & * ( (1.0_jprb + k_mu0) * (alpha1 + k_gamma4) & + & -(1.0_jprb - k_mu0) * (alpha1 - k_gamma4) * exponential2) ) + + ! Final check that ref_dir + trans_dir_diff <= 1 + ref_dir(jg,jlev) = max(0.0_jprb, min(ref_dir(jg,jlev), mu0*(1.0_jprb-trans_dir_dir(jg,jlev)))) + trans_dir_diff(jg,jlev) = max(0.0_jprb, min(trans_dir_diff(jg,jlev), mu0*(1.0_jprb-trans_dir_dir(jg,jlev))-ref_dir(jg,jlev))) + + end do + + end subroutine calc_ref_trans_sw_omp + + !--------------------------------------------------------------------- + ! OpenMP-optimized variant that avoids the local allocation of gamma + ! coefficients + subroutine calc_ref_trans_sw_single_level_omp(jg, ng, mu0, od, ssa, & + & asymmetry, ref_diff, trans_diff, & + & ref_dir, trans_dir_diff, trans_dir_dir) + + implicit none + + integer, intent(in) :: jg, ng + + ! Cosine of solar zenith angle + real(jprb), intent(in) :: mu0 + + ! Optical depth and single scattering albedo + real(jprb), intent(in) :: od, ssa, asymmetry + + ! The direct reflectance and transmittance, i.e. the fraction of + ! incoming direct solar radiation incident at the top of a layer + ! that is either reflected back (ref_dir) or scattered but + ! transmitted through the layer to the base (trans_dir_diff) + real(jprb), intent(out), dimension(ng) :: ref_dir, trans_dir_diff + + ! The diffuse reflectance and transmittance, i.e. the fraction of + ! diffuse radiation incident on a layer from either top or bottom + ! that is reflected back or transmitted through + real(jprb), intent(out), dimension(ng) :: ref_diff, trans_diff + + ! Transmittance of the direct been with no scattering + real(jprb), intent(out), dimension(ng) :: trans_dir_dir + + real(jprb) :: gamma1, gamma2, gamma3, gamma4 + real(jprb) :: alpha1, alpha2, k_exponent + real(jprb) :: exponential ! = exp(-k_exponent*od) + + real(jprb) :: reftrans_factor, factor + real(jprb) :: exponential2 ! = exp(-2*k_exponent*od) + real(jprb) :: k_mu0, k_gamma3, k_gamma4 + real(jprb) :: k_2_exponential, one_minus_kmu0_sqr + + ! GPU-capable and vector-optimized version for ICON + trans_dir_dir(jg) = max(-max(od * (1.0_jprb/mu0),0.0_jprb),-1000.0_jprb) + trans_dir_dir(jg) = exp(trans_dir_dir(jg)) + + ! Zdunkowski "PIFM" (Zdunkowski et al., 1980; Contributions to + ! Atmospheric Physics 53, 147-66) + factor = 0.75_jprb*asymmetry + + gamma1 = 2.0_jprb - ssa * (1.25_jprb + factor) + gamma2 = ssa * (0.75_jprb - factor) + gamma3 = 0.5_jprb - mu0*factor + gamma4 = 1.0_jprb - gamma3 + + alpha1 = gamma1*gamma4 + gamma2*gamma3 ! Eq. 16 + alpha2 = gamma1*gamma3 + gamma2*gamma4 ! Eq. 17 +#ifdef PARKIND1_SINGLE + k_exponent = sqrt(max((gamma1 - gamma2) * (gamma1 + gamma2), 1.0e-6_jprb)) ! Eq 18 +#else + k_exponent = sqrt(max((gamma1 - gamma2) * (gamma1 + gamma2), 1.0e-12_jprb)) ! Eq 18 +#endif + + exponential = exp(-k_exponent*od) + + k_mu0 = k_exponent*mu0 + one_minus_kmu0_sqr = 1.0_jprb - k_mu0*k_mu0 + k_gamma3 = k_exponent*gamma3 + k_gamma4 = k_exponent*gamma4 + exponential2 = exponential*exponential + k_2_exponential = 2.0_jprb * k_exponent * exponential + reftrans_factor = 1.0_jprb / (k_exponent + gamma1 + (k_exponent - gamma1)*exponential2) + + ! Meador & Weaver (1980) Eq. 25 + ref_diff(jg) = gamma2 * (1.0_jprb - exponential2) * reftrans_factor + + ! Meador & Weaver (1980) Eq. 26 + trans_diff(jg) = k_2_exponential * reftrans_factor + + ! Here we need mu0 even though it wasn't in Meador and Weaver + ! because we are assuming the incoming direct flux is defined to + ! be the flux into a plane perpendicular to the direction of the + ! sun, not into a horizontal plane + reftrans_factor = mu0 * ssa * reftrans_factor & + & / merge(one_minus_kmu0_sqr, epsilon(1.0_jprb), abs(one_minus_kmu0_sqr) > epsilon(1.0_jprb)) + + ! Meador & Weaver (1980) Eq. 14, multiplying top & bottom by + ! exp(-k_exponent*od) in case of very high optical depths + ref_dir(jg) = reftrans_factor & + & * ( (1.0_jprb - k_mu0) * (alpha2 + k_gamma3) & + & -(1.0_jprb + k_mu0) * (alpha2 - k_gamma3)*exponential2 & + & -k_2_exponential*(gamma3 - alpha2*mu0)*trans_dir_dir(jg) ) + + ! Meador & Weaver (1980) Eq. 15, multiplying top & bottom by + ! exp(-k_exponent*od), minus the 1*exp(-od/mu0) term + ! representing direct unscattered transmittance. + trans_dir_diff(jg) = reftrans_factor * ( k_2_exponential*(gamma4 + alpha1*mu0) & + & - trans_dir_dir(jg) & + & * ( (1.0_jprb + k_mu0) * (alpha1 + k_gamma4) & + & -(1.0_jprb - k_mu0) * (alpha1 - k_gamma4) * exponential2) ) + + ! Final check that ref_dir + trans_dir_diff <= 1 + ref_dir(jg) = max(0.0_jprb, min(ref_dir(jg), mu0*(1.0_jprb-trans_dir_dir(jg)))) + trans_dir_diff(jg) = max(0.0_jprb, min(trans_dir_diff(jg), mu0*(1.0_jprb-trans_dir_dir(jg))-ref_dir(jg))) + + end subroutine calc_ref_trans_sw_single_level_omp + end module radiation_two_stream