diff --git a/radiation/CMakeLists.txt b/radiation/CMakeLists.txt
index db5ae2fd..b032f0f2 100644
--- a/radiation/CMakeLists.txt
+++ b/radiation/CMakeLists.txt
@@ -49,6 +49,8 @@ set( radiation_SOURCES
         radiation_liquid_optics_socrates.F90
         radiation_lw_derivatives.F90
         radiation_matrix.F90
+        radiation_mcica_omp_lw.F90
+        radiation_mcica_omp_sw.F90
         radiation_mcica_acc_lw.F90
         radiation_mcica_acc_sw.F90
         radiation_mcica_lw.F90
diff --git a/radiation/radiation_adding_ica_lw.F90 b/radiation/radiation_adding_ica_lw.F90
index 4fdced20..990d5b69 100644
--- a/radiation/radiation_adding_ica_lw.F90
+++ b/radiation/radiation_adding_ica_lw.F90
@@ -21,6 +21,8 @@ module radiation_adding_ica_lw
 
   public
 
+  !$omp declare target(fast_adding_ica_lw_omp)
+  !$omp declare target(calc_fluxes_no_scattering_lw_omp)
 contains
 
   !---------------------------------------------------------------------
@@ -281,6 +283,125 @@ subroutine fast_adding_ica_lw(ncol, nlev, &
 
   end subroutine fast_adding_ica_lw
 
+  !---------------------------------------------------------------------
+  ! Use the scalar "adding" method to compute longwave flux profiles,
+  ! including scattering in cloudy layers only.
+  subroutine fast_adding_ica_lw_omp(jg, ng, nlev, &
+       &  reflectance, transmittance, source_up, source_dn, emission_surf, albedo_surf, &
+       &  is_clear_sky_layer, i_cloud_top, flux_dn_clear, &
+       &  flux_up, flux_dn, albedo, inv_denominator, source)
+
+    use parkind1, only           : jprb
+    implicit none
+
+    ! Inputs
+    integer, intent(in) :: jg ! spectral index
+    integer, intent(in) :: ng ! number of spectral bands
+    integer, intent(in) :: nlev ! number of levels
+
+    ! Surface emission (W m-2) and albedo
+    real(jprb), intent(in),  dimension(ng) :: emission_surf, albedo_surf
+
+    ! Diffuse reflectance and transmittance of each layer
+    real(jprb), intent(in),  dimension(ng, nlev)   :: reflectance, transmittance
+
+    ! Emission from each layer in an upward and downward direction
+    real(jprb), intent(in),  dimension(ng, nlev)   :: source_up, source_dn
+
+    ! Determine which layers are cloud-free
+    logical, intent(in) :: is_clear_sky_layer(nlev)
+
+    ! Index to highest cloudy layer
+    integer, intent(in) :: i_cloud_top
+
+    ! Pre-computed clear-sky downwelling fluxes (W m-2) at half-levels
+    real(jprb), intent(in), dimension(ng, nlev+1)  :: flux_dn_clear
+
+    ! Resulting fluxes (W m-2) at half-levels: diffuse upwelling and
+    ! downwelling
+    real(jprb), intent(out), dimension(ng, nlev+1) :: flux_up, flux_dn
+    real(jprb), intent(out), dimension(ng, nlev+1) :: albedo, inv_denominator
+
+    ! Upwelling radiation at each half-level due to emission below
+    ! that half-level (W m-2)
+    real(jprb), intent(out), dimension(ng, nlev+1) :: source
+
+    ! Loop index for model level and column
+    integer :: jlev
+
+    !
+    ! Ideally, we would use the associate statement below, however this doesn't
+    ! work with NVCOMPILER. Thus, we pass albedo and inv_denominator in from the
+    ! calling code. However, we pass it in as flux up and flux_dn. That is, the
+    ! flux_up and flux_dn are temporarilly reusable.
+    !
+    !associate(albedo=>flux_up, inv_denominator=>flux_dn)
+
+    albedo(jg,nlev+1) = albedo_surf(jg)
+
+    ! At the surface, the source is thermal emission
+    source(jg,nlev+1) = emission_surf(jg)
+
+    ! Work back up through the atmosphere and compute the albedo of
+    ! the entire earth/atmosphere system below that half-level, and
+    ! also the "source", which is the upwelling flux due to emission
+    ! below that level
+    do jlev = nlev,i_cloud_top,-1
+      if (is_clear_sky_layer(jlev)) then
+         ! Reflectance of this layer is zero, simplifying the expression
+         albedo(jg,jlev) = transmittance(jg,jlev)*transmittance(jg,jlev)*albedo(jg,jlev+1)
+         source(jg,jlev) = source_up(jg,jlev) &
+              &  + transmittance(jg,jlev) * (source(jg,jlev+1) &
+              &                    + albedo(jg,jlev+1)*source_dn(jg,jlev))
+      else
+         ! Loop over columns; explicit loop seems to be faster
+         ! Lacis and Hansen (1974) Eq 33, Shonk & Hogan (2008) Eq 10:
+         inv_denominator(jg,jlev+1) = 1.0_jprb &
+              &  / (1.0_jprb-albedo(jg,jlev+1)*reflectance(jg,jlev))
+         ! Shonk & Hogan (2008) Eq 9, Petty (2006) Eq 13.81:
+         albedo(jg,jlev) = reflectance(jg,jlev) + transmittance(jg,jlev)*transmittance(jg,jlev) &
+              &  * albedo(jg,jlev+1) * inv_denominator(jg,jlev+1)
+         ! Shonk & Hogan (2008) Eq 11:
+         source(jg,jlev) = source_up(jg,jlev) &
+              &  + transmittance(jg,jlev) * (source(jg,jlev+1) &
+              &                    + albedo(jg,jlev+1)*source_dn(jg,jlev)) &
+              &                   * inv_denominator(jg,jlev+1)
+      end if
+    end do
+
+    ! Copy over downwelling fluxes above cloud from clear sky
+    do jlev = 1,i_cloud_top
+       flux_dn(jg,jlev) = flux_dn_clear(jg,jlev)
+    enddo
+
+    ! Compute the fluxes above the highest cloud
+    flux_up(jg,i_cloud_top) = source(jg,i_cloud_top) &
+         &                 + albedo(jg,i_cloud_top)*flux_dn(jg,i_cloud_top)
+    do jlev = i_cloud_top-1,1,-1
+       flux_up(jg,jlev) = transmittance(jg,jlev)*flux_up(jg,jlev+1) + source_up(jg,jlev)
+    end do
+
+    ! Work back down through the atmosphere from cloud top computing
+    ! the fluxes at each half-level
+    do jlev = i_cloud_top,nlev
+      if (is_clear_sky_layer(jlev)) then
+         flux_dn(jg,jlev+1) = transmittance(jg,jlev)*flux_dn(jg,jlev) &
+              &               + source_dn(jg,jlev)
+         flux_up(jg,jlev+1) = albedo(jg,jlev+1)*flux_dn(jg,jlev+1) &
+              &               + source(jg,jlev+1)
+      else
+         ! Shonk & Hogan (2008) Eq 14 (after simplification):
+         flux_dn(jg,jlev+1) &
+              &  = (transmittance(jg,jlev)*flux_dn(jg,jlev) &
+              &     + reflectance(jg,jlev)*source(jg,jlev+1) &
+              &     + source_dn(jg,jlev)) * inv_denominator(jg,jlev+1)
+         ! Shonk & Hogan (2008) Eq 12:
+         flux_up(jg,jlev+1) = albedo(jg,jlev+1)*flux_dn(jg,jlev+1) &
+              &               + source(jg,jlev+1)
+      end if
+   end do
+
+  end subroutine fast_adding_ica_lw_omp
 
   !---------------------------------------------------------------------
   ! If there is no scattering then fluxes may be computed simply by
@@ -370,4 +491,61 @@ subroutine calc_fluxes_no_scattering_lw(ncol, nlev, &
 
   end subroutine calc_fluxes_no_scattering_lw
 
+
+  !---------------------------------------------------------------------
+  ! If there is no scattering then fluxes may be computed simply by
+  ! passing down through the atmosphere computing the downwelling
+  ! fluxes from the transmission and emission of each layer, and then
+  ! passing back up through the atmosphere to compute the upwelling
+  ! fluxes in the same way.
+  subroutine calc_fluxes_no_scattering_lw_omp(jg, ng, nlev, &
+       &  transmittance, source_up, source_dn, emission_surf, albedo_surf, flux_up, flux_dn)
+
+    use parkind1, only           : jprb
+    implicit none
+
+    ! Inputs
+    integer, intent(in) :: jg, ng
+    integer, intent(in) :: nlev ! number of levels
+
+    ! Surface emission (W m-2) and albedo
+    real(jprb), intent(in),  dimension(ng) :: emission_surf, albedo_surf
+
+    ! Diffuse reflectance and transmittance of each layer
+    real(jprb), intent(in),  dimension(ng, nlev)   :: transmittance
+
+    ! Emission from each layer in an upward and downward direction
+    real(jprb), intent(in),  dimension(ng, nlev)   :: source_up, source_dn
+
+    ! Resulting fluxes (W m-2) at half-levels: diffuse upwelling and
+    ! downwelling
+    real(jprb), intent(out), dimension(ng, nlev+1) :: flux_up, flux_dn
+
+    ! Loop index for model level
+    integer :: jlev, jcol
+
+    ! At top-of-atmosphere there is no diffuse downwelling radiation
+    flux_dn(jg,1) = 0.0_jprb
+
+    ! Work down through the atmosphere computing the downward fluxes
+    ! at each half-level
+! Added for DWD (2020)
+!NEC$ outerloop_unroll(8)
+    do jlev = 1,nlev
+       flux_dn(jg,jlev+1) = transmittance(jg,jlev)*flux_dn(jg,jlev) + source_dn(jg,jlev)
+    end do
+
+    ! Surface reflection and emission
+    flux_up(jg,nlev+1) = emission_surf(jg) + albedo_surf(jg) * flux_dn(jg,nlev+1)
+
+    ! Work back up through the atmosphere computing the upward fluxes
+    ! at each half-level
+! Added for DWD (2020)
+!NEC$ outerloop_unroll(8)
+    do jlev = nlev,1,-1
+       flux_up(jg,jlev) = transmittance(jg,jlev)*flux_up(jg,jlev+1) + source_up(jg,jlev)
+    end do
+
+  end subroutine calc_fluxes_no_scattering_lw_omp
+
 end module radiation_adding_ica_lw
diff --git a/radiation/radiation_adding_ica_sw.F90 b/radiation/radiation_adding_ica_sw.F90
index 90db91ad..67eb8d11 100644
--- a/radiation/radiation_adding_ica_sw.F90
+++ b/radiation/radiation_adding_ica_sw.F90
@@ -19,6 +19,7 @@ module radiation_adding_ica_sw
 
   public
 
+  !$omp declare target(adding_ica_sw_omp)
 contains
 
   subroutine adding_ica_sw(ncol, nlev, incoming_toa, &
@@ -162,4 +163,128 @@ subroutine adding_ica_sw(ncol, nlev, incoming_toa, &
 
   end subroutine adding_ica_sw
 
+  subroutine adding_ica_sw_omp(jg, ng, nlev, incoming_toa, &
+       &  albedo_surf_diffuse, albedo_surf_direct, cos_sza, &
+       &  reflectance, transmittance, ref_dir, trans_dir_diff, trans_dir_dir, &
+       &  flux_up, flux_dn_diffuse, flux_dn_direct, albedo, inv_denominator,&
+       &  source)
+
+    use parkind1, only           : jprb
+    implicit none
+
+    ! Inputs
+    integer, intent(in) :: jg, ng ! number of columns (may be spectral intervals)
+    integer, intent(in) :: nlev ! number of levels
+
+    ! Incoming downwelling solar radiation at top-of-atmosphere (W m-2)
+    real(jprb), intent(in),  dimension(ng)         :: incoming_toa
+
+    ! Surface albedo to diffuse and direct radiation
+    real(jprb), intent(in),  dimension(ng)         :: albedo_surf_diffuse, &
+         &                                              albedo_surf_direct
+
+    ! Cosine of the solar zenith angle
+    real(jprb), intent(in)                           :: cos_sza
+
+    ! Diffuse reflectance and transmittance of each layer
+    real(jprb), intent(in),  dimension(ng, nlev)   :: reflectance, transmittance
+
+    ! Fraction of direct-beam solar radiation entering the top of a
+    ! layer that is reflected back up or scattered forward into the
+    ! diffuse stream at the base of the layer
+    real(jprb), intent(in),  dimension(ng, nlev)   :: ref_dir, trans_dir_diff
+
+    ! Direct transmittance, i.e. fraction of direct beam that
+    ! penetrates a layer without being scattered or absorbed
+    real(jprb), intent(in),  dimension(ng, nlev)   :: trans_dir_dir
+
+    ! Resulting fluxes (W m-2) at half-levels: diffuse upwelling,
+    ! diffuse downwelling and direct downwelling
+    real(jprb), intent(out), dimension(ng, nlev+1) :: flux_up, flux_dn_diffuse, &
+         &                                              flux_dn_direct
+
+    real(jprb), intent(out), dimension(ng, nlev+1) :: albedo, inv_denominator
+
+    ! Upwelling radiation at each half-level due to scattering of the
+    ! direct beam below that half-level (W m-2)
+    real(jprb), intent(out), dimension(ng, nlev+1) :: source
+
+    ! Loop index for model level and column
+    integer :: jlev
+
+    ! Compute profile of direct (unscattered) solar fluxes at each
+    ! half-level by working down through the atmosphere
+    flux_dn_direct(jg,1) = incoming_toa(jg)
+    do jlev = 1,nlev
+      flux_dn_direct(jg,jlev+1) = flux_dn_direct(jg,jlev)*trans_dir_dir(jg,jlev)
+    end do
+
+    !
+    ! Ideally, we would use the associate statement below, however this doesn't
+    ! work with NVCOMPILER. Thus, we pass albedo and inv_denominator in from the
+    ! calling code however, we pass it in as flux up and flux_dn_diffuse. That is,
+    ! the flux_up and flux_dn_diffuse are temporarilly reusable.
+    !
+    !associate(albedo=>flux_up, inv_denominator=>flux_dn_diffuse)
+
+    albedo(jg,nlev+1) = albedo_surf_diffuse(jg)
+
+    ! At the surface, the direct solar beam is reflected back into the
+    ! diffuse stream
+    source(jg,nlev+1) = albedo_surf_direct(jg) * flux_dn_direct(jg,nlev+1) * cos_sza
+
+    ! Work back up through the atmosphere and compute the albedo of
+    ! the entire earth/atmosphere system below that half-level, and
+    ! also the "source", which is the upwelling flux due to direct
+    ! radiation that is scattered below that level
+! Added for DWD (2020)
+!NEC$ outerloop_unroll(8)
+    do jlev = nlev,1,-1
+      ! Next loop over columns. We could do this by indexing the
+      ! entire inner dimension as follows, e.g. for the first line:
+      !   inv_denominator(jg,jlev) = 1.0_jprb / (1.0_jprb-albedo(jg,jlev+1)*reflectance(jg,jlev))
+      ! and similarly for subsequent lines, but this slows down the
+      ! routine by a factor of 2!  Rather, we do it with an explicit
+      ! loop.
+
+      ! Lacis and Hansen (1974) Eq 33, Shonk & Hogan (2008) Eq 10:
+       inv_denominator(jg,jlev+1) = 1.0_jprb / (1.0_jprb-albedo(jg,jlev+1)*reflectance(jg,jlev))
+       ! Shonk & Hogan (2008) Eq 9, Petty (2006) Eq 13.81:
+       albedo(jg,jlev) = reflectance(jg,jlev) + transmittance(jg,jlev) * transmittance(jg,jlev) &
+            &                                     * albedo(jg,jlev+1) * inv_denominator(jg,jlev+1)
+       ! Shonk & Hogan (2008) Eq 11:
+       source(jg,jlev) = ref_dir(jg,jlev)*flux_dn_direct(jg,jlev) &
+            &  + transmittance(jg,jlev)*(source(jg,jlev+1) &
+            &        + albedo(jg,jlev+1)*trans_dir_diff(jg,jlev)*flux_dn_direct(jg,jlev)) &
+            &  * inv_denominator(jg,jlev+1)
+    end do
+
+    ! At top-of-atmosphere there is no diffuse downwelling radiation
+    flux_dn_diffuse(jg,1) = 0.0_jprb
+
+    ! At top-of-atmosphere, all upwelling radiation is due to
+    ! scattering by the direct beam below that level
+    flux_up(jg,1) = source(jg,1)
+
+    ! Work back down through the atmosphere computing the fluxes at
+    ! each half-level
+! Added for DWD (2020)
+!NEC$ outerloop_unroll(8)
+    do jlev = 1,nlev
+       ! Shonk & Hogan (2008) Eq 14 (after simplification):
+       flux_dn_diffuse(jg,jlev+1) &
+            &  = (transmittance(jg,jlev)*flux_dn_diffuse(jg,jlev) &
+            &     + reflectance(jg,jlev)*source(jg,jlev+1) &
+            &     + trans_dir_diff(jg,jlev)*flux_dn_direct(jg,jlev)) * inv_denominator(jg,jlev+1)
+       ! Shonk & Hogan (2008) Eq 12:
+       flux_up(jg,jlev+1) = albedo(jg,jlev+1)*flux_dn_diffuse(jg,jlev+1) &
+            &            + source(jg,jlev+1)
+       flux_dn_direct(jg,jlev) = flux_dn_direct(jg,jlev)*cos_sza
+    end do
+
+    flux_dn_direct(jg,nlev+1) = flux_dn_direct(jg,nlev+1)*cos_sza
+    !end associate
+
+  end subroutine adding_ica_sw_omp
+
 end module radiation_adding_ica_sw
diff --git a/radiation/radiation_cloud_cover.F90 b/radiation/radiation_cloud_cover.F90
index c0a81b81..af0aa55e 100644
--- a/radiation/radiation_cloud_cover.F90
+++ b/radiation/radiation_cloud_cover.F90
@@ -42,6 +42,7 @@ module radiation_cloud_cover
   ! Maximum cloud fraction distinguishable from 1
   real(jprb), parameter :: MaxCloudFrac = 1.0_jprb-epsilon(1.0_jprb)*10.0_jprb
 
+  !$omp declare target(beta2alpha)
 
 contains
 
diff --git a/radiation/radiation_cloud_generator_acc.F90 b/radiation/radiation_cloud_generator_acc.F90
index 39cf8cb7..9b54e19b 100644
--- a/radiation/radiation_cloud_generator_acc.F90
+++ b/radiation/radiation_cloud_generator_acc.F90
@@ -23,6 +23,10 @@
 
 module radiation_cloud_generator_acc
 
+  implicit none
+
+  !$omp declare target(cloud_generator_omp)
+
   public
 
 contains
@@ -261,4 +265,228 @@ subroutine cloud_generator_acc(ng, nlev, &
 
   end subroutine cloud_generator_acc
 
+
+    !---------------------------------------------------------------------
+  ! Generate scaling factors for the cloud optical depth to represent
+  ! cloud overlap, the overlap of internal cloud inhomogeneities and
+  ! the fractional standard deviation of these inhomogeneities, for
+  ! use in a Monte Carlo Independent Column Approximation radiation
+  ! scheme. All returned profiles contain cloud, and the total cloud
+  ! cover is also returned, so the calling function can then do a
+  ! weighted average of clear and cloudy skies; this is a way to
+  ! reduce the Monte Carlo noise in profiles with low cloud cover.
+  subroutine cloud_generator_omp(jg, ng, nlev, &
+    &  iseed, frac_threshold, &
+    &  frac, overlap_param, decorrelation_scaling, &
+    &  fractional_std, &
+    &  sample_ncdf, sample_nfsd, sample_fsd1, &
+    &  sample_inv_fsd_interval, sample_val, &
+    &  od_scaling, total_cloud_cover, &
+    &  ibegin, iend, &
+    &  cum_cloud_cover, &
+    &  pair_cloud_cover)
+
+    use parkind1,                 only : jprb, jpib
+    use radiation_random_numbers, only : IMinstdA0, IMinstdA, IMinstdM, uniform_distribution_omp
+    implicit none
+
+    ! Inputs
+    integer, intent(in)     :: jg    ! index into the g direction
+    integer, intent(in)     :: ng    ! number of g points
+    integer, intent(in)     :: nlev  ! number of model levels
+    integer, intent(in)     :: iseed ! seed for random number generator
+
+    ! Only cloud fractions above this threshold are considered to be
+    ! clouds
+    real(jprb), intent(in)  :: frac_threshold
+
+    ! Cloud fraction on full levels
+    real(jprb), intent(in)  :: frac(nlev)
+
+    ! Cloud overlap parameter for interfaces between model layers,
+    ! where 0 indicates random overlap and 1 indicates maximum-random
+    ! overlap
+    real(jprb), intent(in)  :: overlap_param(nlev-1)
+
+    ! Overlap parameter for internal inhomogeneities
+    real(jprb), intent(in)  :: decorrelation_scaling
+
+    ! Fractional standard deviation at each layer
+    real(jprb), intent(in)  :: fractional_std(nlev)
+
+    ! Object for sampling from a lognormal or gamma distribution
+    integer, intent(in)  :: sample_ncdf, sample_nfsd
+    real(jprb), intent(in)  :: sample_fsd1, sample_inv_fsd_interval
+    real(jprb), intent(in), dimension(:,:)  :: sample_val
+
+    ! Outputs
+
+    ! Cloud optical depth scaling factor, with 0 indicating clear sky
+    real(jprb), intent(out) :: od_scaling(ng,nlev)
+
+    ! Total cloud cover using cloud fraction and overlap parameter
+    real(jprb), intent(in) :: total_cloud_cover
+
+    ! Local variables
+
+    ! Cumulative cloud cover from TOA to the base of each layer
+    real(jprb), intent(in) :: cum_cloud_cover(nlev)
+
+    ! First and last cloudy layers
+    integer, intent(in) :: ibegin, iend
+
+    ! Scaled random number for finding cloud
+    real(jprb) :: trigger
+
+    ! Uniform deviates between 0 and 1
+    real(jprb) :: rand_top
+
+    ! Overlap parameter of inhomogeneities
+    real(jprb) :: overlap_param_inhom
+
+    real(jprb) :: rand_cloud, rand_inhom1, rand_inhom2
+
+    integer :: itrigger
+
+    ! Loop index for model level and g-point
+    integer :: jlev
+
+    ! Cloud cover of a pair of layers, and amount by which cloud at
+    ! next level increases total cloud cover as seen from above
+    real(jprb), intent(inout), dimension(nlev-1) :: pair_cloud_cover
+    real(jprb) :: overhang
+
+    integer          :: jcloud
+    ! Number of contiguous cloudy layers for which to compute optical
+    ! depth scaling
+    integer :: n_layers_to_scale
+
+    ! Is it time to fill the od_scaling variable?
+    logical :: do_fill_od_scaling
+
+    ! variables from manual inlining of sample_vec_from_pdf
+    ! Index to look-up table
+    integer    :: ifsd, icdf
+    ! Weights in bilinear interpolation
+    real(jprb) :: wfsd, wcdf
+
+    ! local variable for the acc rng
+    integer(kind=jpib) :: istate
+
+    if (total_cloud_cover >= frac_threshold) then
+      ! Loop over ng columns (this loop should be paralized as soon as acc RNG is available)
+       do jlev = 1,nlev
+          od_scaling(jg,jlev) = 0.0_jprb
+       end do
+
+       ! begin manuel inline of istate = initialize_acc(iseed, jg)
+       istate = REAL(ABS(iseed),jprb)
+       ! Use a modified (and vectorized) C++ minstd_rand0 algorithm to populate the state
+       istate = nint(mod( istate*jg*(1._jprb-0.05_jprb*jg+0.005_jprb*jg**2)*IMinstdA0, IMinstdM))
+
+       ! One warmup of the C++ minstd_rand algorithm
+       istate = mod(IMinstdA * istate, IMinstdM)
+       ! end manuel inline of istate = initialize_acc(iseed, jg)
+       call uniform_distribution_omp(istate, rand_top)
+
+       ! Find the cloud top height corresponding to the current
+       ! random number, and store in itrigger
+       trigger = rand_top * total_cloud_cover
+       itrigger = iend
+       !$ACC LOOP SEQ
+       do jlev = ibegin,iend
+         if (trigger <= cum_cloud_cover(jlev)) then
+           itrigger = min(jlev, itrigger)
+         end if
+       end do
+       ! manual inline: call generate_column_exp_ran >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+       ! So far our vertically contiguous cloud contains only one layer
+       n_layers_to_scale = 1
+
+       ! Locate the clouds below this layer: first generate some more
+       ! random numbers
+       ! Loop from the layer below the local cloud top down to the
+       ! bottom-most cloudy layer
+       do jlev = itrigger+1,iend+1
+          do_fill_od_scaling = .false.
+          if (jlev <= iend) then
+             call uniform_distribution_omp(istate, rand_cloud)
+             if (n_layers_to_scale > 0) then
+                ! There is a cloud above, in which case the probability
+                ! of cloud in the layer below is as follows
+                if (rand_cloud*frac(jlev-1) &
+                     &  < frac(jlev) + frac(jlev-1) - pair_cloud_cover(jlev-1)) then
+                   ! Add another cloudy layer
+                   n_layers_to_scale = n_layers_to_scale + 1
+                else
+                   ! Reached the end of a contiguous set of cloudy layers and
+                   ! will compute the optical depth scaling immediately.
+                   do_fill_od_scaling = .true.
+                end if
+             else
+                overhang = cum_cloud_cover(jlev)-cum_cloud_cover(jlev-1)
+                ! There is clear-sky above, in which case the
+                ! probability of cloud in the layer below is as follows
+                if (rand_cloud*(cum_cloud_cover(jlev-1) - frac(jlev-1)) &
+                     &  < pair_cloud_cover(jlev-1) - overhang - frac(jlev-1)) then
+                   ! A new cloud top
+                   n_layers_to_scale = 1
+                end if
+             end if
+          else
+             ! We are at the bottom of the cloudy layers in the model,
+             ! so in a moment need to populate the od_scaling array
+             do_fill_od_scaling = .true.
+          end if ! (jlev <= iend)
+
+          if (do_fill_od_scaling) then
+             call uniform_distribution_omp(istate, rand_inhom1)
+             ! Loop through the sequence of cloudy layers
+             !$ACC LOOP SEQ PRIVATE(rand_inhom2, overlap_param_inhom, wcdf, icdf, &
+             !$ACC   wfsd, ifsd)
+             do jcloud = max(2,jlev-n_layers_to_scale),jlev-1
+
+                call uniform_distribution_omp(istate, rand_inhom2)
+
+                ! Set overlap parameter of inhomogeneities
+                overlap_param_inhom = overlap_param(jcloud-1)
+                if ( ibegin<=jcloud-1 .and. jcloud-1< iend .and. &
+                     & overlap_param(jcloud-1) > 0.0_jprb) then
+                   overlap_param_inhom = &
+                        & overlap_param(jcloud-1)**(1.0_jprb/decorrelation_scaling)
+                end if
+
+                ! Use second random number, and inhomogeneity overlap
+                ! parameter, to decide whether the first random number
+                ! should be repeated (corresponding to maximum overlap)
+                ! or not (corresponding to random overlap)
+                if ( jcloud > jlev-n_layers_to_scale .and. &
+                     & rand_inhom2 >= overlap_param_inhom) then
+                  call uniform_distribution_omp(istate, rand_inhom1)
+                end if
+                ! manual inline >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+                ! Bilinear interpolation with bounds
+                wcdf = rand_inhom1 * (sample_ncdf-1) + 1.0_jprb
+                icdf = max(1, min(int(wcdf), sample_ncdf-1))
+                wcdf = max(0.0_jprb, min(wcdf - icdf, 1.0_jprb))
+
+                wfsd = (fractional_std(jcloud)-sample_fsd1) * sample_inv_fsd_interval + 1.0_jprb
+                ifsd = max(1, min(int(wfsd), sample_nfsd-1))
+                wfsd = max(0.0_jprb, min(wfsd - ifsd, 1.0_jprb))
+
+                od_scaling(jg,jcloud) =    (1.0_jprb-wcdf)*(1.0_jprb-wfsd) * sample_val(icdf  ,ifsd)   &
+                     & + (1.0_jprb-wcdf)*          wfsd  * sample_val(icdf  ,ifsd+1) &
+                     & +           wcdf *(1.0_jprb-wfsd) * sample_val(icdf+1,ifsd)   &
+                     & +           wcdf *          wfsd  * sample_val(icdf+1,ifsd+1)
+                ! manual inline <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+             end do
+
+             n_layers_to_scale = 0
+
+          end if ! (do_fill_od_scaling)
+       end do ! jlev = itrigger+1,iend+1
+    end if ! (total_cloud_cover < frac_threshold)
+
+  end subroutine cloud_generator_omp
+
 end module radiation_cloud_generator_acc
diff --git a/radiation/radiation_interface.F90 b/radiation/radiation_interface.F90
index 97cbf5ec..694b6aff 100644
--- a/radiation/radiation_interface.F90
+++ b/radiation/radiation_interface.F90
@@ -225,6 +225,8 @@ subroutine radiation(ncol, nlev, istartcol, iendcol, config, &
     use radiation_mcica_lw,       only : solver_mcica_lw
     use radiation_mcica_acc_sw,   only : solver_mcica_acc_sw
     use radiation_mcica_acc_lw,   only : solver_mcica_acc_lw
+    use radiation_mcica_omp_sw,   only : solver_mcica_omp_sw
+    use radiation_mcica_omp_lw,   only : solver_mcica_omp_lw
     use radiation_cloudless_sw,   only : solver_cloudless_sw
     use radiation_cloudless_lw,   only : solver_cloudless_lw
     use radiation_homogeneous_sw, only : solver_homogeneous_sw
@@ -319,6 +321,10 @@ subroutine radiation(ncol, nlev, istartcol, iendcol, config, &
     !$ACC             planck_hl, lw_emission, lw_albedo, sw_albedo_direct, &
     !$ACC             sw_albedo_diffuse, incoming_sw)
 
+    !$OMP TARGET ENTER DATA MAP(ALLOC: od_lw, ssa_lw, g_lw, od_lw_cloud, ssa_lw_cloud, g_lw_cloud, &
+    !$OMP                       od_sw, ssa_sw, g_sw, od_sw_cloud, ssa_sw_cloud, g_sw_cloud, &
+    !$OMP                       planck_hl, lw_emission, lw_albedo, sw_albedo_direct, &
+    !$OMP                       sw_albedo_diffuse, incoming_sw)
     if (thermodynamics%pressure_hl(istartcol,2) &
          &  < thermodynamics%pressure_hl(istartcol,1)) then
       ! Input arrays are arranged in order of decreasing pressure /
@@ -407,6 +413,7 @@ subroutine radiation(ncol, nlev, istartcol, iendcol, config, &
                &  od_lw, ssa_lw, g_lw, od_sw, ssa_sw, g_sw)
         end if
       else
+        !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2)
         !$ACC PARALLEL DEFAULT(PRESENT) ASYNC(1)
         !$ACC LOOP GANG VECTOR COLLAPSE(2)
         do jcol = istartcol,iendcol
@@ -425,6 +432,7 @@ subroutine radiation(ncol, nlev, istartcol, iendcol, config, &
           end do
         end do
         !$ACC END PARALLEL
+        !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO
       end if
 
       ! For diagnostic purposes, save these intermediate variables to
@@ -436,14 +444,17 @@ subroutine radiation(ncol, nlev, istartcol, iendcol, config, &
           write(rad_prop_file_name,'(a,a,i4.4,a,i4.4,a)') &
                &  rad_prop_base_file_name, '_', istartcol, '-',iendcol,'.nc'
         end if
-#ifdef _OPENACC
+#if defined(_OPENACC) || defined(OMPGPU)
+          !$OMP TARGET UPDATE FROM(od_lw, ssa_lw, g_lw, od_lw_cloud, ssa_lw_cloud, g_lw_cloud, &
+          !$OMP             planck_hl, lw_emission, lw_albedo, sw_albedo_direct, sw_albedo_diffuse, &
+          !$OMP             incoming_sw, od_sw, ssa_sw, g_sw, od_sw_cloud, ssa_sw_cloud, g_sw_cloud)
           !$ACC UPDATE HOST(od_lw, ssa_lw, g_lw, od_lw_cloud, ssa_lw_cloud, g_lw_cloud, &
           !$ACC             planck_hl, lw_emission, lw_albedo, sw_albedo_direct, sw_albedo_diffuse, &
           !$ACC             incoming_sw, od_sw, ssa_sw, g_sw, od_sw_cloud, ssa_sw_cloud, g_sw_cloud)
           call cloud%update_host(cloud)
           call flux%update_host(flux)
 #endif
-        call save_radiative_properties(trim(rad_prop_file_name), &
+          call save_radiative_properties(trim(rad_prop_file_name), &
              &  nlev, istartcol, iendcol, &
              &  config, single_level, thermodynamics, cloud, &
              &  planck_hl, lw_emission, lw_albedo, &
@@ -460,7 +471,9 @@ subroutine radiation(ncol, nlev, istartcol, iendcol, config, &
 
         !$ACC WAIT
         if (config%i_solver_lw == ISolverMcICA) then
-#ifdef _OPENACC
+#if defined(_OPENACC) || defined(OMPGPU)
+          !$OMP TARGET UPDATE FROM(od_lw, ssa_lw, g_lw, od_lw_cloud, ssa_lw_cloud, g_lw_cloud, planck_hl, &
+          !$OMP& lw_emission, lw_albedo)
           !$ACC UPDATE HOST(od_lw, ssa_lw, g_lw, od_lw_cloud, ssa_lw_cloud, g_lw_cloud, planck_hl, lw_emission, lw_albedo)
           !$ACC WAIT(1)
           call cloud%update_host(cloud)
@@ -472,17 +485,24 @@ subroutine radiation(ncol, nlev, istartcol, iendcol, config, &
                &  config, single_level, cloud, &
                &  od_lw, ssa_lw, g_lw, od_lw_cloud, ssa_lw_cloud, &
                &  g_lw_cloud, planck_hl, lw_emission, lw_albedo, flux)
-#ifdef _OPENACC
+#if defined(_OPENACC) || defined(OMPGPU)
           !$ACC WAIT(1)
           call flux%update_device(flux)
           !$ACC WAIT(1)
 #endif
         else if (config%i_solver_lw == ISolverMcICAACC) then
           ! Compute fluxes using the McICA ACC longwave solver
+#if defined(OMPGPU)
+          call solver_mcica_omp_lw(nlev,istartcol,iendcol, &
+               &  config, single_level, cloud, &
+               &  od_lw, ssa_lw, g_lw, od_lw_cloud, ssa_lw_cloud, &
+               &  g_lw_cloud, planck_hl, lw_emission, lw_albedo, flux)
+#else
           call solver_mcica_acc_lw(nlev,istartcol,iendcol, &
                &  config, single_level, cloud, &
                &  od_lw, ssa_lw, g_lw, od_lw_cloud, ssa_lw_cloud, &
                &  g_lw_cloud, planck_hl, lw_emission, lw_albedo, flux)
+#endif
         else if (config%i_solver_lw == ISolverSPARTACUS) then
           ! Compute fluxes using the SPARTACUS longwave solver
           call solver_spartacus_lw(nlev,istartcol,iendcol, &
@@ -516,7 +536,9 @@ subroutine radiation(ncol, nlev, istartcol, iendcol, config, &
 
         !$ACC WAIT
         if (config%i_solver_sw == ISolverMcICA) then
-#ifdef _OPENACC
+#if defined(_OPENACC) || defined(OMPGPU)
+          !$OMP TARGET UPDATE FROM(od_sw, ssa_sw, g_sw, od_sw_cloud, ssa_sw_cloud, g_sw_cloud, &
+          !$OMP& sw_albedo_direct, sw_albedo_diffuse, incoming_sw)
           !$ACC UPDATE HOST(od_sw, ssa_sw, g_sw, od_sw_cloud, ssa_sw_cloud, g_sw_cloud, sw_albedo_direct, sw_albedo_diffuse, incoming_sw)
           !$ACC WAIT(1)
           call cloud%update_host(cloud)
@@ -529,18 +551,26 @@ subroutine radiation(ncol, nlev, istartcol, iendcol, config, &
                &  od_sw, ssa_sw, g_sw, od_sw_cloud, ssa_sw_cloud, &
                &  g_sw_cloud, sw_albedo_direct, sw_albedo_diffuse, &
                &  incoming_sw, flux)
-#ifdef _OPENACC
+#if defined(_OPENACC) || defined(OMPGPU)
           !$ACC WAIT(1)
           call flux%update_device(flux)
           !$ACC WAIT(1)
 #endif
         else if (config%i_solver_sw == ISolverMcICAACC) then
           ! Compute fluxes using the McICA ACC shortwave solver
+#if defined(OMPGPU)
+          call solver_mcica_omp_sw(nlev,istartcol,iendcol, &
+               &  config, single_level, cloud, &
+               &  od_sw, ssa_sw, g_sw, od_sw_cloud, ssa_sw_cloud, &
+               &  g_sw_cloud, sw_albedo_direct, sw_albedo_diffuse, &
+               &  incoming_sw, flux)
+#else
           call solver_mcica_acc_sw(nlev,istartcol,iendcol, &
                &  config, single_level, cloud, &
                &  od_sw, ssa_sw, g_sw, od_sw_cloud, ssa_sw_cloud, &
                &  g_sw_cloud, sw_albedo_direct, sw_albedo_diffuse, &
                &  incoming_sw, flux)
+#endif
         else if (config%i_solver_sw == ISolverSPARTACUS) then
           ! Compute fluxes using the SPARTACUS shortwave solver
           call solver_spartacus_sw(nlev,istartcol,iendcol, &
@@ -581,6 +611,10 @@ subroutine radiation(ncol, nlev, istartcol, iendcol, config, &
     !$ACC WAIT
     !$ACC END DATA
 
+    !$OMP TARGET EXIT DATA MAP(DELETE:od_lw, ssa_lw, g_lw, od_lw_cloud, ssa_lw_cloud, g_lw_cloud, &
+    !$OMP                      od_sw, ssa_sw, g_sw, od_sw_cloud, ssa_sw_cloud, g_sw_cloud, &
+    !$OMP                      planck_hl, lw_emission, lw_albedo, sw_albedo_direct, &
+    !$OMP                      sw_albedo_diffuse, incoming_sw)
     if (lhook) call dr_hook('radiation_interface:radiation',1,hook_handle)
 
   end subroutine radiation
diff --git a/radiation/radiation_lw_derivatives.F90 b/radiation/radiation_lw_derivatives.F90
index 1fd123fb..12b0e5f6 100644
--- a/radiation/radiation_lw_derivatives.F90
+++ b/radiation/radiation_lw_derivatives.F90
@@ -34,6 +34,11 @@
 
 module radiation_lw_derivatives
 
+  implicit none
+
+  !$omp declare target(calc_lw_derivatives_ica_omp)
+  !$omp declare target(modify_lw_derivatives_ica_omp)
+
   public
 
 contains
@@ -184,7 +189,109 @@ subroutine modify_lw_derivatives_ica(ng, nlev, icol, transmittance, &
 
   end subroutine modify_lw_derivatives_ica
 
+  !---------------------------------------------------------------------
+  ! Calculation for the Independent Column Approximation
+  ! uses global memory to for writing out the results of the reduction
+  subroutine calc_lw_derivatives_ica_omp(ng, nlev, icol, transmittance, flux_up_surf, lw_derivatives, lw_derivatives_g)
+
+    use parkind1, only           : jprb
+    implicit none
+
+    ! Inputs
+    integer,    intent(in) :: ng   ! number of spectral intervals
+    integer,    intent(in) :: nlev ! number of levels
+    integer,    intent(in) :: icol ! Index of column for output
+    real(jprb), intent(in) :: transmittance(ng,nlev)
+    real(jprb), intent(in) :: flux_up_surf(ng) ! Upwelling surface spectral flux (W m-2)
+
+    ! Output
+    real(jprb), intent(out) :: lw_derivatives(:,:) ! dimensioned (ncol,nlev+1)
+
+    ! Rate of change of spectral flux at a given height with respect
+    ! to the surface value
+    real(jprb), intent(inout) :: lw_derivatives_g(ng)
+
+    real(jprb) :: sum_flux_up_surf, sum_lw_derivatives_g
+
+    integer    :: jlev, jg
+
+    sum_flux_up_surf = 0.0_jprb
+    do jg = 1,ng
+       sum_flux_up_surf = sum_flux_up_surf + flux_up_surf(jg)
+    end do
+    ! Initialize the derivatives at the surface
+    do jg = 1,ng
+      lw_derivatives_g(jg) = flux_up_surf(jg) / sum_flux_up_surf
+    end do
+    lw_derivatives(icol, nlev+1) = 1.0_jprb
+
+    ! Move up through the atmosphere computing the derivatives at each
+    ! half-level
+    do jlev = nlev,1,-1
+      sum_lw_derivatives_g = 0.0_jprb
+      do jg = 1,ng
+        lw_derivatives_g(jg) = lw_derivatives_g(jg) * transmittance(jg,jlev)
+        sum_lw_derivatives_g = sum_lw_derivatives_g + lw_derivatives_g(jg)
+      end do
+      lw_derivatives(icol,jlev) = sum_lw_derivatives_g
+    end do
+
+  end subroutine calc_lw_derivatives_ica_omp
+
+  !---------------------------------------------------------------------
+  ! Calculation for the Independent Column Approximation
+  ! uses global memory to for writing out the results of the reduction
+  subroutine modify_lw_derivatives_ica_omp(ng, nlev, icol, transmittance, flux_up_surf, &
+       &                                   weight, lw_derivatives, lw_derivatives_g)
+
+    use parkind1, only           : jprb
+
+    implicit none
+
+    ! Inputs
+    integer,    intent(in) :: ng   ! number of spectral intervals
+    integer,    intent(in) :: nlev ! number of levels
+    integer,    intent(in) :: icol ! Index of column for output
+    real(jprb), intent(in) :: transmittance(ng,nlev)
+    real(jprb), intent(in) :: flux_up_surf(ng) ! Upwelling surface spectral flux (W m-2)
+    real(jprb), intent(in) :: weight ! Weight new values against existing
+
+    ! Output
+    real(jprb), intent(inout) :: lw_derivatives(:,:) ! dimensioned (ncol,nlev+1)
+
+    ! Rate of change of spectral flux at a given height with respect
+    ! to the surface value
+    real(jprb), intent(inout) :: lw_derivatives_g(ng)
+
+    real(jprb) :: sum_flux_up_surf, sum_lw_derivatives_g
+
+    integer    :: jlev, jg
+
+    sum_flux_up_surf = 0.0_jprb
+    do jg = 1,ng
+      sum_flux_up_surf = sum_flux_up_surf + flux_up_surf(jg)
+    end do
+    ! Initialize the derivatives at the surface
+    do jg = 1,ng
+      lw_derivatives_g(jg) = flux_up_surf(jg) / sum_flux_up_surf
+    end do
+
+    ! This value must be 1 so no weighting applied
+    lw_derivatives(icol, nlev+1) = 1.0_jprb
+
+    ! Move up through the atmosphere computing the derivatives at each
+    ! half-level
+    do jlev = nlev,1,-1
+      sum_lw_derivatives_g = 0.0_jprb
+      do jg = 1,ng
+        lw_derivatives_g(jg) = lw_derivatives_g(jg) * transmittance(jg,jlev)
+        sum_lw_derivatives_g = sum_lw_derivatives_g + lw_derivatives_g(jg)
+      end do
+      lw_derivatives(icol,jlev) = (1.0_jprb - weight) * lw_derivatives(icol,jlev) &
+           &                    + weight * sum_lw_derivatives_g
+    end do
 
+  end subroutine modify_lw_derivatives_ica_omp
 
   !---------------------------------------------------------------------
   ! Calculation for solvers involving multiple regions and matrices
diff --git a/radiation/radiation_mcica_omp_lw.F90 b/radiation/radiation_mcica_omp_lw.F90
new file mode 100644
index 00000000..215f45e5
--- /dev/null
+++ b/radiation/radiation_mcica_omp_lw.F90
@@ -0,0 +1,545 @@
+! radiation_mcica_omp_lw.F90 - Monte-Carlo Independent Column Approximation longtwave solver
+!
+! (C) Copyright 2015- ECMWF.
+!
+! This software is licensed under the terms of the Apache Licence Version 2.0
+! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+!
+! In applying this licence, ECMWF does not waive the privileges and immunities
+! granted to it by virtue of its status as an intergovernmental organisation
+! nor does it submit to any jurisdiction.
+!
+! Author:  Robin Hogan
+! Email:   r.j.hogan@ecmwf.int
+!
+! Modifications
+!   2017-04-11  R. Hogan  Receive emission/albedo rather than planck/emissivity
+!   2017-04-22  R. Hogan  Store surface fluxes at all g-points
+!   2017-07-12  R. Hogan  Call fast adding method if only clouds scatter
+!   2017-10-23  R. Hogan  Renamed single-character variables
+
+#include "ecrad_config.h"
+
+module radiation_mcica_omp_lw
+
+  use radiation_io, only : nulout
+  public
+
+contains
+
+  !---------------------------------------------------------------------
+  ! Longwave Monte Carlo Independent Column Approximation
+  ! (McICA). This implementation performs a clear-sky and a cloudy-sky
+  ! calculation, and then weights the two to get the all-sky fluxes
+  ! according to the total cloud cover. This method reduces noise for
+  ! low cloud cover situations, and exploits the clear-sky
+  ! calculations that are usually performed for diagnostic purposes
+  ! simultaneously. The cloud generator has been carefully written
+  ! such that the stochastic cloud field satisfies the prescribed
+  ! overlap parameter accounting for this weighting.
+  subroutine solver_mcica_omp_lw(nlev,istartcol,iendcol, &
+       &  config, single_level, cloud, &
+       &  od, ssa, g, od_cloud, ssa_cloud, g_cloud, planck_hl, &
+       &  emission, albedo, &
+       &  flux)
+
+    use parkind1, only           : jprb
+    use yomhook,  only           : lhook, dr_hook, jphook
+
+    use radiation_io,   only           : nulerr, radiation_abort
+    use radiation_config, only         : config_type
+    use radiation_single_level, only   : single_level_type
+    use radiation_cloud, only          : cloud_type
+    use radiation_flux, only           : flux_type
+    use radiation_two_stream, only     : calc_ref_trans_lw_single_level_omp, &
+         &                               calc_no_scattering_transmittance_lw_omp, &
+         &                               calc_no_scattering_transmittance_lw_single_cell_omp
+    use radiation_adding_ica_lw, only  : fast_adding_ica_lw_omp, calc_fluxes_no_scattering_lw_omp
+    
+    use radiation_lw_derivatives, only : calc_lw_derivatives_ica_omp, modify_lw_derivatives_ica_omp
+    use radiation_cloud_generator_acc, only: cloud_generator_omp
+    use radiation_cloud_cover, only    : beta2alpha, MaxCloudFrac
+
+#ifdef HAVE_ROCTX
+    use roctx_profiling, only: roctxstartrange, roctxendrange
+    use iso_c_binding, only: c_null_char
+#endif
+
+    implicit none
+
+    ! Inputs
+    integer, intent(in) :: nlev               ! number of model levels
+    integer, intent(in) :: istartcol, iendcol ! range of columns to process
+    type(config_type),        intent(in) :: config
+    type(single_level_type),  intent(in) :: single_level
+    type(cloud_type),         intent(in) :: cloud
+
+    ! Gas and aerosol optical depth, single-scattering albedo and
+    ! asymmetry factor at each longwave g-point
+    real(jprb), intent(in), dimension(config%n_g_lw, nlev, istartcol:iendcol) :: &
+         &  od
+    real(jprb), intent(in), dimension(config%n_g_lw_if_scattering, nlev, istartcol:iendcol) :: &
+         &  ssa, g
+
+    ! Cloud and precipitation optical depth, single-scattering albedo and
+    ! asymmetry factor in each longwave band
+    real(jprb), intent(in), dimension(config%n_bands_lw,nlev,istartcol:iendcol)   :: &
+         &  od_cloud
+    real(jprb), intent(in), dimension(config%n_bands_lw_if_scattering, &
+         &  nlev,istartcol:iendcol) :: ssa_cloud, g_cloud
+
+    ! Planck function at each half-level and the surface
+    real(jprb), intent(in), dimension(config%n_g_lw,nlev+1,istartcol:iendcol) :: &
+         &  planck_hl
+
+    ! Emission (Planck*emissivity) and albedo (1-emissivity) at the
+    ! surface at each longwave g-point
+    real(jprb), intent(in), dimension(config%n_g_lw, istartcol:iendcol) :: emission, albedo
+
+    ! Output
+    type(flux_type), intent(inout):: flux
+
+    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+    ! Local variables : Mapped into Global Memory
+    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+    
+    ! Fluxes per g point
+    real(jprb), dimension(config%n_g_lw, nlev+1, istartcol:iendcol) :: flux_up, flux_dn
+    real(jprb), dimension(config%n_g_lw, nlev+1, istartcol:iendcol) :: flux_up_clear, flux_dn_clear
+
+    ! Identify clear-sky layers
+    logical :: is_clear_sky_layer(nlev, istartcol:iendcol)
+
+    ! workaround that allows inling of cloud generator
+    real(jprb), dimension(config%pdf_sampler%ncdf, config%pdf_sampler%nfsd)  :: sample_val
+
+    ! temporary arrays to increase performance
+    real(jprb), dimension(nlev, istartcol:iendcol) :: frac, frac_std
+    real(jprb), dimension(nlev-1, istartcol:iendcol) :: overlap_param
+
+    ! temporary arrays
+    real(jprb), dimension(nlev, istartcol:iendcol) :: cum_cloud_cover
+    real(jprb), dimension(nlev-1, istartcol:iendcol) :: pair_cloud_cover
+
+    ! Cumulative product needed in computation of total_cloud_cover
+    real(jprb) :: cum_product(istartcol:iendcol)
+
+    ! First and last cloudy layers
+    integer :: ibegin(istartcol:iendcol), iend(istartcol:iendcol)
+
+    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+    ! Local variables : Was stack and is now in Global Memory
+    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+    ! Diffuse reflectance and transmittance for each layer in clear
+    ! and all skies
+    real(jprb), dimension(config%n_g_lw, nlev, istartcol:iendcol) :: trans_clear, reflectance, transmittance
+
+    ! Emission by a layer into the upwelling or downwelling diffuse
+    ! streams, in clear and all skies
+    real(jprb), dimension(config%n_g_lw, nlev, istartcol:iendcol) :: source_up, source_dn
+
+    ! Optical depth scaling from the cloud generator, zero indicating
+    ! clear skies
+    real(jprb), dimension(config%n_g_lw,nlev, istartcol:iendcol) :: od_scaling
+    
+        ! Temporary working array
+    real(jprb), dimension(config%n_g_lw,nlev+1, istartcol:iendcol) :: tmp_work_source
+    real(jprb), dimension(config%n_g_lw, istartcol:iendcol) :: tmp_derivatives
+
+    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+    ! Local variables : Stack
+    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+    
+    ! Modified optical depth after McICA scaling to represent cloud
+    ! inhomogeneity
+    real(jprb) :: od_cloud_new
+    
+    ! Combined gas+aerosol+cloud optical depth, single scattering
+    ! albedo and asymmetry factor
+    real(jprb) :: od_total, ssa_total, g_total
+
+    ! Combined scattering optical depth
+    real(jprb) :: scat_od, scat_od_total(config%n_g_lw)
+
+    ! Total cloud cover output from the cloud generator
+    real(jprb) :: total_cloud_cover
+
+    ! "Alpha" overlap parameter
+    real(jprb) :: overlap_alpha
+
+    ! Temporary storage for more efficient summation
+    real(jprb) :: sum_up, sum_dn, sum_up_clr, sum_dn_clr
+
+    ! Index of the highest cloudy layer
+    integer :: i_cloud_top
+
+    ! Number of g points
+    integer :: ng
+
+    ! Loop indices for level, column and g point
+    integer :: jlev, jcol, jg
+
+    real(jphook) :: hook_handle
+    !real(jprb)  totalMem
+    integer :: file_idx, fidx1, fidx2, fidx3
+
+#ifdef HAVE_ROCTX
+    call roctxStartRange("radiation::mcica_omp_lw"//c_null_char)
+#endif
+
+    if (lhook) call dr_hook('radiation_mcica_omp_lw:solver_mcica_omp_lw',0,hook_handle)
+
+    if (.not. config%do_clear) then
+      write(nulerr,'(a)') '*** Error: longwave McICA OMP requires clear-sky calculation to be performed'
+      call radiation_abort()
+    end if
+
+    ng = config%n_g_lw
+
+    !totalMem = 6*ng * nlev
+    !totalMem = totalMem+5*(ng)*(nlev+1) !flux_up,dn,up_clear,dn_clear,source
+    !totalmem = totalMem*(iendcol-istartcol)*SIZEOF((real(jprb)))/1.e9
+    !write(nulout,'(a,a,i0,a,g0.5)') __FILE__, " : LINE = ", __LINE__, " total_memory=",totalMem
+
+    !$OMP TARGET ENTER DATA MAP(ALLOC: flux_up, flux_dn, flux_up_clear, flux_dn_clear, &
+    !$OMP             is_clear_sky_layer, &
+    !$OMP             sample_val, frac, frac_std, overlap_param, cum_cloud_cover, &
+    !$OMP             pair_cloud_cover, cum_product, ibegin, iend)
+#if defined(__amdflang__)
+    !$OMP TARGET DATA MAP(PRESENT, ALLOC: config, single_level, cloud, od, ssa, g, od_cloud, ssa_cloud, &
+    !$OMP             g_cloud, planck_hl, emission, albedo, flux)
+#endif
+
+    !
+    ! These allocations need to moved into a TEAMS PRIVATE clause, I think.
+    !
+
+    !$OMP TARGET ENTER DATA MAP(ALLOC: trans_clear, od_scaling, &
+    !$OMP   reflectance, transmittance, source_up, source_dn, tmp_work_source, &
+    !$OMP   tmp_derivatives)
+
+    !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2)
+    do jlev = 1,config%pdf_sampler%nfsd
+      do jcol = 1,config%pdf_sampler%ncdf
+        sample_val(jcol,jlev) = config%pdf_sampler%val(jcol,jlev)
+      end do
+    end do
+    !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO
+
+    !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2)
+    do jcol = istartcol,iendcol
+      do jlev = 1, nlev
+        frac(jlev, jcol) = cloud%fraction(jcol,jlev)
+        frac_std(jlev, jcol) = cloud%fractional_std(jcol,jlev)
+        is_clear_sky_layer(jlev,jcol) = .true.
+      end do
+    end do
+    !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO
+
+    !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2)
+    do jcol = istartcol,iendcol
+      do jlev = 1, nlev-1
+        overlap_param(jlev, jcol) = cloud%overlap_param(jcol,jlev)
+      end do
+    end do
+    !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO
+
+    !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(overlap_alpha)
+    do jcol = istartcol,iendcol
+      !---------------------------------------------------------------------
+      ! manual inline from cum_cloud_cover_exp_ran >>>>>>>>>>>>>>>>>>>>>>>>
+      ! Loop to compute total cloud cover and the cumulative cloud cover
+      ! down to the base of each layer
+      do jlev = 1,nlev-1
+        ! Convert to "alpha" overlap parameter if necessary
+        if (config%use_beta_overlap) then
+          overlap_alpha = beta2alpha(overlap_param(jlev,jcol), &
+                &                     frac(jlev,jcol), frac(jlev+1,jcol))
+        else
+          overlap_alpha = overlap_param(jlev,jcol)
+        end if
+
+        ! Compute the combined cloud cover of layers jlev and jlev+1
+        pair_cloud_cover(jlev, jcol) = overlap_alpha*max(frac(jlev,jcol),frac(jlev+1,jcol)) &
+              &  + (1.0_jprb - overlap_alpha) &
+              &  * (frac(jlev,jcol)+frac(jlev+1,jcol)-frac(jlev,jcol)*frac(jlev+1,jcol))
+      end do
+    end do
+    !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO
+
+    !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO
+    do jcol = istartcol,iendcol
+      cum_cloud_cover(1, jcol) = frac(1,jcol)
+      cum_product(jcol) = 1.0_jprb - frac(1,jcol)
+      do jlev = 1,nlev-1
+        if (frac(jlev,jcol) >= MaxCloudFrac) then
+          ! Cloud cover has reached one
+          cum_product(jcol) = 0.0_jprb
+        else
+          cum_product(jcol) = cum_product(jcol) * (1.0_jprb - pair_cloud_cover(jlev, jcol)) &
+                &  / (1.0_jprb - frac(jlev,jcol))
+        end if
+        cum_cloud_cover(jlev+1, jcol) = 1.0_jprb - cum_product(jcol)
+      end do
+      flux%cloud_cover_lw(jcol) = cum_cloud_cover(nlev,jcol);
+      if (flux%cloud_cover_lw(jcol) < config%cloud_fraction_threshold) then
+        ! Treat column as clear sky: calling function therefore will not
+        ! use od_scaling so we don't need to calculate it
+        flux%cloud_cover_lw(jcol) = 0.0_jprb
+      end if
+    end do
+    !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO
+
+    !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO
+    do jcol = istartcol,iendcol
+      if (flux%cloud_cover_lw(jcol) >= config%cloud_fraction_threshold) then
+        ! Cloud is present: need to calculate od_scaling
+        ! Find range of cloudy layers
+        ibegin(jcol) = nlev
+        do jlev = 1, nlev
+          if( frac(jlev,jcol) > 0.0_jprb ) then
+            ibegin(jcol) = min(jlev, ibegin(jcol))
+          end if
+        end do
+
+        iend(jcol) = ibegin(jcol)
+        do jlev = ibegin(jcol)+1,nlev
+          if (frac(jlev,jcol) > 0.0_jprb) then
+            iend(jcol) = max(jlev, iend(jcol))
+          end if
+        end do
+      end if
+    end do
+    !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO
+
+    !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(3)
+    do jcol = istartcol,iendcol
+      do jlev = 1, nlev+1
+         do jg = 1, ng
+            flux_up_clear(jg,jlev,jcol) = 0.0_jprb
+            flux_dn_clear(jg,jlev,jcol) = 0.0_jprb
+            flux_up(jg,jlev,jcol) = 0.0_jprb
+            flux_dn(jg,jlev,jcol) = 0.0_jprb
+         end do
+      end do
+    end do
+    !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO
+
+    !
+    ! This kernel does band independent computations. Some computation is done across veritical levels
+    !
+    !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(jcol, jg) FIRSTPRIVATE(istartcol, iendcol, ng, nlev) &
+    !$OMP& THREAD_LIMIT(256)
+    do jcol = istartcol,iendcol
+       do jg = 1, ng
+          call cloud_generator_omp(jg, ng, nlev, &
+               &  single_level%iseed(jcol) + 997, &
+               &  config%cloud_fraction_threshold, &
+               &  frac(:,jcol), overlap_param(:,jcol), &
+               &  config%cloud_inhom_decorr_scaling, frac_std(:,jcol), &
+               &  config%pdf_sampler%ncdf, config%pdf_sampler%nfsd, &
+               &  config%pdf_sampler%fsd1, config%pdf_sampler%inv_fsd_interval, &
+               &  sample_val, &
+               &  od_scaling(:,:,jcol), flux%cloud_cover_lw(jcol)+0.0_jprb, & ! Workaround for nvhpc-24.1
+               &  ibegin(jcol), iend(jcol), &
+               &  cum_cloud_cover=cum_cloud_cover(:,jcol), &
+               &  pair_cloud_cover=pair_cloud_cover(:,jcol))
+       enddo
+    enddo
+
+    !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(od_cloud_new, od_total, ssa_total, g_total, scat_od, &
+    !$OMP& jcol, jg, jlev, i_cloud_top) FIRSTPRIVATE(istartcol, iendcol, ng, nlev) THREAD_LIMIT(1024)
+    do jcol = istartcol,iendcol
+       do jg = 1, ng
+          ! Clear-sky calculation
+          ! Non-scattering case: use simpler functions for
+          ! transmission and emission
+          call calc_no_scattering_transmittance_lw_omp(jg, ng, nlev, od(:,:,jcol), &
+               &  planck_hl(:,:,jcol), trans_clear(:,:,jcol), source_up(:,:,jcol), &
+               &  source_dn(:,:,jcol))
+
+          ! Simpler down-then-up method to compute fluxes
+          call calc_fluxes_no_scattering_lw_omp(jg, ng, nlev, &
+               &  trans_clear(:,:,jcol), source_up(:,:,jcol), source_dn(:,:,jcol), &
+               &  emission(:,jcol), albedo(:,jcol), &
+               &  flux_up_clear(:,:,jcol), flux_dn_clear(:,:,jcol))
+
+          ! Store surface spectral downwelling fluxes
+          flux%lw_dn_surf_clear_g(jg,jcol) = flux_dn_clear(jg,nlev+1,jcol)
+          ! Do cloudy-sky calculation; add a prime number to the seed in
+          ! the longwave
+
+          if (flux%cloud_cover_lw(jcol) >= config%cloud_fraction_threshold) then
+             ! Total-sky calculation
+             i_cloud_top = nlev+1
+
+             do jlev = 1,nlev
+                ! Compute combined gas+aerosol+cloud optical properties
+                if (frac(jlev,jcol) >= config%cloud_fraction_threshold) then
+                   is_clear_sky_layer(jlev,jcol) = .false.
+                   ! Get index to the first cloudy layer from the top
+                   if (i_cloud_top > jlev) then
+                      i_cloud_top = jlev
+                   end if
+                   
+                   od_cloud_new = od_scaling(jg,jlev,jcol) &
+                        &  * od_cloud(config%i_band_from_reordered_g_lw(jg),jlev,jcol)
+                   od_total  = od(jg,jlev,jcol) + od_cloud_new
+                   ssa_total = 0.0_jprb
+                   g_total   = 0.0_jprb
+
+                   if (config%do_lw_cloud_scattering) then
+                      ! Scattering case: calculate reflectance and
+                      ! transmittance at each model level
+                      if (od_total > 0.0_jprb) then
+                         scat_od = ssa_cloud(config%i_band_from_reordered_g_lw(jg),jlev,jcol) &
+                              &     * od_cloud_new
+                         ssa_total = scat_od / od_total
+                         if (scat_od > 0.0_jprb) then
+                            g_total = g_cloud(config%i_band_from_reordered_g_lw(jg),jlev,jcol) &
+                                 &     * ssa_cloud(config%i_band_from_reordered_g_lw(jg),jlev,jcol) &
+                                 &     *  od_cloud_new / scat_od
+                         end if
+                      end if
+
+                      ! Compute cloudy-sky reflectance, transmittance etc at
+                      ! each model level
+                      call calc_ref_trans_lw_single_level_omp(jg, ng, &
+                           &  od_total, ssa_total, g_total, &
+                           &  planck_hl(:,jlev:jlev+1,jcol), &
+                           &  reflectance(:,jlev,jcol), transmittance(:,jlev,jcol), &
+                           &  source_up(:,jlev,jcol), source_dn(:,jlev,jcol))
+                   else
+                      ! No-scattering case: use simpler functions for
+                      ! transmission and emission
+                      call calc_no_scattering_transmittance_lw_single_cell_omp(od_total, &
+                           &  planck_hl(jg,jlev,jcol), planck_hl(jg,jlev+1,jcol), transmittance(jg,jlev,jcol), &
+                           &  source_up(jg,jlev,jcol), source_dn(jg,jlev,jcol))
+                   end if
+
+                else
+                   ! Clear-sky layer: copy over clear-sky values
+                   reflectance(jg,jlev,jcol) = 0.0
+                   transmittance(jg,jlev,jcol) = trans_clear(jg,jlev,jcol)
+                end if
+             end do
+
+             if(config%do_lw_cloud_scattering) then
+                ! Use adding method to compute fluxes but optimize for the
+                ! presence of clear-sky layers
+                call fast_adding_ica_lw_omp(jg, ng, nlev, reflectance(:,:,jcol), transmittance(:,:,jcol), &
+                     &  source_up(:,:,jcol), source_dn(:,:,jcol), &
+                     &  emission(:,jcol), albedo(:,jcol), &
+                     &  is_clear_sky_layer(:,jcol), i_cloud_top, flux_dn_clear(:,:,jcol), &
+                     &  flux_up(:,:,jcol), flux_dn(:,:,jcol), &
+                     &  flux_up(:,:,jcol), flux_dn(:,:,jcol), &
+                     &  source=tmp_work_source(:,:,jcol))
+             else
+                ! Simpler down-then-up method to compute fluxes
+                call calc_fluxes_no_scattering_lw_omp(jg, ng, nlev, &
+                     &  transmittance(:,:,jcol), source_up(:,:,jcol), source_dn(:,:,jcol), &
+                     &  emission(:,jcol), albedo(:,jcol), &
+                     &  flux_up(:,:,jcol), flux_dn(:,:,jcol))
+             end if
+
+             ! Cloudy flux profiles currently assume completely overcast
+             ! skies; perform weighted average with clear-sky profile
+             ! Store surface spectral downwelling fluxes
+             flux%lw_dn_surf_g(jg,jcol) = flux%cloud_cover_lw(jcol)*flux_dn(jg,nlev+1,jcol) &
+                  &  + (1.0_jprb - flux%cloud_cover_lw(jcol))*flux%lw_dn_surf_clear_g(jg,jcol)
+          else
+             ! No cloud in profile and clear-sky fluxes already
+             ! calculated: copy them over
+             flux%lw_dn_surf_g(jg,jcol) = flux%lw_dn_surf_clear_g(jg,jcol)
+          end if
+       end do
+    end do
+    !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO
+
+    !
+    ! Separate out the derivative computation, which has reductions across spectral bands
+    !
+    if (config%do_lw_derivatives) then
+      !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO THREAD_LIMIT(64)
+      do jcol = istartcol,iendcol
+        if (flux%cloud_cover_lw(jcol) >= config%cloud_fraction_threshold) then
+          call calc_lw_derivatives_ica_omp(ng, nlev, jcol, transmittance(:,:,jcol), flux_up(:,nlev+1,jcol), &
+               &                       flux%lw_derivatives, tmp_derivatives(:,jcol))
+          if (flux%cloud_cover_lw(jcol) < 1.0_jprb - config%cloud_fraction_threshold) then
+            ! Modify the existing derivative with the contribution from the clear sky
+            call modify_lw_derivatives_ica_omp(ng, nlev, jcol, trans_clear(:,:,jcol), flux_up_clear(:,nlev+1,jcol), &
+                 &                             1.0_jprb-flux%cloud_cover_lw(jcol), flux%lw_derivatives, tmp_derivatives(:,jcol))
+          end if
+        else
+          call calc_lw_derivatives_ica_omp(ng, nlev, jcol, trans_clear(:,:,jcol), flux_up_clear(:,nlev+1,jcol), &
+               &                       flux%lw_derivatives, tmp_derivatives(:,jcol))
+        endif
+      enddo
+      !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO
+    endif
+
+    ! Loop through columns
+    !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(total_cloud_cover, sum_up, sum_dn, sum_up_clr, sum_dn_clr)
+#if defined(__amdflang__)
+    !$OMP TILE SIZES(64,1)
+#endif
+    do jcol = istartcol,iendcol
+       do jlev = 1,nlev+1
+
+        sum_up_clr = 0._jprb
+        sum_dn_clr = 0._jprb
+        do jg = 1,ng
+          sum_up_clr = sum_up_clr + flux_up_clear(jg,jlev,jcol)
+          sum_dn_clr = sum_dn_clr + flux_dn_clear(jg,jlev,jcol)
+        end do
+        flux%lw_up_clear(jcol,jlev) = sum_up_clr
+        flux%lw_dn_clear(jcol,jlev) = sum_dn_clr
+
+        total_cloud_cover = flux%cloud_cover_lw(jcol)
+        if (total_cloud_cover >= config%cloud_fraction_threshold) then
+
+          ! Store overcast broadband fluxes
+          sum_up = 0._jprb
+          sum_dn = 0._jprb
+          do jg = 1,ng
+            sum_up = sum_up + flux_up(jg,jlev,jcol)
+            sum_dn = sum_dn + flux_dn(jg,jlev,jcol)
+          end do
+          flux%lw_up(jcol,jlev) = total_cloud_cover*sum_up + (1.0_jprb - total_cloud_cover)*sum_up_clr
+          flux%lw_dn(jcol,jlev) = total_cloud_cover*sum_dn + (1.0_jprb - total_cloud_cover)*sum_dn_clr
+
+        else
+
+          flux%lw_up(jcol,jlev) = sum_up_clr
+          flux%lw_dn(jcol,jlev) = sum_dn_clr
+
+        end if
+      end do
+    end do
+#if defined(__amdflang__)
+    !$OMP END TILE
+#endif
+    !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO
+
+    !$OMP TARGET EXIT DATA MAP(DELETE: trans_clear, od_scaling, &
+    !$OMP   reflectance, transmittance, source_up, source_dn, tmp_work_source, &
+    !$OMP   tmp_derivatives)
+
+    !$OMP TARGET EXIT DATA MAP(DELETE: flux_up, flux_dn, flux_up_clear, flux_dn_clear, &
+    !$OMP             is_clear_sky_layer, &
+    !$OMP             sample_val, frac, frac_std, overlap_param, cum_cloud_cover, &
+    !$OMP             pair_cloud_cover, cum_product, ibegin, iend)
+
+#if defined(__amdflang__)
+    !$OMP END TARGET DATA
+#endif
+
+#ifdef HAVE_ROCTX
+    call roctxEndRange
+#endif
+    if (lhook) call dr_hook('radiation_mcica_omp_lw:solver_mcica_omp_lw',1,hook_handle)
+
+  end subroutine solver_mcica_omp_lw
+
+end module radiation_mcica_omp_lw
diff --git a/radiation/radiation_mcica_omp_sw.F90 b/radiation/radiation_mcica_omp_sw.F90
new file mode 100644
index 00000000..c26291a9
--- /dev/null
+++ b/radiation/radiation_mcica_omp_sw.F90
@@ -0,0 +1,592 @@
+! radiation_mcica_omp_sw.F90 - Monte-Carlo Independent Column Approximation shortwave solver
+!
+! (C) Copyright 2015- ECMWF.
+!
+! This software is licensed under the terms of the Apache Licence Version 2.0
+! which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+!
+! In applying this licence, ECMWF does not waive the privileges and immunities
+! granted to it by virtue of its status as an intergovernmental organisation
+! nor does it submit to any jurisdiction.
+!
+! Author:  Robin Hogan
+! Email:   r.j.hogan@ecmwf.int
+!
+! Modifications
+!   2017-04-11  R. Hogan  Receive albedos at g-points
+!   2017-04-22  R. Hogan  Store surface fluxes at all g-points
+!   2017-10-23  R. Hogan  Renamed single-character variables
+
+#include "ecrad_config.h"
+
+module radiation_mcica_omp_sw
+
+  use radiation_io, only : nulout
+  public
+
+contains
+
+  ! Provides elemental function "delta_eddington"
+#include "radiation_delta_eddington.h"
+
+  !---------------------------------------------------------------------
+  ! Shortwave Monte Carlo Independent Column Approximation
+  ! (McICA). This implementation performs a clear-sky and a cloudy-sky
+  ! calculation, and then weights the two to get the all-sky fluxes
+  ! according to the total cloud cover. This method reduces noise for
+  ! low cloud cover situations, and exploits the clear-sky
+  ! calculations that are usually performed for diagnostic purposes
+  ! simultaneously. The cloud generator has been carefully written
+  ! such that the stochastic cloud field satisfies the prescribed
+  ! overlap parameter accounting for this weighting.
+  subroutine solver_mcica_omp_sw(nlev,istartcol,iendcol, &
+       &  config, single_level, cloud, &
+       &  od, ssa, g, od_cloud, ssa_cloud, g_cloud, &
+       &  albedo_direct, albedo_diffuse, incoming_sw, &
+       &  flux)
+
+    use parkind1, only           : jprb
+    use yomhook,  only           : lhook, dr_hook, jphook
+
+    use radiation_io,   only           : nulerr, radiation_abort
+    use radiation_config, only         : config_type
+    use radiation_single_level, only   : single_level_type
+    use radiation_cloud, only          : cloud_type
+    use radiation_flux, only           : flux_type
+    use radiation_two_stream, only     : calc_two_stream_gammas_sw_single_band_omp, &
+         &                               calc_reflectance_transmittance_sw_single_band_omp, &
+         &                               calc_ref_trans_sw_omp, calc_ref_trans_sw_single_level_omp
+    
+    use radiation_adding_ica_sw, only  : adding_ica_sw_omp
+    use radiation_cloud_generator_acc, only: cloud_generator_omp
+    use radiation_cloud_cover, only    : beta2alpha, MaxCloudFrac
+
+#ifdef HAVE_ROCTX
+    use roctx_profiling, only: roctxstartrange, roctxendrange
+    use iso_c_binding, only: c_null_char
+#endif
+
+    implicit none
+
+    ! Inputs
+    integer, intent(in) :: nlev               ! number of model levels
+    integer, intent(in) :: istartcol, iendcol ! range of columns to process
+    type(config_type),        intent(in) :: config
+    type(single_level_type),  intent(in) :: single_level
+    type(cloud_type),         intent(in) :: cloud
+
+    ! Gas and aerosol optical depth, single-scattering albedo and
+    ! asymmetry factor at each shortwave g-point
+    real(jprb), intent(in), dimension(config%n_g_sw, nlev, istartcol:iendcol) :: &
+         &  od, ssa, g
+
+    ! Cloud and precipitation optical depth, single-scattering albedo and
+    ! asymmetry factor in each shortwave band
+    real(jprb), intent(in), dimension(config%n_bands_sw,nlev,istartcol:iendcol)   :: &
+         &  od_cloud, ssa_cloud, g_cloud
+
+    ! Direct and diffuse surface albedos, and the incoming shortwave
+    ! flux into a plane perpendicular to the incoming radiation at
+    ! top-of-atmosphere in each of the shortwave g points
+    real(jprb), intent(in), dimension(config%n_g_sw,istartcol:iendcol) :: &
+         &  albedo_direct, albedo_diffuse, incoming_sw
+
+    ! Output
+    type(flux_type), intent(inout):: flux
+
+    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+    ! Local variables : Mapped into Global Memory
+    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+    ! Fluxes per g point
+    real(jprb), dimension(config%n_g_sw, nlev+1, istartcol:iendcol) :: flux_up, flux_dn_diffuse, flux_dn_direct
+    real(jprb), dimension(config%n_g_sw, nlev+1, istartcol:iendcol) :: flux_up_clear, flux_dn_diffuse_clear, flux_dn_direct_clear
+
+    ! workaround that allows inling of cloud generator
+    real(jprb), dimension(config%pdf_sampler%ncdf, config%pdf_sampler%nfsd) :: sample_val
+
+    ! copies to increase performance
+    real(jprb), dimension(nlev, istartcol:iendcol) :: frac, frac_std
+    real(jprb), dimension(nlev-1, istartcol:iendcol) :: overlap_param
+
+    real(jprb), dimension(nlev, istartcol:iendcol) :: cum_cloud_cover
+    real(jprb), dimension(nlev-1, istartcol:iendcol) :: pair_cloud_cover
+
+    ! Cumulative product needed in computation of total_cloud_cover
+    real(jprb) :: cum_product(istartcol:iendcol)
+
+    ! First and last cloudy layers
+    integer :: ibegin(istartcol:iendcol), iend(istartcol:iendcol)
+ 
+    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+    ! Local variables : Was stack and is now in Global Memory
+    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+    ! Transmittance for the direct beam in clear and all skies
+    real(jprb), dimension(config%n_g_sw, nlev, istartcol:iendcol) :: ref_clear,  trans_clear, ref_dir_clear
+    !real(jprb), dimension(config%n_g_sw, nlev, istartcol:iendcol) :: trans_dir_diff_clear, trans_dir_dir_clear
+
+    ! Fraction of direct beam scattered by a layer into the upwelling
+    ! or downwelling diffuse streams, in clear and all skies
+    real(jprb), dimension(config%n_g_sw, nlev, istartcol:iendcol) :: ref_dir, trans_dir_diff
+
+    ! Transmittance for the direct beam in clear and all skies
+    real(jprb), dimension(config%n_g_sw, nlev, istartcol:iendcol) :: trans_dir_dir
+
+    ! Diffuse reflectance and transmittance for each layer in clear
+    ! and all skies
+    real(jprb), dimension(config%n_g_sw, nlev, istartcol:iendcol) :: reflectance, transmittance
+
+    ! Optical depth scaling from the cloud generator, zero indicating
+    ! clear skies
+    real(jprb), dimension(config%n_g_sw,nlev,istartcol:iendcol) :: od_scaling
+
+    ! Temporary working array
+    real(jprb), dimension(config%n_g_sw,nlev+1,istartcol:iendcol) :: tmp_work_source
+
+    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+    ! Local variables : Stack
+    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+    
+    ! Cosine of solar zenith angle
+    real(jprb)                                 :: cos_sza
+
+    ! Combined gas+aerosol+cloud optical depth, single scattering
+    ! albedo and asymmetry factor
+    real(jprb) :: od_total, ssa_total, g_total
+
+    ! Combined scattering optical depth
+    real(jprb) :: scat_od
+
+    ! Two-stream coefficients
+    real(jprb) :: gamma1, gamma2, gamma3
+
+    ! Modified optical depth after McICA scaling to represent cloud
+    ! inhomogeneity
+    real(jprb) :: od_cloud_new
+
+    ! "Alpha" overlap parameter
+    real(jprb) :: overlap_alpha
+
+    ! Auxiliary for more efficient summation
+    real(jprb) :: sum_up, sum_dn_direct, sum_dn_diffuse
+
+    ! Number of g points
+    integer :: ng
+
+    ! Loop indices for level, column and g point
+    integer :: jlev, jcol, jg
+    !real(jprb)  totalMem
+    real(jphook) :: hook_handle
+
+#ifdef HAVE_ROCTX
+    call roctxStartRange("radiation::mcica_omp_sw"//c_null_char)
+#endif
+
+    if (lhook) call dr_hook('radiation_mcica_omp_sw:solver_mcica_omp_sw',0,hook_handle)
+
+    if (.not. config%do_clear) then
+      write(nulerr,'(a)') '*** Error: shortwave McICA OMP requires clear-sky calculation to be performed'
+      call radiation_abort()
+    end if
+
+    !$OMP TARGET ENTER DATA MAP(ALLOC: ref_clear,  trans_clear, ref_dir_clear, &
+    !$OMP&   od_scaling, tmp_work_source,&
+    !$OMP&   ref_dir, trans_dir_diff, trans_dir_dir, reflectance, transmittance)
+    !$OMP TARGET ENTER DATA MAP(ALLOC: flux_up, flux_dn_diffuse, flux_dn_direct, &
+    !$OMP             flux_up_clear, flux_dn_diffuse_clear, flux_dn_direct_clear, &
+    !$OMP             sample_val, frac, frac_std, overlap_param, &
+    !$OMP             cum_cloud_cover, pair_cloud_cover, cum_product, ibegin, iend)
+#if defined(__amdflang__)
+    !$OMP TARGET DATA MAP(PRESENT, ALLOC: config, single_level, cloud, od, ssa, g, od_cloud, &
+    !$OMP             ssa_cloud, g_cloud, albedo_direct, albedo_diffuse, &
+    !$OMP             incoming_sw, flux)
+#endif
+
+    ng = config%n_g_sw
+
+    !totalMem = 9*ng * nlev
+    !totalMem = totalMem+7*(ng)*(nlev+1) !flux_up,dn,up_clear,dn_clear,source
+    !totalmem = totalMem*(iendcol-istartcol)*SIZEOF((real(jprb)))/1.e9
+    !write(nulout,'(a,a,i0,a,g0.5)') __FILE__, " : LINE = ", __LINE__, " total_memory=",totalMem
+
+    !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2)
+    do jlev = 1,config%pdf_sampler%nfsd
+      do jcol = 1,config%pdf_sampler%ncdf
+        sample_val(jcol,jlev) = config%pdf_sampler%val(jcol,jlev)
+      end do
+    end do
+    !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO
+
+    !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2)
+    do jcol = istartcol,iendcol
+      do jlev = 1, nlev
+        frac(jlev, jcol) = cloud%fraction(jcol,jlev)
+        frac_std(jlev, jcol) = cloud%fractional_std(jcol,jlev)
+      end do
+    end do
+    !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO
+
+    !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2)
+    do jcol = istartcol,iendcol
+      do jlev = 1, nlev-1
+        overlap_param(jlev, jcol) = cloud%overlap_param(jcol,jlev)
+      end do
+    end do
+    !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO
+
+    !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(overlap_alpha)
+    do jcol = istartcol,iendcol
+      !Only perform calculation if sun above the horizon
+      !---------------------------------------------------------------------
+      ! manual inline from cum_cloud_cover_exp_ran >>>>>>>>>>>>>>>>>>>>>>>>
+      ! Loop to compute total cloud cover and the cumulative cloud cover
+      ! down to the base of each layer
+      do jlev = 1,nlev-1
+        if (single_level%cos_sza(jcol) > 0.0_jprb ) then
+          ! Convert to "alpha" overlap parameter if necessary
+          if (config%use_beta_overlap) then
+            overlap_alpha = beta2alpha(overlap_param(jlev,jcol), &
+                  &                     frac(jlev,jcol), frac(jlev+1,jcol))
+          else
+            overlap_alpha = overlap_param(jlev,jcol)
+          end if
+          ! Compute the combined cloud cover of layers jlev and jlev+1
+          pair_cloud_cover(jlev, jcol) = overlap_alpha*max(frac(jlev,jcol),frac(jlev+1,jcol)) &
+                &  + (1.0_jprb - overlap_alpha) &
+                &  * (frac(jlev,jcol)+frac(jlev+1,jcol)-frac(jlev,jcol)*frac(jlev+1,jcol))
+        end if
+      end do
+    end do
+    !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO
+
+    !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO
+    do jcol = istartcol,iendcol
+      !Only perform calculation if sun above the horizon
+      if (single_level%cos_sza(jcol) > 0.0_jprb ) then
+        cum_cloud_cover(1, jcol) = frac(1,jcol)
+        cum_product(jcol) = 1.0_jprb - frac(1,jcol)
+        do jlev = 1,nlev-1
+          if (frac(jlev,jcol) >= MaxCloudFrac) then
+            ! Cloud cover has reached one
+            cum_product(jcol) = 0.0_jprb
+          else
+            cum_product(jcol) = cum_product(jcol) * (1.0_jprb - pair_cloud_cover(jlev, jcol)) &
+                  &  / (1.0_jprb - frac(jlev,jcol))
+          end if
+          cum_cloud_cover(jlev+1, jcol) = 1.0_jprb - cum_product(jcol)
+        end do
+        flux%cloud_cover_sw(jcol) = cum_cloud_cover(nlev,jcol);
+        if (flux%cloud_cover_sw(jcol) < config%cloud_fraction_threshold) then
+          ! Treat column as clear sky: calling function therefore will not
+          ! use od_scaling so we don't need to calculate it
+          flux%cloud_cover_sw(jcol) = 0.0_jprb
+        end if
+      end if
+    end do
+    !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO
+
+    !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO
+    do jcol = istartcol,iendcol
+      !Only perform calculation if sun above the horizon
+      if (single_level%cos_sza(jcol) > 0.0_jprb .and. flux%cloud_cover_sw(jcol) >= config%cloud_fraction_threshold) then
+        ! Cloud is present: need to calculate od_scaling
+        ! Find range of cloudy layers
+        ibegin(jcol) = nlev
+        do jlev = 1, nlev
+          if( frac(jlev,jcol) > 0.0_jprb ) then
+            ibegin(jcol) = min(jlev, ibegin(jcol))
+          end if
+        end do
+
+        iend(jcol) = ibegin(jcol)
+        do jlev = ibegin(jcol)+1,nlev
+          if (frac(jlev,jcol) > 0.0_jprb) then
+            iend(jcol) = max(jlev, iend(jcol))
+          end if
+        end do
+      end if
+    end do
+    !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO
+
+    !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(jcol, jg) FIRSTPRIVATE(istartcol, iendcol, ng, nlev) &
+    !$OMP& THREAD_LIMIT(512)
+    do jcol = istartcol,iendcol
+       do jg = 1, ng
+          ! Do cloudy-sky calculation
+          call cloud_generator_omp(jg, ng, nlev, &
+               &  single_level%iseed(jcol) + 0, & ! Workaround for nvhpc-24.1
+               &  config%cloud_fraction_threshold, &
+               &  frac(:,jcol), overlap_param(:,jcol), &
+               &  config%cloud_inhom_decorr_scaling, frac_std(:,jcol), &
+               &  config%pdf_sampler%ncdf, config%pdf_sampler%nfsd, &
+               &  config%pdf_sampler%fsd1, config%pdf_sampler%inv_fsd_interval, &
+               &  sample_val, &
+               &  od_scaling(:,:,jcol), flux%cloud_cover_sw(jcol)+0.0_jprb, & ! Workaround for nvhpc-24.1
+               &  ibegin(jcol), iend(jcol), &
+               &  cum_cloud_cover=cum_cloud_cover(:,jcol), &
+               &  pair_cloud_cover=pair_cloud_cover(:,jcol))
+       enddo
+    enddo
+
+    !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(cos_sza, gamma1, gamma2, gamma3, od_cloud_new, od_total, &
+    !$OMP& ssa_total, g_total, scat_od, jcol, jg, jlev) FIRSTPRIVATE(istartcol, iendcol, ng, nlev) THREAD_LIMIT(512)
+    do jcol = istartcol,iendcol
+       do jg = 1, ng
+          ! Only perform calculation if sun above the horizon
+          if (single_level%cos_sza(jcol) > 0.0_jprb) then
+             cos_sza = single_level%cos_sza(jcol)
+
+             ! Clear-sky calculation - first compute clear-sky reflectance,
+             ! transmittance etc at each model level
+             if (.not. config%do_sw_delta_scaling_with_gases) then
+                ! Delta-Eddington scaling has already been performed to the
+                ! aerosol part of od, ssa and g
+                call calc_ref_trans_sw_omp(jg, ng, nlev, &
+                     &  cos_sza, od(:,:,jcol), ssa(:,:,jcol), g(:,:,jcol), &
+                     &  ref_clear(:,:,jcol), trans_clear(:,:,jcol), &
+                     &  ref_dir_clear(:,:,jcol), trans_dir_diff(:,:,jcol), &
+                     &  trans_dir_dir(:,:,jcol))
+             else
+                ! Apply delta-Eddington scaling to the aerosol-gas mixture
+                do jlev = 1,nlev
+                   od_total  =  od(jg,jlev,jcol)
+                   ssa_total = ssa(jg,jlev,jcol)
+                   g_total   =   g(jg,jlev,jcol)
+                   call delta_eddington(od_total, ssa_total, g_total)
+                   call calc_two_stream_gammas_sw_single_band_omp(jg, &
+                        &  cos_sza, ssa_total, g_total, &
+                        &  gamma1, gamma2, gamma3)
+
+                   call calc_reflectance_transmittance_sw_single_band_omp(jg, ng, &
+                        &  cos_sza, od_total, ssa_total, &
+                        &  gamma1, gamma2, gamma3, &
+                        &  ref_clear(:,jlev,jcol), trans_clear(:,jlev,jcol), &
+                        &  ref_dir_clear(:,jlev,jcol), trans_dir_diff(:,jlev,jcol), &
+                        &  trans_dir_dir(:,jlev,jcol) )
+                end do
+             end if
+
+             ! Use adding method to compute fluxes
+             call adding_ica_sw_omp(jg, ng, nlev, incoming_sw(:,jcol), &
+                  &  albedo_diffuse(:,jcol), albedo_direct(:,jcol), cos_sza, &
+                  &  ref_clear(:,:,jcol), trans_clear(:,:,jcol), ref_dir_clear(:,:,jcol), trans_dir_diff(:,:,jcol), &
+                  &  trans_dir_dir(:,:,jcol), flux_up(:,:,jcol), flux_dn_diffuse(:,:,jcol), flux_dn_direct(:,:,jcol), &
+                  &  flux_up(:,:,jcol), flux_dn_diffuse(:,:,jcol), source=tmp_work_source(:,:,jcol))
+
+             ! save temporarily clear-sky broadband fluxes
+             do jlev = 1,nlev+1
+                flux_up_clear(jg,jlev,jcol) = flux_up(jg,jlev,jcol)
+                flux_dn_direct_clear(jg,jlev,jcol) = flux_dn_direct(jg,jlev,jcol)
+                flux_dn_diffuse_clear(jg,jlev,jcol) = flux_dn_diffuse(jg,jlev,jcol)
+             end do
+             
+             ! Store spectral downwelling fluxes at surface
+             flux%sw_dn_diffuse_surf_clear_g(jg,jcol) = flux_dn_diffuse(jg,nlev+1,jcol)
+             flux%sw_dn_direct_surf_clear_g(jg,jcol)  = flux_dn_direct(jg,nlev+1,jcol)
+
+             if (flux%cloud_cover_sw(jcol) >= config%cloud_fraction_threshold) then
+                ! Total-sky calculation
+                do jlev = 1,nlev
+                   ! Compute combined gas+aerosol+cloud optical properties
+                   if (frac(jlev,jcol) >= config%cloud_fraction_threshold) then
+                      od_cloud_new = od_scaling(jg,jlev,jcol) &
+                           &  * od_cloud(config%i_band_from_reordered_g_sw(jg),jlev,jcol)
+                      od_total  = od(jg,jlev,jcol) + od_cloud_new
+                      ssa_total = 0.0_jprb
+                      g_total   = 0.0_jprb
+                         
+                      ! In single precision we need to protect against the
+                      ! case that od_total > 0.0 and ssa_total > 0.0 but
+                      ! od_total*ssa_total == 0 due to underflow
+                      if (od_total > 0.0_jprb) then
+                         scat_od = ssa(jg,jlev,jcol)*od(jg,jlev,jcol) &
+                              &     + ssa_cloud(config%i_band_from_reordered_g_sw(jg),jlev,jcol) &
+                              &     *  od_cloud_new
+                         ssa_total = scat_od / od_total
+                         if (scat_od > 0.0_jprb) then
+                            g_total = (g(jg,jlev,jcol)*ssa(jg,jlev,jcol)*od(jg,jlev,jcol) &
+                                 &     +   g_cloud(config%i_band_from_reordered_g_sw(jg),jlev,jcol) &
+                                 &     * ssa_cloud(config%i_band_from_reordered_g_sw(jg),jlev,jcol) &
+                                 &     *  od_cloud_new) &
+                                 &     / scat_od
+                         end if
+                      end if
+                      ! Apply delta-Eddington scaling to the cloud-aerosol-gas
+                      ! mixture
+                      if (config%do_sw_delta_scaling_with_gases) then
+                         call delta_eddington(od_total, ssa_total, g_total)
+                      end if
+
+                      ! Compute cloudy-sky reflectance, transmittance etc at
+                      ! each model level
+                      call calc_ref_trans_sw_single_level_omp(jg, ng, &
+                           &  cos_sza, od_total, ssa_total, g_total, &
+                           &  reflectance(:,jlev,jcol), transmittance(:,jlev,jcol), &
+                           &  ref_dir(:,jlev,jcol), trans_dir_diff(:,jlev,jcol), &
+                           &  trans_dir_dir(:,jlev,jcol))
+                   else
+                      ! Clear-sky layer: copy over clear-sky values
+                      reflectance(jg,jlev,jcol) = ref_clear(jg,jlev,jcol)
+                      transmittance(jg,jlev,jcol) = trans_clear(jg,jlev,jcol)
+                      ref_dir(jg,jlev,jcol) = ref_dir_clear(jg,jlev,jcol)
+                      !trans_dir_diff(jg,jlev,jcol) = trans_dir_diff_clear(jg,jlev,jcol)
+                      !trans_dir_dir(jg,jlev,jcol) = trans_dir_dir_clear(jg,jlev,jcol)
+                   end if
+                end do
+
+                ! Use adding method to compute fluxes for an overcast sky
+                call adding_ica_sw_omp(jg, ng, nlev, incoming_sw(:,jcol), &
+                     &  albedo_diffuse(:,jcol), albedo_direct(:,jcol), cos_sza, &
+                     &  reflectance(:,:,jcol), transmittance(:,:,jcol), ref_dir(:,:,jcol), trans_dir_diff(:,:,jcol), &
+                     &  trans_dir_dir(:,:,jcol), flux_up(:,:,jcol), flux_dn_diffuse(:,:,jcol), flux_dn_direct(:,:,jcol), &
+                     &  flux_up(:,:,jcol), flux_dn_diffuse(:,:,jcol), source=tmp_work_source(:,:,jcol))
+                
+                ! Likewise for surface spectral fluxes
+                flux%sw_dn_diffuse_surf_g(jg,jcol) = flux_dn_diffuse(jg,nlev+1,jcol)
+                flux%sw_dn_direct_surf_g(jg,jcol)  = flux_dn_direct(jg,nlev+1,jcol)
+                flux%sw_dn_diffuse_surf_g(jg,jcol) = flux%cloud_cover_sw(jcol) *flux%sw_dn_diffuse_surf_g(jg,jcol) &
+                     &     + (1.0_jprb - flux%cloud_cover_sw(jcol))*flux%sw_dn_diffuse_surf_clear_g(jg,jcol)
+                flux%sw_dn_direct_surf_g(jg,jcol) = flux%cloud_cover_sw(jcol) *flux%sw_dn_direct_surf_g(jg,jcol) &
+                     &     + (1.0_jprb - flux%cloud_cover_sw(jcol))*flux%sw_dn_direct_surf_clear_g(jg,jcol)
+                
+             else
+                ! No cloud in profile and clear-sky fluxes already
+                ! calculated: copy them over
+                flux%sw_dn_diffuse_surf_g(jg,jcol) = flux%sw_dn_diffuse_surf_clear_g(jg,jcol)
+                flux%sw_dn_direct_surf_g(jg,jcol)  = flux%sw_dn_direct_surf_clear_g(jg,jcol)
+
+             end if ! Cloud is present in profile
+          else
+             flux%sw_dn_diffuse_surf_g(jg,jcol) = 0.0_jprb
+             flux%sw_dn_direct_surf_g(jg,jcol)  = 0.0_jprb
+             flux%sw_dn_diffuse_surf_clear_g(jg,jcol) = 0.0_jprb
+             flux%sw_dn_direct_surf_clear_g(jg,jcol)  = 0.0_jprb
+          end if ! Sun above horizon
+       end do !loop over spectral bands
+    end do ! Loop over columns
+    !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO
+
+    ! Loop through columns
+    !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(cos_sza, sum_dn_diffuse, sum_dn_direct, sum_up)
+#if defined(__amdflang__)
+    !$OMP TILE SIZES(64,1)
+#endif
+    do jcol = istartcol,iendcol
+       do jlev = 1, nlev+1
+
+        ! Only perform calculation if sun above the horizon
+        if (single_level%cos_sza(jcol) > 0.0_jprb) then
+          cos_sza = single_level%cos_sza(jcol)
+
+          ! Sum over g-points to compute and save clear-sky broadband
+          ! fluxes
+          sum_up = 0.0_jprb
+          sum_dn_direct = 0.0_jprb
+          sum_dn_diffuse = 0.0_jprb
+
+          do jg = 1,ng
+             sum_up = sum_up + flux_up_clear(jg,jlev,jcol)
+             sum_dn_direct = sum_dn_direct + flux_dn_direct_clear(jg,jlev,jcol)
+             sum_dn_diffuse = sum_dn_diffuse + flux_dn_diffuse_clear(jg,jlev,jcol)
+          end do
+          flux%sw_up_clear(jcol,jlev) = sum_up
+          if (allocated(flux%sw_dn_direct_clear)) then
+             flux%sw_dn_direct_clear(jcol,jlev) = sum_dn_direct
+          end if
+          flux%sw_dn_clear(jcol,jlev) = sum_dn_diffuse + sum_dn_direct
+          
+          if (flux%cloud_cover_sw(jcol) >= config%cloud_fraction_threshold) then
+             ! Store overcast broadband fluxes
+             sum_up = 0.0_jprb
+             sum_dn_direct = 0.0_jprb
+             sum_dn_diffuse = 0.0_jprb
+             do jg = 1,ng
+                sum_up = sum_up + flux_up(jg,jlev,jcol)
+                sum_dn_direct = sum_dn_direct + flux_dn_direct(jg,jlev,jcol)
+                sum_dn_diffuse = sum_dn_diffuse + flux_dn_diffuse(jg,jlev,jcol)
+             end do
+             flux%sw_up(jcol,jlev) = sum_up
+             if (allocated(flux%sw_dn_direct)) then
+                flux%sw_dn_direct(jcol,jlev) = sum_dn_direct
+             end if
+             flux%sw_dn(jcol,jlev) = sum_dn_diffuse + sum_dn_direct
+             
+          end if
+       end if ! Sun above horizon
+      end do ! Loop over columns
+    end do
+#if defined(__amdflang__)
+    !$OMP END TILE
+#endif
+    !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO
+
+    ! Loop through columns
+    !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(cos_sza)
+    do jlev = 1, nlev+1
+      do jcol = istartcol,iendcol
+        ! Only perform calculation if sun above the horizon
+        if (single_level%cos_sza(jcol) > 0.0_jprb) then
+          cos_sza = single_level%cos_sza(jcol)
+
+          if (flux%cloud_cover_sw(jcol) >= config%cloud_fraction_threshold) then
+            ! Cloudy flux profiles currently assume completely overcast
+            ! skies; perform weighted average with clear-sky profile
+            flux%sw_up(jcol,jlev) =  flux%cloud_cover_sw(jcol) *flux%sw_up(jcol,jlev) &
+                &  + (1.0_jprb - flux%cloud_cover_sw(jcol))*flux%sw_up_clear(jcol,jlev)
+            flux%sw_dn(jcol,jlev) =  flux%cloud_cover_sw(jcol) *flux%sw_dn(jcol,jlev) &
+                &  + (1.0_jprb - flux%cloud_cover_sw(jcol))*flux%sw_dn_clear(jcol,jlev)
+            if (allocated(flux%sw_dn_direct)) then
+              flux%sw_dn_direct(jcol,jlev) = flux%cloud_cover_sw(jcol) *flux%sw_dn_direct(jcol,jlev) &
+                  &  + (1.0_jprb - flux%cloud_cover_sw(jcol))*flux%sw_dn_direct_clear(jcol,jlev)
+            end if
+
+          else
+            ! No cloud in profile and clear-sky fluxes already
+            ! calculated: copy them over
+            flux%sw_up(jcol,jlev) = flux%sw_up_clear(jcol,jlev)
+            flux%sw_dn(jcol,jlev) = flux%sw_dn_clear(jcol,jlev)
+            if (allocated(flux%sw_dn_direct)) then
+              flux%sw_dn_direct(jcol,jlev) = flux%sw_dn_direct_clear(jcol,jlev)
+            end if
+
+          end if ! Cloud is present in profile
+        else
+          ! Set fluxes to zero if sun is below the horizon
+          flux%sw_up(jcol,jlev) = 0.0_jprb
+          flux%sw_dn(jcol,jlev) = 0.0_jprb
+          if (allocated(flux%sw_dn_direct)) then
+            flux%sw_dn_direct(jcol,jlev) = 0.0_jprb
+          end if
+          flux%sw_up_clear(jcol,jlev) = 0.0_jprb
+          flux%sw_dn_clear(jcol,jlev) = 0.0_jprb
+          if (allocated(flux%sw_dn_direct_clear)) then
+            flux%sw_dn_direct_clear(jcol,jlev) = 0.0_jprb
+          end if
+        end if ! Sun above horizon
+      end do ! Loop over columns
+    end do ! Loop over levels
+    !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO
+
+    !$OMP TARGET EXIT DATA MAP(DELETE: flux_up, flux_dn_diffuse, flux_dn_direct, &
+    !$OMP             flux_up_clear, flux_dn_diffuse_clear, flux_dn_direct_clear, &
+    !$OMP             sample_val, frac, frac_std, overlap_param, &
+    !$OMP             cum_cloud_cover, pair_cloud_cover, cum_product, ibegin, iend)
+
+    !$OMP TARGET EXIT DATA MAP(DELETE: ref_clear,  trans_clear, ref_dir_clear, &
+    !$OMP&   od_scaling, tmp_work_source,&
+    !$OMP&   ref_dir, trans_dir_diff, trans_dir_dir, reflectance, transmittance)
+
+#if defined(__amdflang__)
+    !$OMP END TARGET DATA
+#endif
+
+#ifdef HAVE_ROCTX
+    call roctxEndRange
+#endif
+
+    if (lhook) call dr_hook('radiation_mcica_omp_sw:solver_mcica_omp_sw',1,hook_handle)
+
+  end subroutine solver_mcica_omp_sw
+
+end module radiation_mcica_omp_sw
diff --git a/radiation/radiation_random_numbers.F90 b/radiation/radiation_random_numbers.F90
index 6bd674f2..964b6495 100644
--- a/radiation/radiation_random_numbers.F90
+++ b/radiation/radiation_random_numbers.F90
@@ -45,8 +45,15 @@ module radiation_random_numbers
 
   implicit none
 
+  !$omp declare target(uniform_distribution_omp)
+
+#if defined(__INTEL_COMPILER) || (defined(__NVCOMPILER) && defined(OMPGPU))
+  ! This is needed in order for the intel llvm ifx compiler to build this successfully
+  public
+#else
   public :: rng_type, IRngMinstdVector, IRngNative, initialize_acc, &
-    &  uniform_distribution_acc, IMinstdA0, IMinstdA, IMinstdM
+    &  uniform_distribution_acc, IMinstdA0, IMinstdA, IMinstdM, IMinstdScale, uniform_distribution_omp
+#endif
 
   enum, bind(c)
     enumerator IRngNative, &    ! Built-in Fortran-90 RNG
@@ -343,5 +350,21 @@ function uniform_distribution_acc(istate) result(randnum)
 
   end function uniform_distribution_acc
 
+  !---------------------------------------------------------------------
+  ! Populate vector "randnum" with pseudo-random numbers; if rannum is
+  ! of length greater than nmaxstreams (specified when the generator
+  ! was initialized) then only the first nmaxstreams elements will be
+  ! assigned.
+  subroutine uniform_distribution_omp(istate,randnum)
+
+    integer(kind=jpib), intent(inout) :: istate
+    real(kind=jprb), intent(out)   :: randnum
+
+    ! C++ minstd_rand algorithm
+    istate = mod(IMinstdA * istate, IMinstdM)
+    randnum = IMinstdScale * istate
+
+  end subroutine uniform_distribution_omp
+
 
 end module radiation_random_numbers
diff --git a/radiation/radiation_two_stream.F90 b/radiation/radiation_two_stream.F90
index 69372b52..26bb2d4f 100644
--- a/radiation/radiation_two_stream.F90
+++ b/radiation/radiation_two_stream.F90
@@ -43,6 +43,15 @@ module radiation_two_stream
   ! Uncomment the following to turn Dr Hook on.
 !#define DO_DR_HOOK_TWO_STREAM
 
+  !$omp declare target(calc_two_stream_gammas_sw_single_band_omp)
+  !$omp declare target(calc_reflectance_transmittance_sw_single_band_omp)
+  !$omp declare target(calc_ref_trans_sw_omp)
+  !$omp declare target(calc_ref_trans_sw_single_level_omp)
+
+  !$omp declare target(calc_ref_trans_lw_single_level_omp)
+  !$omp declare target(calc_no_scattering_transmittance_lw_omp)
+  !$omp declare target(calc_no_scattering_transmittance_lw_single_cell_omp)
+
 contains
 
   !---------------------------------------------------------------------
@@ -1078,4 +1087,594 @@ subroutine calc_frac_scattered_diffuse_sw(ng, od, &
 
   end subroutine calc_frac_scattered_diffuse_sw
 
+  !---------------------------------------------------------------------
+  !
+  ! OpenMP routines
+  !
+  !---------------------------------------------------------------------
+
+
+  !---------------------------------------------------------------------
+  ! OpenMP-optimized variant that avoids the local allocation of gamma
+  ! coefficients
+  subroutine calc_ref_trans_lw_single_level_omp(jg, ng, &
+    &    od, ssa, asymmetry, planck, &
+    &    reflectance, transmittance, source_up, source_dn)
+
+    integer, intent(in) :: jg, ng
+
+    ! Optical depth and single scattering albedo
+    real(jprb), intent(in) :: od
+
+    ! Single scattering albedo and asymmetry factor
+    real(jprb), intent(in) :: ssa, asymmetry
+
+    ! The Planck terms (functions of temperature) at the top and
+    ! bottom of the layer
+    real(jprb), intent(in), dimension(ng,2) :: planck
+
+    ! The diffuse reflectance and transmittance, i.e. the fraction of
+    ! diffuse radiation incident on a layer from either top or bottom
+    ! that is reflected back or transmitted through
+    real(jprb), intent(out), dimension(ng) :: reflectance, transmittance
+
+    ! The upward emission at the top of the layer and the downward
+    ! emission at its base, due to emission from within the layer
+    real(jprb), intent(out), dimension(ng) :: source_up, source_dn
+
+    real(jprb) :: gamma1, gamma2
+
+    ! The two transfer coefficients from the two-stream
+    ! differential equations
+    !real(jprb), dimension(ng)                     :: k_exponent, exponential
+    real(jprb) :: reftrans_factor
+    real(jprb) :: exponential2 ! = exp(-2*k_exponent*od)
+    real(jprb) :: coeff, coeff_up_top, coeff_up_bot, coeff_dn_top, coeff_dn_bot, factor
+
+    !
+    ! Ideally, we would use the associate statement below, however this doesn't
+    ! work with NVCOMPILER with OpenMP offload. Thus, we just use the more the array
+    ! that is pointed to :(
+    !
+#if defined(__amdflang__)
+    associate (exponential=>transmittance, k_exponent=>source_up)
+#endif
+
+    factor = (LwDiffusivityWP * 0.5_jprb) * ssa
+    gamma1 = LwDiffusivityWP - factor*(1.0_jprb + asymmetry)
+    gamma2 = factor * (1.0_jprb - asymmetry)
+#if defined(__amdflang__)
+    k_exponent(jg) = sqrt(max((gamma1 - gamma2) * (gamma1 + gamma2), &
+         1.0e-12_jprb)) ! Eq 18 of Meador & Weaver (1980)
+
+    exponential(jg) = exp(-k_exponent(jg)*od)
+#else
+    source_up(jg) = sqrt(max((gamma1 - gamma2) * (gamma1 + gamma2), &
+         1.0e-12_jprb)) ! Eq 18 of Meador & Weaver (1980)
+
+    transmittance(jg) = exp(-source_up(jg)*od)
+#endif
+
+    if (od > 1.0e-3_jprb) then
+#if defined(__amdflang__)
+       exponential2 = exponential(jg)*exponential(jg)
+       reftrans_factor = 1.0 / (k_exponent(jg)  + gamma1 + (k_exponent(jg) - gamma1)*exponential2)
+#else
+       exponential2 = transmittance(jg)*transmittance(jg)
+       reftrans_factor = 1.0 / (source_up(jg)  + gamma1 + (source_up(jg) - gamma1)*exponential2)
+#endif
+       ! Meador & Weaver (1980) Eq. 25
+       reflectance(jg) = gamma2 * (1.0_jprb - exponential2) * reftrans_factor
+       ! Meador & Weaver (1980) Eq. 26
+#if defined(__amdflang__)
+       transmittance(jg) = 2.0_jprb * k_exponent(jg) * exponential(jg) * reftrans_factor
+#else
+       transmittance(jg) = 2.0_jprb * source_up(jg) * transmittance(jg) * reftrans_factor
+#endif
+       ! Compute upward and downward emission assuming the Planck
+       ! function to vary linearly with optical depth within the layer
+       ! (e.g. Wiscombe , JQSRT 1976).
+
+       ! Stackhouse and Stephens (JAS 1991) Eqs 5 & 12
+       coeff = (planck(jg,2)-planck(jg,1)) / (od*(gamma1+gamma2))
+       coeff_up_top  =  coeff + planck(jg,1)
+       coeff_up_bot  =  coeff + planck(jg,2)
+       coeff_dn_top  = -coeff + planck(jg,1)
+       coeff_dn_bot  = -coeff + planck(jg,2)
+       source_up(jg) =  coeff_up_top - reflectance(jg) * coeff_dn_top - transmittance(jg) * coeff_up_bot
+       source_dn(jg) =  coeff_dn_bot - reflectance(jg) * coeff_up_bot - transmittance(jg) * coeff_dn_top
+    else if (od < 1.0e-8_jprb) then
+       source_up(jg) = 0.0_jprb
+       source_dn(jg) = 0.0_jprb
+    else
+      reflectance(jg) = gamma2 * od
+#if defined(__amdflang__)
+      transmittance(jg) = (1.0_jprb - k_exponent(jg)*od) / (1.0_jprb + od*(gamma1-k_exponent(jg)))
+#else
+      transmittance(jg) = (1.0_jprb - source_up(jg)*od) / (1.0_jprb + od*(gamma1-source_up(jg)))
+#endif
+       source_up(jg) = (1.0_jprb - reflectance(jg) - transmittance(jg)) &
+            &       * 0.5 * (planck(jg,1) + planck(jg,2))
+       source_dn(jg) = source_up(jg)
+    end if
+
+#if defined(__amdflang__)
+    end associate
+#endif
+
+  end subroutine calc_ref_trans_lw_single_level_omp
+
+  !---------------------------------------------------------------------
+  ! Compute the longwave transmittance to diffuse radiation in the
+  ! no-scattering case, as well as the upward flux at the top and the
+  ! downward flux at the base of the layer due to emission from within
+  ! the layer assuming a linear variation of Planck function within
+  ! the layer.
+  subroutine calc_no_scattering_transmittance_lw_omp(jg, ng, nlev, &
+       &    od, planck, transmittance, source_up, source_dn)
+
+    integer, intent(in) :: jg, ng, nlev
+
+    ! Optical depth and single scattering albedo
+    real(jprb), intent(in), dimension(ng,nlev) :: od
+
+    ! The Planck terms (functions of temperature) at the top and
+    ! bottom of the layer
+    real(jprb), intent(in), dimension(ng,nlev+1) :: planck
+
+    ! The diffuse transmittance, i.e. the fraction of diffuse
+    ! radiation incident on a layer from either top or bottom that is
+    ! reflected back or transmitted through
+    real(jprb), intent(out), dimension(ng,nlev) :: transmittance
+
+    ! The upward emission at the top of the layer and the downward
+    ! emission at its base, due to emission from within the layer
+    real(jprb), intent(out), dimension(ng,nlev) :: source_up, source_dn
+
+    real(jprb) :: coeff, coeff_up_top, coeff_up_bot, coeff_dn_top, coeff_dn_bot !, planck_mean
+
+    integer :: jlev
+
+    do jlev = 1, nlev
+       ! Compute upward and downward emission assuming the Planck
+       ! function to vary linearly with optical depth within the layer
+       ! (e.g. Wiscombe , JQSRT 1976).
+       coeff = LwDiffusivityWP*od(jg,jlev)
+!#ifdef DWD_TWO_STREAM_OPTIMIZATIONS
+       transmittance(jg,jlev) = exp(-coeff)
+       !#endif
+       if (od(jg,jlev) > 1.0e-3_jprb) then
+          ! Simplified from calc_reflectance_transmittance_lw above
+          coeff = (planck(jg,jlev+1)-planck(jg,jlev)) / coeff
+          coeff_up_top  =  coeff + planck(jg,jlev)
+          coeff_up_bot  =  coeff + planck(jg,jlev+1)
+          coeff_dn_top  = -coeff + planck(jg,jlev)
+          coeff_dn_bot  = -coeff + planck(jg,jlev+1)
+          source_up(jg,jlev) =  coeff_up_top - transmittance(jg,jlev) * coeff_up_bot
+          source_dn(jg,jlev) =  coeff_dn_bot - transmittance(jg,jlev) * coeff_dn_top
+       else
+          ! Linear limit at low optical depth
+          source_up(jg,jlev) = coeff * 0.5_jprb * (planck(jg,jlev)+planck(jg,jlev+1))
+          source_dn(jg,jlev) = source_up(jg,jlev)
+       end if
+    end do
+
+  end subroutine calc_no_scattering_transmittance_lw_omp
+
+  !---------------------------------------------------------------------
+  ! Compute the longwave transmittance to diffuse radiation in the
+  ! no-scattering case, as well as the upward flux at the top and the
+  ! downward flux at the base of the layer due to emission from within
+  ! the layer assuming a linear variation of Planck function within
+  ! the layer.
+  subroutine calc_no_scattering_transmittance_lw_single_cell_omp( &
+       &    od, planck_top, planck_bot, transmittance, source_up, source_dn)
+
+    ! Optical depth and single scattering albedo as scalar
+    real(jprb), intent(in) :: od
+
+    ! The Planck terms (functions of temperature) at the top and
+    ! bottom of the layer
+    real(jprb), intent(in) :: planck_top, planck_bot
+
+    ! The diffuse transmittance, i.e. the fraction of diffuse
+    ! radiation incident on a layer from either top or bottom that is
+    ! reflected back or transmitted through
+    real(jprb), intent(out) :: transmittance
+
+    ! The upward emission at the top of the layer and the downward
+    ! emission at its base, due to emission from within the layer
+    real(jprb), intent(out) :: source_up, source_dn
+
+    real(jprb) :: coeff, coeff_up_top, coeff_up_bot, coeff_dn_top, coeff_dn_bot !, planck_mean
+
+    ! Compute upward and downward emission assuming the Planck
+    ! function to vary linearly with optical depth within the layer
+    ! (e.g. Wiscombe , JQSRT 1976).
+    coeff = LwDiffusivityWP*od
+    transmittance = exp(-coeff)
+    if (od > 1.0e-3_jprb) then
+       ! Simplified from calc_reflectance_transmittance_lw above
+       coeff = (planck_bot-planck_top) / coeff
+       coeff_up_top  =  coeff + planck_top
+       coeff_up_bot  =  coeff + planck_bot
+       coeff_dn_top  = -coeff + planck_top
+       coeff_dn_bot  = -coeff + planck_bot
+       source_up =  coeff_up_top - transmittance * coeff_up_bot
+       source_dn =  coeff_dn_bot - transmittance * coeff_dn_top
+    else
+       ! Linear limit at low optical depth
+       source_up = coeff * 0.5_jprb * (planck_top+planck_bot)
+       source_dn = source_up
+    end if
+
+  end subroutine calc_no_scattering_transmittance_lw_single_cell_omp
+
+  !---------------------------------------------------------------------
+  ! Calculate the two-stream coefficients gamma1-gamma4 in the
+  ! shortwave
+  subroutine calc_two_stream_gammas_sw_single_band_omp(jg, mu0, ssa, g, &
+       &                                               gamma1, gamma2, gamma3)
+
+    integer, intent(in) :: jg
+    ! Cosine of solar zenith angle, single scattering albedo and
+    ! asymmetry factor:
+    real(jprb), intent(in)  :: mu0
+    real(jprb), intent(in)  :: ssa, g
+    real(jprb), intent(out) :: gamma1, gamma2, gamma3
+
+    real(jprb) :: factor
+
+    ! Zdunkowski "PIFM" (Zdunkowski et al., 1980; Contributions to
+    ! Atmospheric Physics 53, 147-66)
+! Added for DWD (2020)
+!NEC$ shortloop
+    !      gamma1 = 2.0_jprb  - ssa * (1.25_jprb + 0.75_jprb*g)
+    !      gamma2 = 0.75_jprb *(ssa * (1.0_jprb - g))
+    !      gamma3 = 0.5_jprb  - (0.75_jprb*mu0)*g
+    ! Optimized version:
+    factor = 0.75_jprb*g
+    gamma1 = 2.0_jprb  - ssa * (1.25_jprb + factor)
+    gamma2 = ssa * (0.75_jprb - factor)
+    gamma3 = 0.5_jprb  - mu0*factor
+
+  end subroutine calc_two_stream_gammas_sw_single_band_omp
+
+  !---------------------------------------------------------------------
+  ! Compute the shortwave reflectance and transmittance to diffuse
+  ! radiation using the Meador & Weaver formulas, as well as the
+  ! "direct" reflection and transmission, which really means the rate
+  ! of transfer of direct solar radiation (into a plane perpendicular
+  ! to the direct beam) into diffuse upward and downward streams at
+  ! the top and bottom of the layer, respectively.  Finally,
+  ! trans_dir_dir is the transmittance of the atmosphere to direct
+  ! radiation with no scattering.
+  subroutine calc_reflectance_transmittance_sw_single_band_omp(jg, ng, mu0, od, ssa, &
+       &      gamma1, gamma2, gamma3, ref_diff, trans_diff, &
+       &      ref_dir, trans_dir_diff, trans_dir_dir)
+
+    integer, intent(in) :: jg, ng
+
+    ! Cosine of solar zenith angle
+    real(jprb), intent(in) :: mu0
+
+    ! Optical depth and single scattering albedo
+    real(jprb), intent(in) :: od, ssa
+
+    ! The three transfer coefficients from the two-stream
+    ! differentiatial equations (computed by calc_two_stream_gammas)
+    real(jprb), intent(in) :: gamma1, gamma2, gamma3
+
+    ! The direct reflectance and transmittance, i.e. the fraction of
+    ! incoming direct solar radiation incident at the top of a layer
+    ! that is either reflected back (ref_dir) or scattered but
+    ! transmitted through the layer to the base (trans_dir_diff)
+    real(jprb), intent(out), dimension(ng) :: ref_dir, trans_dir_diff
+
+    ! The diffuse reflectance and transmittance, i.e. the fraction of
+    ! diffuse radiation incident on a layer from either top or bottom
+    ! that is reflected back or transmitted through
+    real(jprb), intent(out), dimension(ng) :: ref_diff, trans_diff
+
+    ! Transmittance of the direct been with no scattering
+    real(jprb), intent(out), dimension(ng) :: trans_dir_dir
+
+    real(jprd) :: gamma4, alpha1, alpha2, k_exponent, reftrans_factor
+    real(jprb) :: exponential0 ! = exp(-od/mu0)
+    real(jprd) :: exponential  ! = exp(-k_exponent*od)
+    real(jprd) :: exponential2 ! = exp(-2*k_exponent*od)
+    real(jprd) :: k_mu0, k_gamma3, k_gamma4
+    real(jprd) :: k_2_exponential, od_over_mu0
+
+    ! Local value of cosine of solar zenith angle, in case it needs to be
+    ! tweaked to avoid near division by zero. This is intentionally in working
+    ! precision (jprb) rather than fixing at double precision (jprd).
+    real(jprb) :: mu0_local
+
+! Added for DWD (2020)
+!NEC$ shortloop
+
+    gamma4 = 1.0_jprd - gamma3
+    alpha1 = gamma1*gamma4     + gamma2*gamma3 ! Eq. 16
+    alpha2 = gamma1*gamma3 + gamma2*gamma4    ! Eq. 17
+
+    k_exponent = sqrt(max((gamma1 - gamma2) * (gamma1 + gamma2), &
+         &       1.0e-12_jprd)) ! Eq 18
+
+    ! We had a rare crash where k*mu0 was within around 1e-13 of 1,
+    ! leading to ref_dir and trans_dir_diff being well outside the range
+    ! 0-1. The following approach is appropriate when k_exponent is double
+    ! precision and mu0_local is single precision, although work is needed
+    ! to make this entire routine secure in single precision.
+    mu0_local = mu0
+    if (abs(1.0_jprd - k_exponent*mu0) < 1000.0_jprd * epsilon(1.0_jprd)) then
+       mu0_local = mu0 * (1.0_jprb + SIGN(1._jprd,k_exponent*mu0-1._jprd)*10.0_jprb*epsilon(1.0_jprb))
+    end if
+
+    od_over_mu0 = max(od / mu0_local, 0.0_jprd)
+
+    ! Note that if the minimum value is reduced (e.g. to 1.0e-24)
+    ! then noise starts to appear as a function of solar zenith
+    ! angle
+    k_mu0 = k_exponent*mu0_local
+    k_gamma3 = k_exponent*gamma3
+    k_gamma4 = k_exponent*gamma4
+    ! Check for mu0 <= 0!
+    exponential0 = exp(-od_over_mu0)
+    trans_dir_dir(jg) = exponential0
+    exponential = exp(-k_exponent*od)
+
+    exponential2 = exponential*exponential
+    k_2_exponential = 2.0_jprd * k_exponent * exponential
+
+    reftrans_factor = 1.0_jprd / (k_exponent + gamma1 + (k_exponent - gamma1)*exponential2)
+
+    ! Meador & Weaver (1980) Eq. 25
+    ref_diff(jg) = gamma2 * (1.0_jprd - exponential2) * reftrans_factor
+
+    ! Meador & Weaver (1980) Eq. 26
+    trans_diff(jg) = k_2_exponential * reftrans_factor
+
+    ! Here we need mu0 even though it wasn't in Meador and Weaver
+    ! because we are assuming the incoming direct flux is defined
+    ! to be the flux into a plane perpendicular to the direction of
+    ! the sun, not into a horizontal plane
+    reftrans_factor = mu0_local * ssa * reftrans_factor / (1.0_jprd - k_mu0*k_mu0)
+
+    ! Meador & Weaver (1980) Eq. 14, multiplying top & bottom by
+    ! exp(-k_exponent*od) in case of very high optical depths
+    ref_dir(jg) = reftrans_factor &
+         &  * ( (1.0_jprd - k_mu0) * (alpha2 + k_gamma3) &
+         &     -(1.0_jprd + k_mu0) * (alpha2 - k_gamma3)*exponential2 &
+         &     -k_2_exponential*(gamma3 - alpha2*mu0_local)*exponential0)
+
+    ! Meador & Weaver (1980) Eq. 15, multiplying top & bottom by
+    ! exp(-k_exponent*od), minus the 1*exp(-od/mu0) term representing direct
+    ! unscattered transmittance.
+    trans_dir_diff(jg) = reftrans_factor * ( k_2_exponential*(gamma4 + alpha1*mu0_local) &
+         & - exponential0 &
+         & * ( (1.0_jprd + k_mu0) * (alpha1 + k_gamma4) &
+         &    -(1.0_jprd - k_mu0) * (alpha1 - k_gamma4) * exponential2) )
+
+    ! Final check that ref_dir + trans_dir_diff <= 1
+    ref_dir(jg) = max(0.0_jprb, min(ref_dir(jg), 1.0_jprb))
+    trans_dir_diff(jg) = max(0.0_jprb, min(trans_dir_diff(jg), 1.0_jprb-ref_dir(jg)))
+
+  end subroutine calc_reflectance_transmittance_sw_single_band_omp
+
+  !---------------------------------------------------------------------
+  ! OpenMP-optimized variant that avoids the local allocation of gamma
+  ! coefficients
+  subroutine calc_ref_trans_sw_omp(jg, ng, nlev, mu0, od, ssa, &
+    &      asymmetry, ref_diff, trans_diff, &
+    &      ref_dir, trans_dir_diff, trans_dir_dir)
+
+    implicit none
+
+    integer, intent(in) :: jg, ng, nlev
+
+    ! Cosine of solar zenith angle
+    real(jprb), intent(in) :: mu0
+
+    ! Optical depth and single scattering albedo
+    real(jprb), intent(in), dimension(ng, nlev) :: od, ssa, asymmetry
+
+    ! The direct reflectance and transmittance, i.e. the fraction of
+    ! incoming direct solar radiation incident at the top of a layer
+    ! that is either reflected back (ref_dir) or scattered but
+    ! transmitted through the layer to the base (trans_dir_diff)
+    real(jprb), intent(out), dimension(ng, nlev) :: ref_dir, trans_dir_diff
+
+    ! The diffuse reflectance and transmittance, i.e. the fraction of
+    ! diffuse radiation incident on a layer from either top or bottom
+    ! that is reflected back or transmitted through
+    real(jprb), intent(out), dimension(ng, nlev) :: ref_diff, trans_diff
+
+    ! Transmittance of the direct been with no scattering
+    real(jprb), intent(out), dimension(ng, nlev) :: trans_dir_dir
+
+    real(jprb) :: gamma1, gamma2, gamma3, gamma4
+    real(jprb) :: alpha1, alpha2, k_exponent
+    real(jprb) :: exponential ! = exp(-k_exponent*od)
+
+    real(jprb) :: reftrans_factor, factor
+    real(jprb) :: exponential2 ! = exp(-2*k_exponent*od)
+    real(jprb) :: k_mu0, k_gamma3, k_gamma4
+    real(jprb) :: k_2_exponential, one_minus_kmu0_sqr
+    integer    :: jlev
+
+    ! GPU-capable and vector-optimized version for ICON
+    do jlev = 1, nlev
+
+      trans_dir_dir(jg,jlev) = max(-max(od(jg,jlev) * (1.0_jprb/mu0),0.0_jprb),-1000.0_jprb)
+      trans_dir_dir(jg,jlev) = exp(trans_dir_dir(jg,jlev))
+
+      ! Zdunkowski "PIFM" (Zdunkowski et al., 1980; Contributions to
+      ! Atmospheric Physics 53, 147-66)
+      factor = 0.75_jprb*asymmetry(jg,jlev)
+
+      gamma1 = 2.0_jprb  - ssa(jg,jlev) * (1.25_jprb + factor)
+      gamma2 = ssa(jg,jlev) * (0.75_jprb - factor)
+      gamma3 = 0.5_jprb  - mu0*factor
+      gamma4 = 1.0_jprb - gamma3
+
+      alpha1 = gamma1*gamma4 + gamma2*gamma3 ! Eq. 16
+      alpha2 = gamma1*gamma3 + gamma2*gamma4 ! Eq. 17
+#ifdef PARKIND1_SINGLE
+      k_exponent = sqrt(max((gamma1 - gamma2) * (gamma1 + gamma2), 1.0e-6_jprb))  ! Eq 18
+#else
+      k_exponent = sqrt(max((gamma1 - gamma2) * (gamma1 + gamma2), 1.0e-12_jprb)) ! Eq 18
+#endif
+
+      exponential = exp(-k_exponent*od(jg,jlev))
+
+      k_mu0 = k_exponent*mu0
+      one_minus_kmu0_sqr = 1.0_jprb - k_mu0*k_mu0
+      k_gamma3 = k_exponent*gamma3
+      k_gamma4 = k_exponent*gamma4
+      exponential2 = exponential*exponential
+      k_2_exponential = 2.0_jprb * k_exponent * exponential
+      reftrans_factor = 1.0_jprb / (k_exponent + gamma1 + (k_exponent - gamma1)*exponential2)
+
+      ! Meador & Weaver (1980) Eq. 25
+      ref_diff(jg,jlev) = gamma2 * (1.0_jprb - exponential2) * reftrans_factor
+
+      ! Meador & Weaver (1980) Eq. 26
+      trans_diff(jg,jlev) = k_2_exponential * reftrans_factor
+
+      ! Here we need mu0 even though it wasn't in Meador and Weaver
+      ! because we are assuming the incoming direct flux is defined to
+      ! be the flux into a plane perpendicular to the direction of the
+      ! sun, not into a horizontal plane
+      reftrans_factor = mu0 * ssa(jg,jlev) * reftrans_factor &
+           &  / merge(one_minus_kmu0_sqr, epsilon(1.0_jprb), abs(one_minus_kmu0_sqr) > epsilon(1.0_jprb))
+
+      ! Meador & Weaver (1980) Eq. 14, multiplying top & bottom by
+      ! exp(-k_exponent*od) in case of very high optical depths
+      ref_dir(jg,jlev) = reftrans_factor &
+           &  * ( (1.0_jprb - k_mu0) * (alpha2 + k_gamma3) &
+           &     -(1.0_jprb + k_mu0) * (alpha2 - k_gamma3)*exponential2 &
+           &     -k_2_exponential*(gamma3 - alpha2*mu0)*trans_dir_dir(jg,jlev) )
+
+      ! Meador & Weaver (1980) Eq. 15, multiplying top & bottom by
+      ! exp(-k_exponent*od), minus the 1*exp(-od/mu0) term
+      ! representing direct unscattered transmittance.
+      trans_dir_diff(jg,jlev) = reftrans_factor * ( k_2_exponential*(gamma4 + alpha1*mu0) &
+           & - trans_dir_dir(jg,jlev) &
+           & * ( (1.0_jprb + k_mu0) * (alpha1 + k_gamma4) &
+           &    -(1.0_jprb - k_mu0) * (alpha1 - k_gamma4) * exponential2) )
+
+      ! Final check that ref_dir + trans_dir_diff <= 1
+      ref_dir(jg,jlev)        = max(0.0_jprb, min(ref_dir(jg,jlev), mu0*(1.0_jprb-trans_dir_dir(jg,jlev))))
+      trans_dir_diff(jg,jlev) = max(0.0_jprb, min(trans_dir_diff(jg,jlev), mu0*(1.0_jprb-trans_dir_dir(jg,jlev))-ref_dir(jg,jlev)))
+
+    end do
+
+  end subroutine calc_ref_trans_sw_omp
+
+  !---------------------------------------------------------------------
+  ! OpenMP-optimized variant that avoids the local allocation of gamma
+  ! coefficients
+  subroutine calc_ref_trans_sw_single_level_omp(jg, ng, mu0, od, ssa, &
+    &      asymmetry, ref_diff, trans_diff, &
+    &      ref_dir, trans_dir_diff, trans_dir_dir)
+
+    implicit none
+
+    integer, intent(in) :: jg, ng
+
+    ! Cosine of solar zenith angle
+    real(jprb), intent(in) :: mu0
+
+    ! Optical depth and single scattering albedo
+    real(jprb), intent(in) :: od, ssa, asymmetry
+
+    ! The direct reflectance and transmittance, i.e. the fraction of
+    ! incoming direct solar radiation incident at the top of a layer
+    ! that is either reflected back (ref_dir) or scattered but
+    ! transmitted through the layer to the base (trans_dir_diff)
+    real(jprb), intent(out), dimension(ng) :: ref_dir, trans_dir_diff
+
+    ! The diffuse reflectance and transmittance, i.e. the fraction of
+    ! diffuse radiation incident on a layer from either top or bottom
+    ! that is reflected back or transmitted through
+    real(jprb), intent(out), dimension(ng) :: ref_diff, trans_diff
+
+    ! Transmittance of the direct been with no scattering
+    real(jprb), intent(out), dimension(ng) :: trans_dir_dir
+
+    real(jprb) :: gamma1, gamma2, gamma3, gamma4
+    real(jprb) :: alpha1, alpha2, k_exponent
+    real(jprb) :: exponential ! = exp(-k_exponent*od)
+
+    real(jprb) :: reftrans_factor, factor
+    real(jprb) :: exponential2 ! = exp(-2*k_exponent*od)
+    real(jprb) :: k_mu0, k_gamma3, k_gamma4
+    real(jprb) :: k_2_exponential, one_minus_kmu0_sqr
+
+    ! GPU-capable and vector-optimized version for ICON
+    trans_dir_dir(jg) = max(-max(od * (1.0_jprb/mu0),0.0_jprb),-1000.0_jprb)
+    trans_dir_dir(jg) = exp(trans_dir_dir(jg))
+
+    ! Zdunkowski "PIFM" (Zdunkowski et al., 1980; Contributions to
+    ! Atmospheric Physics 53, 147-66)
+    factor = 0.75_jprb*asymmetry
+
+    gamma1 = 2.0_jprb  - ssa * (1.25_jprb + factor)
+    gamma2 = ssa * (0.75_jprb - factor)
+    gamma3 = 0.5_jprb  - mu0*factor
+    gamma4 = 1.0_jprb - gamma3
+
+    alpha1 = gamma1*gamma4 + gamma2*gamma3 ! Eq. 16
+    alpha2 = gamma1*gamma3 + gamma2*gamma4 ! Eq. 17
+#ifdef PARKIND1_SINGLE
+    k_exponent = sqrt(max((gamma1 - gamma2) * (gamma1 + gamma2), 1.0e-6_jprb))  ! Eq 18
+#else
+    k_exponent = sqrt(max((gamma1 - gamma2) * (gamma1 + gamma2), 1.0e-12_jprb)) ! Eq 18
+#endif
+
+    exponential = exp(-k_exponent*od)
+
+    k_mu0 = k_exponent*mu0
+    one_minus_kmu0_sqr = 1.0_jprb - k_mu0*k_mu0
+    k_gamma3 = k_exponent*gamma3
+    k_gamma4 = k_exponent*gamma4
+    exponential2 = exponential*exponential
+    k_2_exponential = 2.0_jprb * k_exponent * exponential
+    reftrans_factor = 1.0_jprb / (k_exponent + gamma1 + (k_exponent - gamma1)*exponential2)
+
+    ! Meador & Weaver (1980) Eq. 25
+    ref_diff(jg) = gamma2 * (1.0_jprb - exponential2) * reftrans_factor
+
+    ! Meador & Weaver (1980) Eq. 26
+    trans_diff(jg) = k_2_exponential * reftrans_factor
+
+    ! Here we need mu0 even though it wasn't in Meador and Weaver
+    ! because we are assuming the incoming direct flux is defined to
+    ! be the flux into a plane perpendicular to the direction of the
+    ! sun, not into a horizontal plane
+    reftrans_factor = mu0 * ssa * reftrans_factor &
+         &  / merge(one_minus_kmu0_sqr, epsilon(1.0_jprb), abs(one_minus_kmu0_sqr) > epsilon(1.0_jprb))
+
+    ! Meador & Weaver (1980) Eq. 14, multiplying top & bottom by
+    ! exp(-k_exponent*od) in case of very high optical depths
+    ref_dir(jg) = reftrans_factor &
+         &  * ( (1.0_jprb - k_mu0) * (alpha2 + k_gamma3) &
+         &     -(1.0_jprb + k_mu0) * (alpha2 - k_gamma3)*exponential2 &
+         &     -k_2_exponential*(gamma3 - alpha2*mu0)*trans_dir_dir(jg) )
+
+    ! Meador & Weaver (1980) Eq. 15, multiplying top & bottom by
+    ! exp(-k_exponent*od), minus the 1*exp(-od/mu0) term
+    ! representing direct unscattered transmittance.
+    trans_dir_diff(jg) = reftrans_factor * ( k_2_exponential*(gamma4 + alpha1*mu0) &
+         & - trans_dir_dir(jg) &
+         & * ( (1.0_jprb + k_mu0) * (alpha1 + k_gamma4) &
+         &    -(1.0_jprb - k_mu0) * (alpha1 - k_gamma4) * exponential2) )
+
+    ! Final check that ref_dir + trans_dir_diff <= 1
+    ref_dir(jg)        = max(0.0_jprb, min(ref_dir(jg), mu0*(1.0_jprb-trans_dir_dir(jg))))
+    trans_dir_diff(jg) = max(0.0_jprb, min(trans_dir_diff(jg), mu0*(1.0_jprb-trans_dir_dir(jg))-ref_dir(jg)))
+
+  end subroutine calc_ref_trans_sw_single_level_omp
+
 end module radiation_two_stream