diff --git a/ifsrrtm/srtm_taumol16.F90 b/ifsrrtm/srtm_taumol16.F90 index 0f988bbc..83b28539 100644 --- a/ifsrrtm/srtm_taumol16.F90 +++ b/ifsrrtm/srtm_taumol16.F90 @@ -210,6 +210,35 @@ SUBROUTINE SRTM_TAUMOL16 & !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO ENDDO +#if defined(OMPGPU) + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO + DO iplon = KIDIA, KFDIA + DO i_lay = laytrop_max+1, i_nlayers + IF (k_jp(iplon,i_lay-1) < layreffr & + & .AND. k_jp(iplon,i_lay) >= layreffr) i_laysolfr(iplon) = i_lay + ENDDO + ENDDO + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(ind0, ind1, z_tauray) + DO i_lay = laytrop_max+1, i_nlayers + DO iplon = KIDIA, KFDIA + ind0 = ((k_jp(iplon,i_lay)-13)*5+(k_jt(iplon,i_lay)-1))*nspb(16) + 1 + ind1 = ((k_jp(iplon,i_lay)-12)*5+(k_jt1(iplon,i_lay)-1))*nspb(16)+ 1 + z_tauray = p_colmol(iplon,i_lay) * rayl +!$NEC unroll(NG16) + DO ig = 1, ng16 + p_taug(iplon,i_lay,ig) = p_colch4(iplon,i_lay) * & + & (p_fac00(iplon,i_lay) * absb(ind0 ,ig) + & + & p_fac10(iplon,i_lay) * absb(ind0+1,ig) + & + & p_fac01(iplon,i_lay) * absb(ind1 ,ig) + & + & p_fac11(iplon,i_lay) * absb(ind1+1,ig)) + IF (i_lay == i_laysolfr(iplon)) p_sfluxzen(iplon,ig) = sfluxrefc(ig) + p_taur(iplon,i_lay,ig) = z_tauray + ENDDO + ENDDO + ENDDO + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO +#else !$ACC LOOP SEQ DO i_lay = llaytrop_max+1, i_nlayers !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO PRIVATE(ind0, ind1, z_tauray) @@ -234,6 +263,7 @@ SUBROUTINE SRTM_TAUMOL16 & ENDDO !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO ENDDO +#endif !$ACC END PARALLEL !$ACC WAIT diff --git a/ifsrrtm/srtm_taumol17.F90 b/ifsrrtm/srtm_taumol17.F90 index 70a08fad..6c6e5582 100644 --- a/ifsrrtm/srtm_taumol17.F90 +++ b/ifsrrtm/srtm_taumol17.F90 @@ -224,6 +224,54 @@ SUBROUTINE SRTM_TAUMOL17 & !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO ENDDO +#if defined(OMPGPU) + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO + DO iplon = 
KIDIA, KFDIA + DO i_lay = laytrop_max+1, i_nlayers + IF (k_jp(iplon,i_lay-1) < layreffr & + & .AND. k_jp(iplon,i_lay) >= layreffr) i_laysolfr(iplon) = i_lay + ENDDO + ENDDO + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(ind0, ind1, indf, js, z_fs, z_speccomb, z_specmult, z_specparm, z_tauray) + DO i_lay = laytrop_max+1, i_nlayers + DO iplon = KIDIA, KFDIA + z_speccomb = p_colh2o(iplon,i_lay) + strrat*p_colco2(iplon,i_lay) + z_specparm = p_colh2o(iplon,i_lay)/z_speccomb + z_specparm = MIN(p_oneminus(iplon),z_specparm) + z_specmult = 4._JPRB*(z_specparm) + js = 1 + INT(z_specmult) + z_fs = z_specmult - AINT(z_specmult) + ind0 = ((k_jp(iplon,i_lay)-13)*5+(k_jt(iplon,i_lay)-1))*nspb(17)+ js + ind1 = ((k_jp(iplon,i_lay)-12)*5+(k_jt1(iplon,i_lay)-1))*nspb(17)+js + indf = k_indfor(iplon,i_lay) + z_tauray = p_colmol(iplon,i_lay) * rayl + +!$NEC unroll(NG17) + DO ig = 1, ng17 + p_taug(iplon,i_lay,ig) = z_speccomb * & + & ( & + & (1._JPRB- z_fs) * ( absb(ind0,ig) * p_fac00(iplon,i_lay) + & + & absb(ind0+5,ig) * p_fac10(iplon,i_lay) + & + & absb(ind1,ig) * p_fac01(iplon,i_lay) + & + & absb(ind1+5,ig) * p_fac11(iplon,i_lay))+ & + & z_fs * ( absb(ind0+1,ig) * p_fac00(iplon,i_lay) + & + & absb(ind0+6,ig) * p_fac10(iplon,i_lay) + & + & absb(ind1+1,ig) * p_fac01(iplon,i_lay) + & + & absb(ind1+6,ig) * p_fac11(iplon,i_lay) ) & + & ) + & + & p_colh2o(iplon,i_lay) * & + & p_forfac(iplon,i_lay) * (forrefc(indf,ig) + & + & p_forfrac(iplon,i_lay) * & + & (forrefc(indf+1,ig) - forrefc(indf,ig))) + IF (i_lay == i_laysolfr(iplon)) p_sfluxzen(iplon,ig) = sfluxrefc(ig,js) & + & + z_fs * (sfluxrefc(ig,js+1) - sfluxrefc(ig,js)) + p_taur(iplon,i_lay,ig) = z_tauray + ENDDO + ENDDO + ENDDO + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO +#else !$ACC LOOP SEQ DO i_lay = llaytrop_max+1, i_nlayers !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO PRIVATE(ind0, ind1, indf, js, z_fs, z_speccomb, z_specmult, z_specparm, z_tauray) @@ -267,6 +315,7 
@@ SUBROUTINE SRTM_TAUMOL17 & ENDDO !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO ENDDO +#endif !$ACC END PARALLEL !$ACC WAIT diff --git a/ifsrrtm/srtm_taumol18.F90 b/ifsrrtm/srtm_taumol18.F90 index 47993408..44012972 100644 --- a/ifsrrtm/srtm_taumol18.F90 +++ b/ifsrrtm/srtm_taumol18.F90 @@ -96,7 +96,61 @@ SUBROUTINE SRTM_TAUMOL18 & ENDDO !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO +#if defined(OMPGPU) + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO + DO iplon = KIDIA, KFDIA + DO i_lay = 1, laytrop_min + IF (k_jp(iplon,i_lay) < layreffr & + & .AND. k_jp(iplon,i_lay+1) >= layreffr) & + & i_laysolfr(iplon) = MIN(i_lay+1,k_laytrop(iplon)) + ENDDO + ENDDO + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(ind0, ind1, inds, indf, js, z_fs, & + !$OMP z_speccomb, z_specmult, z_specparm, z_tauray) + DO i_lay = 1, laytrop_min + DO iplon = KIDIA, KFDIA + z_speccomb = p_colh2o(iplon,i_lay) + strrat*p_colch4(iplon,i_lay) + z_specparm = p_colh2o(iplon,i_lay)/z_speccomb + z_specparm = MIN(p_oneminus(iplon),z_specparm) + z_specmult = 8._JPRB*(z_specparm) + js = 1 + INT(z_specmult) + z_fs = z_specmult - AINT(z_specmult) + ind0 = ((k_jp(iplon,i_lay)-1)*5+(k_jt(iplon,i_lay)-1))*nspa(18) + js + ind1 = (k_jp(iplon,i_lay)*5+(k_jt1(iplon,i_lay)-1))*nspa(18) + js + inds = k_indself(iplon,i_lay) + indf = k_indfor(iplon,i_lay) + z_tauray = p_colmol(iplon,i_lay) * rayl +!$NEC unroll(NG18) + DO ig = 1, ng18 + p_taug(iplon,i_lay,ig) = z_speccomb * & + & ( & + & (1._JPRB- z_fs) * ( absa(ind0,ig) * p_fac00(iplon,i_lay) + & + & absa(ind0+9,ig) * p_fac10(iplon,i_lay) + & + & absa(ind1,ig) * p_fac01(iplon,i_lay) + & + & absa(ind1+9,ig) * p_fac11(iplon,i_lay) )+ & + & z_fs * ( absa(ind0+1,ig) * p_fac00(iplon,i_lay) + & + & absa(ind0+10,ig) * p_fac10(iplon,i_lay) + & + & absa(ind1+1,ig) * p_fac01(iplon,i_lay) + & + & absa(ind1+10,ig) * p_fac11(iplon,i_lay) ) & + & ) + & + & p_colh2o(iplon,i_lay) * & + & (p_selffac(iplon,i_lay) * 
(selfrefc(inds,ig) + & + & p_selffrac(iplon,i_lay) * & + & (selfrefc(inds+1,ig) - selfrefc(inds,ig))) + & + & p_forfac(iplon,i_lay) * (forrefc(indf,ig) + & + & p_forfrac(iplon,i_lay) * & + & (forrefc(indf+1,ig) - forrefc(indf,ig)))) + IF (i_lay == i_laysolfr(iplon)) & + & p_sfluxzen(iplon,ig) = sfluxrefc(ig,js) & + & + z_fs * (sfluxrefc(ig,js+1) - sfluxrefc(ig,js)) + p_taur(iplon,i_lay,ig) = z_tauray + ENDDO + ENDDO + ENDDO + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO +#else !$ACC LOOP SEQ DO i_lay = 1, llaytrop_min !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO PRIVATE(ind0, ind1, inds, indf, js, z_fs, & @@ -148,6 +202,7 @@ SUBROUTINE SRTM_TAUMOL18 & ENDDO !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO ENDDO +#endif !$ACC LOOP SEQ DO i_lay = llaytrop_min+1, llaytrop_max diff --git a/ifsrrtm/srtm_taumol19.F90 b/ifsrrtm/srtm_taumol19.F90 index 2ed99d26..0eb724e8 100644 --- a/ifsrrtm/srtm_taumol19.F90 +++ b/ifsrrtm/srtm_taumol19.F90 @@ -96,6 +96,61 @@ SUBROUTINE SRTM_TAUMOL19 & ENDDO !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO +#if defined(OMPGPU) + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO + DO iplon = KIDIA, KFDIA + DO i_lay = 1, laytrop_min + IF (k_jp(iplon,i_lay) < layreffr & + & .AND. 
k_jp(iplon,i_lay+1) >= layreffr) & + & i_laysolfr(iplon) = MIN(i_lay+1,k_laytrop(iplon)) + ENDDO + ENDDO + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(ind0, ind1, inds, indf, js, z_fs, z_speccomb, z_specmult, z_specparm, & + !$OMP z_tauray) + DO i_lay = 1, laytrop_min + DO iplon = KIDIA, KFDIA + z_speccomb = p_colh2o(iplon,i_lay) + strrat*p_colco2(iplon,i_lay) + z_specparm = p_colh2o(iplon,i_lay)/z_speccomb + z_specparm = MIN(p_oneminus(iplon),z_specparm) + z_specmult = 8._JPRB*(z_specparm) + js = 1 + INT(z_specmult) + z_fs = z_specmult - AINT(z_specmult) + ind0 = ((k_jp(iplon,i_lay)-1)*5+(k_jt(iplon,i_lay)-1))*nspa(19) + js + ind1 = (k_jp(iplon,i_lay)*5+(k_jt1(iplon,i_lay)-1))*nspa(19) + js + inds = k_indself(iplon,i_lay) + indf = k_indfor(iplon,i_lay) + z_tauray = p_colmol(iplon,i_lay) * rayl + +!$NEC unroll(NG19) + DO ig = 1 , ng19 + p_taug(iplon,i_lay,ig) = z_speccomb * & + & ( & + & (1._JPRB- z_fs) * ( absa(ind0,ig) * p_fac00(iplon,i_lay) + & + & absa(ind0+9,ig) * p_fac10(iplon,i_lay) + & + & absa(ind1,ig) * p_fac01(iplon,i_lay) + & + & absa(ind1+9,ig) * p_fac11(iplon,i_lay) )+ & + & z_fs * ( absa(ind0+1,ig) * p_fac00(iplon,i_lay) + & + & absa(ind0+10,ig) * p_fac10(iplon,i_lay) + & + & absa(ind1+1,ig) * p_fac01(iplon,i_lay) + & + & absa(ind1+10,ig) * p_fac11(iplon,i_lay) ) & + & ) + & + & p_colh2o(iplon,i_lay) * & + & (p_selffac(iplon,i_lay) * (selfrefc(inds,ig) + & + & p_selffrac(iplon,i_lay) * & + & (selfrefc(inds+1,ig) - selfrefc(inds,ig))) + & + & p_forfac(iplon,i_lay) * (forrefc(indf,ig) + & + & p_forfrac(iplon,i_lay) * & + & (forrefc(indf+1,ig) - forrefc(indf,ig)))) + IF (i_lay == i_laysolfr(iplon)) & + & p_sfluxzen(iplon,ig) = sfluxrefc(ig,js) & + & + z_fs * (sfluxrefc(ig,js+1) - sfluxrefc(ig,js)) + p_taur(iplon,i_lay,ig) = z_tauray + ENDDO + ENDDO + ENDDO + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO +#else !$ACC LOOP SEQ DO i_lay = 1, llaytrop_min !$OMP TARGET TEAMS DISTRIBUTE 
PARALLEL DO PRIVATE(ind0, ind1, inds, indf, js, z_fs, z_speccomb, z_specmult, z_specparm, & @@ -147,6 +202,7 @@ SUBROUTINE SRTM_TAUMOL19 & ENDDO !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO ENDDO +#endif !$ACC LOOP SEQ DO i_lay = llaytrop_min+1, llaytrop_max diff --git a/ifsrrtm/srtm_taumol20.F90 b/ifsrrtm/srtm_taumol20.F90 index 24d5c97e..d8700590 100644 --- a/ifsrrtm/srtm_taumol20.F90 +++ b/ifsrrtm/srtm_taumol20.F90 @@ -94,6 +94,45 @@ SUBROUTINE SRTM_TAUMOL20 & ENDDO !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO +#if defined(OMPGPU) + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO + DO iplon = KIDIA, KFDIA + DO i_lay = 1, laytrop_min + IF (k_jp(iplon,i_lay) < layreffr & + & .AND. k_jp(iplon,i_lay+1) >= layreffr) & + & i_laysolfr(iplon) = MIN(i_lay+1,k_laytrop(iplon)) + ENDDO + ENDDO + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(IND0, IND1, INDS, INDF, Z_TAURAY) + DO i_lay = 1, laytrop_min + DO iplon = KIDIA, KFDIA + ind0 = ((k_jp(iplon,i_lay)-1)*5+(k_jt(iplon,i_lay)-1))*nspa(20) + 1 + ind1 = (k_jp(iplon,i_lay)*5+(k_jt1(iplon,i_lay)-1))*nspa(20) + 1 + inds = k_indself(iplon,i_lay) + indf = k_indfor(iplon,i_lay) + z_tauray = p_colmol(iplon,i_lay) * rayl +!$NEC unroll(NG20) + DO ig = 1 , ng20 + p_taug(iplon,i_lay,ig) = p_colh2o(iplon,i_lay) * & + & ((p_fac00(iplon,i_lay) * absa(ind0,ig) + & + & p_fac10(iplon,i_lay) * absa(ind0+1,ig) + & + & p_fac01(iplon,i_lay) * absa(ind1,ig) + & + & p_fac11(iplon,i_lay) * absa(ind1+1,ig)) + & + & p_selffac(iplon,i_lay) * (selfrefc(inds,ig) + & + & p_selffrac(iplon,i_lay) * & + & (selfrefc(inds+1,ig) - selfrefc(inds,ig))) + & + & p_forfac(iplon,i_lay) * (forrefc(indf,ig) + & + & p_forfrac(iplon,i_lay) * & + & (forrefc(indf+1,ig) - forrefc(indf,ig)))) & + & + p_colch4(iplon,i_lay) * absch4c(ig) + p_taur(iplon,i_lay,ig) = z_tauray + IF(i_lay == i_laysolfr(iplon)) p_sfluxzen(iplon,ig)=sfluxrefc(ig) + ENDDO + ENDDO + ENDDO + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO 
+#else !$ACC LOOP SEQ DO i_lay = 1, llaytrop_min !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO PRIVATE(IND0, IND1, INDS, INDF, Z_TAURAY) @@ -127,6 +166,7 @@ SUBROUTINE SRTM_TAUMOL20 & ENDDO !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO ENDDO +#endif !$ACC LOOP SEQ DO i_lay = llaytrop_min+1, llaytrop_max diff --git a/ifsrrtm/srtm_taumol21.F90 b/ifsrrtm/srtm_taumol21.F90 index 4f03b2d3..0a4314ca 100644 --- a/ifsrrtm/srtm_taumol21.F90 +++ b/ifsrrtm/srtm_taumol21.F90 @@ -96,7 +96,61 @@ SUBROUTINE SRTM_TAUMOL21 & ENDDO !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO +#if defined(OMPGPU) + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO + DO iplon = KIDIA, KFDIA + DO i_lay = 1, laytrop_min + IF (k_jp(iplon,i_lay) < layreffr & + & .AND. k_jp(iplon,i_lay+1) >= layreffr) & + & i_laysolfr(iplon) = MIN(i_lay+1,k_laytrop(iplon)) + ENDDO + ENDDO + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(ind0, ind1, inds, indf, js, z_fs, & + !$OMP z_speccomb, z_specmult, z_specparm, z_tauray) + DO i_lay = 1, laytrop_min + DO iplon = KIDIA, KFDIA + z_speccomb = p_colh2o(iplon,i_lay) + strrat*p_colco2(iplon,i_lay) + z_specparm = p_colh2o(iplon,i_lay)/z_speccomb + z_specparm = MIN(p_oneminus(iplon),z_specparm) + z_specmult = 8._JPRB*(z_specparm) + js = 1 + INT(z_specmult) + z_fs = z_specmult - AINT(z_specmult) + ind0 = ((k_jp(iplon,i_lay)-1)*5+(k_jt(iplon,i_lay)-1))*nspa(21) + js + ind1 = (k_jp(iplon,i_lay)*5+(k_jt1(iplon,i_lay)-1))*nspa(21) + js + inds = k_indself(iplon,i_lay) + indf = k_indfor(iplon,i_lay) + z_tauray = p_colmol(iplon,i_lay) * rayl +!$NEC unroll(NG21) + DO ig = 1 , ng21 + p_taug(iplon,i_lay,ig) = z_speccomb * & + & ( & + & (1._JPRB- z_fs) * ( absa(ind0,ig) * p_fac00(iplon,i_lay) + & + & absa(ind0+9,ig) * p_fac10(iplon,i_lay) + & + & absa(ind1,ig) * p_fac01(iplon,i_lay) + & + & absa(ind1+9,ig) * p_fac11(iplon,i_lay) )+ & + & z_fs * ( absa(ind0+1,ig) * p_fac00(iplon,i_lay) + & + & absa(ind0+10,ig) * p_fac10(iplon,i_lay) + & + 
& absa(ind1+1,ig) * p_fac01(iplon,i_lay) + & + & absa(ind1+10,ig) * p_fac11(iplon,i_lay) ) & + & ) + & + & p_colh2o(iplon,i_lay) * & + & (p_selffac(iplon,i_lay) * (selfrefc(inds,ig) + & + & p_selffrac(iplon,i_lay) * & + & (selfrefc(inds+1,ig) - selfrefc(inds,ig))) + & + & p_forfac(iplon,i_lay) * (forrefc(indf,ig) + & + & p_forfrac(iplon,i_lay) * & + & (forrefc(indf+1,ig) - forrefc(indf,ig)))) + IF (i_lay == i_laysolfr(iplon)) & + & p_sfluxzen(iplon,ig) = sfluxrefc(ig,js) & + & + z_fs * (sfluxrefc(ig,js+1) - sfluxrefc(ig,js)) + p_taur(iplon,i_lay,ig) = z_tauray + ENDDO + ENDDO + ENDDO + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO +#else !$ACC LOOP SEQ DO i_lay = 1, llaytrop_min !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO PRIVATE(ind0, ind1, inds, indf, js, z_fs, & @@ -148,6 +202,7 @@ SUBROUTINE SRTM_TAUMOL21 & ENDDO !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO ENDDO +#endif !$ACC LOOP SEQ DO i_lay = llaytrop_min+1, llaytrop_max diff --git a/ifsrrtm/srtm_taumol22.F90 b/ifsrrtm/srtm_taumol22.F90 index 709d6943..7b9dfd15 100644 --- a/ifsrrtm/srtm_taumol22.F90 +++ b/ifsrrtm/srtm_taumol22.F90 @@ -101,6 +101,63 @@ SUBROUTINE SRTM_TAUMOL22 & ENDDO !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO +#if defined(OMPGPU) + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO + DO iplon = KIDIA, KFDIA + DO i_lay = 1, laytrop_min + IF (k_jp(iplon,i_lay) < layreffr & + & .AND. 
k_jp(iplon,i_lay+1) >= layreffr) & + & i_laysolfr(iplon) = MIN(i_lay+1,k_laytrop(iplon)) + ENDDO + ENDDO + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(ind0, ind1, inds, indf, js, z_fs, & + !$OMP z_speccomb, z_specmult, z_specparm, z_tauray, z_o2cont) + DO i_lay = 1, laytrop_min + DO iplon = KIDIA, KFDIA + z_o2cont = 4.35e-4_JPRB*p_colo2(iplon,i_lay)/(350.0_JPRB*2.0_JPRB) + z_speccomb = p_colh2o(iplon,i_lay) + & + & z_o2adj*strrat*p_colo2(iplon,i_lay) + z_specparm = p_colh2o(iplon,i_lay)/z_speccomb + z_specparm = MIN(p_oneminus(iplon),z_specparm) + z_specmult = 8._JPRB*(z_specparm) + js = 1 + INT(z_specmult) + z_fs = z_specmult - AINT(z_specmult) + ind0 = ((k_jp(iplon,i_lay)-1)*5+(k_jt(iplon,i_lay)-1))*nspa(22) + js + ind1 = (k_jp(iplon,i_lay)*5+(k_jt1(iplon,i_lay)-1))*nspa(22) + js + inds = k_indself(iplon,i_lay) + indf = k_indfor(iplon,i_lay) + z_tauray = p_colmol(iplon,i_lay) * rayl +!$NEC unroll(NG22) + DO ig = 1 , ng22 + p_taug(iplon,i_lay,ig) = z_speccomb * & + & ( & + & (1._JPRB- z_fs) * ( absa(ind0,ig) * p_fac00(iplon,i_lay) + & + & absa(ind0+9,ig) * p_fac10(iplon,i_lay) + & + & absa(ind1,ig) * p_fac01(iplon,i_lay) + & + & absa(ind1+9,ig) * p_fac11(iplon,i_lay) )+ & + & z_fs * ( absa(ind0+1,ig) * p_fac00(iplon,i_lay) + & + & absa(ind0+10,ig) * p_fac10(iplon,i_lay) + & + & absa(ind1+1,ig) * p_fac01(iplon,i_lay) + & + & absa(ind1+10,ig) * p_fac11(iplon,i_lay) ) & + & ) + & + & p_colh2o(iplon,i_lay) * & + & (p_selffac(iplon,i_lay) * (selfrefc(inds,ig) + & + & p_selffrac(iplon,i_lay) * & + & (selfrefc(inds+1,ig) - selfrefc(inds,ig))) + & + & p_forfac(iplon,i_lay) * (forrefc(indf,ig) + & + & p_forfrac(iplon,i_lay) * & + & (forrefc(indf+1,ig) - forrefc(indf,ig)))) & + & + z_o2cont + IF (i_lay == i_laysolfr(iplon)) & + & p_sfluxzen(iplon,ig) = sfluxrefc(ig,js) & + & + z_fs * (sfluxrefc(ig,js+1) - sfluxrefc(ig,js)) + p_taur(iplon,i_lay,ig) = z_tauray + ENDDO + ENDDO + ENDDO + !$OMP END TARGET TEAMS 
DISTRIBUTE PARALLEL DO +#else !$ACC LOOP SEQ DO i_lay = 1, llaytrop_min !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO PRIVATE(ind0, ind1, inds, indf, js, z_fs, & @@ -154,6 +211,7 @@ SUBROUTINE SRTM_TAUMOL22 & ENDDO !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO ENDDO +#endif !$ACC LOOP SEQ DO i_lay = llaytrop_min+1, llaytrop_max diff --git a/ifsrrtm/srtm_taumol23.F90 b/ifsrrtm/srtm_taumol23.F90 index 2cc627c7..d5a95d6a 100644 --- a/ifsrrtm/srtm_taumol23.F90 +++ b/ifsrrtm/srtm_taumol23.F90 @@ -93,7 +93,46 @@ SUBROUTINE SRTM_TAUMOL23 & ENDDO !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO +#if defined(OMPGPU) + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO + DO iplon = KIDIA, KFDIA + DO i_lay = 1, laytrop_min + IF (k_jp(iplon,i_lay) < layreffr & + & .AND. k_jp(iplon,i_lay+1) >= layreffr) & + & i_laysolfr(iplon) = MIN(i_lay+1,k_laytrop(iplon)) + ENDDO + ENDDO + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(ind0, ind1, inds, indf, z_tauray) + DO i_lay = 1, laytrop_min + DO iplon = KIDIA, KFDIA + ind0 = ((k_jp(iplon,i_lay)-1)*5+(k_jt(iplon,i_lay)-1))*nspa(23) + 1 + ind1 = (k_jp(iplon,i_lay)*5+(k_jt1(iplon,i_lay)-1))*nspa(23) + 1 + inds = k_indself(iplon,i_lay) + indf = k_indfor(iplon,i_lay) +!$NEC unroll(NG23) + DO ig = 1 , ng23 + z_tauray = p_colmol(iplon,i_lay) * raylc(ig) + p_taug(iplon,i_lay,ig) = p_colh2o(iplon,i_lay) * & + & (givfac * (p_fac00(iplon,i_lay) * absa(ind0,ig) + & + & p_fac10(iplon,i_lay) * absa(ind0+1,ig) + & + & p_fac01(iplon,i_lay) * absa(ind1,ig) + & + & p_fac11(iplon,i_lay) * absa(ind1+1,ig)) + & + & p_selffac(iplon,i_lay) * (selfrefc(inds,ig) + & + & p_selffrac(iplon,i_lay) * & + & (selfrefc(inds+1,ig) - selfrefc(inds,ig))) + & + & p_forfac(iplon,i_lay) * (forrefc(indf,ig) + & + & p_forfrac(iplon,i_lay) * & + & (forrefc(indf+1,ig) - forrefc(indf,ig)))) + IF (i_lay == i_laysolfr(iplon)) & + p_sfluxzen(iplon,ig) = sfluxrefc(ig) + p_taur(iplon,i_lay,ig) = z_tauray + ENDDO + ENDDO + ENDDO + !$OMP 
END TARGET TEAMS DISTRIBUTE PARALLEL DO +#else !$ACC LOOP SEQ DO i_lay = 1, llaytrop_min !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO PRIVATE(ind0, ind1, inds, indf, z_tauray) @@ -129,6 +168,7 @@ SUBROUTINE SRTM_TAUMOL23 & ENDDO !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO ENDDO +#endif !$ACC LOOP SEQ DO i_lay = llaytrop_min+1, llaytrop_max diff --git a/ifsrrtm/srtm_taumol24.F90 b/ifsrrtm/srtm_taumol24.F90 index 8b6b3472..a5cbd03d 100644 --- a/ifsrrtm/srtm_taumol24.F90 +++ b/ifsrrtm/srtm_taumol24.F90 @@ -98,6 +98,63 @@ SUBROUTINE SRTM_TAUMOL24 & ENDDO !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO +#if defined(OMPGPU) + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO + DO iplon = KIDIA, KFDIA + DO i_lay = 1, laytrop_min + IF (k_jp(iplon,i_lay) < layreffr & + & .AND. k_jp(iplon,i_lay+1) >= layreffr) & + & i_laysolfr(iplon) = MIN(i_lay+1,k_laytrop(iplon)) + ENDDO + ENDDO + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(ind0, ind1, inds, indf, js, z_fs, & + !$OMP z_speccomb, z_specmult, z_specparm, z_tauray) + DO i_lay = 1, laytrop_min + DO iplon = KIDIA, KFDIA + z_speccomb = p_colh2o(iplon,i_lay) + strrat*p_colo2(iplon,i_lay) + z_specparm = p_colh2o(iplon,i_lay)/z_speccomb + z_specparm = MIN(p_oneminus(iplon),z_specparm) + z_specmult = 8._JPRB*(z_specparm) + js = 1 + INT(z_specmult) + z_fs = z_specmult - AINT(z_specmult) + ind0 = ((k_jp(iplon,i_lay)-1)*5+(k_jt(iplon,i_lay)-1))*nspa(24) + js + ind1 = (k_jp(iplon,i_lay)*5+(k_jt1(iplon,i_lay)-1))*nspa(24) + js + inds = k_indself(iplon,i_lay) + indf = k_indfor(iplon,i_lay) + +!$NEC unroll(NG24) + DO ig = 1 , ng24 + z_tauray = p_colmol(iplon,i_lay) * (raylac(ig,js) + & + & z_fs * (raylac(ig,js+1) - raylac(ig,js))) + p_taug(iplon,i_lay,ig) = z_speccomb * & + & ( & + & (1._JPRB- z_fs) * ( absa(ind0,ig) * p_fac00(iplon,i_lay) + & + & absa(ind0+9,ig) * p_fac10(iplon,i_lay) + & + & absa(ind1,ig) * p_fac01(iplon,i_lay) + & + & absa(ind1+9,ig) * p_fac11(iplon,i_lay) )+ & + & 
z_fs * ( absa(ind0+1,ig) * p_fac00(iplon,i_lay) + & + & absa(ind0+10,ig) * p_fac10(iplon,i_lay) + & + & absa(ind1+1,ig) * p_fac01(iplon,i_lay) + & + & absa(ind1+10,ig) * p_fac11(iplon,i_lay) ) & + & ) + & + & p_colo3(iplon,i_lay) * abso3ac(ig) + & + & p_colh2o(iplon,i_lay) * & + & (p_selffac(iplon,i_lay) * (selfrefc(inds,ig) + & + & p_selffrac(iplon,i_lay) * & + & (selfrefc(inds+1,ig) - selfrefc(inds,ig))) + & + & p_forfac(iplon,i_lay) * (forrefc(indf,ig) + & + & p_forfrac(iplon,i_lay) * & + & (forrefc(indf+1,ig) - forrefc(indf,ig)))) + IF (i_lay == i_laysolfr(iplon)) & + & p_sfluxzen(iplon,ig) = sfluxrefc(ig,js) & + & + z_fs * (sfluxrefc(ig,js+1) - sfluxrefc(ig,js)) + p_taur(iplon,i_lay,ig) = z_tauray + ENDDO + ENDDO + ENDDO + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO +#else !$ACC LOOP SEQ DO i_lay = 1, llaytrop_min !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO PRIVATE(ind0, ind1, inds, indf, js, z_fs, & @@ -151,6 +208,7 @@ SUBROUTINE SRTM_TAUMOL24 & ENDDO !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO ENDDO +#endif !$ACC LOOP SEQ DO i_lay = llaytrop_min+1, llaytrop_max diff --git a/ifsrrtm/srtm_taumol25.F90 b/ifsrrtm/srtm_taumol25.F90 index 054134ad..1ef80705 100644 --- a/ifsrrtm/srtm_taumol25.F90 +++ b/ifsrrtm/srtm_taumol25.F90 @@ -88,6 +88,37 @@ SUBROUTINE SRTM_TAUMOL25 & ENDDO !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO +#if defined(OMPGPU) + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO + DO iplon = KIDIA, KFDIA + DO i_lay = 1, laytrop_min + IF (k_jp(iplon,i_lay) < layreffr .AND. 
& + & k_jp(iplon,i_lay+1) >= layreffr) & + & i_laysolfr(iplon) = MIN(i_lay+1,k_laytrop(iplon)) + ENDDO + ENDDO + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO + !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO COLLAPSE(2) PRIVATE(ind0, ind1, z_tauray) + DO i_lay = 1, laytrop_min + DO iplon = KIDIA, KFDIA + ind0 = ((k_jp(iplon,i_lay)-1)*5+(k_jt(iplon,i_lay)-1))*nspa(25) + 1 + ind1 = (k_jp(iplon,i_lay)*5+(k_jt1(iplon,i_lay)-1))*nspa(25) + 1 +!$NEC unroll(NG25) + DO ig = 1 , ng25 + z_tauray = p_colmol(iplon,i_lay) * raylc(ig) + p_taug(iplon,i_lay,ig) = p_colh2o(iplon,i_lay) * & + & (p_fac00(iplon,i_lay) * absa(ind0,ig) + & + & p_fac10(iplon,i_lay) * absa(ind0+1,ig) + & + & p_fac01(iplon,i_lay) * absa(ind1,ig) + & + & p_fac11(iplon,i_lay) * absa(ind1+1,ig)) + & + & p_colo3(iplon,i_lay) * abso3ac(ig) + IF(i_lay == i_laysolfr(iplon)) p_sfluxzen(iplon,ig)=sfluxrefc(ig) + p_taur(iplon,i_lay,ig) = z_tauray + ENDDO + ENDDO + ENDDO + !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO +#else !$ACC LOOP SEQ DO i_lay = 1, llaytrop_min !$OMP TARGET TEAMS DISTRIBUTE PARALLEL DO PRIVATE(ind0, ind1, z_tauray) @@ -114,6 +145,7 @@ SUBROUTINE SRTM_TAUMOL25 & ENDDO !$OMP END TARGET TEAMS DISTRIBUTE PARALLEL DO ENDDO +#endif !$ACC LOOP SEQ DO i_lay = llaytrop_min+1, llaytrop_max