diff --git a/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/makebcs/make_bcs_questionary.py b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/makebcs/make_bcs_questionary.py index 386d834c4..4e4fb5978 100755 --- a/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/makebcs/make_bcs_questionary.py +++ b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/makebcs/make_bcs_questionary.py @@ -199,6 +199,7 @@ def ask_questions(default_grid="Cubed-Sphere"): "v11 : NL3 + JPL veg height + PEATMAP + MODIS snow alb v2", \ "v12 : NL3 + JPL veg height + PEATMAP + MODIS snow alb v2 + Argentina peatland fix", \ "v13 : NL3 + JPL veg height + PEATMAP + MODIS snow alb v2 + Argentina peatland fix + mean land elevation fix", \ + "v15 : NL3 + JPL veg height + PEATMAP GPA22 + MODIS snow alb v2 + HWSDv1.2+ mean land elevation fix", \ "ICA : Icarus (archived*: /discover/nobackup/projects/gmao/bcs_shared/legacy_bcs/Icarus/)", \ "GM4 : Ganymed-4_0 (archived*: /discover/nobackup/projects/gmao/bcs_shared/legacy_bcs/Ganymed-4_0/)", \ "F25 : Fortuna-2_5 (archived*: n/a)"], diff --git a/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/makebcs/mod_process_hres_data.F90 b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/makebcs/mod_process_hres_data.F90 index 6300fdebd..9ea4f1065 100644 --- a/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/makebcs/mod_process_hres_data.F90 +++ b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/makebcs/mod_process_hres_data.F90 @@ -143,10 +143,10 @@ SUBROUTINE ESA2CLM (nc, nr, n_land, tile_lat, tile_pfs, Rst_id) ! CLM description (17) CatchmentCNCLM description (19) ! -------------------- ------------------------------ - ! 'BARE' 1 bare (does not have bare soil) + ! 'BARE' 1 bare (does not have bare soil) ! 'NLEt' 2 needleleaf evergreen temperate tree 1 ! 
'NLEB' 3 needleleaf evergreen boreal tree 2 - ! 'NLDB' 4 needleleaf deciduous boreal tree 3 + ! 'NLDB' 4 needleleaf deciduous boreal tree 3 ! 'BLET' 5 broadleaf evergreen tropical tree 4 ! 'BLEt' 6 broadleaf evergreen temperate tree 5 ! 'BLDT' 7 broadleaf deciduous tropical tree 6 @@ -158,12 +158,12 @@ SUBROUTINE ESA2CLM (nc, nr, n_land, tile_lat, tile_pfs, Rst_id) ! 'BLDBS' 12 broadleaf deciduous boreal shrub 12 ! 'AC3G' 13 arctic c3 grass 13 ! 'CC3G' 14 cool c3 grass 14 cool c3 grass [moisture + deciduous] - ! 'CC3Gm' cool c3 grass 15 cool c3 grass [moisture stress only] + ! 'CC3Gm' cool c3 grass 15 cool c3 grass [moisture stress only] ! 'WC4G' 15 warm c4 grass 16 ! 'WC4Gm' warm c4 grass 17 ! 'CROP' 16 crop 18 crop [moisture + deciduous] ! 'CROPm' crop 19 crop [moisture stress only] - ! 17 water + ! 17 water dx_clm = 360./N_lon_clm dy_clm = 180./N_lat_clm @@ -3394,8 +3394,8 @@ SUBROUTINE soil_para_hwsd (nx,ny, n_land, tile_pfs, tile_id) implicit none integer, intent(in) :: nx, ny, n_land - integer, intent(in) :: tile_pfs(:) - integer, target, intent(in) :: tile_id(:,:) + integer, intent(in) :: tile_pfs(:) ! Pfafstetter index of tile (tile-space) + integer, target, intent(in) :: tile_id(:,:) ! tile ID on raster grid (0=ocean, otherwise land or landice or lake) real, dimension (:), allocatable :: & @@ -3425,7 +3425,6 @@ SUBROUTINE soil_para_hwsd (nx,ny, n_land, tile_pfs, tile_id) integer (kind=2), allocatable, dimension (:) :: & data_vec1, data_vec2,data_vec3, data_vec4,data_vec5, data_vec6 REAL, ALLOCATABLE, dimension (:) :: soildepth, grav_vec,soc_vec,poc_vec - ! ncells_top,ncells_top_pro,ncells_sub_pro ! ncells_* not used integer(kind=2) , allocatable, dimension (:) :: ss_clay, & ss_sand,ss_clay_all,ss_sand_all,ss_oc_all REAL, ALLOCATABLE :: count_soil(:) @@ -3447,6 +3446,11 @@ SUBROUTINE soil_para_hwsd (nx,ny, n_land, tile_pfs, tile_id) REAL, DIMENSION (:), POINTER :: PMAP REAL, ALLOCATABLE, DIMENSION (:,:) :: PMAPR + ! 
PEATMAP update GPA22 + integer :: oc_cap_nonpeat + integer :: oc_peat_top, oc_peat_sub + integer :: n_valid_top, n_valid_prof + ! --------- VARIABLES FOR *OPENMP* PARALLEL ENVIRONMENT ------------ ! @@ -3651,6 +3655,10 @@ SUBROUTINE soil_para_hwsd (nx,ny, n_land, tile_pfs, tile_id) if (trim(SOILBCS)=='HWSD_b') then tmpversion = 'v3' + else if (trim(SOILBCS)=='HWSDv2') then + ! v4 is /discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/out_HWSDv2_NGDC_STATSGO_noMASK/ + ! New soil files with HWSDv2 - D2 layer both top and sub + tmpversion = 'v4' else if (trim(SOILBCS)=='HWSD') then tmpversion = 'v2' else @@ -3719,17 +3727,19 @@ SUBROUTINE soil_para_hwsd (nx,ny, n_land, tile_pfs, tile_id) if(status == 0) then status = NF_GET_att_INT (ncid, NF_GLOBAL,'i_ind_offset_LL',iLL); VERIFY_(STATUS) status = NF_GET_att_INT (ncid, NF_GLOBAL,'j_ind_offset_LL',jLL); VERIFY_(STATUS) + + ! read UNDEF and ScaleFactor (sf) for Clay0-30 only (variable 4); ! assume UNDEF and ScaleFactor (sf) are the same for *all* variables read below ! (ok for SoilProperties_H[xx]V[yy].nc as of 29 Apr 2022). 
- status = NF_GET_att_INT (ncid, 4,'UNDEF',d_undef); VERIFY_(STATUS) - status = NF_GET_att_REAL (ncid, 4,'ScaleFactor',sf); VERIFY_(STATUS) - status = NF_GET_VARA_INT (ncid, 4,(/1,1/),(/nc_10,nr_10/),net_data1); VERIFY_(STATUS) - status = NF_GET_VARA_INT (ncid, 5,(/1,1/),(/nc_10,nr_10/),net_data2); VERIFY_(STATUS) - status = NF_GET_VARA_INT (ncid, 6,(/1,1/),(/nc_10,nr_10/),net_data3); VERIFY_(STATUS) - status = NF_GET_VARA_INT (ncid, 7,(/1,1/),(/nc_10,nr_10/),net_data4); VERIFY_(STATUS) - status = NF_GET_VARA_INT (ncid, 8,(/1,1/),(/nc_10,nr_10/),net_data5); VERIFY_(STATUS) - status = NF_GET_VARA_INT (ncid, 9,(/1,1/),(/nc_10,nr_10/),net_data6); VERIFY_(STATUS) - status = NF_GET_VARA_INT (ncid,10,(/1,1/),(/nc_10,nr_10/),net_data7); VERIFY_(STATUS) + status = NF_GET_att_INT (ncid, 4,'UNDEF',d_undef); VERIFY_(STATUS) + status = NF_GET_att_REAL (ncid, 4,'ScaleFactor',sf); VERIFY_(STATUS) + status = NF_GET_VARA_INT (ncid, 4,(/1,1/),(/nc_10,nr_10/),net_data1); VERIFY_(STATUS) ! Clay 0-30 + status = NF_GET_VARA_INT (ncid, 5,(/1,1/),(/nc_10,nr_10/),net_data2); VERIFY_(STATUS) ! Sand 0-30 + status = NF_GET_VARA_INT (ncid, 6,(/1,1/),(/nc_10,nr_10/),net_data3); VERIFY_(STATUS) ! OrgC 0-30 + status = NF_GET_VARA_INT (ncid, 7,(/1,1/),(/nc_10,nr_10/),net_data4); VERIFY_(STATUS) ! Clay 30-100 + status = NF_GET_VARA_INT (ncid, 8,(/1,1/),(/nc_10,nr_10/),net_data5); VERIFY_(STATUS) ! Sand 30-100 + status = NF_GET_VARA_INT (ncid, 9,(/1,1/),(/nc_10,nr_10/),net_data6); VERIFY_(STATUS) ! OrgC 30-100 + status = NF_GET_VARA_INT (ncid,10,(/1,1/),(/nc_10,nr_10/),net_data7); VERIFY_(STATUS) ! Gravel do j = jLL,jLL + nr_10 -1 do i = iLL, iLL + nc_10 -1 if(net_data1(i-iLL +1 ,j - jLL +1) /= d_undef) & @@ -3761,28 +3771,69 @@ SUBROUTINE soil_para_hwsd (nx,ny, n_land, tile_pfs, tile_id) deallocate (net_data6) deallocate (net_data7) - ! ---------------------------------------------------------------------------- + ! 
------------------------------------------------------------------------------------
+    !
+    ! PEATMAP preprocessing of HWSD organic-carbon fields on raster grid, used later for
+    ! soil classification in tile space.
+    !
+    ! In PEATMAP mode, peat support is introduced through the top/subsurface OC raster
+    ! arrays that are later copied into ss_oc_all:
+    !   - oc_top -> ss_oc_all(1  :i)
+    !   - oc_sub -> ss_oc_all(i+1:2*i)
+    !   where i is the number of raster grid cells within a given tile.
+    !
+    ! Strict GPA22 behavior:
+    !   - Outside the GPA22 peat mask, oc_top is capped just below the peat
+    !     threshold cF_lim(4) so that HWSD cannot create peat there.
+    !   - Inside the GPA22 peat mask, oc_top is forced to OC value for peat.
+    !
+    ! All PEATMAP variants:
+    !   - Subsurface peat from HWSD is not allowed; oc_sub peat is moved to
+    !     peat-rich mineral Group 3.
+    !
+    ! Final tile top/profile classes are assigned later from these raster-derived fields.

-    if(use_PEATMAP) then
+    if (use_PEATMAP) then

        print *, 'PEATMAP_THRESHOLD_1 : ', PEATMAP_THRESHOLD_1

-       allocate(pmapr (1:i_highd,1:j_highd))
-       status = NF_OPEN (trim(MAKE_BCS_INPUT_DIR)//'/land/soil/SOIL-DATA/PEATMAP_mask.nc4', NF_NOWRITE, ncid)
-       status = NF_GET_VARA_REAL (ncid,NC_VarID(NCID,'PEATMAP'), (/1,1/),(/i_highd, j_highd/), pmapr) ; VERIFY_(STATUS)
-
-       ! move HWSD sub-surface peat to peat-rich mineral Group 3 because merged surface peat defines sub-surface peat
-
+       allocate(pmapr(1:i_highd,1:j_highd))
+
+       if (PEATMAP_STRICT_GPA22) then
+          ! Conservative GPA22 peat mask:
+          !   GPA22 value 1 = peat; GPA22 values 0 and 2 = non-peat.
+          status = NF_OPEN(trim(MAKE_BCS_INPUT_DIR)// &
+               '/land/soil/SOIL-DATA/v2/PEATMAP_from_GPA22_like_old_conservative.nc4', &
+               NF_NOWRITE, ncid) ; VERIFY_(STATUS)
+       else
+          ! Legacy PEATMAP mask used by the original hybrid path.
+ status = NF_OPEN(trim(MAKE_BCS_INPUT_DIR)// & + '/land/soil/SOIL-DATA/PEATMAP_mask.nc4', & + NF_NOWRITE, ncid) ; VERIFY_(STATUS) + endif + + status = NF_GET_VARA_REAL(ncid, NC_VarID(ncid,'PEATMAP'), (/1,1/), (/i_highd,j_highd/), pmapr) ; VERIFY_(STATUS) + + ! Strict GPA22 + ! prevent HWSD oc_top from generating peat outside the GPA22 peat mask. + if (PEATMAP_STRICT_GPA22) then + where (pmapr < PEATMAP_THRESHOLD_1) + oc_top = min( oc_top, FLOOR(cF_lim(4)/sf) ) ! force oc_top<=872 (cF_lim(4)/sf=872.0930) + endwhere + endif + + ! In all PEATMAP variants, subsurface orgC must *not* fall into peat class. + ! Reassign HWSD subsurface peat to peat-rich mineral Group 3. where (oc_sub*sf >= cF_lim(4)) oc_sub = NINT(8./sf) endwhere - - ! Hybridize: add OC 1km PEATMAP pixels to HWSD oc_top - + + ! In all PEATMAP variants, set top-layer OrgC to value associated with peat. + ! In strict GPA22 mode, oc_top has already been capped outside the mask. where (pmapr >= PEATMAP_THRESHOLD_1) oc_top = NINT(33.0/sf) endwhere - - deallocate (pmapr) - status = NF_CLOSE(ncid) + + deallocate(pmapr) + status = NF_CLOSE(ncid) ; VERIFY_(STATUS) endif ! ---------------------------------------------------------------------------- @@ -3862,7 +3913,7 @@ SUBROUTINE soil_para_hwsd (nx,ny, n_land, tile_pfs, tile_id) ! ---------------------------------------------------------------------------- - ! compute peat fraction on tile for CLM45+ (for fires?) + ! from raster oc_top, compute peat area fraction of land tiles for CLM45+ (for fires?) allocate(pmap (1:n_land)) !allocate(count_soil(1:n_land)) ! already allocated above @@ -3909,9 +3960,6 @@ SUBROUTINE soil_para_hwsd (nx,ny, n_land, tile_pfs, tile_id) allocate (grav_vec (1:n_land)) allocate (soc_vec (1:n_land)) allocate (poc_vec (1:n_land)) - !allocate (ncells_top (1:n_land)) ! ncells_* not used - !allocate (ncells_top_pro (1:n_land)) ! ncells_* not used - !allocate (ncells_sub_pro (1:n_land)) ! 
ncells_* not used !allocate(count_soil(1:n_land)) count_soil = 0. @@ -3919,10 +3967,6 @@ SUBROUTINE soil_para_hwsd (nx,ny, n_land, tile_pfs, tile_id) soc_vec = 0. ! soil orgC (top layer 0-30) poc_vec = 0. ! soil orgC (profile layer 0-100) - !ncells_top = 0. ! ncells_* not used - !ncells_top_pro = 0. ! ncells_* not used - !ncells_sub_pro = 0. ! ncells_* not used - n =1 do j=1,ny_adj do i=1,nx_adj @@ -4005,20 +4049,20 @@ SUBROUTINE soil_para_hwsd (nx,ny, n_land, tile_pfs, tile_id) ! ! Read Woesten soil parameters and CLSM tau parameters for soil classes (1:253) - allocate(a_sand (1:n_SoilClasses)) - allocate(a_clay (1:n_SoilClasses)) - allocate(a_silt (1:n_SoilClasses)) - allocate(a_oc (1:n_SoilClasses)) - allocate(a_bee (1:n_SoilClasses)) - allocate(a_psis (1:n_SoilClasses)) - allocate(a_poros (1:n_SoilClasses)) - allocate(a_wp (1:n_SoilClasses)) - allocate(a_aksat (1:n_SoilClasses)) - allocate(atau (1:n_SoilClasses)) - allocate(btau (1:n_SoilClasses)) - allocate(atau_2cm(1:n_SoilClasses)) - allocate(btau_2cm(1:n_SoilClasses)) - allocate(a_wpsurf(1:n_SoilClasses)) + allocate(a_sand ( 1:n_SoilClasses)) + allocate(a_clay ( 1:n_SoilClasses)) + allocate(a_silt ( 1:n_SoilClasses)) + allocate(a_oc ( 1:n_SoilClasses)) + allocate(a_bee ( 1:n_SoilClasses)) + allocate(a_psis ( 1:n_SoilClasses)) + allocate(a_poros ( 1:n_SoilClasses)) + allocate(a_wp ( 1:n_SoilClasses)) + allocate(a_aksat ( 1:n_SoilClasses)) + allocate(atau ( 1:n_SoilClasses)) + allocate(btau ( 1:n_SoilClasses)) + allocate(atau_2cm( 1:n_SoilClasses)) + allocate(btau_2cm( 1:n_SoilClasses)) + allocate(a_wpsurf( 1:n_SoilClasses)) allocate(a_porosurf(1:n_SoilClasses)) ! 
SoilClasses-SoilHyd-TauParam.dat and SoilClasses-SoilHyd-TauParam.peatmap differ @@ -4148,19 +4192,18 @@ SUBROUTINE soil_para_hwsd (nx,ny, n_land, tile_pfs, tile_id) !$OMP sf,data_vec1,data_vec2,data_vec3, & !$OMP data_vec4,data_vec5,data_vec6,cF_lim, & !$OMP table_map,soil_class_top,soil_class_com, & - !$OMP soc_vec,poc_vec,use_PEATMAP) & - !ncells_* not used !$OMP soc_vec,poc_vec,ncells_top,ncells_top_pro,& - !ncells_* not used !$OMP ncells_sub_pro,use_PEATMAP) & + !$OMP soc_vec,poc_vec,use_PEATMAP, & + !$OMP PEATMAP_STRICT_GPA22) & !$OMP PRIVATE(n,i,j,k,icount,t_count,i1,i2,ss_clay, & !$OMP ss_sand,ss_clay_all,ss_sand_all, & !$OMP ss_oc_all,cFamily,factor,o_cl,o_clp,ktop, & - !$OMP min_percs, fac_count, write_debug) + !$OMP min_percs, fac_count, write_debug, & + !$OMP n_valid_top, n_valid_prof) ! loop through tiles (split into two loops for OpenMP) DO t_count = 1,n_threads DO n = low_ind(t_count),upp_ind(t_count) - write_debug = .false. ! if (n==171010) write_debug = .true. @@ -4192,12 +4235,11 @@ SUBROUTINE soil_para_hwsd (nx,ny, n_land, tile_pfs, tile_id) loop: do while (tileid_vec(icount)== n) if(icount <= size(tileid_vec,1)) icount = icount + 1 - if(icount > size(tileid_vec,1)) exit loop + if(icount > size(tileid_vec,1)) exit loop end do loop i2 = icount -1 - i = i2 - i1 + 1 ! number of land raster grid cells that make up tile n (?) - + i = i2 - i1 + 1 ! number of land raster grid cells that make up tile n (?) ! ------------------------------------------------------------------- ! @@ -4225,104 +4267,156 @@ SUBROUTINE soil_para_hwsd (nx,ny, n_land, tile_pfs, tile_id) ss_sand_all (1+i:2*i) = data_vec5(i1:i2) ss_oc_all (1+i:2*i) = data_vec6(i1:i2) ! <-- oc_sub - ! ----------------------------------------------------------------------- ! - ! determine aggregate/dominant orgC *top* layer soil class ("o_cl") of tile n - - cFamily = 0. - !! factor = 1. - - do j=1,i - if(j <= i) factor = 1. - if((ss_oc_all(j)*sf >= cF_lim(1)).and. 
(ss_oc_all(j)*sf < cF_lim(2))) cFamily(1) = cFamily(1) + factor - if((ss_oc_all(j)*sf >= cF_lim(2)).and. (ss_oc_all(j)*sf < cF_lim(3))) cFamily(2) = cFamily(2) + factor - if((ss_oc_all(j)*sf >= cF_lim(3)).and. (ss_oc_all(j)*sf < cF_lim(4))) cFamily(3) = cFamily(3) + factor - if((ss_oc_all(j)*sf >= cF_lim(4) )) cFamily(4) = cFamily(4) + factor + ! Determine aggregate/dominant orgC *top* class (o_cl) for tile n. + ! + ! cFamily(1:4) counts raster pixels whose top-layer orgC falls into + ! orgC classes 1:4. Pixels with invalid orgC do not contribute. + ! + ! n_valid_top counts raster pixels with valid top-layer orgC and is + ! used only to detect tiles with no valid top-layer support. + ! + cFamily = 0. + n_valid_top = 0 + + do j=1,i ! loop through top orgC of raster grid cells that make up tile n + + if (ss_oc_all(j)*sf >= cF_lim(1)) n_valid_top = n_valid_top + 1 ! count all raster pixels with valid top orgC + + if((ss_oc_all(j)*sf >= cF_lim(1)).and.(ss_oc_all(j)*sf < cF_lim(2))) cFamily(1) = cFamily(1) + 1. + if((ss_oc_all(j)*sf >= cF_lim(2)).and.(ss_oc_all(j)*sf < cF_lim(3))) cFamily(2) = cFamily(2) + 1. + if((ss_oc_all(j)*sf >= cF_lim(3)).and.(ss_oc_all(j)*sf < cF_lim(4))) cFamily(3) = cFamily(3) + 1. + if((ss_oc_all(j)*sf >= cF_lim(4)) ) cFamily(4) = cFamily(4) + 1. + end do - if (sum(cFamily) == 0.) o_cl = 1 ! default is o_cl=1 (if somehow no grid cell has top-layer orgC >=0.) - - !! if (.not. use_PEATMAP) then - - ! assign dominant *top* layer org soil class (even if only a minority of the contributing - ! raster grid cells is peat) - - if (sum(cFamily) > 0.) o_cl = maxloc(cFamily, dim = 1) - - !! else - - if (use_PEATMAP) then - - ! PEATMAP: tile has *top* layer peat class only if more than 50% of the contributing - ! raster grid cells are peat (may loose some peat tiles w.r.t. non-PEATMAP bcs version) + if (n_valid_top == 0) then + ! + ! No valid top-layer orgC values: fall back to mineral orgC class 1. + ! 
+ o_cl = 1 - if (cFamily(4)/real(i) > PEATMAP_THRESHOLD_2) then - o_cl = 4 + else if (PEATMAP_STRICT_GPA22) then + ! + ! Strict GPA22 behavior: + ! classify peat only when peat support exceeds the threshold + ! relative to the full tile pixel count. Otherwise choose the + ! dominant mineral orgC class (1–3). + ! + if (cFamily(4)/real(i) > PEATMAP_THRESHOLD_2) then + o_cl = 4 else - if (sum(cFamily(1:3)) > 0.) o_cl = maxloc(cFamily(1:3), dim = 1) ! o_cl = 1, 2, or 3 + if (sum(cFamily(1:3)) > 0.) then + o_cl = maxloc(cFamily(1:3), dim = 1) + else + o_cl = 1 + endif endif - endif + else + ! + ! Legacy behavior preserved for backward compatibility. + ! + o_cl = maxloc(cFamily, dim = 1) + if (use_PEATMAP) then + if (cFamily(4)/real(i) > PEATMAP_THRESHOLD_2) then + o_cl = 4 + else + if (sum(cFamily(1:3)) > 0.) o_cl = maxloc(cFamily(1:3), dim = 1) ! o_cl = 1, 2, or 3 + endif + endif - ! determine aggregate/dominant orgC *profile* (0-100) soil class ("o_clp") of tile n, - ! weight factor=1. for top (0-30) layer and weight factor=2.33 for sub (30-100) layer + endif + ! ----------------------------------------------------------------------- + ! + ! Determine aggregate/dominant orgC *profile* class (o_clp) for tile n. + ! The top layer (0–30 cm) has weight 1.0 and the sub layer (30–100 cm) + ! has weight 2.33. + ! + ! cFamily(1:4) stores weighted profile support for orgC classes 1:4. + ! Layers with invalid orgC do not contribute. + ! + ! n_valid_prof counts layers with valid orgC and is used only to detect + ! tiles with no valid profile support. cFamily = 0. + n_valid_prof = 0 - do j=1,2*i + do j=1,2*i ! loop through top *and* sub orgC of raster grid cells that make up tile n + if(j <= i) factor = 1. - if(j > i) factor = 2.33 - if((ss_oc_all(j)*sf >= cF_lim(1)).and. (ss_oc_all(j)*sf < cF_lim(2))) cFamily(1) = cFamily(1) + factor - if((ss_oc_all(j)*sf >= cF_lim(2)).and. (ss_oc_all(j)*sf < cF_lim(3))) cFamily(2) = cFamily(2) + factor - if((ss_oc_all(j)*sf >= cF_lim(3)).and. 
(ss_oc_all(j)*sf < cF_lim(4))) cFamily(3) = cFamily(3) + factor
-             if((ss_oc_all(j)*sf >= cF_lim(4)             )) cFamily(4) = cFamily(4) + factor
+             if(j >  i) factor = 2.33
+
+             if (ss_oc_all(j)*sf >= cF_lim(1)) n_valid_prof = n_valid_prof + 1   ! count all raster pixel layers (top *and* sub) with valid orgC
+
+             if((ss_oc_all(j)*sf >= cF_lim(1)).and.(ss_oc_all(j)*sf < cF_lim(2))) cFamily(1) = cFamily(1) + factor
+             if((ss_oc_all(j)*sf >= cF_lim(2)).and.(ss_oc_all(j)*sf < cF_lim(3))) cFamily(2) = cFamily(2) + factor
+             if((ss_oc_all(j)*sf >= cF_lim(3)).and.(ss_oc_all(j)*sf < cF_lim(4))) cFamily(3) = cFamily(3) + factor
+             if((ss_oc_all(j)*sf >= cF_lim(4))                                  ) cFamily(4) = cFamily(4) + factor
+
          end do

-         ! NOTE: For PEATMAP, oc_sub was cut back to 8./sf above:
-         !       "! move HWSD sub-surface peat to peat-rich mineral Group 3 because merged surface peat defines sub-surface peat"
-         !       "where (oc_sub*sf >= cF_lim(4))                                                                              "
-         !       "   oc_sub = NINT(8./sf)                                                                                     "
-         !       "endwhere                                                                                                    "
-         !       For PEATMAP, the sub-layer weight of 2.33 should only count towards cFamily(1:3), and in most cases the
-         !       maxloc statement below should therefore result in o_clp = 1, 2, or 3 only.  However, if the top-layer orgC
-         !       is peat for most contributing raster grid cells and the sub-layer orgC values are relatively evenly spread
-         !       over orgC classes 1, 2, and 3, then maxloc(cFamily) can result in o_clp=4.
+         if (n_valid_prof == 0) then
+            !
+            ! No valid profile orgC values: fall back to mineral orgC class 1.
+            !
+            o_clp = 1
+         else if (PEATMAP_STRICT_GPA22) then
+            !
+            ! Strict GPA22 behavior:
+            !   if top-layer peat support makes the tile peat, then profile follows
+            !   consistently as peat. Otherwise use weighted profile support only to
+            !   distinguish mineral orgC classes 1–3.
+            !
+            if (o_cl == 4) then
+               o_clp = 4
+            else
+               if (sum(cFamily(1:3)) > 0.) then
+                  o_clp = maxloc(cFamily(1:3), dim = 1)
+               else
+                  o_clp = 1
+               endif
+            endif
+         else
+            !
+            ! Legacy behavior preserved for backward compatibility.
+            !
+ o_clp = maxloc(cFamily, dim = 1) - if (sum(cFamily) == 0.) o_clp = 1 - if (sum(cFamily) > 0.) o_clp = maxloc(cFamily, dim = 1) + endif ! ---------------------------------------------------------------------------------------- ! - ! Determine *top* layer mineral/organic soil class of tile n + ! Determine final *top* layer soil *class* of tile n from the dominant + ! top-layer orgC class (o_cl) and representative top-layer clay/sand. if(o_cl == 4) then - ! Top-layer soil class of tile n is peat. + ! Top-layer soil class of tile n is *peat*. + ! Compute average top-layer orgC (only across raster grid cells whose top layer is peat). - soil_class_top(n) = n_SoilClasses + soil_class_top(n) = n_SoilClasses ! assign peat class ktop = 0 do j=1,i ! avg only across contributing raster grid cells that are peat if(ss_oc_all(j)*sf >= cF_lim(4)) then - soc_vec (n) = soc_vec(n) + ss_oc_all(j)*sf + soc_vec (n) = soc_vec(n) + ss_oc_all(j)*sf ! "soc" = "surface orgC" ktop = ktop + 1 endif end do if(ktop.ne.0) soc_vec (n) = soc_vec(n)/ktop - !ncells_top(n) = 100.*float(ktop)/float(i) ! ncells_* not used else - ! Top-layer soil class of tile n is mineral. - ! Compute average top-layer orgC (only across raster grid cells within same orgC class) - ! and collect all clay/sand pairs of raster grid cells within same orgC class. + ! Top-layer soil class of tile n is *mineral*. + + ! Compute average top-layer orgC across raster grid cells in the assigned + ! orgC class and collect clay/sand pairs from those cells where texture data are valid. - !k = 1 !cleanup k counter - !ktop = 1 !cleanup k counter - ktop = 0 !cleanup k counter + ktop = 0 ! counter of raster grid cells with assigned orgC class do j=1,i ! loop only through top-layer elements of ss_*_all @@ -4331,11 +4425,12 @@ SUBROUTINE soil_para_hwsd (nx,ny, n_land, tile_pfs, tile_id) if((ss_clay_all(j)*sf >= 0.).and.(ss_sand_all(j)*sf >= 0.)) then ! 
avoiding no-data-values - ktop = ktop + 1 !cleanup k counter + ktop = ktop + 1 ss_clay (ktop) = ss_clay_all(j) ss_sand (ktop) = ss_sand_all(j) - ! adjust clay and sand content if outside joint physical bounds + ! adjust clay and sand content if outside joint physical bounds: + ! must have sand+clay<=100; if violated, reset smaller of the two if((ss_clay (ktop) + ss_sand (ktop)) > 9999) then ! note: 9999 = 99.99% (scale factor = 0.01) if(ss_clay (ktop) >= ss_sand (ktop)) then ss_sand (ktop) = 10000 - ss_clay (ktop) @@ -4344,19 +4439,12 @@ SUBROUTINE soil_para_hwsd (nx,ny, n_land, tile_pfs, tile_id) endif endif soc_vec (n) = soc_vec(n) + ss_oc_all(j)*sf ! sum up top-layer orgC - !k = k + 1 !cleanup k counter - !ktop = ktop + 1 !cleanup k counter endif endif end do - !k = k - 1 !cleanup k counter - !ktop = ktop -1 !cleanup k counter - if(ktop.ne.0) soc_vec (n) = soc_vec(n)/ktop ! normalize top-layer orgC - !ncells_top(n) = 100.*float(ktop)/float(i) ! ncells_* not used - ! debugging output if (write_debug) write(80+n,*)ktop,o_cl if(ktop > 0 .and. write_debug) then @@ -4368,68 +4456,85 @@ SUBROUTINE soil_para_hwsd (nx,ny, n_land, tile_pfs, tile_id) ! to the average (top-layer) clay/sand across all raster grid cells within the ! dominant orgC class. - j = center_pix_int0(sf, ktop,ktop, ss_clay(1:ktop),ss_sand(1:ktop)) + j = center_pix_int0(sf, ktop, ktop, ss_clay(1:ktop),ss_sand(1:ktop)) ! Assign soil class of raster grid cell j to tile n - + if(j >=1) then min_percs%clay_perc = ss_clay(j)*sf min_percs%sand_perc = ss_sand(j)*sf min_percs%silt_perc = 100. - ss_clay(j)*sf - ss_sand(j)*sf soil_class_top (n) = table_map(soil_class (min_percs),o_cl) endif - + ! debugging output if (write_debug) write(80+n,*)j endif ! o_cl==4 + + ! Strict GPA22 fallback: + ! if no valid top-layer soil class could be assigned (for example, no valid + ! representative clay/sand pair was found), fall back to mineral class 1 + ! rather than leaving the class undefined. 
+ if (PEATMAP_STRICT_GPA22) then + if (soil_class_top(n) < 1) then + soil_class_top(n) = 1 + soc_vec(n) = cF_lim(1) + endif + endif ! debugging output if (write_debug) write(80+n,*)soil_class_top (n) ! ------------------------------------------------------------------------------- - ! - ! determine aggregate sand/clay/orgC for *profile* layer of tile n + ! + ! Determine final *profile* soil *class* of tile n from the dominant + ! profile orgC class (o_clp) and representative profile clay/sand. + ! Profile orgC support is weighted by 1.0 for the top layer (0–30 cm) + ! and 2.33 for the sub layer (30–100 cm). if(o_clp == 4) then - ! Profile-layer soil class of tile n is peat. - ! Compute average profile-layer orgC (only across raster grid cells and layers that are peat) + ! *Profile* soil class of tile n is *peat*. + + ! Compute weighted average profile orgC (only across raster grid cells/layers whose orgC class is peat). + + soil_class_com(n) = n_SoilClasses ! assign peat class - soil_class_com(n) = n_SoilClasses fac_count = 0. - k =0 - ktop =0 + k = 0 + ktop = 0 + do j=1,2*i if(ss_oc_all(j)*sf >= cF_lim(4)) then if(j <= i) factor = 1. ! top layer contribution 1 <= j <=i if(j > i) factor = 2.33 ! sub layer contribution i+1 <= j <=2*i if(j > i) k = k + 1 ! sub layer counter if(j <= i) ktop = ktop + 1 ! top layer counter - poc_vec (n) = poc_vec(n) + ss_oc_all(j)*sf*factor ! weighted sum of orgC + poc_vec (n) = poc_vec(n) + ss_oc_all(j)*sf*factor ! weighted sum of orgC ("poc" = "profile orgC") fac_count = fac_count + factor ! sum of weights endif end do if(fac_count.ne.0) poc_vec (n) = poc_vec (n)/fac_count ! normalize - !ncells_sub_pro(n) = 100.*float(k)/float(i) ! ncells_* not used - !ncells_top_pro(n) = 100.*float(ktop)/float(i) ! ncells_* not used + else + + ! *Profile* soil class of tile n is *mineral*. - ! Profile-layer soil class of tile n is mineral. - ! Compute average profile-layer orgC (only across raster grid cells within same orgC class) - ! 
and collect all clay/sand pairs of raster grid cells within same orgC class. + ! Compute weighted average profile orgC across raster grid cells/layers + ! in the assigned orgC class and collect clay/sand pairs where texture + ! data are valid. - !k = 1 !cleanup k counter - !ktop = 1 !cleanup k counter - k = 0 !cleanup k counter - ktop = 0 !cleanup k counter + k = 0 + ktop = 0 - ss_clay=0 - ss_sand=0 + ss_clay = 0 + ss_sand = 0 fac_count = 0. do j=1,2*i ! loop through both top (1<=j<=i) layer and sub (i+1<=j<=2*i) layer elements - ! avg only across contributing raster grid cells and layers with orgC class as that assigned to tile n + ! Average only across contributing raster grid cells/layers whose + ! orgC class matches the assigned profile orgC class. if((ss_oc_all(j)*sf >= cF_lim(o_clp)).and.(ss_oc_all(j)*sf < cF_lim(o_clp + 1))) then if((ss_clay_all(j)*sf >= 0.).and.(ss_sand_all(j)*sf >= 0.)) then ! avoiding no-data-values @@ -4440,15 +4545,9 @@ SUBROUTINE soil_para_hwsd (nx,ny, n_land, tile_pfs, tile_id) poc_vec (n) = poc_vec(n) + ss_oc_all(j)*sf*factor ! weighted sum of orgC fac_count = fac_count + factor - k = k + 1 ! counter for top and sub contributions !cleanup k counter - - if (j<=i) ktop = ktop + 1 ! counter for top contributions only !cleanup k counter - + k = k + 1 ! counter for top and sub contributions - !obsolete20220502 The code within the if-then and if-else statements below was nearly identical, - !obsolete20220502 except for the omission of the ktop counter from the else block. - !obsolete20220502 - !obsolete20220502 if(j <= i) then + if (j<=i) ktop = ktop + 1 ! 
counter for top contributions only ss_clay (k) = ss_clay_all(j) ss_sand (k) = ss_sand_all(j) @@ -4461,46 +4560,25 @@ SUBROUTINE soil_para_hwsd (nx,ny, n_land, tile_pfs, tile_id) ss_clay (k) = 10000 - ss_sand (k) endif endif - !k = k + 1 !cleanup k counter - !ktop = ktop + 1 !cleanup k counter - - !obsolete20220502 else - !obsolete20220502 ss_clay (k) = ss_clay_all(j) - !obsolete20220502 ss_sand (k) = ss_sand_all(j) - !obsolete20220502 if((ss_clay (k) + ss_sand (k)) > 9999) then - !obsolete20220502 if(ss_clay (k) >= ss_sand (k)) then - !obsolete20220502 ss_sand (k) = 10000 - ss_clay (k) - !obsolete20220502 else - !obsolete20220502 ss_clay (k) = 10000 - ss_sand (k) - !obsolete20220502 endif - !obsolete20220502 endif - !obsolete20220502 !k = k + 1 !cleanup k counter - !obsolete20220502 endif endif endif end do - !k = k - 1 !cleanup k counter - !ktop = ktop -1 !cleanup k counter - if(fac_count.ne.0) poc_vec (n) = poc_vec(n)/fac_count ! normalize profile-layer orgC - !ncells_top_pro(n) = 100.*float(ktop)/float(i) ! ncells_* not used - !ncells_sub_pro(n) = 100.*float(k-ktop)/float(i) ! ncells_* not used ! debugging output - if (write_debug) write (80+n,*)ktop,k,o_cl + if (write_debug) write (80+n,*)ktop,k,o_clp if (write_debug) write (80+n,*)ss_clay(1:k) if (write_debug) write (80+n,*)ss_sand(1:k) - ! Determine the raster grid cell and layer j that has clay/sand content closest - ! to the average (profile) clay/sand across all raster grid cells within the + ! Determine the raster grid cell/layer j whose clay/sand is closest + ! to the mean profile clay/sand across all raster grid cells within the ! dominant orgC class. j = center_pix_int0 (sf, ktop,k, ss_clay(1:k),ss_sand(1:k)) - ! Assign soil class of raster grid cell and layer j to tile n - + ! Assign the soil class of representative raster grid cell/layer j to tile n. 
if(j >=1) then min_percs%clay_perc = ss_clay(j)*sf min_percs%sand_perc = ss_sand(j)*sf @@ -4514,6 +4592,15 @@ SUBROUTINE soil_para_hwsd (nx,ny, n_land, tile_pfs, tile_id) if (write_debug) close(80+n) endif ! o_clp==4 + ! Strict GPA22 fallback: + ! if no valid profile soil class could be assigned, fall back to the + ! already assigned top-layer soil class rather than leaving the profile class undefined. + if (PEATMAP_STRICT_GPA22) then + if (soil_class_com(n) < 1) then + soil_class_com(n) = soil_class_top(n) + poc_vec(n) = soc_vec(n) + endif + endif deallocate (ss_clay,ss_sand,ss_clay_all,ss_sand_all,ss_oc_all) END DO @@ -4544,71 +4631,95 @@ SUBROUTINE soil_para_hwsd (nx,ny, n_land, tile_pfs, tile_id) fname ='clsm/tau_param.dat' open (12,file=trim(fname),form='formatted',status='unknown',action = 'write') - - !obsolete20220502 fname ='clsm/mosaic_veg_typs_fracs' - !obsolete20220502 open (13,file=trim(fname),form='formatted',status='old',action = 'read') - do n = 1, n_land - - !obsolete20220502 read (13,*) tindex,pfafindex,vtype - - ! fill gaps from neighbor for rare missing values caused by inconsistent masks + ! Handle rare remaining missing soil classes after tile classification. + ! + ! Legacy behavior is preserved by default. + ! In strict GPA22 mode, when profile class information is missing, + ! use a consistent neighbor fill so top/profile classes and dependent + ! tile properties are copied together. if ((soil_class_top (n) == -9999).or.(soil_class_com (n) == -9999)) then - - ! if com-layer has data, the issue is only with top-layer - - if(soil_class_com (n) >= 1) soil_class_top (n) = soil_class_com (n) - - ! if there is nothing, look for the neighbor - ! - ! ^ - ! | - ! | The comment above seems wrong; could have soil_class_top(n)>=1, unless - ! earlier soil_class_com was set equal to soil_class_top whenever - ! soil_class_top was available and soil_class_com was not. - + ! Legacy fallback for the case where only the top-layer class is missing + ! 
but the profile class is already defined. + if ((soil_class_top(n) == -9999) .and. (soil_class_com(n) >= 1)) then + if (.not. PEATMAP_STRICT_GPA22) then + soil_class_top(n) = soil_class_com(n) + endif + endif + + ! If the profile class is missing, search "neighboring" tiles for a fill value, + ! where "neighbors" are defined in terms of proximity of tile index (not necessarily geographical proximity) if (soil_class_com (n) == -9999) then - - ! Look for neighbor j (regardless of soil_class_top) and set both - ! soil_class_com(n) and soil_class_top(n) equal to the neighbor's - ! soil_class_com(j). - + do k = 1, n_land j = 0 i1 = n - k i2 = n + k - if(i1 >= 1) then - if (soil_class_com (i1) >=1) j = i1 ! tentatively use "lower" neighbor unless out of range - endif + + if (PEATMAP_STRICT_GPA22) then + ! Strict GPA22: + ! borrow only from a neighbor with both top and profile classes + ! defined, and copy the associated tile properties consistently. + if (i1 >= 1) then + if ((soil_class_top(i1) >= 1) .and. (soil_class_com(i1) >= 1)) j = i1 ! start with "lower" neighbor unless out of range + endif - if(1 <= i2 .and. i2 <=n_land) then - if (soil_class_com (i2) >=1) j = i2 ! "upper" neighbor prevails unless out of range - endif + if (i2 <= n_land) then + if ((soil_class_top(i2) >= 1) .and. (soil_class_com(i2) >= 1)) j = i2 ! "upper" neighbor prevails unless out of range + endif + + if (j > 0) then + soil_class_com(n) = soil_class_com(j) + soil_class_top(n) = soil_class_top(j) + grav_vec(n) = grav_vec(j) + soc_vec(n) = soc_vec(j) + poc_vec(n) = poc_vec(j) + pmap(n) = pmap(j) + soildepth(n) = soildepth(j) + endif + + else + ! Legacy behavior preserved for backward compatibility. + ! Note: this copies the top class from the neighbor's profile class. + if (i1 >= 1) then + if (soil_class_com(i1) >= 1) j = i1 + endif + + if (1 <= i2 .and. 
i2 <= n_land) then + if (soil_class_com(i2) >= 1) j = i2 + endif + + if (j > 0) then + soil_class_com(n) = soil_class_com(j) + soil_class_top(n) = soil_class_com(j) + grav_vec(n) = grav_vec(j) + soc_vec(n) = soc_vec(j) + poc_vec(n) = poc_vec(j) + endif - if (j > 0) then - soil_class_com (n) = soil_class_com (j) - !soil_class_top (n) = soil_class_com (n) - soil_class_top (n) = soil_class_com (j) ! should be faster/safer than usin gsoil_class_com(n) - grav_vec(n) = grav_vec(j) - soc_vec(n) = soc_vec (j) - poc_vec(n) = poc_vec (j) endif - if (soil_class_com (n) >=1) exit + if (soil_class_com(n) >= 1) exit end do + endif endif fac_surf = soil_class_top(n) fac = soil_class_com(n) - - if(use_PEATMAP) then - ! the maximum peat soil depth is set to the value Michel used to derive parameters (5000.) - if (fac_surf == 253) soildepth(n) = 5000. ! max(soildepth(n),5000.) - ! reset subsurface to peat if surface soil type is peat - if (fac_surf == 253) fac = 253 + + if (use_PEATMAP) then + ! The maximum peat soil depth is set to the value Michel used to derive + ! parameters (5000.). + if (fac_surf == 253) soildepth(n) = 5000. + + ! Legacy PEATMAP behavior: + ! if the surface class is peat, force the written profile class to peat. + ! In strict GPA22 mode, profile peat must already have been assigned + ! upstream, so do not override fac here. + if ((fac_surf == 253) .and. (.not. PEATMAP_STRICT_GPA22)) fac = 253 endif wp_wetness = a_wp(fac) /a_poros(fac) @@ -4620,7 +4731,6 @@ SUBROUTINE soil_para_hwsd (nx,ny, n_land, tile_pfs, tile_id) pfafindex = tile_pfs(n) ! 
write soil_param.first - write (11,'(i10,i8,i4,i4,3f8.4,f12.8,f7.4,f10.4,3f7.3,4f7.3,2f10.4, f8.4)')tindex,pfafindex, & fac_surf, fac, a_bee(fac),a_psis(fac),a_poros(fac),& this_cond,wp_wetness,soildepth(n), & @@ -4660,15 +4770,12 @@ SUBROUTINE soil_para_hwsd (nx,ny, n_land, tile_pfs, tile_id) close (11, status = 'keep') close (12, status = 'keep') - !obsolete20220502 close (13, status = 'keep') - deallocate (data_vec1, data_vec2,data_vec3, data_vec4,data_vec5, data_vec6) deallocate (tileid_vec) deallocate (a_sand,a_clay,a_silt,a_oc,a_bee,a_psis, & a_poros,a_wp,a_aksat,atau,btau,a_wpsurf,a_porosurf, & atau_2cm,btau_2cm) deallocate (soildepth, grav_vec,soc_vec,poc_vec,soil_class_top,soil_class_com) - !ncells_top,ncells_top_pro,ncells_sub_pro,soil_class_top,soil_class_com) ! ncells_* not used ! write catch_params.nc4 [soil hydraulic and srfexc-rzexc time scale parameters] @@ -4755,7 +4862,7 @@ END SUBROUTINE soil_para_hwsd ! ==================================================================== ! - INTEGER FUNCTION center_pix_int0 (sf,ktop, ktot, x,y) + INTEGER FUNCTION center_pix_int0 (sf, ktop, ktot, x, y) implicit none @@ -4778,10 +4885,10 @@ INTEGER FUNCTION center_pix_int0 (sf,ktop, ktot, x,y) integer, intent (in) :: ktop,ktot real, intent (in) :: sf - real :: xi,xj,yi,yj + real :: xi,yi real :: length - integer :: i,j,npix + integer :: i,j real :: zi, zj, mindist,xc,yc,zc length = 0. diff --git a/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/makebcs/rmTinyCatchParaMod.F90 b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/makebcs/rmTinyCatchParaMod.F90 index 307934f61..db897ccde 100644 --- a/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/makebcs/rmTinyCatchParaMod.F90 +++ b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/makebcs/rmTinyCatchParaMod.F90 @@ -61,6 +61,7 @@ module rmTinyCatchParaMod logical, public, save :: use_PEATMAP = .false. 
logical, public, save :: jpl_height = .false. + logical, public, save :: PEATMAP_STRICT_GPA22 = .false. character*8, public, save :: LAIBCS = 'UNDEF' character*6, public, save :: SOILBCS = 'UNDEF' character*6, public, save :: MODALB = 'UNDEF' @@ -105,6 +106,7 @@ SUBROUTINE init_bcs_config(LBCSV) ! NGDC : Soil parameters from Reynolds et al. 2000, doi:10.1029/2000WR900130 (MERRA-2, Fortuna, Ganymed, Icarus) ! HWSD : Merged HWSDv1.21-STATSGO2 soil properties on 43200x21600 with Woesten et al. (1999) parameters ! HWSD_b : As in HWSD but with surgical fix of Argentina peatland issue (38S,60W) + ! HWSDv2 : HWSD updated from v1.2 to v2. And sub and top are D2 layer ! ! OUTLETV: Definition of outlet locations. DEFAULT : N/A ! N/A : No information (do not create routing "TRN" files). @@ -125,6 +127,7 @@ SUBROUTINE init_bcs_config(LBCSV) GNU = 2.17 use_PEATMAP = .false. jpl_height = .false. + PEATMAP_STRICT_GPA22= .false. case ("GM4", "ICA") LAIBCS = 'GSWP2' @@ -135,6 +138,7 @@ SUBROUTINE init_bcs_config(LBCSV) GNU = 1.0 use_PEATMAP = .false. jpl_height = .false. + PEATMAP_STRICT_GPA22= .false. case ("NL3") LAIBCS = 'MODGEO' @@ -145,6 +149,7 @@ SUBROUTINE init_bcs_config(LBCSV) GNU = 1.0 use_PEATMAP = .false. jpl_height = .false. + PEATMAP_STRICT_GPA22= .false. case ("NL4") LAIBCS = 'MODGEO' @@ -155,6 +160,7 @@ SUBROUTINE init_bcs_config(LBCSV) GNU = 1.0 use_PEATMAP = .false. jpl_height = .true. + PEATMAP_STRICT_GPA22= .false. case ("NL5") LAIBCS = 'MODGEO' @@ -165,6 +171,7 @@ SUBROUTINE init_bcs_config(LBCSV) GNU = 1.0 use_PEATMAP = .true. jpl_height = .true. + PEATMAP_STRICT_GPA22= .false. case ("v06") LAIBCS = 'MODGEO' @@ -175,6 +182,7 @@ SUBROUTINE init_bcs_config(LBCSV) GNU = 1.0 use_PEATMAP = .true. jpl_height = .true. + PEATMAP_STRICT_GPA22= .false. case ("v07") LAIBCS = 'MODGEO' @@ -185,6 +193,7 @@ SUBROUTINE init_bcs_config(LBCSV) GNU = 1.0 use_PEATMAP = .true. jpl_height = .false. + PEATMAP_STRICT_GPA22= .false. 
case ("v08") LAIBCS = 'MODGEO' @@ -195,6 +204,7 @@ SUBROUTINE init_bcs_config(LBCSV) GNU = 1.0 use_PEATMAP = .false. jpl_height = .false. + PEATMAP_STRICT_GPA22= .false. case ("v09") LAIBCS = 'MODGEO' @@ -205,6 +215,7 @@ SUBROUTINE init_bcs_config(LBCSV) GNU = 1.0 use_PEATMAP = .true. jpl_height = .false. + PEATMAP_STRICT_GPA22= .false. case ("v10") LAIBCS = 'MODGEO' @@ -215,6 +226,7 @@ SUBROUTINE init_bcs_config(LBCSV) GNU = 1.0 use_PEATMAP = .true. jpl_height = .false. + PEATMAP_STRICT_GPA22= .false. case ("v11") LAIBCS = 'MODGEO' @@ -225,6 +237,7 @@ SUBROUTINE init_bcs_config(LBCSV) GNU = 1.0 use_PEATMAP = .true. jpl_height = .true. + PEATMAP_STRICT_GPA22= .false. case ("v12","v13") @@ -241,6 +254,19 @@ SUBROUTINE init_bcs_config(LBCSV) GNU = 1.0 use_PEATMAP = .true. jpl_height = .true. + PEATMAP_STRICT_GPA22= .false. + + case ("v15") + LAIBCS = 'MODGEO' + SOILBCS = 'HWSD_b' + MODALB = 'MODIS2' + SNOWALB = 'MODC061v2' + OUTLETV = "v2" + GNU = 1.0 + use_PEATMAP = .true. + jpl_height = .true. + PEATMAP_STRICT_GPA22= .true. + case default @@ -2573,6 +2599,8 @@ SUBROUTINE create_model_para_woesten (Maskfile, nbcatch, tile_lon, tile_lat, til logical :: file_exists REAL, ALLOCATABLE, DIMENSION (:,:) :: parms4file integer :: ncid, status + ! peat GPA22 additions + logical :: target_is_peat, donor_is_peat ! --------- VARIABLES FOR *OPENMP* PARALLEL ENVIRONMENT ------------ ! @@ -2979,8 +3007,10 @@ SUBROUTINE create_model_para_woesten (Maskfile, nbcatch, tile_lon, tile_lat, til DO n=1,nbcatch - ! Read soil_param.first again...; this is (almost certainly) needed to maintain consistency - ! between soil_param.first and soil_param.dat, see comments above. + ! Re-read soil_param.first for tile n so soil_param.dat starts from the + ! original tile identity/properties written by mod_process_hres_data. + ! This preserves consistency between soil_param.first and soil_param.dat + ! before any bad-SAT donor repair is applied below. 
read(10,'(i10,i8,i4,i4,3f8.4,f12.8,f7.4,f10.4,3f7.3,4f7.3,2f10.4, f8.4)') & tindex2(n),pfaf2(n),soil_class_top(n),soil_class_com(n), & @@ -2989,34 +3019,76 @@ SUBROUTINE create_model_para_woesten (Maskfile, nbcatch, tile_lon, tile_lat, til a_sand_surf(n),a_clay_surf(n),atile_sand(n),atile_clay(n) , & wpwet_surf(n),poros_surf(n), pmap(n) - ! This revised if block replaces the complex, nested if block commented out above - - if ( (ars1(n)==9999.) .or. (arw1(n)==9999.) ) then - - ! some parameter values are no-data --> find nearest tile k with good parameters + ! If SAT parameters are invalid for tile n, choose donor tile k. + ! + ! Legacy behavior: + ! use the nearest tile with valid SAT parameters. + ! + ! Strict GPA22 behavior: + ! first prefer a donor with the same peat/mineral state as the + ! target tile, so repaired bulk hydraulics do not cross peat and + ! mineral regimes. If no same-state donor exists, fall back to + ! the original nearest-valid donor search. + if ( (ars1(n)==9999.) .or. (arw1(n)==9999.) ) then dist_save = 1000000. k = 0 + + ! Define target tile peat/mineral state from its current top/profile classes. + target_is_peat = (soil_class_top(n) == 253) .or. (soil_class_com(n) == 253) + do i = 1,nbcatch - if(i /= n) then - if((ars1(i).ne.9999.).and.(arw1(i).ne.9999.)) then - - tile_distance = (tile_lon(i) - tile_lon(n)) * (tile_lon(i) - tile_lon(n)) + & - (tile_lat(i) - tile_lat(n)) * (tile_lat(i) - tile_lat(n)) - if(tile_distance < dist_save) then - k = i - dist_save = tile_distance - endif - endif + if (i == n) cycle + if ((ars1(i) == 9999.) .or. (arw1(i) == 9999.)) cycle + + ! In strict GPA22 mode, only consider donors with the same + ! peat/mineral state as the target tile. + if (PEATMAP_STRICT_GPA22) then + donor_is_peat = (soil_class_top(i) == 253) .or. (soil_class_com(i) == 253) + if (donor_is_peat .neqv. 
target_is_peat) cycle + endif + + tile_distance = (tile_lon(i) - tile_lon(n)) * (tile_lon(i) - tile_lon(n)) + & + (tile_lat(i) - tile_lat(n)) * (tile_lat(i) - tile_lat(n)) + + if (tile_distance < dist_save) then + k = i + dist_save = tile_distance endif enddo - ! record in file clsm/bad_sat_param.tiles - write (41,*)n,k ! n="bad" tile, k=tile from which parameters are taken + + ! Strict GPA22 fallback: + ! if no same-state donor exists, revert to the original + ! nearest-valid donor search so a donor is still always found. + if ((k == 0) .and. PEATMAP_STRICT_GPA22) then + dist_save = 1000000. + do i = 1,nbcatch + if (i == n) cycle + if ((ars1(i) == 9999.) .or. (arw1(i) == 9999.)) cycle + + tile_distance = (tile_lon(i) - tile_lon(n)) * (tile_lon(i) - tile_lon(n)) + & + (tile_lat(i) - tile_lat(n)) * (tile_lat(i) - tile_lat(n)) + + if (tile_distance < dist_save) then + k = i + dist_save = tile_distance + endif + enddo + endif + + if (k == 0) then + write(*,*) 'ERROR: no donor tile found for tile ', n + stop + endif + + ! Record indices of (repaired) target tile "n" and donor tile "k" in file "clsm/bad_sat_param.tiles" + write (41,*) n, k ! Overwrite parms4file when filling in parameters from neighboring tile k. ! For "good" tiles, keep parms4file as read earlier from catch_params.nc4, - ! which is why this must be done within the "then" block of the "if" statement. - ! This is necessary for backward 0-diff compatibility of catch_params.nc4. + ! which is why this must be done within the "then" block of the "if" + ! statement. This is necessary for backward 0-diff compatibility of + ! catch_params.nc4. parms4file (n,12) = BEE(k) parms4file (n,16) = COND(k) @@ -3027,11 +3099,10 @@ SUBROUTINE create_model_para_woesten (Maskfile, nbcatch, tile_lon, tile_lat, til else - ! nominal case, all parameters are good - + ! Nominal case: current tile n already has valid parameters. k = n - end if + end if ! 
for current tile n, write parameters of tile k into ar.new (20), bf.dat (30), ts.dat (40), ! and soil_param.dat (42) @@ -3041,21 +3112,36 @@ SUBROUTINE create_model_para_woesten (Maskfile, nbcatch, tile_lon, tile_lat, til ars1(k),ars2(k),ars3(k), & ara1(k),ara2(k),ara3(k),ara4(k), & arw1(k),arw2(k),arw3(k),arw4(k) - + write(30,'(i10,i8,f5.2,3(2x,e13.7))')tindex2(n),pfaf2(n),gnu,bf1(k),bf2(k),bf3(k) - + write(40,'(i10,i8,f5.2,4(2x,e13.7))')tindex2(n),pfaf2(n),gnu, & tsa1(k),tsa2(k),tsb1(k),tsb2(k) - - write(42,'(i10,i8,i4,i4,3f8.4,f12.8,f7.4,f10.4,3f7.3,4f7.3,2f10.4, f8.4)') & - tindex2(n),pfaf2(n),soil_class_top(k),soil_class_com(k), & - BEE(k), PSIS(k),POROS(k),COND(k),WPWET(k),soildepth(k), & - grav_vec(k),soc_vec(k),poc_vec(k), & - a_sand_surf(k),a_clay_surf(k),atile_sand(k),atile_clay(k) , & - wpwet_surf(k),poros_surf(k), pmap(k) - + + ! write "soil_param.dat" file; n = target tile, k = donor tile. + + if (PEATMAP_STRICT_GPA22) then + ! write only bulk hydraulic fields from donor tile k. + write(42,'(i10,i8,i4,i4,3f8.4,f12.8,f7.4,f10.4,3f7.3,4f7.3,2f10.4, f8.4)') & + tindex2(n),pfaf2(n), & ! n (target) + soil_class_top(n),soil_class_com(n), & ! n (target) + BEE(k), PSIS(k),POROS(k),COND(k),WPWET(k),soildepth(k), & ! k (donor) + grav_vec(n),soc_vec(n),poc_vec(n), & ! n (target) + a_sand_surf(n),a_clay_surf(n),atile_sand(n),atile_clay(n), & ! n (target) + wpwet_surf(n),poros_surf(n), pmap(n) ! n (target) + else + ! Legacy path: preserve original donor-copy behavior. + write(42,'(i10,i8,i4,i4,3f8.4,f12.8,f7.4,f10.4,3f7.3,4f7.3,2f10.4, f8.4)') & + tindex2(n),pfaf2(n), & ! n (target) + soil_class_top(k),soil_class_com(k), & ! k (donor) + BEE(k), PSIS(k),POROS(k),COND(k),WPWET(k),soildepth(k), & ! k (donor) + grav_vec(k),soc_vec(k),poc_vec(k), & ! k (donor) + a_sand_surf(k),a_clay_surf(k),atile_sand(k),atile_clay(k), & ! k (donor) + wpwet_surf(k),poros_surf(k), pmap(k) ! k (donor) + endif + ! 
record ar.new, bf.dat, and ts.dat parameters for later writing into catch_params.nc4 - + if (allocated (parms4file)) then parms4file (n, 1) = ara1(k) parms4file (n, 2) = ara2(k) diff --git a/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/README_code_sequence b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/README_code_sequence new file mode 100755 index 000000000..16f978824 --- /dev/null +++ b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/README_code_sequence @@ -0,0 +1,275 @@ +Updated: Dec 2025 +=============================================== +HWSDv2 Soil Processing Workflow (D2 Prototype) +All codes are on GitHub here: + /@GEOSgcm_GridComp/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/ +only data is stored here: +/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/ + +STEP3 merging data set -/NGDC-Reynolds/ - Read NGDC/Reynolds 0.5° (4320×2160) fields from `path` (native bin/img inputs). +STEP4 merging data set -/STATSGO2/ +STEP4 merging data set -/Africa-AFSIS/ + +=============================================== +IMPORTANT: Source database formats and conversion +-------------------------------------- +HWSD and STATSGO source datasets are distributed as Microsoft Access databases (*.mdb). + +Specifically: +- HWSDv2 attributes are provided in HWSD2.mdb +- STATSGO / STATSGO2 attributes are provided in soildb_*.mdb + +While IDL can read *.mdb files directly, this proved fragile and +difficult to maintain for large-scale, vectorized processing. +Python-based solutions were explored; however, reliable access to the +required ODBC drivers and database schema support was only available +in specific parallel Python environments and was not portable across +platforms. 
Probably doable, but the SI team would have to help with libraries. + +Therefore, as a first preprocessing step: +- The *.mdb databases are converted to SQLite +- SQLite is used as the authoritative attribute store +- R is used to query SQLite and rasterize HWSDv2 fields efficiently + +This conversion is a structural preprocessing step and does not modify +any scientific content of the datasets. + +Since an old STATSGO version was used, we use the files Sarith converted with IDL. + +=============================================== +D2 Prototype STEPs: +HWSDv2 (D2) → R (extract & rasterize works for all layers) + → Python (global mosaic per layer D2 now) + → symlink D2→top/sub + → Fortran STEP3 (merge NGDC–Reynolds gap fill HWSDv2) + → Fortran STEP4 (overlay STATSGO + optional AFSIS over STEP3) + → IDL STEP5 (NetCDF writer setup / attributes) + → IDL STEP5 (final gap fill + write 36×18 BCS NetCDF tiles). +=============================================== +STEP3: Background gap-fill using NGDC–Reynolds + +HWSDv2 provides the primary global soil properties. +NGDC–Reynolds fields are used only as a background fill where HWSDv2 is missing. +No external land mask is used; validity is determined directly from HWSDv2 fields. + +STEP4: Regional overlays (STATSGO2 / AFSIS) + +Higher-resolution regional datasets overwrite the STEP3 result +where available (STATSGO2 over CONUS, optional AFSIS over Africa). +=============================================== + +This workflow generates global 30-arcsecond soil fields from HWSDv2, fills spatial gaps +, and applies regional refinements for use in GEOS BCS generation. + +Only D2 (20–40 cm) is processed from HWSDv2 for final product.
+ +Legacy tools require separate top and sub layers, so the prototype currently maps: + +*_top → D2 +*_sub → D2 + + +Datasets and tools +=============================================== + +HWSDv2 – primary global soil dataset (30-arcsec) +NGDC–Reynolds – coarse global fallback (gap filling) +STATSGO – CONUS refinement +AFSIS – Africa refinement + +Note: +=========== +STATSGO (v2, ~2006) - GSM-based national soil map- NAD83- Distributed with soildb_US_2002.mdb +STATSGO2 (v3, ~2016)- Updated GSM (still STATSGO) - WGS84- Distributed with soildb_US_2003.mdb +This is what code refers to as STATSGO2 + +Tools +=============================================== + +R – HWSDv2 tile generation from raster + SQLite DB +Python – global mosaicking and diagnostics +Fortran – gap filling and regional overlays +IDL – final BCS-ready NetCDF generation + +Higher-quality datasets always override coarser ones while preserving global coverage. + +Workflow summary +============================================== +R → HWSDv2 10° tiles (D2 only) +Python → global 30″ NetCDF (Global_*_D2.nc4) +Symlinks → *_top / *_sub → D2 +Fortran → merge Reynolds + overlays (STATSGO, AFSIS) +IDL → final BCS NetCDF tiles (36×18) + +Detailed steps +----------------------------------------- +1) HWSDv2 preprocessing (R) +----------------------------------------- +Inputs: +HWSDv2 SMU raster +HWSDv2 attribute DB (SQLite) + +Processing: +D2 only (20–40 cm) +dominant component selection (largest SHARE) +QC and derived fields (OM, porosity, etc.) 
+ +Outputs: +10° × 10° NetCDF tiles at 30″ + +Example: +Lon_-180_-170_Lat_70_80_D2.nc + +----------------------------------------- +2) Global mosaicking (Python) +----------------------------------------- +Stitch 10° tiles into global grids + +Outputs: +Global_Sand_D2.nc4 +Global_Clay_D2.nc4 +Global_OC_D2.nc4 + +Grid size: 43200 × 21600 + +----------------------------------------- +3) Top/sub workaround (prototype) +----------------------------------------- +Legacy Fortran requires separate layers: + +ln -sf Global_Sand_D2.nc4 sand_top.nc4 +ln -sf Global_Sand_D2.nc4 sand_sub.nc4 + +Applied to clay, sand, silt, OC, texture. + +----------------------------------------- +4) Fortran stage 1 — Reynolds gap fill +----------------------------------------- + +Code : STEP3_process_soildata.f90 + +Regrid NGDC–Reynolds to 30″ +Overwrite NGDC–Reynolds wherever HWSDv2 provides valid (non-missing) data +Fallback to NGDC–Reynolds only where HWSDv2 is missing +(No external land mask is used in STEP3) + +Outputs +MERGED-DATA/*.dat + + +This step performs the actual Reynolds merge. + +----------------------------------------- +5) Fortran stage 2 — regional overlays +----------------------------------------- +Code STEP4_process_soildata.f90 + +Purpose: +Overlay STATSGO over CONUS +Optionally overlay AFSIS over Africa + +Outputs: + +HWSDv2-NGDC-STATSGO/ +HWSDv2-NGDC-STATSGO-AFSIS/ +data_sources.msk + +----------------------------------------- +6) Final BCS generation (IDL) +----------------------------------------- + +Function: +Read merged global binaries +Apply horizontal gap filling +Write 36 × 18 NetCDF tiles (1200 × 1200 each) + +Gap filling expands neighborhood until a donor is found. +Entire vertical column (top+sub) is copied from the donor pixel. +Guards prevent empty-median and indexing failures. + +STEP5 operates in "noMASK" mode: +- Internal mask controls gap-fill logic only. +- mask(*) = 1 is applied solely to force creation of all 36×18 tiles. 
+- The NetCDF Mask variable is NOT a land/ocean or provenance mask. + +Outputs: +SoilProperties_H01V01.nc +... +SoilProperties_H36V18.nc + +Coarse fragments / Gravel handling +---------------------------------- +HWSDv2 provides a native coarse fragments (Gravel) field. +This replaces the legacy texture-derived gravel proxy used in HWSDv1.2. + +For compatibility with legacy STEP5 logic: +- The HWSDv2 coarse fragments field (D2) is mapped to both *_top and *_sub inputs. +- STEP5 combines top and sub using the legacy 0.3 / 0.7 weighting. +- The final Gravel field is stored as short with ScaleFactor = 0.01 (percent). + +====================================================== +Notes on masks: +====================================================== +Three distinct mask concepts exist in the HWSDv2 pipeline: + +1) Legacy HWSD land mask (hwsd_mask.bin) + Used in older HWSDv1.x pipelines. + NOT used anywhere in the HWSDv2 workflow. + +2) Data-driven validity mask (STEP3 + STEP5 internal) + Soil presence is inferred directly from variable validity + (e.g., clay/sand values not equal to missing_value). + This mask controls overwrite and horizontal gap-fill behavior + and naturally preserves islands and small land features. + +3) STEP5 tile output mask + Written to the NetCDF tiles as the variable "Mask". + This mask reflects data validity and fill state at write time. + It is NOT a land/ocean mask and must not be interpreted scientifically. + +================================================================== +Minimal reproducible run order +================================================================== +1. +----- +R: HWSDv2 → 10° tiles (D2) - STEP1_vectorized_final_processing_soil.R (needs HWSDv2 raster + SQLite DB) +module load R +Rscript STEP1_vectorized_final_processing_soil.R + +2. 
+----- +Python: tiles → global NetCDF - STEP2_stich_global_var_layer.py ( + diagnostics / plots) +module load python +python3 STEP2_stich_global_var_layer.py + +Symlinks: D2 → *_top / *_sub +3. +----- +Fortran: STEP3 (Reynolds merge) - STEP3_process_soildata.f90 / .x +Fortran: STEP4 (STATSGO / AFSIS overlays) - STEP4_process_soildata.f90 / .x +Instructions on modules and compilation in header +4. +---- +IDL: final BCS NetCDF tiles - + STEP5_create_netcdf_soildata_bcs_shared_path_noMASK_2.pro + STEP5_write_netcdf_file.pro + gravel helper: nc4_coarse_D2_to_gravel_dat.pro +module load idl +.r nc4_coarse_D2_to_gravel_dat.pro +.r STEP5_write_netcdf_file.pro +.r STEP5_create_netcdf_soildata_bcs_shared_path_noMASK_2.pro + +================================================================== +Outputs: +MERGED-DATA/ +HWSDv2-NGDC-STATSGO* +opt_HWSDv2_NGDC_STATSGO*_* + +Final comments: +--------------------- +This prototype preserves the scientific intent and behavior of the legacy pipeline while: + +cleaning R / Python / IDL logic +making gap filling cleaner +isolating remaining cleanup to Fortran only, if we want to update statsgo as well +No new interpolation assumptions have been introduced. 
diff --git a/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/STEP1_vectorized_final_processing_soil.R b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/STEP1_vectorized_final_processing_soil.R new file mode 100755 index 000000000..80d42d126 --- /dev/null +++ b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/STEP1_vectorized_final_processing_soil.R @@ -0,0 +1,400 @@ +library(dplyr) +library(RSQLite) +library(raster) +library(ncdf4) + +# ---------------------------------------------------------------------- +# HWSD2 Soil Processing (R) +# +# Inputs: +# - HWSD2.bil (+ HWSD2.hdr, HWSD2.prj): global HWSD2 raster containing HWSD2_SMU_ID per pixel +# - HWSD2.sqlite: SQLite version of the HWSD2 attribute database (converted from HWSD2.mdb) +# +# Example source files present in the HWSD2 package: +# HWSD2.bil, HWSD2.hdr, HWSD2.prj, HWSD2.stx, HWSD2.mdb +# HWSD2_RASTER.zip, HWSD2_DB.zip +# +# Run environment (Discover): +# $ module load R/4.3.1 +# $ Rscript STEP1_vectorized_final_processing_soil.R +# +# Outputs: +# - NetCDF tiles written as 10° x 10° files per layer (D1..D7), named like: +# Lon_-180_-170_Lat_70_80_D1.nc +# - Each tile contains 2-D variables on (lat, lon): +# SOC, Sand, Silt, Clay, Coarse, Bulk_Density, Ref_Bulk_Density, PH, +# Share, Porosity, Organic_Matter, Sequence_Used +# +# ---------------------------------------------------------------------- +# DATA MODEL / ORIENTATION NOTES +# +# 1) SQLite database (HWSD2.sqlite) +# - Contains soil attributes keyed by HWSD2_SMU_ID (mapping unit ID), +# depth layer (D1..D7), and component SEQUENCE/SHARE. +# - The database has NO spatial grid information (no lat/lon). +# +# 2) HWSD2 raster (HWSD2.bil + .hdr/.prj) +# - Provides the spatial grid (lat/lon, resolution, and row/column layout). 
+# - Each raster pixel stores a HWSD2_SMU_ID, which links to the SQLite attributes. +# +# 3) Raster -> matrices in R +# - raster::as.matrix() returns [row, col] = [lat, lon] in raster row order +# (typically row 1 = northernmost). +# - We flip matrix rows once so that latitude is ascending (south -> north), +# +# 4) NetCDF output convention +# - Variables are written with dimensions (lat, lon), e.g., Sand(lat, lon). +# - If we ever transpose a matrix, it is ONLY to match the declared NetCDF +# dimension order. No spatial meaning is "undone"—it is just array layout. +# +# ---------------------------------------------------------------------- +# COMPONENT SELECTION POLICY (HWSD2_LAYERS table) +# +# For each HWSD2_SMU_ID and layer: +# 1) Drop any component with invalid values (any selected field < 0). +# 2) Select the remaining component with the largest SHARE (dominant component). +# +# Derived fields: +# - OrganicMatter = min(SOC * 1.72, 100) [Van Bemmelen factor] +# - ParticleDensity: peat/volcanic/mineral heuristic +# - Porosity = clamp(1 - BULK_DENSITY / ParticleDensity, 0..1) +# ---------------------------------------------------------------------- + +# ---------------------------------------------------------------------- +# Paths +# ---------------------------------------------------------------------- +raster_path <- "/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/HWSD2.bil" +sqlite_path <- "/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/HWSD2.sqlite" +# All output for all layers (D1..D7) is in this dir , for now I kept only D2 +output_directory <- "/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/output_STEP1/" + +if (!dir.exists(output_directory)) { + dir.create(output_directory, recursive = TRUE) + cat("Created output directory:", output_directory, "\n") +} + +# 
---------------------------------------------------------------------- +# Layers to process +# NOTE: depth_m is metadata here; this script writes 2-D (lat,lon) fields. +# ---------------------------------------------------------------------- +depth_ranges <- list( + "D1" = list(depth = "0-20cm", depth_m = 0.2), + "D2" = list(depth = "20-40cm", depth_m = 0.2), + "D3" = list(depth = "40-60cm", depth_m = 0.2), + "D4" = list(depth = "60-80cm", depth_m = 0.2), + "D5" = list(depth = "80-100cm", depth_m = 0.2), + "D6" = list(depth = "100-150cm", depth_m = 0.5), + "D7" = list(depth = "150-200cm", depth_m = 0.5) +) + +# ---------------------------------------------------------------------- +# Load full HWSD2 raster once +# Raster stores HWSD2_SMU_ID per pixel. +# ---------------------------------------------------------------------- +cat("Loading full HWSD2 raster:", raster_path, "\n") +raster_data <- raster(raster_path) +cat("Raster loaded.\n") + +# Derive pixel resolution from raster rather than hardcoding +xdim <- res(raster_data)[1] +ydim <- res(raster_data)[2] + +# ---------------------------------------------------------------------- +# Helper: fetch + select one representative component per SMU for a layer +# +# Selection policy (IMPORTANT): +# For each HWSD2_SMU_ID, HWSD2 may have multiple components (SEQUENCE). +# We: +# 1) Drop any component with invalid soil properties (negative values). +# 2) Select the remaining component with the largest SHARE. 
+# +# Rationale: +# - negative values are treated as invalid/missing +# - SHARE selects the dominant component for each mapping unit +# +# Output: +# One row per HWSD2_SMU_ID (plus NA rows for SMUs with no valid component) +# ---------------------------------------------------------------------- +fetch_soil_data_layer <- function(db_conn, layer, smu_ids) { + + smu_ids <- unique(na.omit(smu_ids)) + if (length(smu_ids) == 0) return(NULL) + + # Force integer to avoid scientific notation in SQL IN() list + smu_ids <- as.integer(smu_ids) + smu_ids <- smu_ids[!is.na(smu_ids)] + if (length(smu_ids) == 0) return(NULL) + + query <- paste0( + "SELECT HWSD2_SMU_ID, SEQUENCE, SHARE, COARSE, SAND, SILT, CLAY, ", + "BULK AS BULK_DENSITY, REF_BULK AS REF_BULK_DENSITY, ORG_CARBON AS SOC, PH_WATER ", + "FROM HWSD2_LAYERS ", + "WHERE LAYER = '", layer, "' ", + "AND HWSD2_SMU_ID IN (", paste(smu_ids, collapse = ","), ")" + ) + + soil_data <- dbGetQuery(db_conn, query) + if (nrow(soil_data) == 0) return(NULL) + + # QC + select dominant SHARE component + soil_selected <- soil_data %>% + mutate(HWSD2_SMU_ID = as.integer(HWSD2_SMU_ID)) %>% + filter( + COARSE >= 0, SAND >= 0, SILT >= 0, CLAY >= 0, + BULK_DENSITY >= 0, REF_BULK_DENSITY >= 0, + SOC >= 0, PH_WATER >= 0 + ) %>% + arrange(HWSD2_SMU_ID, desc(SHARE)) %>% + group_by(HWSD2_SMU_ID) %>% + slice(1) %>% + ungroup() %>% + mutate( + # Organic matter from SOC using Van Bemmelen factor (1.72), clamp to 100% + OrganicMatter = pmin(SOC * 1.72, 100), + + # Particle density heuristic (g/cm^3) + ParticleDensity = case_when( + OrganicMatter > 20 ~ 100 / ((OrganicMatter / 1.3) + ((100 - OrganicMatter) / 2.65)), # peat-like + BULK_DENSITY < 1.0 & CLAY > 30 & SAND < 30 ~ 2.3, # volcanic-like + TRUE ~ 2.65 # mineral + ), + + # Porosity derived from bulk density and particle density, clamp [0,1] + Porosity = pmax(0, pmin(1, 1 - (BULK_DENSITY / ParticleDensity))) + ) + + # Ensure all requested SMUs appear (missing -> NA row) + missing_smus <- 
setdiff(smu_ids, unique(soil_selected$HWSD2_SMU_ID)) + if (length(missing_smus) > 0) { + soil_selected <- bind_rows( + soil_selected, + data.frame( + HWSD2_SMU_ID = missing_smus, + SEQUENCE = NA_integer_, + SHARE = NA_real_, + COARSE = NA_real_, + SAND = NA_real_, + SILT = NA_real_, + CLAY = NA_real_, + BULK_DENSITY = NA_real_, + REF_BULK_DENSITY = NA_real_, + SOC = NA_real_, + PH_WATER = NA_real_, + OrganicMatter = NA_real_, + ParticleDensity = NA_real_, + Porosity = NA_real_ + ) + ) + } + + # One row per SMU_ID (guaranteed) + soil_selected %>% distinct(HWSD2_SMU_ID, .keep_all = TRUE) +} + +# ---------------------------------------------------------------------- +# Helper: vectorized mapping from raster SMU_ID values -> soil properties +# +# Instead of looping over SMU_IDs, we: +# - take the raster cell SMU_ID vector v (length ncell) +# - build idx = match(v, table$HWSD2_SMU_ID) +# - index soil property vectors by idx (fast) +# +# IMPORTANT ABOUT ORIENTATION: +# raster row 1 is typically north. +# We output lat ascending (south->north), so after creating matrices +# we flip rows once before writing NetCDF. 
+# ---------------------------------------------------------------------- +vector_map_to_matrices <- function(raster_chunk, soil_tbl) { + + v <- getValues(raster_chunk) # HWSD2_SMU_ID per cell (length ncell) + v_int <- as.integer(v) # ensure integer IDs + + idx <- match(v_int, soil_tbl$HWSD2_SMU_ID) # NA where SMU missing/NA + + # Create vectors for every cell + soc_vec <- soil_tbl$SOC[idx] + sand_vec <- soil_tbl$SAND[idx] + silt_vec <- soil_tbl$SILT[idx] + clay_vec <- soil_tbl$CLAY[idx] + coarse_vec <- soil_tbl$COARSE[idx] + bd_vec <- soil_tbl$BULK_DENSITY[idx] + rbd_vec <- soil_tbl$REF_BULK_DENSITY[idx] + ph_vec <- soil_tbl$PH_WATER[idx] + share_vec <- soil_tbl$SHARE[idx] + por_vec <- soil_tbl$Porosity[idx] + om_vec <- soil_tbl$OrganicMatter[idx] + seq_vec <- soil_tbl$SEQUENCE[idx] + + # reshape: assign cell values into a raster, then as.matrix() + # This guarantees we follow raster's internal cell ordering. + tmp <- raster(raster_chunk) + + to_mat <- function(vec) { + rr <- tmp + values(rr) <- vec + as.matrix(rr) # [row, col] => [lat, lon] in raster's row order (north->south) + } + + mats <- list( + SOC = to_mat(soc_vec), + Sand = to_mat(sand_vec), + Silt = to_mat(silt_vec), + Clay = to_mat(clay_vec), + Coarse = to_mat(coarse_vec), + Bulk_Density = to_mat(bd_vec), + Ref_Bulk_Density = to_mat(rbd_vec), + PH = to_mat(ph_vec), + Share = to_mat(share_vec), + Porosity = to_mat(por_vec), + Organic_Matter = to_mat(om_vec), + Sequence_Used = to_mat(seq_vec) + ) + + mats +} + +# ---------------------------------------------------------------------- +# Process one 10° tile for one layer +# Outputs a NetCDF with dims (lat, lon) and lat ascending. 
+# ---------------------------------------------------------------------- +process_chunk <- function(db_conn, layer, lon_min, lon_max, lat_min, lat_max) { + + cat("Tile Lon:", lon_min, lon_max, "Lat:", lat_min, lat_max, "Layer:", layer, "\n") + + chunk_extent <- extent(lon_min, lon_max, lat_min, lat_max) + raster_chunk <- crop(raster_data, chunk_extent) + + if (is.null(raster_chunk) || ncell(raster_chunk) == 0) { + cat(" -> Empty tile, skipping.\n") + return(invisible(NULL)) + } + + ncols_chunk <- ncol(raster_chunk) + nrows_chunk <- nrow(raster_chunk) + + # lon centers (ascending west->east) + lon <- seq(from = xmin(raster_chunk) + xdim / 2, by = xdim, length.out = ncols_chunk) + + # lat centers: build ascending south->north + # (raster row order is typically north->south; we will flip data rows later) + lat <- seq(from = ymin(raster_chunk) + ydim / 2, by = ydim, length.out = nrows_chunk) + + # Create NetCDF dimensions in the same order as our matrices: (lat, lon) + lat_dim <- ncdim_def("lat", "degrees_north", lat) + lon_dim <- ncdim_def("lon", "degrees_east", lon) + + # Fill values + fill_f <- 1e15 + fill_i <- -9999L + + var_defs <- list( + SOC = ncvar_def("SOC", "%", list(lon_dim, lat_dim), missval = fill_f, longname = "Soil Organic Carbon", compression = 4), + Sand = ncvar_def("Sand", "%", list(lon_dim, lat_dim), missval = fill_f, longname = "Sand", compression = 4), + Silt = ncvar_def("Silt", "%", list(lon_dim, lat_dim), missval = fill_f, longname = "Silt", compression = 4), + Clay = ncvar_def("Clay", "%", list(lon_dim, lat_dim), missval = fill_f, longname = "Clay", compression = 4), + Coarse = ncvar_def("Coarse", "%", list(lon_dim, lat_dim), missval = fill_f, longname = "Coarse Fragments", compression = 4), + Bulk_Density = ncvar_def("Bulk_Density", "g cm-3", list(lon_dim, lat_dim), missval = fill_f, longname = "Bulk Density", compression = 4), + Ref_Bulk_Density = ncvar_def("Ref_Bulk_Density", "g cm-3", list(lon_dim, lat_dim), missval = fill_f, longname = 
"Reference Bulk Density", compression = 4), + PH = ncvar_def("PH", "-", list(lon_dim, lat_dim), missval = fill_f, longname = "pH in Water (-log(H+))", compression = 4), + Share = ncvar_def("Share", "%", list(lon_dim, lat_dim), missval = fill_f, longname = "Selected Component Share", compression = 4), + Porosity = ncvar_def("Porosity", "m3/m3", list(lon_dim, lat_dim), missval = fill_f, longname = "Porosity", compression = 4), + Organic_Matter = ncvar_def("Organic_Matter", "%", list(lon_dim, lat_dim), missval = fill_f, longname = "Organic Matter Content", compression = 4), + Sequence_Used = ncvar_def("Sequence_Used", "-", list(lon_dim, lat_dim), missval = fill_i, longname = "Sequence Used for Selected Component", compression = 4, prec = "integer") + ) + + output_file <- file.path(output_directory, paste0( + "Lon_", lon_min, "_", lon_max, "_Lat_", lat_min, "_", lat_max, "_", layer, ".nc" + )) + if (file.exists(output_file)) file.remove(output_file) + + nc <- nc_create(output_file, var_defs) + + # Unique SMUs in this tile + smu_ids <- unique(getValues(raster_chunk)) + smu_ids <- smu_ids[!is.na(smu_ids)] + + if (length(smu_ids) == 0) { + cat(" -> No SMU IDs found; writing empty tile.\n") + nc_close(nc) + return(invisible(NULL)) + } + + # Fetch soil table for these SMUs and this layer + soil_tbl <- fetch_soil_data_layer(db_conn, layer, smu_ids) + if (is.null(soil_tbl) || nrow(soil_tbl) == 0) { + cat(" -> No soil records returned; writing empty tile.\n") + nc_close(nc) + return(invisible(NULL)) + } + + # Vectorized mapping from raster values to matrices + mats <- vector_map_to_matrices(raster_chunk, soil_tbl) + + # Flip rows so matrix rows correspond to lat ascending (south->north) + for (nm in names(mats)) { + mats[[nm]] <- mats[[nm]][nrow(mats[[nm]]):1, , drop = FALSE] + } + + put_f <- function(name, mat) { + mat[is.na(mat)] <- fill_f + ncvar_put(nc, var_defs[[name]], t(mat)) + } + put_i <- function(name, mat) { + mat[is.na(mat)] <- fill_i + ncvar_put(nc, 
var_defs[[name]], t(mat)) + } + put_f("SOC", mats$SOC) + put_f("Sand", mats$Sand) + put_f("Silt", mats$Silt) + put_f("Clay", mats$Clay) + put_f("Coarse", mats$Coarse) + put_f("Bulk_Density", mats$Bulk_Density) + put_f("Ref_Bulk_Density", mats$Ref_Bulk_Density) + put_f("PH", mats$PH) + put_f("Share", mats$Share) + put_f("Porosity", mats$Porosity) + put_f("Organic_Matter", mats$Organic_Matter) + put_i("Sequence_Used", mats$Sequence_Used) + + nc_close(nc) + cat(" -> Wrote:", output_file, "\n") + invisible(NULL) +} + +# ---------------------------------------------------------------------- +# Process all 10-degree tiles for one layer +# ---------------------------------------------------------------------- +process_layer_chunks <- function(layer) { + cat("\n=== Processing layer:", layer, "===\n") + t0 <- Sys.time() + + db_conn <- dbConnect(SQLite(), sqlite_path) + on.exit(dbDisconnect(db_conn), add = TRUE) + + lon_intervals <- seq(-180, 170, by = 10) + lat_intervals <- seq(-90, 80, by = 10) + + for (lon_min in lon_intervals) { + lon_max <- lon_min + 10 + for (lat_min in lat_intervals) { + lat_max <- lat_min + 10 + tryCatch( + process_chunk(db_conn, layer, lon_min, lon_max, lat_min, lat_max), + error = function(e) { + cat("ERROR tile Lon:", lon_min, lon_max, "Lat:", lat_min, lat_max, "Layer:", layer, "\n") + cat(" ", e$message, "\n") + } + ) + } + } + + cat("Layer", layer, "done. 
Time:", Sys.time() - t0, "\n") +} + +# ---------------------------------------------------------------------- +# Run all layers requested +# ---------------------------------------------------------------------- +for (layer in names(depth_ranges)) { + process_layer_chunks(layer) +} + diff --git a/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/STEP2_stich_global_var_layer.py b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/STEP2_stich_global_var_layer.py new file mode 100755 index 000000000..dd320d1d6 --- /dev/null +++ b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/STEP2_stich_global_var_layer.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 +import os +import re +import numpy as np +import xarray as xr + +DX = 1.0 / 120.0 # 0.008333333333... degrees (30 arc-sec) +LON_SIZE = 43200 # 360 / DX +LAT_SIZE = 21600 # 180 / DX +FILL = -9999.0 + +# Parse lon/lat bounds from filename +PAT = re.compile(r"Lon_(-?\d+)_(-?\d+)_Lat_(-?\d+)_(-?\d+)_([A-Z0-9]+)\.nc$") + +def lon_to_col(lon): + # lon in [-180, 180), map to [0, LON_SIZE) + return int(round((lon + 180.0) / DX)) + +def lat_to_row(lat): + # lat in [-90, 90), map to [0, LAT_SIZE) + return int(round((lat + 90.0) / DX)) + +def mosaic_global(variable, layer, tiles_dir, out_nc4): + # Global grid centers (optional, but nice to write) + lon = np.linspace(-180.0 + DX/2, 180.0 - DX/2, LON_SIZE, dtype=np.float32) + lat = np.linspace(-90.0 + DX/2, 90.0 - DX/2, LAT_SIZE, dtype=np.float32) + + global_data = np.full((LAT_SIZE, LON_SIZE), np.nan, dtype=np.float32) + + tiles = sorted(f for f in os.listdir(tiles_dir) if f.endswith(f"_{layer}.nc")) + if not tiles: + raise FileNotFoundError(f"No tiles for layer {layer} in {tiles_dir}") + + for fn in tiles: + m = PAT.search(fn) + if not m: + # skip unexpected filenames + continue + + lon0, lon1, lat0, lat1, lyr = 
m.groups() + lon0 = float(lon0); lon1 = float(lon1) + lat0 = float(lat0); lat1 = float(lat1) + + # Compute target slice in global array + # Example tile: Lon_-180_-170 Lat_70_80 => col0..col1, row0..row1 + col0 = lon_to_col(lon0) + col1 = lon_to_col(lon1) + row0 = lat_to_row(lat0) + row1 = lat_to_row(lat1) + + path = os.path.join(tiles_dir, fn) + with xr.open_dataset(path) as ds: + if variable not in ds: + continue + + # Ensure tile data is (lat,lon) + tile = ds[variable].transpose("lat", "lon").astype("float32").values + + # Safety: expected tile size + if tile.shape != (1200, 1200): + raise ValueError(f"Unexpected tile shape {tile.shape} in {fn}") + + global_data[row0:row1, col0:col1] = tile + + # Replace NaN with fill + global_filled = np.where(np.isnan(global_data), FILL, global_data).astype(np.float32) + + out = xr.Dataset( + {variable: (("lat", "lon"), global_filled)}, + coords={"lat": lat, "lon": lon}, + attrs={"description": f"Global {variable} mosaicked from HWSD2 tiles", "layer": layer} + ) + + # Set fill value attributes (helps CDO/NCO) + out[variable].attrs["_FillValue"] = np.float32(FILL) + out[variable].attrs["missing_value"] = np.float32(FILL) + + out.to_netcdf(out_nc4, format="NETCDF4") + print(f"Saved: {out_nc4}") + +if __name__ == "__main__": + tiles_dir = "/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/output_STEP1/" +#Sand +#Silt +#Clay +#Coarse +#SOC +#Organic_Matter +#Share +#Porosity +#Bulk_Density +#Ref_Bulk_Density +#PH +#Sequence_Used + layer = "D2" + variable = "Organic_Matter" #"Sand" + out_nc4 = f"Global_{variable}_{layer}.nc4" + + mosaic_global(variable, layer, tiles_dir, out_nc4) + diff --git a/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/STEP3_process_soildata.f90 b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/STEP3_process_soildata.f90 new file mode 100755 index 
000000000..652799d4a --- /dev/null +++ b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/STEP3_process_soildata.f90 @@ -0,0 +1,789 @@ +PROGRAM STEP3_process_soildata + +! Legacy starting code was written by Sarith Mahanama since then code evolved with updated data sets, file formats and bug fixes. + +! Pipeline summary (STEP3): +! 1) Read NGDC/Reynolds 0.5° (4320×2160) fields from `path` (native bin/img inputs). +! 2) Regrid NGDC/Reynolds to 30 arcsec (43200×21600) and write F77-unformatted +! binaries into `path_regrid` (v2/MERGED-DATA/): +! PRtestBiljana_{gtext,clay,silt,sand,om,poros}_{top,sub}_30sec.dat +! 3) Merge regridded NGDC/Reynolds (layer1) with HWSDv2 NetCDF fields (layer2), +! using the HWSD land mask, and write merged 30-arcsec binaries into `path3` +! (v2/MERGED-DATA/): +! {clay,silt,sand,oc,gtext}_{top,sub}_30sec_testBiljana.dat +! (symlinks may be created separately for downstream convenience). +!================================================= +! Build / Run notes (Discover / SLES15) +! +! 1) Load Intel + IntelMPI environment (so mpiifort is available) +! module load comp/gcc/11.4.0 +! module load comp/intel/2024.2.0 +! module load mpi/openmpi/4.1.6/intel-2024.2.0 +! source /home/wjiang/swdev/github/GEOSgcm/@env/g5_modules.sh +! +! 2) Compile against ESMA Baselibs NetCDF (ifort + intelmpi, SLES15): +! +! BASE=/discover/swdev/gmao_SIteam/Baselibs/ESMA-Baselibs-7.24.0/x86_64-pc-linux-gnu/ifort_2021.6.0-intelmpi_2021.13.0-SLES15/Linux +! +! mpiifort -O2 -g STEP3_process_soildata.f90 -o STEP3_process_soildata.x \ +! -I$BASE/include/netcdf \ +! -L$BASE/lib \ +! -lnetcdff -lnetcdf \ +! -lhdf5_hl -lhdf5 -lz -lsz -lbz2 \ +! -lmfhdf -ldf \ +! -ljpeg \ +! -lcurl -lssl -lcrypto \ +! -lbrotlidec -lbrotlicommon -lzstd \ +! -lxml2 \ +! -ltirpc \ +! -ldl -lm +! 
OR for copy paste below: +!mpiifort -O2 -g STEP3_process_soildata.f90 -o STEP3_process_soildata.x -I$BASE/include/netcdf -L$BASE/lib -lnetcdff -lnetcdf -lhdf5_hl -lhdf5 -lz -lsz -lbz2 -lmfhdf -ldf -ljpeg -lcurl -lssl -lcrypto -lbrotlidec -lbrotlicommon -lzstd -lxml2 -ltirpc -ldl -lm + +! Note: ifort prints a deprecation remark (#10448); it is harmless. +! +! 3) Run (ensure runtime libs are found): +! +! export LD_LIBRARY_PATH=$BASE/lib:$LD_LIBRARY_PATH +! ./STEP3_process_soildata.x +! +!================================================= +! Debug memory issues run: valgrind ./STEP3_process_soildata.x +!================================================= + +use netcdf + +implicit none + +INTEGER :: i,j,k,ii,c1,c2,irrecs +INTEGER, PARAMETER :: nc_ngdc = 4320, nr_ngdc = 2160,nc=43200,nr=21600 +INTEGER (kind=1), allocatable, dimension (:,:) :: gtext,clay,silt,sand,om,poros + +character(len=300) :: path, path1, path2, path3, path_regrid + +LOGICAL, parameter :: regrid_ngdc = .true. !.false. + + ! ----------------------- ! + ! for gtext file ! + integer(kind=1), allocatable :: data_bytes(:) ! Array to hold all bytes + integer(kind=1), allocatable :: data_reshaped(:,:) ! Reshaped array (nr_ngdc, 8 + nc_ngdc) + integer(kind=1), allocatable :: data_array(:,:) ! Final data array (nr_ngdc, nc_ngdc) + integer :: ios + integer :: actual_size + integer :: jjj, iii + integer, parameter :: extra_bytes_per_row = 8 + integer, parameter :: bytes_per_row = extra_bytes_per_row + nc_ngdc + integer, parameter :: expected_data_size = nc_ngdc * nr_ngdc + integer, parameter :: expected_total_size = expected_data_size + (nr_ngdc * extra_bytes_per_row) + +! The goal is to produce top- and sub-layer soil property fields on the 30-arcsec global grid +! (nc=43200, nr=21600). Inputs arrive at different native resolutions and in separate files +! (gtext, clay, silt, sand, OM/SOC, porosity). When needed, NGDC/Reynolds fields are first +! 
regridded to the 30-arcsec grid, then merged with HWSD on a land-mask basis. +! +! NGDC/Reynolds soil texture input is on a 0.5° × 0.5° grid (4320 × 2160) and is regridded +! to 30 arcsec (43200 × 21600) when regrid_ngdc = .true. + +! if there is regridding to be done (regrid_ngdc = .true.), read in the datasets and regrid them +if(regrid_ngdc) then + path='/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/NGDC-Reynolds/' + path_regrid = '/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/MERGED-DATA/' + + allocate (gtext (1:nc_ngdc,1:nr_ngdc)) + allocate (clay (1:nc_ngdc,1:nr_ngdc)) + allocate (silt (1:nc_ngdc,1:nr_ngdc)) + allocate (sand (1:nc_ngdc,1:nr_ngdc)) + allocate (om (1:nc_ngdc,1:nr_ngdc)) + allocate (poros (1:nc_ngdc,1:nr_ngdc)) + + ! Doing Top Layer first. + + ! set everything to zero + gtext =0 + clay =0 + silt =0 + sand =0 + om =0 + poros =0 + + ! calculate number of "records" in i-direction + irrecs = nc_ngdc / 4 + if (4*irrecs /= nc_ngdc) then + print *, 'Error: nc_ngdc not divisible by 4:', nc_ngdc + stop + end if + + ! ----------------------- ! + ! ----------------------- ! + ! For now, gtext file needs a special reading sequence ! + ! ----------------------- ! + open(unit=10, file=trim(path)//'dtex_tp1.bin', form='unformatted', access='stream', & + status='old', action='read', iostat=ios) + + ! Get File Size ! + inquire(unit=10, size=actual_size) + if (actual_size /= expected_total_size) then + print *, 'Error: Unexpected data size for', trim('dtex_tp1.bin') + print *, 'Expected:', expected_total_size, 'bytes, Got:', actual_size, 'bytes.' + close(10) + stop + end if + print *, 'File size is as expected:', actual_size, 'bytes.' + + ! Allocate Arrays ! + allocate(data_bytes(actual_size)) + allocate(data_reshaped(nr_ngdc, bytes_per_row)) + allocate(data_array(nr_ngdc, nc_ngdc)) + + ! Read Data Bytes ! + read(10) data_bytes + close(10) + + ! 
Reshape Data Bytes ! + ! Each row has (8 + nc_ngdc) bytes + ! Assign data_bytes to data_reshaped (row-major to column-major) + do jjj = 1, nr_ngdc + data_reshaped(jjj, :) = data_bytes( (jjj-1)*bytes_per_row + 1 : jjj*bytes_per_row ) + end do + + ! Extract Data Array ! + ! Skip the first 8 bytes of each row + do jjj = 1, nr_ngdc + data_array(jjj, :) = data_reshaped(jjj, extra_bytes_per_row + 1 : extra_bytes_per_row + nc_ngdc ) + end do + + ! Correct the shape + do iii =1,nr_ngdc + gtext(iii ,:)=data_array(nr_ngdc:1:-1,iii ) + gtext(iii+nr_ngdc,:)=data_array(nr_ngdc:1:-1,iii+nr_ngdc) + end do + + ! Deallocate Arrays ! + deallocate(data_bytes) + deallocate(data_reshaped) + deallocate(data_array) + + ! ----------------------- ! + ! DONE WITH GTEXT! Other parameters are read in a regular way ! + ! ----------------------- ! + + open (unit=11, file=trim(path)//'clay_tp1.img',form='unformatted', status='old',& + access='direct',recl=1,convert = 'little_endian',action='read') + open (unit=12, file=trim(path)//'silt_tp1.img',form='unformatted', status='old',& + access='direct',recl=1,convert = 'little_endian',action='read') + open (unit=13, file=trim(path)//'sand_tp1.img',form='unformatted', status='old',& + access='direct',recl=1,convert = 'little_endian',action='read') + open (unit=14, file=trim(path)//'om_top1.img' ,form='unformatted', status='old',& + access='direct',recl=1,convert = 'little_endian',action='read') + open (unit=15, file=trim(path)//'por_top1.img',form='unformatted', status='old',& + access='direct',recl=1,convert = 'little_endian',action='read') + + ! read the files to get clay, silt, sand, poros(ity), om, and gtext + k =0 + do j=nr_ngdc,1,-1 ! loop over rows + do i=1,irrecs ! loop over the "records" in i-direction + k=k+1 ! which record to read + c1 = (4*i)-3 ! first element to read from this record + c2 = (4*i) ! last element to read from this record + ! 
read (10, rec=k) (gtext(ii,j), ii=c1,c2) + read (11, rec=k) (clay (ii,j), ii=c1,c2) + read (12, rec=k) (silt (ii,j), ii=c1,c2) + read (13, rec=k) (sand (ii,j), ii=c1,c2) + read (14, rec=k) (om (ii,j), ii=c1,c2) + read (15, rec=k) (poros(ii,j), ii=c1,c2) + end do + end do + + close (11,status='keep') + close (12,status='keep') + close (13,status='keep') + close (14,status='keep') + close (15,status='keep') + + ! data for TOP layer are now read. Start regridding + ! To do so, we use RegridRaster function (see the bottom of the script). Here, + ! on the input we provide info on the size of the input (nc_ngdc,nr_ngdc) and + ! output (nc,nr) grids, as well as the data itself, and the output filename. + + call RegridRaster(nc_ngdc,nr_ngdc,nc,nr,gtext,trim(path_regrid)//'PRtestBiljana_gtext_top') + call RegridRaster(nc_ngdc,nr_ngdc,nc,nr,clay ,trim(path_regrid)//'PRtestBiljana_clay_top' ) + call RegridRaster(nc_ngdc,nr_ngdc,nc,nr,silt ,trim(path_regrid)//'PRtestBiljana_silt_top' ) + call RegridRaster(nc_ngdc,nr_ngdc,nc,nr,sand ,trim(path_regrid)//'PRtestBiljana_sand_top' ) + call RegridRaster(nc_ngdc,nr_ngdc,nc,nr,om ,trim(path_regrid)//'PRtestBiljana_om_top' ) + call RegridRaster(nc_ngdc,nr_ngdc,nc,nr,poros,trim(path_regrid)//'PRtestBiljana_poros_top') + + ! Done regridding. Move to SUB layer and repeat the same steps as in TOP layer + +! Sub layer + gtext =0 + clay =0 + silt =0 + sand =0 + om =0 + poros =0 + + irrecs = nc_ngdc / 4 + if (4*irrecs /= nc_ngdc) then + print *, 'Error: nc_ngdc not divisible by 4:', nc_ngdc + stop + end if + + ! ----------------------- ! + ! For now, gtext file needs a special reading sequence ! + ! ----------------------- ! + open(unit=10, file=trim(path)//'dtex_sb1.bin', form='unformatted', access='stream', & + status='old', action='read', iostat=ios) + + ! Get File Size ! 
+ inquire(unit=10, size=actual_size) + if (actual_size /= expected_total_size) then + print *, 'Error: Unexpected data size for', trim('dtex_sb1.bin') + print *, 'Expected:', expected_total_size, 'bytes, Got:', actual_size, 'bytes.' + close(10) + stop + end if + print *, 'File size is as expected:', actual_size, 'bytes.' + + ! Allocate Arrays ! + allocate(data_bytes(actual_size)) + allocate(data_reshaped(nr_ngdc, bytes_per_row)) + allocate(data_array(nr_ngdc, nc_ngdc)) + + ! Read Data Bytes ! + read(10) data_bytes + close(10) + + print*, "done reading sub gtext" + ! Reshape Data Bytes ! + ! Each row has (8 + nc_ngdc) bytes + ! Assign data_bytes to data_reshaped (row-major to column-major) + do jjj = 1, nr_ngdc + data_reshaped(jjj, :) = data_bytes( (jjj-1)*bytes_per_row + 1 : jjj*bytes_per_row ) + end do + + ! Extract Data Array ! + ! Skip the first 8 bytes of each row + do jjj = 1, nr_ngdc + data_array(jjj, :) = data_reshaped(jjj, extra_bytes_per_row + 1 : extra_bytes_per_row + nc_ngdc ) + end do + + ! Correct the shape + do iii =1,nr_ngdc + gtext(iii ,:)=data_array(nr_ngdc:1:-1,iii ) + gtext(iii+nr_ngdc,:)=data_array(nr_ngdc:1:-1,iii+nr_ngdc) + end do + + ! Deallocate Arrays ! + deallocate(data_bytes) + deallocate(data_reshaped) + deallocate(data_array) + + ! ----------------------- ! + ! DONE WITH GTEXT! Other parameters are read in a regular way ! + ! ----------------------- ! 
+ + open (unit=11, file=trim(path)//'clay_sb1.img',form='unformatted', status='old',& + access='direct',recl=1,convert = 'little_endian',action='read') + open (unit=12, file=trim(path)//'silt_sb1.img',form='unformatted', status='old',& + access='direct',recl=1,convert = 'little_endian',action='read') + open (unit=13, file=trim(path)//'sand_sb1.img',form='unformatted', status='old',& + access='direct',recl=1,convert = 'little_endian',action='read') + open (unit=14, file=trim(path)//'om_sub1.img' ,form='unformatted', status='old',& + access='direct',recl=1,convert = 'little_endian',action='read') + open (unit=15, file=trim(path)//'por_sub1.img',form='unformatted', status='old',& + access='direct',recl=1,convert = 'little_endian',action='read') + + k =0 + do j=nr_ngdc,1,-1 + do i=1,irrecs + k=k+1 + c1 = (4*i)-3 + c2 = (4*i) + ! read (10, rec=k) (gtext(ii,j), ii=c1,c2) + read (11, rec=k) (clay (ii,j), ii=c1,c2) + read (12, rec=k) (silt (ii,j), ii=c1,c2) + read (13, rec=k) (sand (ii,j), ii=c1,c2) + read (14, rec=k) (om (ii,j), ii=c1,c2) + read (15, rec=k) (poros(ii,j), ii=c1,c2) + end do + end do + + close (11,status='keep') + close (12,status='keep') + close (13,status='keep') + close (14,status='keep') + close (15,status='keep') + + call RegridRaster(nc_ngdc,nr_ngdc,nc,nr,gtext,trim(path_regrid)//'PRtestBiljana_gtext_sub') + call RegridRaster(nc_ngdc,nr_ngdc,nc,nr,clay ,trim(path_regrid)//'PRtestBiljana_clay_sub' ) + call RegridRaster(nc_ngdc,nr_ngdc,nc,nr,silt ,trim(path_regrid)//'PRtestBiljana_silt_sub' ) + call RegridRaster(nc_ngdc,nr_ngdc,nc,nr,sand ,trim(path_regrid)//'PRtestBiljana_sand_sub' ) + call RegridRaster(nc_ngdc,nr_ngdc,nc,nr,om ,trim(path_regrid)//'PRtestBiljana_om_sub' ) + call RegridRaster(nc_ngdc,nr_ngdc,nc,nr,poros,trim(path_regrid)//'PRtestBiljana_poros_sub') + +endif + +! Regridding is performed only if needed (regrid_ngdc = .true.). +! At this point, all datasets are on the same target grid. +! 
Originally, NGDC/Reynolds data are on nc_ngdc = 4320 × nr_ngdc = 2160 (0.5° grid). +! After regridding, all fields are on nc = 43200 × nr = 21600 (30-arcsec grid). +! +! We can now proceed with merging the variables. +! The merge is mask-free: HWSDv2 values overwrite NGDC/Reynolds values +! wherever HWSDv2 provides valid (non-missing) data; otherwise NGDC is retained. +! +! <<<<< MERGING BEGINS >>>>> +! STEP1: NGDC-HWSD Merger +! LAYER1: NGDC Reynolds Data +! LAYER2: HWSD DATA + +! ------------------------------------------------------------ +! STEP3 merge setup: +! +! Layer 1 (NGDC/Reynolds): +! - Already regridded to 30 arcsec and written to: +! path_regrid = .../v2/MERGED-DATA/ +! - Files: PRtestBiljana_*_{top,sub}_30sec.dat +! +! Layer 2 (HWSDv2): +! - Read directly from NetCDF files in: +! path2 = .../v2/input_STEP3_also_output_STEP2/ +! +! Merge logic (mask-free): +! - If HWSDv2 provides a valid (non-missing) value at a grid cell, +! it overwrites the NGDC/Reynolds value. +! - If HWSDv2 is missing, the NGDC/Reynolds value is retained. +! - No external land mask (e.g., hwsd_mask.bin) is used. +! +! Outputs: +! - Merged 30-arcsec binaries written to: +! path3 = .../v2/MERGED-DATA/ +! ------------------------------------------------------------ +path2 = '/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/' // & + 'land/soil/soil_properties/v2/input_STEP3_also_output_STEP2/' +path3 = '/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/' // & + 'land/soil/soil_properties/v2/MERGED-DATA/' + +path1 = path_regrid + +! Ready to Merge!!! +! Merging is done in same manner first for the top- and then for sub-layer. + +print *,'Step 1: Merging NGDC & HWSD ' +print *,' Top Layer ' +print *,'============================' +print *,' ' + +! in case the regridding was performed (regrid_ngdc = .true.) +! each of the regridded datasets (i.e., gtext,clay,silt,sand,om_su,poros) +! needs to be merged. + +if(regrid_ngdc) then + + ! 
call merge_data function (see bottom of the script) and send the regridded file + ! (e.g., clay_top_30sec.dat) and the "other" file (e.g., clay_top.bin), together + ! with grid size (nc,nr) and their paths/filenames. (Mask-free merge: overwrite where HWSDv2 is valid.) + + call merge_data (1.,nc,nr, & + path1,path2,path3, & + 'PRtestBiljana_sand_top_30sec.dat', & + 'sand_top.nc4', & + 'sand_top_30sec_testBiljana.dat') + call merge_data (1.,nc,nr, & + path1,path2,path3, & + 'PRtestBiljana_clay_top_30sec.dat', & ! dataset-1 to be merged (this is what the regridding part of the code created) + 'clay_top.nc4', & ! dataset-2 to be merged (HWSDv2 NetCDF field produced by STEP2, read from path2) + 'clay_top_30sec_testBiljana.dat') ! output (merged data) file + + call merge_data (1.,nc,nr, & + path1,path2,path3, & + 'PRtestBiljana_silt_top_30sec.dat', & + 'silt_top.nc4', & + 'silt_top_30sec_testBiljana.dat') + + call merge_data (1.0,nc,nr, & + path1,path2,path3, & + 'PRtestBiljana_om_top_30sec.dat', & + 'OC_top.nc4', & + 'oc_top_30sec_testBiljana.dat') +endif + +call merge_data (2.,nc,nr, & + path1,path2,path3, & + 'PRtestBiljana_gtext_top_30sec.dat', & + 'gtext_top.nc4', & + 'gtext_top_30sec_testBiljana.dat') + +! 
repeat for Sub-layer +print *,' ' +print *,' Sub Layer ' +print *,'============================' +print *,' ' +if(regrid_ngdc) then + + call merge_data (1.,nc,nr, & + path1,path2,path3, & + 'PRtestBiljana_clay_sub_30sec.dat', & + 'clay_sub.nc4', & + 'clay_sub_30sec_testBiljana.dat') + + call merge_data (1.,nc,nr, & + path1,path2,path3, & + 'PRtestBiljana_silt_sub_30sec.dat', & + 'silt_sub.nc4', & + 'silt_sub_30sec_testBiljana.dat') + + call merge_data (1.,nc,nr, & + path1,path2,path3, & + 'PRtestBiljana_sand_sub_30sec.dat', & + 'sand_sub.nc4', & + 'sand_sub_30sec_testBiljana.dat') + + call merge_data (1.0,nc,nr, & + path1,path2,path3, & + 'PRtestBiljana_om_sub_30sec.dat', & + 'OC_sub.nc4', & + 'oc_sub_30sec_testBiljana.dat') +endif +call merge_data (2.,nc,nr, & + path1,path2,path3, & + 'PRtestBiljana_gtext_sub_30sec.dat', & + 'gtext_sub.nc4', & + 'gtext_sub_30sec_testBiljana.dat') + +! LAYER1: NGDC-HWSD merged data +! LAYER2: STATSGO + +! LAYER1: NGDC-HWSD-STATSGO merged data +! LAYER2: CANADA + +! LAYER1: NGDC-HWSD-STATSGO-CANADA merged data +! LAYER2: AUSTRALIA + + +END PROGRAM STEP3_process_soildata + +! ----------------------------------------------------------------------------------- + +subroutine RegridRaster(nx,ny,nc,nr,Rin,path) + + ! this is the regridding routine, which delivers the input data + ! on a new grid. It can upscale or downscale. All it needs + ! is the input and output grid size (nx,ny,nc,nr), the data + ! to be regridded, and the name of the output file. + integer, intent (in) :: nx,ny,nc,nr + integer (kind=1),intent(IN) :: Rin (nx,ny) + real,allocatable, dimension (:,:) :: Rout + CHARACTER(LEN=*) :: path + REAL :: xx, yy + integer :: i,j,ii,jj + allocate (Rout(1:nc,1:nr)) + Rout=0 + xx = size(Rin ,1)/float(size(Rout,1)) + yy = size(Rin ,2)/float(size(Rout,2)) + + print *,'Writing ..:',trim(path) ! 
this line should really go after the do loop + + do j=1,nr + jj = int((j-1)*yy) + 1 + jj = max(1, min(jj, ny)) + do i=1,nc + ii = int((i-1)*xx) + 1 + ii = max(1, min(ii, nx)) + Rout(i,j) = Rin(ii,jj)*1. + end do + end do + + ! write out the regridded data array + open (10,file=trim(path)//'_30sec.dat',form='unformatted',status='unknown') + do j=1,nr + write (10) Rout(:,j) + end do + close (10,status='keep') + deallocate (Rout) +end subroutine RegridRaster + +! ----------------------------------------------------------------------------------- + +subroutine merge_data(sf, nc, nr, path1, path2, path3, file1, file2, file3) + use netcdf + use ieee_arithmetic ! For ieee_is_nan function + implicit none + + ! Inputs + integer, intent(in) :: nc, nr + real, intent(in) :: sf + character(len=300), intent(in) :: path1, path2, path3 + character(len=*), intent(in) :: file1, file2, file3 + + ! Local variables + integer :: i, j, retval, ncid, varid, lat_len, lon_len + logical :: is_netcdf + real, dimension(13) :: HWSD_CLASS + real, allocatable :: data1(:, :) + real(4), allocatable :: data2(:, :) + character(len=100) :: variable_name + character(len=100) :: base_file2 + character(len=128) :: errmsg + real(4) :: fill_value + integer :: ndims + integer, allocatable :: dimids(:) + character(len=NF90_MAX_NAME) :: dim_name + integer :: dim_len + integer(8) :: count_positive + integer :: iostat + + ! Initialize HWSD_CLASS + data HWSD_CLASS /12., 11., 12., 8., 9., 5., 4., 10., 6., 7., 3., 2., 1./ + + ! Allocate arrays + allocate(data1(nc, nr)) + allocate(data2(nc, nr)) ! Matching dimensions to the global grid + + ! Extract base file name for file2 + call get_base_file_name(trim(file2), base_file2) + print *, "Base file name extracted: ", trim(base_file2)//'END' + + ! Assign variable name based on base file name + if (base_file2 == 'sand_top.nc4' .or. base_file2 == 'sand_sub.nc4') then + variable_name = 'Sand' + else if (base_file2 == 'silt_top.nc4' .or. 
base_file2 == 'silt_sub.nc4') then + variable_name = 'Silt' + else if (base_file2 == 'clay_top.nc4' .or. base_file2 == 'clay_sub.nc4') then + variable_name = 'Clay' + else if (base_file2 == 'OC_top.nc4' .or. base_file2 == 'OC_sub.nc4' ) then + variable_name = 'SOC' + else if (base_file2 == 'gtext_top.nc4' .or. base_file2 == 'gtext_sub.nc4') then + variable_name = 'Coarse' + else + print *, "Error: Unknown file name for variable assignment:", trim(base_file2) + stop + end if + + print *, "Variable name assigned: <", trim(variable_name), ">" + + ! Read data1 from file1 (binary) + open(unit=10, file=trim(path1)//trim(file1), form="unformatted", & + status="old", action="read") + do j = 1, nr + read(10) data1(:, j) + end do + close(10) + ! Harmonize units for SOC merge: Reynolds file is OM; convert to OC so output is OC everywhere + if (variable_name == 'SOC') then + if (index(trim(file1), '_om_') > 0) then + where (data1 >= 0.0) data1 = data1 / 1.72 + print *, 'INFO: Converted layer-1 OM->OC for file1=', trim(file1) + end if + end if + + ! Debugging: Print sample value from data1 + print *, "Sample data1(1, 1): ", data1(1, 1) + + ! Open NetCDF file + retval = nf90_open(trim(path2)//trim(file2), nf90_nowrite, ncid) + if (retval /= nf90_noerr) then + errmsg = nf90_strerror(retval) + print *, "Error opening NetCDF file: ", trim(errmsg) + stop + end if + print *, "Detected NetCDF format for file2." + + ! Attempt to inquire about the variable using nf90_inq_varid + retval = nf90_inq_varid(ncid, trim(variable_name), varid) + if (retval /= nf90_noerr) then + errmsg = nf90_strerror(retval) + print *, "Error: Variable <", trim(variable_name), "> not found in NetCDF file." + print *, "NetCDF error code: ", retval + print *, "NetCDF error message: ", trim(errmsg) + stop + end if + print *, "Successfully found variable: <", trim(variable_name), ">, varid: ", varid + + ! 
Inquire about the number of dimensions + retval = nf90_inquire_variable(ncid, varid, ndims=ndims) + if (retval /= nf90_noerr) then + errmsg = nf90_strerror(retval) + print *, "Error in nf90_inquire_variable for varid=", varid + print *, "NetCDF error message: ", trim(errmsg) + stop + end if + + ! Allocate dimids array based on ndims + allocate(dimids(ndims)) + + ! Inquire about dimension IDs + retval = nf90_inquire_variable(ncid, varid, dimids=dimids) + if (retval /= nf90_noerr) then + errmsg = nf90_strerror(retval) + print *, "Error in nf90_inquire_variable for varid=", varid + print *, "NetCDF error message: ", trim(errmsg) + stop + end if + + if (ndims /= 2) then + print *, "Error: Expected variable to have 2 dimensions, but got ", ndims + stop + end if + + ! Get dimensions lengths and names + retval = nf90_inquire_dimension(ncid, dimids(1), name=dim_name, len=dim_len) + if (retval /= nf90_noerr) then + errmsg = nf90_strerror(retval) + print *, "Error in nf90_inquire_dimension for dimid=", dimids(1) + print *, "NetCDF error message: ", trim(errmsg) + stop + end if + print *, "Dimension 1: name=", trim(dim_name), " length=", dim_len + + ! Assign lengths based on dimension names + if (trim(dim_name) == 'lat') then + lat_len = dim_len + else if (trim(dim_name) == 'lon') then + lon_len = dim_len + else + print *, "Error: Unexpected dimension name: ", trim(dim_name) + stop + end if + + retval = nf90_inquire_dimension(ncid, dimids(2), name=dim_name, len=dim_len) + if (retval /= nf90_noerr) then + errmsg = nf90_strerror(retval) + print *, "Error in nf90_inquire_dimension for dimid=", dimids(2) + print *, "NetCDF error message: ", trim(errmsg) + stop + end if + print *, "Dimension 2: name=", trim(dim_name), " length=", dim_len + + ! 
Assign lengths based on dimension names + if (trim(dim_name) == 'lat') then + lat_len = dim_len + else if (trim(dim_name) == 'lon') then + lon_len = dim_len + else + print *, "Error: Unexpected dimension name: ", trim(dim_name) + stop + end if + + ! Check if dimensions match + if (lat_len /= nr) then + print *, "Error: Latitude dimension mismatch. Expected ", nr, " but got ", lat_len + stop + end if + if (lon_len /= nc) then + print *, "Error: Longitude dimension mismatch. Expected ", nc, " but got ", lon_len + stop + end if + + print *, "Dimension checks passed." + print *, "Attempting to read variable: ", trim(variable_name) + + print *, "Expected dimensions: nc=", nc, " nr=", nr + print *, "NetCDF dimensions: lon_len=", lon_len, " lat_len=", lat_len + + ! Try to get the 'missing_value' attribute + retval = nf90_get_att(ncid, varid, 'missing_value', fill_value) + if (retval /= nf90_noerr) then + ! If 'missing_value' is not found, default to -9999.0 + fill_value = -9999.0 + print *, "No 'missing_value' attribute found; defaulting fill_value to ", fill_value + else + print *, "'missing_value' attribute found; fill_value set to ", fill_value + end if + + ! Read the variable into data2 + retval = nf90_get_var(ncid, varid, data2) + if (retval /= nf90_noerr) then + errmsg = nf90_strerror(retval) + print *, "Error reading variable from NetCDF file: ", trim(variable_name) + print *, "NetCDF error message: ", trim(errmsg) + stop + end if + print *, "Successfully read variable: ", trim(variable_name) + + ! Close the NetCDF file + retval = nf90_close(ncid) + if (retval /= nf90_noerr) then + print *, "Error closing NetCDF file." + stop + end if + + ! Deallocate dimids array + deallocate(dimids) + + ! Debugging: Print a sample value from data2 + print *, "Sample data2(1, 1): ", data2(1, 1) + + ! Replace missing values in data2 with NaN + do j = 1, nr + do i = 1, nc + if (data2(i, j) == fill_value) data2(i, j) = ieee_value(0.0, ieee_quiet_nan) + end do + end do + + ! 
Merge data + count_positive = 0_8 + + do j = 1, nr + do i = 1, nc + if (.not. ieee_is_nan(data2(i,j))) then + count_positive = count_positive + 1_8 + + if (sf == 2.0) then + data1(i,j) = HWSD_CLASS(int(data2(i,j))) + else + data1(i,j) = data2(i,j) * sf + end if + end if + end do + end do + + print *, "Number of valid data2 values processed: ", count_positive + + + + ! Debugging: Print a sample merged value + print *, "Sample merged data1(1, 1): ", data1(1, 1) + + ! Write the merged data array to file3 + open(unit=10, file=trim(path3)//trim(file3), form="unformatted", & + status="unknown", action="write", iostat=iostat) + if (iostat /= 0) then + print *, "Error opening file for writing: ", trim(path3)//trim(file3) + stop + end if + + do j = 1, nr + write(10, iostat=iostat) data1(:, j) + if (iostat /= 0) then + print *, "Error writing to file at row ", j + stop + end if + end do + close(10) + + print *, "Merged data written to file: ", trim(file3) + +end subroutine merge_data + +subroutine get_base_file_name(file_path, base_name) + implicit none + character(len=*), intent(in) :: file_path + character(len=100), intent(out) :: base_name + integer :: slash_pos + + ! Find the last slash + slash_pos = len_trim(file_path) + do while (slash_pos > 0 .and. file_path(slash_pos:slash_pos) /= '/') + slash_pos = slash_pos - 1 + end do + + ! 
Extract the base file name + if (slash_pos > 0) then + base_name = file_path(slash_pos+1:) + else + base_name = file_path + end if + base_name = trim(base_name) +end subroutine get_base_file_name + diff --git a/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/STEP4_process_soildata.f90 b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/STEP4_process_soildata.f90 new file mode 100755 index 000000000..3f519cc8f --- /dev/null +++ b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/STEP4_process_soildata.f90 @@ -0,0 +1,996 @@ +PROGRAM STEP4_process_soildata + +! Legacy starting code was written by Sarith Mahanama since then code evolved with updated data sets, file formats and bug fixes. + +! Differs from process_soildata.f90 in several ways. Some include: +! - Uses different version of data HWSDv1.21 instead of HWSD +! - Does not read in pro_ files (porosity?!) +! - Populates with missing data in the main part of the code rather than in subroutine +! - Current workflow uses merge_min_statsgo/merge_carbon_statsgo/merge_min/merge_carbon. +! - has a number of conditions when merging and/or regriding. + +!================================================= +! Build / Run notes (Discover / SLES15) +! +! This program uses NetCDF-Fortran (`use netcdf`). On Discover, avoid nf-config from +! GEOSpyD (can point to a netcdf.mod built by a different compiler). Use ESMA Baselibs +! NetCDF built with Intel Fortran instead. +! +! Compile: +! BASE=/discover/swdev/gmao_SIteam/Baselibs/ESMA-Baselibs-7.24.0/x86_64-pc-linux-gnu/ifort_2021.6.0-intelmpi_2021.13.0-SLES15/Linux +! +! mpiifort -O2 -g STEP4_process_soildata.f90 -o STEP4_process_soildata.x \ +! -I$BASE/include/netcdf \ +! -L$BASE/lib \ +! -lnetcdff -lnetcdf \ +! -lhdf5_hl -lhdf5 -lz -lsz -lbz2 \ +! -lmfhdf -ldf -ljpeg \ +! -lcurl -lssl -lcrypto \ +! 
-lbrotlidec -lbrotlicommon -lzstd \ +! -lxml2 -ltirpc \ +! -ldl -lm + +! one line to use: +!mpiifort -O2 -g STEP4_process_soildata.f90 -o STEP4_process_soildata.x -I$BASE/include/netcdf -L$BASE/lib -lnetcdff -lnetcdf -lhdf5_hl -lhdf5 -lz -lsz -lbz2 -lmfhdf -ldf -ljpeg -lcurl -lssl -lcrypto -lbrotlidec -lbrotlicommon -lzstd -lxml2 -ltirpc -ldl -lm + +! +! Run: +! export LD_LIBRARY_PATH=$BASE/lib:$LD_LIBRARY_PATH +! ./process_soildata.x +! +! Inputs: +! - STEP3 merged binaries: v2/MERGED-DATA/*_{top,sub}_30sec*.dat +! - STATSGO2: .../STATSGO2/statsgo_* +! - AFSIS: .../Africa-AFSIS/afsis_* +! +! Outputs: +! - v2/HWSDv2-NGDC-STATSGO/ (+ data_sources.msk) +! - v2/HWSDv2-NGDC-STATSGO-AFSIS/ (+ data_sources.msk) +!================================================= + +implicit none +INTEGER, PARAMETER :: nc_ngdc = 4320, nr_ngdc = 2160,nc=43200,nr=21600 +INTEGER (kind=1), allocatable, dimension (:,:) :: gtext,clay,silt,sand,om,poros +INTEGER , allocatable, dimension (:,:) :: src_mask +INTEGER :: i,j,k,ii,c1,c2,irrecs +LOGICAL, parameter :: regrid_ngdc = .false. +CHARACTER*300 :: path +CHARACTER*300 :: path1,path2,path3 +logical, parameter :: ver_merge = .true. +integer :: layer_id ! (1=>HWSD; 2=> STATSGO) + +! This NGDC part wont be used below, so the flag is set to .false. +! Reading and Regridding NGDC Reynolds data +if(regrid_ngdc) then + path='/discover/nobackup/smahanam/NGDC-Reynolds/' + allocate (gtext (1:nc_ngdc,1:nr_ngdc)) + allocate (clay (1:nc_ngdc,1:nr_ngdc)) + allocate (silt (1:nc_ngdc,1:nr_ngdc)) + allocate (sand (1:nc_ngdc,1:nr_ngdc)) + allocate (om (1:nc_ngdc,1:nr_ngdc)) + allocate (poros (1:nc_ngdc,1:nr_ngdc)) + + ! 
Top Layer + gtext =0 + clay =0 + silt =0 + sand =0 + om =0 + poros =0 + irrecs = nint (nc_ngdc / 4.0) + open (unit=10, file=trim(path)//'dtex_tp1.img',form='unformatted', status='old',& + access='direct',recl=1,convert = 'little_endian',action='read') + open (unit=11, file=trim(path)//'clay_tp1.img',form='unformatted', status='old',& + access='direct',recl=1,convert = 'little_endian',action='read') + open (unit=12, file=trim(path)//'silt_tp1.img',form='unformatted', status='old',& + access='direct',recl=1,convert = 'little_endian',action='read') + open (unit=13, file=trim(path)//'sand_tp1.img',form='unformatted', status='old',& + access='direct',recl=1,convert = 'little_endian',action='read') + open (unit=14, file=trim(path)//'om_top1.img' ,form='unformatted', status='old',& + access='direct',recl=1,convert = 'little_endian',action='read') +! open (unit=15, file=trim(path)//'por_top1.img',form='unformatted', status='old',& +! access='direct',recl=1,convert = 'little_endian',action='read') + + k =0 + do j=nr_ngdc,1,-1 + do i=1,irrecs + k=k+1 + c1 = (4*i)-3 + c2 = (4*i) + read (10, rec=k) (gtext(ii,j), ii=c1,c2) + read (11, rec=k) (clay (ii,j), ii=c1,c2) + read (12, rec=k) (silt (ii,j), ii=c1,c2) + read (13, rec=k) (sand (ii,j), ii=c1,c2) + read (14, rec=k) (om (ii,j), ii=c1,c2) +! read (15, rec=k) (poros(ii,j), ii=c1,c2) +! print *, gtext(ii,j),sand (ii,j),clay (ii,j) + do ii = c1,c2 + if((gtext(ii,j) ==0).or.((sand (ii,j) + clay (ii,j)) == 0.)) then + gtext(ii,j) = -99 + clay (ii,j) = -99 + silt (ii,j) = -99 + sand (ii,j) = -99 + om (ii,j) = -99 +! 
pause + endif + end do + end do + end do + + close (10,status='keep') + close (11,status='keep') + close (12,status='keep') + close (13,status='keep') + close (14,status='keep') + close (15,status='keep') + + call RegridRaster(nc_ngdc,nr_ngdc,nc,nr,gtext,trim(path)//'gtext_top') + call RegridRaster(nc_ngdc,nr_ngdc,nc,nr,clay ,trim(path)//'clay_top' ) + call RegridRaster(nc_ngdc,nr_ngdc,nc,nr,silt ,trim(path)//'silt_top' ) + call RegridRaster(nc_ngdc,nr_ngdc,nc,nr,sand ,trim(path)//'sand_top' ) + call RegridRaster(nc_ngdc,nr_ngdc,nc,nr,om ,trim(path)//'om_top' ) +! call RegridRaster(nc_ngdc,nr_ngdc,nc,nr,poros,trim(path)//'poros_top') + +! Sub layer + gtext =0 + clay =0 + silt =0 + sand =0 + om =0 + poros =0 + irrecs = nint (nc_ngdc / 4.0) + open (unit=10, file=trim(path)//'dtex_sb1.img',form='unformatted', status='old',& + access='direct',recl=1,convert = 'little_endian',action='read') + open (unit=11, file=trim(path)//'clay_sb1.img',form='unformatted', status='old',& + access='direct',recl=1,convert = 'little_endian',action='read') + open (unit=12, file=trim(path)//'silt_sb1.img',form='unformatted', status='old',& + access='direct',recl=1,convert = 'little_endian',action='read') + open (unit=13, file=trim(path)//'sand_sb1.img',form='unformatted', status='old',& + access='direct',recl=1,convert = 'little_endian',action='read') + open (unit=14, file=trim(path)//'om_sub1.img' ,form='unformatted', status='old',& + access='direct',recl=1,convert = 'little_endian',action='read') +! open (unit=15, file=trim(path)//'por_sub1.img',form='unformatted', status='old',& +! access='direct',recl=1,convert = 'little_endian',action='read') + + k =0 + do j=nr_ngdc,1,-1 + do i=1,irrecs + k=k+1 + c1 = (4*i)-3 + c2 = (4*i) + read (10, rec=k) (gtext(ii,j), ii=c1,c2) + read (11, rec=k) (clay (ii,j), ii=c1,c2) + read (12, rec=k) (silt (ii,j), ii=c1,c2) + read (13, rec=k) (sand (ii,j), ii=c1,c2) + read (14, rec=k) (om (ii,j), ii=c1,c2) +! 
read (15, rec=k)(poros(ii,j), ii=c1,c2) + do ii = c1,c2 + if((gtext(ii,j) ==0).or.((sand (ii,j) + clay (ii,j)) == 0.)) then + gtext(ii,j) = -99 + clay (ii,j) = -99 + silt (ii,j) = -99 + sand (ii,j) = -99 + om (ii,j) = -99 + endif + end do + end do + end do + + close (10,status='keep') + close (11,status='keep') + close (12,status='keep') + close (13,status='keep') + close (14,status='keep') + close (15,status='keep') + + call RegridRaster(nc_ngdc,nr_ngdc,nc,nr,gtext,trim(path)//'gtext_sub') + call RegridRaster(nc_ngdc,nr_ngdc,nc,nr,clay ,trim(path)//'clay_sub' ) + call RegridRaster(nc_ngdc,nr_ngdc,nc,nr,silt ,trim(path)//'silt_sub' ) + call RegridRaster(nc_ngdc,nr_ngdc,nc,nr,sand ,trim(path)//'sand_sub' ) + call RegridRaster(nc_ngdc,nr_ngdc,nc,nr,om ,trim(path)//'om_sub' ) +! call RegridRaster(nc_ngdc,nr_ngdc,nc,nr,poros,trim(path)//'poros_sub') + +endif + +! +! <<<<< OVERLAYING BEGINS >>>>> +! >>>>> WE DON'T USE NGDC ANY MORE <<<<< +! STEP1: Overlay HWSD on NGDC +! LAYER1: NGDC Reynolds Data +! LAYER2: HWSDv2 +! +! STEP 2: Overlay Statsgo on HWSDv2 +! LAYER1: HWSDv2 +! LAYER2: STATSGO2 + +allocate (src_mask(1:nc,1:nr)) +src_mask = 0 + +! NGDC+HWSD_v2 data (have to be created first; and they are) +! combined HWSDv2 and reynolds +path1 ='/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/MERGED-DATA/' +! STATSGO file +path2 ='/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/STATSGO2/statsgo_' +! 
Output file +path3 ='/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/HWSDv2-NGDC-STATSGO/' + +print *,'Step 1: Overlaying STATSGO on HWSDv1.21 while vertical merging ' +print *,'===============================================================' +print *,' ' + +call merge_min_statsgo (.true., ver_merge,1, nc,nr,src_mask, & + path1,path2,path3) + +print *,maxval(src_mask),minval(src_mask) + +call merge_carbon_statsgo(.true., ver_merge,1, nc,nr,src_mask, & + path1,path2,path3) +print *,maxval(src_mask),minval(src_mask) + + open (10,file=trim(path3)//'data_sources.msk',form='unformatted', & + status='unknown',action='write') + do j = 1,nr + write (10) (src_mask (i,j),i =1,nc) + end do + + close (10, status ='keep') + +! Data generated in the step above +path1 ='/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/HWSDv2-NGDC-STATSGO/' + +! AFSIS data +path2 ='/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/Africa-AFSIS/afsis_' + +! output file +path3 ='/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/HWSDv2-NGDC-STATSGO-AFSIS/' + +print *,'Step 2: Overlaying AFSIS on HWSDv1.21-STATSGO while vertical merging ' +print *,'=====================================================================' +print *,' ' + + +call merge_min (.false., ver_merge, 2, nc,nr,src_mask, & + path1,path2,path3) + +print *,maxval(src_mask),minval(src_mask) + +call merge_carbon(.false., ver_merge, 2, nc,nr,src_mask, & + path1,path2,path3) +print *,maxval(src_mask),minval(src_mask) + + open (10,file=trim(path3)//'data_sources.msk',form='unformatted', & + status='unknown',action='write') + do j = 1,nr + write (10) (src_mask (i,j),i =1,nc) + end do + + close (10, status ='keep') + +END PROGRAM STEP4_process_soildata +! +! ----------------------------------------------------------------------------------- +! 
+subroutine RegridRaster(nx,ny,nc,nr,Rin,path) + integer, intent (in) :: nx,ny,nc,nr + integer (kind=1),intent(IN) :: Rin (nx,ny) + real,allocatable, dimension (:,:) :: Rout + CHARACTER(LEN=*) :: path + REAL :: xx, yy + integer :: i,j,ii,jj + allocate (Rout(1:nc,1:nr)) + Rout=0 + xx = size(Rin ,1)/float(size(Rout,1)) + yy = size(Rin ,2)/float(size(Rout,2)) + + print *,'Writing ..:',trim(path) + + do j=1,size(Rout,2) + jj = (j-1)*yy + 1 + do i=1,size(Rout,1) + ii = (i-1)*xx + 1 + Rout(i,j) = Rin(ii,jj) + if (Rin(ii,jj) == -99) Rout(i,j) = -9999. + end do + end do + + open (10,file=trim(path)//'_30sec.dat',form='unformatted',status='unknown') + do j=1,nr + write (10) Rout(:,j) + end do + close (10,status='keep') + deallocate (Rout) +end subroutine RegridRaster + +! ----------------------------------------------------------------------------------- +! to accomodate WHSD-v2 input format (in .nc) a separate [copy] of merging subroutines +! is created and use in Step-1, although the mathodology of merging is same in both +! _statsgo and _afsis +subroutine merge_min_statsgo (self_fill,ver_merge,layer_id,nc,nr,src_mask, & + path1,path2,path3) + + use netcdf + use, intrinsic :: ieee_arithmetic ! 
Import IEEE module for NaN detection + + implicit none + integer, intent(in) :: nc,nr,layer_id + logical, intent (in) :: self_fill,ver_merge + character(len=300),intent (in) :: path1,path2,path3 + character(len=100) :: file1,file2,file3,file4 + character(len=100) :: file1_bin,file2_bin,file3_bin,file4_bin + character(len=100) :: file5,file6,file7,file8 + integer, intent (inout), dimension (nc,nr) :: src_mask + + integer :: i,j,lp1 + + real, allocatable, dimension (:) :: data1, data2, data3, data4 + real, allocatable, dimension (:) :: data5, data6, data7, data8 + real :: sum_top, sum_sub + + integer :: retval,ncid1,ncid2,ncid3,ncid4,varid1,varid2,varid3,varid4 + real(4) :: fill_value1,fill_value2,fill_value3,fill_value4 + + allocate (data1(1:nc)) + allocate (data2(1:nc)) + allocate (data3(1:nc)) + allocate (data4(1:nc)) + allocate (data5(1:nc)) + allocate (data6(1:nc)) + allocate (data7(1:nc)) + allocate (data8(1:nc)) + + file1 = 'clay_top_30sec.dat' + file2 = 'sand_top_30sec.dat' + file3 = 'clay_sub_30sec.dat' + file4 = 'sand_sub_30sec.dat' + + ! note: for HWDS_v2-NGDC files, we are reading binary format + ! These files need to be generated first! + file1_bin = 'clay_top_30sec_testBiljana.dat' + file2_bin = 'sand_top_30sec_testBiljana.dat' + file3_bin = 'clay_sub_30sec_testBiljana.dat' + file4_bin = 'sand_sub_30sec_testBiljana.dat' + + print *,'file1 :',trim(path1)//trim(file1) + print *,'file2 :',trim(path2)//trim(file1) + print *,'file3 :',trim(path3)//trim(file1) + print *,'file1 :',trim(path1)//trim(file1_bin) + print *,'file2 :',trim(path2)//trim(file1_bin) + print *,'file3 :',trim(path3)//trim(file1_bin) + + lp1 = layer_id + 1 + + if(self_fill) print *,'Vertical self-filling of first layer' + if(ver_merge) print *,'Vertical MERGING layers' + + ! 
These are HWSDv2-NGDC data + open (10,file=trim(path1)//trim(file1_bin),form='unformatted', & + status='old',action='read') + open (11,file=trim(path1)//trim(file2_bin),form='unformatted', & + status='old',action='read') + open (12,file=trim(path1)//trim(file3_bin),form='unformatted', & + status='old',action='read') + open (13,file=trim(path1)//trim(file4_bin),form='unformatted', & + status='old',action='read') + + ! These are STATSFO data + open (20,file=trim(path2)//trim(file1),form='unformatted', & + status='old',action='read') + open (21,file=trim(path2)//trim(file2),form='unformatted', & + status='old',action='read') + open (22,file=trim(path2)//trim(file3),form='unformatted', & + status='old',action='read') + open (23,file=trim(path2)//trim(file4),form='unformatted', & + status='old',action='read') + + open (30,file=trim(path3)//trim(file1),form='unformatted', & + status='unknown',action='write') + open (31,file=trim(path3)//trim(file2),form='unformatted', & + status='unknown',action='write') + open (32,file=trim(path3)//trim(file3),form='unformatted', & + status='unknown',action='write') + open (33,file=trim(path3)//trim(file4),form='unformatted', & + status='unknown',action='write') + + + do j=1,nr + + ! These are HWSD-v2-NGDC data + read (10) (data1 (i),i=1,nc) + read (11) (data2 (i),i=1,nc) + read (12) (data3 (i),i=1,nc) + read (13) (data4 (i),i=1,nc) + + ! Replace missing values (if any) with -9999.0 + do i = 1, nc + if (ieee_is_nan(data1(i))) data1(i) = -9999.0 + if (ieee_is_nan(data2(i))) data2(i) = -9999.0 + if (ieee_is_nan(data3(i))) data3(i) = -9999.0 + if (ieee_is_nan(data4(i))) data4(i) = -9999.0 + end do + + ! These are STATSFO data + read (20) (data5 (i),i=1,nc) + read (21) (data6 (i),i=1,nc) + read (22) (data7 (i),i=1,nc) + read (23) (data8 (i),i=1,nc) + + do i=1,nc + + sum_top = data1(i) + data2(i) + sum_sub = data3(i) + data4(i) + + if(layer_id == 1) then + if(sum_top >= 0.) src_mask(i,j) = 1 + if(sum_sub >= 0.) 
src_mask(i,j) = src_mask(i,j) + 10 + endif + + if(ver_merge) then + if(self_fill) then + if((sum_top >=0.).and.(sum_sub < 0.)) then + data3 (i) = data1 (i) + data4 (i) = data2 (i) + src_mask(i,j) = src_mask(i,j) + 80 + endif + + if((sum_top < 0.).and.(sum_sub >= 0.)) then + data1 (i) = data3 (i) + data2 (i) = data4 (i) + src_mask(i,j) = src_mask(i,j) + 8 + endif + endif + endif + + sum_top = data5(i) + data6(i) + sum_sub = data7(i) + data8(i) + + if((sum_top >=0.).and.(sum_sub >=0.)) then !SM: remember to switch + data1 (i) = data5 (i) + data2 (i) = data6 (i) + data3 (i) = data7 (i) + data4 (i) = data8 (i) + src_mask(i,j) = 100*(src_mask(i,j)/100) + 10*lp1 + lp1 + endif + + if(ver_merge) then + if((sum_top >=0.).and.(sum_sub < 0.)) then + data1 (i) = data5 (i) + data2 (i) = data6 (i) + data3 (i) = data5 (i) + data4 (i) = data6 (i) + src_mask(i,j) = 100*(src_mask(i,j)/100) + 80 + lp1 + endif + + if((sum_top < 0.).and.(sum_sub >= 0.)) then + data1 (i) = data7 (i) + data2 (i) = data8 (i) + data3 (i) = data7 (i) + data4 (i) = data8 (i) + src_mask(i,j) = 100*(src_mask(i,j)/100) + 10*lp1 + 8 + endif + endif +! if((data1(i) >= 0.).and.(src_mask(i,j) ==0.)) & +! print *,i,j,data1(i),data2(i),data3(i),data4(i),src_mask(i,j) +! if((data1(i) < 0.).and.(src_mask(i,j) > 0.)) then +! print *,i,j,data1(i),data2(i),data3(i),data4(i),src_mask(i,j), & +! data5(i),data6(i),data7(i),data8(i) +! pause +! endif + end do + + write (30) (data1 (i),i=1,nc) + write (31) (data2 (i),i=1,nc) + write (32) (data3 (i),i=1,nc) + write (33) (data4 (i),i=1,nc) + end do + + deallocate (data1, data2, data3, data4) + deallocate (data5, data6, data7, data8) + + ! These are HWSD-v2-NGDC files + close (10,status='keep') + close (11,status='keep') + close (12,status='keep') + close (13,status='keep') + + ! These are STATSFO files + close (20,status='keep') + close (21,status='keep') + close (22,status='keep') + close (23,status='keep') + + ! 
These are output WHSD-v2-STATSFO files + close (30,status='keep') + close (31,status='keep') + close (32,status='keep') + close (33,status='keep') + +end subroutine merge_min_statsgo + +! ----------------------------------------------------------------------------------- + +subroutine merge_carbon_statsgo (self_fill,ver_merge, layer_id,nc,nr,src_mask, & + path1,path2,path3) + + use netcdf + use ieee_arithmetic ! For ieee_is_nan function + implicit none + integer, intent(in) :: nc,nr + logical, intent(in) :: self_fill, ver_merge + integer, intent(in) :: layer_id + real :: SUB2TOP + character(len=300),intent (in) :: path1,path2,path3 + character(len=100) :: file1,file2,file1_bin,file2_bin + + integer :: i,j, lp1,min_dig + real, allocatable, dimension (:) :: data1, data2, data3, data4 + integer, intent (inout), dimension (nc,nr) :: src_mask + real :: sum_top, sum_sub + + !character(len=100) :: variable_name, file1_whds, file2_whds + integer :: retval,ncid1,ncid2,varid1,varid2 + real(4) :: fill_value1,fill_value2 + + allocate (data1(1:nc)) + allocate (data2(1:nc)) + allocate (data3(1:nc)) + allocate (data4(1:nc)) + + file1 = 'oc_top_30sec.dat' + file2 = 'oc_sub_30sec.dat' + + file1_bin = 'oc_top_30sec_testBiljana.dat' + file2_bin = 'oc_sub_30sec_testBiljana.dat' + + print *,'file1 :',trim(path1)//trim(file1) + print *,'file2 :',trim(path2)//trim(file1) + print *,'file2 :',trim(path3)//trim(file1) + + if(self_fill) print *,'Vertical self-filling of first layer' + + ! These are HWSD-v2-NGDC data + open (10,file=trim(path1)//trim(file1_bin),form='unformatted', & + status='old',action='read') + open (11,file=trim(path1)//trim(file2_bin),form='unformatted', & + status='old',action='read') + + ! 
These are STATSFO data + open (20,file=trim(path2)//trim(file1),form='unformatted', & + status='old',action='read') + open (21,file=trim(path2)//trim(file2),form='unformatted', & + status='old',action='read') + + open (30,file=trim(path3)//trim(file1),form='unformatted', & + status='unknown',action='write') + open (31,file=trim(path3)//trim(file2),form='unformatted', & + status='unknown',action='write') + + lp1 = layer_id + 1 + + do j=1,nr + + ! These are HWSD-v2 data + read (10) (data1 (i),i=1,nc) + read (11) (data2 (i),i=1,nc) + + ! If needed, convert om into oc (apply factor to match units): [om] = 1.72 * [oc] => [oc] = [om]/1.72 + !data1=data1/1.72 + !data2=data2/1.72 + + ! Replace missing values (if any) with -9999.0 + do i = 1, nc + if (ieee_is_nan(data1(i))) data1(i) = -9999.0 + if (ieee_is_nan(data2(i))) data2(i) = -9999.0 + end do + + ! These are STATSFO data + read (20) (data3 (i),i=1,nc) + read (21) (data4 (i),i=1,nc) + + do i=1,nc + + if (layer_id == 1) then + if(data1 (i) >=0.) data1(i) = data1(i) + if(data2 (i) >=0.) data2(i) = data2(i) + endif + + sum_top = data1(i) + sum_sub = data2(i) + SUB2TOP = 1. + + if(layer_id == 1) then + if(sum_top >= 0.) src_mask(i,j) = src_mask (i,j) + 100 + if(sum_sub >= 0.) src_mask(i,j) = src_mask (i,j) + 1000 + endif + + if(ver_merge) then + if(self_fill) then + if((sum_top >=0.).and.(sum_sub < 0.)) then + if(data1(i) < 15./1.72) SUB2TOP = 0.3 + data2 (i) = data1 (i) * SUB2TOP + src_mask(i,j) = src_mask (i,j) + 8000 + endif + + if((sum_top < 0.).and.(sum_sub >= 0.)) then + if(data2(i) < 15./1.72) SUB2TOP = 0.3 + data1 (i) = data2 (i) / SUB2TOP + src_mask(i,j) = src_mask (i,j) + 800 + endif + endif + endif + + sum_top = data3(i) + sum_sub = data4(i) + SUB2TOP = 1. 
+ min_dig = src_mask (i,j) - 100*(src_mask (i,j)/100) + + if((sum_top >=0.).and.(sum_sub >=0.)) then !SM: switch to and + data1 (i) = data3 (i) + data2 (i) = data4 (i) + src_mask(i,j) = 1000*lp1 + 100*lp1 + min_dig + if(src_mask(i,j) == 3344) print *,i,j,sum_top,sum_sub,min_dig,lp1,layer_id + endif + + if(ver_merge) then + if((sum_top >=0.).and.(sum_sub < 0.)) then + if(data3(i) < 15./1.72) SUB2TOP = 0.3 + data1 (i) = data3 (i) + data2 (i) = data1 (i) * SUB2TOP + src_mask(i,j) = 8000 + 100*lp1 + min_dig + endif + + if((sum_top < 0.).and.(sum_sub >= 0.)) then + if(data4(i) < 15./1.72) SUB2TOP = 0.3 + data2 (i) = data4 (i) + data1 (i) = data2 (i) / SUB2TOP + src_mask(i,j) = 1000*lp1 + 800 + min_dig + endif + endif + if(src_mask(i,j) == 3344) print *,i,j,sum_top,sum_sub,min_dig,lp1,layer_id + end do + + write (30) (data1 (i),i=1,nc) + write (31) (data2 (i),i=1,nc) + + end do + + deallocate (data1,data2,data3,data4) + + ! These are WHSD-v2 files + close (10,status='keep') + close (11,status='keep') + + ! These are STATSFO files + close (20,status='keep') + close (21,status='keep') + + ! These are WHSD-v2-STATSFO files + close (30,status='keep') + close (31,status='keep') + +end subroutine merge_carbon_statsgo + +! 
----------------------------------------------------------------------------------- + +subroutine merge_min (self_fill,ver_merge,layer_id,nc,nr,src_mask, & + path1,path2,path3) + implicit none + integer, intent(in) :: nc,nr,layer_id + logical, intent (in) :: self_fill,ver_merge + character(len=300),intent (in) :: path1,path2,path3 + character(len=100) :: file1,file2,file3,file4 + character(len=100) :: file5,file6,file7,file8 + integer, intent (inout), dimension (nc,nr) :: src_mask + + integer :: i,j,lp1 + + real, allocatable, dimension (:) :: data1, data2, data3, data4 + real, allocatable, dimension (:) :: data5, data6, data7, data8 + real :: sum_top, sum_sub + + allocate (data1(1:nc)) + allocate (data2(1:nc)) + allocate (data3(1:nc)) + allocate (data4(1:nc)) + allocate (data5(1:nc)) + allocate (data6(1:nc)) + allocate (data7(1:nc)) + allocate (data8(1:nc)) + + file1 = 'clay_top_30sec.dat' + file2 = 'sand_top_30sec.dat' + file3 = 'clay_sub_30sec.dat' + file4 = 'sand_sub_30sec.dat' + + print *,'file1 :',trim(path1)//trim(file1) + print *,'file2 :',trim(path2)//trim(file1) + print *,'file3 :',trim(path3)//trim(file1) + + lp1 = layer_id + 1 + + if(self_fill) print *,'Vertical self-filling of first layer' + if(ver_merge) print *,'Vertical MERGING layers' + open (10,file=trim(path1)//trim(file1),form='unformatted', & + status='old',action='read') + open (11,file=trim(path1)//trim(file2),form='unformatted', & + status='old',action='read') + open (12,file=trim(path1)//trim(file3),form='unformatted', & + status='old',action='read') + open (13,file=trim(path1)//trim(file4),form='unformatted', & + status='old',action='read') + + open (20,file=trim(path2)//trim(file1),form='unformatted', & + status='old',action='read') + open (21,file=trim(path2)//trim(file2),form='unformatted', & + status='old',action='read') + open (22,file=trim(path2)//trim(file3),form='unformatted', & + status='old',action='read') + open (23,file=trim(path2)//trim(file4),form='unformatted', & + 
status='old',action='read') + + open (30,file=trim(path3)//trim(file1),form='unformatted', & + status='unknown',action='write') + open (31,file=trim(path3)//trim(file2),form='unformatted', & + status='unknown',action='write') + open (32,file=trim(path3)//trim(file3),form='unformatted', & + status='unknown',action='write') + open (33,file=trim(path3)//trim(file4),form='unformatted', & + status='unknown',action='write') + + + do j=1,nr + + read (10) (data1 (i),i=1,nc) + read (11) (data2 (i),i=1,nc) + read (12) (data3 (i),i=1,nc) + read (13) (data4 (i),i=1,nc) + + read (20) (data5 (i),i=1,nc) + read (21) (data6 (i),i=1,nc) + read (22) (data7 (i),i=1,nc) + read (23) (data8 (i),i=1,nc) + + do i=1,nc + + sum_top = data1(i) + data2(i) + sum_sub = data3(i) + data4(i) + + if(layer_id == 1) then + if(sum_top >= 0.) src_mask(i,j) = 1 + if(sum_sub >= 0.) src_mask(i,j) = src_mask(i,j) + 10 + endif + + if(ver_merge) then + if(self_fill) then + if((sum_top >=0.).and.(sum_sub < 0.)) then + data3 (i) = data1 (i) + data4 (i) = data2 (i) + src_mask(i,j) = src_mask(i,j) + 80 + endif + + if((sum_top < 0.).and.(sum_sub >= 0.)) then + data1 (i) = data3 (i) + data2 (i) = data4 (i) + src_mask(i,j) = src_mask(i,j) + 8 + endif + endif + endif + + sum_top = data5(i) + data6(i) + sum_sub = data7(i) + data8(i) + + if((sum_top >=0.).and.(sum_sub >=0.)) then !SM: remember to switch + data1 (i) = data5 (i) + data2 (i) = data6 (i) + data3 (i) = data7 (i) + data4 (i) = data8 (i) + src_mask(i,j) = 100*(src_mask(i,j)/100) + 10*lp1 + lp1 + endif + + if(ver_merge) then + if((sum_top >=0.).and.(sum_sub < 0.)) then + data1 (i) = data5 (i) + data2 (i) = data6 (i) + data3 (i) = data5 (i) + data4 (i) = data6 (i) + src_mask(i,j) = 100*(src_mask(i,j)/100) + 80 + lp1 + endif + + if((sum_top < 0.).and.(sum_sub >= 0.)) then + data1 (i) = data7 (i) + data2 (i) = data8 (i) + data3 (i) = data7 (i) + data4 (i) = data8 (i) + src_mask(i,j) = 100*(src_mask(i,j)/100) + 10*lp1 + 8 + endif + endif +! 
if((data1(i) >= 0.).and.(src_mask(i,j) ==0.)) & +! print *,i,j,data1(i),data2(i),data3(i),data4(i),src_mask(i,j) +! if((data1(i) < 0.).and.(src_mask(i,j) > 0.)) then +! print *,i,j,data1(i),data2(i),data3(i),data4(i),src_mask(i,j), & +! data5(i),data6(i),data7(i),data8(i) +! pause +! endif + end do + + write (30) (data1 (i),i=1,nc) + write (31) (data2 (i),i=1,nc) + write (32) (data3 (i),i=1,nc) + write (33) (data4 (i),i=1,nc) + end do + + deallocate (data1, data2, data3, data4) + deallocate (data5, data6, data7, data8) + + close (10,status='keep') + close (11,status='keep') + close (12,status='keep') + close (13,status='keep') + + close (20,status='keep') + close (21,status='keep') + close (22,status='keep') + close (23,status='keep') + + close (30,status='keep') + close (31,status='keep') + close (32,status='keep') + close (33,status='keep') + +end subroutine merge_min + +! ----------------------------------------------------------------------------------- + +subroutine merge_carbon (self_fill,ver_merge, layer_id,nc,nr,src_mask, & + path1,path2,path3) + implicit none + integer, intent(in) :: nc,nr + logical, intent(in) :: self_fill, ver_merge + integer, intent(in) :: layer_id + real :: SUB2TOP + character(len=300),intent (in) :: path1,path2,path3 + character(len=100) :: file1,file2,file3,file4 + + integer :: i,j, lp1,min_dig + real, allocatable, dimension (:) :: data1, data2, data3, data4 + integer, intent (inout), dimension (nc,nr) :: src_mask + real :: sum_top, sum_sub + + allocate (data1(1:nc)) + allocate (data2(1:nc)) + allocate (data3(1:nc)) + allocate (data4(1:nc)) + + file1 = 'oc_top_30sec.dat' + file2 = 'oc_sub_30sec.dat' + + print *,'file1 :',trim(path1)//trim(file1) + print *,'file2 :',trim(path2)//trim(file1) + print *,'file3 :',trim(path3)//trim(file1) + + if(self_fill) print *,'Vertical self-filling of first layer' + + open (10,file=trim(path1)//trim(file1),form='unformatted', & + status='old',action='read') + open 
(11,file=trim(path1)//trim(file2),form='unformatted', & + status='old',action='read') + + open (20,file=trim(path2)//trim(file1),form='unformatted', & + status='old',action='read') + open (21,file=trim(path2)//trim(file2),form='unformatted', & + status='old',action='read') + + open (30,file=trim(path3)//trim(file1),form='unformatted', & + status='unknown',action='write') + open (31,file=trim(path3)//trim(file2),form='unformatted', & + status='unknown',action='write') + + lp1 = layer_id + 1 + + do j=1,nr + + read (10) (data1 (i),i=1,nc) + read (11) (data2 (i),i=1,nc) + + read (20) (data3 (i),i=1,nc) + read (21) (data4 (i),i=1,nc) + + do i=1,nc + + if (layer_id == 1) then + if(data1 (i) >=0.) data1(i) = data1(i) + if(data2 (i) >=0.) data2(i) = data2(i) + endif + + sum_top = data1(i) + sum_sub = data2(i) + SUB2TOP = 1. + + if(layer_id == 1) then + if(sum_top >= 0.) src_mask(i,j) = src_mask (i,j) + 100 + if(sum_sub >= 0.) src_mask(i,j) = src_mask (i,j) + 1000 + endif + + if(ver_merge) then + if(self_fill) then + if((sum_top >=0.).and.(sum_sub < 0.)) then + if(data1(i) < 15./1.72) SUB2TOP = 0.3 + data2 (i) = data1 (i) * SUB2TOP + src_mask(i,j) = src_mask (i,j) + 8000 + endif + + if((sum_top < 0.).and.(sum_sub >= 0.)) then + if(data2(i) < 15./1.72) SUB2TOP = 0.3 + data1 (i) = data2 (i) / SUB2TOP + src_mask(i,j) = src_mask (i,j) + 800 + endif + endif + endif + + sum_top = data3(i) + sum_sub = data4(i) + SUB2TOP = 1. 
+ min_dig = src_mask (i,j) - 100*(src_mask (i,j)/100) + + if((sum_top >=0.).and.(sum_sub >=0.)) then !SM: switch to and + data1 (i) = data3 (i) + data2 (i) = data4 (i) + src_mask(i,j) = 1000*lp1 + 100*lp1 + min_dig + if(src_mask(i,j) == 3344) print *,i,j,sum_top,sum_sub,min_dig,lp1,layer_id + endif + + if(ver_merge) then + if((sum_top >=0.).and.(sum_sub < 0.)) then + if(data3(i) < 15./1.72) SUB2TOP = 0.3 + data1 (i) = data3 (i) + data2 (i) = data1 (i) * SUB2TOP + src_mask(i,j) = 8000 + 100*lp1 + min_dig + endif + + if((sum_top < 0.).and.(sum_sub >= 0.)) then + if(data4(i) < 15./1.72) SUB2TOP = 0.3 + data2 (i) = data4 (i) + data1 (i) = data2 (i) / SUB2TOP + src_mask(i,j) = 1000*lp1 + 800 + min_dig + endif + endif + if(src_mask(i,j) == 3344) print *,i,j,sum_top,sum_sub,min_dig,lp1,layer_id + end do + + write (30) (data1 (i),i=1,nc) + write (31) (data2 (i),i=1,nc) + + end do + + deallocate (data1,data2,data3,data4) + + close (10,status='keep') + close (11,status='keep') + + close (20,status='keep') + close (21,status='keep') + + close (30,status='keep') + close (31,status='keep') + +end subroutine merge_carbon + +subroutine get_base_file_name(file_path, base_name) + implicit none + character(len=*), intent(in) :: file_path + character(len=100), intent(out) :: base_name + integer :: slash_pos + + ! Find the last slash + slash_pos = len_trim(file_path) + do while (slash_pos > 0 .and. file_path(slash_pos:slash_pos) /= '/') + slash_pos = slash_pos - 1 + end do + + ! 
Extract the base file name + if (slash_pos > 0) then + base_name = file_path(slash_pos+1:) + else + base_name = file_path + end if + base_name = trim(base_name) +end subroutine get_base_file_name diff --git a/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/STEP5_create_netcdf_soildata_bcs_shared_path_noMASK.pro b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/STEP5_create_netcdf_soildata_bcs_shared_path_noMASK.pro new file mode 100755 index 000000000..3244effa2 --- /dev/null +++ b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/STEP5_create_netcdf_soildata_bcs_shared_path_noMASK.pro @@ -0,0 +1,478 @@ +;============================================================================== +; STEP5: Build 30-arcsec (nc=43200 x nr=21600) global soil-property arrays, +; fill local holes horizontally, and write 36x18 BCS tiles to NetCDF. +; +; INPUTS: +; - HWSDv2/NGDC/STATSGO soil fields (F77_UNFORMATTED row strips) from `path`: +; clay_top_30sec.dat, sand_top_30sec.dat, oc_top_30sec.dat, +; clay_sub_30sec.dat, sand_sub_30sec.dat, oc_sub_30sec.dat +; - Gravel fraction (top/sub) from `path3`: +; gravel_top_30sec.dat, gravel_sub_30sec.dat +; - Source mask from `path`: +; data_sources.msk +; - Legacy tile-domain index from path2: tile_id.rst (ONLY file used from path2) +; +; TILE_ID USAGE (legacy): +; tileid(i,j) is used ONLY to: +; (1) identify pixels outside any valid 36×18 BCS tile (tileid==0), +; (2) restrict hole-filling to valid tile domain (1..maxcat), +; (3) suppress writing completely empty tiles (skip tiles with max(tileid)<1). +; Soil validity and merging are driven by data presence (HWSDv2 values), +; not by tileid. +; +; OUTPUT: +; - NetCDF tile files: +; SoilProperties_HxxVyy.nc +; containing Mask + Clay/Sand/OC (top/sub) + Gravel +; NOTE: +; This workflow does NOT use a legacy HWSD land/sea mask. 
Validity and gap-filling +; are driven by data presence (clay/sand) plus tile-domain gating via tile_id.rst. +;============================================================================== + +nc=43200l +nr=21600l +nx = nc/36 +ny = nr/18 +dxy = 360./nc +sf = .01 +un='%' +uf = -9999 + + ;HWSDv2+NGDC+STATSGO2+AFSIS + ;path='/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/HWSDv2-NGDC-STATSGO-AFSIS/' + + ;HWSDv2+NGDC+STATSGO2 + path='/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/HWSDv2-NGDC-STATSGO/' + +; for files I don't have on discover I need path2 so I can run IDL code + path2 ='/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/' + path3 = '/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/MERGED-DATA/' + + file0 = path2+'tile_id.rst' + +;HWSDv2+NGDC+STATSGO2 +;path='/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/HWSDv2-NGDC-STATSGO/' + file1 = 'clay_top_30sec.dat' + file2 = 'sand_top_30sec.dat' + file3 = 'oc_top_30sec.dat' + file4 = 'clay_sub_30sec.dat' + file5 = 'sand_sub_30sec.dat' + file6 = 'oc_sub_30sec.dat' +;------------------------------------------------------------------------------ +; Coarse fragments / Gravel handling (HWSDv2) +;------------------------------------------------------------------------------ +; HWSDv2 provides a native coarse fragments (Gravel) field (coarse fragments, D2). +; This replaces the legacy HWSDv1.2 gravel proxy that was derived from textures. +; +; Inputs used here: +; - We ingest coarse fragments via prebuilt 30-arcsec intermediates (derived +; from HWSDv2 D2): +; gravel_top_30sec.dat and gravel_sub_30sec.dat +; +; Implementation notes (legacy STEP5 compatibility): +; - We read two gravel inputs (top/sub) as integer percent (0..100) with uf=-9999. 
+; - To match legacy STEP5 behavior, we combine top/sub into one Gravel value using +; the historical weighting: 0.3*top + 0.7*sub. +; - The output Gravel is stored as short with ScaleFactor=0.01, so we convert +; integer percent -> integer (percent*100) before writing. +; +; Historical (HWSDv1.2) inputs (no longer used): +; ;file7 = path2+'HWSDv1.21/top_grav.bin' +; ;file8 = path2+'HWSDv1.21/sub_grav.bin' +;------------------------------------------------------------------------------ + + file7 = path3+'gravel_top_30sec.dat' + file8 = path3+'gravel_sub_30sec.dat' + +strip0 = lonarr(nc) +strip1 = fltarr(nc) +strip2 = fltarr(nc) +strip3 = fltarr(nc) +strip4 = fltarr(nc) +strip5 = fltarr(nc) +strip6 = fltarr(nc) +; HWSDv1.2 ;strip7 = fltarr(nc) +; HWSDv1.2 ;strip8 = fltarr(nc) +strip7 = intarr(nc) +strip8 = intarr(nc) + +;------------------------------------------------------------------------------ +; Mask concepts used in this workflow +;------------------------------------------------------------------------------ +; 1) Legacy HWSD land mask (e.g., hwsd_mask.bin): +; Used in older pipelines. NOT used in HWSDv2 processing here. +; +; 2) Data-driven validity mask (this script: `mask`): +; mask(i,j)=1 when sufficient soil data are present (top or sub texture valid). +; mask(i,j)=0 for holes inside valid tiles (eligible for gap filling). +; mask(i,j)=-9999 for out-of-domain pixels (tileid==0 and no data). +; +; 3) NetCDF "Mask" variable written to tiles: +; This is the final `mask` subset written to each NetCDF tile file. +; It encodes data validity / fill state, NOT a scientific land/sea mask. 
+;------------------------------------------------------------------------------ +mask = intarr(nc,nr) +tileid= lonarr(nc,nr) + +data1 = intarr(nc,nr) +data2 = intarr(nc,nr) +data3 = intarr(nc,nr) +data4 = intarr(nc,nr) +data5 = intarr(nc,nr) +data6 = intarr(nc,nr) +data7 = intarr(nc,nr) + +openr, lun0 ,file0 , /get_lun,/F77_UNFORMATTED +openr, lun7 ,file7 , /get_lun,/F77_UNFORMATTED +openr, lun8 ,file8 , /get_lun,/F77_UNFORMATTED + + ;OPTION 1 +openr, lun1 ,path+file1, /get_lun,/F77_UNFORMATTED +openr, lun2 ,path+file2, /get_lun,/F77_UNFORMATTED +openr, lun3 ,path+file3, /get_lun,/F77_UNFORMATTED +openr, lun4 ,path+file4, /get_lun,/F77_UNFORMATTED +openr, lun5 ,path+file5, /get_lun,/F77_UNFORMATTED +openr, lun6 ,path+file6, /get_lun,/F77_UNFORMATTED + +for j = 0L, nr-1L do begin + + readu,lun1 ,strip1 + readu,lun2 ,strip2 + readu,lun3 ,strip3 + readu,lun4 ,strip4 + readu,lun5 ,strip5 + readu,lun6 ,strip6 + readu,lun7 ,strip7 + readu,lun8 ,strip8 + readu,lun0 ,strip0 + strip3 = strip3 ; HWSDv2 alredy OC;/1.72 + strip6 = strip6 ; HWSDv2 alredy OC;;/1.72 ; OM=> OC + mask(*,j) = 0 + tileid(*,J) = strip0 + data1 (*,j) = fix(100.*strip1) + data2 (*,j) = fix(100.*strip2) + data3 (*,j) = fix(100.*strip3) + data4 (*,j) = fix(100.*strip4) + data5 (*,j) = fix(100.*strip5) + data6 (*,j) = fix(100.*strip6) + ; HWSDv1.2 ;data7 (*,j) = fix(100.*(0.3*strip7 + 0.7*strip8)) + good7 = (strip7 ne uf) + good8 = (strip8 ne uf) + + out = intarr(nc) + uf + + idx = where(good7 AND good8, nidx) + if (nidx gt 0) then out[idx] = long(0.3*float(strip7[idx]) + 0.7*float(strip8[idx]) + 0.5) + + idx = where(good7 AND (NOT good8), nidx) + if (nidx gt 0) then out[idx] = strip7[idx] + + idx = where(good8 AND (NOT good7), nidx) + if (nidx gt 0) then out[idx] = strip8[idx] + ; Convert out from integer percent -> integer (percent*100) for ScaleFactor=0.01 + idx = where(out ne uf, nidx2) + if (nidx2 gt 0) then out[idx] = out[idx] * 100L + + + data7(*,j) = out + +endfor +; Read full src_mask (same shape 
as data1..data7) +src_mask = lonarr(nc,nr) +tmp = lonarr(nc) + +openr, lunsrc2, path+'data_sources.msk', /get_lun, /F77_UNFORMATTED +for j=0L, nr-1L do begin + readu, lunsrc2, tmp + src_mask(*,j) = tmp +endfor +close, lunsrc2 + + +m2 = src_mask mod 100L +is_statsgo_mask = (m2 eq 22L) OR (m2 eq 82L) OR (m2 eq 28L) + +;tileid (where (tileid eq 0)) = uf +;print,max(tileid),min(tileid) +maxcat = 64770L +undef = uf + +for j = 0L, nr-1L do begin + for i = 0L, nc-1L do begin + + top_tex = ( (data1(i,j) gt 0) AND (data2(i,j) gt 0) ) ; clay_top & sand_top + sub_tex = ( (data4(i,j) gt 0) AND (data5(i,j) gt 0) ) ; clay_sub & sand_sub + + if (top_tex OR sub_tex) then mask(i,j) = 1 $ + else if ((mask(i,j) eq 0) AND (tileid(i,j) eq 0)) then mask(i,j) = -9999 + + endfor +endfor + + +data1 (where (mask le 0)) = undef +data2 (where (mask le 0)) = undef +data3 (where (mask le 0)) = undef +data4 (where (mask le 0)) = undef +data5 (where (mask le 0)) = undef +data6 (where (mask le 0)) = undef +data7 (where (mask le 0)) = undef + +idx = where(is_statsgo_mask AND (data1 eq uf), n1) ; clay_top missing in statsgo footprint +idx = where(is_statsgo_mask AND (data2 eq uf), n2) ; sand_top missing +idx = where(is_statsgo_mask AND (data3 eq uf), n3) ; oc_top missing +idx = where(is_statsgo_mask AND (data4 eq uf), n4) ; clay_sub missing +idx = where(is_statsgo_mask AND (data5 eq uf), n5) ; sand_sub missing +idx = where(is_statsgo_mask AND (data6 eq uf), n6) ; oc_sub missing + +print, 'PRE-FILL missing inside STAT footprint: clayT=',n1,' sandT=',n2,' ocT=',n3,' clayS=',n4,' sandS=',n5,' ocS=',n6 +n1_pre = n1 & n2_pre = n2 & n3_pre = n3 +n4_pre = n4 & n5_pre = n5 & n6_pre = n6 + +close,lun0 +close,lun1 +close,lun2 +close,lun3 +close,lun4 +close,lun5 +close,lun6 +close,lun7 +close,lun8 + +print, 'Done reading. 
Starting horizontal filling' + +;stop +;goto,jump2 +mask2 =mask +t0 = systime(/seconds) +DEBUG = 0L ;set to 1L when you want verbose prints +n_try = 0L ; times we enter fill block +n_fill = 0L ; times we actually fill (copy donor) +n_try_statsgo = 0L +n_fill_statsgo = 0L +ms = 0L & mt = 0L & mval = 0L + +; Also count how many STATSGO-footprint pixels remain missing AFTER fill +n_undef_statsgo_pre = 0L +n_undef_statsgo_post = 0L +n_hitcap = 0L ; times we fail to find donor within lmax +n_bigL = 0L ; times l got "large" (diagnostic) +n_hole_statsgo = 0L +n_hole_statsgo_tile = 0L +n_hole_statsgo_mask1 = 0L + +for j = 0L, nr-1L do begin + try0 = n_try + fill0 = n_fill + hit0 = n_hitcap + + for i = 0L, nc-1L do begin + + ; STATSGO footprint tag (for diagnostics only) + m2 = src_mask(i,j) mod 100L + is_statsgo = (m2 eq 22L) OR (m2 eq 82L) OR (m2 eq 28L) + + ; define "hole" once (used for both debug + fill) + hole_top = ( (data1(i,j) eq uf) OR (data2(i,j) eq uf) OR $ + ((data1(i,j) eq 0L) AND (data2(i,j) eq 0L)) ) + + hole_sub = ( (data4(i,j) eq uf) OR (data5(i,j) eq uf) OR $ + ((data4(i,j) eq 0L) AND (data5(i,j) eq 0L)) ) + + ; Debug counters: STAT footprint holes (global) + if (is_statsgo AND (hole_top OR hole_sub)) then begin + n_hole_statsgo = n_hole_statsgo + 1L + if ((tileid(i,j) ge 1L) AND (tileid(i,j) le maxcat)) then $ + n_hole_statsgo_tile = n_hole_statsgo_tile + 1L + if (mask(i,j) eq 1) then $ + n_hole_statsgo_mask1 = n_hole_statsgo_mask1 + 1L + endif + + ; Only fill inside valid tile domain + if ((tileid(i,j) ge 1L) AND (tileid(i,j) le maxcat)) then begin + + ; old (too strict it blocks all holes) + ; if ((mask(i,j) eq 1) AND (hole_top OR hole_sub)) then begin + ; new (allows filling holes that are mask=0 but inside valid tiles) + ;If this pixel is inside a valid tile and is not ocean (mask ≥ 0), and it has missing soil data, then try to fill it. + ; Here mask means : Do I currently have enough soil data at this pixel to consider it valid? 
+ if ((mask(i,j) ge 0) AND (hole_top OR hole_sub)) then begin
+
+ n_try = n_try + 1L
+ if (is_statsgo) then n_try_statsgo = n_try_statsgo + 1L
+
+ if (DEBUG AND (j ge 3995L) AND (j le 4005L) AND (n_try le 20L)) then begin
+ print, 'HOLE j=', j, ' i=', i, ' mask=', mask(i,j), ' data5=', data5(i,j), $
+ ' top0=', ((data1(i,j) eq 0L) AND (data2(i,j) eq 0L)), $
+ ' sub0=', ((data4(i,j) eq 0L) AND (data5(i,j) eq 0L))
+ endif
+
+ ; 50-pixel donor existence check (reuse for debug + skip)
+ imn0 = MAX([i-50L,0L]) & imx0 = MIN([i+50L,nc-1L])
+ jmn0 = MAX([j-50L,0L]) & jmx0 = MIN([j+50L,nr-1L])
+
+ msk50 = max(mask2(imn0:imx0, jmn0:jmx0))
+ dat50 = max(data5(imn0:imx0, jmn0:jmx0))
+
+ if (DEBUG AND (j ge 3400L) AND (j le 4200L) AND ((n_try-try0) le 5L)) then begin
+ print, 'HOLE quickcheck: j=', j, ' i=', i, ' max(mask2@50)=', msk50, ' max(data5@50)=', dat50
+ endif
+ ; if no land donor within 50 pixels, skip immediately (avoid expensive l search)
+ if (msk50 eq 0) then begin
+ n_hitcap = n_hitcap + 1L
+ continue
+ endif
+
+ l = 3
+ lmax = 500L ; SCIENCE: do not borrow donor beyond ~4.2° (current lmax=500)
+ ; lmax=300 ≈ 300×0.00833° ≈ 2.5° (~275 km lat)
+ ; lmax=500 ≈ 4.17° (~460 km lat)
+ subset =0. 
+ subset2=undef + ; DEBUG only for first few holes at j=4000 + ;dbg = (j eq 4000L) AND (n_try le 5L) + dbg = ((j ge 3995L) AND (j le 4005L) AND (n_try le 5L)) + if (dbg) then print, 'DEBUG start i=', i, ' mask=', mask(i,j), ' tileid=', tileid(i,j), ' data5=', data5(i,j), ' lmax=', lmax + + ; while ((max(subset) eq 0) or (max(subset2) le 0)) do begin + while ( ((max(subset) eq 0) OR (max(subset2) le 0)) AND (l le lmax) ) do begin + imx=i+l + imn=i-l + jmn=j-l + jmx=j+l + imn=MAX([imn,0]) + jmn=MAX([jmn,0]) + imx=MIN([imx,nc-1]) + jmx=MIN([jmx,nr-1]) + subset = mask2 (imn:imx,jmn:jmx) + subset2= data5 (imn:imx,jmn:jmx) + if (dbg AND ((l eq 3L) OR (l eq 10L) OR (l eq 50L) OR (l eq 100L) OR (l eq lmax))) then begin + ms = max(subset) + mt = max(subset2) + print, 'DEBUG i=', i, ' l=', l, ' max(subset)=', ms, ' max(subset2)=', mt + endif + + mval = max(subset2,max_subscript) +; print,l, max(subset) ,max(subset2) + ;l = l + 1 + if (l lt 50L) then l = l + 1L else l = MIN([l*2L, lmax+1L]) + + + endwhile +; if (l gt lmax) then continue ; SCIENCE: leave missing rather than far donor + if (l gt lmax) then begin + n_hitcap = n_hitcap + 1L + if ((n_hitcap mod 10000L) eq 0) then print, 'hitcap count=', n_hitcap, ' at j=', j + continue + endif + + l = l -1 + if (l gt 50L) then n_bigL = n_bigL + 1L + d1=imx-imn+1 + d2=jmx-jmn+1 + + ii0 = where((subset2 gt 0.) 
AND (subset2 ne undef), nsub) + if (nsub le 0) then continue ; no usable donors in window + + sube = float(subset2[ii0]) + med = median(sube) + + ; prefer an exact (rounded) median if present, else closest-to-median + imed = long(med + 0.5) + midx = where(sube eq imed, nm) + + if (nm gt 0) then begin + max_subscript = ii0(midx(0)) + endif else begin + del = abs(float(sube) - med) + junk = min(del, kmin0) + max_subscript = ii0(kmin0) + endelse + + ii = max_subscript - d1*(max_subscript/d1) + imn ;i -l + jj = max_subscript/d1 + jmn;j - l + + mask(i,j) = 1 + mask2(i,j) = 1 + data1(i,j) = data1(ii,jj) + data2(i,j) = data2(ii,jj) + data3(i,j) = data3(ii,jj) + data4(i,j) = data4(ii,jj) + data5(i,j) = data5(ii,jj) + data6(i,j) = data6(ii,jj) + data7(i,j) = data7(ii,jj) + + n_fill = n_fill + 1L + if (is_statsgo) then n_fill_statsgo = n_fill_statsgo + 1L +; stop + endif + endif + endfor + dtry = (n_try-try0) & dfill = (n_fill-fill0) & dhit=(n_hitcap-hit0) + if (DEBUG AND (j ge 3400L) AND (j le 4200L) AND ((j mod 10L) eq 0) AND (dtry gt 0)) then begin + print, 'j=', j, ' Δtry=', dtry, ' Δfill=', dfill, ' Δhit=', dhit + endif +endfor + +idx = where(is_statsgo_mask AND (data1 eq uf), n1) +idx = where(is_statsgo_mask AND (data2 eq uf), n2) +idx = where(is_statsgo_mask AND (data3 eq uf), n3) +idx = where(is_statsgo_mask AND (data4 eq uf), n4) +idx = where(is_statsgo_mask AND (data5 eq uf), n5) +idx = where(is_statsgo_mask AND (data6 eq uf), n6) + +print, 'FILL DONE elapsed_s=', systime(/seconds)-t0, $ + ' try=', n_try, ' fill=', n_fill, ' hitcap=', n_hitcap, ' bigL=', n_bigL + +print, 'FILL efficiency: fill/try=', float(n_fill)/float(n_try), $ + ' hitcap/try=', float(n_hitcap)/float(n_try) + +print, 'STAT footprint holes: total=', n_hole_statsgo, ' tile_ok=', n_hole_statsgo_tile +print, 'STAT footprint filled: try=', n_try_statsgo, ' fill=', n_fill_statsgo + +print, 'PRE-FILL missing in STAT footprint: clayT=',n1_pre,' sandT=',n2_pre,' ocT=',n3_pre,' clayS=',n4_pre,' 
sandS=',n5_pre,' ocS=',n6_pre +print, 'POST-FILL missing in STAT footprint: clayT=',n1,' sandT=',n2,' ocT=',n3,' clayS=',n4,' sandS=',n5,' ocS=',n6 + + +mask2=0 +;jump2: +;stop +for jx =1,18 do begin + j1 = long((jx -1)*ny) + j2 = long(jx*ny) - 1L + for ix =1,36 do begin + i1 = long((ix -1)*nx) + i2 = long(ix*nx) - 1L + subset_tileid = tileid(i1:i2, j1:j2) + if (max(subset_tileid) ge 1) then begin + ; OPTION 3 + filename = '/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/out_HWSDv2_NGDC_STATSGO_noMASK/' + $ + ;filename = '/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/out_HWSDv2_NGDC_STATSGO_AFSIS_noMASK/' + $ + 'SoilProperties_H'+string(ix,'(i2.2)')+'V'+string(jx,'(i2.2)') + '.nc' + + subset_data1 = data1(i1:i2,j1:j2) + subset_data2 = data2(i1:i2,j1:j2) + subset_data3 = data3(i1:i2,j1:j2) + subset_data4 = data4(i1:i2,j1:j2) + subset_data5 = data5(i1:i2,j1:j2) + subset_data6 = data6(i1:i2,j1:j2) + subset_data7 = data7(i1:i2,j1:j2) + + ; but write the ORIGINAL mask as the NetCDF Mask variable: + subset_mask_out = mask(i1:i2, j1:j2) + + long_data = indgen(nx)*dxy + (i1+1)*dxy - dxy/2. -180. + lat_data = indgen(ny)*dxy + (j1+1)*dxy - dxy/2. -90. 
+ + STEP5_write_netcdf_file,filename = filename, $ + nc_global = nc, nr_global = nr, nc_local = nx, nr_local = ny, i_offset = i1+1, j_offset = j1+1, $ + long_data= long_data, lat_data = lat_data, cellsize=dxy*3600.,scale_factor = [1.,sf,sf,sf,sf,sf,sf,sf], $ + units = ['_',un,un,un,un,un,un,un], undef=uf, varname = ['Mask','Clay0_30','Sand0_30','OC0_30','Clay30_100','Sand30_100','OC30_100','Gravel'], $ + data1 = subset_mask_out, data2=subset_data1,data3=subset_data2,data4=subset_data3,data5=subset_data4,data6=subset_data5, $ + data7 = subset_data6,data8=subset_data7 + + endif + endfor +endfor + +end diff --git a/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/audit_step4_vs_statsgo.f90 b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/audit_step4_vs_statsgo.f90 new file mode 100755 index 000000000..7c831e760 --- /dev/null +++ b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/audit_step4_vs_statsgo.f90 @@ -0,0 +1,127 @@ +program audit_step4_vs_statsgo + ! to compile: ifort -O2 -g audit_step4_vs_statsgo.f90 -o audit_step4_vs_statsgo.x + ! to run : ./audit_step4_vs_statsgo.x + ! 
summaries of how step4 compares to statsgo used to fill + implicit none + integer, parameter :: nc = 43200 + integer, parameter :: nr = 21600 + + character(len=*), parameter :: dir_out = & + '/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/HWSDv2-NGDC-STATSGO/' + + character(len=*), parameter :: dir_stat = & + '/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/STATSGO2/statsgo_' + + character(len=*), parameter :: fmask = 'data_sources.msk' + + call audit_var('clay_top', 'clay_top_30sec.dat') + call audit_var('sand_top', 'sand_top_30sec.dat') + call audit_var('clay_sub', 'clay_sub_30sec.dat') + call audit_var('sand_sub', 'sand_sub_30sec.dat') + call audit_var('oc_top', 'oc_top_30sec.dat') + call audit_var('oc_sub', 'oc_sub_30sec.dat') + +contains + + logical function is_statsgo_used(maskval) + implicit none + integer(4), intent(in) :: maskval + integer(4) :: m + m = mod(maskval, 100_4) + is_statsgo_used = (m == 22_4 .or. m == 82_4 .or. m == 28_4) + end function is_statsgo_used + + subroutine audit_var(label, varfile) + implicit none + character(len=*), intent(in) :: label, varfile + + integer :: i, j + integer :: u_mask, u_out, u_stat + integer(4), allocatable :: mask_row(:) + real(4), allocatable :: out_row(:), stat_row(:) + real(4) :: max_abs, absd + integer :: nstatrow + integer(8) :: n_used, n_used_stat_valid + integer(8) :: n_mis_all, n_mis_stat_valid + real(8) :: hole_frac + + allocate(mask_row(nc)) + allocate(out_row(nc)) + allocate(stat_row(nc)) + + u_mask = 10 + u_out = 11 + u_stat = 12 + + open(u_mask, file=trim(dir_out)//trim(fmask), form='unformatted', status='old', action='read') + open(u_out, file=trim(dir_out)//trim(varfile), form='unformatted', status='old', action='read') + + ! 
STATSGO: F77 sequential with 4-byte markers per row; likely endian mismatch + open(u_stat, file=trim(dir_stat)//trim(varfile), form='unformatted', status='old', action='read', convert='little_endian') + + max_abs = 0.0_4 + n_used = 0_8 + n_used_stat_valid = 0_8 + n_mis_all = 0_8 + n_mis_stat_valid = 0_8 + + do j = 1, nr + read(u_mask) (mask_row(i), i=1,nc) + read(u_out) (out_row(i), i=1,nc) + read(u_stat) (stat_row(i), i=1,nc) + +! if (j == 1 .or. j == nr/2 .or. j == nr) then + if (j == 15600 .or. j == 15000 .or. j == 16200) then + print *, '--- ', trim(label), ' row j=', j + print *, 'STAT(r4) min/max=', minval(stat_row), maxval(stat_row), & + ' neg=', count(stat_row < 0.0_4), ' gt100=', count(stat_row > 100.0_4) + print *, 'OUT (r4) min/max=', minval(out_row), maxval(out_row), & + ' neg=', count(out_row < 0.0_4), ' gt100=', count(out_row > 100.0_4) + print *, 'STAT samples:', stat_row(1), stat_row(1000), stat_row(20000) + print *, 'OUT samples:', out_row(1), out_row(1000), out_row(20000) + end if + nstatrow = 0 + do i = 1, nc + if (is_statsgo_used(mask_row(i))) nstatrow = nstatrow + 1 + end do + + if (nstatrow > 0 .and. (j == 1 .or. 
mod(j,2000)==0)) then + print *, 'ROW j=', j, ' statsgo_used_pixels_in_row=', nstatrow, & + ' STAT min/max=', minval(stat_row), maxval(stat_row), & + ' OUT min/max=', minval(out_row), maxval(out_row) + end if + + do i = 1, nc + if (is_statsgo_used(mask_row(i))) then + n_used = n_used + 1_8 + + absd = abs(out_row(i) - stat_row(i)) + if (absd > 0.0_4) n_mis_all = n_mis_all + 1_8 + + if (stat_row(i) >= 0.0_4) then + n_used_stat_valid = n_used_stat_valid + 1_8 + if (absd > 0.0_4) n_mis_stat_valid = n_mis_stat_valid + 1_8 + end if + end if + end do + + end do + + close(u_mask); close(u_out); close(u_stat) + + hole_frac = 0.0d0 + if (n_used > 0_8) hole_frac = dble(n_used - n_used_stat_valid) / dble(n_used) + + print *, 'RESULT ', trim(label) + print *, ' STATSGO-used pixels =', n_used + print *, ' STATSGO-used & STAT valid =', n_used_stat_valid + print *, ' Mismatch (all STATSGO-used) =', n_mis_all + print *, ' Mismatch (STAT valid only) =', n_mis_stat_valid + print *, ' Hole fraction (STAT missing within used mask)=', hole_frac + + + deallocate(mask_row, out_row, stat_row) + end subroutine audit_var + +end program audit_step4_vs_statsgo + diff --git a/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/check_all_tiles_raw_checker.py b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/check_all_tiles_raw_checker.py new file mode 100755 index 000000000..c60bfa22c --- /dev/null +++ b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/check_all_tiles_raw_checker.py @@ -0,0 +1,73 @@ +import glob, os +import numpy as np +import xarray as xr + +#d = "/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/out_HWSDv2_NGDC_STATSGO_noMASK" +d = "/discover/nobackup/projects/gmao/bcs_shared/make_bcs_inputs/land/soil/SOIL-DATA/soil_properties/v3/" + +vars_ = 
["Clay0_30","Sand0_30","OC0_30","Clay30_100","Sand30_100","OC30_100","Gravel"] +files = sorted(glob.glob(os.path.join(d, "SoilProperties_H??V??.nc"))) +print("Tiles:", len(files)) + +def get_undef_and_sf(da): + # STEP5 tiles use these non-CF attribute names + undef = da.attrs.get("UNDEF", -9999) + sf = float(da.attrs.get("ScaleFactor", 1.0)) + return undef, sf + +gmin = {v: np.inf for v in vars_} +gmax = {v: -np.inf for v in vars_} +gsum = {v: 0.0 for v in vars_} +gcnt = {v: 0 for v in vars_} + +for f in files: + with xr.open_dataset(f, decode_cf=False, mask_and_scale=False) as ds: + for v in vars_: + da = ds[v] + a = da.values + undef, sf = get_undef_and_sf(da) + + m = (a != undef) + n = int(m.sum()) + if n == 0: + continue + + x = a[m].astype(np.float64) * sf # percent + gmin[v] = min(gmin[v], float(x.min())) + gmax[v] = max(gmax[v], float(x.max())) + gsum[v] += float(x.sum()) + gcnt[v] += n + +print("\nGlobal stats (percent):") +for v in vars_: + if gcnt[v] == 0: + print(v, "NO VALID DATA") + else: + print(f"{v:12s} min={gmin[v]:8.3f} mean={gsum[v]/gcnt[v]:8.3f} max={gmax[v]:8.3f} n={gcnt[v]}") + +# Range checks in scaled units +for v in ["Sand0_30", "Gravel"]: + neg = 0 + gt100 = 0 + bad_tiles = [] + + for f in files: + with xr.open_dataset(f, decode_cf=False, mask_and_scale=False) as ds: + da = ds[v] + a = da.values + undef, sf = get_undef_and_sf(da) + + m = (a != undef) + if not np.any(m): + continue + + x = a[m].astype(np.float64) * sf + neg += int((x < 0).sum()) + gt100 += int((x > 100).sum()) + + if np.any((x < 0) | (x > 100)): + bad_tiles.append(os.path.basename(f)) + + print(f"\n{v}: count(x<0)={neg} count(x>100)={gt100} out of n={gcnt[v]}") + print(f"{v}: tiles with out-of-range values:", bad_tiles[:20], "count=", len(bad_tiles)) + diff --git a/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/one_tile_plot_checker.py 
b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/one_tile_plot_checker.py new file mode 100755 index 000000000..bfe0c08fc --- /dev/null +++ b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/one_tile_plot_checker.py @@ -0,0 +1,65 @@ +import xarray as xr +import numpy as np +import matplotlib.pyplot as plt +import os + + +# ---- USER SETTINGS ---- +# discover base current +DIR_OLD = "/discover/nobackup/projects/gmao/bcs_shared/make_bcs_inputs/land/soil/SOIL-DATA/soil_properties/v3/" +# discover what I produce +#DIR_NEW = "/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/out_HWSDv2_NGDC_STATSGO_AFSIS_noMASK/" # w afsis +#DIR_NEW = "/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/out_HWSDv2_NGDC_STATSGO_noMASK/" # no afsis +DIR_NEW = "/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v4/" # no afsis +TILE = "SoilProperties_H20V09.nc" # africa tile +#TILE = "SoilProperties_H10V17.nc" #"SoilProperties_H13V06.nc" # change per region +#TILE = "SoilProperties_H13V06.nc" # change per region +VAR = "Clay0_30" +# ---------------------- + + +f_old = os.path.join(DIR_OLD, TILE) +f_new = os.path.join(DIR_NEW, TILE) + +def get_undef_sf(da): + undef = da.attrs.get("UNDEF", -9999) + sf = float(da.attrs.get("ScaleFactor", 1.0)) + return undef, sf + +with xr.open_dataset(f_old, decode_cf=False, mask_and_scale=False) as a, \ + xr.open_dataset(f_new, decode_cf=False, mask_and_scale=False) as b: + + xa = a[VAR].values.astype(np.float64) + yb = b[VAR].values.astype(np.float64) + + undef_a, sf_a = get_undef_sf(a[VAR]) + undef_b, sf_b = get_undef_sf(b[VAR]) + + # Convert each to physical percent first + x = np.where(xa != undef_a, xa * sf_a, np.nan) + y = np.where(yb != undef_b, yb * sf_b, np.nan) + +diff = y - x # percent (percentage 
points) + +plt.figure(figsize=(6, 5)) +im = plt.imshow(diff, origin="lower", cmap="RdBu_r", vmin=-20, vmax=20) +plt.colorbar(im, label=f"{VAR} diff (percentage points)") +plt.title(f"{VAR} NEW - OLD\n{TILE}") +plt.tight_layout() +plt.show() + +old_undef = ~np.isfinite(x) +new_undef = ~np.isfinite(y) + +print("old undef %:", 100*old_undef.mean()) +print("new undef %:", 100*new_undef.mean()) +print("old undef -> new filled %:", 100*np.mean(old_undef & ~new_undef)) +print("old filled -> new undef %:", 100*np.mean(~old_undef & new_undef)) + +both = (~old_undef) & (~new_undef) +d = diff[both] +print("both-filled count:", both.sum()) +print("diff stats (pp): min/mean/max =", np.min(d), np.mean(d), np.max(d)) +print("fraction |diff|>1pp:", np.mean(np.abs(d) > 1.0)) +print("fraction |diff|>5pp:", np.mean(np.abs(d) > 5.0)) + diff --git a/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/plot_SoilPropertiesHiiVjj.py b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/plot_SoilPropertiesHiiVjj.py new file mode 100755 index 000000000..9b24c5932 --- /dev/null +++ b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/plot_SoilPropertiesHiiVjj.py @@ -0,0 +1,409 @@ +import os +import numpy as np +import xarray as xr +import matplotlib.pyplot as plt + +# ----------------------------- +# User settings +# ----------------------------- +V3_DIR = "/discover/nobackup/projects/gmao/bcs_shared/make_bcs_inputs/land/soil/SOIL-DATA/soil_properties/v3/" +#NEW_DIR = "/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/out_HWSDv2_NGDC_STATSGO_noMASK/" +NEW_DIR = "/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v4/" +OUTDIR = "./soil_global_plots" +os.makedirs(OUTDIR, exist_ok=True) + +# Variables we care about (Silt is derived) +PLOT_VARS = 
["Mask", "Clay0_30", "Sand0_30", "Silt0_30", "OC0_30"] # Silt is derived from Clay+Sand + +# Coarse global plotting resolution (degrees) +# 0.25 is nicer but heavier. 0.5 is fast. +GLOBAL_RES_DEG = 0.5 + +# Tile list (H01..H36, V01..V18) +HS = range(1, 37) +VS = range(1, 19) + +# Optional zoom region (set to None to skip) +# Example: Caribbean-ish +ZOOM = None +#ZOOM = dict(name="Caribbean", lon_min=-85, lon_max=-55, lat_min=-25, lat_max=5) +#ZOOM = dict(name="Indonesia", lon_min=90, lon_max=150, lat_min=-15, lat_max=15) +#ZOOM = dict(name="CONUS_tile_H09V13", lon_min=-100, lon_max=-90, lat_min=30, lat_max=40) +ZOOM = dict(name="Africa_tile_H20V09", lon_min=10, lon_max=20, lat_min=-10, lat_max=0) + +# Native zoom sampling step (1 = full, 2 = every other pixel, etc.) +ZOOM_STEP = 1 + +# ----------------------------- +# Helpers +# ----------------------------- +def tile_name(h, v): + return f"SoilProperties_H{h:02d}V{v:02d}.nc" + +def get_undef_sf(da): + undef = da.attrs.get("UNDEF", -9999) + sf = float(da.attrs.get("ScaleFactor", 1.0)) + return undef, sf + +def read_percent(ds, varname): + """ + Return var in physical percent units (float64), with undef->nan. 
+ """ + da = ds[varname] + a = da.values.astype(np.float64) + undef, sf = get_undef_sf(da) + out = np.where(a != undef, a * sf, np.nan) + return out + +def read_mask(ds): + da = ds["Mask"] + a = da.values.astype(np.float64) + undef = da.attrs.get("UNDEF", -9999) + # keep 0/1, undef->nan + return np.where(a != undef, a, np.nan) + +def compute_silt(clay_pct, sand_pct): + silt = 100.0 - clay_pct - sand_pct + # clamp physically a bit; keep NaNs + silt = np.where(np.isfinite(silt), np.clip(silt, 0.0, 100.0), np.nan) + return silt + +def land_mask_from(ds): + clay = read_percent(ds, "Clay0_30") + sand = read_percent(ds, "Sand0_30") + return np.isfinite(clay) | np.isfinite(sand) + +def accumulate_to_grid(lat2d, lon2d, val2d, lat_bins, lon_bins, sum_grid, cnt_grid): + """ + Vectorized binning accumulation: adds val2d into (lat,lon) coarse bins. + """ + m = np.isfinite(val2d) + if not np.any(m): + return + + latv = lat2d[m] + lonv = lon2d[m] + vv = val2d[m] + + # bin indices + yi = np.digitize(latv, lat_bins) - 1 + xi = np.digitize(lonv, lon_bins) - 1 + + ok = (yi >= 0) & (yi < sum_grid.shape[0]) & (xi >= 0) & (xi < sum_grid.shape[1]) + yi = yi[ok]; xi = xi[ok]; vv = vv[ok] + if yi.size == 0: + return + + np.add.at(sum_grid, (yi, xi), vv) + np.add.at(cnt_grid, (yi, xi), 1) + +def finalize_mean(sum_grid, cnt_grid): + mean = np.full_like(sum_grid, np.nan, dtype=np.float64) + ok = cnt_grid > 0 + mean[ok] = sum_grid[ok] / cnt_grid[ok] + return mean + +def plot_global(field2d, lat_bins, lon_bins, title, out_png, vmin=None, vmax=None, cmap="viridis"): + plt.figure(figsize=(11, 5)) + extent = [lon_bins[0], lon_bins[-1], lat_bins[0], lat_bins[-1]] + im = plt.imshow(field2d, origin="lower", extent=extent, vmin=vmin, vmax=vmax, cmap=cmap, aspect="auto") + plt.colorbar(im, label="percent") + plt.title(title) + plt.xlabel("lon") + plt.ylabel("lat") + plt.tight_layout() + plt.savefig(out_png, dpi=200) + plt.close() + +def plot_global_diff(diff2d, lat_bins, lon_bins, title, out_png, 
vmax=5.0): + plt.figure(figsize=(11, 5)) + extent = [lon_bins[0], lon_bins[-1], lat_bins[0], lat_bins[-1]] + im = plt.imshow(diff2d, origin="lower", extent=extent, vmin=-vmax, vmax=vmax, + cmap="RdBu_r", aspect="auto") + plt.colorbar(im, label="NEW - v3 (percentage points)") + plt.title(title) + plt.xlabel("lon") + plt.ylabel("lat") + plt.tight_layout() + plt.savefig(out_png, dpi=200) + plt.close() + +def build_global_coarse_map(root_dir, varname, res_deg=0.5): + """ + Build coarse global mean map for a variable from 36x18 tiles. + varname can be Clay0_30, Sand0_30, OC0_30, or derived Silt0_30. + """ + lat_bins = np.arange(-90.0, 90.0 + res_deg, res_deg) + lon_bins = np.arange(-180.0, 180.0 + res_deg, res_deg) + + sum_grid = np.zeros((len(lat_bins)-1, len(lon_bins)-1), dtype=np.float64) + cnt_grid = np.zeros((len(lat_bins)-1, len(lon_bins)-1), dtype=np.int64) + max_grid = None + if varname == "Mask": + max_grid = np.zeros((len(lat_bins)-1, len(lon_bins)-1), dtype=np.float64) + + for h in HS: + for v in VS: + f = os.path.join(root_dir, tile_name(h, v)) + if not os.path.exists(f): + continue + + with xr.open_dataset(f, decode_cf=False, mask_and_scale=False) as ds: + lat1 = ds["latitude"].values.astype(np.float64) + lon1 = ds["longitude"].values.astype(np.float64) + + # build 2D grids (lat varies by row, lon by col) + lat2d = np.repeat(lat1[:, None], lon1.size, axis=1) + lon2d = np.repeat(lon1[None, :], lat1.size, axis=0) + + if varname == "Silt0_30": + clay = read_percent(ds, "Clay0_30") + sand = read_percent(ds, "Sand0_30") + val = compute_silt(clay, sand) + + accumulate_to_grid(lat2d, lon2d, val, lat_bins, lon_bins, sum_grid, cnt_grid) + + elif varname == "Mask": + val = read_mask(ds) + m = np.isfinite(val) + if np.any(m): + latv = lat2d[m] + lonv = lon2d[m] + vv = val[m] + + yi = np.digitize(latv, lat_bins) - 1 + xi = np.digitize(lonv, lon_bins) - 1 + + ok = (yi >= 0) & (yi < max_grid.shape[0]) & (xi >= 0) & (xi < max_grid.shape[1]) + yi = yi[ok]; xi = xi[ok]; vv 
= vv[ok] + + np.maximum.at(max_grid, (yi, xi), vv) + + else: + val = read_percent(ds, varname) + accumulate_to_grid(lat2d, lon2d, val, lat_bins, lon_bins, sum_grid, cnt_grid) + + if varname == "Mask": + return max_grid, lat_bins, lon_bins + + mean = finalize_mean(sum_grid, cnt_grid) + return mean, lat_bins, lon_bins + +def build_zoom_native(root_dir, varname, lon_min, lon_max, lat_min, lat_max, step=1): + """ + Build a native-resolution (tile-res) zoom mosaic for a lon/lat box. + Stitches only tiles that intersect the region and samples with step. + Returns (field, lon2d, lat2d) for plotting with pcolormesh/imshow. + """ + chunks = [] + + for h in HS: + for v in VS: + f = os.path.join(root_dir, tile_name(h, v)) + if not os.path.exists(f): + continue + + with xr.open_dataset(f, decode_cf=False, mask_and_scale=False) as ds: + lat = ds["latitude"].values.astype(np.float64) + lon = ds["longitude"].values.astype(np.float64) + + # quick tile bbox reject + if (lat.max() < lat_min) or (lat.min() > lat_max) or (lon.max() < lon_min) or (lon.min() > lon_max): + continue + + # index window in this tile + ilat = np.where((lat >= lat_min) & (lat <= lat_max))[0] + ilon = np.where((lon >= lon_min) & (lon <= lon_max))[0] + if ilat.size == 0 or ilon.size == 0: + continue + + ilat = ilat[::step] + ilon = ilon[::step] + + if varname == "Silt0_30": + clay = read_percent(ds, "Clay0_30")[np.ix_(ilat, ilon)] + sand = read_percent(ds, "Sand0_30")[np.ix_(ilat, ilon)] + val = compute_silt(clay, sand) + elif varname == "Mask": + val = read_mask(ds)[np.ix_(ilat, ilon)] + else: + val = read_percent(ds, varname)[np.ix_(ilat, ilon)] + + lat_sub = lat[ilat] + lon_sub = lon[ilon] + + chunks.append((lat_sub, lon_sub, val)) + + if not chunks: + return None, None, None + + # We’ll mosaic by binning onto a regular lat/lon grid at native sample spacing: + # infer approximate dx/dy from the first chunk + lat0, lon0, _ = chunks[0] + dlat = np.median(np.abs(np.diff(lat0))) if lat0.size > 1 else 0.008333 + 
dlon = np.median(np.abs(np.diff(lon0))) if lon0.size > 1 else 0.008333 + + lat_bins = np.arange(lat_min, lat_max + dlat, dlat) + lon_bins = np.arange(lon_min, lon_max + dlon, dlon) + + sum_grid = np.zeros((lat_bins.size, lon_bins.size), dtype=np.float64) + cnt_grid = np.zeros((lat_bins.size, lon_bins.size), dtype=np.int64) + + for lat_sub, lon_sub, val in chunks: + lat2d = np.repeat(lat_sub[:, None], lon_sub.size, axis=1) + lon2d = np.repeat(lon_sub[None, :], lat_sub.size, axis=0) + accumulate_to_grid(lat2d, lon2d, val, lat_bins, lon_bins, sum_grid, cnt_grid) + + mean = finalize_mean(sum_grid, cnt_grid) + + lon2d = np.repeat(lon_bins[None, :], lat_bins.size, axis=0) + lat2d = np.repeat(lat_bins[:, None], lon_bins.size, axis=1) + return mean, lon2d, lat2d + +def plot_zoom(field, lon2d, lat2d, title, out_png, vmin=None, vmax=None, cmap="viridis"): + plt.figure(figsize=(9, 6)) + extent = [lon2d.min(), lon2d.max(), lat2d.min(), lat2d.max()] + im = plt.imshow(field, origin="lower", extent=extent, vmin=vmin, vmax=vmax, cmap=cmap, aspect="auto") + plt.colorbar(im, label="percent") + plt.title(title) + plt.xlabel("lon") + plt.ylabel("lat") + plt.tight_layout() + plt.savefig(out_png, dpi=200) + plt.close() + +def plot_zoom_diff(diff, lon2d, lat2d, title, out_png, vmax=5.0): + plt.figure(figsize=(9, 6)) + extent = [lon2d.min(), lon2d.max(), lat2d.min(), lat2d.max()] + im = plt.imshow(diff, origin="lower", extent=extent, vmin=-vmax, vmax=vmax, + cmap="RdBu_r", aspect="auto") + plt.colorbar(im, label="NEW - v3 (percentage points)") + plt.title(title) + plt.xlabel("lon") + plt.ylabel("lat") + plt.tight_layout() + plt.savefig(out_png, dpi=200) + plt.close() + +# ----------------------------- +# Main +# ----------------------------- +def main(): + # Global coarse maps + for v in PLOT_VARS: + print("Building global coarse:", v) + + new_map, lat_bins, lon_bins = build_global_coarse_map( + NEW_DIR, v, res_deg=GLOBAL_RES_DEG + ) + v3_map, _, _ = build_global_coarse_map( + V3_DIR, v, 
res_deg=GLOBAL_RES_DEG + ) + + # ---- variable-specific plotting settings ---- + if v == "Mask": + vmin, vmax = 0, 1 + cmap = "gray" + title_new = ( + f"NEW Mask (coarse {GLOBAL_RES_DEG}°)\n" + "1 = soil properties exist, 0 = no soil" + ) + title_v3 = ( + f"v3 Mask (coarse {GLOBAL_RES_DEG}°)\n" + "1 = soil properties exist, 0 = no soil" + ) + else: + vmin, vmax = 0, 100 + cmap = "jet" + title_new = f"NEW {v} (coarse {GLOBAL_RES_DEG}°)" + title_v3 = f"v3 {v} (coarse {GLOBAL_RES_DEG}°)" + + # ---- Plot absolute maps ---- + plot_global( + new_map, lat_bins, lon_bins, + title_new, + os.path.join(OUTDIR, f"global_NEW_{v}_{GLOBAL_RES_DEG}deg.png"), + vmin=vmin, vmax=vmax, cmap=cmap + ) + + plot_global( + v3_map, lat_bins, lon_bins, + title_v3, + os.path.join(OUTDIR, f"global_v3_{v}_{GLOBAL_RES_DEG}deg.png"), + vmin=vmin, vmax=vmax, cmap=cmap + ) + + # ---- Plot difference (skip for Mask) ---- + if v != "Mask": + diff = new_map - v3_map + vmax_diff = 2.0 if "OC" in v else 10.0 + + plot_global_diff( + diff, lat_bins, lon_bins, + f"NEW − v3 {v} (coarse {GLOBAL_RES_DEG}°)", + os.path.join(OUTDIR, f"global_DIFF_{v}_{GLOBAL_RES_DEG}deg.png"), + vmax=vmax_diff + ) + + + # Optional zoom + if ZOOM is not None: + name = ZOOM["name"] + lon_min, lon_max = ZOOM["lon_min"], ZOOM["lon_max"] + lat_min, lat_max = ZOOM["lat_min"], ZOOM["lat_max"] + + for v in PLOT_VARS: + print("Building zoom:", name, v) + + new_zoom, lon2d, lat2d = build_zoom_native( + NEW_DIR, v, lon_min, lon_max, lat_min, lat_max, step=ZOOM_STEP + ) + v3_zoom, lon2d2, lat2d2 = build_zoom_native( + V3_DIR, v, lon_min, lon_max, lat_min, lat_max, step=ZOOM_STEP + ) + + if new_zoom is None or v3_zoom is None: + print(" zoom region had no data for", v) + continue + + if v == "Mask": + vmin, vmax = 0, 1 + cmap = "gray" + title_new = f"NEW Mask zoom {name}\n1 = soil properties exist, 0 = no soil" + title_v3 = f"v3 Mask zoom {name}\n1 = soil properties exist, 0 = no soil" + else: + vmin, vmax = 0, 100 + cmap = "jet" + 
title_new = f"NEW {v} zoom {name} (step={ZOOM_STEP})" + title_v3 = f"v3 {v} zoom {name} (step={ZOOM_STEP})" + + plot_zoom( + new_zoom, lon2d, lat2d, + title_new, + os.path.join(OUTDIR, f"zoom_{name}_NEW_{v}_step{ZOOM_STEP}.png"), + vmin=vmin, vmax=vmax, cmap=cmap + ) + + plot_zoom( + v3_zoom, lon2d2, lat2d2, + title_v3, + os.path.join(OUTDIR, f"zoom_{name}_v3_{v}_step{ZOOM_STEP}.png"), + vmin=vmin, vmax=vmax, cmap=cmap + ) + + # Difference (skip for Mask) + if v != "Mask": + diffz = new_zoom - v3_zoom + vmax_diff = 2.0 if "OC" in v else 10.0 + plot_zoom_diff( + diffz, lon2d, lat2d, + f"NEW − v3 {v} zoom {name} (step={ZOOM_STEP})", + os.path.join(OUTDIR, f"zoom_{name}_DIFF_{v}_step{ZOOM_STEP}.png"), + vmax=vmax_diff + ) + + print("Wrote plots under:", OUTDIR) + +if __name__ == "__main__": + main() + diff --git a/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/plot_stich_global_var_layer_checker.py b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/plot_stich_global_var_layer_checker.py new file mode 100755 index 000000000..2941115f3 --- /dev/null +++ b/GEOSagcm_GridComp/GEOSphysics_GridComp/GEOSsurface_GridComp/Utils/Raster/preproc/soil/soil_properties/v2/plot_stich_global_var_layer_checker.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python3 +import os +import sys +import time +import xarray as xr +import numpy as np +import matplotlib.pyplot as plt +import cartopy.crs as ccrs +from matplotlib.colors import Normalize +from matplotlib.cm import ScalarMappable +### +#python3 plot_stich_global_var_layer.py Sand D2 +#python3 plot_stich_global_var_layer.py Silt D2 +#python3 plot_stich_global_var_layer.py Clay D2 + +nc_dir = "/discover/nobackup/projects/gmao/bcs_shared/preprocessing_bcs_inputs/land/soil/soil_properties/v2/output_STEP1/" + +VAR_RANGES = { + "Sand": (0, 100), + "Silt": (0, 100), + "Clay": (0, 100), + "Coarse": (0, 100), + "SOC": (0, 100), + 
"Organic_Matter": (0, 100), + "Share": (0, 100), + "Porosity": (0, 1), + "Bulk_Density": (0, 2.5), + "Ref_Bulk_Density": (0.5, 2.5), + "PH": (0, 10), + "Sequence_Used": (0, 8), +} + +# Zoom control: +# - set to None for global +# - or set to [lonmin, lonmax, latmin, latmax] for zoom +zoom_extent = None +# zoom_extent = [-110, -100, 20, 30] # example zoom + + +def stitch_plot(variable, layer, extent=None, heartbeat=25): + t0 = time.time() + + files = sorted(f for f in os.listdir(nc_dir) if f.endswith(f"_{layer}.nc")) + if not files: + print(f"[WARN] No files found for layer {layer} in {nc_dir}") + return 1 + + print(f"\n[INFO] Start plot: variable={variable} layer={layer} tiles={len(files)} extent={extent}") + + vmin, vmax = VAR_RANGES.get(variable, (None, None)) + norm = Normalize(vmin=vmin, vmax=vmax) if (vmin is not None and vmax is not None) else None + cmap = plt.colormaps["YlGnBu"] + + fig, ax = plt.subplots(figsize=(14, 7), subplot_kw={"projection": ccrs.PlateCarree()}) + plotted = False + + for i, fn in enumerate(files, start=1): + path = os.path.join(nc_dir, fn) + + if i == 1 or (i % heartbeat == 0) or (i == len(files)): + print(f"[INFO] tile {i:3d}/{len(files)}: {fn}") + + # Use context manager to ensure file handles close even on errors + with xr.open_dataset(path) as ds: + if variable not in ds: + continue + + da = ds[variable].transpose("lat", "lon") + lat = ds["lat"].values + lon = ds["lon"].values + + data = np.ma.masked_invalid(da.values) + lon_grid, lat_grid = np.meshgrid(lon, lat) + + ax.pcolormesh( + lon_grid, lat_grid, data, + cmap=cmap, norm=norm, + shading="auto", + transform=ccrs.PlateCarree() + ) + plotted = True + + if not plotted: + print(f"[WARN] Variable '{variable}' not found in any tiles for layer {layer}") + plt.close() + return 2 + + ax.coastlines(resolution="10m" if extent else "110m") + gl = ax.gridlines(draw_labels=True, linewidth=0.5, color="gray", alpha=0.5, linestyle="--") + gl.top_labels = False + gl.right_labels = False + + if 
extent: + ax.set_extent(extent, crs=ccrs.PlateCarree()) + tag = f"zoom_lon{extent[0]}_{extent[1]}_lat{extent[2]}_{extent[3]}" + else: + ax.set_global() + tag = "global" + + ax.set_title(f"{variable} ({layer}) — {tag}", pad=20) + + sm = ScalarMappable(norm=norm, cmap=cmap) + sm.set_array([]) + fig.colorbar(sm, ax=ax, orientation="vertical", label=variable) + + out = f"{variable}_{layer}_{tag}.png" + plt.savefig(out, dpi=300, bbox_inches="tight") + plt.close() + + dt = time.time() - t0 + print(f"[DONE] Saved: {out} ({dt:.1f} s)") + return 0 + + +if __name__ == "__main__": + # Usage: + # python3 plot_stich_global_var_layer.py Sand D2 + # If not provided, defaults: + variable = sys.argv[1] if len(sys.argv) > 1 else "Sand" + layer = sys.argv[2] if len(sys.argv) > 2 else "D2" + + print("\n========== HWSD2 Stitch Plotter ==========") + print(f"[INFO] nc_dir: {nc_dir}") + print(f"[INFO] variable: {variable}") + print(f"[INFO] layer: {layer}") + print(f"[INFO] zoom: {zoom_extent}") + print("=========================================\n") + + raise SystemExit(stitch_plot(variable, layer, extent=zoom_extent)) +