Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 9 additions & 11 deletions 4b_remapping/1_topo/1_find_HRU_elevation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
"# modules\n",
"import os\n",
"import geopandas as gpd\n",
"from rasterstats import zonal_stats\n",
"from pathlib import Path\n",
"from shutil import copyfile\n",
"from datetime import datetime"
"from datetime import datetime\n",
"from exactextract import exact_extract"
]
},
{
Expand Down Expand Up @@ -250,7 +250,8 @@
"outputs": [],
"source": [
"# Load the shapefile\n",
"gdf = gpd.read_file(str(intersect_path/intersect_name))"
"gdf = gpd.read_file(str(intersect_path/intersect_name))\n",
"raster_path = dem_path / dem_name"
]
},
{
Expand All @@ -260,10 +261,7 @@
"outputs": [],
"source": [
"# Calculate zonal statistics\n",
"stats = zonal_stats(gdf, \n",
" str(dem_path/dem_name), \n",
" stats=['mean'], \n",
" all_touched=True)"
"elev_means = exact_extract(str(raster_path), gdf, 'mean', output='pandas')"
]
},
{
Expand All @@ -273,7 +271,7 @@
"outputs": [],
"source": [
"# Add the mean elevation to the GeoDataFrame\n",
"gdf['elev_mean'] = [stat['mean'] for stat in stats]"
"gdf['elev_mean'] = elev_means"
]
},
{
Expand Down Expand Up @@ -363,9 +361,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "summa-env",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "summa-env"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -377,7 +375,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
"version": "3.11.11"
}
},
"nbformat": 4,
Expand Down
13 changes: 5 additions & 8 deletions 4b_remapping/1_topo/1_find_HRU_elevation.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,10 @@
# modules
import os
import geopandas as gpd
from rasterstats import zonal_stats
from pathlib import Path
from shutil import copyfile
from datetime import datetime

from exactextract import exact_extract

# --- Control file handling
# Easy access to control file folder
Expand Down Expand Up @@ -113,16 +112,14 @@ def make_default_path(suffix):

# --- Rasterstats analysis
# Load the shapefile
gdf = gpd.read_file(str(intersect_path/intersect_name))
gdf = gpd.read_file(intersect_path / intersect_name)
raster_path = dem_path / dem_name

# Calculate zonal statistics
stats = zonal_stats(gdf,
str(dem_path/dem_name),
stats=['mean'],
all_touched=True)
elev_means = exact_extract(str(raster_path), gdf, 'mean', output='pandas')

# Add the mean elevation to the GeoDataFrame
gdf['elev_mean'] = [stat['mean'] for stat in stats]
gdf['elev_mean'] = elev_means

# Save the updated GeoDataFrame
gdf.to_file(str(intersect_path/intersect_name))
Expand Down
84 changes: 39 additions & 45 deletions 4b_remapping/1_topo/2_find_HRU_soil_classes.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,9 @@
"from shutil import copyfile\n",
"from datetime import datetime\n",
"import geopandas as gpd\n",
"import rasterio\n",
"from rasterstats import zonal_stats\n",
"import pandas as pd\n",
"import numpy as np"
"import numpy as np\n",
"from exactextract import exact_extract"
]
},
{
Expand Down Expand Up @@ -201,7 +200,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Rasterstats analysis"
"#### Exactextract analysis"
]
},
{
Expand All @@ -211,7 +210,8 @@
"outputs": [],
"source": [
"# Load the shapefile\n",
"gdf = gpd.read_file(catchment_path / catchment_name)"
"gdf = gpd.read_file(catchment_path / catchment_name)\n",
"raster_path = soil_path / soil_name"
]
},
{
Expand All @@ -220,10 +220,13 @@
"metadata": {},
"outputs": [],
"source": [
"# Open the raster file\n",
"with rasterio.open(soil_path / soil_name) as src:\n",
" affine = src.transform\n",
" array = src.read(1) # Read the first band"
"# Perform exactextract with fractional coverage per class\n",
"stats = exact_extract(\n",
" str(raster_path),\n",
" gdf,\n",
" [\"unique\", \"frac\"],\n",
" output='pandas'\n",
")"
]
},
{
Expand All @@ -232,53 +235,44 @@
"metadata": {},
"outputs": [],
"source": [
"# Get unique values in the raster\n",
"unique_values = np.unique(array).astype(int)\n",
"unique_values = unique_values[unique_values != src.nodata] # Remove nodata value if present"
"# Build a list of row-wise dicts: {soil_class: fraction}\n",
"rows = []\n",
"for i, (classes, fractions) in stats.iterrows():\n",
" row_dict = {k: v for k, v in zip(classes, fractions)}\n",
" rows.append(row_dict)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/cyrilthebault/Postdoc_Ucal/02_DATA/01_RAW/CWARHM-main/cwarhm-env/lib/python3.11/site-packages/rasterstats/io.py:328: NodataWarning: Setting nodata to -999; specify nodata explicitly\n",
" warnings.warn(\n"
]
}
],
"source": [
"# Perform zonal statistics for each unique value\n",
"results = []\n",
"for value in unique_values:\n",
" category_stats = zonal_stats(gdf, array, affine=affine, stats=['count'], categorical=True, \n",
" category_map={value: 1})\n",
" counts = [stat.get(1, 0) for stat in category_stats] # Get count for category 1, default to 0 if not present\n",
" results.append(pd.DataFrame(counts, columns=[f'USGS_{value}']))"
]
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Combine all results\n",
"hist_df = pd.concat(results, axis=1).fillna(0).astype(int)"
"# Convert to DataFrame\n",
"df_stats = pd.DataFrame(rows)\n",
"\n",
"# Replace NaN with 0, ensure float type\n",
"df_stats = df_stats.fillna(0).astype(float)\n",
"\n",
"# Sort columns numerically\n",
"sorted_columns = sorted(df_stats.columns, key=lambda x: int(x))\n",
"df_stats = df_stats[sorted_columns]\n",
"\n",
"# Round to 4 digit\n",
"df_stats = df_stats.round(4)\n",
"\n",
"# Rename columns to USGS soil class format\n",
"df_stats.columns = [f'USGS_{int(col)}' for col in df_stats.columns]"
]
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Combine the original GeoDataFrame with the histogram results\n",
"result = gdf.join(hist_df)"
"# Merge with original GeoDataFrame\n",
"result = gdf.join(df_stats)"
]
},
{
Expand Down Expand Up @@ -368,9 +362,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "summa-env",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "summa-env"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -382,7 +376,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
"version": "3.11.11"
}
},
"nbformat": 4,
Expand Down
52 changes: 31 additions & 21 deletions 4b_remapping/1_topo/2_find_HRU_soil_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,9 @@
from shutil import copyfile
from datetime import datetime
import geopandas as gpd
import rasterio
from rasterstats import zonal_stats
import pandas as pd
import numpy as np
from exactextract import exact_extract


# --- Control file handling
Expand Down Expand Up @@ -92,32 +91,43 @@ def make_default_path(suffix):
intersect_path.mkdir(parents=True, exist_ok=True)


# --- Rasterstats analysis
# --- Exactextract analysis
# Load the shapefile
gdf = gpd.read_file(catchment_path / catchment_name)
raster_path = soil_path / soil_name

# Open the raster file
with rasterio.open(soil_path / soil_name) as src:
affine = src.transform
array = src.read(1) # Read the first band
# Perform exactextract with fractional coverage per class
stats = exact_extract(
str(raster_path),
gdf,
["unique", "frac"],
output='pandas'
)

# Get unique values in the raster
unique_values = np.unique(array).astype(int)
unique_values = unique_values[unique_values != src.nodata] # Remove nodata value if present
# Build a list of row-wise dicts: {soil_class: fraction}
rows = []
for i, (classes, fractions) in stats.iterrows():
row_dict = {k: v for k, v in zip(classes, fractions)}
rows.append(row_dict)

# Perform zonal statistics for each unique value
results = []
for value in unique_values:
category_stats = zonal_stats(gdf, array, affine=affine, stats=['count'], categorical=True,
category_map={value: 1})
counts = [stat.get(1, 0) for stat in category_stats] # Get count for category 1, default to 0 if not present
results.append(pd.DataFrame(counts, columns=[f'USGS_{value}']))
# Convert to DataFrame
df_stats = pd.DataFrame(rows)

# Combine all results
hist_df = pd.concat(results, axis=1).fillna(0).astype(int)
# Replace NaN with 0, ensure float type
df_stats = df_stats.fillna(0).astype(float)

# Combine the original GeoDataFrame with the histogram results
result = gdf.join(hist_df)
# Sort columns numerically
sorted_columns = sorted(df_stats.columns, key=lambda x: int(x))
df_stats = df_stats[sorted_columns]

# Round to 4 digit
df_stats = df_stats.round(4)

# Rename columns to USGS soil class format
df_stats.columns = [f'USGS_{int(col)}' for col in df_stats.columns]

# Merge with original GeoDataFrame
result = gdf.join(df_stats)

# Save the result
result.to_file(intersect_path / intersect_name)
Expand Down
Loading