From 8766ac0433404921dac1494a6dd4caaec14e0b1e Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Fri, 24 Apr 2026 11:14:26 -0600
Subject: [PATCH 1/2] Add notebook to generate riv_nut fluxes

Runs with BGC enabled require river nutrient fluxes on the ocean grid. The
notebook added in this commit maps river nutrients from a runoff grid to the
new ocean grid.

Note that adding a new runoff grid (e.g. ERA5) will require generating river
nutrients on that new grid before this script can map them to the ocean grid.
Scripts to create those files will be added in a future PR.
---
 .../Map river fluxes to ocean grid.ipynb      | 247 ++++++++++++++++++
 bgc/riv_flux/MappingClass.py                  |  53 ++++
 bgc/riv_flux/README                           |  14 +
 envs/environment.yml                          |   1 +
 4 files changed, 315 insertions(+)
 create mode 100644 bgc/riv_flux/Map river fluxes to ocean grid.ipynb
 create mode 100644 bgc/riv_flux/MappingClass.py
 create mode 100644 bgc/riv_flux/README

diff --git a/bgc/riv_flux/Map river fluxes to ocean grid.ipynb b/bgc/riv_flux/Map river fluxes to ocean grid.ipynb
new file mode 100644
index 0000000..e578cbf
--- /dev/null
+++ b/bgc/riv_flux/Map river fluxes to ocean grid.ipynb	
@@ -0,0 +1,247 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "707d7297-b13d-4af4-80f3-017073eafe24",
+   "metadata": {},
+   "source": [
+    "#### Generate river input files on ocean grid\n",
+    "\n",
+    "Currently the CESM ocean component (either MOM6 or POP) reads in riverine fluxes and adds them to surface fluxes returned from MARBL.\n",
+    "Matt Long has river inputs on the `rJRA025` and `rx1` runoff grids, generated by scripts in the [`cgd-os` repo](https://github.com/NCAR/cgd-os).\n",
+    "I was able to adapt those scripts to generate river inputs on the `r05` grid.\n",
+    "For each pair of runoff and ocean grids, we use the same runoff -> ocean map as CESM."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6f266237-7ef3-477d-8326-e34b0014a8ec",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "from datetime import datetime\n",
+    "import os\n",
+    "\n",
+    "import xarray as xr\n",
+    "import numpy as np\n",
+    "\n",
+    "from MappingClass import ESMFMappingWrapper\n",
+    "\n",
+    "date_str = datetime.now().strftime(\"%Y%m%d\")\n",
+    "full_date = '-'.join((date_str[:4], date_str[4:6], date_str[6:]))\n",
+    "print(f'xarray: {xr.__version__}')\n",
+    "print(f'numpy: {np.__version__}')\n",
+    "print(f'date string: {date_str}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e2e34fdc-a266-4357-964a-616857edba7a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# User name appears in netCDF metadata\n",
+    "user_full_name = 'M. Levy'\n",
+    "\n",
+    "# The source grid is the runoff grid\n",
+    "# src_grid = 'rx1'\n",
+    "# src_grid = 'rJRA025'\n",
+    "src_grid = 'r05'\n",
+    "\n",
+    "# The destination grid is the ocean grid\n",
+    "# dst_grid = 'tx0.66v1'\n",
+    "# dst_grid = 'tx2_3v2'\n",
+    "dst_grid = 'tx2_3v3'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f3f3811e-ae23-4c73-b939-2a7ee96154e0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "runoff_data = dict()\n",
+    "mapping_dict = dict()\n",
+    "mapping_suffix = dict()\n",
+    "out_file = dict()\n",
+    "rof_grids = ['rx1', 'rJRA025', 'r05']\n",
+    "\n",
+    "for rof_grid in rof_grids:\n",
+    "    mapping_dict[rof_grid] = dict()\n",
+    "    mapping_suffix[rof_grid] = dict()\n",
+    "    out_file[rof_grid] = dict()\n",
+    "\n",
+    "# Store location of runoff data\n",
+    "runoff_root = os.path.join(os.sep, 'glade', 'campaign', 'cgd', 'oce', 'datasets', 'cesm', 'river_nutrients')\n",
+    "runoff_data['rx1'] = os.path.join(runoff_root, 'river_nutrients.GNEWS_GNM.daitren_rx1.20190602.nc')\n",
+    "runoff_data['rJRA025'] = os.path.join(runoff_root, 'river_nutrients.GNEWS_GNM.JRA55.20190602.nc')\n",
+    "runoff_data['r05'] = os.path.join(runoff_root, 'river_nutrients.GNEWS_GNM.mozart05.20240202.nc')\n",
+    "\n",
+    "# Store location of mapping files\n",
+    "root_dir = os.path.join(os.sep, 'glade', 'campaign', 'cesm', 'cesmdata', 'inputdata', 'cpl', 'gridmaps')\n",
+    "# rx1 -> tx0.66v1\n",
+    "suffix = 'nnsm_e1000r300_190315'\n",
+    "mapping_suffix['rx1']['tx0.66v1'] = suffix\n",
+    "mapping_dict['rx1']['tx0.66v1'] = os.path.join(root_dir, 'rx1', f'map_rx1_to_tx0.66v1_{suffix}.nc')\n",
+    "# JRA025 -> tx0.66v1\n",
+    "suffix = 'nnsm_e333r100_190910'\n",
+    "mapping_suffix['rJRA025']['tx0.66v1'] = suffix\n",
+    "mapping_dict['rJRA025']['tx0.66v1'] = os.path.join(root_dir, 'rJRA025', f'map_JRA025m_to_tx0.66v1_{suffix}.nc')\n",
+    "# JRA025 -> tx2_3v2\n",
+    "suffix = 'nnsm_e250r250_241211'\n",
+    "mapping_suffix['rJRA025']['tx2_3v2'] = suffix\n",
+    "mapping_dict['rJRA025']['tx2_3v2'] = os.path.join(root_dir, 'rJRA025', f'map_jra_to_tx2_3_{suffix}.nc')\n",
+    "# r05 -> tx2_3v2\n",
+    "suffix = 'nnsm_e250r250_230914'\n",
+    "mapping_suffix['r05']['tx2_3v2'] = suffix\n",
+    "mapping_dict['r05']['tx2_3v2'] = os.path.join(root_dir, 'r05', f'map_r05_to_tx2_3_{suffix}.nc')\n",
+    "# JRA025 -> tx2_3v3\n",
+    "suffix = 'nnsm_e100r100_260306'\n",
+    "mapping_suffix['rJRA025']['tx2_3v3'] = suffix\n",
+    "mapping_dict['rJRA025']['tx2_3v3'] = os.path.join(root_dir, 'rJRA025', f'map_JRA025m_to_tx2_3v3_{suffix}.nc')\n",
+    "# r05 -> tx2_3v3\n",
+    "suffix = 'nnsm_e100r100_260306'\n",
+    "mapping_suffix['r05']['tx2_3v3'] = suffix\n",
+    "mapping_dict['r05']['tx2_3v3'] = os.path.join(root_dir, 'r05', f'map_r05_to_tx2_3v3_{suffix}.nc')\n",
+    "\n",
+    "# Names for netcdf output\n",
+    "for rof_grid in rof_grids:\n",
+    "    for ocn_grid in mapping_dict[rof_grid]:\n",
+    "        out_file[rof_grid][ocn_grid] = f'riv_nut.gnews_gnm.{rof_grid}_to_{ocn_grid}_{mapping_suffix[rof_grid][ocn_grid]}.{date_str}.nc'\n",
+    "\n",
+    "out_file[src_grid][dst_grid]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "aa948982-3a9c-415b-9c06-7d2ce4492381",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "out_vars = [\n",
+    "            'din_riv_flux',\n",
+    "            'dip_riv_flux',\n",
+    "            'don_riv_flux',\n",
+    "            'dop_riv_flux',\n",
+    "            'dsi_riv_flux',\n",
+    "            'dfe_riv_flux',\n",
+    "            'dic_riv_flux',\n",
+    "            'alk_riv_flux',\n",
+    "            'doc_riv_flux'\n",
+    "           ]\n",
+    "\n",
+    "ds_roff = xr.open_dataset(runoff_data[src_grid], decode_times=False)[out_vars]\n",
+    "ds_roff"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9b4dfe62-d01f-4a3b-9234-beba89244ea7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mapping_obj = ESMFMappingWrapper(mapping_dict[src_grid][dst_grid])\n",
+    "da_out = mapping_obj.map_var(ds_roff, out_vars[0])\n",
+    "ds = da_out.to_dataset()\n",
+    "for var in out_vars:\n",
+    "    if var in ds:\n",
+    "        continue\n",
+    "    ds[var] = mapping_obj.map_var(ds_roff, var)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5b206cc3-73ef-4694-baee-c7313c43c234",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Update time attributes\n",
+    "ds['time'].attrs = ds_roff['time'].attrs\n",
+    "ds['time'].attrs['axis'] = 'T'\n",
+    "ds['time'].attrs['cartesian_axis'] = 'T'\n",
+    "ds['time'].encoding['_FillValue'] = None\n",
+    "\n",
+    "# Update global attributes\n",
+    "ds.attrs['author'] = f'Created by {user_full_name}'\n",
+    "ds.attrs['creation_date'] = full_date\n",
+    "ds.attrs['procedure'] = 'Data sets are first interpolated to a runoff grid, then mapped to the ocean with the intention of using the same mapping file as for freshwater runoff.'\n",
+    "ds.attrs['source'] = 'Blend of GlobalNEWS and IMAGE-GNM data: GlobalNEWS: https://marine.rutgers.edu/globalnews/index.htm; IMAGE-GNM: https://easy.dans.knaw.nl/ui/datasets/id/easy-dataset:64145'\n",
+    "for attr in ds_roff.attrs:\n",
+    "    if 'assumption' in attr:\n",
+    "        ds.attrs[attr] = ds_roff.attrs[attr]\n",
+    "ds.attrs['mapping_src_file'] = runoff_data[src_grid]\n",
+    "ds.attrs['mapping_file'] = mapping_dict[src_grid][dst_grid]\n",
+    "ds"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9ee7f34b-5394-4b62-b4fa-beb6b40fca28",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Prepend 1900-01-01 time level and append 2001-01-01\n",
+    "# Since units are \"days since 0001-01-01\", YYYY-01-01 = 365*(YYYY-1)\n",
+    "ds_pre = ds.isel(time=[0]) # use a list to maintain time as first dimension\n",
+    "ds_pre.assign_coords(time=[365*(1900-1)])\n",
+    "\n",
+    "ds_post = ds.isel(time=[-1]) # use a list to maintain time as first dimension\n",
+    "ds_post['time'].assign_coords(time=[365*(2001-1)])\n",
+    "\n",
+    "ds_concat = xr.concat([ds_pre, ds, ds_post], \"time\", compat=\"override\", coords=\"minimal\")\n",
+    "ds_concat"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0286dc04-9d22-480f-9c58-b191f6457532",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ds_concat.isel(time=0)['din_riv_flux'].plot(levels=[0, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f7881c54-1ad1-4ca4-a602-b3a82c5a7a4f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ds_concat.to_netcdf(out_file[src_grid][dst_grid], unlimited_dims='time')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python [conda env:analysis]",
+   "language": "python",
+   "name": "conda-env-analysis-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.14.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/bgc/riv_flux/MappingClass.py b/bgc/riv_flux/MappingClass.py
new file mode 100644
index 0000000..2b4d803
--- /dev/null
+++ b/bgc/riv_flux/MappingClass.py
@@ -0,0 +1,53 @@
+import numpy as np
+from scipy.sparse import csc_matrix
+import xarray as xr
+
+class ESMFMappingWrapper(object):
+    def __init__(self, ESMF_mapping_file_name):
+        self._ds_map = xr.open_dataset(ESMF_mapping_file_name)
+        self._src_grid_size = np.prod(self._ds_map['src_grid_dims'].data)
+        self._src_grid_shape = (self._ds_map['src_grid_dims'].data[1], self._ds_map['src_grid_dims'].data[0])
+        self._dst_grid_size = np.prod(self._ds_map['dst_grid_dims'].data)
+        self._dst_grid_shape = (self._ds_map['dst_grid_dims'].data[1], self._ds_map['dst_grid_dims'].data[0])
+        self._mapping_weights_sparse = csc_matrix((self._ds_map['S'].data,
+                                                  (self._ds_map['row'].data-1, self._ds_map['col'].data-1)),
+                                                  shape=(self._dst_grid_size, self._src_grid_size))
+
+    ##################
+
+    def map_var(self, ds_src, var_name, scale_factor=0.01):
+        da_list = []
+        for time in range(len(ds_src['time'])):
+            native_values = ds_src[var_name].isel(time=time).data.reshape(self._src_grid_size)
+            mapped_values = self._mapping_weights_sparse*(scale_factor*native_values)
+            da_list.append(xr.DataArray(mapped_values.reshape(self._dst_grid_shape), dims=('y', 'x'), name=var_name))
+            if var_name in ['din_riv_flux', 'don_riv_flux']:
+                conv_factor = 28*1e-15*86400*365 # 28 mg / mmol, 1e-15 Tg / mg, 86400 s/d, 365 d/yr
+            elif var_name in ['dic_riv_flux', 'doc_riv_flux']:
+                conv_factor = 12*1e-18*86400*365 # 28 mg / mmol, 1e-18 Pg / mg, 86400 s/d, 365 d/yr
+            elif var_name in ['dip_riv_flux', 'dop_riv_flux', 'dsi_riv_flux']:
+                conv_factor = 1e-15*86400*365 # 1e-15 Tmol / mmol, 86400 s/d, 365 d/yr
+            elif var_name in ['dfe_riv_flux']:
+                conv_factor = 1e-12*86400*365 # 1e-12 Gmol / mmol, 86400 s/d, 365 d/yr
+            elif var_name in ['alk_riv_flux']:
+                continue
+            else:
+                conv_factor = 0
+            self._compute_global_integral(native_values, mapped_values, conv_factor, var_name, print_stat=time in [0, len(ds_src['time'])-1])
+        da = xr.concat(da_list, dim='time').assign_coords({'time': ds_src['time'].data})
+        da.attrs = ds_src[var_name].attrs
+        da.attrs['units'] = 'mmol/m^2/s'
+        da.encoding['_FillValue'] = None
+        return da
+    
+    ##################
+
+    def _compute_global_integral(self, native_values, mapped_values, conv_factor, var, print_stat=False):
+        r = 6371220 # radius of earth in m
+        native_area = r*r*self._ds_map['area_a'].data # r^2 since area is rad^2 not m^2
+        mapped_area = r*r*self._ds_map['area_b'].data # r^2 since area is rad^2 not m^2
+        if print_stat:
+            global_sum_native = np.sum(native_values*native_area*(0.01*conv_factor)) # 0.01 nmol/cm^2 -> mmol/m^2
+            global_sum_mapped = np.sum(mapped_values*mapped_area*conv_factor)
+            rel_err = np.abs((global_sum_native-global_sum_mapped)/global_sum_native)
+            print(f'{var} stats: sums are {global_sum_native:.3e} (native) and {global_sum_mapped:.3e} (mapped); rel_err is {rel_err:.3e}')
\ No newline at end of file
diff --git a/bgc/riv_flux/README b/bgc/riv_flux/README
new file mode 100644
index 0000000..27109a1
--- /dev/null
+++ b/bgc/riv_flux/README
@@ -0,0 +1,14 @@
+Generating new river nutrient flux files is a two step process:
+
+1. Create river nutrient flux dataset on the CESM runoff grid
+2. Map that dataset from the runoff grid to the ocean grid
+
+Step [1] is complicated -- it involves computing climatologies from the
+GlobalNEWS dataset and mapping them to the runoff grid. Scripts to do that
+exist in https://github.com/NCAR/cgd-os/tree/master/cesm/forcing/river_nutrients
+but those have not been copied to this repository yet -- we will do that when
+we are ready to bring in a new runoff grid.
+
+Users can complete step [2] with  "Map river fluxes to ocean grid.ipynb".
+All that is required is updating the dictionary that contains pointers to
+the rof -> ocn mapping files.
diff --git a/envs/environment.yml b/envs/environment.yml
index 40f96d2..7670d28 100644
--- a/envs/environment.yml
+++ b/envs/environment.yml
@@ -11,3 +11,4 @@ dependencies:
     - jupyter
     - jupyterlab
     - netCDF4
+    - scipy

From 6bacace1dc26a9fac42cef0538fb9a6c58bfa557 Mon Sep 17 00:00:00 2001
From: Michael Levy <mike.levy.work@gmail.com>
Date: Fri, 24 Apr 2026 11:45:14 -0600
Subject: [PATCH 2/2] Fixed bug in computing time

assign_coords() returns a new dataset, it does not augment in place... so I
wasn't actually adjusting the time dimension the way I thought. Also, I wasn't
capturing the attributes of the time coordinate, and I was writing out a
netCDF4 file instead of netCDF3-64bit
---
 bgc/riv_flux/Map river fluxes to ocean grid.ipynb | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/bgc/riv_flux/Map river fluxes to ocean grid.ipynb b/bgc/riv_flux/Map river fluxes to ocean grid.ipynb
index e578cbf..45db223 100644
--- a/bgc/riv_flux/Map river fluxes to ocean grid.ipynb	
+++ b/bgc/riv_flux/Map river fluxes to ocean grid.ipynb	
@@ -192,13 +192,13 @@
    "source": [
     "# Prepend 1900-01-01 time level and append 2001-01-01\n",
     "# Since units are \"days since 0001-01-01\", YYYY-01-01 = 365*(YYYY-1)\n",
-    "ds_pre = ds.isel(time=[0]) # use a list to maintain time as first dimension\n",
-    "ds_pre.assign_coords(time=[365*(1900-1)])\n",
+    "ds_pre = ds.isel(time=[0]).assign_coords(time=[365*(1900-1)]) # use a list to maintain time as first dimension\n",
     "\n",
-    "ds_post = ds.isel(time=[-1]) # use a list to maintain time as first dimension\n",
-    "ds_post['time'].assign_coords(time=[365*(2001-1)])\n",
+    "ds_post = ds.isel(time=[-1]).assign_coords(time=[365*(2001-1)]) # use a list to maintain time as first dimension\n",
     "\n",
     "ds_concat = xr.concat([ds_pre, ds, ds_post], \"time\", compat=\"override\", coords=\"minimal\")\n",
+    "ds_concat[\"time\"].attrs = ds[\"time\"].attrs\n",
+    "ds_concat[\"time\"].encoding[\"_FillValue\"] = None\n",
     "ds_concat"
    ]
   },
@@ -219,7 +219,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ds_concat.to_netcdf(out_file[src_grid][dst_grid], unlimited_dims='time')"
+    "ds_concat.to_netcdf(out_file[src_grid][dst_grid], unlimited_dims='time', format=\"NETCDF3_64BIT\")"
    ]
   }
  ],