NCAR · mnlevy1981 · Apr 24, 2026 · Apr 24, 2026
diff --git a/bgc/riv_flux/Map river fluxes to ocean grid.ipynb b/bgc/riv_flux/Map river fluxes to ocean grid.ipynb
@@ -0,0 +1,247 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "707d7297-b13d-4af4-80f3-017073eafe24",
+   "metadata": {},
+   "source": [
+    "#### Generate river input files on ocean grid\n",
+    "\n",
+    "Currently the CESM ocean component (either MOM6 or POP) reads in riverine fluxes and adds them to surface fluxes returned from MARBL.\n",
+    "Matt Long has river inputs on the `rJRA025` and `rx1` runoff grids, generated by scripts in the [`cgd-os` repo](https://github.com/NCAR/cgd-os).\n",
+    "I was able to adapt those scripts to generate river inputs on the `r05` grid.\n",
+    "For each pair of runoff and ocean grids, we use the same runoff -> ocean map as CESM."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6f266237-7ef3-477d-8326-e34b0014a8ec",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2\n",
+    "\n",
+    "from datetime import datetime\n",
+    "import os\n",
+    "\n",
+    "import xarray as xr\n",
+    "import numpy as np\n",
+    "\n",
+    "from MappingClass import ESMFMappingWrapper\n",
+    "\n",
+    "date_str = datetime.now().strftime(\"%Y%m%d\")\n",
+    "full_date = '-'.join((date_str[:4], date_str[4:6], date_str[6:]))\n",
+    "print(f'xarray: {xr.__version__}')\n",
+    "print(f'numpy: {np.__version__}')\n",
+    "print(f'date string: {date_str}')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e2e34fdc-a266-4357-964a-616857edba7a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# User name appears in netCDF metadata\n",
+    "user_full_name = 'M. Levy'\n",
+    "\n",
+    "# The source grid is the runoff grid\n",
+    "# src_grid = 'rx1'\n",
+    "# src_grid = 'rJRA025'\n",
+    "src_grid = 'r05'\n",
+    "\n",
+    "# The destination grid is the ocean grid\n",
+    "# dst_grid = 'tx0.66v1'\n",
+    "# dst_grid = 'tx2_3v2'\n",
+    "dst_grid = 'tx2_3v3'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f3f3811e-ae23-4c73-b939-2a7ee96154e0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "runoff_data = dict()\n",
+    "mapping_dict = dict()\n",
+    "mapping_suffix = dict()\n",
+    "out_file = dict()\n",
+    "rof_grids = ['rx1', 'rJRA025', 'r05']\n",
+    "\n",
+    "for rof_grid in rof_grids:\n",
+    "    mapping_dict[rof_grid] = dict()\n",
+    "    mapping_suffix[rof_grid] = dict()\n",
+    "    out_file[rof_grid] = dict()\n",
+    "\n",
+    "# Store location of runoff data\n",
+    "runoff_root = os.path.join(os.sep, 'glade', 'campaign', 'cgd', 'oce', 'datasets', 'cesm', 'river_nutrients')\n",
+    "runoff_data['rx1'] = os.path.join(runoff_root, 'river_nutrients.GNEWS_GNM.daitren_rx1.20190602.nc')\n",
+    "runoff_data['rJRA025'] = os.path.join(runoff_root, 'river_nutrients.GNEWS_GNM.JRA55.20190602.nc')\n",
+    "runoff_data['r05'] = os.path.join(runoff_root, 'river_nutrients.GNEWS_GNM.mozart05.20240202.nc')\n",
+    "\n",
+    "# Store location of mapping files\n",
+    "root_dir = os.path.join(os.sep, 'glade', 'campaign', 'cesm', 'cesmdata', 'inputdata', 'cpl', 'gridmaps')\n",
+    "# rx1 -> tx0.66v1\n",
+    "suffix = 'nnsm_e1000r300_190315'\n",
+    "mapping_suffix['rx1']['tx0.66v1'] = suffix\n",
+    "mapping_dict['rx1']['tx0.66v1'] = os.path.join(root_dir, 'rx1', f'map_rx1_to_tx0.66v1_{suffix}.nc')\n",
+    "# JRA025 -> tx0.66v1\n",
+    "suffix = 'nnsm_e333r100_190910'\n",
+    "mapping_suffix['rJRA025']['tx0.66v1'] = suffix\n",
+    "mapping_dict['rJRA025']['tx0.66v1'] = os.path.join(root_dir, 'rJRA025', f'map_JRA025m_to_tx0.66v1_{suffix}.nc')\n",
+    "# JRA025 -> tx2_3v2\n",
+    "suffix = 'nnsm_e250r250_241211'\n",
+    "mapping_suffix['rJRA025']['tx2_3v2'] = suffix\n",
+    "mapping_dict['rJRA025']['tx2_3v2'] = os.path.join(root_dir, 'rJRA025', f'map_jra_to_tx2_3_{suffix}.nc')\n",
+    "# r05 -> tx2_3v2\n",
+    "suffix = 'nnsm_e250r250_230914'\n",
+    "mapping_suffix['r05']['tx2_3v2'] = suffix\n",
+    "mapping_dict['r05']['tx2_3v2'] = os.path.join(root_dir, 'r05', f'map_r05_to_tx2_3_{suffix}.nc')\n",
+    "# JRA025 -> tx2_3v3\n",
+    "suffix = 'nnsm_e100r100_260306'\n",
+    "mapping_suffix['rJRA025']['tx2_3v3'] = suffix\n",
+    "mapping_dict['rJRA025']['tx2_3v3'] = os.path.join(root_dir, 'rJRA025', f'map_JRA025m_to_tx2_3v3_{suffix}.nc')\n",
+    "# r05 -> tx2_3v3\n",
+    "suffix = 'nnsm_e100r100_260306'\n",
+    "mapping_suffix['r05']['tx2_3v3'] = suffix\n",
+    "mapping_dict['r05']['tx2_3v3'] = os.path.join(root_dir, 'r05', f'map_r05_to_tx2_3v3_{suffix}.nc')\n",
+    "\n",
+    "# Names for netcdf output\n",
+    "for rof_grid in rof_grids:\n",
+    "    for ocn_grid in mapping_dict[rof_grid]:\n",
+    "        out_file[rof_grid][ocn_grid] = f'riv_nut.gnews_gnm.{rof_grid}_to_{ocn_grid}_{mapping_suffix[rof_grid][ocn_grid]}.{date_str}.nc'\n",
+    "\n",
+    "out_file[src_grid][dst_grid]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "aa948982-3a9c-415b-9c06-7d2ce4492381",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "out_vars = [\n",
+    "            'din_riv_flux',\n",
+    "            'dip_riv_flux',\n",
+    "            'don_riv_flux',\n",
+    "            'dop_riv_flux',\n",
+    "            'dsi_riv_flux',\n",
+    "            'dfe_riv_flux',\n",
+    "            'dic_riv_flux',\n",
+    "            'alk_riv_flux',\n",
+    "            'doc_riv_flux'\n",
+    "           ]\n",
+    "\n",
+    "ds_roff = xr.open_dataset(runoff_data[src_grid], decode_times=False)[out_vars]\n",
+    "ds_roff"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9b4dfe62-d01f-4a3b-9234-beba89244ea7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mapping_obj = ESMFMappingWrapper(mapping_dict[src_grid][dst_grid])\n",
+    "da_out = mapping_obj.map_var(ds_roff, out_vars[0])\n",
+    "ds = da_out.to_dataset()\n",
+    "for var in out_vars:\n",
+    "    if var in ds:\n",
+    "        continue\n",
+    "    ds[var] = mapping_obj.map_var(ds_roff, var)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5b206cc3-73ef-4694-baee-c7313c43c234",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Update time attributes\n",
+    "ds['time'].attrs = ds_roff['time'].attrs\n",
+    "ds['time'].attrs['axis'] = 'T'\n",
+    "ds['time'].attrs['cartesian_axis'] = 'T'\n",
+    "ds['time'].encoding['_FillValue'] = None\n",
+    "\n",
+    "# Update global attributes\n",
+    "ds.attrs['author'] = f'Created by {user_full_name}'\n",
+    "ds.attrs['creation_date'] = full_date\n",
+    "ds.attrs['procedure'] = 'Data sets are first interpolated to a runoff grid, then mapped to the ocean with the intention of using the same mapping file as for freshwater runoff.'\n",
+    "ds.attrs['source'] = 'Blend of GlobalNEWS and IMAGE-GNM data: GlobalNEWS: https://marine.rutgers.edu/globalnews/index.htm; IMAGE-GNM: https://easy.dans.knaw.nl/ui/datasets/id/easy-dataset:64145'\n",
+    "for attr in ds_roff.attrs:\n",
+    "    if 'assumption' in attr:\n",
+    "        ds.attrs[attr] = ds_roff.attrs[attr]\n",
+    "ds.attrs['mapping_src_file'] = runoff_data[src_grid]\n",
+    "ds.attrs['mapping_file'] = mapping_dict[src_grid][dst_grid]\n",
+    "ds"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9ee7f34b-5394-4b62-b4fa-beb6b40fca28",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Prepend 1900-01-01 time level and append 2001-01-01\n",
+    "# Since units are \"days since 0001-01-01\", YYYY-01-01 = 365*(YYYY-1)\n",
+    "ds_pre = ds.isel(time=[0]).assign_coords(time=[365*(1900-1)]) # use a list to maintain time as first dimension\n",
+    "\n",
+    "ds_post = ds.isel(time=[-1]).assign_coords(time=[365*(2001-1)]) # use a list to maintain time as first dimension\n",
+    "\n",
+    "ds_concat = xr.concat([ds_pre, ds, ds_post], \"time\", compat=\"override\", coords=\"minimal\")\n",
+    "ds_concat[\"time\"].attrs = ds[\"time\"].attrs\n",
+    "ds_concat[\"time\"].encoding[\"_FillValue\"] = None\n",
+    "ds_concat"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0286dc04-9d22-480f-9c58-b191f6457532",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ds_concat.isel(time=0)['din_riv_flux'].plot(levels=[0, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f7881c54-1ad1-4ca4-a602-b3a82c5a7a4f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ds_concat.to_netcdf(out_file[src_grid][dst_grid], unlimited_dims='time', format=\"NETCDF3_64BIT\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python [conda env:analysis]",
+   "language": "python",
+   "name": "conda-env-analysis-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.14.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/bgc/riv_flux/MappingClass.py b/bgc/riv_flux/MappingClass.py
@@ -0,0 +1,53 @@
+import numpy as np
+from scipy.sparse import csc_matrix
+import xarray as xr
+
+class ESMFMappingWrapper(object):
+    def __init__(self, ESMF_mapping_file_name):
+        self._ds_map = xr.open_dataset(ESMF_mapping_file_name)
+        self._src_grid_size = np.prod(self._ds_map['src_grid_dims'].data)
+        self._src_grid_shape = (self._ds_map['src_grid_dims'].data[1], self._ds_map['src_grid_dims'].data[0])
+        self._dst_grid_size = np.prod(self._ds_map['dst_grid_dims'].data)
+        self._dst_grid_shape = (self._ds_map['dst_grid_dims'].data[1], self._ds_map['dst_grid_dims'].data[0])
+        self._mapping_weights_sparse = csc_matrix((self._ds_map['S'].data,
+                                                  (self._ds_map['row'].data-1, self._ds_map['col'].data-1)),
+                                                  shape=(self._dst_grid_size, self._src_grid_size))
+
+    ##################
+
+    def map_var(self, ds_src, var_name, scale_factor=0.01):
+        da_list = []
+        for time in range(len(ds_src['time'])):
+            native_values = ds_src[var_name].isel(time=time).data.reshape(self._src_grid_size)
+            mapped_values = self._mapping_weights_sparse*(scale_factor*native_values)
+            da_list.append(xr.DataArray(mapped_values.reshape(self._dst_grid_shape), dims=('y', 'x'), name=var_name))
+            if var_name in ['din_riv_flux', 'don_riv_flux']:
+                conv_factor = 28*1e-15*86400*365 # 28 mg / mmol, 1e-15 Tg / mg, 86400 s/d, 365 d/yr
+            elif var_name in ['dic_riv_flux', 'doc_riv_flux']:
+                conv_factor = 12*1e-18*86400*365 # 28 mg / mmol, 1e-18 Pg / mg, 86400 s/d, 365 d/yr
+            elif var_name in ['dip_riv_flux', 'dop_riv_flux', 'dsi_riv_flux']:
+                conv_factor = 1e-15*86400*365 # 1e-15 Tmol / mmol, 86400 s/d, 365 d/yr
+            elif var_name in ['dfe_riv_flux']:
+                conv_factor = 1e-12*86400*365 # 1e-12 Gmol / mmol, 86400 s/d, 365 d/yr
+            elif var_name in ['alk_riv_flux']:
+                continue
+            else:
+                conv_factor = 0
+            self._compute_global_integral(native_values, mapped_values, conv_factor, var_name, print_stat=time in [0, len(ds_src['time'])-1])
+        da = xr.concat(da_list, dim='time').assign_coords({'time': ds_src['time'].data})
+        da.attrs = ds_src[var_name].attrs
+        da.attrs['units'] = 'mmol/m^2/s'
+        da.encoding['_FillValue'] = None
+        return da
+
+    ##################
+
+    def _compute_global_integral(self, native_values, mapped_values, conv_factor, var, print_stat=False):
+        r = 6371220 # radius of earth in m
+        native_area = r*r*self._ds_map['area_a'].data # r^2 since area is rad^2 not m^2
+        mapped_area = r*r*self._ds_map['area_b'].data # r^2 since area is rad^2 not m^2
+        if print_stat:
+            global_sum_native = np.sum(native_values*native_area*(0.01*conv_factor)) # 0.01 nmol/cm^2 -> mmol/m^2
+            global_sum_mapped = np.sum(mapped_values*mapped_area*conv_factor)
+            rel_err = np.abs((global_sum_native-global_sum_mapped)/global_sum_native)
+            print(f'{var} stats: sums are {global_sum_native:.3e} (native) and {global_sum_mapped:.3e} (mapped); rel_err is {rel_err:.3e}')
diff --git a/bgc/riv_flux/README b/bgc/riv_flux/README
@@ -0,0 +1,14 @@
+Generating new river nutrient flux files is a two step process:
+
+1. Create river nutrient flux dataset on the CESM runoff grid
+2. Map that dataset from the runoff grid to the ocean grid
+
+Step [1] is complicated -- it involves computing climatologies from the
+GlobalNEWS dataset and mapping them to the runoff grid. Scripts to do that
+exist in https://github.com/NCAR/cgd-os/tree/master/cesm/forcing/river_nutrients
+but those have not been copied to this repository yet -- we will do that when
+we are ready to bring in a new runoff grid.
+
+Users can complete step [2] with  "Map river fluxes to ocean grid.ipynb".
+All that is required is updating the dictionary that contains pointers to
+the rof -> ocn mapping files.
diff --git a/envs/environment.yml b/envs/environment.yml
@@ -11,3 +11,4 @@ dependencies:
     - jupyter
     - jupyterlab
     - netCDF4
+    - scipy
-Original file line number
+Diff line change
@@ Expand Up / @@ -11,3 +11,4 @@ dependencies: @@
         - jupyter
         - jupyterlab
         - netCDF4
+        - scipy