diff --git a/data/fieldlist_CMIP.jsonc b/data/fieldlist_CMIP.jsonc
index 19417846c..63a894c5b 100644
--- a/data/fieldlist_CMIP.jsonc
+++ b/data/fieldlist_CMIP.jsonc
@@ -392,6 +392,12 @@
"units": "degC",
"ndim": 4
},
+ "sftlf": {
+ "standard_name": "land_area_fraction",
+ "realm": "atmos",
+ "units": "%",
+ "ndim": 2
+ },
// Variables for Convective Transition Diagnostics module:
// ta: 3D temperature, units = K:
"ta": {
diff --git a/diagnostics/WWEs/WWE_diag_tools.py b/diagnostics/WWEs/WWE_diag_tools.py
new file mode 100644
index 000000000..0609de1d0
--- /dev/null
+++ b/diagnostics/WWEs/WWE_diag_tools.py
@@ -0,0 +1,506 @@
+import numpy as np
+import xarray as xr
+import os
+import xesmf as xe
+import matplotlib.pyplot as plt
+from scipy.ndimage import (
+ label,
+ find_objects,
+ sum as ndi_sum,
+ mean as ndi_mean,
+ center_of_mass)
+from scipy import stats
+import scipy
+
def low_pass_weights(window, cutoff):
    """Build the weights of a low-pass Lanczos filter.

    Parameters
    ----------
    window : int
        Length of the filter window.
    cutoff : float
        The cutoff frequency in inverse time steps.

    Returns
    -------
    numpy.ndarray
        Symmetric weights of length 2 * ((window - 1) // 2 + 1) - 1.

    From: https://github.com/liv0505/Lanczos-Filter/blob/master/lanczosbp.py
    """
    half_width = ((window - 1) // 2) + 1
    weights = np.zeros(2 * half_width + 1)

    # Central weight carries the pass-band gain.
    weights[half_width] = 2 * cutoff

    # Side lobes: ideal sinc response tapered by the Lanczos sigma factor.
    offsets = np.arange(1., half_width)
    sigma_taper = np.sin(np.pi * offsets / half_width) * half_width / (np.pi * offsets)
    sinc_part = np.sin(2. * np.pi * cutoff * offsets) / (np.pi * offsets)
    lobes = sinc_part * sigma_taper

    weights[half_width + 1:-1] = lobes
    weights[half_width - 1:0:-1] = lobes

    # Drop the two never-filled zero end points.
    return weights[1:-1]
+
def filter_data(data = None, nweights = 201, a = 5):
    """Low-pass filter *data* along 'time' with Lanczos weights.

    Applies an nweights-point Lanczos filter with cutoff 1/a (in inverse time
    steps) as a centered rolling dot product; edges where the window does not
    fit become NaN.
    """
    lanczos_weights = low_pass_weights(nweights, 1. / a)
    weight_da = xr.DataArray(lanczos_weights, dims = ['window'])

    # Centered rolling window, expanded to an explicit 'window' dim, then
    # contracted against the weights.
    windowed = data.rolling(time = len(lanczos_weights), center = True).construct('window')

    return windowed.dot(weight_da)
+
def land_mask_using_etopo(ds = None, topo_latgrid_1D = None, topo_longrid_1D = None,
                          topo_data1D = None, lf_cutoff = 10):
    """Build a 0/1 ocean mask on the grid of *ds* from 1-D ETOPO samples.

    The land fraction of each (lat, lon) cell of *ds* is the fraction of
    ETOPO points at or above sea level falling inside the cell; cells with a
    land fraction below *lf_cutoff* percent are flagged 1 (treated as ocean).
    """

    def _bin_edges(coord):
        # Half of the mean grid spacing; the first diff entry is the
        # wrap-around difference, so it is skipped.
        half_step = float((coord - np.roll(coord, 1))[1:].mean() / 2)
        inner_edges = np.asarray(coord + half_step)
        leading_edge = float((coord[0] - half_step).values)
        return np.insert(inner_edges, 0, leading_edge)

    lon_edges = _bin_edges(ds.lon)
    lat_edges = _bin_edges(ds.lat)

    # Fraction of ETOPO points at/above sea level inside each grid cell.
    binned = stats.binned_statistic_2d(
        topo_latgrid_1D, topo_longrid_1D, topo_data1D,
        statistic = lambda vals: np.count_nonzero(vals >= 0) / vals.size,
        bins = [lat_edges, lon_edges], expand_binnumbers = True)

    land_frac_pct = binned.statistic * 100.

    # 1 = mostly ocean (land fraction below cutoff), 0 = land.
    return (land_frac_pct < lf_cutoff) * 1
+
def regridder_model2obs(lon_vals = None, lat_vals = None, in_data = None, type_name = 'bilinear', isperiodic = True):
    """Create an xESMF regridder from the grid of *in_data* onto a target
    rectilinear grid defined by 1-D lat_vals/lon_vals (e.g. the obs grid)."""
    target_grid = xr.Dataset(
        {
            "lon": (["lon"], lon_vals),
            "lat": (["lat"], lat_vals),
        })

    return xe.Regridder(in_data, target_grid, method = type_name, periodic = isperiodic)
+
#Used for removing the first n harmonics from the seasonal cycle
def nharm(x, N):
    """Return the first three harmonics of the period-N cycle in series *x*.

    Keeps only the FFT components whose period (rounded to 2 decimals)
    matches N, N/2 or N/3 and returns the real part of the inverse FFT; the
    result can be subtracted from *x* to remove the smooth seasonal cycle.

    Parameters
    ----------
    x : array_like
        1-D time series (e.g. daily climatology), spacing of 1 time step.
    N : int or float
        Fundamental period in time steps (e.g. 365 for daily data).

    Returns
    -------
    numpy.ndarray
        Real-valued filtered series, same length as *x*.
    """
    x = np.asarray(x)
    if not x.any():
        # All-zero input: nothing to filter. (Fixed: previously returned
        # np.zeros(N), whose length disagreed with len(x) when N != len(x),
        # and printed a stray debug message.)
        return np.zeros(len(x))

    fft_output = scipy.fft.fft(x)
    freq = scipy.fft.fftfreq(len(x), d = 1)

    # Periods (to 2 decimals) of the first three harmonics; computed once
    # instead of inside the comprehension.
    keep_periods = {round(N / j, 2) for j in (1, 2, 3)}

    # Zero out every component except the kept harmonics (f == 0, the mean,
    # is excluded just as in the original, where 1/0 -> inf never matched).
    filtered_fft_output = np.array(
        [fft_output[i] if f != 0 and round(np.abs(1 / f), 2) in keep_periods else 0
         for i, f in enumerate(freq)])

    return scipy.fft.ifft(filtered_fft_output).real
+
+"""""
+Explanation of bb_sizes used below:
+followed - https://stackoverflow.com/questions/36200763/objects-sizes-along-dimension
+
+"for object_slice in bb_slices" loops through each set of labeled_wwb slices
+Example:
+for object_slice in bb_slices:
+ print(object_slice)
+
+(slice(110, 112, None), slice(0, 1, None), slice(31, 35, None))
+(slice(111, 114, None), slice(0, 1, None), slice(19, 27, None))
+(slice(127, 130, None), slice(0, 1, None), slice(12, 21, None))
+etc.
+
+-----------------------------------------------------------------------
+"s.stop-s.start for s in object_slice"
+
+Example:
+for s in bb_slices[50]:
+ print(s.stop - s.start)
+
+3, 1, 13
+since the bb_slices[50] = (slice(644, 647, None), slice(0, 1, None), slice(85, 98, None))
+
+-----------------------------------------------------------------------
+Note, for IWW:
+The 0th blob (i.e., blob that has array index of 0) has 1 for a label.
+So, when indexing the labels need to add 1. (i.e., the slices for blob #17,
+which is the first blob that qualifies as a WWB, are
+(slice(295, 302, None), slice(0, 1, None), slice(15, 42, None)). These slices
+correspond to the blob labeled with an 18. print(labeled_blobs[bb_slices[iblob]])
+returns values of 0 and 18 since the slice includes the bounding box of the blob
+
+"""""
def isolate_WWEs(data = None, tauu_thresh = 0.04, mintime = 5,
                 minlons = 10, xmin = 3, xmax = 3, ymin = 3,
                 ymax = 3, xtend_past_lon = 140):
    """Identify westerly wind events (WWEs) in a (time x lon) wind stress field.

    Parameters
    ----------
    data : xarray.DataArray
        Zonal wind stress with 'time' and 'lon' dimensions.
    tauu_thresh : float
        Stress threshold; default 0.04 follows Puy et al. (2016).
    mintime : int
        Minimum bounding-box duration (time steps) for a blob to be a WWE.
    minlons : int
        Minimum bounding-box zonal extent (longitude points).
    xmin, xmax, ymin, ymax : int
        Time/longitude search radius used to merge nearby WWEs.
    xtend_past_lon : float
        WWEs whose eastern edge stays west of this longitude are discarded.

    Returns
    -------
    (labels, mask) : tuple of numpy.ndarray
        Final WWE labels (1..nWWEs, 0 = no event) and the matching 0/1 mask,
        both shaped like *data*.
    """

    lon_array = np.asarray(data["lon"])

    # 1) Binary mask of tauu > tauu_thresh.
    tauu_mask = data.where(data > tauu_thresh, 0)   #below threshold -> 0
    tauu_mask = tauu_mask.where(tauu_mask == 0, 1)  #above threshold -> 1

    # 2) Label each contiguous above-threshold blob with a unique number.
    labeled_blobs, n_blobs = label(tauu_mask)

    #Find the bounding box of each blob
    bb_slices = find_objects(labeled_blobs)

    # 3) Bounding-box size along every dimension, one row per blob.
    bb_sizes = np.array([[s.stop - s.start for s in object_slice]
                         for object_slice in bb_slices])

    # 4) Keep blobs lasting >= mintime days that span >= minlons longitudes.
    time_index = np.where(np.asarray(data.dims) == 'time')
    lon_index = np.where(np.asarray(data.dims) == 'lon')

    w_wwe = np.where((bb_sizes[:, time_index] >= mintime) & (bb_sizes[:, lon_index] >= minlons))

    # 5) Mask containing only the qualifying blobs (blob i has label i + 1).
    wwe_mask = np.zeros_like(labeled_blobs)
    for i in range(len(w_wwe[0])):
        w_temp = np.where(labeled_blobs == w_wwe[0][i] + 1)
        wwe_mask[w_temp] = 1

    # 6) Relabel the qualifying events 1..n_wwes.
    labeled_wwes, n_wwes = label(wwe_mask)

    # 7) Merge WWEs that are within the (time, lon) search radius of each other.
    wwes_after_merging = find_nearby_wwes_merge(n_wwes = n_wwes, labeled_wwes = labeled_wwes,
                                                xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax)

    # 8) Renumber labels 1..nWWEs (merging leaves gaps in the label sequence).
    new_wwe_labels = renumber_wwes(wwe_labels = wwes_after_merging)

    # 9) Eastern edge of each WWE. The -1 is because a slice's stop value is
    # exclusive (e.g., array[3:10] includes elements 3-9 and NOT 10).
    bb_slices = find_objects(new_wwe_labels)
    max_time_lon_ind = np.array([[s.stop for s in object_slice]
                                 for object_slice in bb_slices])
    max_lon_inds = max_time_lon_ind[:, lon_index[0][0]] - 1
    max_lons = lon_array[max_lon_inds]

    wwe_labels_count = np.unique(new_wwe_labels)[1:]
    wwe_maxlonsLTX = np.where(max_lons < xtend_past_lon)
    n_wwe_maxlonsLTX = np.count_nonzero(max_lons < xtend_past_lon)

    # 10) Remove every WWE that never extends past xtend_past_lon.
    if n_wwe_maxlonsLTX > 0:
        # Fixed: removals now accumulate. The previous version restarted from
        # new_wwe_labels on each iteration, so only the LAST too-short WWE
        # was actually removed.
        wwe_labels_afterRemoveX = new_wwe_labels
        for i2remove in range(n_wwe_maxlonsLTX):
            wwe2remove = wwe_labels_count[wwe_maxlonsLTX[0][i2remove]]
            wwe_labels_afterRemoveX = np.where(wwe_labels_afterRemoveX == wwe2remove,
                                               0, wwe_labels_afterRemoveX)

        # 11) Relabel again because WWEs have now been removed.
        wwe_labels_final = renumber_wwes(wwe_labels = wwe_labels_afterRemoveX)

        # 12) Final 0/1 mask.
        wwe_mask_final = np.where(wwe_labels_final != 0, 1, 0)

    else:
        wwe_labels_final = new_wwe_labels
        wwe_mask_final = wwe_mask

    return wwe_labels_final, wwe_mask_final
+
def find_nearby_wwes_merge(n_wwes = 0, labeled_wwes = None, xmin = 3, xmax = 3,
                           ymin = 3, ymax = 3):
    """Merge labeled WWEs that occur close together in time and longitude.

    Every point of each WWE is checked against a surrounding box of
    -xmin/+xmax steps along the first (time) axis and -ymin/+ymax steps along
    the last (longitude) axis; any different nonzero label found there is
    relabeled to the current (smaller, i.e. earlier) label.

    NOTE(review): *labeled_wwes* is modified in place and also returned.
    NOTE(review): the loop runs over range(1, n_wwes), so the highest label is
    never used as a merge center -- presumably harmless because an overlap is
    found from the other event's side, but confirm.
    NOTE(review): when a point sits within xmin (or ymin) of the array edge,
    the negative slice start wraps around to the far end of the array --
    confirm this wrap-around is intended.

    Parameters
    ----------
    n_wwes : int
        Number of labeled WWEs (labels run 1..n_wwes).
    labeled_wwes : numpy.ndarray
        2-D (time, lon) or 3-D (time, 1, lon) label array from scipy label().
    xmin, xmax, ymin, ymax : int
        Search radius along the time / longitude axes.

    Returns
    -------
    numpy.ndarray
        The (in-place merged) label array.
    """

    for i in range(1, n_wwes):
        wblob = np.where(labeled_wwes == i)
        npts = np.asarray(wblob).shape[1]

        #Loop over each point in WWE and check surrounding 3 days and 3° for another labeled WWE
        for ipt in range(npts):
            # 3-D labels: middle axis is a singleton (e.g. latitude) dimension.
            if len(wblob) == 3:
                check_wwe_label = labeled_wwes[wblob[0][ipt] - xmin : wblob[0][ipt] + xmax + 1,
                                               0,
                                               wblob[2][ipt] - ymin : wblob[2][ipt] + ymax + 1]
            # 2-D labels: plain (time, lon).
            if len(wblob) == 2:
                check_wwe_label = labeled_wwes[wblob[0][ipt] - xmin : wblob[0][ipt] + xmax + 1,
                                               wblob[1][ipt] - ymin : wblob[1][ipt] + ymax + 1]

            n_diff_label = np.count_nonzero((check_wwe_label != i) & (check_wwe_label != 0))

            #Replace nearby WWE label with earlier in time and/or space WWE, so the later WWE becomes
            #a part of the smaller numbered WWE
            if n_diff_label > 0:
                w_diff_label = np.where((check_wwe_label != i) & (check_wwe_label != 0))
                unique_overlapping_wwes = np.unique(check_wwe_label[w_diff_label])

                for ioverlapwwe in range(unique_overlapping_wwes.size):
                    w_ioverlapwwe = np.where(labeled_wwes == unique_overlapping_wwes[ioverlapwwe])
                    labeled_wwes[w_ioverlapwwe] = i

    return labeled_wwes
+
def renumber_wwes(wwe_labels = None):
    """Compactly renumber the nonzero labels to 1..n (ascending label order),
    so that scipy's find_objects works on the result. 0 (no WWE) stays 0."""
    surviving_labels = np.unique(wwe_labels)
    #Drop the 0 label as it's not a WWE
    surviving_labels = surviving_labels[surviving_labels != 0]

    relabeled = np.zeros_like(wwe_labels)
    for new_label, old_label in enumerate(surviving_labels, start = 1):
        relabeled[np.where(wwe_labels == old_label)] = new_label

    return relabeled
+
def WWE_characteristics(wwe_labels = None, data = None):
    """Per-WWE duration, zonal extent, integrated wind work and mean stress.

    Parameters
    ----------
    wwe_labels : numpy.ndarray
        WWE labels 1..n (0 = no event), shaped like *data*.
    data : DataArray-like
        Zonal wind stress; must expose .dims containing 'time' and 'lon'.

    Returns
    -------
    tuple of numpy.ndarray
        (duration, zonal_extent, wwe_sum, wwe_mean), one entry per WWE.
    """

    #Bounding boxes of the labeled WWEs (label i is entry i - 1)
    bb_slices = find_objects(wwe_labels)

    #Bounding-box size along every dimension, one row per WWE
    box_sizes = np.array([[sl.stop - sl.start for sl in one_box]
                          for one_box in bb_slices])

    dim_names = np.asarray(data.dims)
    t_axis = np.where(dim_names == 'time')
    x_axis = np.where(dim_names == 'lon')

    duration = box_sizes[:, t_axis[0][0]]
    zonal_extent = box_sizes[:, x_axis[0][0]]

    #Sum (integrated wind work, IWW) and mean of the stress over each WWE patch
    label_ids = np.arange(len(bb_slices)) + 1
    values = np.asarray(data)
    wwe_sum = ndi_sum(values, wwe_labels, label_ids)
    wwe_mean = ndi_mean(values, wwe_labels, label_ids)

    return duration, zonal_extent, wwe_sum, wwe_mean
+
def find_WWE_time_lon(data = None, wwe_labels = None, lon = None, time_array = None):
    """Find center, start/end times and west/east longitudes of each WWE.

    Parameters
    ----------
    data : xarray.DataArray
        Field with 'time' and 'lon' dimensions; weights the centers of mass.
    wwe_labels : numpy.ndarray
        WWE labels 1..n (0 = no event), shaped like *data*.
    lon : numpy.ndarray
        Longitude values for the lon axis of *data*.
    time_array : xarray.DataArray
        Time values for the time axis of *data*.

    Returns
    -------
    tuple
        (center_lon_vals, center_time_vals, min_times, max_times,
         min_lons, max_lons), one entry per WWE.
    """

    bb_slices = find_objects(wwe_labels)

    time_index = np.where(np.asarray(data.dims) == 'time')
    lon_index = np.where(np.asarray(data.dims) == 'lon')

    # Data-weighted center of mass of every WWE patch (fractional indices).
    uniq_labels = np.unique(wwe_labels)[1:]
    time_lon_center = center_of_mass(np.asarray(data), wwe_labels, uniq_labels)

    #Use zip to extract the first & second element of
    #each tuple in the time_lon_center list
    cent_time_ind = list(zip(*time_lon_center))[int(time_index[0])]
    cent_lon_ind = np.asarray(list(zip(*time_lon_center))[int(lon_index[0])])

    #3/15/2023: round time to nearest day. I doubt the SSH data will
    #be finer than a day, so rounding to a day seems sufficient.
    cent_time_ind = np.round(cent_time_ind).astype("int")

    # Longitudes bracketing the fractional center-of-mass longitude index.
    lower_lon_val = lon[np.floor(cent_lon_ind).astype("int")]
    upper_lon_val = lon[np.ceil(cent_lon_ind).astype("int")]

    #Make sure longitude delta is only 1degree (the interpolation below
    #assumes a 1-degree grid; a warning is printed otherwise).
    delta_lon = upper_lon_val - lower_lon_val
    if np.count_nonzero(delta_lon != 1) > 0:
        print("Longitude delta GT 1degree")

    #Interpolate the longitude values using the interpolation equation
    # y = y1 + (y2 - y1) * [(x - x1)/(x2 - x1)]
    # y(s) in this case are the lon values and x(s) are the lon indices
    # since the longitudes and indices increment by one the equation
    # reduces to y = y1 + (x - x1)
    center_lon_vals = lower_lon_val + (cent_lon_ind - np.floor(cent_lon_ind))
    center_time_vals= time_array[cent_time_ind]

    #Added 4/25/2024
    #Find max time for each WWE
    #The -1 for finding the indices is because the bb_slice gives the slice values and the stop value
    #is not inclusive in python (e.g., array[3:10] includes elements 3-9 and NOT 10)
    max_time_lon_ind = np.array([[s.stop for s in object_slice]
                                 for object_slice in bb_slices])
    max_time_inds = max_time_lon_ind[:, time_index[0][0]] - 1
    max_times = time_array[max_time_inds]

    #Find min time for each WWE (slice start is inclusive, so no -1)
    min_time_lon_ind = np.array([[s.start for s in object_slice]
                                 for object_slice in bb_slices])
    min_time_inds = min_time_lon_ind[:, time_index[0][0]]
    min_times = time_array[min_time_inds]

    #Find max longitudes for each WWE
    max_time_lon_ind = np.array([[s.stop for s in object_slice]
                                 for object_slice in bb_slices])
    max_lon_inds = max_time_lon_ind[:, lon_index[0][0]] - 1
    max_lons = lon[max_lon_inds]

    #Find min longitudes for each WWE
    min_time_lon_ind = np.array([[s.start for s in object_slice]
                                 for object_slice in bb_slices])
    min_lon_inds = min_time_lon_ind[:, lon_index[0][0]]
    min_lons = lon[min_lon_inds]

    return center_lon_vals, center_time_vals, min_times, max_times, min_lons, max_lons
+
+
def events_per_lon(in_da = None):
    """Count, at each longitude, how many distinct labeled events touch it.

    Note: the longitude axis is located by matching axis length against
    in_da.lon.size, which is ambiguous if another axis has the same length.

    Returns
    -------
    (count_event_lons, nevents)
        Per-longitude event counts and the total number of events.
    """

    event_ids = np.unique(in_da)[1:]   #skip the 0 (no-event) label
    n_events = event_ids.size
    n_lons = in_da.lon.size

    #Axis index of the longitude dimension, found by its length
    lon_axis = in_da.shape.index(in_da.lon.shape[0])

    per_event_mask = np.zeros((n_events, n_lons))
    for ievent, event_id in enumerate(event_ids):
        hit_lons = np.unique(np.where(in_da == event_id)[lon_axis])
        per_event_mask[ievent, hit_lons] = 1

    return np.sum(per_event_mask, axis = 0), n_events
+
+#####################################################################
+###PLOTTING CODE
+#####################################################################
def _plot_hovmoller_page(data, wwe_mask, lon_vals, tauu_time, year_array,
                         year_offset, n_panels, savename, date_tag):
    """Draw one 4x5 page of yearly Hovmoller panels and save it as
    <savename>.<date_tag>.YearlyHovmollers.png; returns the last contourf."""
    shade_choice = 'bwr'
    levs = np.linspace(-0.1, 0.1, 21)
    kwargs = {'fontsize': 12}

    fig, ax = plt.subplots(ncols=5, nrows=4, figsize = (15, 16), sharex = True, sharey = True)
    axlist = ax.flatten()

    cf = None
    for ipanel in range(n_panels):
        iyear = ipanel + year_offset
        wiyear = np.where((np.asarray(tauu_time.dt.year) == year_array[iyear]))

        # Shaded filtered zonal wind stress with WWE outlines on top.
        cf = axlist[ipanel].contourf(np.asarray(lon_vals), np.arange(0, tauu_time[wiyear[0]].size),
                                     np.asarray(data[wiyear[0], :]), levels = levs,
                                     cmap = shade_choice, extend = 'both')

        axlist[ipanel].contour(np.asarray(lon_vals), np.arange(0, tauu_time[wiyear[0]].size),
                               wwe_mask[wiyear[0], :], cmap = 'binary', linewidths = 1)

        axlist[ipanel].grid(alpha = 0.5)

        # Axis labels only on the bottom row / left column of the 4x5 grid.
        if ipanel >= 15:
            axlist[ipanel].set_xlabel('longitude', **kwargs)
        if ipanel % 5 == 0:
            axlist[ipanel].set_ylabel('day of year', **kwargs)
        axlist[ipanel].set_title(str(year_array[iyear]), fontsize=12, loc = 'left')
        axlist[ipanel].tick_params(axis='y', labelsize=12)
        axlist[ipanel].tick_params(axis='x', labelsize=12)
        plt.subplots_adjust(bottom=0.1, right=0.8, top=0.9)

    cbar_ax = fig.add_axes([0.81, 0.35, 0.015, 0.3])
    cbar_ax.tick_params(labelsize=12)
    cb = plt.colorbar(cf, cax=cbar_ax)
    cb.set_label(label = '$\u03C4_x$ (N $m^{-2}$)', fontsize = 12)

    plt.savefig(savename + '.' + date_tag + '.YearlyHovmollers.png', bbox_inches='tight')
    return cf


def plot_model_Hovmollers_by_year(data = None, wwe_mask = None, lon_vals = None,
                                  tauu_time = None, savename = '',
                                  start_date = '', end_date = ''):
    """Plot yearly Hovmollers of filtered zonal wind stress overlaid with WWEs.

    One panel per year, up to 20 panels per figure; a second figure (tagged
    with *end_date*) is produced when the record is longer than 20 years.
    Returns the contourf handle of the last panel drawn.
    """
    year_array = np.unique(tauu_time.dt.year)

    # First page: up to the first 20 years.
    # (Fixed: previously hard-coded range(20), which raised IndexError for
    # records shorter than 20 years.)
    cf = _plot_hovmoller_page(data, wwe_mask, lon_vals, tauu_time, year_array,
                              year_offset = 0,
                              n_panels = min(20, year_array.size),
                              savename = savename, date_tag = start_date)

    # Second page for years 21-40, if present. (min() also guards against
    # overflowing the 20 available panels for records longer than 40 years.)
    if year_array.size > 20:
        cf = _plot_hovmoller_page(data, wwe_mask, lon_vals, tauu_time, year_array,
                                  year_offset = 20,
                                  n_panels = min(20, year_array.size - 20),
                                  savename = savename, date_tag = end_date)

    return cf
+
def plot_WWE_likelihood_per_lon(lons = None, model_prop_per_day = None,
                                obs_prop_per_day = None, savepath = '',
                                model_name = ''):
    """Plot per-longitude WWE probability per day for model vs TropFlux obs.

    Saves <savepath><model_name>_and_TropFlux_WWE_prob_per_day.png, with a
    secondary y-axis giving the equivalent return rate (days between WWEs).
    Returns the model fill handle.
    """

    #Set the plot
    fig, ax = plt.subplots(figsize=(6, 4))

    #Model simulation
    cf = ax.plot(lons, model_prop_per_day)
    cf = ax.fill_between(lons, model_prop_per_day*0, model_prop_per_day, alpha=0.9, label = model_name)

    #Observations
    cf2 = ax.plot(lons, obs_prop_per_day, color = 'gray')
    cf2 = ax.fill_between(lons, obs_prop_per_day*0, obs_prop_per_day, color = 'gray', alpha = 0.7, label = 'TropFlux observations')

    #Format information
    ax.legend(fontsize = 12)
    ax.set_title(model_name, fontsize = 14)
    ax.set_xlabel('longitude', fontsize = 14)
    ax.set_ylabel('Probability per day (%)', fontsize = 14)
    ax.tick_params(axis='y', labelsize=14)
    ax.tick_params(axis='x', labelsize=14)
    ax.set_ylim(0, 1.8)
    ax.grid(alpha = 0.5)

    #Add second axis: return rate = 100 / probability-per-day.
    ax2 = ax.twinx()
    ytick_vals = ax.get_yticks()
    ytick_vals[-1] = 1.8
    # Fixed: removed a dead np.around(100/ytick_vals, ...) computation that was
    # immediately overwritten (and emitted a divide-by-zero warning at the 0
    # tick). NOTE(review): these labels are hard-coded for the default ticks of
    # a 0-1.8 axis (0, 0.5, 1.0, 1.5, 1.8) -> 100/tick; confirm if ticks change.
    label = ['inf', '200.0', '100.0', '66.7', '']
    ax2.set_yticks(ytick_vals)
    ax2.set_yticklabels(label, fontsize = 14)
    ax2.set_xlabel('Longitude', fontsize = 14)
    ax2.set_ylabel('Rate of return (days)', fontsize = 14)
    plt.savefig(savepath + model_name + "_and_TropFlux_WWE_prob_per_day.png", bbox_inches='tight')

    return cf
diff --git a/diagnostics/WWEs/WWEs.html b/diagnostics/WWEs/WWEs.html
new file mode 100644
index 000000000..0e2379e2d
--- /dev/null
+++ b/diagnostics/WWEs/WWEs.html
@@ -0,0 +1,78 @@
+
+
+
Westerly wind events
+
+Westerly wind events
+
+Westerly wind events (WWEs) are anomalously strong, long lasting westerlies
+that predominantly occur over the west Pacific Ocean. They are capable
+of exciting oceanic Kelvin waves, which in turn warm the ocean surface
+and deepen the thermocline as the wave propagates eastward. These
+WWE-Kelvin wave interactions can therefore play an important role in
+ENSO variability and it is important to understand the fidelity of
+WWEs and Kelvin waves in earth system models relative to observations.
+
+
+This POD identifies WWEs and their characteristics across the Pacific
+Ocean (120E-280E) using daily equatorially averaged (2.5S - 2.5N)
+120-day highpass filtered zonal wind stress from TropFlux observation
+and earth system models (ESMs). The WWE characteristics include
+central longitude and date, zonal extent, duration, and integrated
+wind work (IWW), which is the sum of all zonal wind stress within the
+WWE time-longitude patch. The IWW indicates the WWE's total forcing to
+the ocean. The methods for WWE identification are fully discussed in
+Riley Dellaripa et al. (2024).
+
+The results of the WWE identification are plotted below in
+time-longitude space (i.e., Hovmollers). The shading is the anomalous
+zonal wind stress and the black outlines indicate the WWEs. Each
+Hovmoller represents one year in the observations or ESM.
+
+Also plotted is a 1D histogram of the likelihood of each 1 longitude
+bin across the Pacific experiencing a WWE per day. The likelihood is
+calculated as the total number of unique events at each 1 longitude
+divided by the total number of days in each data set. The reciprocal
+of the frequency indicates the average return rate of a WWE per
+longitude or the average number of days between WWEs at each
+longitude.
+
+A complementary POD assesses the fidelity of oceanic
+Kelvin waves in ESMs.
+
+
+Reference:
+Riley Dellaripa, E. M., C. A. DeMott, J. Cui, and E. D. Maloney, 2024:
+Evaluation of Equatorial Westerly Wind Events in the Pacific Ocean in
+CMIP6 Models. J. Climate, https://doi.org/10.1175/JCLI-D-23-0629.1
+
+{{CASENAME}}
+ Hovmollers of zonal wind stress and WWEs:
+
+
+
+
+
+ |
+WWE probability per longitude in TropFlux and {{CASENAME}}
+ | plot
+ |
diff --git a/diagnostics/WWEs/WWEs.py b/diagnostics/WWEs/WWEs.py
new file mode 100644
index 000000000..8e7d85c95
--- /dev/null
+++ b/diagnostics/WWEs/WWEs.py
@@ -0,0 +1,433 @@
+import matplotlib
+matplotlib.use("Agg") # non-X windows backend
+
+import matplotlib.pyplot as plt
+import numpy as np
+import xarray as xr
+import os
+import time
+import xesmf as xe
+import scipy
+from scipy import stats
+from functools import partial
+import intake
+import sys
+import yaml
+
+from WWE_diag_tools import (
+ land_mask_using_etopo,
+ regridder_model2obs,
+ nharm,
+ filter_data,
+ isolate_WWEs,
+ WWE_characteristics,
+ find_WWE_time_lon,
+ plot_model_Hovmollers_by_year,
+ events_per_lon,
+ plot_WWE_likelihood_per_lon)
+
+####################################################################################
+#Define some paths and functions
+####################################################################################
def find_WWEs_and_characteristics(in_data = None, tauu_thresh = 0.04, mintime = 5, minlons = 10,
                                  xminmax = (3, 3), yminmax = (3, 3), minmax_dur_bins = (5, 27),
                                  dur_bin_space = 2, minmax_IWW_bins = (1, 42), IWW_bin_space = 4,
                                  xtend_past_lon = 140):
    '''
    Identify WWEs in *in_data* and compute their per-event characteristics.

    This function calls the following functions within WWE_diag_tools.py:
    - isolate_WWEs (which itself uses find_nearby_wwes_merge and renumber_wwes)
    - WWE_characteristics
    - find_WWE_time_lon

    NOTE(review): minmax_dur_bins, dur_bin_space, minmax_IWW_bins and
    IWW_bin_space are accepted but never used in this function -- presumably
    intended for histogram binning elsewhere; confirm before removing.

    Returns a 14-tuple: duration, IWW, zonal_extent, tauu_mean, WWE_labels,
    WWE_mask, center_lons, center_times, min_times, max_times, min_lons,
    max_lons, lon_array, tauu_time.
    '''

    start_time = time.time()

    # 1) Find WWEs
    #The isolate_WWEs function uses the find_nearby_wwes_merge, renumber_wwes functions
    WWE_labels, WWE_mask = isolate_WWEs(data = in_data, tauu_thresh = tauu_thresh, mintime = mintime,
                                        minlons = minlons, xmin = xminmax[0], xmax = xminmax[1],
                                        ymin = yminmax[0], ymax = yminmax[1], xtend_past_lon = xtend_past_lon)

    # 2) Find characteristics (i.e., duration, zonal extent, integrated wind work sum and mean) of each WWE
    #Uses WWE_characteristics function
    duration, zonal_extent, IWW, tauu_mean = WWE_characteristics(wwe_labels = WWE_labels, data = in_data)

    # 3) Find central, min, and max time and longitude of each WWE
    #Uses find_WWE_time_lon function
    tauu_time = in_data["time"]
    tauu_lon  = in_data["lon"]
    lon_array = np.asarray(tauu_lon)

    center_lons, center_times, min_times, max_times, min_lons, max_lons \
    = find_WWE_time_lon(data = in_data, wwe_labels = WWE_labels,
                        lon = lon_array, time_array = tauu_time)

    print("--- %s seconds to ID WWEs and compute characteristics---" % (time.time() - start_time))

    return duration, IWW, zonal_extent, tauu_mean, WWE_labels, WWE_mask, center_lons, \
           center_times, min_times, max_times, min_lons, max_lons, \
           lon_array, tauu_time
+
+
def save_filtered_tauu_WWEchar(WWE_labels = None, WWE_mask = None, tauu_anom_vals = None, duration = None, IWW = None,
                               zonal_extent = None,tauu_anom_mean = None, tauu_abs_mean = None,
                               center_lon_vals = None, center_time_vals = None, min_lon_vals = None,
                               max_lon_vals = None, min_time_vals = None, max_time_vals = None,
                               lon_array = None, tauu_time = None, save_name = '',
                               filt_descrip = '120-day HP filtered'):
    """Bundle the filtered stress field and per-WWE characteristics into an
    xarray.Dataset and write it to <save_name>.WWE_characteristics.nc.

    The (time, lon) variables are the label/mask/stress fields; the (events,)
    variables hold one value per WWE, indexed by the surviving WWE labels.
    Returns the assembled Dataset.
    """

    # 'events' coordinate: the nonzero WWE labels (0 = no event is dropped).
    uniq_WWE_labels = np.unique(WWE_labels)[1:]
    #reference_time = pd.Timestamp('1970-01-01T00:00:00Z')

    data_vars = dict(
        wwe_labels    =(['time', 'lon'], WWE_labels.squeeze(), dict(units='None', long_name='Unique label for each WWE')),
        wwe_mask      =(['time', 'lon'], WWE_mask.squeeze(), dict(units='Binary', long_name='1s are where WWEs are located, 0s are locations without WWEs')),
        tauu_anom     =(['time', 'lon'], tauu_anom_vals.squeeze(), dict(units='Pa', long_name = 'Mean ' + filt_descrip + ' zonal wind stress')),
        tauu_anom_mean=(['events'], tauu_anom_mean, dict(units='Pa', long_name = 'Mean ' + filt_descrip + ' zonal wind stress per WWE')),
        tauu_abs_mean =(['events'], tauu_abs_mean, dict(units='Pa', long_name = 'Mean absolute zonal wind stress per WWE')),
        duration      =(['events'], duration, dict(units='Days', long_name = 'duration of each WWE')),
        IWW_vals      =(['events'], IWW, dict(units='Pa', long_name='Integrated wind work for each WWE')),
        zonal_extent  =(['events'], zonal_extent, dict(units='Degrees', long_name = 'Longitudinal extent of each WWE')),
        center_lons   =(['events'], center_lon_vals, dict(units='Degrees', long_name = 'Mass-weighted center longitude for each WWE')),
        min_lons      =(['events'], min_lon_vals, dict(units='Degrees', long_name = 'Min longitude for each WWE')),
        max_lons      =(['events'], max_lon_vals, dict(units='Degrees', long_name = 'Max longitude for each WWE')),
        min_times     =(['events'], min_time_vals),
        max_times     =(['events'], max_time_vals),
        center_times  =(['events'], center_time_vals)
        )

    ds = xr.Dataset(data_vars = data_vars,
                    coords=dict(
                        events= (["events"], uniq_WWE_labels),
                        lon = lon_array,
                        time = tauu_time,
                        ),
                    attrs=dict(description= filt_descrip + " zonal wind stress and WWE characteristics. Generated using MDTF POD WWE diagnostic")
                    )

    ds.to_netcdf(save_name + '.WWE_characteristics.nc')

    return ds
+
+def _preprocess(x, lon_bnds, lat_bnds):
+ return x.sel(lon=slice(*lon_bnds), lat=slice(*lat_bnds))
+
+
print("\n=======================================")
print("BEGIN WWEs.py ")
print("=======================================")

# --- Framework-supplied settings ------------------------------------------
# All of these are set by the MDTF framework before the POD runs; a missing
# key raises KeyError, which fails fast as desired.
print("*** Parse MDTF-set environment variables ...")
work_dir     = os.environ["WORK_DIR"]
obs_dir      = os.environ["OBS_DATA"]
casename     = os.environ["CASENAME"]
start_date   = os.environ["startdate"]
end_date     = os.environ["enddate"]
first_year   = os.environ["first_yr"]
last_year    = os.environ["last_yr"]
static_thresh= os.environ['do_static_threshold']
min_lat      = float(os.environ["min_lat"])
max_lat      = float(os.environ["max_lat"])
min_lon      = float(os.environ["min_lon"])
max_lon      = float(os.environ["max_lon"])
regrid_method= os.environ["regrid_method"]

#Define lats to average tauu over and lon range to analyze
lat_lim_list = [min_lat, max_lat]
lon_lim_list = [min_lon, max_lon]
+
###########################################################################
########### Part 1 ########################################################
########### Get TropFlux observations needed for regridding and plotting
###########################################################################
print(f'*** Now working on obs data\n------------------------------')
obs_file_WWEs = obs_dir + '/TropFlux_120-dayHPfiltered_tauu_1980-2014.nc'

print(f'*** Reading obs data from {obs_file_WWEs}')
obs_WWEs = xr.open_dataset(obs_file_WWEs)
print(obs_WWEs)

# Subset the data for the user defined first and last years #
obs_WWEs = obs_WWEs.sel(time=slice(first_year, last_year))

# Pull out the obs coordinates and pre-computed WWE fields used later for
# regridding the model output and for the comparison plots.
obs_lons = obs_WWEs.lon
obs_lats = obs_WWEs.lat
obs_time = obs_WWEs.time
Pac_lons = obs_WWEs.Pac_lon
obs_WWE_mask = obs_WWEs.WWE_mask
TropFlux_filt_tauu = obs_WWEs.filtered_tauu
TropFlux_WWEsperlon = obs_WWEs.WWEs_per_lon
+
###################################################################################
######### PART 2 ##################################################################
######### Prepare Model output for WWE ID code#####################################
###################################################################################
print(f'*** Now starting work on {casename}\n------------------------------')
print('*** Reading variables ...')

#These variables come from the case_env_file that the framework creates
#the case_env_file points to the csv file, which in turn points to the data files.
#Variables from the data files are then read in. See example_multicase.py
# Read the input model data

case_env_file = os.environ["case_env_file"]
assert os.path.isfile(case_env_file), f"case environment file not found"
# NOTE(review): if yaml parsing fails, the exception is only printed and
# case_info stays undefined, so the next line raises NameError -- confirm
# whether a hard exit here would be preferable.
with open(case_env_file, 'r') as stream:
    try:
        case_info = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        print(exc)

cat_def_file = case_info['CATALOG_FILE']
case_list    = case_info['CASE_LIST']

#Use partial function to only load part of the data file
lon_bnds, lat_bnds = (0, 360), (-32.5, 32.5)
partial_func = partial(_preprocess, lon_bnds=lon_bnds, lat_bnds=lat_bnds)

# open the csv file using information provided by the catalog definition file
cat = intake.open_esm_datastore(cat_def_file)

# all cases share variable names and dimension coords in this example, so just get first result for each
tauu_var   = [case['tauu_var'] for case in case_list.values()][0]
time_coord = [case['time_coord'] for case in case_list.values()][0]
lat_coord  = [case['lat_coord'] for case in case_list.values()][0]
lon_coord  = [case['lon_coord'] for case in case_list.values()][0]
+
############################################################################
#Filter catalog by desired variable and output frequency
############################################################################
#Get tauu (zonal wind stress) variable
tauu_subset = cat.search(variable_id=tauu_var, frequency="day")

# convert tauu_subset catalog to an xarray dataset dict
tauu_dict = tauu_subset.to_dataset_dict(preprocess = partial_func,
                                        xarray_open_kwargs={"decode_times": True, "use_cftime": True})

# NOTE(review): if the catalog search matches more than one dataset, this
# keeps only the last one iterated -- confirm a single match is expected.
for k, v in tauu_dict.items():
    tauu_arr = tauu_dict[k][tauu_var]

##################################################################
#Get sftlf (land fraction) variable if it exists & mask out land
##################################################################
key = 'sftlf_var'
x = list(case_list[casename].keys())

# Exactly one sftlf entry: use the model's land fraction (in %) to mask land.
if(x.count(key) == 1):
    print("Using model land fraction variable to mask out land")
    sftlf_var    = [case['sftlf_var'] for case in case_list.values()][0]
    sftlf_subset = cat.search(variable_id=sftlf_var, frequency="fx")
    # convert sftlf_subset catalog to an xarray dataset dict
    sftlf_dict = sftlf_subset.to_dataset_dict(preprocess = partial_func)

    for k, v in sftlf_dict.items():
        sftlf_arr = sftlf_dict[k][sftlf_var]

    #mask out land in tauu (keep cells with land fraction below 10%)
    masked_tauu = tauu_arr.where(sftlf_arr < 10)

# No sftlf entry: ETOPO-based fallback is not implemented yet, so exit.
if(x.count(key) == 0):
    print("Need to use etopo.nc file to mask out the land")
    print('Program will exit for now, as need to build in more code')
    #ls_mask = land_mask_using_etopo(ds = model_ds, topo_latgrid_1D = topo_latgrid_1D,
    #                               topo_longrid_1D = topo_longrid_1D,
    #                               topo_data1D = topo_data1D, lf_cutoff = 10)
    #masked_tauu = model_ds[tauu_name].where(ls_mask == 1)
    sys.exit()

if(x.count(key) > 1):
    print('Error: Multiple land fraction (sftlf) files found. There should only be one!')
    print('Program will exit')
    sys.exit()
+
+##################################################
+#Convert masked_tauu DataArray back to dataset
+tauu_ds = masked_tauu.to_dataset()
+
+##################################################
+#Only keep data during desired time range
+tauu_ds = tauu_ds.where((tauu_ds.time.dt.year >= int(first_year)) &
+ (tauu_ds.time.dt.year <= int(last_year)), drop = True)
+
+##################################################
+#Create a mask variable for the tauu dataset
+tauu_ds["mask"] = xr.where(~np.isnan(tauu_ds[tauu_var].isel(time = 0)), 1, 0)
+
+##################################################
+#Regrid tauu to the TropFlux obs grid
+##################################################
+print('lon size before regridding:', tauu_ds.lon.size)
+print('Start regrid code using the following method:', regrid_method)
+
+if tauu_ds.lat.size > 1:
+ print('tauu_ds.lat.size > 1')
+ regridder_tauu = regridder_model2obs(lon_vals = np.asarray(obs_lons), lat_vals = np.asarray(obs_lats),
+ in_data = tauu_ds, type_name = regrid_method,
+ isperiodic = True)
+ re_model_tauu = regridder_tauu(tauu_ds[tauu_var], skipna = True)
+
+print('lon size after regridding:', re_model_tauu.lon.size)
+
+##################################################
+#Find region of interest
+#At this point, re_model_tauu is a DataArray
+##################################################
+tauu_region = ((re_model_tauu).where(
+ (re_model_tauu.lat >= np.array(lat_lim_list).min()) &
+ (re_model_tauu.lat <= np.array(lat_lim_list).max()) &
+ (re_model_tauu.lon >= np.array(lon_lim_list).min()) &
+ (re_model_tauu.lon <= np.array(lon_lim_list).max()),
+ drop = True))
+
+print('tauu_region:', tauu_region)
+
+##################################################
+#Average over the latitudes
+##################################################
+#The xarray mean function ignores the nans
+tauu_region_latavg = tauu_region.mean(dim = 'lat')
+
+###################################################################################
+#Check to see if westerly zonal wind stresses are recorded as positive or negative
+###################################################################################
+mean_lon220p5 = np.array(np.mean(tauu_region_latavg.sel(lon = 220.5)))
+print('mean tauu at 220.5E:', mean_lon220p5)
+factor = -1 if mean_lon220p5 > 0 else 1
+tauu = tauu_region_latavg * factor
+
+#Control the chunk size because the chunk size when computing data2use below goes to (1, 1)
+#using the latest MDTF python 3.12, which then takes forever to run
+tauu = tauu.chunk({"time": tauu["time"].size})
+
+print('tauu after lat averaging:', tauu)
+print('At this point, tauu is a DataArray with time longitude dimensions on the TropFlux grid')
+
+###################################################################################
+#Filter tauu to use as input to find WWEs and their characteristics
+###################################################################################
+#filt_dataLP = filter_data(data = tauu, nweights = 201, a = 5)
+#For now the only option is to apply a 120-day highpass filter
+filt_dataHP = filter_data(data = tauu, nweights = 201, a = 120)
+
+#As above, control the chunk size
+filt_dataHP = filt_dataHP.chunk({"time": tauu["time"].size})
+
+data2use = tauu - filt_dataHP
+obs_tauu_thresh = 0.04 #Nm-2 Two standard deviations of the TropFlux lat-averaged 120E-280E zonal wind stress.
+tauu_thresh2use = obs_tauu_thresh if static_thresh is True else np.round(data2use.std()*2, decimals = 2)
+
+print('tauu_thresh2use:', tauu_thresh2use)
+print('data2use', data2use)
+
+###################################################################################
+######### PART 3 ##################################################################
+######### Find & Save WWEs and their characteristics & statistics #################
+###################################################################################
+duration, IWW, zonal_extent, tauu_mean, WWE_labels, WWE_mask, center_lons, \
+center_times, min_times, max_times, min_lons, max_lons, lon_array, tauu_time = \
+find_WWEs_and_characteristics(in_data = data2use, tauu_thresh = tauu_thresh2use, mintime = 5, minlons = 10,
+ xminmax = (3, 3), yminmax = (3, 3), minmax_dur_bins = (5, 27),
+ dur_bin_space = 2, minmax_IWW_bins = (1, 42), IWW_bin_space = 4,
+ xtend_past_lon = 140)
+
+durationB, zonal_extentB, tauu_sum, tauu_abs_mean = WWE_characteristics(wwe_labels = WWE_labels, data = tauu)
+
+
+print('nWWEs:', duration.size)
+print('')
+
+##################################################################################
+# Save the WWE characteristics and statistics to a netcdf file
+##################################################################################
+print('Saving the WWE characteristics to a netcdf file')
+
+save_name = f"{work_dir}/model/netCDF/{casename}.{first_year}-{last_year}"
+
+WWE_chars = save_filtered_tauu_WWEchar(WWE_labels = WWE_labels, WWE_mask = WWE_mask, tauu_anom_vals = np.asarray(data2use),
+ duration = duration, IWW = IWW, zonal_extent = zonal_extent,
+ tauu_anom_mean = tauu_mean, tauu_abs_mean = tauu_abs_mean,
+ center_lon_vals = center_lons, center_time_vals = np.asarray(center_times),
+ min_lon_vals = min_lons, max_lon_vals = max_lons,
+ min_time_vals = np.asarray(min_times), max_time_vals = np.asarray(max_times),
+ lon_array = lon_array, tauu_time = np.asarray(tauu_time), save_name = save_name)
+
+###################################################################################
+######### PART 4 ##################################################################
+######### Plot Hovmollers and histograms for observations and model ###############
+###################################################################################
+#Plot the yearly Hovmollers for observations
+plot_model_Hovmollers_by_year(data = TropFlux_filt_tauu, wwe_mask = obs_WWE_mask,
+ lon_vals = Pac_lons, tauu_time = obs_time,
+ savename = f"{work_dir}/obs/PS/TropFlux",
+ start_date = '1980-1999', end_date = '2000-2014')
+
+###########################################################################
+# Plot Hovmollers for Model
+###########################################################################
+plot_model_Hovmollers_by_year(data = data2use, wwe_mask = WWE_mask,
+ lon_vals = lon_array, tauu_time = tauu_time,
+ savename = f"{work_dir}/model/PS/{casename}",
+ start_date = start_date, end_date = end_date)
+
+
+###########################################################################
+# Plot the likelihood of a WWE affecting a given 1degree longitude
+###########################################################################
+#Convert WWE_labels from a numpy array to a DataArray
+WWE_labels_da = xr.DataArray(data=WWE_labels, dims = ['time', 'lon'],
+ coords=dict(
+ lon=(['lon'], lon_array),
+ time=tauu_time,),
+ attrs=dict(description="WWE labels", units = 'N/A',)
+ )
+
+
+#Call function events_per_lon in WWE_diag_tools.py, which calculates the
+#number of unique WWEs affecting a given 1degree longitude bin (i.e., count_all_event_lons)
+count_all_event_lons, nall_events = events_per_lon(in_da = WWE_labels_da)
+
+
+#Convert count_all_event_lons into a probability per day
+obs_prop_per_day = TropFlux_WWEsperlon/obs_time.size*100.
+model_prop_per_day = count_all_event_lons/tauu_time.size*100.
+
+#Save the count and probability of a unique WWE per 1degree longitude bin
+data_vars = dict(
+ model_WWEs_per_lon =(['lon'], count_all_event_lons, dict(units='count', long_name='Number of unique WWEs affecting a 1degree lon bin from the model')),
+ model_freq_WWEs_per_lon =(['lon'], model_prop_per_day, dict(units='fractuion', long_name='The fraction of unique WWEs affecting a 1degree lon bin from the model calculated as count/ndays ')),
+ obs_WWEs_per_lon =(['lon'], np.asarray(TropFlux_WWEsperlon), dict(units='count', long_name='Number of unique WWEs affecting a 1degree lon bin in TropFlux observations from 1980-2014')),
+ obs_freq_WWEs_per_lon =(['lon'], np.asarray(obs_prop_per_day), dict(units='fractuion', long_name='The fraction of unique WWEs affecting a 1degree lon bin from TropFlux calculated as count/ndays '))
+)
+
+ds = xr.Dataset(data_vars = data_vars,
+ coords=dict(lon = lon_array,),
+ attrs=dict(description= "Variables needed to make the *model*_and_TropFlux_WWE_prob_per_day.png figures that the WWEs POD produces")
+ )
+
+ds.to_netcdf(f"{work_dir}/model/netCDF/{casename}_and_TropFlux_WWE_probability_per_day.nc")
+
+
+# ***** Make the plot ******
+save_path = f"{work_dir}/model/PS/"
+model_titlename = {casename}
+
+plot_WWE_likelihood_per_lon(lons = Pac_lons, model_prop_per_day = model_prop_per_day,
+ obs_prop_per_day = obs_prop_per_day, savepath = save_path,
+ model_name = casename)
+###################################################################################
+######### PART 5 ##################################################################
+#Close the catalog files and release variable dict reference for garbage collection
+###################################################################################
+cat.close()
+tauu_dict = None
+
+###################################################################################
+######### PART 6 ##################################################################
+######### Confirm POD executed successfully #######################################
+###################################################################################
+# ----------------------------------------
+print("Last log message by example_multicase POD: finished successfully!")
+sys.exit(0)
+
diff --git a/diagnostics/WWEs/WWEs_run_config.yml b/diagnostics/WWEs/WWEs_run_config.yml
new file mode 100644
index 000000000..a5c45b96b
--- /dev/null
+++ b/diagnostics/WWEs/WWEs_run_config.yml
@@ -0,0 +1,76 @@
+# Runtime yaml configuration file template for the MDTF-diagnostics package
+# Create a copy of this file for personal use, and pass it to the framework
+# with the -f | --configfile flag
+
+# List of POD(s) to run
+pod_list:
+ - "WWEs"
+
+# Case list entries (must be unique IDs for each simulation)
+#case_list:
+# "INM-CM4-8_historical_r1i1p1f1_gr1" :
+# model: "INM"
+# convention: "CMIP"
+# startdate: "19800101"
+# enddate: "20141231"
+
+case_list:
+ "CESM2_historical_r1i1p1f1_gn" :
+ model: "CESM2"
+ convention: "CMIP"
+ startdate: "19800101"
+ enddate: "20150101"
+
+# "tauu_Eday_CESM2_historical_r1i1p1f1_gn_19900101-19991231" :
+# model: "CESM2"
+# convention: "CMIP"
+# startdate: "19900101000000"
+# enddate: "19991231000000"
+
+### Data location settings ###
+# Required: full or relative path to ESM-intake catalog header file
+DATA_CATALOG: "./diagnostics/WWEs/esm_catalog_CESM2_historical_r1i1p1f1.json"
+# Optional: Parent directory containing observational data used by individual PODs.
+# If defined, the framework assumes observational data is in OBS_DATA_ROOT/[POD_NAME]
+OBS_DATA_ROOT: "../inputdata/obs_data"
+# Required: Working directory location. Temporary files are written here.
+# Final output is also written here if the OUTPUT_DIR is not defined.
+WORK_DIR: "../wkdir"
+# Optional: Location to write final output if you don't want it in the wkdir
+OUTPUT_DIR: "../wkdir"
+### Environment Settings ###
+# Required: Location of the Anaconda/miniconda installation to use for managing
+# dependencies (path returned by running `conda info --base`.)
+conda_root: "/Users/eriley/anaconda3"
+# Optional: Directory containing the framework-specific conda environments. This should
+# be equal to the "--env_dir" flag passed to conda_env_setup.sh. If left
+# blank, the framework will look for its environments in conda_root/envs
+conda_env_root: "/Users/eriley/anaconda3/envs"
+# Location of micromamba executable; REQUIRED if using micromamba
+micromamba_exe: ""
+### Data type settings ###
+# set to true to handle data files > 4 GB
+large_file: False
+### Output Settings ###
+# Set to true to have PODs save postscript figures in addition to bitmaps.
+save_ps: False
+# If true, leave pp data in OUTPUT_DIR after preprocessing; if false, delete pp data after PODs
+# run to completion
+save_pp_data: True
+# Set to true to perform data translation; default is True:
+translate_data: True
+# Set to true to save HTML and bitmap plots in a .tar file.
+make_variab_tar: False
+# Set to true to overwrite results in OUTPUT_DIR; otherwise results saved
+# under a unique name.
+overwrite: False
+# Generate html output for multiple figures per case
+"make_multicase_figure_html": False
+### Developer settings ###
+# Set to True to run the preprocessor
+run_pp: False
+# Additional custom preprocessing scripts to run on data
+# place these scripts in the MDTF-diagnostics/user_scripts directory
+# The framework will run the specified scripts whether run_pp is set to True or False
+user_pp_scripts:
+ - ""
diff --git a/diagnostics/WWEs/doc/WWE.rst b/diagnostics/WWEs/doc/WWE.rst
new file mode 100644
index 000000000..e149b0bfc
--- /dev/null
+++ b/diagnostics/WWEs/doc/WWE.rst
@@ -0,0 +1,137 @@
+.. This is a comment in RestructuredText format (two periods and a space).
+Westerly Wind Event Diagnostic Documentation
+============================================
+
+Last update: 2024-09-19
+
+This POD identifies westerly wind events (WWEs) in the equatorial Pacific
+(120E-280E) using daily equatorially-averaged (2.5S-2.5N) 120-day
+highpass filtered zonal wind stress. WWEs in TropFlux observations are
+compared to WWEs produced by earth system models. Two sets of figures
+are created:
+
+1. Hovmollers of 120-day highpass filtered zonal wind stress for each
+ year of the data considered. Each year is a separate panel with the
+ first 20 years occurring on one output figure and the next 20 years
+ on the second output figure. Time-longitude zonal wind stress
+ patches that qualify as WWEs are outlined in black. The Hovmollers
+ are created for both the TropFlux observations and the ESM.
+
+2. A 1D histogram of the likelihood of each 1 longitude
+bin across the Pacific experiencing a WWE per day. The likelihood is
+calculated as the total number of unique events at each 1 longitude
+divided by the total number of days in each data set. The reciprocal
+of the frequency indicates the average return rate of a WWE per
+longitude or the average number of days between WWEs at each
+longitude.
+
+The variables needed to create these figures are saved in the
+following directories:
+{work_dir}/WWEs/model/netCDF/ and /inputdata/obs_data/WWEs/
+
+Version & Contact info
+----------------------
+- Version/revision information: v1.0 (September 2024)
+- PI: Emily M. Riley Dellaripa (Emily.Riley@colostate.edu), Colorado State University
+- Current Developer: Emily M. Riley Dellaripa (Emily.Riley@colostate.edu), Colorado State University
+- Contributors: Charlotte A. DeMott (CSU); Eric D. Maloney (CSU);
+ Jingxuan Cui (CSU)
+
+Open source copyright agreement
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The MDTF framework is distributed under the LGPLv3 license (see LICENSE.txt).
+
+Functionality
+-------------
+
+This POD is composed of two files. The main driver script is
+``WWEs.py``, which contains the code and functions which perform the
+diagnostic computations and call the plotting code. The plotting code
+and additional functions needed to identify the WWEs and determine
+their characteristics are contained in ``WWE_diag_tools.py``. The main
+driver script reads in the necessary data, prepares the zonal wind
+stress for identifying WWEs, calls the function that identifies the
+WWEs and their characteristics, saves information to netcdf files
+including: the WWE labels and mask and their characteristics (i.e.,
+central time and longitude, zonal extent, duration, and integrated
+wind work (i.e., total forcing of zonal wind stress to the ocean)),
+and finally calls the functions that make the plots.
+
+The observational data used is TropFlux daily zonal surface fluxes
+that are available from 1980-2014 (Praveen Kumar et al. 2013).
+
+The POD is based on Riley Dellaripa et al. (2024) and details of the
+methods, motivation, and results of our work can be found there.
+
+By default the WWEs in the model output are identified using a 0.04 N/m2
+threshold, which reflects two standard deviations of the
+equatorially-averaged 120-day highpass filtered TropFlux zonal wind
+stress between 1980-2014. Users can adjust the settings.jsonc file to
+not use the 0.04 N/m2 threshold, but instead calculate the model's two
+standard deviations of its equatorially-averaged 120-day highpass
+filtered zonal wind stress and use that as the threshold for
+identifying the model's WWEs.
+
+Required programming language and libraries
+-------------------------------------------
+
+* Python >= 3.12
+* matplotlib
+* xarray
+* numpy
+* os
+* time
+* xesmf
+* scipy
+* functools
+* intake
+* yaml
+* sys
+
+These dependencies are included in the python3_base environment
+provided by the automated installation script
+for the MDTF Framework.
+
+Required model output variables
+-------------------------------
+
+This POD requires the following variables:
+
+* Zonal wind stress (TAUU), 3D (time-lat-lon), Units: Pa, Frequency: Daily
+* Percentage of the grid cell occupied by land (sftlf), 2D
+ (lat-lon), Units: %, Frequency: Fixed
+
+
+References
+----------
+1. Riley Dellaripa et al. (2024): Evaluation of Equatorial Westerly
+ Wind Events in the Pacific Ocean in CMIP6 Models. *J. Climate*,
+ `doi: 10.1175/JCLI-D-23-0629.1 < https://doi.org/10.1175/JCLI-D-23-0629.1>`__.
+
+2. Praveen Kumar,B., J. Vialard, M. Lengaigne, V. S. N. Murty, M. J. McPhaden M. F. Cronin, F. Pinsard,
+ and K. Gopala Reddy, 2013: TropFlux wind stresses over the tropical
+ oceans: evaluation and comparison with other products. Clim. Dyn.,
+   40, 2049–2071, `doi: 10.1007/s00382-012-1455-4 <https://doi.org/10.1007/s00382-012-1455-4>`__.
+
+More about this diagnostic
+--------------------------
+Westerly wind events (WWEs) are anomalously strong, long lasting
+westerlies that occur primarily over the Pacific Ocean. The momentum
+imparted on the ocean from WWEs have the potential to excite eastward
+propagating oceanic Kelvin wave. The oceanic Kelvin waves, in turn,
+warm the ocean surface and depress the thermocline as they propagate
+eastward, which can have impacts on ENSO development. Because of WWEs
+relationship to oceanic Kelvin waves and ENSO development, it is
+important to determine how well earth system models (ESMs) replicate
+them. The inability for a model to accurately replicate WWE frequency,
+location, and strength has potential consequences for ENSO
+development. WWEs are frequently associated with Madden Julian
+Oscillation (MJO) events or equatorial convectively coupled Rossby
+waves (CRWs), though MJO and CRWs are not the only source of WWEs. The
+inability of a model to appropriately represent WWE forcing can be
+linked to deficiencies in a models representation of MJO and CRW
+variability, though there are some models that accurately capture MJO
+and CRW variability while still misrepresenting WWE forcing in the
+west Pacific. For more details on this work please see Riley Dellaripa
+et al. (2024).
diff --git a/diagnostics/WWEs/esm_catalog_CESM2_historical_r1i1p1f1.csv b/diagnostics/WWEs/esm_catalog_CESM2_historical_r1i1p1f1.csv
new file mode 100644
index 000000000..f84762f87
--- /dev/null
+++ b/diagnostics/WWEs/esm_catalog_CESM2_historical_r1i1p1f1.csv
@@ -0,0 +1,3 @@
+activity_id,branch_method,branch_time_in_child,branch_time_in_parent,experiment,experiment_id,frequency,grid,grid_label,institution_id,nominal_resolution,parent_activity_id,parent_experiment_id,parent_source_id,parent_time_units,parent_variant_label,product,realm,source_id,source_type,sub_experiment,sub_experiment_id,table_id,variable_id,variant_label,member_id,standard_name,long_name,units,vertical_levels,init_year,start_time,end_time,time_range,path,version
+CMIP,standard,674885,219000,all-forcing simulation of the recent past,historical,day,native 0.9x1.25 finite volume grid (192x288 latxlon),gn,NCAR,100 km,CMIP,piControl,CESM2,days since 0001-01-01 00:00:00,r1i1p1f1,model-output,atmos,CESM2,AOGCM BGC,none,none,Eday,tauu,r1i1p1f1,r1i1p1f1,surface_downward_eastward_stress,Surface Downward Eastward Wind Stress,Pa,1,,1/1/80 0:00,1/1/15 0:00,1980-01-01 00:00:00-2015-01-01 00:00:00,/Users/eriley/NOAA_POD/model_data/CESM2/tauu/tauu_Eday_CESM2_historical_r1i1p1f1_gn_19800101-20150101.nc,v0
+CMIP,standard,674885,219000,all-forcing simulation of the recent past,historical,fx,native 0.9x1.25 finite volume grid (192x288 latxlon),gn,NCAR,100 km,CMIP,piControl,CESM2,days since 0001-01-01 00:00:00,r1i1p1f1,model-output,atmos,CESM2,AOGCM BGC,none,none,fx,sftlf,r1i1p1f1,r1i1p1f1,land_area_fraction,Percentage of the grid cell occupied by land (including lakes),%,1,,,,,/Users/eriley/NOAA_POD/model_data/sftlf_historical/CESM2/sftlf_fx_CESM2_historical_r1i1p1f1_gn.nc,v0
\ No newline at end of file
diff --git a/diagnostics/WWEs/esm_catalog_CESM2_historical_r1i1p1f1.json b/diagnostics/WWEs/esm_catalog_CESM2_historical_r1i1p1f1.json
new file mode 100644
index 000000000..e37e6c28c
--- /dev/null
+++ b/diagnostics/WWEs/esm_catalog_CESM2_historical_r1i1p1f1.json
@@ -0,0 +1,181 @@
+{
+ "esmcat_version": "0.0.1",
+ "attributes": [
+ {
+ "column_name": "activity_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "branch_method",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "branch_time_in_child",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "branch_time_in_parent",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "experiment",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "experiment_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "frequency",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "grid",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "grid_label",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "institution_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "nominal_resolution",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "parent_activity_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "parent_experiment_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "parent_source_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "parent_time_units",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "parent_variant_label",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "product",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "realm",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "source_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "source_type",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "sub_experiment",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "sub_experiment_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "table_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "variable_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "variant_label",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "member_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "standard_name",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "long_name",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "units",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "vertical_levels",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "init_year",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "start_time",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "end_time",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "time_range",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "path",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "version",
+ "vocabulary": ""
+ }
+ ],
+ "assets": {
+ "column_name": "path",
+ "format": "netcdf",
+ "format_column_name": null
+ },
+ "aggregation_control": {
+ "variable_column_name": "variable_id",
+ "groupby_attrs": [
+ "activity_id",
+ "institution_id",
+ "source_id",
+ "experiment_id",
+ "frequency",
+ "member_id",
+ "table_id",
+ "grid_label",
+ "realm",
+ "variant_label"
+ ],
+ "aggregations": [
+ {
+ "type": "union",
+ "attribute_name": "variable_id",
+ "options": {}
+ }
+ ]
+ },
+ "id": "esm_catalog_CESM2_historical_r1i1p1f1",
+ "description": null,
+ "title": null,
+ "last_updated": "2025-03-03T20:03:41Z",
+ "catalog_file": "file:///Users/eriley/mdtf/MDTF-diagnostics/diagnostics/WWEs/esm_catalog_CESM2_historical_r1i1p1f1.csv"
+}
\ No newline at end of file
diff --git a/diagnostics/WWEs/esm_catalog_INM-CM4-8_historical_r1i1p1f1.csv b/diagnostics/WWEs/esm_catalog_INM-CM4-8_historical_r1i1p1f1.csv
new file mode 100644
index 000000000..0b3bf6a98
--- /dev/null
+++ b/diagnostics/WWEs/esm_catalog_INM-CM4-8_historical_r1i1p1f1.csv
@@ -0,0 +1,3 @@
+activity_id,branch_method,branch_time_in_child,branch_time_in_parent,experiment,experiment_id,frequency,grid,grid_label,institution_id,nominal_resolution,parent_activity_id,parent_experiment_id,parent_source_id,parent_time_units,parent_variant_label,product,realm,source_id,source_type,sub_experiment,sub_experiment_id,table_id,variable_id,variant_label,member_id,standard_name,long_name,units,vertical_levels,init_year,start_time,end_time,time_range,path,version
+CMIP,standard,0,35405,all-forcing simulation of the recent past,historical,day,gs2x1.5,gr1,INM,100 km,CMIP,piControl,INM-CM4-8,days since 1850-01-01,r1i1p1f1,model-output,atmos,INM-CM4-8,AOGCM AER,none,none,Eday,tauu,r1i1p1f1,r1i1p1f1,surface_downward_eastward_stress,Surface Downward Eastward Wind Stress,Pa,1,,1/1/80 12:00,12/31/14 12:00,1980-01-01 12:00:00-2014-12-31 12:00:00,/Users/eriley/NOAA_POD/model_data/INM-CM4-8/tauu/tauu_Eday_INM-CM4-8_historical_r1i1p1f1_gr1_19800101-20141231.nc,v0
+CMIP,standard,0,35405,all-forcing simulation of the recent past,historical,fx,gs2x1.5,gr1,INM,100 km,CMIP,piControl,INM-CM4-8,days since 1850-01-01,r1i1p1f1,model-output,atmos,INM-CM4-8,AOGCM AER,none,none,fx,sftlf,r1i1p1f1,r1i1p1f1,land_area_fraction,Percentage of the grid cell occupied by land (including lakes),%,1,,,,,/Users/eriley/NOAA_POD/model_data/sftlf_historical/INM-CM4-8/sftlf_fx_INM-CM4-8_historical_r1i1p1f1_gr1.nc,v0
\ No newline at end of file
diff --git a/diagnostics/WWEs/esm_catalog_INM-CM4-8_historical_r1i1p1f1.json b/diagnostics/WWEs/esm_catalog_INM-CM4-8_historical_r1i1p1f1.json
new file mode 100644
index 000000000..3a66d8d24
--- /dev/null
+++ b/diagnostics/WWEs/esm_catalog_INM-CM4-8_historical_r1i1p1f1.json
@@ -0,0 +1,181 @@
+{
+ "esmcat_version": "0.0.1",
+ "attributes": [
+ {
+ "column_name": "activity_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "branch_method",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "branch_time_in_child",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "branch_time_in_parent",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "experiment",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "experiment_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "frequency",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "grid",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "grid_label",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "institution_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "nominal_resolution",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "parent_activity_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "parent_experiment_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "parent_source_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "parent_time_units",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "parent_variant_label",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "product",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "realm",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "source_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "source_type",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "sub_experiment",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "sub_experiment_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "table_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "variable_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "variant_label",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "member_id",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "standard_name",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "long_name",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "units",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "vertical_levels",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "init_year",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "start_time",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "end_time",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "time_range",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "path",
+ "vocabulary": ""
+ },
+ {
+ "column_name": "version",
+ "vocabulary": ""
+ }
+ ],
+ "assets": {
+ "column_name": "path",
+ "format": "netcdf",
+ "format_column_name": null
+ },
+ "aggregation_control": {
+ "variable_column_name": "variable_id",
+ "groupby_attrs": [
+ "activity_id",
+ "institution_id",
+ "source_id",
+ "experiment_id",
+ "frequency",
+ "member_id",
+ "table_id",
+ "grid_label",
+ "realm",
+ "variant_label"
+ ],
+ "aggregations": [
+ {
+ "type": "union",
+ "attribute_name": "variable_id",
+ "options": {}
+ }
+ ]
+ },
+  "id": "esm_catalog_INM-CM4-8_historical_r1i1p1f1",
+ "description": null,
+ "title": null,
+ "last_updated": "2025-03-03T18:09:54Z",
+ "catalog_file": "file:///Users/eriley/mdtf/MDTF-diagnostics/diagnostics/WWEs/esm_catalog_INM-CM4-8_historical_r1i1p1f1.csv"
+}
diff --git a/diagnostics/WWEs/settings.jsonc b/diagnostics/WWEs/settings.jsonc
new file mode 100644
index 000000000..e5bbbcdc4
--- /dev/null
+++ b/diagnostics/WWEs/settings.jsonc
@@ -0,0 +1,67 @@
+// Basic POD Settings
+{
+ "settings" : {
+ "description" : "WWE POD",
+ "driver" : "WWEs.py",
+ "long_name" : " WWE POD",
+ "convention": "cmip",
+ "runtime_requirements": {
+ "python3": ["matplotlib", "xarray", "netCDF4", "pandas", "xesmf", "scipy"]
+ },
+ "pod_env_vars": {
+ "first_yr": "1980",
+ "last_yr": "2014",
+ // End of 20 is the last year in the first 20 years analyzed. This will be the first page of Hovmollers created
+ "endof20": "1999",
+ // The first year in the second set of 20 years worth of Hovmollers created
+ "start2nd20": "2000",
+ // Lower latitude limit for zonal wind stress lat band avgs (defaults to -2.5)
+ "min_lat": "-2.5",
+ // Upper latitude limit for zonal wind stress lat band avgs (defaults to 2.5)
+ "max_lat": "2.5",
+ // Lower longitude limit for zonal wind stress, lons should be 0-360 (defaults to 120)
+ "min_lon": "120",
+     // Upper longitude limit for zonal wind stress, lons should be 0-360 (defaults to 280)
+ "max_lon": "280",
+ // Choose how native model grid should be regridded to match TropFlux grid (default is 'conservative_normed')
+     // The other option is bilinear
+ "regrid_method": "conservative_normed",
+     //How the zonal wind stress threshold to ID WWEs is determined.
+ //Default True uses the same 2sigma threshold as the obs.
+ //If False, the model's 2sigma tauu is the threshold
+ "do_static_threshold": "True"
+ }
+ },
+// Variable Coordinates
+ "dimensions": {
+ "lat": {
+ "standard_name": "latitude",
+ "units": "degrees_north",
+ "axis": "Y"
+ },
+ "lon": {
+ "standard_name": "longitude",
+ "units": "degrees_east",
+ "axis": "X"
+ },
+ "time": {"standard_name": "time"}
+ },
+
+// Variables
+ "varlist" : {
+ "tauu": {
+ "frequency" : "day",
+ "dimensions": ["time", "lat", "lon"],
+ "standard_name" : "surface_downward_eastward_stress",
+ "realm": "atmos",
+ "units": "Pa"
+ },
+ "sftlf": {
+ // "frequency": "fx",
+ "dimensions": ["lat", "lon"],
+ "standard_name": "land_area_fraction",
+ "realm": "land",
+ "units": "%"
+ }
+ }
+}
\ No newline at end of file
diff --git a/diagnostics/example_multicase/ERD_multirun_config_template.jsonc b/diagnostics/example_multicase/ERD_multirun_config_template.jsonc
new file mode 100644
index 000000000..a5362c26b
--- /dev/null
+++ b/diagnostics/example_multicase/ERD_multirun_config_template.jsonc
@@ -0,0 +1,114 @@
+// This is a template for configuring MDTF to run PODs that analyze multi-run/ensemble data
+//
+// Copy this file, rename it, and customize the settings as needed
+// Pass your file to the framework using the -f/--input-file flag.
+// Any other explicit command line options will override what's listed here.
+//
+// All text to the right of an unquoted "//" is a comment and ignored, as well
+// as blank lines (JSONC quasi-standard.)
+//
+// Remove your test config file, or any changes you make to this template if you do not rename it,
+// from your remote repository before you submit a PR for review.
+// To generate CMIP synthetic data in the example dataset, run the following:
+// > mamba env create --force -q -f ./src/conda/_env_synthetic_data.yml
+// > conda activate _MDTF_synthetic_data
+// > pip install mdtf-test-data
+// > cd /mdtf
+// > mkdir mdtf_test_data && cd mdtf_test_data
+// > mdtf_synthetic.py -c CMIP --startyear 1980 --nyears 5
+// > mdtf_synthetic.py -c CMIP --startyear 1985 --nyears 5
+// Note that MODEL_DATA_ROOT assumes that mdtf_test_data is one directory above MDTF-diagnostics
+// in this sample config file
+{
+ // Run each ensemble on the example POD.
+ // Add other PODs that work on ensemble datasets to the pod_list as needed
+ "pod_list" : [
+ "example_multicase"
+ ],
+ // Each case corresponds to a different simulation/output dataset
+ "case_list":
+ {
+ "CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231":
+ {
+ "model": "test",
+ "convention": "CMIP",
+ "startdate": "19800101",
+ "enddate": "19841231"
+ },
+ "CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231":
+ {
+ "model": "test",
+ "convention": "CMIP",
+ "startdate": "19850101",
+ "enddate": "19891231"
+ }
+ },
+
+ // PATHS ---------------------------------------------------------------------
+ // Location of supporting data downloaded when the framework was installed.
+ // If a relative path is given, it's resolved relative to the MDTF-diagnostics
+ // code directory. Environment variables (eg, $HOME) can be referenced with a
+// "$" and will be expanded to their current values when the framework runs.
+ // Full or relative path to model data ESM-intake catalog header file
+ "DATA_CATALOG": "./diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.json",
+
+ // Backwards compatibility
+ //"MODEL_DATA_ROOT": "./inputdata/mdtf_test_data",
+
+ // Parent directory containing observational data used by individual PODs.
+ "OBS_DATA_ROOT": "../inputdata/obs_data",
+
+ // Working directory.
+ "WORK_DIR": "../wkdir",
+
+ // Directory to write output. The results of each run of the framework will be
+ // put in a subdirectory of this directory. Defaults to WORKING_DIR if blank.
+ "OUTPUT_DIR": "../wkdir",
+
+ // Location of the Anaconda/miniconda or micromamba installation to use for managing
+ // dependencies (path returned by running `[conda | micromamba] info`.) If empty,
+ // framework will attempt to determine location of system's conda installation.
+ "conda_root": "/Users/eriley/anaconda3",
+
+ // Directory containing the framework-specific conda environments. This should
+ // be equal to the "--env_dir" flag passed to conda_env_setup.sh. If left
+ // blank, the framework will look for its environments in conda_root/envs
+ "conda_env_root": "/Users/eriley/anaconda3/envs",
+
+ // Path to micromamba executable if using micromamba
+ "micromamba_exe": "",
+
+ // SETTINGS ------------------------------------------------------------------
+ // Any command-line option recognized by the mdtf script (type `mdtf --help`)
+ // can be set here, in the form "flag name": "desired setting".
+
+ // Settings affecting what output is generated:
+ // Set to true to run the preprocessor; default true:
+ "run_pp": false,
+
+ // Set to true to perform data translation; default false:
+ "translate_data": true,
+
+ // Set to true to have PODs save postscript figures in addition to bitmaps.
+ "save_ps": false,
+
+ // Set to true for files > 4 GB
+ "large_file": false,
+
+ // If true, leave pp data in OUTPUT_DIR after preprocessing; if false, delete pp data after PODs
+ // run to completion
+ "save_pp_data": true,
+
+ // Set to true to save HTML and bitmap plots in a .tar file.
+ "make_variab_tar": false,
+
+ // Generate html output for multiple figures per case
+ "make_multicase_figure_html": false,
+
+ // Set to true to overwrite results in OUTPUT_DIR; otherwise results saved
+ // under a unique name.
+ "overwrite": false,
+ // List with custom preprocessing script(s) to run on data
+ // Place these scripts in the user_scripts directory of your copy of the MDTF-diagnostics repository
+ "user_pp_scripts" : ["example_pp_script.py"]
+}
diff --git a/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv b/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv
index 68776a919..42ec4e46e 100644
--- a/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv
+++ b/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv
@@ -1,3 +1,3 @@
-activity_id,branch_method,branch_time_in_child,branch_time_in_parent,experiment,experiment_id,frequency,grid,grid_label,institution_id,nominal_resolution,parent_activity_id,parent_experiment_id,parent_source_id,parent_time_units,parent_variant_label,product,realm,source_id,source_type,sub_experiment,sub_experiment_id,table_id,variable_id,variant_label,member_id,standard_name,long_name,units,vertical_levels,init_year,start_time,end_time,time_range,path,version
-CMIP,standard,,,,synthetic,day,,gr,,,CMIP,,,days since 1980-01-01,r1i1p1f1,,atmos,,,none,none,day,tas,r1i1p1f1,r1i1p1f1,air_temperature,Near-Surface Air Temperature,K,1,,1980-01-01,1984-12-31,1980-01-01-1984-12-31,/Users/jess/mdtf/inputdata/mdtf_test_data/CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231/day/CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231.tas.day.nc,none
-CMIP,standard,,,,synthetic,day,,gr,,,CMIP,,,days since 1985-01-01,r1i1p1f1,,atmos,,,none,none,day,tas,r1i1p1f1,r1i1p1f1,air_temperature,Near-Surface Air Temperature,K,1,,1985-01-01,1989-12-31,1985-01-01-1989-12-31,/Users/jess/mdtf/inputdata/mdtf_test_data/CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231/day/CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231.tas.day.nc,none
+activity_id,branch_method,branch_time_in_child,branch_time_in_parent,experiment,experiment_id,frequency,grid,grid_label,institution_id,nominal_resolution,parent_activity_id,parent_experiment_id,parent_source_id,parent_time_units,parent_variant_label,product,realm,source_id,source_type,sub_experiment,sub_experiment_id,table_id,variable_id,variant_label,member_id,standard_name,long_name,units,vertical_levels,init_year,start_time,end_time,time_range,path,version
+CMIP,standard,,,,synthetic,day,,gr,,,CMIP,,,days since 1980-01-01,r1i1p1f1,,atmos,,,none,none,day,tas,r1i1p1f1,r1i1p1f1,air_temperature,Near-Surface Air Temperature,K,1,,1980-01-01,1984-12-31,1980-01-01-1984-12-31,/Users/eriley/mdtf/inputdata/mdtf_test_data/CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231/day/CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231.tas.day.nc,none
+CMIP,standard,,,,synthetic,day,,gr,,,CMIP,,,days since 1985-01-01,r1i1p1f1,,atmos,,,none,none,day,tas,r1i1p1f1,r1i1p1f1,air_temperature,Near-Surface Air Temperature,K,1,,1985-01-01,1989-12-31,1985-01-01-1989-12-31,/Users/eriley/mdtf/inputdata/mdtf_test_data/CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231/day/CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231.tas.day.nc,none
\ No newline at end of file
diff --git a/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.json b/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.json
index c62245645..7035c2d6d 100644
--- a/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.json
+++ b/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.json
@@ -177,5 +177,5 @@
"description": null,
"title": null,
"last_updated": "2023-06-01",
- "catalog_file": "file:/Users/jess/mdtf/MDTF-diagnostics/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv"
-}
\ No newline at end of file
+ "catalog_file": "file:///Users/eriley/mdtf/MDTF-diagnostics/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv"
+}
diff --git a/diagnostics/example_multicase/runtime_config.yml b/diagnostics/example_multicase/runtime_config.yml
new file mode 100755
index 000000000..0ea49986f
--- /dev/null
+++ b/diagnostics/example_multicase/runtime_config.yml
@@ -0,0 +1,69 @@
+# Runtime yaml configuration file template for the MDTF-diagnostics package
+# Create a copy of this file for personal use, and pass it to the framework
+# with the -f | --configfile flag
+
+# List of POD(s) to run
+pod_list:
+ - "example_multicase"
+
+# Case list entries (must be unique IDs for each simulation)
+case_list:
+ "CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231" :
+ model: "test"
+ convention: "CMIP"
+ startdate: "19800101120000"
+ enddate: "19841231000000"
+
+# "CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231" :
+# model: "test"
+# convention: "CMIP"
+# startdate: "19850101000000"
+# enddate: "19891231000000"
+
+### Data location settings ###
+# Required: full or relative path to ESM-intake catalog header file
+DATA_CATALOG: "./diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.json"
+# Optional: Parent directory containing observational data used by individual PODs.
+# If defined, the framework assumes observational data is in OBS_DATA_ROOT/[POD_NAME]
+OBS_DATA_ROOT: "../inputdata/obs_data"
+# Required: Working directory location. Temporary files are written here.
+# Final output is also written here if the OUTPUT_DIR is not defined.
+WORK_DIR: "../wkdir"
+# Optional: Location to write final output if you don't want it in the wkdir
+OUTPUT_DIR: "../wkdir"
+### Environment Settings ###
+# Required: Location of the Anaconda/miniconda installation to use for managing
+# dependencies (path returned by running `conda info --base`.)
+conda_root: "/Users/eriley/anaconda3"
+# Optional: Directory containing the framework-specific conda environments. This should
+# be equal to the "--env_dir" flag passed to conda_env_setup.sh. If left
+# blank, the framework will look for its environments in conda_root/envs
+conda_env_root: "../conda_envs"
+# Location of micromamba executable; REQUIRED if using micromamba
+micromamba_exe: ""
+### Data type settings ###
+# set to true to handle data files > 4 GB
+large_file: False
+### Output Settings ###
+# Set to true to have PODs save postscript figures in addition to bitmaps.
+save_ps: False
+# If true, leave pp data in OUTPUT_DIR after preprocessing; if false, delete pp data after PODs
+# run to completion
+save_pp_data: True
+# Set to true to perform data translation; default is True:
+translate_data: True
+# Set to true to save HTML and bitmap plots in a .tar file.
+make_variab_tar: False
+# Set to true to overwrite results in OUTPUT_DIR; otherwise results saved
+# under a unique name.
+overwrite: False
+# Generate html output for multiple figures per case
+"make_multicase_figure_html": False
+### Developer settings ###
+# Set to True to run the preprocessor
+run_pp: True
+# Additional custom preprocessing scripts to run on data
+# place these scripts in the MDTF-diagnostics/user_scripts directory
+# The framework will run the specified scripts whether run_pp is set to True or False
+user_pp_scripts:
+ - ""
diff --git a/src/conda/env_dev.yml b/src/conda/env_dev.yml
index 367390b96..b5883caee 100644
--- a/src/conda/env_dev.yml
+++ b/src/conda/env_dev.yml
@@ -41,7 +41,7 @@ dependencies:
- cloud_sptheme
- snakeviz=2.2.0
- graphviz=2.50.0
-- pip=24.3.1
-- pip:
- - gprof2dot==2024.6.6
- - viztracer==1.0.0
+#- pip=24.3.1
+#- pip:
+# - gprof2dot==2024.6.6
+# - viztracer==1.0.0
diff --git a/src/conda/env_python3_base.yml b/src/conda/env_python3_base.yml
index 69136076a..96e416d8c 100644
--- a/src/conda/env_python3_base.yml
+++ b/src/conda/env_python3_base.yml
@@ -40,16 +40,15 @@ dependencies:
- bottleneck=1.3.8
- gfortran=11.3.0
- momlevel=1.0.1
-- pip=23.3.1
-- pip:
- - falwa==2.0.0
- - cmocean
- - regionmask
- - momgrid==1.9.0
- - git+https://github.com/jkrasting/doralite
- - git+https://github.com/raphaeldussin/static_downsampler
- - git+https://github.com/jkrasting/xcompare
- - git+https://github.com/raphaeldussin/xoverturning
- - git+https://github.com/jkrasting/xwavelet
- - git+https://github.com/jetesdal/xwmt
- - git+https://github.com/raphaeldussin/om4labs
+#- pip=23.3.1
+#- pip:
+# - falwa==2.0.0
+# - cmocean
+# - regionmask
+# - global_land_mask
+# - git+https://github.com/raphaeldussin/static_downsampler
+# - git+https://github.com/jkrasting/xcompare
+# - git+https://github.com/raphaeldussin/xoverturning
+# - git+https://github.com/jkrasting/xwavelet
+# - git+https://github.com/jetesdal/xwmt
+# - git+https://github.com/raphaeldussin/om4labs
diff --git a/src/conda/env_python3_base_micromamba.yml b/src/conda/env_python3_base_micromamba.yml
index b3c81dd2a..6670285a1 100644
--- a/src/conda/env_python3_base_micromamba.yml
+++ b/src/conda/env_python3_base_micromamba.yml
@@ -37,17 +37,15 @@ dependencies:
- jupyterlab=4.1.0
- bottleneck=1.3.8
- gfortran=11.3.0
-- pip=23.3.1
-- pip:
- - cmocean
- - regionmask
- - falwa==2.0.0
- - momgrid==1.9.0
- - git+https://github.com/jkrasting/doralite
- - git+https://github.com/ajdawson/gridfill
- - git+https://github.com/raphaeldussin/static_downsampler
- - git+https://github.com/jkrasting/xcompare
- - git+https://github.com/raphaeldussin/xoverturning
- - git+https://github.com/jkrasting/xwavelet
- - git+https://github.com/jetesdal/xwmt
- - git+https://github.com/raphaeldussin/om4labs
+#- pip=23.3.1
+#- pip:
+# - cmocean
+# - regionmask
+# - falwa==2.0.0
+# - git+https://github.com/ajdawson/gridfill
+# - git+https://github.com/raphaeldussin/static_downsampler
+# - git+https://github.com/jkrasting/xcompare
+# - git+https://github.com/raphaeldussin/xoverturning
+# - git+https://github.com/jkrasting/xwavelet
+# - git+https://github.com/jetesdal/xwmt
+# - git+https://github.com/raphaeldussin/om4labs
diff --git a/tools/catalog_builder/test_builder_config.yml b/tools/catalog_builder/test_builder_config.yml
new file mode 100644
index 000000000..f0e5392d4
--- /dev/null
+++ b/tools/catalog_builder/test_builder_config.yml
@@ -0,0 +1,31 @@
+## Configuration file template for catalog_builder
+## DRS convention to use cmip | gfdl | cesm
+convention: cmip
+## IMPORTANT: Attempting to build a catalog of the entire contents of a pp directory will likely max out available PPAN resources (i.e., it takes longer than 12 hours to build a catalog for atmos/ts/monthly/5yr one node w/16 threads). It is strongly recommended to use include_patterns and/or exclude_patterns to target a specific subset of variables and dates to improve the performance of the catalog builder.
+## Path(s) to the root directory with the target dataset
+data_root_dirs:
+ - /Users/eriley/NOAA_POD/model_data/CESM2/tauu
+ - /Users/eriley/NOAA_POD/model_data/sftlf_historical/CESM2
+ # - /archive/oar.gfdl.cmip6/ESM4/DECK/ESM4_historical_D1/gfdl.ncrc4-intel16-prod-openmp/pp/atmos/ts/monthly/5yr
+## (optional) dataset id used to determine parser for selected convention. Accepted values: am5
+# dataset_id: am5
+## depth to traverse for files from data_root_dir(s)
+## (e.g., files that are in the root directory have dir_depth=1)
+dir_depth: 0
+## where to write catalog csv and json header files
+output_dir: /Users/eriley/mdtf/MDTF-diagnostics/diagnostics/WWEs
+## name of catalog (.csv and .json will be appended to catalog and header files)
+output_filename: esm_catalog_CESM2_historical_r1i1p1f1
+## number of threads: 16 (for job running on one analysis node)
+## The example catalog for the UDA directory takes a little over 5 min to build
+num_threads: 1
+## optional list of patterns to include in file and directory search
+#include_patterns:
+# - "*19800101-2014*.nc"
+# - "*hght.nc"
+# - "*slp.nc"
+# - "*t_surf.nc"
+# - "*t_ref.nc"
+## optional list of patterns to exclude from file and directory search
+exclude_patterns:
+ - "DO_NOT_USE"